1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/ThreadAutoLock.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_priv.h> 53 #include <vm/VMAddressSpace.h> 54 #include <vm/VMArea.h> 55 #include <vm/VMCache.h> 56 57 #include "VMAddressSpaceLocking.h" 58 #include "VMAnonymousCache.h" 59 #include "VMAnonymousNoSwapCache.h" 60 #include "IORequest.h" 61 62 63 //#define TRACE_VM 64 //#define TRACE_FAULTS 65 #ifdef TRACE_VM 66 # define TRACE(x) dprintf x 67 #else 68 # define TRACE(x) ; 69 #endif 70 #ifdef TRACE_FAULTS 71 # define FTRACE(x) dprintf x 72 #else 73 # define FTRACE(x) ; 74 #endif 75 76 77 namespace { 78 79 class AreaCacheLocking { 80 public: 81 inline bool Lock(VMCache* lockable) 82 { 83 return false; 84 } 85 86 inline void Unlock(VMCache* lockable) 87 { 88 vm_area_put_locked_cache(lockable); 89 } 90 }; 91 92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 93 public: 94 inline AreaCacheLocker(VMCache* cache = NULL) 95 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 96 { 97 } 98 99 inline AreaCacheLocker(VMArea* area) 100 : AutoLocker<VMCache, AreaCacheLocking>() 101 { 102 SetTo(area); 103 } 104 105 inline void SetTo(VMCache* cache, bool alreadyLocked) 106 { 107 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 108 } 109 110 inline void SetTo(VMArea* area) 111 { 112 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 113 area != NULL ? 
				vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

}	// namespace


// The memory reserve that an allocation of a given priority must not touch.
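// The table is indexed by allocation priority (user, system, VIP; see the
// per-entry comments below), so a higher-priority allocation may dig deeper
// into the remaining memory. Roughly, a reservation check is expected to look
// like the following sketch; the real check (vm_try_reserve_memory(), used in
// vm_create_anonymous_area() below) additionally handles waiting and the low
// resource manager:
//
//	if (sAvailableMemory - (off_t)size
//			< (off_t)kMemoryReserveForPriority[priority])
//		return B_NO_MEMORY;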
239 static const size_t kMemoryReserveForPriority[] = { 240 VM_MEMORY_RESERVE_USER, // user 241 VM_MEMORY_RESERVE_SYSTEM, // system 242 0 // VIP 243 }; 244 245 246 ObjectCache* gPageMappingsObjectCache; 247 248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 249 250 static off_t sAvailableMemory; 251 static off_t sNeededMemory; 252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 253 static uint32 sPageFaults; 254 255 static VMPhysicalPageMapper* sPhysicalPageMapper; 256 257 #if DEBUG_CACHE_LIST 258 259 struct cache_info { 260 VMCache* cache; 261 addr_t page_count; 262 addr_t committed; 263 }; 264 265 static const int kCacheInfoTableCount = 100 * 1024; 266 static cache_info* sCacheInfoTable; 267 268 #endif // DEBUG_CACHE_LIST 269 270 271 // function declarations 272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 273 bool addressSpaceCleanup); 274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 275 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 276 static status_t map_backing_store(VMAddressSpace* addressSpace, 277 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 278 int protection, int protectionMax, int mapping, uint32 flags, 279 const virtual_address_restrictions* addressRestrictions, bool kernel, 280 VMArea** _area, void** _virtualAddress); 281 static void fix_protection(uint32* protection); 282 283 284 // #pragma mark - 285 286 287 #if VM_PAGE_FAULT_TRACING 288 289 namespace VMPageFaultTracing { 290 291 class PageFaultStart : public AbstractTraceEntry { 292 public: 293 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 294 : 295 fAddress(address), 296 fPC(pc), 297 fWrite(write), 298 fUser(user) 299 { 300 Initialized(); 301 } 302 303 virtual void AddDump(TraceOutput& out) 304 { 305 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 306 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 307 } 308 309 private: 310 addr_t fAddress; 311 addr_t fPC; 312 bool fWrite; 313 bool fUser; 314 }; 315 316 317 // page fault errors 318 enum { 319 PAGE_FAULT_ERROR_NO_AREA = 0, 320 PAGE_FAULT_ERROR_KERNEL_ONLY, 321 PAGE_FAULT_ERROR_WRITE_PROTECTED, 322 PAGE_FAULT_ERROR_READ_PROTECTED, 323 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 324 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 325 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 326 }; 327 328 329 class PageFaultError : public AbstractTraceEntry { 330 public: 331 PageFaultError(area_id area, status_t error) 332 : 333 fArea(area), 334 fError(error) 335 { 336 Initialized(); 337 } 338 339 virtual void AddDump(TraceOutput& out) 340 { 341 switch (fError) { 342 case PAGE_FAULT_ERROR_NO_AREA: 343 out.Print("page fault error: no area"); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_ONLY: 346 out.Print("page fault error: area: %ld, kernel only", fArea); 347 break; 348 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 349 out.Print("page fault error: area: %ld, write protected", 350 fArea); 351 break; 352 case PAGE_FAULT_ERROR_READ_PROTECTED: 353 out.Print("page fault error: area: %ld, read protected", fArea); 354 break; 355 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 356 out.Print("page fault error: area: %ld, execute protected", 357 fArea); 358 break; 359 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 360 out.Print("page fault error: kernel touching bad user memory"); 361 break; 362 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 363 out.Print("page fault error: no address space"); 364 break; 365 default: 366 out.Print("page fault error: area: %ld, error: %s", fArea, 367 strerror(fError)); 368 break; 369 } 370 } 371 372 private: 373 area_id fArea; 374 status_t fError; 375 }; 376 377 378 class PageFaultDone : public AbstractTraceEntry { 379 public: 380 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 381 vm_page* page) 382 : 383 fArea(area), 384 fTopCache(topCache), 385 fCache(cache), 386 fPage(page) 387 { 388 Initialized(); 389 } 390 391 virtual void AddDump(TraceOutput& out) 392 { 393 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 394 "page: %p", fArea, fTopCache, fCache, fPage); 395 } 396 397 private: 398 area_id fArea; 399 VMCache* fTopCache; 400 VMCache* fCache; 401 vm_page* fPage; 402 }; 403 404 } // namespace VMPageFaultTracing 405 406 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 407 #else 408 # define TPF(x) ; 409 #endif // VM_PAGE_FAULT_TRACING 410 411 412 // #pragma mark - 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 increment_page_wired_count(vm_page* page) 419 { 420 if (!page->IsMapped()) 421 atomic_add(&gMappedPagesCount, 1); 422 page->IncrementWiredCount(); 423 } 424 425 426 /*! The page's cache must be locked. 427 */ 428 static inline void 429 decrement_page_wired_count(vm_page* page) 430 { 431 page->DecrementWiredCount(); 432 if (!page->IsMapped()) 433 atomic_add(&gMappedPagesCount, -1); 434 } 435 436 437 static inline addr_t 438 virtual_page_address(VMArea* area, vm_page* page) 439 { 440 return area->Base() 441 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 442 } 443 444 445 //! 
You need to have the address space locked when calling this function 446 static VMArea* 447 lookup_area(VMAddressSpace* addressSpace, area_id id) 448 { 449 VMAreaHash::ReadLock(); 450 451 VMArea* area = VMAreaHash::LookupLocked(id); 452 if (area != NULL && area->address_space != addressSpace) 453 area = NULL; 454 455 VMAreaHash::ReadUnlock(); 456 457 return area; 458 } 459 460 461 static status_t 462 allocate_area_page_protections(VMArea* area) 463 { 464 // In the page protections we store only the three user protections, 465 // so we use 4 bits per page. 466 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 467 area->page_protections = (uint8*)malloc_etc(bytes, 468 area->address_space == VMAddressSpace::Kernel() 469 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 470 if (area->page_protections == NULL) 471 return B_NO_MEMORY; 472 473 // init the page protections for all pages to that of the area 474 uint32 areaProtection = area->protection 475 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 476 memset(area->page_protections, areaProtection | (areaProtection << 4), 477 bytes); 478 return B_OK; 479 } 480 481 482 static inline void 483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 484 { 485 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 486 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 487 uint8& entry = area->page_protections[pageIndex / 2]; 488 if (pageIndex % 2 == 0) 489 entry = (entry & 0xf0) | protection; 490 else 491 entry = (entry & 0x0f) | (protection << 4); 492 } 493 494 495 static inline uint32 496 get_area_page_protection(VMArea* area, addr_t pageAddress) 497 { 498 if (area->page_protections == NULL) 499 return area->protection; 500 501 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 502 uint32 protection = area->page_protections[pageIndex / 2]; 503 if (pageIndex % 2 == 0) 504 protection &= 0x0f; 505 else 506 protection >>= 4; 507 508 // If this is a kernel area we translate the user flags to kernel flags. 509 if (area->address_space == VMAddressSpace::Kernel()) { 510 uint32 kernelProtection = 0; 511 if ((protection & B_READ_AREA) != 0) 512 kernelProtection |= B_KERNEL_READ_AREA; 513 if ((protection & B_WRITE_AREA) != 0) 514 kernelProtection |= B_KERNEL_WRITE_AREA; 515 516 return kernelProtection; 517 } 518 519 return protection | B_KERNEL_READ_AREA 520 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 521 } 522 523 524 /*! The caller must have reserved enough pages the translation map 525 implementation might need to map this page. 526 The page's cache must be locked. 527 */ 528 static status_t 529 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 530 vm_page_reservation* reservation) 531 { 532 VMTranslationMap* map = area->address_space->TranslationMap(); 533 534 bool wasMapped = page->IsMapped(); 535 536 if (area->wiring == B_NO_LOCK) { 537 DEBUG_PAGE_ACCESS_CHECK(page); 538 539 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 540 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 541 gPageMappingsObjectCache, 542 CACHE_DONT_WAIT_FOR_MEMORY 543 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 544 if (mapping == NULL) 545 return B_NO_MEMORY; 546 547 mapping->page = page; 548 mapping->area = area; 549 550 map->Lock(); 551 552 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 553 area->MemoryType(), reservation); 554 555 // insert mapping into lists 556 if (!page->IsMapped()) 557 atomic_add(&gMappedPagesCount, 1); 558 559 page->mappings.Add(mapping); 560 area->mappings.Add(mapping); 561 562 map->Unlock(); 563 } else { 564 DEBUG_PAGE_ACCESS_CHECK(page); 565 566 map->Lock(); 567 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 568 area->MemoryType(), reservation); 569 map->Unlock(); 570 571 increment_page_wired_count(page); 572 } 573 574 if (!wasMapped) { 575 // The page is mapped now, so we must not remain in the cached queue. 576 // It also makes sense to move it from the inactive to the active, since 577 // otherwise the page daemon wouldn't come to keep track of it (in idle 578 // mode) -- if the page isn't touched, it will be deactivated after a 579 // full iteration through the queue at the latest. 580 if (page->State() == PAGE_STATE_CACHED 581 || page->State() == PAGE_STATE_INACTIVE) { 582 vm_page_set_state(page, PAGE_STATE_ACTIVE); 583 } 584 } 585 586 return B_OK; 587 } 588 589 590 /*! If \a preserveModified is \c true, the caller must hold the lock of the 591 page's cache. 592 */ 593 static inline bool 594 unmap_page(VMArea* area, addr_t virtualAddress) 595 { 596 return area->address_space->TranslationMap()->UnmapPage(area, 597 virtualAddress, true); 598 } 599 600 601 /*! If \a preserveModified is \c true, the caller must hold the lock of all 602 mapped pages' caches. 603 */ 604 static inline void 605 unmap_pages(VMArea* area, addr_t base, size_t size) 606 { 607 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 608 } 609 610 611 static inline bool 612 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 613 { 614 if (address < area->Base()) { 615 offset = area->Base() - address; 616 if (offset >= size) 617 return false; 618 619 address = area->Base(); 620 size -= offset; 621 offset = 0; 622 if (size > area->Size()) 623 size = area->Size(); 624 625 return true; 626 } 627 628 offset = address - area->Base(); 629 if (offset >= area->Size()) 630 return false; 631 632 if (size >= area->Size() - offset) 633 size = area->Size() - offset; 634 635 return true; 636 } 637 638 639 /*! Cuts a piece out of an area. If the given cut range covers the complete 640 area, it is deleted. If it covers the beginning or the end, the area is 641 resized accordingly. If the range covers some part in the middle of the 642 area, it is split in two; in this case the second area is returned via 643 \a _secondArea (the variable is left untouched in the other cases). 644 The address space must be write locked. 645 The caller must ensure that no part of the given range is wired. 646 */ 647 static status_t 648 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 649 addr_t size, VMArea** _secondArea, bool kernel) 650 { 651 addr_t offset; 652 if (!intersect_area(area, address, size, offset)) 653 return B_OK; 654 655 // Is the area fully covered? 
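	// For illustration (assuming 4 KiB pages): for an area at base 0x1000
	// with size 0x4000, cutting [0x2000, 0x3000) leaves the first page in
	// this area and moves the last two pages into a new second area at
	// 0x3000 -- the "cut out of the middle" case handled at the end of this
	// function.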
656 if (address == area->Base() && size == area->Size()) { 657 delete_area(addressSpace, area, false); 658 return B_OK; 659 } 660 661 int priority; 662 uint32 allocationFlags; 663 if (addressSpace == VMAddressSpace::Kernel()) { 664 priority = VM_PRIORITY_SYSTEM; 665 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 666 | HEAP_DONT_LOCK_KERNEL_SPACE; 667 } else { 668 priority = VM_PRIORITY_USER; 669 allocationFlags = 0; 670 } 671 672 VMCache* cache = vm_area_get_locked_cache(area); 673 VMCacheChainLocker cacheChainLocker(cache); 674 cacheChainLocker.LockAllSourceCaches(); 675 676 // If no one else uses the area's cache and it's an anonymous cache, we can 677 // resize or split it, too. 678 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 679 && cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM; 680 681 // Cut the end only? 682 if (offset > 0 && size == area->Size() - offset) { 683 status_t error = addressSpace->ShrinkAreaTail(area, offset, 684 allocationFlags); 685 if (error != B_OK) 686 return error; 687 688 // unmap pages 689 unmap_pages(area, address, size); 690 691 if (onlyCacheUser) { 692 // Since VMCache::Resize() can temporarily drop the lock, we must 693 // unlock all lower caches to prevent locking order inversion. 694 cacheChainLocker.Unlock(cache); 695 cache->Resize(cache->virtual_base + offset, priority); 696 cache->ReleaseRefAndUnlock(); 697 } 698 699 return B_OK; 700 } 701 702 // Cut the beginning only? 703 if (area->Base() == address) { 704 // resize the area 705 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 706 allocationFlags); 707 if (error != B_OK) 708 return error; 709 710 // unmap pages 711 unmap_pages(area, address, size); 712 713 if (onlyCacheUser) { 714 // Since VMCache::Rebase() can temporarily drop the lock, we must 715 // unlock all lower caches to prevent locking order inversion. 716 cacheChainLocker.Unlock(cache); 717 cache->Rebase(cache->virtual_base + size, priority); 718 cache->ReleaseRefAndUnlock(); 719 } 720 area->cache_offset += size; 721 722 return B_OK; 723 } 724 725 // The tough part -- cut a piece out of the middle of the area. 726 // We do that by shrinking the area to the begin section and creating a 727 // new area for the end section. 728 addr_t firstNewSize = offset; 729 addr_t secondBase = address + size; 730 addr_t secondSize = area->Size() - offset - size; 731 732 // unmap pages 733 unmap_pages(area, address, area->Size() - firstNewSize); 734 735 // resize the area 736 addr_t oldSize = area->Size(); 737 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 738 allocationFlags); 739 if (error != B_OK) 740 return error; 741 742 virtual_address_restrictions addressRestrictions = {}; 743 addressRestrictions.address = (void*)secondBase; 744 addressRestrictions.address_specification = B_EXACT_ADDRESS; 745 VMArea* secondArea; 746 747 if (onlyCacheUser) { 748 // Create a new cache for the second area. 749 VMCache* secondCache; 750 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 751 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 752 if (error != B_OK) { 753 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 754 return error; 755 } 756 757 secondCache->Lock(); 758 secondCache->temporary = cache->temporary; 759 secondCache->virtual_base = area->cache_offset; 760 secondCache->virtual_end = area->cache_offset + secondSize; 761 762 // Transfer the concerned pages from the first cache. 
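		// (adoptOffset is the second area's start expressed as an offset into
		// the old cache: the old cache_offset plus the distance of secondBase
		// from the old area's base. The last argument to Adopt() -- presumably
		// the target offset -- matches the new cache's virtual_base set
		// above.)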
763 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 764 error = secondCache->Adopt(cache, adoptOffset, secondSize, 765 area->cache_offset); 766 767 if (error == B_OK) { 768 // Since VMCache::Resize() can temporarily drop the lock, we must 769 // unlock all lower caches to prevent locking order inversion. 770 cacheChainLocker.Unlock(cache); 771 cache->Resize(cache->virtual_base + firstNewSize, priority); 772 // Don't unlock the cache yet because we might have to resize it 773 // back. 774 775 // Map the second area. 776 error = map_backing_store(addressSpace, secondCache, 777 area->cache_offset, area->name, secondSize, area->wiring, 778 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 779 &addressRestrictions, kernel, &secondArea, NULL); 780 } 781 782 if (error != B_OK) { 783 // Restore the original cache. 784 cache->Resize(cache->virtual_base + oldSize, priority); 785 786 // Move the pages back. 787 status_t readoptStatus = cache->Adopt(secondCache, 788 area->cache_offset, secondSize, adoptOffset); 789 if (readoptStatus != B_OK) { 790 // Some (swap) pages have not been moved back and will be lost 791 // once the second cache is deleted. 792 panic("failed to restore cache range: %s", 793 strerror(readoptStatus)); 794 795 // TODO: Handle out of memory cases by freeing memory and 796 // retrying. 797 } 798 799 cache->ReleaseRefAndUnlock(); 800 secondCache->ReleaseRefAndUnlock(); 801 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 802 return error; 803 } 804 805 // Now we can unlock it. 806 cache->ReleaseRefAndUnlock(); 807 secondCache->Unlock(); 808 } else { 809 error = map_backing_store(addressSpace, cache, area->cache_offset 810 + (secondBase - area->Base()), 811 area->name, secondSize, area->wiring, area->protection, 812 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 813 &addressRestrictions, kernel, &secondArea, NULL); 814 if (error != B_OK) { 815 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 816 return error; 817 } 818 // We need a cache reference for the new area. 819 cache->AcquireRefLocked(); 820 } 821 822 if (_secondArea != NULL) 823 *_secondArea = secondArea; 824 825 return B_OK; 826 } 827 828 829 /*! Deletes or cuts all areas in the given address range. 830 The address space must be write-locked. 831 The caller must ensure that no part of the given range is wired. 832 */ 833 static status_t 834 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 835 bool kernel) 836 { 837 size = PAGE_ALIGN(size); 838 839 // Check, whether the caller is allowed to modify the concerned areas. 840 if (!kernel) { 841 for (VMAddressSpace::AreaRangeIterator it 842 = addressSpace->GetAreaRangeIterator(address, size); 843 VMArea* area = it.Next();) { 844 845 if ((area->protection & B_KERNEL_AREA) != 0) { 846 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 847 "unmap range of kernel area %" B_PRId32 " (%s)\n", 848 team_get_current_team_id(), area->id, area->name); 849 return B_NOT_ALLOWED; 850 } 851 } 852 } 853 854 for (VMAddressSpace::AreaRangeIterator it 855 = addressSpace->GetAreaRangeIterator(address, size); 856 VMArea* area = it.Next();) { 857 858 status_t error = cut_area(addressSpace, area, address, size, NULL, 859 kernel); 860 if (error != B_OK) 861 return error; 862 // Failing after already messing with areas is ugly, but we 863 // can't do anything about it. 
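			// (Earlier iterations may already have cut or deleted areas in
			// the range, and there is no transactional rollback here, so the
			// best we can do is report the error to the caller.)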
864 } 865 866 return B_OK; 867 } 868 869 870 static status_t 871 discard_area_range(VMArea* area, addr_t address, addr_t size) 872 { 873 addr_t offset; 874 if (!intersect_area(area, address, size, offset)) 875 return B_OK; 876 877 // If someone else uses the area's cache or it's not an anonymous cache, we 878 // can't discard. 879 VMCache* cache = vm_area_get_locked_cache(area); 880 if (cache->areas != area || area->cache_next != NULL 881 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 882 return B_OK; 883 } 884 885 VMCacheChainLocker cacheChainLocker(cache); 886 cacheChainLocker.LockAllSourceCaches(); 887 888 unmap_pages(area, address, size); 889 890 // Since VMCache::Discard() can temporarily drop the lock, we must 891 // unlock all lower caches to prevent locking order inversion. 892 cacheChainLocker.Unlock(cache); 893 cache->Discard(cache->virtual_base + offset, size); 894 cache->ReleaseRefAndUnlock(); 895 896 return B_OK; 897 } 898 899 900 static status_t 901 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 902 bool kernel) 903 { 904 for (VMAddressSpace::AreaRangeIterator it 905 = addressSpace->GetAreaRangeIterator(address, size); 906 VMArea* area = it.Next();) { 907 status_t error = discard_area_range(area, address, size); 908 if (error != B_OK) 909 return error; 910 } 911 912 return B_OK; 913 } 914 915 916 /*! You need to hold the lock of the cache and the write lock of the address 917 space when calling this function. 918 Note, that in case of error your cache will be temporarily unlocked. 919 If \a addressSpec is \c B_EXACT_ADDRESS and the 920 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 921 that no part of the specified address range (base \c *_virtualAddress, size 922 \a size) is wired. 
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area and must back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status == B_NO_MEMORY
		&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
		// TODO: At present, there is no way to notify the low_resource monitor
		// that kernel address space is fragmented, nor does it check for this
		// automatically.
Due to how many locks are held, we cannot wait here 1020 // for space to be freed up, but it would be good to at least notify 1021 // that we tried and failed to allocate some amount. 1022 } 1023 if (status != B_OK) 1024 goto err2; 1025 1026 // attach the cache to the area 1027 area->cache = cache; 1028 area->cache_offset = offset; 1029 1030 // point the cache back to the area 1031 cache->InsertAreaLocked(area); 1032 if (mapping == REGION_PRIVATE_MAP) 1033 cache->Unlock(); 1034 1035 // insert the area in the global area hash table 1036 VMAreaHash::Insert(area); 1037 1038 // grab a ref to the address space (the area holds this) 1039 addressSpace->Get(); 1040 1041 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1042 // cache, sourceCache, areaName, area); 1043 1044 *_area = area; 1045 return B_OK; 1046 1047 err2: 1048 if (mapping == REGION_PRIVATE_MAP) { 1049 // We created this cache, so we must delete it again. Note, that we 1050 // need to temporarily unlock the source cache or we'll otherwise 1051 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1052 sourceCache->Unlock(); 1053 cache->ReleaseRefAndUnlock(); 1054 sourceCache->Lock(); 1055 } 1056 err1: 1057 addressSpace->DeleteArea(area, allocationFlags); 1058 return status; 1059 } 1060 1061 1062 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1063 locker1, locker2). 1064 */ 1065 template<typename LockerType1, typename LockerType2> 1066 static inline bool 1067 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1068 { 1069 area->cache->AssertLocked(); 1070 1071 VMAreaUnwiredWaiter waiter; 1072 if (!area->AddWaiterIfWired(&waiter)) 1073 return false; 1074 1075 // unlock everything and wait 1076 if (locker1 != NULL) 1077 locker1->Unlock(); 1078 if (locker2 != NULL) 1079 locker2->Unlock(); 1080 1081 waiter.waitEntry.Wait(); 1082 1083 return true; 1084 } 1085 1086 1087 /*! Checks whether the given area has any wired ranges intersecting with the 1088 specified range and waits, if so. 1089 1090 When it has to wait, the function calls \c Unlock() on both \a locker1 1091 and \a locker2, if given. 1092 The area's top cache must be locked and must be unlocked as a side effect 1093 of calling \c Unlock() on either \a locker1 or \a locker2. 1094 1095 If the function does not have to wait it does not modify or unlock any 1096 object. 1097 1098 \param area The area to be checked. 1099 \param base The base address of the range to check. 1100 \param size The size of the address range to check. 1101 \param locker1 An object to be unlocked when before starting to wait (may 1102 be \c NULL). 1103 \param locker2 An object to be unlocked when before starting to wait (may 1104 be \c NULL). 1105 \return \c true, if the function had to wait, \c false otherwise. 1106 */ 1107 template<typename LockerType1, typename LockerType2> 1108 static inline bool 1109 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1110 LockerType1* locker1, LockerType2* locker2) 1111 { 1112 area->cache->AssertLocked(); 1113 1114 VMAreaUnwiredWaiter waiter; 1115 if (!area->AddWaiterIfWired(&waiter, base, size)) 1116 return false; 1117 1118 // unlock everything and wait 1119 if (locker1 != NULL) 1120 locker1->Unlock(); 1121 if (locker2 != NULL) 1122 locker2->Unlock(); 1123 1124 waiter.waitEntry.Wait(); 1125 1126 return true; 1127 } 1128 1129 1130 /*! 
Checks whether the given address space has any wired ranges intersecting 1131 with the specified range and waits, if so. 1132 1133 Similar to wait_if_area_range_is_wired(), with the following differences: 1134 - All areas intersecting with the range are checked (respectively all until 1135 one is found that contains a wired range intersecting with the given 1136 range). 1137 - The given address space must at least be read-locked and must be unlocked 1138 when \c Unlock() is called on \a locker. 1139 - None of the areas' caches are allowed to be locked. 1140 */ 1141 template<typename LockerType> 1142 static inline bool 1143 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1144 size_t size, LockerType* locker) 1145 { 1146 for (VMAddressSpace::AreaRangeIterator it 1147 = addressSpace->GetAreaRangeIterator(base, size); 1148 VMArea* area = it.Next();) { 1149 1150 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1151 1152 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1153 return true; 1154 } 1155 1156 return false; 1157 } 1158 1159 1160 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1161 It must be called in a situation where the kernel address space may be 1162 locked. 1163 */ 1164 status_t 1165 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1166 { 1167 AddressSpaceReadLocker locker; 1168 VMArea* area; 1169 status_t status = locker.SetFromArea(id, area); 1170 if (status != B_OK) 1171 return status; 1172 1173 if (area->page_protections == NULL) { 1174 status = allocate_area_page_protections(area); 1175 if (status != B_OK) 1176 return status; 1177 } 1178 1179 *cookie = (void*)area; 1180 return B_OK; 1181 } 1182 1183 1184 /*! This is a debug helper function that can only be used with very specific 1185 use cases. 1186 Sets protection for the given address range to the protection specified. 1187 If \a protection is 0 then the involved pages will be marked non-present 1188 in the translation map to cause a fault on access. The pages aren't 1189 actually unmapped however so that they can be marked present again with 1190 additional calls to this function. For this to work the area must be 1191 fully locked in memory so that the pages aren't otherwise touched. 1192 This function does not lock the kernel address space and needs to be 1193 supplied with a \a cookie retrieved from a successful call to 1194 vm_prepare_kernel_area_debug_protection(). 1195 */ 1196 status_t 1197 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1198 uint32 protection) 1199 { 1200 // check address range 1201 addr_t address = (addr_t)_address; 1202 size = PAGE_ALIGN(size); 1203 1204 if ((address % B_PAGE_SIZE) != 0 1205 || (addr_t)address + size < (addr_t)address 1206 || !IS_KERNEL_ADDRESS(address) 1207 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1208 return B_BAD_VALUE; 1209 } 1210 1211 // Translate the kernel protection to user protection as we only store that. 
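	// (The per-page protection array set up by
	// allocate_area_page_protections() keeps only the
	// B_{READ,WRITE,EXECUTE}_AREA bits, one nibble per page, so e.g.
	// B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA is stored as
	// B_READ_AREA | B_WRITE_AREA and translated back for kernel areas in
	// get_area_page_protection().)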
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookups will succeed
	// again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
		true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel()
			?
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1302 } 1303 1304 1305 status_t 1306 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1307 addr_t size, uint32 flags) 1308 { 1309 if (size == 0) 1310 return B_BAD_VALUE; 1311 1312 AddressSpaceWriteLocker locker(team); 1313 if (!locker.IsLocked()) 1314 return B_BAD_TEAM_ID; 1315 1316 virtual_address_restrictions addressRestrictions = {}; 1317 addressRestrictions.address = *_address; 1318 addressRestrictions.address_specification = addressSpec; 1319 VMAddressSpace* addressSpace = locker.AddressSpace(); 1320 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1321 addressSpace == VMAddressSpace::Kernel() 1322 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1323 _address); 1324 } 1325 1326 1327 area_id 1328 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1329 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1330 const virtual_address_restrictions* virtualAddressRestrictions, 1331 const physical_address_restrictions* physicalAddressRestrictions, 1332 bool kernel, void** _address) 1333 { 1334 VMArea* area; 1335 VMCache* cache; 1336 vm_page* page = NULL; 1337 bool isStack = (protection & B_STACK_AREA) != 0; 1338 page_num_t guardPages; 1339 bool canOvercommit = false; 1340 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1341 ? VM_PAGE_ALLOC_CLEAR : 0; 1342 1343 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1344 team, name, size)); 1345 1346 size = PAGE_ALIGN(size); 1347 guardSize = PAGE_ALIGN(guardSize); 1348 guardPages = guardSize / B_PAGE_SIZE; 1349 1350 if (size == 0 || size < guardSize) 1351 return B_BAD_VALUE; 1352 if (!arch_vm_supports_protection(protection)) 1353 return B_NOT_SUPPORTED; 1354 1355 if (team == B_CURRENT_TEAM) 1356 team = VMAddressSpace::CurrentID(); 1357 if (team < 0) 1358 return B_BAD_TEAM_ID; 1359 1360 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1361 canOvercommit = true; 1362 1363 #ifdef DEBUG_KERNEL_STACKS 1364 if ((protection & B_KERNEL_STACK_AREA) != 0) 1365 isStack = true; 1366 #endif 1367 1368 // check parameters 1369 switch (virtualAddressRestrictions->address_specification) { 1370 case B_ANY_ADDRESS: 1371 case B_EXACT_ADDRESS: 1372 case B_BASE_ADDRESS: 1373 case B_ANY_KERNEL_ADDRESS: 1374 case B_ANY_KERNEL_BLOCK_ADDRESS: 1375 case B_RANDOMIZED_ANY_ADDRESS: 1376 case B_RANDOMIZED_BASE_ADDRESS: 1377 break; 1378 1379 default: 1380 return B_BAD_VALUE; 1381 } 1382 1383 // If low or high physical address restrictions are given, we force 1384 // B_CONTIGUOUS wiring, since only then we'll use 1385 // vm_page_allocate_page_run() which deals with those restrictions. 
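	// (For illustration: a caller that needs physically contiguous memory
	// below 16 MB could pass physical_address_restrictions with
	// high_address = 16 * 1024 * 1024; the B_LOMEM case below sets up exactly
	// that combination.)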
1386 if (physicalAddressRestrictions->low_address != 0 1387 || physicalAddressRestrictions->high_address != 0) { 1388 wiring = B_CONTIGUOUS; 1389 } 1390 1391 physical_address_restrictions stackPhysicalRestrictions; 1392 bool doReserveMemory = false; 1393 switch (wiring) { 1394 case B_NO_LOCK: 1395 break; 1396 case B_FULL_LOCK: 1397 case B_LAZY_LOCK: 1398 case B_CONTIGUOUS: 1399 doReserveMemory = true; 1400 break; 1401 case B_ALREADY_WIRED: 1402 break; 1403 case B_LOMEM: 1404 stackPhysicalRestrictions = *physicalAddressRestrictions; 1405 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1406 physicalAddressRestrictions = &stackPhysicalRestrictions; 1407 wiring = B_CONTIGUOUS; 1408 doReserveMemory = true; 1409 break; 1410 case B_32_BIT_FULL_LOCK: 1411 if (B_HAIKU_PHYSICAL_BITS <= 32 1412 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1413 wiring = B_FULL_LOCK; 1414 doReserveMemory = true; 1415 break; 1416 } 1417 // TODO: We don't really support this mode efficiently. Just fall 1418 // through for now ... 1419 case B_32_BIT_CONTIGUOUS: 1420 #if B_HAIKU_PHYSICAL_BITS > 32 1421 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1422 stackPhysicalRestrictions = *physicalAddressRestrictions; 1423 stackPhysicalRestrictions.high_address 1424 = (phys_addr_t)1 << 32; 1425 physicalAddressRestrictions = &stackPhysicalRestrictions; 1426 } 1427 #endif 1428 wiring = B_CONTIGUOUS; 1429 doReserveMemory = true; 1430 break; 1431 default: 1432 return B_BAD_VALUE; 1433 } 1434 1435 // Optimization: For a single-page contiguous allocation without low/high 1436 // memory restriction B_FULL_LOCK wiring suffices. 1437 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1438 && physicalAddressRestrictions->low_address == 0 1439 && physicalAddressRestrictions->high_address == 0) { 1440 wiring = B_FULL_LOCK; 1441 } 1442 1443 // For full lock or contiguous areas we're also going to map the pages and 1444 // thus need to reserve pages for the mapping backend upfront. 1445 addr_t reservedMapPages = 0; 1446 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1447 AddressSpaceWriteLocker locker; 1448 status_t status = locker.SetTo(team); 1449 if (status != B_OK) 1450 return status; 1451 1452 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1453 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1454 } 1455 1456 int priority; 1457 if (team != VMAddressSpace::KernelID()) 1458 priority = VM_PRIORITY_USER; 1459 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1460 priority = VM_PRIORITY_VIP; 1461 else 1462 priority = VM_PRIORITY_SYSTEM; 1463 1464 // Reserve memory before acquiring the address space lock. This reduces the 1465 // chances of failure, since while holding the write lock to the address 1466 // space (if it is the kernel address space that is), the low memory handler 1467 // won't be able to free anything for us. 1468 addr_t reservedMemory = 0; 1469 if (doReserveMemory) { 1470 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1471 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1472 return B_NO_MEMORY; 1473 reservedMemory = size; 1474 // TODO: We don't reserve the memory for the pages for the page 1475 // directories/tables. We actually need to do since we currently don't 1476 // reclaim them (and probably can't reclaim all of them anyway). Thus 1477 // there are actually less physical pages than there should be, which 1478 // can get the VM into trouble in low memory situations. 
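		// (Ownership of this reservation: on success it is handed over to the
		// cache below via cache->committed_size, after which reservedMemory is
		// reset to 0; on the error paths it is returned through
		// vm_unreserve_memory() at err0.)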
1479 } 1480 1481 AddressSpaceWriteLocker locker; 1482 VMAddressSpace* addressSpace; 1483 status_t status; 1484 1485 // For full lock areas reserve the pages before locking the address 1486 // space. E.g. block caches can't release their memory while we hold the 1487 // address space lock. 1488 page_num_t reservedPages = reservedMapPages; 1489 if (wiring == B_FULL_LOCK) 1490 reservedPages += size / B_PAGE_SIZE; 1491 1492 vm_page_reservation reservation; 1493 if (reservedPages > 0) { 1494 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1495 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1496 priority)) { 1497 reservedPages = 0; 1498 status = B_WOULD_BLOCK; 1499 goto err0; 1500 } 1501 } else 1502 vm_page_reserve_pages(&reservation, reservedPages, priority); 1503 } 1504 1505 if (wiring == B_CONTIGUOUS) { 1506 // we try to allocate the page run here upfront as this may easily 1507 // fail for obvious reasons 1508 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1509 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1510 if (page == NULL) { 1511 status = B_NO_MEMORY; 1512 goto err0; 1513 } 1514 } 1515 1516 // Lock the address space and, if B_EXACT_ADDRESS and 1517 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1518 // is not wired. 1519 do { 1520 status = locker.SetTo(team); 1521 if (status != B_OK) 1522 goto err1; 1523 1524 addressSpace = locker.AddressSpace(); 1525 } while (virtualAddressRestrictions->address_specification 1526 == B_EXACT_ADDRESS 1527 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1528 && wait_if_address_range_is_wired(addressSpace, 1529 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1530 1531 // create an anonymous cache 1532 // if it's a stack, make sure that two pages are available at least 1533 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1534 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1535 wiring == B_NO_LOCK, priority); 1536 if (status != B_OK) 1537 goto err1; 1538 1539 cache->temporary = 1; 1540 cache->virtual_end = size; 1541 cache->committed_size = reservedMemory; 1542 // TODO: This should be done via a method. 
1543 reservedMemory = 0; 1544 1545 cache->Lock(); 1546 1547 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1548 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1549 virtualAddressRestrictions, kernel, &area, _address); 1550 1551 if (status != B_OK) { 1552 cache->ReleaseRefAndUnlock(); 1553 goto err1; 1554 } 1555 1556 locker.DegradeToReadLock(); 1557 1558 switch (wiring) { 1559 case B_NO_LOCK: 1560 case B_LAZY_LOCK: 1561 // do nothing - the pages are mapped in as needed 1562 break; 1563 1564 case B_FULL_LOCK: 1565 { 1566 // Allocate and map all pages for this area 1567 1568 off_t offset = 0; 1569 for (addr_t address = area->Base(); 1570 address < area->Base() + (area->Size() - 1); 1571 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1572 #ifdef DEBUG_KERNEL_STACKS 1573 # ifdef STACK_GROWS_DOWNWARDS 1574 if (isStack && address < area->Base() 1575 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1576 # else 1577 if (isStack && address >= area->Base() + area->Size() 1578 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1579 # endif 1580 continue; 1581 #endif 1582 vm_page* page = vm_page_allocate_page(&reservation, 1583 PAGE_STATE_WIRED | pageAllocFlags); 1584 cache->InsertPage(page, offset); 1585 map_page(area, page, address, protection, &reservation); 1586 1587 DEBUG_PAGE_ACCESS_END(page); 1588 } 1589 1590 break; 1591 } 1592 1593 case B_ALREADY_WIRED: 1594 { 1595 // The pages should already be mapped. This is only really useful 1596 // during boot time. Find the appropriate vm_page objects and stick 1597 // them in the cache object. 1598 VMTranslationMap* map = addressSpace->TranslationMap(); 1599 off_t offset = 0; 1600 1601 if (!gKernelStartup) 1602 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1603 1604 map->Lock(); 1605 1606 for (addr_t virtualAddress = area->Base(); 1607 virtualAddress < area->Base() + (area->Size() - 1); 1608 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1609 phys_addr_t physicalAddress; 1610 uint32 flags; 1611 status = map->Query(virtualAddress, &physicalAddress, &flags); 1612 if (status < B_OK) { 1613 panic("looking up mapping failed for va 0x%lx\n", 1614 virtualAddress); 1615 } 1616 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1617 if (page == NULL) { 1618 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1619 "\n", physicalAddress); 1620 } 1621 1622 DEBUG_PAGE_ACCESS_START(page); 1623 1624 cache->InsertPage(page, offset); 1625 increment_page_wired_count(page); 1626 vm_page_set_state(page, PAGE_STATE_WIRED); 1627 page->busy = false; 1628 1629 DEBUG_PAGE_ACCESS_END(page); 1630 } 1631 1632 map->Unlock(); 1633 break; 1634 } 1635 1636 case B_CONTIGUOUS: 1637 { 1638 // We have already allocated our continuous pages run, so we can now 1639 // just map them in the address space 1640 VMTranslationMap* map = addressSpace->TranslationMap(); 1641 phys_addr_t physicalAddress 1642 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1643 addr_t virtualAddress = area->Base(); 1644 off_t offset = 0; 1645 1646 map->Lock(); 1647 1648 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1649 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1650 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1651 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1652 if (page == NULL) 1653 panic("couldn't lookup physical page just allocated\n"); 1654 1655 status = map->Map(virtualAddress, physicalAddress, protection, 1656 area->MemoryType(), &reservation); 1657 if (status < B_OK) 1658 panic("couldn't map physical page in 
page run\n"); 1659 1660 cache->InsertPage(page, offset); 1661 increment_page_wired_count(page); 1662 1663 DEBUG_PAGE_ACCESS_END(page); 1664 } 1665 1666 map->Unlock(); 1667 break; 1668 } 1669 1670 default: 1671 break; 1672 } 1673 1674 cache->Unlock(); 1675 1676 if (reservedPages > 0) 1677 vm_page_unreserve_pages(&reservation); 1678 1679 TRACE(("vm_create_anonymous_area: done\n")); 1680 1681 area->cache_type = CACHE_TYPE_RAM; 1682 return area->id; 1683 1684 err1: 1685 if (wiring == B_CONTIGUOUS) { 1686 // we had reserved the area space upfront... 1687 phys_addr_t pageNumber = page->physical_page_number; 1688 int32 i; 1689 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1690 page = vm_lookup_page(pageNumber); 1691 if (page == NULL) 1692 panic("couldn't lookup physical page just allocated\n"); 1693 1694 vm_page_set_state(page, PAGE_STATE_FREE); 1695 } 1696 } 1697 1698 err0: 1699 if (reservedPages > 0) 1700 vm_page_unreserve_pages(&reservation); 1701 if (reservedMemory > 0) 1702 vm_unreserve_memory(reservedMemory); 1703 1704 return status; 1705 } 1706 1707 1708 area_id 1709 vm_map_physical_memory(team_id team, const char* name, void** _address, 1710 uint32 addressSpec, addr_t size, uint32 protection, 1711 phys_addr_t physicalAddress, bool alreadyWired) 1712 { 1713 VMArea* area; 1714 VMCache* cache; 1715 addr_t mapOffset; 1716 1717 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1718 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1719 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1720 addressSpec, size, protection, physicalAddress)); 1721 1722 if (!arch_vm_supports_protection(protection)) 1723 return B_NOT_SUPPORTED; 1724 1725 AddressSpaceWriteLocker locker(team); 1726 if (!locker.IsLocked()) 1727 return B_BAD_TEAM_ID; 1728 1729 // if the physical address is somewhat inside a page, 1730 // move the actual area down to align on a page boundary 1731 mapOffset = physicalAddress % B_PAGE_SIZE; 1732 size += mapOffset; 1733 physicalAddress -= mapOffset; 1734 1735 size = PAGE_ALIGN(size); 1736 1737 // create a device cache 1738 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1739 if (status != B_OK) 1740 return status; 1741 1742 cache->virtual_end = size; 1743 1744 cache->Lock(); 1745 1746 virtual_address_restrictions addressRestrictions = {}; 1747 addressRestrictions.address = *_address; 1748 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1749 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1750 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1751 true, &area, _address); 1752 1753 if (status < B_OK) 1754 cache->ReleaseRefLocked(); 1755 1756 cache->Unlock(); 1757 1758 if (status == B_OK) { 1759 // set requested memory type -- use uncached, if not given 1760 uint32 memoryType = addressSpec & B_MTR_MASK; 1761 if (memoryType == 0) 1762 memoryType = B_MTR_UC; 1763 1764 area->SetMemoryType(memoryType); 1765 1766 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1767 if (status != B_OK) 1768 delete_area(locker.AddressSpace(), area, false); 1769 } 1770 1771 if (status != B_OK) 1772 return status; 1773 1774 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1775 1776 if (alreadyWired) { 1777 // The area is already mapped, but possibly not with the right 1778 // memory type. 
1779 map->Lock(); 1780 map->ProtectArea(area, area->protection); 1781 map->Unlock(); 1782 } else { 1783 // Map the area completely. 1784 1785 // reserve pages needed for the mapping 1786 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1787 area->Base() + (size - 1)); 1788 vm_page_reservation reservation; 1789 vm_page_reserve_pages(&reservation, reservePages, 1790 team == VMAddressSpace::KernelID() 1791 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1792 1793 map->Lock(); 1794 1795 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1796 map->Map(area->Base() + offset, physicalAddress + offset, 1797 protection, area->MemoryType(), &reservation); 1798 } 1799 1800 map->Unlock(); 1801 1802 vm_page_unreserve_pages(&reservation); 1803 } 1804 1805 // modify the pointer returned to be offset back into the new area 1806 // the same way the physical address in was offset 1807 *_address = (void*)((addr_t)*_address + mapOffset); 1808 1809 area->cache_type = CACHE_TYPE_DEVICE; 1810 return area->id; 1811 } 1812 1813 1814 /*! Don't use! 1815 TODO: This function was introduced to map physical page vecs to 1816 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1817 use a device cache and does not track vm_page::wired_count! 1818 */ 1819 area_id 1820 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1821 uint32 addressSpec, addr_t* _size, uint32 protection, 1822 struct generic_io_vec* vecs, uint32 vecCount) 1823 { 1824 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1825 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1826 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1827 addressSpec, _size, protection, vecs, vecCount)); 1828 1829 if (!arch_vm_supports_protection(protection) 1830 || (addressSpec & B_MTR_MASK) != 0) { 1831 return B_NOT_SUPPORTED; 1832 } 1833 1834 AddressSpaceWriteLocker locker(team); 1835 if (!locker.IsLocked()) 1836 return B_BAD_TEAM_ID; 1837 1838 if (vecCount == 0) 1839 return B_BAD_VALUE; 1840 1841 addr_t size = 0; 1842 for (uint32 i = 0; i < vecCount; i++) { 1843 if (vecs[i].base % B_PAGE_SIZE != 0 1844 || vecs[i].length % B_PAGE_SIZE != 0) { 1845 return B_BAD_VALUE; 1846 } 1847 1848 size += vecs[i].length; 1849 } 1850 1851 // create a device cache 1852 VMCache* cache; 1853 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1854 if (result != B_OK) 1855 return result; 1856 1857 cache->virtual_end = size; 1858 1859 cache->Lock(); 1860 1861 VMArea* area; 1862 virtual_address_restrictions addressRestrictions = {}; 1863 addressRestrictions.address = *_address; 1864 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1865 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1866 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1867 &addressRestrictions, true, &area, _address); 1868 1869 if (result != B_OK) 1870 cache->ReleaseRefLocked(); 1871 1872 cache->Unlock(); 1873 1874 if (result != B_OK) 1875 return result; 1876 1877 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1878 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1879 area->Base() + (size - 1)); 1880 1881 vm_page_reservation reservation; 1882 vm_page_reserve_pages(&reservation, reservePages, 1883 team == VMAddressSpace::KernelID() 1884 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1885 map->Lock(); 1886 1887 uint32 vecIndex = 0; 1888 size_t vecOffset = 0; 1889 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1890 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1891 vecOffset = 0; 1892 vecIndex++; 1893 } 1894 1895 if (vecIndex >= vecCount) 1896 break; 1897 1898 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1899 protection, area->MemoryType(), &reservation); 1900 1901 vecOffset += B_PAGE_SIZE; 1902 } 1903 1904 map->Unlock(); 1905 vm_page_unreserve_pages(&reservation); 1906 1907 if (_size != NULL) 1908 *_size = size; 1909 1910 area->cache_type = CACHE_TYPE_DEVICE; 1911 return area->id; 1912 } 1913 1914 1915 area_id 1916 vm_create_null_area(team_id team, const char* name, void** address, 1917 uint32 addressSpec, addr_t size, uint32 flags) 1918 { 1919 size = PAGE_ALIGN(size); 1920 1921 // Lock the address space and, if B_EXACT_ADDRESS and 1922 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1923 // is not wired. 1924 AddressSpaceWriteLocker locker; 1925 do { 1926 if (locker.SetTo(team) != B_OK) 1927 return B_BAD_TEAM_ID; 1928 } while (addressSpec == B_EXACT_ADDRESS 1929 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1930 && wait_if_address_range_is_wired(locker.AddressSpace(), 1931 (addr_t)*address, size, &locker)); 1932 1933 // create a null cache 1934 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1935 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1936 VMCache* cache; 1937 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1938 if (status != B_OK) 1939 return status; 1940 1941 cache->temporary = 1; 1942 cache->virtual_end = size; 1943 1944 cache->Lock(); 1945 1946 VMArea* area; 1947 virtual_address_restrictions addressRestrictions = {}; 1948 addressRestrictions.address = *address; 1949 addressRestrictions.address_specification = addressSpec; 1950 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1951 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 1952 REGION_NO_PRIVATE_MAP, flags, 1953 &addressRestrictions, true, &area, address); 1954 1955 if (status < B_OK) { 1956 cache->ReleaseRefAndUnlock(); 1957 return status; 1958 } 1959 1960 cache->Unlock(); 1961 1962 area->cache_type = CACHE_TYPE_NULL; 1963 return area->id; 1964 } 1965 1966 1967 /*! Creates the vnode cache for the specified \a vnode. 1968 The vnode has to be marked busy when calling this function. 1969 */ 1970 status_t 1971 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1972 { 1973 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1974 } 1975 1976 1977 /*! \a cache must be locked. The area's address space must be read-locked. 
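	Maps the cache's resident pages that fall within \a area ahead of time, so
	that later accesses do not have to fault them in. Busy pages and pages with
	a zero usage count are skipped.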
1978 */ 1979 static void 1980 pre_map_area_pages(VMArea* area, VMCache* cache, 1981 vm_page_reservation* reservation) 1982 { 1983 addr_t baseAddress = area->Base(); 1984 addr_t cacheOffset = area->cache_offset; 1985 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1986 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1987 1988 for (VMCachePagesTree::Iterator it 1989 = cache->pages.GetIterator(firstPage, true, true); 1990 vm_page* page = it.Next();) { 1991 if (page->cache_offset >= endPage) 1992 break; 1993 1994 // skip busy and inactive pages 1995 if (page->busy || page->usage_count == 0) 1996 continue; 1997 1998 DEBUG_PAGE_ACCESS_START(page); 1999 map_page(area, page, 2000 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2001 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2002 DEBUG_PAGE_ACCESS_END(page); 2003 } 2004 } 2005 2006 2007 /*! Will map the file specified by \a fd to an area in memory. 2008 The file will be mirrored beginning at the specified \a offset. The 2009 \a offset and \a size arguments have to be page aligned. 2010 */ 2011 static area_id 2012 _vm_map_file(team_id team, const char* name, void** _address, 2013 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2014 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2015 { 2016 // TODO: for binary files, we want to make sure that they get the 2017 // copy of a file at a given time, i.e. later changes should not 2018 // make it into the mapped copy -- this will need quite some changes 2019 // to be done in a nice way 2020 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2021 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2022 2023 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2024 size = PAGE_ALIGN(size); 2025 2026 if (mapping == REGION_NO_PRIVATE_MAP) 2027 protection |= B_SHARED_AREA; 2028 if (addressSpec != B_EXACT_ADDRESS) 2029 unmapAddressRange = false; 2030 2031 if (fd < 0) { 2032 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2033 virtual_address_restrictions virtualRestrictions = {}; 2034 virtualRestrictions.address = *_address; 2035 virtualRestrictions.address_specification = addressSpec; 2036 physical_address_restrictions physicalRestrictions = {}; 2037 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2038 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2039 _address); 2040 } 2041 2042 // get the open flags of the FD 2043 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2044 if (descriptor == NULL) 2045 return EBADF; 2046 int32 openMode = descriptor->open_mode; 2047 put_fd(descriptor); 2048 2049 // The FD must be open for reading at any rate. For shared mapping with write 2050 // access, additionally the FD must be open for writing.
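	// For example: an O_RDONLY descriptor may back a private (copy-on-write)
	// mapping or a read-only shared mapping, but not a writable shared one,
	// and an O_WRONLY descriptor is rejected outright, since even read-only
	// mappings have to read the file's pages.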
2051 if ((openMode & O_ACCMODE) == O_WRONLY 2052 || (mapping == REGION_NO_PRIVATE_MAP 2053 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2054 && (openMode & O_ACCMODE) == O_RDONLY)) { 2055 return EACCES; 2056 } 2057 2058 uint32 protectionMax = 0; 2059 if (mapping != REGION_PRIVATE_MAP) { 2060 protectionMax = protection | B_READ_AREA; 2061 if ((openMode & O_ACCMODE) == O_RDWR) 2062 protectionMax |= B_WRITE_AREA; 2063 } 2064 2065 // get the vnode for the object, this also grabs a ref to it 2066 struct vnode* vnode = NULL; 2067 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2068 if (status < B_OK) 2069 return status; 2070 VnodePutter vnodePutter(vnode); 2071 2072 // If we're going to pre-map pages, we need to reserve the pages needed by 2073 // the mapping backend upfront. 2074 page_num_t reservedPreMapPages = 0; 2075 vm_page_reservation reservation; 2076 if ((protection & B_READ_AREA) != 0) { 2077 AddressSpaceWriteLocker locker; 2078 status = locker.SetTo(team); 2079 if (status != B_OK) 2080 return status; 2081 2082 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2083 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2084 2085 locker.Unlock(); 2086 2087 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2088 team == VMAddressSpace::KernelID() 2089 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2090 } 2091 2092 struct PageUnreserver { 2093 PageUnreserver(vm_page_reservation* reservation) 2094 : 2095 fReservation(reservation) 2096 { 2097 } 2098 2099 ~PageUnreserver() 2100 { 2101 if (fReservation != NULL) 2102 vm_page_unreserve_pages(fReservation); 2103 } 2104 2105 vm_page_reservation* fReservation; 2106 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2107 2108 // Lock the address space and, if the specified address range shall be 2109 // unmapped, ensure it is not wired. 2110 AddressSpaceWriteLocker locker; 2111 do { 2112 if (locker.SetTo(team) != B_OK) 2113 return B_BAD_TEAM_ID; 2114 } while (unmapAddressRange 2115 && wait_if_address_range_is_wired(locker.AddressSpace(), 2116 (addr_t)*_address, size, &locker)); 2117 2118 // TODO: this only works for file systems that use the file cache 2119 VMCache* cache; 2120 status = vfs_get_vnode_cache(vnode, &cache, false); 2121 if (status < B_OK) 2122 return status; 2123 2124 cache->Lock(); 2125 2126 VMArea* area; 2127 virtual_address_restrictions addressRestrictions = {}; 2128 addressRestrictions.address = *_address; 2129 addressRestrictions.address_specification = addressSpec; 2130 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2131 0, protection, protectionMax, mapping, 2132 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2133 &addressRestrictions, kernel, &area, _address); 2134 2135 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2136 // map_backing_store() cannot know we no longer need the ref 2137 cache->ReleaseRefLocked(); 2138 } 2139 2140 if (status == B_OK && (protection & B_READ_AREA) != 0) 2141 pre_map_area_pages(area, cache, &reservation); 2142 2143 cache->Unlock(); 2144 2145 if (status == B_OK) { 2146 // TODO: this probably deserves a smarter solution, ie. don't always 2147 // prefetch stuff, and also, probably don't trigger it at this place. 
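		// Kick off read-ahead for the beginning of the mapping, so that the
		// first page faults are more likely to find their pages already in
		// the file cache.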
2148 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2149 // prefetches at max 10 MB starting from "offset" 2150 } 2151 2152 if (status != B_OK) 2153 return status; 2154 2155 area->cache_type = CACHE_TYPE_VNODE; 2156 return area->id; 2157 } 2158 2159 2160 area_id 2161 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2162 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2163 int fd, off_t offset) 2164 { 2165 if (!arch_vm_supports_protection(protection)) 2166 return B_NOT_SUPPORTED; 2167 2168 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2169 mapping, unmapAddressRange, fd, offset, true); 2170 } 2171 2172 2173 VMCache* 2174 vm_area_get_locked_cache(VMArea* area) 2175 { 2176 rw_lock_read_lock(&sAreaCacheLock); 2177 2178 while (true) { 2179 VMCache* cache = area->cache; 2180 2181 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2182 // cache has been deleted 2183 rw_lock_read_lock(&sAreaCacheLock); 2184 continue; 2185 } 2186 2187 rw_lock_read_lock(&sAreaCacheLock); 2188 2189 if (cache == area->cache) { 2190 cache->AcquireRefLocked(); 2191 rw_lock_read_unlock(&sAreaCacheLock); 2192 return cache; 2193 } 2194 2195 // the cache changed in the meantime 2196 cache->Unlock(); 2197 } 2198 } 2199 2200 2201 void 2202 vm_area_put_locked_cache(VMCache* cache) 2203 { 2204 cache->ReleaseRefAndUnlock(); 2205 } 2206 2207 2208 area_id 2209 vm_clone_area(team_id team, const char* name, void** address, 2210 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2211 bool kernel) 2212 { 2213 VMArea* newArea = NULL; 2214 VMArea* sourceArea; 2215 2216 // Check whether the source area exists and is cloneable. If so, mark it 2217 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2218 { 2219 AddressSpaceWriteLocker locker; 2220 status_t status = locker.SetFromArea(sourceID, sourceArea); 2221 if (status != B_OK) 2222 return status; 2223 2224 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2225 return B_NOT_ALLOWED; 2226 2227 sourceArea->protection |= B_SHARED_AREA; 2228 protection |= B_SHARED_AREA; 2229 } 2230 2231 // Now lock both address spaces and actually do the cloning. 
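	// The checks above were done under the source's address space lock only;
	// the area may have changed or disappeared since. It is therefore looked
	// up and validated again below, once both address spaces are locked.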
2232 2233 MultiAddressSpaceLocker locker; 2234 VMAddressSpace* sourceAddressSpace; 2235 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2236 if (status != B_OK) 2237 return status; 2238 2239 VMAddressSpace* targetAddressSpace; 2240 status = locker.AddTeam(team, true, &targetAddressSpace); 2241 if (status != B_OK) 2242 return status; 2243 2244 status = locker.Lock(); 2245 if (status != B_OK) 2246 return status; 2247 2248 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2249 if (sourceArea == NULL) 2250 return B_BAD_VALUE; 2251 2252 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2253 return B_NOT_ALLOWED; 2254 2255 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2256 2257 if (!kernel && sourceAddressSpace != targetAddressSpace 2258 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2259 #if KDEBUG 2260 Team* team = thread_get_current_thread()->team; 2261 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2262 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2263 #endif 2264 status = B_NOT_ALLOWED; 2265 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2266 status = B_NOT_ALLOWED; 2267 } else { 2268 virtual_address_restrictions addressRestrictions = {}; 2269 addressRestrictions.address = *address; 2270 addressRestrictions.address_specification = addressSpec; 2271 status = map_backing_store(targetAddressSpace, cache, 2272 sourceArea->cache_offset, name, sourceArea->Size(), 2273 sourceArea->wiring, protection, sourceArea->protection_max, 2274 mapping, 0, &addressRestrictions, 2275 kernel, &newArea, address); 2276 } 2277 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2278 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2279 // to create a new cache, and has therefore already acquired a reference 2280 // to the source cache - but otherwise it has no idea that we need 2281 // one. 2282 cache->AcquireRefLocked(); 2283 } 2284 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2285 // we need to map in everything at this point 2286 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2287 // we don't have actual pages to map but a physical area 2288 VMTranslationMap* map 2289 = sourceArea->address_space->TranslationMap(); 2290 map->Lock(); 2291 2292 phys_addr_t physicalAddress; 2293 uint32 oldProtection; 2294 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2295 2296 map->Unlock(); 2297 2298 map = targetAddressSpace->TranslationMap(); 2299 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2300 newArea->Base() + (newArea->Size() - 1)); 2301 2302 vm_page_reservation reservation; 2303 vm_page_reserve_pages(&reservation, reservePages, 2304 targetAddressSpace == VMAddressSpace::Kernel() 2305 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2306 map->Lock(); 2307 2308 for (addr_t offset = 0; offset < newArea->Size(); 2309 offset += B_PAGE_SIZE) { 2310 map->Map(newArea->Base() + offset, physicalAddress + offset, 2311 protection, newArea->MemoryType(), &reservation); 2312 } 2313 2314 map->Unlock(); 2315 vm_page_unreserve_pages(&reservation); 2316 } else { 2317 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2318 size_t reservePages = map->MaxPagesNeededToMap( 2319 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2320 vm_page_reservation reservation; 2321 vm_page_reserve_pages(&reservation, reservePages, 2322 targetAddressSpace == VMAddressSpace::Kernel() 2323 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2324 2325 // map in all pages from source 2326 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2327 vm_page* page = it.Next();) { 2328 if (!page->busy) { 2329 DEBUG_PAGE_ACCESS_START(page); 2330 map_page(newArea, page, 2331 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2332 - newArea->cache_offset), 2333 protection, &reservation); 2334 DEBUG_PAGE_ACCESS_END(page); 2335 } 2336 } 2337 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2338 // ensuring that! 2339 2340 vm_page_unreserve_pages(&reservation); 2341 } 2342 } 2343 if (status == B_OK) 2344 newArea->cache_type = sourceArea->cache_type; 2345 2346 vm_area_put_locked_cache(cache); 2347 2348 if (status < B_OK) 2349 return status; 2350 2351 return newArea->id; 2352 } 2353 2354 2355 /*! Deletes the specified area of the given address space. 2356 2357 The address space must be write-locked. 2358 The caller must ensure that the area does not have any wired ranges. 2359 2360 \param addressSpace The address space containing the area. 2361 \param area The area to be deleted. 2362 \param deletingAddressSpace \c true, if the address space is in the process 2363 of being deleted. 2364 */ 2365 static void 2366 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2367 bool deletingAddressSpace) 2368 { 2369 ASSERT(!area->IsWired()); 2370 2371 VMAreaHash::Remove(area); 2372 2373 // At this point the area is removed from the global hash table, but 2374 // still exists in the area list. 2375 2376 // Unmap the virtual address space the area occupied. 2377 { 2378 // We need to lock the complete cache chain. 2379 VMCache* topCache = vm_area_get_locked_cache(area); 2380 VMCacheChainLocker cacheChainLocker(topCache); 2381 cacheChainLocker.LockAllSourceCaches(); 2382 2383 // If the area's top cache is a temporary cache and the area is the only 2384 // one referencing it (besides us currently holding a second reference), 2385 // the unmapping code doesn't need to care about preserving the accessed 2386 // and dirty flags of the top cache page mappings. 2387 bool ignoreTopCachePageFlags 2388 = topCache->temporary && topCache->RefCount() == 2; 2389 2390 area->address_space->TranslationMap()->UnmapArea(area, 2391 deletingAddressSpace, ignoreTopCachePageFlags); 2392 } 2393 2394 if (!area->cache->temporary) 2395 area->cache->WriteModified(); 2396 2397 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2398 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2399 2400 arch_vm_unset_memory_type(area); 2401 addressSpace->RemoveArea(area, allocationFlags); 2402 addressSpace->Put(); 2403 2404 area->cache->RemoveArea(area); 2405 area->cache->ReleaseRef(); 2406 2407 addressSpace->DeleteArea(area, allocationFlags); 2408 } 2409 2410 2411 status_t 2412 vm_delete_area(team_id team, area_id id, bool kernel) 2413 { 2414 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2415 team, id)); 2416 2417 // lock the address space and make sure the area isn't wired 2418 AddressSpaceWriteLocker locker; 2419 VMArea* area; 2420 AreaCacheLocker cacheLocker; 2421 2422 do { 2423 status_t status = locker.SetFromArea(team, id, area); 2424 if (status != B_OK) 2425 return status; 2426 2427 cacheLocker.SetTo(area); 2428 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2429 2430 cacheLocker.Unlock(); 2431 2432 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2433 return B_NOT_ALLOWED; 2434 2435 delete_area(locker.AddressSpace(), area, false); 2436 return B_OK; 2437 } 2438 2439 2440 /*! Creates a new cache on top of given cache, moves all areas from 2441 the old cache to the new one, and changes the protection of all affected 2442 areas' pages to read-only. If requested, wired pages are moved up to the 2443 new cache and copies are added to the old cache in their place. 2444 Preconditions: 2445 - The given cache must be locked. 2446 - All of the cache's areas' address spaces must be read locked. 2447 - Either the cache must not have any wired ranges or a page reservation for 2448 all wired pages must be provided, so they can be copied. 2449 2450 \param lowerCache The cache on top of which a new cache shall be created. 2451 \param wiredPagesReservation If \c NULL there must not be any wired pages 2452 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2453 has wired page. The wired pages are copied in this case. 2454 */ 2455 static status_t 2456 vm_copy_on_write_area(VMCache* lowerCache, 2457 vm_page_reservation* wiredPagesReservation) 2458 { 2459 VMCache* upperCache; 2460 2461 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2462 2463 // We need to separate the cache from its areas. The cache goes one level 2464 // deeper and we create a new cache inbetween. 2465 2466 // create an anonymous cache 2467 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2468 lowerCache->GuardSize() / B_PAGE_SIZE, 2469 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2470 VM_PRIORITY_USER); 2471 if (status != B_OK) 2472 return status; 2473 2474 upperCache->Lock(); 2475 2476 upperCache->temporary = 1; 2477 upperCache->virtual_base = lowerCache->virtual_base; 2478 upperCache->virtual_end = lowerCache->virtual_end; 2479 2480 // transfer the lower cache areas to the upper cache 2481 rw_lock_write_lock(&sAreaCacheLock); 2482 upperCache->TransferAreas(lowerCache); 2483 rw_lock_write_unlock(&sAreaCacheLock); 2484 2485 lowerCache->AddConsumer(upperCache); 2486 2487 // We now need to remap all pages from all of the cache's areas read-only, 2488 // so that a copy will be created on next write access. If there are wired 2489 // pages, we keep their protection, move them to the upper cache and create 2490 // copies for the lower cache. 2491 if (wiredPagesReservation != NULL) { 2492 // We need to handle wired pages -- iterate through the cache's pages. 
2493 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2494 vm_page* page = it.Next();) { 2495 if (page->WiredCount() > 0) { 2496 // allocate a new page and copy the wired one 2497 vm_page* copiedPage = vm_page_allocate_page( 2498 wiredPagesReservation, PAGE_STATE_ACTIVE); 2499 2500 vm_memcpy_physical_page( 2501 copiedPage->physical_page_number * B_PAGE_SIZE, 2502 page->physical_page_number * B_PAGE_SIZE); 2503 2504 // move the wired page to the upper cache (note: removing is OK 2505 // with the SplayTree iterator) and insert the copy 2506 upperCache->MovePage(page); 2507 lowerCache->InsertPage(copiedPage, 2508 page->cache_offset * B_PAGE_SIZE); 2509 2510 DEBUG_PAGE_ACCESS_END(copiedPage); 2511 } else { 2512 // Change the protection of this page in all areas. 2513 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2514 tempArea = tempArea->cache_next) { 2515 // The area must be readable in the same way it was 2516 // previously writable. 2517 uint32 protection = B_KERNEL_READ_AREA; 2518 if ((tempArea->protection & B_READ_AREA) != 0) 2519 protection |= B_READ_AREA; 2520 2521 VMTranslationMap* map 2522 = tempArea->address_space->TranslationMap(); 2523 map->Lock(); 2524 map->ProtectPage(tempArea, 2525 virtual_page_address(tempArea, page), protection); 2526 map->Unlock(); 2527 } 2528 } 2529 } 2530 } else { 2531 ASSERT(lowerCache->WiredPagesCount() == 0); 2532 2533 // just change the protection of all areas 2534 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2535 tempArea = tempArea->cache_next) { 2536 // The area must be readable in the same way it was previously 2537 // writable. 2538 uint32 protection = B_KERNEL_READ_AREA; 2539 if ((tempArea->protection & B_READ_AREA) != 0) 2540 protection |= B_READ_AREA; 2541 2542 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2543 map->Lock(); 2544 map->ProtectArea(tempArea, protection); 2545 map->Unlock(); 2546 } 2547 } 2548 2549 vm_area_put_locked_cache(upperCache); 2550 2551 return B_OK; 2552 } 2553 2554 2555 area_id 2556 vm_copy_area(team_id team, const char* name, void** _address, 2557 uint32 addressSpec, area_id sourceID) 2558 { 2559 // Do the locking: target address space, all address spaces associated with 2560 // the source cache, and the cache itself. 2561 MultiAddressSpaceLocker locker; 2562 VMAddressSpace* targetAddressSpace; 2563 VMCache* cache; 2564 VMArea* source; 2565 AreaCacheLocker cacheLocker; 2566 status_t status; 2567 bool sharedArea; 2568 2569 page_num_t wiredPages = 0; 2570 vm_page_reservation wiredPagesReservation; 2571 2572 bool restart; 2573 do { 2574 restart = false; 2575 2576 locker.Unset(); 2577 status = locker.AddTeam(team, true, &targetAddressSpace); 2578 if (status == B_OK) { 2579 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2580 &cache); 2581 } 2582 if (status != B_OK) 2583 return status; 2584 2585 cacheLocker.SetTo(cache, true); // already locked 2586 2587 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2588 2589 page_num_t oldWiredPages = wiredPages; 2590 wiredPages = 0; 2591 2592 // If the source area isn't shared, count the number of wired pages in 2593 // the cache and reserve as many pages. 
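		// Since reserving pages can block, it is done with the address space
		// and cache locks dropped; afterwards the locking is restarted from
		// scratch, because the area and its cache may have changed in the
		// meantime.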
2594 if (!sharedArea) { 2595 wiredPages = cache->WiredPagesCount(); 2596 2597 if (wiredPages > oldWiredPages) { 2598 cacheLocker.Unlock(); 2599 locker.Unlock(); 2600 2601 if (oldWiredPages > 0) 2602 vm_page_unreserve_pages(&wiredPagesReservation); 2603 2604 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2605 VM_PRIORITY_USER); 2606 2607 restart = true; 2608 } 2609 } else if (oldWiredPages > 0) 2610 vm_page_unreserve_pages(&wiredPagesReservation); 2611 } while (restart); 2612 2613 // unreserve pages later 2614 struct PagesUnreserver { 2615 PagesUnreserver(vm_page_reservation* reservation) 2616 : 2617 fReservation(reservation) 2618 { 2619 } 2620 2621 ~PagesUnreserver() 2622 { 2623 if (fReservation != NULL) 2624 vm_page_unreserve_pages(fReservation); 2625 } 2626 2627 private: 2628 vm_page_reservation* fReservation; 2629 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2630 2631 bool writableCopy 2632 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2633 uint8* targetPageProtections = NULL; 2634 2635 if (source->page_protections != NULL) { 2636 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2637 targetPageProtections = (uint8*)malloc_etc(bytes, 2638 (source->address_space == VMAddressSpace::Kernel() 2639 || targetAddressSpace == VMAddressSpace::Kernel()) 2640 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2641 if (targetPageProtections == NULL) 2642 return B_NO_MEMORY; 2643 2644 memcpy(targetPageProtections, source->page_protections, bytes); 2645 2646 if (!writableCopy) { 2647 for (size_t i = 0; i < bytes; i++) { 2648 if ((targetPageProtections[i] 2649 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2650 writableCopy = true; 2651 break; 2652 } 2653 } 2654 } 2655 } 2656 2657 if (addressSpec == B_CLONE_ADDRESS) { 2658 addressSpec = B_EXACT_ADDRESS; 2659 *_address = (void*)source->Base(); 2660 } 2661 2662 // First, create a cache on top of the source area, respectively use the 2663 // existing one, if this is a shared area. 2664 2665 VMArea* target; 2666 virtual_address_restrictions addressRestrictions = {}; 2667 addressRestrictions.address = *_address; 2668 addressRestrictions.address_specification = addressSpec; 2669 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2670 name, source->Size(), source->wiring, source->protection, 2671 source->protection_max, 2672 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2673 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2674 &addressRestrictions, true, &target, _address); 2675 if (status < B_OK) { 2676 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2677 return status; 2678 } 2679 2680 if (targetPageProtections != NULL) 2681 target->page_protections = targetPageProtections; 2682 2683 if (sharedArea) { 2684 // The new area uses the old area's cache, but map_backing_store() 2685 // hasn't acquired a ref. So we have to do that now. 2686 cache->AcquireRefLocked(); 2687 } 2688 2689 // If the source area is writable, we need to move it one layer up as well 2690 2691 if (!sharedArea) { 2692 if (writableCopy) { 2693 // TODO: do something more useful if this fails! 2694 if (vm_copy_on_write_area(cache, 2695 wiredPages > 0 ? 
&wiredPagesReservation : NULL) < B_OK) { 2696 panic("vm_copy_on_write_area() failed!\n"); 2697 } 2698 } 2699 } 2700 2701 // we return the ID of the newly created area 2702 return target->id; 2703 } 2704 2705 2706 status_t 2707 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2708 bool kernel) 2709 { 2710 fix_protection(&newProtection); 2711 2712 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2713 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2714 2715 if (!arch_vm_supports_protection(newProtection)) 2716 return B_NOT_SUPPORTED; 2717 2718 bool becomesWritable 2719 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2720 2721 // lock address spaces and cache 2722 MultiAddressSpaceLocker locker; 2723 VMCache* cache; 2724 VMArea* area; 2725 status_t status; 2726 AreaCacheLocker cacheLocker; 2727 bool isWritable; 2728 2729 bool restart; 2730 do { 2731 restart = false; 2732 2733 locker.Unset(); 2734 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2735 if (status != B_OK) 2736 return status; 2737 2738 cacheLocker.SetTo(cache, true); // already locked 2739 2740 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2741 || (area->protection & B_KERNEL_AREA) != 0)) { 2742 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2743 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2744 " (%s)\n", team, newProtection, areaID, area->name); 2745 return B_NOT_ALLOWED; 2746 } 2747 if (!kernel && area->protection_max != 0 2748 && (newProtection & area->protection_max) 2749 != (newProtection & B_USER_PROTECTION)) { 2750 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2751 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2752 "area %" B_PRId32 " (%s)\n", team, newProtection, 2753 area->protection_max, areaID, area->name); 2754 return B_NOT_ALLOWED; 2755 } 2756 2757 if (area->protection == newProtection) 2758 return B_OK; 2759 2760 if (team != VMAddressSpace::KernelID() 2761 && area->address_space->ID() != team) { 2762 // unless you're the kernel, you are only allowed to set 2763 // the protection of your own areas 2764 return B_NOT_ALLOWED; 2765 } 2766 2767 isWritable 2768 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2769 2770 // Make sure the area (respectively, if we're going to call 2771 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2772 // wired ranges. 2773 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2774 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2775 otherArea = otherArea->cache_next) { 2776 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2777 restart = true; 2778 break; 2779 } 2780 } 2781 } else { 2782 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2783 restart = true; 2784 } 2785 } while (restart); 2786 2787 bool changePageProtection = true; 2788 bool changeTopCachePagesOnly = false; 2789 2790 if (isWritable && !becomesWritable) { 2791 // writable -> !writable 2792 2793 if (cache->source != NULL && cache->temporary) { 2794 if (cache->CountWritableAreas(area) == 0) { 2795 // Since this cache now lives from the pages in its source cache, 2796 // we can change the cache's commitment to take only those pages 2797 // into account that really are in this cache. 2798 2799 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2800 team == VMAddressSpace::KernelID() 2801 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2802 2803 // TODO: we may be able to join with our source cache, if 2804 // count == 0 2805 } 2806 } 2807 2808 // If only the writability changes, we can just remap the pages of the 2809 // top cache, since the pages of lower caches are mapped read-only 2810 // anyway. That's advantageous only, if the number of pages in the cache 2811 // is significantly smaller than the number of pages in the area, 2812 // though. 2813 if (newProtection 2814 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2815 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2816 changeTopCachePagesOnly = true; 2817 } 2818 } else if (!isWritable && becomesWritable) { 2819 // !writable -> writable 2820 2821 if (!cache->consumers.IsEmpty()) { 2822 // There are consumers -- we have to insert a new cache. Fortunately 2823 // vm_copy_on_write_area() does everything that's needed. 2824 changePageProtection = false; 2825 status = vm_copy_on_write_area(cache, NULL); 2826 } else { 2827 // No consumers, so we don't need to insert a new one. 2828 if (cache->source != NULL && cache->temporary) { 2829 // the cache's commitment must contain all possible pages 2830 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2831 team == VMAddressSpace::KernelID() 2832 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2833 } 2834 2835 if (status == B_OK && cache->source != NULL) { 2836 // There's a source cache, hence we can't just change all pages' 2837 // protection or we might allow writing into pages belonging to 2838 // a lower cache. 2839 changeTopCachePagesOnly = true; 2840 } 2841 } 2842 } else { 2843 // we don't have anything special to do in all other cases 2844 } 2845 2846 if (status == B_OK) { 2847 // remap existing pages in this cache 2848 if (changePageProtection) { 2849 VMTranslationMap* map = area->address_space->TranslationMap(); 2850 map->Lock(); 2851 2852 if (changeTopCachePagesOnly) { 2853 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2854 page_num_t lastPageOffset 2855 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2856 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2857 vm_page* page = it.Next();) { 2858 if (page->cache_offset >= firstPageOffset 2859 && page->cache_offset <= lastPageOffset) { 2860 addr_t address = virtual_page_address(area, page); 2861 map->ProtectPage(area, address, newProtection); 2862 } 2863 } 2864 } else 2865 map->ProtectArea(area, newProtection); 2866 2867 map->Unlock(); 2868 } 2869 2870 area->protection = newProtection; 2871 } 2872 2873 return status; 2874 } 2875 2876 2877 status_t 2878 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2879 { 2880 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2881 if (addressSpace == NULL) 2882 return B_BAD_TEAM_ID; 2883 2884 VMTranslationMap* map = addressSpace->TranslationMap(); 2885 2886 map->Lock(); 2887 uint32 dummyFlags; 2888 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2889 map->Unlock(); 2890 2891 addressSpace->Put(); 2892 return status; 2893 } 2894 2895 2896 /*! The page's cache must be locked. 
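	Returns \c true if the page is marked modified, either in the page itself
	or in any of its mappings.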
2897 */ 2898 bool 2899 vm_test_map_modification(vm_page* page) 2900 { 2901 if (page->modified) 2902 return true; 2903 2904 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2905 vm_page_mapping* mapping; 2906 while ((mapping = iterator.Next()) != NULL) { 2907 VMArea* area = mapping->area; 2908 VMTranslationMap* map = area->address_space->TranslationMap(); 2909 2910 phys_addr_t physicalAddress; 2911 uint32 flags; 2912 map->Lock(); 2913 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2914 map->Unlock(); 2915 2916 if ((flags & PAGE_MODIFIED) != 0) 2917 return true; 2918 } 2919 2920 return false; 2921 } 2922 2923 2924 /*! The page's cache must be locked. 2925 */ 2926 void 2927 vm_clear_map_flags(vm_page* page, uint32 flags) 2928 { 2929 if ((flags & PAGE_ACCESSED) != 0) 2930 page->accessed = false; 2931 if ((flags & PAGE_MODIFIED) != 0) 2932 page->modified = false; 2933 2934 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2935 vm_page_mapping* mapping; 2936 while ((mapping = iterator.Next()) != NULL) { 2937 VMArea* area = mapping->area; 2938 VMTranslationMap* map = area->address_space->TranslationMap(); 2939 2940 map->Lock(); 2941 map->ClearFlags(virtual_page_address(area, page), flags); 2942 map->Unlock(); 2943 } 2944 } 2945 2946 2947 /*! Removes all mappings from a page. 2948 After you've called this function, the page is unmapped from memory and 2949 the page's \c accessed and \c modified flags have been updated according 2950 to the state of the mappings. 2951 The page's cache must be locked. 2952 */ 2953 void 2954 vm_remove_all_page_mappings(vm_page* page) 2955 { 2956 while (vm_page_mapping* mapping = page->mappings.Head()) { 2957 VMArea* area = mapping->area; 2958 VMTranslationMap* map = area->address_space->TranslationMap(); 2959 addr_t address = virtual_page_address(area, page); 2960 map->UnmapPage(area, address, false); 2961 } 2962 } 2963 2964 2965 int32 2966 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2967 { 2968 int32 count = 0; 2969 2970 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2971 vm_page_mapping* mapping; 2972 while ((mapping = iterator.Next()) != NULL) { 2973 VMArea* area = mapping->area; 2974 VMTranslationMap* map = area->address_space->TranslationMap(); 2975 2976 bool modified; 2977 if (map->ClearAccessedAndModified(area, 2978 virtual_page_address(area, page), false, modified)) { 2979 count++; 2980 } 2981 2982 page->modified |= modified; 2983 } 2984 2985 2986 if (page->accessed) { 2987 count++; 2988 page->accessed = false; 2989 } 2990 2991 return count; 2992 } 2993 2994 2995 /*! Removes all mappings of a page and/or clears the accessed bits of the 2996 mappings. 2997 The function iterates through the page mappings and removes them until 2998 encountering one that has been accessed. From then on it will continue to 2999 iterate, but only clear the accessed flag of the mapping. The page's 3000 \c modified bit will be updated accordingly, the \c accessed bit will be 3001 cleared. 3002 \return The number of mapping accessed bits encountered, including the 3003 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3004 of the page have been removed. 
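	The page must not be wired and its cache must be locked.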
3005 */ 3006 int32 3007 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3008 { 3009 ASSERT(page->WiredCount() == 0); 3010 3011 if (page->accessed) 3012 return vm_clear_page_mapping_accessed_flags(page); 3013 3014 while (vm_page_mapping* mapping = page->mappings.Head()) { 3015 VMArea* area = mapping->area; 3016 VMTranslationMap* map = area->address_space->TranslationMap(); 3017 addr_t address = virtual_page_address(area, page); 3018 bool modified = false; 3019 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3020 page->accessed = true; 3021 page->modified |= modified; 3022 return vm_clear_page_mapping_accessed_flags(page); 3023 } 3024 page->modified |= modified; 3025 } 3026 3027 return 0; 3028 } 3029 3030 3031 static int 3032 display_mem(int argc, char** argv) 3033 { 3034 bool physical = false; 3035 addr_t copyAddress; 3036 int32 displayWidth; 3037 int32 itemSize; 3038 int32 num = -1; 3039 addr_t address; 3040 int i = 1, j; 3041 3042 if (argc > 1 && argv[1][0] == '-') { 3043 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3044 physical = true; 3045 i++; 3046 } else 3047 i = 99; 3048 } 3049 3050 if (argc < i + 1 || argc > i + 2) { 3051 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3052 "\tdl - 8 bytes\n" 3053 "\tdw - 4 bytes\n" 3054 "\tds - 2 bytes\n" 3055 "\tdb - 1 byte\n" 3056 "\tstring - a whole string\n" 3057 " -p or --physical only allows memory from a single page to be " 3058 "displayed.\n"); 3059 return 0; 3060 } 3061 3062 address = parse_expression(argv[i]); 3063 3064 if (argc > i + 1) 3065 num = parse_expression(argv[i + 1]); 3066 3067 // build the format string 3068 if (strcmp(argv[0], "db") == 0) { 3069 itemSize = 1; 3070 displayWidth = 16; 3071 } else if (strcmp(argv[0], "ds") == 0) { 3072 itemSize = 2; 3073 displayWidth = 8; 3074 } else if (strcmp(argv[0], "dw") == 0) { 3075 itemSize = 4; 3076 displayWidth = 4; 3077 } else if (strcmp(argv[0], "dl") == 0) { 3078 itemSize = 8; 3079 displayWidth = 2; 3080 } else if (strcmp(argv[0], "string") == 0) { 3081 itemSize = 1; 3082 displayWidth = -1; 3083 } else { 3084 kprintf("display_mem called in an invalid way!\n"); 3085 return 0; 3086 } 3087 3088 if (num <= 0) 3089 num = displayWidth; 3090 3091 void* physicalPageHandle = NULL; 3092 3093 if (physical) { 3094 int32 offset = address & (B_PAGE_SIZE - 1); 3095 if (num * itemSize + offset > B_PAGE_SIZE) { 3096 num = (B_PAGE_SIZE - offset) / itemSize; 3097 kprintf("NOTE: number of bytes has been cut to page size\n"); 3098 } 3099 3100 address = ROUNDDOWN(address, B_PAGE_SIZE); 3101 3102 if (vm_get_physical_page_debug(address, &copyAddress, 3103 &physicalPageHandle) != B_OK) { 3104 kprintf("getting the hardware page failed."); 3105 return 0; 3106 } 3107 3108 address += offset; 3109 copyAddress += offset; 3110 } else 3111 copyAddress = address; 3112 3113 if (!strcmp(argv[0], "string")) { 3114 kprintf("%p \"", (char*)copyAddress); 3115 3116 // string mode 3117 for (i = 0; true; i++) { 3118 char c; 3119 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3120 != B_OK 3121 || c == '\0') { 3122 break; 3123 } 3124 3125 if (c == '\n') 3126 kprintf("\\n"); 3127 else if (c == '\t') 3128 kprintf("\\t"); 3129 else { 3130 if (!isprint(c)) 3131 c = '.'; 3132 3133 kprintf("%c", c); 3134 } 3135 } 3136 3137 kprintf("\"\n"); 3138 } else { 3139 // number mode 3140 for (i = 0; i < num; i++) { 3141 uint64 value; 3142 3143 if ((i % displayWidth) == 0) { 3144 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3145 if (i != 0)
3146 kprintf("\n"); 3147 3148 kprintf("[0x%lx] ", address + i * itemSize); 3149 3150 for (j = 0; j < displayed; j++) { 3151 char c; 3152 if (debug_memcpy(B_CURRENT_TEAM, &c, 3153 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3154 displayed = j; 3155 break; 3156 } 3157 if (!isprint(c)) 3158 c = '.'; 3159 3160 kprintf("%c", c); 3161 } 3162 if (num > displayWidth) { 3163 // make sure the spacing in the last line is correct 3164 for (j = displayed; j < displayWidth * itemSize; j++) 3165 kprintf(" "); 3166 } 3167 kprintf(" "); 3168 } 3169 3170 if (debug_memcpy(B_CURRENT_TEAM, &value, 3171 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3172 kprintf("read fault"); 3173 break; 3174 } 3175 3176 switch (itemSize) { 3177 case 1: 3178 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3179 break; 3180 case 2: 3181 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3182 break; 3183 case 4: 3184 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3185 break; 3186 case 8: 3187 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3188 break; 3189 } 3190 } 3191 3192 kprintf("\n"); 3193 } 3194 3195 if (physical) { 3196 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3197 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3198 } 3199 return 0; 3200 } 3201 3202 3203 static void 3204 dump_cache_tree_recursively(VMCache* cache, int level, 3205 VMCache* highlightCache) 3206 { 3207 // print this cache 3208 for (int i = 0; i < level; i++) 3209 kprintf(" "); 3210 if (cache == highlightCache) 3211 kprintf("%p <--\n", cache); 3212 else 3213 kprintf("%p\n", cache); 3214 3215 // recursively print its consumers 3216 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3217 VMCache* consumer = it.Next();) { 3218 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3219 } 3220 } 3221 3222 3223 static int 3224 dump_cache_tree(int argc, char** argv) 3225 { 3226 if (argc != 2 || !strcmp(argv[1], "--help")) { 3227 kprintf("usage: %s <address>\n", argv[0]); 3228 return 0; 3229 } 3230 3231 addr_t address = parse_expression(argv[1]); 3232 if (address == 0) 3233 return 0; 3234 3235 VMCache* cache = (VMCache*)address; 3236 VMCache* root = cache; 3237 3238 // find the root cache (the transitive source) 3239 while (root->source != NULL) 3240 root = root->source; 3241 3242 dump_cache_tree_recursively(root, 0, cache); 3243 3244 return 0; 3245 } 3246 3247 3248 const char* 3249 vm_cache_type_to_string(int32 type) 3250 { 3251 switch (type) { 3252 case CACHE_TYPE_RAM: 3253 return "RAM"; 3254 case CACHE_TYPE_DEVICE: 3255 return "device"; 3256 case CACHE_TYPE_VNODE: 3257 return "vnode"; 3258 case CACHE_TYPE_NULL: 3259 return "null"; 3260 3261 default: 3262 return "unknown"; 3263 } 3264 } 3265 3266 3267 #if DEBUG_CACHE_LIST 3268 3269 static void 3270 update_cache_info_recursively(VMCache* cache, cache_info& info) 3271 { 3272 info.page_count += cache->page_count; 3273 if (cache->type == CACHE_TYPE_RAM) 3274 info.committed += cache->committed_size; 3275 3276 // recurse 3277 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3278 VMCache* consumer = it.Next();) { 3279 update_cache_info_recursively(consumer, info); 3280 } 3281 } 3282 3283 3284 static int 3285 cache_info_compare_page_count(const void* _a, const void* _b) 3286 { 3287 const cache_info* a = (const cache_info*)_a; 3288 const cache_info* b = (const cache_info*)_b; 3289 if (a->page_count == b->page_count) 3290 return 0; 3291 return a->page_count < b->page_count ? 
1 : -1; 3292 } 3293 3294 3295 static int 3296 cache_info_compare_committed(const void* _a, const void* _b) 3297 { 3298 const cache_info* a = (const cache_info*)_a; 3299 const cache_info* b = (const cache_info*)_b; 3300 if (a->committed == b->committed) 3301 return 0; 3302 return a->committed < b->committed ? 1 : -1; 3303 } 3304 3305 3306 static void 3307 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3308 { 3309 for (int i = 0; i < level; i++) 3310 kprintf(" "); 3311 3312 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3313 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3314 cache->virtual_base, cache->virtual_end, cache->page_count); 3315 3316 if (level == 0) 3317 kprintf("/%lu", info.page_count); 3318 3319 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3320 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3321 3322 if (level == 0) 3323 kprintf("/%lu", info.committed); 3324 } 3325 3326 // areas 3327 if (cache->areas != NULL) { 3328 VMArea* area = cache->areas; 3329 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3330 area->name, area->address_space->ID()); 3331 3332 while (area->cache_next != NULL) { 3333 area = area->cache_next; 3334 kprintf(", %" B_PRId32, area->id); 3335 } 3336 } 3337 3338 kputs("\n"); 3339 3340 // recurse 3341 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3342 VMCache* consumer = it.Next();) { 3343 dump_caches_recursively(consumer, info, level + 1); 3344 } 3345 } 3346 3347 3348 static int 3349 dump_caches(int argc, char** argv) 3350 { 3351 if (sCacheInfoTable == NULL) { 3352 kprintf("No cache info table!\n"); 3353 return 0; 3354 } 3355 3356 bool sortByPageCount = true; 3357 3358 for (int32 i = 1; i < argc; i++) { 3359 if (strcmp(argv[i], "-c") == 0) { 3360 sortByPageCount = false; 3361 } else { 3362 print_debugger_command_usage(argv[0]); 3363 return 0; 3364 } 3365 } 3366 3367 uint32 totalCount = 0; 3368 uint32 rootCount = 0; 3369 off_t totalCommitted = 0; 3370 page_num_t totalPages = 0; 3371 3372 VMCache* cache = gDebugCacheList; 3373 while (cache) { 3374 totalCount++; 3375 if (cache->source == NULL) { 3376 cache_info stackInfo; 3377 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3378 ? sCacheInfoTable[rootCount] : stackInfo; 3379 rootCount++; 3380 info.cache = cache; 3381 info.page_count = 0; 3382 info.committed = 0; 3383 update_cache_info_recursively(cache, info); 3384 totalCommitted += info.committed; 3385 totalPages += info.page_count; 3386 } 3387 3388 cache = cache->debug_next; 3389 } 3390 3391 if (rootCount <= (uint32)kCacheInfoTableCount) { 3392 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3393 sortByPageCount 3394 ? &cache_info_compare_page_count 3395 : &cache_info_compare_committed); 3396 } 3397 3398 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3399 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3400 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3401 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3402 "page count" : "committed size"); 3403 3404 if (rootCount <= (uint32)kCacheInfoTableCount) { 3405 for (uint32 i = 0; i < rootCount; i++) { 3406 cache_info& info = sCacheInfoTable[i]; 3407 dump_caches_recursively(info.cache, info, 0); 3408 } 3409 } else 3410 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3411 3412 return 0; 3413 } 3414 3415 #endif // DEBUG_CACHE_LIST 3416 3417 3418 static int 3419 dump_cache(int argc, char** argv) 3420 { 3421 VMCache* cache; 3422 bool showPages = false; 3423 int i = 1; 3424 3425 if (argc < 2 || !strcmp(argv[1], "--help")) { 3426 kprintf("usage: %s [-ps] <address>\n" 3427 " if -p is specified, all pages are shown, if -s is used\n" 3428 " only the cache info is shown respectively.\n", argv[0]); 3429 return 0; 3430 } 3431 while (argv[i][0] == '-') { 3432 char* arg = argv[i] + 1; 3433 while (arg[0]) { 3434 if (arg[0] == 'p') 3435 showPages = true; 3436 arg++; 3437 } 3438 i++; 3439 } 3440 if (argv[i] == NULL) { 3441 kprintf("%s: invalid argument, pass address\n", argv[0]); 3442 return 0; 3443 } 3444 3445 addr_t address = parse_expression(argv[i]); 3446 if (address == 0) 3447 return 0; 3448 3449 cache = (VMCache*)address; 3450 3451 cache->Dump(showPages); 3452 3453 set_debug_variable("_sourceCache", (addr_t)cache->source); 3454 3455 return 0; 3456 } 3457 3458 3459 static void 3460 dump_area_struct(VMArea* area, bool mappings) 3461 { 3462 kprintf("AREA: %p\n", area); 3463 kprintf("name:\t\t'%s'\n", area->name); 3464 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3465 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3466 kprintf("base:\t\t0x%lx\n", area->Base()); 3467 kprintf("size:\t\t0x%lx\n", area->Size()); 3468 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3469 kprintf("page_protection:%p\n", area->page_protections); 3470 kprintf("wiring:\t\t0x%x\n", area->wiring); 3471 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3472 kprintf("cache:\t\t%p\n", area->cache); 3473 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3474 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3475 kprintf("cache_next:\t%p\n", area->cache_next); 3476 kprintf("cache_prev:\t%p\n", area->cache_prev); 3477 3478 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3479 if (mappings) { 3480 kprintf("page mappings:\n"); 3481 while (iterator.HasNext()) { 3482 vm_page_mapping* mapping = iterator.Next(); 3483 kprintf(" %p", mapping->page); 3484 } 3485 kprintf("\n"); 3486 } else { 3487 uint32 count = 0; 3488 while (iterator.Next() != NULL) { 3489 count++; 3490 } 3491 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3492 } 3493 } 3494 3495 3496 static int 3497 dump_area(int argc, char** argv) 3498 { 3499 bool mappings = false; 3500 bool found = false; 3501 int32 index = 1; 3502 VMArea* area; 3503 addr_t num; 3504 3505 if (argc < 2 || !strcmp(argv[1], "--help")) { 3506 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3507 "All areas matching either id/address/name are listed. 
You can\n" 3508 "force to check only a specific item by prefixing the specifier\n" 3509 "with the id/contains/address/name keywords.\n" 3510 "-m shows the area's mappings as well.\n"); 3511 return 0; 3512 } 3513 3514 if (!strcmp(argv[1], "-m")) { 3515 mappings = true; 3516 index++; 3517 } 3518 3519 int32 mode = 0xf; 3520 if (!strcmp(argv[index], "id")) 3521 mode = 1; 3522 else if (!strcmp(argv[index], "contains")) 3523 mode = 2; 3524 else if (!strcmp(argv[index], "name")) 3525 mode = 4; 3526 else if (!strcmp(argv[index], "address")) 3527 mode = 0; 3528 if (mode != 0xf) 3529 index++; 3530 3531 if (index >= argc) { 3532 kprintf("No area specifier given.\n"); 3533 return 0; 3534 } 3535 3536 num = parse_expression(argv[index]); 3537 3538 if (mode == 0) { 3539 dump_area_struct((struct VMArea*)num, mappings); 3540 } else { 3541 // walk through the area list, looking for the arguments as a name 3542 3543 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3544 while ((area = it.Next()) != NULL) { 3545 if (((mode & 4) != 0 3546 && !strcmp(argv[index], area->name)) 3547 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3548 || (((mode & 2) != 0 && area->Base() <= num 3549 && area->Base() + area->Size() > num))))) { 3550 dump_area_struct(area, mappings); 3551 found = true; 3552 } 3553 } 3554 3555 if (!found) 3556 kprintf("could not find area %s (%ld)\n", argv[index], num); 3557 } 3558 3559 return 0; 3560 } 3561 3562 3563 static int 3564 dump_area_list(int argc, char** argv) 3565 { 3566 VMArea* area; 3567 const char* name = NULL; 3568 int32 id = 0; 3569 3570 if (argc > 1) { 3571 id = parse_expression(argv[1]); 3572 if (id == 0) 3573 name = argv[1]; 3574 } 3575 3576 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3577 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3578 B_PRINTF_POINTER_WIDTH, "size"); 3579 3580 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3581 while ((area = it.Next()) != NULL) { 3582 if ((id != 0 && area->address_space->ID() != id) 3583 || (name != NULL && strstr(area->name, name) == NULL)) 3584 continue; 3585 3586 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3587 area->id, (void*)area->Base(), (void*)area->Size(), 3588 area->protection, area->wiring, area->name); 3589 } 3590 return 0; 3591 } 3592 3593 3594 static int 3595 dump_available_memory(int argc, char** argv) 3596 { 3597 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3598 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3599 return 0; 3600 } 3601 3602 3603 static int 3604 dump_mapping_info(int argc, char** argv) 3605 { 3606 bool reverseLookup = false; 3607 bool pageLookup = false; 3608 3609 int argi = 1; 3610 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3611 const char* arg = argv[argi]; 3612 if (strcmp(arg, "-r") == 0) { 3613 reverseLookup = true; 3614 } else if (strcmp(arg, "-p") == 0) { 3615 reverseLookup = true; 3616 pageLookup = true; 3617 } else { 3618 print_debugger_command_usage(argv[0]); 3619 return 0; 3620 } 3621 } 3622 3623 // We need at least one argument, the address. Optionally a thread ID can be 3624 // specified. 
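	// Illustrative invocations: "<command> <virtualAddress> [<thread>]" prints
	// the mapping of a virtual address (in the given thread's team), while
	// "<command> -r <physicalAddress>" and "<command> -p <pageAddress>" list
	// all virtual addresses a physical page is mapped at.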
3625 if (argi >= argc || argi + 2 < argc) { 3626 print_debugger_command_usage(argv[0]); 3627 return 0; 3628 } 3629 3630 uint64 addressValue; 3631 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3632 return 0; 3633 3634 Team* team = NULL; 3635 if (argi < argc) { 3636 uint64 threadID; 3637 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3638 return 0; 3639 3640 Thread* thread = Thread::GetDebug(threadID); 3641 if (thread == NULL) { 3642 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3643 return 0; 3644 } 3645 3646 team = thread->team; 3647 } 3648 3649 if (reverseLookup) { 3650 phys_addr_t physicalAddress; 3651 if (pageLookup) { 3652 vm_page* page = (vm_page*)(addr_t)addressValue; 3653 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3654 } else { 3655 physicalAddress = (phys_addr_t)addressValue; 3656 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3657 } 3658 3659 kprintf(" Team Virtual Address Area\n"); 3660 kprintf("--------------------------------------\n"); 3661 3662 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3663 Callback() 3664 : 3665 fAddressSpace(NULL) 3666 { 3667 } 3668 3669 void SetAddressSpace(VMAddressSpace* addressSpace) 3670 { 3671 fAddressSpace = addressSpace; 3672 } 3673 3674 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3675 { 3676 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3677 virtualAddress); 3678 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3679 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3680 else 3681 kprintf("\n"); 3682 return false; 3683 } 3684 3685 private: 3686 VMAddressSpace* fAddressSpace; 3687 } callback; 3688 3689 if (team != NULL) { 3690 // team specified -- get its address space 3691 VMAddressSpace* addressSpace = team->address_space; 3692 if (addressSpace == NULL) { 3693 kprintf("Failed to get address space!\n"); 3694 return 0; 3695 } 3696 3697 callback.SetAddressSpace(addressSpace); 3698 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3699 physicalAddress, callback); 3700 } else { 3701 // no team specified -- iterate through all address spaces 3702 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3703 addressSpace != NULL; 3704 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3705 callback.SetAddressSpace(addressSpace); 3706 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3707 physicalAddress, callback); 3708 } 3709 } 3710 } else { 3711 // get the address space 3712 addr_t virtualAddress = (addr_t)addressValue; 3713 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3714 VMAddressSpace* addressSpace; 3715 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3716 addressSpace = VMAddressSpace::Kernel(); 3717 } else if (team != NULL) { 3718 addressSpace = team->address_space; 3719 } else { 3720 Thread* thread = debug_get_debugged_thread(); 3721 if (thread == NULL || thread->team == NULL) { 3722 kprintf("Failed to get team!\n"); 3723 return 0; 3724 } 3725 3726 addressSpace = thread->team->address_space; 3727 } 3728 3729 if (addressSpace == NULL) { 3730 kprintf("Failed to get address space!\n"); 3731 return 0; 3732 } 3733 3734 // let the translation map implementation do the job 3735 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3736 } 3737 3738 return 0; 3739 } 3740 3741 3742 /*! Deletes all areas and reserved regions in the given address space. 3743 3744 The caller must ensure that none of the areas has any wired ranges. 
3745 3746 \param addressSpace The address space. 3747 \param deletingAddressSpace \c true, if the address space is in the process 3748 of being deleted. 3749 */ 3750 void 3751 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3752 { 3753 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3754 addressSpace->ID())); 3755 3756 addressSpace->WriteLock(); 3757 3758 // remove all reserved areas in this address space 3759 addressSpace->UnreserveAllAddressRanges(0); 3760 3761 // delete all the areas in this address space 3762 while (VMArea* area = addressSpace->FirstArea()) { 3763 ASSERT(!area->IsWired()); 3764 delete_area(addressSpace, area, deletingAddressSpace); 3765 } 3766 3767 addressSpace->WriteUnlock(); 3768 } 3769 3770 3771 static area_id 3772 vm_area_for(addr_t address, bool kernel) 3773 { 3774 team_id team; 3775 if (IS_USER_ADDRESS(address)) { 3776 // we try the user team address space, if any 3777 team = VMAddressSpace::CurrentID(); 3778 if (team < 0) 3779 return team; 3780 } else 3781 team = VMAddressSpace::KernelID(); 3782 3783 AddressSpaceReadLocker locker(team); 3784 if (!locker.IsLocked()) 3785 return B_BAD_TEAM_ID; 3786 3787 VMArea* area = locker.AddressSpace()->LookupArea(address); 3788 if (area != NULL) { 3789 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3790 return B_ERROR; 3791 3792 return area->id; 3793 } 3794 3795 return B_ERROR; 3796 } 3797 3798 3799 /*! Frees physical pages that were used during the boot process. 3800 \a end is inclusive. 3801 */ 3802 static void 3803 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3804 { 3805 // free all physical pages in the specified range 3806 3807 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3808 phys_addr_t physicalAddress; 3809 uint32 flags; 3810 3811 if (map->Query(current, &physicalAddress, &flags) == B_OK 3812 && (flags & PAGE_PRESENT) != 0) { 3813 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3814 if (page != NULL && page->State() != PAGE_STATE_FREE 3815 && page->State() != PAGE_STATE_CLEAR 3816 && page->State() != PAGE_STATE_UNUSED) { 3817 DEBUG_PAGE_ACCESS_START(page); 3818 vm_page_set_state(page, PAGE_STATE_FREE); 3819 } 3820 } 3821 } 3822 3823 // unmap the memory 3824 map->Unmap(start, end); 3825 } 3826 3827 3828 void 3829 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3830 { 3831 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3832 addr_t end = start + (size - 1); 3833 addr_t lastEnd = start; 3834 3835 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3836 (void*)start, (void*)end)); 3837 3838 // The areas are sorted in virtual address space order, so 3839 // we just have to find the holes between them that fall 3840 // into the area we should dispose 3841 3842 map->Lock(); 3843 3844 for (VMAddressSpace::AreaIterator it 3845 = VMAddressSpace::Kernel()->GetAreaIterator(); 3846 VMArea* area = it.Next();) { 3847 addr_t areaStart = area->Base(); 3848 addr_t areaEnd = areaStart + (area->Size() - 1); 3849 3850 if (areaEnd < start) 3851 continue; 3852 3853 if (areaStart > end) { 3854 // we are done, the area is already beyond of what we have to free 3855 break; 3856 } 3857 3858 if (areaStart > lastEnd) { 3859 // this is something we can free 3860 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3861 (void*)areaStart)); 3862 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3863 } 3864 3865 if (areaEnd >= end) 
{ 3866 lastEnd = areaEnd; 3867 // no +1 to prevent potential overflow 3868 break; 3869 } 3870 3871 lastEnd = areaEnd + 1; 3872 } 3873 3874 if (lastEnd < end) { 3875 // we can also get rid of some space at the end of the area 3876 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3877 (void*)end)); 3878 unmap_and_free_physical_pages(map, lastEnd, end); 3879 } 3880 3881 map->Unlock(); 3882 } 3883 3884 3885 static void 3886 create_preloaded_image_areas(struct preloaded_image* _image) 3887 { 3888 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3889 char name[B_OS_NAME_LENGTH]; 3890 void* address; 3891 int32 length; 3892 3893 // use file name to create a good area name 3894 char* fileName = strrchr(image->name, '/'); 3895 if (fileName == NULL) 3896 fileName = image->name; 3897 else 3898 fileName++; 3899 3900 length = strlen(fileName); 3901 // make sure there is enough space for the suffix 3902 if (length > 25) 3903 length = 25; 3904 3905 memcpy(name, fileName, length); 3906 strcpy(name + length, "_text"); 3907 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3908 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3909 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3910 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3911 // this will later be remapped read-only/executable by the 3912 // ELF initialization code 3913 3914 strcpy(name + length, "_data"); 3915 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3916 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3917 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3918 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3919 } 3920 3921 3922 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3923 Any boot loader resources contained in that arguments must not be accessed 3924 anymore past this point. 3925 */ 3926 void 3927 vm_free_kernel_args(kernel_args* args) 3928 { 3929 uint32 i; 3930 3931 TRACE(("vm_free_kernel_args()\n")); 3932 3933 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3934 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3935 if (area >= B_OK) 3936 delete_area(area); 3937 } 3938 } 3939 3940 3941 static void 3942 allocate_kernel_args(kernel_args* args) 3943 { 3944 TRACE(("allocate_kernel_args()\n")); 3945 3946 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3947 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3948 3949 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3950 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3951 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3952 } 3953 } 3954 3955 3956 static void 3957 unreserve_boot_loader_ranges(kernel_args* args) 3958 { 3959 TRACE(("unreserve_boot_loader_ranges()\n")); 3960 3961 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3962 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3963 (void*)(addr_t)args->virtual_allocated_range[i].start, 3964 args->virtual_allocated_range[i].size); 3965 } 3966 } 3967 3968 3969 static void 3970 reserve_boot_loader_ranges(kernel_args* args) 3971 { 3972 TRACE(("reserve_boot_loader_ranges()\n")); 3973 3974 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3975 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3976 3977 // If the address is no kernel address, we just skip it. The 3978 // architecture specific code has to deal with it. 
3979 if (!IS_KERNEL_ADDRESS(address)) { 3980 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3981 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3982 continue; 3983 } 3984 3985 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3986 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3987 if (status < B_OK) 3988 panic("could not reserve boot loader ranges\n"); 3989 } 3990 } 3991 3992 3993 static addr_t 3994 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3995 { 3996 size = PAGE_ALIGN(size); 3997 3998 // find a slot in the virtual allocation addr range 3999 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4000 // check to see if the space between this one and the last is big enough 4001 addr_t rangeStart = args->virtual_allocated_range[i].start; 4002 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4003 + args->virtual_allocated_range[i - 1].size; 4004 4005 addr_t base = alignment > 0 4006 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4007 4008 if (base >= KERNEL_BASE && base < rangeStart 4009 && rangeStart - base >= size) { 4010 args->virtual_allocated_range[i - 1].size 4011 += base + size - previousRangeEnd; 4012 return base; 4013 } 4014 } 4015 4016 // we hadn't found one between allocation ranges. this is ok. 4017 // see if there's a gap after the last one 4018 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4019 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4020 + args->virtual_allocated_range[lastEntryIndex].size; 4021 addr_t base = alignment > 0 4022 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4023 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4024 args->virtual_allocated_range[lastEntryIndex].size 4025 += base + size - lastRangeEnd; 4026 return base; 4027 } 4028 4029 // see if there's a gap before the first one 4030 addr_t rangeStart = args->virtual_allocated_range[0].start; 4031 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4032 base = rangeStart - size; 4033 if (alignment > 0) 4034 base = ROUNDDOWN(base, alignment); 4035 4036 if (base >= KERNEL_BASE) { 4037 args->virtual_allocated_range[0].start = base; 4038 args->virtual_allocated_range[0].size += rangeStart - base; 4039 return base; 4040 } 4041 } 4042 4043 return 0; 4044 } 4045 4046 4047 static bool 4048 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4049 { 4050 // TODO: horrible brute-force method of determining if the page can be 4051 // allocated 4052 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4053 if (address >= args->physical_memory_range[i].start 4054 && address < args->physical_memory_range[i].start 4055 + args->physical_memory_range[i].size) 4056 return true; 4057 } 4058 return false; 4059 } 4060 4061 4062 page_num_t 4063 vm_allocate_early_physical_page(kernel_args* args) 4064 { 4065 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4066 phys_addr_t nextPage; 4067 4068 nextPage = args->physical_allocated_range[i].start 4069 + args->physical_allocated_range[i].size; 4070 // see if the page after the next allocated paddr run can be allocated 4071 if (i + 1 < args->num_physical_allocated_ranges 4072 && args->physical_allocated_range[i + 1].size != 0) { 4073 // see if the next page will collide with the next allocated range 4074 if (nextPage >= args->physical_allocated_range[i+1].start) 4075 continue; 4076 } 4077 // see if the next physical page 
fits in the memory block 4078 if (is_page_in_physical_memory_range(args, nextPage)) { 4079 // we got one! 4080 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4081 return nextPage / B_PAGE_SIZE; 4082 } 4083 } 4084 4085 // Expanding upwards didn't work, try going downwards. 4086 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4087 phys_addr_t nextPage; 4088 4089 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4090 // see if the page after the prev allocated paddr run can be allocated 4091 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4092 // see if the next page will collide with the next allocated range 4093 if (nextPage < args->physical_allocated_range[i-1].start 4094 + args->physical_allocated_range[i-1].size) 4095 continue; 4096 } 4097 // see if the next physical page fits in the memory block 4098 if (is_page_in_physical_memory_range(args, nextPage)) { 4099 // we got one! 4100 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4101 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4102 return nextPage / B_PAGE_SIZE; 4103 } 4104 } 4105 4106 return 0; 4107 // could not allocate a block 4108 } 4109 4110 4111 /*! This one uses the kernel_args' physical and virtual memory ranges to 4112 allocate some pages before the VM is completely up. 4113 */ 4114 addr_t 4115 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4116 uint32 attributes, addr_t alignment) 4117 { 4118 if (physicalSize > virtualSize) 4119 physicalSize = virtualSize; 4120 4121 // find the vaddr to allocate at 4122 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4123 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4124 if (virtualBase == 0) { 4125 panic("vm_allocate_early: could not allocate virtual address\n"); 4126 return 0; 4127 } 4128 4129 // map the pages 4130 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4131 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4132 if (physicalAddress == 0) 4133 panic("error allocating early page!\n"); 4134 4135 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4136 4137 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4138 physicalAddress * B_PAGE_SIZE, attributes, 4139 &vm_allocate_early_physical_page); 4140 } 4141 4142 return virtualBase; 4143 } 4144 4145 4146 /*! The main entrance point to initialize the VM. 
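	Called once during kernel startup: it sets up the architecture specific
	translation map, the page, slab and (optionally) heap allocators, and
	creates areas for everything the boot loader already put in place
	(kernel image and preloaded images, kernel args, CPU idle thread stacks).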
*/ 4147 status_t 4148 vm_init(kernel_args* args) 4149 { 4150 struct preloaded_image* image; 4151 void* address; 4152 status_t err = 0; 4153 uint32 i; 4154 4155 TRACE(("vm_init: entry\n")); 4156 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4157 err = arch_vm_init(args); 4158 4159 // initialize some globals 4160 vm_page_init_num_pages(args); 4161 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4162 4163 slab_init(args); 4164 4165 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4166 off_t heapSize = INITIAL_HEAP_SIZE; 4167 // try to accomodate low memory systems 4168 while (heapSize > sAvailableMemory / 8) 4169 heapSize /= 2; 4170 if (heapSize < 1024 * 1024) 4171 panic("vm_init: go buy some RAM please."); 4172 4173 // map in the new heap and initialize it 4174 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4175 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4176 TRACE(("heap at 0x%lx\n", heapBase)); 4177 heap_init(heapBase, heapSize); 4178 #endif 4179 4180 // initialize the free page list and physical page mapper 4181 vm_page_init(args); 4182 4183 // initialize the cache allocators 4184 vm_cache_init(args); 4185 4186 { 4187 status_t error = VMAreaHash::Init(); 4188 if (error != B_OK) 4189 panic("vm_init: error initializing area hash table\n"); 4190 } 4191 4192 VMAddressSpace::Init(); 4193 reserve_boot_loader_ranges(args); 4194 4195 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4196 heap_init_post_area(); 4197 #endif 4198 4199 // Do any further initialization that the architecture dependant layers may 4200 // need now 4201 arch_vm_translation_map_init_post_area(args); 4202 arch_vm_init_post_area(args); 4203 vm_page_init_post_area(args); 4204 slab_init_post_area(); 4205 4206 // allocate areas to represent stuff that already exists 4207 4208 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4209 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4210 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4211 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4212 #endif 4213 4214 allocate_kernel_args(args); 4215 4216 create_preloaded_image_areas(args->kernel_image); 4217 4218 // allocate areas for preloaded images 4219 for (image = args->preloaded_images; image != NULL; image = image->next) 4220 create_preloaded_image_areas(image); 4221 4222 // allocate kernel stacks 4223 for (i = 0; i < args->num_cpus; i++) { 4224 char name[64]; 4225 4226 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4227 address = (void*)args->cpu_kstack[i].start; 4228 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4229 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4230 } 4231 4232 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4233 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4234 4235 #if PARANOID_KERNEL_MALLOC 4236 vm_block_address_range("uninitialized heap memory", 4237 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4238 #endif 4239 #if PARANOID_KERNEL_FREE 4240 vm_block_address_range("freed heap memory", 4241 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4242 #endif 4243 4244 // create the object cache for the page mappings 4245 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4246 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4247 NULL, NULL); 4248 if (gPageMappingsObjectCache == NULL) 4249 panic("failed to create page mappings object cache"); 4250 4251 
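	// Keep a reserve of page mapping objects around, so that the page fault
	// path can always get one; when map_page() fails for lack of a mapping
	// object, vm_soft_fault() refills the reserve via object_cache_reserve().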
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4252 4253 #if DEBUG_CACHE_LIST 4254 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4255 virtual_address_restrictions virtualRestrictions = {}; 4256 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4257 physical_address_restrictions physicalRestrictions = {}; 4258 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4259 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4260 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4261 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4262 &physicalRestrictions, (void**)&sCacheInfoTable); 4263 } 4264 #endif // DEBUG_CACHE_LIST 4265 4266 // add some debugger commands 4267 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4268 add_debugger_command("area", &dump_area, 4269 "Dump info about a particular area"); 4270 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4271 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4272 #if DEBUG_CACHE_LIST 4273 if (sCacheInfoTable != NULL) { 4274 add_debugger_command_etc("caches", &dump_caches, 4275 "List all VMCache trees", 4276 "[ \"-c\" ]\n" 4277 "All cache trees are listed sorted in decreasing order by number " 4278 "of\n" 4279 "used pages or, if \"-c\" is specified, by size of committed " 4280 "memory.\n", 4281 0); 4282 } 4283 #endif 4284 add_debugger_command("avail", &dump_available_memory, 4285 "Dump available memory"); 4286 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4287 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4288 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4289 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4290 add_debugger_command("string", &display_mem, "dump strings"); 4291 4292 add_debugger_command_etc("mapping", &dump_mapping_info, 4293 "Print address mapping information", 4294 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4295 "Prints low-level page mapping information for a given address. If\n" 4296 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4297 "address that is looked up in the translation map of the current\n" 4298 "team, respectively the team specified by thread ID <thread ID>. If\n" 4299 "\"-r\" is specified, <address> is a physical address that is\n" 4300 "searched in the translation map of all teams, respectively the team\n" 4301 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4302 "<address> is the address of a vm_page structure. 
The behavior is\n" 4303 "equivalent to specifying \"-r\" with the physical address of that\n" 4304 "page.\n", 4305 0); 4306 4307 TRACE(("vm_init: exit\n")); 4308 4309 vm_cache_init_post_heap(); 4310 4311 return err; 4312 } 4313 4314 4315 status_t 4316 vm_init_post_sem(kernel_args* args) 4317 { 4318 // This frees all unused boot loader resources and makes its space available 4319 // again 4320 arch_vm_init_end(args); 4321 unreserve_boot_loader_ranges(args); 4322 4323 // fill in all of the semaphores that were not allocated before 4324 // since we're still single threaded and only the kernel address space 4325 // exists, it isn't that hard to find all of the ones we need to create 4326 4327 arch_vm_translation_map_init_post_sem(args); 4328 4329 slab_init_post_sem(); 4330 4331 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4332 heap_init_post_sem(); 4333 #endif 4334 4335 return B_OK; 4336 } 4337 4338 4339 status_t 4340 vm_init_post_thread(kernel_args* args) 4341 { 4342 vm_page_init_post_thread(args); 4343 slab_init_post_thread(); 4344 return heap_init_post_thread(); 4345 } 4346 4347 4348 status_t 4349 vm_init_post_modules(kernel_args* args) 4350 { 4351 return arch_vm_init_post_modules(args); 4352 } 4353 4354 4355 void 4356 permit_page_faults(void) 4357 { 4358 Thread* thread = thread_get_current_thread(); 4359 if (thread != NULL) 4360 atomic_add(&thread->page_faults_allowed, 1); 4361 } 4362 4363 4364 void 4365 forbid_page_faults(void) 4366 { 4367 Thread* thread = thread_get_current_thread(); 4368 if (thread != NULL) 4369 atomic_add(&thread->page_faults_allowed, -1); 4370 } 4371 4372 4373 status_t 4374 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4375 bool isUser, addr_t* newIP) 4376 { 4377 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4378 faultAddress)); 4379 4380 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4381 4382 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4383 VMAddressSpace* addressSpace = NULL; 4384 4385 status_t status = B_OK; 4386 *newIP = 0; 4387 atomic_add((int32*)&sPageFaults, 1); 4388 4389 if (IS_KERNEL_ADDRESS(pageAddress)) { 4390 addressSpace = VMAddressSpace::GetKernel(); 4391 } else if (IS_USER_ADDRESS(pageAddress)) { 4392 addressSpace = VMAddressSpace::GetCurrent(); 4393 if (addressSpace == NULL) { 4394 if (!isUser) { 4395 dprintf("vm_page_fault: kernel thread accessing invalid user " 4396 "memory!\n"); 4397 status = B_BAD_ADDRESS; 4398 TPF(PageFaultError(-1, 4399 VMPageFaultTracing 4400 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4401 } else { 4402 // XXX weird state. 
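				// A thread running in userland faulted, but there is no
				// current address space to resolve the fault in.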
4403 panic("vm_page_fault: non kernel thread accessing user memory " 4404 "that doesn't exist!\n"); 4405 status = B_BAD_ADDRESS; 4406 } 4407 } 4408 } else { 4409 // the hit was probably in the 64k DMZ between kernel and user space 4410 // this keeps a user space thread from passing a buffer that crosses 4411 // into kernel space 4412 status = B_BAD_ADDRESS; 4413 TPF(PageFaultError(-1, 4414 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4415 } 4416 4417 if (status == B_OK) { 4418 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4419 isUser, NULL); 4420 } 4421 4422 if (status < B_OK) { 4423 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4424 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n", 4425 strerror(status), address, faultAddress, isWrite, isUser, 4426 thread_get_current_thread_id()); 4427 if (!isUser) { 4428 Thread* thread = thread_get_current_thread(); 4429 if (thread != NULL && thread->fault_handler != 0) { 4430 // this will cause the arch dependant page fault handler to 4431 // modify the IP on the interrupt frame or whatever to return 4432 // to this address 4433 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4434 } else { 4435 // unhandled page fault in the kernel 4436 panic("vm_page_fault: unhandled page fault in kernel space at " 4437 "0x%lx, ip 0x%lx\n", address, faultAddress); 4438 } 4439 } else { 4440 Thread* thread = thread_get_current_thread(); 4441 4442 #ifdef TRACE_FAULTS 4443 VMArea* area = NULL; 4444 if (addressSpace != NULL) { 4445 addressSpace->ReadLock(); 4446 area = addressSpace->LookupArea(faultAddress); 4447 } 4448 4449 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4450 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4451 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4452 thread->team->Name(), thread->team->id, 4453 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4454 faultAddress, area ? area->name : "???", faultAddress - (area ? 4455 area->Base() : 0x0)); 4456 4457 if (addressSpace != NULL) 4458 addressSpace->ReadUnlock(); 4459 #endif 4460 4461 // If the thread has a signal handler for SIGSEGV, we simply 4462 // send it the signal. Otherwise we notify the user debugger 4463 // first. 4464 struct sigaction action; 4465 if ((sigaction(SIGSEGV, NULL, &action) == 0 4466 && action.sa_handler != SIG_DFL 4467 && action.sa_handler != SIG_IGN) 4468 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4469 SIGSEGV)) { 4470 Signal signal(SIGSEGV, 4471 status == B_PERMISSION_DENIED 4472 ? 
SEGV_ACCERR : SEGV_MAPERR, 4473 EFAULT, thread->team->id); 4474 signal.SetAddress((void*)address); 4475 send_signal_to_thread(thread, signal, 0); 4476 } 4477 } 4478 } 4479 4480 if (addressSpace != NULL) 4481 addressSpace->Put(); 4482 4483 return B_HANDLED_INTERRUPT; 4484 } 4485 4486 4487 struct PageFaultContext { 4488 AddressSpaceReadLocker addressSpaceLocker; 4489 VMCacheChainLocker cacheChainLocker; 4490 4491 VMTranslationMap* map; 4492 VMCache* topCache; 4493 off_t cacheOffset; 4494 vm_page_reservation reservation; 4495 bool isWrite; 4496 4497 // return values 4498 vm_page* page; 4499 bool restart; 4500 bool pageAllocated; 4501 4502 4503 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4504 : 4505 addressSpaceLocker(addressSpace, true), 4506 map(addressSpace->TranslationMap()), 4507 isWrite(isWrite) 4508 { 4509 } 4510 4511 ~PageFaultContext() 4512 { 4513 UnlockAll(); 4514 vm_page_unreserve_pages(&reservation); 4515 } 4516 4517 void Prepare(VMCache* topCache, off_t cacheOffset) 4518 { 4519 this->topCache = topCache; 4520 this->cacheOffset = cacheOffset; 4521 page = NULL; 4522 restart = false; 4523 pageAllocated = false; 4524 4525 cacheChainLocker.SetTo(topCache); 4526 } 4527 4528 void UnlockAll(VMCache* exceptCache = NULL) 4529 { 4530 topCache = NULL; 4531 addressSpaceLocker.Unlock(); 4532 cacheChainLocker.Unlock(exceptCache); 4533 } 4534 }; 4535 4536 4537 /*! Gets the page that should be mapped into the area. 4538 Returns an error code other than \c B_OK, if the page couldn't be found or 4539 paged in. The locking state of the address space and the caches is undefined 4540 in that case. 4541 Returns \c B_OK with \c context.restart set to \c true, if the functions 4542 had to unlock the address space and all caches and is supposed to be called 4543 again. 4544 Returns \c B_OK with \c context.restart set to \c false, if the page was 4545 found. It is returned in \c context.page. The address space will still be 4546 locked as well as all caches starting from the top cache to at least the 4547 cache the page lives in. 4548 */ 4549 static status_t 4550 fault_get_page(PageFaultContext& context) 4551 { 4552 VMCache* cache = context.topCache; 4553 VMCache* lastCache = NULL; 4554 vm_page* page = NULL; 4555 4556 while (cache != NULL) { 4557 // We already hold the lock of the cache at this point. 4558 4559 lastCache = cache; 4560 4561 page = cache->LookupPage(context.cacheOffset); 4562 if (page != NULL && page->busy) { 4563 // page must be busy -- wait for it to become unbusy 4564 context.UnlockAll(cache); 4565 cache->ReleaseRefLocked(); 4566 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4567 4568 // restart the whole process 4569 context.restart = true; 4570 return B_OK; 4571 } 4572 4573 if (page != NULL) 4574 break; 4575 4576 // The current cache does not contain the page we're looking for. 4577 4578 // see if the backing store has it 4579 if (cache->HasPage(context.cacheOffset)) { 4580 // insert a fresh page and mark it busy -- we're going to read it in 4581 page = vm_page_allocate_page(&context.reservation, 4582 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4583 cache->InsertPage(page, context.cacheOffset); 4584 4585 // We need to unlock all caches and the address space while reading 4586 // the page in. Keep a reference to the cache around. 
4587 cache->AcquireRefLocked(); 4588 context.UnlockAll(); 4589 4590 // read the page in 4591 generic_io_vec vec; 4592 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4593 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4594 4595 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4596 B_PHYSICAL_IO_REQUEST, &bytesRead); 4597 4598 cache->Lock(); 4599 4600 if (status < B_OK) { 4601 // on error remove and free the page 4602 dprintf("reading page from cache %p returned: %s!\n", 4603 cache, strerror(status)); 4604 4605 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4606 cache->RemovePage(page); 4607 vm_page_set_state(page, PAGE_STATE_FREE); 4608 4609 cache->ReleaseRefAndUnlock(); 4610 return status; 4611 } 4612 4613 // mark the page unbusy again 4614 cache->MarkPageUnbusy(page); 4615 4616 DEBUG_PAGE_ACCESS_END(page); 4617 4618 // Since we needed to unlock everything temporarily, the area 4619 // situation might have changed. So we need to restart the whole 4620 // process. 4621 cache->ReleaseRefAndUnlock(); 4622 context.restart = true; 4623 return B_OK; 4624 } 4625 4626 cache = context.cacheChainLocker.LockSourceCache(); 4627 } 4628 4629 if (page == NULL) { 4630 // There was no adequate page, determine the cache for a clean one. 4631 // Read-only pages come in the deepest cache, only the top most cache 4632 // may have direct write access. 4633 cache = context.isWrite ? context.topCache : lastCache; 4634 4635 // allocate a clean page 4636 page = vm_page_allocate_page(&context.reservation, 4637 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4638 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4639 page->physical_page_number)); 4640 4641 // insert the new page into our cache 4642 cache->InsertPage(page, context.cacheOffset); 4643 context.pageAllocated = true; 4644 } else if (page->Cache() != context.topCache && context.isWrite) { 4645 // We have a page that has the data we want, but in the wrong cache 4646 // object so we need to copy it and stick it into the top cache. 4647 vm_page* sourcePage = page; 4648 4649 // TODO: If memory is low, it might be a good idea to steal the page 4650 // from our source cache -- if possible, that is. 4651 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4652 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4653 4654 // To not needlessly kill concurrency we unlock all caches but the top 4655 // one while copying the page. Lacking another mechanism to ensure that 4656 // the source page doesn't disappear, we mark it busy. 4657 sourcePage->busy = true; 4658 context.cacheChainLocker.UnlockKeepRefs(true); 4659 4660 // copy the page 4661 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4662 sourcePage->physical_page_number * B_PAGE_SIZE); 4663 4664 context.cacheChainLocker.RelockCaches(true); 4665 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4666 4667 // insert the new page into our cache 4668 context.topCache->InsertPage(page, context.cacheOffset); 4669 context.pageAllocated = true; 4670 } else 4671 DEBUG_PAGE_ACCESS_START(page); 4672 4673 context.page = page; 4674 return B_OK; 4675 } 4676 4677 4678 /*! Makes sure the address in the given address space is mapped. 4679 4680 \param addressSpace The address space. 4681 \param originalAddress The address. Doesn't need to be page aligned. 4682 \param isWrite If \c true the address shall be write-accessible. 4683 \param isUser If \c true the access is requested by a userland team. 
4684 \param wirePage On success, if non \c NULL, the wired count of the page 4685 mapped at the given address is incremented and the page is returned 4686 via this parameter. 4687 \return \c B_OK on success, another error code otherwise. 4688 */ 4689 static status_t 4690 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4691 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4692 { 4693 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4694 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4695 originalAddress, isWrite, isUser)); 4696 4697 PageFaultContext context(addressSpace, isWrite); 4698 4699 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4700 status_t status = B_OK; 4701 4702 addressSpace->IncrementFaultCount(); 4703 4704 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4705 // the pages upfront makes sure we don't have any cache locked, so that the 4706 // page daemon/thief can do their job without problems. 4707 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4708 originalAddress); 4709 context.addressSpaceLocker.Unlock(); 4710 vm_page_reserve_pages(&context.reservation, reservePages, 4711 addressSpace == VMAddressSpace::Kernel() 4712 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4713 4714 while (true) { 4715 context.addressSpaceLocker.Lock(); 4716 4717 // get the area the fault was in 4718 VMArea* area = addressSpace->LookupArea(address); 4719 if (area == NULL) { 4720 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4721 "space\n", originalAddress); 4722 TPF(PageFaultError(-1, 4723 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4724 status = B_BAD_ADDRESS; 4725 break; 4726 } 4727 4728 // check permissions 4729 uint32 protection = get_area_page_protection(area, address); 4730 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4731 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4732 area->id, (void*)originalAddress); 4733 TPF(PageFaultError(area->id, 4734 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4735 status = B_PERMISSION_DENIED; 4736 break; 4737 } 4738 if (isWrite && (protection 4739 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4740 dprintf("write access attempted on write-protected area 0x%" 4741 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4742 TPF(PageFaultError(area->id, 4743 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4744 status = B_PERMISSION_DENIED; 4745 break; 4746 } else if (isExecute && (protection 4747 & (B_EXECUTE_AREA 4748 | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4749 dprintf("instruction fetch attempted on execute-protected area 0x%" 4750 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4751 TPF(PageFaultError(area->id, 4752 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4753 status = B_PERMISSION_DENIED; 4754 break; 4755 } else if (!isWrite && !isExecute && (protection 4756 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4757 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4758 " at %p\n", area->id, (void*)originalAddress); 4759 TPF(PageFaultError(area->id, 4760 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4761 status = B_PERMISSION_DENIED; 4762 break; 4763 } 4764 4765 // We have the area, it was a valid access, so let's try to resolve the 4766 // page fault now. 4767 // At first, the top most cache from the area is investigated. 
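		// The cache offset of the faulting page is its offset within the
		// area plus the area's own offset into the cache.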
4768 4769 context.Prepare(vm_area_get_locked_cache(area), 4770 address - area->Base() + area->cache_offset); 4771 4772 // See if this cache has a fault handler -- this will do all the work 4773 // for us. 4774 { 4775 // Note, since the page fault is resolved with interrupts enabled, 4776 // the fault handler could be called more than once for the same 4777 // reason -- the store must take this into account. 4778 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4779 if (status != B_BAD_HANDLER) 4780 break; 4781 } 4782 4783 // The top most cache has no fault handler, so let's see if the cache or 4784 // its sources already have the page we're searching for (we're going 4785 // from top to bottom). 4786 status = fault_get_page(context); 4787 if (status != B_OK) { 4788 TPF(PageFaultError(area->id, status)); 4789 break; 4790 } 4791 4792 if (context.restart) 4793 continue; 4794 4795 // All went fine, all there is left to do is to map the page into the 4796 // address space. 4797 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4798 context.page)); 4799 4800 // If the page doesn't reside in the area's cache, we need to make sure 4801 // it's mapped in read-only, so that we cannot overwrite someone else's 4802 // data (copy-on-write) 4803 uint32 newProtection = protection; 4804 if (context.page->Cache() != context.topCache && !isWrite) 4805 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4806 4807 bool unmapPage = false; 4808 bool mapPage = true; 4809 4810 // check whether there's already a page mapped at the address 4811 context.map->Lock(); 4812 4813 phys_addr_t physicalAddress; 4814 uint32 flags; 4815 vm_page* mappedPage = NULL; 4816 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4817 && (flags & PAGE_PRESENT) != 0 4818 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4819 != NULL) { 4820 // Yep there's already a page. If it's ours, we can simply adjust 4821 // its protection. Otherwise we have to unmap it. 4822 if (mappedPage == context.page) { 4823 context.map->ProtectPage(area, address, newProtection); 4824 // Note: We assume that ProtectPage() is atomic (i.e. 4825 // the page isn't temporarily unmapped), otherwise we'd have 4826 // to make sure it isn't wired. 4827 mapPage = false; 4828 } else 4829 unmapPage = true; 4830 } 4831 4832 context.map->Unlock(); 4833 4834 if (unmapPage) { 4835 // If the page is wired, we can't unmap it. Wait until it is unwired 4836 // again and restart. Note that the page cannot be wired for 4837 // writing, since it it isn't in the topmost cache. So we can safely 4838 // ignore ranges wired for writing (our own and other concurrent 4839 // wiring attempts in progress) and in fact have to do that to avoid 4840 // a deadlock. 4841 VMAreaUnwiredWaiter waiter; 4842 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4843 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4844 // unlock everything and wait 4845 if (context.pageAllocated) { 4846 // ... but since we allocated a page and inserted it into 4847 // the top cache, remove and free it first. Otherwise we'd 4848 // have a page from a lower cache mapped while an upper 4849 // cache has a page that would shadow it. 
4850 context.topCache->RemovePage(context.page); 4851 vm_page_free_etc(context.topCache, context.page, 4852 &context.reservation); 4853 } else 4854 DEBUG_PAGE_ACCESS_END(context.page); 4855 4856 context.UnlockAll(); 4857 waiter.waitEntry.Wait(); 4858 continue; 4859 } 4860 4861 // Note: The mapped page is a page of a lower cache. We are 4862 // guaranteed to have that cached locked, our new page is a copy of 4863 // that page, and the page is not busy. The logic for that guarantee 4864 // is as follows: Since the page is mapped, it must live in the top 4865 // cache (ruled out above) or any of its lower caches, and there is 4866 // (was before the new page was inserted) no other page in any 4867 // cache between the top cache and the page's cache (otherwise that 4868 // would be mapped instead). That in turn means that our algorithm 4869 // must have found it and therefore it cannot be busy either. 4870 DEBUG_PAGE_ACCESS_START(mappedPage); 4871 unmap_page(area, address); 4872 DEBUG_PAGE_ACCESS_END(mappedPage); 4873 } 4874 4875 if (mapPage) { 4876 if (map_page(area, context.page, address, newProtection, 4877 &context.reservation) != B_OK) { 4878 // Mapping can only fail, when the page mapping object couldn't 4879 // be allocated. Save for the missing mapping everything is 4880 // fine, though. If this was a regular page fault, we'll simply 4881 // leave and probably fault again. To make sure we'll have more 4882 // luck then, we ensure that the minimum object reserve is 4883 // available. 4884 DEBUG_PAGE_ACCESS_END(context.page); 4885 4886 context.UnlockAll(); 4887 4888 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4889 != B_OK) { 4890 // Apparently the situation is serious. Let's get ourselves 4891 // killed. 4892 status = B_NO_MEMORY; 4893 } else if (wirePage != NULL) { 4894 // The caller expects us to wire the page. Since 4895 // object_cache_reserve() succeeded, we should now be able 4896 // to allocate a mapping structure. Restart. 
4897 continue; 4898 } 4899 4900 break; 4901 } 4902 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4903 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4904 4905 // also wire the page, if requested 4906 if (wirePage != NULL && status == B_OK) { 4907 increment_page_wired_count(context.page); 4908 *wirePage = context.page; 4909 } 4910 4911 DEBUG_PAGE_ACCESS_END(context.page); 4912 4913 break; 4914 } 4915 4916 return status; 4917 } 4918 4919 4920 status_t 4921 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4922 { 4923 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4924 } 4925 4926 status_t 4927 vm_put_physical_page(addr_t vaddr, void* handle) 4928 { 4929 return sPhysicalPageMapper->PutPage(vaddr, handle); 4930 } 4931 4932 4933 status_t 4934 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4935 void** _handle) 4936 { 4937 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4938 } 4939 4940 status_t 4941 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4942 { 4943 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4944 } 4945 4946 4947 status_t 4948 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4949 { 4950 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4951 } 4952 4953 status_t 4954 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4955 { 4956 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4957 } 4958 4959 4960 void 4961 vm_get_info(system_info* info) 4962 { 4963 swap_get_info(info); 4964 4965 MutexLocker locker(sAvailableMemoryLock); 4966 info->needed_memory = sNeededMemory; 4967 info->free_memory = sAvailableMemory; 4968 } 4969 4970 4971 uint32 4972 vm_num_page_faults(void) 4973 { 4974 return sPageFaults; 4975 } 4976 4977 4978 off_t 4979 vm_available_memory(void) 4980 { 4981 MutexLocker locker(sAvailableMemoryLock); 4982 return sAvailableMemory; 4983 } 4984 4985 4986 off_t 4987 vm_available_not_needed_memory(void) 4988 { 4989 MutexLocker locker(sAvailableMemoryLock); 4990 return sAvailableMemory - sNeededMemory; 4991 } 4992 4993 4994 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4995 debugger. 
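	It does not acquire \c sAvailableMemoryLock, since taking locks is not an
	option while running in the kernel debugger.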
4996 */ 4997 off_t 4998 vm_available_not_needed_memory_debug(void) 4999 { 5000 return sAvailableMemory - sNeededMemory; 5001 } 5002 5003 5004 size_t 5005 vm_kernel_address_space_left(void) 5006 { 5007 return VMAddressSpace::Kernel()->FreeSpace(); 5008 } 5009 5010 5011 void 5012 vm_unreserve_memory(size_t amount) 5013 { 5014 mutex_lock(&sAvailableMemoryLock); 5015 5016 sAvailableMemory += amount; 5017 5018 mutex_unlock(&sAvailableMemoryLock); 5019 } 5020 5021 5022 status_t 5023 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5024 { 5025 size_t reserve = kMemoryReserveForPriority[priority]; 5026 5027 MutexLocker locker(sAvailableMemoryLock); 5028 5029 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5030 5031 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5032 sAvailableMemory -= amount; 5033 return B_OK; 5034 } 5035 5036 if (timeout <= 0) 5037 return B_NO_MEMORY; 5038 5039 // turn timeout into an absolute timeout 5040 timeout += system_time(); 5041 5042 // loop until we've got the memory or the timeout occurs 5043 do { 5044 sNeededMemory += amount; 5045 5046 // call the low resource manager 5047 locker.Unlock(); 5048 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5049 B_ABSOLUTE_TIMEOUT, timeout); 5050 locker.Lock(); 5051 5052 sNeededMemory -= amount; 5053 5054 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5055 sAvailableMemory -= amount; 5056 return B_OK; 5057 } 5058 } while (timeout > system_time()); 5059 5060 return B_NO_MEMORY; 5061 } 5062 5063 5064 status_t 5065 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5066 { 5067 // NOTE: The caller is responsible for synchronizing calls to this function! 5068 5069 AddressSpaceReadLocker locker; 5070 VMArea* area; 5071 status_t status = locker.SetFromArea(id, area); 5072 if (status != B_OK) 5073 return status; 5074 5075 // nothing to do, if the type doesn't change 5076 uint32 oldType = area->MemoryType(); 5077 if (type == oldType) 5078 return B_OK; 5079 5080 // set the memory type of the area and the mapped pages 5081 VMTranslationMap* map = area->address_space->TranslationMap(); 5082 map->Lock(); 5083 area->SetMemoryType(type); 5084 map->ProtectArea(area, area->protection); 5085 map->Unlock(); 5086 5087 // set the physical memory type 5088 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5089 if (error != B_OK) { 5090 // reset the memory type of the area and the mapped pages 5091 map->Lock(); 5092 area->SetMemoryType(oldType); 5093 map->ProtectArea(area, area->protection); 5094 map->Unlock(); 5095 return error; 5096 } 5097 5098 return B_OK; 5099 5100 } 5101 5102 5103 /*! This function enforces some protection properties: 5104 - kernel areas must be W^X (after kernel startup) 5105 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5106 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5107 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 5108 and B_KERNEL_WRITE_AREA. 
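	For example, plain \c B_READ_AREA becomes \c B_READ_AREA
	| \c B_KERNEL_READ_AREA, while \c B_READ_AREA | \c B_WRITE_AREA gets
	\c B_KERNEL_READ_AREA and \c B_KERNEL_WRITE_AREA added as well.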
5109 */ 5110 static void 5111 fix_protection(uint32* protection) 5112 { 5113 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5114 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5115 || (*protection & B_WRITE_AREA) != 0) 5116 && !gKernelStartup) 5117 panic("kernel areas cannot be both writable and executable!"); 5118 5119 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5120 if ((*protection & B_USER_PROTECTION) == 0 5121 || (*protection & B_WRITE_AREA) != 0) 5122 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5123 else 5124 *protection |= B_KERNEL_READ_AREA; 5125 } 5126 } 5127 5128 5129 static void 5130 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5131 { 5132 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5133 info->area = area->id; 5134 info->address = (void*)area->Base(); 5135 info->size = area->Size(); 5136 info->protection = area->protection; 5137 info->lock = area->wiring; 5138 info->team = area->address_space->ID(); 5139 info->copy_count = 0; 5140 info->in_count = 0; 5141 info->out_count = 0; 5142 // TODO: retrieve real values here! 5143 5144 VMCache* cache = vm_area_get_locked_cache(area); 5145 5146 // Note, this is a simplification; the cache could be larger than this area 5147 info->ram_size = cache->page_count * B_PAGE_SIZE; 5148 5149 vm_area_put_locked_cache(cache); 5150 } 5151 5152 5153 static status_t 5154 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5155 { 5156 // is newSize a multiple of B_PAGE_SIZE? 5157 if (newSize & (B_PAGE_SIZE - 1)) 5158 return B_BAD_VALUE; 5159 5160 // lock all affected address spaces and the cache 5161 VMArea* area; 5162 VMCache* cache; 5163 5164 MultiAddressSpaceLocker locker; 5165 AreaCacheLocker cacheLocker; 5166 5167 status_t status; 5168 size_t oldSize; 5169 bool anyKernelArea; 5170 bool restart; 5171 5172 do { 5173 anyKernelArea = false; 5174 restart = false; 5175 5176 locker.Unset(); 5177 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5178 if (status != B_OK) 5179 return status; 5180 cacheLocker.SetTo(cache, true); // already locked 5181 5182 // enforce restrictions 5183 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5184 || (area->protection & B_KERNEL_AREA) != 0)) { 5185 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5186 "resize kernel area %" B_PRId32 " (%s)\n", 5187 team_get_current_team_id(), areaID, area->name); 5188 return B_NOT_ALLOWED; 5189 } 5190 // TODO: Enforce all restrictions (team, etc.)! 5191 5192 oldSize = area->Size(); 5193 if (newSize == oldSize) 5194 return B_OK; 5195 5196 if (cache->type != CACHE_TYPE_RAM) 5197 return B_NOT_ALLOWED; 5198 5199 if (oldSize < newSize) { 5200 // We need to check if all areas of this cache can be resized. 5201 for (VMArea* current = cache->areas; current != NULL; 5202 current = current->cache_next) { 5203 if (!current->address_space->CanResizeArea(current, newSize)) 5204 return B_ERROR; 5205 anyKernelArea 5206 |= current->address_space == VMAddressSpace::Kernel(); 5207 } 5208 } else { 5209 // We're shrinking the areas, so we must make sure the affected 5210 // ranges are not wired. 
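			// Note: wait_if_area_range_is_wired() temporarily drops the
			// locks while waiting, in which case the whole locking procedure
			// is restarted (see the enclosing do/while loop).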
5211 for (VMArea* current = cache->areas; current != NULL; 5212 current = current->cache_next) { 5213 anyKernelArea 5214 |= current->address_space == VMAddressSpace::Kernel(); 5215 5216 if (wait_if_area_range_is_wired(current, 5217 current->Base() + newSize, oldSize - newSize, &locker, 5218 &cacheLocker)) { 5219 restart = true; 5220 break; 5221 } 5222 } 5223 } 5224 } while (restart); 5225 5226 // Okay, looks good so far, so let's do it 5227 5228 int priority = kernel && anyKernelArea 5229 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5230 uint32 allocationFlags = kernel && anyKernelArea 5231 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5232 5233 if (oldSize < newSize) { 5234 // Growing the cache can fail, so we do it first. 5235 status = cache->Resize(cache->virtual_base + newSize, priority); 5236 if (status != B_OK) 5237 return status; 5238 } 5239 5240 for (VMArea* current = cache->areas; current != NULL; 5241 current = current->cache_next) { 5242 status = current->address_space->ResizeArea(current, newSize, 5243 allocationFlags); 5244 if (status != B_OK) 5245 break; 5246 5247 // We also need to unmap all pages beyond the new size, if the area has 5248 // shrunk 5249 if (newSize < oldSize) { 5250 VMCacheChainLocker cacheChainLocker(cache); 5251 cacheChainLocker.LockAllSourceCaches(); 5252 5253 unmap_pages(current, current->Base() + newSize, 5254 oldSize - newSize); 5255 5256 cacheChainLocker.Unlock(cache); 5257 } 5258 } 5259 5260 if (status == B_OK) { 5261 // Shrink or grow individual page protections if in use. 5262 if (area->page_protections != NULL) { 5263 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5264 uint8* newProtections 5265 = (uint8*)realloc(area->page_protections, bytes); 5266 if (newProtections == NULL) 5267 status = B_NO_MEMORY; 5268 else { 5269 area->page_protections = newProtections; 5270 5271 if (oldSize < newSize) { 5272 // init the additional page protections to that of the area 5273 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5274 uint32 areaProtection = area->protection 5275 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5276 memset(area->page_protections + offset, 5277 areaProtection | (areaProtection << 4), bytes - offset); 5278 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5279 uint8& entry = area->page_protections[offset - 1]; 5280 entry = (entry & 0x0f) | (areaProtection << 4); 5281 } 5282 } 5283 } 5284 } 5285 } 5286 5287 // shrinking the cache can't fail, so we do it now 5288 if (status == B_OK && newSize < oldSize) 5289 status = cache->Resize(cache->virtual_base + newSize, priority); 5290 5291 if (status != B_OK) { 5292 // Something failed -- resize the areas back to their original size. 5293 // This can fail, too, in which case we're seriously screwed. 
5294 for (VMArea* current = cache->areas; current != NULL; 5295 current = current->cache_next) { 5296 if (current->address_space->ResizeArea(current, oldSize, 5297 allocationFlags) != B_OK) { 5298 panic("vm_resize_area(): Failed and not being able to restore " 5299 "original state."); 5300 } 5301 } 5302 5303 cache->Resize(cache->virtual_base + oldSize, priority); 5304 } 5305 5306 // TODO: we must honour the lock restrictions of this area 5307 return status; 5308 } 5309 5310 5311 status_t 5312 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5313 { 5314 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5315 } 5316 5317 5318 status_t 5319 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5320 { 5321 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5322 } 5323 5324 5325 status_t 5326 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5327 bool user) 5328 { 5329 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5330 } 5331 5332 5333 void 5334 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5335 { 5336 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5337 } 5338 5339 5340 /*! Copies a range of memory directly from/to a page that might not be mapped 5341 at the moment. 5342 5343 For \a unsafeMemory the current mapping (if any is ignored). The function 5344 walks through the respective area's cache chain to find the physical page 5345 and copies from/to it directly. 5346 The memory range starting at \a unsafeMemory with a length of \a size bytes 5347 must not cross a page boundary. 5348 5349 \param teamID The team ID identifying the address space \a unsafeMemory is 5350 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5351 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5352 is passed, the address space of the thread returned by 5353 debug_get_debugged_thread() is used. 5354 \param unsafeMemory The start of the unsafe memory range to be copied 5355 from/to. 5356 \param buffer A safely accessible kernel buffer to be copied from/to. 5357 \param size The number of bytes to be copied. 5358 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5359 \a unsafeMemory, the other way around otherwise. 
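	\return \c B_OK on success, \c B_BAD_VALUE if the range is larger than a
		page or crosses a page boundary, \c B_BAD_ADDRESS if the address
		space or area cannot be found, \c B_UNSUPPORTED if the page is not
		resident (or, when copying to \a unsafeMemory, not in the area's top
		cache), or the error returned by the physical copy otherwise.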
5360 */ 5361 status_t 5362 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5363 size_t size, bool copyToUnsafe) 5364 { 5365 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5366 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5367 return B_BAD_VALUE; 5368 } 5369 5370 // get the address space for the debugged thread 5371 VMAddressSpace* addressSpace; 5372 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5373 addressSpace = VMAddressSpace::Kernel(); 5374 } else if (teamID == B_CURRENT_TEAM) { 5375 Thread* thread = debug_get_debugged_thread(); 5376 if (thread == NULL || thread->team == NULL) 5377 return B_BAD_ADDRESS; 5378 5379 addressSpace = thread->team->address_space; 5380 } else 5381 addressSpace = VMAddressSpace::DebugGet(teamID); 5382 5383 if (addressSpace == NULL) 5384 return B_BAD_ADDRESS; 5385 5386 // get the area 5387 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5388 if (area == NULL) 5389 return B_BAD_ADDRESS; 5390 5391 // search the page 5392 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5393 + area->cache_offset; 5394 VMCache* cache = area->cache; 5395 vm_page* page = NULL; 5396 while (cache != NULL) { 5397 page = cache->DebugLookupPage(cacheOffset); 5398 if (page != NULL) 5399 break; 5400 5401 // Page not found in this cache -- if it is paged out, we must not try 5402 // to get it from lower caches. 5403 if (cache->DebugHasPage(cacheOffset)) 5404 break; 5405 5406 cache = cache->source; 5407 } 5408 5409 if (page == NULL) 5410 return B_UNSUPPORTED; 5411 5412 // copy from/to physical memory 5413 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5414 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5415 5416 if (copyToUnsafe) { 5417 if (page->Cache() != area->cache) 5418 return B_UNSUPPORTED; 5419 5420 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5421 } 5422 5423 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5424 } 5425 5426 5427 /** Validate that a memory range is either fully in kernel space, or fully in 5428 * userspace */ 5429 static inline bool 5430 validate_memory_range(const void* addr, size_t size) 5431 { 5432 addr_t address = (addr_t)addr; 5433 5434 // Check for overflows on all addresses. 5435 if ((address + size) < address) 5436 return false; 5437 5438 // Validate that the address range does not cross the kernel/user boundary. 5439 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5440 } 5441 5442 5443 /** Validate that a memory range is fully in userspace. */ 5444 static inline bool 5445 validate_user_memory_range(const void* addr, size_t size) 5446 { 5447 addr_t address = (addr_t)addr; 5448 5449 // Check for overflows on all addresses. 5450 if ((address + size) < address) 5451 return false; 5452 5453 // Validate that both the start and end address are in userspace 5454 return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1); 5455 } 5456 5457 5458 // #pragma mark - kernel public API 5459 5460 5461 status_t 5462 user_memcpy(void* to, const void* from, size_t size) 5463 { 5464 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5465 return B_BAD_ADDRESS; 5466 5467 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5468 return B_BAD_ADDRESS; 5469 5470 return B_OK; 5471 } 5472 5473 5474 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5475 the string in \a to, NULL-terminating the result. 5476 5477 \param to Pointer to the destination C-string. 
5478 \param from Pointer to the source C-string. 5479 \param size Size in bytes of the string buffer pointed to by \a to. 5480 5481 \return strlen(\a from). 5482 */ 5483 ssize_t 5484 user_strlcpy(char* to, const char* from, size_t size) 5485 { 5486 if (to == NULL && size != 0) 5487 return B_BAD_VALUE; 5488 if (from == NULL) 5489 return B_BAD_ADDRESS; 5490 5491 // Protect the source address from overflows. 5492 size_t maxSize = size; 5493 if ((addr_t)from + maxSize < (addr_t)from) 5494 maxSize -= (addr_t)from + maxSize; 5495 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5496 maxSize = USER_TOP - (addr_t)from; 5497 5498 if (!validate_memory_range(to, maxSize)) 5499 return B_BAD_ADDRESS; 5500 5501 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5502 if (result < 0) 5503 return result; 5504 5505 // If we hit the address overflow boundary, fail. 5506 if ((size_t)result >= maxSize && maxSize < size) 5507 return B_BAD_ADDRESS; 5508 5509 return result; 5510 } 5511 5512 5513 status_t 5514 user_memset(void* s, char c, size_t count) 5515 { 5516 if (!validate_memory_range(s, count)) 5517 return B_BAD_ADDRESS; 5518 5519 if (arch_cpu_user_memset(s, c, count) < B_OK) 5520 return B_BAD_ADDRESS; 5521 5522 return B_OK; 5523 } 5524 5525 5526 /*! Wires a single page at the given address. 5527 5528 \param team The team whose address space the address belongs to. Supports 5529 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5530 parameter is ignored. 5531 \param address address The virtual address to wire down. Does not need to 5532 be page aligned. 5533 \param writable If \c true the page shall be writable. 5534 \param info On success the info is filled in, among other things 5535 containing the physical address the given virtual one translates to. 5536 \return \c B_OK, when the page could be wired, another error code otherwise. 5537 */ 5538 status_t 5539 vm_wire_page(team_id team, addr_t address, bool writable, 5540 VMPageWiringInfo* info) 5541 { 5542 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5543 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5544 5545 // compute the page protection that is required 5546 bool isUser = IS_USER_ADDRESS(address); 5547 uint32 requiredProtection = PAGE_PRESENT 5548 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5549 if (writable) 5550 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5551 5552 // get and read lock the address space 5553 VMAddressSpace* addressSpace = NULL; 5554 if (isUser) { 5555 if (team == B_CURRENT_TEAM) 5556 addressSpace = VMAddressSpace::GetCurrent(); 5557 else 5558 addressSpace = VMAddressSpace::Get(team); 5559 } else 5560 addressSpace = VMAddressSpace::GetKernel(); 5561 if (addressSpace == NULL) 5562 return B_ERROR; 5563 5564 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5565 5566 VMTranslationMap* map = addressSpace->TranslationMap(); 5567 status_t error = B_OK; 5568 5569 // get the area 5570 VMArea* area = addressSpace->LookupArea(pageAddress); 5571 if (area == NULL) { 5572 addressSpace->Put(); 5573 return B_BAD_ADDRESS; 5574 } 5575 5576 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5577 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5578 5579 // mark the area range wired 5580 area->Wire(&info->range); 5581 5582 // Lock the area's cache chain and the translation map. Needed to look 5583 // up the page and play with its wired count. 
5584 cacheChainLocker.LockAllSourceCaches();
5585 map->Lock();
5586
5587 phys_addr_t physicalAddress;
5588 uint32 flags;
5589 vm_page* page;
5590 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5591 && (flags & requiredProtection) == requiredProtection
5592 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5593 != NULL) {
5594 // Already mapped with the correct permissions -- just increment
5595 // the page's wired count.
5596 increment_page_wired_count(page);
5597
5598 map->Unlock();
5599 cacheChainLocker.Unlock();
5600 addressSpaceLocker.Unlock();
5601 } else {
5602 // Let vm_soft_fault() map the page for us, if possible. We need
5603 // to fully unlock to avoid deadlocks. Since we have already
5604 // wired the area itself, nothing disturbing will happen with it
5605 // in the meantime.
5606 map->Unlock();
5607 cacheChainLocker.Unlock();
5608 addressSpaceLocker.Unlock();
5609
5610 error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5611 isUser, &page);
5612
5613 if (error != B_OK) {
5614 // The page could not be mapped -- clean up.
5615 VMCache* cache = vm_area_get_locked_cache(area);
5616 area->Unwire(&info->range);
5617 cache->ReleaseRefAndUnlock();
5618 addressSpace->Put();
5619 return error;
5620 }
5621 }
5622
5623 info->physicalAddress
5624 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5625 + address % B_PAGE_SIZE;
5626 info->page = page;
5627
5628 return B_OK;
5629 }
5630
5631
5632 /*! Unwires a single page previously wired via vm_wire_page().
5633
5634 \param info The same object passed to vm_wire_page() before.
5635 */
5636 void
5637 vm_unwire_page(VMPageWiringInfo* info)
5638 {
5639 // lock the address space
5640 VMArea* area = info->range.area;
5641 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5642 // takes over our reference
5643
5644 // lock the top cache
5645 VMCache* cache = vm_area_get_locked_cache(area);
5646 VMCacheChainLocker cacheChainLocker(cache);
5647
5648 if (info->page->Cache() != cache) {
5649 // The page is not in the top cache, so we lock the whole cache chain
5650 // before touching the page's wired count.
5651 cacheChainLocker.LockAllSourceCaches();
5652 }
5653
5654 decrement_page_wired_count(info->page);
5655
5656 // remove the wired range from the area
5657 area->Unwire(&info->range);
5658
5659 cacheChainLocker.Unlock();
5660 }
5661
5662
5663 /*! Wires down the given address range in the specified team's address space.
5664
5665 If successful the function
5666 - acquires a reference to the specified team's address space,
5667 - adds respective wired ranges to all areas that intersect with the given
5668 address range,
5669 - makes sure all pages in the given address range are mapped with the
5670 requested access permissions and increments their wired count.
5671
5672 It fails when \a team doesn't specify a valid address space, when any part
5673 of the specified address range is not covered by areas, when the concerned
5674 areas don't allow mapping with the requested permissions, or when mapping
5675 failed for another reason.
5676
5677 When successful the call must be balanced by an unlock_memory_etc() call with
5678 the exact same parameters.
5679
5680 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5681 supported.
5682 \param address The start of the address range to be wired.
5683 \param numBytes The size of the address range to be wired.
5684 \param flags Flags.
Currently only \c B_READ_DEVICE is defined, which 5685 requests that the range must be wired writable ("read from device 5686 into memory"). 5687 \return \c B_OK on success, another error code otherwise. 5688 */ 5689 status_t 5690 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5691 { 5692 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5693 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5694 5695 // compute the page protection that is required 5696 bool isUser = IS_USER_ADDRESS(address); 5697 bool writable = (flags & B_READ_DEVICE) == 0; 5698 uint32 requiredProtection = PAGE_PRESENT 5699 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5700 if (writable) 5701 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5702 5703 uint32 mallocFlags = isUser 5704 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5705 5706 // get and read lock the address space 5707 VMAddressSpace* addressSpace = NULL; 5708 if (isUser) { 5709 if (team == B_CURRENT_TEAM) 5710 addressSpace = VMAddressSpace::GetCurrent(); 5711 else 5712 addressSpace = VMAddressSpace::Get(team); 5713 } else 5714 addressSpace = VMAddressSpace::GetKernel(); 5715 if (addressSpace == NULL) 5716 return B_ERROR; 5717 5718 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5719 // We get a new address space reference here. The one we got above will 5720 // be freed by unlock_memory_etc(). 5721 5722 VMTranslationMap* map = addressSpace->TranslationMap(); 5723 status_t error = B_OK; 5724 5725 // iterate through all concerned areas 5726 addr_t nextAddress = lockBaseAddress; 5727 while (nextAddress != lockEndAddress) { 5728 // get the next area 5729 VMArea* area = addressSpace->LookupArea(nextAddress); 5730 if (area == NULL) { 5731 error = B_BAD_ADDRESS; 5732 break; 5733 } 5734 5735 addr_t areaStart = nextAddress; 5736 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5737 5738 // allocate the wired range (do that before locking the cache to avoid 5739 // deadlocks) 5740 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5741 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5742 if (range == NULL) { 5743 error = B_NO_MEMORY; 5744 break; 5745 } 5746 5747 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5748 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5749 5750 // mark the area range wired 5751 area->Wire(range); 5752 5753 // Depending on the area cache type and the wiring, we may not need to 5754 // look at the individual pages. 5755 if (area->cache_type == CACHE_TYPE_NULL 5756 || area->cache_type == CACHE_TYPE_DEVICE 5757 || area->wiring == B_FULL_LOCK 5758 || area->wiring == B_CONTIGUOUS) { 5759 nextAddress = areaEnd; 5760 continue; 5761 } 5762 5763 // Lock the area's cache chain and the translation map. Needed to look 5764 // up pages and play with their wired count. 5765 cacheChainLocker.LockAllSourceCaches(); 5766 map->Lock(); 5767 5768 // iterate through the pages and wire them 5769 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5770 phys_addr_t physicalAddress; 5771 uint32 flags; 5772 5773 vm_page* page; 5774 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5775 && (flags & requiredProtection) == requiredProtection 5776 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5777 != NULL) { 5778 // Already mapped with the correct permissions -- just increment 5779 // the page's wired count. 
5780 increment_page_wired_count(page); 5781 } else { 5782 // Let vm_soft_fault() map the page for us, if possible. We need 5783 // to fully unlock to avoid deadlocks. Since we have already 5784 // wired the area itself, nothing disturbing will happen with it 5785 // in the meantime. 5786 map->Unlock(); 5787 cacheChainLocker.Unlock(); 5788 addressSpaceLocker.Unlock(); 5789 5790 error = vm_soft_fault(addressSpace, nextAddress, writable, 5791 false, isUser, &page); 5792 5793 addressSpaceLocker.Lock(); 5794 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5795 cacheChainLocker.LockAllSourceCaches(); 5796 map->Lock(); 5797 } 5798 5799 if (error != B_OK) 5800 break; 5801 } 5802 5803 map->Unlock(); 5804 5805 if (error == B_OK) { 5806 cacheChainLocker.Unlock(); 5807 } else { 5808 // An error occurred, so abort right here. If the current address 5809 // is the first in this area, unwire the area, since we won't get 5810 // to it when reverting what we've done so far. 5811 if (nextAddress == areaStart) { 5812 area->Unwire(range); 5813 cacheChainLocker.Unlock(); 5814 range->~VMAreaWiredRange(); 5815 free_etc(range, mallocFlags); 5816 } else 5817 cacheChainLocker.Unlock(); 5818 5819 break; 5820 } 5821 } 5822 5823 if (error != B_OK) { 5824 // An error occurred, so unwire all that we've already wired. Note that 5825 // even if not a single page was wired, unlock_memory_etc() is called 5826 // to put the address space reference. 5827 addressSpaceLocker.Unlock(); 5828 unlock_memory_etc(team, (void*)lockBaseAddress, 5829 nextAddress - lockBaseAddress, flags); 5830 } 5831 5832 return error; 5833 } 5834 5835 5836 status_t 5837 lock_memory(void* address, size_t numBytes, uint32 flags) 5838 { 5839 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5840 } 5841 5842 5843 /*! Unwires an address range previously wired with lock_memory_etc(). 5844 5845 Note that a call to this function must balance a previous lock_memory_etc() 5846 call with exactly the same parameters. 5847 */ 5848 status_t 5849 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5850 { 5851 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5852 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5853 5854 // compute the page protection that is required 5855 bool isUser = IS_USER_ADDRESS(address); 5856 bool writable = (flags & B_READ_DEVICE) == 0; 5857 uint32 requiredProtection = PAGE_PRESENT 5858 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5859 if (writable) 5860 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5861 5862 uint32 mallocFlags = isUser 5863 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5864 5865 // get and read lock the address space 5866 VMAddressSpace* addressSpace = NULL; 5867 if (isUser) { 5868 if (team == B_CURRENT_TEAM) 5869 addressSpace = VMAddressSpace::GetCurrent(); 5870 else 5871 addressSpace = VMAddressSpace::Get(team); 5872 } else 5873 addressSpace = VMAddressSpace::GetKernel(); 5874 if (addressSpace == NULL) 5875 return B_ERROR; 5876 5877 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5878 // Take over the address space reference. We don't unlock until we're 5879 // done. 
5880
5881 VMTranslationMap* map = addressSpace->TranslationMap();
5882 status_t error = B_OK;
5883
5884 // iterate through all concerned areas
5885 addr_t nextAddress = lockBaseAddress;
5886 while (nextAddress != lockEndAddress) {
5887 // get the next area
5888 VMArea* area = addressSpace->LookupArea(nextAddress);
5889 if (area == NULL) {
5890 error = B_BAD_ADDRESS;
5891 break;
5892 }
5893
5894 addr_t areaStart = nextAddress;
5895 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5896
5897 // Lock the area's top cache. This is a requirement for
5898 // VMArea::Unwire().
5899 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5900
5901 // Depending on the area cache type and the wiring, we may not need to
5902 // look at the individual pages.
5903 if (area->cache_type == CACHE_TYPE_NULL
5904 || area->cache_type == CACHE_TYPE_DEVICE
5905 || area->wiring == B_FULL_LOCK
5906 || area->wiring == B_CONTIGUOUS) {
5907 // unwire the range (to avoid deadlocks we delete the range after
5908 // unlocking the cache)
5909 nextAddress = areaEnd;
5910 VMAreaWiredRange* range = area->Unwire(areaStart,
5911 areaEnd - areaStart, writable);
5912 cacheChainLocker.Unlock();
5913 if (range != NULL) {
5914 range->~VMAreaWiredRange();
5915 free_etc(range, mallocFlags);
5916 }
5917 continue;
5918 }
5919
5920 // Lock the area's cache chain and the translation map. Needed to look
5921 // up pages and play with their wired count.
5922 cacheChainLocker.LockAllSourceCaches();
5923 map->Lock();
5924
5925 // iterate through the pages and unwire them
5926 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5927 phys_addr_t physicalAddress;
5928 uint32 flags;
5929
5930 vm_page* page;
5931 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5932 && (flags & PAGE_PRESENT) != 0
5933 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5934 != NULL) {
5935 // The page is still mapped -- just decrement
5936 // the page's wired count.
5937 decrement_page_wired_count(page);
5938 } else {
5939 panic("unlock_memory_etc(): Failed to unwire page: address "
5940 "space %p, address: %#" B_PRIxADDR, addressSpace,
5941 nextAddress);
5942 error = B_BAD_VALUE;
5943 break;
5944 }
5945 }
5946
5947 map->Unlock();
5948
5949 // All pages are unwired. Remove the area's wired range as well (to
5950 // avoid deadlocks we delete the range after unlocking the cache).
5951 VMAreaWiredRange* range = area->Unwire(areaStart,
5952 areaEnd - areaStart, writable);
5953
5954 cacheChainLocker.Unlock();
5955
5956 if (range != NULL) {
5957 range->~VMAreaWiredRange();
5958 free_etc(range, mallocFlags);
5959 }
5960
5961 if (error != B_OK)
5962 break;
5963 }
5964
5965 // get rid of the address space reference lock_memory_etc() acquired
5966 addressSpace->Put();
5967
5968 return error;
5969 }
5970
5971
5972 status_t
5973 unlock_memory(void* address, size_t numBytes, uint32 flags)
5974 {
5975 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5976 }
5977
5978
5979 /*! Similar to get_memory_map(), but also allows specifying the address space
5980 for the memory in question and has saner semantics.
5981 Returns \c B_OK when the complete range could be translated or
5982 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5983 case the actual number of entries is written to \c *_numEntries. Any other
5984 error case indicates complete failure; \c *_numEntries will be set to \c 0
5985 in this case.
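
	A minimal usage sketch (\c buffer and \c bufferSize are hypothetical
	caller-side names; error handling omitted):
	\code
	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
		entries, &count);
	// On B_OK or B_BUFFER_OVERFLOW the first count entries describe the
	// physical runs backing (the start of) the range.
	\endcode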
5986 */
5987 status_t
5988 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5989 physical_entry* table, uint32* _numEntries)
5990 {
5991 uint32 numEntries = *_numEntries;
5992 *_numEntries = 0;
5993
5994 VMAddressSpace* addressSpace;
5995 addr_t virtualAddress = (addr_t)address;
5996 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5997 phys_addr_t physicalAddress;
5998 status_t status = B_OK;
5999 int32 index = -1;
6000 addr_t offset = 0;
6001 bool interrupts = are_interrupts_enabled();
6002
6003 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6004 "entries)\n", team, address, numBytes, numEntries));
6005
6006 if (numEntries == 0 || numBytes == 0)
6007 return B_BAD_VALUE;
6008
6009 // in which address space is the address to be found?
6010 if (IS_USER_ADDRESS(virtualAddress)) {
6011 if (team == B_CURRENT_TEAM)
6012 addressSpace = VMAddressSpace::GetCurrent();
6013 else
6014 addressSpace = VMAddressSpace::Get(team);
6015 } else
6016 addressSpace = VMAddressSpace::GetKernel();
6017
6018 if (addressSpace == NULL)
6019 return B_ERROR;
6020
6021 VMTranslationMap* map = addressSpace->TranslationMap();
6022
6023 if (interrupts)
6024 map->Lock();
6025
6026 while (offset < numBytes) {
6027 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6028 uint32 flags;
6029
6030 if (interrupts) {
6031 status = map->Query((addr_t)address + offset, &physicalAddress,
6032 &flags);
6033 } else {
6034 status = map->QueryInterrupt((addr_t)address + offset,
6035 &physicalAddress, &flags);
6036 }
6037 if (status < B_OK)
6038 break;
6039 if ((flags & PAGE_PRESENT) == 0) {
6040 panic("get_memory_map() called on unmapped memory!");
6041 return B_BAD_ADDRESS;
6042 }
6043
6044 if (index < 0 && pageOffset > 0) {
6045 physicalAddress += pageOffset;
6046 if (bytes > B_PAGE_SIZE - pageOffset)
6047 bytes = B_PAGE_SIZE - pageOffset;
6048 }
6049
6050 // need to switch to the next physical_entry?
6051 if (index < 0 || table[index].address
6052 != physicalAddress - table[index].size) {
6053 if ((uint32)++index + 1 > numEntries) {
6054 // table too small
6055 break;
6056 }
6057 table[index].address = physicalAddress;
6058 table[index].size = bytes;
6059 } else {
6060 // page does fit in current entry
6061 table[index].size += bytes;
6062 }
6063
6064 offset += bytes;
6065 }
6066
6067 if (interrupts)
6068 map->Unlock();
6069
6070 if (status != B_OK)
6071 return status;
6072
6073 if ((uint32)index + 1 > numEntries) {
6074 *_numEntries = index;
6075 return B_BUFFER_OVERFLOW;
6076 }
6077
6078 *_numEntries = index + 1;
6079 return B_OK;
6080 }
6081
6082
6083 /*! According to the BeBook, this function should always succeed.
6084 This is no longer the case.
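	It forwards to get_memory_map_etc() with \c B_CURRENT_TEAM and may
	therefore return \c B_BAD_VALUE, \c B_BUFFER_OVERFLOW, or any other error
	that function yields.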
6085 */ 6086 extern "C" int32 6087 __get_memory_map_haiku(const void* address, size_t numBytes, 6088 physical_entry* table, int32 numEntries) 6089 { 6090 uint32 entriesRead = numEntries; 6091 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6092 table, &entriesRead); 6093 if (error != B_OK) 6094 return error; 6095 6096 // close the entry list 6097 6098 // if it's only one entry, we will silently accept the missing ending 6099 if (numEntries == 1) 6100 return B_OK; 6101 6102 if (entriesRead + 1 > (uint32)numEntries) 6103 return B_BUFFER_OVERFLOW; 6104 6105 table[entriesRead].address = 0; 6106 table[entriesRead].size = 0; 6107 6108 return B_OK; 6109 } 6110 6111 6112 area_id 6113 area_for(void* address) 6114 { 6115 return vm_area_for((addr_t)address, true); 6116 } 6117 6118 6119 area_id 6120 find_area(const char* name) 6121 { 6122 return VMAreaHash::Find(name); 6123 } 6124 6125 6126 status_t 6127 _get_area_info(area_id id, area_info* info, size_t size) 6128 { 6129 if (size != sizeof(area_info) || info == NULL) 6130 return B_BAD_VALUE; 6131 6132 AddressSpaceReadLocker locker; 6133 VMArea* area; 6134 status_t status = locker.SetFromArea(id, area); 6135 if (status != B_OK) 6136 return status; 6137 6138 fill_area_info(area, info, size); 6139 return B_OK; 6140 } 6141 6142 6143 status_t 6144 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6145 { 6146 addr_t nextBase = *(addr_t*)cookie; 6147 6148 // we're already through the list 6149 if (nextBase == (addr_t)-1) 6150 return B_ENTRY_NOT_FOUND; 6151 6152 if (team == B_CURRENT_TEAM) 6153 team = team_get_current_team_id(); 6154 6155 AddressSpaceReadLocker locker(team); 6156 if (!locker.IsLocked()) 6157 return B_BAD_TEAM_ID; 6158 6159 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6160 if (area == NULL) { 6161 nextBase = (addr_t)-1; 6162 return B_ENTRY_NOT_FOUND; 6163 } 6164 6165 fill_area_info(area, info, size); 6166 *cookie = (ssize_t)(area->Base() + 1); 6167 6168 return B_OK; 6169 } 6170 6171 6172 status_t 6173 set_area_protection(area_id area, uint32 newProtection) 6174 { 6175 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6176 newProtection, true); 6177 } 6178 6179 6180 status_t 6181 resize_area(area_id areaID, size_t newSize) 6182 { 6183 return vm_resize_area(areaID, newSize, true); 6184 } 6185 6186 6187 /*! Transfers the specified area to a new team. The caller must be the owner 6188 of the area. 6189 */ 6190 area_id 6191 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6192 bool kernel) 6193 { 6194 area_info info; 6195 status_t status = get_area_info(id, &info); 6196 if (status != B_OK) 6197 return status; 6198 6199 if (info.team != thread_get_current_thread()->team->id) 6200 return B_PERMISSION_DENIED; 6201 6202 // We need to mark the area cloneable so the following operations work. 6203 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6204 if (status != B_OK) 6205 return status; 6206 6207 area_id clonedArea = vm_clone_area(target, info.name, _address, 6208 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6209 if (clonedArea < 0) 6210 return clonedArea; 6211 6212 status = vm_delete_area(info.team, id, kernel); 6213 if (status != B_OK) { 6214 vm_delete_area(target, clonedArea, kernel); 6215 return status; 6216 } 6217 6218 // Now we can reset the protection to whatever it was before. 
6219 set_area_protection(clonedArea, info.protection); 6220 6221 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6222 6223 return clonedArea; 6224 } 6225 6226 6227 extern "C" area_id 6228 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6229 size_t numBytes, uint32 addressSpec, uint32 protection, 6230 void** _virtualAddress) 6231 { 6232 if (!arch_vm_supports_protection(protection)) 6233 return B_NOT_SUPPORTED; 6234 6235 fix_protection(&protection); 6236 6237 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6238 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6239 false); 6240 } 6241 6242 6243 area_id 6244 clone_area(const char* name, void** _address, uint32 addressSpec, 6245 uint32 protection, area_id source) 6246 { 6247 if ((protection & B_KERNEL_PROTECTION) == 0) 6248 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6249 6250 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6251 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6252 } 6253 6254 6255 area_id 6256 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6257 uint32 protection, uint32 flags, uint32 guardSize, 6258 const virtual_address_restrictions* virtualAddressRestrictions, 6259 const physical_address_restrictions* physicalAddressRestrictions, 6260 void** _address) 6261 { 6262 fix_protection(&protection); 6263 6264 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6265 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6266 true, _address); 6267 } 6268 6269 6270 extern "C" area_id 6271 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6272 size_t size, uint32 lock, uint32 protection) 6273 { 6274 fix_protection(&protection); 6275 6276 virtual_address_restrictions virtualRestrictions = {}; 6277 virtualRestrictions.address = *_address; 6278 virtualRestrictions.address_specification = addressSpec; 6279 physical_address_restrictions physicalRestrictions = {}; 6280 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6281 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6282 true, _address); 6283 } 6284 6285 6286 status_t 6287 delete_area(area_id area) 6288 { 6289 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6290 } 6291 6292 6293 // #pragma mark - Userland syscalls 6294 6295 6296 status_t 6297 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6298 addr_t size) 6299 { 6300 // filter out some unavailable values (for userland) 6301 switch (addressSpec) { 6302 case B_ANY_KERNEL_ADDRESS: 6303 case B_ANY_KERNEL_BLOCK_ADDRESS: 6304 return B_BAD_VALUE; 6305 } 6306 6307 addr_t address; 6308 6309 if (!IS_USER_ADDRESS(userAddress) 6310 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6311 return B_BAD_ADDRESS; 6312 6313 status_t status = vm_reserve_address_range( 6314 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6315 RESERVED_AVOID_BASE); 6316 if (status != B_OK) 6317 return status; 6318 6319 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6320 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6321 (void*)address, size); 6322 return B_BAD_ADDRESS; 6323 } 6324 6325 return B_OK; 6326 } 6327 6328 6329 status_t 6330 _user_unreserve_address_range(addr_t address, addr_t size) 6331 { 6332 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6333 (void*)address, size); 6334 } 6335 6336 6337 area_id 
6338 _user_area_for(void* address) 6339 { 6340 return vm_area_for((addr_t)address, false); 6341 } 6342 6343 6344 area_id 6345 _user_find_area(const char* userName) 6346 { 6347 char name[B_OS_NAME_LENGTH]; 6348 6349 if (!IS_USER_ADDRESS(userName) 6350 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6351 return B_BAD_ADDRESS; 6352 6353 return find_area(name); 6354 } 6355 6356 6357 status_t 6358 _user_get_area_info(area_id area, area_info* userInfo) 6359 { 6360 if (!IS_USER_ADDRESS(userInfo)) 6361 return B_BAD_ADDRESS; 6362 6363 area_info info; 6364 status_t status = get_area_info(area, &info); 6365 if (status < B_OK) 6366 return status; 6367 6368 // TODO: do we want to prevent userland from seeing kernel protections? 6369 //info.protection &= B_USER_PROTECTION; 6370 6371 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6372 return B_BAD_ADDRESS; 6373 6374 return status; 6375 } 6376 6377 6378 status_t 6379 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6380 { 6381 ssize_t cookie; 6382 6383 if (!IS_USER_ADDRESS(userCookie) 6384 || !IS_USER_ADDRESS(userInfo) 6385 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6386 return B_BAD_ADDRESS; 6387 6388 area_info info; 6389 status_t status = _get_next_area_info(team, &cookie, &info, 6390 sizeof(area_info)); 6391 if (status != B_OK) 6392 return status; 6393 6394 //info.protection &= B_USER_PROTECTION; 6395 6396 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6397 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6398 return B_BAD_ADDRESS; 6399 6400 return status; 6401 } 6402 6403 6404 status_t 6405 _user_set_area_protection(area_id area, uint32 newProtection) 6406 { 6407 if ((newProtection & ~B_USER_PROTECTION) != 0) 6408 return B_BAD_VALUE; 6409 6410 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6411 newProtection, false); 6412 } 6413 6414 6415 status_t 6416 _user_resize_area(area_id area, size_t newSize) 6417 { 6418 // TODO: Since we restrict deleting of areas to those owned by the team, 6419 // we should also do that for resizing (check other functions, too). 
6420 return vm_resize_area(area, newSize, false); 6421 } 6422 6423 6424 area_id 6425 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6426 team_id target) 6427 { 6428 // filter out some unavailable values (for userland) 6429 switch (addressSpec) { 6430 case B_ANY_KERNEL_ADDRESS: 6431 case B_ANY_KERNEL_BLOCK_ADDRESS: 6432 return B_BAD_VALUE; 6433 } 6434 6435 void* address; 6436 if (!IS_USER_ADDRESS(userAddress) 6437 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6438 return B_BAD_ADDRESS; 6439 6440 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6441 if (newArea < B_OK) 6442 return newArea; 6443 6444 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6445 return B_BAD_ADDRESS; 6446 6447 return newArea; 6448 } 6449 6450 6451 area_id 6452 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6453 uint32 protection, area_id sourceArea) 6454 { 6455 char name[B_OS_NAME_LENGTH]; 6456 void* address; 6457 6458 // filter out some unavailable values (for userland) 6459 switch (addressSpec) { 6460 case B_ANY_KERNEL_ADDRESS: 6461 case B_ANY_KERNEL_BLOCK_ADDRESS: 6462 return B_BAD_VALUE; 6463 } 6464 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6465 return B_BAD_VALUE; 6466 6467 if (!IS_USER_ADDRESS(userName) 6468 || !IS_USER_ADDRESS(userAddress) 6469 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6470 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6471 return B_BAD_ADDRESS; 6472 6473 fix_protection(&protection); 6474 6475 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6476 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6477 false); 6478 if (clonedArea < B_OK) 6479 return clonedArea; 6480 6481 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6482 delete_area(clonedArea); 6483 return B_BAD_ADDRESS; 6484 } 6485 6486 return clonedArea; 6487 } 6488 6489 6490 area_id 6491 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6492 size_t size, uint32 lock, uint32 protection) 6493 { 6494 char name[B_OS_NAME_LENGTH]; 6495 void* address; 6496 6497 // filter out some unavailable values (for userland) 6498 switch (addressSpec) { 6499 case B_ANY_KERNEL_ADDRESS: 6500 case B_ANY_KERNEL_BLOCK_ADDRESS: 6501 return B_BAD_VALUE; 6502 } 6503 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6504 return B_BAD_VALUE; 6505 6506 if (!IS_USER_ADDRESS(userName) 6507 || !IS_USER_ADDRESS(userAddress) 6508 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6509 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6510 return B_BAD_ADDRESS; 6511 6512 if (addressSpec == B_EXACT_ADDRESS 6513 && IS_KERNEL_ADDRESS(address)) 6514 return B_BAD_VALUE; 6515 6516 if (addressSpec == B_ANY_ADDRESS) 6517 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6518 if (addressSpec == B_BASE_ADDRESS) 6519 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6520 6521 fix_protection(&protection); 6522 6523 virtual_address_restrictions virtualRestrictions = {}; 6524 virtualRestrictions.address = address; 6525 virtualRestrictions.address_specification = addressSpec; 6526 physical_address_restrictions physicalRestrictions = {}; 6527 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6528 size, lock, protection, 0, 0, &virtualRestrictions, 6529 &physicalRestrictions, false, &address); 6530 6531 if (area >= B_OK 6532 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6533 delete_area(area); 6534 return B_BAD_ADDRESS; 6535 
} 6536 6537 return area; 6538 } 6539 6540 6541 status_t 6542 _user_delete_area(area_id area) 6543 { 6544 // Unlike the BeOS implementation, you can now only delete areas 6545 // that you have created yourself from userland. 6546 // The documentation to delete_area() explicitly states that this 6547 // will be restricted in the future, and so it will. 6548 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6549 } 6550 6551 6552 // TODO: create a BeOS style call for this! 6553 6554 area_id 6555 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6556 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6557 int fd, off_t offset) 6558 { 6559 char name[B_OS_NAME_LENGTH]; 6560 void* address; 6561 area_id area; 6562 6563 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6564 return B_BAD_VALUE; 6565 6566 fix_protection(&protection); 6567 6568 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6569 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6570 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6571 return B_BAD_ADDRESS; 6572 6573 if (addressSpec == B_EXACT_ADDRESS) { 6574 if ((addr_t)address + size < (addr_t)address 6575 || (addr_t)address % B_PAGE_SIZE != 0) { 6576 return B_BAD_VALUE; 6577 } 6578 if (!IS_USER_ADDRESS(address) 6579 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6580 return B_BAD_ADDRESS; 6581 } 6582 } 6583 6584 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6585 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6586 false); 6587 if (area < B_OK) 6588 return area; 6589 6590 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6591 return B_BAD_ADDRESS; 6592 6593 return area; 6594 } 6595 6596 6597 status_t 6598 _user_unmap_memory(void* _address, size_t size) 6599 { 6600 addr_t address = (addr_t)_address; 6601 6602 // check params 6603 if (size == 0 || (addr_t)address + size < (addr_t)address 6604 || (addr_t)address % B_PAGE_SIZE != 0) { 6605 return B_BAD_VALUE; 6606 } 6607 6608 if (!IS_USER_ADDRESS(address) 6609 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6610 return B_BAD_ADDRESS; 6611 } 6612 6613 // Write lock the address space and ensure the address range is not wired. 6614 AddressSpaceWriteLocker locker; 6615 do { 6616 status_t status = locker.SetTo(team_get_current_team_id()); 6617 if (status != B_OK) 6618 return status; 6619 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6620 size, &locker)); 6621 6622 // unmap 6623 return unmap_address_range(locker.AddressSpace(), address, size, false); 6624 } 6625 6626 6627 status_t 6628 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6629 { 6630 // check address range 6631 addr_t address = (addr_t)_address; 6632 size = PAGE_ALIGN(size); 6633 6634 if ((address % B_PAGE_SIZE) != 0) 6635 return B_BAD_VALUE; 6636 if (!validate_user_memory_range(_address, size)) { 6637 // weird error code required by POSIX 6638 return ENOMEM; 6639 } 6640 6641 // extend and check protection 6642 if ((protection & ~B_USER_PROTECTION) != 0) 6643 return B_BAD_VALUE; 6644 6645 fix_protection(&protection); 6646 6647 // We need to write lock the address space, since we're going to play with 6648 // the areas. Also make sure that none of the areas is wired and that we're 6649 // actually allowed to change the protection. 
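	// (The first round below may have to be restarted: when we find a wired
	// range we wait for it to go away, and the area layout may have changed
	// by the time we get to check again.)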
6650 AddressSpaceWriteLocker locker; 6651 6652 bool restart; 6653 do { 6654 restart = false; 6655 6656 status_t status = locker.SetTo(team_get_current_team_id()); 6657 if (status != B_OK) 6658 return status; 6659 6660 // First round: Check whether the whole range is covered by areas and we 6661 // are allowed to modify them. 6662 addr_t currentAddress = address; 6663 size_t sizeLeft = size; 6664 while (sizeLeft > 0) { 6665 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6666 if (area == NULL) 6667 return B_NO_MEMORY; 6668 6669 if ((area->protection & B_KERNEL_AREA) != 0) 6670 return B_NOT_ALLOWED; 6671 if (area->protection_max != 0 6672 && (protection & area->protection_max) != protection) { 6673 return B_NOT_ALLOWED; 6674 } 6675 6676 addr_t offset = currentAddress - area->Base(); 6677 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6678 6679 AreaCacheLocker cacheLocker(area); 6680 6681 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6682 &locker, &cacheLocker)) { 6683 restart = true; 6684 break; 6685 } 6686 6687 cacheLocker.Unlock(); 6688 6689 currentAddress += rangeSize; 6690 sizeLeft -= rangeSize; 6691 } 6692 } while (restart); 6693 6694 // Second round: If the protections differ from that of the area, create a 6695 // page protection array and re-map mapped pages. 6696 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6697 addr_t currentAddress = address; 6698 size_t sizeLeft = size; 6699 while (sizeLeft > 0) { 6700 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6701 if (area == NULL) 6702 return B_NO_MEMORY; 6703 6704 addr_t offset = currentAddress - area->Base(); 6705 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6706 6707 currentAddress += rangeSize; 6708 sizeLeft -= rangeSize; 6709 6710 if (area->page_protections == NULL) { 6711 if (area->protection == protection) 6712 continue; 6713 6714 status_t status = allocate_area_page_protections(area); 6715 if (status != B_OK) 6716 return status; 6717 } 6718 6719 // We need to lock the complete cache chain, since we potentially unmap 6720 // pages of lower caches. 6721 VMCache* topCache = vm_area_get_locked_cache(area); 6722 VMCacheChainLocker cacheChainLocker(topCache); 6723 cacheChainLocker.LockAllSourceCaches(); 6724 6725 for (addr_t pageAddress = area->Base() + offset; 6726 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6727 map->Lock(); 6728 6729 set_area_page_protection(area, pageAddress, protection); 6730 6731 phys_addr_t physicalAddress; 6732 uint32 flags; 6733 6734 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6735 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6736 map->Unlock(); 6737 continue; 6738 } 6739 6740 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6741 if (page == NULL) { 6742 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6743 "\n", area, physicalAddress); 6744 map->Unlock(); 6745 return B_ERROR; 6746 } 6747 6748 // If the page is not in the topmost cache and write access is 6749 // requested, we have to unmap it. Otherwise we can re-map it with 6750 // the new protection. 
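			// Unmapping forces the next write to fault, so the page gets
			// copied into the topmost cache (copy-on-write) instead of the
			// shared lower-cache page being modified.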
6751 bool unmapPage = page->Cache() != topCache 6752 && (protection & B_WRITE_AREA) != 0; 6753 6754 if (!unmapPage) 6755 map->ProtectPage(area, pageAddress, protection); 6756 6757 map->Unlock(); 6758 6759 if (unmapPage) { 6760 DEBUG_PAGE_ACCESS_START(page); 6761 unmap_page(area, pageAddress); 6762 DEBUG_PAGE_ACCESS_END(page); 6763 } 6764 } 6765 } 6766 6767 return B_OK; 6768 } 6769 6770 6771 status_t 6772 _user_sync_memory(void* _address, size_t size, uint32 flags) 6773 { 6774 addr_t address = (addr_t)_address; 6775 size = PAGE_ALIGN(size); 6776 6777 // check params 6778 if ((address % B_PAGE_SIZE) != 0) 6779 return B_BAD_VALUE; 6780 if (!validate_user_memory_range(_address, size)) { 6781 // weird error code required by POSIX 6782 return ENOMEM; 6783 } 6784 6785 bool writeSync = (flags & MS_SYNC) != 0; 6786 bool writeAsync = (flags & MS_ASYNC) != 0; 6787 if (writeSync && writeAsync) 6788 return B_BAD_VALUE; 6789 6790 if (size == 0 || (!writeSync && !writeAsync)) 6791 return B_OK; 6792 6793 // iterate through the range and sync all concerned areas 6794 while (size > 0) { 6795 // read lock the address space 6796 AddressSpaceReadLocker locker; 6797 status_t error = locker.SetTo(team_get_current_team_id()); 6798 if (error != B_OK) 6799 return error; 6800 6801 // get the first area 6802 VMArea* area = locker.AddressSpace()->LookupArea(address); 6803 if (area == NULL) 6804 return B_NO_MEMORY; 6805 6806 uint32 offset = address - area->Base(); 6807 size_t rangeSize = min_c(area->Size() - offset, size); 6808 offset += area->cache_offset; 6809 6810 // lock the cache 6811 AreaCacheLocker cacheLocker(area); 6812 if (!cacheLocker) 6813 return B_BAD_VALUE; 6814 VMCache* cache = area->cache; 6815 6816 locker.Unlock(); 6817 6818 uint32 firstPage = offset >> PAGE_SHIFT; 6819 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6820 6821 // write the pages 6822 if (cache->type == CACHE_TYPE_VNODE) { 6823 if (writeSync) { 6824 // synchronous 6825 error = vm_page_write_modified_page_range(cache, firstPage, 6826 endPage); 6827 if (error != B_OK) 6828 return error; 6829 } else { 6830 // asynchronous 6831 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6832 // TODO: This is probably not quite what is supposed to happen. 6833 // Especially when a lot has to be written, it might take ages 6834 // until it really hits the disk. 6835 } 6836 } 6837 6838 address += rangeSize; 6839 size -= rangeSize; 6840 } 6841 6842 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6843 // synchronize multiple mappings of the same file. In our VM they never get 6844 // out of sync, though, so we don't have to do anything. 6845 6846 return B_OK; 6847 } 6848 6849 6850 status_t 6851 _user_memory_advice(void* _address, size_t size, uint32 advice) 6852 { 6853 addr_t address = (addr_t)_address; 6854 if ((address % B_PAGE_SIZE) != 0) 6855 return B_BAD_VALUE; 6856 6857 size = PAGE_ALIGN(size); 6858 if (!validate_user_memory_range(_address, size)) { 6859 // weird error code required by POSIX 6860 return B_NO_MEMORY; 6861 } 6862 6863 switch (advice) { 6864 case MADV_NORMAL: 6865 case MADV_SEQUENTIAL: 6866 case MADV_RANDOM: 6867 case MADV_WILLNEED: 6868 case MADV_DONTNEED: 6869 // TODO: Implement! 
6870 break; 6871 6872 case MADV_FREE: 6873 { 6874 AddressSpaceWriteLocker locker; 6875 do { 6876 status_t status = locker.SetTo(team_get_current_team_id()); 6877 if (status != B_OK) 6878 return status; 6879 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6880 address, size, &locker)); 6881 6882 discard_address_range(locker.AddressSpace(), address, size, false); 6883 break; 6884 } 6885 6886 default: 6887 return B_BAD_VALUE; 6888 } 6889 6890 return B_OK; 6891 } 6892 6893 6894 status_t 6895 _user_get_memory_properties(team_id teamID, const void* address, 6896 uint32* _protected, uint32* _lock) 6897 { 6898 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6899 return B_BAD_ADDRESS; 6900 6901 AddressSpaceReadLocker locker; 6902 status_t error = locker.SetTo(teamID); 6903 if (error != B_OK) 6904 return error; 6905 6906 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6907 if (area == NULL) 6908 return B_NO_MEMORY; 6909 6910 6911 uint32 protection = area->protection; 6912 if (area->page_protections != NULL) 6913 protection = get_area_page_protection(area, (addr_t)address); 6914 6915 uint32 wiring = area->wiring; 6916 6917 locker.Unlock(); 6918 6919 error = user_memcpy(_protected, &protection, sizeof(protection)); 6920 if (error != B_OK) 6921 return error; 6922 6923 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6924 6925 return error; 6926 } 6927 6928 6929 // An ordered list of non-overlapping ranges to track mlock/munlock locking. 6930 // It is allowed to call mlock/munlock in unbalanced ways (lock a range 6931 // multiple times, unlock a part of it, lock several consecutive ranges and 6932 // unlock them in one go, etc). However the low level lock_memory and 6933 // unlock_memory calls require the locks/unlocks to be balanced (you lock a 6934 // fixed range, and then unlock exactly the same range). This list allows to 6935 // keep track of what was locked exactly so we can unlock the correct things. 6936 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> { 6937 addr_t start; 6938 addr_t end; 6939 6940 status_t LockMemory() 6941 { 6942 return lock_memory((void*)start, end - start, 0); 6943 } 6944 6945 status_t UnlockMemory() 6946 { 6947 return unlock_memory((void*)start, end - start, 0); 6948 } 6949 6950 status_t Move(addr_t start, addr_t end) 6951 { 6952 status_t result = lock_memory((void*)start, end - start, 0); 6953 if (result != B_OK) 6954 return result; 6955 6956 result = UnlockMemory(); 6957 6958 if (result != B_OK) { 6959 // What can we do if the unlock fails? 
6960 panic("Failed to unlock memory: %s", strerror(result)); 6961 return result; 6962 } 6963 6964 this->start = start; 6965 this->end = end; 6966 6967 return B_OK; 6968 } 6969 }; 6970 6971 6972 status_t 6973 _user_mlock(const void* address, size_t size) { 6974 // Maybe there's nothing to do, in which case, do nothing 6975 if (size == 0) 6976 return B_OK; 6977 6978 // Make sure the address is multiple of B_PAGE_SIZE (POSIX allows us to 6979 // reject the call otherwise) 6980 if ((addr_t)address % B_PAGE_SIZE != 0) 6981 return EINVAL; 6982 6983 size = ROUNDUP(size, B_PAGE_SIZE); 6984 6985 addr_t endAddress = (addr_t)address + size; 6986 6987 // Pre-allocate a linked list element we may need (it's simpler to do it 6988 // now than run out of memory in the midle of changing things) 6989 LockedPages* newRange = new(std::nothrow) LockedPages(); 6990 if (newRange == NULL) 6991 return ENOMEM; 6992 6993 // Get and lock the team 6994 Team* team = thread_get_current_thread()->team; 6995 TeamLocker teamLocker(team); 6996 teamLocker.Lock(); 6997 6998 status_t error = B_OK; 6999 LockedPagesList* lockedPages = &team->locked_pages_list; 7000 7001 // Locate the first locked range possibly overlapping ours 7002 LockedPages* currentRange = lockedPages->Head(); 7003 while (currentRange != NULL && currentRange->end <= (addr_t)address) 7004 currentRange = lockedPages->GetNext(currentRange); 7005 7006 if (currentRange == NULL || currentRange->start >= endAddress) { 7007 // No existing range is overlapping with ours. We can just lock our 7008 // range and stop here. 7009 newRange->start = (addr_t)address; 7010 newRange->end = endAddress; 7011 error = newRange->LockMemory(); 7012 if (error != B_OK) { 7013 delete newRange; 7014 return error; 7015 } 7016 lockedPages->InsertBefore(currentRange, newRange); 7017 return B_OK; 7018 } 7019 7020 // We get here when there is at least one existing overlapping range. 7021 7022 if (currentRange->start <= (addr_t)address) { 7023 if (currentRange->end >= endAddress) { 7024 // An existing range is already fully covering the pages we need to 7025 // lock. Nothing to do then. 7026 delete newRange; 7027 return B_OK; 7028 } else { 7029 // An existing range covers the start of the area we want to lock. 7030 // Advance our start address to avoid it. 7031 address = (void*)currentRange->end; 7032 7033 // Move on to the next range for the next step 7034 currentRange = lockedPages->GetNext(currentRange); 7035 } 7036 } 7037 7038 // First, lock the new range 7039 newRange->start = (addr_t)address; 7040 newRange->end = endAddress; 7041 error = newRange->LockMemory(); 7042 if (error != B_OK) { 7043 delete newRange; 7044 return error; 7045 } 7046 7047 // Unlock all ranges fully overlapping with the area we need to lock 7048 while (currentRange != NULL && currentRange->end < endAddress) { 7049 // The existing range is fully contained inside the new one we're 7050 // trying to lock. 
Delete/unlock it, and replace it with a new one 7051 // (this limits fragmentation of the range list, and is simpler to 7052 // manage) 7053 error = currentRange->UnlockMemory(); 7054 if (error != B_OK) { 7055 panic("Failed to unlock a memory range: %s", strerror(error)); 7056 newRange->UnlockMemory(); 7057 delete newRange; 7058 return error; 7059 } 7060 LockedPages* temp = currentRange; 7061 currentRange = lockedPages->GetNext(currentRange); 7062 lockedPages->Remove(temp); 7063 delete temp; 7064 } 7065 7066 if (currentRange != NULL) { 7067 // One last range may cover the end of the area we're trying to lock 7068 7069 if (currentRange->start == (addr_t)address) { 7070 // In case two overlapping ranges (one at the start and the other 7071 // at the end) already cover the area we're after, there's nothing 7072 // more to do. So we destroy our new extra allocation 7073 error = newRange->UnlockMemory(); 7074 delete newRange; 7075 return error; 7076 } 7077 7078 if (currentRange->start < endAddress) { 7079 // Make sure the last range is not overlapping, by moving its start 7080 error = currentRange->Move(endAddress, currentRange->end); 7081 if (error != B_OK) { 7082 panic("Failed to move a memory range: %s", strerror(error)); 7083 newRange->UnlockMemory(); 7084 delete newRange; 7085 return error; 7086 } 7087 } 7088 } 7089 7090 // Finally, store the new range in the locked list 7091 lockedPages->InsertBefore(currentRange, newRange); 7092 return B_OK; 7093 } 7094 7095 7096 status_t 7097 _user_munlock(const void* address, size_t size) { 7098 // Maybe there's nothing to do, in which case, do nothing 7099 if (size == 0) 7100 return B_OK; 7101 7102 // Make sure the address is multiple of B_PAGE_SIZE (POSIX allows us to 7103 // reject the call otherwise) 7104 if ((addr_t)address % B_PAGE_SIZE != 0) 7105 return EINVAL; 7106 7107 // Round size up to the next page 7108 size = ROUNDUP(size, B_PAGE_SIZE); 7109 7110 addr_t endAddress = (addr_t)address + size; 7111 7112 // Get and lock the team 7113 Team* team = thread_get_current_thread()->team; 7114 TeamLocker teamLocker(team); 7115 teamLocker.Lock(); 7116 LockedPagesList* lockedPages = &team->locked_pages_list; 7117 7118 status_t error = B_OK; 7119 7120 // Locate the first locked range possibly overlapping ours 7121 LockedPages* currentRange = lockedPages->Head(); 7122 while (currentRange != NULL && currentRange->end <= (addr_t)address) 7123 currentRange = lockedPages->GetNext(currentRange); 7124 7125 if (currentRange == NULL || currentRange->start >= endAddress) { 7126 // No range is intersecting, nothing to unlock 7127 return B_OK; 7128 } 7129 7130 if (currentRange->start < (addr_t)address) { 7131 if (currentRange->end > endAddress) { 7132 // There is a range fully covering the area we want to unlock, 7133 // and it extends on both sides. 
We need to split it in two 7134 LockedPages* newRange = new(std::nothrow) LockedPages(); 7135 if (newRange == NULL) 7136 return ENOMEM; 7137 7138 newRange->start = endAddress; 7139 newRange->end = currentRange->end; 7140 7141 error = newRange->LockMemory(); 7142 if (error != B_OK) { 7143 delete newRange; 7144 return error; 7145 } 7146 7147 error = currentRange->Move(currentRange->start, (addr_t)address); 7148 if (error != B_OK) { 7149 delete newRange; 7150 return error; 7151 } 7152 7153 lockedPages->InsertAfter(currentRange, newRange); 7154 return B_OK; 7155 } else { 7156 // There is a range that overlaps and extends before the one we 7157 // want to unlock, we need to shrink it 7158 error = currentRange->Move(currentRange->start, (addr_t)address); 7159 if (error != B_OK) 7160 return error; 7161 } 7162 } 7163 7164 while (currentRange != NULL && currentRange->end <= endAddress) { 7165 // Unlock all fully overlapping ranges 7166 error = currentRange->UnlockMemory(); 7167 if (error != B_OK) 7168 return error; 7169 LockedPages* temp = currentRange; 7170 currentRange = lockedPages->GetNext(currentRange); 7171 lockedPages->Remove(temp); 7172 delete temp; 7173 } 7174 7175 // Finally split the last partially overlapping range if any 7176 if (currentRange != NULL && currentRange->start < endAddress) { 7177 error = currentRange->Move(endAddress, currentRange->end); 7178 if (error != B_OK) 7179 return error; 7180 } 7181 7182 return B_OK; 7183 } 7184 7185 7186 // #pragma mark -- compatibility 7187 7188 7189 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7190 7191 7192 struct physical_entry_beos { 7193 uint32 address; 7194 uint32 size; 7195 }; 7196 7197 7198 /*! The physical_entry structure has changed. We need to translate it to the 7199 old one. 7200 */ 7201 extern "C" int32 7202 __get_memory_map_beos(const void* _address, size_t numBytes, 7203 physical_entry_beos* table, int32 numEntries) 7204 { 7205 if (numEntries <= 0) 7206 return B_BAD_VALUE; 7207 7208 const uint8* address = (const uint8*)_address; 7209 7210 int32 count = 0; 7211 while (numBytes > 0 && count < numEntries) { 7212 physical_entry entry; 7213 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7214 if (result < 0) { 7215 if (result != B_BUFFER_OVERFLOW) 7216 return result; 7217 } 7218 7219 if (entry.address >= (phys_addr_t)1 << 32) { 7220 panic("get_memory_map(): Address is greater 4 GB!"); 7221 return B_ERROR; 7222 } 7223 7224 table[count].address = entry.address; 7225 table[count++].size = entry.size; 7226 7227 address += entry.size; 7228 numBytes -= entry.size; 7229 } 7230 7231 // null-terminate the table, if possible 7232 if (count < numEntries) { 7233 table[count].address = 0; 7234 table[count].size = 0; 7235 } 7236 7237 return B_OK; 7238 } 7239 7240 7241 /*! The type of the \a physicalAddress parameter has changed from void* to 7242 phys_addr_t. 7243 */ 7244 extern "C" area_id 7245 __map_physical_memory_beos(const char* name, void* physicalAddress, 7246 size_t numBytes, uint32 addressSpec, uint32 protection, 7247 void** _virtualAddress) 7248 { 7249 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7250 addressSpec, protection, _virtualAddress); 7251 } 7252 7253 7254 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7255 we meddle with the \a lock parameter to force 32 bit. 
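	Only \c B_FULL_LOCK, \c B_LAZY_LOCK, and \c B_CONTIGUOUS are remapped (to
	\c B_32_BIT_FULL_LOCK and \c B_32_BIT_CONTIGUOUS, respectively);
	\c B_NO_LOCK is passed through unchanged.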
7256 */ 7257 extern "C" area_id 7258 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7259 size_t size, uint32 lock, uint32 protection) 7260 { 7261 switch (lock) { 7262 case B_NO_LOCK: 7263 break; 7264 case B_FULL_LOCK: 7265 case B_LAZY_LOCK: 7266 lock = B_32_BIT_FULL_LOCK; 7267 break; 7268 case B_CONTIGUOUS: 7269 lock = B_32_BIT_CONTIGUOUS; 7270 break; 7271 } 7272 7273 return __create_area_haiku(name, _address, addressSpec, size, lock, 7274 protection); 7275 } 7276 7277 7278 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7279 "BASE"); 7280 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7281 "map_physical_memory@", "BASE"); 7282 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7283 "BASE"); 7284 7285 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7286 "get_memory_map@@", "1_ALPHA3"); 7287 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7288 "map_physical_memory@@", "1_ALPHA3"); 7289 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7290 "1_ALPHA3"); 7291 7292 7293 #else 7294 7295 7296 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7297 "get_memory_map@@", "BASE"); 7298 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7299 "map_physical_memory@@", "BASE"); 7300 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7301 "BASE"); 7302 7303 7304 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7305