1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/ThreadAutoLock.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_priv.h> 53 #include <vm/VMAddressSpace.h> 54 #include <vm/VMArea.h> 55 #include <vm/VMCache.h> 56 57 #include "VMAddressSpaceLocking.h" 58 #include "VMAnonymousCache.h" 59 #include "VMAnonymousNoSwapCache.h" 60 #include "IORequest.h" 61 62 63 //#define TRACE_VM 64 //#define TRACE_FAULTS 65 #ifdef TRACE_VM 66 # define TRACE(x) dprintf x 67 #else 68 # define TRACE(x) ; 69 #endif 70 #ifdef TRACE_FAULTS 71 # define FTRACE(x) dprintf x 72 #else 73 # define FTRACE(x) ; 74 #endif 75 76 77 namespace { 78 79 class AreaCacheLocking { 80 public: 81 inline bool Lock(VMCache* lockable) 82 { 83 return false; 84 } 85 86 inline void Unlock(VMCache* lockable) 87 { 88 vm_area_put_locked_cache(lockable); 89 } 90 }; 91 92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 93 public: 94 inline AreaCacheLocker(VMCache* cache = NULL) 95 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 96 { 97 } 98 99 inline AreaCacheLocker(VMArea* area) 100 : AutoLocker<VMCache, AreaCacheLocking>() 101 { 102 SetTo(area); 103 } 104 105 inline void SetTo(VMCache* cache, bool alreadyLocked) 106 { 107 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 108 } 109 110 inline void SetTo(VMArea* area) 111 { 112 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 113 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

}	// namespace


// The memory reserve that an allocation of a certain priority must not touch.
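// The table below is indexed by VM allocation priority (VM_PRIORITY_USER,
// VM_PRIORITY_SYSTEM, VM_PRIORITY_VIP). As a rough, hypothetical sketch of how
// such a reserve check works (the authoritative logic lives in
// vm_try_reserve_memory() further down and may differ in detail):
//
//	bool fits(off_t available, size_t size, int priority)
//	{
//		// an allocation must leave the reserve of its priority class intact
//		return available - (off_t)size
//			>= (off_t)kMemoryReserveForPriority[priority];
//	}
//
// User allocations thus run out of memory first, leaving headroom for system
// and VIP allocations.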
239 static const size_t kMemoryReserveForPriority[] = { 240 VM_MEMORY_RESERVE_USER, // user 241 VM_MEMORY_RESERVE_SYSTEM, // system 242 0 // VIP 243 }; 244 245 246 ObjectCache* gPageMappingsObjectCache; 247 248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 249 250 static off_t sAvailableMemory; 251 static off_t sNeededMemory; 252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 253 static uint32 sPageFaults; 254 255 static VMPhysicalPageMapper* sPhysicalPageMapper; 256 257 #if DEBUG_CACHE_LIST 258 259 struct cache_info { 260 VMCache* cache; 261 addr_t page_count; 262 addr_t committed; 263 }; 264 265 static const int kCacheInfoTableCount = 100 * 1024; 266 static cache_info* sCacheInfoTable; 267 268 #endif // DEBUG_CACHE_LIST 269 270 271 // function declarations 272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 273 bool addressSpaceCleanup); 274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 275 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 276 static status_t map_backing_store(VMAddressSpace* addressSpace, 277 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 278 int protection, int protectionMax, int mapping, uint32 flags, 279 const virtual_address_restrictions* addressRestrictions, bool kernel, 280 VMArea** _area, void** _virtualAddress); 281 static void fix_protection(uint32* protection); 282 283 284 // #pragma mark - 285 286 287 #if VM_PAGE_FAULT_TRACING 288 289 namespace VMPageFaultTracing { 290 291 class PageFaultStart : public AbstractTraceEntry { 292 public: 293 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 294 : 295 fAddress(address), 296 fPC(pc), 297 fWrite(write), 298 fUser(user) 299 { 300 Initialized(); 301 } 302 303 virtual void AddDump(TraceOutput& out) 304 { 305 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 306 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 307 } 308 309 private: 310 addr_t fAddress; 311 addr_t fPC; 312 bool fWrite; 313 bool fUser; 314 }; 315 316 317 // page fault errors 318 enum { 319 PAGE_FAULT_ERROR_NO_AREA = 0, 320 PAGE_FAULT_ERROR_KERNEL_ONLY, 321 PAGE_FAULT_ERROR_WRITE_PROTECTED, 322 PAGE_FAULT_ERROR_READ_PROTECTED, 323 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 324 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 325 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 326 }; 327 328 329 class PageFaultError : public AbstractTraceEntry { 330 public: 331 PageFaultError(area_id area, status_t error) 332 : 333 fArea(area), 334 fError(error) 335 { 336 Initialized(); 337 } 338 339 virtual void AddDump(TraceOutput& out) 340 { 341 switch (fError) { 342 case PAGE_FAULT_ERROR_NO_AREA: 343 out.Print("page fault error: no area"); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_ONLY: 346 out.Print("page fault error: area: %ld, kernel only", fArea); 347 break; 348 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 349 out.Print("page fault error: area: %ld, write protected", 350 fArea); 351 break; 352 case PAGE_FAULT_ERROR_READ_PROTECTED: 353 out.Print("page fault error: area: %ld, read protected", fArea); 354 break; 355 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 356 out.Print("page fault error: area: %ld, execute protected", 357 fArea); 358 break; 359 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 360 out.Print("page fault error: kernel touching bad user memory"); 361 break; 362 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 363 out.Print("page fault error: no address space"); 364 break; 365 default: 366 out.Print("page fault error: area: %ld, error: %s", fArea, 367 strerror(fError)); 368 break; 369 } 370 } 371 372 private: 373 area_id fArea; 374 status_t fError; 375 }; 376 377 378 class PageFaultDone : public AbstractTraceEntry { 379 public: 380 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 381 vm_page* page) 382 : 383 fArea(area), 384 fTopCache(topCache), 385 fCache(cache), 386 fPage(page) 387 { 388 Initialized(); 389 } 390 391 virtual void AddDump(TraceOutput& out) 392 { 393 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 394 "page: %p", fArea, fTopCache, fCache, fPage); 395 } 396 397 private: 398 area_id fArea; 399 VMCache* fTopCache; 400 VMCache* fCache; 401 vm_page* fPage; 402 }; 403 404 } // namespace VMPageFaultTracing 405 406 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 407 #else 408 # define TPF(x) ; 409 #endif // VM_PAGE_FAULT_TRACING 410 411 412 // #pragma mark - 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 increment_page_wired_count(vm_page* page) 419 { 420 if (!page->IsMapped()) 421 atomic_add(&gMappedPagesCount, 1); 422 page->IncrementWiredCount(); 423 } 424 425 426 /*! The page's cache must be locked. 427 */ 428 static inline void 429 decrement_page_wired_count(vm_page* page) 430 { 431 page->DecrementWiredCount(); 432 if (!page->IsMapped()) 433 atomic_add(&gMappedPagesCount, -1); 434 } 435 436 437 static inline addr_t 438 virtual_page_address(VMArea* area, vm_page* page) 439 { 440 return area->Base() 441 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 442 } 443 444 445 //! 
You need to have the address space locked when calling this function 446 static VMArea* 447 lookup_area(VMAddressSpace* addressSpace, area_id id) 448 { 449 VMAreaHash::ReadLock(); 450 451 VMArea* area = VMAreaHash::LookupLocked(id); 452 if (area != NULL && area->address_space != addressSpace) 453 area = NULL; 454 455 VMAreaHash::ReadUnlock(); 456 457 return area; 458 } 459 460 461 static status_t 462 allocate_area_page_protections(VMArea* area) 463 { 464 // In the page protections we store only the three user protections, 465 // so we use 4 bits per page. 466 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 467 area->page_protections = (uint8*)malloc_etc(bytes, 468 area->address_space == VMAddressSpace::Kernel() 469 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 470 if (area->page_protections == NULL) 471 return B_NO_MEMORY; 472 473 // init the page protections for all pages to that of the area 474 uint32 areaProtection = area->protection 475 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 476 memset(area->page_protections, areaProtection | (areaProtection << 4), 477 bytes); 478 return B_OK; 479 } 480 481 482 static inline void 483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 484 { 485 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 486 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 487 uint8& entry = area->page_protections[pageIndex / 2]; 488 if (pageIndex % 2 == 0) 489 entry = (entry & 0xf0) | protection; 490 else 491 entry = (entry & 0x0f) | (protection << 4); 492 } 493 494 495 static inline uint32 496 get_area_page_protection(VMArea* area, addr_t pageAddress) 497 { 498 if (area->page_protections == NULL) 499 return area->protection; 500 501 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 502 uint32 protection = area->page_protections[pageIndex / 2]; 503 if (pageIndex % 2 == 0) 504 protection &= 0x0f; 505 else 506 protection >>= 4; 507 508 uint32 kernelProtection = 0; 509 if ((protection & B_READ_AREA) != 0) 510 kernelProtection |= B_KERNEL_READ_AREA; 511 if ((protection & B_WRITE_AREA) != 0) 512 kernelProtection |= B_KERNEL_WRITE_AREA; 513 514 // If this is a kernel area we return only the kernel flags. 515 if (area->address_space == VMAddressSpace::Kernel()) 516 return kernelProtection; 517 518 return protection | kernelProtection; 519 } 520 521 522 /*! The caller must have reserved enough pages the translation map 523 implementation might need to map this page. 524 The page's cache must be locked. 525 */ 526 static status_t 527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 528 vm_page_reservation* reservation) 529 { 530 VMTranslationMap* map = area->address_space->TranslationMap(); 531 532 bool wasMapped = page->IsMapped(); 533 534 if (area->wiring == B_NO_LOCK) { 535 DEBUG_PAGE_ACCESS_CHECK(page); 536 537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 539 gPageMappingsObjectCache, 540 CACHE_DONT_WAIT_FOR_MEMORY 541 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 542 if (mapping == NULL) 543 return B_NO_MEMORY; 544 545 mapping->page = page; 546 mapping->area = area; 547 548 map->Lock(); 549 550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 551 area->MemoryType(), reservation); 552 553 // insert mapping into lists 554 if (!page->IsMapped()) 555 atomic_add(&gMappedPagesCount, 1); 556 557 page->mappings.Add(mapping); 558 area->mappings.Add(mapping); 559 560 map->Unlock(); 561 } else { 562 DEBUG_PAGE_ACCESS_CHECK(page); 563 564 map->Lock(); 565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 566 area->MemoryType(), reservation); 567 map->Unlock(); 568 569 increment_page_wired_count(page); 570 } 571 572 if (!wasMapped) { 573 // The page is mapped now, so we must not remain in the cached queue. 574 // It also makes sense to move it from the inactive to the active, since 575 // otherwise the page daemon wouldn't come to keep track of it (in idle 576 // mode) -- if the page isn't touched, it will be deactivated after a 577 // full iteration through the queue at the latest. 578 if (page->State() == PAGE_STATE_CACHED 579 || page->State() == PAGE_STATE_INACTIVE) { 580 vm_page_set_state(page, PAGE_STATE_ACTIVE); 581 } 582 } 583 584 return B_OK; 585 } 586 587 588 /*! If \a preserveModified is \c true, the caller must hold the lock of the 589 page's cache. 590 */ 591 static inline bool 592 unmap_page(VMArea* area, addr_t virtualAddress) 593 { 594 return area->address_space->TranslationMap()->UnmapPage(area, 595 virtualAddress, true); 596 } 597 598 599 /*! If \a preserveModified is \c true, the caller must hold the lock of all 600 mapped pages' caches. 601 */ 602 static inline void 603 unmap_pages(VMArea* area, addr_t base, size_t size) 604 { 605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 606 } 607 608 609 static inline bool 610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 611 { 612 if (address < area->Base()) { 613 offset = area->Base() - address; 614 if (offset >= size) 615 return false; 616 617 address = area->Base(); 618 size -= offset; 619 offset = 0; 620 if (size > area->Size()) 621 size = area->Size(); 622 623 return true; 624 } 625 626 offset = address - area->Base(); 627 if (offset >= area->Size()) 628 return false; 629 630 if (size >= area->Size() - offset) 631 size = area->Size() - offset; 632 633 return true; 634 } 635 636 637 /*! Cuts a piece out of an area. If the given cut range covers the complete 638 area, it is deleted. If it covers the beginning or the end, the area is 639 resized accordingly. If the range covers some part in the middle of the 640 area, it is split in two; in this case the second area is returned via 641 \a _secondArea (the variable is left untouched in the other cases). 642 The address space must be write locked. 643 The caller must ensure that no part of the given range is wired. 644 */ 645 static status_t 646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 647 addr_t size, VMArea** _secondArea, bool kernel) 648 { 649 addr_t offset; 650 if (!intersect_area(area, address, size, offset)) 651 return B_OK; 652 653 // Is the area fully covered? 
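	// Worked example with hypothetical numbers: cutting [0x2000, 0x3000) out
	// of an area spanning [0x1000, 0x5000) matches neither the "fully
	// covered" check right below nor the end-only or beginning-only cases,
	// and therefore takes the "middle" path at the end of this function: the
	// area is shrunk to [0x1000, 0x2000) (firstNewSize 0x1000) and a second
	// area is created for [0x3000, 0x5000) (secondBase 0x3000, secondSize
	// 0x2000).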
654 if (address == area->Base() && size == area->Size()) { 655 delete_area(addressSpace, area, false); 656 return B_OK; 657 } 658 659 int priority; 660 uint32 allocationFlags; 661 if (addressSpace == VMAddressSpace::Kernel()) { 662 priority = VM_PRIORITY_SYSTEM; 663 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 664 | HEAP_DONT_LOCK_KERNEL_SPACE; 665 } else { 666 priority = VM_PRIORITY_USER; 667 allocationFlags = 0; 668 } 669 670 VMCache* cache = vm_area_get_locked_cache(area); 671 VMCacheChainLocker cacheChainLocker(cache); 672 cacheChainLocker.LockAllSourceCaches(); 673 674 // If no one else uses the area's cache and it's an anonymous cache, we can 675 // resize or split it, too. 676 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 677 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 678 679 // Cut the end only? 680 if (offset > 0 && size == area->Size() - offset) { 681 status_t error = addressSpace->ShrinkAreaTail(area, offset, 682 allocationFlags); 683 if (error != B_OK) 684 return error; 685 686 // unmap pages 687 unmap_pages(area, address, size); 688 689 if (onlyCacheUser) { 690 // Since VMCache::Resize() can temporarily drop the lock, we must 691 // unlock all lower caches to prevent locking order inversion. 692 cacheChainLocker.Unlock(cache); 693 cache->Resize(cache->virtual_base + offset, priority); 694 cache->ReleaseRefAndUnlock(); 695 } 696 697 return B_OK; 698 } 699 700 // Cut the beginning only? 701 if (area->Base() == address) { 702 // resize the area 703 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 704 allocationFlags); 705 if (error != B_OK) 706 return error; 707 708 // unmap pages 709 unmap_pages(area, address, size); 710 711 if (onlyCacheUser) { 712 // Since VMCache::Rebase() can temporarily drop the lock, we must 713 // unlock all lower caches to prevent locking order inversion. 714 cacheChainLocker.Unlock(cache); 715 cache->Rebase(cache->virtual_base + size, priority); 716 cache->ReleaseRefAndUnlock(); 717 } 718 area->cache_offset += size; 719 720 return B_OK; 721 } 722 723 // The tough part -- cut a piece out of the middle of the area. 724 // We do that by shrinking the area to the begin section and creating a 725 // new area for the end section. 726 addr_t firstNewSize = offset; 727 addr_t secondBase = address + size; 728 addr_t secondSize = area->Size() - offset - size; 729 730 // unmap pages 731 unmap_pages(area, address, area->Size() - firstNewSize); 732 733 // resize the area 734 addr_t oldSize = area->Size(); 735 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 736 allocationFlags); 737 if (error != B_OK) 738 return error; 739 740 virtual_address_restrictions addressRestrictions = {}; 741 addressRestrictions.address = (void*)secondBase; 742 addressRestrictions.address_specification = B_EXACT_ADDRESS; 743 VMArea* secondArea; 744 745 if (onlyCacheUser) { 746 // Create a new cache for the second area. 747 VMCache* secondCache; 748 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 749 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 750 if (error != B_OK) { 751 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 752 return error; 753 } 754 755 secondCache->Lock(); 756 secondCache->temporary = cache->temporary; 757 secondCache->virtual_base = area->cache_offset; 758 secondCache->virtual_end = area->cache_offset + secondSize; 759 760 // Transfer the concerned pages from the first cache. 
761 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 762 error = secondCache->Adopt(cache, adoptOffset, secondSize, 763 area->cache_offset); 764 765 if (error == B_OK) { 766 // Since VMCache::Resize() can temporarily drop the lock, we must 767 // unlock all lower caches to prevent locking order inversion. 768 cacheChainLocker.Unlock(cache); 769 cache->Resize(cache->virtual_base + firstNewSize, priority); 770 // Don't unlock the cache yet because we might have to resize it 771 // back. 772 773 // Map the second area. 774 error = map_backing_store(addressSpace, secondCache, 775 area->cache_offset, area->name, secondSize, area->wiring, 776 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 777 &addressRestrictions, kernel, &secondArea, NULL); 778 } 779 780 if (error != B_OK) { 781 // Restore the original cache. 782 cache->Resize(cache->virtual_base + oldSize, priority); 783 784 // Move the pages back. 785 status_t readoptStatus = cache->Adopt(secondCache, 786 area->cache_offset, secondSize, adoptOffset); 787 if (readoptStatus != B_OK) { 788 // Some (swap) pages have not been moved back and will be lost 789 // once the second cache is deleted. 790 panic("failed to restore cache range: %s", 791 strerror(readoptStatus)); 792 793 // TODO: Handle out of memory cases by freeing memory and 794 // retrying. 795 } 796 797 cache->ReleaseRefAndUnlock(); 798 secondCache->ReleaseRefAndUnlock(); 799 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 800 return error; 801 } 802 803 // Now we can unlock it. 804 cache->ReleaseRefAndUnlock(); 805 secondCache->Unlock(); 806 } else { 807 error = map_backing_store(addressSpace, cache, area->cache_offset 808 + (secondBase - area->Base()), 809 area->name, secondSize, area->wiring, area->protection, 810 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 811 &addressRestrictions, kernel, &secondArea, NULL); 812 if (error != B_OK) { 813 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 814 return error; 815 } 816 // We need a cache reference for the new area. 817 cache->AcquireRefLocked(); 818 } 819 820 if (_secondArea != NULL) 821 *_secondArea = secondArea; 822 823 return B_OK; 824 } 825 826 827 /*! Deletes or cuts all areas in the given address range. 828 The address space must be write-locked. 829 The caller must ensure that no part of the given range is wired. 830 */ 831 static status_t 832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 833 bool kernel) 834 { 835 size = PAGE_ALIGN(size); 836 837 // Check, whether the caller is allowed to modify the concerned areas. 838 if (!kernel) { 839 for (VMAddressSpace::AreaRangeIterator it 840 = addressSpace->GetAreaRangeIterator(address, size); 841 VMArea* area = it.Next();) { 842 843 if ((area->protection & B_KERNEL_AREA) != 0) { 844 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 845 "unmap range of kernel area %" B_PRId32 " (%s)\n", 846 team_get_current_team_id(), area->id, area->name); 847 return B_NOT_ALLOWED; 848 } 849 } 850 } 851 852 for (VMAddressSpace::AreaRangeIterator it 853 = addressSpace->GetAreaRangeIterator(address, size); 854 VMArea* area = it.Next();) { 855 856 status_t error = cut_area(addressSpace, area, address, size, NULL, 857 kernel); 858 if (error != B_OK) 859 return error; 860 // Failing after already messing with areas is ugly, but we 861 // can't do anything about it. 
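		// Note that the B_KERNEL_AREA permission check was already done in a
		// separate pass above, precisely so that the predictable failure case
		// (a userland team trying to unmap a kernel-protected area) is caught
		// before any area has been touched. Only genuinely unexpected
		// cut_area() failures (e.g. running out of memory while splitting an
		// area) can leave the range partially unmapped.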
862 } 863 864 return B_OK; 865 } 866 867 868 static status_t 869 discard_area_range(VMArea* area, addr_t address, addr_t size) 870 { 871 addr_t offset; 872 if (!intersect_area(area, address, size, offset)) 873 return B_OK; 874 875 // If someone else uses the area's cache or it's not an anonymous cache, we 876 // can't discard. 877 VMCache* cache = vm_area_get_locked_cache(area); 878 if (cache->areas != area || area->cache_next != NULL 879 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 880 return B_OK; 881 } 882 883 VMCacheChainLocker cacheChainLocker(cache); 884 cacheChainLocker.LockAllSourceCaches(); 885 886 unmap_pages(area, address, size); 887 888 // Since VMCache::Discard() can temporarily drop the lock, we must 889 // unlock all lower caches to prevent locking order inversion. 890 cacheChainLocker.Unlock(cache); 891 cache->Discard(cache->virtual_base + offset, size); 892 cache->ReleaseRefAndUnlock(); 893 894 return B_OK; 895 } 896 897 898 static status_t 899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 900 bool kernel) 901 { 902 for (VMAddressSpace::AreaRangeIterator it 903 = addressSpace->GetAreaRangeIterator(address, size); 904 VMArea* area = it.Next();) { 905 status_t error = discard_area_range(area, address, size); 906 if (error != B_OK) 907 return error; 908 } 909 910 return B_OK; 911 } 912 913 914 /*! You need to hold the lock of the cache and the write lock of the address 915 space when calling this function. 916 Note, that in case of error your cache will be temporarily unlocked. 917 If \a addressSpec is \c B_EXACT_ADDRESS and the 918 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 919 that no part of the specified address range (base \c *_virtualAddress, size 920 \a size) is wired. 
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// someone is trying to delete this address space now, so we can't
		// insert the area, and must back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status == B_NO_MEMORY
			&& addressRestrictions->address_specification
				== B_ANY_KERNEL_ADDRESS) {
		// Due to how many locks are held, we cannot wait here for space to be
		// freed up, but we can at least notify the low_resource handler.
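		// (The low resource manager processes this notification
		// asynchronously, so it cannot satisfy the current request; it merely
		// improves the odds that a later attempt finds free kernel address
		// space.)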
1017 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1018 } 1019 if (status != B_OK) 1020 goto err2; 1021 1022 // attach the cache to the area 1023 area->cache = cache; 1024 area->cache_offset = offset; 1025 1026 // point the cache back to the area 1027 cache->InsertAreaLocked(area); 1028 if (mapping == REGION_PRIVATE_MAP) 1029 cache->Unlock(); 1030 1031 // insert the area in the global area hash table 1032 VMAreaHash::Insert(area); 1033 1034 // grab a ref to the address space (the area holds this) 1035 addressSpace->Get(); 1036 1037 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1038 // cache, sourceCache, areaName, area); 1039 1040 *_area = area; 1041 return B_OK; 1042 1043 err2: 1044 if (mapping == REGION_PRIVATE_MAP) { 1045 // We created this cache, so we must delete it again. Note, that we 1046 // need to temporarily unlock the source cache or we'll otherwise 1047 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1048 sourceCache->Unlock(); 1049 cache->ReleaseRefAndUnlock(); 1050 sourceCache->Lock(); 1051 } 1052 err1: 1053 addressSpace->DeleteArea(area, allocationFlags); 1054 return status; 1055 } 1056 1057 1058 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1059 locker1, locker2). 1060 */ 1061 template<typename LockerType1, typename LockerType2> 1062 static inline bool 1063 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1064 { 1065 area->cache->AssertLocked(); 1066 1067 VMAreaUnwiredWaiter waiter; 1068 if (!area->AddWaiterIfWired(&waiter)) 1069 return false; 1070 1071 // unlock everything and wait 1072 if (locker1 != NULL) 1073 locker1->Unlock(); 1074 if (locker2 != NULL) 1075 locker2->Unlock(); 1076 1077 waiter.waitEntry.Wait(); 1078 1079 return true; 1080 } 1081 1082 1083 /*! Checks whether the given area has any wired ranges intersecting with the 1084 specified range and waits, if so. 1085 1086 When it has to wait, the function calls \c Unlock() on both \a locker1 1087 and \a locker2, if given. 1088 The area's top cache must be locked and must be unlocked as a side effect 1089 of calling \c Unlock() on either \a locker1 or \a locker2. 1090 1091 If the function does not have to wait it does not modify or unlock any 1092 object. 1093 1094 \param area The area to be checked. 1095 \param base The base address of the range to check. 1096 \param size The size of the address range to check. 1097 \param locker1 An object to be unlocked when before starting to wait (may 1098 be \c NULL). 1099 \param locker2 An object to be unlocked when before starting to wait (may 1100 be \c NULL). 1101 \return \c true, if the function had to wait, \c false otherwise. 1102 */ 1103 template<typename LockerType1, typename LockerType2> 1104 static inline bool 1105 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1106 LockerType1* locker1, LockerType2* locker2) 1107 { 1108 area->cache->AssertLocked(); 1109 1110 VMAreaUnwiredWaiter waiter; 1111 if (!area->AddWaiterIfWired(&waiter, base, size)) 1112 return false; 1113 1114 // unlock everything and wait 1115 if (locker1 != NULL) 1116 locker1->Unlock(); 1117 if (locker2 != NULL) 1118 locker2->Unlock(); 1119 1120 waiter.waitEntry.Wait(); 1121 1122 return true; 1123 } 1124 1125 1126 /*! Checks whether the given address space has any wired ranges intersecting 1127 with the specified range and waits, if so. 
1128 1129 Similar to wait_if_area_range_is_wired(), with the following differences: 1130 - All areas intersecting with the range are checked (respectively all until 1131 one is found that contains a wired range intersecting with the given 1132 range). 1133 - The given address space must at least be read-locked and must be unlocked 1134 when \c Unlock() is called on \a locker. 1135 - None of the areas' caches are allowed to be locked. 1136 */ 1137 template<typename LockerType> 1138 static inline bool 1139 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1140 size_t size, LockerType* locker) 1141 { 1142 for (VMAddressSpace::AreaRangeIterator it 1143 = addressSpace->GetAreaRangeIterator(base, size); 1144 VMArea* area = it.Next();) { 1145 1146 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1147 1148 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1149 return true; 1150 } 1151 1152 return false; 1153 } 1154 1155 1156 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1157 It must be called in a situation where the kernel address space may be 1158 locked. 1159 */ 1160 status_t 1161 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1162 { 1163 AddressSpaceReadLocker locker; 1164 VMArea* area; 1165 status_t status = locker.SetFromArea(id, area); 1166 if (status != B_OK) 1167 return status; 1168 1169 if (area->page_protections == NULL) { 1170 status = allocate_area_page_protections(area); 1171 if (status != B_OK) 1172 return status; 1173 } 1174 1175 *cookie = (void*)area; 1176 return B_OK; 1177 } 1178 1179 1180 /*! This is a debug helper function that can only be used with very specific 1181 use cases. 1182 Sets protection for the given address range to the protection specified. 1183 If \a protection is 0 then the involved pages will be marked non-present 1184 in the translation map to cause a fault on access. The pages aren't 1185 actually unmapped however so that they can be marked present again with 1186 additional calls to this function. For this to work the area must be 1187 fully locked in memory so that the pages aren't otherwise touched. 1188 This function does not lock the kernel address space and needs to be 1189 supplied with a \a cookie retrieved from a successful call to 1190 vm_prepare_kernel_area_debug_protection(). 1191 */ 1192 status_t 1193 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1194 uint32 protection) 1195 { 1196 // check address range 1197 addr_t address = (addr_t)_address; 1198 size = PAGE_ALIGN(size); 1199 1200 if ((address % B_PAGE_SIZE) != 0 1201 || (addr_t)address + size < (addr_t)address 1202 || !IS_KERNEL_ADDRESS(address) 1203 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1204 return B_BAD_VALUE; 1205 } 1206 1207 // Translate the kernel protection to user protection as we only store that. 
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookup will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
		true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel()
			?
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1298 } 1299 1300 1301 status_t 1302 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1303 addr_t size, uint32 flags) 1304 { 1305 if (size == 0) 1306 return B_BAD_VALUE; 1307 1308 AddressSpaceWriteLocker locker(team); 1309 if (!locker.IsLocked()) 1310 return B_BAD_TEAM_ID; 1311 1312 virtual_address_restrictions addressRestrictions = {}; 1313 addressRestrictions.address = *_address; 1314 addressRestrictions.address_specification = addressSpec; 1315 VMAddressSpace* addressSpace = locker.AddressSpace(); 1316 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1317 addressSpace == VMAddressSpace::Kernel() 1318 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1319 _address); 1320 } 1321 1322 1323 area_id 1324 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1325 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1326 const virtual_address_restrictions* virtualAddressRestrictions, 1327 const physical_address_restrictions* physicalAddressRestrictions, 1328 bool kernel, void** _address) 1329 { 1330 VMArea* area; 1331 VMCache* cache; 1332 vm_page* page = NULL; 1333 bool isStack = (protection & B_STACK_AREA) != 0; 1334 page_num_t guardPages; 1335 bool canOvercommit = false; 1336 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1337 ? VM_PAGE_ALLOC_CLEAR : 0; 1338 1339 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1340 team, name, size)); 1341 1342 size = PAGE_ALIGN(size); 1343 guardSize = PAGE_ALIGN(guardSize); 1344 guardPages = guardSize / B_PAGE_SIZE; 1345 1346 if (size == 0 || size < guardSize) 1347 return B_BAD_VALUE; 1348 if (!arch_vm_supports_protection(protection)) 1349 return B_NOT_SUPPORTED; 1350 1351 if (team == B_CURRENT_TEAM) 1352 team = VMAddressSpace::CurrentID(); 1353 if (team < 0) 1354 return B_BAD_TEAM_ID; 1355 1356 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1357 canOvercommit = true; 1358 1359 #ifdef DEBUG_KERNEL_STACKS 1360 if ((protection & B_KERNEL_STACK_AREA) != 0) 1361 isStack = true; 1362 #endif 1363 1364 // check parameters 1365 switch (virtualAddressRestrictions->address_specification) { 1366 case B_ANY_ADDRESS: 1367 case B_EXACT_ADDRESS: 1368 case B_BASE_ADDRESS: 1369 case B_ANY_KERNEL_ADDRESS: 1370 case B_ANY_KERNEL_BLOCK_ADDRESS: 1371 case B_RANDOMIZED_ANY_ADDRESS: 1372 case B_RANDOMIZED_BASE_ADDRESS: 1373 break; 1374 1375 default: 1376 return B_BAD_VALUE; 1377 } 1378 1379 // If low or high physical address restrictions are given, we force 1380 // B_CONTIGUOUS wiring, since only then we'll use 1381 // vm_page_allocate_page_run() which deals with those restrictions. 
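	// For illustration (hypothetical caller, not taken from a real driver):
	// code that needs physically contiguous DMA memory below 16 MB would pass
	// something like
	//
	//	physical_address_restrictions restrictions = {};
	//	restrictions.high_address = 16 * 1024 * 1024;
	//
	// with the remaining fields left at their zero defaults, which is also
	// what the B_LOMEM compatibility case further down synthesizes on the
	// caller's behalf.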
1382 if (physicalAddressRestrictions->low_address != 0 1383 || physicalAddressRestrictions->high_address != 0) { 1384 wiring = B_CONTIGUOUS; 1385 } 1386 1387 physical_address_restrictions stackPhysicalRestrictions; 1388 bool doReserveMemory = false; 1389 switch (wiring) { 1390 case B_NO_LOCK: 1391 break; 1392 case B_FULL_LOCK: 1393 case B_LAZY_LOCK: 1394 case B_CONTIGUOUS: 1395 doReserveMemory = true; 1396 break; 1397 case B_ALREADY_WIRED: 1398 break; 1399 case B_LOMEM: 1400 stackPhysicalRestrictions = *physicalAddressRestrictions; 1401 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1402 physicalAddressRestrictions = &stackPhysicalRestrictions; 1403 wiring = B_CONTIGUOUS; 1404 doReserveMemory = true; 1405 break; 1406 case B_32_BIT_FULL_LOCK: 1407 if (B_HAIKU_PHYSICAL_BITS <= 32 1408 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1409 wiring = B_FULL_LOCK; 1410 doReserveMemory = true; 1411 break; 1412 } 1413 // TODO: We don't really support this mode efficiently. Just fall 1414 // through for now ... 1415 case B_32_BIT_CONTIGUOUS: 1416 #if B_HAIKU_PHYSICAL_BITS > 32 1417 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1418 stackPhysicalRestrictions = *physicalAddressRestrictions; 1419 stackPhysicalRestrictions.high_address 1420 = (phys_addr_t)1 << 32; 1421 physicalAddressRestrictions = &stackPhysicalRestrictions; 1422 } 1423 #endif 1424 wiring = B_CONTIGUOUS; 1425 doReserveMemory = true; 1426 break; 1427 default: 1428 return B_BAD_VALUE; 1429 } 1430 1431 // Optimization: For a single-page contiguous allocation without low/high 1432 // memory restriction B_FULL_LOCK wiring suffices. 1433 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1434 && physicalAddressRestrictions->low_address == 0 1435 && physicalAddressRestrictions->high_address == 0) { 1436 wiring = B_FULL_LOCK; 1437 } 1438 1439 // For full lock or contiguous areas we're also going to map the pages and 1440 // thus need to reserve pages for the mapping backend upfront. 1441 addr_t reservedMapPages = 0; 1442 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1443 AddressSpaceWriteLocker locker; 1444 status_t status = locker.SetTo(team); 1445 if (status != B_OK) 1446 return status; 1447 1448 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1449 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1450 } 1451 1452 int priority; 1453 if (team != VMAddressSpace::KernelID()) 1454 priority = VM_PRIORITY_USER; 1455 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1456 priority = VM_PRIORITY_VIP; 1457 else 1458 priority = VM_PRIORITY_SYSTEM; 1459 1460 // Reserve memory before acquiring the address space lock. This reduces the 1461 // chances of failure, since while holding the write lock to the address 1462 // space (if it is the kernel address space that is), the low memory handler 1463 // won't be able to free anything for us. 1464 addr_t reservedMemory = 0; 1465 if (doReserveMemory) { 1466 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1467 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1468 return B_NO_MEMORY; 1469 reservedMemory = size; 1470 // TODO: We don't reserve the memory for the pages for the page 1471 // directories/tables. We actually need to do since we currently don't 1472 // reclaim them (and probably can't reclaim all of them anyway). Thus 1473 // there are actually less physical pages than there should be, which 1474 // can get the VM into trouble in low memory situations. 
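		// To put the TODO above into perspective: with 4 KiB pages and 512 or
		// 1024 entries per page table (depending on the architecture),
		// roughly one extra physical page of page tables is needed per 2-4
		// MiB of mapped address space. This is only a back-of-the-envelope
		// estimate, not something the code computes.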
1475 } 1476 1477 AddressSpaceWriteLocker locker; 1478 VMAddressSpace* addressSpace; 1479 status_t status; 1480 1481 // For full lock areas reserve the pages before locking the address 1482 // space. E.g. block caches can't release their memory while we hold the 1483 // address space lock. 1484 page_num_t reservedPages = reservedMapPages; 1485 if (wiring == B_FULL_LOCK) 1486 reservedPages += size / B_PAGE_SIZE; 1487 1488 vm_page_reservation reservation; 1489 if (reservedPages > 0) { 1490 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1491 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1492 priority)) { 1493 reservedPages = 0; 1494 status = B_WOULD_BLOCK; 1495 goto err0; 1496 } 1497 } else 1498 vm_page_reserve_pages(&reservation, reservedPages, priority); 1499 } 1500 1501 if (wiring == B_CONTIGUOUS) { 1502 // we try to allocate the page run here upfront as this may easily 1503 // fail for obvious reasons 1504 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1505 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1506 if (page == NULL) { 1507 status = B_NO_MEMORY; 1508 goto err0; 1509 } 1510 } 1511 1512 // Lock the address space and, if B_EXACT_ADDRESS and 1513 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1514 // is not wired. 1515 do { 1516 status = locker.SetTo(team); 1517 if (status != B_OK) 1518 goto err1; 1519 1520 addressSpace = locker.AddressSpace(); 1521 } while (virtualAddressRestrictions->address_specification 1522 == B_EXACT_ADDRESS 1523 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1524 && wait_if_address_range_is_wired(addressSpace, 1525 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1526 1527 // create an anonymous cache 1528 // if it's a stack, make sure that two pages are available at least 1529 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1530 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1531 wiring == B_NO_LOCK, priority); 1532 if (status != B_OK) 1533 goto err1; 1534 1535 cache->temporary = 1; 1536 cache->virtual_end = size; 1537 cache->committed_size = reservedMemory; 1538 // TODO: This should be done via a method. 
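	// From here on the cache owns the reserved memory: it is accounted in
	// committed_size and released when the cache eventually gives up its
	// commitment. reservedMemory is therefore cleared below so that the err0
	// path will not call vm_unreserve_memory() for it a second time.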
1539 reservedMemory = 0; 1540 1541 cache->Lock(); 1542 1543 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1544 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1545 virtualAddressRestrictions, kernel, &area, _address); 1546 1547 if (status != B_OK) { 1548 cache->ReleaseRefAndUnlock(); 1549 goto err1; 1550 } 1551 1552 locker.DegradeToReadLock(); 1553 1554 switch (wiring) { 1555 case B_NO_LOCK: 1556 case B_LAZY_LOCK: 1557 // do nothing - the pages are mapped in as needed 1558 break; 1559 1560 case B_FULL_LOCK: 1561 { 1562 // Allocate and map all pages for this area 1563 1564 off_t offset = 0; 1565 for (addr_t address = area->Base(); 1566 address < area->Base() + (area->Size() - 1); 1567 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1568 #ifdef DEBUG_KERNEL_STACKS 1569 # ifdef STACK_GROWS_DOWNWARDS 1570 if (isStack && address < area->Base() 1571 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1572 # else 1573 if (isStack && address >= area->Base() + area->Size() 1574 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1575 # endif 1576 continue; 1577 #endif 1578 vm_page* page = vm_page_allocate_page(&reservation, 1579 PAGE_STATE_WIRED | pageAllocFlags); 1580 cache->InsertPage(page, offset); 1581 map_page(area, page, address, protection, &reservation); 1582 1583 DEBUG_PAGE_ACCESS_END(page); 1584 } 1585 1586 break; 1587 } 1588 1589 case B_ALREADY_WIRED: 1590 { 1591 // The pages should already be mapped. This is only really useful 1592 // during boot time. Find the appropriate vm_page objects and stick 1593 // them in the cache object. 1594 VMTranslationMap* map = addressSpace->TranslationMap(); 1595 off_t offset = 0; 1596 1597 if (!gKernelStartup) 1598 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1599 1600 map->Lock(); 1601 1602 for (addr_t virtualAddress = area->Base(); 1603 virtualAddress < area->Base() + (area->Size() - 1); 1604 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1605 phys_addr_t physicalAddress; 1606 uint32 flags; 1607 status = map->Query(virtualAddress, &physicalAddress, &flags); 1608 if (status < B_OK) { 1609 panic("looking up mapping failed for va 0x%lx\n", 1610 virtualAddress); 1611 } 1612 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1613 if (page == NULL) { 1614 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1615 "\n", physicalAddress); 1616 } 1617 1618 DEBUG_PAGE_ACCESS_START(page); 1619 1620 cache->InsertPage(page, offset); 1621 increment_page_wired_count(page); 1622 vm_page_set_state(page, PAGE_STATE_WIRED); 1623 page->busy = false; 1624 1625 DEBUG_PAGE_ACCESS_END(page); 1626 } 1627 1628 map->Unlock(); 1629 break; 1630 } 1631 1632 case B_CONTIGUOUS: 1633 { 1634 // We have already allocated our continuous pages run, so we can now 1635 // just map them in the address space 1636 VMTranslationMap* map = addressSpace->TranslationMap(); 1637 phys_addr_t physicalAddress 1638 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1639 addr_t virtualAddress = area->Base(); 1640 off_t offset = 0; 1641 1642 map->Lock(); 1643 1644 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1645 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1646 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1647 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1648 if (page == NULL) 1649 panic("couldn't lookup physical page just allocated\n"); 1650 1651 status = map->Map(virtualAddress, physicalAddress, protection, 1652 area->MemoryType(), &reservation); 1653 if (status < B_OK) 1654 panic("couldn't map physical page in 
page run\n"); 1655 1656 cache->InsertPage(page, offset); 1657 increment_page_wired_count(page); 1658 1659 DEBUG_PAGE_ACCESS_END(page); 1660 } 1661 1662 map->Unlock(); 1663 break; 1664 } 1665 1666 default: 1667 break; 1668 } 1669 1670 cache->Unlock(); 1671 1672 if (reservedPages > 0) 1673 vm_page_unreserve_pages(&reservation); 1674 1675 TRACE(("vm_create_anonymous_area: done\n")); 1676 1677 area->cache_type = CACHE_TYPE_RAM; 1678 return area->id; 1679 1680 err1: 1681 if (wiring == B_CONTIGUOUS) { 1682 // we had reserved the area space upfront... 1683 phys_addr_t pageNumber = page->physical_page_number; 1684 int32 i; 1685 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1686 page = vm_lookup_page(pageNumber); 1687 if (page == NULL) 1688 panic("couldn't lookup physical page just allocated\n"); 1689 1690 vm_page_set_state(page, PAGE_STATE_FREE); 1691 } 1692 } 1693 1694 err0: 1695 if (reservedPages > 0) 1696 vm_page_unreserve_pages(&reservation); 1697 if (reservedMemory > 0) 1698 vm_unreserve_memory(reservedMemory); 1699 1700 return status; 1701 } 1702 1703 1704 area_id 1705 vm_map_physical_memory(team_id team, const char* name, void** _address, 1706 uint32 addressSpec, addr_t size, uint32 protection, 1707 phys_addr_t physicalAddress, bool alreadyWired) 1708 { 1709 VMArea* area; 1710 VMCache* cache; 1711 addr_t mapOffset; 1712 1713 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1714 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1715 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1716 addressSpec, size, protection, physicalAddress)); 1717 1718 if (!arch_vm_supports_protection(protection)) 1719 return B_NOT_SUPPORTED; 1720 1721 AddressSpaceWriteLocker locker(team); 1722 if (!locker.IsLocked()) 1723 return B_BAD_TEAM_ID; 1724 1725 // if the physical address is somewhat inside a page, 1726 // move the actual area down to align on a page boundary 1727 mapOffset = physicalAddress % B_PAGE_SIZE; 1728 size += mapOffset; 1729 physicalAddress -= mapOffset; 1730 1731 size = PAGE_ALIGN(size); 1732 1733 // create a device cache 1734 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1735 if (status != B_OK) 1736 return status; 1737 1738 cache->virtual_end = size; 1739 1740 cache->Lock(); 1741 1742 virtual_address_restrictions addressRestrictions = {}; 1743 addressRestrictions.address = *_address; 1744 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1745 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1746 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1747 true, &area, _address); 1748 1749 if (status < B_OK) 1750 cache->ReleaseRefLocked(); 1751 1752 cache->Unlock(); 1753 1754 if (status == B_OK) { 1755 // set requested memory type -- use uncached, if not given 1756 uint32 memoryType = addressSpec & B_MTR_MASK; 1757 if (memoryType == 0) 1758 memoryType = B_MTR_UC; 1759 1760 area->SetMemoryType(memoryType); 1761 1762 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1763 if (status != B_OK) 1764 delete_area(locker.AddressSpace(), area, false); 1765 } 1766 1767 if (status != B_OK) 1768 return status; 1769 1770 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1771 1772 if (alreadyWired) { 1773 // The area is already mapped, but possibly not with the right 1774 // memory type. 
1775 map->Lock(); 1776 map->ProtectArea(area, area->protection); 1777 map->Unlock(); 1778 } else { 1779 // Map the area completely. 1780 1781 // reserve pages needed for the mapping 1782 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1783 area->Base() + (size - 1)); 1784 vm_page_reservation reservation; 1785 vm_page_reserve_pages(&reservation, reservePages, 1786 team == VMAddressSpace::KernelID() 1787 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1788 1789 map->Lock(); 1790 1791 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1792 map->Map(area->Base() + offset, physicalAddress + offset, 1793 protection, area->MemoryType(), &reservation); 1794 } 1795 1796 map->Unlock(); 1797 1798 vm_page_unreserve_pages(&reservation); 1799 } 1800 1801 // modify the pointer returned to be offset back into the new area 1802 // the same way the physical address in was offset 1803 *_address = (void*)((addr_t)*_address + mapOffset); 1804 1805 area->cache_type = CACHE_TYPE_DEVICE; 1806 return area->id; 1807 } 1808 1809 1810 /*! Don't use! 1811 TODO: This function was introduced to map physical page vecs to 1812 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1813 use a device cache and does not track vm_page::wired_count! 1814 */ 1815 area_id 1816 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1817 uint32 addressSpec, addr_t* _size, uint32 protection, 1818 struct generic_io_vec* vecs, uint32 vecCount) 1819 { 1820 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1821 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1822 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1823 addressSpec, _size, protection, vecs, vecCount)); 1824 1825 if (!arch_vm_supports_protection(protection) 1826 || (addressSpec & B_MTR_MASK) != 0) { 1827 return B_NOT_SUPPORTED; 1828 } 1829 1830 AddressSpaceWriteLocker locker(team); 1831 if (!locker.IsLocked()) 1832 return B_BAD_TEAM_ID; 1833 1834 if (vecCount == 0) 1835 return B_BAD_VALUE; 1836 1837 addr_t size = 0; 1838 for (uint32 i = 0; i < vecCount; i++) { 1839 if (vecs[i].base % B_PAGE_SIZE != 0 1840 || vecs[i].length % B_PAGE_SIZE != 0) { 1841 return B_BAD_VALUE; 1842 } 1843 1844 size += vecs[i].length; 1845 } 1846 1847 // create a device cache 1848 VMCache* cache; 1849 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1850 if (result != B_OK) 1851 return result; 1852 1853 cache->virtual_end = size; 1854 1855 cache->Lock(); 1856 1857 VMArea* area; 1858 virtual_address_restrictions addressRestrictions = {}; 1859 addressRestrictions.address = *_address; 1860 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1861 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1862 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1863 &addressRestrictions, true, &area, _address); 1864 1865 if (result != B_OK) 1866 cache->ReleaseRefLocked(); 1867 1868 cache->Unlock(); 1869 1870 if (result != B_OK) 1871 return result; 1872 1873 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1874 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1875 area->Base() + (size - 1)); 1876 1877 vm_page_reservation reservation; 1878 vm_page_reserve_pages(&reservation, reservePages, 1879 team == VMAddressSpace::KernelID() 1880 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1881 map->Lock(); 1882 1883 uint32 vecIndex = 0; 1884 size_t vecOffset = 0; 1885 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1886 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1887 vecOffset = 0; 1888 vecIndex++; 1889 } 1890 1891 if (vecIndex >= vecCount) 1892 break; 1893 1894 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1895 protection, area->MemoryType(), &reservation); 1896 1897 vecOffset += B_PAGE_SIZE; 1898 } 1899 1900 map->Unlock(); 1901 vm_page_unreserve_pages(&reservation); 1902 1903 if (_size != NULL) 1904 *_size = size; 1905 1906 area->cache_type = CACHE_TYPE_DEVICE; 1907 return area->id; 1908 } 1909 1910 1911 area_id 1912 vm_create_null_area(team_id team, const char* name, void** address, 1913 uint32 addressSpec, addr_t size, uint32 flags) 1914 { 1915 size = PAGE_ALIGN(size); 1916 1917 // Lock the address space and, if B_EXACT_ADDRESS and 1918 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1919 // is not wired. 1920 AddressSpaceWriteLocker locker; 1921 do { 1922 if (locker.SetTo(team) != B_OK) 1923 return B_BAD_TEAM_ID; 1924 } while (addressSpec == B_EXACT_ADDRESS 1925 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1926 && wait_if_address_range_is_wired(locker.AddressSpace(), 1927 (addr_t)*address, size, &locker)); 1928 1929 // create a null cache 1930 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1931 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1932 VMCache* cache; 1933 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1934 if (status != B_OK) 1935 return status; 1936 1937 cache->temporary = 1; 1938 cache->virtual_end = size; 1939 1940 cache->Lock(); 1941 1942 VMArea* area; 1943 virtual_address_restrictions addressRestrictions = {}; 1944 addressRestrictions.address = *address; 1945 addressRestrictions.address_specification = addressSpec; 1946 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1947 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 1948 REGION_NO_PRIVATE_MAP, flags, 1949 &addressRestrictions, true, &area, address); 1950 1951 if (status < B_OK) { 1952 cache->ReleaseRefAndUnlock(); 1953 return status; 1954 } 1955 1956 cache->Unlock(); 1957 1958 area->cache_type = CACHE_TYPE_NULL; 1959 return area->id; 1960 } 1961 1962 1963 /*! Creates the vnode cache for the specified \a vnode. 1964 The vnode has to be marked busy when calling this function. 1965 */ 1966 status_t 1967 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1968 { 1969 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1970 } 1971 1972 1973 /*! \a cache must be locked. The area's address space must be read-locked. 
1974 */
1975 static void
1976 pre_map_area_pages(VMArea* area, VMCache* cache,
1977 vm_page_reservation* reservation)
1978 {
1979 addr_t baseAddress = area->Base();
1980 addr_t cacheOffset = area->cache_offset;
1981 page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1982 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1983
1984 for (VMCachePagesTree::Iterator it
1985 = cache->pages.GetIterator(firstPage, true, true);
1986 vm_page* page = it.Next();) {
1987 if (page->cache_offset >= endPage)
1988 break;
1989
1990 // skip busy and inactive pages
1991 if (page->busy || page->usage_count == 0)
1992 continue;
1993
1994 DEBUG_PAGE_ACCESS_START(page);
1995 map_page(area, page,
1996 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1997 B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1998 DEBUG_PAGE_ACCESS_END(page);
1999 }
2000 }
2001
2002
2003 /*! Will map the file specified by \a fd to an area in memory.
2004 The file will be mirrored beginning at the specified \a offset. The
2005 \a offset and \a size arguments have to be page aligned.
2006 */
2007 static area_id
2008 _vm_map_file(team_id team, const char* name, void** _address,
2009 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2010 bool unmapAddressRange, int fd, off_t offset, bool kernel)
2011 {
2012 // TODO: for binary files, we want to make sure that they get the
2013 // copy of a file at a given time, ie. later changes should not
2014 // make it into the mapped copy -- this will need quite some changes
2015 // to be done in a nice way
2016 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2017 "%" B_PRIu32 ")\n", fd, offset, size, mapping));
2018
2019 offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2020 size = PAGE_ALIGN(size);
2021
2022 if (mapping == REGION_NO_PRIVATE_MAP)
2023 protection |= B_SHARED_AREA;
2024 if (addressSpec != B_EXACT_ADDRESS)
2025 unmapAddressRange = false;
2026
2027 if (fd < 0) {
2028 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2029 virtual_address_restrictions virtualRestrictions = {};
2030 virtualRestrictions.address = *_address;
2031 virtualRestrictions.address_specification = addressSpec;
2032 physical_address_restrictions physicalRestrictions = {};
2033 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2034 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2035 _address);
2036 }
2037
2038 // get the open flags of the FD
2039 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2040 if (descriptor == NULL)
2041 return EBADF;
2042 int32 openMode = descriptor->open_mode;
2043 put_fd(descriptor);
2044
2045 // The FD must be open for reading at any rate. For a shared mapping with
2046 // write access, the FD additionally has to be open for writing.
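	// In other words: an O_WRONLY descriptor can never be mapped, and an
	// O_RDONLY descriptor is rejected only for a shared mapping that requests
	// write access. A private (copy-on-write) mapping of a read-only FD may
	// still be writable, since writes then go to the anonymous copy rather
	// than to the file.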
2047 if ((openMode & O_ACCMODE) == O_WRONLY 2048 || (mapping == REGION_NO_PRIVATE_MAP 2049 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2050 && (openMode & O_ACCMODE) == O_RDONLY)) { 2051 return EACCES; 2052 } 2053 2054 uint32 protectionMax = 0; 2055 if (mapping != REGION_PRIVATE_MAP) { 2056 protectionMax = protection | B_READ_AREA; 2057 if ((openMode & O_ACCMODE) == O_RDWR) 2058 protectionMax |= B_WRITE_AREA; 2059 } 2060 2061 // get the vnode for the object, this also grabs a ref to it 2062 struct vnode* vnode = NULL; 2063 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2064 if (status < B_OK) 2065 return status; 2066 VnodePutter vnodePutter(vnode); 2067 2068 // If we're going to pre-map pages, we need to reserve the pages needed by 2069 // the mapping backend upfront. 2070 page_num_t reservedPreMapPages = 0; 2071 vm_page_reservation reservation; 2072 if ((protection & B_READ_AREA) != 0) { 2073 AddressSpaceWriteLocker locker; 2074 status = locker.SetTo(team); 2075 if (status != B_OK) 2076 return status; 2077 2078 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2079 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2080 2081 locker.Unlock(); 2082 2083 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2084 team == VMAddressSpace::KernelID() 2085 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2086 } 2087 2088 struct PageUnreserver { 2089 PageUnreserver(vm_page_reservation* reservation) 2090 : 2091 fReservation(reservation) 2092 { 2093 } 2094 2095 ~PageUnreserver() 2096 { 2097 if (fReservation != NULL) 2098 vm_page_unreserve_pages(fReservation); 2099 } 2100 2101 vm_page_reservation* fReservation; 2102 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2103 2104 // Lock the address space and, if the specified address range shall be 2105 // unmapped, ensure it is not wired. 2106 AddressSpaceWriteLocker locker; 2107 do { 2108 if (locker.SetTo(team) != B_OK) 2109 return B_BAD_TEAM_ID; 2110 } while (unmapAddressRange 2111 && wait_if_address_range_is_wired(locker.AddressSpace(), 2112 (addr_t)*_address, size, &locker)); 2113 2114 // TODO: this only works for file systems that use the file cache 2115 VMCache* cache; 2116 status = vfs_get_vnode_cache(vnode, &cache, false); 2117 if (status < B_OK) 2118 return status; 2119 2120 cache->Lock(); 2121 2122 VMArea* area; 2123 virtual_address_restrictions addressRestrictions = {}; 2124 addressRestrictions.address = *_address; 2125 addressRestrictions.address_specification = addressSpec; 2126 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2127 0, protection, protectionMax, mapping, 2128 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2129 &addressRestrictions, kernel, &area, _address); 2130 2131 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2132 // map_backing_store() cannot know we no longer need the ref 2133 cache->ReleaseRefLocked(); 2134 } 2135 2136 if (status == B_OK && (protection & B_READ_AREA) != 0) 2137 pre_map_area_pages(area, cache, &reservation); 2138 2139 cache->Unlock(); 2140 2141 if (status == B_OK) { 2142 // TODO: this probably deserves a smarter solution, ie. don't always 2143 // prefetch stuff, and also, probably don't trigger it at this place. 
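	// The prefetch below is merely an optimization -- its outcome is not
	// checked, and the mapping succeeds regardless of whether the
	// read-ahead actually happens.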
2144 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2145 // prefetches at max 10 MB starting from "offset" 2146 } 2147 2148 if (status != B_OK) 2149 return status; 2150 2151 area->cache_type = CACHE_TYPE_VNODE; 2152 return area->id; 2153 } 2154 2155 2156 area_id 2157 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2158 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2159 int fd, off_t offset) 2160 { 2161 if (!arch_vm_supports_protection(protection)) 2162 return B_NOT_SUPPORTED; 2163 2164 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2165 mapping, unmapAddressRange, fd, offset, true); 2166 } 2167 2168 2169 VMCache* 2170 vm_area_get_locked_cache(VMArea* area) 2171 { 2172 rw_lock_read_lock(&sAreaCacheLock); 2173 2174 while (true) { 2175 VMCache* cache = area->cache; 2176 2177 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2178 // cache has been deleted 2179 rw_lock_read_lock(&sAreaCacheLock); 2180 continue; 2181 } 2182 2183 rw_lock_read_lock(&sAreaCacheLock); 2184 2185 if (cache == area->cache) { 2186 cache->AcquireRefLocked(); 2187 rw_lock_read_unlock(&sAreaCacheLock); 2188 return cache; 2189 } 2190 2191 // the cache changed in the meantime 2192 cache->Unlock(); 2193 } 2194 } 2195 2196 2197 void 2198 vm_area_put_locked_cache(VMCache* cache) 2199 { 2200 cache->ReleaseRefAndUnlock(); 2201 } 2202 2203 2204 area_id 2205 vm_clone_area(team_id team, const char* name, void** address, 2206 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2207 bool kernel) 2208 { 2209 VMArea* newArea = NULL; 2210 VMArea* sourceArea; 2211 2212 // Check whether the source area exists and is cloneable. If so, mark it 2213 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2214 { 2215 AddressSpaceWriteLocker locker; 2216 status_t status = locker.SetFromArea(sourceID, sourceArea); 2217 if (status != B_OK) 2218 return status; 2219 2220 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2221 return B_NOT_ALLOWED; 2222 2223 sourceArea->protection |= B_SHARED_AREA; 2224 protection |= B_SHARED_AREA; 2225 } 2226 2227 // Now lock both address spaces and actually do the cloning. 
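	// Both address spaces are acquired through a MultiAddressSpaceLocker,
	// which takes care of locking them in a safe order. Since the source
	// area was briefly unlocked above, it has to be looked up and
	// re-validated again below.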
2228 2229 MultiAddressSpaceLocker locker; 2230 VMAddressSpace* sourceAddressSpace; 2231 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2232 if (status != B_OK) 2233 return status; 2234 2235 VMAddressSpace* targetAddressSpace; 2236 status = locker.AddTeam(team, true, &targetAddressSpace); 2237 if (status != B_OK) 2238 return status; 2239 2240 status = locker.Lock(); 2241 if (status != B_OK) 2242 return status; 2243 2244 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2245 if (sourceArea == NULL) 2246 return B_BAD_VALUE; 2247 2248 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2249 return B_NOT_ALLOWED; 2250 2251 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2252 2253 if (!kernel && sourceAddressSpace != targetAddressSpace 2254 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2255 #if KDEBUG 2256 Team* team = thread_get_current_thread()->team; 2257 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2258 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2259 #endif 2260 status = B_NOT_ALLOWED; 2261 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2262 status = B_NOT_ALLOWED; 2263 } else { 2264 virtual_address_restrictions addressRestrictions = {}; 2265 addressRestrictions.address = *address; 2266 addressRestrictions.address_specification = addressSpec; 2267 status = map_backing_store(targetAddressSpace, cache, 2268 sourceArea->cache_offset, name, sourceArea->Size(), 2269 sourceArea->wiring, protection, sourceArea->protection_max, 2270 mapping, 0, &addressRestrictions, 2271 kernel, &newArea, address); 2272 } 2273 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2274 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2275 // to create a new cache, and has therefore already acquired a reference 2276 // to the source cache - but otherwise it has no idea that we need 2277 // one. 2278 cache->AcquireRefLocked(); 2279 } 2280 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2281 // we need to map in everything at this point 2282 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2283 // we don't have actual pages to map but a physical area 2284 VMTranslationMap* map 2285 = sourceArea->address_space->TranslationMap(); 2286 map->Lock(); 2287 2288 phys_addr_t physicalAddress; 2289 uint32 oldProtection; 2290 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2291 2292 map->Unlock(); 2293 2294 map = targetAddressSpace->TranslationMap(); 2295 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2296 newArea->Base() + (newArea->Size() - 1)); 2297 2298 vm_page_reservation reservation; 2299 vm_page_reserve_pages(&reservation, reservePages, 2300 targetAddressSpace == VMAddressSpace::Kernel() 2301 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2302 map->Lock(); 2303 2304 for (addr_t offset = 0; offset < newArea->Size(); 2305 offset += B_PAGE_SIZE) { 2306 map->Map(newArea->Base() + offset, physicalAddress + offset, 2307 protection, newArea->MemoryType(), &reservation); 2308 } 2309 2310 map->Unlock(); 2311 vm_page_unreserve_pages(&reservation); 2312 } else { 2313 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2314 size_t reservePages = map->MaxPagesNeededToMap( 2315 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2316 vm_page_reservation reservation; 2317 vm_page_reserve_pages(&reservation, reservePages, 2318 targetAddressSpace == VMAddressSpace::Kernel() 2319 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2320 2321 // map in all pages from source 2322 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2323 vm_page* page = it.Next();) { 2324 if (!page->busy) { 2325 DEBUG_PAGE_ACCESS_START(page); 2326 map_page(newArea, page, 2327 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2328 - newArea->cache_offset), 2329 protection, &reservation); 2330 DEBUG_PAGE_ACCESS_END(page); 2331 } 2332 } 2333 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2334 // ensuring that! 2335 2336 vm_page_unreserve_pages(&reservation); 2337 } 2338 } 2339 if (status == B_OK) 2340 newArea->cache_type = sourceArea->cache_type; 2341 2342 vm_area_put_locked_cache(cache); 2343 2344 if (status < B_OK) 2345 return status; 2346 2347 return newArea->id; 2348 } 2349 2350 2351 /*! Deletes the specified area of the given address space. 2352 2353 The address space must be write-locked. 2354 The caller must ensure that the area does not have any wired ranges. 2355 2356 \param addressSpace The address space containing the area. 2357 \param area The area to be deleted. 2358 \param deletingAddressSpace \c true, if the address space is in the process 2359 of being deleted. 2360 */ 2361 static void 2362 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2363 bool deletingAddressSpace) 2364 { 2365 ASSERT(!area->IsWired()); 2366 2367 VMAreaHash::Remove(area); 2368 2369 // At this point the area is removed from the global hash table, but 2370 // still exists in the area list. 2371 2372 // Unmap the virtual address space the area occupied. 2373 { 2374 // We need to lock the complete cache chain. 2375 VMCache* topCache = vm_area_get_locked_cache(area); 2376 VMCacheChainLocker cacheChainLocker(topCache); 2377 cacheChainLocker.LockAllSourceCaches(); 2378 2379 // If the area's top cache is a temporary cache and the area is the only 2380 // one referencing it (besides us currently holding a second reference), 2381 // the unmapping code doesn't need to care about preserving the accessed 2382 // and dirty flags of the top cache page mappings. 2383 bool ignoreTopCachePageFlags 2384 = topCache->temporary && topCache->RefCount() == 2; 2385 2386 area->address_space->TranslationMap()->UnmapArea(area, 2387 deletingAddressSpace, ignoreTopCachePageFlags); 2388 } 2389 2390 if (!area->cache->temporary) 2391 area->cache->WriteModified(); 2392 2393 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2394 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2395 2396 arch_vm_unset_memory_type(area); 2397 addressSpace->RemoveArea(area, allocationFlags); 2398 addressSpace->Put(); 2399 2400 area->cache->RemoveArea(area); 2401 area->cache->ReleaseRef(); 2402 2403 addressSpace->DeleteArea(area, allocationFlags); 2404 } 2405 2406 2407 status_t 2408 vm_delete_area(team_id team, area_id id, bool kernel) 2409 { 2410 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2411 team, id)); 2412 2413 // lock the address space and make sure the area isn't wired 2414 AddressSpaceWriteLocker locker; 2415 VMArea* area; 2416 AreaCacheLocker cacheLocker; 2417 2418 do { 2419 status_t status = locker.SetFromArea(team, id, area); 2420 if (status != B_OK) 2421 return status; 2422 2423 cacheLocker.SetTo(area); 2424 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2425 2426 cacheLocker.Unlock(); 2427 2428 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2429 return B_NOT_ALLOWED; 2430 2431 delete_area(locker.AddressSpace(), area, false); 2432 return B_OK; 2433 } 2434 2435 2436 /*! Creates a new cache on top of given cache, moves all areas from 2437 the old cache to the new one, and changes the protection of all affected 2438 areas' pages to read-only. If requested, wired pages are moved up to the 2439 new cache and copies are added to the old cache in their place. 2440 Preconditions: 2441 - The given cache must be locked. 2442 - All of the cache's areas' address spaces must be read locked. 2443 - Either the cache must not have any wired ranges or a page reservation for 2444 all wired pages must be provided, so they can be copied. 2445 2446 \param lowerCache The cache on top of which a new cache shall be created. 2447 \param wiredPagesReservation If \c NULL there must not be any wired pages 2448 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2449 has wired page. The wired pages are copied in this case. 2450 */ 2451 static status_t 2452 vm_copy_on_write_area(VMCache* lowerCache, 2453 vm_page_reservation* wiredPagesReservation) 2454 { 2455 VMCache* upperCache; 2456 2457 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2458 2459 // We need to separate the cache from its areas. The cache goes one level 2460 // deeper and we create a new cache inbetween. 2461 2462 // create an anonymous cache 2463 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2464 lowerCache->GuardSize() / B_PAGE_SIZE, 2465 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2466 VM_PRIORITY_USER); 2467 if (status != B_OK) 2468 return status; 2469 2470 upperCache->Lock(); 2471 2472 upperCache->temporary = 1; 2473 upperCache->virtual_base = lowerCache->virtual_base; 2474 upperCache->virtual_end = lowerCache->virtual_end; 2475 2476 // transfer the lower cache areas to the upper cache 2477 rw_lock_write_lock(&sAreaCacheLock); 2478 upperCache->TransferAreas(lowerCache); 2479 rw_lock_write_unlock(&sAreaCacheLock); 2480 2481 lowerCache->AddConsumer(upperCache); 2482 2483 // We now need to remap all pages from all of the cache's areas read-only, 2484 // so that a copy will be created on next write access. If there are wired 2485 // pages, we keep their protection, move them to the upper cache and create 2486 // copies for the lower cache. 2487 if (wiredPagesReservation != NULL) { 2488 // We need to handle wired pages -- iterate through the cache's pages. 
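		// Wired pages have to keep their current mapping and protection for
		// whoever wired them, so they cannot simply be remapped read-only
		// like the rest. Instead each wired page is moved into the new upper
		// cache, and a freshly allocated copy (taken from the caller's
		// reservation) is inserted into the lower cache in its place.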
2489 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2490 vm_page* page = it.Next();) { 2491 if (page->WiredCount() > 0) { 2492 // allocate a new page and copy the wired one 2493 vm_page* copiedPage = vm_page_allocate_page( 2494 wiredPagesReservation, PAGE_STATE_ACTIVE); 2495 2496 vm_memcpy_physical_page( 2497 copiedPage->physical_page_number * B_PAGE_SIZE, 2498 page->physical_page_number * B_PAGE_SIZE); 2499 2500 // move the wired page to the upper cache (note: removing is OK 2501 // with the SplayTree iterator) and insert the copy 2502 upperCache->MovePage(page); 2503 lowerCache->InsertPage(copiedPage, 2504 page->cache_offset * B_PAGE_SIZE); 2505 2506 DEBUG_PAGE_ACCESS_END(copiedPage); 2507 } else { 2508 // Change the protection of this page in all areas. 2509 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2510 tempArea = tempArea->cache_next) { 2511 // The area must be readable in the same way it was 2512 // previously writable. 2513 uint32 protection = B_KERNEL_READ_AREA; 2514 if ((tempArea->protection & B_READ_AREA) != 0) 2515 protection |= B_READ_AREA; 2516 2517 VMTranslationMap* map 2518 = tempArea->address_space->TranslationMap(); 2519 map->Lock(); 2520 map->ProtectPage(tempArea, 2521 virtual_page_address(tempArea, page), protection); 2522 map->Unlock(); 2523 } 2524 } 2525 } 2526 } else { 2527 ASSERT(lowerCache->WiredPagesCount() == 0); 2528 2529 // just change the protection of all areas 2530 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2531 tempArea = tempArea->cache_next) { 2532 // The area must be readable in the same way it was previously 2533 // writable. 2534 uint32 protection = B_KERNEL_READ_AREA; 2535 if ((tempArea->protection & B_READ_AREA) != 0) 2536 protection |= B_READ_AREA; 2537 2538 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2539 map->Lock(); 2540 map->ProtectArea(tempArea, protection); 2541 map->Unlock(); 2542 } 2543 } 2544 2545 vm_area_put_locked_cache(upperCache); 2546 2547 return B_OK; 2548 } 2549 2550 2551 area_id 2552 vm_copy_area(team_id team, const char* name, void** _address, 2553 uint32 addressSpec, area_id sourceID) 2554 { 2555 // Do the locking: target address space, all address spaces associated with 2556 // the source cache, and the cache itself. 2557 MultiAddressSpaceLocker locker; 2558 VMAddressSpace* targetAddressSpace; 2559 VMCache* cache; 2560 VMArea* source; 2561 AreaCacheLocker cacheLocker; 2562 status_t status; 2563 bool sharedArea; 2564 2565 page_num_t wiredPages = 0; 2566 vm_page_reservation wiredPagesReservation; 2567 2568 bool restart; 2569 do { 2570 restart = false; 2571 2572 locker.Unset(); 2573 status = locker.AddTeam(team, true, &targetAddressSpace); 2574 if (status == B_OK) { 2575 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2576 &cache); 2577 } 2578 if (status != B_OK) 2579 return status; 2580 2581 cacheLocker.SetTo(cache, true); // already locked 2582 2583 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2584 2585 page_num_t oldWiredPages = wiredPages; 2586 wiredPages = 0; 2587 2588 // If the source area isn't shared, count the number of wired pages in 2589 // the cache and reserve as many pages. 
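		// Reserving pages may block, so it must not be done while the address
		// space and cache locks are held. Hence the locks are dropped for the
		// reservation, and the whole lookup is restarted whenever the wired
		// page count has grown in the meantime.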
2590 if (!sharedArea) { 2591 wiredPages = cache->WiredPagesCount(); 2592 2593 if (wiredPages > oldWiredPages) { 2594 cacheLocker.Unlock(); 2595 locker.Unlock(); 2596 2597 if (oldWiredPages > 0) 2598 vm_page_unreserve_pages(&wiredPagesReservation); 2599 2600 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2601 VM_PRIORITY_USER); 2602 2603 restart = true; 2604 } 2605 } else if (oldWiredPages > 0) 2606 vm_page_unreserve_pages(&wiredPagesReservation); 2607 } while (restart); 2608 2609 // unreserve pages later 2610 struct PagesUnreserver { 2611 PagesUnreserver(vm_page_reservation* reservation) 2612 : 2613 fReservation(reservation) 2614 { 2615 } 2616 2617 ~PagesUnreserver() 2618 { 2619 if (fReservation != NULL) 2620 vm_page_unreserve_pages(fReservation); 2621 } 2622 2623 private: 2624 vm_page_reservation* fReservation; 2625 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2626 2627 bool writableCopy 2628 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2629 uint8* targetPageProtections = NULL; 2630 2631 if (source->page_protections != NULL) { 2632 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2633 targetPageProtections = (uint8*)malloc_etc(bytes, 2634 (source->address_space == VMAddressSpace::Kernel() 2635 || targetAddressSpace == VMAddressSpace::Kernel()) 2636 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2637 if (targetPageProtections == NULL) 2638 return B_NO_MEMORY; 2639 2640 memcpy(targetPageProtections, source->page_protections, bytes); 2641 2642 if (!writableCopy) { 2643 for (size_t i = 0; i < bytes; i++) { 2644 if ((targetPageProtections[i] 2645 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2646 writableCopy = true; 2647 break; 2648 } 2649 } 2650 } 2651 } 2652 2653 if (addressSpec == B_CLONE_ADDRESS) { 2654 addressSpec = B_EXACT_ADDRESS; 2655 *_address = (void*)source->Base(); 2656 } 2657 2658 // First, create a cache on top of the source area, respectively use the 2659 // existing one, if this is a shared area. 2660 2661 VMArea* target; 2662 virtual_address_restrictions addressRestrictions = {}; 2663 addressRestrictions.address = *_address; 2664 addressRestrictions.address_specification = addressSpec; 2665 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2666 name, source->Size(), source->wiring, source->protection, 2667 source->protection_max, 2668 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2669 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2670 &addressRestrictions, true, &target, _address); 2671 if (status < B_OK) { 2672 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2673 return status; 2674 } 2675 2676 if (targetPageProtections != NULL) 2677 target->page_protections = targetPageProtections; 2678 2679 if (sharedArea) { 2680 // The new area uses the old area's cache, but map_backing_store() 2681 // hasn't acquired a ref. So we have to do that now. 2682 cache->AcquireRefLocked(); 2683 } 2684 2685 // If the source area is writable, we need to move it one layer up as well 2686 2687 if (!sharedArea) { 2688 if (writableCopy) { 2689 // TODO: do something more useful if this fails! 2690 if (vm_copy_on_write_area(cache, 2691 wiredPages > 0 ? 
&wiredPagesReservation : NULL) < B_OK) { 2692 panic("vm_copy_on_write_area() failed!\n"); 2693 } 2694 } 2695 } 2696 2697 // we return the ID of the newly created area 2698 return target->id; 2699 } 2700 2701 2702 status_t 2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2704 bool kernel) 2705 { 2706 fix_protection(&newProtection); 2707 2708 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2709 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2710 2711 if (!arch_vm_supports_protection(newProtection)) 2712 return B_NOT_SUPPORTED; 2713 2714 bool becomesWritable 2715 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2716 2717 // lock address spaces and cache 2718 MultiAddressSpaceLocker locker; 2719 VMCache* cache; 2720 VMArea* area; 2721 status_t status; 2722 AreaCacheLocker cacheLocker; 2723 bool isWritable; 2724 2725 bool restart; 2726 do { 2727 restart = false; 2728 2729 locker.Unset(); 2730 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2731 if (status != B_OK) 2732 return status; 2733 2734 cacheLocker.SetTo(cache, true); // already locked 2735 2736 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2737 || (area->protection & B_KERNEL_AREA) != 0)) { 2738 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2739 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2740 " (%s)\n", team, newProtection, areaID, area->name); 2741 return B_NOT_ALLOWED; 2742 } 2743 if (!kernel && area->protection_max != 0 2744 && (newProtection & area->protection_max) 2745 != (newProtection & B_USER_PROTECTION)) { 2746 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2747 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2748 "area %" B_PRId32 " (%s)\n", team, newProtection, 2749 area->protection_max, areaID, area->name); 2750 return B_NOT_ALLOWED; 2751 } 2752 2753 if (area->protection == newProtection) 2754 return B_OK; 2755 2756 if (team != VMAddressSpace::KernelID() 2757 && area->address_space->ID() != team) { 2758 // unless you're the kernel, you are only allowed to set 2759 // the protection of your own areas 2760 return B_NOT_ALLOWED; 2761 } 2762 2763 isWritable 2764 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2765 2766 // Make sure the area (respectively, if we're going to call 2767 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2768 // wired ranges. 2769 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2770 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2771 otherArea = otherArea->cache_next) { 2772 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2773 restart = true; 2774 break; 2775 } 2776 } 2777 } else { 2778 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2779 restart = true; 2780 } 2781 } while (restart); 2782 2783 bool changePageProtection = true; 2784 bool changeTopCachePagesOnly = false; 2785 2786 if (isWritable && !becomesWritable) { 2787 // writable -> !writable 2788 2789 if (cache->source != NULL && cache->temporary) { 2790 if (cache->CountWritableAreas(area) == 0) { 2791 // Since this cache now lives from the pages in its source cache, 2792 // we can change the cache's commitment to take only those pages 2793 // into account that really are in this cache. 2794 2795 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2796 team == VMAddressSpace::KernelID() 2797 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2798 2799 // TODO: we may be able to join with our source cache, if 2800 // count == 0 2801 } 2802 } 2803 2804 // If only the writability changes, we can just remap the pages of the 2805 // top cache, since the pages of lower caches are mapped read-only 2806 // anyway. That's advantageous only, if the number of pages in the cache 2807 // is significantly smaller than the number of pages in the area, 2808 // though. 2809 if (newProtection 2810 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2811 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2812 changeTopCachePagesOnly = true; 2813 } 2814 } else if (!isWritable && becomesWritable) { 2815 // !writable -> writable 2816 2817 if (!cache->consumers.IsEmpty()) { 2818 // There are consumers -- we have to insert a new cache. Fortunately 2819 // vm_copy_on_write_area() does everything that's needed. 2820 changePageProtection = false; 2821 status = vm_copy_on_write_area(cache, NULL); 2822 } else { 2823 // No consumers, so we don't need to insert a new one. 2824 if (cache->source != NULL && cache->temporary) { 2825 // the cache's commitment must contain all possible pages 2826 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2827 team == VMAddressSpace::KernelID() 2828 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2829 } 2830 2831 if (status == B_OK && cache->source != NULL) { 2832 // There's a source cache, hence we can't just change all pages' 2833 // protection or we might allow writing into pages belonging to 2834 // a lower cache. 2835 changeTopCachePagesOnly = true; 2836 } 2837 } 2838 } else { 2839 // we don't have anything special to do in all other cases 2840 } 2841 2842 if (status == B_OK) { 2843 // remap existing pages in this cache 2844 if (changePageProtection) { 2845 VMTranslationMap* map = area->address_space->TranslationMap(); 2846 map->Lock(); 2847 2848 if (changeTopCachePagesOnly) { 2849 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2850 page_num_t lastPageOffset 2851 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2852 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2853 vm_page* page = it.Next();) { 2854 if (page->cache_offset >= firstPageOffset 2855 && page->cache_offset <= lastPageOffset) { 2856 addr_t address = virtual_page_address(area, page); 2857 map->ProtectPage(area, address, newProtection); 2858 } 2859 } 2860 } else 2861 map->ProtectArea(area, newProtection); 2862 2863 map->Unlock(); 2864 } 2865 2866 area->protection = newProtection; 2867 } 2868 2869 return status; 2870 } 2871 2872 2873 status_t 2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2875 { 2876 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2877 if (addressSpace == NULL) 2878 return B_BAD_TEAM_ID; 2879 2880 VMTranslationMap* map = addressSpace->TranslationMap(); 2881 2882 map->Lock(); 2883 uint32 dummyFlags; 2884 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2885 map->Unlock(); 2886 2887 addressSpace->Put(); 2888 return status; 2889 } 2890 2891 2892 /*! The page's cache must be locked. 
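	Returns \c true if either the page's \c modified flag is already set or
	one of its mappings is marked modified by the translation map.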
2893 */ 2894 bool 2895 vm_test_map_modification(vm_page* page) 2896 { 2897 if (page->modified) 2898 return true; 2899 2900 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2901 vm_page_mapping* mapping; 2902 while ((mapping = iterator.Next()) != NULL) { 2903 VMArea* area = mapping->area; 2904 VMTranslationMap* map = area->address_space->TranslationMap(); 2905 2906 phys_addr_t physicalAddress; 2907 uint32 flags; 2908 map->Lock(); 2909 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2910 map->Unlock(); 2911 2912 if ((flags & PAGE_MODIFIED) != 0) 2913 return true; 2914 } 2915 2916 return false; 2917 } 2918 2919 2920 /*! The page's cache must be locked. 2921 */ 2922 void 2923 vm_clear_map_flags(vm_page* page, uint32 flags) 2924 { 2925 if ((flags & PAGE_ACCESSED) != 0) 2926 page->accessed = false; 2927 if ((flags & PAGE_MODIFIED) != 0) 2928 page->modified = false; 2929 2930 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2931 vm_page_mapping* mapping; 2932 while ((mapping = iterator.Next()) != NULL) { 2933 VMArea* area = mapping->area; 2934 VMTranslationMap* map = area->address_space->TranslationMap(); 2935 2936 map->Lock(); 2937 map->ClearFlags(virtual_page_address(area, page), flags); 2938 map->Unlock(); 2939 } 2940 } 2941 2942 2943 /*! Removes all mappings from a page. 2944 After you've called this function, the page is unmapped from memory and 2945 the page's \c accessed and \c modified flags have been updated according 2946 to the state of the mappings. 2947 The page's cache must be locked. 2948 */ 2949 void 2950 vm_remove_all_page_mappings(vm_page* page) 2951 { 2952 while (vm_page_mapping* mapping = page->mappings.Head()) { 2953 VMArea* area = mapping->area; 2954 VMTranslationMap* map = area->address_space->TranslationMap(); 2955 addr_t address = virtual_page_address(area, page); 2956 map->UnmapPage(area, address, false); 2957 } 2958 } 2959 2960 2961 int32 2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2963 { 2964 int32 count = 0; 2965 2966 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2967 vm_page_mapping* mapping; 2968 while ((mapping = iterator.Next()) != NULL) { 2969 VMArea* area = mapping->area; 2970 VMTranslationMap* map = area->address_space->TranslationMap(); 2971 2972 bool modified; 2973 if (map->ClearAccessedAndModified(area, 2974 virtual_page_address(area, page), false, modified)) { 2975 count++; 2976 } 2977 2978 page->modified |= modified; 2979 } 2980 2981 2982 if (page->accessed) { 2983 count++; 2984 page->accessed = false; 2985 } 2986 2987 return count; 2988 } 2989 2990 2991 /*! Removes all mappings of a page and/or clears the accessed bits of the 2992 mappings. 2993 The function iterates through the page mappings and removes them until 2994 encountering one that has been accessed. From then on it will continue to 2995 iterate, but only clear the accessed flag of the mapping. The page's 2996 \c modified bit will be updated accordingly, the \c accessed bit will be 2997 cleared. 2998 \return The number of mapping accessed bits encountered, including the 2999 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3000 of the page have been removed. 
3001 */
3002 int32
3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3004 {
3005 ASSERT(page->WiredCount() == 0);
3006
3007 if (page->accessed)
3008 return vm_clear_page_mapping_accessed_flags(page);
3009
3010 while (vm_page_mapping* mapping = page->mappings.Head()) {
3011 VMArea* area = mapping->area;
3012 VMTranslationMap* map = area->address_space->TranslationMap();
3013 addr_t address = virtual_page_address(area, page);
3014 bool modified = false;
3015 if (map->ClearAccessedAndModified(area, address, true, modified)) {
3016 page->accessed = true;
3017 page->modified |= modified;
3018 return vm_clear_page_mapping_accessed_flags(page);
3019 }
3020 page->modified |= modified;
3021 }
3022
3023 return 0;
3024 }
3025
3026
3027 static int
3028 display_mem(int argc, char** argv)
3029 {
3030 bool physical = false;
3031 addr_t copyAddress;
3032 int32 displayWidth;
3033 int32 itemSize;
3034 int32 num = -1;
3035 addr_t address;
3036 int i = 1, j;
3037
3038 if (argc > 1 && argv[1][0] == '-') {
3039 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3040 physical = true;
3041 i++;
3042 } else
3043 i = 99;
3044 }
3045
3046 if (argc < i + 1 || argc > i + 2) {
3047 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3048 "\tdl - 8 bytes\n"
3049 "\tdw - 4 bytes\n"
3050 "\tds - 2 bytes\n"
3051 "\tdb - 1 byte\n"
3052 "\tstring - a whole string\n"
3053 " -p or --physical only allows memory from a single page to be "
3054 "displayed.\n");
3055 return 0;
3056 }
3057
3058 address = parse_expression(argv[i]);
3059
3060 if (argc > i + 1)
3061 num = parse_expression(argv[i + 1]);
3062
3063 // build the format string
3064 if (strcmp(argv[0], "db") == 0) {
3065 itemSize = 1;
3066 displayWidth = 16;
3067 } else if (strcmp(argv[0], "ds") == 0) {
3068 itemSize = 2;
3069 displayWidth = 8;
3070 } else if (strcmp(argv[0], "dw") == 0) {
3071 itemSize = 4;
3072 displayWidth = 4;
3073 } else if (strcmp(argv[0], "dl") == 0) {
3074 itemSize = 8;
3075 displayWidth = 2;
3076 } else if (strcmp(argv[0], "string") == 0) {
3077 itemSize = 1;
3078 displayWidth = -1;
3079 } else {
3080 kprintf("display_mem called in an invalid way!\n");
3081 return 0;
3082 }
3083
3084 if (num <= 0)
3085 num = displayWidth;
3086
3087 void* physicalPageHandle = NULL;
3088
3089 if (physical) {
3090 int32 offset = address & (B_PAGE_SIZE - 1);
3091 if (num * itemSize + offset > B_PAGE_SIZE) {
3092 num = (B_PAGE_SIZE - offset) / itemSize;
3093 kprintf("NOTE: number of bytes has been cut to page size\n");
3094 }
3095
3096 address = ROUNDDOWN(address, B_PAGE_SIZE);
3097
3098 if (vm_get_physical_page_debug(address, &copyAddress,
3099 &physicalPageHandle) != B_OK) {
3100 kprintf("getting the hardware page failed.");
3101 return 0;
3102 }
3103
3104 address += offset;
3105 copyAddress += offset;
3106 } else
3107 copyAddress = address;
3108
3109 if (!strcmp(argv[0], "string")) {
3110 kprintf("%p \"", (char*)copyAddress);
3111
3112 // string mode
3113 for (i = 0; true; i++) {
3114 char c;
3115 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3116 != B_OK
3117 || c == '\0') {
3118 break;
3119 }
3120
3121 if (c == '\n')
3122 kprintf("\\n");
3123 else if (c == '\t')
3124 kprintf("\\t");
3125 else {
3126 if (!isprint(c))
3127 c = '.';
3128
3129 kprintf("%c", c);
3130 }
3131 }
3132
3133 kprintf("\"\n");
3134 } else {
3135 // number mode
3136 for (i = 0; i < num; i++) {
3137 uint64 value;
3138
3139 if ((i % displayWidth) == 0) {
3140 int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3141 if (i != 0)
3142 kprintf("\n"); 3143 3144 kprintf("[0x%lx] ", address + i * itemSize); 3145 3146 for (j = 0; j < displayed; j++) { 3147 char c; 3148 if (debug_memcpy(B_CURRENT_TEAM, &c, 3149 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3150 displayed = j; 3151 break; 3152 } 3153 if (!isprint(c)) 3154 c = '.'; 3155 3156 kprintf("%c", c); 3157 } 3158 if (num > displayWidth) { 3159 // make sure the spacing in the last line is correct 3160 for (j = displayed; j < displayWidth * itemSize; j++) 3161 kprintf(" "); 3162 } 3163 kprintf(" "); 3164 } 3165 3166 if (debug_memcpy(B_CURRENT_TEAM, &value, 3167 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3168 kprintf("read fault"); 3169 break; 3170 } 3171 3172 switch (itemSize) { 3173 case 1: 3174 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3175 break; 3176 case 2: 3177 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3178 break; 3179 case 4: 3180 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3181 break; 3182 case 8: 3183 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3184 break; 3185 } 3186 } 3187 3188 kprintf("\n"); 3189 } 3190 3191 if (physical) { 3192 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3193 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3194 } 3195 return 0; 3196 } 3197 3198 3199 static void 3200 dump_cache_tree_recursively(VMCache* cache, int level, 3201 VMCache* highlightCache) 3202 { 3203 // print this cache 3204 for (int i = 0; i < level; i++) 3205 kprintf(" "); 3206 if (cache == highlightCache) 3207 kprintf("%p <--\n", cache); 3208 else 3209 kprintf("%p\n", cache); 3210 3211 // recursively print its consumers 3212 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3213 VMCache* consumer = it.Next();) { 3214 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3215 } 3216 } 3217 3218 3219 static int 3220 dump_cache_tree(int argc, char** argv) 3221 { 3222 if (argc != 2 || !strcmp(argv[1], "--help")) { 3223 kprintf("usage: %s <address>\n", argv[0]); 3224 return 0; 3225 } 3226 3227 addr_t address = parse_expression(argv[1]); 3228 if (address == 0) 3229 return 0; 3230 3231 VMCache* cache = (VMCache*)address; 3232 VMCache* root = cache; 3233 3234 // find the root cache (the transitive source) 3235 while (root->source != NULL) 3236 root = root->source; 3237 3238 dump_cache_tree_recursively(root, 0, cache); 3239 3240 return 0; 3241 } 3242 3243 3244 const char* 3245 vm_cache_type_to_string(int32 type) 3246 { 3247 switch (type) { 3248 case CACHE_TYPE_RAM: 3249 return "RAM"; 3250 case CACHE_TYPE_DEVICE: 3251 return "device"; 3252 case CACHE_TYPE_VNODE: 3253 return "vnode"; 3254 case CACHE_TYPE_NULL: 3255 return "null"; 3256 3257 default: 3258 return "unknown"; 3259 } 3260 } 3261 3262 3263 #if DEBUG_CACHE_LIST 3264 3265 static void 3266 update_cache_info_recursively(VMCache* cache, cache_info& info) 3267 { 3268 info.page_count += cache->page_count; 3269 if (cache->type == CACHE_TYPE_RAM) 3270 info.committed += cache->committed_size; 3271 3272 // recurse 3273 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3274 VMCache* consumer = it.Next();) { 3275 update_cache_info_recursively(consumer, info); 3276 } 3277 } 3278 3279 3280 static int 3281 cache_info_compare_page_count(const void* _a, const void* _b) 3282 { 3283 const cache_info* a = (const cache_info*)_a; 3284 const cache_info* b = (const cache_info*)_b; 3285 if (a->page_count == b->page_count) 3286 return 0; 3287 return a->page_count < b->page_count ? 
1 : -1; 3288 } 3289 3290 3291 static int 3292 cache_info_compare_committed(const void* _a, const void* _b) 3293 { 3294 const cache_info* a = (const cache_info*)_a; 3295 const cache_info* b = (const cache_info*)_b; 3296 if (a->committed == b->committed) 3297 return 0; 3298 return a->committed < b->committed ? 1 : -1; 3299 } 3300 3301 3302 static void 3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3304 { 3305 for (int i = 0; i < level; i++) 3306 kprintf(" "); 3307 3308 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3309 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3310 cache->virtual_base, cache->virtual_end, cache->page_count); 3311 3312 if (level == 0) 3313 kprintf("/%lu", info.page_count); 3314 3315 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3316 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3317 3318 if (level == 0) 3319 kprintf("/%lu", info.committed); 3320 } 3321 3322 // areas 3323 if (cache->areas != NULL) { 3324 VMArea* area = cache->areas; 3325 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3326 area->name, area->address_space->ID()); 3327 3328 while (area->cache_next != NULL) { 3329 area = area->cache_next; 3330 kprintf(", %" B_PRId32, area->id); 3331 } 3332 } 3333 3334 kputs("\n"); 3335 3336 // recurse 3337 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3338 VMCache* consumer = it.Next();) { 3339 dump_caches_recursively(consumer, info, level + 1); 3340 } 3341 } 3342 3343 3344 static int 3345 dump_caches(int argc, char** argv) 3346 { 3347 if (sCacheInfoTable == NULL) { 3348 kprintf("No cache info table!\n"); 3349 return 0; 3350 } 3351 3352 bool sortByPageCount = true; 3353 3354 for (int32 i = 1; i < argc; i++) { 3355 if (strcmp(argv[i], "-c") == 0) { 3356 sortByPageCount = false; 3357 } else { 3358 print_debugger_command_usage(argv[0]); 3359 return 0; 3360 } 3361 } 3362 3363 uint32 totalCount = 0; 3364 uint32 rootCount = 0; 3365 off_t totalCommitted = 0; 3366 page_num_t totalPages = 0; 3367 3368 VMCache* cache = gDebugCacheList; 3369 while (cache) { 3370 totalCount++; 3371 if (cache->source == NULL) { 3372 cache_info stackInfo; 3373 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3374 ? sCacheInfoTable[rootCount] : stackInfo; 3375 rootCount++; 3376 info.cache = cache; 3377 info.page_count = 0; 3378 info.committed = 0; 3379 update_cache_info_recursively(cache, info); 3380 totalCommitted += info.committed; 3381 totalPages += info.page_count; 3382 } 3383 3384 cache = cache->debug_next; 3385 } 3386 3387 if (rootCount <= (uint32)kCacheInfoTableCount) { 3388 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3389 sortByPageCount 3390 ? &cache_info_compare_page_count 3391 : &cache_info_compare_committed); 3392 } 3393 3394 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3395 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3396 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3397 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3398 "page count" : "committed size"); 3399 3400 if (rootCount <= (uint32)kCacheInfoTableCount) { 3401 for (uint32 i = 0; i < rootCount; i++) { 3402 cache_info& info = sCacheInfoTable[i]; 3403 dump_caches_recursively(info.cache, info, 0); 3404 } 3405 } else 3406 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3407 3408 return 0; 3409 } 3410 3411 #endif // DEBUG_CACHE_LIST 3412 3413 3414 static int 3415 dump_cache(int argc, char** argv) 3416 { 3417 VMCache* cache; 3418 bool showPages = false; 3419 int i = 1; 3420 3421 if (argc < 2 || !strcmp(argv[1], "--help")) { 3422 kprintf("usage: %s [-ps] <address>\n" 3423 " if -p is specified, all pages are shown, if -s is used\n" 3424 " only the cache info is shown respectively.\n", argv[0]); 3425 return 0; 3426 } 3427 while (argv[i][0] == '-') { 3428 char* arg = argv[i] + 1; 3429 while (arg[0]) { 3430 if (arg[0] == 'p') 3431 showPages = true; 3432 arg++; 3433 } 3434 i++; 3435 } 3436 if (argv[i] == NULL) { 3437 kprintf("%s: invalid argument, pass address\n", argv[0]); 3438 return 0; 3439 } 3440 3441 addr_t address = parse_expression(argv[i]); 3442 if (address == 0) 3443 return 0; 3444 3445 cache = (VMCache*)address; 3446 3447 cache->Dump(showPages); 3448 3449 set_debug_variable("_sourceCache", (addr_t)cache->source); 3450 3451 return 0; 3452 } 3453 3454 3455 static void 3456 dump_area_struct(VMArea* area, bool mappings) 3457 { 3458 kprintf("AREA: %p\n", area); 3459 kprintf("name:\t\t'%s'\n", area->name); 3460 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3461 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3462 kprintf("base:\t\t0x%lx\n", area->Base()); 3463 kprintf("size:\t\t0x%lx\n", area->Size()); 3464 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3465 kprintf("page_protection:%p\n", area->page_protections); 3466 kprintf("wiring:\t\t0x%x\n", area->wiring); 3467 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3468 kprintf("cache:\t\t%p\n", area->cache); 3469 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3470 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3471 kprintf("cache_next:\t%p\n", area->cache_next); 3472 kprintf("cache_prev:\t%p\n", area->cache_prev); 3473 3474 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3475 if (mappings) { 3476 kprintf("page mappings:\n"); 3477 while (iterator.HasNext()) { 3478 vm_page_mapping* mapping = iterator.Next(); 3479 kprintf(" %p", mapping->page); 3480 } 3481 kprintf("\n"); 3482 } else { 3483 uint32 count = 0; 3484 while (iterator.Next() != NULL) { 3485 count++; 3486 } 3487 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3488 } 3489 } 3490 3491 3492 static int 3493 dump_area(int argc, char** argv) 3494 { 3495 bool mappings = false; 3496 bool found = false; 3497 int32 index = 1; 3498 VMArea* area; 3499 addr_t num; 3500 3501 if (argc < 2 || !strcmp(argv[1], "--help")) { 3502 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3503 "All areas matching either id/address/name are listed. 
You can\n" 3504 "force to check only a specific item by prefixing the specifier\n" 3505 "with the id/contains/address/name keywords.\n" 3506 "-m shows the area's mappings as well.\n"); 3507 return 0; 3508 } 3509 3510 if (!strcmp(argv[1], "-m")) { 3511 mappings = true; 3512 index++; 3513 } 3514 3515 int32 mode = 0xf; 3516 if (!strcmp(argv[index], "id")) 3517 mode = 1; 3518 else if (!strcmp(argv[index], "contains")) 3519 mode = 2; 3520 else if (!strcmp(argv[index], "name")) 3521 mode = 4; 3522 else if (!strcmp(argv[index], "address")) 3523 mode = 0; 3524 if (mode != 0xf) 3525 index++; 3526 3527 if (index >= argc) { 3528 kprintf("No area specifier given.\n"); 3529 return 0; 3530 } 3531 3532 num = parse_expression(argv[index]); 3533 3534 if (mode == 0) { 3535 dump_area_struct((struct VMArea*)num, mappings); 3536 } else { 3537 // walk through the area list, looking for the arguments as a name 3538 3539 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3540 while ((area = it.Next()) != NULL) { 3541 if (((mode & 4) != 0 3542 && !strcmp(argv[index], area->name)) 3543 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3544 || (((mode & 2) != 0 && area->Base() <= num 3545 && area->Base() + area->Size() > num))))) { 3546 dump_area_struct(area, mappings); 3547 found = true; 3548 } 3549 } 3550 3551 if (!found) 3552 kprintf("could not find area %s (%ld)\n", argv[index], num); 3553 } 3554 3555 return 0; 3556 } 3557 3558 3559 static int 3560 dump_area_list(int argc, char** argv) 3561 { 3562 VMArea* area; 3563 const char* name = NULL; 3564 int32 id = 0; 3565 3566 if (argc > 1) { 3567 id = parse_expression(argv[1]); 3568 if (id == 0) 3569 name = argv[1]; 3570 } 3571 3572 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3573 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3574 B_PRINTF_POINTER_WIDTH, "size"); 3575 3576 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3577 while ((area = it.Next()) != NULL) { 3578 if ((id != 0 && area->address_space->ID() != id) 3579 || (name != NULL && strstr(area->name, name) == NULL)) 3580 continue; 3581 3582 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3583 area->id, (void*)area->Base(), (void*)area->Size(), 3584 area->protection, area->wiring, area->name); 3585 } 3586 return 0; 3587 } 3588 3589 3590 static int 3591 dump_available_memory(int argc, char** argv) 3592 { 3593 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3594 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3595 return 0; 3596 } 3597 3598 3599 static int 3600 dump_mapping_info(int argc, char** argv) 3601 { 3602 bool reverseLookup = false; 3603 bool pageLookup = false; 3604 3605 int argi = 1; 3606 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3607 const char* arg = argv[argi]; 3608 if (strcmp(arg, "-r") == 0) { 3609 reverseLookup = true; 3610 } else if (strcmp(arg, "-p") == 0) { 3611 reverseLookup = true; 3612 pageLookup = true; 3613 } else { 3614 print_debugger_command_usage(argv[0]); 3615 return 0; 3616 } 3617 } 3618 3619 // We need at least one argument, the address. Optionally a thread ID can be 3620 // specified. 
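	// That is, after the options exactly one or two arguments must remain:
	// the address, optionally followed by a thread ID whose team supplies
	// the address space used for the lookup.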
3621 if (argi >= argc || argi + 2 < argc) { 3622 print_debugger_command_usage(argv[0]); 3623 return 0; 3624 } 3625 3626 uint64 addressValue; 3627 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3628 return 0; 3629 3630 Team* team = NULL; 3631 if (argi < argc) { 3632 uint64 threadID; 3633 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3634 return 0; 3635 3636 Thread* thread = Thread::GetDebug(threadID); 3637 if (thread == NULL) { 3638 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3639 return 0; 3640 } 3641 3642 team = thread->team; 3643 } 3644 3645 if (reverseLookup) { 3646 phys_addr_t physicalAddress; 3647 if (pageLookup) { 3648 vm_page* page = (vm_page*)(addr_t)addressValue; 3649 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3650 } else { 3651 physicalAddress = (phys_addr_t)addressValue; 3652 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3653 } 3654 3655 kprintf(" Team Virtual Address Area\n"); 3656 kprintf("--------------------------------------\n"); 3657 3658 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3659 Callback() 3660 : 3661 fAddressSpace(NULL) 3662 { 3663 } 3664 3665 void SetAddressSpace(VMAddressSpace* addressSpace) 3666 { 3667 fAddressSpace = addressSpace; 3668 } 3669 3670 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3671 { 3672 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3673 virtualAddress); 3674 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3675 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3676 else 3677 kprintf("\n"); 3678 return false; 3679 } 3680 3681 private: 3682 VMAddressSpace* fAddressSpace; 3683 } callback; 3684 3685 if (team != NULL) { 3686 // team specified -- get its address space 3687 VMAddressSpace* addressSpace = team->address_space; 3688 if (addressSpace == NULL) { 3689 kprintf("Failed to get address space!\n"); 3690 return 0; 3691 } 3692 3693 callback.SetAddressSpace(addressSpace); 3694 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3695 physicalAddress, callback); 3696 } else { 3697 // no team specified -- iterate through all address spaces 3698 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3699 addressSpace != NULL; 3700 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3701 callback.SetAddressSpace(addressSpace); 3702 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3703 physicalAddress, callback); 3704 } 3705 } 3706 } else { 3707 // get the address space 3708 addr_t virtualAddress = (addr_t)addressValue; 3709 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3710 VMAddressSpace* addressSpace; 3711 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3712 addressSpace = VMAddressSpace::Kernel(); 3713 } else if (team != NULL) { 3714 addressSpace = team->address_space; 3715 } else { 3716 Thread* thread = debug_get_debugged_thread(); 3717 if (thread == NULL || thread->team == NULL) { 3718 kprintf("Failed to get team!\n"); 3719 return 0; 3720 } 3721 3722 addressSpace = thread->team->address_space; 3723 } 3724 3725 if (addressSpace == NULL) { 3726 kprintf("Failed to get address space!\n"); 3727 return 0; 3728 } 3729 3730 // let the translation map implementation do the job 3731 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3732 } 3733 3734 return 0; 3735 } 3736 3737 3738 /*! Deletes all areas and reserved regions in the given address space. 3739 3740 The caller must ensure that none of the areas has any wired ranges. 
3741 3742 \param addressSpace The address space. 3743 \param deletingAddressSpace \c true, if the address space is in the process 3744 of being deleted. 3745 */ 3746 void 3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3748 { 3749 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3750 addressSpace->ID())); 3751 3752 addressSpace->WriteLock(); 3753 3754 // remove all reserved areas in this address space 3755 addressSpace->UnreserveAllAddressRanges(0); 3756 3757 // delete all the areas in this address space 3758 while (VMArea* area = addressSpace->FirstArea()) { 3759 ASSERT(!area->IsWired()); 3760 delete_area(addressSpace, area, deletingAddressSpace); 3761 } 3762 3763 addressSpace->WriteUnlock(); 3764 } 3765 3766 3767 static area_id 3768 vm_area_for(addr_t address, bool kernel) 3769 { 3770 team_id team; 3771 if (IS_USER_ADDRESS(address)) { 3772 // we try the user team address space, if any 3773 team = VMAddressSpace::CurrentID(); 3774 if (team < 0) 3775 return team; 3776 } else 3777 team = VMAddressSpace::KernelID(); 3778 3779 AddressSpaceReadLocker locker(team); 3780 if (!locker.IsLocked()) 3781 return B_BAD_TEAM_ID; 3782 3783 VMArea* area = locker.AddressSpace()->LookupArea(address); 3784 if (area != NULL) { 3785 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3786 return B_ERROR; 3787 3788 return area->id; 3789 } 3790 3791 return B_ERROR; 3792 } 3793 3794 3795 /*! Frees physical pages that were used during the boot process. 3796 \a end is inclusive. 3797 */ 3798 static void 3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3800 { 3801 // free all physical pages in the specified range 3802 3803 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3804 phys_addr_t physicalAddress; 3805 uint32 flags; 3806 3807 if (map->Query(current, &physicalAddress, &flags) == B_OK 3808 && (flags & PAGE_PRESENT) != 0) { 3809 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3810 if (page != NULL && page->State() != PAGE_STATE_FREE 3811 && page->State() != PAGE_STATE_CLEAR 3812 && page->State() != PAGE_STATE_UNUSED) { 3813 DEBUG_PAGE_ACCESS_START(page); 3814 vm_page_set_state(page, PAGE_STATE_FREE); 3815 } 3816 } 3817 } 3818 3819 // unmap the memory 3820 map->Unmap(start, end); 3821 } 3822 3823 3824 void 3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3826 { 3827 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3828 addr_t end = start + (size - 1); 3829 addr_t lastEnd = start; 3830 3831 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3832 (void*)start, (void*)end)); 3833 3834 // The areas are sorted in virtual address space order, so 3835 // we just have to find the holes between them that fall 3836 // into the area we should dispose 3837 3838 map->Lock(); 3839 3840 for (VMAddressSpace::AreaIterator it 3841 = VMAddressSpace::Kernel()->GetAreaIterator(); 3842 VMArea* area = it.Next();) { 3843 addr_t areaStart = area->Base(); 3844 addr_t areaEnd = areaStart + (area->Size() - 1); 3845 3846 if (areaEnd < start) 3847 continue; 3848 3849 if (areaStart > end) { 3850 // we are done, the area is already beyond of what we have to free 3851 break; 3852 } 3853 3854 if (areaStart > lastEnd) { 3855 // this is something we can free 3856 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3857 (void*)areaStart)); 3858 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3859 } 3860 3861 if (areaEnd >= end) 
{
			lastEnd = areaEnd;
				// no +1 to prevent potential overflow
			break;
		}

		lastEnd = areaEnd + 1;
	}

	if (lastEnd < end) {
		// we can also get rid of some space at the end of the area
		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
			(void*)end));
		unmap_and_free_physical_pages(map, lastEnd, end);
	}

	map->Unlock();
}


static void
create_preloaded_image_areas(struct preloaded_image* _image)
{
	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
	char name[B_OS_NAME_LENGTH];
	void* address;
	int32 length;

	// use file name to create a good area name
	char* fileName = strrchr(image->name, '/');
	if (fileName == NULL)
		fileName = image->name;
	else
		fileName++;

	length = strlen(fileName);
	// make sure there is enough space for the suffix
	if (length > 25)
		length = 25;

	memcpy(name, fileName, length);
	strcpy(name + length, "_text");
	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		// this will later be remapped read-only/executable by the
		// ELF initialization code

	strcpy(name + length, "_data");
	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}


/*! Frees all areas that were previously created for the kernel args ranges
	of the kernel_args structure. Any boot loader resources contained in
	those arguments must not be accessed anymore past this point.
*/
void
vm_free_kernel_args(kernel_args* args)
{
	uint32 i;

	TRACE(("vm_free_kernel_args()\n"));

	for (i = 0; i < args->num_kernel_args_ranges; i++) {
		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
		if (area >= B_OK)
			delete_area(area);
	}
}


static void
allocate_kernel_args(kernel_args* args)
{
	TRACE(("allocate_kernel_args()\n"));

	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
		void* address = (void*)(addr_t)args->kernel_args_range[i].start;

		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
			args->kernel_args_range[i].size, B_ALREADY_WIRED,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}
}


static void
unreserve_boot_loader_ranges(kernel_args* args)
{
	TRACE(("unreserve_boot_loader_ranges()\n"));

	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
		vm_unreserve_address_range(VMAddressSpace::KernelID(),
			(void*)(addr_t)args->virtual_allocated_range[i].start,
			args->virtual_allocated_range[i].size);
	}
}


static void
reserve_boot_loader_ranges(kernel_args* args)
{
	TRACE(("reserve_boot_loader_ranges()\n"));

	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;

		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
3975 if (!IS_KERNEL_ADDRESS(address)) { 3976 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3977 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3978 continue; 3979 } 3980 3981 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3982 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3983 if (status < B_OK) 3984 panic("could not reserve boot loader ranges\n"); 3985 } 3986 } 3987 3988 3989 static addr_t 3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3991 { 3992 size = PAGE_ALIGN(size); 3993 3994 // find a slot in the virtual allocation addr range 3995 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3996 // check to see if the space between this one and the last is big enough 3997 addr_t rangeStart = args->virtual_allocated_range[i].start; 3998 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3999 + args->virtual_allocated_range[i - 1].size; 4000 4001 addr_t base = alignment > 0 4002 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4003 4004 if (base >= KERNEL_BASE && base < rangeStart 4005 && rangeStart - base >= size) { 4006 args->virtual_allocated_range[i - 1].size 4007 += base + size - previousRangeEnd; 4008 return base; 4009 } 4010 } 4011 4012 // we hadn't found one between allocation ranges. this is ok. 4013 // see if there's a gap after the last one 4014 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4015 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4016 + args->virtual_allocated_range[lastEntryIndex].size; 4017 addr_t base = alignment > 0 4018 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4019 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4020 args->virtual_allocated_range[lastEntryIndex].size 4021 += base + size - lastRangeEnd; 4022 return base; 4023 } 4024 4025 // see if there's a gap before the first one 4026 addr_t rangeStart = args->virtual_allocated_range[0].start; 4027 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4028 base = rangeStart - size; 4029 if (alignment > 0) 4030 base = ROUNDDOWN(base, alignment); 4031 4032 if (base >= KERNEL_BASE) { 4033 args->virtual_allocated_range[0].start = base; 4034 args->virtual_allocated_range[0].size += rangeStart - base; 4035 return base; 4036 } 4037 } 4038 4039 return 0; 4040 } 4041 4042 4043 static bool 4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4045 { 4046 // TODO: horrible brute-force method of determining if the page can be 4047 // allocated 4048 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4049 if (address >= args->physical_memory_range[i].start 4050 && address < args->physical_memory_range[i].start 4051 + args->physical_memory_range[i].size) 4052 return true; 4053 } 4054 return false; 4055 } 4056 4057 4058 page_num_t 4059 vm_allocate_early_physical_page(kernel_args* args) 4060 { 4061 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4062 phys_addr_t nextPage; 4063 4064 nextPage = args->physical_allocated_range[i].start 4065 + args->physical_allocated_range[i].size; 4066 // see if the page after the next allocated paddr run can be allocated 4067 if (i + 1 < args->num_physical_allocated_ranges 4068 && args->physical_allocated_range[i + 1].size != 0) { 4069 // see if the next page will collide with the next allocated range 4070 if (nextPage >= args->physical_allocated_range[i+1].start) 4071 continue; 4072 } 4073 // see if the next physical page 
fits in the memory block 4074 if (is_page_in_physical_memory_range(args, nextPage)) { 4075 // we got one! 4076 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4077 return nextPage / B_PAGE_SIZE; 4078 } 4079 } 4080 4081 // Expanding upwards didn't work, try going downwards. 4082 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4083 phys_addr_t nextPage; 4084 4085 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4086 // see if the page after the prev allocated paddr run can be allocated 4087 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4088 // see if the next page will collide with the next allocated range 4089 if (nextPage < args->physical_allocated_range[i-1].start 4090 + args->physical_allocated_range[i-1].size) 4091 continue; 4092 } 4093 // see if the next physical page fits in the memory block 4094 if (is_page_in_physical_memory_range(args, nextPage)) { 4095 // we got one! 4096 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4097 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4098 return nextPage / B_PAGE_SIZE; 4099 } 4100 } 4101 4102 return 0; 4103 // could not allocate a block 4104 } 4105 4106 4107 /*! This one uses the kernel_args' physical and virtual memory ranges to 4108 allocate some pages before the VM is completely up. 4109 */ 4110 addr_t 4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4112 uint32 attributes, addr_t alignment) 4113 { 4114 if (physicalSize > virtualSize) 4115 physicalSize = virtualSize; 4116 4117 // find the vaddr to allocate at 4118 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4119 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4120 if (virtualBase == 0) { 4121 panic("vm_allocate_early: could not allocate virtual address\n"); 4122 return 0; 4123 } 4124 4125 // map the pages 4126 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4127 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4128 if (physicalAddress == 0) 4129 panic("error allocating early page!\n"); 4130 4131 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4132 4133 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4134 physicalAddress * B_PAGE_SIZE, attributes, 4135 &vm_allocate_early_physical_page); 4136 } 4137 4138 return virtualBase; 4139 } 4140 4141 4142 /*! The main entrance point to initialize the VM. 
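	Called during early boot, while the system is still single-threaded, with
	the kernel_args handed over by the boot loader. It brings up the
	translation map and physical page management, the slab allocator and the
	kernel heap, the VMCache and VMAddressSpace subsystems, creates areas for
	memory that is already in use (kernel heap, kernel args, preloaded images,
	idle thread stacks), and registers the VM related debugger commands.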
*/
status_t
vm_init(kernel_args* args)
{
	struct preloaded_image* image;
	void* address;
	status_t err = 0;
	uint32 i;

	TRACE(("vm_init: entry\n"));
	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
	err = arch_vm_init(args);

	// initialize some globals
	vm_page_init_num_pages(args);
	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;

	slab_init(args);

#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
	off_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
	while (heapSize > sAvailableMemory / 8)
		heapSize /= 2;
	if (heapSize < 1024 * 1024)
		panic("vm_init: go buy some RAM please.");

	// map in the new heap and initialize it
	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
	TRACE(("heap at 0x%lx\n", heapBase));
	heap_init(heapBase, heapSize);
#endif

	// initialize the free page list and physical page mapper
	vm_page_init(args);

	// initialize the cache allocators
	vm_cache_init(args);

	{
		status_t error = VMAreaHash::Init();
		if (error != B_OK)
			panic("vm_init: error initializing area hash table\n");
	}

	VMAddressSpace::Init();
	reserve_boot_loader_ranges(args);

#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
	heap_init_post_area();
#endif

	// Do any further initialization that the architecture dependent layers
	// may need now
	arch_vm_translation_map_init_post_area(args);
	arch_vm_init_post_area(args);
	vm_page_init_post_area(args);
	slab_init_post_area();

	// allocate areas to represent stuff that already exists

#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
#endif

	allocate_kernel_args(args);

	create_preloaded_image_areas(args->kernel_image);

	// allocate areas for preloaded images
	for (image = args->preloaded_images; image != NULL; image = image->next)
		create_preloaded_image_areas(image);

	// allocate kernel stacks
	for (i = 0; i < args->num_cpus; i++) {
		char name[64];

		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
		address = (void*)args->cpu_kstack[i].start;
		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}

	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);

#if PARANOID_KERNEL_MALLOC
	vm_block_address_range("uninitialized heap memory",
		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
#endif
#if PARANOID_KERNEL_FREE
	vm_block_address_range("freed heap memory",
		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
#endif

	// create the object cache for the page mappings
	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
		NULL, NULL);
	if (gPageMappingsObjectCache == NULL)
		panic("failed to create page mappings object cache");

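	// Keep a minimum reserve of mapping structures around, so that page
	// faults can still be resolved when memory is tight (cf. the
	// object_cache_reserve() fallback in vm_soft_fault()).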
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4248 4249 #if DEBUG_CACHE_LIST 4250 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4251 virtual_address_restrictions virtualRestrictions = {}; 4252 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4253 physical_address_restrictions physicalRestrictions = {}; 4254 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4255 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4256 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4257 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4258 &physicalRestrictions, (void**)&sCacheInfoTable); 4259 } 4260 #endif // DEBUG_CACHE_LIST 4261 4262 // add some debugger commands 4263 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4264 add_debugger_command("area", &dump_area, 4265 "Dump info about a particular area"); 4266 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4267 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4268 #if DEBUG_CACHE_LIST 4269 if (sCacheInfoTable != NULL) { 4270 add_debugger_command_etc("caches", &dump_caches, 4271 "List all VMCache trees", 4272 "[ \"-c\" ]\n" 4273 "All cache trees are listed sorted in decreasing order by number " 4274 "of\n" 4275 "used pages or, if \"-c\" is specified, by size of committed " 4276 "memory.\n", 4277 0); 4278 } 4279 #endif 4280 add_debugger_command("avail", &dump_available_memory, 4281 "Dump available memory"); 4282 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4283 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4284 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4285 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4286 add_debugger_command("string", &display_mem, "dump strings"); 4287 4288 add_debugger_command_etc("mapping", &dump_mapping_info, 4289 "Print address mapping information", 4290 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4291 "Prints low-level page mapping information for a given address. If\n" 4292 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4293 "address that is looked up in the translation map of the current\n" 4294 "team, respectively the team specified by thread ID <thread ID>. If\n" 4295 "\"-r\" is specified, <address> is a physical address that is\n" 4296 "searched in the translation map of all teams, respectively the team\n" 4297 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4298 "<address> is the address of a vm_page structure. 
The behavior is\n" 4299 "equivalent to specifying \"-r\" with the physical address of that\n" 4300 "page.\n", 4301 0); 4302 4303 TRACE(("vm_init: exit\n")); 4304 4305 vm_cache_init_post_heap(); 4306 4307 return err; 4308 } 4309 4310 4311 status_t 4312 vm_init_post_sem(kernel_args* args) 4313 { 4314 // This frees all unused boot loader resources and makes its space available 4315 // again 4316 arch_vm_init_end(args); 4317 unreserve_boot_loader_ranges(args); 4318 4319 // fill in all of the semaphores that were not allocated before 4320 // since we're still single threaded and only the kernel address space 4321 // exists, it isn't that hard to find all of the ones we need to create 4322 4323 arch_vm_translation_map_init_post_sem(args); 4324 4325 slab_init_post_sem(); 4326 4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4328 heap_init_post_sem(); 4329 #endif 4330 4331 return B_OK; 4332 } 4333 4334 4335 status_t 4336 vm_init_post_thread(kernel_args* args) 4337 { 4338 vm_page_init_post_thread(args); 4339 slab_init_post_thread(); 4340 return heap_init_post_thread(); 4341 } 4342 4343 4344 status_t 4345 vm_init_post_modules(kernel_args* args) 4346 { 4347 return arch_vm_init_post_modules(args); 4348 } 4349 4350 4351 void 4352 permit_page_faults(void) 4353 { 4354 Thread* thread = thread_get_current_thread(); 4355 if (thread != NULL) 4356 atomic_add(&thread->page_faults_allowed, 1); 4357 } 4358 4359 4360 void 4361 forbid_page_faults(void) 4362 { 4363 Thread* thread = thread_get_current_thread(); 4364 if (thread != NULL) 4365 atomic_add(&thread->page_faults_allowed, -1); 4366 } 4367 4368 4369 status_t 4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4371 bool isUser, addr_t* newIP) 4372 { 4373 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4374 faultAddress)); 4375 4376 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4377 4378 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4379 VMAddressSpace* addressSpace = NULL; 4380 4381 status_t status = B_OK; 4382 *newIP = 0; 4383 atomic_add((int32*)&sPageFaults, 1); 4384 4385 if (IS_KERNEL_ADDRESS(pageAddress)) { 4386 addressSpace = VMAddressSpace::GetKernel(); 4387 } else if (IS_USER_ADDRESS(pageAddress)) { 4388 addressSpace = VMAddressSpace::GetCurrent(); 4389 if (addressSpace == NULL) { 4390 if (!isUser) { 4391 dprintf("vm_page_fault: kernel thread accessing invalid user " 4392 "memory!\n"); 4393 status = B_BAD_ADDRESS; 4394 TPF(PageFaultError(-1, 4395 VMPageFaultTracing 4396 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4397 } else { 4398 // XXX weird state. 
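				// A userland thread is faulting on a user address, but its
				// team no longer has an address space -- nothing sensible
				// we can do here but panic.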
4399 panic("vm_page_fault: non kernel thread accessing user memory " 4400 "that doesn't exist!\n"); 4401 status = B_BAD_ADDRESS; 4402 } 4403 } 4404 } else { 4405 // the hit was probably in the 64k DMZ between kernel and user space 4406 // this keeps a user space thread from passing a buffer that crosses 4407 // into kernel space 4408 status = B_BAD_ADDRESS; 4409 TPF(PageFaultError(-1, 4410 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4411 } 4412 4413 if (status == B_OK) { 4414 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4415 isUser, NULL); 4416 } 4417 4418 if (status < B_OK) { 4419 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4420 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4421 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4422 thread_get_current_thread_id()); 4423 if (!isUser) { 4424 Thread* thread = thread_get_current_thread(); 4425 if (thread != NULL && thread->fault_handler != 0) { 4426 // this will cause the arch dependant page fault handler to 4427 // modify the IP on the interrupt frame or whatever to return 4428 // to this address 4429 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4430 } else { 4431 // unhandled page fault in the kernel 4432 panic("vm_page_fault: unhandled page fault in kernel space at " 4433 "0x%lx, ip 0x%lx\n", address, faultAddress); 4434 } 4435 } else { 4436 Thread* thread = thread_get_current_thread(); 4437 4438 #ifdef TRACE_FAULTS 4439 VMArea* area = NULL; 4440 if (addressSpace != NULL) { 4441 addressSpace->ReadLock(); 4442 area = addressSpace->LookupArea(faultAddress); 4443 } 4444 4445 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4446 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4447 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4448 thread->team->Name(), thread->team->id, 4449 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4450 faultAddress, area ? area->name : "???", faultAddress - (area ? 4451 area->Base() : 0x0)); 4452 4453 if (addressSpace != NULL) 4454 addressSpace->ReadUnlock(); 4455 #endif 4456 4457 // If the thread has a signal handler for SIGSEGV, we simply 4458 // send it the signal. Otherwise we notify the user debugger 4459 // first. 4460 struct sigaction action; 4461 if ((sigaction(SIGSEGV, NULL, &action) == 0 4462 && action.sa_handler != SIG_DFL 4463 && action.sa_handler != SIG_IGN) 4464 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4465 SIGSEGV)) { 4466 Signal signal(SIGSEGV, 4467 status == B_PERMISSION_DENIED 4468 ? 
SEGV_ACCERR : SEGV_MAPERR, 4469 EFAULT, thread->team->id); 4470 signal.SetAddress((void*)address); 4471 send_signal_to_thread(thread, signal, 0); 4472 } 4473 } 4474 } 4475 4476 if (addressSpace != NULL) 4477 addressSpace->Put(); 4478 4479 return B_HANDLED_INTERRUPT; 4480 } 4481 4482 4483 struct PageFaultContext { 4484 AddressSpaceReadLocker addressSpaceLocker; 4485 VMCacheChainLocker cacheChainLocker; 4486 4487 VMTranslationMap* map; 4488 VMCache* topCache; 4489 off_t cacheOffset; 4490 vm_page_reservation reservation; 4491 bool isWrite; 4492 4493 // return values 4494 vm_page* page; 4495 bool restart; 4496 bool pageAllocated; 4497 4498 4499 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4500 : 4501 addressSpaceLocker(addressSpace, true), 4502 map(addressSpace->TranslationMap()), 4503 isWrite(isWrite) 4504 { 4505 } 4506 4507 ~PageFaultContext() 4508 { 4509 UnlockAll(); 4510 vm_page_unreserve_pages(&reservation); 4511 } 4512 4513 void Prepare(VMCache* topCache, off_t cacheOffset) 4514 { 4515 this->topCache = topCache; 4516 this->cacheOffset = cacheOffset; 4517 page = NULL; 4518 restart = false; 4519 pageAllocated = false; 4520 4521 cacheChainLocker.SetTo(topCache); 4522 } 4523 4524 void UnlockAll(VMCache* exceptCache = NULL) 4525 { 4526 topCache = NULL; 4527 addressSpaceLocker.Unlock(); 4528 cacheChainLocker.Unlock(exceptCache); 4529 } 4530 }; 4531 4532 4533 /*! Gets the page that should be mapped into the area. 4534 Returns an error code other than \c B_OK, if the page couldn't be found or 4535 paged in. The locking state of the address space and the caches is undefined 4536 in that case. 4537 Returns \c B_OK with \c context.restart set to \c true, if the functions 4538 had to unlock the address space and all caches and is supposed to be called 4539 again. 4540 Returns \c B_OK with \c context.restart set to \c false, if the page was 4541 found. It is returned in \c context.page. The address space will still be 4542 locked as well as all caches starting from the top cache to at least the 4543 cache the page lives in. 4544 */ 4545 static status_t 4546 fault_get_page(PageFaultContext& context) 4547 { 4548 VMCache* cache = context.topCache; 4549 VMCache* lastCache = NULL; 4550 vm_page* page = NULL; 4551 4552 while (cache != NULL) { 4553 // We already hold the lock of the cache at this point. 4554 4555 lastCache = cache; 4556 4557 page = cache->LookupPage(context.cacheOffset); 4558 if (page != NULL && page->busy) { 4559 // page must be busy -- wait for it to become unbusy 4560 context.UnlockAll(cache); 4561 cache->ReleaseRefLocked(); 4562 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4563 4564 // restart the whole process 4565 context.restart = true; 4566 return B_OK; 4567 } 4568 4569 if (page != NULL) 4570 break; 4571 4572 // The current cache does not contain the page we're looking for. 4573 4574 // see if the backing store has it 4575 if (cache->HasPage(context.cacheOffset)) { 4576 // insert a fresh page and mark it busy -- we're going to read it in 4577 page = vm_page_allocate_page(&context.reservation, 4578 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4579 cache->InsertPage(page, context.cacheOffset); 4580 4581 // We need to unlock all caches and the address space while reading 4582 // the page in. Keep a reference to the cache around. 
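			// The reference makes sure the cache cannot be deleted while the
			// read is done with all locks dropped.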
4583 cache->AcquireRefLocked(); 4584 context.UnlockAll(); 4585 4586 // read the page in 4587 generic_io_vec vec; 4588 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4589 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4590 4591 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4592 B_PHYSICAL_IO_REQUEST, &bytesRead); 4593 4594 cache->Lock(); 4595 4596 if (status < B_OK) { 4597 // on error remove and free the page 4598 dprintf("reading page from cache %p returned: %s!\n", 4599 cache, strerror(status)); 4600 4601 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4602 cache->RemovePage(page); 4603 vm_page_set_state(page, PAGE_STATE_FREE); 4604 4605 cache->ReleaseRefAndUnlock(); 4606 return status; 4607 } 4608 4609 // mark the page unbusy again 4610 cache->MarkPageUnbusy(page); 4611 4612 DEBUG_PAGE_ACCESS_END(page); 4613 4614 // Since we needed to unlock everything temporarily, the area 4615 // situation might have changed. So we need to restart the whole 4616 // process. 4617 cache->ReleaseRefAndUnlock(); 4618 context.restart = true; 4619 return B_OK; 4620 } 4621 4622 cache = context.cacheChainLocker.LockSourceCache(); 4623 } 4624 4625 if (page == NULL) { 4626 // There was no adequate page, determine the cache for a clean one. 4627 // Read-only pages come in the deepest cache, only the top most cache 4628 // may have direct write access. 4629 cache = context.isWrite ? context.topCache : lastCache; 4630 4631 // allocate a clean page 4632 page = vm_page_allocate_page(&context.reservation, 4633 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4634 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4635 page->physical_page_number)); 4636 4637 // insert the new page into our cache 4638 cache->InsertPage(page, context.cacheOffset); 4639 context.pageAllocated = true; 4640 } else if (page->Cache() != context.topCache && context.isWrite) { 4641 // We have a page that has the data we want, but in the wrong cache 4642 // object so we need to copy it and stick it into the top cache. 4643 vm_page* sourcePage = page; 4644 4645 // TODO: If memory is low, it might be a good idea to steal the page 4646 // from our source cache -- if possible, that is. 4647 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4648 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4649 4650 // To not needlessly kill concurrency we unlock all caches but the top 4651 // one while copying the page. Lacking another mechanism to ensure that 4652 // the source page doesn't disappear, we mark it busy. 4653 sourcePage->busy = true; 4654 context.cacheChainLocker.UnlockKeepRefs(true); 4655 4656 // copy the page 4657 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4658 sourcePage->physical_page_number * B_PAGE_SIZE); 4659 4660 context.cacheChainLocker.RelockCaches(true); 4661 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4662 4663 // insert the new page into our cache 4664 context.topCache->InsertPage(page, context.cacheOffset); 4665 context.pageAllocated = true; 4666 } else 4667 DEBUG_PAGE_ACCESS_START(page); 4668 4669 context.page = page; 4670 return B_OK; 4671 } 4672 4673 4674 /*! Makes sure the address in the given address space is mapped. 4675 4676 \param addressSpace The address space. 4677 \param originalAddress The address. Doesn't need to be page aligned. 4678 \param isWrite If \c true the address shall be write-accessible. 4679 \param isUser If \c true the access is requested by a userland team. 
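	\param isExecute If \c true the address shall be executable.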
4680 \param wirePage On success, if non \c NULL, the wired count of the page 4681 mapped at the given address is incremented and the page is returned 4682 via this parameter. 4683 \return \c B_OK on success, another error code otherwise. 4684 */ 4685 static status_t 4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4687 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4688 { 4689 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4690 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4691 originalAddress, isWrite, isUser)); 4692 4693 PageFaultContext context(addressSpace, isWrite); 4694 4695 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4696 status_t status = B_OK; 4697 4698 addressSpace->IncrementFaultCount(); 4699 4700 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4701 // the pages upfront makes sure we don't have any cache locked, so that the 4702 // page daemon/thief can do their job without problems. 4703 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4704 originalAddress); 4705 context.addressSpaceLocker.Unlock(); 4706 vm_page_reserve_pages(&context.reservation, reservePages, 4707 addressSpace == VMAddressSpace::Kernel() 4708 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4709 4710 while (true) { 4711 context.addressSpaceLocker.Lock(); 4712 4713 // get the area the fault was in 4714 VMArea* area = addressSpace->LookupArea(address); 4715 if (area == NULL) { 4716 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4717 "space\n", originalAddress); 4718 TPF(PageFaultError(-1, 4719 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4720 status = B_BAD_ADDRESS; 4721 break; 4722 } 4723 4724 // check permissions 4725 uint32 protection = get_area_page_protection(area, address); 4726 if (isUser && (protection & B_USER_PROTECTION) == 0 4727 && (area->protection & B_KERNEL_AREA) != 0) { 4728 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4729 area->id, (void*)originalAddress); 4730 TPF(PageFaultError(area->id, 4731 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4732 status = B_PERMISSION_DENIED; 4733 break; 4734 } 4735 if (isWrite && (protection 4736 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4737 dprintf("write access attempted on write-protected area 0x%" 4738 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4739 TPF(PageFaultError(area->id, 4740 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4741 status = B_PERMISSION_DENIED; 4742 break; 4743 } else if (isExecute && (protection 4744 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4745 dprintf("instruction fetch attempted on execute-protected area 0x%" 4746 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4747 TPF(PageFaultError(area->id, 4748 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4749 status = B_PERMISSION_DENIED; 4750 break; 4751 } else if (!isWrite && !isExecute && (protection 4752 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4753 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4754 " at %p\n", area->id, (void*)originalAddress); 4755 TPF(PageFaultError(area->id, 4756 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4757 status = B_PERMISSION_DENIED; 4758 break; 4759 } 4760 4761 // We have the area, it was a valid access, so let's try to resolve the 4762 // page fault now. 4763 // At first, the top most cache from the area is investigated. 
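		// Prepare() locks the area's top cache (reference and lock are
		// provided by vm_area_get_locked_cache()) and records the offset of
		// the faulting page within that cache.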
4764 4765 context.Prepare(vm_area_get_locked_cache(area), 4766 address - area->Base() + area->cache_offset); 4767 4768 // See if this cache has a fault handler -- this will do all the work 4769 // for us. 4770 { 4771 // Note, since the page fault is resolved with interrupts enabled, 4772 // the fault handler could be called more than once for the same 4773 // reason -- the store must take this into account. 4774 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4775 if (status != B_BAD_HANDLER) 4776 break; 4777 } 4778 4779 // The top most cache has no fault handler, so let's see if the cache or 4780 // its sources already have the page we're searching for (we're going 4781 // from top to bottom). 4782 status = fault_get_page(context); 4783 if (status != B_OK) { 4784 TPF(PageFaultError(area->id, status)); 4785 break; 4786 } 4787 4788 if (context.restart) 4789 continue; 4790 4791 // All went fine, all there is left to do is to map the page into the 4792 // address space. 4793 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4794 context.page)); 4795 4796 // If the page doesn't reside in the area's cache, we need to make sure 4797 // it's mapped in read-only, so that we cannot overwrite someone else's 4798 // data (copy-on-write) 4799 uint32 newProtection = protection; 4800 if (context.page->Cache() != context.topCache && !isWrite) 4801 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4802 4803 bool unmapPage = false; 4804 bool mapPage = true; 4805 4806 // check whether there's already a page mapped at the address 4807 context.map->Lock(); 4808 4809 phys_addr_t physicalAddress; 4810 uint32 flags; 4811 vm_page* mappedPage = NULL; 4812 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4813 && (flags & PAGE_PRESENT) != 0 4814 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4815 != NULL) { 4816 // Yep there's already a page. If it's ours, we can simply adjust 4817 // its protection. Otherwise we have to unmap it. 4818 if (mappedPage == context.page) { 4819 context.map->ProtectPage(area, address, newProtection); 4820 // Note: We assume that ProtectPage() is atomic (i.e. 4821 // the page isn't temporarily unmapped), otherwise we'd have 4822 // to make sure it isn't wired. 4823 mapPage = false; 4824 } else 4825 unmapPage = true; 4826 } 4827 4828 context.map->Unlock(); 4829 4830 if (unmapPage) { 4831 // If the page is wired, we can't unmap it. Wait until it is unwired 4832 // again and restart. Note that the page cannot be wired for 4833 // writing, since it it isn't in the topmost cache. So we can safely 4834 // ignore ranges wired for writing (our own and other concurrent 4835 // wiring attempts in progress) and in fact have to do that to avoid 4836 // a deadlock. 4837 VMAreaUnwiredWaiter waiter; 4838 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4839 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4840 // unlock everything and wait 4841 if (context.pageAllocated) { 4842 // ... but since we allocated a page and inserted it into 4843 // the top cache, remove and free it first. Otherwise we'd 4844 // have a page from a lower cache mapped while an upper 4845 // cache has a page that would shadow it. 
4846 context.topCache->RemovePage(context.page); 4847 vm_page_free_etc(context.topCache, context.page, 4848 &context.reservation); 4849 } else 4850 DEBUG_PAGE_ACCESS_END(context.page); 4851 4852 context.UnlockAll(); 4853 waiter.waitEntry.Wait(); 4854 continue; 4855 } 4856 4857 // Note: The mapped page is a page of a lower cache. We are 4858 // guaranteed to have that cached locked, our new page is a copy of 4859 // that page, and the page is not busy. The logic for that guarantee 4860 // is as follows: Since the page is mapped, it must live in the top 4861 // cache (ruled out above) or any of its lower caches, and there is 4862 // (was before the new page was inserted) no other page in any 4863 // cache between the top cache and the page's cache (otherwise that 4864 // would be mapped instead). That in turn means that our algorithm 4865 // must have found it and therefore it cannot be busy either. 4866 DEBUG_PAGE_ACCESS_START(mappedPage); 4867 unmap_page(area, address); 4868 DEBUG_PAGE_ACCESS_END(mappedPage); 4869 } 4870 4871 if (mapPage) { 4872 if (map_page(area, context.page, address, newProtection, 4873 &context.reservation) != B_OK) { 4874 // Mapping can only fail, when the page mapping object couldn't 4875 // be allocated. Save for the missing mapping everything is 4876 // fine, though. If this was a regular page fault, we'll simply 4877 // leave and probably fault again. To make sure we'll have more 4878 // luck then, we ensure that the minimum object reserve is 4879 // available. 4880 DEBUG_PAGE_ACCESS_END(context.page); 4881 4882 context.UnlockAll(); 4883 4884 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4885 != B_OK) { 4886 // Apparently the situation is serious. Let's get ourselves 4887 // killed. 4888 status = B_NO_MEMORY; 4889 } else if (wirePage != NULL) { 4890 // The caller expects us to wire the page. Since 4891 // object_cache_reserve() succeeded, we should now be able 4892 // to allocate a mapping structure. Restart. 
4893 continue; 4894 } 4895 4896 break; 4897 } 4898 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4899 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4900 4901 // also wire the page, if requested 4902 if (wirePage != NULL && status == B_OK) { 4903 increment_page_wired_count(context.page); 4904 *wirePage = context.page; 4905 } 4906 4907 DEBUG_PAGE_ACCESS_END(context.page); 4908 4909 break; 4910 } 4911 4912 return status; 4913 } 4914 4915 4916 status_t 4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4918 { 4919 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4920 } 4921 4922 status_t 4923 vm_put_physical_page(addr_t vaddr, void* handle) 4924 { 4925 return sPhysicalPageMapper->PutPage(vaddr, handle); 4926 } 4927 4928 4929 status_t 4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4931 void** _handle) 4932 { 4933 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4934 } 4935 4936 status_t 4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4938 { 4939 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4940 } 4941 4942 4943 status_t 4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4945 { 4946 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4947 } 4948 4949 status_t 4950 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4951 { 4952 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4953 } 4954 4955 4956 void 4957 vm_get_info(system_info* info) 4958 { 4959 swap_get_info(info); 4960 4961 MutexLocker locker(sAvailableMemoryLock); 4962 info->needed_memory = sNeededMemory; 4963 info->free_memory = sAvailableMemory; 4964 } 4965 4966 4967 uint32 4968 vm_num_page_faults(void) 4969 { 4970 return sPageFaults; 4971 } 4972 4973 4974 off_t 4975 vm_available_memory(void) 4976 { 4977 MutexLocker locker(sAvailableMemoryLock); 4978 return sAvailableMemory; 4979 } 4980 4981 4982 off_t 4983 vm_available_not_needed_memory(void) 4984 { 4985 MutexLocker locker(sAvailableMemoryLock); 4986 return sAvailableMemory - sNeededMemory; 4987 } 4988 4989 4990 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4991 debugger. 
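	It does not acquire \c sAvailableMemoryLock, so the returned value may be
	slightly stale, but it is safe to call in the kernel debugger context.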
4992 */ 4993 off_t 4994 vm_available_not_needed_memory_debug(void) 4995 { 4996 return sAvailableMemory - sNeededMemory; 4997 } 4998 4999 5000 size_t 5001 vm_kernel_address_space_left(void) 5002 { 5003 return VMAddressSpace::Kernel()->FreeSpace(); 5004 } 5005 5006 5007 void 5008 vm_unreserve_memory(size_t amount) 5009 { 5010 mutex_lock(&sAvailableMemoryLock); 5011 5012 sAvailableMemory += amount; 5013 5014 mutex_unlock(&sAvailableMemoryLock); 5015 } 5016 5017 5018 status_t 5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5020 { 5021 size_t reserve = kMemoryReserveForPriority[priority]; 5022 5023 MutexLocker locker(sAvailableMemoryLock); 5024 5025 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5026 5027 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5028 sAvailableMemory -= amount; 5029 return B_OK; 5030 } 5031 5032 if (timeout <= 0) 5033 return B_NO_MEMORY; 5034 5035 // turn timeout into an absolute timeout 5036 timeout += system_time(); 5037 5038 // loop until we've got the memory or the timeout occurs 5039 do { 5040 sNeededMemory += amount; 5041 5042 // call the low resource manager 5043 locker.Unlock(); 5044 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5045 B_ABSOLUTE_TIMEOUT, timeout); 5046 locker.Lock(); 5047 5048 sNeededMemory -= amount; 5049 5050 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5051 sAvailableMemory -= amount; 5052 return B_OK; 5053 } 5054 } while (timeout > system_time()); 5055 5056 return B_NO_MEMORY; 5057 } 5058 5059 5060 status_t 5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5062 { 5063 // NOTE: The caller is responsible for synchronizing calls to this function! 5064 5065 AddressSpaceReadLocker locker; 5066 VMArea* area; 5067 status_t status = locker.SetFromArea(id, area); 5068 if (status != B_OK) 5069 return status; 5070 5071 // nothing to do, if the type doesn't change 5072 uint32 oldType = area->MemoryType(); 5073 if (type == oldType) 5074 return B_OK; 5075 5076 // set the memory type of the area and the mapped pages 5077 VMTranslationMap* map = area->address_space->TranslationMap(); 5078 map->Lock(); 5079 area->SetMemoryType(type); 5080 map->ProtectArea(area, area->protection); 5081 map->Unlock(); 5082 5083 // set the physical memory type 5084 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5085 if (error != B_OK) { 5086 // reset the memory type of the area and the mapped pages 5087 map->Lock(); 5088 area->SetMemoryType(oldType); 5089 map->ProtectArea(area, area->protection); 5090 map->Unlock(); 5091 return error; 5092 } 5093 5094 return B_OK; 5095 5096 } 5097 5098 5099 /*! This function enforces some protection properties: 5100 - kernel areas must be W^X (after kernel startup) 5101 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5102 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5103 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 5104 and B_KERNEL_WRITE_AREA. 
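	For example (illustrative only): a request for just \c B_READ_AREA comes
	out as \c B_READ_AREA | \c B_KERNEL_READ_AREA, while a request for
	\c B_READ_AREA | \c B_WRITE_AREA additionally gains both
	\c B_KERNEL_READ_AREA and \c B_KERNEL_WRITE_AREA.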
5105 */ 5106 static void 5107 fix_protection(uint32* protection) 5108 { 5109 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5110 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5111 || (*protection & B_WRITE_AREA) != 0) 5112 && !gKernelStartup) 5113 panic("kernel areas cannot be both writable and executable!"); 5114 5115 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5116 if ((*protection & B_USER_PROTECTION) == 0 5117 || (*protection & B_WRITE_AREA) != 0) 5118 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5119 else 5120 *protection |= B_KERNEL_READ_AREA; 5121 } 5122 } 5123 5124 5125 static void 5126 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5127 { 5128 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5129 info->area = area->id; 5130 info->address = (void*)area->Base(); 5131 info->size = area->Size(); 5132 info->protection = area->protection; 5133 info->lock = area->wiring; 5134 info->team = area->address_space->ID(); 5135 info->copy_count = 0; 5136 info->in_count = 0; 5137 info->out_count = 0; 5138 // TODO: retrieve real values here! 5139 5140 VMCache* cache = vm_area_get_locked_cache(area); 5141 5142 // Note, this is a simplification; the cache could be larger than this area 5143 info->ram_size = cache->page_count * B_PAGE_SIZE; 5144 5145 vm_area_put_locked_cache(cache); 5146 } 5147 5148 5149 static status_t 5150 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5151 { 5152 // is newSize a multiple of B_PAGE_SIZE? 5153 if (newSize & (B_PAGE_SIZE - 1)) 5154 return B_BAD_VALUE; 5155 5156 // lock all affected address spaces and the cache 5157 VMArea* area; 5158 VMCache* cache; 5159 5160 MultiAddressSpaceLocker locker; 5161 AreaCacheLocker cacheLocker; 5162 5163 status_t status; 5164 size_t oldSize; 5165 bool anyKernelArea; 5166 bool restart; 5167 5168 do { 5169 anyKernelArea = false; 5170 restart = false; 5171 5172 locker.Unset(); 5173 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5174 if (status != B_OK) 5175 return status; 5176 cacheLocker.SetTo(cache, true); // already locked 5177 5178 // enforce restrictions 5179 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5180 || (area->protection & B_KERNEL_AREA) != 0)) { 5181 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5182 "resize kernel area %" B_PRId32 " (%s)\n", 5183 team_get_current_team_id(), areaID, area->name); 5184 return B_NOT_ALLOWED; 5185 } 5186 // TODO: Enforce all restrictions (team, etc.)! 5187 5188 oldSize = area->Size(); 5189 if (newSize == oldSize) 5190 return B_OK; 5191 5192 if (cache->type != CACHE_TYPE_RAM) 5193 return B_NOT_ALLOWED; 5194 5195 if (oldSize < newSize) { 5196 // We need to check if all areas of this cache can be resized. 5197 for (VMArea* current = cache->areas; current != NULL; 5198 current = current->cache_next) { 5199 if (!current->address_space->CanResizeArea(current, newSize)) 5200 return B_ERROR; 5201 anyKernelArea 5202 |= current->address_space == VMAddressSpace::Kernel(); 5203 } 5204 } else { 5205 // We're shrinking the areas, so we must make sure the affected 5206 // ranges are not wired. 
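			// If any such range is wired, wait_if_area_range_is_wired()
			// temporarily unlocks everything and waits; we then restart the
			// whole locking procedure.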
5207 for (VMArea* current = cache->areas; current != NULL; 5208 current = current->cache_next) { 5209 anyKernelArea 5210 |= current->address_space == VMAddressSpace::Kernel(); 5211 5212 if (wait_if_area_range_is_wired(current, 5213 current->Base() + newSize, oldSize - newSize, &locker, 5214 &cacheLocker)) { 5215 restart = true; 5216 break; 5217 } 5218 } 5219 } 5220 } while (restart); 5221 5222 // Okay, looks good so far, so let's do it 5223 5224 int priority = kernel && anyKernelArea 5225 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5226 uint32 allocationFlags = kernel && anyKernelArea 5227 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5228 5229 if (oldSize < newSize) { 5230 // Growing the cache can fail, so we do it first. 5231 status = cache->Resize(cache->virtual_base + newSize, priority); 5232 if (status != B_OK) 5233 return status; 5234 } 5235 5236 for (VMArea* current = cache->areas; current != NULL; 5237 current = current->cache_next) { 5238 status = current->address_space->ResizeArea(current, newSize, 5239 allocationFlags); 5240 if (status != B_OK) 5241 break; 5242 5243 // We also need to unmap all pages beyond the new size, if the area has 5244 // shrunk 5245 if (newSize < oldSize) { 5246 VMCacheChainLocker cacheChainLocker(cache); 5247 cacheChainLocker.LockAllSourceCaches(); 5248 5249 unmap_pages(current, current->Base() + newSize, 5250 oldSize - newSize); 5251 5252 cacheChainLocker.Unlock(cache); 5253 } 5254 } 5255 5256 if (status == B_OK) { 5257 // Shrink or grow individual page protections if in use. 5258 if (area->page_protections != NULL) { 5259 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5260 uint8* newProtections 5261 = (uint8*)realloc(area->page_protections, bytes); 5262 if (newProtections == NULL) 5263 status = B_NO_MEMORY; 5264 else { 5265 area->page_protections = newProtections; 5266 5267 if (oldSize < newSize) { 5268 // init the additional page protections to that of the area 5269 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5270 uint32 areaProtection = area->protection 5271 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5272 memset(area->page_protections + offset, 5273 areaProtection | (areaProtection << 4), bytes - offset); 5274 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5275 uint8& entry = area->page_protections[offset - 1]; 5276 entry = (entry & 0x0f) | (areaProtection << 4); 5277 } 5278 } 5279 } 5280 } 5281 } 5282 5283 // shrinking the cache can't fail, so we do it now 5284 if (status == B_OK && newSize < oldSize) 5285 status = cache->Resize(cache->virtual_base + newSize, priority); 5286 5287 if (status != B_OK) { 5288 // Something failed -- resize the areas back to their original size. 5289 // This can fail, too, in which case we're seriously screwed. 
5290 for (VMArea* current = cache->areas; current != NULL; 5291 current = current->cache_next) { 5292 if (current->address_space->ResizeArea(current, oldSize, 5293 allocationFlags) != B_OK) { 5294 panic("vm_resize_area(): Failed and not being able to restore " 5295 "original state."); 5296 } 5297 } 5298 5299 cache->Resize(cache->virtual_base + oldSize, priority); 5300 } 5301 5302 // TODO: we must honour the lock restrictions of this area 5303 return status; 5304 } 5305 5306 5307 status_t 5308 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5309 { 5310 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5311 } 5312 5313 5314 status_t 5315 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5316 { 5317 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5318 } 5319 5320 5321 status_t 5322 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5323 bool user) 5324 { 5325 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5326 } 5327 5328 5329 void 5330 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5331 { 5332 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5333 } 5334 5335 5336 /*! Copies a range of memory directly from/to a page that might not be mapped 5337 at the moment. 5338 5339 For \a unsafeMemory the current mapping (if any is ignored). The function 5340 walks through the respective area's cache chain to find the physical page 5341 and copies from/to it directly. 5342 The memory range starting at \a unsafeMemory with a length of \a size bytes 5343 must not cross a page boundary. 5344 5345 \param teamID The team ID identifying the address space \a unsafeMemory is 5346 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5347 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5348 is passed, the address space of the thread returned by 5349 debug_get_debugged_thread() is used. 5350 \param unsafeMemory The start of the unsafe memory range to be copied 5351 from/to. 5352 \param buffer A safely accessible kernel buffer to be copied from/to. 5353 \param size The number of bytes to be copied. 5354 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5355 \a unsafeMemory, the other way around otherwise. 
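	\return \c B_OK on success, \c B_BAD_VALUE if the range is larger than a
		page or crosses a page boundary, \c B_BAD_ADDRESS if the address
		space or area could not be found, \c B_UNSUPPORTED if the page is not
		resident (or, when copying to \a unsafeMemory, not in the area's top
		cache), or the error returned by the underlying physical copy.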
5356 */ 5357 status_t 5358 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5359 size_t size, bool copyToUnsafe) 5360 { 5361 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5362 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5363 return B_BAD_VALUE; 5364 } 5365 5366 // get the address space for the debugged thread 5367 VMAddressSpace* addressSpace; 5368 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5369 addressSpace = VMAddressSpace::Kernel(); 5370 } else if (teamID == B_CURRENT_TEAM) { 5371 Thread* thread = debug_get_debugged_thread(); 5372 if (thread == NULL || thread->team == NULL) 5373 return B_BAD_ADDRESS; 5374 5375 addressSpace = thread->team->address_space; 5376 } else 5377 addressSpace = VMAddressSpace::DebugGet(teamID); 5378 5379 if (addressSpace == NULL) 5380 return B_BAD_ADDRESS; 5381 5382 // get the area 5383 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5384 if (area == NULL) 5385 return B_BAD_ADDRESS; 5386 5387 // search the page 5388 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5389 + area->cache_offset; 5390 VMCache* cache = area->cache; 5391 vm_page* page = NULL; 5392 while (cache != NULL) { 5393 page = cache->DebugLookupPage(cacheOffset); 5394 if (page != NULL) 5395 break; 5396 5397 // Page not found in this cache -- if it is paged out, we must not try 5398 // to get it from lower caches. 5399 if (cache->DebugHasPage(cacheOffset)) 5400 break; 5401 5402 cache = cache->source; 5403 } 5404 5405 if (page == NULL) 5406 return B_UNSUPPORTED; 5407 5408 // copy from/to physical memory 5409 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5410 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5411 5412 if (copyToUnsafe) { 5413 if (page->Cache() != area->cache) 5414 return B_UNSUPPORTED; 5415 5416 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5417 } 5418 5419 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5420 } 5421 5422 5423 /** Validate that a memory range is either fully in kernel space, or fully in 5424 * userspace */ 5425 static inline bool 5426 validate_memory_range(const void* addr, size_t size) 5427 { 5428 addr_t address = (addr_t)addr; 5429 5430 // Check for overflows on all addresses. 5431 if ((address + size) < address) 5432 return false; 5433 5434 // Validate that the address range does not cross the kernel/user boundary. 5435 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5436 } 5437 5438 5439 /** Validate that a memory range is fully in userspace. */ 5440 static inline bool 5441 validate_user_memory_range(const void* addr, size_t size) 5442 { 5443 addr_t address = (addr_t)addr; 5444 5445 // Check for overflows on all addresses. 5446 if ((address + size) < address) 5447 return false; 5448 5449 // Validate that both the start and end address are in userspace 5450 return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1); 5451 } 5452 5453 5454 // #pragma mark - kernel public API 5455 5456 5457 status_t 5458 user_memcpy(void* to, const void* from, size_t size) 5459 { 5460 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5461 return B_BAD_ADDRESS; 5462 5463 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5464 return B_BAD_ADDRESS; 5465 5466 return B_OK; 5467 } 5468 5469 5470 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5471 the string in \a to, NULL-terminating the result. 5472 5473 \param to Pointer to the destination C-string. 
5474 \param from Pointer to the source C-string. 5475 \param size Size in bytes of the string buffer pointed to by \a to. 5476 5477 \return strlen(\a from). 5478 */ 5479 ssize_t 5480 user_strlcpy(char* to, const char* from, size_t size) 5481 { 5482 if (to == NULL && size != 0) 5483 return B_BAD_VALUE; 5484 if (from == NULL) 5485 return B_BAD_ADDRESS; 5486 5487 // Protect the source address from overflows. 5488 size_t maxSize = size; 5489 if ((addr_t)from + maxSize < (addr_t)from) 5490 maxSize -= (addr_t)from + maxSize; 5491 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5492 maxSize = USER_TOP - (addr_t)from; 5493 5494 if (!validate_memory_range(to, maxSize)) 5495 return B_BAD_ADDRESS; 5496 5497 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5498 if (result < 0) 5499 return result; 5500 5501 // If we hit the address overflow boundary, fail. 5502 if ((size_t)result >= maxSize && maxSize < size) 5503 return B_BAD_ADDRESS; 5504 5505 return result; 5506 } 5507 5508 5509 status_t 5510 user_memset(void* s, char c, size_t count) 5511 { 5512 if (!validate_memory_range(s, count)) 5513 return B_BAD_ADDRESS; 5514 5515 if (arch_cpu_user_memset(s, c, count) < B_OK) 5516 return B_BAD_ADDRESS; 5517 5518 return B_OK; 5519 } 5520 5521 5522 /*! Wires a single page at the given address. 5523 5524 \param team The team whose address space the address belongs to. Supports 5525 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5526 parameter is ignored. 5527 \param address address The virtual address to wire down. Does not need to 5528 be page aligned. 5529 \param writable If \c true the page shall be writable. 5530 \param info On success the info is filled in, among other things 5531 containing the physical address the given virtual one translates to. 5532 \return \c B_OK, when the page could be wired, another error code otherwise. 5533 */ 5534 status_t 5535 vm_wire_page(team_id team, addr_t address, bool writable, 5536 VMPageWiringInfo* info) 5537 { 5538 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5539 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5540 5541 // compute the page protection that is required 5542 bool isUser = IS_USER_ADDRESS(address); 5543 uint32 requiredProtection = PAGE_PRESENT 5544 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5545 if (writable) 5546 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5547 5548 // get and read lock the address space 5549 VMAddressSpace* addressSpace = NULL; 5550 if (isUser) { 5551 if (team == B_CURRENT_TEAM) 5552 addressSpace = VMAddressSpace::GetCurrent(); 5553 else 5554 addressSpace = VMAddressSpace::Get(team); 5555 } else 5556 addressSpace = VMAddressSpace::GetKernel(); 5557 if (addressSpace == NULL) 5558 return B_ERROR; 5559 5560 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5561 5562 VMTranslationMap* map = addressSpace->TranslationMap(); 5563 status_t error = B_OK; 5564 5565 // get the area 5566 VMArea* area = addressSpace->LookupArea(pageAddress); 5567 if (area == NULL) { 5568 addressSpace->Put(); 5569 return B_BAD_ADDRESS; 5570 } 5571 5572 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5573 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5574 5575 // mark the area range wired 5576 area->Wire(&info->range); 5577 5578 // Lock the area's cache chain and the translation map. Needed to look 5579 // up the page and play with its wired count. 
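	// The page may belong to any cache in the chain, not only the top cache,
	// which is why all source caches need to be locked as well.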
5580 cacheChainLocker.LockAllSourceCaches(); 5581 map->Lock(); 5582 5583 phys_addr_t physicalAddress; 5584 uint32 flags; 5585 vm_page* page; 5586 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5587 && (flags & requiredProtection) == requiredProtection 5588 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5589 != NULL) { 5590 // Already mapped with the correct permissions -- just increment 5591 // the page's wired count. 5592 increment_page_wired_count(page); 5593 5594 map->Unlock(); 5595 cacheChainLocker.Unlock(); 5596 addressSpaceLocker.Unlock(); 5597 } else { 5598 // Let vm_soft_fault() map the page for us, if possible. We need 5599 // to fully unlock to avoid deadlocks. Since we have already 5600 // wired the area itself, nothing disturbing will happen with it 5601 // in the meantime. 5602 map->Unlock(); 5603 cacheChainLocker.Unlock(); 5604 addressSpaceLocker.Unlock(); 5605 5606 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5607 isUser, &page); 5608 5609 if (error != B_OK) { 5610 // The page could not be mapped -- clean up. 5611 VMCache* cache = vm_area_get_locked_cache(area); 5612 area->Unwire(&info->range); 5613 cache->ReleaseRefAndUnlock(); 5614 addressSpace->Put(); 5615 return error; 5616 } 5617 } 5618 5619 info->physicalAddress 5620 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5621 + address % B_PAGE_SIZE; 5622 info->page = page; 5623 5624 return B_OK; 5625 } 5626 5627 5628 /*! Unwires a single page previously wired via vm_wire_page(). 5629 5630 \param info The same object passed to vm_wire_page() before. 5631 */ 5632 void 5633 vm_unwire_page(VMPageWiringInfo* info) 5634 { 5635 // lock the address space 5636 VMArea* area = info->range.area; 5637 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5638 // takes over our reference 5639 5640 // lock the top cache 5641 VMCache* cache = vm_area_get_locked_cache(area); 5642 VMCacheChainLocker cacheChainLocker(cache); 5643 5644 if (info->page->Cache() != cache) { 5645 // The page is not in the top cache, so we lock the whole cache chain 5646 // before touching the page's wired count. 5647 cacheChainLocker.LockAllSourceCaches(); 5648 } 5649 5650 decrement_page_wired_count(info->page); 5651 5652 // remove the wired range from the area 5653 area->Unwire(&info->range); 5654 5655 cacheChainLocker.Unlock(); 5656 } 5657 5658 5659 /*! Wires down the given address range in the specified team's address space. 5660 5661 If successful the function 5662 - acquires a reference to the specified team's address space, 5663 - adds respective wired ranges to all areas that intersect with the given 5664 address range, 5665 - makes sure all pages in the given address range are mapped with the 5666 requested access permissions and increments their wired count. 5667 5668 It fails when \a team doesn't specify a valid address space, when any part 5669 of the specified address range is not covered by areas, when the concerned 5670 areas don't allow mapping with the requested permissions, or when mapping 5671 failed for another reason. 5672 5673 When successful, the call must be balanced by an unlock_memory_etc() call with 5674 the exact same parameters. 5675 5676 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5677 supported. 5678 \param address The start of the address range to be wired. 5679 \param numBytes The size of the address range to be wired. 5680 \param flags Flags.
Currently only \c B_READ_DEVICE is defined, which 5681 requests that the range must be wired writable ("read from device 5682 into memory"). 5683 \return \c B_OK on success, another error code otherwise. 5684 */ 5685 status_t 5686 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5687 { 5688 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5689 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5690 5691 // compute the page protection that is required 5692 bool isUser = IS_USER_ADDRESS(address); 5693 bool writable = (flags & B_READ_DEVICE) == 0; 5694 uint32 requiredProtection = PAGE_PRESENT 5695 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5696 if (writable) 5697 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5698 5699 uint32 mallocFlags = isUser 5700 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5701 5702 // get and read lock the address space 5703 VMAddressSpace* addressSpace = NULL; 5704 if (isUser) { 5705 if (team == B_CURRENT_TEAM) 5706 addressSpace = VMAddressSpace::GetCurrent(); 5707 else 5708 addressSpace = VMAddressSpace::Get(team); 5709 } else 5710 addressSpace = VMAddressSpace::GetKernel(); 5711 if (addressSpace == NULL) 5712 return B_ERROR; 5713 5714 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5715 // We get a new address space reference here. The one we got above will 5716 // be freed by unlock_memory_etc(). 5717 5718 VMTranslationMap* map = addressSpace->TranslationMap(); 5719 status_t error = B_OK; 5720 5721 // iterate through all concerned areas 5722 addr_t nextAddress = lockBaseAddress; 5723 while (nextAddress != lockEndAddress) { 5724 // get the next area 5725 VMArea* area = addressSpace->LookupArea(nextAddress); 5726 if (area == NULL) { 5727 error = B_BAD_ADDRESS; 5728 break; 5729 } 5730 5731 addr_t areaStart = nextAddress; 5732 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5733 5734 // allocate the wired range (do that before locking the cache to avoid 5735 // deadlocks) 5736 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5737 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5738 if (range == NULL) { 5739 error = B_NO_MEMORY; 5740 break; 5741 } 5742 5743 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5744 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5745 5746 // mark the area range wired 5747 area->Wire(range); 5748 5749 // Depending on the area cache type and the wiring, we may not need to 5750 // look at the individual pages. 5751 if (area->cache_type == CACHE_TYPE_NULL 5752 || area->cache_type == CACHE_TYPE_DEVICE 5753 || area->wiring == B_FULL_LOCK 5754 || area->wiring == B_CONTIGUOUS) { 5755 nextAddress = areaEnd; 5756 continue; 5757 } 5758 5759 // Lock the area's cache chain and the translation map. Needed to look 5760 // up pages and play with their wired count. 5761 cacheChainLocker.LockAllSourceCaches(); 5762 map->Lock(); 5763 5764 // iterate through the pages and wire them 5765 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5766 phys_addr_t physicalAddress; 5767 uint32 flags; 5768 5769 vm_page* page; 5770 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5771 && (flags & requiredProtection) == requiredProtection 5772 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5773 != NULL) { 5774 // Already mapped with the correct permissions -- just increment 5775 // the page's wired count. 
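				// (The cache chain and the translation map are still locked
				// here, so the mapping cannot go away before the wired count
				// has been incremented.)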
5776 increment_page_wired_count(page); 5777 } else { 5778 // Let vm_soft_fault() map the page for us, if possible. We need 5779 // to fully unlock to avoid deadlocks. Since we have already 5780 // wired the area itself, nothing disturbing will happen with it 5781 // in the meantime. 5782 map->Unlock(); 5783 cacheChainLocker.Unlock(); 5784 addressSpaceLocker.Unlock(); 5785 5786 error = vm_soft_fault(addressSpace, nextAddress, writable, 5787 false, isUser, &page); 5788 5789 addressSpaceLocker.Lock(); 5790 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5791 cacheChainLocker.LockAllSourceCaches(); 5792 map->Lock(); 5793 } 5794 5795 if (error != B_OK) 5796 break; 5797 } 5798 5799 map->Unlock(); 5800 5801 if (error == B_OK) { 5802 cacheChainLocker.Unlock(); 5803 } else { 5804 // An error occurred, so abort right here. If the current address 5805 // is the first in this area, unwire the area, since we won't get 5806 // to it when reverting what we've done so far. 5807 if (nextAddress == areaStart) { 5808 area->Unwire(range); 5809 cacheChainLocker.Unlock(); 5810 range->~VMAreaWiredRange(); 5811 free_etc(range, mallocFlags); 5812 } else 5813 cacheChainLocker.Unlock(); 5814 5815 break; 5816 } 5817 } 5818 5819 if (error != B_OK) { 5820 // An error occurred, so unwire all that we've already wired. Note that 5821 // even if not a single page was wired, unlock_memory_etc() is called 5822 // to put the address space reference. 5823 addressSpaceLocker.Unlock(); 5824 unlock_memory_etc(team, (void*)lockBaseAddress, 5825 nextAddress - lockBaseAddress, flags); 5826 } 5827 5828 return error; 5829 } 5830 5831 5832 status_t 5833 lock_memory(void* address, size_t numBytes, uint32 flags) 5834 { 5835 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5836 } 5837 5838 5839 /*! Unwires an address range previously wired with lock_memory_etc(). 5840 5841 Note that a call to this function must balance a previous lock_memory_etc() 5842 call with exactly the same parameters. 5843 */ 5844 status_t 5845 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5846 { 5847 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5848 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5849 5850 // compute the page protection that is required 5851 bool isUser = IS_USER_ADDRESS(address); 5852 bool writable = (flags & B_READ_DEVICE) == 0; 5853 uint32 requiredProtection = PAGE_PRESENT 5854 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5855 if (writable) 5856 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5857 5858 uint32 mallocFlags = isUser 5859 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5860 5861 // get and read lock the address space 5862 VMAddressSpace* addressSpace = NULL; 5863 if (isUser) { 5864 if (team == B_CURRENT_TEAM) 5865 addressSpace = VMAddressSpace::GetCurrent(); 5866 else 5867 addressSpace = VMAddressSpace::Get(team); 5868 } else 5869 addressSpace = VMAddressSpace::GetKernel(); 5870 if (addressSpace == NULL) 5871 return B_ERROR; 5872 5873 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5874 // Take over the address space reference. We don't unlock until we're 5875 // done. 
5876 5877 VMTranslationMap* map = addressSpace->TranslationMap(); 5878 status_t error = B_OK; 5879 5880 // iterate through all concerned areas 5881 addr_t nextAddress = lockBaseAddress; 5882 while (nextAddress != lockEndAddress) { 5883 // get the next area 5884 VMArea* area = addressSpace->LookupArea(nextAddress); 5885 if (area == NULL) { 5886 error = B_BAD_ADDRESS; 5887 break; 5888 } 5889 5890 addr_t areaStart = nextAddress; 5891 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5892 5893 // Lock the area's top cache. This is a requirement for 5894 // VMArea::Unwire(). 5895 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5896 5897 // Depending on the area cache type and the wiring, we may not need to 5898 // look at the individual pages. 5899 if (area->cache_type == CACHE_TYPE_NULL 5900 || area->cache_type == CACHE_TYPE_DEVICE 5901 || area->wiring == B_FULL_LOCK 5902 || area->wiring == B_CONTIGUOUS) { 5903 // unwire the range (to avoid deadlocks we delete the range after 5904 // unlocking the cache) 5905 nextAddress = areaEnd; 5906 VMAreaWiredRange* range = area->Unwire(areaStart, 5907 areaEnd - areaStart, writable); 5908 cacheChainLocker.Unlock(); 5909 if (range != NULL) { 5910 range->~VMAreaWiredRange(); 5911 free_etc(range, mallocFlags); 5912 } 5913 continue; 5914 } 5915 5916 // Lock the area's cache chain and the translation map. Needed to look 5917 // up pages and play with their wired count. 5918 cacheChainLocker.LockAllSourceCaches(); 5919 map->Lock(); 5920 5921 // iterate through the pages and unwire them 5922 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5923 phys_addr_t physicalAddress; 5924 uint32 flags; 5925 5926 vm_page* page; 5927 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5928 && (flags & PAGE_PRESENT) != 0 5929 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5930 != NULL) { 5931 // The page is still mapped -- just decrement 5932 // its wired count. 5933 decrement_page_wired_count(page); 5934 } else { 5935 panic("unlock_memory_etc(): Failed to unwire page: address " 5936 "space %p, address: %#" B_PRIxADDR, addressSpace, 5937 nextAddress); 5938 error = B_BAD_VALUE; 5939 break; 5940 } 5941 } 5942 5943 map->Unlock(); 5944 5945 // All pages are unwired. Remove the area's wired range as well (to 5946 // avoid deadlocks we delete the range after unlocking the cache). 5947 VMAreaWiredRange* range = area->Unwire(areaStart, 5948 areaEnd - areaStart, writable); 5949 5950 cacheChainLocker.Unlock(); 5951 5952 if (range != NULL) { 5953 range->~VMAreaWiredRange(); 5954 free_etc(range, mallocFlags); 5955 } 5956 5957 if (error != B_OK) 5958 break; 5959 } 5960 5961 // get rid of the address space reference lock_memory_etc() acquired 5962 addressSpace->Put(); 5963 5964 return error; 5965 } 5966 5967 5968 status_t 5969 unlock_memory(void* address, size_t numBytes, uint32 flags) 5970 { 5971 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5972 } 5973 5974 5975 /*! Similar to get_memory_map(), but also allows specifying the address space 5976 for the memory in question and has saner semantics. 5977 Returns \c B_OK when the complete range could be translated or 5978 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either 5979 case the actual number of entries is written to \c *_numEntries. Any other 5980 error case indicates complete failure; \c *_numEntries will be set to \c 0 5981 in this case.
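	A minimal usage sketch -- \c buffer and \c bufferSize are hypothetical,
	caller-provided values:

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer,
			bufferSize, entries, &count);
		// On B_OK or B_BUFFER_OVERFLOW, "count" holds the number of valid
		// entries that were written to the table.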
5982 */ 5983 status_t 5984 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5985 physical_entry* table, uint32* _numEntries) 5986 { 5987 uint32 numEntries = *_numEntries; 5988 *_numEntries = 0; 5989 5990 VMAddressSpace* addressSpace; 5991 addr_t virtualAddress = (addr_t)address; 5992 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5993 phys_addr_t physicalAddress; 5994 status_t status = B_OK; 5995 int32 index = -1; 5996 addr_t offset = 0; 5997 bool interrupts = are_interrupts_enabled(); 5998 5999 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 6000 "entries)\n", team, address, numBytes, numEntries)); 6001 6002 if (numEntries == 0 || numBytes == 0) 6003 return B_BAD_VALUE; 6004 6005 // in which address space is the address to be found? 6006 if (IS_USER_ADDRESS(virtualAddress)) { 6007 if (team == B_CURRENT_TEAM) 6008 addressSpace = VMAddressSpace::GetCurrent(); 6009 else 6010 addressSpace = VMAddressSpace::Get(team); 6011 } else 6012 addressSpace = VMAddressSpace::GetKernel(); 6013 6014 if (addressSpace == NULL) 6015 return B_ERROR; 6016 6017 VMTranslationMap* map = addressSpace->TranslationMap(); 6018 6019 if (interrupts) 6020 map->Lock(); 6021 6022 while (offset < numBytes) { 6023 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6024 uint32 flags; 6025 6026 if (interrupts) { 6027 status = map->Query((addr_t)address + offset, &physicalAddress, 6028 &flags); 6029 } else { 6030 status = map->QueryInterrupt((addr_t)address + offset, 6031 &physicalAddress, &flags); 6032 } 6033 if (status < B_OK) 6034 break; 6035 if ((flags & PAGE_PRESENT) == 0) { 6036 panic("get_memory_map() called on unmapped memory!"); 6037 return B_BAD_ADDRESS; 6038 } 6039 6040 if (index < 0 && pageOffset > 0) { 6041 physicalAddress += pageOffset; 6042 if (bytes > B_PAGE_SIZE - pageOffset) 6043 bytes = B_PAGE_SIZE - pageOffset; 6044 } 6045 6046 // need to switch to the next physical_entry? 6047 if (index < 0 || table[index].address 6048 != physicalAddress - table[index].size) { 6049 if ((uint32)++index + 1 > numEntries) { 6050 // table too small 6051 break; 6052 } 6053 table[index].address = physicalAddress; 6054 table[index].size = bytes; 6055 } else { 6056 // page fits in the current entry 6057 table[index].size += bytes; 6058 } 6059 6060 offset += bytes; 6061 } 6062 6063 if (interrupts) 6064 map->Unlock(); 6065 6066 if (status != B_OK) 6067 return status; 6068 6069 if ((uint32)index + 1 > numEntries) { 6070 *_numEntries = index; 6071 return B_BUFFER_OVERFLOW; 6072 } 6073 6074 *_numEntries = index + 1; 6075 return B_OK; 6076 } 6077 6078 6079 /*! According to the BeBook, this function should always succeed. 6080 This is no longer the case.
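	(When the supplied table has spare room, a terminating entry with address
	and size set to \c 0 is appended; if \a numEntries is \c 1, the missing
	terminator is silently accepted.)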
6081 */ 6082 extern "C" int32 6083 __get_memory_map_haiku(const void* address, size_t numBytes, 6084 physical_entry* table, int32 numEntries) 6085 { 6086 uint32 entriesRead = numEntries; 6087 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6088 table, &entriesRead); 6089 if (error != B_OK) 6090 return error; 6091 6092 // close the entry list 6093 6094 // if it's only one entry, we will silently accept the missing ending 6095 if (numEntries == 1) 6096 return B_OK; 6097 6098 if (entriesRead + 1 > (uint32)numEntries) 6099 return B_BUFFER_OVERFLOW; 6100 6101 table[entriesRead].address = 0; 6102 table[entriesRead].size = 0; 6103 6104 return B_OK; 6105 } 6106 6107 6108 area_id 6109 area_for(void* address) 6110 { 6111 return vm_area_for((addr_t)address, true); 6112 } 6113 6114 6115 area_id 6116 find_area(const char* name) 6117 { 6118 return VMAreaHash::Find(name); 6119 } 6120 6121 6122 status_t 6123 _get_area_info(area_id id, area_info* info, size_t size) 6124 { 6125 if (size != sizeof(area_info) || info == NULL) 6126 return B_BAD_VALUE; 6127 6128 AddressSpaceReadLocker locker; 6129 VMArea* area; 6130 status_t status = locker.SetFromArea(id, area); 6131 if (status != B_OK) 6132 return status; 6133 6134 fill_area_info(area, info, size); 6135 return B_OK; 6136 } 6137 6138 6139 status_t 6140 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6141 { 6142 addr_t nextBase = *(addr_t*)cookie; 6143 6144 // we're already through the list 6145 if (nextBase == (addr_t)-1) 6146 return B_ENTRY_NOT_FOUND; 6147 6148 if (team == B_CURRENT_TEAM) 6149 team = team_get_current_team_id(); 6150 6151 AddressSpaceReadLocker locker(team); 6152 if (!locker.IsLocked()) 6153 return B_BAD_TEAM_ID; 6154 6155 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6156 if (area == NULL) { 6157 nextBase = (addr_t)-1; 6158 return B_ENTRY_NOT_FOUND; 6159 } 6160 6161 fill_area_info(area, info, size); 6162 *cookie = (ssize_t)(area->Base() + 1); 6163 6164 return B_OK; 6165 } 6166 6167 6168 status_t 6169 set_area_protection(area_id area, uint32 newProtection) 6170 { 6171 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6172 newProtection, true); 6173 } 6174 6175 6176 status_t 6177 resize_area(area_id areaID, size_t newSize) 6178 { 6179 return vm_resize_area(areaID, newSize, true); 6180 } 6181 6182 6183 /*! Transfers the specified area to a new team. The caller must be the owner 6184 of the area. 6185 */ 6186 area_id 6187 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6188 bool kernel) 6189 { 6190 area_info info; 6191 status_t status = get_area_info(id, &info); 6192 if (status != B_OK) 6193 return status; 6194 6195 if (info.team != thread_get_current_thread()->team->id) 6196 return B_PERMISSION_DENIED; 6197 6198 // We need to mark the area cloneable so the following operations work. 6199 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6200 if (status != B_OK) 6201 return status; 6202 6203 area_id clonedArea = vm_clone_area(target, info.name, _address, 6204 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6205 if (clonedArea < 0) 6206 return clonedArea; 6207 6208 status = vm_delete_area(info.team, id, kernel); 6209 if (status != B_OK) { 6210 vm_delete_area(target, clonedArea, kernel); 6211 return status; 6212 } 6213 6214 // Now we can reset the protection to whatever it was before. 
6215 set_area_protection(clonedArea, info.protection); 6216 6217 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6218 6219 return clonedArea; 6220 } 6221 6222 6223 extern "C" area_id 6224 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6225 size_t numBytes, uint32 addressSpec, uint32 protection, 6226 void** _virtualAddress) 6227 { 6228 if (!arch_vm_supports_protection(protection)) 6229 return B_NOT_SUPPORTED; 6230 6231 fix_protection(&protection); 6232 6233 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6234 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6235 false); 6236 } 6237 6238 6239 area_id 6240 clone_area(const char* name, void** _address, uint32 addressSpec, 6241 uint32 protection, area_id source) 6242 { 6243 if ((protection & B_KERNEL_PROTECTION) == 0) 6244 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6245 6246 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6247 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6248 } 6249 6250 6251 area_id 6252 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6253 uint32 protection, uint32 flags, uint32 guardSize, 6254 const virtual_address_restrictions* virtualAddressRestrictions, 6255 const physical_address_restrictions* physicalAddressRestrictions, 6256 void** _address) 6257 { 6258 fix_protection(&protection); 6259 6260 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6261 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6262 true, _address); 6263 } 6264 6265 6266 extern "C" area_id 6267 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6268 size_t size, uint32 lock, uint32 protection) 6269 { 6270 fix_protection(&protection); 6271 6272 virtual_address_restrictions virtualRestrictions = {}; 6273 virtualRestrictions.address = *_address; 6274 virtualRestrictions.address_specification = addressSpec; 6275 physical_address_restrictions physicalRestrictions = {}; 6276 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6277 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6278 true, _address); 6279 } 6280 6281 6282 status_t 6283 delete_area(area_id area) 6284 { 6285 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6286 } 6287 6288 6289 // #pragma mark - Userland syscalls 6290 6291 6292 status_t 6293 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6294 addr_t size) 6295 { 6296 // filter out some unavailable values (for userland) 6297 switch (addressSpec) { 6298 case B_ANY_KERNEL_ADDRESS: 6299 case B_ANY_KERNEL_BLOCK_ADDRESS: 6300 return B_BAD_VALUE; 6301 } 6302 6303 addr_t address; 6304 6305 if (!IS_USER_ADDRESS(userAddress) 6306 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6307 return B_BAD_ADDRESS; 6308 6309 status_t status = vm_reserve_address_range( 6310 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6311 RESERVED_AVOID_BASE); 6312 if (status != B_OK) 6313 return status; 6314 6315 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6316 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6317 (void*)address, size); 6318 return B_BAD_ADDRESS; 6319 } 6320 6321 return B_OK; 6322 } 6323 6324 6325 status_t 6326 _user_unreserve_address_range(addr_t address, addr_t size) 6327 { 6328 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6329 (void*)address, size); 6330 } 6331 6332 6333 area_id 
6334 _user_area_for(void* address) 6335 { 6336 return vm_area_for((addr_t)address, false); 6337 } 6338 6339 6340 area_id 6341 _user_find_area(const char* userName) 6342 { 6343 char name[B_OS_NAME_LENGTH]; 6344 6345 if (!IS_USER_ADDRESS(userName) 6346 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6347 return B_BAD_ADDRESS; 6348 6349 return find_area(name); 6350 } 6351 6352 6353 status_t 6354 _user_get_area_info(area_id area, area_info* userInfo) 6355 { 6356 if (!IS_USER_ADDRESS(userInfo)) 6357 return B_BAD_ADDRESS; 6358 6359 area_info info; 6360 status_t status = get_area_info(area, &info); 6361 if (status < B_OK) 6362 return status; 6363 6364 // TODO: do we want to prevent userland from seeing kernel protections? 6365 //info.protection &= B_USER_PROTECTION; 6366 6367 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6368 return B_BAD_ADDRESS; 6369 6370 return status; 6371 } 6372 6373 6374 status_t 6375 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6376 { 6377 ssize_t cookie; 6378 6379 if (!IS_USER_ADDRESS(userCookie) 6380 || !IS_USER_ADDRESS(userInfo) 6381 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6382 return B_BAD_ADDRESS; 6383 6384 area_info info; 6385 status_t status = _get_next_area_info(team, &cookie, &info, 6386 sizeof(area_info)); 6387 if (status != B_OK) 6388 return status; 6389 6390 //info.protection &= B_USER_PROTECTION; 6391 6392 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6393 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6394 return B_BAD_ADDRESS; 6395 6396 return status; 6397 } 6398 6399 6400 status_t 6401 _user_set_area_protection(area_id area, uint32 newProtection) 6402 { 6403 if ((newProtection & ~B_USER_PROTECTION) != 0) 6404 return B_BAD_VALUE; 6405 6406 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6407 newProtection, false); 6408 } 6409 6410 6411 status_t 6412 _user_resize_area(area_id area, size_t newSize) 6413 { 6414 // TODO: Since we restrict deleting of areas to those owned by the team, 6415 // we should also do that for resizing (check other functions, too). 
6416 return vm_resize_area(area, newSize, false); 6417 } 6418 6419 6420 area_id 6421 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6422 team_id target) 6423 { 6424 // filter out some unavailable values (for userland) 6425 switch (addressSpec) { 6426 case B_ANY_KERNEL_ADDRESS: 6427 case B_ANY_KERNEL_BLOCK_ADDRESS: 6428 return B_BAD_VALUE; 6429 } 6430 6431 void* address; 6432 if (!IS_USER_ADDRESS(userAddress) 6433 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6434 return B_BAD_ADDRESS; 6435 6436 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6437 if (newArea < B_OK) 6438 return newArea; 6439 6440 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6441 return B_BAD_ADDRESS; 6442 6443 return newArea; 6444 } 6445 6446 6447 area_id 6448 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6449 uint32 protection, area_id sourceArea) 6450 { 6451 char name[B_OS_NAME_LENGTH]; 6452 void* address; 6453 6454 // filter out some unavailable values (for userland) 6455 switch (addressSpec) { 6456 case B_ANY_KERNEL_ADDRESS: 6457 case B_ANY_KERNEL_BLOCK_ADDRESS: 6458 return B_BAD_VALUE; 6459 } 6460 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6461 return B_BAD_VALUE; 6462 6463 if (!IS_USER_ADDRESS(userName) 6464 || !IS_USER_ADDRESS(userAddress) 6465 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6466 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6467 return B_BAD_ADDRESS; 6468 6469 fix_protection(&protection); 6470 6471 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6472 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6473 false); 6474 if (clonedArea < B_OK) 6475 return clonedArea; 6476 6477 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6478 delete_area(clonedArea); 6479 return B_BAD_ADDRESS; 6480 } 6481 6482 return clonedArea; 6483 } 6484 6485 6486 area_id 6487 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6488 size_t size, uint32 lock, uint32 protection) 6489 { 6490 char name[B_OS_NAME_LENGTH]; 6491 void* address; 6492 6493 // filter out some unavailable values (for userland) 6494 switch (addressSpec) { 6495 case B_ANY_KERNEL_ADDRESS: 6496 case B_ANY_KERNEL_BLOCK_ADDRESS: 6497 return B_BAD_VALUE; 6498 } 6499 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6500 return B_BAD_VALUE; 6501 6502 if (!IS_USER_ADDRESS(userName) 6503 || !IS_USER_ADDRESS(userAddress) 6504 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6505 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6506 return B_BAD_ADDRESS; 6507 6508 if (addressSpec == B_EXACT_ADDRESS 6509 && IS_KERNEL_ADDRESS(address)) 6510 return B_BAD_VALUE; 6511 6512 if (addressSpec == B_ANY_ADDRESS) 6513 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6514 if (addressSpec == B_BASE_ADDRESS) 6515 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6516 6517 fix_protection(&protection); 6518 6519 virtual_address_restrictions virtualRestrictions = {}; 6520 virtualRestrictions.address = address; 6521 virtualRestrictions.address_specification = addressSpec; 6522 physical_address_restrictions physicalRestrictions = {}; 6523 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6524 size, lock, protection, 0, 0, &virtualRestrictions, 6525 &physicalRestrictions, false, &address); 6526 6527 if (area >= B_OK 6528 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6529 delete_area(area); 6530 return B_BAD_ADDRESS; 6531 
} 6532 6533 return area; 6534 } 6535 6536 6537 status_t 6538 _user_delete_area(area_id area) 6539 { 6540 // Unlike the BeOS implementation, you can now only delete areas 6541 // that you have created yourself from userland. 6542 // The documentation to delete_area() explicitly states that this 6543 // will be restricted in the future, and so it will. 6544 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6545 } 6546 6547 6548 // TODO: create a BeOS style call for this! 6549 6550 area_id 6551 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6552 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6553 int fd, off_t offset) 6554 { 6555 char name[B_OS_NAME_LENGTH]; 6556 void* address; 6557 area_id area; 6558 6559 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6560 return B_BAD_VALUE; 6561 6562 fix_protection(&protection); 6563 6564 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6565 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6566 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6567 return B_BAD_ADDRESS; 6568 6569 if (addressSpec == B_EXACT_ADDRESS) { 6570 if ((addr_t)address + size < (addr_t)address 6571 || (addr_t)address % B_PAGE_SIZE != 0) { 6572 return B_BAD_VALUE; 6573 } 6574 if (!IS_USER_ADDRESS(address) 6575 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6576 return B_BAD_ADDRESS; 6577 } 6578 } 6579 6580 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6581 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6582 false); 6583 if (area < B_OK) 6584 return area; 6585 6586 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6587 return B_BAD_ADDRESS; 6588 6589 return area; 6590 } 6591 6592 6593 status_t 6594 _user_unmap_memory(void* _address, size_t size) 6595 { 6596 addr_t address = (addr_t)_address; 6597 6598 // check params 6599 if (size == 0 || (addr_t)address + size < (addr_t)address 6600 || (addr_t)address % B_PAGE_SIZE != 0) { 6601 return B_BAD_VALUE; 6602 } 6603 6604 if (!IS_USER_ADDRESS(address) 6605 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6606 return B_BAD_ADDRESS; 6607 } 6608 6609 // Write lock the address space and ensure the address range is not wired. 6610 AddressSpaceWriteLocker locker; 6611 do { 6612 status_t status = locker.SetTo(team_get_current_team_id()); 6613 if (status != B_OK) 6614 return status; 6615 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6616 size, &locker)); 6617 6618 // unmap 6619 return unmap_address_range(locker.AddressSpace(), address, size, false); 6620 } 6621 6622 6623 status_t 6624 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6625 { 6626 // check address range 6627 addr_t address = (addr_t)_address; 6628 size = PAGE_ALIGN(size); 6629 6630 if ((address % B_PAGE_SIZE) != 0) 6631 return B_BAD_VALUE; 6632 if (!validate_user_memory_range(_address, size)) { 6633 // weird error code required by POSIX 6634 return ENOMEM; 6635 } 6636 6637 // extend and check protection 6638 if ((protection & ~B_USER_PROTECTION) != 0) 6639 return B_BAD_VALUE; 6640 6641 fix_protection(&protection); 6642 6643 // We need to write lock the address space, since we're going to play with 6644 // the areas. Also make sure that none of the areas is wired and that we're 6645 // actually allowed to change the protection. 
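	// (The check runs in a retry loop: waiting for a wired range temporarily
	// drops the address space lock, which invalidates the lookups done so
	// far and forces the whole first round to be repeated.)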
6646 AddressSpaceWriteLocker locker; 6647 6648 bool restart; 6649 do { 6650 restart = false; 6651 6652 status_t status = locker.SetTo(team_get_current_team_id()); 6653 if (status != B_OK) 6654 return status; 6655 6656 // First round: Check whether the whole range is covered by areas and we 6657 // are allowed to modify them. 6658 addr_t currentAddress = address; 6659 size_t sizeLeft = size; 6660 while (sizeLeft > 0) { 6661 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6662 if (area == NULL) 6663 return B_NO_MEMORY; 6664 6665 if ((area->protection & B_KERNEL_AREA) != 0) 6666 return B_NOT_ALLOWED; 6667 if (area->protection_max != 0 6668 && (protection & area->protection_max) != protection) { 6669 return B_NOT_ALLOWED; 6670 } 6671 6672 addr_t offset = currentAddress - area->Base(); 6673 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6674 6675 AreaCacheLocker cacheLocker(area); 6676 6677 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6678 &locker, &cacheLocker)) { 6679 restart = true; 6680 break; 6681 } 6682 6683 cacheLocker.Unlock(); 6684 6685 currentAddress += rangeSize; 6686 sizeLeft -= rangeSize; 6687 } 6688 } while (restart); 6689 6690 // Second round: If the protections differ from that of the area, create a 6691 // page protection array and re-map mapped pages. 6692 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6693 addr_t currentAddress = address; 6694 size_t sizeLeft = size; 6695 while (sizeLeft > 0) { 6696 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6697 if (area == NULL) 6698 return B_NO_MEMORY; 6699 6700 addr_t offset = currentAddress - area->Base(); 6701 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6702 6703 currentAddress += rangeSize; 6704 sizeLeft -= rangeSize; 6705 6706 if (area->page_protections == NULL) { 6707 if (area->protection == protection) 6708 continue; 6709 if (offset == 0 && rangeSize == area->Size()) { 6710 status_t status = vm_set_area_protection(area->address_space->ID(), 6711 area->id, protection, false); 6712 if (status != B_OK) 6713 return status; 6714 continue; 6715 } 6716 6717 status_t status = allocate_area_page_protections(area); 6718 if (status != B_OK) 6719 return status; 6720 } 6721 6722 // We need to lock the complete cache chain, since we potentially unmap 6723 // pages of lower caches. 6724 VMCache* topCache = vm_area_get_locked_cache(area); 6725 VMCacheChainLocker cacheChainLocker(topCache); 6726 cacheChainLocker.LockAllSourceCaches(); 6727 6728 for (addr_t pageAddress = area->Base() + offset; 6729 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6730 map->Lock(); 6731 6732 set_area_page_protection(area, pageAddress, protection); 6733 6734 phys_addr_t physicalAddress; 6735 uint32 flags; 6736 6737 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6738 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6739 map->Unlock(); 6740 continue; 6741 } 6742 6743 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6744 if (page == NULL) { 6745 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6746 "\n", area, physicalAddress); 6747 map->Unlock(); 6748 return B_ERROR; 6749 } 6750 6751 // If the page is not in the topmost cache and write access is 6752 // requested, we have to unmap it. Otherwise we can re-map it with 6753 // the new protection. 
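			// (Unmapping instead of remapping writable makes the next write
			// access fault, so the page is copied into the topmost cache
			// instead of the shared lower-cache page being modified in
			// place.)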
6754 bool unmapPage = page->Cache() != topCache 6755 && (protection & B_WRITE_AREA) != 0; 6756 6757 if (!unmapPage) 6758 map->ProtectPage(area, pageAddress, protection); 6759 6760 map->Unlock(); 6761 6762 if (unmapPage) { 6763 DEBUG_PAGE_ACCESS_START(page); 6764 unmap_page(area, pageAddress); 6765 DEBUG_PAGE_ACCESS_END(page); 6766 } 6767 } 6768 } 6769 6770 return B_OK; 6771 } 6772 6773 6774 status_t 6775 _user_sync_memory(void* _address, size_t size, uint32 flags) 6776 { 6777 addr_t address = (addr_t)_address; 6778 size = PAGE_ALIGN(size); 6779 6780 // check params 6781 if ((address % B_PAGE_SIZE) != 0) 6782 return B_BAD_VALUE; 6783 if (!validate_user_memory_range(_address, size)) { 6784 // weird error code required by POSIX 6785 return ENOMEM; 6786 } 6787 6788 bool writeSync = (flags & MS_SYNC) != 0; 6789 bool writeAsync = (flags & MS_ASYNC) != 0; 6790 if (writeSync && writeAsync) 6791 return B_BAD_VALUE; 6792 6793 if (size == 0 || (!writeSync && !writeAsync)) 6794 return B_OK; 6795 6796 // iterate through the range and sync all concerned areas 6797 while (size > 0) { 6798 // read lock the address space 6799 AddressSpaceReadLocker locker; 6800 status_t error = locker.SetTo(team_get_current_team_id()); 6801 if (error != B_OK) 6802 return error; 6803 6804 // get the first area 6805 VMArea* area = locker.AddressSpace()->LookupArea(address); 6806 if (area == NULL) 6807 return B_NO_MEMORY; 6808 6809 uint32 offset = address - area->Base(); 6810 size_t rangeSize = min_c(area->Size() - offset, size); 6811 offset += area->cache_offset; 6812 6813 // lock the cache 6814 AreaCacheLocker cacheLocker(area); 6815 if (!cacheLocker) 6816 return B_BAD_VALUE; 6817 VMCache* cache = area->cache; 6818 6819 locker.Unlock(); 6820 6821 uint32 firstPage = offset >> PAGE_SHIFT; 6822 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6823 6824 // write the pages 6825 if (cache->type == CACHE_TYPE_VNODE) { 6826 if (writeSync) { 6827 // synchronous 6828 error = vm_page_write_modified_page_range(cache, firstPage, 6829 endPage); 6830 if (error != B_OK) 6831 return error; 6832 } else { 6833 // asynchronous 6834 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6835 // TODO: This is probably not quite what is supposed to happen. 6836 // Especially when a lot has to be written, it might take ages 6837 // until it really hits the disk. 6838 } 6839 } 6840 6841 address += rangeSize; 6842 size -= rangeSize; 6843 } 6844 6845 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6846 // synchronize multiple mappings of the same file. In our VM they never get 6847 // out of sync, though, so we don't have to do anything. 6848 6849 return B_OK; 6850 } 6851 6852 6853 status_t 6854 _user_memory_advice(void* _address, size_t size, uint32 advice) 6855 { 6856 addr_t address = (addr_t)_address; 6857 if ((address % B_PAGE_SIZE) != 0) 6858 return B_BAD_VALUE; 6859 6860 size = PAGE_ALIGN(size); 6861 if (!validate_user_memory_range(_address, size)) { 6862 // weird error code required by POSIX 6863 return B_NO_MEMORY; 6864 } 6865 6866 switch (advice) { 6867 case MADV_NORMAL: 6868 case MADV_SEQUENTIAL: 6869 case MADV_RANDOM: 6870 case MADV_WILLNEED: 6871 case MADV_DONTNEED: 6872 // TODO: Implement! 
6873 break; 6874 6875 case MADV_FREE: 6876 { 6877 AddressSpaceWriteLocker locker; 6878 do { 6879 status_t status = locker.SetTo(team_get_current_team_id()); 6880 if (status != B_OK) 6881 return status; 6882 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6883 address, size, &locker)); 6884 6885 discard_address_range(locker.AddressSpace(), address, size, false); 6886 break; 6887 } 6888 6889 default: 6890 return B_BAD_VALUE; 6891 } 6892 6893 return B_OK; 6894 } 6895 6896 6897 status_t 6898 _user_get_memory_properties(team_id teamID, const void* address, 6899 uint32* _protected, uint32* _lock) 6900 { 6901 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6902 return B_BAD_ADDRESS; 6903 6904 AddressSpaceReadLocker locker; 6905 status_t error = locker.SetTo(teamID); 6906 if (error != B_OK) 6907 return error; 6908 6909 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6910 if (area == NULL) 6911 return B_NO_MEMORY; 6912 6913 uint32 protection = get_area_page_protection(area, (addr_t)address); 6914 uint32 wiring = area->wiring; 6915 6916 locker.Unlock(); 6917 6918 error = user_memcpy(_protected, &protection, sizeof(protection)); 6919 if (error != B_OK) 6920 return error; 6921 6922 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6923 6924 return error; 6925 } 6926 6927 6928 // An ordered list of non-overlapping ranges to track mlock/munlock locking. 6929 // It is allowed to call mlock/munlock in unbalanced ways (lock a range 6930 // multiple times, unlock a part of it, lock several consecutive ranges and 6931 // unlock them in one go, etc). However the low level lock_memory and 6932 // unlock_memory calls require the locks/unlocks to be balanced (you lock a 6933 // fixed range, and then unlock exactly the same range). This list allows to 6934 // keep track of what was locked exactly so we can unlock the correct things. 6935 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> { 6936 addr_t start; 6937 addr_t end; 6938 6939 status_t LockMemory() 6940 { 6941 return lock_memory((void*)start, end - start, 0); 6942 } 6943 6944 status_t UnlockMemory() 6945 { 6946 return unlock_memory((void*)start, end - start, 0); 6947 } 6948 6949 status_t Move(addr_t start, addr_t end) 6950 { 6951 status_t result = lock_memory((void*)start, end - start, 0); 6952 if (result != B_OK) 6953 return result; 6954 6955 result = UnlockMemory(); 6956 6957 if (result != B_OK) { 6958 // What can we do if the unlock fails? 
6959 panic("Failed to unlock memory: %s", strerror(result)); 6960 return result; 6961 } 6962 6963 this->start = start; 6964 this->end = end; 6965 6966 return B_OK; 6967 } 6968 }; 6969 6970 6971 status_t 6972 _user_mlock(const void* _address, size_t size) 6973 { 6974 // check address range 6975 addr_t address = (addr_t)_address; 6976 size = PAGE_ALIGN(size); 6977 6978 if ((address % B_PAGE_SIZE) != 0) 6979 return EINVAL; 6980 if (!validate_user_memory_range(_address, size)) 6981 return EINVAL; 6982 6983 addr_t endAddress = address + size; 6984 6985 // Pre-allocate a linked list element we may need (it's simpler to do it 6986 // now than run out of memory in the middle of changing things) 6987 LockedPages* newRange = new(std::nothrow) LockedPages(); 6988 if (newRange == NULL) 6989 return ENOMEM; 6990 ObjectDeleter<LockedPages> newRangeDeleter(newRange); 6991 6992 // Get and lock the team 6993 Team* team = thread_get_current_thread()->team; 6994 TeamLocker teamLocker(team); 6995 teamLocker.Lock(); 6996 6997 status_t error = B_OK; 6998 LockedPagesList* lockedPages = &team->locked_pages_list; 6999 7000 // Locate the first locked range possibly overlapping ours 7001 LockedPages* currentRange = lockedPages->Head(); 7002 while (currentRange != NULL && currentRange->end <= address) 7003 currentRange = lockedPages->GetNext(currentRange); 7004 7005 if (currentRange == NULL || currentRange->start >= endAddress) { 7006 // No existing range is overlapping with ours. We can just lock our 7007 // range and stop here. 7008 newRange->start = address; 7009 newRange->end = endAddress; 7010 error = newRange->LockMemory(); 7011 if (error != B_OK) 7012 return error; 7013 7014 lockedPages->InsertBefore(currentRange, newRange); 7015 newRangeDeleter.Detach(); 7016 return B_OK; 7017 } 7018 7019 // We get here when there is at least one existing overlapping range. 7020 7021 if (currentRange->start <= address) { 7022 if (currentRange->end >= endAddress) { 7023 // An existing range is already fully covering the pages we need to 7024 // lock. Nothing to do then. 7025 return B_OK; 7026 } else { 7027 // An existing range covers the start of the area we want to lock. 7028 // Advance our start address to avoid it. 7029 address = currentRange->end; 7030 7031 // Move on to the next range for the next step 7032 currentRange = lockedPages->GetNext(currentRange); 7033 } 7034 } 7035 7036 // First, lock the new range 7037 newRange->start = address; 7038 newRange->end = endAddress; 7039 error = newRange->LockMemory(); 7040 if (error != B_OK) 7041 return error; 7042 7043 // Unlock all ranges fully overlapping with the area we need to lock 7044 while (currentRange != NULL && currentRange->end < endAddress) { 7045 // The existing range is fully contained inside the new one we're 7046 // trying to lock.
Delete/unlock it, and replace it with a new one 7047 // (this limits fragmentation of the range list, and is simpler to 7048 // manage) 7049 error = currentRange->UnlockMemory(); 7050 if (error != B_OK) { 7051 panic("Failed to unlock a memory range: %s", strerror(error)); 7052 newRange->UnlockMemory(); 7053 return error; 7054 } 7055 LockedPages* temp = currentRange; 7056 currentRange = lockedPages->GetNext(currentRange); 7057 lockedPages->Remove(temp); 7058 delete temp; 7059 } 7060 7061 if (currentRange != NULL) { 7062 // One last range may cover the end of the area we're trying to lock 7063 7064 if (currentRange->start == address) { 7065 // In case two overlapping ranges (one at the start and the other 7066 // at the end) already cover the area we're after, there's nothing 7067 // more to do. So we destroy our new extra allocation 7068 error = newRange->UnlockMemory(); 7069 return error; 7070 } 7071 7072 if (currentRange->start < endAddress) { 7073 // Make sure the last range is not overlapping, by moving its start 7074 error = currentRange->Move(endAddress, currentRange->end); 7075 if (error != B_OK) { 7076 panic("Failed to move a memory range: %s", strerror(error)); 7077 newRange->UnlockMemory(); 7078 return error; 7079 } 7080 } 7081 } 7082 7083 // Finally, store the new range in the locked list 7084 lockedPages->InsertBefore(currentRange, newRange); 7085 newRangeDeleter.Detach(); 7086 return B_OK; 7087 } 7088 7089 7090 status_t 7091 _user_munlock(const void* _address, size_t size) 7092 { 7093 // check address range 7094 addr_t address = (addr_t)_address; 7095 size = PAGE_ALIGN(size); 7096 7097 if ((address % B_PAGE_SIZE) != 0) 7098 return EINVAL; 7099 if (!validate_user_memory_range(_address, size)) 7100 return EINVAL; 7101 7102 addr_t endAddress = address + size; 7103 7104 // Get and lock the team 7105 Team* team = thread_get_current_thread()->team; 7106 TeamLocker teamLocker(team); 7107 teamLocker.Lock(); 7108 LockedPagesList* lockedPages = &team->locked_pages_list; 7109 7110 status_t error = B_OK; 7111 7112 // Locate the first locked range possibly overlapping ours 7113 LockedPages* currentRange = lockedPages->Head(); 7114 while (currentRange != NULL && currentRange->end <= address) 7115 currentRange = lockedPages->GetNext(currentRange); 7116 7117 if (currentRange == NULL || currentRange->start >= endAddress) { 7118 // No range is intersecting, nothing to unlock 7119 return B_OK; 7120 } 7121 7122 if (currentRange->start < address) { 7123 if (currentRange->end > endAddress) { 7124 // There is a range fully covering the area we want to unlock, 7125 // and it extends on both sides. 
We need to split it in two 7126 LockedPages* newRange = new(std::nothrow) LockedPages(); 7127 if (newRange == NULL) 7128 return ENOMEM; 7129 7130 newRange->start = endAddress; 7131 newRange->end = currentRange->end; 7132 7133 error = newRange->LockMemory(); 7134 if (error != B_OK) { 7135 delete newRange; 7136 return error; 7137 } 7138 7139 error = currentRange->Move(currentRange->start, address); 7140 if (error != B_OK) { 7141 delete newRange; 7142 return error; 7143 } 7144 7145 lockedPages->InsertAfter(currentRange, newRange); 7146 return B_OK; 7147 } else { 7148 // There is a range that overlaps and extends before the one we 7149 // want to unlock, we need to shrink it 7150 error = currentRange->Move(currentRange->start, address); 7151 if (error != B_OK) 7152 return error; 7153 } 7154 } 7155 7156 while (currentRange != NULL && currentRange->end <= endAddress) { 7157 // Unlock all fully overlapping ranges 7158 error = currentRange->UnlockMemory(); 7159 if (error != B_OK) 7160 return error; 7161 LockedPages* temp = currentRange; 7162 currentRange = lockedPages->GetNext(currentRange); 7163 lockedPages->Remove(temp); 7164 delete temp; 7165 } 7166 7167 // Finally split the last partially overlapping range if any 7168 if (currentRange != NULL && currentRange->start < endAddress) { 7169 error = currentRange->Move(endAddress, currentRange->end); 7170 if (error != B_OK) 7171 return error; 7172 } 7173 7174 return B_OK; 7175 } 7176 7177 7178 // #pragma mark -- compatibility 7179 7180 7181 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7182 7183 7184 struct physical_entry_beos { 7185 uint32 address; 7186 uint32 size; 7187 }; 7188 7189 7190 /*! The physical_entry structure has changed. We need to translate it to the 7191 old one. 7192 */ 7193 extern "C" int32 7194 __get_memory_map_beos(const void* _address, size_t numBytes, 7195 physical_entry_beos* table, int32 numEntries) 7196 { 7197 if (numEntries <= 0) 7198 return B_BAD_VALUE; 7199 7200 const uint8* address = (const uint8*)_address; 7201 7202 int32 count = 0; 7203 while (numBytes > 0 && count < numEntries) { 7204 physical_entry entry; 7205 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7206 if (result < 0) { 7207 if (result != B_BUFFER_OVERFLOW) 7208 return result; 7209 } 7210 7211 if (entry.address >= (phys_addr_t)1 << 32) { 7212 panic("get_memory_map(): Address is greater 4 GB!"); 7213 return B_ERROR; 7214 } 7215 7216 table[count].address = entry.address; 7217 table[count++].size = entry.size; 7218 7219 address += entry.size; 7220 numBytes -= entry.size; 7221 } 7222 7223 // null-terminate the table, if possible 7224 if (count < numEntries) { 7225 table[count].address = 0; 7226 table[count].size = 0; 7227 } 7228 7229 return B_OK; 7230 } 7231 7232 7233 /*! The type of the \a physicalAddress parameter has changed from void* to 7234 phys_addr_t. 7235 */ 7236 extern "C" area_id 7237 __map_physical_memory_beos(const char* name, void* physicalAddress, 7238 size_t numBytes, uint32 addressSpec, uint32 protection, 7239 void** _virtualAddress) 7240 { 7241 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7242 addressSpec, protection, _virtualAddress); 7243 } 7244 7245 7246 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7247 we meddle with the \a lock parameter to force 32 bit. 
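	\c B_FULL_LOCK and \c B_LAZY_LOCK are mapped to \c B_32_BIT_FULL_LOCK and
	\c B_CONTIGUOUS to \c B_32_BIT_CONTIGUOUS; all other lock values are
	passed through unchanged.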
7248 */ 7249 extern "C" area_id 7250 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7251 size_t size, uint32 lock, uint32 protection) 7252 { 7253 switch (lock) { 7254 case B_NO_LOCK: 7255 break; 7256 case B_FULL_LOCK: 7257 case B_LAZY_LOCK: 7258 lock = B_32_BIT_FULL_LOCK; 7259 break; 7260 case B_CONTIGUOUS: 7261 lock = B_32_BIT_CONTIGUOUS; 7262 break; 7263 } 7264 7265 return __create_area_haiku(name, _address, addressSpec, size, lock, 7266 protection); 7267 } 7268 7269 7270 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7271 "BASE"); 7272 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7273 "map_physical_memory@", "BASE"); 7274 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7275 "BASE"); 7276 7277 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7278 "get_memory_map@@", "1_ALPHA3"); 7279 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7280 "map_physical_memory@@", "1_ALPHA3"); 7281 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7282 "1_ALPHA3"); 7283 7284 7285 #else 7286 7287 7288 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7289 "get_memory_map@@", "BASE"); 7290 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7291 "map_physical_memory@@", "BASE"); 7292 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7293 "BASE"); 7294 7295 7296 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7297