1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/ThreadAutoLock.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_priv.h> 53 #include <vm/VMAddressSpace.h> 54 #include <vm/VMArea.h> 55 #include <vm/VMCache.h> 56 57 #include "VMAddressSpaceLocking.h" 58 #include "VMAnonymousCache.h" 59 #include "VMAnonymousNoSwapCache.h" 60 #include "IORequest.h" 61 62 63 //#define TRACE_VM 64 //#define TRACE_FAULTS 65 #ifdef TRACE_VM 66 # define TRACE(x) dprintf x 67 #else 68 # define TRACE(x) ; 69 #endif 70 #ifdef TRACE_FAULTS 71 # define FTRACE(x) dprintf x 72 #else 73 # define FTRACE(x) ; 74 #endif 75 76 77 namespace { 78 79 class AreaCacheLocking { 80 public: 81 inline bool Lock(VMCache* lockable) 82 { 83 return false; 84 } 85 86 inline void Unlock(VMCache* lockable) 87 { 88 vm_area_put_locked_cache(lockable); 89 } 90 }; 91 92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 93 public: 94 inline AreaCacheLocker(VMCache* cache = NULL) 95 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 96 { 97 } 98 99 inline AreaCacheLocker(VMArea* area) 100 : AutoLocker<VMCache, AreaCacheLocking>() 101 { 102 SetTo(area); 103 } 104 105 inline void SetTo(VMCache* cache, bool alreadyLocked) 106 { 107 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 108 } 109 110 inline void SetTo(VMArea* area) 111 { 112 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 113 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 114 } 115 }; 116 117 118 class VMCacheChainLocker { 119 public: 120 VMCacheChainLocker() 121 : 122 fTopCache(NULL), 123 fBottomCache(NULL) 124 { 125 } 126 127 VMCacheChainLocker(VMCache* topCache) 128 : 129 fTopCache(topCache), 130 fBottomCache(topCache) 131 { 132 } 133 134 ~VMCacheChainLocker() 135 { 136 Unlock(); 137 } 138 139 void SetTo(VMCache* topCache) 140 { 141 fTopCache = topCache; 142 fBottomCache = topCache; 143 144 if (topCache != NULL) 145 topCache->SetUserData(NULL); 146 } 147 148 VMCache* LockSourceCache() 149 { 150 if (fBottomCache == NULL || fBottomCache->source == NULL) 151 return NULL; 152 153 VMCache* previousCache = fBottomCache; 154 155 fBottomCache = fBottomCache->source; 156 fBottomCache->Lock(); 157 fBottomCache->AcquireRefLocked(); 158 fBottomCache->SetUserData(previousCache); 159 160 return fBottomCache; 161 } 162 163 void LockAllSourceCaches() 164 { 165 while (LockSourceCache() != NULL) { 166 } 167 } 168 169 void Unlock(VMCache* exceptCache = NULL) 170 { 171 if (fTopCache == NULL) 172 return; 173 174 // Unlock caches in source -> consumer direction. This is important to 175 // avoid double-locking and a reversal of locking order in case a cache 176 // is eligable for merging. 177 VMCache* cache = fBottomCache; 178 while (cache != NULL) { 179 VMCache* nextCache = (VMCache*)cache->UserData(); 180 if (cache != exceptCache) 181 cache->ReleaseRefAndUnlock(cache != fTopCache); 182 183 if (cache == fTopCache) 184 break; 185 186 cache = nextCache; 187 } 188 189 fTopCache = NULL; 190 fBottomCache = NULL; 191 } 192 193 void UnlockKeepRefs(bool keepTopCacheLocked) 194 { 195 if (fTopCache == NULL) 196 return; 197 198 VMCache* nextCache = fBottomCache; 199 VMCache* cache = NULL; 200 201 while (keepTopCacheLocked 202 ? nextCache != fTopCache : cache != fTopCache) { 203 cache = nextCache; 204 nextCache = (VMCache*)cache->UserData(); 205 cache->Unlock(cache != fTopCache); 206 } 207 } 208 209 void RelockCaches(bool topCacheLocked) 210 { 211 if (fTopCache == NULL) 212 return; 213 214 VMCache* nextCache = fTopCache; 215 VMCache* cache = NULL; 216 if (topCacheLocked) { 217 cache = nextCache; 218 nextCache = cache->source; 219 } 220 221 while (cache != fBottomCache && nextCache != NULL) { 222 VMCache* consumer = cache; 223 cache = nextCache; 224 nextCache = cache->source; 225 cache->Lock(); 226 cache->SetUserData(consumer); 227 } 228 } 229 230 private: 231 VMCache* fTopCache; 232 VMCache* fBottomCache; 233 }; 234 235 } // namespace 236 237 238 // The memory reserve an allocation of the certain priority must not touch. 
239 static const size_t kMemoryReserveForPriority[] = { 240 VM_MEMORY_RESERVE_USER, // user 241 VM_MEMORY_RESERVE_SYSTEM, // system 242 0 // VIP 243 }; 244 245 246 ObjectCache* gPageMappingsObjectCache; 247 248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 249 250 static off_t sAvailableMemory; 251 static off_t sNeededMemory; 252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 253 static uint32 sPageFaults; 254 255 static VMPhysicalPageMapper* sPhysicalPageMapper; 256 257 #if DEBUG_CACHE_LIST 258 259 struct cache_info { 260 VMCache* cache; 261 addr_t page_count; 262 addr_t committed; 263 }; 264 265 static const int kCacheInfoTableCount = 100 * 1024; 266 static cache_info* sCacheInfoTable; 267 268 #endif // DEBUG_CACHE_LIST 269 270 271 // function declarations 272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 273 bool addressSpaceCleanup); 274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 275 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 276 static status_t map_backing_store(VMAddressSpace* addressSpace, 277 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 278 int protection, int protectionMax, int mapping, uint32 flags, 279 const virtual_address_restrictions* addressRestrictions, bool kernel, 280 VMArea** _area, void** _virtualAddress); 281 static void fix_protection(uint32* protection); 282 283 284 // #pragma mark - 285 286 287 #if VM_PAGE_FAULT_TRACING 288 289 namespace VMPageFaultTracing { 290 291 class PageFaultStart : public AbstractTraceEntry { 292 public: 293 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 294 : 295 fAddress(address), 296 fPC(pc), 297 fWrite(write), 298 fUser(user) 299 { 300 Initialized(); 301 } 302 303 virtual void AddDump(TraceOutput& out) 304 { 305 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 306 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 307 } 308 309 private: 310 addr_t fAddress; 311 addr_t fPC; 312 bool fWrite; 313 bool fUser; 314 }; 315 316 317 // page fault errors 318 enum { 319 PAGE_FAULT_ERROR_NO_AREA = 0, 320 PAGE_FAULT_ERROR_KERNEL_ONLY, 321 PAGE_FAULT_ERROR_WRITE_PROTECTED, 322 PAGE_FAULT_ERROR_READ_PROTECTED, 323 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 324 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 325 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 326 }; 327 328 329 class PageFaultError : public AbstractTraceEntry { 330 public: 331 PageFaultError(area_id area, status_t error) 332 : 333 fArea(area), 334 fError(error) 335 { 336 Initialized(); 337 } 338 339 virtual void AddDump(TraceOutput& out) 340 { 341 switch (fError) { 342 case PAGE_FAULT_ERROR_NO_AREA: 343 out.Print("page fault error: no area"); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_ONLY: 346 out.Print("page fault error: area: %ld, kernel only", fArea); 347 break; 348 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 349 out.Print("page fault error: area: %ld, write protected", 350 fArea); 351 break; 352 case PAGE_FAULT_ERROR_READ_PROTECTED: 353 out.Print("page fault error: area: %ld, read protected", fArea); 354 break; 355 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 356 out.Print("page fault error: area: %ld, execute protected", 357 fArea); 358 break; 359 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 360 out.Print("page fault error: kernel touching bad user memory"); 361 break; 362 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 363 out.Print("page fault error: no address space"); 364 break; 365 default: 366 out.Print("page fault error: area: %ld, error: %s", fArea, 367 strerror(fError)); 368 break; 369 } 370 } 371 372 private: 373 area_id fArea; 374 status_t fError; 375 }; 376 377 378 class PageFaultDone : public AbstractTraceEntry { 379 public: 380 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 381 vm_page* page) 382 : 383 fArea(area), 384 fTopCache(topCache), 385 fCache(cache), 386 fPage(page) 387 { 388 Initialized(); 389 } 390 391 virtual void AddDump(TraceOutput& out) 392 { 393 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 394 "page: %p", fArea, fTopCache, fCache, fPage); 395 } 396 397 private: 398 area_id fArea; 399 VMCache* fTopCache; 400 VMCache* fCache; 401 vm_page* fPage; 402 }; 403 404 } // namespace VMPageFaultTracing 405 406 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 407 #else 408 # define TPF(x) ; 409 #endif // VM_PAGE_FAULT_TRACING 410 411 412 // #pragma mark - 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 increment_page_wired_count(vm_page* page) 419 { 420 if (!page->IsMapped()) 421 atomic_add(&gMappedPagesCount, 1); 422 page->IncrementWiredCount(); 423 } 424 425 426 /*! The page's cache must be locked. 427 */ 428 static inline void 429 decrement_page_wired_count(vm_page* page) 430 { 431 page->DecrementWiredCount(); 432 if (!page->IsMapped()) 433 atomic_add(&gMappedPagesCount, -1); 434 } 435 436 437 static inline addr_t 438 virtual_page_address(VMArea* area, vm_page* page) 439 { 440 return area->Base() 441 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 442 } 443 444 445 //! 
You need to have the address space locked when calling this function 446 static VMArea* 447 lookup_area(VMAddressSpace* addressSpace, area_id id) 448 { 449 VMAreaHash::ReadLock(); 450 451 VMArea* area = VMAreaHash::LookupLocked(id); 452 if (area != NULL && area->address_space != addressSpace) 453 area = NULL; 454 455 VMAreaHash::ReadUnlock(); 456 457 return area; 458 } 459 460 461 static status_t 462 allocate_area_page_protections(VMArea* area) 463 { 464 // In the page protections we store only the three user protections, 465 // so we use 4 bits per page. 466 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 467 area->page_protections = (uint8*)malloc_etc(bytes, 468 area->address_space == VMAddressSpace::Kernel() 469 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 470 if (area->page_protections == NULL) 471 return B_NO_MEMORY; 472 473 // init the page protections for all pages to that of the area 474 uint32 areaProtection = area->protection 475 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 476 memset(area->page_protections, areaProtection | (areaProtection << 4), 477 bytes); 478 return B_OK; 479 } 480 481 482 static inline void 483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 484 { 485 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 486 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 487 uint8& entry = area->page_protections[pageIndex / 2]; 488 if (pageIndex % 2 == 0) 489 entry = (entry & 0xf0) | protection; 490 else 491 entry = (entry & 0x0f) | (protection << 4); 492 } 493 494 495 static inline uint32 496 get_area_page_protection(VMArea* area, addr_t pageAddress) 497 { 498 if (area->page_protections == NULL) 499 return area->protection; 500 501 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 502 uint32 protection = area->page_protections[pageIndex / 2]; 503 if (pageIndex % 2 == 0) 504 protection &= 0x0f; 505 else 506 protection >>= 4; 507 508 uint32 kernelProtection = 0; 509 if ((protection & B_READ_AREA) != 0) 510 kernelProtection |= B_KERNEL_READ_AREA; 511 if ((protection & B_WRITE_AREA) != 0) 512 kernelProtection |= B_KERNEL_WRITE_AREA; 513 514 // If this is a kernel area we return only the kernel flags. 515 if (area->address_space == VMAddressSpace::Kernel()) 516 return kernelProtection; 517 518 return protection | kernelProtection; 519 } 520 521 522 /*! The caller must have reserved enough pages the translation map 523 implementation might need to map this page. 524 The page's cache must be locked. 525 */ 526 static status_t 527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 528 vm_page_reservation* reservation) 529 { 530 VMTranslationMap* map = area->address_space->TranslationMap(); 531 532 bool wasMapped = page->IsMapped(); 533 534 if (area->wiring == B_NO_LOCK) { 535 DEBUG_PAGE_ACCESS_CHECK(page); 536 537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 539 gPageMappingsObjectCache, 540 CACHE_DONT_WAIT_FOR_MEMORY 541 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 542 if (mapping == NULL) 543 return B_NO_MEMORY; 544 545 mapping->page = page; 546 mapping->area = area; 547 548 map->Lock(); 549 550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 551 area->MemoryType(), reservation); 552 553 // insert mapping into lists 554 if (!page->IsMapped()) 555 atomic_add(&gMappedPagesCount, 1); 556 557 page->mappings.Add(mapping); 558 area->mappings.Add(mapping); 559 560 map->Unlock(); 561 } else { 562 DEBUG_PAGE_ACCESS_CHECK(page); 563 564 map->Lock(); 565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 566 area->MemoryType(), reservation); 567 map->Unlock(); 568 569 increment_page_wired_count(page); 570 } 571 572 if (!wasMapped) { 573 // The page is mapped now, so we must not remain in the cached queue. 574 // It also makes sense to move it from the inactive to the active, since 575 // otherwise the page daemon wouldn't come to keep track of it (in idle 576 // mode) -- if the page isn't touched, it will be deactivated after a 577 // full iteration through the queue at the latest. 578 if (page->State() == PAGE_STATE_CACHED 579 || page->State() == PAGE_STATE_INACTIVE) { 580 vm_page_set_state(page, PAGE_STATE_ACTIVE); 581 } 582 } 583 584 return B_OK; 585 } 586 587 588 /*! If \a preserveModified is \c true, the caller must hold the lock of the 589 page's cache. 590 */ 591 static inline bool 592 unmap_page(VMArea* area, addr_t virtualAddress) 593 { 594 return area->address_space->TranslationMap()->UnmapPage(area, 595 virtualAddress, true); 596 } 597 598 599 /*! If \a preserveModified is \c true, the caller must hold the lock of all 600 mapped pages' caches. 601 */ 602 static inline void 603 unmap_pages(VMArea* area, addr_t base, size_t size) 604 { 605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 606 } 607 608 609 static inline bool 610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 611 { 612 if (address < area->Base()) { 613 offset = area->Base() - address; 614 if (offset >= size) 615 return false; 616 617 address = area->Base(); 618 size -= offset; 619 offset = 0; 620 if (size > area->Size()) 621 size = area->Size(); 622 623 return true; 624 } 625 626 offset = address - area->Base(); 627 if (offset >= area->Size()) 628 return false; 629 630 if (size >= area->Size() - offset) 631 size = area->Size() - offset; 632 633 return true; 634 } 635 636 637 /*! Cuts a piece out of an area. If the given cut range covers the complete 638 area, it is deleted. If it covers the beginning or the end, the area is 639 resized accordingly. If the range covers some part in the middle of the 640 area, it is split in two; in this case the second area is returned via 641 \a _secondArea (the variable is left untouched in the other cases). 642 The address space must be write locked. 643 The caller must ensure that no part of the given range is wired. 644 */ 645 static status_t 646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 647 addr_t size, VMArea** _secondArea, bool kernel) 648 { 649 addr_t offset; 650 if (!intersect_area(area, address, size, offset)) 651 return B_OK; 652 653 // Is the area fully covered? 
654 if (address == area->Base() && size == area->Size()) { 655 delete_area(addressSpace, area, false); 656 return B_OK; 657 } 658 659 int priority; 660 uint32 allocationFlags; 661 if (addressSpace == VMAddressSpace::Kernel()) { 662 priority = VM_PRIORITY_SYSTEM; 663 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 664 | HEAP_DONT_LOCK_KERNEL_SPACE; 665 } else { 666 priority = VM_PRIORITY_USER; 667 allocationFlags = 0; 668 } 669 670 VMCache* cache = vm_area_get_locked_cache(area); 671 VMCacheChainLocker cacheChainLocker(cache); 672 cacheChainLocker.LockAllSourceCaches(); 673 674 // If no one else uses the area's cache and it's an anonymous cache, we can 675 // resize or split it, too. 676 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 677 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 678 679 // Cut the end only? 680 if (offset > 0 && size == area->Size() - offset) { 681 status_t error = addressSpace->ShrinkAreaTail(area, offset, 682 allocationFlags); 683 if (error != B_OK) 684 return error; 685 686 // unmap pages 687 unmap_pages(area, address, size); 688 689 if (onlyCacheUser) { 690 // Since VMCache::Resize() can temporarily drop the lock, we must 691 // unlock all lower caches to prevent locking order inversion. 692 cacheChainLocker.Unlock(cache); 693 cache->Resize(cache->virtual_base + offset, priority); 694 cache->ReleaseRefAndUnlock(); 695 } 696 697 return B_OK; 698 } 699 700 // Cut the beginning only? 701 if (area->Base() == address) { 702 // resize the area 703 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 704 allocationFlags); 705 if (error != B_OK) 706 return error; 707 708 // unmap pages 709 unmap_pages(area, address, size); 710 711 if (onlyCacheUser) { 712 // Since VMCache::Rebase() can temporarily drop the lock, we must 713 // unlock all lower caches to prevent locking order inversion. 714 cacheChainLocker.Unlock(cache); 715 cache->Rebase(cache->virtual_base + size, priority); 716 cache->ReleaseRefAndUnlock(); 717 } 718 area->cache_offset += size; 719 720 return B_OK; 721 } 722 723 // The tough part -- cut a piece out of the middle of the area. 724 // We do that by shrinking the area to the begin section and creating a 725 // new area for the end section. 726 addr_t firstNewSize = offset; 727 addr_t secondBase = address + size; 728 addr_t secondSize = area->Size() - offset - size; 729 730 // unmap pages 731 unmap_pages(area, address, area->Size() - firstNewSize); 732 733 // resize the area 734 addr_t oldSize = area->Size(); 735 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 736 allocationFlags); 737 if (error != B_OK) 738 return error; 739 740 virtual_address_restrictions addressRestrictions = {}; 741 addressRestrictions.address = (void*)secondBase; 742 addressRestrictions.address_specification = B_EXACT_ADDRESS; 743 VMArea* secondArea; 744 745 if (onlyCacheUser) { 746 // Create a new cache for the second area. 747 VMCache* secondCache; 748 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 749 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 750 if (error != B_OK) { 751 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 752 return error; 753 } 754 755 secondCache->Lock(); 756 secondCache->temporary = cache->temporary; 757 secondCache->virtual_base = area->cache_offset; 758 secondCache->virtual_end = area->cache_offset + secondSize; 759 760 // Transfer the concerned pages from the first cache. 
761 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 762 error = secondCache->Adopt(cache, adoptOffset, secondSize, 763 area->cache_offset); 764 765 if (error == B_OK) { 766 // Since VMCache::Resize() can temporarily drop the lock, we must 767 // unlock all lower caches to prevent locking order inversion. 768 cacheChainLocker.Unlock(cache); 769 cache->Resize(cache->virtual_base + firstNewSize, priority); 770 // Don't unlock the cache yet because we might have to resize it 771 // back. 772 773 // Map the second area. 774 error = map_backing_store(addressSpace, secondCache, 775 area->cache_offset, area->name, secondSize, area->wiring, 776 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 777 &addressRestrictions, kernel, &secondArea, NULL); 778 } 779 780 if (error != B_OK) { 781 // Restore the original cache. 782 cache->Resize(cache->virtual_base + oldSize, priority); 783 784 // Move the pages back. 785 status_t readoptStatus = cache->Adopt(secondCache, 786 area->cache_offset, secondSize, adoptOffset); 787 if (readoptStatus != B_OK) { 788 // Some (swap) pages have not been moved back and will be lost 789 // once the second cache is deleted. 790 panic("failed to restore cache range: %s", 791 strerror(readoptStatus)); 792 793 // TODO: Handle out of memory cases by freeing memory and 794 // retrying. 795 } 796 797 cache->ReleaseRefAndUnlock(); 798 secondCache->ReleaseRefAndUnlock(); 799 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 800 return error; 801 } 802 803 // Now we can unlock it. 804 cache->ReleaseRefAndUnlock(); 805 secondCache->Unlock(); 806 } else { 807 error = map_backing_store(addressSpace, cache, area->cache_offset 808 + (secondBase - area->Base()), 809 area->name, secondSize, area->wiring, area->protection, 810 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 811 &addressRestrictions, kernel, &secondArea, NULL); 812 if (error != B_OK) { 813 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 814 return error; 815 } 816 // We need a cache reference for the new area. 817 cache->AcquireRefLocked(); 818 } 819 820 if (_secondArea != NULL) 821 *_secondArea = secondArea; 822 823 return B_OK; 824 } 825 826 827 /*! Deletes or cuts all areas in the given address range. 828 The address space must be write-locked. 829 The caller must ensure that no part of the given range is wired. 830 */ 831 static status_t 832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 833 bool kernel) 834 { 835 size = PAGE_ALIGN(size); 836 837 // Check, whether the caller is allowed to modify the concerned areas. 838 if (!kernel) { 839 for (VMAddressSpace::AreaRangeIterator it 840 = addressSpace->GetAreaRangeIterator(address, size); 841 VMArea* area = it.Next();) { 842 843 if ((area->protection & B_KERNEL_AREA) != 0) { 844 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 845 "unmap range of kernel area %" B_PRId32 " (%s)\n", 846 team_get_current_team_id(), area->id, area->name); 847 return B_NOT_ALLOWED; 848 } 849 } 850 } 851 852 for (VMAddressSpace::AreaRangeIterator it 853 = addressSpace->GetAreaRangeIterator(address, size); 854 VMArea* area = it.Next();) { 855 856 status_t error = cut_area(addressSpace, area, address, size, NULL, 857 kernel); 858 if (error != B_OK) 859 return error; 860 // Failing after already messing with areas is ugly, but we 861 // can't do anything about it. 
862 } 863 864 return B_OK; 865 } 866 867 868 static status_t 869 discard_area_range(VMArea* area, addr_t address, addr_t size) 870 { 871 addr_t offset; 872 if (!intersect_area(area, address, size, offset)) 873 return B_OK; 874 875 // If someone else uses the area's cache or it's not an anonymous cache, we 876 // can't discard. 877 VMCache* cache = vm_area_get_locked_cache(area); 878 if (cache->areas != area || area->cache_next != NULL 879 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 880 return B_OK; 881 } 882 883 VMCacheChainLocker cacheChainLocker(cache); 884 cacheChainLocker.LockAllSourceCaches(); 885 886 unmap_pages(area, address, size); 887 888 // Since VMCache::Discard() can temporarily drop the lock, we must 889 // unlock all lower caches to prevent locking order inversion. 890 cacheChainLocker.Unlock(cache); 891 cache->Discard(cache->virtual_base + offset, size); 892 cache->ReleaseRefAndUnlock(); 893 894 return B_OK; 895 } 896 897 898 static status_t 899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 900 bool kernel) 901 { 902 for (VMAddressSpace::AreaRangeIterator it 903 = addressSpace->GetAreaRangeIterator(address, size); 904 VMArea* area = it.Next();) { 905 status_t error = discard_area_range(area, address, size); 906 if (error != B_OK) 907 return error; 908 } 909 910 return B_OK; 911 } 912 913 914 /*! You need to hold the lock of the cache and the write lock of the address 915 space when calling this function. 916 Note, that in case of error your cache will be temporarily unlocked. 917 If \a addressSpec is \c B_EXACT_ADDRESS and the 918 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 919 that no part of the specified address range (base \c *_virtualAddress, size 920 \a size) is wired. 
921 */ 922 static status_t 923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 924 const char* areaName, addr_t size, int wiring, int protection, 925 int protectionMax, int mapping, 926 uint32 flags, const virtual_address_restrictions* addressRestrictions, 927 bool kernel, VMArea** _area, void** _virtualAddress) 928 { 929 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%" 930 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d" 931 ", protection %d, protectionMax %d, area %p, areaName '%s'\n", 932 addressSpace, cache, addressRestrictions->address, offset, size, 933 addressRestrictions->address_specification, wiring, protection, 934 protectionMax, _area, areaName)); 935 cache->AssertLocked(); 936 937 if (size == 0) { 938 #if KDEBUG 939 panic("map_backing_store(): called with size=0 for area '%s'!", 940 areaName); 941 #endif 942 return B_BAD_VALUE; 943 } 944 945 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 946 | HEAP_DONT_LOCK_KERNEL_SPACE; 947 int priority; 948 if (addressSpace != VMAddressSpace::Kernel()) { 949 priority = VM_PRIORITY_USER; 950 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 951 priority = VM_PRIORITY_VIP; 952 allocationFlags |= HEAP_PRIORITY_VIP; 953 } else 954 priority = VM_PRIORITY_SYSTEM; 955 956 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 957 allocationFlags); 958 if (mapping != REGION_PRIVATE_MAP) 959 area->protection_max = protectionMax & B_USER_PROTECTION; 960 if (area == NULL) 961 return B_NO_MEMORY; 962 963 status_t status; 964 965 // if this is a private map, we need to create a new cache 966 // to handle the private copies of pages as they are written to 967 VMCache* sourceCache = cache; 968 if (mapping == REGION_PRIVATE_MAP) { 969 VMCache* newCache; 970 971 // create an anonymous cache 972 status = VMCacheFactory::CreateAnonymousCache(newCache, 973 (protection & B_STACK_AREA) != 0 974 || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 975 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER); 976 if (status != B_OK) 977 goto err1; 978 979 newCache->Lock(); 980 newCache->temporary = 1; 981 newCache->virtual_base = offset; 982 newCache->virtual_end = offset + size; 983 984 cache->AddConsumer(newCache); 985 986 cache = newCache; 987 } 988 989 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 990 status = cache->SetMinimalCommitment(size, priority); 991 if (status != B_OK) 992 goto err2; 993 } 994 995 // check to see if this address space has entered DELETE state 996 if (addressSpace->IsBeingDeleted()) { 997 // okay, someone is trying to delete this address space now, so we can't 998 // insert the area, so back out 999 status = B_BAD_TEAM_ID; 1000 goto err2; 1001 } 1002 1003 if (addressRestrictions->address_specification == B_EXACT_ADDRESS 1004 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 1005 status = unmap_address_range(addressSpace, 1006 (addr_t)addressRestrictions->address, size, kernel); 1007 if (status != B_OK) 1008 goto err2; 1009 } 1010 1011 status = addressSpace->InsertArea(area, size, addressRestrictions, 1012 allocationFlags, _virtualAddress); 1013 if (status == B_NO_MEMORY 1014 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1015 // Due to how many locks are held, we cannot wait here for space to be 1016 // freed up, but we can at least notify the low_resource handler. 
1017 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1018 } 1019 if (status != B_OK) 1020 goto err2; 1021 1022 // attach the cache to the area 1023 area->cache = cache; 1024 area->cache_offset = offset; 1025 1026 // point the cache back to the area 1027 cache->InsertAreaLocked(area); 1028 if (mapping == REGION_PRIVATE_MAP) 1029 cache->Unlock(); 1030 1031 // insert the area in the global area hash table 1032 VMAreaHash::Insert(area); 1033 1034 // grab a ref to the address space (the area holds this) 1035 addressSpace->Get(); 1036 1037 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1038 // cache, sourceCache, areaName, area); 1039 1040 *_area = area; 1041 return B_OK; 1042 1043 err2: 1044 if (mapping == REGION_PRIVATE_MAP) { 1045 // We created this cache, so we must delete it again. Note, that we 1046 // need to temporarily unlock the source cache or we'll otherwise 1047 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1048 sourceCache->Unlock(); 1049 cache->ReleaseRefAndUnlock(); 1050 sourceCache->Lock(); 1051 } 1052 err1: 1053 addressSpace->DeleteArea(area, allocationFlags); 1054 return status; 1055 } 1056 1057 1058 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1059 locker1, locker2). 1060 */ 1061 template<typename LockerType1, typename LockerType2> 1062 static inline bool 1063 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1064 { 1065 area->cache->AssertLocked(); 1066 1067 VMAreaUnwiredWaiter waiter; 1068 if (!area->AddWaiterIfWired(&waiter)) 1069 return false; 1070 1071 // unlock everything and wait 1072 if (locker1 != NULL) 1073 locker1->Unlock(); 1074 if (locker2 != NULL) 1075 locker2->Unlock(); 1076 1077 waiter.waitEntry.Wait(); 1078 1079 return true; 1080 } 1081 1082 1083 /*! Checks whether the given area has any wired ranges intersecting with the 1084 specified range and waits, if so. 1085 1086 When it has to wait, the function calls \c Unlock() on both \a locker1 1087 and \a locker2, if given. 1088 The area's top cache must be locked and must be unlocked as a side effect 1089 of calling \c Unlock() on either \a locker1 or \a locker2. 1090 1091 If the function does not have to wait it does not modify or unlock any 1092 object. 1093 1094 \param area The area to be checked. 1095 \param base The base address of the range to check. 1096 \param size The size of the address range to check. 1097 \param locker1 An object to be unlocked when before starting to wait (may 1098 be \c NULL). 1099 \param locker2 An object to be unlocked when before starting to wait (may 1100 be \c NULL). 1101 \return \c true, if the function had to wait, \c false otherwise. 1102 */ 1103 template<typename LockerType1, typename LockerType2> 1104 static inline bool 1105 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1106 LockerType1* locker1, LockerType2* locker2) 1107 { 1108 area->cache->AssertLocked(); 1109 1110 VMAreaUnwiredWaiter waiter; 1111 if (!area->AddWaiterIfWired(&waiter, base, size)) 1112 return false; 1113 1114 // unlock everything and wait 1115 if (locker1 != NULL) 1116 locker1->Unlock(); 1117 if (locker2 != NULL) 1118 locker2->Unlock(); 1119 1120 waiter.waitEntry.Wait(); 1121 1122 return true; 1123 } 1124 1125 1126 /*! Checks whether the given address space has any wired ranges intersecting 1127 with the specified range and waits, if so. 
1128 1129 Similar to wait_if_area_range_is_wired(), with the following differences: 1130 - All areas intersecting with the range are checked (respectively all until 1131 one is found that contains a wired range intersecting with the given 1132 range). 1133 - The given address space must at least be read-locked and must be unlocked 1134 when \c Unlock() is called on \a locker. 1135 - None of the areas' caches are allowed to be locked. 1136 */ 1137 template<typename LockerType> 1138 static inline bool 1139 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1140 size_t size, LockerType* locker) 1141 { 1142 for (VMAddressSpace::AreaRangeIterator it 1143 = addressSpace->GetAreaRangeIterator(base, size); 1144 VMArea* area = it.Next();) { 1145 1146 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1147 1148 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1149 return true; 1150 } 1151 1152 return false; 1153 } 1154 1155 1156 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1157 It must be called in a situation where the kernel address space may be 1158 locked. 1159 */ 1160 status_t 1161 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1162 { 1163 AddressSpaceReadLocker locker; 1164 VMArea* area; 1165 status_t status = locker.SetFromArea(id, area); 1166 if (status != B_OK) 1167 return status; 1168 1169 if (area->page_protections == NULL) { 1170 status = allocate_area_page_protections(area); 1171 if (status != B_OK) 1172 return status; 1173 } 1174 1175 *cookie = (void*)area; 1176 return B_OK; 1177 } 1178 1179 1180 /*! This is a debug helper function that can only be used with very specific 1181 use cases. 1182 Sets protection for the given address range to the protection specified. 1183 If \a protection is 0 then the involved pages will be marked non-present 1184 in the translation map to cause a fault on access. The pages aren't 1185 actually unmapped however so that they can be marked present again with 1186 additional calls to this function. For this to work the area must be 1187 fully locked in memory so that the pages aren't otherwise touched. 1188 This function does not lock the kernel address space and needs to be 1189 supplied with a \a cookie retrieved from a successful call to 1190 vm_prepare_kernel_area_debug_protection(). 1191 */ 1192 status_t 1193 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1194 uint32 protection) 1195 { 1196 // check address range 1197 addr_t address = (addr_t)_address; 1198 size = PAGE_ALIGN(size); 1199 1200 if ((address % B_PAGE_SIZE) != 0 1201 || (addr_t)address + size < (addr_t)address 1202 || !IS_KERNEL_ADDRESS(address) 1203 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1204 return B_BAD_VALUE; 1205 } 1206 1207 // Translate the kernel protection to user protection as we only store that. 
1208 if ((protection & B_KERNEL_READ_AREA) != 0) 1209 protection |= B_READ_AREA; 1210 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1211 protection |= B_WRITE_AREA; 1212 1213 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1214 VMTranslationMap* map = addressSpace->TranslationMap(); 1215 VMArea* area = (VMArea*)cookie; 1216 1217 addr_t offset = address - area->Base(); 1218 if (area->Size() - offset < size) { 1219 panic("protect range not fully within supplied area"); 1220 return B_BAD_VALUE; 1221 } 1222 1223 if (area->page_protections == NULL) { 1224 panic("area has no page protections"); 1225 return B_BAD_VALUE; 1226 } 1227 1228 // Invalidate the mapping entries so any access to them will fault or 1229 // restore the mapping entries unchanged so that lookup will success again. 1230 map->Lock(); 1231 map->DebugMarkRangePresent(address, address + size, protection != 0); 1232 map->Unlock(); 1233 1234 // And set the proper page protections so that the fault case will actually 1235 // fail and not simply try to map a new page. 1236 for (addr_t pageAddress = address; pageAddress < address + size; 1237 pageAddress += B_PAGE_SIZE) { 1238 set_area_page_protection(area, pageAddress, protection); 1239 } 1240 1241 return B_OK; 1242 } 1243 1244 1245 status_t 1246 vm_block_address_range(const char* name, void* address, addr_t size) 1247 { 1248 if (!arch_vm_supports_protection(0)) 1249 return B_NOT_SUPPORTED; 1250 1251 AddressSpaceWriteLocker locker; 1252 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1253 if (status != B_OK) 1254 return status; 1255 1256 VMAddressSpace* addressSpace = locker.AddressSpace(); 1257 1258 // create an anonymous cache 1259 VMCache* cache; 1260 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1261 VM_PRIORITY_SYSTEM); 1262 if (status != B_OK) 1263 return status; 1264 1265 cache->temporary = 1; 1266 cache->virtual_end = size; 1267 cache->Lock(); 1268 1269 VMArea* area; 1270 virtual_address_restrictions addressRestrictions = {}; 1271 addressRestrictions.address = address; 1272 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1273 status = map_backing_store(addressSpace, cache, 0, name, size, 1274 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1275 true, &area, NULL); 1276 if (status != B_OK) { 1277 cache->ReleaseRefAndUnlock(); 1278 return status; 1279 } 1280 1281 cache->Unlock(); 1282 area->cache_type = CACHE_TYPE_RAM; 1283 return area->id; 1284 } 1285 1286 1287 status_t 1288 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1289 { 1290 AddressSpaceWriteLocker locker(team); 1291 if (!locker.IsLocked()) 1292 return B_BAD_TEAM_ID; 1293 1294 VMAddressSpace* addressSpace = locker.AddressSpace(); 1295 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1296 addressSpace == VMAddressSpace::Kernel() 1297 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1298 } 1299 1300 1301 status_t 1302 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1303 addr_t size, uint32 flags) 1304 { 1305 if (size == 0) 1306 return B_BAD_VALUE; 1307 1308 AddressSpaceWriteLocker locker(team); 1309 if (!locker.IsLocked()) 1310 return B_BAD_TEAM_ID; 1311 1312 virtual_address_restrictions addressRestrictions = {}; 1313 addressRestrictions.address = *_address; 1314 addressRestrictions.address_specification = addressSpec; 1315 VMAddressSpace* addressSpace = locker.AddressSpace(); 1316 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1317 addressSpace == VMAddressSpace::Kernel() 1318 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1319 _address); 1320 } 1321 1322 1323 area_id 1324 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1325 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1326 const virtual_address_restrictions* virtualAddressRestrictions, 1327 const physical_address_restrictions* physicalAddressRestrictions, 1328 bool kernel, void** _address) 1329 { 1330 VMArea* area; 1331 VMCache* cache; 1332 vm_page* page = NULL; 1333 bool isStack = (protection & B_STACK_AREA) != 0; 1334 page_num_t guardPages; 1335 bool canOvercommit = false; 1336 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1337 ? VM_PAGE_ALLOC_CLEAR : 0; 1338 1339 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1340 team, name, size)); 1341 1342 size = PAGE_ALIGN(size); 1343 guardSize = PAGE_ALIGN(guardSize); 1344 guardPages = guardSize / B_PAGE_SIZE; 1345 1346 if (size == 0 || size < guardSize) 1347 return B_BAD_VALUE; 1348 if (!arch_vm_supports_protection(protection)) 1349 return B_NOT_SUPPORTED; 1350 1351 if (team == B_CURRENT_TEAM) 1352 team = VMAddressSpace::CurrentID(); 1353 if (team < 0) 1354 return B_BAD_TEAM_ID; 1355 1356 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1357 canOvercommit = true; 1358 1359 #ifdef DEBUG_KERNEL_STACKS 1360 if ((protection & B_KERNEL_STACK_AREA) != 0) 1361 isStack = true; 1362 #endif 1363 1364 // check parameters 1365 switch (virtualAddressRestrictions->address_specification) { 1366 case B_ANY_ADDRESS: 1367 case B_EXACT_ADDRESS: 1368 case B_BASE_ADDRESS: 1369 case B_ANY_KERNEL_ADDRESS: 1370 case B_ANY_KERNEL_BLOCK_ADDRESS: 1371 case B_RANDOMIZED_ANY_ADDRESS: 1372 case B_RANDOMIZED_BASE_ADDRESS: 1373 break; 1374 1375 default: 1376 return B_BAD_VALUE; 1377 } 1378 1379 // If low or high physical address restrictions are given, we force 1380 // B_CONTIGUOUS wiring, since only then we'll use 1381 // vm_page_allocate_page_run() which deals with those restrictions. 
1382 if (physicalAddressRestrictions->low_address != 0 1383 || physicalAddressRestrictions->high_address != 0) { 1384 wiring = B_CONTIGUOUS; 1385 } 1386 1387 physical_address_restrictions stackPhysicalRestrictions; 1388 bool doReserveMemory = false; 1389 switch (wiring) { 1390 case B_NO_LOCK: 1391 break; 1392 case B_FULL_LOCK: 1393 case B_LAZY_LOCK: 1394 case B_CONTIGUOUS: 1395 doReserveMemory = true; 1396 break; 1397 case B_ALREADY_WIRED: 1398 break; 1399 case B_LOMEM: 1400 stackPhysicalRestrictions = *physicalAddressRestrictions; 1401 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1402 physicalAddressRestrictions = &stackPhysicalRestrictions; 1403 wiring = B_CONTIGUOUS; 1404 doReserveMemory = true; 1405 break; 1406 case B_32_BIT_FULL_LOCK: 1407 if (B_HAIKU_PHYSICAL_BITS <= 32 1408 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1409 wiring = B_FULL_LOCK; 1410 doReserveMemory = true; 1411 break; 1412 } 1413 // TODO: We don't really support this mode efficiently. Just fall 1414 // through for now ... 1415 case B_32_BIT_CONTIGUOUS: 1416 #if B_HAIKU_PHYSICAL_BITS > 32 1417 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1418 stackPhysicalRestrictions = *physicalAddressRestrictions; 1419 stackPhysicalRestrictions.high_address 1420 = (phys_addr_t)1 << 32; 1421 physicalAddressRestrictions = &stackPhysicalRestrictions; 1422 } 1423 #endif 1424 wiring = B_CONTIGUOUS; 1425 doReserveMemory = true; 1426 break; 1427 default: 1428 return B_BAD_VALUE; 1429 } 1430 1431 // Optimization: For a single-page contiguous allocation without low/high 1432 // memory restriction B_FULL_LOCK wiring suffices. 1433 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1434 && physicalAddressRestrictions->low_address == 0 1435 && physicalAddressRestrictions->high_address == 0) { 1436 wiring = B_FULL_LOCK; 1437 } 1438 1439 // For full lock or contiguous areas we're also going to map the pages and 1440 // thus need to reserve pages for the mapping backend upfront. 1441 addr_t reservedMapPages = 0; 1442 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1443 AddressSpaceWriteLocker locker; 1444 status_t status = locker.SetTo(team); 1445 if (status != B_OK) 1446 return status; 1447 1448 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1449 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1450 } 1451 1452 int priority; 1453 if (team != VMAddressSpace::KernelID()) 1454 priority = VM_PRIORITY_USER; 1455 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1456 priority = VM_PRIORITY_VIP; 1457 else 1458 priority = VM_PRIORITY_SYSTEM; 1459 1460 // Reserve memory before acquiring the address space lock. This reduces the 1461 // chances of failure, since while holding the write lock to the address 1462 // space (if it is the kernel address space that is), the low memory handler 1463 // won't be able to free anything for us. 1464 addr_t reservedMemory = 0; 1465 if (doReserveMemory) { 1466 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1467 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1468 return B_NO_MEMORY; 1469 reservedMemory = size; 1470 // TODO: We don't reserve the memory for the pages for the page 1471 // directories/tables. We actually need to do since we currently don't 1472 // reclaim them (and probably can't reclaim all of them anyway). Thus 1473 // there are actually less physical pages than there should be, which 1474 // can get the VM into trouble in low memory situations. 
1475 } 1476 1477 AddressSpaceWriteLocker locker; 1478 VMAddressSpace* addressSpace; 1479 status_t status; 1480 1481 // For full lock areas reserve the pages before locking the address 1482 // space. E.g. block caches can't release their memory while we hold the 1483 // address space lock. 1484 page_num_t reservedPages = reservedMapPages; 1485 if (wiring == B_FULL_LOCK) 1486 reservedPages += size / B_PAGE_SIZE; 1487 1488 vm_page_reservation reservation; 1489 if (reservedPages > 0) { 1490 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1491 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1492 priority)) { 1493 reservedPages = 0; 1494 status = B_WOULD_BLOCK; 1495 goto err0; 1496 } 1497 } else 1498 vm_page_reserve_pages(&reservation, reservedPages, priority); 1499 } 1500 1501 if (wiring == B_CONTIGUOUS) { 1502 // we try to allocate the page run here upfront as this may easily 1503 // fail for obvious reasons 1504 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1505 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1506 if (page == NULL) { 1507 status = B_NO_MEMORY; 1508 goto err0; 1509 } 1510 } 1511 1512 // Lock the address space and, if B_EXACT_ADDRESS and 1513 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1514 // is not wired. 1515 do { 1516 status = locker.SetTo(team); 1517 if (status != B_OK) 1518 goto err1; 1519 1520 addressSpace = locker.AddressSpace(); 1521 } while (virtualAddressRestrictions->address_specification 1522 == B_EXACT_ADDRESS 1523 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1524 && wait_if_address_range_is_wired(addressSpace, 1525 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1526 1527 // create an anonymous cache 1528 // if it's a stack, make sure that two pages are available at least 1529 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1530 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1531 wiring == B_NO_LOCK, priority); 1532 if (status != B_OK) 1533 goto err1; 1534 1535 cache->temporary = 1; 1536 cache->virtual_end = size; 1537 cache->committed_size = reservedMemory; 1538 // TODO: This should be done via a method. 
1539 reservedMemory = 0; 1540 1541 cache->Lock(); 1542 1543 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1544 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1545 virtualAddressRestrictions, kernel, &area, _address); 1546 1547 if (status != B_OK) { 1548 cache->ReleaseRefAndUnlock(); 1549 goto err1; 1550 } 1551 1552 locker.DegradeToReadLock(); 1553 1554 switch (wiring) { 1555 case B_NO_LOCK: 1556 case B_LAZY_LOCK: 1557 // do nothing - the pages are mapped in as needed 1558 break; 1559 1560 case B_FULL_LOCK: 1561 { 1562 // Allocate and map all pages for this area 1563 1564 off_t offset = 0; 1565 for (addr_t address = area->Base(); 1566 address < area->Base() + (area->Size() - 1); 1567 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1568 #ifdef DEBUG_KERNEL_STACKS 1569 # ifdef STACK_GROWS_DOWNWARDS 1570 if (isStack && address < area->Base() 1571 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1572 # else 1573 if (isStack && address >= area->Base() + area->Size() 1574 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1575 # endif 1576 continue; 1577 #endif 1578 vm_page* page = vm_page_allocate_page(&reservation, 1579 PAGE_STATE_WIRED | pageAllocFlags); 1580 cache->InsertPage(page, offset); 1581 map_page(area, page, address, protection, &reservation); 1582 1583 DEBUG_PAGE_ACCESS_END(page); 1584 } 1585 1586 break; 1587 } 1588 1589 case B_ALREADY_WIRED: 1590 { 1591 // The pages should already be mapped. This is only really useful 1592 // during boot time. Find the appropriate vm_page objects and stick 1593 // them in the cache object. 1594 VMTranslationMap* map = addressSpace->TranslationMap(); 1595 off_t offset = 0; 1596 1597 if (!gKernelStartup) 1598 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1599 1600 map->Lock(); 1601 1602 for (addr_t virtualAddress = area->Base(); 1603 virtualAddress < area->Base() + (area->Size() - 1); 1604 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1605 phys_addr_t physicalAddress; 1606 uint32 flags; 1607 status = map->Query(virtualAddress, &physicalAddress, &flags); 1608 if (status < B_OK) { 1609 panic("looking up mapping failed for va 0x%lx\n", 1610 virtualAddress); 1611 } 1612 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1613 if (page == NULL) { 1614 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1615 "\n", physicalAddress); 1616 } 1617 1618 DEBUG_PAGE_ACCESS_START(page); 1619 1620 cache->InsertPage(page, offset); 1621 increment_page_wired_count(page); 1622 vm_page_set_state(page, PAGE_STATE_WIRED); 1623 page->busy = false; 1624 1625 DEBUG_PAGE_ACCESS_END(page); 1626 } 1627 1628 map->Unlock(); 1629 break; 1630 } 1631 1632 case B_CONTIGUOUS: 1633 { 1634 // We have already allocated our continuous pages run, so we can now 1635 // just map them in the address space 1636 VMTranslationMap* map = addressSpace->TranslationMap(); 1637 phys_addr_t physicalAddress 1638 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1639 addr_t virtualAddress = area->Base(); 1640 off_t offset = 0; 1641 1642 map->Lock(); 1643 1644 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1645 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1646 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1647 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1648 if (page == NULL) 1649 panic("couldn't lookup physical page just allocated\n"); 1650 1651 status = map->Map(virtualAddress, physicalAddress, protection, 1652 area->MemoryType(), &reservation); 1653 if (status < B_OK) 1654 panic("couldn't map physical page in 
page run\n"); 1655 1656 cache->InsertPage(page, offset); 1657 increment_page_wired_count(page); 1658 1659 DEBUG_PAGE_ACCESS_END(page); 1660 } 1661 1662 map->Unlock(); 1663 break; 1664 } 1665 1666 default: 1667 break; 1668 } 1669 1670 cache->Unlock(); 1671 1672 if (reservedPages > 0) 1673 vm_page_unreserve_pages(&reservation); 1674 1675 TRACE(("vm_create_anonymous_area: done\n")); 1676 1677 area->cache_type = CACHE_TYPE_RAM; 1678 return area->id; 1679 1680 err1: 1681 if (wiring == B_CONTIGUOUS) { 1682 // we had reserved the area space upfront... 1683 phys_addr_t pageNumber = page->physical_page_number; 1684 int32 i; 1685 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1686 page = vm_lookup_page(pageNumber); 1687 if (page == NULL) 1688 panic("couldn't lookup physical page just allocated\n"); 1689 1690 vm_page_set_state(page, PAGE_STATE_FREE); 1691 } 1692 } 1693 1694 err0: 1695 if (reservedPages > 0) 1696 vm_page_unreserve_pages(&reservation); 1697 if (reservedMemory > 0) 1698 vm_unreserve_memory(reservedMemory); 1699 1700 return status; 1701 } 1702 1703 1704 area_id 1705 vm_map_physical_memory(team_id team, const char* name, void** _address, 1706 uint32 addressSpec, addr_t size, uint32 protection, 1707 phys_addr_t physicalAddress, bool alreadyWired) 1708 { 1709 VMArea* area; 1710 VMCache* cache; 1711 addr_t mapOffset; 1712 1713 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1714 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1715 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1716 addressSpec, size, protection, physicalAddress)); 1717 1718 if (!arch_vm_supports_protection(protection)) 1719 return B_NOT_SUPPORTED; 1720 1721 AddressSpaceWriteLocker locker(team); 1722 if (!locker.IsLocked()) 1723 return B_BAD_TEAM_ID; 1724 1725 // if the physical address is somewhat inside a page, 1726 // move the actual area down to align on a page boundary 1727 mapOffset = physicalAddress % B_PAGE_SIZE; 1728 size += mapOffset; 1729 physicalAddress -= mapOffset; 1730 1731 size = PAGE_ALIGN(size); 1732 1733 // create a device cache 1734 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1735 if (status != B_OK) 1736 return status; 1737 1738 cache->virtual_end = size; 1739 1740 cache->Lock(); 1741 1742 virtual_address_restrictions addressRestrictions = {}; 1743 addressRestrictions.address = *_address; 1744 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1745 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1746 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1747 true, &area, _address); 1748 1749 if (status < B_OK) 1750 cache->ReleaseRefLocked(); 1751 1752 cache->Unlock(); 1753 1754 if (status == B_OK) { 1755 // set requested memory type -- use uncached, if not given 1756 uint32 memoryType = addressSpec & B_MTR_MASK; 1757 if (memoryType == 0) 1758 memoryType = B_MTR_UC; 1759 1760 area->SetMemoryType(memoryType); 1761 1762 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1763 if (status != B_OK) 1764 delete_area(locker.AddressSpace(), area, false); 1765 } 1766 1767 if (status != B_OK) 1768 return status; 1769 1770 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1771 1772 if (alreadyWired) { 1773 // The area is already mapped, but possibly not with the right 1774 // memory type. 
1775 map->Lock(); 1776 map->ProtectArea(area, area->protection); 1777 map->Unlock(); 1778 } else { 1779 // Map the area completely. 1780 1781 // reserve pages needed for the mapping 1782 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1783 area->Base() + (size - 1)); 1784 vm_page_reservation reservation; 1785 vm_page_reserve_pages(&reservation, reservePages, 1786 team == VMAddressSpace::KernelID() 1787 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1788 1789 map->Lock(); 1790 1791 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1792 map->Map(area->Base() + offset, physicalAddress + offset, 1793 protection, area->MemoryType(), &reservation); 1794 } 1795 1796 map->Unlock(); 1797 1798 vm_page_unreserve_pages(&reservation); 1799 } 1800 1801 // modify the pointer returned to be offset back into the new area 1802 // the same way the physical address in was offset 1803 *_address = (void*)((addr_t)*_address + mapOffset); 1804 1805 area->cache_type = CACHE_TYPE_DEVICE; 1806 return area->id; 1807 } 1808 1809 1810 /*! Don't use! 1811 TODO: This function was introduced to map physical page vecs to 1812 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1813 use a device cache and does not track vm_page::wired_count! 1814 */ 1815 area_id 1816 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1817 uint32 addressSpec, addr_t* _size, uint32 protection, 1818 struct generic_io_vec* vecs, uint32 vecCount) 1819 { 1820 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1821 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1822 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1823 addressSpec, _size, protection, vecs, vecCount)); 1824 1825 if (!arch_vm_supports_protection(protection) 1826 || (addressSpec & B_MTR_MASK) != 0) { 1827 return B_NOT_SUPPORTED; 1828 } 1829 1830 AddressSpaceWriteLocker locker(team); 1831 if (!locker.IsLocked()) 1832 return B_BAD_TEAM_ID; 1833 1834 if (vecCount == 0) 1835 return B_BAD_VALUE; 1836 1837 addr_t size = 0; 1838 for (uint32 i = 0; i < vecCount; i++) { 1839 if (vecs[i].base % B_PAGE_SIZE != 0 1840 || vecs[i].length % B_PAGE_SIZE != 0) { 1841 return B_BAD_VALUE; 1842 } 1843 1844 size += vecs[i].length; 1845 } 1846 1847 // create a device cache 1848 VMCache* cache; 1849 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1850 if (result != B_OK) 1851 return result; 1852 1853 cache->virtual_end = size; 1854 1855 cache->Lock(); 1856 1857 VMArea* area; 1858 virtual_address_restrictions addressRestrictions = {}; 1859 addressRestrictions.address = *_address; 1860 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1861 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1862 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1863 &addressRestrictions, true, &area, _address); 1864 1865 if (result != B_OK) 1866 cache->ReleaseRefLocked(); 1867 1868 cache->Unlock(); 1869 1870 if (result != B_OK) 1871 return result; 1872 1873 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1874 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1875 area->Base() + (size - 1)); 1876 1877 vm_page_reservation reservation; 1878 vm_page_reserve_pages(&reservation, reservePages, 1879 team == VMAddressSpace::KernelID() 1880 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1881 map->Lock(); 1882 1883 uint32 vecIndex = 0; 1884 size_t vecOffset = 0; 1885 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1886 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1887 vecOffset = 0; 1888 vecIndex++; 1889 } 1890 1891 if (vecIndex >= vecCount) 1892 break; 1893 1894 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1895 protection, area->MemoryType(), &reservation); 1896 1897 vecOffset += B_PAGE_SIZE; 1898 } 1899 1900 map->Unlock(); 1901 vm_page_unreserve_pages(&reservation); 1902 1903 if (_size != NULL) 1904 *_size = size; 1905 1906 area->cache_type = CACHE_TYPE_DEVICE; 1907 return area->id; 1908 } 1909 1910 1911 area_id 1912 vm_create_null_area(team_id team, const char* name, void** address, 1913 uint32 addressSpec, addr_t size, uint32 flags) 1914 { 1915 size = PAGE_ALIGN(size); 1916 1917 // Lock the address space and, if B_EXACT_ADDRESS and 1918 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1919 // is not wired. 1920 AddressSpaceWriteLocker locker; 1921 do { 1922 if (locker.SetTo(team) != B_OK) 1923 return B_BAD_TEAM_ID; 1924 } while (addressSpec == B_EXACT_ADDRESS 1925 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1926 && wait_if_address_range_is_wired(locker.AddressSpace(), 1927 (addr_t)*address, size, &locker)); 1928 1929 // create a null cache 1930 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1931 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1932 VMCache* cache; 1933 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1934 if (status != B_OK) 1935 return status; 1936 1937 cache->temporary = 1; 1938 cache->virtual_end = size; 1939 1940 cache->Lock(); 1941 1942 VMArea* area; 1943 virtual_address_restrictions addressRestrictions = {}; 1944 addressRestrictions.address = *address; 1945 addressRestrictions.address_specification = addressSpec; 1946 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1947 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 1948 REGION_NO_PRIVATE_MAP, flags, 1949 &addressRestrictions, true, &area, address); 1950 1951 if (status < B_OK) { 1952 cache->ReleaseRefAndUnlock(); 1953 return status; 1954 } 1955 1956 cache->Unlock(); 1957 1958 area->cache_type = CACHE_TYPE_NULL; 1959 return area->id; 1960 } 1961 1962 1963 /*! Creates the vnode cache for the specified \a vnode. 1964 The vnode has to be marked busy when calling this function. 1965 */ 1966 status_t 1967 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1968 { 1969 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1970 } 1971 1972 1973 /*! \a cache must be locked. The area's address space must be read-locked. 
1974 */ 1975 static void 1976 pre_map_area_pages(VMArea* area, VMCache* cache, 1977 vm_page_reservation* reservation) 1978 { 1979 addr_t baseAddress = area->Base(); 1980 addr_t cacheOffset = area->cache_offset; 1981 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1982 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1983 1984 for (VMCachePagesTree::Iterator it 1985 = cache->pages.GetIterator(firstPage, true, true); 1986 vm_page* page = it.Next();) { 1987 if (page->cache_offset >= endPage) 1988 break; 1989 1990 // skip busy and inactive pages 1991 if (page->busy || page->usage_count == 0) 1992 continue; 1993 1994 DEBUG_PAGE_ACCESS_START(page); 1995 map_page(area, page, 1996 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1997 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1998 DEBUG_PAGE_ACCESS_END(page); 1999 } 2000 } 2001 2002 2003 /*! Will map the file specified by \a fd to an area in memory. 2004 The file will be mirrored beginning at the specified \a offset. The 2005 \a offset and \a size arguments have to be page aligned. 2006 */ 2007 static area_id 2008 _vm_map_file(team_id team, const char* name, void** _address, 2009 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2010 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2011 { 2012 // TODO: for binary files, we want to make sure that they get a 2013 // copy of the file at a given time, i.e. later changes should not 2014 // make it into the mapped copy -- this will need quite some changes 2015 // to be done in a nice way 2016 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2017 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2018 2019 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2020 size = PAGE_ALIGN(size); 2021 2022 if (mapping == REGION_NO_PRIVATE_MAP) 2023 protection |= B_SHARED_AREA; 2024 if (addressSpec != B_EXACT_ADDRESS) 2025 unmapAddressRange = false; 2026 2027 if (fd < 0) { 2028 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2029 virtual_address_restrictions virtualRestrictions = {}; 2030 virtualRestrictions.address = *_address; 2031 virtualRestrictions.address_specification = addressSpec; 2032 physical_address_restrictions physicalRestrictions = {}; 2033 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2034 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2035 _address); 2036 } 2037 2038 // get the open flags of the FD 2039 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2040 if (descriptor == NULL) 2041 return EBADF; 2042 int32 openMode = descriptor->open_mode; 2043 put_fd(descriptor); 2044 2045 // The FD must be open for reading in any case. For shared mappings with 2046 // write access, the FD must additionally be open for writing.
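	// Illustrative sketch (hypothetical helper, not used in this file): the
	// access-mode rule enforced by the check below can be expressed as a
	// small predicate, assuming standard POSIX O_ACCMODE semantics:
	//
	//	static bool
	//	mapping_access_allowed(int32 openMode, uint32 mapping, uint32 protection)
	//	{
	//		if ((openMode & O_ACCMODE) == O_WRONLY)
	//			return false;	// read access is always required
	//		if (mapping == REGION_NO_PRIVATE_MAP
	//			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
	//			&& (openMode & O_ACCMODE) == O_RDONLY)
	//			return false;	// shared writable mapping needs a writable FD
	//		return true;
	//	}
	//
	// A private (copy-on-write) mapping only needs read access, since writes
	// never reach the underlying file.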
2047 if ((openMode & O_ACCMODE) == O_WRONLY 2048 || (mapping == REGION_NO_PRIVATE_MAP 2049 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2050 && (openMode & O_ACCMODE) == O_RDONLY)) { 2051 return EACCES; 2052 } 2053 2054 uint32 protectionMax = 0; 2055 if (mapping != REGION_PRIVATE_MAP) { 2056 protectionMax = protection | B_READ_AREA; 2057 if ((openMode & O_ACCMODE) == O_RDWR) 2058 protectionMax |= B_WRITE_AREA; 2059 } 2060 2061 // get the vnode for the object, this also grabs a ref to it 2062 struct vnode* vnode = NULL; 2063 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2064 if (status < B_OK) 2065 return status; 2066 VnodePutter vnodePutter(vnode); 2067 2068 // If we're going to pre-map pages, we need to reserve the pages needed by 2069 // the mapping backend upfront. 2070 page_num_t reservedPreMapPages = 0; 2071 vm_page_reservation reservation; 2072 if ((protection & B_READ_AREA) != 0) { 2073 AddressSpaceWriteLocker locker; 2074 status = locker.SetTo(team); 2075 if (status != B_OK) 2076 return status; 2077 2078 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2079 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2080 2081 locker.Unlock(); 2082 2083 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2084 team == VMAddressSpace::KernelID() 2085 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2086 } 2087 2088 struct PageUnreserver { 2089 PageUnreserver(vm_page_reservation* reservation) 2090 : 2091 fReservation(reservation) 2092 { 2093 } 2094 2095 ~PageUnreserver() 2096 { 2097 if (fReservation != NULL) 2098 vm_page_unreserve_pages(fReservation); 2099 } 2100 2101 vm_page_reservation* fReservation; 2102 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2103 2104 // Lock the address space and, if the specified address range shall be 2105 // unmapped, ensure it is not wired. 2106 AddressSpaceWriteLocker locker; 2107 do { 2108 if (locker.SetTo(team) != B_OK) 2109 return B_BAD_TEAM_ID; 2110 } while (unmapAddressRange 2111 && wait_if_address_range_is_wired(locker.AddressSpace(), 2112 (addr_t)*_address, size, &locker)); 2113 2114 // TODO: this only works for file systems that use the file cache 2115 VMCache* cache; 2116 status = vfs_get_vnode_cache(vnode, &cache, false); 2117 if (status < B_OK) 2118 return status; 2119 2120 cache->Lock(); 2121 2122 VMArea* area; 2123 virtual_address_restrictions addressRestrictions = {}; 2124 addressRestrictions.address = *_address; 2125 addressRestrictions.address_specification = addressSpec; 2126 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2127 0, protection, protectionMax, mapping, 2128 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2129 &addressRestrictions, kernel, &area, _address); 2130 2131 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2132 // map_backing_store() cannot know we no longer need the ref 2133 cache->ReleaseRefLocked(); 2134 } 2135 2136 if (status == B_OK && (protection & B_READ_AREA) != 0) 2137 pre_map_area_pages(area, cache, &reservation); 2138 2139 cache->Unlock(); 2140 2141 if (status == B_OK) { 2142 // TODO: this probably deserves a smarter solution, ie. don't always 2143 // prefetch stuff, and also, probably don't trigger it at this place. 
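	// Illustrative example of the clamping applied below, assuming min_c()
	// behaves as a plain minimum:
	//
	//	size =  4 MB  ->  prefetch the whole 4 MB starting at "offset"
	//	size = 64 MB  ->  prefetch only the first 10 MB starting at "offset"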
2144 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2145 // prefetches at max 10 MB starting from "offset" 2146 } 2147 2148 if (status != B_OK) 2149 return status; 2150 2151 area->cache_type = CACHE_TYPE_VNODE; 2152 return area->id; 2153 } 2154 2155 2156 area_id 2157 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2158 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2159 int fd, off_t offset) 2160 { 2161 if (!arch_vm_supports_protection(protection)) 2162 return B_NOT_SUPPORTED; 2163 2164 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2165 mapping, unmapAddressRange, fd, offset, true); 2166 } 2167 2168 2169 VMCache* 2170 vm_area_get_locked_cache(VMArea* area) 2171 { 2172 rw_lock_read_lock(&sAreaCacheLock); 2173 2174 while (true) { 2175 VMCache* cache = area->cache; 2176 2177 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2178 // cache has been deleted 2179 rw_lock_read_lock(&sAreaCacheLock); 2180 continue; 2181 } 2182 2183 rw_lock_read_lock(&sAreaCacheLock); 2184 2185 if (cache == area->cache) { 2186 cache->AcquireRefLocked(); 2187 rw_lock_read_unlock(&sAreaCacheLock); 2188 return cache; 2189 } 2190 2191 // the cache changed in the meantime 2192 cache->Unlock(); 2193 } 2194 } 2195 2196 2197 void 2198 vm_area_put_locked_cache(VMCache* cache) 2199 { 2200 cache->ReleaseRefAndUnlock(); 2201 } 2202 2203 2204 area_id 2205 vm_clone_area(team_id team, const char* name, void** address, 2206 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2207 bool kernel) 2208 { 2209 VMArea* newArea = NULL; 2210 VMArea* sourceArea; 2211 2212 // Check whether the source area exists and is cloneable. If so, mark it 2213 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2214 { 2215 AddressSpaceWriteLocker locker; 2216 status_t status = locker.SetFromArea(sourceID, sourceArea); 2217 if (status != B_OK) 2218 return status; 2219 2220 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2221 return B_NOT_ALLOWED; 2222 2223 sourceArea->protection |= B_SHARED_AREA; 2224 protection |= B_SHARED_AREA; 2225 } 2226 2227 // Now lock both address spaces and actually do the cloning. 
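	// Locking sketch for what follows (mirrors the code below; error handling
	// omitted): all involved address spaces are registered first and then
	// locked together in a single step, so the lock order between source and
	// target address space is determined by the locker rather than by the
	// caller.
	//
	//	MultiAddressSpaceLocker locker;
	//	locker.AddArea(sourceID, false, &sourceAddressSpace);
	//	locker.AddTeam(team, true, &targetAddressSpace);
	//	locker.Lock();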
2228 2229 MultiAddressSpaceLocker locker; 2230 VMAddressSpace* sourceAddressSpace; 2231 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2232 if (status != B_OK) 2233 return status; 2234 2235 VMAddressSpace* targetAddressSpace; 2236 status = locker.AddTeam(team, true, &targetAddressSpace); 2237 if (status != B_OK) 2238 return status; 2239 2240 status = locker.Lock(); 2241 if (status != B_OK) 2242 return status; 2243 2244 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2245 if (sourceArea == NULL) 2246 return B_BAD_VALUE; 2247 2248 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2249 return B_NOT_ALLOWED; 2250 2251 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2252 2253 if (!kernel && sourceAddressSpace != targetAddressSpace 2254 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2255 #if KDEBUG 2256 Team* team = thread_get_current_thread()->team; 2257 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2258 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2259 #endif 2260 status = B_NOT_ALLOWED; 2261 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2262 status = B_NOT_ALLOWED; 2263 } else { 2264 virtual_address_restrictions addressRestrictions = {}; 2265 addressRestrictions.address = *address; 2266 addressRestrictions.address_specification = addressSpec; 2267 status = map_backing_store(targetAddressSpace, cache, 2268 sourceArea->cache_offset, name, sourceArea->Size(), 2269 sourceArea->wiring, protection, sourceArea->protection_max, 2270 mapping, 0, &addressRestrictions, 2271 kernel, &newArea, address); 2272 } 2273 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2274 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2275 // to create a new cache, and has therefore already acquired a reference 2276 // to the source cache - but otherwise it has no idea that we need 2277 // one. 2278 cache->AcquireRefLocked(); 2279 } 2280 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2281 // we need to map in everything at this point 2282 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2283 // we don't have actual pages to map but a physical area 2284 VMTranslationMap* map 2285 = sourceArea->address_space->TranslationMap(); 2286 map->Lock(); 2287 2288 phys_addr_t physicalAddress; 2289 uint32 oldProtection; 2290 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2291 2292 map->Unlock(); 2293 2294 map = targetAddressSpace->TranslationMap(); 2295 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2296 newArea->Base() + (newArea->Size() - 1)); 2297 2298 vm_page_reservation reservation; 2299 vm_page_reserve_pages(&reservation, reservePages, 2300 targetAddressSpace == VMAddressSpace::Kernel() 2301 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2302 map->Lock(); 2303 2304 for (addr_t offset = 0; offset < newArea->Size(); 2305 offset += B_PAGE_SIZE) { 2306 map->Map(newArea->Base() + offset, physicalAddress + offset, 2307 protection, newArea->MemoryType(), &reservation); 2308 } 2309 2310 map->Unlock(); 2311 vm_page_unreserve_pages(&reservation); 2312 } else { 2313 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2314 size_t reservePages = map->MaxPagesNeededToMap( 2315 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2316 vm_page_reservation reservation; 2317 vm_page_reserve_pages(&reservation, reservePages, 2318 targetAddressSpace == VMAddressSpace::Kernel() 2319 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2320 2321 // map in all pages from source 2322 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2323 vm_page* page = it.Next();) { 2324 if (!page->busy) { 2325 DEBUG_PAGE_ACCESS_START(page); 2326 map_page(newArea, page, 2327 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2328 - newArea->cache_offset), 2329 protection, &reservation); 2330 DEBUG_PAGE_ACCESS_END(page); 2331 } 2332 } 2333 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2334 // ensuring that! 2335 2336 vm_page_unreserve_pages(&reservation); 2337 } 2338 } 2339 if (status == B_OK) 2340 newArea->cache_type = sourceArea->cache_type; 2341 2342 vm_area_put_locked_cache(cache); 2343 2344 if (status < B_OK) 2345 return status; 2346 2347 return newArea->id; 2348 } 2349 2350 2351 /*! Deletes the specified area of the given address space. 2352 2353 The address space must be write-locked. 2354 The caller must ensure that the area does not have any wired ranges. 2355 2356 \param addressSpace The address space containing the area. 2357 \param area The area to be deleted. 2358 \param deletingAddressSpace \c true, if the address space is in the process 2359 of being deleted. 2360 */ 2361 static void 2362 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2363 bool deletingAddressSpace) 2364 { 2365 ASSERT(!area->IsWired()); 2366 2367 VMAreaHash::Remove(area); 2368 2369 // At this point the area is removed from the global hash table, but 2370 // still exists in the area list. 2371 2372 // Unmap the virtual address space the area occupied. 2373 { 2374 // We need to lock the complete cache chain. 2375 VMCache* topCache = vm_area_get_locked_cache(area); 2376 VMCacheChainLocker cacheChainLocker(topCache); 2377 cacheChainLocker.LockAllSourceCaches(); 2378 2379 // If the area's top cache is a temporary cache and the area is the only 2380 // one referencing it (besides us currently holding a second reference), 2381 // the unmapping code doesn't need to care about preserving the accessed 2382 // and dirty flags of the top cache page mappings. 2383 bool ignoreTopCachePageFlags 2384 = topCache->temporary && topCache->RefCount() == 2; 2385 2386 area->address_space->TranslationMap()->UnmapArea(area, 2387 deletingAddressSpace, ignoreTopCachePageFlags); 2388 } 2389 2390 if (!area->cache->temporary) 2391 area->cache->WriteModified(); 2392 2393 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2394 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2395 2396 arch_vm_unset_memory_type(area); 2397 addressSpace->RemoveArea(area, allocationFlags); 2398 addressSpace->Put(); 2399 2400 area->cache->RemoveArea(area); 2401 area->cache->ReleaseRef(); 2402 2403 addressSpace->DeleteArea(area, allocationFlags); 2404 } 2405 2406 2407 status_t 2408 vm_delete_area(team_id team, area_id id, bool kernel) 2409 { 2410 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2411 team, id)); 2412 2413 // lock the address space and make sure the area isn't wired 2414 AddressSpaceWriteLocker locker; 2415 VMArea* area; 2416 AreaCacheLocker cacheLocker; 2417 2418 do { 2419 status_t status = locker.SetFromArea(team, id, area); 2420 if (status != B_OK) 2421 return status; 2422 2423 cacheLocker.SetTo(area); 2424 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2425 2426 cacheLocker.Unlock(); 2427 2428 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2429 return B_NOT_ALLOWED; 2430 2431 delete_area(locker.AddressSpace(), area, false); 2432 return B_OK; 2433 } 2434 2435 2436 /*! Creates a new cache on top of given cache, moves all areas from 2437 the old cache to the new one, and changes the protection of all affected 2438 areas' pages to read-only. If requested, wired pages are moved up to the 2439 new cache and copies are added to the old cache in their place. 2440 Preconditions: 2441 - The given cache must be locked. 2442 - All of the cache's areas' address spaces must be read locked. 2443 - Either the cache must not have any wired ranges or a page reservation for 2444 all wired pages must be provided, so they can be copied. 2445 2446 \param lowerCache The cache on top of which a new cache shall be created. 2447 \param wiredPagesReservation If \c NULL there must not be any wired pages 2448 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2449 has wired page. The wired pages are copied in this case. 2450 */ 2451 static status_t 2452 vm_copy_on_write_area(VMCache* lowerCache, 2453 vm_page_reservation* wiredPagesReservation) 2454 { 2455 VMCache* upperCache; 2456 2457 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2458 2459 // We need to separate the cache from its areas. The cache goes one level 2460 // deeper and we create a new cache inbetween. 2461 2462 // create an anonymous cache 2463 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2464 lowerCache->GuardSize() / B_PAGE_SIZE, 2465 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2466 VM_PRIORITY_USER); 2467 if (status != B_OK) 2468 return status; 2469 2470 upperCache->Lock(); 2471 2472 upperCache->temporary = 1; 2473 upperCache->virtual_base = lowerCache->virtual_base; 2474 upperCache->virtual_end = lowerCache->virtual_end; 2475 2476 // transfer the lower cache areas to the upper cache 2477 rw_lock_write_lock(&sAreaCacheLock); 2478 upperCache->TransferAreas(lowerCache); 2479 rw_lock_write_unlock(&sAreaCacheLock); 2480 2481 lowerCache->AddConsumer(upperCache); 2482 2483 // We now need to remap all pages from all of the cache's areas read-only, 2484 // so that a copy will be created on next write access. If there are wired 2485 // pages, we keep their protection, move them to the upper cache and create 2486 // copies for the lower cache. 2487 if (wiredPagesReservation != NULL) { 2488 // We need to handle wired pages -- iterate through the cache's pages. 
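		// Per-page sketch of the loop below (assumes the reservation holds at
		// least lowerCache->WiredPagesCount() pages): a wired page is copied
		// and the original moves up to the upper cache, every other page is
		// merely remapped read-only.
		//
		//	vm_page* copy = vm_page_allocate_page(wiredPagesReservation,
		//		PAGE_STATE_ACTIVE);
		//	vm_memcpy_physical_page(copy->physical_page_number * B_PAGE_SIZE,
		//		page->physical_page_number * B_PAGE_SIZE);
		//	upperCache->MovePage(page);		// wired page stays with the areas
		//	lowerCache->InsertPage(copy, page->cache_offset * B_PAGE_SIZE);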
2489 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2490 vm_page* page = it.Next();) { 2491 if (page->WiredCount() > 0) { 2492 // allocate a new page and copy the wired one 2493 vm_page* copiedPage = vm_page_allocate_page( 2494 wiredPagesReservation, PAGE_STATE_ACTIVE); 2495 2496 vm_memcpy_physical_page( 2497 copiedPage->physical_page_number * B_PAGE_SIZE, 2498 page->physical_page_number * B_PAGE_SIZE); 2499 2500 // move the wired page to the upper cache (note: removing is OK 2501 // with the SplayTree iterator) and insert the copy 2502 upperCache->MovePage(page); 2503 lowerCache->InsertPage(copiedPage, 2504 page->cache_offset * B_PAGE_SIZE); 2505 2506 DEBUG_PAGE_ACCESS_END(copiedPage); 2507 } else { 2508 // Change the protection of this page in all areas. 2509 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2510 tempArea = tempArea->cache_next) { 2511 // The area must be readable in the same way it was 2512 // previously writable. 2513 uint32 protection = B_KERNEL_READ_AREA; 2514 if ((tempArea->protection & B_READ_AREA) != 0) 2515 protection |= B_READ_AREA; 2516 2517 VMTranslationMap* map 2518 = tempArea->address_space->TranslationMap(); 2519 map->Lock(); 2520 map->ProtectPage(tempArea, 2521 virtual_page_address(tempArea, page), protection); 2522 map->Unlock(); 2523 } 2524 } 2525 } 2526 } else { 2527 ASSERT(lowerCache->WiredPagesCount() == 0); 2528 2529 // just change the protection of all areas 2530 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2531 tempArea = tempArea->cache_next) { 2532 // The area must be readable in the same way it was previously 2533 // writable. 2534 uint32 protection = B_KERNEL_READ_AREA; 2535 if ((tempArea->protection & B_READ_AREA) != 0) 2536 protection |= B_READ_AREA; 2537 2538 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2539 map->Lock(); 2540 map->ProtectArea(tempArea, protection); 2541 map->Unlock(); 2542 } 2543 } 2544 2545 vm_area_put_locked_cache(upperCache); 2546 2547 return B_OK; 2548 } 2549 2550 2551 area_id 2552 vm_copy_area(team_id team, const char* name, void** _address, 2553 uint32 addressSpec, area_id sourceID) 2554 { 2555 // Do the locking: target address space, all address spaces associated with 2556 // the source cache, and the cache itself. 2557 MultiAddressSpaceLocker locker; 2558 VMAddressSpace* targetAddressSpace; 2559 VMCache* cache; 2560 VMArea* source; 2561 AreaCacheLocker cacheLocker; 2562 status_t status; 2563 bool sharedArea; 2564 2565 page_num_t wiredPages = 0; 2566 vm_page_reservation wiredPagesReservation; 2567 2568 bool restart; 2569 do { 2570 restart = false; 2571 2572 locker.Unset(); 2573 status = locker.AddTeam(team, true, &targetAddressSpace); 2574 if (status == B_OK) { 2575 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2576 &cache); 2577 } 2578 if (status != B_OK) 2579 return status; 2580 2581 cacheLocker.SetTo(cache, true); // already locked 2582 2583 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2584 2585 page_num_t oldWiredPages = wiredPages; 2586 wiredPages = 0; 2587 2588 // If the source area isn't shared, count the number of wired pages in 2589 // the cache and reserve as many pages. 
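		// Reserve-and-retry sketch of the surrounding do-while loop:
		// reserving pages may block, so it must happen with all locks
		// dropped. If more pages got wired in the meantime, the reservation
		// is grown and the locking is restarted until it covers the current
		// wired page count.
		//
		//	wiredPages = cache->WiredPagesCount();
		//	if (wiredPages > oldWiredPages) {
		//		// drop cache and address space locks first
		//		vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
		//			VM_PRIORITY_USER);
		//		restart = true;
		//	}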
2590 if (!sharedArea) { 2591 wiredPages = cache->WiredPagesCount(); 2592 2593 if (wiredPages > oldWiredPages) { 2594 cacheLocker.Unlock(); 2595 locker.Unlock(); 2596 2597 if (oldWiredPages > 0) 2598 vm_page_unreserve_pages(&wiredPagesReservation); 2599 2600 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2601 VM_PRIORITY_USER); 2602 2603 restart = true; 2604 } 2605 } else if (oldWiredPages > 0) 2606 vm_page_unreserve_pages(&wiredPagesReservation); 2607 } while (restart); 2608 2609 // unreserve pages later 2610 struct PagesUnreserver { 2611 PagesUnreserver(vm_page_reservation* reservation) 2612 : 2613 fReservation(reservation) 2614 { 2615 } 2616 2617 ~PagesUnreserver() 2618 { 2619 if (fReservation != NULL) 2620 vm_page_unreserve_pages(fReservation); 2621 } 2622 2623 private: 2624 vm_page_reservation* fReservation; 2625 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2626 2627 bool writableCopy 2628 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2629 uint8* targetPageProtections = NULL; 2630 2631 if (source->page_protections != NULL) { 2632 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2633 targetPageProtections = (uint8*)malloc_etc(bytes, 2634 (source->address_space == VMAddressSpace::Kernel() 2635 || targetAddressSpace == VMAddressSpace::Kernel()) 2636 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2637 if (targetPageProtections == NULL) 2638 return B_NO_MEMORY; 2639 2640 memcpy(targetPageProtections, source->page_protections, bytes); 2641 2642 if (!writableCopy) { 2643 for (size_t i = 0; i < bytes; i++) { 2644 if ((targetPageProtections[i] 2645 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2646 writableCopy = true; 2647 break; 2648 } 2649 } 2650 } 2651 } 2652 2653 if (addressSpec == B_CLONE_ADDRESS) { 2654 addressSpec = B_EXACT_ADDRESS; 2655 *_address = (void*)source->Base(); 2656 } 2657 2658 // First, create a cache on top of the source area, respectively use the 2659 // existing one, if this is a shared area. 2660 2661 VMArea* target; 2662 virtual_address_restrictions addressRestrictions = {}; 2663 addressRestrictions.address = *_address; 2664 addressRestrictions.address_specification = addressSpec; 2665 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2666 name, source->Size(), source->wiring, source->protection, 2667 source->protection_max, 2668 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2669 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2670 &addressRestrictions, true, &target, _address); 2671 if (status < B_OK) { 2672 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2673 return status; 2674 } 2675 2676 if (targetPageProtections != NULL) 2677 target->page_protections = targetPageProtections; 2678 2679 if (sharedArea) { 2680 // The new area uses the old area's cache, but map_backing_store() 2681 // hasn't acquired a ref. So we have to do that now. 2682 cache->AcquireRefLocked(); 2683 } 2684 2685 // If the source area is writable, we need to move it one layer up as well 2686 2687 if (!sharedArea) { 2688 if (writableCopy) { 2689 // TODO: do something more useful if this fails! 2690 if (vm_copy_on_write_area(cache, 2691 wiredPages > 0 ? 
&wiredPagesReservation : NULL) < B_OK) { 2692 panic("vm_copy_on_write_area() failed!\n"); 2693 } 2694 } 2695 } 2696 2697 // we return the ID of the newly created area 2698 return target->id; 2699 } 2700 2701 2702 status_t 2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2704 bool kernel) 2705 { 2706 fix_protection(&newProtection); 2707 2708 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2709 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2710 2711 if (!arch_vm_supports_protection(newProtection)) 2712 return B_NOT_SUPPORTED; 2713 2714 bool becomesWritable 2715 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2716 2717 // lock address spaces and cache 2718 MultiAddressSpaceLocker locker; 2719 VMCache* cache; 2720 VMArea* area; 2721 status_t status; 2722 AreaCacheLocker cacheLocker; 2723 bool isWritable; 2724 2725 bool restart; 2726 do { 2727 restart = false; 2728 2729 locker.Unset(); 2730 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2731 if (status != B_OK) 2732 return status; 2733 2734 cacheLocker.SetTo(cache, true); // already locked 2735 2736 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2737 || (area->protection & B_KERNEL_AREA) != 0)) { 2738 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2739 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2740 " (%s)\n", team, newProtection, areaID, area->name); 2741 return B_NOT_ALLOWED; 2742 } 2743 if (!kernel && area->protection_max != 0 2744 && (newProtection & area->protection_max) 2745 != (newProtection & B_USER_PROTECTION)) { 2746 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2747 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2748 "area %" B_PRId32 " (%s)\n", team, newProtection, 2749 area->protection_max, areaID, area->name); 2750 return B_NOT_ALLOWED; 2751 } 2752 2753 if (area->protection == newProtection) 2754 return B_OK; 2755 2756 if (team != VMAddressSpace::KernelID() 2757 && area->address_space->ID() != team) { 2758 // unless you're the kernel, you are only allowed to set 2759 // the protection of your own areas 2760 return B_NOT_ALLOWED; 2761 } 2762 2763 isWritable 2764 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2765 2766 // Make sure the area (respectively, if we're going to call 2767 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2768 // wired ranges. 2769 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2770 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2771 otherArea = otherArea->cache_next) { 2772 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2773 restart = true; 2774 break; 2775 } 2776 } 2777 } else { 2778 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2779 restart = true; 2780 } 2781 } while (restart); 2782 2783 bool changePageProtection = true; 2784 bool changeTopCachePagesOnly = false; 2785 2786 if (isWritable && !becomesWritable) { 2787 // writable -> !writable 2788 2789 if (cache->source != NULL && cache->temporary) { 2790 if (cache->CountWritableAreas(area) == 0) { 2791 // Since this cache now lives from the pages in its source cache, 2792 // we can change the cache's commitment to take only those pages 2793 // into account that really are in this cache. 2794 2795 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2796 team == VMAddressSpace::KernelID() 2797 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2798 2799 // TODO: we may be able to join with our source cache, if 2800 // count == 0 2801 } 2802 } 2803 2804 // If only the writability changes, we can just remap the pages of the 2805 // top cache, since the pages of lower caches are mapped read-only 2806 // anyway. That's advantageous only, if the number of pages in the cache 2807 // is significantly smaller than the number of pages in the area, 2808 // though. 2809 if (newProtection 2810 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2811 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2812 changeTopCachePagesOnly = true; 2813 } 2814 } else if (!isWritable && becomesWritable) { 2815 // !writable -> writable 2816 2817 if (!cache->consumers.IsEmpty()) { 2818 // There are consumers -- we have to insert a new cache. Fortunately 2819 // vm_copy_on_write_area() does everything that's needed. 2820 changePageProtection = false; 2821 status = vm_copy_on_write_area(cache, NULL); 2822 } else { 2823 // No consumers, so we don't need to insert a new one. 2824 if (cache->source != NULL && cache->temporary) { 2825 // the cache's commitment must contain all possible pages 2826 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2827 team == VMAddressSpace::KernelID() 2828 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2829 } 2830 2831 if (status == B_OK && cache->source != NULL) { 2832 // There's a source cache, hence we can't just change all pages' 2833 // protection or we might allow writing into pages belonging to 2834 // a lower cache. 2835 changeTopCachePagesOnly = true; 2836 } 2837 } 2838 } else { 2839 // we don't have anything special to do in all other cases 2840 } 2841 2842 if (status == B_OK) { 2843 // remap existing pages in this cache 2844 if (changePageProtection) { 2845 VMTranslationMap* map = area->address_space->TranslationMap(); 2846 map->Lock(); 2847 2848 if (changeTopCachePagesOnly) { 2849 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2850 page_num_t lastPageOffset 2851 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2852 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2853 vm_page* page = it.Next();) { 2854 if (page->cache_offset >= firstPageOffset 2855 && page->cache_offset <= lastPageOffset) { 2856 addr_t address = virtual_page_address(area, page); 2857 map->ProtectPage(area, address, newProtection); 2858 } 2859 } 2860 } else 2861 map->ProtectArea(area, newProtection); 2862 2863 map->Unlock(); 2864 } 2865 2866 area->protection = newProtection; 2867 } 2868 2869 return status; 2870 } 2871 2872 2873 status_t 2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2875 { 2876 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2877 if (addressSpace == NULL) 2878 return B_BAD_TEAM_ID; 2879 2880 VMTranslationMap* map = addressSpace->TranslationMap(); 2881 2882 map->Lock(); 2883 uint32 dummyFlags; 2884 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2885 map->Unlock(); 2886 2887 addressSpace->Put(); 2888 return status; 2889 } 2890 2891 2892 /*! The page's cache must be locked. 
2893 */ 2894 bool 2895 vm_test_map_modification(vm_page* page) 2896 { 2897 if (page->modified) 2898 return true; 2899 2900 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2901 vm_page_mapping* mapping; 2902 while ((mapping = iterator.Next()) != NULL) { 2903 VMArea* area = mapping->area; 2904 VMTranslationMap* map = area->address_space->TranslationMap(); 2905 2906 phys_addr_t physicalAddress; 2907 uint32 flags; 2908 map->Lock(); 2909 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2910 map->Unlock(); 2911 2912 if ((flags & PAGE_MODIFIED) != 0) 2913 return true; 2914 } 2915 2916 return false; 2917 } 2918 2919 2920 /*! The page's cache must be locked. 2921 */ 2922 void 2923 vm_clear_map_flags(vm_page* page, uint32 flags) 2924 { 2925 if ((flags & PAGE_ACCESSED) != 0) 2926 page->accessed = false; 2927 if ((flags & PAGE_MODIFIED) != 0) 2928 page->modified = false; 2929 2930 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2931 vm_page_mapping* mapping; 2932 while ((mapping = iterator.Next()) != NULL) { 2933 VMArea* area = mapping->area; 2934 VMTranslationMap* map = area->address_space->TranslationMap(); 2935 2936 map->Lock(); 2937 map->ClearFlags(virtual_page_address(area, page), flags); 2938 map->Unlock(); 2939 } 2940 } 2941 2942 2943 /*! Removes all mappings from a page. 2944 After you've called this function, the page is unmapped from memory and 2945 the page's \c accessed and \c modified flags have been updated according 2946 to the state of the mappings. 2947 The page's cache must be locked. 2948 */ 2949 void 2950 vm_remove_all_page_mappings(vm_page* page) 2951 { 2952 while (vm_page_mapping* mapping = page->mappings.Head()) { 2953 VMArea* area = mapping->area; 2954 VMTranslationMap* map = area->address_space->TranslationMap(); 2955 addr_t address = virtual_page_address(area, page); 2956 map->UnmapPage(area, address, false); 2957 } 2958 } 2959 2960 2961 int32 2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2963 { 2964 int32 count = 0; 2965 2966 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2967 vm_page_mapping* mapping; 2968 while ((mapping = iterator.Next()) != NULL) { 2969 VMArea* area = mapping->area; 2970 VMTranslationMap* map = area->address_space->TranslationMap(); 2971 2972 bool modified; 2973 if (map->ClearAccessedAndModified(area, 2974 virtual_page_address(area, page), false, modified)) { 2975 count++; 2976 } 2977 2978 page->modified |= modified; 2979 } 2980 2981 2982 if (page->accessed) { 2983 count++; 2984 page->accessed = false; 2985 } 2986 2987 return count; 2988 } 2989 2990 2991 /*! Removes all mappings of a page and/or clears the accessed bits of the 2992 mappings. 2993 The function iterates through the page mappings and removes them until 2994 encountering one that has been accessed. From then on it will continue to 2995 iterate, but only clear the accessed flag of the mapping. The page's 2996 \c modified bit will be updated accordingly, the \c accessed bit will be 2997 cleared. 2998 \return The number of mapping accessed bits encountered, including the 2999 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3000 of the page have been removed. 
3001 */ 3002 int32 3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3004 { 3005 ASSERT(page->WiredCount() == 0); 3006 3007 if (page->accessed) 3008 return vm_clear_page_mapping_accessed_flags(page); 3009 3010 while (vm_page_mapping* mapping = page->mappings.Head()) { 3011 VMArea* area = mapping->area; 3012 VMTranslationMap* map = area->address_space->TranslationMap(); 3013 addr_t address = virtual_page_address(area, page); 3014 bool modified = false; 3015 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3016 page->accessed = true; 3017 page->modified |= modified; 3018 return vm_clear_page_mapping_accessed_flags(page); 3019 } 3020 page->modified |= modified; 3021 } 3022 3023 return 0; 3024 } 3025 3026 3027 static int 3028 display_mem(int argc, char** argv) 3029 { 3030 bool physical = false; 3031 addr_t copyAddress; 3032 int32 displayWidth; 3033 int32 itemSize; 3034 int32 num = -1; 3035 addr_t address; 3036 int i = 1, j; 3037 3038 if (argc > 1 && argv[1][0] == '-') { 3039 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3040 physical = true; 3041 i++; 3042 } else 3043 i = 99; 3044 } 3045 3046 if (argc < i + 1 || argc > i + 2) { 3047 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3048 "\tdl - 8 bytes\n" 3049 "\tdw - 4 bytes\n" 3050 "\tds - 2 bytes\n" 3051 "\tdb - 1 byte\n" 3052 "\tstring - a whole string\n" 3053 " -p or --physical only allows memory from a single page to be " 3054 "displayed.\n"); 3055 return 0; 3056 } 3057 3058 address = parse_expression(argv[i]); 3059 3060 if (argc > i + 1) 3061 num = parse_expression(argv[i + 1]); 3062 3063 // build the format string 3064 if (strcmp(argv[0], "db") == 0) { 3065 itemSize = 1; 3066 displayWidth = 16; 3067 } else if (strcmp(argv[0], "ds") == 0) { 3068 itemSize = 2; 3069 displayWidth = 8; 3070 } else if (strcmp(argv[0], "dw") == 0) { 3071 itemSize = 4; 3072 displayWidth = 4; 3073 } else if (strcmp(argv[0], "dl") == 0) { 3074 itemSize = 8; 3075 displayWidth = 2; 3076 } else if (strcmp(argv[0], "string") == 0) { 3077 itemSize = 1; 3078 displayWidth = -1; 3079 } else { 3080 kprintf("display_mem called in an invalid way!\n"); 3081 return 0; 3082 } 3083 3084 if (num <= 0) 3085 num = displayWidth; 3086 3087 void* physicalPageHandle = NULL; 3088 3089 if (physical) { 3090 int32 offset = address & (B_PAGE_SIZE - 1); 3091 if (num * itemSize + offset > B_PAGE_SIZE) { 3092 num = (B_PAGE_SIZE - offset) / itemSize; 3093 kprintf("NOTE: number of bytes has been cut to page size\n"); 3094 } 3095 3096 address = ROUNDDOWN(address, B_PAGE_SIZE); 3097 3098 if (vm_get_physical_page_debug(address, ©Address, 3099 &physicalPageHandle) != B_OK) { 3100 kprintf("getting the hardware page failed."); 3101 return 0; 3102 } 3103 3104 address += offset; 3105 copyAddress += offset; 3106 } else 3107 copyAddress = address; 3108 3109 if (!strcmp(argv[0], "string")) { 3110 kprintf("%p \"", (char*)copyAddress); 3111 3112 // string mode 3113 for (i = 0; true; i++) { 3114 char c; 3115 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3116 != B_OK 3117 || c == '\0') { 3118 break; 3119 } 3120 3121 if (c == '\n') 3122 kprintf("\\n"); 3123 else if (c == '\t') 3124 kprintf("\\t"); 3125 else { 3126 if (!isprint(c)) 3127 c = '.'; 3128 3129 kprintf("%c", c); 3130 } 3131 } 3132 3133 kprintf("\"\n"); 3134 } else { 3135 // number mode 3136 for (i = 0; i < num; i++) { 3137 uint64 value; 3138 3139 if ((i % displayWidth) == 0) { 3140 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3141 if (i != 0) 
3142 kprintf("\n"); 3143 3144 kprintf("[0x%lx] ", address + i * itemSize); 3145 3146 for (j = 0; j < displayed; j++) { 3147 char c; 3148 if (debug_memcpy(B_CURRENT_TEAM, &c, 3149 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3150 displayed = j; 3151 break; 3152 } 3153 if (!isprint(c)) 3154 c = '.'; 3155 3156 kprintf("%c", c); 3157 } 3158 if (num > displayWidth) { 3159 // make sure the spacing in the last line is correct 3160 for (j = displayed; j < displayWidth * itemSize; j++) 3161 kprintf(" "); 3162 } 3163 kprintf(" "); 3164 } 3165 3166 if (debug_memcpy(B_CURRENT_TEAM, &value, 3167 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3168 kprintf("read fault"); 3169 break; 3170 } 3171 3172 switch (itemSize) { 3173 case 1: 3174 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3175 break; 3176 case 2: 3177 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3178 break; 3179 case 4: 3180 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3181 break; 3182 case 8: 3183 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3184 break; 3185 } 3186 } 3187 3188 kprintf("\n"); 3189 } 3190 3191 if (physical) { 3192 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3193 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3194 } 3195 return 0; 3196 } 3197 3198 3199 static void 3200 dump_cache_tree_recursively(VMCache* cache, int level, 3201 VMCache* highlightCache) 3202 { 3203 // print this cache 3204 for (int i = 0; i < level; i++) 3205 kprintf(" "); 3206 if (cache == highlightCache) 3207 kprintf("%p <--\n", cache); 3208 else 3209 kprintf("%p\n", cache); 3210 3211 // recursively print its consumers 3212 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3213 VMCache* consumer = it.Next();) { 3214 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3215 } 3216 } 3217 3218 3219 static int 3220 dump_cache_tree(int argc, char** argv) 3221 { 3222 if (argc != 2 || !strcmp(argv[1], "--help")) { 3223 kprintf("usage: %s <address>\n", argv[0]); 3224 return 0; 3225 } 3226 3227 addr_t address = parse_expression(argv[1]); 3228 if (address == 0) 3229 return 0; 3230 3231 VMCache* cache = (VMCache*)address; 3232 VMCache* root = cache; 3233 3234 // find the root cache (the transitive source) 3235 while (root->source != NULL) 3236 root = root->source; 3237 3238 dump_cache_tree_recursively(root, 0, cache); 3239 3240 return 0; 3241 } 3242 3243 3244 const char* 3245 vm_cache_type_to_string(int32 type) 3246 { 3247 switch (type) { 3248 case CACHE_TYPE_RAM: 3249 return "RAM"; 3250 case CACHE_TYPE_DEVICE: 3251 return "device"; 3252 case CACHE_TYPE_VNODE: 3253 return "vnode"; 3254 case CACHE_TYPE_NULL: 3255 return "null"; 3256 3257 default: 3258 return "unknown"; 3259 } 3260 } 3261 3262 3263 #if DEBUG_CACHE_LIST 3264 3265 static void 3266 update_cache_info_recursively(VMCache* cache, cache_info& info) 3267 { 3268 info.page_count += cache->page_count; 3269 if (cache->type == CACHE_TYPE_RAM) 3270 info.committed += cache->committed_size; 3271 3272 // recurse 3273 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3274 VMCache* consumer = it.Next();) { 3275 update_cache_info_recursively(consumer, info); 3276 } 3277 } 3278 3279 3280 static int 3281 cache_info_compare_page_count(const void* _a, const void* _b) 3282 { 3283 const cache_info* a = (const cache_info*)_a; 3284 const cache_info* b = (const cache_info*)_b; 3285 if (a->page_count == b->page_count) 3286 return 0; 3287 return a->page_count < b->page_count ? 
1 : -1; 3288 } 3289 3290 3291 static int 3292 cache_info_compare_committed(const void* _a, const void* _b) 3293 { 3294 const cache_info* a = (const cache_info*)_a; 3295 const cache_info* b = (const cache_info*)_b; 3296 if (a->committed == b->committed) 3297 return 0; 3298 return a->committed < b->committed ? 1 : -1; 3299 } 3300 3301 3302 static void 3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3304 { 3305 for (int i = 0; i < level; i++) 3306 kprintf(" "); 3307 3308 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3309 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3310 cache->virtual_base, cache->virtual_end, cache->page_count); 3311 3312 if (level == 0) 3313 kprintf("/%lu", info.page_count); 3314 3315 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3316 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3317 3318 if (level == 0) 3319 kprintf("/%lu", info.committed); 3320 } 3321 3322 // areas 3323 if (cache->areas != NULL) { 3324 VMArea* area = cache->areas; 3325 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3326 area->name, area->address_space->ID()); 3327 3328 while (area->cache_next != NULL) { 3329 area = area->cache_next; 3330 kprintf(", %" B_PRId32, area->id); 3331 } 3332 } 3333 3334 kputs("\n"); 3335 3336 // recurse 3337 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3338 VMCache* consumer = it.Next();) { 3339 dump_caches_recursively(consumer, info, level + 1); 3340 } 3341 } 3342 3343 3344 static int 3345 dump_caches(int argc, char** argv) 3346 { 3347 if (sCacheInfoTable == NULL) { 3348 kprintf("No cache info table!\n"); 3349 return 0; 3350 } 3351 3352 bool sortByPageCount = true; 3353 3354 for (int32 i = 1; i < argc; i++) { 3355 if (strcmp(argv[i], "-c") == 0) { 3356 sortByPageCount = false; 3357 } else { 3358 print_debugger_command_usage(argv[0]); 3359 return 0; 3360 } 3361 } 3362 3363 uint32 totalCount = 0; 3364 uint32 rootCount = 0; 3365 off_t totalCommitted = 0; 3366 page_num_t totalPages = 0; 3367 3368 VMCache* cache = gDebugCacheList; 3369 while (cache) { 3370 totalCount++; 3371 if (cache->source == NULL) { 3372 cache_info stackInfo; 3373 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3374 ? sCacheInfoTable[rootCount] : stackInfo; 3375 rootCount++; 3376 info.cache = cache; 3377 info.page_count = 0; 3378 info.committed = 0; 3379 update_cache_info_recursively(cache, info); 3380 totalCommitted += info.committed; 3381 totalPages += info.page_count; 3382 } 3383 3384 cache = cache->debug_next; 3385 } 3386 3387 if (rootCount <= (uint32)kCacheInfoTableCount) { 3388 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3389 sortByPageCount 3390 ? &cache_info_compare_page_count 3391 : &cache_info_compare_committed); 3392 } 3393 3394 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3395 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3396 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3397 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3398 "page count" : "committed size"); 3399 3400 if (rootCount <= (uint32)kCacheInfoTableCount) { 3401 for (uint32 i = 0; i < rootCount; i++) { 3402 cache_info& info = sCacheInfoTable[i]; 3403 dump_caches_recursively(info.cache, info, 0); 3404 } 3405 } else 3406 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3407 3408 return 0; 3409 } 3410 3411 #endif // DEBUG_CACHE_LIST 3412 3413 3414 static int 3415 dump_cache(int argc, char** argv) 3416 { 3417 VMCache* cache; 3418 bool showPages = false; 3419 int i = 1; 3420 3421 if (argc < 2 || !strcmp(argv[1], "--help")) { 3422 kprintf("usage: %s [-ps] <address>\n" 3423 " if -p is specified, all pages are shown, if -s is used\n" 3424 " only the cache info is shown respectively.\n", argv[0]); 3425 return 0; 3426 } 3427 while (argv[i][0] == '-') { 3428 char* arg = argv[i] + 1; 3429 while (arg[0]) { 3430 if (arg[0] == 'p') 3431 showPages = true; 3432 arg++; 3433 } 3434 i++; 3435 } 3436 if (argv[i] == NULL) { 3437 kprintf("%s: invalid argument, pass address\n", argv[0]); 3438 return 0; 3439 } 3440 3441 addr_t address = parse_expression(argv[i]); 3442 if (address == 0) 3443 return 0; 3444 3445 cache = (VMCache*)address; 3446 3447 cache->Dump(showPages); 3448 3449 set_debug_variable("_sourceCache", (addr_t)cache->source); 3450 3451 return 0; 3452 } 3453 3454 3455 static void 3456 dump_area_struct(VMArea* area, bool mappings) 3457 { 3458 kprintf("AREA: %p\n", area); 3459 kprintf("name:\t\t'%s'\n", area->name); 3460 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3461 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3462 kprintf("base:\t\t0x%lx\n", area->Base()); 3463 kprintf("size:\t\t0x%lx\n", area->Size()); 3464 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3465 kprintf("page_protection:%p\n", area->page_protections); 3466 kprintf("wiring:\t\t0x%x\n", area->wiring); 3467 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3468 kprintf("cache:\t\t%p\n", area->cache); 3469 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3470 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3471 kprintf("cache_next:\t%p\n", area->cache_next); 3472 kprintf("cache_prev:\t%p\n", area->cache_prev); 3473 3474 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3475 if (mappings) { 3476 kprintf("page mappings:\n"); 3477 while (iterator.HasNext()) { 3478 vm_page_mapping* mapping = iterator.Next(); 3479 kprintf(" %p", mapping->page); 3480 } 3481 kprintf("\n"); 3482 } else { 3483 uint32 count = 0; 3484 while (iterator.Next() != NULL) { 3485 count++; 3486 } 3487 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3488 } 3489 } 3490 3491 3492 static int 3493 dump_area(int argc, char** argv) 3494 { 3495 bool mappings = false; 3496 bool found = false; 3497 int32 index = 1; 3498 VMArea* area; 3499 addr_t num; 3500 3501 if (argc < 2 || !strcmp(argv[1], "--help")) { 3502 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3503 "All areas matching either id/address/name are listed. 
You can\n" 3504 "force to check only a specific item by prefixing the specifier\n" 3505 "with the id/contains/address/name keywords.\n" 3506 "-m shows the area's mappings as well.\n"); 3507 return 0; 3508 } 3509 3510 if (!strcmp(argv[1], "-m")) { 3511 mappings = true; 3512 index++; 3513 } 3514 3515 int32 mode = 0xf; 3516 if (!strcmp(argv[index], "id")) 3517 mode = 1; 3518 else if (!strcmp(argv[index], "contains")) 3519 mode = 2; 3520 else if (!strcmp(argv[index], "name")) 3521 mode = 4; 3522 else if (!strcmp(argv[index], "address")) 3523 mode = 0; 3524 if (mode != 0xf) 3525 index++; 3526 3527 if (index >= argc) { 3528 kprintf("No area specifier given.\n"); 3529 return 0; 3530 } 3531 3532 num = parse_expression(argv[index]); 3533 3534 if (mode == 0) { 3535 dump_area_struct((struct VMArea*)num, mappings); 3536 } else { 3537 // walk through the area list, looking for the arguments as a name 3538 3539 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3540 while ((area = it.Next()) != NULL) { 3541 if (((mode & 4) != 0 3542 && !strcmp(argv[index], area->name)) 3543 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3544 || (((mode & 2) != 0 && area->Base() <= num 3545 && area->Base() + area->Size() > num))))) { 3546 dump_area_struct(area, mappings); 3547 found = true; 3548 } 3549 } 3550 3551 if (!found) 3552 kprintf("could not find area %s (%ld)\n", argv[index], num); 3553 } 3554 3555 return 0; 3556 } 3557 3558 3559 static int 3560 dump_area_list(int argc, char** argv) 3561 { 3562 VMArea* area; 3563 const char* name = NULL; 3564 int32 id = 0; 3565 3566 if (argc > 1) { 3567 id = parse_expression(argv[1]); 3568 if (id == 0) 3569 name = argv[1]; 3570 } 3571 3572 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3573 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3574 B_PRINTF_POINTER_WIDTH, "size"); 3575 3576 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3577 while ((area = it.Next()) != NULL) { 3578 if ((id != 0 && area->address_space->ID() != id) 3579 || (name != NULL && strstr(area->name, name) == NULL)) 3580 continue; 3581 3582 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3583 area->id, (void*)area->Base(), (void*)area->Size(), 3584 area->protection, area->wiring, area->name); 3585 } 3586 return 0; 3587 } 3588 3589 3590 static int 3591 dump_available_memory(int argc, char** argv) 3592 { 3593 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3594 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3595 return 0; 3596 } 3597 3598 3599 static int 3600 dump_mapping_info(int argc, char** argv) 3601 { 3602 bool reverseLookup = false; 3603 bool pageLookup = false; 3604 3605 int argi = 1; 3606 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3607 const char* arg = argv[argi]; 3608 if (strcmp(arg, "-r") == 0) { 3609 reverseLookup = true; 3610 } else if (strcmp(arg, "-p") == 0) { 3611 reverseLookup = true; 3612 pageLookup = true; 3613 } else { 3614 print_debugger_command_usage(argv[0]); 3615 return 0; 3616 } 3617 } 3618 3619 // We need at least one argument, the address. Optionally a thread ID can be 3620 // specified. 
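	// Accepted forms (sketch; "<command>" stands for whatever name this
	// debugger command is registered under elsewhere in this file):
	//
	//	<command> [-r|-p] <address>              use the current/kernel context
	//	<command> [-r|-p] <address> <thread ID>  use that thread's team
	//
	// i.e. after parsing the options, exactly one or two expressions must
	// remain, which is what the argument-count check below enforces.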
3621 if (argi >= argc || argi + 2 < argc) { 3622 print_debugger_command_usage(argv[0]); 3623 return 0; 3624 } 3625 3626 uint64 addressValue; 3627 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3628 return 0; 3629 3630 Team* team = NULL; 3631 if (argi < argc) { 3632 uint64 threadID; 3633 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3634 return 0; 3635 3636 Thread* thread = Thread::GetDebug(threadID); 3637 if (thread == NULL) { 3638 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3639 return 0; 3640 } 3641 3642 team = thread->team; 3643 } 3644 3645 if (reverseLookup) { 3646 phys_addr_t physicalAddress; 3647 if (pageLookup) { 3648 vm_page* page = (vm_page*)(addr_t)addressValue; 3649 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3650 } else { 3651 physicalAddress = (phys_addr_t)addressValue; 3652 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3653 } 3654 3655 kprintf(" Team Virtual Address Area\n"); 3656 kprintf("--------------------------------------\n"); 3657 3658 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3659 Callback() 3660 : 3661 fAddressSpace(NULL) 3662 { 3663 } 3664 3665 void SetAddressSpace(VMAddressSpace* addressSpace) 3666 { 3667 fAddressSpace = addressSpace; 3668 } 3669 3670 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3671 { 3672 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3673 virtualAddress); 3674 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3675 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3676 else 3677 kprintf("\n"); 3678 return false; 3679 } 3680 3681 private: 3682 VMAddressSpace* fAddressSpace; 3683 } callback; 3684 3685 if (team != NULL) { 3686 // team specified -- get its address space 3687 VMAddressSpace* addressSpace = team->address_space; 3688 if (addressSpace == NULL) { 3689 kprintf("Failed to get address space!\n"); 3690 return 0; 3691 } 3692 3693 callback.SetAddressSpace(addressSpace); 3694 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3695 physicalAddress, callback); 3696 } else { 3697 // no team specified -- iterate through all address spaces 3698 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3699 addressSpace != NULL; 3700 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3701 callback.SetAddressSpace(addressSpace); 3702 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3703 physicalAddress, callback); 3704 } 3705 } 3706 } else { 3707 // get the address space 3708 addr_t virtualAddress = (addr_t)addressValue; 3709 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3710 VMAddressSpace* addressSpace; 3711 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3712 addressSpace = VMAddressSpace::Kernel(); 3713 } else if (team != NULL) { 3714 addressSpace = team->address_space; 3715 } else { 3716 Thread* thread = debug_get_debugged_thread(); 3717 if (thread == NULL || thread->team == NULL) { 3718 kprintf("Failed to get team!\n"); 3719 return 0; 3720 } 3721 3722 addressSpace = thread->team->address_space; 3723 } 3724 3725 if (addressSpace == NULL) { 3726 kprintf("Failed to get address space!\n"); 3727 return 0; 3728 } 3729 3730 // let the translation map implementation do the job 3731 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3732 } 3733 3734 return 0; 3735 } 3736 3737 3738 /*! Deletes all areas and reserved regions in the given address space. 3739 3740 The caller must ensure that none of the areas has any wired ranges. 
3741 3742 \param addressSpace The address space. 3743 \param deletingAddressSpace \c true, if the address space is in the process 3744 of being deleted. 3745 */ 3746 void 3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3748 { 3749 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3750 addressSpace->ID())); 3751 3752 addressSpace->WriteLock(); 3753 3754 // remove all reserved areas in this address space 3755 addressSpace->UnreserveAllAddressRanges(0); 3756 3757 // delete all the areas in this address space 3758 while (VMArea* area = addressSpace->FirstArea()) { 3759 ASSERT(!area->IsWired()); 3760 delete_area(addressSpace, area, deletingAddressSpace); 3761 } 3762 3763 addressSpace->WriteUnlock(); 3764 } 3765 3766 3767 static area_id 3768 vm_area_for(addr_t address, bool kernel) 3769 { 3770 team_id team; 3771 if (IS_USER_ADDRESS(address)) { 3772 // we try the user team address space, if any 3773 team = VMAddressSpace::CurrentID(); 3774 if (team < 0) 3775 return team; 3776 } else 3777 team = VMAddressSpace::KernelID(); 3778 3779 AddressSpaceReadLocker locker(team); 3780 if (!locker.IsLocked()) 3781 return B_BAD_TEAM_ID; 3782 3783 VMArea* area = locker.AddressSpace()->LookupArea(address); 3784 if (area != NULL) { 3785 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3786 return B_ERROR; 3787 3788 return area->id; 3789 } 3790 3791 return B_ERROR; 3792 } 3793 3794 3795 /*! Frees physical pages that were used during the boot process. 3796 \a end is inclusive. 3797 */ 3798 static void 3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3800 { 3801 // free all physical pages in the specified range 3802 3803 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3804 phys_addr_t physicalAddress; 3805 uint32 flags; 3806 3807 if (map->Query(current, &physicalAddress, &flags) == B_OK 3808 && (flags & PAGE_PRESENT) != 0) { 3809 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3810 if (page != NULL && page->State() != PAGE_STATE_FREE 3811 && page->State() != PAGE_STATE_CLEAR 3812 && page->State() != PAGE_STATE_UNUSED) { 3813 DEBUG_PAGE_ACCESS_START(page); 3814 vm_page_set_state(page, PAGE_STATE_FREE); 3815 } 3816 } 3817 } 3818 3819 // unmap the memory 3820 map->Unmap(start, end); 3821 } 3822 3823 3824 void 3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3826 { 3827 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3828 addr_t end = start + (size - 1); 3829 addr_t lastEnd = start; 3830 3831 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3832 (void*)start, (void*)end)); 3833 3834 // The areas are sorted in virtual address space order, so 3835 // we just have to find the holes between them that fall 3836 // into the area we should dispose 3837 3838 map->Lock(); 3839 3840 for (VMAddressSpace::AreaIterator it 3841 = VMAddressSpace::Kernel()->GetAreaIterator(); 3842 VMArea* area = it.Next();) { 3843 addr_t areaStart = area->Base(); 3844 addr_t areaEnd = areaStart + (area->Size() - 1); 3845 3846 if (areaEnd < start) 3847 continue; 3848 3849 if (areaStart > end) { 3850 // we are done, the area is already beyond of what we have to free 3851 break; 3852 } 3853 3854 if (areaStart > lastEnd) { 3855 // this is something we can free 3856 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3857 (void*)areaStart)); 3858 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3859 } 3860 3861 if (areaEnd >= end) 
{ 3862 lastEnd = areaEnd; 3863 // no +1 to prevent potential overflow 3864 break; 3865 } 3866 3867 lastEnd = areaEnd + 1; 3868 } 3869 3870 if (lastEnd < end) { 3871 // we can also get rid of some space at the end of the area 3872 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3873 (void*)end)); 3874 unmap_and_free_physical_pages(map, lastEnd, end); 3875 } 3876 3877 map->Unlock(); 3878 } 3879 3880 3881 static void 3882 create_preloaded_image_areas(struct preloaded_image* _image) 3883 { 3884 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3885 char name[B_OS_NAME_LENGTH]; 3886 void* address; 3887 int32 length; 3888 3889 // use file name to create a good area name 3890 char* fileName = strrchr(image->name, '/'); 3891 if (fileName == NULL) 3892 fileName = image->name; 3893 else 3894 fileName++; 3895 3896 length = strlen(fileName); 3897 // make sure there is enough space for the suffix 3898 if (length > 25) 3899 length = 25; 3900 3901 memcpy(name, fileName, length); 3902 strcpy(name + length, "_text"); 3903 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3904 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3905 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3906 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3907 // this will later be remapped read-only/executable by the 3908 // ELF initialization code 3909 3910 strcpy(name + length, "_data"); 3911 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3912 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3913 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3914 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3915 } 3916 3917 3918 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3919 Any boot loader resources contained in that arguments must not be accessed 3920 anymore past this point. 3921 */ 3922 void 3923 vm_free_kernel_args(kernel_args* args) 3924 { 3925 uint32 i; 3926 3927 TRACE(("vm_free_kernel_args()\n")); 3928 3929 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3930 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3931 if (area >= B_OK) 3932 delete_area(area); 3933 } 3934 } 3935 3936 3937 static void 3938 allocate_kernel_args(kernel_args* args) 3939 { 3940 TRACE(("allocate_kernel_args()\n")); 3941 3942 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3943 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3944 3945 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3946 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3947 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3948 } 3949 } 3950 3951 3952 static void 3953 unreserve_boot_loader_ranges(kernel_args* args) 3954 { 3955 TRACE(("unreserve_boot_loader_ranges()\n")); 3956 3957 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3958 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3959 (void*)(addr_t)args->virtual_allocated_range[i].start, 3960 args->virtual_allocated_range[i].size); 3961 } 3962 } 3963 3964 3965 static void 3966 reserve_boot_loader_ranges(kernel_args* args) 3967 { 3968 TRACE(("reserve_boot_loader_ranges()\n")); 3969 3970 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3971 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3972 3973 // If the address is no kernel address, we just skip it. The 3974 // architecture specific code has to deal with it. 
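		// (vm_reserve_address_range() below operates on the kernel address
		// space only, so a range outside of it could not be reserved here
		// anyway.)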
3975 if (!IS_KERNEL_ADDRESS(address)) { 3976 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3977 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3978 continue; 3979 } 3980 3981 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3982 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3983 if (status < B_OK) 3984 panic("could not reserve boot loader ranges\n"); 3985 } 3986 } 3987 3988 3989 static addr_t 3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3991 { 3992 size = PAGE_ALIGN(size); 3993 3994 // find a slot in the virtual allocation addr range 3995 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3996 // check to see if the space between this one and the last is big enough 3997 addr_t rangeStart = args->virtual_allocated_range[i].start; 3998 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3999 + args->virtual_allocated_range[i - 1].size; 4000 4001 addr_t base = alignment > 0 4002 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4003 4004 if (base >= KERNEL_BASE && base < rangeStart 4005 && rangeStart - base >= size) { 4006 args->virtual_allocated_range[i - 1].size 4007 += base + size - previousRangeEnd; 4008 return base; 4009 } 4010 } 4011 4012 // we hadn't found one between allocation ranges. this is ok. 4013 // see if there's a gap after the last one 4014 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4015 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4016 + args->virtual_allocated_range[lastEntryIndex].size; 4017 addr_t base = alignment > 0 4018 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4019 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4020 args->virtual_allocated_range[lastEntryIndex].size 4021 += base + size - lastRangeEnd; 4022 return base; 4023 } 4024 4025 // see if there's a gap before the first one 4026 addr_t rangeStart = args->virtual_allocated_range[0].start; 4027 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4028 base = rangeStart - size; 4029 if (alignment > 0) 4030 base = ROUNDDOWN(base, alignment); 4031 4032 if (base >= KERNEL_BASE) { 4033 args->virtual_allocated_range[0].start = base; 4034 args->virtual_allocated_range[0].size += rangeStart - base; 4035 return base; 4036 } 4037 } 4038 4039 return 0; 4040 } 4041 4042 4043 static bool 4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4045 { 4046 // TODO: horrible brute-force method of determining if the page can be 4047 // allocated 4048 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4049 if (address >= args->physical_memory_range[i].start 4050 && address < args->physical_memory_range[i].start 4051 + args->physical_memory_range[i].size) 4052 return true; 4053 } 4054 return false; 4055 } 4056 4057 4058 page_num_t 4059 vm_allocate_early_physical_page(kernel_args* args) 4060 { 4061 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4062 phys_addr_t nextPage; 4063 4064 nextPage = args->physical_allocated_range[i].start 4065 + args->physical_allocated_range[i].size; 4066 // see if the page after the next allocated paddr run can be allocated 4067 if (i + 1 < args->num_physical_allocated_ranges 4068 && args->physical_allocated_range[i + 1].size != 0) { 4069 // see if the next page will collide with the next allocated range 4070 if (nextPage >= args->physical_allocated_range[i+1].start) 4071 continue; 4072 } 4073 // see if the next physical page 
fits in the memory block 4074 if (is_page_in_physical_memory_range(args, nextPage)) { 4075 // we got one! 4076 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4077 return nextPage / B_PAGE_SIZE; 4078 } 4079 } 4080 4081 // Expanding upwards didn't work, try going downwards. 4082 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4083 phys_addr_t nextPage; 4084 4085 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4086 // see if the page after the prev allocated paddr run can be allocated 4087 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4088 // see if the next page will collide with the next allocated range 4089 if (nextPage < args->physical_allocated_range[i-1].start 4090 + args->physical_allocated_range[i-1].size) 4091 continue; 4092 } 4093 // see if the next physical page fits in the memory block 4094 if (is_page_in_physical_memory_range(args, nextPage)) { 4095 // we got one! 4096 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4097 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4098 return nextPage / B_PAGE_SIZE; 4099 } 4100 } 4101 4102 return 0; 4103 // could not allocate a block 4104 } 4105 4106 4107 /*! This one uses the kernel_args' physical and virtual memory ranges to 4108 allocate some pages before the VM is completely up. 4109 */ 4110 addr_t 4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4112 uint32 attributes, addr_t alignment) 4113 { 4114 if (physicalSize > virtualSize) 4115 physicalSize = virtualSize; 4116 4117 // find the vaddr to allocate at 4118 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4119 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4120 if (virtualBase == 0) { 4121 panic("vm_allocate_early: could not allocate virtual address\n"); 4122 return 0; 4123 } 4124 4125 // map the pages 4126 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4127 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4128 if (physicalAddress == 0) 4129 panic("error allocating early page!\n"); 4130 4131 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4132 4133 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4134 physicalAddress * B_PAGE_SIZE, attributes, 4135 &vm_allocate_early_physical_page); 4136 } 4137 4138 return virtualBase; 4139 } 4140 4141 4142 /*! The main entrance point to initialize the VM. 
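	Called once during early boot; the remaining initialization stages are
	performed later by vm_init_post_sem(), vm_init_post_thread(), and
	vm_init_post_modules() below.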
*/ 4143 status_t 4144 vm_init(kernel_args* args) 4145 { 4146 struct preloaded_image* image; 4147 void* address; 4148 status_t err = 0; 4149 uint32 i; 4150 4151 TRACE(("vm_init: entry\n")); 4152 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4153 err = arch_vm_init(args); 4154 4155 // initialize some globals 4156 vm_page_init_num_pages(args); 4157 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4158 4159 slab_init(args); 4160 4161 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4162 off_t heapSize = INITIAL_HEAP_SIZE; 4163 // try to accomodate low memory systems 4164 while (heapSize > sAvailableMemory / 8) 4165 heapSize /= 2; 4166 if (heapSize < 1024 * 1024) 4167 panic("vm_init: go buy some RAM please."); 4168 4169 // map in the new heap and initialize it 4170 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4171 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4172 TRACE(("heap at 0x%lx\n", heapBase)); 4173 heap_init(heapBase, heapSize); 4174 #endif 4175 4176 // initialize the free page list and physical page mapper 4177 vm_page_init(args); 4178 4179 // initialize the cache allocators 4180 vm_cache_init(args); 4181 4182 { 4183 status_t error = VMAreaHash::Init(); 4184 if (error != B_OK) 4185 panic("vm_init: error initializing area hash table\n"); 4186 } 4187 4188 VMAddressSpace::Init(); 4189 reserve_boot_loader_ranges(args); 4190 4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4192 heap_init_post_area(); 4193 #endif 4194 4195 // Do any further initialization that the architecture dependant layers may 4196 // need now 4197 arch_vm_translation_map_init_post_area(args); 4198 arch_vm_init_post_area(args); 4199 vm_page_init_post_area(args); 4200 slab_init_post_area(); 4201 4202 // allocate areas to represent stuff that already exists 4203 4204 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4205 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4206 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4207 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4208 #endif 4209 4210 allocate_kernel_args(args); 4211 4212 create_preloaded_image_areas(args->kernel_image); 4213 4214 // allocate areas for preloaded images 4215 for (image = args->preloaded_images; image != NULL; image = image->next) 4216 create_preloaded_image_areas(image); 4217 4218 // allocate kernel stacks 4219 for (i = 0; i < args->num_cpus; i++) { 4220 char name[64]; 4221 4222 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4223 address = (void*)args->cpu_kstack[i].start; 4224 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4225 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4226 } 4227 4228 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4229 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4230 4231 #if PARANOID_KERNEL_MALLOC 4232 vm_block_address_range("uninitialized heap memory", 4233 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4234 #endif 4235 #if PARANOID_KERNEL_FREE 4236 vm_block_address_range("freed heap memory", 4237 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4238 #endif 4239 4240 // create the object cache for the page mappings 4241 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4242 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4243 NULL, NULL); 4244 if (gPageMappingsObjectCache == NULL) 4245 panic("failed to create page mappings object cache"); 4246 4247 
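	// Keep a minimum number of mapping objects in reserve, so that page
	// faults can still be resolved when the system is otherwise low on
	// memory (cf. the object_cache_reserve() fallback in vm_soft_fault()).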
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4248 4249 #if DEBUG_CACHE_LIST 4250 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4251 virtual_address_restrictions virtualRestrictions = {}; 4252 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4253 physical_address_restrictions physicalRestrictions = {}; 4254 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4255 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4256 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4257 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4258 &physicalRestrictions, (void**)&sCacheInfoTable); 4259 } 4260 #endif // DEBUG_CACHE_LIST 4261 4262 // add some debugger commands 4263 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4264 add_debugger_command("area", &dump_area, 4265 "Dump info about a particular area"); 4266 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4267 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4268 #if DEBUG_CACHE_LIST 4269 if (sCacheInfoTable != NULL) { 4270 add_debugger_command_etc("caches", &dump_caches, 4271 "List all VMCache trees", 4272 "[ \"-c\" ]\n" 4273 "All cache trees are listed sorted in decreasing order by number " 4274 "of\n" 4275 "used pages or, if \"-c\" is specified, by size of committed " 4276 "memory.\n", 4277 0); 4278 } 4279 #endif 4280 add_debugger_command("avail", &dump_available_memory, 4281 "Dump available memory"); 4282 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4283 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4284 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4285 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4286 add_debugger_command("string", &display_mem, "dump strings"); 4287 4288 add_debugger_command_etc("mapping", &dump_mapping_info, 4289 "Print address mapping information", 4290 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4291 "Prints low-level page mapping information for a given address. If\n" 4292 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4293 "address that is looked up in the translation map of the current\n" 4294 "team, respectively the team specified by thread ID <thread ID>. If\n" 4295 "\"-r\" is specified, <address> is a physical address that is\n" 4296 "searched in the translation map of all teams, respectively the team\n" 4297 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4298 "<address> is the address of a vm_page structure. 
The behavior is\n" 4299 "equivalent to specifying \"-r\" with the physical address of that\n" 4300 "page.\n", 4301 0); 4302 4303 TRACE(("vm_init: exit\n")); 4304 4305 vm_cache_init_post_heap(); 4306 4307 return err; 4308 } 4309 4310 4311 status_t 4312 vm_init_post_sem(kernel_args* args) 4313 { 4314 // This frees all unused boot loader resources and makes its space available 4315 // again 4316 arch_vm_init_end(args); 4317 unreserve_boot_loader_ranges(args); 4318 4319 // fill in all of the semaphores that were not allocated before 4320 // since we're still single threaded and only the kernel address space 4321 // exists, it isn't that hard to find all of the ones we need to create 4322 4323 arch_vm_translation_map_init_post_sem(args); 4324 4325 slab_init_post_sem(); 4326 4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4328 heap_init_post_sem(); 4329 #endif 4330 4331 return B_OK; 4332 } 4333 4334 4335 status_t 4336 vm_init_post_thread(kernel_args* args) 4337 { 4338 vm_page_init_post_thread(args); 4339 slab_init_post_thread(); 4340 return heap_init_post_thread(); 4341 } 4342 4343 4344 status_t 4345 vm_init_post_modules(kernel_args* args) 4346 { 4347 return arch_vm_init_post_modules(args); 4348 } 4349 4350 4351 void 4352 permit_page_faults(void) 4353 { 4354 Thread* thread = thread_get_current_thread(); 4355 if (thread != NULL) 4356 atomic_add(&thread->page_faults_allowed, 1); 4357 } 4358 4359 4360 void 4361 forbid_page_faults(void) 4362 { 4363 Thread* thread = thread_get_current_thread(); 4364 if (thread != NULL) 4365 atomic_add(&thread->page_faults_allowed, -1); 4366 } 4367 4368 4369 status_t 4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4371 bool isUser, addr_t* newIP) 4372 { 4373 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4374 faultAddress)); 4375 4376 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4377 4378 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4379 VMAddressSpace* addressSpace = NULL; 4380 4381 status_t status = B_OK; 4382 *newIP = 0; 4383 atomic_add((int32*)&sPageFaults, 1); 4384 4385 if (IS_KERNEL_ADDRESS(pageAddress)) { 4386 addressSpace = VMAddressSpace::GetKernel(); 4387 } else if (IS_USER_ADDRESS(pageAddress)) { 4388 addressSpace = VMAddressSpace::GetCurrent(); 4389 if (addressSpace == NULL) { 4390 if (!isUser) { 4391 dprintf("vm_page_fault: kernel thread accessing invalid user " 4392 "memory!\n"); 4393 status = B_BAD_ADDRESS; 4394 TPF(PageFaultError(-1, 4395 VMPageFaultTracing 4396 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4397 } else { 4398 // XXX weird state. 
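				// (A userland thread should always have an address space;
				// presumably its team is already being torn down.)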
4399 panic("vm_page_fault: non kernel thread accessing user memory " 4400 "that doesn't exist!\n"); 4401 status = B_BAD_ADDRESS; 4402 } 4403 } 4404 } else { 4405 // the hit was probably in the 64k DMZ between kernel and user space 4406 // this keeps a user space thread from passing a buffer that crosses 4407 // into kernel space 4408 status = B_BAD_ADDRESS; 4409 TPF(PageFaultError(-1, 4410 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4411 } 4412 4413 if (status == B_OK) { 4414 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4415 isUser, NULL); 4416 } 4417 4418 if (status < B_OK) { 4419 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4420 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4421 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4422 thread_get_current_thread_id()); 4423 if (!isUser) { 4424 Thread* thread = thread_get_current_thread(); 4425 if (thread != NULL && thread->fault_handler != 0) { 4426 // this will cause the arch dependant page fault handler to 4427 // modify the IP on the interrupt frame or whatever to return 4428 // to this address 4429 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4430 } else { 4431 // unhandled page fault in the kernel 4432 panic("vm_page_fault: unhandled page fault in kernel space at " 4433 "0x%lx, ip 0x%lx\n", address, faultAddress); 4434 } 4435 } else { 4436 Thread* thread = thread_get_current_thread(); 4437 4438 #ifdef TRACE_FAULTS 4439 VMArea* area = NULL; 4440 if (addressSpace != NULL) { 4441 addressSpace->ReadLock(); 4442 area = addressSpace->LookupArea(faultAddress); 4443 } 4444 4445 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4446 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4447 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4448 thread->team->Name(), thread->team->id, 4449 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4450 faultAddress, area ? area->name : "???", faultAddress - (area ? 4451 area->Base() : 0x0)); 4452 4453 if (addressSpace != NULL) 4454 addressSpace->ReadUnlock(); 4455 #endif 4456 4457 // If the thread has a signal handler for SIGSEGV, we simply 4458 // send it the signal. Otherwise we notify the user debugger 4459 // first. 4460 struct sigaction action; 4461 if ((sigaction(SIGSEGV, NULL, &action) == 0 4462 && action.sa_handler != SIG_DFL 4463 && action.sa_handler != SIG_IGN) 4464 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4465 SIGSEGV)) { 4466 Signal signal(SIGSEGV, 4467 status == B_PERMISSION_DENIED 4468 ? 
SEGV_ACCERR : SEGV_MAPERR, 4469 EFAULT, thread->team->id); 4470 signal.SetAddress((void*)address); 4471 send_signal_to_thread(thread, signal, 0); 4472 } 4473 } 4474 } 4475 4476 if (addressSpace != NULL) 4477 addressSpace->Put(); 4478 4479 return B_HANDLED_INTERRUPT; 4480 } 4481 4482 4483 struct PageFaultContext { 4484 AddressSpaceReadLocker addressSpaceLocker; 4485 VMCacheChainLocker cacheChainLocker; 4486 4487 VMTranslationMap* map; 4488 VMCache* topCache; 4489 off_t cacheOffset; 4490 vm_page_reservation reservation; 4491 bool isWrite; 4492 4493 // return values 4494 vm_page* page; 4495 bool restart; 4496 bool pageAllocated; 4497 4498 4499 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4500 : 4501 addressSpaceLocker(addressSpace, true), 4502 map(addressSpace->TranslationMap()), 4503 isWrite(isWrite) 4504 { 4505 } 4506 4507 ~PageFaultContext() 4508 { 4509 UnlockAll(); 4510 vm_page_unreserve_pages(&reservation); 4511 } 4512 4513 void Prepare(VMCache* topCache, off_t cacheOffset) 4514 { 4515 this->topCache = topCache; 4516 this->cacheOffset = cacheOffset; 4517 page = NULL; 4518 restart = false; 4519 pageAllocated = false; 4520 4521 cacheChainLocker.SetTo(topCache); 4522 } 4523 4524 void UnlockAll(VMCache* exceptCache = NULL) 4525 { 4526 topCache = NULL; 4527 addressSpaceLocker.Unlock(); 4528 cacheChainLocker.Unlock(exceptCache); 4529 } 4530 }; 4531 4532 4533 /*! Gets the page that should be mapped into the area. 4534 Returns an error code other than \c B_OK, if the page couldn't be found or 4535 paged in. The locking state of the address space and the caches is undefined 4536 in that case. 4537 Returns \c B_OK with \c context.restart set to \c true, if the functions 4538 had to unlock the address space and all caches and is supposed to be called 4539 again. 4540 Returns \c B_OK with \c context.restart set to \c false, if the page was 4541 found. It is returned in \c context.page. The address space will still be 4542 locked as well as all caches starting from the top cache to at least the 4543 cache the page lives in. 4544 */ 4545 static status_t 4546 fault_get_page(PageFaultContext& context) 4547 { 4548 VMCache* cache = context.topCache; 4549 VMCache* lastCache = NULL; 4550 vm_page* page = NULL; 4551 4552 while (cache != NULL) { 4553 // We already hold the lock of the cache at this point. 4554 4555 lastCache = cache; 4556 4557 page = cache->LookupPage(context.cacheOffset); 4558 if (page != NULL && page->busy) { 4559 // page must be busy -- wait for it to become unbusy 4560 context.UnlockAll(cache); 4561 cache->ReleaseRefLocked(); 4562 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4563 4564 // restart the whole process 4565 context.restart = true; 4566 return B_OK; 4567 } 4568 4569 if (page != NULL) 4570 break; 4571 4572 // The current cache does not contain the page we're looking for. 4573 4574 // see if the backing store has it 4575 if (cache->HasPage(context.cacheOffset)) { 4576 // insert a fresh page and mark it busy -- we're going to read it in 4577 page = vm_page_allocate_page(&context.reservation, 4578 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4579 cache->InsertPage(page, context.cacheOffset); 4580 4581 // We need to unlock all caches and the address space while reading 4582 // the page in. Keep a reference to the cache around. 
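			// (The extra reference keeps the cache alive while no locks are
			// held during the read below.)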
4583 cache->AcquireRefLocked(); 4584 context.UnlockAll(); 4585 4586 // read the page in 4587 generic_io_vec vec; 4588 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4589 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4590 4591 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4592 B_PHYSICAL_IO_REQUEST, &bytesRead); 4593 4594 cache->Lock(); 4595 4596 if (status < B_OK) { 4597 // on error remove and free the page 4598 dprintf("reading page from cache %p returned: %s!\n", 4599 cache, strerror(status)); 4600 4601 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4602 cache->RemovePage(page); 4603 vm_page_set_state(page, PAGE_STATE_FREE); 4604 4605 cache->ReleaseRefAndUnlock(); 4606 return status; 4607 } 4608 4609 // mark the page unbusy again 4610 cache->MarkPageUnbusy(page); 4611 4612 DEBUG_PAGE_ACCESS_END(page); 4613 4614 // Since we needed to unlock everything temporarily, the area 4615 // situation might have changed. So we need to restart the whole 4616 // process. 4617 cache->ReleaseRefAndUnlock(); 4618 context.restart = true; 4619 return B_OK; 4620 } 4621 4622 cache = context.cacheChainLocker.LockSourceCache(); 4623 } 4624 4625 if (page == NULL) { 4626 // There was no adequate page, determine the cache for a clean one. 4627 // Read-only pages come in the deepest cache, only the top most cache 4628 // may have direct write access. 4629 cache = context.isWrite ? context.topCache : lastCache; 4630 4631 // allocate a clean page 4632 page = vm_page_allocate_page(&context.reservation, 4633 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4634 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4635 page->physical_page_number)); 4636 4637 // insert the new page into our cache 4638 cache->InsertPage(page, context.cacheOffset); 4639 context.pageAllocated = true; 4640 } else if (page->Cache() != context.topCache && context.isWrite) { 4641 // We have a page that has the data we want, but in the wrong cache 4642 // object so we need to copy it and stick it into the top cache. 4643 vm_page* sourcePage = page; 4644 4645 // TODO: If memory is low, it might be a good idea to steal the page 4646 // from our source cache -- if possible, that is. 4647 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4648 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4649 4650 // To not needlessly kill concurrency we unlock all caches but the top 4651 // one while copying the page. Lacking another mechanism to ensure that 4652 // the source page doesn't disappear, we mark it busy. 4653 sourcePage->busy = true; 4654 context.cacheChainLocker.UnlockKeepRefs(true); 4655 4656 // copy the page 4657 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4658 sourcePage->physical_page_number * B_PAGE_SIZE); 4659 4660 context.cacheChainLocker.RelockCaches(true); 4661 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4662 4663 // insert the new page into our cache 4664 context.topCache->InsertPage(page, context.cacheOffset); 4665 context.pageAllocated = true; 4666 } else 4667 DEBUG_PAGE_ACCESS_START(page); 4668 4669 context.page = page; 4670 return B_OK; 4671 } 4672 4673 4674 /*! Makes sure the address in the given address space is mapped. 4675 4676 \param addressSpace The address space. 4677 \param originalAddress The address. Doesn't need to be page aligned. 4678 \param isWrite If \c true the address shall be write-accessible. 4679 \param isUser If \c true the access is requested by a userland team. 
4680 \param wirePage On success, if non \c NULL, the wired count of the page 4681 mapped at the given address is incremented and the page is returned 4682 via this parameter. 4683 \return \c B_OK on success, another error code otherwise. 4684 */ 4685 static status_t 4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4687 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4688 { 4689 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4690 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4691 originalAddress, isWrite, isUser)); 4692 4693 PageFaultContext context(addressSpace, isWrite); 4694 4695 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4696 status_t status = B_OK; 4697 4698 addressSpace->IncrementFaultCount(); 4699 4700 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4701 // the pages upfront makes sure we don't have any cache locked, so that the 4702 // page daemon/thief can do their job without problems. 4703 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4704 originalAddress); 4705 context.addressSpaceLocker.Unlock(); 4706 vm_page_reserve_pages(&context.reservation, reservePages, 4707 addressSpace == VMAddressSpace::Kernel() 4708 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4709 4710 while (true) { 4711 context.addressSpaceLocker.Lock(); 4712 4713 // get the area the fault was in 4714 VMArea* area = addressSpace->LookupArea(address); 4715 if (area == NULL) { 4716 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4717 "space\n", originalAddress); 4718 TPF(PageFaultError(-1, 4719 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4720 status = B_BAD_ADDRESS; 4721 break; 4722 } 4723 4724 // check permissions 4725 uint32 protection = get_area_page_protection(area, address); 4726 if (isUser && (protection & B_USER_PROTECTION) == 0 4727 && (area->protection & B_KERNEL_AREA) != 0) { 4728 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4729 area->id, (void*)originalAddress); 4730 TPF(PageFaultError(area->id, 4731 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4732 status = B_PERMISSION_DENIED; 4733 break; 4734 } 4735 if (isWrite && (protection 4736 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4737 dprintf("write access attempted on write-protected area 0x%" 4738 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4739 TPF(PageFaultError(area->id, 4740 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4741 status = B_PERMISSION_DENIED; 4742 break; 4743 } else if (isExecute && (protection 4744 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4745 dprintf("instruction fetch attempted on execute-protected area 0x%" 4746 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4747 TPF(PageFaultError(area->id, 4748 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4749 status = B_PERMISSION_DENIED; 4750 break; 4751 } else if (!isWrite && !isExecute && (protection 4752 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4753 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4754 " at %p\n", area->id, (void*)originalAddress); 4755 TPF(PageFaultError(area->id, 4756 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4757 status = B_PERMISSION_DENIED; 4758 break; 4759 } 4760 4761 // We have the area, it was a valid access, so let's try to resolve the 4762 // page fault now. 4763 // At first, the top most cache from the area is investigated. 
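		// The cache offset of the fault is the faulting page's offset within
		// the area plus the area's own offset into its cache.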
4764 4765 context.Prepare(vm_area_get_locked_cache(area), 4766 address - area->Base() + area->cache_offset); 4767 4768 // See if this cache has a fault handler -- this will do all the work 4769 // for us. 4770 { 4771 // Note, since the page fault is resolved with interrupts enabled, 4772 // the fault handler could be called more than once for the same 4773 // reason -- the store must take this into account. 4774 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4775 if (status != B_BAD_HANDLER) 4776 break; 4777 } 4778 4779 // The top most cache has no fault handler, so let's see if the cache or 4780 // its sources already have the page we're searching for (we're going 4781 // from top to bottom). 4782 status = fault_get_page(context); 4783 if (status != B_OK) { 4784 TPF(PageFaultError(area->id, status)); 4785 break; 4786 } 4787 4788 if (context.restart) 4789 continue; 4790 4791 // All went fine, all there is left to do is to map the page into the 4792 // address space. 4793 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4794 context.page)); 4795 4796 // If the page doesn't reside in the area's cache, we need to make sure 4797 // it's mapped in read-only, so that we cannot overwrite someone else's 4798 // data (copy-on-write) 4799 uint32 newProtection = protection; 4800 if (context.page->Cache() != context.topCache && !isWrite) 4801 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4802 4803 bool unmapPage = false; 4804 bool mapPage = true; 4805 4806 // check whether there's already a page mapped at the address 4807 context.map->Lock(); 4808 4809 phys_addr_t physicalAddress; 4810 uint32 flags; 4811 vm_page* mappedPage = NULL; 4812 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4813 && (flags & PAGE_PRESENT) != 0 4814 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4815 != NULL) { 4816 // Yep there's already a page. If it's ours, we can simply adjust 4817 // its protection. Otherwise we have to unmap it. 4818 if (mappedPage == context.page) { 4819 context.map->ProtectPage(area, address, newProtection); 4820 // Note: We assume that ProtectPage() is atomic (i.e. 4821 // the page isn't temporarily unmapped), otherwise we'd have 4822 // to make sure it isn't wired. 4823 mapPage = false; 4824 } else 4825 unmapPage = true; 4826 } 4827 4828 context.map->Unlock(); 4829 4830 if (unmapPage) { 4831 // If the page is wired, we can't unmap it. Wait until it is unwired 4832 // again and restart. Note that the page cannot be wired for 4833 // writing, since it it isn't in the topmost cache. So we can safely 4834 // ignore ranges wired for writing (our own and other concurrent 4835 // wiring attempts in progress) and in fact have to do that to avoid 4836 // a deadlock. 4837 VMAreaUnwiredWaiter waiter; 4838 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4839 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4840 // unlock everything and wait 4841 if (context.pageAllocated) { 4842 // ... but since we allocated a page and inserted it into 4843 // the top cache, remove and free it first. Otherwise we'd 4844 // have a page from a lower cache mapped while an upper 4845 // cache has a page that would shadow it. 
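					// (The page was only just allocated and has not been
					// mapped yet, so it can safely be removed and freed
					// here.)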
4846 context.topCache->RemovePage(context.page); 4847 vm_page_free_etc(context.topCache, context.page, 4848 &context.reservation); 4849 } else 4850 DEBUG_PAGE_ACCESS_END(context.page); 4851 4852 context.UnlockAll(); 4853 waiter.waitEntry.Wait(); 4854 continue; 4855 } 4856 4857 // Note: The mapped page is a page of a lower cache. We are 4858 // guaranteed to have that cached locked, our new page is a copy of 4859 // that page, and the page is not busy. The logic for that guarantee 4860 // is as follows: Since the page is mapped, it must live in the top 4861 // cache (ruled out above) or any of its lower caches, and there is 4862 // (was before the new page was inserted) no other page in any 4863 // cache between the top cache and the page's cache (otherwise that 4864 // would be mapped instead). That in turn means that our algorithm 4865 // must have found it and therefore it cannot be busy either. 4866 DEBUG_PAGE_ACCESS_START(mappedPage); 4867 unmap_page(area, address); 4868 DEBUG_PAGE_ACCESS_END(mappedPage); 4869 } 4870 4871 if (mapPage) { 4872 if (map_page(area, context.page, address, newProtection, 4873 &context.reservation) != B_OK) { 4874 // Mapping can only fail, when the page mapping object couldn't 4875 // be allocated. Save for the missing mapping everything is 4876 // fine, though. If this was a regular page fault, we'll simply 4877 // leave and probably fault again. To make sure we'll have more 4878 // luck then, we ensure that the minimum object reserve is 4879 // available. 4880 DEBUG_PAGE_ACCESS_END(context.page); 4881 4882 context.UnlockAll(); 4883 4884 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4885 != B_OK) { 4886 // Apparently the situation is serious. Let's get ourselves 4887 // killed. 4888 status = B_NO_MEMORY; 4889 } else if (wirePage != NULL) { 4890 // The caller expects us to wire the page. Since 4891 // object_cache_reserve() succeeded, we should now be able 4892 // to allocate a mapping structure. Restart. 
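					// (For a regular fault we simply break out below instead;
					// the faulting instruction is retried and will fault
					// again, this time with the reserve filled.)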
4893 continue; 4894 } 4895 4896 break; 4897 } 4898 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4899 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4900 4901 // also wire the page, if requested 4902 if (wirePage != NULL && status == B_OK) { 4903 increment_page_wired_count(context.page); 4904 *wirePage = context.page; 4905 } 4906 4907 DEBUG_PAGE_ACCESS_END(context.page); 4908 4909 break; 4910 } 4911 4912 return status; 4913 } 4914 4915 4916 status_t 4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4918 { 4919 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4920 } 4921 4922 status_t 4923 vm_put_physical_page(addr_t vaddr, void* handle) 4924 { 4925 return sPhysicalPageMapper->PutPage(vaddr, handle); 4926 } 4927 4928 4929 status_t 4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4931 void** _handle) 4932 { 4933 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4934 } 4935 4936 status_t 4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4938 { 4939 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4940 } 4941 4942 4943 status_t 4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4945 { 4946 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4947 } 4948 4949 status_t 4950 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4951 { 4952 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4953 } 4954 4955 4956 void 4957 vm_get_info(system_info* info) 4958 { 4959 swap_get_info(info); 4960 4961 MutexLocker locker(sAvailableMemoryLock); 4962 info->needed_memory = sNeededMemory; 4963 info->free_memory = sAvailableMemory; 4964 } 4965 4966 4967 uint32 4968 vm_num_page_faults(void) 4969 { 4970 return sPageFaults; 4971 } 4972 4973 4974 off_t 4975 vm_available_memory(void) 4976 { 4977 MutexLocker locker(sAvailableMemoryLock); 4978 return sAvailableMemory; 4979 } 4980 4981 4982 off_t 4983 vm_available_not_needed_memory(void) 4984 { 4985 MutexLocker locker(sAvailableMemoryLock); 4986 return sAvailableMemory - sNeededMemory; 4987 } 4988 4989 4990 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4991 debugger. 
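	Unlike the non-debug variant it does not acquire \c sAvailableMemoryLock,
	so the returned value may be slightly out of date.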
4992 */ 4993 off_t 4994 vm_available_not_needed_memory_debug(void) 4995 { 4996 return sAvailableMemory - sNeededMemory; 4997 } 4998 4999 5000 size_t 5001 vm_kernel_address_space_left(void) 5002 { 5003 return VMAddressSpace::Kernel()->FreeSpace(); 5004 } 5005 5006 5007 void 5008 vm_unreserve_memory(size_t amount) 5009 { 5010 mutex_lock(&sAvailableMemoryLock); 5011 5012 sAvailableMemory += amount; 5013 5014 mutex_unlock(&sAvailableMemoryLock); 5015 } 5016 5017 5018 status_t 5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5020 { 5021 size_t reserve = kMemoryReserveForPriority[priority]; 5022 5023 MutexLocker locker(sAvailableMemoryLock); 5024 5025 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5026 5027 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5028 sAvailableMemory -= amount; 5029 return B_OK; 5030 } 5031 5032 if (timeout <= 0) 5033 return B_NO_MEMORY; 5034 5035 // turn timeout into an absolute timeout 5036 timeout += system_time(); 5037 5038 // loop until we've got the memory or the timeout occurs 5039 do { 5040 sNeededMemory += amount; 5041 5042 // call the low resource manager 5043 locker.Unlock(); 5044 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5045 B_ABSOLUTE_TIMEOUT, timeout); 5046 locker.Lock(); 5047 5048 sNeededMemory -= amount; 5049 5050 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5051 sAvailableMemory -= amount; 5052 return B_OK; 5053 } 5054 } while (timeout > system_time()); 5055 5056 return B_NO_MEMORY; 5057 } 5058 5059 5060 status_t 5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5062 { 5063 // NOTE: The caller is responsible for synchronizing calls to this function! 5064 5065 AddressSpaceReadLocker locker; 5066 VMArea* area; 5067 status_t status = locker.SetFromArea(id, area); 5068 if (status != B_OK) 5069 return status; 5070 5071 // nothing to do, if the type doesn't change 5072 uint32 oldType = area->MemoryType(); 5073 if (type == oldType) 5074 return B_OK; 5075 5076 // set the memory type of the area and the mapped pages 5077 VMTranslationMap* map = area->address_space->TranslationMap(); 5078 map->Lock(); 5079 area->SetMemoryType(type); 5080 map->ProtectArea(area, area->protection); 5081 map->Unlock(); 5082 5083 // set the physical memory type 5084 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5085 if (error != B_OK) { 5086 // reset the memory type of the area and the mapped pages 5087 map->Lock(); 5088 area->SetMemoryType(oldType); 5089 map->ProtectArea(area, area->protection); 5090 map->Unlock(); 5091 return error; 5092 } 5093 5094 return B_OK; 5095 5096 } 5097 5098 5099 /*! 
This function enforces some protection properties: 5100 - kernel areas must be W^X (after kernel startup) 5101 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5102 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5103 */ 5104 static void 5105 fix_protection(uint32* protection) 5106 { 5107 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5108 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5109 || (*protection & B_WRITE_AREA) != 0) 5110 && !gKernelStartup) 5111 panic("kernel areas cannot be both writable and executable!"); 5112 5113 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5114 if ((*protection & B_WRITE_AREA) != 0) 5115 *protection |= B_KERNEL_WRITE_AREA; 5116 if ((*protection & B_READ_AREA) != 0) 5117 *protection |= B_KERNEL_READ_AREA; 5118 } 5119 } 5120 5121 5122 static void 5123 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5124 { 5125 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5126 info->area = area->id; 5127 info->address = (void*)area->Base(); 5128 info->size = area->Size(); 5129 info->protection = area->protection; 5130 info->lock = area->wiring; 5131 info->team = area->address_space->ID(); 5132 info->copy_count = 0; 5133 info->in_count = 0; 5134 info->out_count = 0; 5135 // TODO: retrieve real values here! 5136 5137 VMCache* cache = vm_area_get_locked_cache(area); 5138 5139 // Note, this is a simplification; the cache could be larger than this area 5140 info->ram_size = cache->page_count * B_PAGE_SIZE; 5141 5142 vm_area_put_locked_cache(cache); 5143 } 5144 5145 5146 static status_t 5147 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5148 { 5149 // is newSize a multiple of B_PAGE_SIZE? 5150 if (newSize & (B_PAGE_SIZE - 1)) 5151 return B_BAD_VALUE; 5152 5153 // lock all affected address spaces and the cache 5154 VMArea* area; 5155 VMCache* cache; 5156 5157 MultiAddressSpaceLocker locker; 5158 AreaCacheLocker cacheLocker; 5159 5160 status_t status; 5161 size_t oldSize; 5162 bool anyKernelArea; 5163 bool restart; 5164 5165 do { 5166 anyKernelArea = false; 5167 restart = false; 5168 5169 locker.Unset(); 5170 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5171 if (status != B_OK) 5172 return status; 5173 cacheLocker.SetTo(cache, true); // already locked 5174 5175 // enforce restrictions 5176 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5177 || (area->protection & B_KERNEL_AREA) != 0)) { 5178 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5179 "resize kernel area %" B_PRId32 " (%s)\n", 5180 team_get_current_team_id(), areaID, area->name); 5181 return B_NOT_ALLOWED; 5182 } 5183 // TODO: Enforce all restrictions (team, etc.)! 5184 5185 oldSize = area->Size(); 5186 if (newSize == oldSize) 5187 return B_OK; 5188 5189 if (cache->type != CACHE_TYPE_RAM) 5190 return B_NOT_ALLOWED; 5191 5192 if (oldSize < newSize) { 5193 // We need to check if all areas of this cache can be resized. 5194 for (VMArea* current = cache->areas; current != NULL; 5195 current = current->cache_next) { 5196 if (!current->address_space->CanResizeArea(current, newSize)) 5197 return B_ERROR; 5198 anyKernelArea 5199 |= current->address_space == VMAddressSpace::Kernel(); 5200 } 5201 } else { 5202 // We're shrinking the areas, so we must make sure the affected 5203 // ranges are not wired. 
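			// (When wait_if_area_range_is_wired() has to wait, it temporarily
			// drops the locks, which is why the whole locking procedure is
			// restarted in that case.)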
5204 for (VMArea* current = cache->areas; current != NULL; 5205 current = current->cache_next) { 5206 anyKernelArea 5207 |= current->address_space == VMAddressSpace::Kernel(); 5208 5209 if (wait_if_area_range_is_wired(current, 5210 current->Base() + newSize, oldSize - newSize, &locker, 5211 &cacheLocker)) { 5212 restart = true; 5213 break; 5214 } 5215 } 5216 } 5217 } while (restart); 5218 5219 // Okay, looks good so far, so let's do it 5220 5221 int priority = kernel && anyKernelArea 5222 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5223 uint32 allocationFlags = kernel && anyKernelArea 5224 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5225 5226 if (oldSize < newSize) { 5227 // Growing the cache can fail, so we do it first. 5228 status = cache->Resize(cache->virtual_base + newSize, priority); 5229 if (status != B_OK) 5230 return status; 5231 } 5232 5233 for (VMArea* current = cache->areas; current != NULL; 5234 current = current->cache_next) { 5235 status = current->address_space->ResizeArea(current, newSize, 5236 allocationFlags); 5237 if (status != B_OK) 5238 break; 5239 5240 // We also need to unmap all pages beyond the new size, if the area has 5241 // shrunk 5242 if (newSize < oldSize) { 5243 VMCacheChainLocker cacheChainLocker(cache); 5244 cacheChainLocker.LockAllSourceCaches(); 5245 5246 unmap_pages(current, current->Base() + newSize, 5247 oldSize - newSize); 5248 5249 cacheChainLocker.Unlock(cache); 5250 } 5251 } 5252 5253 if (status == B_OK) { 5254 // Shrink or grow individual page protections if in use. 5255 if (area->page_protections != NULL) { 5256 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5257 uint8* newProtections 5258 = (uint8*)realloc(area->page_protections, bytes); 5259 if (newProtections == NULL) 5260 status = B_NO_MEMORY; 5261 else { 5262 area->page_protections = newProtections; 5263 5264 if (oldSize < newSize) { 5265 // init the additional page protections to that of the area 5266 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5267 uint32 areaProtection = area->protection 5268 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5269 memset(area->page_protections + offset, 5270 areaProtection | (areaProtection << 4), bytes - offset); 5271 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5272 uint8& entry = area->page_protections[offset - 1]; 5273 entry = (entry & 0x0f) | (areaProtection << 4); 5274 } 5275 } 5276 } 5277 } 5278 } 5279 5280 // shrinking the cache can't fail, so we do it now 5281 if (status == B_OK && newSize < oldSize) 5282 status = cache->Resize(cache->virtual_base + newSize, priority); 5283 5284 if (status != B_OK) { 5285 // Something failed -- resize the areas back to their original size. 5286 // This can fail, too, in which case we're seriously screwed. 
5287 for (VMArea* current = cache->areas; current != NULL; 5288 current = current->cache_next) { 5289 if (current->address_space->ResizeArea(current, oldSize, 5290 allocationFlags) != B_OK) { 5291 panic("vm_resize_area(): Failed and not being able to restore " 5292 "original state."); 5293 } 5294 } 5295 5296 cache->Resize(cache->virtual_base + oldSize, priority); 5297 } 5298 5299 // TODO: we must honour the lock restrictions of this area 5300 return status; 5301 } 5302 5303 5304 status_t 5305 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5306 { 5307 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5308 } 5309 5310 5311 status_t 5312 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5313 { 5314 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5315 } 5316 5317 5318 status_t 5319 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5320 bool user) 5321 { 5322 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5323 } 5324 5325 5326 void 5327 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5328 { 5329 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5330 } 5331 5332 5333 /*! Copies a range of memory directly from/to a page that might not be mapped 5334 at the moment. 5335 5336 For \a unsafeMemory the current mapping (if any is ignored). The function 5337 walks through the respective area's cache chain to find the physical page 5338 and copies from/to it directly. 5339 The memory range starting at \a unsafeMemory with a length of \a size bytes 5340 must not cross a page boundary. 5341 5342 \param teamID The team ID identifying the address space \a unsafeMemory is 5343 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5344 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5345 is passed, the address space of the thread returned by 5346 debug_get_debugged_thread() is used. 5347 \param unsafeMemory The start of the unsafe memory range to be copied 5348 from/to. 5349 \param buffer A safely accessible kernel buffer to be copied from/to. 5350 \param size The number of bytes to be copied. 5351 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5352 \a unsafeMemory, the other way around otherwise. 
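	\return \c B_OK on success, \c B_BAD_VALUE if the range exceeds
		\c B_PAGE_SIZE or crosses a page boundary, \c B_BAD_ADDRESS if the
		address space or area could not be found, \c B_UNSUPPORTED if the page
		is currently not resident (or, when copying to \a unsafeMemory, does
		not live in the area's top cache), another error code otherwise.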
5353 */ 5354 status_t 5355 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5356 size_t size, bool copyToUnsafe) 5357 { 5358 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5359 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5360 return B_BAD_VALUE; 5361 } 5362 5363 // get the address space for the debugged thread 5364 VMAddressSpace* addressSpace; 5365 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5366 addressSpace = VMAddressSpace::Kernel(); 5367 } else if (teamID == B_CURRENT_TEAM) { 5368 Thread* thread = debug_get_debugged_thread(); 5369 if (thread == NULL || thread->team == NULL) 5370 return B_BAD_ADDRESS; 5371 5372 addressSpace = thread->team->address_space; 5373 } else 5374 addressSpace = VMAddressSpace::DebugGet(teamID); 5375 5376 if (addressSpace == NULL) 5377 return B_BAD_ADDRESS; 5378 5379 // get the area 5380 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5381 if (area == NULL) 5382 return B_BAD_ADDRESS; 5383 5384 // search the page 5385 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5386 + area->cache_offset; 5387 VMCache* cache = area->cache; 5388 vm_page* page = NULL; 5389 while (cache != NULL) { 5390 page = cache->DebugLookupPage(cacheOffset); 5391 if (page != NULL) 5392 break; 5393 5394 // Page not found in this cache -- if it is paged out, we must not try 5395 // to get it from lower caches. 5396 if (cache->DebugHasPage(cacheOffset)) 5397 break; 5398 5399 cache = cache->source; 5400 } 5401 5402 if (page == NULL) 5403 return B_UNSUPPORTED; 5404 5405 // copy from/to physical memory 5406 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5407 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5408 5409 if (copyToUnsafe) { 5410 if (page->Cache() != area->cache) 5411 return B_UNSUPPORTED; 5412 5413 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5414 } 5415 5416 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5417 } 5418 5419 5420 /** Validate that a memory range is either fully in kernel space, or fully in 5421 * userspace */ 5422 static inline bool 5423 validate_memory_range(const void* addr, size_t size) 5424 { 5425 addr_t address = (addr_t)addr; 5426 5427 // Check for overflows on all addresses. 5428 if ((address + size) < address) 5429 return false; 5430 5431 // Validate that the address range does not cross the kernel/user boundary. 5432 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5433 } 5434 5435 5436 /** Validate that a memory range is fully in userspace. */ 5437 static inline bool 5438 validate_user_memory_range(const void* addr, size_t size) 5439 { 5440 addr_t address = (addr_t)addr; 5441 5442 // Check for overflows on all addresses. 5443 if ((address + size) < address) 5444 return false; 5445 5446 // Validate that both the start and end address are in userspace 5447 return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1); 5448 } 5449 5450 5451 // #pragma mark - kernel public API 5452 5453 5454 status_t 5455 user_memcpy(void* to, const void* from, size_t size) 5456 { 5457 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5458 return B_BAD_ADDRESS; 5459 5460 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5461 return B_BAD_ADDRESS; 5462 5463 return B_OK; 5464 } 5465 5466 5467 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5468 the string in \a to, NULL-terminating the result. 5469 5470 \param to Pointer to the destination C-string. 
5471 \param from Pointer to the source C-string. 5472 \param size Size in bytes of the string buffer pointed to by \a to. 5473 5474 \return strlen(\a from). 5475 */ 5476 ssize_t 5477 user_strlcpy(char* to, const char* from, size_t size) 5478 { 5479 if (to == NULL && size != 0) 5480 return B_BAD_VALUE; 5481 if (from == NULL) 5482 return B_BAD_ADDRESS; 5483 5484 // Protect the source address from overflows. 5485 size_t maxSize = size; 5486 if ((addr_t)from + maxSize < (addr_t)from) 5487 maxSize -= (addr_t)from + maxSize; 5488 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5489 maxSize = USER_TOP - (addr_t)from; 5490 5491 if (!validate_memory_range(to, maxSize)) 5492 return B_BAD_ADDRESS; 5493 5494 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5495 if (result < 0) 5496 return result; 5497 5498 // If we hit the address overflow boundary, fail. 5499 if ((size_t)result >= maxSize && maxSize < size) 5500 return B_BAD_ADDRESS; 5501 5502 return result; 5503 } 5504 5505 5506 status_t 5507 user_memset(void* s, char c, size_t count) 5508 { 5509 if (!validate_memory_range(s, count)) 5510 return B_BAD_ADDRESS; 5511 5512 if (arch_cpu_user_memset(s, c, count) < B_OK) 5513 return B_BAD_ADDRESS; 5514 5515 return B_OK; 5516 } 5517 5518 5519 /*! Wires a single page at the given address. 5520 5521 \param team The team whose address space the address belongs to. Supports 5522 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5523 parameter is ignored. 5524 \param address address The virtual address to wire down. Does not need to 5525 be page aligned. 5526 \param writable If \c true the page shall be writable. 5527 \param info On success the info is filled in, among other things 5528 containing the physical address the given virtual one translates to. 5529 \return \c B_OK, when the page could be wired, another error code otherwise. 5530 */ 5531 status_t 5532 vm_wire_page(team_id team, addr_t address, bool writable, 5533 VMPageWiringInfo* info) 5534 { 5535 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5536 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5537 5538 // compute the page protection that is required 5539 bool isUser = IS_USER_ADDRESS(address); 5540 uint32 requiredProtection = PAGE_PRESENT 5541 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5542 if (writable) 5543 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5544 5545 // get and read lock the address space 5546 VMAddressSpace* addressSpace = NULL; 5547 if (isUser) { 5548 if (team == B_CURRENT_TEAM) 5549 addressSpace = VMAddressSpace::GetCurrent(); 5550 else 5551 addressSpace = VMAddressSpace::Get(team); 5552 } else 5553 addressSpace = VMAddressSpace::GetKernel(); 5554 if (addressSpace == NULL) 5555 return B_ERROR; 5556 5557 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5558 5559 VMTranslationMap* map = addressSpace->TranslationMap(); 5560 status_t error = B_OK; 5561 5562 // get the area 5563 VMArea* area = addressSpace->LookupArea(pageAddress); 5564 if (area == NULL) { 5565 addressSpace->Put(); 5566 return B_BAD_ADDRESS; 5567 } 5568 5569 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5570 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5571 5572 // mark the area range wired 5573 area->Wire(&info->range); 5574 5575 // Lock the area's cache chain and the translation map. Needed to look 5576 // up the page and play with its wired count. 
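	// (The page may live in any cache of the chain, not just the top cache,
	// which is why all source caches have to be locked as well.)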
5577 cacheChainLocker.LockAllSourceCaches(); 5578 map->Lock(); 5579 5580 phys_addr_t physicalAddress; 5581 uint32 flags; 5582 vm_page* page; 5583 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5584 && (flags & requiredProtection) == requiredProtection 5585 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5586 != NULL) { 5587 // Already mapped with the correct permissions -- just increment 5588 // the page's wired count. 5589 increment_page_wired_count(page); 5590 5591 map->Unlock(); 5592 cacheChainLocker.Unlock(); 5593 addressSpaceLocker.Unlock(); 5594 } else { 5595 // Let vm_soft_fault() map the page for us, if possible. We need 5596 // to fully unlock to avoid deadlocks. Since we have already 5597 // wired the area itself, nothing disturbing will happen with it 5598 // in the meantime. 5599 map->Unlock(); 5600 cacheChainLocker.Unlock(); 5601 addressSpaceLocker.Unlock(); 5602 5603 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5604 isUser, &page); 5605 5606 if (error != B_OK) { 5607 // The page could not be mapped -- clean up. 5608 VMCache* cache = vm_area_get_locked_cache(area); 5609 area->Unwire(&info->range); 5610 cache->ReleaseRefAndUnlock(); 5611 addressSpace->Put(); 5612 return error; 5613 } 5614 } 5615 5616 info->physicalAddress 5617 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5618 + address % B_PAGE_SIZE; 5619 info->page = page; 5620 5621 return B_OK; 5622 } 5623 5624 5625 /*! Unwires a single page previously wired via vm_wire_page(). 5626 5627 \param info The same object passed to vm_wire_page() before. 5628 */ 5629 void 5630 vm_unwire_page(VMPageWiringInfo* info) 5631 { 5632 // lock the address space 5633 VMArea* area = info->range.area; 5634 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5635 // takes over our reference 5636 5637 // lock the top cache 5638 VMCache* cache = vm_area_get_locked_cache(area); 5639 VMCacheChainLocker cacheChainLocker(cache); 5640 5641 if (info->page->Cache() != cache) { 5642 // The page is not in the top cache, so we lock the whole cache chain 5643 // before touching the page's wired count. 5644 cacheChainLocker.LockAllSourceCaches(); 5645 } 5646 5647 decrement_page_wired_count(info->page); 5648 5649 // remove the wired range from the area 5650 area->Unwire(&info->range); 5651 5652 cacheChainLocker.Unlock(); 5653 } 5654 5655 5656 /*! Wires down the given address range in the specified team's address space. 5657 5658 If successful the function 5659 - acquires a reference to the specified team's address space, 5660 - adds respective wired ranges to all areas that intersect with the given 5661 address range, 5662 - makes sure all pages in the given address range are mapped with the 5663 requested access permissions and increments their wired count. 5664 5665 It fails when \a team doesn't specify a valid address space, when any part 5666 of the specified address range is not covered by areas, when the concerned 5667 areas don't allow mapping with the requested permissions, or when mapping 5668 failed for another reason. 5669 5670 When successful the call must be balanced by an unlock_memory_etc() call with 5671 the exact same parameters. 5672 5673 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5674 supported. 5675 \param address The start of the address range to be wired. 5676 \param numBytes The size of the address range to be wired. 5677 \param flags Flags.
Currently only \c B_READ_DEVICE is defined, which 5678 requests that the range must be wired writable ("read from device 5679 into memory"). 5680 \return \c B_OK on success, another error code otherwise. 5681 */ 5682 status_t 5683 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5684 { 5685 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5686 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5687 5688 // compute the page protection that is required 5689 bool isUser = IS_USER_ADDRESS(address); 5690 bool writable = (flags & B_READ_DEVICE) == 0; 5691 uint32 requiredProtection = PAGE_PRESENT 5692 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5693 if (writable) 5694 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5695 5696 uint32 mallocFlags = isUser 5697 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5698 5699 // get and read lock the address space 5700 VMAddressSpace* addressSpace = NULL; 5701 if (isUser) { 5702 if (team == B_CURRENT_TEAM) 5703 addressSpace = VMAddressSpace::GetCurrent(); 5704 else 5705 addressSpace = VMAddressSpace::Get(team); 5706 } else 5707 addressSpace = VMAddressSpace::GetKernel(); 5708 if (addressSpace == NULL) 5709 return B_ERROR; 5710 5711 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5712 // We get a new address space reference here. The one we got above will 5713 // be freed by unlock_memory_etc(). 5714 5715 VMTranslationMap* map = addressSpace->TranslationMap(); 5716 status_t error = B_OK; 5717 5718 // iterate through all concerned areas 5719 addr_t nextAddress = lockBaseAddress; 5720 while (nextAddress != lockEndAddress) { 5721 // get the next area 5722 VMArea* area = addressSpace->LookupArea(nextAddress); 5723 if (area == NULL) { 5724 error = B_BAD_ADDRESS; 5725 break; 5726 } 5727 5728 addr_t areaStart = nextAddress; 5729 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5730 5731 // allocate the wired range (do that before locking the cache to avoid 5732 // deadlocks) 5733 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5734 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5735 if (range == NULL) { 5736 error = B_NO_MEMORY; 5737 break; 5738 } 5739 5740 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5741 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5742 5743 // mark the area range wired 5744 area->Wire(range); 5745 5746 // Depending on the area cache type and the wiring, we may not need to 5747 // look at the individual pages. 5748 if (area->cache_type == CACHE_TYPE_NULL 5749 || area->cache_type == CACHE_TYPE_DEVICE 5750 || area->wiring == B_FULL_LOCK 5751 || area->wiring == B_CONTIGUOUS) { 5752 nextAddress = areaEnd; 5753 continue; 5754 } 5755 5756 // Lock the area's cache chain and the translation map. Needed to look 5757 // up pages and play with their wired count. 5758 cacheChainLocker.LockAllSourceCaches(); 5759 map->Lock(); 5760 5761 // iterate through the pages and wire them 5762 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5763 phys_addr_t physicalAddress; 5764 uint32 flags; 5765 5766 vm_page* page; 5767 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5768 && (flags & requiredProtection) == requiredProtection 5769 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5770 != NULL) { 5771 // Already mapped with the correct permissions -- just increment 5772 // the page's wired count. 
5773 increment_page_wired_count(page); 5774 } else { 5775 // Let vm_soft_fault() map the page for us, if possible. We need 5776 // to fully unlock to avoid deadlocks. Since we have already 5777 // wired the area itself, nothing disturbing will happen with it 5778 // in the meantime. 5779 map->Unlock(); 5780 cacheChainLocker.Unlock(); 5781 addressSpaceLocker.Unlock(); 5782 5783 error = vm_soft_fault(addressSpace, nextAddress, writable, 5784 false, isUser, &page); 5785 5786 addressSpaceLocker.Lock(); 5787 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5788 cacheChainLocker.LockAllSourceCaches(); 5789 map->Lock(); 5790 } 5791 5792 if (error != B_OK) 5793 break; 5794 } 5795 5796 map->Unlock(); 5797 5798 if (error == B_OK) { 5799 cacheChainLocker.Unlock(); 5800 } else { 5801 // An error occurred, so abort right here. If the current address 5802 // is the first in this area, unwire the area, since we won't get 5803 // to it when reverting what we've done so far. 5804 if (nextAddress == areaStart) { 5805 area->Unwire(range); 5806 cacheChainLocker.Unlock(); 5807 range->~VMAreaWiredRange(); 5808 free_etc(range, mallocFlags); 5809 } else 5810 cacheChainLocker.Unlock(); 5811 5812 break; 5813 } 5814 } 5815 5816 if (error != B_OK) { 5817 // An error occurred, so unwire all that we've already wired. Note that 5818 // even if not a single page was wired, unlock_memory_etc() is called 5819 // to put the address space reference. 5820 addressSpaceLocker.Unlock(); 5821 unlock_memory_etc(team, (void*)lockBaseAddress, 5822 nextAddress - lockBaseAddress, flags); 5823 } 5824 5825 return error; 5826 } 5827 5828 5829 status_t 5830 lock_memory(void* address, size_t numBytes, uint32 flags) 5831 { 5832 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5833 } 5834 5835 5836 /*! Unwires an address range previously wired with lock_memory_etc(). 5837 5838 Note that a call to this function must balance a previous lock_memory_etc() 5839 call with exactly the same parameters. 5840 */ 5841 status_t 5842 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5843 { 5844 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5845 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5846 5847 // compute the page protection that is required 5848 bool isUser = IS_USER_ADDRESS(address); 5849 bool writable = (flags & B_READ_DEVICE) == 0; 5850 uint32 requiredProtection = PAGE_PRESENT 5851 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5852 if (writable) 5853 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5854 5855 uint32 mallocFlags = isUser 5856 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5857 5858 // get and read lock the address space 5859 VMAddressSpace* addressSpace = NULL; 5860 if (isUser) { 5861 if (team == B_CURRENT_TEAM) 5862 addressSpace = VMAddressSpace::GetCurrent(); 5863 else 5864 addressSpace = VMAddressSpace::Get(team); 5865 } else 5866 addressSpace = VMAddressSpace::GetKernel(); 5867 if (addressSpace == NULL) 5868 return B_ERROR; 5869 5870 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5871 // Take over the address space reference. We don't unlock until we're 5872 // done. 
5873 5874 VMTranslationMap* map = addressSpace->TranslationMap(); 5875 status_t error = B_OK; 5876 5877 // iterate through all concerned areas 5878 addr_t nextAddress = lockBaseAddress; 5879 while (nextAddress != lockEndAddress) { 5880 // get the next area 5881 VMArea* area = addressSpace->LookupArea(nextAddress); 5882 if (area == NULL) { 5883 error = B_BAD_ADDRESS; 5884 break; 5885 } 5886 5887 addr_t areaStart = nextAddress; 5888 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5889 5890 // Lock the area's top cache. This is a requirement for 5891 // VMArea::Unwire(). 5892 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5893 5894 // Depending on the area cache type and the wiring, we may not need to 5895 // look at the individual pages. 5896 if (area->cache_type == CACHE_TYPE_NULL 5897 || area->cache_type == CACHE_TYPE_DEVICE 5898 || area->wiring == B_FULL_LOCK 5899 || area->wiring == B_CONTIGUOUS) { 5900 // unwire the range (to avoid deadlocks we delete the range after 5901 // unlocking the cache) 5902 nextAddress = areaEnd; 5903 VMAreaWiredRange* range = area->Unwire(areaStart, 5904 areaEnd - areaStart, writable); 5905 cacheChainLocker.Unlock(); 5906 if (range != NULL) { 5907 range->~VMAreaWiredRange(); 5908 free_etc(range, mallocFlags); 5909 } 5910 continue; 5911 } 5912 5913 // Lock the area's cache chain and the translation map. Needed to look 5914 // up pages and play with their wired count. 5915 cacheChainLocker.LockAllSourceCaches(); 5916 map->Lock(); 5917 5918 // iterate through the pages and unwire them 5919 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5920 phys_addr_t physicalAddress; 5921 uint32 flags; 5922 5923 vm_page* page; 5924 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5925 && (flags & PAGE_PRESENT) != 0 5926 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5927 != NULL) { 5928 // The page is still mapped -- just decrement 5929 // its wired count. 5930 decrement_page_wired_count(page); 5931 } else { 5932 panic("unlock_memory_etc(): Failed to unwire page: address " 5933 "space %p, address: %#" B_PRIxADDR, addressSpace, 5934 nextAddress); 5935 error = B_BAD_VALUE; 5936 break; 5937 } 5938 } 5939 5940 map->Unlock(); 5941 5942 // All pages are unwired. Remove the area's wired range as well (to 5943 // avoid deadlocks we delete the range after unlocking the cache). 5944 VMAreaWiredRange* range = area->Unwire(areaStart, 5945 areaEnd - areaStart, writable); 5946 5947 cacheChainLocker.Unlock(); 5948 5949 if (range != NULL) { 5950 range->~VMAreaWiredRange(); 5951 free_etc(range, mallocFlags); 5952 } 5953 5954 if (error != B_OK) 5955 break; 5956 } 5957 5958 // get rid of the address space reference lock_memory_etc() acquired 5959 addressSpace->Put(); 5960 5961 return error; 5962 } 5963 5964 5965 status_t 5966 unlock_memory(void* address, size_t numBytes, uint32 flags) 5967 { 5968 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5969 } 5970 5971 5972 /*! Similar to get_memory_map(), but also allows one to specify the address 5973 space for the memory in question and has saner semantics. 5974 Returns \c B_OK when the complete range could be translated or 5975 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either 5976 case the actual number of entries is written to \c *_numEntries. Any other 5977 error case indicates complete failure; \c *_numEntries will be set to \c 0 5978 in this case.
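A minimal usage sketch (illustrative only; \c lockedBuffer and \c lockedSize
are placeholder names for an already locked/wired range, and error handling
is omitted):

	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, lockedBuffer,
		lockedSize, entries, &count);
	if (status == B_OK || status == B_BUFFER_OVERFLOW) {
		// "count" now holds the number of valid entries in "entries".
	}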
5979 */ 5980 status_t 5981 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5982 physical_entry* table, uint32* _numEntries) 5983 { 5984 uint32 numEntries = *_numEntries; 5985 *_numEntries = 0; 5986 5987 VMAddressSpace* addressSpace; 5988 addr_t virtualAddress = (addr_t)address; 5989 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5990 phys_addr_t physicalAddress; 5991 status_t status = B_OK; 5992 int32 index = -1; 5993 addr_t offset = 0; 5994 bool interrupts = are_interrupts_enabled(); 5995 5996 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 5997 "entries)\n", team, address, numBytes, numEntries)); 5998 5999 if (numEntries == 0 || numBytes == 0) 6000 return B_BAD_VALUE; 6001 6002 // in which address space is the address to be found? 6003 if (IS_USER_ADDRESS(virtualAddress)) { 6004 if (team == B_CURRENT_TEAM) 6005 addressSpace = VMAddressSpace::GetCurrent(); 6006 else 6007 addressSpace = VMAddressSpace::Get(team); 6008 } else 6009 addressSpace = VMAddressSpace::GetKernel(); 6010 6011 if (addressSpace == NULL) 6012 return B_ERROR; 6013 6014 VMTranslationMap* map = addressSpace->TranslationMap(); 6015 6016 if (interrupts) 6017 map->Lock(); 6018 6019 while (offset < numBytes) { 6020 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6021 uint32 flags; 6022 6023 if (interrupts) { 6024 status = map->Query((addr_t)address + offset, &physicalAddress, 6025 &flags); 6026 } else { 6027 status = map->QueryInterrupt((addr_t)address + offset, 6028 &physicalAddress, &flags); 6029 } 6030 if (status < B_OK) 6031 break; 6032 if ((flags & PAGE_PRESENT) == 0) { 6033 panic("get_memory_map() called on unmapped memory!"); 6034 return B_BAD_ADDRESS; 6035 } 6036 6037 if (index < 0 && pageOffset > 0) { 6038 physicalAddress += pageOffset; 6039 if (bytes > B_PAGE_SIZE - pageOffset) 6040 bytes = B_PAGE_SIZE - pageOffset; 6041 } 6042 6043 // need to switch to the next physical_entry? 6044 if (index < 0 || table[index].address 6045 != physicalAddress - table[index].size) { 6046 if ((uint32)++index + 1 > numEntries) { 6047 // table too small 6048 break; 6049 } 6050 table[index].address = physicalAddress; 6051 table[index].size = bytes; 6052 } else { 6053 // page still fits in the current entry 6054 table[index].size += bytes; 6055 } 6056 6057 offset += bytes; 6058 } 6059 6060 if (interrupts) 6061 map->Unlock(); 6062 6063 if (status != B_OK) 6064 return status; 6065 6066 if ((uint32)index + 1 > numEntries) { 6067 *_numEntries = index; 6068 return B_BUFFER_OVERFLOW; 6069 } 6070 6071 *_numEntries = index + 1; 6072 return B_OK; 6073 } 6074 6075 6076 /*! According to the BeBook, this function should always succeed. 6077 This is no longer the case.
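Unlike get_memory_map_etc(), on success with more than one supplied table
entry the list is terminated by an entry whose \c address and \c size are 0;
a single-entry table is accepted without the terminator.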
6078 */ 6079 extern "C" int32 6080 __get_memory_map_haiku(const void* address, size_t numBytes, 6081 physical_entry* table, int32 numEntries) 6082 { 6083 uint32 entriesRead = numEntries; 6084 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6085 table, &entriesRead); 6086 if (error != B_OK) 6087 return error; 6088 6089 // close the entry list 6090 6091 // if it's only one entry, we will silently accept the missing ending 6092 if (numEntries == 1) 6093 return B_OK; 6094 6095 if (entriesRead + 1 > (uint32)numEntries) 6096 return B_BUFFER_OVERFLOW; 6097 6098 table[entriesRead].address = 0; 6099 table[entriesRead].size = 0; 6100 6101 return B_OK; 6102 } 6103 6104 6105 area_id 6106 area_for(void* address) 6107 { 6108 return vm_area_for((addr_t)address, true); 6109 } 6110 6111 6112 area_id 6113 find_area(const char* name) 6114 { 6115 return VMAreaHash::Find(name); 6116 } 6117 6118 6119 status_t 6120 _get_area_info(area_id id, area_info* info, size_t size) 6121 { 6122 if (size != sizeof(area_info) || info == NULL) 6123 return B_BAD_VALUE; 6124 6125 AddressSpaceReadLocker locker; 6126 VMArea* area; 6127 status_t status = locker.SetFromArea(id, area); 6128 if (status != B_OK) 6129 return status; 6130 6131 fill_area_info(area, info, size); 6132 return B_OK; 6133 } 6134 6135 6136 status_t 6137 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6138 { 6139 addr_t nextBase = *(addr_t*)cookie; 6140 6141 // we're already through the list 6142 if (nextBase == (addr_t)-1) 6143 return B_ENTRY_NOT_FOUND; 6144 6145 if (team == B_CURRENT_TEAM) 6146 team = team_get_current_team_id(); 6147 6148 AddressSpaceReadLocker locker(team); 6149 if (!locker.IsLocked()) 6150 return B_BAD_TEAM_ID; 6151 6152 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6153 if (area == NULL) { 6154 nextBase = (addr_t)-1; 6155 return B_ENTRY_NOT_FOUND; 6156 } 6157 6158 fill_area_info(area, info, size); 6159 *cookie = (ssize_t)(area->Base() + 1); 6160 6161 return B_OK; 6162 } 6163 6164 6165 status_t 6166 set_area_protection(area_id area, uint32 newProtection) 6167 { 6168 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6169 newProtection, true); 6170 } 6171 6172 6173 status_t 6174 resize_area(area_id areaID, size_t newSize) 6175 { 6176 return vm_resize_area(areaID, newSize, true); 6177 } 6178 6179 6180 /*! Transfers the specified area to a new team. The caller must be the owner 6181 of the area. 6182 */ 6183 area_id 6184 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6185 bool kernel) 6186 { 6187 area_info info; 6188 status_t status = get_area_info(id, &info); 6189 if (status != B_OK) 6190 return status; 6191 6192 if (info.team != thread_get_current_thread()->team->id) 6193 return B_PERMISSION_DENIED; 6194 6195 // We need to mark the area cloneable so the following operations work. 6196 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6197 if (status != B_OK) 6198 return status; 6199 6200 area_id clonedArea = vm_clone_area(target, info.name, _address, 6201 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6202 if (clonedArea < 0) 6203 return clonedArea; 6204 6205 status = vm_delete_area(info.team, id, kernel); 6206 if (status != B_OK) { 6207 vm_delete_area(target, clonedArea, kernel); 6208 return status; 6209 } 6210 6211 // Now we can reset the protection to whatever it was before. 
6212 set_area_protection(clonedArea, info.protection); 6213 6214 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6215 6216 return clonedArea; 6217 } 6218 6219 6220 extern "C" area_id 6221 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6222 size_t numBytes, uint32 addressSpec, uint32 protection, 6223 void** _virtualAddress) 6224 { 6225 if (!arch_vm_supports_protection(protection)) 6226 return B_NOT_SUPPORTED; 6227 6228 fix_protection(&protection); 6229 6230 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6231 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6232 false); 6233 } 6234 6235 6236 area_id 6237 clone_area(const char* name, void** _address, uint32 addressSpec, 6238 uint32 protection, area_id source) 6239 { 6240 if ((protection & B_KERNEL_PROTECTION) == 0) 6241 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6242 6243 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6244 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6245 } 6246 6247 6248 area_id 6249 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6250 uint32 protection, uint32 flags, uint32 guardSize, 6251 const virtual_address_restrictions* virtualAddressRestrictions, 6252 const physical_address_restrictions* physicalAddressRestrictions, 6253 void** _address) 6254 { 6255 fix_protection(&protection); 6256 6257 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6258 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6259 true, _address); 6260 } 6261 6262 6263 extern "C" area_id 6264 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6265 size_t size, uint32 lock, uint32 protection) 6266 { 6267 fix_protection(&protection); 6268 6269 virtual_address_restrictions virtualRestrictions = {}; 6270 virtualRestrictions.address = *_address; 6271 virtualRestrictions.address_specification = addressSpec; 6272 physical_address_restrictions physicalRestrictions = {}; 6273 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6274 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6275 true, _address); 6276 } 6277 6278 6279 status_t 6280 delete_area(area_id area) 6281 { 6282 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6283 } 6284 6285 6286 // #pragma mark - Userland syscalls 6287 6288 6289 status_t 6290 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6291 addr_t size) 6292 { 6293 // filter out some unavailable values (for userland) 6294 switch (addressSpec) { 6295 case B_ANY_KERNEL_ADDRESS: 6296 case B_ANY_KERNEL_BLOCK_ADDRESS: 6297 return B_BAD_VALUE; 6298 } 6299 6300 addr_t address; 6301 6302 if (!IS_USER_ADDRESS(userAddress) 6303 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6304 return B_BAD_ADDRESS; 6305 6306 status_t status = vm_reserve_address_range( 6307 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6308 RESERVED_AVOID_BASE); 6309 if (status != B_OK) 6310 return status; 6311 6312 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6313 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6314 (void*)address, size); 6315 return B_BAD_ADDRESS; 6316 } 6317 6318 return B_OK; 6319 } 6320 6321 6322 status_t 6323 _user_unreserve_address_range(addr_t address, addr_t size) 6324 { 6325 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6326 (void*)address, size); 6327 } 6328 6329 6330 area_id 
6331 _user_area_for(void* address) 6332 { 6333 return vm_area_for((addr_t)address, false); 6334 } 6335 6336 6337 area_id 6338 _user_find_area(const char* userName) 6339 { 6340 char name[B_OS_NAME_LENGTH]; 6341 6342 if (!IS_USER_ADDRESS(userName) 6343 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6344 return B_BAD_ADDRESS; 6345 6346 return find_area(name); 6347 } 6348 6349 6350 status_t 6351 _user_get_area_info(area_id area, area_info* userInfo) 6352 { 6353 if (!IS_USER_ADDRESS(userInfo)) 6354 return B_BAD_ADDRESS; 6355 6356 area_info info; 6357 status_t status = get_area_info(area, &info); 6358 if (status < B_OK) 6359 return status; 6360 6361 // TODO: do we want to prevent userland from seeing kernel protections? 6362 //info.protection &= B_USER_PROTECTION; 6363 6364 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6365 return B_BAD_ADDRESS; 6366 6367 return status; 6368 } 6369 6370 6371 status_t 6372 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6373 { 6374 ssize_t cookie; 6375 6376 if (!IS_USER_ADDRESS(userCookie) 6377 || !IS_USER_ADDRESS(userInfo) 6378 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6379 return B_BAD_ADDRESS; 6380 6381 area_info info; 6382 status_t status = _get_next_area_info(team, &cookie, &info, 6383 sizeof(area_info)); 6384 if (status != B_OK) 6385 return status; 6386 6387 //info.protection &= B_USER_PROTECTION; 6388 6389 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6390 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6391 return B_BAD_ADDRESS; 6392 6393 return status; 6394 } 6395 6396 6397 status_t 6398 _user_set_area_protection(area_id area, uint32 newProtection) 6399 { 6400 if ((newProtection & ~B_USER_PROTECTION) != 0) 6401 return B_BAD_VALUE; 6402 6403 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6404 newProtection, false); 6405 } 6406 6407 6408 status_t 6409 _user_resize_area(area_id area, size_t newSize) 6410 { 6411 // TODO: Since we restrict deleting of areas to those owned by the team, 6412 // we should also do that for resizing (check other functions, too). 
6413 return vm_resize_area(area, newSize, false); 6414 } 6415 6416 6417 area_id 6418 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6419 team_id target) 6420 { 6421 // filter out some unavailable values (for userland) 6422 switch (addressSpec) { 6423 case B_ANY_KERNEL_ADDRESS: 6424 case B_ANY_KERNEL_BLOCK_ADDRESS: 6425 return B_BAD_VALUE; 6426 } 6427 6428 void* address; 6429 if (!IS_USER_ADDRESS(userAddress) 6430 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6431 return B_BAD_ADDRESS; 6432 6433 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6434 if (newArea < B_OK) 6435 return newArea; 6436 6437 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6438 return B_BAD_ADDRESS; 6439 6440 return newArea; 6441 } 6442 6443 6444 area_id 6445 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6446 uint32 protection, area_id sourceArea) 6447 { 6448 char name[B_OS_NAME_LENGTH]; 6449 void* address; 6450 6451 // filter out some unavailable values (for userland) 6452 switch (addressSpec) { 6453 case B_ANY_KERNEL_ADDRESS: 6454 case B_ANY_KERNEL_BLOCK_ADDRESS: 6455 return B_BAD_VALUE; 6456 } 6457 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6458 return B_BAD_VALUE; 6459 6460 if (!IS_USER_ADDRESS(userName) 6461 || !IS_USER_ADDRESS(userAddress) 6462 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6463 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6464 return B_BAD_ADDRESS; 6465 6466 fix_protection(&protection); 6467 6468 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6469 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6470 false); 6471 if (clonedArea < B_OK) 6472 return clonedArea; 6473 6474 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6475 delete_area(clonedArea); 6476 return B_BAD_ADDRESS; 6477 } 6478 6479 return clonedArea; 6480 } 6481 6482 6483 area_id 6484 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6485 size_t size, uint32 lock, uint32 protection) 6486 { 6487 char name[B_OS_NAME_LENGTH]; 6488 void* address; 6489 6490 // filter out some unavailable values (for userland) 6491 switch (addressSpec) { 6492 case B_ANY_KERNEL_ADDRESS: 6493 case B_ANY_KERNEL_BLOCK_ADDRESS: 6494 return B_BAD_VALUE; 6495 } 6496 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6497 return B_BAD_VALUE; 6498 6499 if (!IS_USER_ADDRESS(userName) 6500 || !IS_USER_ADDRESS(userAddress) 6501 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6502 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6503 return B_BAD_ADDRESS; 6504 6505 if (addressSpec == B_EXACT_ADDRESS 6506 && IS_KERNEL_ADDRESS(address)) 6507 return B_BAD_VALUE; 6508 6509 if (addressSpec == B_ANY_ADDRESS) 6510 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6511 if (addressSpec == B_BASE_ADDRESS) 6512 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6513 6514 fix_protection(&protection); 6515 6516 virtual_address_restrictions virtualRestrictions = {}; 6517 virtualRestrictions.address = address; 6518 virtualRestrictions.address_specification = addressSpec; 6519 physical_address_restrictions physicalRestrictions = {}; 6520 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6521 size, lock, protection, 0, 0, &virtualRestrictions, 6522 &physicalRestrictions, false, &address); 6523 6524 if (area >= B_OK 6525 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6526 delete_area(area); 6527 return B_BAD_ADDRESS; 6528 
} 6529 6530 return area; 6531 } 6532 6533 6534 status_t 6535 _user_delete_area(area_id area) 6536 { 6537 // Unlike the BeOS implementation, you can now only delete areas 6538 // that you have created yourself from userland. 6539 // The documentation to delete_area() explicitly states that this 6540 // will be restricted in the future, and so it will. 6541 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6542 } 6543 6544 6545 // TODO: create a BeOS style call for this! 6546 6547 area_id 6548 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6549 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6550 int fd, off_t offset) 6551 { 6552 char name[B_OS_NAME_LENGTH]; 6553 void* address; 6554 area_id area; 6555 6556 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6557 return B_BAD_VALUE; 6558 6559 fix_protection(&protection); 6560 6561 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6562 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6563 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6564 return B_BAD_ADDRESS; 6565 6566 if (addressSpec == B_EXACT_ADDRESS) { 6567 if ((addr_t)address + size < (addr_t)address 6568 || (addr_t)address % B_PAGE_SIZE != 0) { 6569 return B_BAD_VALUE; 6570 } 6571 if (!IS_USER_ADDRESS(address) 6572 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6573 return B_BAD_ADDRESS; 6574 } 6575 } 6576 6577 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6578 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6579 false); 6580 if (area < B_OK) 6581 return area; 6582 6583 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6584 return B_BAD_ADDRESS; 6585 6586 return area; 6587 } 6588 6589 6590 status_t 6591 _user_unmap_memory(void* _address, size_t size) 6592 { 6593 addr_t address = (addr_t)_address; 6594 6595 // check params 6596 if (size == 0 || (addr_t)address + size < (addr_t)address 6597 || (addr_t)address % B_PAGE_SIZE != 0) { 6598 return B_BAD_VALUE; 6599 } 6600 6601 if (!IS_USER_ADDRESS(address) 6602 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6603 return B_BAD_ADDRESS; 6604 } 6605 6606 // Write lock the address space and ensure the address range is not wired. 6607 AddressSpaceWriteLocker locker; 6608 do { 6609 status_t status = locker.SetTo(team_get_current_team_id()); 6610 if (status != B_OK) 6611 return status; 6612 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6613 size, &locker)); 6614 6615 // unmap 6616 return unmap_address_range(locker.AddressSpace(), address, size, false); 6617 } 6618 6619 6620 status_t 6621 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6622 { 6623 // check address range 6624 addr_t address = (addr_t)_address; 6625 size = PAGE_ALIGN(size); 6626 6627 if ((address % B_PAGE_SIZE) != 0) 6628 return B_BAD_VALUE; 6629 if (!validate_user_memory_range(_address, size)) { 6630 // weird error code required by POSIX 6631 return ENOMEM; 6632 } 6633 6634 // extend and check protection 6635 if ((protection & ~B_USER_PROTECTION) != 0) 6636 return B_BAD_VALUE; 6637 6638 fix_protection(&protection); 6639 6640 // We need to write lock the address space, since we're going to play with 6641 // the areas. Also make sure that none of the areas is wired and that we're 6642 // actually allowed to change the protection. 
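// (If any part of the range turns out to be wired, the checks below wait
// for it to become unwired and then restart from scratch, see the "restart"
// loop.)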
6643 AddressSpaceWriteLocker locker; 6644 6645 bool restart; 6646 do { 6647 restart = false; 6648 6649 status_t status = locker.SetTo(team_get_current_team_id()); 6650 if (status != B_OK) 6651 return status; 6652 6653 // First round: Check whether the whole range is covered by areas and we 6654 // are allowed to modify them. 6655 addr_t currentAddress = address; 6656 size_t sizeLeft = size; 6657 while (sizeLeft > 0) { 6658 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6659 if (area == NULL) 6660 return B_NO_MEMORY; 6661 6662 if ((area->protection & B_KERNEL_AREA) != 0) 6663 return B_NOT_ALLOWED; 6664 if (area->protection_max != 0 6665 && (protection & area->protection_max) != protection) { 6666 return B_NOT_ALLOWED; 6667 } 6668 6669 addr_t offset = currentAddress - area->Base(); 6670 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6671 6672 AreaCacheLocker cacheLocker(area); 6673 6674 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6675 &locker, &cacheLocker)) { 6676 restart = true; 6677 break; 6678 } 6679 6680 cacheLocker.Unlock(); 6681 6682 currentAddress += rangeSize; 6683 sizeLeft -= rangeSize; 6684 } 6685 } while (restart); 6686 6687 // Second round: If the protections differ from that of the area, create a 6688 // page protection array and re-map mapped pages. 6689 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6690 addr_t currentAddress = address; 6691 size_t sizeLeft = size; 6692 while (sizeLeft > 0) { 6693 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6694 if (area == NULL) 6695 return B_NO_MEMORY; 6696 6697 addr_t offset = currentAddress - area->Base(); 6698 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6699 6700 currentAddress += rangeSize; 6701 sizeLeft -= rangeSize; 6702 6703 if (area->page_protections == NULL) { 6704 if (area->protection == protection) 6705 continue; 6706 if (offset == 0 && rangeSize == area->Size()) { 6707 status_t status = vm_set_area_protection(area->address_space->ID(), 6708 area->id, protection, false); 6709 if (status != B_OK) 6710 return status; 6711 continue; 6712 } 6713 6714 status_t status = allocate_area_page_protections(area); 6715 if (status != B_OK) 6716 return status; 6717 } 6718 6719 // We need to lock the complete cache chain, since we potentially unmap 6720 // pages of lower caches. 6721 VMCache* topCache = vm_area_get_locked_cache(area); 6722 VMCacheChainLocker cacheChainLocker(topCache); 6723 cacheChainLocker.LockAllSourceCaches(); 6724 6725 for (addr_t pageAddress = area->Base() + offset; 6726 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6727 map->Lock(); 6728 6729 set_area_page_protection(area, pageAddress, protection); 6730 6731 phys_addr_t physicalAddress; 6732 uint32 flags; 6733 6734 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6735 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6736 map->Unlock(); 6737 continue; 6738 } 6739 6740 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6741 if (page == NULL) { 6742 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6743 "\n", area, physicalAddress); 6744 map->Unlock(); 6745 return B_ERROR; 6746 } 6747 6748 // If the page is not in the topmost cache and write access is 6749 // requested, we have to unmap it. Otherwise we can re-map it with 6750 // the new protection. 
6751 bool unmapPage = page->Cache() != topCache 6752 && (protection & B_WRITE_AREA) != 0; 6753 6754 if (!unmapPage) 6755 map->ProtectPage(area, pageAddress, protection); 6756 6757 map->Unlock(); 6758 6759 if (unmapPage) { 6760 DEBUG_PAGE_ACCESS_START(page); 6761 unmap_page(area, pageAddress); 6762 DEBUG_PAGE_ACCESS_END(page); 6763 } 6764 } 6765 } 6766 6767 return B_OK; 6768 } 6769 6770 6771 status_t 6772 _user_sync_memory(void* _address, size_t size, uint32 flags) 6773 { 6774 addr_t address = (addr_t)_address; 6775 size = PAGE_ALIGN(size); 6776 6777 // check params 6778 if ((address % B_PAGE_SIZE) != 0) 6779 return B_BAD_VALUE; 6780 if (!validate_user_memory_range(_address, size)) { 6781 // weird error code required by POSIX 6782 return ENOMEM; 6783 } 6784 6785 bool writeSync = (flags & MS_SYNC) != 0; 6786 bool writeAsync = (flags & MS_ASYNC) != 0; 6787 if (writeSync && writeAsync) 6788 return B_BAD_VALUE; 6789 6790 if (size == 0 || (!writeSync && !writeAsync)) 6791 return B_OK; 6792 6793 // iterate through the range and sync all concerned areas 6794 while (size > 0) { 6795 // read lock the address space 6796 AddressSpaceReadLocker locker; 6797 status_t error = locker.SetTo(team_get_current_team_id()); 6798 if (error != B_OK) 6799 return error; 6800 6801 // get the first area 6802 VMArea* area = locker.AddressSpace()->LookupArea(address); 6803 if (area == NULL) 6804 return B_NO_MEMORY; 6805 6806 uint32 offset = address - area->Base(); 6807 size_t rangeSize = min_c(area->Size() - offset, size); 6808 offset += area->cache_offset; 6809 6810 // lock the cache 6811 AreaCacheLocker cacheLocker(area); 6812 if (!cacheLocker) 6813 return B_BAD_VALUE; 6814 VMCache* cache = area->cache; 6815 6816 locker.Unlock(); 6817 6818 uint32 firstPage = offset >> PAGE_SHIFT; 6819 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6820 6821 // write the pages 6822 if (cache->type == CACHE_TYPE_VNODE) { 6823 if (writeSync) { 6824 // synchronous 6825 error = vm_page_write_modified_page_range(cache, firstPage, 6826 endPage); 6827 if (error != B_OK) 6828 return error; 6829 } else { 6830 // asynchronous 6831 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6832 // TODO: This is probably not quite what is supposed to happen. 6833 // Especially when a lot has to be written, it might take ages 6834 // until it really hits the disk. 6835 } 6836 } 6837 6838 address += rangeSize; 6839 size -= rangeSize; 6840 } 6841 6842 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6843 // synchronize multiple mappings of the same file. In our VM they never get 6844 // out of sync, though, so we don't have to do anything. 6845 6846 return B_OK; 6847 } 6848 6849 6850 status_t 6851 _user_memory_advice(void* _address, size_t size, uint32 advice) 6852 { 6853 addr_t address = (addr_t)_address; 6854 if ((address % B_PAGE_SIZE) != 0) 6855 return B_BAD_VALUE; 6856 6857 size = PAGE_ALIGN(size); 6858 if (!validate_user_memory_range(_address, size)) { 6859 // weird error code required by POSIX 6860 return B_NO_MEMORY; 6861 } 6862 6863 switch (advice) { 6864 case MADV_NORMAL: 6865 case MADV_SEQUENTIAL: 6866 case MADV_RANDOM: 6867 case MADV_WILLNEED: 6868 case MADV_DONTNEED: 6869 // TODO: Implement! 
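// (For now these hints are accepted and silently ignored.)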
6870 break; 6871 6872 case MADV_FREE: 6873 { 6874 AddressSpaceWriteLocker locker; 6875 do { 6876 status_t status = locker.SetTo(team_get_current_team_id()); 6877 if (status != B_OK) 6878 return status; 6879 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6880 address, size, &locker)); 6881 6882 discard_address_range(locker.AddressSpace(), address, size, false); 6883 break; 6884 } 6885 6886 default: 6887 return B_BAD_VALUE; 6888 } 6889 6890 return B_OK; 6891 } 6892 6893 6894 status_t 6895 _user_get_memory_properties(team_id teamID, const void* address, 6896 uint32* _protected, uint32* _lock) 6897 { 6898 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6899 return B_BAD_ADDRESS; 6900 6901 AddressSpaceReadLocker locker; 6902 status_t error = locker.SetTo(teamID); 6903 if (error != B_OK) 6904 return error; 6905 6906 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6907 if (area == NULL) 6908 return B_NO_MEMORY; 6909 6910 uint32 protection = get_area_page_protection(area, (addr_t)address); 6911 uint32 wiring = area->wiring; 6912 6913 locker.Unlock(); 6914 6915 error = user_memcpy(_protected, &protection, sizeof(protection)); 6916 if (error != B_OK) 6917 return error; 6918 6919 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6920 6921 return error; 6922 } 6923 6924 6925 static status_t 6926 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 6927 { 6928 #if ENABLE_SWAP_SUPPORT 6929 // check address range 6930 addr_t address = (addr_t)_address; 6931 size = PAGE_ALIGN(size); 6932 6933 if ((address % B_PAGE_SIZE) != 0) 6934 return EINVAL; 6935 if (!validate_user_memory_range(_address, size)) 6936 return EINVAL; 6937 6938 const addr_t endAddress = address + size; 6939 6940 AddressSpaceReadLocker addressSpaceLocker; 6941 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 6942 if (error != B_OK) 6943 return error; 6944 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 6945 6946 // iterate through all concerned areas 6947 addr_t nextAddress = address; 6948 while (nextAddress != endAddress) { 6949 // get the next area 6950 VMArea* area = addressSpace->LookupArea(nextAddress); 6951 if (area == NULL) { 6952 error = B_BAD_ADDRESS; 6953 break; 6954 } 6955 6956 const addr_t areaStart = nextAddress; 6957 const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size()); 6958 nextAddress = areaEnd; 6959 6960 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6961 if (error != B_OK) { 6962 // We don't need to unset or reset things on failure. 6963 break; 6964 } 6965 6966 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6967 VMAnonymousCache* anonCache = NULL; 6968 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 6969 // This memory will never be swapped anyway. Nothing to do. 6970 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 6971 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 6972 areaEnd - areaStart, swappable); 6973 } else { 6974 // Some other cache type? We cannot affect anything here. 6975 error = EINVAL; 6976 } 6977 6978 cacheChainLocker.Unlock(); 6979 6980 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6981 if (error != B_OK) 6982 break; 6983 } 6984 6985 return error; 6986 #else 6987 // No swap support? Nothing to do.
6988 return B_OK; 6989 #endif 6990 } 6991 6992 6993 status_t 6994 _user_mlock(const void* _address, size_t size) 6995 { 6996 return user_set_memory_swappable(_address, size, false); 6997 } 6998 6999 7000 status_t 7001 _user_munlock(const void* _address, size_t size) 7002 { 7003 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7004 // if multiple clones of an area had mlock() called on them, 7005 // munlock() must also be called on all of them to actually unlock. 7006 // (At present, the first munlock() will unlock all.) 7007 // TODO: fork() should automatically unlock memory in the child. 7008 return user_set_memory_swappable(_address, size, true); 7009 } 7010 7011 7012 // #pragma mark -- compatibility 7013 7014 7015 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7016 7017 7018 struct physical_entry_beos { 7019 uint32 address; 7020 uint32 size; 7021 }; 7022 7023 7024 /*! The physical_entry structure has changed. We need to translate it to the 7025 old one. 7026 */ 7027 extern "C" int32 7028 __get_memory_map_beos(const void* _address, size_t numBytes, 7029 physical_entry_beos* table, int32 numEntries) 7030 { 7031 if (numEntries <= 0) 7032 return B_BAD_VALUE; 7033 7034 const uint8* address = (const uint8*)_address; 7035 7036 int32 count = 0; 7037 while (numBytes > 0 && count < numEntries) { 7038 physical_entry entry; 7039 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7040 if (result < 0) { 7041 if (result != B_BUFFER_OVERFLOW) 7042 return result; 7043 } 7044 7045 if (entry.address >= (phys_addr_t)1 << 32) { 7046 panic("get_memory_map(): Address is greater 4 GB!"); 7047 return B_ERROR; 7048 } 7049 7050 table[count].address = entry.address; 7051 table[count++].size = entry.size; 7052 7053 address += entry.size; 7054 numBytes -= entry.size; 7055 } 7056 7057 // null-terminate the table, if possible 7058 if (count < numEntries) { 7059 table[count].address = 0; 7060 table[count].size = 0; 7061 } 7062 7063 return B_OK; 7064 } 7065 7066 7067 /*! The type of the \a physicalAddress parameter has changed from void* to 7068 phys_addr_t. 7069 */ 7070 extern "C" area_id 7071 __map_physical_memory_beos(const char* name, void* physicalAddress, 7072 size_t numBytes, uint32 addressSpec, uint32 protection, 7073 void** _virtualAddress) 7074 { 7075 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7076 addressSpec, protection, _virtualAddress); 7077 } 7078 7079 7080 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7081 we meddle with the \a lock parameter to force 32 bit. 
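Concretely, the switch below maps \c B_FULL_LOCK and \c B_LAZY_LOCK to
\c B_32_BIT_FULL_LOCK and \c B_CONTIGUOUS to \c B_32_BIT_CONTIGUOUS, while
\c B_NO_LOCK (and any other value) is passed through unchanged.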
7082 */ 7083 extern "C" area_id 7084 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7085 size_t size, uint32 lock, uint32 protection) 7086 { 7087 switch (lock) { 7088 case B_NO_LOCK: 7089 break; 7090 case B_FULL_LOCK: 7091 case B_LAZY_LOCK: 7092 lock = B_32_BIT_FULL_LOCK; 7093 break; 7094 case B_CONTIGUOUS: 7095 lock = B_32_BIT_CONTIGUOUS; 7096 break; 7097 } 7098 7099 return __create_area_haiku(name, _address, addressSpec, size, lock, 7100 protection); 7101 } 7102 7103 7104 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7105 "BASE"); 7106 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7107 "map_physical_memory@", "BASE"); 7108 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7109 "BASE"); 7110 7111 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7112 "get_memory_map@@", "1_ALPHA3"); 7113 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7114 "map_physical_memory@@", "1_ALPHA3"); 7115 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7116 "1_ALPHA3"); 7117 7118 7119 #else 7120 7121 7122 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7123 "get_memory_map@@", "BASE"); 7124 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7125 "map_physical_memory@@", "BASE"); 7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7127 "BASE"); 7128 7129 7130 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7131