1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/ThreadAutoLock.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_priv.h> 53 #include <vm/VMAddressSpace.h> 54 #include <vm/VMArea.h> 55 #include <vm/VMCache.h> 56 57 #include "VMAddressSpaceLocking.h" 58 #include "VMAnonymousCache.h" 59 #include "VMAnonymousNoSwapCache.h" 60 #include "IORequest.h" 61 62 63 //#define TRACE_VM 64 //#define TRACE_FAULTS 65 #ifdef TRACE_VM 66 # define TRACE(x) dprintf x 67 #else 68 # define TRACE(x) ; 69 #endif 70 #ifdef TRACE_FAULTS 71 # define FTRACE(x) dprintf x 72 #else 73 # define FTRACE(x) ; 74 #endif 75 76 77 namespace { 78 79 class AreaCacheLocking { 80 public: 81 inline bool Lock(VMCache* lockable) 82 { 83 return false; 84 } 85 86 inline void Unlock(VMCache* lockable) 87 { 88 vm_area_put_locked_cache(lockable); 89 } 90 }; 91 92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 93 public: 94 inline AreaCacheLocker(VMCache* cache = NULL) 95 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 96 { 97 } 98 99 inline AreaCacheLocker(VMArea* area) 100 : AutoLocker<VMCache, AreaCacheLocking>() 101 { 102 SetTo(area); 103 } 104 105 inline void SetTo(VMCache* cache, bool alreadyLocked) 106 { 107 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 108 } 109 110 inline void SetTo(VMArea* area) 111 { 112 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 113 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 114 } 115 }; 116 117 118 class VMCacheChainLocker { 119 public: 120 VMCacheChainLocker() 121 : 122 fTopCache(NULL), 123 fBottomCache(NULL) 124 { 125 } 126 127 VMCacheChainLocker(VMCache* topCache) 128 : 129 fTopCache(topCache), 130 fBottomCache(topCache) 131 { 132 } 133 134 ~VMCacheChainLocker() 135 { 136 Unlock(); 137 } 138 139 void SetTo(VMCache* topCache) 140 { 141 fTopCache = topCache; 142 fBottomCache = topCache; 143 144 if (topCache != NULL) 145 topCache->SetUserData(NULL); 146 } 147 148 VMCache* LockSourceCache() 149 { 150 if (fBottomCache == NULL || fBottomCache->source == NULL) 151 return NULL; 152 153 VMCache* previousCache = fBottomCache; 154 155 fBottomCache = fBottomCache->source; 156 fBottomCache->Lock(); 157 fBottomCache->AcquireRefLocked(); 158 fBottomCache->SetUserData(previousCache); 159 160 return fBottomCache; 161 } 162 163 void LockAllSourceCaches() 164 { 165 while (LockSourceCache() != NULL) { 166 } 167 } 168 169 void Unlock(VMCache* exceptCache = NULL) 170 { 171 if (fTopCache == NULL) 172 return; 173 174 // Unlock caches in source -> consumer direction. This is important to 175 // avoid double-locking and a reversal of locking order in case a cache 176 // is eligable for merging. 177 VMCache* cache = fBottomCache; 178 while (cache != NULL) { 179 VMCache* nextCache = (VMCache*)cache->UserData(); 180 if (cache != exceptCache) 181 cache->ReleaseRefAndUnlock(cache != fTopCache); 182 183 if (cache == fTopCache) 184 break; 185 186 cache = nextCache; 187 } 188 189 fTopCache = NULL; 190 fBottomCache = NULL; 191 } 192 193 void UnlockKeepRefs(bool keepTopCacheLocked) 194 { 195 if (fTopCache == NULL) 196 return; 197 198 VMCache* nextCache = fBottomCache; 199 VMCache* cache = NULL; 200 201 while (keepTopCacheLocked 202 ? nextCache != fTopCache : cache != fTopCache) { 203 cache = nextCache; 204 nextCache = (VMCache*)cache->UserData(); 205 cache->Unlock(cache != fTopCache); 206 } 207 } 208 209 void RelockCaches(bool topCacheLocked) 210 { 211 if (fTopCache == NULL) 212 return; 213 214 VMCache* nextCache = fTopCache; 215 VMCache* cache = NULL; 216 if (topCacheLocked) { 217 cache = nextCache; 218 nextCache = cache->source; 219 } 220 221 while (cache != fBottomCache && nextCache != NULL) { 222 VMCache* consumer = cache; 223 cache = nextCache; 224 nextCache = cache->source; 225 cache->Lock(); 226 cache->SetUserData(consumer); 227 } 228 } 229 230 private: 231 VMCache* fTopCache; 232 VMCache* fBottomCache; 233 }; 234 235 } // namespace 236 237 238 // The memory reserve an allocation of the certain priority must not touch. 
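// The table is indexed by the allocation priority (VM_PRIORITY_USER,
// VM_PRIORITY_SYSTEM, VM_PRIORITY_VIP): a user priority allocation has to
// leave VM_MEMORY_RESERVE_USER untouched, a system priority allocation has to
// leave VM_MEMORY_RESERVE_SYSTEM untouched, and only VIP allocations may use
// up the last bit of memory (reserve 0).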
239 static const size_t kMemoryReserveForPriority[] = { 240 VM_MEMORY_RESERVE_USER, // user 241 VM_MEMORY_RESERVE_SYSTEM, // system 242 0 // VIP 243 }; 244 245 246 ObjectCache* gPageMappingsObjectCache; 247 248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 249 250 static off_t sAvailableMemory; 251 static off_t sNeededMemory; 252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 253 static uint32 sPageFaults; 254 255 static VMPhysicalPageMapper* sPhysicalPageMapper; 256 257 #if DEBUG_CACHE_LIST 258 259 struct cache_info { 260 VMCache* cache; 261 addr_t page_count; 262 addr_t committed; 263 }; 264 265 static const int kCacheInfoTableCount = 100 * 1024; 266 static cache_info* sCacheInfoTable; 267 268 #endif // DEBUG_CACHE_LIST 269 270 271 // function declarations 272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 273 bool addressSpaceCleanup); 274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 275 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 276 static status_t map_backing_store(VMAddressSpace* addressSpace, 277 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 278 int protection, int protectionMax, int mapping, uint32 flags, 279 const virtual_address_restrictions* addressRestrictions, bool kernel, 280 VMArea** _area, void** _virtualAddress); 281 static void fix_protection(uint32* protection); 282 283 284 // #pragma mark - 285 286 287 #if VM_PAGE_FAULT_TRACING 288 289 namespace VMPageFaultTracing { 290 291 class PageFaultStart : public AbstractTraceEntry { 292 public: 293 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 294 : 295 fAddress(address), 296 fPC(pc), 297 fWrite(write), 298 fUser(user) 299 { 300 Initialized(); 301 } 302 303 virtual void AddDump(TraceOutput& out) 304 { 305 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 306 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 307 } 308 309 private: 310 addr_t fAddress; 311 addr_t fPC; 312 bool fWrite; 313 bool fUser; 314 }; 315 316 317 // page fault errors 318 enum { 319 PAGE_FAULT_ERROR_NO_AREA = 0, 320 PAGE_FAULT_ERROR_KERNEL_ONLY, 321 PAGE_FAULT_ERROR_WRITE_PROTECTED, 322 PAGE_FAULT_ERROR_READ_PROTECTED, 323 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 324 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 325 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 326 }; 327 328 329 class PageFaultError : public AbstractTraceEntry { 330 public: 331 PageFaultError(area_id area, status_t error) 332 : 333 fArea(area), 334 fError(error) 335 { 336 Initialized(); 337 } 338 339 virtual void AddDump(TraceOutput& out) 340 { 341 switch (fError) { 342 case PAGE_FAULT_ERROR_NO_AREA: 343 out.Print("page fault error: no area"); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_ONLY: 346 out.Print("page fault error: area: %ld, kernel only", fArea); 347 break; 348 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 349 out.Print("page fault error: area: %ld, write protected", 350 fArea); 351 break; 352 case PAGE_FAULT_ERROR_READ_PROTECTED: 353 out.Print("page fault error: area: %ld, read protected", fArea); 354 break; 355 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 356 out.Print("page fault error: area: %ld, execute protected", 357 fArea); 358 break; 359 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 360 out.Print("page fault error: kernel touching bad user memory"); 361 break; 362 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 363 out.Print("page fault error: no address space"); 364 break; 365 default: 366 out.Print("page fault error: area: %ld, error: %s", fArea, 367 strerror(fError)); 368 break; 369 } 370 } 371 372 private: 373 area_id fArea; 374 status_t fError; 375 }; 376 377 378 class PageFaultDone : public AbstractTraceEntry { 379 public: 380 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 381 vm_page* page) 382 : 383 fArea(area), 384 fTopCache(topCache), 385 fCache(cache), 386 fPage(page) 387 { 388 Initialized(); 389 } 390 391 virtual void AddDump(TraceOutput& out) 392 { 393 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 394 "page: %p", fArea, fTopCache, fCache, fPage); 395 } 396 397 private: 398 area_id fArea; 399 VMCache* fTopCache; 400 VMCache* fCache; 401 vm_page* fPage; 402 }; 403 404 } // namespace VMPageFaultTracing 405 406 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 407 #else 408 # define TPF(x) ; 409 #endif // VM_PAGE_FAULT_TRACING 410 411 412 // #pragma mark - 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 increment_page_wired_count(vm_page* page) 419 { 420 if (!page->IsMapped()) 421 atomic_add(&gMappedPagesCount, 1); 422 page->IncrementWiredCount(); 423 } 424 425 426 /*! The page's cache must be locked. 427 */ 428 static inline void 429 decrement_page_wired_count(vm_page* page) 430 { 431 page->DecrementWiredCount(); 432 if (!page->IsMapped()) 433 atomic_add(&gMappedPagesCount, -1); 434 } 435 436 437 static inline addr_t 438 virtual_page_address(VMArea* area, vm_page* page) 439 { 440 return area->Base() 441 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 442 } 443 444 445 //! 
You need to have the address space locked when calling this function 446 static VMArea* 447 lookup_area(VMAddressSpace* addressSpace, area_id id) 448 { 449 VMAreas::ReadLock(); 450 451 VMArea* area = VMAreas::LookupLocked(id); 452 if (area != NULL && area->address_space != addressSpace) 453 area = NULL; 454 455 VMAreas::ReadUnlock(); 456 457 return area; 458 } 459 460 461 static status_t 462 allocate_area_page_protections(VMArea* area) 463 { 464 // In the page protections we store only the three user protections, 465 // so we use 4 bits per page. 466 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 467 area->page_protections = (uint8*)malloc_etc(bytes, 468 area->address_space == VMAddressSpace::Kernel() 469 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 470 if (area->page_protections == NULL) 471 return B_NO_MEMORY; 472 473 // init the page protections for all pages to that of the area 474 uint32 areaProtection = area->protection 475 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 476 memset(area->page_protections, areaProtection | (areaProtection << 4), 477 bytes); 478 return B_OK; 479 } 480 481 482 static inline void 483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 484 { 485 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 486 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 487 uint8& entry = area->page_protections[pageIndex / 2]; 488 if (pageIndex % 2 == 0) 489 entry = (entry & 0xf0) | protection; 490 else 491 entry = (entry & 0x0f) | (protection << 4); 492 } 493 494 495 static inline uint32 496 get_area_page_protection(VMArea* area, addr_t pageAddress) 497 { 498 if (area->page_protections == NULL) 499 return area->protection; 500 501 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 502 uint32 protection = area->page_protections[pageIndex / 2]; 503 if (pageIndex % 2 == 0) 504 protection &= 0x0f; 505 else 506 protection >>= 4; 507 508 uint32 kernelProtection = 0; 509 if ((protection & B_READ_AREA) != 0) 510 kernelProtection |= B_KERNEL_READ_AREA; 511 if ((protection & B_WRITE_AREA) != 0) 512 kernelProtection |= B_KERNEL_WRITE_AREA; 513 514 // If this is a kernel area we return only the kernel flags. 515 if (area->address_space == VMAddressSpace::Kernel()) 516 return kernelProtection; 517 518 return protection | kernelProtection; 519 } 520 521 522 /*! The caller must have reserved enough pages the translation map 523 implementation might need to map this page. 524 The page's cache must be locked. 525 */ 526 static status_t 527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 528 vm_page_reservation* reservation) 529 { 530 VMTranslationMap* map = area->address_space->TranslationMap(); 531 532 bool wasMapped = page->IsMapped(); 533 534 if (area->wiring == B_NO_LOCK) { 535 DEBUG_PAGE_ACCESS_CHECK(page); 536 537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 539 gPageMappingsObjectCache, 540 CACHE_DONT_WAIT_FOR_MEMORY 541 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 542 if (mapping == NULL) 543 return B_NO_MEMORY; 544 545 mapping->page = page; 546 mapping->area = area; 547 548 map->Lock(); 549 550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 551 area->MemoryType(), reservation); 552 553 // insert mapping into lists 554 if (!page->IsMapped()) 555 atomic_add(&gMappedPagesCount, 1); 556 557 page->mappings.Add(mapping); 558 area->mappings.Add(mapping); 559 560 map->Unlock(); 561 } else { 562 DEBUG_PAGE_ACCESS_CHECK(page); 563 564 map->Lock(); 565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 566 area->MemoryType(), reservation); 567 map->Unlock(); 568 569 increment_page_wired_count(page); 570 } 571 572 if (!wasMapped) { 573 // The page is mapped now, so we must not remain in the cached queue. 574 // It also makes sense to move it from the inactive to the active, since 575 // otherwise the page daemon wouldn't come to keep track of it (in idle 576 // mode) -- if the page isn't touched, it will be deactivated after a 577 // full iteration through the queue at the latest. 578 if (page->State() == PAGE_STATE_CACHED 579 || page->State() == PAGE_STATE_INACTIVE) { 580 vm_page_set_state(page, PAGE_STATE_ACTIVE); 581 } 582 } 583 584 return B_OK; 585 } 586 587 588 /*! If \a preserveModified is \c true, the caller must hold the lock of the 589 page's cache. 590 */ 591 static inline bool 592 unmap_page(VMArea* area, addr_t virtualAddress) 593 { 594 return area->address_space->TranslationMap()->UnmapPage(area, 595 virtualAddress, true); 596 } 597 598 599 /*! If \a preserveModified is \c true, the caller must hold the lock of all 600 mapped pages' caches. 601 */ 602 static inline void 603 unmap_pages(VMArea* area, addr_t base, size_t size) 604 { 605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 606 } 607 608 609 static inline bool 610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 611 { 612 if (address < area->Base()) { 613 offset = area->Base() - address; 614 if (offset >= size) 615 return false; 616 617 address = area->Base(); 618 size -= offset; 619 offset = 0; 620 if (size > area->Size()) 621 size = area->Size(); 622 623 return true; 624 } 625 626 offset = address - area->Base(); 627 if (offset >= area->Size()) 628 return false; 629 630 if (size >= area->Size() - offset) 631 size = area->Size() - offset; 632 633 return true; 634 } 635 636 637 /*! Cuts a piece out of an area. If the given cut range covers the complete 638 area, it is deleted. If it covers the beginning or the end, the area is 639 resized accordingly. If the range covers some part in the middle of the 640 area, it is split in two; in this case the second area is returned via 641 \a _secondArea (the variable is left untouched in the other cases). 642 The address space must be write locked. 643 The caller must ensure that no part of the given range is wired. 644 */ 645 static status_t 646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 647 addr_t size, VMArea** _secondArea, bool kernel) 648 { 649 addr_t offset; 650 if (!intersect_area(area, address, size, offset)) 651 return B_OK; 652 653 // Is the area fully covered? 
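	// (After intersect_area() the range is clipped to the area, so only four
	// cases remain: the range covers the whole area, its tail, its head, or a
	// piece in the middle; the last case requires splitting the area in two.)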
654 if (address == area->Base() && size == area->Size()) { 655 delete_area(addressSpace, area, false); 656 return B_OK; 657 } 658 659 int priority; 660 uint32 allocationFlags; 661 if (addressSpace == VMAddressSpace::Kernel()) { 662 priority = VM_PRIORITY_SYSTEM; 663 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 664 | HEAP_DONT_LOCK_KERNEL_SPACE; 665 } else { 666 priority = VM_PRIORITY_USER; 667 allocationFlags = 0; 668 } 669 670 VMCache* cache = vm_area_get_locked_cache(area); 671 VMCacheChainLocker cacheChainLocker(cache); 672 cacheChainLocker.LockAllSourceCaches(); 673 674 // If no one else uses the area's cache and it's an anonymous cache, we can 675 // resize or split it, too. 676 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 677 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 678 679 // Cut the end only? 680 if (offset > 0 && size == area->Size() - offset) { 681 status_t error = addressSpace->ShrinkAreaTail(area, offset, 682 allocationFlags); 683 if (error != B_OK) 684 return error; 685 686 // unmap pages 687 unmap_pages(area, address, size); 688 689 if (onlyCacheUser) { 690 // Since VMCache::Resize() can temporarily drop the lock, we must 691 // unlock all lower caches to prevent locking order inversion. 692 cacheChainLocker.Unlock(cache); 693 cache->Resize(cache->virtual_base + offset, priority); 694 cache->ReleaseRefAndUnlock(); 695 } 696 697 return B_OK; 698 } 699 700 // Cut the beginning only? 701 if (area->Base() == address) { 702 // resize the area 703 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 704 allocationFlags); 705 if (error != B_OK) 706 return error; 707 708 // unmap pages 709 unmap_pages(area, address, size); 710 711 if (onlyCacheUser) { 712 // Since VMCache::Rebase() can temporarily drop the lock, we must 713 // unlock all lower caches to prevent locking order inversion. 714 cacheChainLocker.Unlock(cache); 715 cache->Rebase(cache->virtual_base + size, priority); 716 cache->ReleaseRefAndUnlock(); 717 } 718 area->cache_offset += size; 719 720 return B_OK; 721 } 722 723 // The tough part -- cut a piece out of the middle of the area. 724 // We do that by shrinking the area to the begin section and creating a 725 // new area for the end section. 726 addr_t firstNewSize = offset; 727 addr_t secondBase = address + size; 728 addr_t secondSize = area->Size() - offset - size; 729 730 // unmap pages 731 unmap_pages(area, address, area->Size() - firstNewSize); 732 733 // resize the area 734 addr_t oldSize = area->Size(); 735 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 736 allocationFlags); 737 if (error != B_OK) 738 return error; 739 740 virtual_address_restrictions addressRestrictions = {}; 741 addressRestrictions.address = (void*)secondBase; 742 addressRestrictions.address_specification = B_EXACT_ADDRESS; 743 VMArea* secondArea; 744 745 if (onlyCacheUser) { 746 // Create a new cache for the second area. 747 VMCache* secondCache; 748 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 749 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 750 if (error != B_OK) { 751 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 752 return error; 753 } 754 755 secondCache->Lock(); 756 secondCache->temporary = cache->temporary; 757 secondCache->virtual_base = area->cache_offset; 758 secondCache->virtual_end = area->cache_offset + secondSize; 759 760 // Transfer the concerned pages from the first cache. 
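		// (adoptOffset is the position of the second area's pages within the
		// old cache; Adopt() moves the range [adoptOffset, adoptOffset
		// + secondSize) over to offset area->cache_offset in the new cache.)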
761 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 762 error = secondCache->Adopt(cache, adoptOffset, secondSize, 763 area->cache_offset); 764 765 if (error == B_OK) { 766 // Since VMCache::Resize() can temporarily drop the lock, we must 767 // unlock all lower caches to prevent locking order inversion. 768 cacheChainLocker.Unlock(cache); 769 cache->Resize(cache->virtual_base + firstNewSize, priority); 770 // Don't unlock the cache yet because we might have to resize it 771 // back. 772 773 // Map the second area. 774 error = map_backing_store(addressSpace, secondCache, 775 area->cache_offset, area->name, secondSize, area->wiring, 776 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 777 &addressRestrictions, kernel, &secondArea, NULL); 778 } 779 780 if (error != B_OK) { 781 // Restore the original cache. 782 cache->Resize(cache->virtual_base + oldSize, priority); 783 784 // Move the pages back. 785 status_t readoptStatus = cache->Adopt(secondCache, 786 area->cache_offset, secondSize, adoptOffset); 787 if (readoptStatus != B_OK) { 788 // Some (swap) pages have not been moved back and will be lost 789 // once the second cache is deleted. 790 panic("failed to restore cache range: %s", 791 strerror(readoptStatus)); 792 793 // TODO: Handle out of memory cases by freeing memory and 794 // retrying. 795 } 796 797 cache->ReleaseRefAndUnlock(); 798 secondCache->ReleaseRefAndUnlock(); 799 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 800 return error; 801 } 802 803 // Now we can unlock it. 804 cache->ReleaseRefAndUnlock(); 805 secondCache->Unlock(); 806 } else { 807 error = map_backing_store(addressSpace, cache, area->cache_offset 808 + (secondBase - area->Base()), 809 area->name, secondSize, area->wiring, area->protection, 810 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 811 &addressRestrictions, kernel, &secondArea, NULL); 812 if (error != B_OK) { 813 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 814 return error; 815 } 816 // We need a cache reference for the new area. 817 cache->AcquireRefLocked(); 818 } 819 820 if (_secondArea != NULL) 821 *_secondArea = secondArea; 822 823 return B_OK; 824 } 825 826 827 /*! Deletes or cuts all areas in the given address range. 828 The address space must be write-locked. 829 The caller must ensure that no part of the given range is wired. 830 */ 831 static status_t 832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 833 bool kernel) 834 { 835 size = PAGE_ALIGN(size); 836 837 // Check, whether the caller is allowed to modify the concerned areas. 838 if (!kernel) { 839 for (VMAddressSpace::AreaRangeIterator it 840 = addressSpace->GetAreaRangeIterator(address, size); 841 VMArea* area = it.Next();) { 842 843 if ((area->protection & B_KERNEL_AREA) != 0) { 844 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 845 "unmap range of kernel area %" B_PRId32 " (%s)\n", 846 team_get_current_team_id(), area->id, area->name); 847 return B_NOT_ALLOWED; 848 } 849 } 850 } 851 852 for (VMAddressSpace::AreaRangeIterator it 853 = addressSpace->GetAreaRangeIterator(address, size); 854 VMArea* area = it.Next();) { 855 856 status_t error = cut_area(addressSpace, area, address, size, NULL, 857 kernel); 858 if (error != B_OK) 859 return error; 860 // Failing after already messing with areas is ugly, but we 861 // can't do anything about it. 
862 } 863 864 return B_OK; 865 } 866 867 868 static status_t 869 discard_area_range(VMArea* area, addr_t address, addr_t size) 870 { 871 addr_t offset; 872 if (!intersect_area(area, address, size, offset)) 873 return B_OK; 874 875 // If someone else uses the area's cache or it's not an anonymous cache, we 876 // can't discard. 877 VMCache* cache = vm_area_get_locked_cache(area); 878 if (cache->areas != area || area->cache_next != NULL 879 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 880 return B_OK; 881 } 882 883 VMCacheChainLocker cacheChainLocker(cache); 884 cacheChainLocker.LockAllSourceCaches(); 885 886 unmap_pages(area, address, size); 887 888 // Since VMCache::Discard() can temporarily drop the lock, we must 889 // unlock all lower caches to prevent locking order inversion. 890 cacheChainLocker.Unlock(cache); 891 cache->Discard(cache->virtual_base + offset, size); 892 cache->ReleaseRefAndUnlock(); 893 894 return B_OK; 895 } 896 897 898 static status_t 899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 900 bool kernel) 901 { 902 for (VMAddressSpace::AreaRangeIterator it 903 = addressSpace->GetAreaRangeIterator(address, size); 904 VMArea* area = it.Next();) { 905 status_t error = discard_area_range(area, address, size); 906 if (error != B_OK) 907 return error; 908 } 909 910 return B_OK; 911 } 912 913 914 /*! You need to hold the lock of the cache and the write lock of the address 915 space when calling this function. 916 Note, that in case of error your cache will be temporarily unlocked. 917 If \a addressSpec is \c B_EXACT_ADDRESS and the 918 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 919 that no part of the specified address range (base \c *_virtualAddress, size 920 \a size) is wired. 
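	A typical caller (e.g. vm_block_address_range() or vm_create_null_area()
	below) creates a cache, locks it, and then calls this function with the
	cache still locked; on success the new area is returned via \a _area.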
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}
	if (offset < 0)
		return B_BAD_VALUE;

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area, and must back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status == B_NO_MEMORY
			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
		// Due to how many locks are held, we cannot wait here for space to be
		// freed up, but we can at least notify the low_resource handler.
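		// (The low resource manager is only notified here; the B_NO_MEMORY
		// error from InsertArea() is still returned to the caller below.)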
1019 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1020 } 1021 if (status != B_OK) 1022 goto err2; 1023 1024 // attach the cache to the area 1025 area->cache = cache; 1026 area->cache_offset = offset; 1027 1028 // point the cache back to the area 1029 cache->InsertAreaLocked(area); 1030 if (mapping == REGION_PRIVATE_MAP) 1031 cache->Unlock(); 1032 1033 // insert the area in the global areas map 1034 VMAreas::Insert(area); 1035 1036 // grab a ref to the address space (the area holds this) 1037 addressSpace->Get(); 1038 1039 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1040 // cache, sourceCache, areaName, area); 1041 1042 *_area = area; 1043 return B_OK; 1044 1045 err2: 1046 if (mapping == REGION_PRIVATE_MAP) { 1047 // We created this cache, so we must delete it again. Note, that we 1048 // need to temporarily unlock the source cache or we'll otherwise 1049 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1050 sourceCache->Unlock(); 1051 cache->ReleaseRefAndUnlock(); 1052 sourceCache->Lock(); 1053 } 1054 err1: 1055 addressSpace->DeleteArea(area, allocationFlags); 1056 return status; 1057 } 1058 1059 1060 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1061 locker1, locker2). 1062 */ 1063 template<typename LockerType1, typename LockerType2> 1064 static inline bool 1065 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1066 { 1067 area->cache->AssertLocked(); 1068 1069 VMAreaUnwiredWaiter waiter; 1070 if (!area->AddWaiterIfWired(&waiter)) 1071 return false; 1072 1073 // unlock everything and wait 1074 if (locker1 != NULL) 1075 locker1->Unlock(); 1076 if (locker2 != NULL) 1077 locker2->Unlock(); 1078 1079 waiter.waitEntry.Wait(); 1080 1081 return true; 1082 } 1083 1084 1085 /*! Checks whether the given area has any wired ranges intersecting with the 1086 specified range and waits, if so. 1087 1088 When it has to wait, the function calls \c Unlock() on both \a locker1 1089 and \a locker2, if given. 1090 The area's top cache must be locked and must be unlocked as a side effect 1091 of calling \c Unlock() on either \a locker1 or \a locker2. 1092 1093 If the function does not have to wait it does not modify or unlock any 1094 object. 1095 1096 \param area The area to be checked. 1097 \param base The base address of the range to check. 1098 \param size The size of the address range to check. 1099 \param locker1 An object to be unlocked when before starting to wait (may 1100 be \c NULL). 1101 \param locker2 An object to be unlocked when before starting to wait (may 1102 be \c NULL). 1103 \return \c true, if the function had to wait, \c false otherwise. 1104 */ 1105 template<typename LockerType1, typename LockerType2> 1106 static inline bool 1107 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1108 LockerType1* locker1, LockerType2* locker2) 1109 { 1110 area->cache->AssertLocked(); 1111 1112 VMAreaUnwiredWaiter waiter; 1113 if (!area->AddWaiterIfWired(&waiter, base, size)) 1114 return false; 1115 1116 // unlock everything and wait 1117 if (locker1 != NULL) 1118 locker1->Unlock(); 1119 if (locker2 != NULL) 1120 locker2->Unlock(); 1121 1122 waiter.waitEntry.Wait(); 1123 1124 return true; 1125 } 1126 1127 1128 /*! Checks whether the given address space has any wired ranges intersecting 1129 with the specified range and waits, if so. 
1130 1131 Similar to wait_if_area_range_is_wired(), with the following differences: 1132 - All areas intersecting with the range are checked (respectively all until 1133 one is found that contains a wired range intersecting with the given 1134 range). 1135 - The given address space must at least be read-locked and must be unlocked 1136 when \c Unlock() is called on \a locker. 1137 - None of the areas' caches are allowed to be locked. 1138 */ 1139 template<typename LockerType> 1140 static inline bool 1141 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1142 size_t size, LockerType* locker) 1143 { 1144 for (VMAddressSpace::AreaRangeIterator it 1145 = addressSpace->GetAreaRangeIterator(base, size); 1146 VMArea* area = it.Next();) { 1147 1148 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1149 1150 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1151 return true; 1152 } 1153 1154 return false; 1155 } 1156 1157 1158 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1159 It must be called in a situation where the kernel address space may be 1160 locked. 1161 */ 1162 status_t 1163 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1164 { 1165 AddressSpaceReadLocker locker; 1166 VMArea* area; 1167 status_t status = locker.SetFromArea(id, area); 1168 if (status != B_OK) 1169 return status; 1170 1171 if (area->page_protections == NULL) { 1172 status = allocate_area_page_protections(area); 1173 if (status != B_OK) 1174 return status; 1175 } 1176 1177 *cookie = (void*)area; 1178 return B_OK; 1179 } 1180 1181 1182 /*! This is a debug helper function that can only be used with very specific 1183 use cases. 1184 Sets protection for the given address range to the protection specified. 1185 If \a protection is 0 then the involved pages will be marked non-present 1186 in the translation map to cause a fault on access. The pages aren't 1187 actually unmapped however so that they can be marked present again with 1188 additional calls to this function. For this to work the area must be 1189 fully locked in memory so that the pages aren't otherwise touched. 1190 This function does not lock the kernel address space and needs to be 1191 supplied with a \a cookie retrieved from a successful call to 1192 vm_prepare_kernel_area_debug_protection(). 1193 */ 1194 status_t 1195 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1196 uint32 protection) 1197 { 1198 // check address range 1199 addr_t address = (addr_t)_address; 1200 size = PAGE_ALIGN(size); 1201 1202 if ((address % B_PAGE_SIZE) != 0 1203 || (addr_t)address + size < (addr_t)address 1204 || !IS_KERNEL_ADDRESS(address) 1205 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1206 return B_BAD_VALUE; 1207 } 1208 1209 // Translate the kernel protection to user protection as we only store that. 
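	// (For example, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA is stored as
	// B_READ_AREA | B_WRITE_AREA in the per-page protection nibbles, while a
	// protection of 0 stays 0 and marks the pages non-present below.)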
1210 if ((protection & B_KERNEL_READ_AREA) != 0) 1211 protection |= B_READ_AREA; 1212 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1213 protection |= B_WRITE_AREA; 1214 1215 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1216 VMTranslationMap* map = addressSpace->TranslationMap(); 1217 VMArea* area = (VMArea*)cookie; 1218 1219 addr_t offset = address - area->Base(); 1220 if (area->Size() - offset < size) { 1221 panic("protect range not fully within supplied area"); 1222 return B_BAD_VALUE; 1223 } 1224 1225 if (area->page_protections == NULL) { 1226 panic("area has no page protections"); 1227 return B_BAD_VALUE; 1228 } 1229 1230 // Invalidate the mapping entries so any access to them will fault or 1231 // restore the mapping entries unchanged so that lookup will success again. 1232 map->Lock(); 1233 map->DebugMarkRangePresent(address, address + size, protection != 0); 1234 map->Unlock(); 1235 1236 // And set the proper page protections so that the fault case will actually 1237 // fail and not simply try to map a new page. 1238 for (addr_t pageAddress = address; pageAddress < address + size; 1239 pageAddress += B_PAGE_SIZE) { 1240 set_area_page_protection(area, pageAddress, protection); 1241 } 1242 1243 return B_OK; 1244 } 1245 1246 1247 status_t 1248 vm_block_address_range(const char* name, void* address, addr_t size) 1249 { 1250 if (!arch_vm_supports_protection(0)) 1251 return B_NOT_SUPPORTED; 1252 1253 AddressSpaceWriteLocker locker; 1254 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1255 if (status != B_OK) 1256 return status; 1257 1258 VMAddressSpace* addressSpace = locker.AddressSpace(); 1259 1260 // create an anonymous cache 1261 VMCache* cache; 1262 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1263 VM_PRIORITY_SYSTEM); 1264 if (status != B_OK) 1265 return status; 1266 1267 cache->temporary = 1; 1268 cache->virtual_end = size; 1269 cache->Lock(); 1270 1271 VMArea* area; 1272 virtual_address_restrictions addressRestrictions = {}; 1273 addressRestrictions.address = address; 1274 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1275 status = map_backing_store(addressSpace, cache, 0, name, size, 1276 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1277 true, &area, NULL); 1278 if (status != B_OK) { 1279 cache->ReleaseRefAndUnlock(); 1280 return status; 1281 } 1282 1283 cache->Unlock(); 1284 area->cache_type = CACHE_TYPE_RAM; 1285 return area->id; 1286 } 1287 1288 1289 status_t 1290 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1291 { 1292 AddressSpaceWriteLocker locker(team); 1293 if (!locker.IsLocked()) 1294 return B_BAD_TEAM_ID; 1295 1296 VMAddressSpace* addressSpace = locker.AddressSpace(); 1297 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1298 addressSpace == VMAddressSpace::Kernel() 1299 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1300 } 1301 1302 1303 status_t 1304 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1305 addr_t size, uint32 flags) 1306 { 1307 if (size == 0) 1308 return B_BAD_VALUE; 1309 1310 AddressSpaceWriteLocker locker(team); 1311 if (!locker.IsLocked()) 1312 return B_BAD_TEAM_ID; 1313 1314 virtual_address_restrictions addressRestrictions = {}; 1315 addressRestrictions.address = *_address; 1316 addressRestrictions.address_specification = addressSpec; 1317 VMAddressSpace* addressSpace = locker.AddressSpace(); 1318 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1319 addressSpace == VMAddressSpace::Kernel() 1320 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1321 _address); 1322 } 1323 1324 1325 area_id 1326 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1327 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1328 const virtual_address_restrictions* virtualAddressRestrictions, 1329 const physical_address_restrictions* physicalAddressRestrictions, 1330 bool kernel, void** _address) 1331 { 1332 VMArea* area; 1333 VMCache* cache; 1334 vm_page* page = NULL; 1335 bool isStack = (protection & B_STACK_AREA) != 0; 1336 page_num_t guardPages; 1337 bool canOvercommit = false; 1338 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1339 ? VM_PAGE_ALLOC_CLEAR : 0; 1340 1341 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1342 team, name, size)); 1343 1344 size = PAGE_ALIGN(size); 1345 guardSize = PAGE_ALIGN(guardSize); 1346 guardPages = guardSize / B_PAGE_SIZE; 1347 1348 if (size == 0 || size < guardSize) 1349 return B_BAD_VALUE; 1350 if (!arch_vm_supports_protection(protection)) 1351 return B_NOT_SUPPORTED; 1352 1353 if (team == B_CURRENT_TEAM) 1354 team = VMAddressSpace::CurrentID(); 1355 if (team < 0) 1356 return B_BAD_TEAM_ID; 1357 1358 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1359 canOvercommit = true; 1360 1361 #ifdef DEBUG_KERNEL_STACKS 1362 if ((protection & B_KERNEL_STACK_AREA) != 0) 1363 isStack = true; 1364 #endif 1365 1366 // check parameters 1367 switch (virtualAddressRestrictions->address_specification) { 1368 case B_ANY_ADDRESS: 1369 case B_EXACT_ADDRESS: 1370 case B_BASE_ADDRESS: 1371 case B_ANY_KERNEL_ADDRESS: 1372 case B_ANY_KERNEL_BLOCK_ADDRESS: 1373 case B_RANDOMIZED_ANY_ADDRESS: 1374 case B_RANDOMIZED_BASE_ADDRESS: 1375 break; 1376 1377 default: 1378 return B_BAD_VALUE; 1379 } 1380 1381 // If low or high physical address restrictions are given, we force 1382 // B_CONTIGUOUS wiring, since only then we'll use 1383 // vm_page_allocate_page_run() which deals with those restrictions. 
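	// (For instance, a caller that merely sets
	// physicalAddressRestrictions->high_address to ask for low physical
	// memory ends up with B_CONTIGUOUS wiring even if it requested
	// B_FULL_LOCK.)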
1384 if (physicalAddressRestrictions->low_address != 0 1385 || physicalAddressRestrictions->high_address != 0) { 1386 wiring = B_CONTIGUOUS; 1387 } 1388 1389 physical_address_restrictions stackPhysicalRestrictions; 1390 bool doReserveMemory = false; 1391 switch (wiring) { 1392 case B_NO_LOCK: 1393 break; 1394 case B_FULL_LOCK: 1395 case B_LAZY_LOCK: 1396 case B_CONTIGUOUS: 1397 doReserveMemory = true; 1398 break; 1399 case B_ALREADY_WIRED: 1400 break; 1401 case B_LOMEM: 1402 stackPhysicalRestrictions = *physicalAddressRestrictions; 1403 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1404 physicalAddressRestrictions = &stackPhysicalRestrictions; 1405 wiring = B_CONTIGUOUS; 1406 doReserveMemory = true; 1407 break; 1408 case B_32_BIT_FULL_LOCK: 1409 if (B_HAIKU_PHYSICAL_BITS <= 32 1410 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1411 wiring = B_FULL_LOCK; 1412 doReserveMemory = true; 1413 break; 1414 } 1415 // TODO: We don't really support this mode efficiently. Just fall 1416 // through for now ... 1417 case B_32_BIT_CONTIGUOUS: 1418 #if B_HAIKU_PHYSICAL_BITS > 32 1419 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1420 stackPhysicalRestrictions = *physicalAddressRestrictions; 1421 stackPhysicalRestrictions.high_address 1422 = (phys_addr_t)1 << 32; 1423 physicalAddressRestrictions = &stackPhysicalRestrictions; 1424 } 1425 #endif 1426 wiring = B_CONTIGUOUS; 1427 doReserveMemory = true; 1428 break; 1429 default: 1430 return B_BAD_VALUE; 1431 } 1432 1433 // Optimization: For a single-page contiguous allocation without low/high 1434 // memory restriction B_FULL_LOCK wiring suffices. 1435 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1436 && physicalAddressRestrictions->low_address == 0 1437 && physicalAddressRestrictions->high_address == 0) { 1438 wiring = B_FULL_LOCK; 1439 } 1440 1441 // For full lock or contiguous areas we're also going to map the pages and 1442 // thus need to reserve pages for the mapping backend upfront. 1443 addr_t reservedMapPages = 0; 1444 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1445 AddressSpaceWriteLocker locker; 1446 status_t status = locker.SetTo(team); 1447 if (status != B_OK) 1448 return status; 1449 1450 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1451 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1452 } 1453 1454 int priority; 1455 if (team != VMAddressSpace::KernelID()) 1456 priority = VM_PRIORITY_USER; 1457 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1458 priority = VM_PRIORITY_VIP; 1459 else 1460 priority = VM_PRIORITY_SYSTEM; 1461 1462 // Reserve memory before acquiring the address space lock. This reduces the 1463 // chances of failure, since while holding the write lock to the address 1464 // space (if it is the kernel address space that is), the low memory handler 1465 // won't be able to free anything for us. 1466 addr_t reservedMemory = 0; 1467 if (doReserveMemory) { 1468 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1469 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1470 return B_NO_MEMORY; 1471 reservedMemory = size; 1472 // TODO: We don't reserve the memory for the pages for the page 1473 // directories/tables. We actually need to do since we currently don't 1474 // reclaim them (and probably can't reclaim all of them anyway). Thus 1475 // there are actually less physical pages than there should be, which 1476 // can get the VM into trouble in low memory situations. 
1477 } 1478 1479 AddressSpaceWriteLocker locker; 1480 VMAddressSpace* addressSpace; 1481 status_t status; 1482 1483 // For full lock areas reserve the pages before locking the address 1484 // space. E.g. block caches can't release their memory while we hold the 1485 // address space lock. 1486 page_num_t reservedPages = reservedMapPages; 1487 if (wiring == B_FULL_LOCK) 1488 reservedPages += size / B_PAGE_SIZE; 1489 1490 vm_page_reservation reservation; 1491 if (reservedPages > 0) { 1492 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1493 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1494 priority)) { 1495 reservedPages = 0; 1496 status = B_WOULD_BLOCK; 1497 goto err0; 1498 } 1499 } else 1500 vm_page_reserve_pages(&reservation, reservedPages, priority); 1501 } 1502 1503 if (wiring == B_CONTIGUOUS) { 1504 // we try to allocate the page run here upfront as this may easily 1505 // fail for obvious reasons 1506 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1507 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1508 if (page == NULL) { 1509 status = B_NO_MEMORY; 1510 goto err0; 1511 } 1512 } 1513 1514 // Lock the address space and, if B_EXACT_ADDRESS and 1515 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1516 // is not wired. 1517 do { 1518 status = locker.SetTo(team); 1519 if (status != B_OK) 1520 goto err1; 1521 1522 addressSpace = locker.AddressSpace(); 1523 } while (virtualAddressRestrictions->address_specification 1524 == B_EXACT_ADDRESS 1525 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1526 && wait_if_address_range_is_wired(addressSpace, 1527 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1528 1529 // create an anonymous cache 1530 // if it's a stack, make sure that two pages are available at least 1531 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1532 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1533 wiring == B_NO_LOCK, priority); 1534 if (status != B_OK) 1535 goto err1; 1536 1537 cache->temporary = 1; 1538 cache->virtual_end = size; 1539 cache->committed_size = reservedMemory; 1540 // TODO: This should be done via a method. 
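	// The cache now owns the reserved memory via committed_size, so clear
	// reservedMemory to keep the error path below from unreserving it again.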
1541 reservedMemory = 0; 1542 1543 cache->Lock(); 1544 1545 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1546 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1547 virtualAddressRestrictions, kernel, &area, _address); 1548 1549 if (status != B_OK) { 1550 cache->ReleaseRefAndUnlock(); 1551 goto err1; 1552 } 1553 1554 locker.DegradeToReadLock(); 1555 1556 switch (wiring) { 1557 case B_NO_LOCK: 1558 case B_LAZY_LOCK: 1559 // do nothing - the pages are mapped in as needed 1560 break; 1561 1562 case B_FULL_LOCK: 1563 { 1564 // Allocate and map all pages for this area 1565 1566 off_t offset = 0; 1567 for (addr_t address = area->Base(); 1568 address < area->Base() + (area->Size() - 1); 1569 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1570 #ifdef DEBUG_KERNEL_STACKS 1571 # ifdef STACK_GROWS_DOWNWARDS 1572 if (isStack && address < area->Base() 1573 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1574 # else 1575 if (isStack && address >= area->Base() + area->Size() 1576 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1577 # endif 1578 continue; 1579 #endif 1580 vm_page* page = vm_page_allocate_page(&reservation, 1581 PAGE_STATE_WIRED | pageAllocFlags); 1582 cache->InsertPage(page, offset); 1583 map_page(area, page, address, protection, &reservation); 1584 1585 DEBUG_PAGE_ACCESS_END(page); 1586 } 1587 1588 break; 1589 } 1590 1591 case B_ALREADY_WIRED: 1592 { 1593 // The pages should already be mapped. This is only really useful 1594 // during boot time. Find the appropriate vm_page objects and stick 1595 // them in the cache object. 1596 VMTranslationMap* map = addressSpace->TranslationMap(); 1597 off_t offset = 0; 1598 1599 if (!gKernelStartup) 1600 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1601 1602 map->Lock(); 1603 1604 for (addr_t virtualAddress = area->Base(); 1605 virtualAddress < area->Base() + (area->Size() - 1); 1606 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1607 phys_addr_t physicalAddress; 1608 uint32 flags; 1609 status = map->Query(virtualAddress, &physicalAddress, &flags); 1610 if (status < B_OK) { 1611 panic("looking up mapping failed for va 0x%lx\n", 1612 virtualAddress); 1613 } 1614 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1615 if (page == NULL) { 1616 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1617 "\n", physicalAddress); 1618 } 1619 1620 DEBUG_PAGE_ACCESS_START(page); 1621 1622 cache->InsertPage(page, offset); 1623 increment_page_wired_count(page); 1624 vm_page_set_state(page, PAGE_STATE_WIRED); 1625 page->busy = false; 1626 1627 DEBUG_PAGE_ACCESS_END(page); 1628 } 1629 1630 map->Unlock(); 1631 break; 1632 } 1633 1634 case B_CONTIGUOUS: 1635 { 1636 // We have already allocated our continuous pages run, so we can now 1637 // just map them in the address space 1638 VMTranslationMap* map = addressSpace->TranslationMap(); 1639 phys_addr_t physicalAddress 1640 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1641 addr_t virtualAddress = area->Base(); 1642 off_t offset = 0; 1643 1644 map->Lock(); 1645 1646 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1647 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1648 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1649 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1650 if (page == NULL) 1651 panic("couldn't lookup physical page just allocated\n"); 1652 1653 status = map->Map(virtualAddress, physicalAddress, protection, 1654 area->MemoryType(), &reservation); 1655 if (status < B_OK) 1656 panic("couldn't map physical page in 
page run\n"); 1657 1658 cache->InsertPage(page, offset); 1659 increment_page_wired_count(page); 1660 1661 DEBUG_PAGE_ACCESS_END(page); 1662 } 1663 1664 map->Unlock(); 1665 break; 1666 } 1667 1668 default: 1669 break; 1670 } 1671 1672 cache->Unlock(); 1673 1674 if (reservedPages > 0) 1675 vm_page_unreserve_pages(&reservation); 1676 1677 TRACE(("vm_create_anonymous_area: done\n")); 1678 1679 area->cache_type = CACHE_TYPE_RAM; 1680 return area->id; 1681 1682 err1: 1683 if (wiring == B_CONTIGUOUS) { 1684 // we had reserved the area space upfront... 1685 phys_addr_t pageNumber = page->physical_page_number; 1686 int32 i; 1687 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1688 page = vm_lookup_page(pageNumber); 1689 if (page == NULL) 1690 panic("couldn't lookup physical page just allocated\n"); 1691 1692 vm_page_set_state(page, PAGE_STATE_FREE); 1693 } 1694 } 1695 1696 err0: 1697 if (reservedPages > 0) 1698 vm_page_unreserve_pages(&reservation); 1699 if (reservedMemory > 0) 1700 vm_unreserve_memory(reservedMemory); 1701 1702 return status; 1703 } 1704 1705 1706 area_id 1707 vm_map_physical_memory(team_id team, const char* name, void** _address, 1708 uint32 addressSpec, addr_t size, uint32 protection, 1709 phys_addr_t physicalAddress, bool alreadyWired) 1710 { 1711 VMArea* area; 1712 VMCache* cache; 1713 addr_t mapOffset; 1714 1715 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1716 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1717 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1718 addressSpec, size, protection, physicalAddress)); 1719 1720 if (!arch_vm_supports_protection(protection)) 1721 return B_NOT_SUPPORTED; 1722 1723 AddressSpaceWriteLocker locker(team); 1724 if (!locker.IsLocked()) 1725 return B_BAD_TEAM_ID; 1726 1727 // if the physical address is somewhat inside a page, 1728 // move the actual area down to align on a page boundary 1729 mapOffset = physicalAddress % B_PAGE_SIZE; 1730 size += mapOffset; 1731 physicalAddress -= mapOffset; 1732 1733 size = PAGE_ALIGN(size); 1734 1735 // create a device cache 1736 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1737 if (status != B_OK) 1738 return status; 1739 1740 cache->virtual_end = size; 1741 1742 cache->Lock(); 1743 1744 virtual_address_restrictions addressRestrictions = {}; 1745 addressRestrictions.address = *_address; 1746 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1747 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1748 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1749 true, &area, _address); 1750 1751 if (status < B_OK) 1752 cache->ReleaseRefLocked(); 1753 1754 cache->Unlock(); 1755 1756 if (status == B_OK) { 1757 // set requested memory type -- use uncached, if not given 1758 uint32 memoryType = addressSpec & B_MTR_MASK; 1759 if (memoryType == 0) 1760 memoryType = B_MTR_UC; 1761 1762 area->SetMemoryType(memoryType); 1763 1764 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1765 if (status != B_OK) 1766 delete_area(locker.AddressSpace(), area, false); 1767 } 1768 1769 if (status != B_OK) 1770 return status; 1771 1772 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1773 1774 if (alreadyWired) { 1775 // The area is already mapped, but possibly not with the right 1776 // memory type. 
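		// (Re-protecting the whole area rewrites the existing mappings, which
		// should also apply the memory type set above to them.)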
1777 map->Lock(); 1778 map->ProtectArea(area, area->protection); 1779 map->Unlock(); 1780 } else { 1781 // Map the area completely. 1782 1783 // reserve pages needed for the mapping 1784 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1785 area->Base() + (size - 1)); 1786 vm_page_reservation reservation; 1787 vm_page_reserve_pages(&reservation, reservePages, 1788 team == VMAddressSpace::KernelID() 1789 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1790 1791 map->Lock(); 1792 1793 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1794 map->Map(area->Base() + offset, physicalAddress + offset, 1795 protection, area->MemoryType(), &reservation); 1796 } 1797 1798 map->Unlock(); 1799 1800 vm_page_unreserve_pages(&reservation); 1801 } 1802 1803 // modify the pointer returned to be offset back into the new area 1804 // the same way the physical address in was offset 1805 *_address = (void*)((addr_t)*_address + mapOffset); 1806 1807 area->cache_type = CACHE_TYPE_DEVICE; 1808 return area->id; 1809 } 1810 1811 1812 /*! Don't use! 1813 TODO: This function was introduced to map physical page vecs to 1814 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1815 use a device cache and does not track vm_page::wired_count! 1816 */ 1817 area_id 1818 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1819 uint32 addressSpec, addr_t* _size, uint32 protection, 1820 struct generic_io_vec* vecs, uint32 vecCount) 1821 { 1822 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1823 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1824 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1825 addressSpec, _size, protection, vecs, vecCount)); 1826 1827 if (!arch_vm_supports_protection(protection) 1828 || (addressSpec & B_MTR_MASK) != 0) { 1829 return B_NOT_SUPPORTED; 1830 } 1831 1832 AddressSpaceWriteLocker locker(team); 1833 if (!locker.IsLocked()) 1834 return B_BAD_TEAM_ID; 1835 1836 if (vecCount == 0) 1837 return B_BAD_VALUE; 1838 1839 addr_t size = 0; 1840 for (uint32 i = 0; i < vecCount; i++) { 1841 if (vecs[i].base % B_PAGE_SIZE != 0 1842 || vecs[i].length % B_PAGE_SIZE != 0) { 1843 return B_BAD_VALUE; 1844 } 1845 1846 size += vecs[i].length; 1847 } 1848 1849 // create a device cache 1850 VMCache* cache; 1851 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1852 if (result != B_OK) 1853 return result; 1854 1855 cache->virtual_end = size; 1856 1857 cache->Lock(); 1858 1859 VMArea* area; 1860 virtual_address_restrictions addressRestrictions = {}; 1861 addressRestrictions.address = *_address; 1862 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1863 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1864 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1865 &addressRestrictions, true, &area, _address); 1866 1867 if (result != B_OK) 1868 cache->ReleaseRefLocked(); 1869 1870 cache->Unlock(); 1871 1872 if (result != B_OK) 1873 return result; 1874 1875 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1876 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1877 area->Base() + (size - 1)); 1878 1879 vm_page_reservation reservation; 1880 vm_page_reserve_pages(&reservation, reservePages, 1881 team == VMAddressSpace::KernelID() 1882 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1883 map->Lock(); 1884 1885 uint32 vecIndex = 0; 1886 size_t vecOffset = 0; 1887 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1888 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1889 vecOffset = 0; 1890 vecIndex++; 1891 } 1892 1893 if (vecIndex >= vecCount) 1894 break; 1895 1896 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1897 protection, area->MemoryType(), &reservation); 1898 1899 vecOffset += B_PAGE_SIZE; 1900 } 1901 1902 map->Unlock(); 1903 vm_page_unreserve_pages(&reservation); 1904 1905 if (_size != NULL) 1906 *_size = size; 1907 1908 area->cache_type = CACHE_TYPE_DEVICE; 1909 return area->id; 1910 } 1911 1912 1913 area_id 1914 vm_create_null_area(team_id team, const char* name, void** address, 1915 uint32 addressSpec, addr_t size, uint32 flags) 1916 { 1917 size = PAGE_ALIGN(size); 1918 1919 // Lock the address space and, if B_EXACT_ADDRESS and 1920 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1921 // is not wired. 1922 AddressSpaceWriteLocker locker; 1923 do { 1924 if (locker.SetTo(team) != B_OK) 1925 return B_BAD_TEAM_ID; 1926 } while (addressSpec == B_EXACT_ADDRESS 1927 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1928 && wait_if_address_range_is_wired(locker.AddressSpace(), 1929 (addr_t)*address, size, &locker)); 1930 1931 // create a null cache 1932 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1933 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1934 VMCache* cache; 1935 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1936 if (status != B_OK) 1937 return status; 1938 1939 cache->temporary = 1; 1940 cache->virtual_end = size; 1941 1942 cache->Lock(); 1943 1944 VMArea* area; 1945 virtual_address_restrictions addressRestrictions = {}; 1946 addressRestrictions.address = *address; 1947 addressRestrictions.address_specification = addressSpec; 1948 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1949 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 1950 REGION_NO_PRIVATE_MAP, flags, 1951 &addressRestrictions, true, &area, address); 1952 1953 if (status < B_OK) { 1954 cache->ReleaseRefAndUnlock(); 1955 return status; 1956 } 1957 1958 cache->Unlock(); 1959 1960 area->cache_type = CACHE_TYPE_NULL; 1961 return area->id; 1962 } 1963 1964 1965 /*! Creates the vnode cache for the specified \a vnode. 1966 The vnode has to be marked busy when calling this function. 1967 */ 1968 status_t 1969 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1970 { 1971 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1972 } 1973 1974 1975 /*! \a cache must be locked. The area's address space must be read-locked. 
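	Maps the cache's resident, recently used pages read-only into \a area, so
	that the initial accesses do not have to fault them in one by one.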
1976 */
1977 static void
1978 pre_map_area_pages(VMArea* area, VMCache* cache,
1979 vm_page_reservation* reservation)
1980 {
1981 addr_t baseAddress = area->Base();
1982 addr_t cacheOffset = area->cache_offset;
1983 page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1984 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1985 
1986 for (VMCachePagesTree::Iterator it
1987 = cache->pages.GetIterator(firstPage, true, true);
1988 vm_page* page = it.Next();) {
1989 if (page->cache_offset >= endPage)
1990 break;
1991 
1992 // skip busy and inactive pages
1993 if (page->busy || page->usage_count == 0)
1994 continue;
1995 
1996 DEBUG_PAGE_ACCESS_START(page);
1997 map_page(area, page,
1998 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1999 B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2000 DEBUG_PAGE_ACCESS_END(page);
2001 }
2002 }
2003 
2004 
2005 /*! Will map the file specified by \a fd to an area in memory.
2006 The file will be mirrored beginning at the specified \a offset. The
2007 \a offset and \a size arguments have to be page aligned.
2008 */
2009 static area_id
2010 _vm_map_file(team_id team, const char* name, void** _address,
2011 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2012 bool unmapAddressRange, int fd, off_t offset, bool kernel)
2013 {
2014 // TODO: for binary files, we want to make sure that they get a copy of
2015 // the file as it is at a given point in time, i.e. later changes should
2016 // not make it into the mapped copy -- this will need quite some changes
2017 // to be done in a nice way
2018 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2019 "%" B_PRIu32 ")\n", fd, offset, size, mapping));
2020 
2021 offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2022 size = PAGE_ALIGN(size);
2023 
2024 if (mapping == REGION_NO_PRIVATE_MAP)
2025 protection |= B_SHARED_AREA;
2026 if (addressSpec != B_EXACT_ADDRESS)
2027 unmapAddressRange = false;
2028 
2029 if (fd < 0) {
2030 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2031 virtual_address_restrictions virtualRestrictions = {};
2032 virtualRestrictions.address = *_address;
2033 virtualRestrictions.address_specification = addressSpec;
2034 physical_address_restrictions physicalRestrictions = {};
2035 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2036 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2037 _address);
2038 }
2039 
2040 // get the open flags of the FD
2041 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2042 if (descriptor == NULL)
2043 return EBADF;
2044 int32 openMode = descriptor->open_mode;
2045 put_fd(descriptor);
2046 
2047 // The FD must be open for reading in any case. For a shared mapping with
2048 // write access, the FD must additionally be open for writing.
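// In other words (see the check below):
//   O_WRONLY -> always rejected,
//   O_RDONLY -> rejected only for shared mappings requesting write access,
//   O_RDWR   -> always acceptable.
// A private (copy-on-write) mapping never writes back to the file, so read
// access alone is sufficient for it.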
2049 if ((openMode & O_ACCMODE) == O_WRONLY 2050 || (mapping == REGION_NO_PRIVATE_MAP 2051 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2052 && (openMode & O_ACCMODE) == O_RDONLY)) { 2053 return EACCES; 2054 } 2055 2056 uint32 protectionMax = 0; 2057 if (mapping != REGION_PRIVATE_MAP) { 2058 if ((openMode & O_ACCMODE) == O_RDWR) 2059 protectionMax = protection | B_USER_PROTECTION; 2060 else 2061 protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA); 2062 } 2063 2064 // get the vnode for the object, this also grabs a ref to it 2065 struct vnode* vnode = NULL; 2066 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2067 if (status < B_OK) 2068 return status; 2069 VnodePutter vnodePutter(vnode); 2070 2071 // If we're going to pre-map pages, we need to reserve the pages needed by 2072 // the mapping backend upfront. 2073 page_num_t reservedPreMapPages = 0; 2074 vm_page_reservation reservation; 2075 if ((protection & B_READ_AREA) != 0) { 2076 AddressSpaceWriteLocker locker; 2077 status = locker.SetTo(team); 2078 if (status != B_OK) 2079 return status; 2080 2081 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2082 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2083 2084 locker.Unlock(); 2085 2086 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2087 team == VMAddressSpace::KernelID() 2088 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2089 } 2090 2091 struct PageUnreserver { 2092 PageUnreserver(vm_page_reservation* reservation) 2093 : 2094 fReservation(reservation) 2095 { 2096 } 2097 2098 ~PageUnreserver() 2099 { 2100 if (fReservation != NULL) 2101 vm_page_unreserve_pages(fReservation); 2102 } 2103 2104 vm_page_reservation* fReservation; 2105 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2106 2107 // Lock the address space and, if the specified address range shall be 2108 // unmapped, ensure it is not wired. 2109 AddressSpaceWriteLocker locker; 2110 do { 2111 if (locker.SetTo(team) != B_OK) 2112 return B_BAD_TEAM_ID; 2113 } while (unmapAddressRange 2114 && wait_if_address_range_is_wired(locker.AddressSpace(), 2115 (addr_t)*_address, size, &locker)); 2116 2117 // TODO: this only works for file systems that use the file cache 2118 VMCache* cache; 2119 status = vfs_get_vnode_cache(vnode, &cache, false); 2120 if (status < B_OK) 2121 return status; 2122 2123 cache->Lock(); 2124 2125 VMArea* area; 2126 virtual_address_restrictions addressRestrictions = {}; 2127 addressRestrictions.address = *_address; 2128 addressRestrictions.address_specification = addressSpec; 2129 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2130 0, protection, protectionMax, mapping, 2131 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2132 &addressRestrictions, kernel, &area, _address); 2133 2134 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2135 // map_backing_store() cannot know we no longer need the ref 2136 cache->ReleaseRefLocked(); 2137 } 2138 2139 if (status == B_OK && (protection & B_READ_AREA) != 0) 2140 pre_map_area_pages(area, cache, &reservation); 2141 2142 cache->Unlock(); 2143 2144 if (status == B_OK) { 2145 // TODO: this probably deserves a smarter solution, ie. don't always 2146 // prefetch stuff, and also, probably don't trigger it at this place. 
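// For now we simply prefetch the beginning of the file into the file cache,
// which makes the first faults on the mapping cheaper -- at the cost of
// possibly reading data that is never accessed.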
2147 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2148 // prefetches at max 10 MB starting from "offset" 2149 } 2150 2151 if (status != B_OK) 2152 return status; 2153 2154 area->cache_type = CACHE_TYPE_VNODE; 2155 return area->id; 2156 } 2157 2158 2159 area_id 2160 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2161 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2162 int fd, off_t offset) 2163 { 2164 if (!arch_vm_supports_protection(protection)) 2165 return B_NOT_SUPPORTED; 2166 2167 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2168 mapping, unmapAddressRange, fd, offset, true); 2169 } 2170 2171 2172 VMCache* 2173 vm_area_get_locked_cache(VMArea* area) 2174 { 2175 rw_lock_read_lock(&sAreaCacheLock); 2176 2177 while (true) { 2178 VMCache* cache = area->cache; 2179 2180 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2181 // cache has been deleted 2182 rw_lock_read_lock(&sAreaCacheLock); 2183 continue; 2184 } 2185 2186 rw_lock_read_lock(&sAreaCacheLock); 2187 2188 if (cache == area->cache) { 2189 cache->AcquireRefLocked(); 2190 rw_lock_read_unlock(&sAreaCacheLock); 2191 return cache; 2192 } 2193 2194 // the cache changed in the meantime 2195 cache->Unlock(); 2196 } 2197 } 2198 2199 2200 void 2201 vm_area_put_locked_cache(VMCache* cache) 2202 { 2203 cache->ReleaseRefAndUnlock(); 2204 } 2205 2206 2207 area_id 2208 vm_clone_area(team_id team, const char* name, void** address, 2209 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2210 bool kernel) 2211 { 2212 VMArea* newArea = NULL; 2213 VMArea* sourceArea; 2214 2215 // Check whether the source area exists and is cloneable. If so, mark it 2216 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2217 { 2218 AddressSpaceWriteLocker locker; 2219 status_t status = locker.SetFromArea(sourceID, sourceArea); 2220 if (status != B_OK) 2221 return status; 2222 2223 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2224 return B_NOT_ALLOWED; 2225 2226 sourceArea->protection |= B_SHARED_AREA; 2227 protection |= B_SHARED_AREA; 2228 } 2229 2230 // Now lock both address spaces and actually do the cloning. 
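// Note that the lock on the source area's address space was given up above,
// so the area has to be looked up (and checked) again once both address
// spaces are locked -- it may have been deleted in the meantime.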
2231 2232 MultiAddressSpaceLocker locker; 2233 VMAddressSpace* sourceAddressSpace; 2234 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2235 if (status != B_OK) 2236 return status; 2237 2238 VMAddressSpace* targetAddressSpace; 2239 status = locker.AddTeam(team, true, &targetAddressSpace); 2240 if (status != B_OK) 2241 return status; 2242 2243 status = locker.Lock(); 2244 if (status != B_OK) 2245 return status; 2246 2247 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2248 if (sourceArea == NULL) 2249 return B_BAD_VALUE; 2250 2251 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2252 return B_NOT_ALLOWED; 2253 2254 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2255 2256 if (!kernel && sourceAddressSpace != targetAddressSpace 2257 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2258 #if KDEBUG 2259 Team* team = thread_get_current_thread()->team; 2260 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2261 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2262 #endif 2263 status = B_NOT_ALLOWED; 2264 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2265 status = B_NOT_ALLOWED; 2266 } else { 2267 virtual_address_restrictions addressRestrictions = {}; 2268 addressRestrictions.address = *address; 2269 addressRestrictions.address_specification = addressSpec; 2270 status = map_backing_store(targetAddressSpace, cache, 2271 sourceArea->cache_offset, name, sourceArea->Size(), 2272 sourceArea->wiring, protection, sourceArea->protection_max, 2273 mapping, 0, &addressRestrictions, 2274 kernel, &newArea, address); 2275 } 2276 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2277 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2278 // to create a new cache, and has therefore already acquired a reference 2279 // to the source cache - but otherwise it has no idea that we need 2280 // one. 2281 cache->AcquireRefLocked(); 2282 } 2283 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2284 // we need to map in everything at this point 2285 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2286 // we don't have actual pages to map but a physical area 2287 VMTranslationMap* map 2288 = sourceArea->address_space->TranslationMap(); 2289 map->Lock(); 2290 2291 phys_addr_t physicalAddress; 2292 uint32 oldProtection; 2293 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2294 2295 map->Unlock(); 2296 2297 map = targetAddressSpace->TranslationMap(); 2298 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2299 newArea->Base() + (newArea->Size() - 1)); 2300 2301 vm_page_reservation reservation; 2302 vm_page_reserve_pages(&reservation, reservePages, 2303 targetAddressSpace == VMAddressSpace::Kernel() 2304 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2305 map->Lock(); 2306 2307 for (addr_t offset = 0; offset < newArea->Size(); 2308 offset += B_PAGE_SIZE) { 2309 map->Map(newArea->Base() + offset, physicalAddress + offset, 2310 protection, newArea->MemoryType(), &reservation); 2311 } 2312 2313 map->Unlock(); 2314 vm_page_unreserve_pages(&reservation); 2315 } else { 2316 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2317 size_t reservePages = map->MaxPagesNeededToMap( 2318 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2319 vm_page_reservation reservation; 2320 vm_page_reserve_pages(&reservation, reservePages, 2321 targetAddressSpace == VMAddressSpace::Kernel() 2322 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2323 2324 // map in all pages from source 2325 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2326 vm_page* page = it.Next();) { 2327 if (!page->busy) { 2328 DEBUG_PAGE_ACCESS_START(page); 2329 map_page(newArea, page, 2330 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2331 - newArea->cache_offset), 2332 protection, &reservation); 2333 DEBUG_PAGE_ACCESS_END(page); 2334 } 2335 } 2336 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2337 // ensuring that! 2338 2339 vm_page_unreserve_pages(&reservation); 2340 } 2341 } 2342 if (status == B_OK) 2343 newArea->cache_type = sourceArea->cache_type; 2344 2345 vm_area_put_locked_cache(cache); 2346 2347 if (status < B_OK) 2348 return status; 2349 2350 return newArea->id; 2351 } 2352 2353 2354 /*! Deletes the specified area of the given address space. 2355 2356 The address space must be write-locked. 2357 The caller must ensure that the area does not have any wired ranges. 2358 2359 \param addressSpace The address space containing the area. 2360 \param area The area to be deleted. 2361 \param deletingAddressSpace \c true, if the address space is in the process 2362 of being deleted. 2363 */ 2364 static void 2365 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2366 bool deletingAddressSpace) 2367 { 2368 ASSERT(!area->IsWired()); 2369 2370 VMAreas::Remove(area); 2371 2372 // At this point the area is removed from the global hash table, but 2373 // still exists in the area list. 2374 2375 // Unmap the virtual address space the area occupied. 2376 { 2377 // We need to lock the complete cache chain. 2378 VMCache* topCache = vm_area_get_locked_cache(area); 2379 VMCacheChainLocker cacheChainLocker(topCache); 2380 cacheChainLocker.LockAllSourceCaches(); 2381 2382 // If the area's top cache is a temporary cache and the area is the only 2383 // one referencing it (besides us currently holding a second reference), 2384 // the unmapping code doesn't need to care about preserving the accessed 2385 // and dirty flags of the top cache page mappings. 2386 bool ignoreTopCachePageFlags 2387 = topCache->temporary && topCache->RefCount() == 2; 2388 2389 area->address_space->TranslationMap()->UnmapArea(area, 2390 deletingAddressSpace, ignoreTopCachePageFlags); 2391 } 2392 2393 if (!area->cache->temporary) 2394 area->cache->WriteModified(); 2395 2396 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2397 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2398 2399 arch_vm_unset_memory_type(area); 2400 addressSpace->RemoveArea(area, allocationFlags); 2401 addressSpace->Put(); 2402 2403 area->cache->RemoveArea(area); 2404 area->cache->ReleaseRef(); 2405 2406 addressSpace->DeleteArea(area, allocationFlags); 2407 } 2408 2409 2410 status_t 2411 vm_delete_area(team_id team, area_id id, bool kernel) 2412 { 2413 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2414 team, id)); 2415 2416 // lock the address space and make sure the area isn't wired 2417 AddressSpaceWriteLocker locker; 2418 VMArea* area; 2419 AreaCacheLocker cacheLocker; 2420 2421 do { 2422 status_t status = locker.SetFromArea(team, id, area); 2423 if (status != B_OK) 2424 return status; 2425 2426 cacheLocker.SetTo(area); 2427 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2428 2429 cacheLocker.Unlock(); 2430 2431 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2432 return B_NOT_ALLOWED; 2433 2434 delete_area(locker.AddressSpace(), area, false); 2435 return B_OK; 2436 } 2437 2438 2439 /*! Creates a new cache on top of given cache, moves all areas from 2440 the old cache to the new one, and changes the protection of all affected 2441 areas' pages to read-only. If requested, wired pages are moved up to the 2442 new cache and copies are added to the old cache in their place. 2443 Preconditions: 2444 - The given cache must be locked. 2445 - All of the cache's areas' address spaces must be read locked. 2446 - Either the cache must not have any wired ranges or a page reservation for 2447 all wired pages must be provided, so they can be copied. 2448 2449 \param lowerCache The cache on top of which a new cache shall be created. 2450 \param wiredPagesReservation If \c NULL there must not be any wired pages 2451 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2452 has wired page. The wired pages are copied in this case. 2453 */ 2454 static status_t 2455 vm_copy_on_write_area(VMCache* lowerCache, 2456 vm_page_reservation* wiredPagesReservation) 2457 { 2458 VMCache* upperCache; 2459 2460 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2461 2462 // We need to separate the cache from its areas. The cache goes one level 2463 // deeper and we create a new cache inbetween. 2464 2465 // create an anonymous cache 2466 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2467 lowerCache->GuardSize() / B_PAGE_SIZE, 2468 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2469 VM_PRIORITY_USER); 2470 if (status != B_OK) 2471 return status; 2472 2473 upperCache->Lock(); 2474 2475 upperCache->temporary = 1; 2476 upperCache->virtual_base = lowerCache->virtual_base; 2477 upperCache->virtual_end = lowerCache->virtual_end; 2478 2479 // transfer the lower cache areas to the upper cache 2480 rw_lock_write_lock(&sAreaCacheLock); 2481 upperCache->TransferAreas(lowerCache); 2482 rw_lock_write_unlock(&sAreaCacheLock); 2483 2484 lowerCache->AddConsumer(upperCache); 2485 2486 // We now need to remap all pages from all of the cache's areas read-only, 2487 // so that a copy will be created on next write access. If there are wired 2488 // pages, we keep their protection, move them to the upper cache and create 2489 // copies for the lower cache. 2490 if (wiredPagesReservation != NULL) { 2491 // We need to handle wired pages -- iterate through the cache's pages. 
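// Every wired page consumes one page from \a wiredPagesReservation below,
// which is why the caller has to reserve at least as many pages as the
// cache has wired pages.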
2492 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2493 vm_page* page = it.Next();) { 2494 if (page->WiredCount() > 0) { 2495 // allocate a new page and copy the wired one 2496 vm_page* copiedPage = vm_page_allocate_page( 2497 wiredPagesReservation, PAGE_STATE_ACTIVE); 2498 2499 vm_memcpy_physical_page( 2500 copiedPage->physical_page_number * B_PAGE_SIZE, 2501 page->physical_page_number * B_PAGE_SIZE); 2502 2503 // move the wired page to the upper cache (note: removing is OK 2504 // with the SplayTree iterator) and insert the copy 2505 upperCache->MovePage(page); 2506 lowerCache->InsertPage(copiedPage, 2507 page->cache_offset * B_PAGE_SIZE); 2508 2509 DEBUG_PAGE_ACCESS_END(copiedPage); 2510 } else { 2511 // Change the protection of this page in all areas. 2512 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2513 tempArea = tempArea->cache_next) { 2514 // The area must be readable in the same way it was 2515 // previously writable. 2516 addr_t address = virtual_page_address(tempArea, page); 2517 uint32 protection = 0; 2518 uint32 pageProtection = get_area_page_protection(tempArea, address); 2519 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2520 protection |= B_KERNEL_READ_AREA; 2521 if ((pageProtection & B_READ_AREA) != 0) 2522 protection |= B_READ_AREA; 2523 2524 VMTranslationMap* map 2525 = tempArea->address_space->TranslationMap(); 2526 map->Lock(); 2527 map->ProtectPage(tempArea, address, protection); 2528 map->Unlock(); 2529 } 2530 } 2531 } 2532 } else { 2533 ASSERT(lowerCache->WiredPagesCount() == 0); 2534 2535 // just change the protection of all areas 2536 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2537 tempArea = tempArea->cache_next) { 2538 if (tempArea->page_protections != NULL) { 2539 // Change the protection of all pages in this area. 2540 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2541 map->Lock(); 2542 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2543 vm_page* page = it.Next();) { 2544 // The area must be readable in the same way it was 2545 // previously writable. 2546 addr_t address = virtual_page_address(tempArea, page); 2547 uint32 protection = 0; 2548 uint32 pageProtection = get_area_page_protection(tempArea, address); 2549 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2550 protection |= B_KERNEL_READ_AREA; 2551 if ((pageProtection & B_READ_AREA) != 0) 2552 protection |= B_READ_AREA; 2553 2554 map->ProtectPage(tempArea, address, protection); 2555 } 2556 map->Unlock(); 2557 continue; 2558 } 2559 // The area must be readable in the same way it was previously 2560 // writable. 2561 uint32 protection = 0; 2562 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2563 protection |= B_KERNEL_READ_AREA; 2564 if ((tempArea->protection & B_READ_AREA) != 0) 2565 protection |= B_READ_AREA; 2566 2567 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2568 map->Lock(); 2569 map->ProtectArea(tempArea, protection); 2570 map->Unlock(); 2571 } 2572 } 2573 2574 vm_area_put_locked_cache(upperCache); 2575 2576 return B_OK; 2577 } 2578 2579 2580 area_id 2581 vm_copy_area(team_id team, const char* name, void** _address, 2582 uint32 addressSpec, area_id sourceID) 2583 { 2584 // Do the locking: target address space, all address spaces associated with 2585 // the source cache, and the cache itself. 
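// Reserving pages for the wired page copies can block, so it must not be
// done while holding the locks. Hence the loop below: unlock, reserve, and
// start over whenever the number of wired pages has grown in the meantime.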
2586 MultiAddressSpaceLocker locker; 2587 VMAddressSpace* targetAddressSpace; 2588 VMCache* cache; 2589 VMArea* source; 2590 AreaCacheLocker cacheLocker; 2591 status_t status; 2592 bool sharedArea; 2593 2594 page_num_t wiredPages = 0; 2595 vm_page_reservation wiredPagesReservation; 2596 2597 bool restart; 2598 do { 2599 restart = false; 2600 2601 locker.Unset(); 2602 status = locker.AddTeam(team, true, &targetAddressSpace); 2603 if (status == B_OK) { 2604 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2605 &cache); 2606 } 2607 if (status != B_OK) 2608 return status; 2609 2610 cacheLocker.SetTo(cache, true); // already locked 2611 2612 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2613 2614 page_num_t oldWiredPages = wiredPages; 2615 wiredPages = 0; 2616 2617 // If the source area isn't shared, count the number of wired pages in 2618 // the cache and reserve as many pages. 2619 if (!sharedArea) { 2620 wiredPages = cache->WiredPagesCount(); 2621 2622 if (wiredPages > oldWiredPages) { 2623 cacheLocker.Unlock(); 2624 locker.Unlock(); 2625 2626 if (oldWiredPages > 0) 2627 vm_page_unreserve_pages(&wiredPagesReservation); 2628 2629 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2630 VM_PRIORITY_USER); 2631 2632 restart = true; 2633 } 2634 } else if (oldWiredPages > 0) 2635 vm_page_unreserve_pages(&wiredPagesReservation); 2636 } while (restart); 2637 2638 // unreserve pages later 2639 struct PagesUnreserver { 2640 PagesUnreserver(vm_page_reservation* reservation) 2641 : 2642 fReservation(reservation) 2643 { 2644 } 2645 2646 ~PagesUnreserver() 2647 { 2648 if (fReservation != NULL) 2649 vm_page_unreserve_pages(fReservation); 2650 } 2651 2652 private: 2653 vm_page_reservation* fReservation; 2654 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2655 2656 bool writableCopy 2657 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2658 uint8* targetPageProtections = NULL; 2659 2660 if (source->page_protections != NULL) { 2661 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2662 targetPageProtections = (uint8*)malloc_etc(bytes, 2663 (source->address_space == VMAddressSpace::Kernel() 2664 || targetAddressSpace == VMAddressSpace::Kernel()) 2665 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2666 if (targetPageProtections == NULL) 2667 return B_NO_MEMORY; 2668 2669 memcpy(targetPageProtections, source->page_protections, bytes); 2670 2671 if (!writableCopy) { 2672 for (size_t i = 0; i < bytes; i++) { 2673 if ((targetPageProtections[i] 2674 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2675 writableCopy = true; 2676 break; 2677 } 2678 } 2679 } 2680 } 2681 2682 if (addressSpec == B_CLONE_ADDRESS) { 2683 addressSpec = B_EXACT_ADDRESS; 2684 *_address = (void*)source->Base(); 2685 } 2686 2687 // First, create a cache on top of the source area, respectively use the 2688 // existing one, if this is a shared area. 2689 2690 VMArea* target; 2691 virtual_address_restrictions addressRestrictions = {}; 2692 addressRestrictions.address = *_address; 2693 addressRestrictions.address_specification = addressSpec; 2694 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2695 name, source->Size(), source->wiring, source->protection, 2696 source->protection_max, 2697 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2698 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2699 &addressRestrictions, true, &target, _address); 2700 if (status < B_OK) { 2701 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2702 return status; 2703 } 2704 2705 if (targetPageProtections != NULL) 2706 target->page_protections = targetPageProtections; 2707 2708 if (sharedArea) { 2709 // The new area uses the old area's cache, but map_backing_store() 2710 // hasn't acquired a ref. So we have to do that now. 2711 cache->AcquireRefLocked(); 2712 } 2713 2714 // If the source area is writable, we need to move it one layer up as well 2715 2716 if (!sharedArea) { 2717 if (writableCopy) { 2718 // TODO: do something more useful if this fails! 2719 if (vm_copy_on_write_area(cache, 2720 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2721 panic("vm_copy_on_write_area() failed!\n"); 2722 } 2723 } 2724 } 2725 2726 // we return the ID of the newly created area 2727 return target->id; 2728 } 2729 2730 2731 status_t 2732 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2733 bool kernel) 2734 { 2735 fix_protection(&newProtection); 2736 2737 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2738 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2739 2740 if (!arch_vm_supports_protection(newProtection)) 2741 return B_NOT_SUPPORTED; 2742 2743 bool becomesWritable 2744 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2745 2746 // lock address spaces and cache 2747 MultiAddressSpaceLocker locker; 2748 VMCache* cache; 2749 VMArea* area; 2750 status_t status; 2751 AreaCacheLocker cacheLocker; 2752 bool isWritable; 2753 2754 bool restart; 2755 do { 2756 restart = false; 2757 2758 locker.Unset(); 2759 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2760 if (status != B_OK) 2761 return status; 2762 2763 cacheLocker.SetTo(cache, true); // already locked 2764 2765 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2766 || (area->protection & B_KERNEL_AREA) != 0)) { 2767 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2768 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2769 " (%s)\n", team, newProtection, areaID, area->name); 2770 return B_NOT_ALLOWED; 2771 } 2772 if (!kernel && area->protection_max != 0 2773 && (newProtection & area->protection_max) 2774 != (newProtection & B_USER_PROTECTION)) { 2775 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2776 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2777 "area %" B_PRId32 " (%s)\n", team, newProtection, 2778 area->protection_max, areaID, area->name); 2779 return B_NOT_ALLOWED; 2780 } 2781 2782 if (area->protection == newProtection) 2783 return B_OK; 2784 2785 if (team != VMAddressSpace::KernelID() 2786 && area->address_space->ID() != team) { 2787 // unless you're the kernel, you are only allowed to set 2788 // the protection of your own areas 2789 return B_NOT_ALLOWED; 2790 } 2791 2792 isWritable 2793 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2794 2795 // Make sure the area (respectively, if we're going to call 2796 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2797 // wired ranges. 
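// If a wired range is encountered, wait_if_area_is_wired() blocks until the
// wiring goes away; the whole locking procedure is then restarted, since
// the situation may have changed while we were waiting.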
2798 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2799 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2800 otherArea = otherArea->cache_next) { 2801 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2802 restart = true; 2803 break; 2804 } 2805 } 2806 } else { 2807 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2808 restart = true; 2809 } 2810 } while (restart); 2811 2812 bool changePageProtection = true; 2813 bool changeTopCachePagesOnly = false; 2814 2815 if (isWritable && !becomesWritable) { 2816 // writable -> !writable 2817 2818 if (cache->source != NULL && cache->temporary) { 2819 if (cache->CountWritableAreas(area) == 0) { 2820 // Since this cache now lives from the pages in its source cache, 2821 // we can change the cache's commitment to take only those pages 2822 // into account that really are in this cache. 2823 2824 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2825 team == VMAddressSpace::KernelID() 2826 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2827 2828 // TODO: we may be able to join with our source cache, if 2829 // count == 0 2830 } 2831 } 2832 2833 // If only the writability changes, we can just remap the pages of the 2834 // top cache, since the pages of lower caches are mapped read-only 2835 // anyway. That's advantageous only, if the number of pages in the cache 2836 // is significantly smaller than the number of pages in the area, 2837 // though. 2838 if (newProtection 2839 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2840 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2841 changeTopCachePagesOnly = true; 2842 } 2843 } else if (!isWritable && becomesWritable) { 2844 // !writable -> writable 2845 2846 if (!cache->consumers.IsEmpty()) { 2847 // There are consumers -- we have to insert a new cache. Fortunately 2848 // vm_copy_on_write_area() does everything that's needed. 2849 changePageProtection = false; 2850 status = vm_copy_on_write_area(cache, NULL); 2851 } else { 2852 // No consumers, so we don't need to insert a new one. 2853 if (cache->source != NULL && cache->temporary) { 2854 // the cache's commitment must contain all possible pages 2855 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2856 team == VMAddressSpace::KernelID() 2857 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2858 } 2859 2860 if (status == B_OK && cache->source != NULL) { 2861 // There's a source cache, hence we can't just change all pages' 2862 // protection or we might allow writing into pages belonging to 2863 // a lower cache. 
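// Only pages that have already been copied into the top cache are remapped
// writable; pages still living in a lower cache stay read-only, so writing
// to them faults and goes through the usual copy-on-write path.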
2864 changeTopCachePagesOnly = true; 2865 } 2866 } 2867 } else { 2868 // we don't have anything special to do in all other cases 2869 } 2870 2871 if (status == B_OK) { 2872 // remap existing pages in this cache 2873 if (changePageProtection) { 2874 VMTranslationMap* map = area->address_space->TranslationMap(); 2875 map->Lock(); 2876 2877 if (changeTopCachePagesOnly) { 2878 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2879 page_num_t lastPageOffset 2880 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2881 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2882 vm_page* page = it.Next();) { 2883 if (page->cache_offset >= firstPageOffset 2884 && page->cache_offset <= lastPageOffset) { 2885 addr_t address = virtual_page_address(area, page); 2886 map->ProtectPage(area, address, newProtection); 2887 } 2888 } 2889 } else 2890 map->ProtectArea(area, newProtection); 2891 2892 map->Unlock(); 2893 } 2894 2895 area->protection = newProtection; 2896 } 2897 2898 return status; 2899 } 2900 2901 2902 status_t 2903 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2904 { 2905 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2906 if (addressSpace == NULL) 2907 return B_BAD_TEAM_ID; 2908 2909 VMTranslationMap* map = addressSpace->TranslationMap(); 2910 2911 map->Lock(); 2912 uint32 dummyFlags; 2913 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2914 map->Unlock(); 2915 2916 addressSpace->Put(); 2917 return status; 2918 } 2919 2920 2921 /*! The page's cache must be locked. 2922 */ 2923 bool 2924 vm_test_map_modification(vm_page* page) 2925 { 2926 if (page->modified) 2927 return true; 2928 2929 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2930 vm_page_mapping* mapping; 2931 while ((mapping = iterator.Next()) != NULL) { 2932 VMArea* area = mapping->area; 2933 VMTranslationMap* map = area->address_space->TranslationMap(); 2934 2935 phys_addr_t physicalAddress; 2936 uint32 flags; 2937 map->Lock(); 2938 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2939 map->Unlock(); 2940 2941 if ((flags & PAGE_MODIFIED) != 0) 2942 return true; 2943 } 2944 2945 return false; 2946 } 2947 2948 2949 /*! The page's cache must be locked. 2950 */ 2951 void 2952 vm_clear_map_flags(vm_page* page, uint32 flags) 2953 { 2954 if ((flags & PAGE_ACCESSED) != 0) 2955 page->accessed = false; 2956 if ((flags & PAGE_MODIFIED) != 0) 2957 page->modified = false; 2958 2959 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2960 vm_page_mapping* mapping; 2961 while ((mapping = iterator.Next()) != NULL) { 2962 VMArea* area = mapping->area; 2963 VMTranslationMap* map = area->address_space->TranslationMap(); 2964 2965 map->Lock(); 2966 map->ClearFlags(virtual_page_address(area, page), flags); 2967 map->Unlock(); 2968 } 2969 } 2970 2971 2972 /*! Removes all mappings from a page. 2973 After you've called this function, the page is unmapped from memory and 2974 the page's \c accessed and \c modified flags have been updated according 2975 to the state of the mappings. 2976 The page's cache must be locked. 
2977 */ 2978 void 2979 vm_remove_all_page_mappings(vm_page* page) 2980 { 2981 while (vm_page_mapping* mapping = page->mappings.Head()) { 2982 VMArea* area = mapping->area; 2983 VMTranslationMap* map = area->address_space->TranslationMap(); 2984 addr_t address = virtual_page_address(area, page); 2985 map->UnmapPage(area, address, false); 2986 } 2987 } 2988 2989 2990 int32 2991 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2992 { 2993 int32 count = 0; 2994 2995 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2996 vm_page_mapping* mapping; 2997 while ((mapping = iterator.Next()) != NULL) { 2998 VMArea* area = mapping->area; 2999 VMTranslationMap* map = area->address_space->TranslationMap(); 3000 3001 bool modified; 3002 if (map->ClearAccessedAndModified(area, 3003 virtual_page_address(area, page), false, modified)) { 3004 count++; 3005 } 3006 3007 page->modified |= modified; 3008 } 3009 3010 3011 if (page->accessed) { 3012 count++; 3013 page->accessed = false; 3014 } 3015 3016 return count; 3017 } 3018 3019 3020 /*! Removes all mappings of a page and/or clears the accessed bits of the 3021 mappings. 3022 The function iterates through the page mappings and removes them until 3023 encountering one that has been accessed. From then on it will continue to 3024 iterate, but only clear the accessed flag of the mapping. The page's 3025 \c modified bit will be updated accordingly, the \c accessed bit will be 3026 cleared. 3027 \return The number of mapping accessed bits encountered, including the 3028 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3029 of the page have been removed. 3030 */ 3031 int32 3032 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3033 { 3034 ASSERT(page->WiredCount() == 0); 3035 3036 if (page->accessed) 3037 return vm_clear_page_mapping_accessed_flags(page); 3038 3039 while (vm_page_mapping* mapping = page->mappings.Head()) { 3040 VMArea* area = mapping->area; 3041 VMTranslationMap* map = area->address_space->TranslationMap(); 3042 addr_t address = virtual_page_address(area, page); 3043 bool modified = false; 3044 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3045 page->accessed = true; 3046 page->modified |= modified; 3047 return vm_clear_page_mapping_accessed_flags(page); 3048 } 3049 page->modified |= modified; 3050 } 3051 3052 return 0; 3053 } 3054 3055 3056 static int 3057 display_mem(int argc, char** argv) 3058 { 3059 bool physical = false; 3060 addr_t copyAddress; 3061 int32 displayWidth; 3062 int32 itemSize; 3063 int32 num = -1; 3064 addr_t address; 3065 int i = 1, j; 3066 3067 if (argc > 1 && argv[1][0] == '-') { 3068 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3069 physical = true; 3070 i++; 3071 } else 3072 i = 99; 3073 } 3074 3075 if (argc < i + 1 || argc > i + 2) { 3076 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3077 "\tdl - 8 bytes\n" 3078 "\tdw - 4 bytes\n" 3079 "\tds - 2 bytes\n" 3080 "\tdb - 1 byte\n" 3081 "\tstring - a whole string\n" 3082 " -p or --physical only allows memory from a single page to be " 3083 "displayed.\n"); 3084 return 0; 3085 } 3086 3087 address = parse_expression(argv[i]); 3088 3089 if (argc > i + 1) 3090 num = parse_expression(argv[i + 1]); 3091 3092 // build the format string 3093 if (strcmp(argv[0], "db") == 0) { 3094 itemSize = 1; 3095 displayWidth = 16; 3096 } else if (strcmp(argv[0], "ds") == 0) { 3097 itemSize = 2; 3098 displayWidth = 8; 3099 } else if (strcmp(argv[0], "dw") == 0) { 3100 
itemSize = 4; 3101 displayWidth = 4; 3102 } else if (strcmp(argv[0], "dl") == 0) { 3103 itemSize = 8; 3104 displayWidth = 2; 3105 } else if (strcmp(argv[0], "string") == 0) { 3106 itemSize = 1; 3107 displayWidth = -1; 3108 } else { 3109 kprintf("display_mem called in an invalid way!\n"); 3110 return 0; 3111 } 3112 3113 if (num <= 0) 3114 num = displayWidth; 3115 3116 void* physicalPageHandle = NULL; 3117 3118 if (physical) { 3119 int32 offset = address & (B_PAGE_SIZE - 1); 3120 if (num * itemSize + offset > B_PAGE_SIZE) { 3121 num = (B_PAGE_SIZE - offset) / itemSize; 3122 kprintf("NOTE: number of bytes has been cut to page size\n"); 3123 } 3124 3125 address = ROUNDDOWN(address, B_PAGE_SIZE); 3126 3127 if (vm_get_physical_page_debug(address, ©Address, 3128 &physicalPageHandle) != B_OK) { 3129 kprintf("getting the hardware page failed."); 3130 return 0; 3131 } 3132 3133 address += offset; 3134 copyAddress += offset; 3135 } else 3136 copyAddress = address; 3137 3138 if (!strcmp(argv[0], "string")) { 3139 kprintf("%p \"", (char*)copyAddress); 3140 3141 // string mode 3142 for (i = 0; true; i++) { 3143 char c; 3144 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3145 != B_OK 3146 || c == '\0') { 3147 break; 3148 } 3149 3150 if (c == '\n') 3151 kprintf("\\n"); 3152 else if (c == '\t') 3153 kprintf("\\t"); 3154 else { 3155 if (!isprint(c)) 3156 c = '.'; 3157 3158 kprintf("%c", c); 3159 } 3160 } 3161 3162 kprintf("\"\n"); 3163 } else { 3164 // number mode 3165 for (i = 0; i < num; i++) { 3166 uint64 value; 3167 3168 if ((i % displayWidth) == 0) { 3169 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3170 if (i != 0) 3171 kprintf("\n"); 3172 3173 kprintf("[0x%lx] ", address + i * itemSize); 3174 3175 for (j = 0; j < displayed; j++) { 3176 char c; 3177 if (debug_memcpy(B_CURRENT_TEAM, &c, 3178 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3179 displayed = j; 3180 break; 3181 } 3182 if (!isprint(c)) 3183 c = '.'; 3184 3185 kprintf("%c", c); 3186 } 3187 if (num > displayWidth) { 3188 // make sure the spacing in the last line is correct 3189 for (j = displayed; j < displayWidth * itemSize; j++) 3190 kprintf(" "); 3191 } 3192 kprintf(" "); 3193 } 3194 3195 if (debug_memcpy(B_CURRENT_TEAM, &value, 3196 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3197 kprintf("read fault"); 3198 break; 3199 } 3200 3201 switch (itemSize) { 3202 case 1: 3203 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3204 break; 3205 case 2: 3206 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3207 break; 3208 case 4: 3209 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3210 break; 3211 case 8: 3212 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3213 break; 3214 } 3215 } 3216 3217 kprintf("\n"); 3218 } 3219 3220 if (physical) { 3221 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3222 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3223 } 3224 return 0; 3225 } 3226 3227 3228 static void 3229 dump_cache_tree_recursively(VMCache* cache, int level, 3230 VMCache* highlightCache) 3231 { 3232 // print this cache 3233 for (int i = 0; i < level; i++) 3234 kprintf(" "); 3235 if (cache == highlightCache) 3236 kprintf("%p <--\n", cache); 3237 else 3238 kprintf("%p\n", cache); 3239 3240 // recursively print its consumers 3241 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3242 VMCache* consumer = it.Next();) { 3243 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3244 } 3245 } 3246 3247 3248 static int 3249 dump_cache_tree(int argc, char** argv) 3250 { 3251 
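// Expects the address of a VMCache. The tree is printed from the root cache
// (the transitive source) downwards, with the given cache highlighted.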
if (argc != 2 || !strcmp(argv[1], "--help")) { 3252 kprintf("usage: %s <address>\n", argv[0]); 3253 return 0; 3254 } 3255 3256 addr_t address = parse_expression(argv[1]); 3257 if (address == 0) 3258 return 0; 3259 3260 VMCache* cache = (VMCache*)address; 3261 VMCache* root = cache; 3262 3263 // find the root cache (the transitive source) 3264 while (root->source != NULL) 3265 root = root->source; 3266 3267 dump_cache_tree_recursively(root, 0, cache); 3268 3269 return 0; 3270 } 3271 3272 3273 const char* 3274 vm_cache_type_to_string(int32 type) 3275 { 3276 switch (type) { 3277 case CACHE_TYPE_RAM: 3278 return "RAM"; 3279 case CACHE_TYPE_DEVICE: 3280 return "device"; 3281 case CACHE_TYPE_VNODE: 3282 return "vnode"; 3283 case CACHE_TYPE_NULL: 3284 return "null"; 3285 3286 default: 3287 return "unknown"; 3288 } 3289 } 3290 3291 3292 #if DEBUG_CACHE_LIST 3293 3294 static void 3295 update_cache_info_recursively(VMCache* cache, cache_info& info) 3296 { 3297 info.page_count += cache->page_count; 3298 if (cache->type == CACHE_TYPE_RAM) 3299 info.committed += cache->committed_size; 3300 3301 // recurse 3302 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3303 VMCache* consumer = it.Next();) { 3304 update_cache_info_recursively(consumer, info); 3305 } 3306 } 3307 3308 3309 static int 3310 cache_info_compare_page_count(const void* _a, const void* _b) 3311 { 3312 const cache_info* a = (const cache_info*)_a; 3313 const cache_info* b = (const cache_info*)_b; 3314 if (a->page_count == b->page_count) 3315 return 0; 3316 return a->page_count < b->page_count ? 1 : -1; 3317 } 3318 3319 3320 static int 3321 cache_info_compare_committed(const void* _a, const void* _b) 3322 { 3323 const cache_info* a = (const cache_info*)_a; 3324 const cache_info* b = (const cache_info*)_b; 3325 if (a->committed == b->committed) 3326 return 0; 3327 return a->committed < b->committed ? 
1 : -1; 3328 } 3329 3330 3331 static void 3332 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3333 { 3334 for (int i = 0; i < level; i++) 3335 kprintf(" "); 3336 3337 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3338 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3339 cache->virtual_base, cache->virtual_end, cache->page_count); 3340 3341 if (level == 0) 3342 kprintf("/%lu", info.page_count); 3343 3344 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3345 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3346 3347 if (level == 0) 3348 kprintf("/%lu", info.committed); 3349 } 3350 3351 // areas 3352 if (cache->areas != NULL) { 3353 VMArea* area = cache->areas; 3354 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3355 area->name, area->address_space->ID()); 3356 3357 while (area->cache_next != NULL) { 3358 area = area->cache_next; 3359 kprintf(", %" B_PRId32, area->id); 3360 } 3361 } 3362 3363 kputs("\n"); 3364 3365 // recurse 3366 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3367 VMCache* consumer = it.Next();) { 3368 dump_caches_recursively(consumer, info, level + 1); 3369 } 3370 } 3371 3372 3373 static int 3374 dump_caches(int argc, char** argv) 3375 { 3376 if (sCacheInfoTable == NULL) { 3377 kprintf("No cache info table!\n"); 3378 return 0; 3379 } 3380 3381 bool sortByPageCount = true; 3382 3383 for (int32 i = 1; i < argc; i++) { 3384 if (strcmp(argv[i], "-c") == 0) { 3385 sortByPageCount = false; 3386 } else { 3387 print_debugger_command_usage(argv[0]); 3388 return 0; 3389 } 3390 } 3391 3392 uint32 totalCount = 0; 3393 uint32 rootCount = 0; 3394 off_t totalCommitted = 0; 3395 page_num_t totalPages = 0; 3396 3397 VMCache* cache = gDebugCacheList; 3398 while (cache) { 3399 totalCount++; 3400 if (cache->source == NULL) { 3401 cache_info stackInfo; 3402 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3403 ? sCacheInfoTable[rootCount] : stackInfo; 3404 rootCount++; 3405 info.cache = cache; 3406 info.page_count = 0; 3407 info.committed = 0; 3408 update_cache_info_recursively(cache, info); 3409 totalCommitted += info.committed; 3410 totalPages += info.page_count; 3411 } 3412 3413 cache = cache->debug_next; 3414 } 3415 3416 if (rootCount <= (uint32)kCacheInfoTableCount) { 3417 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3418 sortByPageCount 3419 ? &cache_info_compare_page_count 3420 : &cache_info_compare_committed); 3421 } 3422 3423 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3424 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3425 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3426 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3427 "page count" : "committed size"); 3428 3429 if (rootCount <= (uint32)kCacheInfoTableCount) { 3430 for (uint32 i = 0; i < rootCount; i++) { 3431 cache_info& info = sCacheInfoTable[i]; 3432 dump_caches_recursively(info.cache, info, 0); 3433 } 3434 } else 3435 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3436 3437 return 0; 3438 } 3439 3440 #endif // DEBUG_CACHE_LIST 3441 3442 3443 static int 3444 dump_cache(int argc, char** argv) 3445 { 3446 VMCache* cache; 3447 bool showPages = false; 3448 int i = 1; 3449 3450 if (argc < 2 || !strcmp(argv[1], "--help")) { 3451 kprintf("usage: %s [-ps] <address>\n" 3452 " if -p is specified, all pages are shown, if -s is used\n" 3453 " only the cache info is shown respectively.\n", argv[0]); 3454 return 0; 3455 } 3456 while (argv[i][0] == '-') { 3457 char* arg = argv[i] + 1; 3458 while (arg[0]) { 3459 if (arg[0] == 'p') 3460 showPages = true; 3461 arg++; 3462 } 3463 i++; 3464 } 3465 if (argv[i] == NULL) { 3466 kprintf("%s: invalid argument, pass address\n", argv[0]); 3467 return 0; 3468 } 3469 3470 addr_t address = parse_expression(argv[i]); 3471 if (address == 0) 3472 return 0; 3473 3474 cache = (VMCache*)address; 3475 3476 cache->Dump(showPages); 3477 3478 set_debug_variable("_sourceCache", (addr_t)cache->source); 3479 3480 return 0; 3481 } 3482 3483 3484 static void 3485 dump_area_struct(VMArea* area, bool mappings) 3486 { 3487 kprintf("AREA: %p\n", area); 3488 kprintf("name:\t\t'%s'\n", area->name); 3489 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3490 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3491 kprintf("base:\t\t0x%lx\n", area->Base()); 3492 kprintf("size:\t\t0x%lx\n", area->Size()); 3493 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3494 kprintf("page_protection:%p\n", area->page_protections); 3495 kprintf("wiring:\t\t0x%x\n", area->wiring); 3496 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3497 kprintf("cache:\t\t%p\n", area->cache); 3498 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3499 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3500 kprintf("cache_next:\t%p\n", area->cache_next); 3501 kprintf("cache_prev:\t%p\n", area->cache_prev); 3502 3503 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3504 if (mappings) { 3505 kprintf("page mappings:\n"); 3506 while (iterator.HasNext()) { 3507 vm_page_mapping* mapping = iterator.Next(); 3508 kprintf(" %p", mapping->page); 3509 } 3510 kprintf("\n"); 3511 } else { 3512 uint32 count = 0; 3513 while (iterator.Next() != NULL) { 3514 count++; 3515 } 3516 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3517 } 3518 } 3519 3520 3521 static int 3522 dump_area(int argc, char** argv) 3523 { 3524 bool mappings = false; 3525 bool found = false; 3526 int32 index = 1; 3527 VMArea* area; 3528 addr_t num; 3529 3530 if (argc < 2 || !strcmp(argv[1], "--help")) { 3531 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3532 "All areas matching either id/address/name are listed. 
You can\n" 3533 "force to check only a specific item by prefixing the specifier\n" 3534 "with the id/contains/address/name keywords.\n" 3535 "-m shows the area's mappings as well.\n"); 3536 return 0; 3537 } 3538 3539 if (!strcmp(argv[1], "-m")) { 3540 mappings = true; 3541 index++; 3542 } 3543 3544 int32 mode = 0xf; 3545 if (!strcmp(argv[index], "id")) 3546 mode = 1; 3547 else if (!strcmp(argv[index], "contains")) 3548 mode = 2; 3549 else if (!strcmp(argv[index], "name")) 3550 mode = 4; 3551 else if (!strcmp(argv[index], "address")) 3552 mode = 0; 3553 if (mode != 0xf) 3554 index++; 3555 3556 if (index >= argc) { 3557 kprintf("No area specifier given.\n"); 3558 return 0; 3559 } 3560 3561 num = parse_expression(argv[index]); 3562 3563 if (mode == 0) { 3564 dump_area_struct((struct VMArea*)num, mappings); 3565 } else { 3566 // walk through the area list, looking for the arguments as a name 3567 3568 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3569 while ((area = it.Next()) != NULL) { 3570 if (((mode & 4) != 0 3571 && !strcmp(argv[index], area->name)) 3572 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3573 || (((mode & 2) != 0 && area->Base() <= num 3574 && area->Base() + area->Size() > num))))) { 3575 dump_area_struct(area, mappings); 3576 found = true; 3577 } 3578 } 3579 3580 if (!found) 3581 kprintf("could not find area %s (%ld)\n", argv[index], num); 3582 } 3583 3584 return 0; 3585 } 3586 3587 3588 static int 3589 dump_area_list(int argc, char** argv) 3590 { 3591 VMArea* area; 3592 const char* name = NULL; 3593 int32 id = 0; 3594 3595 if (argc > 1) { 3596 id = parse_expression(argv[1]); 3597 if (id == 0) 3598 name = argv[1]; 3599 } 3600 3601 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3602 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3603 B_PRINTF_POINTER_WIDTH, "size"); 3604 3605 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3606 while ((area = it.Next()) != NULL) { 3607 if ((id != 0 && area->address_space->ID() != id) 3608 || (name != NULL && strstr(area->name, name) == NULL)) 3609 continue; 3610 3611 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3612 area->id, (void*)area->Base(), (void*)area->Size(), 3613 area->protection, area->wiring, area->name); 3614 } 3615 return 0; 3616 } 3617 3618 3619 static int 3620 dump_available_memory(int argc, char** argv) 3621 { 3622 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3623 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3624 return 0; 3625 } 3626 3627 3628 static int 3629 dump_mapping_info(int argc, char** argv) 3630 { 3631 bool reverseLookup = false; 3632 bool pageLookup = false; 3633 3634 int argi = 1; 3635 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3636 const char* arg = argv[argi]; 3637 if (strcmp(arg, "-r") == 0) { 3638 reverseLookup = true; 3639 } else if (strcmp(arg, "-p") == 0) { 3640 reverseLookup = true; 3641 pageLookup = true; 3642 } else { 3643 print_debugger_command_usage(argv[0]); 3644 return 0; 3645 } 3646 } 3647 3648 // We need at least one argument, the address. Optionally a thread ID can be 3649 // specified. 
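// That is, after the options one or two arguments must remain; anything
// else is a usage error.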
3650 if (argi >= argc || argi + 2 < argc) { 3651 print_debugger_command_usage(argv[0]); 3652 return 0; 3653 } 3654 3655 uint64 addressValue; 3656 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3657 return 0; 3658 3659 Team* team = NULL; 3660 if (argi < argc) { 3661 uint64 threadID; 3662 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3663 return 0; 3664 3665 Thread* thread = Thread::GetDebug(threadID); 3666 if (thread == NULL) { 3667 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3668 return 0; 3669 } 3670 3671 team = thread->team; 3672 } 3673 3674 if (reverseLookup) { 3675 phys_addr_t physicalAddress; 3676 if (pageLookup) { 3677 vm_page* page = (vm_page*)(addr_t)addressValue; 3678 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3679 } else { 3680 physicalAddress = (phys_addr_t)addressValue; 3681 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3682 } 3683 3684 kprintf(" Team Virtual Address Area\n"); 3685 kprintf("--------------------------------------\n"); 3686 3687 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3688 Callback() 3689 : 3690 fAddressSpace(NULL) 3691 { 3692 } 3693 3694 void SetAddressSpace(VMAddressSpace* addressSpace) 3695 { 3696 fAddressSpace = addressSpace; 3697 } 3698 3699 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3700 { 3701 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3702 virtualAddress); 3703 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3704 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3705 else 3706 kprintf("\n"); 3707 return false; 3708 } 3709 3710 private: 3711 VMAddressSpace* fAddressSpace; 3712 } callback; 3713 3714 if (team != NULL) { 3715 // team specified -- get its address space 3716 VMAddressSpace* addressSpace = team->address_space; 3717 if (addressSpace == NULL) { 3718 kprintf("Failed to get address space!\n"); 3719 return 0; 3720 } 3721 3722 callback.SetAddressSpace(addressSpace); 3723 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3724 physicalAddress, callback); 3725 } else { 3726 // no team specified -- iterate through all address spaces 3727 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3728 addressSpace != NULL; 3729 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3730 callback.SetAddressSpace(addressSpace); 3731 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3732 physicalAddress, callback); 3733 } 3734 } 3735 } else { 3736 // get the address space 3737 addr_t virtualAddress = (addr_t)addressValue; 3738 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3739 VMAddressSpace* addressSpace; 3740 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3741 addressSpace = VMAddressSpace::Kernel(); 3742 } else if (team != NULL) { 3743 addressSpace = team->address_space; 3744 } else { 3745 Thread* thread = debug_get_debugged_thread(); 3746 if (thread == NULL || thread->team == NULL) { 3747 kprintf("Failed to get team!\n"); 3748 return 0; 3749 } 3750 3751 addressSpace = thread->team->address_space; 3752 } 3753 3754 if (addressSpace == NULL) { 3755 kprintf("Failed to get address space!\n"); 3756 return 0; 3757 } 3758 3759 // let the translation map implementation do the job 3760 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3761 } 3762 3763 return 0; 3764 } 3765 3766 3767 /*! Deletes all areas and reserved regions in the given address space. 3768 3769 The caller must ensure that none of the areas has any wired ranges. 
3770 3771 \param addressSpace The address space. 3772 \param deletingAddressSpace \c true, if the address space is in the process 3773 of being deleted. 3774 */ 3775 void 3776 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3777 { 3778 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3779 addressSpace->ID())); 3780 3781 addressSpace->WriteLock(); 3782 3783 // remove all reserved areas in this address space 3784 addressSpace->UnreserveAllAddressRanges(0); 3785 3786 // delete all the areas in this address space 3787 while (VMArea* area = addressSpace->FirstArea()) { 3788 ASSERT(!area->IsWired()); 3789 delete_area(addressSpace, area, deletingAddressSpace); 3790 } 3791 3792 addressSpace->WriteUnlock(); 3793 } 3794 3795 3796 static area_id 3797 vm_area_for(addr_t address, bool kernel) 3798 { 3799 team_id team; 3800 if (IS_USER_ADDRESS(address)) { 3801 // we try the user team address space, if any 3802 team = VMAddressSpace::CurrentID(); 3803 if (team < 0) 3804 return team; 3805 } else 3806 team = VMAddressSpace::KernelID(); 3807 3808 AddressSpaceReadLocker locker(team); 3809 if (!locker.IsLocked()) 3810 return B_BAD_TEAM_ID; 3811 3812 VMArea* area = locker.AddressSpace()->LookupArea(address); 3813 if (area != NULL) { 3814 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0 3815 && (area->protection & B_KERNEL_AREA) != 0) 3816 return B_ERROR; 3817 3818 return area->id; 3819 } 3820 3821 return B_ERROR; 3822 } 3823 3824 3825 /*! Frees physical pages that were used during the boot process. 3826 \a end is inclusive. 3827 */ 3828 static void 3829 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3830 { 3831 // free all physical pages in the specified range 3832 3833 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3834 phys_addr_t physicalAddress; 3835 uint32 flags; 3836 3837 if (map->Query(current, &physicalAddress, &flags) == B_OK 3838 && (flags & PAGE_PRESENT) != 0) { 3839 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3840 if (page != NULL && page->State() != PAGE_STATE_FREE 3841 && page->State() != PAGE_STATE_CLEAR 3842 && page->State() != PAGE_STATE_UNUSED) { 3843 DEBUG_PAGE_ACCESS_START(page); 3844 vm_page_set_state(page, PAGE_STATE_FREE); 3845 } 3846 } 3847 } 3848 3849 // unmap the memory 3850 map->Unmap(start, end); 3851 } 3852 3853 3854 void 3855 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3856 { 3857 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3858 addr_t end = start + (size - 1); 3859 addr_t lastEnd = start; 3860 3861 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3862 (void*)start, (void*)end)); 3863 3864 // The areas are sorted in virtual address space order, so 3865 // we just have to find the holes between them that fall 3866 // into the area we should dispose 3867 3868 map->Lock(); 3869 3870 for (VMAddressSpace::AreaIterator it 3871 = VMAddressSpace::Kernel()->GetAreaIterator(); 3872 VMArea* area = it.Next();) { 3873 addr_t areaStart = area->Base(); 3874 addr_t areaEnd = areaStart + (area->Size() - 1); 3875 3876 if (areaEnd < start) 3877 continue; 3878 3879 if (areaStart > end) { 3880 // we are done, the area is already beyond of what we have to free 3881 break; 3882 } 3883 3884 if (areaStart > lastEnd) { 3885 // this is something we can free 3886 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3887 (void*)areaStart)); 3888 unmap_and_free_physical_pages(map, lastEnd, 
areaStart - 1); 3889 } 3890 3891 if (areaEnd >= end) { 3892 lastEnd = areaEnd; 3893 // no +1 to prevent potential overflow 3894 break; 3895 } 3896 3897 lastEnd = areaEnd + 1; 3898 } 3899 3900 if (lastEnd < end) { 3901 // we can also get rid of some space at the end of the area 3902 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3903 (void*)end)); 3904 unmap_and_free_physical_pages(map, lastEnd, end); 3905 } 3906 3907 map->Unlock(); 3908 } 3909 3910 3911 static void 3912 create_preloaded_image_areas(struct preloaded_image* _image) 3913 { 3914 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3915 char name[B_OS_NAME_LENGTH]; 3916 void* address; 3917 int32 length; 3918 3919 // use file name to create a good area name 3920 char* fileName = strrchr(image->name, '/'); 3921 if (fileName == NULL) 3922 fileName = image->name; 3923 else 3924 fileName++; 3925 3926 length = strlen(fileName); 3927 // make sure there is enough space for the suffix 3928 if (length > 25) 3929 length = 25; 3930 3931 memcpy(name, fileName, length); 3932 strcpy(name + length, "_text"); 3933 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3934 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3935 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3936 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3937 // this will later be remapped read-only/executable by the 3938 // ELF initialization code 3939 3940 strcpy(name + length, "_data"); 3941 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3942 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3943 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3944 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3945 } 3946 3947 3948 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3949 Any boot loader resources contained in that arguments must not be accessed 3950 anymore past this point. 3951 */ 3952 void 3953 vm_free_kernel_args(kernel_args* args) 3954 { 3955 uint32 i; 3956 3957 TRACE(("vm_free_kernel_args()\n")); 3958 3959 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3960 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3961 if (area >= B_OK) 3962 delete_area(area); 3963 } 3964 } 3965 3966 3967 static void 3968 allocate_kernel_args(kernel_args* args) 3969 { 3970 TRACE(("allocate_kernel_args()\n")); 3971 3972 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3973 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3974 3975 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3976 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3977 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3978 } 3979 } 3980 3981 3982 static void 3983 unreserve_boot_loader_ranges(kernel_args* args) 3984 { 3985 TRACE(("unreserve_boot_loader_ranges()\n")); 3986 3987 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3988 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3989 (void*)(addr_t)args->virtual_allocated_range[i].start, 3990 args->virtual_allocated_range[i].size); 3991 } 3992 } 3993 3994 3995 static void 3996 reserve_boot_loader_ranges(kernel_args* args) 3997 { 3998 TRACE(("reserve_boot_loader_ranges()\n")); 3999 4000 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4001 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 4002 4003 // If the address is no kernel address, we just skip it. The 4004 // architecture specific code has to deal with it. 
4005 if (!IS_KERNEL_ADDRESS(address)) { 4006 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4007 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4008 continue; 4009 } 4010 4011 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4012 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4013 if (status < B_OK) 4014 panic("could not reserve boot loader ranges\n"); 4015 } 4016 } 4017 4018 4019 static addr_t 4020 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4021 { 4022 size = PAGE_ALIGN(size); 4023 4024 // find a slot in the virtual allocation addr range 4025 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4026 // check to see if the space between this one and the last is big enough 4027 addr_t rangeStart = args->virtual_allocated_range[i].start; 4028 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4029 + args->virtual_allocated_range[i - 1].size; 4030 4031 addr_t base = alignment > 0 4032 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4033 4034 if (base >= KERNEL_BASE && base < rangeStart 4035 && rangeStart - base >= size) { 4036 args->virtual_allocated_range[i - 1].size 4037 += base + size - previousRangeEnd; 4038 return base; 4039 } 4040 } 4041 4042 // we hadn't found one between allocation ranges. this is ok. 4043 // see if there's a gap after the last one 4044 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4045 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4046 + args->virtual_allocated_range[lastEntryIndex].size; 4047 addr_t base = alignment > 0 4048 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4049 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4050 args->virtual_allocated_range[lastEntryIndex].size 4051 += base + size - lastRangeEnd; 4052 return base; 4053 } 4054 4055 // see if there's a gap before the first one 4056 addr_t rangeStart = args->virtual_allocated_range[0].start; 4057 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4058 base = rangeStart - size; 4059 if (alignment > 0) 4060 base = ROUNDDOWN(base, alignment); 4061 4062 if (base >= KERNEL_BASE) { 4063 args->virtual_allocated_range[0].start = base; 4064 args->virtual_allocated_range[0].size += rangeStart - base; 4065 return base; 4066 } 4067 } 4068 4069 return 0; 4070 } 4071 4072 4073 static bool 4074 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4075 { 4076 // TODO: horrible brute-force method of determining if the page can be 4077 // allocated 4078 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4079 if (address >= args->physical_memory_range[i].start 4080 && address < args->physical_memory_range[i].start 4081 + args->physical_memory_range[i].size) 4082 return true; 4083 } 4084 return false; 4085 } 4086 4087 4088 page_num_t 4089 vm_allocate_early_physical_page(kernel_args* args) 4090 { 4091 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4092 phys_addr_t nextPage; 4093 4094 nextPage = args->physical_allocated_range[i].start 4095 + args->physical_allocated_range[i].size; 4096 // see if the page after the next allocated paddr run can be allocated 4097 if (i + 1 < args->num_physical_allocated_ranges 4098 && args->physical_allocated_range[i + 1].size != 0) { 4099 // see if the next page will collide with the next allocated range 4100 if (nextPage >= args->physical_allocated_range[i+1].start) 4101 continue; 4102 } 4103 // see if the next physical page 
fits in the memory block 4104 if (is_page_in_physical_memory_range(args, nextPage)) { 4105 // we got one! 4106 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4107 return nextPage / B_PAGE_SIZE; 4108 } 4109 } 4110 4111 // Expanding upwards didn't work, try going downwards. 4112 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4113 phys_addr_t nextPage; 4114 4115 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4116 // see if the page after the prev allocated paddr run can be allocated 4117 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4118 // see if the next page will collide with the next allocated range 4119 if (nextPage < args->physical_allocated_range[i-1].start 4120 + args->physical_allocated_range[i-1].size) 4121 continue; 4122 } 4123 // see if the next physical page fits in the memory block 4124 if (is_page_in_physical_memory_range(args, nextPage)) { 4125 // we got one! 4126 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4127 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4128 return nextPage / B_PAGE_SIZE; 4129 } 4130 } 4131 4132 return 0; 4133 // could not allocate a block 4134 } 4135 4136 4137 /*! This one uses the kernel_args' physical and virtual memory ranges to 4138 allocate some pages before the VM is completely up. 4139 */ 4140 addr_t 4141 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4142 uint32 attributes, addr_t alignment) 4143 { 4144 if (physicalSize > virtualSize) 4145 physicalSize = virtualSize; 4146 4147 // find the vaddr to allocate at 4148 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4149 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4150 if (virtualBase == 0) { 4151 panic("vm_allocate_early: could not allocate virtual address\n"); 4152 return 0; 4153 } 4154 4155 // map the pages 4156 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4157 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4158 if (physicalAddress == 0) 4159 panic("error allocating early page!\n"); 4160 4161 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4162 4163 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4164 physicalAddress * B_PAGE_SIZE, attributes, 4165 &vm_allocate_early_physical_page); 4166 } 4167 4168 return virtualBase; 4169 } 4170 4171 4172 /*! The main entrance point to initialize the VM. 
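	Sets up the architecture translation map and VM, the page, cache and slab
	subsystems and the kernel heap, and creates areas for everything the boot
	loader has already put in place (kernel image and preloaded images, kernel
	args, and the per-CPU idle thread stacks).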
*/ 4173 status_t 4174 vm_init(kernel_args* args) 4175 { 4176 struct preloaded_image* image; 4177 void* address; 4178 status_t err = 0; 4179 uint32 i; 4180 4181 TRACE(("vm_init: entry\n")); 4182 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4183 err = arch_vm_init(args); 4184 4185 // initialize some globals 4186 vm_page_init_num_pages(args); 4187 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4188 4189 slab_init(args); 4190 4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4192 off_t heapSize = INITIAL_HEAP_SIZE; 4193 // try to accomodate low memory systems 4194 while (heapSize > sAvailableMemory / 8) 4195 heapSize /= 2; 4196 if (heapSize < 1024 * 1024) 4197 panic("vm_init: go buy some RAM please."); 4198 4199 // map in the new heap and initialize it 4200 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4201 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4202 TRACE(("heap at 0x%lx\n", heapBase)); 4203 heap_init(heapBase, heapSize); 4204 #endif 4205 4206 // initialize the free page list and physical page mapper 4207 vm_page_init(args); 4208 4209 // initialize the cache allocators 4210 vm_cache_init(args); 4211 4212 { 4213 status_t error = VMAreas::Init(); 4214 if (error != B_OK) 4215 panic("vm_init: error initializing areas map\n"); 4216 } 4217 4218 VMAddressSpace::Init(); 4219 reserve_boot_loader_ranges(args); 4220 4221 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4222 heap_init_post_area(); 4223 #endif 4224 4225 // Do any further initialization that the architecture dependant layers may 4226 // need now 4227 arch_vm_translation_map_init_post_area(args); 4228 arch_vm_init_post_area(args); 4229 vm_page_init_post_area(args); 4230 slab_init_post_area(); 4231 4232 // allocate areas to represent stuff that already exists 4233 4234 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4235 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4236 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4237 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4238 #endif 4239 4240 allocate_kernel_args(args); 4241 4242 create_preloaded_image_areas(args->kernel_image); 4243 4244 // allocate areas for preloaded images 4245 for (image = args->preloaded_images; image != NULL; image = image->next) 4246 create_preloaded_image_areas(image); 4247 4248 // allocate kernel stacks 4249 for (i = 0; i < args->num_cpus; i++) { 4250 char name[64]; 4251 4252 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4253 address = (void*)args->cpu_kstack[i].start; 4254 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4255 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4256 } 4257 4258 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4259 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4260 4261 #if PARANOID_KERNEL_MALLOC 4262 vm_block_address_range("uninitialized heap memory", 4263 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4264 #endif 4265 #if PARANOID_KERNEL_FREE 4266 vm_block_address_range("freed heap memory", 4267 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4268 #endif 4269 4270 // create the object cache for the page mappings 4271 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4272 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4273 NULL, NULL); 4274 if (gPageMappingsObjectCache == NULL) 4275 panic("failed to create page mappings object cache"); 4276 4277 
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4278 4279 #if DEBUG_CACHE_LIST 4280 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4281 virtual_address_restrictions virtualRestrictions = {}; 4282 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4283 physical_address_restrictions physicalRestrictions = {}; 4284 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4285 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4286 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4287 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4288 &physicalRestrictions, (void**)&sCacheInfoTable); 4289 } 4290 #endif // DEBUG_CACHE_LIST 4291 4292 // add some debugger commands 4293 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4294 add_debugger_command("area", &dump_area, 4295 "Dump info about a particular area"); 4296 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4297 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4298 #if DEBUG_CACHE_LIST 4299 if (sCacheInfoTable != NULL) { 4300 add_debugger_command_etc("caches", &dump_caches, 4301 "List all VMCache trees", 4302 "[ \"-c\" ]\n" 4303 "All cache trees are listed sorted in decreasing order by number " 4304 "of\n" 4305 "used pages or, if \"-c\" is specified, by size of committed " 4306 "memory.\n", 4307 0); 4308 } 4309 #endif 4310 add_debugger_command("avail", &dump_available_memory, 4311 "Dump available memory"); 4312 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4313 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4314 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4315 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4316 add_debugger_command("string", &display_mem, "dump strings"); 4317 4318 add_debugger_command_etc("mapping", &dump_mapping_info, 4319 "Print address mapping information", 4320 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4321 "Prints low-level page mapping information for a given address. If\n" 4322 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4323 "address that is looked up in the translation map of the current\n" 4324 "team, respectively the team specified by thread ID <thread ID>. If\n" 4325 "\"-r\" is specified, <address> is a physical address that is\n" 4326 "searched in the translation map of all teams, respectively the team\n" 4327 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4328 "<address> is the address of a vm_page structure. 
The behavior is\n" 4329 "equivalent to specifying \"-r\" with the physical address of that\n" 4330 "page.\n", 4331 0); 4332 4333 TRACE(("vm_init: exit\n")); 4334 4335 vm_cache_init_post_heap(); 4336 4337 return err; 4338 } 4339 4340 4341 status_t 4342 vm_init_post_sem(kernel_args* args) 4343 { 4344 // This frees all unused boot loader resources and makes its space available 4345 // again 4346 arch_vm_init_end(args); 4347 unreserve_boot_loader_ranges(args); 4348 4349 // fill in all of the semaphores that were not allocated before 4350 // since we're still single threaded and only the kernel address space 4351 // exists, it isn't that hard to find all of the ones we need to create 4352 4353 arch_vm_translation_map_init_post_sem(args); 4354 4355 slab_init_post_sem(); 4356 4357 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4358 heap_init_post_sem(); 4359 #endif 4360 4361 return B_OK; 4362 } 4363 4364 4365 status_t 4366 vm_init_post_thread(kernel_args* args) 4367 { 4368 vm_page_init_post_thread(args); 4369 slab_init_post_thread(); 4370 return heap_init_post_thread(); 4371 } 4372 4373 4374 status_t 4375 vm_init_post_modules(kernel_args* args) 4376 { 4377 return arch_vm_init_post_modules(args); 4378 } 4379 4380 4381 void 4382 permit_page_faults(void) 4383 { 4384 Thread* thread = thread_get_current_thread(); 4385 if (thread != NULL) 4386 atomic_add(&thread->page_faults_allowed, 1); 4387 } 4388 4389 4390 void 4391 forbid_page_faults(void) 4392 { 4393 Thread* thread = thread_get_current_thread(); 4394 if (thread != NULL) 4395 atomic_add(&thread->page_faults_allowed, -1); 4396 } 4397 4398 4399 status_t 4400 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4401 bool isUser, addr_t* newIP) 4402 { 4403 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4404 faultAddress)); 4405 4406 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4407 4408 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4409 VMAddressSpace* addressSpace = NULL; 4410 4411 status_t status = B_OK; 4412 *newIP = 0; 4413 atomic_add((int32*)&sPageFaults, 1); 4414 4415 if (IS_KERNEL_ADDRESS(pageAddress)) { 4416 addressSpace = VMAddressSpace::GetKernel(); 4417 } else if (IS_USER_ADDRESS(pageAddress)) { 4418 addressSpace = VMAddressSpace::GetCurrent(); 4419 if (addressSpace == NULL) { 4420 if (!isUser) { 4421 dprintf("vm_page_fault: kernel thread accessing invalid user " 4422 "memory!\n"); 4423 status = B_BAD_ADDRESS; 4424 TPF(PageFaultError(-1, 4425 VMPageFaultTracing 4426 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4427 } else { 4428 // XXX weird state. 
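				// A userland fault should always come from a thread that has
				// a current address space; reaching this point indicates an
				// inconsistent kernel state.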
4429 panic("vm_page_fault: non kernel thread accessing user memory " 4430 "that doesn't exist!\n"); 4431 status = B_BAD_ADDRESS; 4432 } 4433 } 4434 } else { 4435 // the hit was probably in the 64k DMZ between kernel and user space 4436 // this keeps a user space thread from passing a buffer that crosses 4437 // into kernel space 4438 status = B_BAD_ADDRESS; 4439 TPF(PageFaultError(-1, 4440 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4441 } 4442 4443 if (status == B_OK) { 4444 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4445 isUser, NULL); 4446 } 4447 4448 if (status < B_OK) { 4449 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4450 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4451 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4452 thread_get_current_thread_id()); 4453 if (!isUser) { 4454 Thread* thread = thread_get_current_thread(); 4455 if (thread != NULL && thread->fault_handler != 0) { 4456 // this will cause the arch dependant page fault handler to 4457 // modify the IP on the interrupt frame or whatever to return 4458 // to this address 4459 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4460 } else { 4461 // unhandled page fault in the kernel 4462 panic("vm_page_fault: unhandled page fault in kernel space at " 4463 "0x%lx, ip 0x%lx\n", address, faultAddress); 4464 } 4465 } else { 4466 Thread* thread = thread_get_current_thread(); 4467 4468 #ifdef TRACE_FAULTS 4469 VMArea* area = NULL; 4470 if (addressSpace != NULL) { 4471 addressSpace->ReadLock(); 4472 area = addressSpace->LookupArea(faultAddress); 4473 } 4474 4475 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4476 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4477 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4478 thread->team->Name(), thread->team->id, 4479 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4480 faultAddress, area ? area->name : "???", faultAddress - (area ? 4481 area->Base() : 0x0)); 4482 4483 if (addressSpace != NULL) 4484 addressSpace->ReadUnlock(); 4485 #endif 4486 4487 // If the thread has a signal handler for SIGSEGV, we simply 4488 // send it the signal. Otherwise we notify the user debugger 4489 // first. 4490 struct sigaction action; 4491 if ((sigaction(SIGSEGV, NULL, &action) == 0 4492 && action.sa_handler != SIG_DFL 4493 && action.sa_handler != SIG_IGN) 4494 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4495 SIGSEGV)) { 4496 Signal signal(SIGSEGV, 4497 status == B_PERMISSION_DENIED 4498 ? 
SEGV_ACCERR : SEGV_MAPERR, 4499 EFAULT, thread->team->id); 4500 signal.SetAddress((void*)address); 4501 send_signal_to_thread(thread, signal, 0); 4502 } 4503 } 4504 } 4505 4506 if (addressSpace != NULL) 4507 addressSpace->Put(); 4508 4509 return B_HANDLED_INTERRUPT; 4510 } 4511 4512 4513 struct PageFaultContext { 4514 AddressSpaceReadLocker addressSpaceLocker; 4515 VMCacheChainLocker cacheChainLocker; 4516 4517 VMTranslationMap* map; 4518 VMCache* topCache; 4519 off_t cacheOffset; 4520 vm_page_reservation reservation; 4521 bool isWrite; 4522 4523 // return values 4524 vm_page* page; 4525 bool restart; 4526 bool pageAllocated; 4527 4528 4529 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4530 : 4531 addressSpaceLocker(addressSpace, true), 4532 map(addressSpace->TranslationMap()), 4533 isWrite(isWrite) 4534 { 4535 } 4536 4537 ~PageFaultContext() 4538 { 4539 UnlockAll(); 4540 vm_page_unreserve_pages(&reservation); 4541 } 4542 4543 void Prepare(VMCache* topCache, off_t cacheOffset) 4544 { 4545 this->topCache = topCache; 4546 this->cacheOffset = cacheOffset; 4547 page = NULL; 4548 restart = false; 4549 pageAllocated = false; 4550 4551 cacheChainLocker.SetTo(topCache); 4552 } 4553 4554 void UnlockAll(VMCache* exceptCache = NULL) 4555 { 4556 topCache = NULL; 4557 addressSpaceLocker.Unlock(); 4558 cacheChainLocker.Unlock(exceptCache); 4559 } 4560 }; 4561 4562 4563 /*! Gets the page that should be mapped into the area. 4564 Returns an error code other than \c B_OK, if the page couldn't be found or 4565 paged in. The locking state of the address space and the caches is undefined 4566 in that case. 4567 Returns \c B_OK with \c context.restart set to \c true, if the functions 4568 had to unlock the address space and all caches and is supposed to be called 4569 again. 4570 Returns \c B_OK with \c context.restart set to \c false, if the page was 4571 found. It is returned in \c context.page. The address space will still be 4572 locked as well as all caches starting from the top cache to at least the 4573 cache the page lives in. 4574 */ 4575 static status_t 4576 fault_get_page(PageFaultContext& context) 4577 { 4578 VMCache* cache = context.topCache; 4579 VMCache* lastCache = NULL; 4580 vm_page* page = NULL; 4581 4582 while (cache != NULL) { 4583 // We already hold the lock of the cache at this point. 4584 4585 lastCache = cache; 4586 4587 page = cache->LookupPage(context.cacheOffset); 4588 if (page != NULL && page->busy) { 4589 // page must be busy -- wait for it to become unbusy 4590 context.UnlockAll(cache); 4591 cache->ReleaseRefLocked(); 4592 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4593 4594 // restart the whole process 4595 context.restart = true; 4596 return B_OK; 4597 } 4598 4599 if (page != NULL) 4600 break; 4601 4602 // The current cache does not contain the page we're looking for. 4603 4604 // see if the backing store has it 4605 if (cache->HasPage(context.cacheOffset)) { 4606 // insert a fresh page and mark it busy -- we're going to read it in 4607 page = vm_page_allocate_page(&context.reservation, 4608 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4609 cache->InsertPage(page, context.cacheOffset); 4610 4611 // We need to unlock all caches and the address space while reading 4612 // the page in. Keep a reference to the cache around. 
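			// (The reference is released again below via ReleaseRefAndUnlock(),
			// both in the error path and after a successful read.)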
4613 cache->AcquireRefLocked(); 4614 context.UnlockAll(); 4615 4616 // read the page in 4617 generic_io_vec vec; 4618 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4619 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4620 4621 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4622 B_PHYSICAL_IO_REQUEST, &bytesRead); 4623 4624 cache->Lock(); 4625 4626 if (status < B_OK) { 4627 // on error remove and free the page 4628 dprintf("reading page from cache %p returned: %s!\n", 4629 cache, strerror(status)); 4630 4631 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4632 cache->RemovePage(page); 4633 vm_page_set_state(page, PAGE_STATE_FREE); 4634 4635 cache->ReleaseRefAndUnlock(); 4636 return status; 4637 } 4638 4639 // mark the page unbusy again 4640 cache->MarkPageUnbusy(page); 4641 4642 DEBUG_PAGE_ACCESS_END(page); 4643 4644 // Since we needed to unlock everything temporarily, the area 4645 // situation might have changed. So we need to restart the whole 4646 // process. 4647 cache->ReleaseRefAndUnlock(); 4648 context.restart = true; 4649 return B_OK; 4650 } 4651 4652 cache = context.cacheChainLocker.LockSourceCache(); 4653 } 4654 4655 if (page == NULL) { 4656 // There was no adequate page, determine the cache for a clean one. 4657 // Read-only pages come in the deepest cache, only the top most cache 4658 // may have direct write access. 4659 cache = context.isWrite ? context.topCache : lastCache; 4660 4661 // allocate a clean page 4662 page = vm_page_allocate_page(&context.reservation, 4663 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4664 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4665 page->physical_page_number)); 4666 4667 // insert the new page into our cache 4668 cache->InsertPage(page, context.cacheOffset); 4669 context.pageAllocated = true; 4670 } else if (page->Cache() != context.topCache && context.isWrite) { 4671 // We have a page that has the data we want, but in the wrong cache 4672 // object so we need to copy it and stick it into the top cache. 4673 vm_page* sourcePage = page; 4674 4675 // TODO: If memory is low, it might be a good idea to steal the page 4676 // from our source cache -- if possible, that is. 4677 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4678 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4679 4680 // To not needlessly kill concurrency we unlock all caches but the top 4681 // one while copying the page. Lacking another mechanism to ensure that 4682 // the source page doesn't disappear, we mark it busy. 4683 sourcePage->busy = true; 4684 context.cacheChainLocker.UnlockKeepRefs(true); 4685 4686 // copy the page 4687 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4688 sourcePage->physical_page_number * B_PAGE_SIZE); 4689 4690 context.cacheChainLocker.RelockCaches(true); 4691 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4692 4693 // insert the new page into our cache 4694 context.topCache->InsertPage(page, context.cacheOffset); 4695 context.pageAllocated = true; 4696 } else 4697 DEBUG_PAGE_ACCESS_START(page); 4698 4699 context.page = page; 4700 return B_OK; 4701 } 4702 4703 4704 /*! Makes sure the address in the given address space is mapped. 4705 4706 \param addressSpace The address space. 4707 \param originalAddress The address. Doesn't need to be page aligned. 4708 \param isWrite If \c true the address shall be write-accessible. 4709 \param isUser If \c true the access is requested by a userland team. 
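	\param isExecute If \c true the address shall be executable.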
4710 \param wirePage On success, if non \c NULL, the wired count of the page 4711 mapped at the given address is incremented and the page is returned 4712 via this parameter. 4713 \return \c B_OK on success, another error code otherwise. 4714 */ 4715 static status_t 4716 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4717 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4718 { 4719 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4720 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4721 originalAddress, isWrite, isUser)); 4722 4723 PageFaultContext context(addressSpace, isWrite); 4724 4725 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4726 status_t status = B_OK; 4727 4728 addressSpace->IncrementFaultCount(); 4729 4730 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4731 // the pages upfront makes sure we don't have any cache locked, so that the 4732 // page daemon/thief can do their job without problems. 4733 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4734 originalAddress); 4735 context.addressSpaceLocker.Unlock(); 4736 vm_page_reserve_pages(&context.reservation, reservePages, 4737 addressSpace == VMAddressSpace::Kernel() 4738 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4739 4740 while (true) { 4741 context.addressSpaceLocker.Lock(); 4742 4743 // get the area the fault was in 4744 VMArea* area = addressSpace->LookupArea(address); 4745 if (area == NULL) { 4746 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4747 "space\n", originalAddress); 4748 TPF(PageFaultError(-1, 4749 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4750 status = B_BAD_ADDRESS; 4751 break; 4752 } 4753 4754 // check permissions 4755 uint32 protection = get_area_page_protection(area, address); 4756 if (isUser && (protection & B_USER_PROTECTION) == 0 4757 && (area->protection & B_KERNEL_AREA) != 0) { 4758 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4759 area->id, (void*)originalAddress); 4760 TPF(PageFaultError(area->id, 4761 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4762 status = B_PERMISSION_DENIED; 4763 break; 4764 } 4765 if (isWrite && (protection 4766 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4767 dprintf("write access attempted on write-protected area 0x%" 4768 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4769 TPF(PageFaultError(area->id, 4770 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4771 status = B_PERMISSION_DENIED; 4772 break; 4773 } else if (isExecute && (protection 4774 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4775 dprintf("instruction fetch attempted on execute-protected area 0x%" 4776 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4777 TPF(PageFaultError(area->id, 4778 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4779 status = B_PERMISSION_DENIED; 4780 break; 4781 } else if (!isWrite && !isExecute && (protection 4782 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4783 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4784 " at %p\n", area->id, (void*)originalAddress); 4785 TPF(PageFaultError(area->id, 4786 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4787 status = B_PERMISSION_DENIED; 4788 break; 4789 } 4790 4791 // We have the area, it was a valid access, so let's try to resolve the 4792 // page fault now. 4793 // At first, the top most cache from the area is investigated. 
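		// Prepare() takes over the already locked top cache returned by
		// vm_area_get_locked_cache() and records the fault's offset within
		// that cache (the area-relative offset plus the area's cache_offset).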
4794 4795 context.Prepare(vm_area_get_locked_cache(area), 4796 address - area->Base() + area->cache_offset); 4797 4798 // See if this cache has a fault handler -- this will do all the work 4799 // for us. 4800 { 4801 // Note, since the page fault is resolved with interrupts enabled, 4802 // the fault handler could be called more than once for the same 4803 // reason -- the store must take this into account. 4804 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4805 if (status != B_BAD_HANDLER) 4806 break; 4807 } 4808 4809 // The top most cache has no fault handler, so let's see if the cache or 4810 // its sources already have the page we're searching for (we're going 4811 // from top to bottom). 4812 status = fault_get_page(context); 4813 if (status != B_OK) { 4814 TPF(PageFaultError(area->id, status)); 4815 break; 4816 } 4817 4818 if (context.restart) 4819 continue; 4820 4821 // All went fine, all there is left to do is to map the page into the 4822 // address space. 4823 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4824 context.page)); 4825 4826 // If the page doesn't reside in the area's cache, we need to make sure 4827 // it's mapped in read-only, so that we cannot overwrite someone else's 4828 // data (copy-on-write) 4829 uint32 newProtection = protection; 4830 if (context.page->Cache() != context.topCache && !isWrite) 4831 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4832 4833 bool unmapPage = false; 4834 bool mapPage = true; 4835 4836 // check whether there's already a page mapped at the address 4837 context.map->Lock(); 4838 4839 phys_addr_t physicalAddress; 4840 uint32 flags; 4841 vm_page* mappedPage = NULL; 4842 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4843 && (flags & PAGE_PRESENT) != 0 4844 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4845 != NULL) { 4846 // Yep there's already a page. If it's ours, we can simply adjust 4847 // its protection. Otherwise we have to unmap it. 4848 if (mappedPage == context.page) { 4849 context.map->ProtectPage(area, address, newProtection); 4850 // Note: We assume that ProtectPage() is atomic (i.e. 4851 // the page isn't temporarily unmapped), otherwise we'd have 4852 // to make sure it isn't wired. 4853 mapPage = false; 4854 } else 4855 unmapPage = true; 4856 } 4857 4858 context.map->Unlock(); 4859 4860 if (unmapPage) { 4861 // If the page is wired, we can't unmap it. Wait until it is unwired 4862 // again and restart. Note that the page cannot be wired for 4863 // writing, since it it isn't in the topmost cache. So we can safely 4864 // ignore ranges wired for writing (our own and other concurrent 4865 // wiring attempts in progress) and in fact have to do that to avoid 4866 // a deadlock. 4867 VMAreaUnwiredWaiter waiter; 4868 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4869 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4870 // unlock everything and wait 4871 if (context.pageAllocated) { 4872 // ... but since we allocated a page and inserted it into 4873 // the top cache, remove and free it first. Otherwise we'd 4874 // have a page from a lower cache mapped while an upper 4875 // cache has a page that would shadow it. 
4876 context.topCache->RemovePage(context.page); 4877 vm_page_free_etc(context.topCache, context.page, 4878 &context.reservation); 4879 } else 4880 DEBUG_PAGE_ACCESS_END(context.page); 4881 4882 context.UnlockAll(); 4883 waiter.waitEntry.Wait(); 4884 continue; 4885 } 4886 4887 // Note: The mapped page is a page of a lower cache. We are 4888 // guaranteed to have that cached locked, our new page is a copy of 4889 // that page, and the page is not busy. The logic for that guarantee 4890 // is as follows: Since the page is mapped, it must live in the top 4891 // cache (ruled out above) or any of its lower caches, and there is 4892 // (was before the new page was inserted) no other page in any 4893 // cache between the top cache and the page's cache (otherwise that 4894 // would be mapped instead). That in turn means that our algorithm 4895 // must have found it and therefore it cannot be busy either. 4896 DEBUG_PAGE_ACCESS_START(mappedPage); 4897 unmap_page(area, address); 4898 DEBUG_PAGE_ACCESS_END(mappedPage); 4899 } 4900 4901 if (mapPage) { 4902 if (map_page(area, context.page, address, newProtection, 4903 &context.reservation) != B_OK) { 4904 // Mapping can only fail, when the page mapping object couldn't 4905 // be allocated. Save for the missing mapping everything is 4906 // fine, though. If this was a regular page fault, we'll simply 4907 // leave and probably fault again. To make sure we'll have more 4908 // luck then, we ensure that the minimum object reserve is 4909 // available. 4910 DEBUG_PAGE_ACCESS_END(context.page); 4911 4912 context.UnlockAll(); 4913 4914 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4915 != B_OK) { 4916 // Apparently the situation is serious. Let's get ourselves 4917 // killed. 4918 status = B_NO_MEMORY; 4919 } else if (wirePage != NULL) { 4920 // The caller expects us to wire the page. Since 4921 // object_cache_reserve() succeeded, we should now be able 4922 // to allocate a mapping structure. Restart. 
4923 continue; 4924 } 4925 4926 break; 4927 } 4928 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4929 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4930 4931 // also wire the page, if requested 4932 if (wirePage != NULL && status == B_OK) { 4933 increment_page_wired_count(context.page); 4934 *wirePage = context.page; 4935 } 4936 4937 DEBUG_PAGE_ACCESS_END(context.page); 4938 4939 break; 4940 } 4941 4942 return status; 4943 } 4944 4945 4946 status_t 4947 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4948 { 4949 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4950 } 4951 4952 status_t 4953 vm_put_physical_page(addr_t vaddr, void* handle) 4954 { 4955 return sPhysicalPageMapper->PutPage(vaddr, handle); 4956 } 4957 4958 4959 status_t 4960 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4961 void** _handle) 4962 { 4963 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4964 } 4965 4966 status_t 4967 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4968 { 4969 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4970 } 4971 4972 4973 status_t 4974 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4975 { 4976 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4977 } 4978 4979 status_t 4980 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4981 { 4982 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4983 } 4984 4985 4986 void 4987 vm_get_info(system_info* info) 4988 { 4989 swap_get_info(info); 4990 4991 MutexLocker locker(sAvailableMemoryLock); 4992 info->needed_memory = sNeededMemory; 4993 info->free_memory = sAvailableMemory; 4994 } 4995 4996 4997 uint32 4998 vm_num_page_faults(void) 4999 { 5000 return sPageFaults; 5001 } 5002 5003 5004 off_t 5005 vm_available_memory(void) 5006 { 5007 MutexLocker locker(sAvailableMemoryLock); 5008 return sAvailableMemory; 5009 } 5010 5011 5012 off_t 5013 vm_available_not_needed_memory(void) 5014 { 5015 MutexLocker locker(sAvailableMemoryLock); 5016 return sAvailableMemory - sNeededMemory; 5017 } 5018 5019 5020 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5021 debugger. 
5022 */ 5023 off_t 5024 vm_available_not_needed_memory_debug(void) 5025 { 5026 return sAvailableMemory - sNeededMemory; 5027 } 5028 5029 5030 size_t 5031 vm_kernel_address_space_left(void) 5032 { 5033 return VMAddressSpace::Kernel()->FreeSpace(); 5034 } 5035 5036 5037 void 5038 vm_unreserve_memory(size_t amount) 5039 { 5040 mutex_lock(&sAvailableMemoryLock); 5041 5042 sAvailableMemory += amount; 5043 5044 mutex_unlock(&sAvailableMemoryLock); 5045 } 5046 5047 5048 status_t 5049 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5050 { 5051 size_t reserve = kMemoryReserveForPriority[priority]; 5052 5053 MutexLocker locker(sAvailableMemoryLock); 5054 5055 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5056 5057 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5058 sAvailableMemory -= amount; 5059 return B_OK; 5060 } 5061 5062 if (timeout <= 0) 5063 return B_NO_MEMORY; 5064 5065 // turn timeout into an absolute timeout 5066 timeout += system_time(); 5067 5068 // loop until we've got the memory or the timeout occurs 5069 do { 5070 sNeededMemory += amount; 5071 5072 // call the low resource manager 5073 locker.Unlock(); 5074 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5075 B_ABSOLUTE_TIMEOUT, timeout); 5076 locker.Lock(); 5077 5078 sNeededMemory -= amount; 5079 5080 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5081 sAvailableMemory -= amount; 5082 return B_OK; 5083 } 5084 } while (timeout > system_time()); 5085 5086 return B_NO_MEMORY; 5087 } 5088 5089 5090 status_t 5091 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5092 { 5093 // NOTE: The caller is responsible for synchronizing calls to this function! 5094 5095 AddressSpaceReadLocker locker; 5096 VMArea* area; 5097 status_t status = locker.SetFromArea(id, area); 5098 if (status != B_OK) 5099 return status; 5100 5101 // nothing to do, if the type doesn't change 5102 uint32 oldType = area->MemoryType(); 5103 if (type == oldType) 5104 return B_OK; 5105 5106 // set the memory type of the area and the mapped pages 5107 VMTranslationMap* map = area->address_space->TranslationMap(); 5108 map->Lock(); 5109 area->SetMemoryType(type); 5110 map->ProtectArea(area, area->protection); 5111 map->Unlock(); 5112 5113 // set the physical memory type 5114 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5115 if (error != B_OK) { 5116 // reset the memory type of the area and the mapped pages 5117 map->Lock(); 5118 area->SetMemoryType(oldType); 5119 map->ProtectArea(area, area->protection); 5120 map->Unlock(); 5121 return error; 5122 } 5123 5124 return B_OK; 5125 5126 } 5127 5128 5129 /*! 
This function enforces some protection properties: 5130 - kernel areas must be W^X (after kernel startup) 5131 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5132 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5133 */ 5134 static void 5135 fix_protection(uint32* protection) 5136 { 5137 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5138 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5139 || (*protection & B_WRITE_AREA) != 0) 5140 && !gKernelStartup) 5141 panic("kernel areas cannot be both writable and executable!"); 5142 5143 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5144 if ((*protection & B_WRITE_AREA) != 0) 5145 *protection |= B_KERNEL_WRITE_AREA; 5146 if ((*protection & B_READ_AREA) != 0) 5147 *protection |= B_KERNEL_READ_AREA; 5148 } 5149 } 5150 5151 5152 static void 5153 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5154 { 5155 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5156 info->area = area->id; 5157 info->address = (void*)area->Base(); 5158 info->size = area->Size(); 5159 info->protection = area->protection; 5160 info->lock = area->wiring; 5161 info->team = area->address_space->ID(); 5162 info->copy_count = 0; 5163 info->in_count = 0; 5164 info->out_count = 0; 5165 // TODO: retrieve real values here! 5166 5167 VMCache* cache = vm_area_get_locked_cache(area); 5168 5169 // Note, this is a simplification; the cache could be larger than this area 5170 info->ram_size = cache->page_count * B_PAGE_SIZE; 5171 5172 vm_area_put_locked_cache(cache); 5173 } 5174 5175 5176 static status_t 5177 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5178 { 5179 // is newSize a multiple of B_PAGE_SIZE? 5180 if (newSize & (B_PAGE_SIZE - 1)) 5181 return B_BAD_VALUE; 5182 5183 // lock all affected address spaces and the cache 5184 VMArea* area; 5185 VMCache* cache; 5186 5187 MultiAddressSpaceLocker locker; 5188 AreaCacheLocker cacheLocker; 5189 5190 status_t status; 5191 size_t oldSize; 5192 bool anyKernelArea; 5193 bool restart; 5194 5195 do { 5196 anyKernelArea = false; 5197 restart = false; 5198 5199 locker.Unset(); 5200 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5201 if (status != B_OK) 5202 return status; 5203 cacheLocker.SetTo(cache, true); // already locked 5204 5205 // enforce restrictions 5206 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5207 || (area->protection & B_KERNEL_AREA) != 0)) { 5208 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5209 "resize kernel area %" B_PRId32 " (%s)\n", 5210 team_get_current_team_id(), areaID, area->name); 5211 return B_NOT_ALLOWED; 5212 } 5213 // TODO: Enforce all restrictions (team, etc.)! 5214 5215 oldSize = area->Size(); 5216 if (newSize == oldSize) 5217 return B_OK; 5218 5219 if (cache->type != CACHE_TYPE_RAM) 5220 return B_NOT_ALLOWED; 5221 5222 if (oldSize < newSize) { 5223 // We need to check if all areas of this cache can be resized. 5224 for (VMArea* current = cache->areas; current != NULL; 5225 current = current->cache_next) { 5226 if (!current->address_space->CanResizeArea(current, newSize)) 5227 return B_ERROR; 5228 anyKernelArea 5229 |= current->address_space == VMAddressSpace::Kernel(); 5230 } 5231 } else { 5232 // We're shrinking the areas, so we must make sure the affected 5233 // ranges are not wired. 
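			// If wait_if_area_range_is_wired() had to wait, the locks were
			// dropped in the meantime, so we restart and re-evaluate with
			// freshly locked areas and cache.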
5234 for (VMArea* current = cache->areas; current != NULL; 5235 current = current->cache_next) { 5236 anyKernelArea 5237 |= current->address_space == VMAddressSpace::Kernel(); 5238 5239 if (wait_if_area_range_is_wired(current, 5240 current->Base() + newSize, oldSize - newSize, &locker, 5241 &cacheLocker)) { 5242 restart = true; 5243 break; 5244 } 5245 } 5246 } 5247 } while (restart); 5248 5249 // Okay, looks good so far, so let's do it 5250 5251 int priority = kernel && anyKernelArea 5252 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5253 uint32 allocationFlags = kernel && anyKernelArea 5254 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5255 5256 if (oldSize < newSize) { 5257 // Growing the cache can fail, so we do it first. 5258 status = cache->Resize(cache->virtual_base + newSize, priority); 5259 if (status != B_OK) 5260 return status; 5261 } 5262 5263 for (VMArea* current = cache->areas; current != NULL; 5264 current = current->cache_next) { 5265 status = current->address_space->ResizeArea(current, newSize, 5266 allocationFlags); 5267 if (status != B_OK) 5268 break; 5269 5270 // We also need to unmap all pages beyond the new size, if the area has 5271 // shrunk 5272 if (newSize < oldSize) { 5273 VMCacheChainLocker cacheChainLocker(cache); 5274 cacheChainLocker.LockAllSourceCaches(); 5275 5276 unmap_pages(current, current->Base() + newSize, 5277 oldSize - newSize); 5278 5279 cacheChainLocker.Unlock(cache); 5280 } 5281 } 5282 5283 if (status == B_OK) { 5284 // Shrink or grow individual page protections if in use. 5285 if (area->page_protections != NULL) { 5286 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5287 uint8* newProtections 5288 = (uint8*)realloc(area->page_protections, bytes); 5289 if (newProtections == NULL) 5290 status = B_NO_MEMORY; 5291 else { 5292 area->page_protections = newProtections; 5293 5294 if (oldSize < newSize) { 5295 // init the additional page protections to that of the area 5296 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5297 uint32 areaProtection = area->protection 5298 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5299 memset(area->page_protections + offset, 5300 areaProtection | (areaProtection << 4), bytes - offset); 5301 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5302 uint8& entry = area->page_protections[offset - 1]; 5303 entry = (entry & 0x0f) | (areaProtection << 4); 5304 } 5305 } 5306 } 5307 } 5308 } 5309 5310 // shrinking the cache can't fail, so we do it now 5311 if (status == B_OK && newSize < oldSize) 5312 status = cache->Resize(cache->virtual_base + newSize, priority); 5313 5314 if (status != B_OK) { 5315 // Something failed -- resize the areas back to their original size. 5316 // This can fail, too, in which case we're seriously screwed. 
5317 for (VMArea* current = cache->areas; current != NULL; 5318 current = current->cache_next) { 5319 if (current->address_space->ResizeArea(current, oldSize, 5320 allocationFlags) != B_OK) { 5321 panic("vm_resize_area(): Failed and not being able to restore " 5322 "original state."); 5323 } 5324 } 5325 5326 cache->Resize(cache->virtual_base + oldSize, priority); 5327 } 5328 5329 // TODO: we must honour the lock restrictions of this area 5330 return status; 5331 } 5332 5333 5334 status_t 5335 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5336 { 5337 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5338 } 5339 5340 5341 status_t 5342 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5343 { 5344 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5345 } 5346 5347 5348 status_t 5349 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5350 bool user) 5351 { 5352 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5353 } 5354 5355 5356 void 5357 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5358 { 5359 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5360 } 5361 5362 5363 /*! Copies a range of memory directly from/to a page that might not be mapped 5364 at the moment. 5365 5366 For \a unsafeMemory the current mapping (if any is ignored). The function 5367 walks through the respective area's cache chain to find the physical page 5368 and copies from/to it directly. 5369 The memory range starting at \a unsafeMemory with a length of \a size bytes 5370 must not cross a page boundary. 5371 5372 \param teamID The team ID identifying the address space \a unsafeMemory is 5373 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5374 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5375 is passed, the address space of the thread returned by 5376 debug_get_debugged_thread() is used. 5377 \param unsafeMemory The start of the unsafe memory range to be copied 5378 from/to. 5379 \param buffer A safely accessible kernel buffer to be copied from/to. 5380 \param size The number of bytes to be copied. 5381 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5382 \a unsafeMemory, the other way around otherwise. 
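	\return \c B_OK on success, another error code otherwise.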
5383 */ 5384 status_t 5385 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5386 size_t size, bool copyToUnsafe) 5387 { 5388 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5389 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5390 return B_BAD_VALUE; 5391 } 5392 5393 // get the address space for the debugged thread 5394 VMAddressSpace* addressSpace; 5395 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5396 addressSpace = VMAddressSpace::Kernel(); 5397 } else if (teamID == B_CURRENT_TEAM) { 5398 Thread* thread = debug_get_debugged_thread(); 5399 if (thread == NULL || thread->team == NULL) 5400 return B_BAD_ADDRESS; 5401 5402 addressSpace = thread->team->address_space; 5403 } else 5404 addressSpace = VMAddressSpace::DebugGet(teamID); 5405 5406 if (addressSpace == NULL) 5407 return B_BAD_ADDRESS; 5408 5409 // get the area 5410 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5411 if (area == NULL) 5412 return B_BAD_ADDRESS; 5413 5414 // search the page 5415 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5416 + area->cache_offset; 5417 VMCache* cache = area->cache; 5418 vm_page* page = NULL; 5419 while (cache != NULL) { 5420 page = cache->DebugLookupPage(cacheOffset); 5421 if (page != NULL) 5422 break; 5423 5424 // Page not found in this cache -- if it is paged out, we must not try 5425 // to get it from lower caches. 5426 if (cache->DebugHasPage(cacheOffset)) 5427 break; 5428 5429 cache = cache->source; 5430 } 5431 5432 if (page == NULL) 5433 return B_UNSUPPORTED; 5434 5435 // copy from/to physical memory 5436 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5437 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5438 5439 if (copyToUnsafe) { 5440 if (page->Cache() != area->cache) 5441 return B_UNSUPPORTED; 5442 5443 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5444 } 5445 5446 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5447 } 5448 5449 5450 /** Validate that a memory range is either fully in kernel space, or fully in 5451 * userspace */ 5452 static inline bool 5453 validate_memory_range(const void* addr, size_t size) 5454 { 5455 addr_t address = (addr_t)addr; 5456 5457 // Check for overflows on all addresses. 5458 if ((address + size) < address) 5459 return false; 5460 5461 // Validate that the address range does not cross the kernel/user boundary. 5462 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5463 } 5464 5465 5466 // #pragma mark - kernel public API 5467 5468 5469 status_t 5470 user_memcpy(void* to, const void* from, size_t size) 5471 { 5472 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5473 return B_BAD_ADDRESS; 5474 5475 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5476 return B_BAD_ADDRESS; 5477 5478 return B_OK; 5479 } 5480 5481 5482 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5483 the string in \a to, NULL-terminating the result. 5484 5485 \param to Pointer to the destination C-string. 5486 \param from Pointer to the source C-string. 5487 \param size Size in bytes of the string buffer pointed to by \a to. 5488 5489 \return strlen(\a from). 5490 */ 5491 ssize_t 5492 user_strlcpy(char* to, const char* from, size_t size) 5493 { 5494 if (to == NULL && size != 0) 5495 return B_BAD_VALUE; 5496 if (from == NULL) 5497 return B_BAD_ADDRESS; 5498 5499 // Protect the source address from overflows. 
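	// I.e. clamp maxSize so that the range [from, from + maxSize) neither
	// wraps around the address space nor crosses from userland into kernel
	// space.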
5500 size_t maxSize = size; 5501 if ((addr_t)from + maxSize < (addr_t)from) 5502 maxSize -= (addr_t)from + maxSize; 5503 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5504 maxSize = USER_TOP - (addr_t)from; 5505 5506 if (!validate_memory_range(to, maxSize)) 5507 return B_BAD_ADDRESS; 5508 5509 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5510 if (result < 0) 5511 return result; 5512 5513 // If we hit the address overflow boundary, fail. 5514 if ((size_t)result >= maxSize && maxSize < size) 5515 return B_BAD_ADDRESS; 5516 5517 return result; 5518 } 5519 5520 5521 status_t 5522 user_memset(void* s, char c, size_t count) 5523 { 5524 if (!validate_memory_range(s, count)) 5525 return B_BAD_ADDRESS; 5526 5527 if (arch_cpu_user_memset(s, c, count) < B_OK) 5528 return B_BAD_ADDRESS; 5529 5530 return B_OK; 5531 } 5532 5533 5534 /*! Wires a single page at the given address. 5535 5536 \param team The team whose address space the address belongs to. Supports 5537 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5538 parameter is ignored. 5539 \param address address The virtual address to wire down. Does not need to 5540 be page aligned. 5541 \param writable If \c true the page shall be writable. 5542 \param info On success the info is filled in, among other things 5543 containing the physical address the given virtual one translates to. 5544 \return \c B_OK, when the page could be wired, another error code otherwise. 5545 */ 5546 status_t 5547 vm_wire_page(team_id team, addr_t address, bool writable, 5548 VMPageWiringInfo* info) 5549 { 5550 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5551 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5552 5553 // compute the page protection that is required 5554 bool isUser = IS_USER_ADDRESS(address); 5555 uint32 requiredProtection = PAGE_PRESENT 5556 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5557 if (writable) 5558 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5559 5560 // get and read lock the address space 5561 VMAddressSpace* addressSpace = NULL; 5562 if (isUser) { 5563 if (team == B_CURRENT_TEAM) 5564 addressSpace = VMAddressSpace::GetCurrent(); 5565 else 5566 addressSpace = VMAddressSpace::Get(team); 5567 } else 5568 addressSpace = VMAddressSpace::GetKernel(); 5569 if (addressSpace == NULL) 5570 return B_ERROR; 5571 5572 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5573 5574 VMTranslationMap* map = addressSpace->TranslationMap(); 5575 status_t error = B_OK; 5576 5577 // get the area 5578 VMArea* area = addressSpace->LookupArea(pageAddress); 5579 if (area == NULL) { 5580 addressSpace->Put(); 5581 return B_BAD_ADDRESS; 5582 } 5583 5584 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5585 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5586 5587 // mark the area range wired 5588 area->Wire(&info->range); 5589 5590 // Lock the area's cache chain and the translation map. Needed to look 5591 // up the page and play with its wired count. 5592 cacheChainLocker.LockAllSourceCaches(); 5593 map->Lock(); 5594 5595 phys_addr_t physicalAddress; 5596 uint32 flags; 5597 vm_page* page; 5598 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5599 && (flags & requiredProtection) == requiredProtection 5600 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5601 != NULL) { 5602 // Already mapped with the correct permissions -- just increment 5603 // the page's wired count. 
5604 increment_page_wired_count(page); 5605 5606 map->Unlock(); 5607 cacheChainLocker.Unlock(); 5608 addressSpaceLocker.Unlock(); 5609 } else { 5610 // Let vm_soft_fault() map the page for us, if possible. We need 5611 // to fully unlock to avoid deadlocks. Since we have already 5612 // wired the area itself, nothing disturbing will happen with it 5613 // in the meantime. 5614 map->Unlock(); 5615 cacheChainLocker.Unlock(); 5616 addressSpaceLocker.Unlock(); 5617 5618 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5619 isUser, &page); 5620 5621 if (error != B_OK) { 5622 // The page could not be mapped -- clean up. 5623 VMCache* cache = vm_area_get_locked_cache(area); 5624 area->Unwire(&info->range); 5625 cache->ReleaseRefAndUnlock(); 5626 addressSpace->Put(); 5627 return error; 5628 } 5629 } 5630 5631 info->physicalAddress 5632 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5633 + address % B_PAGE_SIZE; 5634 info->page = page; 5635 5636 return B_OK; 5637 } 5638 5639 5640 /*! Unwires a single page previously wired via vm_wire_page(). 5641 5642 \param info The same object passed to vm_wire_page() before. 5643 */ 5644 void 5645 vm_unwire_page(VMPageWiringInfo* info) 5646 { 5647 // lock the address space 5648 VMArea* area = info->range.area; 5649 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5650 // takes over our reference 5651 5652 // lock the top cache 5653 VMCache* cache = vm_area_get_locked_cache(area); 5654 VMCacheChainLocker cacheChainLocker(cache); 5655 5656 if (info->page->Cache() != cache) { 5657 // The page is not in the top cache, so we lock the whole cache chain 5658 // before touching the page's wired count. 5659 cacheChainLocker.LockAllSourceCaches(); 5660 } 5661 5662 decrement_page_wired_count(info->page); 5663 5664 // remove the wired range from the area 5665 area->Unwire(&info->range); 5666 5667 cacheChainLocker.Unlock(); 5668 } 5669 5670 5671 /*! Wires down the given address range in the specified team's address space. 5672 5673 If successful the function 5674 - acquires a reference to the specified team's address space, 5675 - adds respective wired ranges to all areas that intersect with the given 5676 address range, 5677 - makes sure all pages in the given address range are mapped with the 5678 requested access permissions and increments their wired count. 5679 5680 It fails when \a team doesn't specify a valid address space, when any part 5681 of the specified address range is not covered by areas, when the concerned 5682 areas don't allow mapping with the requested permissions, or when mapping 5683 failed for another reason. 5684 5685 When successful the call must be balanced by an unlock_memory_etc() call with 5686 the exact same parameters. 5687 5688 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5689 supported. 5690 \param address The start of the address range to be wired. 5691 \param numBytes The size of the address range to be wired. 5692 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 5693 requests that the range must be wired writable ("read from device 5694 into memory"). 5695 \return \c B_OK on success, another error code otherwise.
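	A hypothetical usage sketch (\c buffer and \c length are assumptions of
	this example, not values defined in this file):
	\code
	status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	if (error == B_OK) {
		// ... operate on the now-wired range ...
		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
			// balanced call with exactly the same parameters
	}
	\endcode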
5696 */ 5697 status_t 5698 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5699 { 5700 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5701 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5702 5703 // compute the page protection that is required 5704 bool isUser = IS_USER_ADDRESS(address); 5705 bool writable = (flags & B_READ_DEVICE) == 0; 5706 uint32 requiredProtection = PAGE_PRESENT 5707 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5708 if (writable) 5709 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5710 5711 uint32 mallocFlags = isUser 5712 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5713 5714 // get and read lock the address space 5715 VMAddressSpace* addressSpace = NULL; 5716 if (isUser) { 5717 if (team == B_CURRENT_TEAM) 5718 addressSpace = VMAddressSpace::GetCurrent(); 5719 else 5720 addressSpace = VMAddressSpace::Get(team); 5721 } else 5722 addressSpace = VMAddressSpace::GetKernel(); 5723 if (addressSpace == NULL) 5724 return B_ERROR; 5725 5726 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5727 // We get a new address space reference here. The one we got above will 5728 // be freed by unlock_memory_etc(). 5729 5730 VMTranslationMap* map = addressSpace->TranslationMap(); 5731 status_t error = B_OK; 5732 5733 // iterate through all concerned areas 5734 addr_t nextAddress = lockBaseAddress; 5735 while (nextAddress != lockEndAddress) { 5736 // get the next area 5737 VMArea* area = addressSpace->LookupArea(nextAddress); 5738 if (area == NULL) { 5739 error = B_BAD_ADDRESS; 5740 break; 5741 } 5742 5743 addr_t areaStart = nextAddress; 5744 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5745 5746 // allocate the wired range (do that before locking the cache to avoid 5747 // deadlocks) 5748 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5749 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5750 if (range == NULL) { 5751 error = B_NO_MEMORY; 5752 break; 5753 } 5754 5755 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5756 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5757 5758 // mark the area range wired 5759 area->Wire(range); 5760 5761 // Depending on the area cache type and the wiring, we may not need to 5762 // look at the individual pages. 5763 if (area->cache_type == CACHE_TYPE_NULL 5764 || area->cache_type == CACHE_TYPE_DEVICE 5765 || area->wiring == B_FULL_LOCK 5766 || area->wiring == B_CONTIGUOUS) { 5767 nextAddress = areaEnd; 5768 continue; 5769 } 5770 5771 // Lock the area's cache chain and the translation map. Needed to look 5772 // up pages and play with their wired count. 5773 cacheChainLocker.LockAllSourceCaches(); 5774 map->Lock(); 5775 5776 // iterate through the pages and wire them 5777 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5778 phys_addr_t physicalAddress; 5779 uint32 flags; 5780 5781 vm_page* page; 5782 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5783 && (flags & requiredProtection) == requiredProtection 5784 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5785 != NULL) { 5786 // Already mapped with the correct permissions -- just increment 5787 // the page's wired count. 5788 increment_page_wired_count(page); 5789 } else { 5790 // Let vm_soft_fault() map the page for us, if possible. We need 5791 // to fully unlock to avoid deadlocks. 
Since we have already 5792 // wired the area itself, nothing disturbing will happen with it 5793 // in the meantime. 5794 map->Unlock(); 5795 cacheChainLocker.Unlock(); 5796 addressSpaceLocker.Unlock(); 5797 5798 error = vm_soft_fault(addressSpace, nextAddress, writable, 5799 false, isUser, &page); 5800 5801 addressSpaceLocker.Lock(); 5802 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5803 cacheChainLocker.LockAllSourceCaches(); 5804 map->Lock(); 5805 } 5806 5807 if (error != B_OK) 5808 break; 5809 } 5810 5811 map->Unlock(); 5812 5813 if (error == B_OK) { 5814 cacheChainLocker.Unlock(); 5815 } else { 5816 // An error occurred, so abort right here. If the current address 5817 // is the first in this area, unwire the area, since we won't get 5818 // to it when reverting what we've done so far. 5819 if (nextAddress == areaStart) { 5820 area->Unwire(range); 5821 cacheChainLocker.Unlock(); 5822 range->~VMAreaWiredRange(); 5823 free_etc(range, mallocFlags); 5824 } else 5825 cacheChainLocker.Unlock(); 5826 5827 break; 5828 } 5829 } 5830 5831 if (error != B_OK) { 5832 // An error occurred, so unwire all that we've already wired. Note that 5833 // even if not a single page was wired, unlock_memory_etc() is called 5834 // to put the address space reference. 5835 addressSpaceLocker.Unlock(); 5836 unlock_memory_etc(team, (void*)lockBaseAddress, 5837 nextAddress - lockBaseAddress, flags); 5838 } 5839 5840 return error; 5841 } 5842 5843 5844 status_t 5845 lock_memory(void* address, size_t numBytes, uint32 flags) 5846 { 5847 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5848 } 5849 5850 5851 /*! Unwires an address range previously wired with lock_memory_etc(). 5852 5853 Note that a call to this function must balance a previous lock_memory_etc() 5854 call with exactly the same parameters. 5855 */ 5856 status_t 5857 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5858 { 5859 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5860 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5861 5862 // compute the page protection that is required 5863 bool isUser = IS_USER_ADDRESS(address); 5864 bool writable = (flags & B_READ_DEVICE) == 0; 5865 uint32 requiredProtection = PAGE_PRESENT 5866 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5867 if (writable) 5868 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5869 5870 uint32 mallocFlags = isUser 5871 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5872 5873 // get and read lock the address space 5874 VMAddressSpace* addressSpace = NULL; 5875 if (isUser) { 5876 if (team == B_CURRENT_TEAM) 5877 addressSpace = VMAddressSpace::GetCurrent(); 5878 else 5879 addressSpace = VMAddressSpace::Get(team); 5880 } else 5881 addressSpace = VMAddressSpace::GetKernel(); 5882 if (addressSpace == NULL) 5883 return B_ERROR; 5884 5885 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5886 // Take over the address space reference. We don't unlock until we're 5887 // done. 
5888 5889 VMTranslationMap* map = addressSpace->TranslationMap(); 5890 status_t error = B_OK; 5891 5892 // iterate through all concerned areas 5893 addr_t nextAddress = lockBaseAddress; 5894 while (nextAddress != lockEndAddress) { 5895 // get the next area 5896 VMArea* area = addressSpace->LookupArea(nextAddress); 5897 if (area == NULL) { 5898 error = B_BAD_ADDRESS; 5899 break; 5900 } 5901 5902 addr_t areaStart = nextAddress; 5903 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5904 5905 // Lock the area's top cache. This is a requirement for 5906 // VMArea::Unwire(). 5907 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5908 5909 // Depending on the area cache type and the wiring, we may not need to 5910 // look at the individual pages. 5911 if (area->cache_type == CACHE_TYPE_NULL 5912 || area->cache_type == CACHE_TYPE_DEVICE 5913 || area->wiring == B_FULL_LOCK 5914 || area->wiring == B_CONTIGUOUS) { 5915 // unwire the range (to avoid deadlocks we delete the range after 5916 // unlocking the cache) 5917 nextAddress = areaEnd; 5918 VMAreaWiredRange* range = area->Unwire(areaStart, 5919 areaEnd - areaStart, writable); 5920 cacheChainLocker.Unlock(); 5921 if (range != NULL) { 5922 range->~VMAreaWiredRange(); 5923 free_etc(range, mallocFlags); 5924 } 5925 continue; 5926 } 5927 5928 // Lock the area's cache chain and the translation map. Needed to look 5929 // up pages and play with their wired count. 5930 cacheChainLocker.LockAllSourceCaches(); 5931 map->Lock(); 5932 5933 // iterate through the pages and unwire them 5934 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5935 phys_addr_t physicalAddress; 5936 uint32 flags; 5937 5938 vm_page* page; 5939 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5940 && (flags & PAGE_PRESENT) != 0 5941 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5942 != NULL) { 5943 // The page is still mapped -- just decrement 5944 // its wired count. 5945 decrement_page_wired_count(page); 5946 } else { 5947 panic("unlock_memory_etc(): Failed to unwire page: address " 5948 "space %p, address: %#" B_PRIxADDR, addressSpace, 5949 nextAddress); 5950 error = B_BAD_VALUE; 5951 break; 5952 } 5953 } 5954 5955 map->Unlock(); 5956 5957 // All pages are unwired. Remove the area's wired range as well (to 5958 // avoid deadlocks we delete the range after unlocking the cache). 5959 VMAreaWiredRange* range = area->Unwire(areaStart, 5960 areaEnd - areaStart, writable); 5961 5962 cacheChainLocker.Unlock(); 5963 5964 if (range != NULL) { 5965 range->~VMAreaWiredRange(); 5966 free_etc(range, mallocFlags); 5967 } 5968 5969 if (error != B_OK) 5970 break; 5971 } 5972 5973 // get rid of the address space reference lock_memory_etc() acquired 5974 addressSpace->Put(); 5975 5976 return error; 5977 } 5978 5979 5980 status_t 5981 unlock_memory(void* address, size_t numBytes, uint32 flags) 5982 { 5983 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5984 } 5985 5986 5987 /*! Similar to get_memory_map(), but also allows specifying the address space 5988 for the memory in question and has saner semantics. 5989 Returns \c B_OK when the complete range could be translated or 5990 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either 5991 case the actual number of entries is written to \c *_numEntries. Any other 5992 error case indicates complete failure; \c *_numEntries will be set to \c 0 5993 in this case.
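	A hypothetical usage sketch (\c buffer and \c length are assumptions of
	this example):
	\code
	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &count);
	if (status == B_OK || status == B_BUFFER_OVERFLOW) {
		// entries[0 .. count - 1] describe the physical runs backing
		// the (possibly partial) range
	}
	\endcode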
5994 */ 5995 status_t 5996 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5997 physical_entry* table, uint32* _numEntries) 5998 { 5999 uint32 numEntries = *_numEntries; 6000 *_numEntries = 0; 6001 6002 VMAddressSpace* addressSpace; 6003 addr_t virtualAddress = (addr_t)address; 6004 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 6005 phys_addr_t physicalAddress; 6006 status_t status = B_OK; 6007 int32 index = -1; 6008 addr_t offset = 0; 6009 bool interrupts = are_interrupts_enabled(); 6010 6011 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 6012 "entries)\n", team, address, numBytes, numEntries)); 6013 6014 if (numEntries == 0 || numBytes == 0) 6015 return B_BAD_VALUE; 6016 6017 // in which address space is the address to be found? 6018 if (IS_USER_ADDRESS(virtualAddress)) { 6019 if (team == B_CURRENT_TEAM) 6020 addressSpace = VMAddressSpace::GetCurrent(); 6021 else 6022 addressSpace = VMAddressSpace::Get(team); 6023 } else 6024 addressSpace = VMAddressSpace::GetKernel(); 6025 6026 if (addressSpace == NULL) 6027 return B_ERROR; 6028 6029 VMTranslationMap* map = addressSpace->TranslationMap(); 6030 6031 if (interrupts) 6032 map->Lock(); 6033 6034 while (offset < numBytes) { 6035 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6036 uint32 flags; 6037 6038 if (interrupts) { 6039 status = map->Query((addr_t)address + offset, &physicalAddress, 6040 &flags); 6041 } else { 6042 status = map->QueryInterrupt((addr_t)address + offset, 6043 &physicalAddress, &flags); 6044 } 6045 if (status < B_OK) 6046 break; 6047 if ((flags & PAGE_PRESENT) == 0) { 6048 panic("get_memory_map() called on unmapped memory!"); 6049 return B_BAD_ADDRESS; 6050 } 6051 6052 if (index < 0 && pageOffset > 0) { 6053 physicalAddress += pageOffset; 6054 if (bytes > B_PAGE_SIZE - pageOffset) 6055 bytes = B_PAGE_SIZE - pageOffset; 6056 } 6057 6058 // need to switch to the next physical_entry? 6059 if (index < 0 || table[index].address 6060 != physicalAddress - table[index].size) { 6061 if ((uint32)++index + 1 > numEntries) { 6062 // table too small 6063 break; 6064 } 6065 table[index].address = physicalAddress; 6066 table[index].size = bytes; 6067 } else { 6068 // page fits into the current entry 6069 table[index].size += bytes; 6070 } 6071 6072 offset += bytes; 6073 } 6074 6075 if (interrupts) 6076 map->Unlock(); 6077 6078 if (status != B_OK) 6079 return status; 6080 6081 if ((uint32)index + 1 > numEntries) { 6082 *_numEntries = index; 6083 return B_BUFFER_OVERFLOW; 6084 } 6085 6086 *_numEntries = index + 1; 6087 return B_OK; 6088 } 6089 6090 6091 /*! According to the BeBook, this function should always succeed. 6092 This is no longer the case.
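	Callers should therefore check the return value. A hypothetical sketch
	(\c buffer and \c length are assumptions of this example):
	\code
	physical_entry entry;
	if (get_memory_map(buffer, length, &entry, 1) == B_OK) {
		// entry.address and entry.size describe the first physical run
	}
	\endcode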
6093 */ 6094 extern "C" int32 6095 __get_memory_map_haiku(const void* address, size_t numBytes, 6096 physical_entry* table, int32 numEntries) 6097 { 6098 uint32 entriesRead = numEntries; 6099 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6100 table, &entriesRead); 6101 if (error != B_OK) 6102 return error; 6103 6104 // close the entry list 6105 6106 // if it's only one entry, we will silently accept the missing ending 6107 if (numEntries == 1) 6108 return B_OK; 6109 6110 if (entriesRead + 1 > (uint32)numEntries) 6111 return B_BUFFER_OVERFLOW; 6112 6113 table[entriesRead].address = 0; 6114 table[entriesRead].size = 0; 6115 6116 return B_OK; 6117 } 6118 6119 6120 area_id 6121 area_for(void* address) 6122 { 6123 return vm_area_for((addr_t)address, true); 6124 } 6125 6126 6127 area_id 6128 find_area(const char* name) 6129 { 6130 return VMAreas::Find(name); 6131 } 6132 6133 6134 status_t 6135 _get_area_info(area_id id, area_info* info, size_t size) 6136 { 6137 if (size != sizeof(area_info) || info == NULL) 6138 return B_BAD_VALUE; 6139 6140 AddressSpaceReadLocker locker; 6141 VMArea* area; 6142 status_t status = locker.SetFromArea(id, area); 6143 if (status != B_OK) 6144 return status; 6145 6146 fill_area_info(area, info, size); 6147 return B_OK; 6148 } 6149 6150 6151 status_t 6152 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6153 { 6154 addr_t nextBase = *(addr_t*)cookie; 6155 6156 // we're already through the list 6157 if (nextBase == (addr_t)-1) 6158 return B_ENTRY_NOT_FOUND; 6159 6160 if (team == B_CURRENT_TEAM) 6161 team = team_get_current_team_id(); 6162 6163 AddressSpaceReadLocker locker(team); 6164 if (!locker.IsLocked()) 6165 return B_BAD_TEAM_ID; 6166 6167 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6168 if (area == NULL) { 6169 nextBase = (addr_t)-1; 6170 return B_ENTRY_NOT_FOUND; 6171 } 6172 6173 fill_area_info(area, info, size); 6174 *cookie = (ssize_t)(area->Base() + 1); 6175 6176 return B_OK; 6177 } 6178 6179 6180 status_t 6181 set_area_protection(area_id area, uint32 newProtection) 6182 { 6183 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6184 newProtection, true); 6185 } 6186 6187 6188 status_t 6189 resize_area(area_id areaID, size_t newSize) 6190 { 6191 return vm_resize_area(areaID, newSize, true); 6192 } 6193 6194 6195 /*! Transfers the specified area to a new team. The caller must be the owner 6196 of the area. 6197 */ 6198 area_id 6199 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6200 bool kernel) 6201 { 6202 area_info info; 6203 status_t status = get_area_info(id, &info); 6204 if (status != B_OK) 6205 return status; 6206 6207 if (info.team != thread_get_current_thread()->team->id) 6208 return B_PERMISSION_DENIED; 6209 6210 // We need to mark the area cloneable so the following operations work. 6211 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6212 if (status != B_OK) 6213 return status; 6214 6215 area_id clonedArea = vm_clone_area(target, info.name, _address, 6216 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6217 if (clonedArea < 0) 6218 return clonedArea; 6219 6220 status = vm_delete_area(info.team, id, kernel); 6221 if (status != B_OK) { 6222 vm_delete_area(target, clonedArea, kernel); 6223 return status; 6224 } 6225 6226 // Now we can reset the protection to whatever it was before. 
6227 set_area_protection(clonedArea, info.protection); 6228 6229 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6230 6231 return clonedArea; 6232 } 6233 6234 6235 extern "C" area_id 6236 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6237 size_t numBytes, uint32 addressSpec, uint32 protection, 6238 void** _virtualAddress) 6239 { 6240 if (!arch_vm_supports_protection(protection)) 6241 return B_NOT_SUPPORTED; 6242 6243 fix_protection(&protection); 6244 6245 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6246 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6247 false); 6248 } 6249 6250 6251 area_id 6252 clone_area(const char* name, void** _address, uint32 addressSpec, 6253 uint32 protection, area_id source) 6254 { 6255 if ((protection & B_KERNEL_PROTECTION) == 0) 6256 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6257 6258 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6259 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6260 } 6261 6262 6263 area_id 6264 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6265 uint32 protection, uint32 flags, uint32 guardSize, 6266 const virtual_address_restrictions* virtualAddressRestrictions, 6267 const physical_address_restrictions* physicalAddressRestrictions, 6268 void** _address) 6269 { 6270 fix_protection(&protection); 6271 6272 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6273 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6274 true, _address); 6275 } 6276 6277 6278 extern "C" area_id 6279 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6280 size_t size, uint32 lock, uint32 protection) 6281 { 6282 fix_protection(&protection); 6283 6284 virtual_address_restrictions virtualRestrictions = {}; 6285 virtualRestrictions.address = *_address; 6286 virtualRestrictions.address_specification = addressSpec; 6287 physical_address_restrictions physicalRestrictions = {}; 6288 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6289 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6290 true, _address); 6291 } 6292 6293 6294 status_t 6295 delete_area(area_id area) 6296 { 6297 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6298 } 6299 6300 6301 // #pragma mark - Userland syscalls 6302 6303 6304 status_t 6305 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6306 addr_t size) 6307 { 6308 // filter out some unavailable values (for userland) 6309 switch (addressSpec) { 6310 case B_ANY_KERNEL_ADDRESS: 6311 case B_ANY_KERNEL_BLOCK_ADDRESS: 6312 return B_BAD_VALUE; 6313 } 6314 6315 addr_t address; 6316 6317 if (!IS_USER_ADDRESS(userAddress) 6318 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6319 return B_BAD_ADDRESS; 6320 6321 status_t status = vm_reserve_address_range( 6322 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6323 RESERVED_AVOID_BASE); 6324 if (status != B_OK) 6325 return status; 6326 6327 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6328 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6329 (void*)address, size); 6330 return B_BAD_ADDRESS; 6331 } 6332 6333 return B_OK; 6334 } 6335 6336 6337 status_t 6338 _user_unreserve_address_range(addr_t address, addr_t size) 6339 { 6340 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6341 (void*)address, size); 6342 } 6343 6344 6345 area_id 
6346 _user_area_for(void* address) 6347 { 6348 return vm_area_for((addr_t)address, false); 6349 } 6350 6351 6352 area_id 6353 _user_find_area(const char* userName) 6354 { 6355 char name[B_OS_NAME_LENGTH]; 6356 6357 if (!IS_USER_ADDRESS(userName) 6358 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6359 return B_BAD_ADDRESS; 6360 6361 return find_area(name); 6362 } 6363 6364 6365 status_t 6366 _user_get_area_info(area_id area, area_info* userInfo) 6367 { 6368 if (!IS_USER_ADDRESS(userInfo)) 6369 return B_BAD_ADDRESS; 6370 6371 area_info info; 6372 status_t status = get_area_info(area, &info); 6373 if (status < B_OK) 6374 return status; 6375 6376 // TODO: do we want to prevent userland from seeing kernel protections? 6377 //info.protection &= B_USER_PROTECTION; 6378 6379 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6380 return B_BAD_ADDRESS; 6381 6382 return status; 6383 } 6384 6385 6386 status_t 6387 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6388 { 6389 ssize_t cookie; 6390 6391 if (!IS_USER_ADDRESS(userCookie) 6392 || !IS_USER_ADDRESS(userInfo) 6393 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6394 return B_BAD_ADDRESS; 6395 6396 area_info info; 6397 status_t status = _get_next_area_info(team, &cookie, &info, 6398 sizeof(area_info)); 6399 if (status != B_OK) 6400 return status; 6401 6402 //info.protection &= B_USER_PROTECTION; 6403 6404 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6405 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6406 return B_BAD_ADDRESS; 6407 6408 return status; 6409 } 6410 6411 6412 status_t 6413 _user_set_area_protection(area_id area, uint32 newProtection) 6414 { 6415 if ((newProtection & ~B_USER_PROTECTION) != 0) 6416 return B_BAD_VALUE; 6417 6418 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6419 newProtection, false); 6420 } 6421 6422 6423 status_t 6424 _user_resize_area(area_id area, size_t newSize) 6425 { 6426 // TODO: Since we restrict deleting of areas to those owned by the team, 6427 // we should also do that for resizing (check other functions, too). 
6428 return vm_resize_area(area, newSize, false); 6429 } 6430 6431 6432 area_id 6433 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6434 team_id target) 6435 { 6436 // filter out some unavailable values (for userland) 6437 switch (addressSpec) { 6438 case B_ANY_KERNEL_ADDRESS: 6439 case B_ANY_KERNEL_BLOCK_ADDRESS: 6440 return B_BAD_VALUE; 6441 } 6442 6443 void* address; 6444 if (!IS_USER_ADDRESS(userAddress) 6445 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6446 return B_BAD_ADDRESS; 6447 6448 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6449 if (newArea < B_OK) 6450 return newArea; 6451 6452 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6453 return B_BAD_ADDRESS; 6454 6455 return newArea; 6456 } 6457 6458 6459 area_id 6460 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6461 uint32 protection, area_id sourceArea) 6462 { 6463 char name[B_OS_NAME_LENGTH]; 6464 void* address; 6465 6466 // filter out some unavailable values (for userland) 6467 switch (addressSpec) { 6468 case B_ANY_KERNEL_ADDRESS: 6469 case B_ANY_KERNEL_BLOCK_ADDRESS: 6470 return B_BAD_VALUE; 6471 } 6472 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6473 return B_BAD_VALUE; 6474 6475 if (!IS_USER_ADDRESS(userName) 6476 || !IS_USER_ADDRESS(userAddress) 6477 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6478 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6479 return B_BAD_ADDRESS; 6480 6481 fix_protection(&protection); 6482 6483 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6484 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6485 false); 6486 if (clonedArea < B_OK) 6487 return clonedArea; 6488 6489 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6490 delete_area(clonedArea); 6491 return B_BAD_ADDRESS; 6492 } 6493 6494 return clonedArea; 6495 } 6496 6497 6498 area_id 6499 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6500 size_t size, uint32 lock, uint32 protection) 6501 { 6502 char name[B_OS_NAME_LENGTH]; 6503 void* address; 6504 6505 // filter out some unavailable values (for userland) 6506 switch (addressSpec) { 6507 case B_ANY_KERNEL_ADDRESS: 6508 case B_ANY_KERNEL_BLOCK_ADDRESS: 6509 return B_BAD_VALUE; 6510 } 6511 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6512 return B_BAD_VALUE; 6513 6514 if (!IS_USER_ADDRESS(userName) 6515 || !IS_USER_ADDRESS(userAddress) 6516 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6517 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6518 return B_BAD_ADDRESS; 6519 6520 if (addressSpec == B_EXACT_ADDRESS 6521 && IS_KERNEL_ADDRESS(address)) 6522 return B_BAD_VALUE; 6523 6524 if (addressSpec == B_ANY_ADDRESS) 6525 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6526 if (addressSpec == B_BASE_ADDRESS) 6527 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6528 6529 fix_protection(&protection); 6530 6531 virtual_address_restrictions virtualRestrictions = {}; 6532 virtualRestrictions.address = address; 6533 virtualRestrictions.address_specification = addressSpec; 6534 physical_address_restrictions physicalRestrictions = {}; 6535 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6536 size, lock, protection, 0, 0, &virtualRestrictions, 6537 &physicalRestrictions, false, &address); 6538 6539 if (area >= B_OK 6540 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6541 delete_area(area); 6542 return B_BAD_ADDRESS; 6543 
} 6544 6545 return area; 6546 } 6547 6548 6549 status_t 6550 _user_delete_area(area_id area) 6551 { 6552 // Unlike the BeOS implementation, you can now only delete areas 6553 // that you have created yourself from userland. 6554 // The documentation to delete_area() explicitly states that this 6555 // will be restricted in the future, and so it will. 6556 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6557 } 6558 6559 6560 // TODO: create a BeOS style call for this! 6561 6562 area_id 6563 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6564 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6565 int fd, off_t offset) 6566 { 6567 char name[B_OS_NAME_LENGTH]; 6568 void* address; 6569 area_id area; 6570 6571 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6572 return B_BAD_VALUE; 6573 6574 fix_protection(&protection); 6575 6576 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6577 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6578 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6579 return B_BAD_ADDRESS; 6580 6581 if (addressSpec == B_EXACT_ADDRESS) { 6582 if ((addr_t)address + size < (addr_t)address 6583 || (addr_t)address % B_PAGE_SIZE != 0) { 6584 return B_BAD_VALUE; 6585 } 6586 if (!IS_USER_ADDRESS(address) 6587 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6588 return B_BAD_ADDRESS; 6589 } 6590 } 6591 6592 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6593 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6594 false); 6595 if (area < B_OK) 6596 return area; 6597 6598 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6599 return B_BAD_ADDRESS; 6600 6601 return area; 6602 } 6603 6604 6605 status_t 6606 _user_unmap_memory(void* _address, size_t size) 6607 { 6608 addr_t address = (addr_t)_address; 6609 6610 // check params 6611 if (size == 0 || (addr_t)address + size < (addr_t)address 6612 || (addr_t)address % B_PAGE_SIZE != 0) { 6613 return B_BAD_VALUE; 6614 } 6615 6616 if (!IS_USER_ADDRESS(address) 6617 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6618 return B_BAD_ADDRESS; 6619 } 6620 6621 // Write lock the address space and ensure the address range is not wired. 6622 AddressSpaceWriteLocker locker; 6623 do { 6624 status_t status = locker.SetTo(team_get_current_team_id()); 6625 if (status != B_OK) 6626 return status; 6627 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6628 size, &locker)); 6629 6630 // unmap 6631 return unmap_address_range(locker.AddressSpace(), address, size, false); 6632 } 6633 6634 6635 status_t 6636 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6637 { 6638 // check address range 6639 addr_t address = (addr_t)_address; 6640 size = PAGE_ALIGN(size); 6641 6642 if ((address % B_PAGE_SIZE) != 0) 6643 return B_BAD_VALUE; 6644 if (!is_user_address_range(_address, size)) { 6645 // weird error code required by POSIX 6646 return ENOMEM; 6647 } 6648 6649 // extend and check protection 6650 if ((protection & ~B_USER_PROTECTION) != 0) 6651 return B_BAD_VALUE; 6652 6653 fix_protection(&protection); 6654 6655 // We need to write lock the address space, since we're going to play with 6656 // the areas. Also make sure that none of the areas is wired and that we're 6657 // actually allowed to change the protection. 
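	// The work is done in two rounds: the first round (restarted whenever
	// waiting for a wired range forced us to unlock the address space) only
	// verifies that the whole range is covered by areas we are allowed to
	// modify; the second round actually applies the new protection, either to
	// whole areas or via per-page protection arrays, re-mapping or unmapping
	// already mapped pages as needed.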
6658 AddressSpaceWriteLocker locker; 6659 6660 bool restart; 6661 do { 6662 restart = false; 6663 6664 status_t status = locker.SetTo(team_get_current_team_id()); 6665 if (status != B_OK) 6666 return status; 6667 6668 // First round: Check whether the whole range is covered by areas and we 6669 // are allowed to modify them. 6670 addr_t currentAddress = address; 6671 size_t sizeLeft = size; 6672 while (sizeLeft > 0) { 6673 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6674 if (area == NULL) 6675 return B_NO_MEMORY; 6676 6677 if ((area->protection & B_KERNEL_AREA) != 0) 6678 return B_NOT_ALLOWED; 6679 if (area->protection_max != 0 6680 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6681 return B_NOT_ALLOWED; 6682 } 6683 6684 addr_t offset = currentAddress - area->Base(); 6685 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6686 6687 AreaCacheLocker cacheLocker(area); 6688 6689 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6690 &locker, &cacheLocker)) { 6691 restart = true; 6692 break; 6693 } 6694 6695 cacheLocker.Unlock(); 6696 6697 currentAddress += rangeSize; 6698 sizeLeft -= rangeSize; 6699 } 6700 } while (restart); 6701 6702 // Second round: If the protections differ from that of the area, create a 6703 // page protection array and re-map mapped pages. 6704 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6705 addr_t currentAddress = address; 6706 size_t sizeLeft = size; 6707 while (sizeLeft > 0) { 6708 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6709 if (area == NULL) 6710 return B_NO_MEMORY; 6711 6712 addr_t offset = currentAddress - area->Base(); 6713 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6714 6715 currentAddress += rangeSize; 6716 sizeLeft -= rangeSize; 6717 6718 if (area->page_protections == NULL) { 6719 if (area->protection == protection) 6720 continue; 6721 if (offset == 0 && rangeSize == area->Size()) { 6722 status_t status = vm_set_area_protection(area->address_space->ID(), 6723 area->id, protection, false); 6724 if (status != B_OK) 6725 return status; 6726 continue; 6727 } 6728 6729 status_t status = allocate_area_page_protections(area); 6730 if (status != B_OK) 6731 return status; 6732 } 6733 6734 // We need to lock the complete cache chain, since we potentially unmap 6735 // pages of lower caches. 6736 VMCache* topCache = vm_area_get_locked_cache(area); 6737 VMCacheChainLocker cacheChainLocker(topCache); 6738 cacheChainLocker.LockAllSourceCaches(); 6739 6740 for (addr_t pageAddress = area->Base() + offset; 6741 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6742 map->Lock(); 6743 6744 set_area_page_protection(area, pageAddress, protection); 6745 6746 phys_addr_t physicalAddress; 6747 uint32 flags; 6748 6749 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6750 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6751 map->Unlock(); 6752 continue; 6753 } 6754 6755 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6756 if (page == NULL) { 6757 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6758 "\n", area, physicalAddress); 6759 map->Unlock(); 6760 return B_ERROR; 6761 } 6762 6763 // If the page is not in the topmost cache and write access is 6764 // requested, we have to unmap it. Otherwise we can re-map it with 6765 // the new protection. 
6766 bool unmapPage = page->Cache() != topCache 6767 && (protection & B_WRITE_AREA) != 0; 6768 6769 if (!unmapPage) 6770 map->ProtectPage(area, pageAddress, protection); 6771 6772 map->Unlock(); 6773 6774 if (unmapPage) { 6775 DEBUG_PAGE_ACCESS_START(page); 6776 unmap_page(area, pageAddress); 6777 DEBUG_PAGE_ACCESS_END(page); 6778 } 6779 } 6780 } 6781 6782 return B_OK; 6783 } 6784 6785 6786 status_t 6787 _user_sync_memory(void* _address, size_t size, uint32 flags) 6788 { 6789 addr_t address = (addr_t)_address; 6790 size = PAGE_ALIGN(size); 6791 6792 // check params 6793 if ((address % B_PAGE_SIZE) != 0) 6794 return B_BAD_VALUE; 6795 if (!is_user_address_range(_address, size)) { 6796 // weird error code required by POSIX 6797 return ENOMEM; 6798 } 6799 6800 bool writeSync = (flags & MS_SYNC) != 0; 6801 bool writeAsync = (flags & MS_ASYNC) != 0; 6802 if (writeSync && writeAsync) 6803 return B_BAD_VALUE; 6804 6805 if (size == 0 || (!writeSync && !writeAsync)) 6806 return B_OK; 6807 6808 // iterate through the range and sync all concerned areas 6809 while (size > 0) { 6810 // read lock the address space 6811 AddressSpaceReadLocker locker; 6812 status_t error = locker.SetTo(team_get_current_team_id()); 6813 if (error != B_OK) 6814 return error; 6815 6816 // get the first area 6817 VMArea* area = locker.AddressSpace()->LookupArea(address); 6818 if (area == NULL) 6819 return B_NO_MEMORY; 6820 6821 uint32 offset = address - area->Base(); 6822 size_t rangeSize = min_c(area->Size() - offset, size); 6823 offset += area->cache_offset; 6824 6825 // lock the cache 6826 AreaCacheLocker cacheLocker(area); 6827 if (!cacheLocker) 6828 return B_BAD_VALUE; 6829 VMCache* cache = area->cache; 6830 6831 locker.Unlock(); 6832 6833 uint32 firstPage = offset >> PAGE_SHIFT; 6834 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6835 6836 // write the pages 6837 if (cache->type == CACHE_TYPE_VNODE) { 6838 if (writeSync) { 6839 // synchronous 6840 error = vm_page_write_modified_page_range(cache, firstPage, 6841 endPage); 6842 if (error != B_OK) 6843 return error; 6844 } else { 6845 // asynchronous 6846 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6847 // TODO: This is probably not quite what is supposed to happen. 6848 // Especially when a lot has to be written, it might take ages 6849 // until it really hits the disk. 6850 } 6851 } 6852 6853 address += rangeSize; 6854 size -= rangeSize; 6855 } 6856 6857 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6858 // synchronize multiple mappings of the same file. In our VM they never get 6859 // out of sync, though, so we don't have to do anything. 6860 6861 return B_OK; 6862 } 6863 6864 6865 status_t 6866 _user_memory_advice(void* _address, size_t size, uint32 advice) 6867 { 6868 addr_t address = (addr_t)_address; 6869 if ((address % B_PAGE_SIZE) != 0) 6870 return B_BAD_VALUE; 6871 6872 size = PAGE_ALIGN(size); 6873 if (!is_user_address_range(_address, size)) { 6874 // weird error code required by POSIX 6875 return B_NO_MEMORY; 6876 } 6877 6878 switch (advice) { 6879 case MADV_NORMAL: 6880 case MADV_SEQUENTIAL: 6881 case MADV_RANDOM: 6882 case MADV_WILLNEED: 6883 case MADV_DONTNEED: 6884 // TODO: Implement! 
6885 break; 6886 6887 case MADV_FREE: 6888 { 6889 AddressSpaceWriteLocker locker; 6890 do { 6891 status_t status = locker.SetTo(team_get_current_team_id()); 6892 if (status != B_OK) 6893 return status; 6894 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6895 address, size, &locker)); 6896 6897 discard_address_range(locker.AddressSpace(), address, size, false); 6898 break; 6899 } 6900 6901 default: 6902 return B_BAD_VALUE; 6903 } 6904 6905 return B_OK; 6906 } 6907 6908 6909 status_t 6910 _user_get_memory_properties(team_id teamID, const void* address, 6911 uint32* _protected, uint32* _lock) 6912 { 6913 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6914 return B_BAD_ADDRESS; 6915 6916 AddressSpaceReadLocker locker; 6917 status_t error = locker.SetTo(teamID); 6918 if (error != B_OK) 6919 return error; 6920 6921 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6922 if (area == NULL) 6923 return B_NO_MEMORY; 6924 6925 uint32 protection = get_area_page_protection(area, (addr_t)address); 6926 uint32 wiring = area->wiring; 6927 6928 locker.Unlock(); 6929 6930 error = user_memcpy(_protected, &protection, sizeof(protection)); 6931 if (error != B_OK) 6932 return error; 6933 6934 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6935 6936 return error; 6937 } 6938 6939 6940 static status_t 6941 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 6942 { 6943 #if ENABLE_SWAP_SUPPORT 6944 // check address range 6945 addr_t address = (addr_t)_address; 6946 size = PAGE_ALIGN(size); 6947 6948 if ((address % B_PAGE_SIZE) != 0) 6949 return EINVAL; 6950 if (!is_user_address_range(_address, size)) 6951 return EINVAL; 6952 6953 const addr_t endAddress = address + size; 6954 6955 AddressSpaceReadLocker addressSpaceLocker; 6956 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 6957 if (error != B_OK) 6958 return error; 6959 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 6960 6961 // iterate through all concerned areas 6962 addr_t nextAddress = address; 6963 while (nextAddress != endAddress) { 6964 // get the next area 6965 VMArea* area = addressSpace->LookupArea(nextAddress); 6966 if (area == NULL) { 6967 error = B_BAD_ADDRESS; 6968 break; 6969 } 6970 6971 const addr_t areaStart = nextAddress; 6972 const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size()); 6973 nextAddress = areaEnd; 6974 6975 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6976 if (error != B_OK) { 6977 // We don't need to unset or reset things on failure. 6978 break; 6979 } 6980 6981 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6982 VMAnonymousCache* anonCache = NULL; 6983 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 6984 // This memory can never be swapped anyway. Nothing to do. 6985 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 6986 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 6987 areaEnd - areaStart, swappable); 6988 } else { 6989 // Some other cache type? We cannot affect anything here. 6990 error = EINVAL; 6991 } 6992 6993 cacheChainLocker.Unlock(); 6994 6995 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6996 if (error != B_OK) 6997 break; 6998 } 6999 7000 return error; 7001 #else 7002 // No swap support? Nothing to do.
7003 return B_OK; 7004 #endif 7005 } 7006 7007 7008 status_t 7009 _user_mlock(const void* _address, size_t size) 7010 { 7011 return user_set_memory_swappable(_address, size, false); 7012 } 7013 7014 7015 status_t 7016 _user_munlock(const void* _address, size_t size) 7017 { 7018 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7019 // if multiple clones of an area had mlock() called on them, 7020 // munlock() must also be called on all of them to actually unlock. 7021 // (At present, the first munlock() will unlock all.) 7022 // TODO: fork() should automatically unlock memory in the child. 7023 return user_set_memory_swappable(_address, size, true); 7024 } 7025 7026 7027 // #pragma mark -- compatibility 7028 7029 7030 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7031 7032 7033 struct physical_entry_beos { 7034 uint32 address; 7035 uint32 size; 7036 }; 7037 7038 7039 /*! The physical_entry structure has changed. We need to translate it to the 7040 old one. 7041 */ 7042 extern "C" int32 7043 __get_memory_map_beos(const void* _address, size_t numBytes, 7044 physical_entry_beos* table, int32 numEntries) 7045 { 7046 if (numEntries <= 0) 7047 return B_BAD_VALUE; 7048 7049 const uint8* address = (const uint8*)_address; 7050 7051 int32 count = 0; 7052 while (numBytes > 0 && count < numEntries) { 7053 physical_entry entry; 7054 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7055 if (result < 0) { 7056 if (result != B_BUFFER_OVERFLOW) 7057 return result; 7058 } 7059 7060 if (entry.address >= (phys_addr_t)1 << 32) { 7061 panic("get_memory_map(): Address is greater 4 GB!"); 7062 return B_ERROR; 7063 } 7064 7065 table[count].address = entry.address; 7066 table[count++].size = entry.size; 7067 7068 address += entry.size; 7069 numBytes -= entry.size; 7070 } 7071 7072 // null-terminate the table, if possible 7073 if (count < numEntries) { 7074 table[count].address = 0; 7075 table[count].size = 0; 7076 } 7077 7078 return B_OK; 7079 } 7080 7081 7082 /*! The type of the \a physicalAddress parameter has changed from void* to 7083 phys_addr_t. 7084 */ 7085 extern "C" area_id 7086 __map_physical_memory_beos(const char* name, void* physicalAddress, 7087 size_t numBytes, uint32 addressSpec, uint32 protection, 7088 void** _virtualAddress) 7089 { 7090 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7091 addressSpec, protection, _virtualAddress); 7092 } 7093 7094 7095 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7096 we meddle with the \a lock parameter to force 32 bit. 
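	For example, a legacy driver that calls create_area() with \c B_FULL_LOCK
	transparently gets \c B_32_BIT_FULL_LOCK, keeping the area's physical
	pages below the 4 GB boundary.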
7097 */ 7098 extern "C" area_id 7099 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7100 size_t size, uint32 lock, uint32 protection) 7101 { 7102 switch (lock) { 7103 case B_NO_LOCK: 7104 break; 7105 case B_FULL_LOCK: 7106 case B_LAZY_LOCK: 7107 lock = B_32_BIT_FULL_LOCK; 7108 break; 7109 case B_CONTIGUOUS: 7110 lock = B_32_BIT_CONTIGUOUS; 7111 break; 7112 } 7113 7114 return __create_area_haiku(name, _address, addressSpec, size, lock, 7115 protection); 7116 } 7117 7118 7119 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7120 "BASE"); 7121 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7122 "map_physical_memory@", "BASE"); 7123 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7124 "BASE"); 7125 7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7127 "get_memory_map@@", "1_ALPHA3"); 7128 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7129 "map_physical_memory@@", "1_ALPHA3"); 7130 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7131 "1_ALPHA3"); 7132 7133 7134 #else 7135 7136 7137 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7138 "get_memory_map@@", "BASE"); 7139 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7140 "map_physical_memory@@", "BASE"); 7141 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7142 "BASE"); 7143 7144 7145 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7146