/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <vm/vm.h>

#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#include <algorithm>

#include <OS.h>
#include <KernelExport.h>

#include <AutoDeleterDrivers.h>

#include <symbol_versioning.h>

#include <arch/cpu.h>
#include <arch/vm.h>
#include <arch/user_memory.h>
#include <boot/elf.h>
#include <boot/stage2.h>
#include <condition_variable.h>
#include <console.h>
#include <debug.h>
#include <file_cache.h>
#include <fs/fd.h>
#include <heap.h>
#include <kernel.h>
#include <int.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <smp.h>
#include <system_info.h>
#include <thread.h>
#include <team.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "VMAddressSpaceLocking.h"
#include "VMAnonymousCache.h"
#include "VMAnonymousNoSwapCache.h"
#include "IORequest.h"


//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif


namespace {

class AreaCacheLocking {
public:
	inline bool Lock(VMCache* lockable)
	{
		return false;
	}

	inline void Unlock(VMCache* lockable)
	{
		vm_area_put_locked_cache(lockable);
	}
};

class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
public:
	inline AreaCacheLocker(VMCache* cache = NULL)
		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
	{
	}

	inline AreaCacheLocker(VMArea* area)
		: AutoLocker<VMCache, AreaCacheLocking>()
	{
		SetTo(area);
	}

	inline void SetTo(VMCache* cache, bool alreadyLocked)
	{
		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
	}

	inline void SetTo(VMArea* area)
	{
		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
			area != NULL ?
				vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

} // namespace
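
/*	A typical use of the locking helpers above (illustrative sketch only, not
	called anywhere): an area's top cache is locked and referenced via
	vm_area_get_locked_cache(), the chain of source caches is then locked with
	VMCacheChainLocker, and the chain is released again before an operation
	that may temporarily drop the top cache's lock -- the same pattern
	cut_area() and discard_area_range() use below:

		VMCache* cache = vm_area_get_locked_cache(area);
		VMCacheChainLocker chainLocker(cache);
		chainLocker.LockAllSourceCaches();
		// ... inspect/modify the cache chain ...
		chainLocker.Unlock(cache);
		// cache is still locked and referenced here
		cache->ReleaseRefAndUnlock();
*/
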
// The memory reserve an allocation of a certain priority must not touch.
static const size_t kMemoryReserveForPriority[] = {
	VM_MEMORY_RESERVE_USER,		// user
	VM_MEMORY_RESERVE_SYSTEM,	// system
	0							// VIP
};


ObjectCache* gPageMappingsObjectCache;

static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");

static off_t sAvailableMemory;
static off_t sNeededMemory;
static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
static uint32 sPageFaults;

static VMPhysicalPageMapper* sPhysicalPageMapper;

#if DEBUG_CACHE_LIST

struct cache_info {
	VMCache*	cache;
	addr_t		page_count;
	addr_t		committed;
};

static const int kCacheInfoTableCount = 100 * 1024;
static cache_info* sCacheInfoTable;

#endif	// DEBUG_CACHE_LIST


// function declarations
static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
	bool addressSpaceCleanup);
static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
static status_t map_backing_store(VMAddressSpace* addressSpace,
	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
	int protection, int protectionMax, int mapping, uint32 flags,
	const virtual_address_restrictions* addressRestrictions, bool kernel,
	VMArea** _area, void** _virtualAddress);
static void fix_protection(uint32* protection);


// #pragma mark -


#if VM_PAGE_FAULT_TRACING

namespace VMPageFaultTracing {

class PageFaultStart : public AbstractTraceEntry {
public:
	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
		:
		fAddress(address),
		fPC(pc),
		fWrite(write),
		fUser(user)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
			fWrite ? "write" : "read", fUser ?
"user" : "kernel", fPC); 307 } 308 309 private: 310 addr_t fAddress; 311 addr_t fPC; 312 bool fWrite; 313 bool fUser; 314 }; 315 316 317 // page fault errors 318 enum { 319 PAGE_FAULT_ERROR_NO_AREA = 0, 320 PAGE_FAULT_ERROR_KERNEL_ONLY, 321 PAGE_FAULT_ERROR_WRITE_PROTECTED, 322 PAGE_FAULT_ERROR_READ_PROTECTED, 323 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 324 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 325 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 326 }; 327 328 329 class PageFaultError : public AbstractTraceEntry { 330 public: 331 PageFaultError(area_id area, status_t error) 332 : 333 fArea(area), 334 fError(error) 335 { 336 Initialized(); 337 } 338 339 virtual void AddDump(TraceOutput& out) 340 { 341 switch (fError) { 342 case PAGE_FAULT_ERROR_NO_AREA: 343 out.Print("page fault error: no area"); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_ONLY: 346 out.Print("page fault error: area: %ld, kernel only", fArea); 347 break; 348 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 349 out.Print("page fault error: area: %ld, write protected", 350 fArea); 351 break; 352 case PAGE_FAULT_ERROR_READ_PROTECTED: 353 out.Print("page fault error: area: %ld, read protected", fArea); 354 break; 355 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 356 out.Print("page fault error: area: %ld, execute protected", 357 fArea); 358 break; 359 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 360 out.Print("page fault error: kernel touching bad user memory"); 361 break; 362 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 363 out.Print("page fault error: no address space"); 364 break; 365 default: 366 out.Print("page fault error: area: %ld, error: %s", fArea, 367 strerror(fError)); 368 break; 369 } 370 } 371 372 private: 373 area_id fArea; 374 status_t fError; 375 }; 376 377 378 class PageFaultDone : public AbstractTraceEntry { 379 public: 380 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 381 vm_page* page) 382 : 383 fArea(area), 384 fTopCache(topCache), 385 fCache(cache), 386 fPage(page) 387 { 388 Initialized(); 389 } 390 391 virtual void AddDump(TraceOutput& out) 392 { 393 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 394 "page: %p", fArea, fTopCache, fCache, fPage); 395 } 396 397 private: 398 area_id fArea; 399 VMCache* fTopCache; 400 VMCache* fCache; 401 vm_page* fPage; 402 }; 403 404 } // namespace VMPageFaultTracing 405 406 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 407 #else 408 # define TPF(x) ; 409 #endif // VM_PAGE_FAULT_TRACING 410 411 412 // #pragma mark - 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 increment_page_wired_count(vm_page* page) 419 { 420 if (!page->IsMapped()) 421 atomic_add(&gMappedPagesCount, 1); 422 page->IncrementWiredCount(); 423 } 424 425 426 /*! The page's cache must be locked. 427 */ 428 static inline void 429 decrement_page_wired_count(vm_page* page) 430 { 431 page->DecrementWiredCount(); 432 if (!page->IsMapped()) 433 atomic_add(&gMappedPagesCount, -1); 434 } 435 436 437 static inline addr_t 438 virtual_page_address(VMArea* area, vm_page* page) 439 { 440 return area->Base() 441 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 442 } 443 444 445 //! 
static VMArea*
lookup_area(VMAddressSpace* addressSpace, area_id id)
{
	VMAreas::ReadLock();

	VMArea* area = VMAreas::LookupLocked(id);
	if (area != NULL && area->address_space != addressSpace)
		area = NULL;

	VMAreas::ReadUnlock();

	return area;
}


static status_t
allocate_area_page_protections(VMArea* area)
{
	// In the page protections we store only the three user protections,
	// so we use 4 bits per page.
	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
	area->page_protections = (uint8*)malloc_etc(bytes,
		area->address_space == VMAddressSpace::Kernel()
			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
	if (area->page_protections == NULL)
		return B_NO_MEMORY;

	// init the page protections for all pages to that of the area
	uint32 areaProtection = area->protection
		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
	memset(area->page_protections, areaProtection | (areaProtection << 4),
		bytes);
	return B_OK;
}


static inline void
set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
{
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
	uint8& entry = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		entry = (entry & 0xf0) | protection;
	else
		entry = (entry & 0x0f) | (protection << 4);
}


static inline uint32
get_area_page_protection(VMArea* area, addr_t pageAddress)
{
	if (area->page_protections == NULL)
		return area->protection;

	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
	uint32 protection = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		protection &= 0x0f;
	else
		protection >>= 4;

	uint32 kernelProtection = 0;
	if ((protection & B_READ_AREA) != 0)
		kernelProtection |= B_KERNEL_READ_AREA;
	if ((protection & B_WRITE_AREA) != 0)
		kernelProtection |= B_KERNEL_WRITE_AREA;

	// If this is a kernel area we return only the kernel flags.
	if (area->address_space == VMAddressSpace::Kernel())
		return kernelProtection;

	return protection | kernelProtection;
}
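
/*	How the packing in area->page_protections works (illustrative sketch only,
	the constant values are assumed, not taken from this file): two pages
	share one byte, the even page in the low nibble, the odd page in the high
	nibble. For example, if B_READ_AREA were 0x01 and B_WRITE_AREA 0x02, a
	read-only page 0 followed by a read/write page 1 would be stored as

		page_protections[0] == 0x01 | ((0x01 | 0x02) << 4) == 0x31

	so get_area_page_protection() masks with 0x0f for even page indices and
	shifts right by 4 for odd ones, as the two functions above do.
*/
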
/*!	The caller must have reserved enough pages the translation map
	implementation might need to map this page.
	The page's cache must be locked.
*/
static status_t
map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
	vm_page_reservation* reservation)
{
	VMTranslationMap* map = area->address_space->TranslationMap();

	bool wasMapped = page->IsMapped();

	if (area->wiring == B_NO_LOCK) {
		DEBUG_PAGE_ACCESS_CHECK(page);

		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
			gPageMappingsObjectCache,
			CACHE_DONT_WAIT_FOR_MEMORY
				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
		if (mapping == NULL)
			return B_NO_MEMORY;

		mapping->page = page;
		mapping->area = area;

		map->Lock();

		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);

		// insert mapping into lists
		if (!page->IsMapped())
			atomic_add(&gMappedPagesCount, 1);

		page->mappings.Add(mapping);
		area->mappings.Add(mapping);

		map->Unlock();
	} else {
		DEBUG_PAGE_ACCESS_CHECK(page);

		map->Lock();
		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);
		map->Unlock();

		increment_page_wired_count(page);
	}

	if (!wasMapped) {
		// The page is mapped now, so we must not remain in the cached queue.
		// It also makes sense to move it from the inactive to the active
		// queue, since otherwise the page daemon wouldn't come to keep track
		// of it (in idle mode) -- if the page isn't touched, it will be
		// deactivated after a full iteration through the queue at the latest.
		if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_INACTIVE) {
			vm_page_set_state(page, PAGE_STATE_ACTIVE);
		}
	}

	return B_OK;
}


/*!	The caller must hold the lock of the page's cache.
*/
static inline bool
unmap_page(VMArea* area, addr_t virtualAddress)
{
	return area->address_space->TranslationMap()->UnmapPage(area,
		virtualAddress, true);
}


/*!	The caller must hold the locks of all mapped pages' caches.
*/
static inline void
unmap_pages(VMArea* area, addr_t base, size_t size)
{
	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
}


static inline bool
intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
{
	if (address < area->Base()) {
		offset = area->Base() - address;
		if (offset >= size)
			return false;

		address = area->Base();
		size -= offset;
		offset = 0;
		if (size > area->Size())
			size = area->Size();

		return true;
	}

	offset = address - area->Base();
	if (offset >= area->Size())
		return false;

	if (size >= area->Size() - offset)
		size = area->Size() - offset;

	return true;
}
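
/*	intersect_area() clips the requested range to the area (illustrative
	example with made-up numbers): for an area at base 0x100000 with size
	0x3000, a request for address 0xff000 and size 0x2000 is clipped to
	address 0x100000, size 0x1000, offset 0 -- while a request for address
	0x101000 and size 0x10000 becomes offset 0x1000 with the size clamped to
	0x2000. If the ranges do not overlap at all, it returns false and the
	callers below treat the cut/discard as a no-op.
*/
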
/*!	Cuts a piece out of an area. If the given cut range covers the complete
	area, it is deleted. If it covers the beginning or the end, the area is
	resized accordingly. If the range covers some part in the middle of the
	area, it is split in two; in this case the second area is returned via
	\a _secondArea (the variable is left untouched in the other cases).
	The address space must be write locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
	addr_t size, VMArea** _secondArea, bool kernel)
{
	addr_t offset;
	if (!intersect_area(area, address, size, offset))
		return B_OK;

	// Is the area fully covered?
	if (address == area->Base() && size == area->Size()) {
		delete_area(addressSpace, area, false);
		return B_OK;
	}

	int priority;
	uint32 allocationFlags;
	if (addressSpace == VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_SYSTEM;
		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
			| HEAP_DONT_LOCK_KERNEL_SPACE;
	} else {
		priority = VM_PRIORITY_USER;
		allocationFlags = 0;
	}

	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	// If no one else uses the area's cache and it's an anonymous cache, we can
	// resize or split it, too.
	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;

	// Cut the end only?
	if (offset > 0 && size == area->Size() - offset) {
		status_t error = addressSpace->ShrinkAreaTail(area, offset,
			allocationFlags);
		if (error != B_OK)
			return error;

		// unmap pages
		unmap_pages(area, address, size);

		if (onlyCacheUser) {
			// Since VMCache::Resize() can temporarily drop the lock, we must
			// unlock all lower caches to prevent locking order inversion.
			cacheChainLocker.Unlock(cache);
			cache->Resize(cache->virtual_base + offset, priority);
			cache->ReleaseRefAndUnlock();
		}

		return B_OK;
	}

	// Cut the beginning only?
	if (area->Base() == address) {
		// resize the area
		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
			allocationFlags);
		if (error != B_OK)
			return error;

		// unmap pages
		unmap_pages(area, address, size);

		if (onlyCacheUser) {
			// Since VMCache::Rebase() can temporarily drop the lock, we must
			// unlock all lower caches to prevent locking order inversion.
			cacheChainLocker.Unlock(cache);
			cache->Rebase(cache->virtual_base + size, priority);
			cache->ReleaseRefAndUnlock();
		}
		area->cache_offset += size;

		return B_OK;
	}

	// The tough part -- cut a piece out of the middle of the area.
	// We do that by shrinking the area to the begin section and creating a
	// new area for the end section.
	addr_t firstNewSize = offset;
	addr_t secondBase = address + size;
	addr_t secondSize = area->Size() - offset - size;

	// unmap pages
	unmap_pages(area, address, area->Size() - firstNewSize);

	// resize the area
	addr_t oldSize = area->Size();
	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
		allocationFlags);
	if (error != B_OK)
		return error;

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = (void*)secondBase;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	VMArea* secondArea;

	if (onlyCacheUser) {
		// Create a new cache for the second area.
		VMCache* secondCache;
		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
		if (error != B_OK) {
			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
			return error;
		}

		secondCache->Lock();
		secondCache->temporary = cache->temporary;
		secondCache->virtual_base = area->cache_offset;
		secondCache->virtual_end = area->cache_offset + secondSize;

		// Transfer the concerned pages from the first cache.
		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
		error = secondCache->Adopt(cache, adoptOffset, secondSize,
			area->cache_offset);

		if (error == B_OK) {
			// Since VMCache::Resize() can temporarily drop the lock, we must
			// unlock all lower caches to prevent locking order inversion.
			cacheChainLocker.Unlock(cache);
			cache->Resize(cache->virtual_base + firstNewSize, priority);
				// Don't unlock the cache yet because we might have to resize
				// it back.

			// Map the second area.
			error = map_backing_store(addressSpace, secondCache,
				area->cache_offset, area->name, secondSize, area->wiring,
				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP,
				0, &addressRestrictions, kernel, &secondArea, NULL);
		}

		if (error != B_OK) {
			// Restore the original cache.
			cache->Resize(cache->virtual_base + oldSize, priority);

			// Move the pages back.
			status_t readoptStatus = cache->Adopt(secondCache,
				area->cache_offset, secondSize, adoptOffset);
			if (readoptStatus != B_OK) {
				// Some (swap) pages have not been moved back and will be lost
				// once the second cache is deleted.
				panic("failed to restore cache range: %s",
					strerror(readoptStatus));

				// TODO: Handle out of memory cases by freeing memory and
				// retrying.
			}

			cache->ReleaseRefAndUnlock();
			secondCache->ReleaseRefAndUnlock();
			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
			return error;
		}

		// Now we can unlock it.
		cache->ReleaseRefAndUnlock();
		secondCache->Unlock();
	} else {
		error = map_backing_store(addressSpace, cache, area->cache_offset
				+ (secondBase - area->Base()),
			area->name, secondSize, area->wiring, area->protection,
			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
			&addressRestrictions, kernel, &secondArea, NULL);
		if (error != B_OK) {
			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
			return error;
		}
		// We need a cache reference for the new area.
		cache->AcquireRefLocked();
	}

	if (_secondArea != NULL)
		*_secondArea = secondArea;

	return B_OK;
}


/*!	Deletes or cuts all areas in the given address range.
	The address space must be write-locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
	bool kernel)
{
	size = PAGE_ALIGN(size);

	// Check whether the caller is allowed to modify the concerned areas.
	if (!kernel) {
		for (VMAddressSpace::AreaRangeIterator it
				= addressSpace->GetAreaRangeIterator(address, size);
				VMArea* area = it.Next();) {

			if ((area->protection & B_KERNEL_AREA) != 0) {
				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
					"unmap range of kernel area %" B_PRId32 " (%s)\n",
					team_get_current_team_id(), area->id, area->name);
				return B_NOT_ALLOWED;
			}
		}
	}

	for (VMAddressSpace::AreaRangeIterator it
			= addressSpace->GetAreaRangeIterator(address, size);
			VMArea* area = it.Next();) {

		status_t error = cut_area(addressSpace, area, address, size, NULL,
			kernel);
		if (error != B_OK)
			return error;
			// Failing after already messing with areas is ugly, but we
			// can't do anything about it.
	}

	return B_OK;
}


static status_t
discard_area_range(VMArea* area, addr_t address, addr_t size)
{
	addr_t offset;
	if (!intersect_area(area, address, size, offset))
		return B_OK;

	// If someone else uses the area's cache or it's not an anonymous cache, we
	// can't discard.
	VMCache* cache = vm_area_get_locked_cache(area);
	if (cache->areas != area || area->cache_next != NULL
		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
		return B_OK;
	}

	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	unmap_pages(area, address, size);

	// Since VMCache::Discard() can temporarily drop the lock, we must
	// unlock all lower caches to prevent locking order inversion.
	cacheChainLocker.Unlock(cache);
	cache->Discard(cache->virtual_base + offset, size);
	cache->ReleaseRefAndUnlock();

	return B_OK;
}


static status_t
discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
	bool kernel)
{
	for (VMAddressSpace::AreaRangeIterator it
			= addressSpace->GetAreaRangeIterator(address, size);
			VMArea* area = it.Next();) {
		status_t error = discard_area_range(area, address, size);
		if (error != B_OK)
			return error;
	}

	return B_OK;
}


/*!	You need to hold the lock of the cache and the write lock of the address
	space when calling this function.
	Note that in case of error your cache will be temporarily unlocked.
	If \a addressSpec is \c B_EXACT_ADDRESS and the
	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
	that no part of the specified address range (base \c *_virtualAddress, size
	\a size) is wired.
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}
	if (offset < 0)
		return B_BAD_VALUE;

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area, so back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status == B_NO_MEMORY
			&& addressRestrictions->address_specification
				== B_ANY_KERNEL_ADDRESS) {
		// Due to how many locks are held, we cannot wait here for space to be
		// freed up, but we can at least notify the low_resource handler.
		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
			B_RELATIVE_TIMEOUT, 0);
	}
	if (status != B_OK)
		goto err2;

	// attach the cache to the area
	area->cache = cache;
	area->cache_offset = offset;

	// point the cache back to the area
	cache->InsertAreaLocked(area);
	if (mapping == REGION_PRIVATE_MAP)
		cache->Unlock();

	// insert the area in the global areas map
	VMAreas::Insert(area);

	// grab a ref to the address space (the area holds this)
	addressSpace->Get();

//	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
//		cache, sourceCache, areaName, area);

	*_area = area;
	return B_OK;

err2:
	if (mapping == REGION_PRIVATE_MAP) {
		// We created this cache, so we must delete it again. Note that we
		// need to temporarily unlock the source cache or we'll otherwise
		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
		sourceCache->Unlock();
		cache->ReleaseRefAndUnlock();
		sourceCache->Lock();
	}
err1:
	addressSpace->DeleteArea(area, allocationFlags);
	return status;
}


/*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
	locker1, locker2).
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*!	Checks whether the given area has any wired ranges intersecting with the
	specified range and waits, if so.

	When it has to wait, the function calls \c Unlock() on both \a locker1
	and \a locker2, if given.
	The area's top cache must be locked and must be unlocked as a side effect
	of calling \c Unlock() on either \a locker1 or \a locker2.

	If the function does not have to wait it does not modify or unlock any
	object.

	\param area The area to be checked.
	\param base The base address of the range to check.
	\param size The size of the address range to check.
	\param locker1 An object to be unlocked before starting to wait (may
		be \c NULL).
	\param locker2 An object to be unlocked before starting to wait (may
		be \c NULL).
	\return \c true, if the function had to wait, \c false otherwise.
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
	LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter, base, size))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*!	Checks whether the given address space has any wired ranges intersecting
	with the specified range and waits, if so.

	Similar to wait_if_area_range_is_wired(), with the following differences:
	- All areas intersecting with the range are checked (respectively all until
	  one is found that contains a wired range intersecting with the given
	  range).
	- The given address space must at least be read-locked and must be unlocked
	  when \c Unlock() is called on \a locker.
	- None of the areas' caches are allowed to be locked.
*/
template<typename LockerType>
static inline bool
wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
	size_t size, LockerType* locker)
{
	for (VMAddressSpace::AreaRangeIterator it
			= addressSpace->GetAreaRangeIterator(base, size);
			VMArea* area = it.Next();) {

		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));

		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
			return true;
	}

	return false;
}


/*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
	It must be called in a situation where the kernel address space may be
	locked.
*/
status_t
vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
{
	AddressSpaceReadLocker locker;
	VMArea* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	if (area->page_protections == NULL) {
		status = allocate_area_page_protections(area);
		if (status != B_OK)
			return status;
	}

	*cookie = (void*)area;
	return B_OK;
}
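
/*	Typical use of the two debug helpers above and below (illustrative sketch
	only; the area id, address and size are made up): the cookie is obtained
	once while locking the kernel address space is still allowed, afterwards
	ranges can be toggled even from contexts that must not lock it:

		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
			// make the range fault on any access
			vm_set_kernel_area_debug_protection(cookie, address, size, 0);
			// ... later restore read/write access
			vm_set_kernel_area_debug_protection(cookie, address, size,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}
*/
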
/*!	This is a debug helper function that can only be used with very specific
	use cases.
	Sets protection for the given address range to the protection specified.
	If \a protection is 0 then the involved pages will be marked non-present
	in the translation map to cause a fault on access. The pages aren't
	actually unmapped however so that they can be marked present again with
	additional calls to this function. For this to work the area must be
	fully locked in memory so that the pages aren't otherwise touched.
	This function does not lock the kernel address space and needs to be
	supplied with a \a cookie retrieved from a successful call to
	vm_prepare_kernel_area_debug_protection().
*/
status_t
vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
	uint32 protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0
		|| (addr_t)address + size < (addr_t)address
		|| !IS_KERNEL_ADDRESS(address)
		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
		return B_BAD_VALUE;
	}

	// Translate the kernel protection to user protection as we only store
	// that.
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookup will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
		true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel()
			?
				HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
}


status_t
vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
	addr_t size, uint32 flags)
{
	if (size == 0)
		return B_BAD_VALUE;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec;
	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
		addressSpace == VMAddressSpace::Kernel()
			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
		_address);
}
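
/*	Address range reservation in a nutshell (illustrative sketch only, the
	size is made up): a team can set aside a range without backing it, later
	place areas at exact addresses inside that range, and finally release
	whatever is still unused:

		void* base = NULL;
		if (vm_reserve_address_range(team, &base, B_ANY_ADDRESS,
				16 * B_PAGE_SIZE, 0) == B_OK) {
			// ... create areas with B_EXACT_ADDRESS inside the range ...
			vm_unreserve_address_range(team, base, 16 * B_PAGE_SIZE);
		}
*/
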
area_id
vm_create_anonymous_area(team_id team, const char *name, addr_t size,
	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
	const virtual_address_restrictions* virtualAddressRestrictions,
	const physical_address_restrictions* physicalAddressRestrictions,
	bool kernel, void** _address)
{
	VMArea* area;
	VMCache* cache;
	vm_page* page = NULL;
	bool isStack = (protection & B_STACK_AREA) != 0;
	page_num_t guardPages;
	bool canOvercommit = false;
	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
		? VM_PAGE_ALLOC_CLEAR : 0;

	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
		team, name, size));

	size = PAGE_ALIGN(size);
	guardSize = PAGE_ALIGN(guardSize);
	guardPages = guardSize / B_PAGE_SIZE;

	if (size == 0 || size < guardSize)
		return B_BAD_VALUE;
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	if (team == B_CURRENT_TEAM)
		team = VMAddressSpace::CurrentID();
	if (team < 0)
		return B_BAD_TEAM_ID;

	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
		canOvercommit = true;

#ifdef DEBUG_KERNEL_STACKS
	if ((protection & B_KERNEL_STACK_AREA) != 0)
		isStack = true;
#endif

	// check parameters
	switch (virtualAddressRestrictions->address_specification) {
		case B_ANY_ADDRESS:
		case B_EXACT_ADDRESS:
		case B_BASE_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
		case B_RANDOMIZED_ANY_ADDRESS:
		case B_RANDOMIZED_BASE_ADDRESS:
			break;

		default:
			return B_BAD_VALUE;
	}

	// If low or high physical address restrictions are given, we force
	// B_CONTIGUOUS wiring, since only then we'll use
	// vm_page_allocate_page_run() which deals with those restrictions.
	if (physicalAddressRestrictions->low_address != 0
		|| physicalAddressRestrictions->high_address != 0) {
		wiring = B_CONTIGUOUS;
	}

	physical_address_restrictions stackPhysicalRestrictions;
	bool doReserveMemory = false;
	switch (wiring) {
		case B_NO_LOCK:
			break;
		case B_FULL_LOCK:
		case B_LAZY_LOCK:
		case B_CONTIGUOUS:
			doReserveMemory = true;
			break;
		case B_ALREADY_WIRED:
			break;
		case B_LOMEM:
			stackPhysicalRestrictions = *physicalAddressRestrictions;
			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
			physicalAddressRestrictions = &stackPhysicalRestrictions;
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		case B_32_BIT_FULL_LOCK:
			if (B_HAIKU_PHYSICAL_BITS <= 32
				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
				wiring = B_FULL_LOCK;
				doReserveMemory = true;
				break;
			}
			// TODO: We don't really support this mode efficiently. Just fall
			// through for now ...
		case B_32_BIT_CONTIGUOUS:
			#if B_HAIKU_PHYSICAL_BITS > 32
				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
					stackPhysicalRestrictions = *physicalAddressRestrictions;
					stackPhysicalRestrictions.high_address
						= (phys_addr_t)1 << 32;
					physicalAddressRestrictions = &stackPhysicalRestrictions;
				}
			#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory handler
	// won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to do so, since we currently
		// don't reclaim them (and probably can't reclaim all of them anyway).
		// Thus there are actually fewer physical pages than there should be,
		// which can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	VMAddressSpace* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
	page_num_t reservedPages = reservedMapPages;
	if (wiring == B_FULL_LOCK)
		reservedPages += size / B_PAGE_SIZE;

	vm_page_reservation reservation;
	if (reservedPages > 0) {
		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
					priority)) {
				reservedPages = 0;
				status = B_WOULD_BLOCK;
				goto err0;
			}
		} else
			vm_page_reserve_pages(&reservation, reservedPages, priority);
	}

	if (wiring == B_CONTIGUOUS) {
		// we try to allocate the page run here upfront as this may easily
		// fail for obvious reasons
		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
		if (page == NULL) {
			status = B_NO_MEMORY;
			goto err0;
		}
	}

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	do {
		status = locker.SetTo(team);
		if (status != B_OK)
			goto err1;

		addressSpace = locker.AddressSpace();
	} while (virtualAddressRestrictions->address_specification
			== B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(addressSpace,
			(addr_t)virtualAddressRestrictions->address, size, &locker));

	// create an anonymous cache
	// if it's a stack, make sure that at least two pages are available
	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
		wiring == B_NO_LOCK, priority);
	if (status != B_OK)
		goto err1;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->committed_size = reservedMemory;
		// TODO: This should be done via a method.
	reservedMemory = 0;

	cache->Lock();

	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
		protection, 0, REGION_NO_PRIVATE_MAP, flags,
		virtualAddressRestrictions, kernel, &area, _address);

	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		goto err1;
	}

	locker.DegradeToReadLock();

	switch (wiring) {
		case B_NO_LOCK:
		case B_LAZY_LOCK:
			// do nothing - the pages are mapped in as needed
			break;

		case B_FULL_LOCK:
		{
			// Allocate and map all pages for this area

			off_t offset = 0;
			for (addr_t address = area->Base();
					address < area->Base() + (area->Size() - 1);
					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
				if (isStack && address < area->Base()
						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	else
				if (isStack && address >= area->Base() + area->Size()
						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	endif
					continue;
#endif
				vm_page* page = vm_page_allocate_page(&reservation,
					PAGE_STATE_WIRED | pageAllocFlags);
				cache->InsertPage(page, offset);
				map_page(area, page, address, protection, &reservation);

				DEBUG_PAGE_ACCESS_END(page);
			}

			break;
		}

		case B_ALREADY_WIRED:
		{
			// The pages should already be mapped. This is only really useful
			// during boot time. Find the appropriate vm_page objects and stick
			// them in the cache object.
			VMTranslationMap* map = addressSpace->TranslationMap();
			off_t offset = 0;

			if (!gKernelStartup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			map->Lock();

			for (addr_t virtualAddress = area->Base();
					virtualAddress < area->Base() + (area->Size() - 1);
					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
				phys_addr_t physicalAddress;
				uint32 flags;
				status = map->Query(virtualAddress, &physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
						"\n", physicalAddress);
				}

				DEBUG_PAGE_ACCESS_START(page);

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);
				vm_page_set_state(page, PAGE_STATE_WIRED);
				page->busy = false;

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can now
			// just map the pages in the address space
			VMTranslationMap* map = addressSpace->TranslationMap();
			phys_addr_t physicalAddress
				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress = area->Base();
			off_t offset = 0;

			map->Lock();

			for (virtualAddress = area->Base(); virtualAddress < area->Base()
					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->Map(virtualAddress, physicalAddress, protection,
					area->MemoryType(), &reservation);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");
				cache->InsertPage(page, offset);
				increment_page_wired_count(page);

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		default:
			break;
	}

	cache->Unlock();

	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
		phys_addr_t pageNumber = page->physical_page_number;
		int32 i;
		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
			page = vm_lookup_page(pageNumber);
			if (page == NULL)
				panic("couldn't lookup physical page just allocated\n");

			vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

err0:
	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);
	if (reservedMemory > 0)
		vm_unreserve_memory(reservedMemory);

	return status;
}
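
/*	A typical in-kernel call of vm_create_anonymous_area() (illustrative
	sketch only; the name and size are made up): allocate a fully locked,
	kernel-only RAM area anywhere in the kernel address space:

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};
		void* address;
		area_id id = vm_create_anonymous_area(VMAddressSpace::KernelID(),
			"some buffer", 16 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, true, &address);
*/
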
area_id
vm_map_physical_memory(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t size, uint32 protection,
	phys_addr_t physicalAddress, bool alreadyWired)
{
	VMArea* area;
	VMCache* cache;
	addr_t mapOffset;

	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
		addressSpec, size, protection, physicalAddress));

	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	// if the physical address is somewhat inside a page,
	// move the actual area down to align on a page boundary
	mapOffset = physicalAddress % B_PAGE_SIZE;
	size += mapOffset;
	physicalAddress -= mapOffset;

	size = PAGE_ALIGN(size);

	// create a device cache
	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
	if (status != B_OK)
		return status;

	cache->virtual_end = size;

	cache->Lock();

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, _address);

	if (status < B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (status == B_OK) {
		// set requested memory type -- use uncached, if not given
		uint32 memoryType = addressSpec & B_MTR_MASK;
		if (memoryType == 0)
			memoryType = B_MTR_UC;

		area->SetMemoryType(memoryType);

		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
		if (status != B_OK)
			delete_area(locker.AddressSpace(), area, false);
	}

	if (status != B_OK)
		return status;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();

	if (alreadyWired) {
		// The area is already mapped, but possibly not with the right
		// memory type.
		map->Lock();
		map->ProtectArea(area, area->protection);
		map->Unlock();
	} else {
		// Map the area completely.

		// reserve pages needed for the mapping
		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
			area->Base() + (size - 1));
		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, reservePages,
			team == VMAddressSpace::KernelID()
				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);

		map->Lock();

		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
			map->Map(area->Base() + offset, physicalAddress + offset,
				protection, area->MemoryType(), &reservation);
		}

		map->Unlock();

		vm_page_unreserve_pages(&reservation);
	}

	// modify the pointer returned to be offset back into the new area
	// the same way the physical address in was offset
	*_address = (void*)((addr_t)*_address + mapOffset);

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


/*!	Don't use!
	TODO: This function was introduced to map physical page vecs to
	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
	use a device cache and does not track vm_page::wired_count!
*/
area_id
vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t* _size, uint32 protection,
	struct generic_io_vec* vecs, uint32 vecCount)
{
	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
		addressSpec, _size, protection, vecs, vecCount));

	if (!arch_vm_supports_protection(protection)
		|| (addressSpec & B_MTR_MASK) != 0) {
		return B_NOT_SUPPORTED;
	}

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	if (vecCount == 0)
		return B_BAD_VALUE;

	addr_t size = 0;
	for (uint32 i = 0; i < vecCount; i++) {
		if (vecs[i].base % B_PAGE_SIZE != 0
			|| vecs[i].length % B_PAGE_SIZE != 0) {
			return B_BAD_VALUE;
		}

		size += vecs[i].length;
	}

	// create a device cache
	VMCache* cache;
	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
	if (result != B_OK)
		return result;

	cache->virtual_end = size;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, _address);

	if (result != B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (result != B_OK)
		return result;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
		area->Base() + (size - 1));

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages,
		team == VMAddressSpace::KernelID()
			?
				VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
	map->Lock();

	uint32 vecIndex = 0;
	size_t vecOffset = 0;
	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
			vecOffset = 0;
			vecIndex++;
		}

		if (vecIndex >= vecCount)
			break;

		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
			protection, area->MemoryType(), &reservation);

		vecOffset += B_PAGE_SIZE;
	}

	map->Unlock();
	vm_page_unreserve_pages(&reservation);

	if (_size != NULL)
		*_size = size;

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


area_id
vm_create_null_area(team_id team, const char* name, void** address,
	uint32 addressSpec, addr_t size, uint32 flags)
{
	size = PAGE_ALIGN(size);

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	AddressSpaceWriteLocker locker;
	do {
		if (locker.SetTo(team) != B_OK)
			return B_BAD_TEAM_ID;
	} while (addressSpec == B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(locker.AddressSpace(),
			(addr_t)*address, size, &locker));

	// create a null cache
	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
	VMCache* cache;
	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *address;
	addressRestrictions.address_specification = addressSpec;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
		REGION_NO_PRIVATE_MAP, flags,
		&addressRestrictions, true, &area, address);

	if (status < B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();

	area->cache_type = CACHE_TYPE_NULL;
	return area->id;
}


/*!	Creates the vnode cache for the specified \a vnode.
	The vnode has to be marked busy when calling this function.
*/
status_t
vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
{
	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
}


/*!	\a cache must be locked. The area's address space must be read-locked.
1976 */ 1977 static void 1978 pre_map_area_pages(VMArea* area, VMCache* cache, 1979 vm_page_reservation* reservation) 1980 { 1981 addr_t baseAddress = area->Base(); 1982 addr_t cacheOffset = area->cache_offset; 1983 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1984 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1985 1986 for (VMCachePagesTree::Iterator it 1987 = cache->pages.GetIterator(firstPage, true, true); 1988 vm_page* page = it.Next();) { 1989 if (page->cache_offset >= endPage) 1990 break; 1991 1992 // skip busy and inactive pages 1993 if (page->busy || page->usage_count == 0) 1994 continue; 1995 1996 DEBUG_PAGE_ACCESS_START(page); 1997 map_page(area, page, 1998 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1999 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2000 DEBUG_PAGE_ACCESS_END(page); 2001 } 2002 } 2003 2004 2005 /*! Will map the file specified by \a fd to an area in memory. 2006 The file will be mirrored beginning at the specified \a offset. The 2007 \a offset and \a size arguments have to be page aligned. 2008 */ 2009 static area_id 2010 _vm_map_file(team_id team, const char* name, void** _address, 2011 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2012 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2013 { 2014 // TODO: for binary files, we want to make sure that they get the 2015 // copy of a file at a given time, ie. later changes should not 2016 // make it into the mapped copy -- this will need quite some changes 2017 // to be done in a nice way 2018 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2019 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2020 2021 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2022 size = PAGE_ALIGN(size); 2023 2024 if (mapping == REGION_NO_PRIVATE_MAP) 2025 protection |= B_SHARED_AREA; 2026 if (addressSpec != B_EXACT_ADDRESS) 2027 unmapAddressRange = false; 2028 2029 if (fd < 0) { 2030 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2031 virtual_address_restrictions virtualRestrictions = {}; 2032 virtualRestrictions.address = *_address; 2033 virtualRestrictions.address_specification = addressSpec; 2034 physical_address_restrictions physicalRestrictions = {}; 2035 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2036 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2037 _address); 2038 } 2039 2040 // get the open flags of the FD 2041 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2042 if (descriptor == NULL) 2043 return EBADF; 2044 int32 openMode = descriptor->open_mode; 2045 put_fd(descriptor); 2046 2047 // The FD must open for reading at any rate. For shared mapping with write 2048 // access, additionally the FD must be open for writing. 
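	// In other words: a write-only descriptor is always rejected, and a
	// read-only descriptor is rejected only when a shared mapping with write
	// access is requested. A private (copy-on-write) mapping may still be
	// writable with a read-only descriptor, since changes never reach the
	// underlying file.
	//   open mode | private mapping | shared mapping with write access
	//   O_RDONLY  |       OK        |          EACCES
	//   O_WRONLY  |     EACCES      |          EACCES
	//   O_RDWR    |       OK        |            OK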
2049 if ((openMode & O_ACCMODE) == O_WRONLY 2050 || (mapping == REGION_NO_PRIVATE_MAP 2051 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2052 && (openMode & O_ACCMODE) == O_RDONLY)) { 2053 return EACCES; 2054 } 2055 2056 uint32 protectionMax = 0; 2057 if (mapping != REGION_PRIVATE_MAP) { 2058 protectionMax = protection | B_READ_AREA; 2059 if ((openMode & O_ACCMODE) == O_RDWR) 2060 protectionMax |= B_WRITE_AREA; 2061 } 2062 2063 // get the vnode for the object, this also grabs a ref to it 2064 struct vnode* vnode = NULL; 2065 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2066 if (status < B_OK) 2067 return status; 2068 VnodePutter vnodePutter(vnode); 2069 2070 // If we're going to pre-map pages, we need to reserve the pages needed by 2071 // the mapping backend upfront. 2072 page_num_t reservedPreMapPages = 0; 2073 vm_page_reservation reservation; 2074 if ((protection & B_READ_AREA) != 0) { 2075 AddressSpaceWriteLocker locker; 2076 status = locker.SetTo(team); 2077 if (status != B_OK) 2078 return status; 2079 2080 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2081 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2082 2083 locker.Unlock(); 2084 2085 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2086 team == VMAddressSpace::KernelID() 2087 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2088 } 2089 2090 struct PageUnreserver { 2091 PageUnreserver(vm_page_reservation* reservation) 2092 : 2093 fReservation(reservation) 2094 { 2095 } 2096 2097 ~PageUnreserver() 2098 { 2099 if (fReservation != NULL) 2100 vm_page_unreserve_pages(fReservation); 2101 } 2102 2103 vm_page_reservation* fReservation; 2104 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2105 2106 // Lock the address space and, if the specified address range shall be 2107 // unmapped, ensure it is not wired. 2108 AddressSpaceWriteLocker locker; 2109 do { 2110 if (locker.SetTo(team) != B_OK) 2111 return B_BAD_TEAM_ID; 2112 } while (unmapAddressRange 2113 && wait_if_address_range_is_wired(locker.AddressSpace(), 2114 (addr_t)*_address, size, &locker)); 2115 2116 // TODO: this only works for file systems that use the file cache 2117 VMCache* cache; 2118 status = vfs_get_vnode_cache(vnode, &cache, false); 2119 if (status < B_OK) 2120 return status; 2121 2122 cache->Lock(); 2123 2124 VMArea* area; 2125 virtual_address_restrictions addressRestrictions = {}; 2126 addressRestrictions.address = *_address; 2127 addressRestrictions.address_specification = addressSpec; 2128 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2129 0, protection, protectionMax, mapping, 2130 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2131 &addressRestrictions, kernel, &area, _address); 2132 2133 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2134 // map_backing_store() cannot know we no longer need the ref 2135 cache->ReleaseRefLocked(); 2136 } 2137 2138 if (status == B_OK && (protection & B_READ_AREA) != 0) 2139 pre_map_area_pages(area, cache, &reservation); 2140 2141 cache->Unlock(); 2142 2143 if (status == B_OK) { 2144 // TODO: this probably deserves a smarter solution, ie. don't always 2145 // prefetch stuff, and also, probably don't trigger it at this place. 
2146 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2147 // prefetches at max 10 MB starting from "offset" 2148 } 2149 2150 if (status != B_OK) 2151 return status; 2152 2153 area->cache_type = CACHE_TYPE_VNODE; 2154 return area->id; 2155 } 2156 2157 2158 area_id 2159 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2160 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2161 int fd, off_t offset) 2162 { 2163 if (!arch_vm_supports_protection(protection)) 2164 return B_NOT_SUPPORTED; 2165 2166 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2167 mapping, unmapAddressRange, fd, offset, true); 2168 } 2169 2170 2171 VMCache* 2172 vm_area_get_locked_cache(VMArea* area) 2173 { 2174 rw_lock_read_lock(&sAreaCacheLock); 2175 2176 while (true) { 2177 VMCache* cache = area->cache; 2178 2179 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2180 // cache has been deleted 2181 rw_lock_read_lock(&sAreaCacheLock); 2182 continue; 2183 } 2184 2185 rw_lock_read_lock(&sAreaCacheLock); 2186 2187 if (cache == area->cache) { 2188 cache->AcquireRefLocked(); 2189 rw_lock_read_unlock(&sAreaCacheLock); 2190 return cache; 2191 } 2192 2193 // the cache changed in the meantime 2194 cache->Unlock(); 2195 } 2196 } 2197 2198 2199 void 2200 vm_area_put_locked_cache(VMCache* cache) 2201 { 2202 cache->ReleaseRefAndUnlock(); 2203 } 2204 2205 2206 area_id 2207 vm_clone_area(team_id team, const char* name, void** address, 2208 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2209 bool kernel) 2210 { 2211 VMArea* newArea = NULL; 2212 VMArea* sourceArea; 2213 2214 // Check whether the source area exists and is cloneable. If so, mark it 2215 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2216 { 2217 AddressSpaceWriteLocker locker; 2218 status_t status = locker.SetFromArea(sourceID, sourceArea); 2219 if (status != B_OK) 2220 return status; 2221 2222 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2223 return B_NOT_ALLOWED; 2224 2225 sourceArea->protection |= B_SHARED_AREA; 2226 protection |= B_SHARED_AREA; 2227 } 2228 2229 // Now lock both address spaces and actually do the cloning. 
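	// The write lock on the source's address space has been released above,
	// so the source area could have been deleted or replaced in the meantime.
	// It is therefore looked up again and its protection re-checked below,
	// this time with both address spaces locked.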
2230 2231 MultiAddressSpaceLocker locker; 2232 VMAddressSpace* sourceAddressSpace; 2233 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2234 if (status != B_OK) 2235 return status; 2236 2237 VMAddressSpace* targetAddressSpace; 2238 status = locker.AddTeam(team, true, &targetAddressSpace); 2239 if (status != B_OK) 2240 return status; 2241 2242 status = locker.Lock(); 2243 if (status != B_OK) 2244 return status; 2245 2246 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2247 if (sourceArea == NULL) 2248 return B_BAD_VALUE; 2249 2250 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2251 return B_NOT_ALLOWED; 2252 2253 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2254 2255 if (!kernel && sourceAddressSpace != targetAddressSpace 2256 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2257 #if KDEBUG 2258 Team* team = thread_get_current_thread()->team; 2259 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2260 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2261 #endif 2262 status = B_NOT_ALLOWED; 2263 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2264 status = B_NOT_ALLOWED; 2265 } else { 2266 virtual_address_restrictions addressRestrictions = {}; 2267 addressRestrictions.address = *address; 2268 addressRestrictions.address_specification = addressSpec; 2269 status = map_backing_store(targetAddressSpace, cache, 2270 sourceArea->cache_offset, name, sourceArea->Size(), 2271 sourceArea->wiring, protection, sourceArea->protection_max, 2272 mapping, 0, &addressRestrictions, 2273 kernel, &newArea, address); 2274 } 2275 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2276 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2277 // to create a new cache, and has therefore already acquired a reference 2278 // to the source cache - but otherwise it has no idea that we need 2279 // one. 2280 cache->AcquireRefLocked(); 2281 } 2282 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2283 // we need to map in everything at this point 2284 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2285 // we don't have actual pages to map but a physical area 2286 VMTranslationMap* map 2287 = sourceArea->address_space->TranslationMap(); 2288 map->Lock(); 2289 2290 phys_addr_t physicalAddress; 2291 uint32 oldProtection; 2292 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2293 2294 map->Unlock(); 2295 2296 map = targetAddressSpace->TranslationMap(); 2297 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2298 newArea->Base() + (newArea->Size() - 1)); 2299 2300 vm_page_reservation reservation; 2301 vm_page_reserve_pages(&reservation, reservePages, 2302 targetAddressSpace == VMAddressSpace::Kernel() 2303 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2304 map->Lock(); 2305 2306 for (addr_t offset = 0; offset < newArea->Size(); 2307 offset += B_PAGE_SIZE) { 2308 map->Map(newArea->Base() + offset, physicalAddress + offset, 2309 protection, newArea->MemoryType(), &reservation); 2310 } 2311 2312 map->Unlock(); 2313 vm_page_unreserve_pages(&reservation); 2314 } else { 2315 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2316 size_t reservePages = map->MaxPagesNeededToMap( 2317 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2318 vm_page_reservation reservation; 2319 vm_page_reserve_pages(&reservation, reservePages, 2320 targetAddressSpace == VMAddressSpace::Kernel() 2321 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2322 2323 // map in all pages from source 2324 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2325 vm_page* page = it.Next();) { 2326 if (!page->busy) { 2327 DEBUG_PAGE_ACCESS_START(page); 2328 map_page(newArea, page, 2329 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2330 - newArea->cache_offset), 2331 protection, &reservation); 2332 DEBUG_PAGE_ACCESS_END(page); 2333 } 2334 } 2335 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2336 // ensuring that! 2337 2338 vm_page_unreserve_pages(&reservation); 2339 } 2340 } 2341 if (status == B_OK) 2342 newArea->cache_type = sourceArea->cache_type; 2343 2344 vm_area_put_locked_cache(cache); 2345 2346 if (status < B_OK) 2347 return status; 2348 2349 return newArea->id; 2350 } 2351 2352 2353 /*! Deletes the specified area of the given address space. 2354 2355 The address space must be write-locked. 2356 The caller must ensure that the area does not have any wired ranges. 2357 2358 \param addressSpace The address space containing the area. 2359 \param area The area to be deleted. 2360 \param deletingAddressSpace \c true, if the address space is in the process 2361 of being deleted. 2362 */ 2363 static void 2364 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2365 bool deletingAddressSpace) 2366 { 2367 ASSERT(!area->IsWired()); 2368 2369 VMAreas::Remove(area); 2370 2371 // At this point the area is removed from the global hash table, but 2372 // still exists in the area list. 2373 2374 // Unmap the virtual address space the area occupied. 2375 { 2376 // We need to lock the complete cache chain. 2377 VMCache* topCache = vm_area_get_locked_cache(area); 2378 VMCacheChainLocker cacheChainLocker(topCache); 2379 cacheChainLocker.LockAllSourceCaches(); 2380 2381 // If the area's top cache is a temporary cache and the area is the only 2382 // one referencing it (besides us currently holding a second reference), 2383 // the unmapping code doesn't need to care about preserving the accessed 2384 // and dirty flags of the top cache page mappings. 2385 bool ignoreTopCachePageFlags 2386 = topCache->temporary && topCache->RefCount() == 2; 2387 2388 area->address_space->TranslationMap()->UnmapArea(area, 2389 deletingAddressSpace, ignoreTopCachePageFlags); 2390 } 2391 2392 if (!area->cache->temporary) 2393 area->cache->WriteModified(); 2394 2395 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2396 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2397
2398 arch_vm_unset_memory_type(area);
2399 addressSpace->RemoveArea(area, allocationFlags);
2400 addressSpace->Put();
2401
2402 area->cache->RemoveArea(area);
2403 area->cache->ReleaseRef();
2404
2405 addressSpace->DeleteArea(area, allocationFlags);
2406 }
2407
2408
2409 status_t
2410 vm_delete_area(team_id team, area_id id, bool kernel)
2411 {
2412 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2413 team, id));
2414
2415 // lock the address space and make sure the area isn't wired
2416 AddressSpaceWriteLocker locker;
2417 VMArea* area;
2418 AreaCacheLocker cacheLocker;
2419
2420 do {
2421 status_t status = locker.SetFromArea(team, id, area);
2422 if (status != B_OK)
2423 return status;
2424
2425 cacheLocker.SetTo(area);
2426 } while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2427
2428 cacheLocker.Unlock();
2429
2430 if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2431 return B_NOT_ALLOWED;
2432
2433 delete_area(locker.AddressSpace(), area, false);
2434 return B_OK;
2435 }
2436
2437
2438 /*! Creates a new cache on top of the given cache, moves all areas from
2439 the old cache to the new one, and changes the protection of all affected
2440 areas' pages to read-only. If requested, wired pages are moved up to the
2441 new cache and copies are added to the old cache in their place.
2442 Preconditions:
2443 - The given cache must be locked.
2444 - All of the cache's areas' address spaces must be read-locked.
2445 - Either the cache must not have any wired ranges or a page reservation for
2446 all wired pages must be provided, so they can be copied.
2447
2448 \param lowerCache The cache on top of which a new cache shall be created.
2449 \param wiredPagesReservation If \c NULL there must not be any wired pages
2450 in \a lowerCache. Otherwise as many pages must be reserved as the cache
2451 has wired pages. The wired pages are copied in this case.
2452 */
2453 static status_t
2454 vm_copy_on_write_area(VMCache* lowerCache,
2455 vm_page_reservation* wiredPagesReservation)
2456 {
2457 VMCache* upperCache;
2458
2459 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2460
2461 // We need to separate the cache from its areas. The cache goes one level
2462 // deeper and we create a new cache in between.
2463
2464 // create an anonymous cache
2465 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2466 lowerCache->GuardSize() / B_PAGE_SIZE,
2467 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2468 VM_PRIORITY_USER);
2469 if (status != B_OK)
2470 return status;
2471
2472 upperCache->Lock();
2473
2474 upperCache->temporary = 1;
2475 upperCache->virtual_base = lowerCache->virtual_base;
2476 upperCache->virtual_end = lowerCache->virtual_end;
2477
2478 // transfer the lower cache areas to the upper cache
2479 rw_lock_write_lock(&sAreaCacheLock);
2480 upperCache->TransferAreas(lowerCache);
2481 rw_lock_write_unlock(&sAreaCacheLock);
2482
2483 lowerCache->AddConsumer(upperCache);
2484
2485 // We now need to remap all pages from all of the cache's areas read-only,
2486 // so that a copy will be created on next write access. If there are wired
2487 // pages, we keep their protection, move them to the upper cache and create
2488 // copies for the lower cache.
2489 if (wiredPagesReservation != NULL) {
2490 // We need to handle wired pages -- iterate through the cache's pages.
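	// A wired page's existing mappings (and thus its protection) must not be
	// touched, so instead of remapping it read-only, the page itself is moved
	// to the upper cache and the lower cache gets a copy in its place. Unwired
	// pages merely have their mappings in all areas downgraded to read-only.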
2491 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2492 vm_page* page = it.Next();) { 2493 if (page->WiredCount() > 0) { 2494 // allocate a new page and copy the wired one 2495 vm_page* copiedPage = vm_page_allocate_page( 2496 wiredPagesReservation, PAGE_STATE_ACTIVE); 2497 2498 vm_memcpy_physical_page( 2499 copiedPage->physical_page_number * B_PAGE_SIZE, 2500 page->physical_page_number * B_PAGE_SIZE); 2501 2502 // move the wired page to the upper cache (note: removing is OK 2503 // with the SplayTree iterator) and insert the copy 2504 upperCache->MovePage(page); 2505 lowerCache->InsertPage(copiedPage, 2506 page->cache_offset * B_PAGE_SIZE); 2507 2508 DEBUG_PAGE_ACCESS_END(copiedPage); 2509 } else { 2510 // Change the protection of this page in all areas. 2511 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2512 tempArea = tempArea->cache_next) { 2513 // The area must be readable in the same way it was 2514 // previously writable. 2515 addr_t address = virtual_page_address(tempArea, page); 2516 uint32 protection = 0; 2517 uint32 pageProtection = get_area_page_protection(tempArea, address); 2518 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2519 protection |= B_KERNEL_READ_AREA; 2520 if ((pageProtection & B_READ_AREA) != 0) 2521 protection |= B_READ_AREA; 2522 2523 VMTranslationMap* map 2524 = tempArea->address_space->TranslationMap(); 2525 map->Lock(); 2526 map->ProtectPage(tempArea, address, protection); 2527 map->Unlock(); 2528 } 2529 } 2530 } 2531 } else { 2532 ASSERT(lowerCache->WiredPagesCount() == 0); 2533 2534 // just change the protection of all areas 2535 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2536 tempArea = tempArea->cache_next) { 2537 if (tempArea->page_protections != NULL) { 2538 // Change the protection of all pages in this area. 2539 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2540 map->Lock(); 2541 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2542 vm_page* page = it.Next();) { 2543 // The area must be readable in the same way it was 2544 // previously writable. 2545 addr_t address = virtual_page_address(tempArea, page); 2546 uint32 protection = 0; 2547 uint32 pageProtection = get_area_page_protection(tempArea, address); 2548 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2549 protection |= B_KERNEL_READ_AREA; 2550 if ((pageProtection & B_READ_AREA) != 0) 2551 protection |= B_READ_AREA; 2552 2553 map->ProtectPage(tempArea, address, protection); 2554 } 2555 map->Unlock(); 2556 continue; 2557 } 2558 // The area must be readable in the same way it was previously 2559 // writable. 2560 uint32 protection = 0; 2561 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2562 protection |= B_KERNEL_READ_AREA; 2563 if ((tempArea->protection & B_READ_AREA) != 0) 2564 protection |= B_READ_AREA; 2565 2566 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2567 map->Lock(); 2568 map->ProtectArea(tempArea, protection); 2569 map->Unlock(); 2570 } 2571 } 2572 2573 vm_area_put_locked_cache(upperCache); 2574 2575 return B_OK; 2576 } 2577 2578 2579 area_id 2580 vm_copy_area(team_id team, const char* name, void** _address, 2581 uint32 addressSpec, area_id sourceID) 2582 { 2583 // Do the locking: target address space, all address spaces associated with 2584 // the source cache, and the cache itself. 
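	// If the source area isn't shared, its wired pages have to be copied,
	// which requires a page reservation. Since reserving pages may block, the
	// reservation is made with all locks dropped and the locking is then
	// restarted; the loop below repeats until the reservation still covers the
	// cache's current wired page count.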
2585 MultiAddressSpaceLocker locker; 2586 VMAddressSpace* targetAddressSpace; 2587 VMCache* cache; 2588 VMArea* source; 2589 AreaCacheLocker cacheLocker; 2590 status_t status; 2591 bool sharedArea; 2592 2593 page_num_t wiredPages = 0; 2594 vm_page_reservation wiredPagesReservation; 2595 2596 bool restart; 2597 do { 2598 restart = false; 2599 2600 locker.Unset(); 2601 status = locker.AddTeam(team, true, &targetAddressSpace); 2602 if (status == B_OK) { 2603 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2604 &cache); 2605 } 2606 if (status != B_OK) 2607 return status; 2608 2609 cacheLocker.SetTo(cache, true); // already locked 2610 2611 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2612 2613 page_num_t oldWiredPages = wiredPages; 2614 wiredPages = 0; 2615 2616 // If the source area isn't shared, count the number of wired pages in 2617 // the cache and reserve as many pages. 2618 if (!sharedArea) { 2619 wiredPages = cache->WiredPagesCount(); 2620 2621 if (wiredPages > oldWiredPages) { 2622 cacheLocker.Unlock(); 2623 locker.Unlock(); 2624 2625 if (oldWiredPages > 0) 2626 vm_page_unreserve_pages(&wiredPagesReservation); 2627 2628 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2629 VM_PRIORITY_USER); 2630 2631 restart = true; 2632 } 2633 } else if (oldWiredPages > 0) 2634 vm_page_unreserve_pages(&wiredPagesReservation); 2635 } while (restart); 2636 2637 // unreserve pages later 2638 struct PagesUnreserver { 2639 PagesUnreserver(vm_page_reservation* reservation) 2640 : 2641 fReservation(reservation) 2642 { 2643 } 2644 2645 ~PagesUnreserver() 2646 { 2647 if (fReservation != NULL) 2648 vm_page_unreserve_pages(fReservation); 2649 } 2650 2651 private: 2652 vm_page_reservation* fReservation; 2653 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2654 2655 bool writableCopy 2656 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2657 uint8* targetPageProtections = NULL; 2658 2659 if (source->page_protections != NULL) { 2660 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2661 targetPageProtections = (uint8*)malloc_etc(bytes, 2662 (source->address_space == VMAddressSpace::Kernel() 2663 || targetAddressSpace == VMAddressSpace::Kernel()) 2664 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2665 if (targetPageProtections == NULL) 2666 return B_NO_MEMORY; 2667 2668 memcpy(targetPageProtections, source->page_protections, bytes); 2669 2670 if (!writableCopy) { 2671 for (size_t i = 0; i < bytes; i++) { 2672 if ((targetPageProtections[i] 2673 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2674 writableCopy = true; 2675 break; 2676 } 2677 } 2678 } 2679 } 2680 2681 if (addressSpec == B_CLONE_ADDRESS) { 2682 addressSpec = B_EXACT_ADDRESS; 2683 *_address = (void*)source->Base(); 2684 } 2685 2686 // First, create a cache on top of the source area, respectively use the 2687 // existing one, if this is a shared area. 2688 2689 VMArea* target; 2690 virtual_address_restrictions addressRestrictions = {}; 2691 addressRestrictions.address = *_address; 2692 addressRestrictions.address_specification = addressSpec; 2693 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2694 name, source->Size(), source->wiring, source->protection, 2695 source->protection_max, 2696 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2697 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2698 &addressRestrictions, true, &target, _address); 2699 if (status < B_OK) { 2700 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2701 return status; 2702 } 2703 2704 if (targetPageProtections != NULL) 2705 target->page_protections = targetPageProtections; 2706 2707 if (sharedArea) { 2708 // The new area uses the old area's cache, but map_backing_store() 2709 // hasn't acquired a ref. So we have to do that now. 2710 cache->AcquireRefLocked(); 2711 } 2712 2713 // If the source area is writable, we need to move it one layer up as well 2714 2715 if (!sharedArea) { 2716 if (writableCopy) { 2717 // TODO: do something more useful if this fails! 2718 if (vm_copy_on_write_area(cache, 2719 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2720 panic("vm_copy_on_write_area() failed!\n"); 2721 } 2722 } 2723 } 2724 2725 // we return the ID of the newly created area 2726 return target->id; 2727 } 2728 2729 2730 status_t 2731 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2732 bool kernel) 2733 { 2734 fix_protection(&newProtection); 2735 2736 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2737 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2738 2739 if (!arch_vm_supports_protection(newProtection)) 2740 return B_NOT_SUPPORTED; 2741 2742 bool becomesWritable 2743 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2744 2745 // lock address spaces and cache 2746 MultiAddressSpaceLocker locker; 2747 VMCache* cache; 2748 VMArea* area; 2749 status_t status; 2750 AreaCacheLocker cacheLocker; 2751 bool isWritable; 2752 2753 bool restart; 2754 do { 2755 restart = false; 2756 2757 locker.Unset(); 2758 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2759 if (status != B_OK) 2760 return status; 2761 2762 cacheLocker.SetTo(cache, true); // already locked 2763 2764 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2765 || (area->protection & B_KERNEL_AREA) != 0)) { 2766 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2767 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2768 " (%s)\n", team, newProtection, areaID, area->name); 2769 return B_NOT_ALLOWED; 2770 } 2771 if (!kernel && area->protection_max != 0 2772 && (newProtection & area->protection_max) 2773 != (newProtection & B_USER_PROTECTION)) { 2774 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2775 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2776 "area %" B_PRId32 " (%s)\n", team, newProtection, 2777 area->protection_max, areaID, area->name); 2778 return B_NOT_ALLOWED; 2779 } 2780 2781 if (area->protection == newProtection) 2782 return B_OK; 2783 2784 if (team != VMAddressSpace::KernelID() 2785 && area->address_space->ID() != team) { 2786 // unless you're the kernel, you are only allowed to set 2787 // the protection of your own areas 2788 return B_NOT_ALLOWED; 2789 } 2790 2791 isWritable 2792 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2793 2794 // Make sure the area (respectively, if we're going to call 2795 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2796 // wired ranges. 
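	// vm_copy_on_write_area() is only needed when a non-writable area becomes
	// writable while its cache still has consumers. In that case every area of
	// the cache must be free of wired ranges; otherwise checking this area
	// alone is sufficient.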
2797 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2798 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2799 otherArea = otherArea->cache_next) { 2800 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2801 restart = true; 2802 break; 2803 } 2804 } 2805 } else { 2806 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2807 restart = true; 2808 } 2809 } while (restart); 2810 2811 bool changePageProtection = true; 2812 bool changeTopCachePagesOnly = false; 2813 2814 if (isWritable && !becomesWritable) { 2815 // writable -> !writable 2816 2817 if (cache->source != NULL && cache->temporary) { 2818 if (cache->CountWritableAreas(area) == 0) { 2819 // Since this cache now lives from the pages in its source cache, 2820 // we can change the cache's commitment to take only those pages 2821 // into account that really are in this cache. 2822 2823 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2824 team == VMAddressSpace::KernelID() 2825 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2826 2827 // TODO: we may be able to join with our source cache, if 2828 // count == 0 2829 } 2830 } 2831 2832 // If only the writability changes, we can just remap the pages of the 2833 // top cache, since the pages of lower caches are mapped read-only 2834 // anyway. That's advantageous only, if the number of pages in the cache 2835 // is significantly smaller than the number of pages in the area, 2836 // though. 2837 if (newProtection 2838 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2839 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2840 changeTopCachePagesOnly = true; 2841 } 2842 } else if (!isWritable && becomesWritable) { 2843 // !writable -> writable 2844 2845 if (!cache->consumers.IsEmpty()) { 2846 // There are consumers -- we have to insert a new cache. Fortunately 2847 // vm_copy_on_write_area() does everything that's needed. 2848 changePageProtection = false; 2849 status = vm_copy_on_write_area(cache, NULL); 2850 } else { 2851 // No consumers, so we don't need to insert a new one. 2852 if (cache->source != NULL && cache->temporary) { 2853 // the cache's commitment must contain all possible pages 2854 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2855 team == VMAddressSpace::KernelID() 2856 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2857 } 2858 2859 if (status == B_OK && cache->source != NULL) { 2860 // There's a source cache, hence we can't just change all pages' 2861 // protection or we might allow writing into pages belonging to 2862 // a lower cache. 
2863 changeTopCachePagesOnly = true; 2864 } 2865 } 2866 } else { 2867 // we don't have anything special to do in all other cases 2868 } 2869 2870 if (status == B_OK) { 2871 // remap existing pages in this cache 2872 if (changePageProtection) { 2873 VMTranslationMap* map = area->address_space->TranslationMap(); 2874 map->Lock(); 2875 2876 if (changeTopCachePagesOnly) { 2877 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2878 page_num_t lastPageOffset 2879 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2880 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2881 vm_page* page = it.Next();) { 2882 if (page->cache_offset >= firstPageOffset 2883 && page->cache_offset <= lastPageOffset) { 2884 addr_t address = virtual_page_address(area, page); 2885 map->ProtectPage(area, address, newProtection); 2886 } 2887 } 2888 } else 2889 map->ProtectArea(area, newProtection); 2890 2891 map->Unlock(); 2892 } 2893 2894 area->protection = newProtection; 2895 } 2896 2897 return status; 2898 } 2899 2900 2901 status_t 2902 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2903 { 2904 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2905 if (addressSpace == NULL) 2906 return B_BAD_TEAM_ID; 2907 2908 VMTranslationMap* map = addressSpace->TranslationMap(); 2909 2910 map->Lock(); 2911 uint32 dummyFlags; 2912 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2913 map->Unlock(); 2914 2915 addressSpace->Put(); 2916 return status; 2917 } 2918 2919 2920 /*! The page's cache must be locked. 2921 */ 2922 bool 2923 vm_test_map_modification(vm_page* page) 2924 { 2925 if (page->modified) 2926 return true; 2927 2928 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2929 vm_page_mapping* mapping; 2930 while ((mapping = iterator.Next()) != NULL) { 2931 VMArea* area = mapping->area; 2932 VMTranslationMap* map = area->address_space->TranslationMap(); 2933 2934 phys_addr_t physicalAddress; 2935 uint32 flags; 2936 map->Lock(); 2937 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2938 map->Unlock(); 2939 2940 if ((flags & PAGE_MODIFIED) != 0) 2941 return true; 2942 } 2943 2944 return false; 2945 } 2946 2947 2948 /*! The page's cache must be locked. 2949 */ 2950 void 2951 vm_clear_map_flags(vm_page* page, uint32 flags) 2952 { 2953 if ((flags & PAGE_ACCESSED) != 0) 2954 page->accessed = false; 2955 if ((flags & PAGE_MODIFIED) != 0) 2956 page->modified = false; 2957 2958 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2959 vm_page_mapping* mapping; 2960 while ((mapping = iterator.Next()) != NULL) { 2961 VMArea* area = mapping->area; 2962 VMTranslationMap* map = area->address_space->TranslationMap(); 2963 2964 map->Lock(); 2965 map->ClearFlags(virtual_page_address(area, page), flags); 2966 map->Unlock(); 2967 } 2968 } 2969 2970 2971 /*! Removes all mappings from a page. 2972 After you've called this function, the page is unmapped from memory and 2973 the page's \c accessed and \c modified flags have been updated according 2974 to the state of the mappings. 2975 The page's cache must be locked. 
2976 */ 2977 void 2978 vm_remove_all_page_mappings(vm_page* page) 2979 { 2980 while (vm_page_mapping* mapping = page->mappings.Head()) { 2981 VMArea* area = mapping->area; 2982 VMTranslationMap* map = area->address_space->TranslationMap(); 2983 addr_t address = virtual_page_address(area, page); 2984 map->UnmapPage(area, address, false); 2985 } 2986 } 2987 2988 2989 int32 2990 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2991 { 2992 int32 count = 0; 2993 2994 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2995 vm_page_mapping* mapping; 2996 while ((mapping = iterator.Next()) != NULL) { 2997 VMArea* area = mapping->area; 2998 VMTranslationMap* map = area->address_space->TranslationMap(); 2999 3000 bool modified; 3001 if (map->ClearAccessedAndModified(area, 3002 virtual_page_address(area, page), false, modified)) { 3003 count++; 3004 } 3005 3006 page->modified |= modified; 3007 } 3008 3009 3010 if (page->accessed) { 3011 count++; 3012 page->accessed = false; 3013 } 3014 3015 return count; 3016 } 3017 3018 3019 /*! Removes all mappings of a page and/or clears the accessed bits of the 3020 mappings. 3021 The function iterates through the page mappings and removes them until 3022 encountering one that has been accessed. From then on it will continue to 3023 iterate, but only clear the accessed flag of the mapping. The page's 3024 \c modified bit will be updated accordingly, the \c accessed bit will be 3025 cleared. 3026 \return The number of mapping accessed bits encountered, including the 3027 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3028 of the page have been removed. 3029 */ 3030 int32 3031 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3032 { 3033 ASSERT(page->WiredCount() == 0); 3034 3035 if (page->accessed) 3036 return vm_clear_page_mapping_accessed_flags(page); 3037 3038 while (vm_page_mapping* mapping = page->mappings.Head()) { 3039 VMArea* area = mapping->area; 3040 VMTranslationMap* map = area->address_space->TranslationMap(); 3041 addr_t address = virtual_page_address(area, page); 3042 bool modified = false; 3043 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3044 page->accessed = true; 3045 page->modified |= modified; 3046 return vm_clear_page_mapping_accessed_flags(page); 3047 } 3048 page->modified |= modified; 3049 } 3050 3051 return 0; 3052 } 3053 3054 3055 static int 3056 display_mem(int argc, char** argv) 3057 { 3058 bool physical = false; 3059 addr_t copyAddress; 3060 int32 displayWidth; 3061 int32 itemSize; 3062 int32 num = -1; 3063 addr_t address; 3064 int i = 1, j; 3065 3066 if (argc > 1 && argv[1][0] == '-') { 3067 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3068 physical = true; 3069 i++; 3070 } else 3071 i = 99; 3072 } 3073 3074 if (argc < i + 1 || argc > i + 2) { 3075 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3076 "\tdl - 8 bytes\n" 3077 "\tdw - 4 bytes\n" 3078 "\tds - 2 bytes\n" 3079 "\tdb - 1 byte\n" 3080 "\tstring - a whole string\n" 3081 " -p or --physical only allows memory from a single page to be " 3082 "displayed.\n"); 3083 return 0; 3084 } 3085 3086 address = parse_expression(argv[i]); 3087 3088 if (argc > i + 1) 3089 num = parse_expression(argv[i + 1]); 3090 3091 // build the format string 3092 if (strcmp(argv[0], "db") == 0) { 3093 itemSize = 1; 3094 displayWidth = 16; 3095 } else if (strcmp(argv[0], "ds") == 0) { 3096 itemSize = 2; 3097 displayWidth = 8; 3098 } else if (strcmp(argv[0], "dw") == 0) { 3099 
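	// 4-byte items, four per line -- as in the other modes, each output
	// line covers 16 bytes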
itemSize = 4; 3100 displayWidth = 4; 3101 } else if (strcmp(argv[0], "dl") == 0) { 3102 itemSize = 8; 3103 displayWidth = 2; 3104 } else if (strcmp(argv[0], "string") == 0) { 3105 itemSize = 1; 3106 displayWidth = -1; 3107 } else { 3108 kprintf("display_mem called in an invalid way!\n"); 3109 return 0; 3110 } 3111 3112 if (num <= 0) 3113 num = displayWidth; 3114 3115 void* physicalPageHandle = NULL; 3116 3117 if (physical) { 3118 int32 offset = address & (B_PAGE_SIZE - 1); 3119 if (num * itemSize + offset > B_PAGE_SIZE) { 3120 num = (B_PAGE_SIZE - offset) / itemSize; 3121 kprintf("NOTE: number of bytes has been cut to page size\n"); 3122 } 3123 3124 address = ROUNDDOWN(address, B_PAGE_SIZE); 3125 3126 if (vm_get_physical_page_debug(address, ©Address, 3127 &physicalPageHandle) != B_OK) { 3128 kprintf("getting the hardware page failed."); 3129 return 0; 3130 } 3131 3132 address += offset; 3133 copyAddress += offset; 3134 } else 3135 copyAddress = address; 3136 3137 if (!strcmp(argv[0], "string")) { 3138 kprintf("%p \"", (char*)copyAddress); 3139 3140 // string mode 3141 for (i = 0; true; i++) { 3142 char c; 3143 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3144 != B_OK 3145 || c == '\0') { 3146 break; 3147 } 3148 3149 if (c == '\n') 3150 kprintf("\\n"); 3151 else if (c == '\t') 3152 kprintf("\\t"); 3153 else { 3154 if (!isprint(c)) 3155 c = '.'; 3156 3157 kprintf("%c", c); 3158 } 3159 } 3160 3161 kprintf("\"\n"); 3162 } else { 3163 // number mode 3164 for (i = 0; i < num; i++) { 3165 uint64 value; 3166 3167 if ((i % displayWidth) == 0) { 3168 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3169 if (i != 0) 3170 kprintf("\n"); 3171 3172 kprintf("[0x%lx] ", address + i * itemSize); 3173 3174 for (j = 0; j < displayed; j++) { 3175 char c; 3176 if (debug_memcpy(B_CURRENT_TEAM, &c, 3177 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3178 displayed = j; 3179 break; 3180 } 3181 if (!isprint(c)) 3182 c = '.'; 3183 3184 kprintf("%c", c); 3185 } 3186 if (num > displayWidth) { 3187 // make sure the spacing in the last line is correct 3188 for (j = displayed; j < displayWidth * itemSize; j++) 3189 kprintf(" "); 3190 } 3191 kprintf(" "); 3192 } 3193 3194 if (debug_memcpy(B_CURRENT_TEAM, &value, 3195 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3196 kprintf("read fault"); 3197 break; 3198 } 3199 3200 switch (itemSize) { 3201 case 1: 3202 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3203 break; 3204 case 2: 3205 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3206 break; 3207 case 4: 3208 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3209 break; 3210 case 8: 3211 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3212 break; 3213 } 3214 } 3215 3216 kprintf("\n"); 3217 } 3218 3219 if (physical) { 3220 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3221 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3222 } 3223 return 0; 3224 } 3225 3226 3227 static void 3228 dump_cache_tree_recursively(VMCache* cache, int level, 3229 VMCache* highlightCache) 3230 { 3231 // print this cache 3232 for (int i = 0; i < level; i++) 3233 kprintf(" "); 3234 if (cache == highlightCache) 3235 kprintf("%p <--\n", cache); 3236 else 3237 kprintf("%p\n", cache); 3238 3239 // recursively print its consumers 3240 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3241 VMCache* consumer = it.Next();) { 3242 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3243 } 3244 } 3245 3246 3247 static int 3248 dump_cache_tree(int argc, char** argv) 3249 { 3250 
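	// Kernel debugger command: takes a cache address, walks up the source
	// chain to the root cache and prints the whole consumer tree from there,
	// marking the given cache with an arrow.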
if (argc != 2 || !strcmp(argv[1], "--help")) { 3251 kprintf("usage: %s <address>\n", argv[0]); 3252 return 0; 3253 } 3254 3255 addr_t address = parse_expression(argv[1]); 3256 if (address == 0) 3257 return 0; 3258 3259 VMCache* cache = (VMCache*)address; 3260 VMCache* root = cache; 3261 3262 // find the root cache (the transitive source) 3263 while (root->source != NULL) 3264 root = root->source; 3265 3266 dump_cache_tree_recursively(root, 0, cache); 3267 3268 return 0; 3269 } 3270 3271 3272 const char* 3273 vm_cache_type_to_string(int32 type) 3274 { 3275 switch (type) { 3276 case CACHE_TYPE_RAM: 3277 return "RAM"; 3278 case CACHE_TYPE_DEVICE: 3279 return "device"; 3280 case CACHE_TYPE_VNODE: 3281 return "vnode"; 3282 case CACHE_TYPE_NULL: 3283 return "null"; 3284 3285 default: 3286 return "unknown"; 3287 } 3288 } 3289 3290 3291 #if DEBUG_CACHE_LIST 3292 3293 static void 3294 update_cache_info_recursively(VMCache* cache, cache_info& info) 3295 { 3296 info.page_count += cache->page_count; 3297 if (cache->type == CACHE_TYPE_RAM) 3298 info.committed += cache->committed_size; 3299 3300 // recurse 3301 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3302 VMCache* consumer = it.Next();) { 3303 update_cache_info_recursively(consumer, info); 3304 } 3305 } 3306 3307 3308 static int 3309 cache_info_compare_page_count(const void* _a, const void* _b) 3310 { 3311 const cache_info* a = (const cache_info*)_a; 3312 const cache_info* b = (const cache_info*)_b; 3313 if (a->page_count == b->page_count) 3314 return 0; 3315 return a->page_count < b->page_count ? 1 : -1; 3316 } 3317 3318 3319 static int 3320 cache_info_compare_committed(const void* _a, const void* _b) 3321 { 3322 const cache_info* a = (const cache_info*)_a; 3323 const cache_info* b = (const cache_info*)_b; 3324 if (a->committed == b->committed) 3325 return 0; 3326 return a->committed < b->committed ? 
1 : -1; 3327 } 3328 3329 3330 static void 3331 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3332 { 3333 for (int i = 0; i < level; i++) 3334 kprintf(" "); 3335 3336 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3337 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3338 cache->virtual_base, cache->virtual_end, cache->page_count); 3339 3340 if (level == 0) 3341 kprintf("/%lu", info.page_count); 3342 3343 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3344 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3345 3346 if (level == 0) 3347 kprintf("/%lu", info.committed); 3348 } 3349 3350 // areas 3351 if (cache->areas != NULL) { 3352 VMArea* area = cache->areas; 3353 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3354 area->name, area->address_space->ID()); 3355 3356 while (area->cache_next != NULL) { 3357 area = area->cache_next; 3358 kprintf(", %" B_PRId32, area->id); 3359 } 3360 } 3361 3362 kputs("\n"); 3363 3364 // recurse 3365 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3366 VMCache* consumer = it.Next();) { 3367 dump_caches_recursively(consumer, info, level + 1); 3368 } 3369 } 3370 3371 3372 static int 3373 dump_caches(int argc, char** argv) 3374 { 3375 if (sCacheInfoTable == NULL) { 3376 kprintf("No cache info table!\n"); 3377 return 0; 3378 } 3379 3380 bool sortByPageCount = true; 3381 3382 for (int32 i = 1; i < argc; i++) { 3383 if (strcmp(argv[i], "-c") == 0) { 3384 sortByPageCount = false; 3385 } else { 3386 print_debugger_command_usage(argv[0]); 3387 return 0; 3388 } 3389 } 3390 3391 uint32 totalCount = 0; 3392 uint32 rootCount = 0; 3393 off_t totalCommitted = 0; 3394 page_num_t totalPages = 0; 3395 3396 VMCache* cache = gDebugCacheList; 3397 while (cache) { 3398 totalCount++; 3399 if (cache->source == NULL) { 3400 cache_info stackInfo; 3401 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3402 ? sCacheInfoTable[rootCount] : stackInfo; 3403 rootCount++; 3404 info.cache = cache; 3405 info.page_count = 0; 3406 info.committed = 0; 3407 update_cache_info_recursively(cache, info); 3408 totalCommitted += info.committed; 3409 totalPages += info.page_count; 3410 } 3411 3412 cache = cache->debug_next; 3413 } 3414 3415 if (rootCount <= (uint32)kCacheInfoTableCount) { 3416 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3417 sortByPageCount 3418 ? &cache_info_compare_page_count 3419 : &cache_info_compare_committed); 3420 } 3421 3422 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3423 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3424 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3425 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3426 "page count" : "committed size"); 3427 3428 if (rootCount <= (uint32)kCacheInfoTableCount) { 3429 for (uint32 i = 0; i < rootCount; i++) { 3430 cache_info& info = sCacheInfoTable[i]; 3431 dump_caches_recursively(info.cache, info, 0); 3432 } 3433 } else 3434 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3435 3436 return 0; 3437 } 3438 3439 #endif // DEBUG_CACHE_LIST 3440 3441 3442 static int 3443 dump_cache(int argc, char** argv) 3444 { 3445 VMCache* cache; 3446 bool showPages = false; 3447 int i = 1; 3448 3449 if (argc < 2 || !strcmp(argv[1], "--help")) { 3450 kprintf("usage: %s [-ps] <address>\n" 3451 " if -p is specified, all pages are shown, if -s is used\n" 3452 " only the cache info is shown respectively.\n", argv[0]); 3453 return 0; 3454 } 3455 while (argv[i][0] == '-') { 3456 char* arg = argv[i] + 1; 3457 while (arg[0]) { 3458 if (arg[0] == 'p') 3459 showPages = true; 3460 arg++; 3461 } 3462 i++; 3463 } 3464 if (argv[i] == NULL) { 3465 kprintf("%s: invalid argument, pass address\n", argv[0]); 3466 return 0; 3467 } 3468 3469 addr_t address = parse_expression(argv[i]); 3470 if (address == 0) 3471 return 0; 3472 3473 cache = (VMCache*)address; 3474 3475 cache->Dump(showPages); 3476 3477 set_debug_variable("_sourceCache", (addr_t)cache->source); 3478 3479 return 0; 3480 } 3481 3482 3483 static void 3484 dump_area_struct(VMArea* area, bool mappings) 3485 { 3486 kprintf("AREA: %p\n", area); 3487 kprintf("name:\t\t'%s'\n", area->name); 3488 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3489 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3490 kprintf("base:\t\t0x%lx\n", area->Base()); 3491 kprintf("size:\t\t0x%lx\n", area->Size()); 3492 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3493 kprintf("page_protection:%p\n", area->page_protections); 3494 kprintf("wiring:\t\t0x%x\n", area->wiring); 3495 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3496 kprintf("cache:\t\t%p\n", area->cache); 3497 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3498 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3499 kprintf("cache_next:\t%p\n", area->cache_next); 3500 kprintf("cache_prev:\t%p\n", area->cache_prev); 3501 3502 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3503 if (mappings) { 3504 kprintf("page mappings:\n"); 3505 while (iterator.HasNext()) { 3506 vm_page_mapping* mapping = iterator.Next(); 3507 kprintf(" %p", mapping->page); 3508 } 3509 kprintf("\n"); 3510 } else { 3511 uint32 count = 0; 3512 while (iterator.Next() != NULL) { 3513 count++; 3514 } 3515 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3516 } 3517 } 3518 3519 3520 static int 3521 dump_area(int argc, char** argv) 3522 { 3523 bool mappings = false; 3524 bool found = false; 3525 int32 index = 1; 3526 VMArea* area; 3527 addr_t num; 3528 3529 if (argc < 2 || !strcmp(argv[1], "--help")) { 3530 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3531 "All areas matching either id/address/name are listed. 
You can\n" 3532 "force to check only a specific item by prefixing the specifier\n" 3533 "with the id/contains/address/name keywords.\n" 3534 "-m shows the area's mappings as well.\n"); 3535 return 0; 3536 } 3537 3538 if (!strcmp(argv[1], "-m")) { 3539 mappings = true; 3540 index++; 3541 } 3542 3543 int32 mode = 0xf; 3544 if (!strcmp(argv[index], "id")) 3545 mode = 1; 3546 else if (!strcmp(argv[index], "contains")) 3547 mode = 2; 3548 else if (!strcmp(argv[index], "name")) 3549 mode = 4; 3550 else if (!strcmp(argv[index], "address")) 3551 mode = 0; 3552 if (mode != 0xf) 3553 index++; 3554 3555 if (index >= argc) { 3556 kprintf("No area specifier given.\n"); 3557 return 0; 3558 } 3559 3560 num = parse_expression(argv[index]); 3561 3562 if (mode == 0) { 3563 dump_area_struct((struct VMArea*)num, mappings); 3564 } else { 3565 // walk through the area list, looking for the arguments as a name 3566 3567 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3568 while ((area = it.Next()) != NULL) { 3569 if (((mode & 4) != 0 3570 && !strcmp(argv[index], area->name)) 3571 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3572 || (((mode & 2) != 0 && area->Base() <= num 3573 && area->Base() + area->Size() > num))))) { 3574 dump_area_struct(area, mappings); 3575 found = true; 3576 } 3577 } 3578 3579 if (!found) 3580 kprintf("could not find area %s (%ld)\n", argv[index], num); 3581 } 3582 3583 return 0; 3584 } 3585 3586 3587 static int 3588 dump_area_list(int argc, char** argv) 3589 { 3590 VMArea* area; 3591 const char* name = NULL; 3592 int32 id = 0; 3593 3594 if (argc > 1) { 3595 id = parse_expression(argv[1]); 3596 if (id == 0) 3597 name = argv[1]; 3598 } 3599 3600 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3601 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3602 B_PRINTF_POINTER_WIDTH, "size"); 3603 3604 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3605 while ((area = it.Next()) != NULL) { 3606 if ((id != 0 && area->address_space->ID() != id) 3607 || (name != NULL && strstr(area->name, name) == NULL)) 3608 continue; 3609 3610 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3611 area->id, (void*)area->Base(), (void*)area->Size(), 3612 area->protection, area->wiring, area->name); 3613 } 3614 return 0; 3615 } 3616 3617 3618 static int 3619 dump_available_memory(int argc, char** argv) 3620 { 3621 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3622 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3623 return 0; 3624 } 3625 3626 3627 static int 3628 dump_mapping_info(int argc, char** argv) 3629 { 3630 bool reverseLookup = false; 3631 bool pageLookup = false; 3632 3633 int argi = 1; 3634 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3635 const char* arg = argv[argi]; 3636 if (strcmp(arg, "-r") == 0) { 3637 reverseLookup = true; 3638 } else if (strcmp(arg, "-p") == 0) { 3639 reverseLookup = true; 3640 pageLookup = true; 3641 } else { 3642 print_debugger_command_usage(argv[0]); 3643 return 0; 3644 } 3645 } 3646 3647 // We need at least one argument, the address. Optionally a thread ID can be 3648 // specified. 
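	// That is, exactly one or two arguments must remain: the address,
	// optionally followed by a thread ID.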
3649 if (argi >= argc || argi + 2 < argc) { 3650 print_debugger_command_usage(argv[0]); 3651 return 0; 3652 } 3653 3654 uint64 addressValue; 3655 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3656 return 0; 3657 3658 Team* team = NULL; 3659 if (argi < argc) { 3660 uint64 threadID; 3661 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3662 return 0; 3663 3664 Thread* thread = Thread::GetDebug(threadID); 3665 if (thread == NULL) { 3666 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3667 return 0; 3668 } 3669 3670 team = thread->team; 3671 } 3672 3673 if (reverseLookup) { 3674 phys_addr_t physicalAddress; 3675 if (pageLookup) { 3676 vm_page* page = (vm_page*)(addr_t)addressValue; 3677 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3678 } else { 3679 physicalAddress = (phys_addr_t)addressValue; 3680 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3681 } 3682 3683 kprintf(" Team Virtual Address Area\n"); 3684 kprintf("--------------------------------------\n"); 3685 3686 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3687 Callback() 3688 : 3689 fAddressSpace(NULL) 3690 { 3691 } 3692 3693 void SetAddressSpace(VMAddressSpace* addressSpace) 3694 { 3695 fAddressSpace = addressSpace; 3696 } 3697 3698 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3699 { 3700 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3701 virtualAddress); 3702 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3703 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3704 else 3705 kprintf("\n"); 3706 return false; 3707 } 3708 3709 private: 3710 VMAddressSpace* fAddressSpace; 3711 } callback; 3712 3713 if (team != NULL) { 3714 // team specified -- get its address space 3715 VMAddressSpace* addressSpace = team->address_space; 3716 if (addressSpace == NULL) { 3717 kprintf("Failed to get address space!\n"); 3718 return 0; 3719 } 3720 3721 callback.SetAddressSpace(addressSpace); 3722 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3723 physicalAddress, callback); 3724 } else { 3725 // no team specified -- iterate through all address spaces 3726 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3727 addressSpace != NULL; 3728 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3729 callback.SetAddressSpace(addressSpace); 3730 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3731 physicalAddress, callback); 3732 } 3733 } 3734 } else { 3735 // get the address space 3736 addr_t virtualAddress = (addr_t)addressValue; 3737 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3738 VMAddressSpace* addressSpace; 3739 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3740 addressSpace = VMAddressSpace::Kernel(); 3741 } else if (team != NULL) { 3742 addressSpace = team->address_space; 3743 } else { 3744 Thread* thread = debug_get_debugged_thread(); 3745 if (thread == NULL || thread->team == NULL) { 3746 kprintf("Failed to get team!\n"); 3747 return 0; 3748 } 3749 3750 addressSpace = thread->team->address_space; 3751 } 3752 3753 if (addressSpace == NULL) { 3754 kprintf("Failed to get address space!\n"); 3755 return 0; 3756 } 3757 3758 // let the translation map implementation do the job 3759 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3760 } 3761 3762 return 0; 3763 } 3764 3765 3766 /*! Deletes all areas and reserved regions in the given address space. 3767 3768 The caller must ensure that none of the areas has any wired ranges. 
3769 3770 \param addressSpace The address space. 3771 \param deletingAddressSpace \c true, if the address space is in the process 3772 of being deleted. 3773 */ 3774 void 3775 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3776 { 3777 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3778 addressSpace->ID())); 3779 3780 addressSpace->WriteLock(); 3781 3782 // remove all reserved areas in this address space 3783 addressSpace->UnreserveAllAddressRanges(0); 3784 3785 // delete all the areas in this address space 3786 while (VMArea* area = addressSpace->FirstArea()) { 3787 ASSERT(!area->IsWired()); 3788 delete_area(addressSpace, area, deletingAddressSpace); 3789 } 3790 3791 addressSpace->WriteUnlock(); 3792 } 3793 3794 3795 static area_id 3796 vm_area_for(addr_t address, bool kernel) 3797 { 3798 team_id team; 3799 if (IS_USER_ADDRESS(address)) { 3800 // we try the user team address space, if any 3801 team = VMAddressSpace::CurrentID(); 3802 if (team < 0) 3803 return team; 3804 } else 3805 team = VMAddressSpace::KernelID(); 3806 3807 AddressSpaceReadLocker locker(team); 3808 if (!locker.IsLocked()) 3809 return B_BAD_TEAM_ID; 3810 3811 VMArea* area = locker.AddressSpace()->LookupArea(address); 3812 if (area != NULL) { 3813 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3814 return B_ERROR; 3815 3816 return area->id; 3817 } 3818 3819 return B_ERROR; 3820 } 3821 3822 3823 /*! Frees physical pages that were used during the boot process. 3824 \a end is inclusive. 3825 */ 3826 static void 3827 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3828 { 3829 // free all physical pages in the specified range 3830 3831 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3832 phys_addr_t physicalAddress; 3833 uint32 flags; 3834 3835 if (map->Query(current, &physicalAddress, &flags) == B_OK 3836 && (flags & PAGE_PRESENT) != 0) { 3837 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3838 if (page != NULL && page->State() != PAGE_STATE_FREE 3839 && page->State() != PAGE_STATE_CLEAR 3840 && page->State() != PAGE_STATE_UNUSED) { 3841 DEBUG_PAGE_ACCESS_START(page); 3842 vm_page_set_state(page, PAGE_STATE_FREE); 3843 } 3844 } 3845 } 3846 3847 // unmap the memory 3848 map->Unmap(start, end); 3849 } 3850 3851 3852 void 3853 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3854 { 3855 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3856 addr_t end = start + (size - 1); 3857 addr_t lastEnd = start; 3858 3859 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3860 (void*)start, (void*)end)); 3861 3862 // The areas are sorted in virtual address space order, so 3863 // we just have to find the holes between them that fall 3864 // into the area we should dispose 3865 3866 map->Lock(); 3867 3868 for (VMAddressSpace::AreaIterator it 3869 = VMAddressSpace::Kernel()->GetAreaIterator(); 3870 VMArea* area = it.Next();) { 3871 addr_t areaStart = area->Base(); 3872 addr_t areaEnd = areaStart + (area->Size() - 1); 3873 3874 if (areaEnd < start) 3875 continue; 3876 3877 if (areaStart > end) { 3878 // we are done, the area is already beyond of what we have to free 3879 break; 3880 } 3881 3882 if (areaStart > lastEnd) { 3883 // this is something we can free 3884 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3885 (void*)areaStart)); 3886 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3887 } 3888 3889 if (areaEnd >= end) 
{ 3890 lastEnd = areaEnd; 3891 // no +1 to prevent potential overflow 3892 break; 3893 } 3894 3895 lastEnd = areaEnd + 1; 3896 } 3897 3898 if (lastEnd < end) { 3899 // we can also get rid of some space at the end of the area 3900 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3901 (void*)end)); 3902 unmap_and_free_physical_pages(map, lastEnd, end); 3903 } 3904 3905 map->Unlock(); 3906 } 3907 3908 3909 static void 3910 create_preloaded_image_areas(struct preloaded_image* _image) 3911 { 3912 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3913 char name[B_OS_NAME_LENGTH]; 3914 void* address; 3915 int32 length; 3916 3917 // use file name to create a good area name 3918 char* fileName = strrchr(image->name, '/'); 3919 if (fileName == NULL) 3920 fileName = image->name; 3921 else 3922 fileName++; 3923 3924 length = strlen(fileName); 3925 // make sure there is enough space for the suffix 3926 if (length > 25) 3927 length = 25; 3928 3929 memcpy(name, fileName, length); 3930 strcpy(name + length, "_text"); 3931 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3932 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3933 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3934 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3935 // this will later be remapped read-only/executable by the 3936 // ELF initialization code 3937 3938 strcpy(name + length, "_data"); 3939 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3940 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3941 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3942 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3943 } 3944 3945 3946 /*! Frees all previously allocated kernel arguments areas from the kernel_args structure. 3947 Any boot loader resources contained in those arguments must not be accessed 3948 anymore past this point. 3949 */ 3950 void 3951 vm_free_kernel_args(kernel_args* args) 3952 { 3953 uint32 i; 3954 3955 TRACE(("vm_free_kernel_args()\n")); 3956 3957 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3958 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3959 if (area >= B_OK) 3960 delete_area(area); 3961 } 3962 } 3963 3964 3965 static void 3966 allocate_kernel_args(kernel_args* args) 3967 { 3968 TRACE(("allocate_kernel_args()\n")); 3969 3970 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3971 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3972 3973 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3974 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3975 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3976 } 3977 } 3978 3979 3980 static void 3981 unreserve_boot_loader_ranges(kernel_args* args) 3982 { 3983 TRACE(("unreserve_boot_loader_ranges()\n")); 3984 3985 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3986 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3987 (void*)(addr_t)args->virtual_allocated_range[i].start, 3988 args->virtual_allocated_range[i].size); 3989 } 3990 } 3991 3992 3993 static void 3994 reserve_boot_loader_ranges(kernel_args* args) 3995 { 3996 TRACE(("reserve_boot_loader_ranges()\n")); 3997 3998 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3999 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 4000 4001 // If the address is not a kernel address, we just skip it. The 4002 // architecture-specific code has to deal with it.
4003 if (!IS_KERNEL_ADDRESS(address)) { 4004 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4005 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4006 continue; 4007 } 4008 4009 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4010 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4011 if (status < B_OK) 4012 panic("could not reserve boot loader ranges\n"); 4013 } 4014 } 4015 4016 4017 static addr_t 4018 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4019 { 4020 size = PAGE_ALIGN(size); 4021 4022 // find a slot in the virtual allocation addr range 4023 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4024 // check to see if the space between this one and the last is big enough 4025 addr_t rangeStart = args->virtual_allocated_range[i].start; 4026 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4027 + args->virtual_allocated_range[i - 1].size; 4028 4029 addr_t base = alignment > 0 4030 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4031 4032 if (base >= KERNEL_BASE && base < rangeStart 4033 && rangeStart - base >= size) { 4034 args->virtual_allocated_range[i - 1].size 4035 += base + size - previousRangeEnd; 4036 return base; 4037 } 4038 } 4039 4040 // we hadn't found one between allocation ranges. this is ok. 4041 // see if there's a gap after the last one 4042 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4043 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4044 + args->virtual_allocated_range[lastEntryIndex].size; 4045 addr_t base = alignment > 0 4046 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4047 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4048 args->virtual_allocated_range[lastEntryIndex].size 4049 += base + size - lastRangeEnd; 4050 return base; 4051 } 4052 4053 // see if there's a gap before the first one 4054 addr_t rangeStart = args->virtual_allocated_range[0].start; 4055 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4056 base = rangeStart - size; 4057 if (alignment > 0) 4058 base = ROUNDDOWN(base, alignment); 4059 4060 if (base >= KERNEL_BASE) { 4061 args->virtual_allocated_range[0].start = base; 4062 args->virtual_allocated_range[0].size += rangeStart - base; 4063 return base; 4064 } 4065 } 4066 4067 return 0; 4068 } 4069 4070 4071 static bool 4072 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4073 { 4074 // TODO: horrible brute-force method of determining if the page can be 4075 // allocated 4076 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4077 if (address >= args->physical_memory_range[i].start 4078 && address < args->physical_memory_range[i].start 4079 + args->physical_memory_range[i].size) 4080 return true; 4081 } 4082 return false; 4083 } 4084 4085 4086 page_num_t 4087 vm_allocate_early_physical_page(kernel_args* args) 4088 { 4089 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4090 phys_addr_t nextPage; 4091 4092 nextPage = args->physical_allocated_range[i].start 4093 + args->physical_allocated_range[i].size; 4094 // see if the page after the next allocated paddr run can be allocated 4095 if (i + 1 < args->num_physical_allocated_ranges 4096 && args->physical_allocated_range[i + 1].size != 0) { 4097 // see if the next page will collide with the next allocated range 4098 if (nextPage >= args->physical_allocated_range[i+1].start) 4099 continue; 4100 } 4101 // see if the next physical page 
fits in the memory block 4102 if (is_page_in_physical_memory_range(args, nextPage)) { 4103 // we got one! 4104 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4105 return nextPage / B_PAGE_SIZE; 4106 } 4107 } 4108 4109 // Expanding upwards didn't work, try going downwards. 4110 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4111 phys_addr_t nextPage; 4112 4113 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4114 // see if the page after the prev allocated paddr run can be allocated 4115 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4116 // see if the next page will collide with the next allocated range 4117 if (nextPage < args->physical_allocated_range[i-1].start 4118 + args->physical_allocated_range[i-1].size) 4119 continue; 4120 } 4121 // see if the next physical page fits in the memory block 4122 if (is_page_in_physical_memory_range(args, nextPage)) { 4123 // we got one! 4124 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4125 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4126 return nextPage / B_PAGE_SIZE; 4127 } 4128 } 4129 4130 return 0; 4131 // could not allocate a block 4132 } 4133 4134 4135 /*! This one uses the kernel_args' physical and virtual memory ranges to 4136 allocate some pages before the VM is completely up. 4137 */ 4138 addr_t 4139 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4140 uint32 attributes, addr_t alignment) 4141 { 4142 if (physicalSize > virtualSize) 4143 physicalSize = virtualSize; 4144 4145 // find the vaddr to allocate at 4146 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4147 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4148 if (virtualBase == 0) { 4149 panic("vm_allocate_early: could not allocate virtual address\n"); 4150 return 0; 4151 } 4152 4153 // map the pages 4154 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4155 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4156 if (physicalAddress == 0) 4157 panic("error allocating early page!\n"); 4158 4159 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4160 4161 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4162 physicalAddress * B_PAGE_SIZE, attributes, 4163 &vm_allocate_early_physical_page); 4164 } 4165 4166 return virtualBase; 4167 } 4168 4169 4170 /*! The main entrance point to initialize the VM. 
*/ 4171 status_t 4172 vm_init(kernel_args* args) 4173 { 4174 struct preloaded_image* image; 4175 void* address; 4176 status_t err = 0; 4177 uint32 i; 4178 4179 TRACE(("vm_init: entry\n")); 4180 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4181 err = arch_vm_init(args); 4182 4183 // initialize some globals 4184 vm_page_init_num_pages(args); 4185 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4186 4187 slab_init(args); 4188 4189 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4190 off_t heapSize = INITIAL_HEAP_SIZE; 4191 // try to accommodate low-memory systems 4192 while (heapSize > sAvailableMemory / 8) 4193 heapSize /= 2; 4194 if (heapSize < 1024 * 1024) 4195 panic("vm_init: go buy some RAM please."); 4196 4197 // map in the new heap and initialize it 4198 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4199 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4200 TRACE(("heap at 0x%lx\n", heapBase)); 4201 heap_init(heapBase, heapSize); 4202 #endif 4203 4204 // initialize the free page list and physical page mapper 4205 vm_page_init(args); 4206 4207 // initialize the cache allocators 4208 vm_cache_init(args); 4209 4210 { 4211 status_t error = VMAreas::Init(); 4212 if (error != B_OK) 4213 panic("vm_init: error initializing areas map\n"); 4214 } 4215 4216 VMAddressSpace::Init(); 4217 reserve_boot_loader_ranges(args); 4218 4219 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4220 heap_init_post_area(); 4221 #endif 4222 4223 // Do any further initialization that the architecture-dependent layers may 4224 // need now 4225 arch_vm_translation_map_init_post_area(args); 4226 arch_vm_init_post_area(args); 4227 vm_page_init_post_area(args); 4228 slab_init_post_area(); 4229 4230 // allocate areas to represent stuff that already exists 4231 4232 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4233 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4234 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4235 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4236 #endif 4237 4238 allocate_kernel_args(args); 4239 4240 create_preloaded_image_areas(args->kernel_image); 4241 4242 // allocate areas for preloaded images 4243 for (image = args->preloaded_images; image != NULL; image = image->next) 4244 create_preloaded_image_areas(image); 4245 4246 // allocate kernel stacks 4247 for (i = 0; i < args->num_cpus; i++) { 4248 char name[64]; 4249 4250 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4251 address = (void*)args->cpu_kstack[i].start; 4252 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4253 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4254 } 4255 4256 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4257 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4258 4259 #if PARANOID_KERNEL_MALLOC 4260 vm_block_address_range("uninitialized heap memory", 4261 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4262 #endif 4263 #if PARANOID_KERNEL_FREE 4264 vm_block_address_range("freed heap memory", 4265 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4266 #endif 4267 4268 // create the object cache for the page mappings 4269 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4270 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4271 NULL, NULL); 4272 if (gPageMappingsObjectCache == NULL) 4273 panic("failed to create page mappings object cache"); 4274 4275
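// A vm_page_mapping is allocated from this cache whenever a page is mapped
// into an area and is returned to the cache when the page is unmapped. For
// illustration only (the exact allocation flags used by the mapping code may
// differ), a consumer of the cache typically does something like:
//
//   vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
//       gPageMappingsObjectCache, CACHE_DONT_WAIT_FOR_MEMORY);
//   if (mapping == NULL)
//       return B_NO_MEMORY;
//   // ... link the mapping into the page's and the area's mapping lists ...
//   object_cache_free(gPageMappingsObjectCache, mapping, 0);
//
// The minimum reserve set below keeps a pool of mapping objects around, so
// that the page fault path does not have to wait for memory while holding VM
// locks (see the object_cache_reserve() fallback in vm_soft_fault()).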
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4276 4277 #if DEBUG_CACHE_LIST 4278 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4279 virtual_address_restrictions virtualRestrictions = {}; 4280 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4281 physical_address_restrictions physicalRestrictions = {}; 4282 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4283 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4284 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4285 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4286 &physicalRestrictions, (void**)&sCacheInfoTable); 4287 } 4288 #endif // DEBUG_CACHE_LIST 4289 4290 // add some debugger commands 4291 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4292 add_debugger_command("area", &dump_area, 4293 "Dump info about a particular area"); 4294 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4295 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4296 #if DEBUG_CACHE_LIST 4297 if (sCacheInfoTable != NULL) { 4298 add_debugger_command_etc("caches", &dump_caches, 4299 "List all VMCache trees", 4300 "[ \"-c\" ]\n" 4301 "All cache trees are listed sorted in decreasing order by number " 4302 "of\n" 4303 "used pages or, if \"-c\" is specified, by size of committed " 4304 "memory.\n", 4305 0); 4306 } 4307 #endif 4308 add_debugger_command("avail", &dump_available_memory, 4309 "Dump available memory"); 4310 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4311 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4312 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4313 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4314 add_debugger_command("string", &display_mem, "dump strings"); 4315 4316 add_debugger_command_etc("mapping", &dump_mapping_info, 4317 "Print address mapping information", 4318 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4319 "Prints low-level page mapping information for a given address. If\n" 4320 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4321 "address that is looked up in the translation map of the current\n" 4322 "team, respectively the team specified by thread ID <thread ID>. If\n" 4323 "\"-r\" is specified, <address> is a physical address that is\n" 4324 "searched in the translation map of all teams, respectively the team\n" 4325 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4326 "<address> is the address of a vm_page structure. 
The behavior is\n" 4327 "equivalent to specifying \"-r\" with the physical address of that\n" 4328 "page.\n", 4329 0); 4330 4331 TRACE(("vm_init: exit\n")); 4332 4333 vm_cache_init_post_heap(); 4334 4335 return err; 4336 } 4337 4338 4339 status_t 4340 vm_init_post_sem(kernel_args* args) 4341 { 4342 // This frees all unused boot loader resources and makes its space available 4343 // again 4344 arch_vm_init_end(args); 4345 unreserve_boot_loader_ranges(args); 4346 4347 // fill in all of the semaphores that were not allocated before 4348 // since we're still single threaded and only the kernel address space 4349 // exists, it isn't that hard to find all of the ones we need to create 4350 4351 arch_vm_translation_map_init_post_sem(args); 4352 4353 slab_init_post_sem(); 4354 4355 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4356 heap_init_post_sem(); 4357 #endif 4358 4359 return B_OK; 4360 } 4361 4362 4363 status_t 4364 vm_init_post_thread(kernel_args* args) 4365 { 4366 vm_page_init_post_thread(args); 4367 slab_init_post_thread(); 4368 return heap_init_post_thread(); 4369 } 4370 4371 4372 status_t 4373 vm_init_post_modules(kernel_args* args) 4374 { 4375 return arch_vm_init_post_modules(args); 4376 } 4377 4378 4379 void 4380 permit_page_faults(void) 4381 { 4382 Thread* thread = thread_get_current_thread(); 4383 if (thread != NULL) 4384 atomic_add(&thread->page_faults_allowed, 1); 4385 } 4386 4387 4388 void 4389 forbid_page_faults(void) 4390 { 4391 Thread* thread = thread_get_current_thread(); 4392 if (thread != NULL) 4393 atomic_add(&thread->page_faults_allowed, -1); 4394 } 4395 4396 4397 status_t 4398 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4399 bool isUser, addr_t* newIP) 4400 { 4401 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4402 faultAddress)); 4403 4404 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4405 4406 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4407 VMAddressSpace* addressSpace = NULL; 4408 4409 status_t status = B_OK; 4410 *newIP = 0; 4411 atomic_add((int32*)&sPageFaults, 1); 4412 4413 if (IS_KERNEL_ADDRESS(pageAddress)) { 4414 addressSpace = VMAddressSpace::GetKernel(); 4415 } else if (IS_USER_ADDRESS(pageAddress)) { 4416 addressSpace = VMAddressSpace::GetCurrent(); 4417 if (addressSpace == NULL) { 4418 if (!isUser) { 4419 dprintf("vm_page_fault: kernel thread accessing invalid user " 4420 "memory!\n"); 4421 status = B_BAD_ADDRESS; 4422 TPF(PageFaultError(-1, 4423 VMPageFaultTracing 4424 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4425 } else { 4426 // XXX weird state. 
4427 panic("vm_page_fault: non kernel thread accessing user memory " 4428 "that doesn't exist!\n"); 4429 status = B_BAD_ADDRESS; 4430 } 4431 } 4432 } else { 4433 // the hit was probably in the 64k DMZ between kernel and user space 4434 // this keeps a user space thread from passing a buffer that crosses 4435 // into kernel space 4436 status = B_BAD_ADDRESS; 4437 TPF(PageFaultError(-1, 4438 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4439 } 4440 4441 if (status == B_OK) { 4442 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4443 isUser, NULL); 4444 } 4445 4446 if (status < B_OK) { 4447 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4448 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4449 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4450 thread_get_current_thread_id()); 4451 if (!isUser) { 4452 Thread* thread = thread_get_current_thread(); 4453 if (thread != NULL && thread->fault_handler != 0) { 4454 // this will cause the arch dependant page fault handler to 4455 // modify the IP on the interrupt frame or whatever to return 4456 // to this address 4457 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4458 } else { 4459 // unhandled page fault in the kernel 4460 panic("vm_page_fault: unhandled page fault in kernel space at " 4461 "0x%lx, ip 0x%lx\n", address, faultAddress); 4462 } 4463 } else { 4464 Thread* thread = thread_get_current_thread(); 4465 4466 #ifdef TRACE_FAULTS 4467 VMArea* area = NULL; 4468 if (addressSpace != NULL) { 4469 addressSpace->ReadLock(); 4470 area = addressSpace->LookupArea(faultAddress); 4471 } 4472 4473 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4474 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4475 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4476 thread->team->Name(), thread->team->id, 4477 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4478 faultAddress, area ? area->name : "???", faultAddress - (area ? 4479 area->Base() : 0x0)); 4480 4481 if (addressSpace != NULL) 4482 addressSpace->ReadUnlock(); 4483 #endif 4484 4485 // If the thread has a signal handler for SIGSEGV, we simply 4486 // send it the signal. Otherwise we notify the user debugger 4487 // first. 4488 struct sigaction action; 4489 if ((sigaction(SIGSEGV, NULL, &action) == 0 4490 && action.sa_handler != SIG_DFL 4491 && action.sa_handler != SIG_IGN) 4492 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4493 SIGSEGV)) { 4494 Signal signal(SIGSEGV, 4495 status == B_PERMISSION_DENIED 4496 ? 
SEGV_ACCERR : SEGV_MAPERR, 4497 EFAULT, thread->team->id); 4498 signal.SetAddress((void*)address); 4499 send_signal_to_thread(thread, signal, 0); 4500 } 4501 } 4502 } 4503 4504 if (addressSpace != NULL) 4505 addressSpace->Put(); 4506 4507 return B_HANDLED_INTERRUPT; 4508 } 4509 4510 4511 struct PageFaultContext { 4512 AddressSpaceReadLocker addressSpaceLocker; 4513 VMCacheChainLocker cacheChainLocker; 4514 4515 VMTranslationMap* map; 4516 VMCache* topCache; 4517 off_t cacheOffset; 4518 vm_page_reservation reservation; 4519 bool isWrite; 4520 4521 // return values 4522 vm_page* page; 4523 bool restart; 4524 bool pageAllocated; 4525 4526 4527 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4528 : 4529 addressSpaceLocker(addressSpace, true), 4530 map(addressSpace->TranslationMap()), 4531 isWrite(isWrite) 4532 { 4533 } 4534 4535 ~PageFaultContext() 4536 { 4537 UnlockAll(); 4538 vm_page_unreserve_pages(&reservation); 4539 } 4540 4541 void Prepare(VMCache* topCache, off_t cacheOffset) 4542 { 4543 this->topCache = topCache; 4544 this->cacheOffset = cacheOffset; 4545 page = NULL; 4546 restart = false; 4547 pageAllocated = false; 4548 4549 cacheChainLocker.SetTo(topCache); 4550 } 4551 4552 void UnlockAll(VMCache* exceptCache = NULL) 4553 { 4554 topCache = NULL; 4555 addressSpaceLocker.Unlock(); 4556 cacheChainLocker.Unlock(exceptCache); 4557 } 4558 }; 4559 4560 4561 /*! Gets the page that should be mapped into the area. 4562 Returns an error code other than \c B_OK, if the page couldn't be found or 4563 paged in. The locking state of the address space and the caches is undefined 4564 in that case. 4565 Returns \c B_OK with \c context.restart set to \c true, if the functions 4566 had to unlock the address space and all caches and is supposed to be called 4567 again. 4568 Returns \c B_OK with \c context.restart set to \c false, if the page was 4569 found. It is returned in \c context.page. The address space will still be 4570 locked as well as all caches starting from the top cache to at least the 4571 cache the page lives in. 4572 */ 4573 static status_t 4574 fault_get_page(PageFaultContext& context) 4575 { 4576 VMCache* cache = context.topCache; 4577 VMCache* lastCache = NULL; 4578 vm_page* page = NULL; 4579 4580 while (cache != NULL) { 4581 // We already hold the lock of the cache at this point. 4582 4583 lastCache = cache; 4584 4585 page = cache->LookupPage(context.cacheOffset); 4586 if (page != NULL && page->busy) { 4587 // page must be busy -- wait for it to become unbusy 4588 context.UnlockAll(cache); 4589 cache->ReleaseRefLocked(); 4590 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4591 4592 // restart the whole process 4593 context.restart = true; 4594 return B_OK; 4595 } 4596 4597 if (page != NULL) 4598 break; 4599 4600 // The current cache does not contain the page we're looking for. 4601 4602 // see if the backing store has it 4603 if (cache->HasPage(context.cacheOffset)) { 4604 // insert a fresh page and mark it busy -- we're going to read it in 4605 page = vm_page_allocate_page(&context.reservation, 4606 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4607 cache->InsertPage(page, context.cacheOffset); 4608 4609 // We need to unlock all caches and the address space while reading 4610 // the page in. Keep a reference to the cache around. 
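// (The backing store read below may block on disk I/O for a long time and
// may itself need to allocate memory or take other locks, so holding the
// address space or any cache lock across it could deadlock; the extra
// reference only keeps the cache alive while it is unlocked. Because the
// world may change while everything is unlocked, the fault is restarted
// afterwards -- see below.)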
4611 cache->AcquireRefLocked(); 4612 context.UnlockAll(); 4613 4614 // read the page in 4615 generic_io_vec vec; 4616 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4617 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4618 4619 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4620 B_PHYSICAL_IO_REQUEST, &bytesRead); 4621 4622 cache->Lock(); 4623 4624 if (status < B_OK) { 4625 // on error remove and free the page 4626 dprintf("reading page from cache %p returned: %s!\n", 4627 cache, strerror(status)); 4628 4629 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4630 cache->RemovePage(page); 4631 vm_page_set_state(page, PAGE_STATE_FREE); 4632 4633 cache->ReleaseRefAndUnlock(); 4634 return status; 4635 } 4636 4637 // mark the page unbusy again 4638 cache->MarkPageUnbusy(page); 4639 4640 DEBUG_PAGE_ACCESS_END(page); 4641 4642 // Since we needed to unlock everything temporarily, the area 4643 // situation might have changed. So we need to restart the whole 4644 // process. 4645 cache->ReleaseRefAndUnlock(); 4646 context.restart = true; 4647 return B_OK; 4648 } 4649 4650 cache = context.cacheChainLocker.LockSourceCache(); 4651 } 4652 4653 if (page == NULL) { 4654 // There was no adequate page, determine the cache for a clean one. 4655 // Read-only pages come in the deepest cache, only the top most cache 4656 // may have direct write access. 4657 cache = context.isWrite ? context.topCache : lastCache; 4658 4659 // allocate a clean page 4660 page = vm_page_allocate_page(&context.reservation, 4661 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4662 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4663 page->physical_page_number)); 4664 4665 // insert the new page into our cache 4666 cache->InsertPage(page, context.cacheOffset); 4667 context.pageAllocated = true; 4668 } else if (page->Cache() != context.topCache && context.isWrite) { 4669 // We have a page that has the data we want, but in the wrong cache 4670 // object so we need to copy it and stick it into the top cache. 4671 vm_page* sourcePage = page; 4672 4673 // TODO: If memory is low, it might be a good idea to steal the page 4674 // from our source cache -- if possible, that is. 4675 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4676 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4677 4678 // To not needlessly kill concurrency we unlock all caches but the top 4679 // one while copying the page. Lacking another mechanism to ensure that 4680 // the source page doesn't disappear, we mark it busy. 4681 sourcePage->busy = true; 4682 context.cacheChainLocker.UnlockKeepRefs(true); 4683 4684 // copy the page 4685 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4686 sourcePage->physical_page_number * B_PAGE_SIZE); 4687 4688 context.cacheChainLocker.RelockCaches(true); 4689 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4690 4691 // insert the new page into our cache 4692 context.topCache->InsertPage(page, context.cacheOffset); 4693 context.pageAllocated = true; 4694 } else 4695 DEBUG_PAGE_ACCESS_START(page); 4696 4697 context.page = page; 4698 return B_OK; 4699 } 4700 4701 4702 /*! Makes sure the address in the given address space is mapped. 4703 4704 \param addressSpace The address space. 4705 \param originalAddress The address. Doesn't need to be page aligned. 4706 \param isWrite If \c true the address shall be write-accessible. 4707 \param isUser If \c true the access is requested by a userland team. 
4708 \param wirePage On success, if non \c NULL, the wired count of the page 4709 mapped at the given address is incremented and the page is returned 4710 via this parameter. 4711 \return \c B_OK on success, another error code otherwise. 4712 */ 4713 static status_t 4714 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4715 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4716 { 4717 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4718 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4719 originalAddress, isWrite, isUser)); 4720 4721 PageFaultContext context(addressSpace, isWrite); 4722 4723 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4724 status_t status = B_OK; 4725 4726 addressSpace->IncrementFaultCount(); 4727 4728 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4729 // the pages upfront makes sure we don't have any cache locked, so that the 4730 // page daemon/thief can do their job without problems. 4731 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4732 originalAddress); 4733 context.addressSpaceLocker.Unlock(); 4734 vm_page_reserve_pages(&context.reservation, reservePages, 4735 addressSpace == VMAddressSpace::Kernel() 4736 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4737 4738 while (true) { 4739 context.addressSpaceLocker.Lock(); 4740 4741 // get the area the fault was in 4742 VMArea* area = addressSpace->LookupArea(address); 4743 if (area == NULL) { 4744 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4745 "space\n", originalAddress); 4746 TPF(PageFaultError(-1, 4747 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4748 status = B_BAD_ADDRESS; 4749 break; 4750 } 4751 4752 // check permissions 4753 uint32 protection = get_area_page_protection(area, address); 4754 if (isUser && (protection & B_USER_PROTECTION) == 0 4755 && (area->protection & B_KERNEL_AREA) != 0) { 4756 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4757 area->id, (void*)originalAddress); 4758 TPF(PageFaultError(area->id, 4759 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4760 status = B_PERMISSION_DENIED; 4761 break; 4762 } 4763 if (isWrite && (protection 4764 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4765 dprintf("write access attempted on write-protected area 0x%" 4766 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4767 TPF(PageFaultError(area->id, 4768 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4769 status = B_PERMISSION_DENIED; 4770 break; 4771 } else if (isExecute && (protection 4772 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4773 dprintf("instruction fetch attempted on execute-protected area 0x%" 4774 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4775 TPF(PageFaultError(area->id, 4776 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4777 status = B_PERMISSION_DENIED; 4778 break; 4779 } else if (!isWrite && !isExecute && (protection 4780 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4781 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4782 " at %p\n", area->id, (void*)originalAddress); 4783 TPF(PageFaultError(area->id, 4784 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4785 status = B_PERMISSION_DENIED; 4786 break; 4787 } 4788 4789 // We have the area, it was a valid access, so let's try to resolve the 4790 // page fault now. 4791 // At first, the top most cache from the area is investigated. 
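// The fault offset within the area's cache is the page-aligned fault
// address relative to the area's base, shifted by the area's starting
// offset into the cache. As a purely illustrative example: for an area
// based at 0x10000000 that maps its cache starting at offset 0x2000, a
// fault at 0x10005234 is rounded down to the page address 0x10005000 and
// resolved at cache offset 0x10005000 - 0x10000000 + 0x2000 = 0x7000.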
4792 4793 context.Prepare(vm_area_get_locked_cache(area), 4794 address - area->Base() + area->cache_offset); 4795 4796 // See if this cache has a fault handler -- this will do all the work 4797 // for us. 4798 { 4799 // Note, since the page fault is resolved with interrupts enabled, 4800 // the fault handler could be called more than once for the same 4801 // reason -- the store must take this into account. 4802 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4803 if (status != B_BAD_HANDLER) 4804 break; 4805 } 4806 4807 // The top most cache has no fault handler, so let's see if the cache or 4808 // its sources already have the page we're searching for (we're going 4809 // from top to bottom). 4810 status = fault_get_page(context); 4811 if (status != B_OK) { 4812 TPF(PageFaultError(area->id, status)); 4813 break; 4814 } 4815 4816 if (context.restart) 4817 continue; 4818 4819 // All went fine, all there is left to do is to map the page into the 4820 // address space. 4821 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4822 context.page)); 4823 4824 // If the page doesn't reside in the area's cache, we need to make sure 4825 // it's mapped in read-only, so that we cannot overwrite someone else's 4826 // data (copy-on-write) 4827 uint32 newProtection = protection; 4828 if (context.page->Cache() != context.topCache && !isWrite) 4829 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4830 4831 bool unmapPage = false; 4832 bool mapPage = true; 4833 4834 // check whether there's already a page mapped at the address 4835 context.map->Lock(); 4836 4837 phys_addr_t physicalAddress; 4838 uint32 flags; 4839 vm_page* mappedPage = NULL; 4840 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4841 && (flags & PAGE_PRESENT) != 0 4842 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4843 != NULL) { 4844 // Yep there's already a page. If it's ours, we can simply adjust 4845 // its protection. Otherwise we have to unmap it. 4846 if (mappedPage == context.page) { 4847 context.map->ProtectPage(area, address, newProtection); 4848 // Note: We assume that ProtectPage() is atomic (i.e. 4849 // the page isn't temporarily unmapped), otherwise we'd have 4850 // to make sure it isn't wired. 4851 mapPage = false; 4852 } else 4853 unmapPage = true; 4854 } 4855 4856 context.map->Unlock(); 4857 4858 if (unmapPage) { 4859 // If the page is wired, we can't unmap it. Wait until it is unwired 4860 // again and restart. Note that the page cannot be wired for 4861 // writing, since it isn't in the topmost cache. So we can safely 4862 // ignore ranges wired for writing (our own and other concurrent 4863 // wiring attempts in progress) and in fact have to do that to avoid 4864 // a deadlock. 4865 VMAreaUnwiredWaiter waiter; 4866 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4867 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4868 // unlock everything and wait 4869 if (context.pageAllocated) { 4870 // ... but since we allocated a page and inserted it into 4871 // the top cache, remove and free it first. Otherwise we'd 4872 // have a page from a lower cache mapped while an upper 4873 // cache has a page that would shadow it.
context.topCache->RemovePage(context.page); 4875 vm_page_free_etc(context.topCache, context.page, 4876 &context.reservation); 4877 } else 4878 DEBUG_PAGE_ACCESS_END(context.page); 4879 4880 context.UnlockAll(); 4881 waiter.waitEntry.Wait(); 4882 continue; 4883 } 4884 4885 // Note: The mapped page is a page of a lower cache. We are 4886 // guaranteed to have that cache locked, our new page is a copy of 4887 // that page, and the page is not busy. The logic for that guarantee 4888 // is as follows: Since the page is mapped, it must live in the top 4889 // cache (ruled out above) or any of its lower caches, and there is 4890 // (was before the new page was inserted) no other page in any 4891 // cache between the top cache and the page's cache (otherwise that 4892 // would be mapped instead). That in turn means that our algorithm 4893 // must have found it and therefore it cannot be busy either. 4894 DEBUG_PAGE_ACCESS_START(mappedPage); 4895 unmap_page(area, address); 4896 DEBUG_PAGE_ACCESS_END(mappedPage); 4897 } 4898 4899 if (mapPage) { 4900 if (map_page(area, context.page, address, newProtection, 4901 &context.reservation) != B_OK) { 4902 // Mapping can only fail when the page mapping object couldn't 4903 // be allocated. Save for the missing mapping everything is 4904 // fine, though. If this was a regular page fault, we'll simply 4905 // leave and probably fault again. To make sure we'll have more 4906 // luck then, we ensure that the minimum object reserve is 4907 // available. 4908 DEBUG_PAGE_ACCESS_END(context.page); 4909 4910 context.UnlockAll(); 4911 4912 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4913 != B_OK) { 4914 // Apparently the situation is serious. Let's get ourselves 4915 // killed. 4916 status = B_NO_MEMORY; 4917 } else if (wirePage != NULL) { 4918 // The caller expects us to wire the page. Since 4919 // object_cache_reserve() succeeded, we should now be able 4920 // to allocate a mapping structure. Restart.
4921 continue; 4922 } 4923 4924 break; 4925 } 4926 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4927 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4928 4929 // also wire the page, if requested 4930 if (wirePage != NULL && status == B_OK) { 4931 increment_page_wired_count(context.page); 4932 *wirePage = context.page; 4933 } 4934 4935 DEBUG_PAGE_ACCESS_END(context.page); 4936 4937 break; 4938 } 4939 4940 return status; 4941 } 4942 4943 4944 status_t 4945 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4946 { 4947 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4948 } 4949 4950 status_t 4951 vm_put_physical_page(addr_t vaddr, void* handle) 4952 { 4953 return sPhysicalPageMapper->PutPage(vaddr, handle); 4954 } 4955 4956 4957 status_t 4958 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4959 void** _handle) 4960 { 4961 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4962 } 4963 4964 status_t 4965 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4966 { 4967 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4968 } 4969 4970 4971 status_t 4972 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4973 { 4974 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4975 } 4976 4977 status_t 4978 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4979 { 4980 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4981 } 4982 4983 4984 void 4985 vm_get_info(system_info* info) 4986 { 4987 swap_get_info(info); 4988 4989 MutexLocker locker(sAvailableMemoryLock); 4990 info->needed_memory = sNeededMemory; 4991 info->free_memory = sAvailableMemory; 4992 } 4993 4994 4995 uint32 4996 vm_num_page_faults(void) 4997 { 4998 return sPageFaults; 4999 } 5000 5001 5002 off_t 5003 vm_available_memory(void) 5004 { 5005 MutexLocker locker(sAvailableMemoryLock); 5006 return sAvailableMemory; 5007 } 5008 5009 5010 off_t 5011 vm_available_not_needed_memory(void) 5012 { 5013 MutexLocker locker(sAvailableMemoryLock); 5014 return sAvailableMemory - sNeededMemory; 5015 } 5016 5017 5018 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5019 debugger. 
5020 */ 5021 off_t 5022 vm_available_not_needed_memory_debug(void) 5023 { 5024 return sAvailableMemory - sNeededMemory; 5025 } 5026 5027 5028 size_t 5029 vm_kernel_address_space_left(void) 5030 { 5031 return VMAddressSpace::Kernel()->FreeSpace(); 5032 } 5033 5034 5035 void 5036 vm_unreserve_memory(size_t amount) 5037 { 5038 mutex_lock(&sAvailableMemoryLock); 5039 5040 sAvailableMemory += amount; 5041 5042 mutex_unlock(&sAvailableMemoryLock); 5043 } 5044 5045 5046 status_t 5047 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5048 { 5049 size_t reserve = kMemoryReserveForPriority[priority]; 5050 5051 MutexLocker locker(sAvailableMemoryLock); 5052 5053 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5054 5055 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5056 sAvailableMemory -= amount; 5057 return B_OK; 5058 } 5059 5060 if (timeout <= 0) 5061 return B_NO_MEMORY; 5062 5063 // turn timeout into an absolute timeout 5064 timeout += system_time(); 5065 5066 // loop until we've got the memory or the timeout occurs 5067 do { 5068 sNeededMemory += amount; 5069 5070 // call the low resource manager 5071 locker.Unlock(); 5072 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5073 B_ABSOLUTE_TIMEOUT, timeout); 5074 locker.Lock(); 5075 5076 sNeededMemory -= amount; 5077 5078 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5079 sAvailableMemory -= amount; 5080 return B_OK; 5081 } 5082 } while (timeout > system_time()); 5083 5084 return B_NO_MEMORY; 5085 } 5086 5087 5088 status_t 5089 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5090 { 5091 // NOTE: The caller is responsible for synchronizing calls to this function! 5092 5093 AddressSpaceReadLocker locker; 5094 VMArea* area; 5095 status_t status = locker.SetFromArea(id, area); 5096 if (status != B_OK) 5097 return status; 5098 5099 // nothing to do, if the type doesn't change 5100 uint32 oldType = area->MemoryType(); 5101 if (type == oldType) 5102 return B_OK; 5103 5104 // set the memory type of the area and the mapped pages 5105 VMTranslationMap* map = area->address_space->TranslationMap(); 5106 map->Lock(); 5107 area->SetMemoryType(type); 5108 map->ProtectArea(area, area->protection); 5109 map->Unlock(); 5110 5111 // set the physical memory type 5112 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5113 if (error != B_OK) { 5114 // reset the memory type of the area and the mapped pages 5115 map->Lock(); 5116 area->SetMemoryType(oldType); 5117 map->ProtectArea(area, area->protection); 5118 map->Unlock(); 5119 return error; 5120 } 5121 5122 return B_OK; 5123 5124 } 5125 5126 5127 /*! 
This function enforces some protection properties: 5128 - kernel areas must be W^X (after kernel startup) 5129 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5130 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5131 */ 5132 static void 5133 fix_protection(uint32* protection) 5134 { 5135 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5136 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5137 || (*protection & B_WRITE_AREA) != 0) 5138 && !gKernelStartup) 5139 panic("kernel areas cannot be both writable and executable!"); 5140 5141 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5142 if ((*protection & B_WRITE_AREA) != 0) 5143 *protection |= B_KERNEL_WRITE_AREA; 5144 if ((*protection & B_READ_AREA) != 0) 5145 *protection |= B_KERNEL_READ_AREA; 5146 } 5147 } 5148 5149 5150 static void 5151 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5152 { 5153 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5154 info->area = area->id; 5155 info->address = (void*)area->Base(); 5156 info->size = area->Size(); 5157 info->protection = area->protection; 5158 info->lock = area->wiring; 5159 info->team = area->address_space->ID(); 5160 info->copy_count = 0; 5161 info->in_count = 0; 5162 info->out_count = 0; 5163 // TODO: retrieve real values here! 5164 5165 VMCache* cache = vm_area_get_locked_cache(area); 5166 5167 // Note, this is a simplification; the cache could be larger than this area 5168 info->ram_size = cache->page_count * B_PAGE_SIZE; 5169 5170 vm_area_put_locked_cache(cache); 5171 } 5172 5173 5174 static status_t 5175 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5176 { 5177 // is newSize a multiple of B_PAGE_SIZE? 5178 if (newSize & (B_PAGE_SIZE - 1)) 5179 return B_BAD_VALUE; 5180 5181 // lock all affected address spaces and the cache 5182 VMArea* area; 5183 VMCache* cache; 5184 5185 MultiAddressSpaceLocker locker; 5186 AreaCacheLocker cacheLocker; 5187 5188 status_t status; 5189 size_t oldSize; 5190 bool anyKernelArea; 5191 bool restart; 5192 5193 do { 5194 anyKernelArea = false; 5195 restart = false; 5196 5197 locker.Unset(); 5198 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5199 if (status != B_OK) 5200 return status; 5201 cacheLocker.SetTo(cache, true); // already locked 5202 5203 // enforce restrictions 5204 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5205 || (area->protection & B_KERNEL_AREA) != 0)) { 5206 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5207 "resize kernel area %" B_PRId32 " (%s)\n", 5208 team_get_current_team_id(), areaID, area->name); 5209 return B_NOT_ALLOWED; 5210 } 5211 // TODO: Enforce all restrictions (team, etc.)! 5212 5213 oldSize = area->Size(); 5214 if (newSize == oldSize) 5215 return B_OK; 5216 5217 if (cache->type != CACHE_TYPE_RAM) 5218 return B_NOT_ALLOWED; 5219 5220 if (oldSize < newSize) { 5221 // We need to check if all areas of this cache can be resized. 5222 for (VMArea* current = cache->areas; current != NULL; 5223 current = current->cache_next) { 5224 if (!current->address_space->CanResizeArea(current, newSize)) 5225 return B_ERROR; 5226 anyKernelArea 5227 |= current->address_space == VMAddressSpace::Kernel(); 5228 } 5229 } else { 5230 // We're shrinking the areas, so we must make sure the affected 5231 // ranges are not wired. 
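// If any affected range is still wired, wait_if_area_range_is_wired() has
// to unlock and wait for it to be unwired. Since the areas and the cache
// may have changed in the meantime, the whole lock-and-check sequence is
// then restarted from the top of the enclosing do/while loop.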
5232 for (VMArea* current = cache->areas; current != NULL; 5233 current = current->cache_next) { 5234 anyKernelArea 5235 |= current->address_space == VMAddressSpace::Kernel(); 5236 5237 if (wait_if_area_range_is_wired(current, 5238 current->Base() + newSize, oldSize - newSize, &locker, 5239 &cacheLocker)) { 5240 restart = true; 5241 break; 5242 } 5243 } 5244 } 5245 } while (restart); 5246 5247 // Okay, looks good so far, so let's do it 5248 5249 int priority = kernel && anyKernelArea 5250 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5251 uint32 allocationFlags = kernel && anyKernelArea 5252 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5253 5254 if (oldSize < newSize) { 5255 // Growing the cache can fail, so we do it first. 5256 status = cache->Resize(cache->virtual_base + newSize, priority); 5257 if (status != B_OK) 5258 return status; 5259 } 5260 5261 for (VMArea* current = cache->areas; current != NULL; 5262 current = current->cache_next) { 5263 status = current->address_space->ResizeArea(current, newSize, 5264 allocationFlags); 5265 if (status != B_OK) 5266 break; 5267 5268 // We also need to unmap all pages beyond the new size, if the area has 5269 // shrunk 5270 if (newSize < oldSize) { 5271 VMCacheChainLocker cacheChainLocker(cache); 5272 cacheChainLocker.LockAllSourceCaches(); 5273 5274 unmap_pages(current, current->Base() + newSize, 5275 oldSize - newSize); 5276 5277 cacheChainLocker.Unlock(cache); 5278 } 5279 } 5280 5281 if (status == B_OK) { 5282 // Shrink or grow individual page protections if in use. 5283 if (area->page_protections != NULL) { 5284 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5285 uint8* newProtections 5286 = (uint8*)realloc(area->page_protections, bytes); 5287 if (newProtections == NULL) 5288 status = B_NO_MEMORY; 5289 else { 5290 area->page_protections = newProtections; 5291 5292 if (oldSize < newSize) { 5293 // init the additional page protections to that of the area 5294 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5295 uint32 areaProtection = area->protection 5296 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5297 memset(area->page_protections + offset, 5298 areaProtection | (areaProtection << 4), bytes - offset); 5299 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5300 uint8& entry = area->page_protections[offset - 1]; 5301 entry = (entry & 0x0f) | (areaProtection << 4); 5302 } 5303 } 5304 } 5305 } 5306 } 5307 5308 // shrinking the cache can't fail, so we do it now 5309 if (status == B_OK && newSize < oldSize) 5310 status = cache->Resize(cache->virtual_base + newSize, priority); 5311 5312 if (status != B_OK) { 5313 // Something failed -- resize the areas back to their original size. 5314 // This can fail, too, in which case we're seriously screwed. 
5315 for (VMArea* current = cache->areas; current != NULL; 5316 current = current->cache_next) { 5317 if (current->address_space->ResizeArea(current, oldSize, 5318 allocationFlags) != B_OK) { 5319 panic("vm_resize_area(): Failed and not being able to restore " 5320 "original state."); 5321 } 5322 } 5323 5324 cache->Resize(cache->virtual_base + oldSize, priority); 5325 } 5326 5327 // TODO: we must honour the lock restrictions of this area 5328 return status; 5329 } 5330 5331 5332 status_t 5333 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5334 { 5335 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5336 } 5337 5338 5339 status_t 5340 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5341 { 5342 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5343 } 5344 5345 5346 status_t 5347 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5348 bool user) 5349 { 5350 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5351 } 5352 5353 5354 void 5355 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5356 { 5357 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5358 } 5359 5360 5361 /*! Copies a range of memory directly from/to a page that might not be mapped 5362 at the moment. 5363 5364 For \a unsafeMemory the current mapping (if any) is ignored. The function 5365 walks through the respective area's cache chain to find the physical page 5366 and copies from/to it directly. 5367 The memory range starting at \a unsafeMemory with a length of \a size bytes 5368 must not cross a page boundary. 5369 5370 \param teamID The team ID identifying the address space \a unsafeMemory is 5371 to be interpreted in. Ignored if \a unsafeMemory is a kernel address 5372 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5373 is passed, the address space of the thread returned by 5374 debug_get_debugged_thread() is used. 5375 \param unsafeMemory The start of the unsafe memory range to be copied 5376 from/to. 5377 \param buffer A safely accessible kernel buffer to be copied from/to. 5378 \param size The number of bytes to be copied. 5379 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5380 \a unsafeMemory, the other way around otherwise.
5381 */ 5382 status_t 5383 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5384 size_t size, bool copyToUnsafe) 5385 { 5386 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5387 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5388 return B_BAD_VALUE; 5389 } 5390 5391 // get the address space for the debugged thread 5392 VMAddressSpace* addressSpace; 5393 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5394 addressSpace = VMAddressSpace::Kernel(); 5395 } else if (teamID == B_CURRENT_TEAM) { 5396 Thread* thread = debug_get_debugged_thread(); 5397 if (thread == NULL || thread->team == NULL) 5398 return B_BAD_ADDRESS; 5399 5400 addressSpace = thread->team->address_space; 5401 } else 5402 addressSpace = VMAddressSpace::DebugGet(teamID); 5403 5404 if (addressSpace == NULL) 5405 return B_BAD_ADDRESS; 5406 5407 // get the area 5408 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5409 if (area == NULL) 5410 return B_BAD_ADDRESS; 5411 5412 // search the page 5413 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5414 + area->cache_offset; 5415 VMCache* cache = area->cache; 5416 vm_page* page = NULL; 5417 while (cache != NULL) { 5418 page = cache->DebugLookupPage(cacheOffset); 5419 if (page != NULL) 5420 break; 5421 5422 // Page not found in this cache -- if it is paged out, we must not try 5423 // to get it from lower caches. 5424 if (cache->DebugHasPage(cacheOffset)) 5425 break; 5426 5427 cache = cache->source; 5428 } 5429 5430 if (page == NULL) 5431 return B_UNSUPPORTED; 5432 5433 // copy from/to physical memory 5434 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5435 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5436 5437 if (copyToUnsafe) { 5438 if (page->Cache() != area->cache) 5439 return B_UNSUPPORTED; 5440 5441 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5442 } 5443 5444 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5445 } 5446 5447 5448 /** Validate that a memory range is either fully in kernel space, or fully in 5449 * userspace */ 5450 static inline bool 5451 validate_memory_range(const void* addr, size_t size) 5452 { 5453 addr_t address = (addr_t)addr; 5454 5455 // Check for overflows on all addresses. 5456 if ((address + size) < address) 5457 return false; 5458 5459 // Validate that the address range does not cross the kernel/user boundary. 5460 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5461 } 5462 5463 5464 // #pragma mark - kernel public API 5465 5466 5467 status_t 5468 user_memcpy(void* to, const void* from, size_t size) 5469 { 5470 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5471 return B_BAD_ADDRESS; 5472 5473 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5474 return B_BAD_ADDRESS; 5475 5476 return B_OK; 5477 } 5478 5479 5480 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5481 the string in \a to, NULL-terminating the result. 5482 5483 \param to Pointer to the destination C-string. 5484 \param from Pointer to the source C-string. 5485 \param size Size in bytes of the string buffer pointed to by \a to. 5486 5487 \return strlen(\a from). 5488 */ 5489 ssize_t 5490 user_strlcpy(char* to, const char* from, size_t size) 5491 { 5492 if (to == NULL && size != 0) 5493 return B_BAD_VALUE; 5494 if (from == NULL) 5495 return B_BAD_ADDRESS; 5496 5497 // Protect the source address from overflows. 
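// As an illustrative example (assuming 32-bit addresses): with from at
// 0xfffff000 and size 0x2000, (addr_t)from + maxSize wraps around to
// 0x1000, which is smaller than from, so maxSize is reduced by that
// wrapped value to 0x1000 -- exactly the number of bytes left before the
// top of the address space. The second check below similarly clamps a user
// source pointer so that the copy cannot run past USER_TOP into kernel
// space.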
5498 size_t maxSize = size; 5499 if ((addr_t)from + maxSize < (addr_t)from) 5500 maxSize -= (addr_t)from + maxSize; 5501 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5502 maxSize = USER_TOP - (addr_t)from; 5503 5504 if (!validate_memory_range(to, maxSize)) 5505 return B_BAD_ADDRESS; 5506 5507 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5508 if (result < 0) 5509 return result; 5510 5511 // If we hit the address overflow boundary, fail. 5512 if ((size_t)result >= maxSize && maxSize < size) 5513 return B_BAD_ADDRESS; 5514 5515 return result; 5516 } 5517 5518 5519 status_t 5520 user_memset(void* s, char c, size_t count) 5521 { 5522 if (!validate_memory_range(s, count)) 5523 return B_BAD_ADDRESS; 5524 5525 if (arch_cpu_user_memset(s, c, count) < B_OK) 5526 return B_BAD_ADDRESS; 5527 5528 return B_OK; 5529 } 5530 5531 5532 /*! Wires a single page at the given address. 5533 5534 \param team The team whose address space the address belongs to. Supports 5535 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5536 parameter is ignored. 5537 \param address The virtual address to wire down. Does not need to 5538 be page aligned. 5539 \param writable If \c true the page shall be writable. 5540 \param info On success the info is filled in, among other things 5541 containing the physical address the given virtual one translates to. 5542 \return \c B_OK when the page could be wired, another error code otherwise. 5543 */ 5544 status_t 5545 vm_wire_page(team_id team, addr_t address, bool writable, 5546 VMPageWiringInfo* info) 5547 { 5548 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5549 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5550 5551 // compute the page protection that is required 5552 bool isUser = IS_USER_ADDRESS(address); 5553 uint32 requiredProtection = PAGE_PRESENT 5554 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5555 if (writable) 5556 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5557 5558 // get and read lock the address space 5559 VMAddressSpace* addressSpace = NULL; 5560 if (isUser) { 5561 if (team == B_CURRENT_TEAM) 5562 addressSpace = VMAddressSpace::GetCurrent(); 5563 else 5564 addressSpace = VMAddressSpace::Get(team); 5565 } else 5566 addressSpace = VMAddressSpace::GetKernel(); 5567 if (addressSpace == NULL) 5568 return B_ERROR; 5569 5570 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5571 5572 VMTranslationMap* map = addressSpace->TranslationMap(); 5573 status_t error = B_OK; 5574 5575 // get the area 5576 VMArea* area = addressSpace->LookupArea(pageAddress); 5577 if (area == NULL) { 5578 addressSpace->Put(); 5579 return B_BAD_ADDRESS; 5580 } 5581 5582 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5583 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5584 5585 // mark the area range wired 5586 area->Wire(&info->range); 5587 5588 // Lock the area's cache chain and the translation map. Needed to look 5589 // up the page and play with its wired count. 5590 cacheChainLocker.LockAllSourceCaches(); 5591 map->Lock(); 5592 5593 phys_addr_t physicalAddress; 5594 uint32 flags; 5595 vm_page* page; 5596 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5597 && (flags & requiredProtection) == requiredProtection 5598 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5599 != NULL) { 5600 // Already mapped with the correct permissions -- just increment 5601 // the page's wired count.
5602 increment_page_wired_count(page); 5603 5604 map->Unlock(); 5605 cacheChainLocker.Unlock(); 5606 addressSpaceLocker.Unlock(); 5607 } else { 5608 // Let vm_soft_fault() map the page for us, if possible. We need 5609 // to fully unlock to avoid deadlocks. Since we have already 5610 // wired the area itself, nothing disturbing will happen with it 5611 // in the meantime. 5612 map->Unlock(); 5613 cacheChainLocker.Unlock(); 5614 addressSpaceLocker.Unlock(); 5615 5616 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5617 isUser, &page); 5618 5619 if (error != B_OK) { 5620 // The page could not be mapped -- clean up. 5621 VMCache* cache = vm_area_get_locked_cache(area); 5622 area->Unwire(&info->range); 5623 cache->ReleaseRefAndUnlock(); 5624 addressSpace->Put(); 5625 return error; 5626 } 5627 } 5628 5629 info->physicalAddress 5630 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5631 + address % B_PAGE_SIZE; 5632 info->page = page; 5633 5634 return B_OK; 5635 } 5636 5637 5638 /*! Unwires a single page previously wired via vm_wire_page(). 5639 5640 \param info The same object passed to vm_wire_page() before. 5641 */ 5642 void 5643 vm_unwire_page(VMPageWiringInfo* info) 5644 { 5645 // lock the address space 5646 VMArea* area = info->range.area; 5647 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5648 // takes over our reference 5649 5650 // lock the top cache 5651 VMCache* cache = vm_area_get_locked_cache(area); 5652 VMCacheChainLocker cacheChainLocker(cache); 5653 5654 if (info->page->Cache() != cache) { 5655 // The page is not in the top cache, so we lock the whole cache chain 5656 // before touching the page's wired count. 5657 cacheChainLocker.LockAllSourceCaches(); 5658 } 5659 5660 decrement_page_wired_count(info->page); 5661 5662 // remove the wired range from the area 5663 area->Unwire(&info->range); 5664 5665 cacheChainLocker.Unlock(); 5666 } 5667 5668 5669 /*! Wires down the given address range in the specified team's address space. 5670 5671 If successful the function 5672 - acquires a reference to the specified team's address space, 5673 - adds respective wired ranges to all areas that intersect with the given 5674 address range, 5675 - makes sure all pages in the given address range are mapped with the 5676 requested access permissions and increments their wired count. 5677 5678 It fails when \a team doesn't specify a valid address space, when any part 5679 of the specified address range is not covered by areas, when the concerned 5680 areas don't allow mapping with the requested permissions, or when mapping 5681 failed for another reason. 5682 5683 When successful the call must be balanced by an unlock_memory_etc() call with 5684 the exact same parameters. 5685 5686 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5687 supported. 5688 \param address The start of the address range to be wired. 5689 \param numBytes The size of the address range to be wired. 5690 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 5691 requests that the range must be wired writable ("read from device 5692 into memory"). 5693 \return \c B_OK on success, another error code otherwise.
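	A minimal usage sketch (illustrative only; \c buffer and \c length are
	hypothetical placeholders):
	\code
	status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	if (error == B_OK) {
		// ... perform the I/O on the now wired range ...
		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	}
	\endcode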
5694 */ 5695 status_t 5696 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5697 { 5698 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5699 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5700 5701 // compute the page protection that is required 5702 bool isUser = IS_USER_ADDRESS(address); 5703 bool writable = (flags & B_READ_DEVICE) == 0; 5704 uint32 requiredProtection = PAGE_PRESENT 5705 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5706 if (writable) 5707 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5708 5709 uint32 mallocFlags = isUser 5710 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5711 5712 // get and read lock the address space 5713 VMAddressSpace* addressSpace = NULL; 5714 if (isUser) { 5715 if (team == B_CURRENT_TEAM) 5716 addressSpace = VMAddressSpace::GetCurrent(); 5717 else 5718 addressSpace = VMAddressSpace::Get(team); 5719 } else 5720 addressSpace = VMAddressSpace::GetKernel(); 5721 if (addressSpace == NULL) 5722 return B_ERROR; 5723 5724 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5725 // We get a new address space reference here. The one we got above will 5726 // be freed by unlock_memory_etc(). 5727 5728 VMTranslationMap* map = addressSpace->TranslationMap(); 5729 status_t error = B_OK; 5730 5731 // iterate through all concerned areas 5732 addr_t nextAddress = lockBaseAddress; 5733 while (nextAddress != lockEndAddress) { 5734 // get the next area 5735 VMArea* area = addressSpace->LookupArea(nextAddress); 5736 if (area == NULL) { 5737 error = B_BAD_ADDRESS; 5738 break; 5739 } 5740 5741 addr_t areaStart = nextAddress; 5742 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5743 5744 // allocate the wired range (do that before locking the cache to avoid 5745 // deadlocks) 5746 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5747 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5748 if (range == NULL) { 5749 error = B_NO_MEMORY; 5750 break; 5751 } 5752 5753 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5754 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5755 5756 // mark the area range wired 5757 area->Wire(range); 5758 5759 // Depending on the area cache type and the wiring, we may not need to 5760 // look at the individual pages. 5761 if (area->cache_type == CACHE_TYPE_NULL 5762 || area->cache_type == CACHE_TYPE_DEVICE 5763 || area->wiring == B_FULL_LOCK 5764 || area->wiring == B_CONTIGUOUS) { 5765 nextAddress = areaEnd; 5766 continue; 5767 } 5768 5769 // Lock the area's cache chain and the translation map. Needed to look 5770 // up pages and play with their wired count. 5771 cacheChainLocker.LockAllSourceCaches(); 5772 map->Lock(); 5773 5774 // iterate through the pages and wire them 5775 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5776 phys_addr_t physicalAddress; 5777 uint32 flags; 5778 5779 vm_page* page; 5780 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5781 && (flags & requiredProtection) == requiredProtection 5782 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5783 != NULL) { 5784 // Already mapped with the correct permissions -- just increment 5785 // the page's wired count. 5786 increment_page_wired_count(page); 5787 } else { 5788 // Let vm_soft_fault() map the page for us, if possible. We need 5789 // to fully unlock to avoid deadlocks. 
Since we have already 5790 // wired the area itself, nothing disturbing will happen with it 5791 // in the meantime. 5792 map->Unlock(); 5793 cacheChainLocker.Unlock(); 5794 addressSpaceLocker.Unlock(); 5795 5796 error = vm_soft_fault(addressSpace, nextAddress, writable, 5797 false, isUser, &page); 5798 5799 addressSpaceLocker.Lock(); 5800 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5801 cacheChainLocker.LockAllSourceCaches(); 5802 map->Lock(); 5803 } 5804 5805 if (error != B_OK) 5806 break; 5807 } 5808 5809 map->Unlock(); 5810 5811 if (error == B_OK) { 5812 cacheChainLocker.Unlock(); 5813 } else { 5814 // An error occurred, so abort right here. If the current address 5815 // is the first in this area, unwire the area, since we won't get 5816 // to it when reverting what we've done so far. 5817 if (nextAddress == areaStart) { 5818 area->Unwire(range); 5819 cacheChainLocker.Unlock(); 5820 range->~VMAreaWiredRange(); 5821 free_etc(range, mallocFlags); 5822 } else 5823 cacheChainLocker.Unlock(); 5824 5825 break; 5826 } 5827 } 5828 5829 if (error != B_OK) { 5830 // An error occurred, so unwire all that we've already wired. Note that 5831 // even if not a single page was wired, unlock_memory_etc() is called 5832 // to put the address space reference. 5833 addressSpaceLocker.Unlock(); 5834 unlock_memory_etc(team, (void*)lockBaseAddress, 5835 nextAddress - lockBaseAddress, flags); 5836 } 5837 5838 return error; 5839 } 5840 5841 5842 status_t 5843 lock_memory(void* address, size_t numBytes, uint32 flags) 5844 { 5845 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5846 } 5847 5848 5849 /*! Unwires an address range previously wired with lock_memory_etc(). 5850 5851 Note that a call to this function must balance a previous lock_memory_etc() 5852 call with exactly the same parameters. 5853 */ 5854 status_t 5855 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5856 { 5857 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5858 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5859 5860 // compute the page protection that is required 5861 bool isUser = IS_USER_ADDRESS(address); 5862 bool writable = (flags & B_READ_DEVICE) == 0; 5863 uint32 requiredProtection = PAGE_PRESENT 5864 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5865 if (writable) 5866 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5867 5868 uint32 mallocFlags = isUser 5869 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5870 5871 // get and read lock the address space 5872 VMAddressSpace* addressSpace = NULL; 5873 if (isUser) { 5874 if (team == B_CURRENT_TEAM) 5875 addressSpace = VMAddressSpace::GetCurrent(); 5876 else 5877 addressSpace = VMAddressSpace::Get(team); 5878 } else 5879 addressSpace = VMAddressSpace::GetKernel(); 5880 if (addressSpace == NULL) 5881 return B_ERROR; 5882 5883 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5884 // Take over the address space reference. We don't unlock until we're 5885 // done. 
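	// The loop below mirrors lock_memory_etc(): it walks the same areas and,
	// for areas whose pages were wired individually, decrements the per-page
	// wired counts before removing the matching VMAreaWiredRange again.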
5886 5887 VMTranslationMap* map = addressSpace->TranslationMap(); 5888 status_t error = B_OK; 5889 5890 // iterate through all concerned areas 5891 addr_t nextAddress = lockBaseAddress; 5892 while (nextAddress != lockEndAddress) { 5893 // get the next area 5894 VMArea* area = addressSpace->LookupArea(nextAddress); 5895 if (area == NULL) { 5896 error = B_BAD_ADDRESS; 5897 break; 5898 } 5899 5900 addr_t areaStart = nextAddress; 5901 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5902 5903 // Lock the area's top cache. This is a requirement for 5904 // VMArea::Unwire(). 5905 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5906 5907 // Depending on the area cache type and the wiring, we may not need to 5908 // look at the individual pages. 5909 if (area->cache_type == CACHE_TYPE_NULL 5910 || area->cache_type == CACHE_TYPE_DEVICE 5911 || area->wiring == B_FULL_LOCK 5912 || area->wiring == B_CONTIGUOUS) { 5913 // unwire the range (to avoid deadlocks we delete the range after 5914 // unlocking the cache) 5915 nextAddress = areaEnd; 5916 VMAreaWiredRange* range = area->Unwire(areaStart, 5917 areaEnd - areaStart, writable); 5918 cacheChainLocker.Unlock(); 5919 if (range != NULL) { 5920 range->~VMAreaWiredRange(); 5921 free_etc(range, mallocFlags); 5922 } 5923 continue; 5924 } 5925 5926 // Lock the area's cache chain and the translation map. Needed to look 5927 // up pages and play with their wired count. 5928 cacheChainLocker.LockAllSourceCaches(); 5929 map->Lock(); 5930 5931 // iterate through the pages and unwire them 5932 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5933 phys_addr_t physicalAddress; 5934 uint32 flags; 5935 5936 vm_page* page; 5937 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5938 && (flags & PAGE_PRESENT) != 0 5939 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5940 != NULL) { 5941 // Already mapped with the correct permissions -- just increment 5942 // the page's wired count. 5943 decrement_page_wired_count(page); 5944 } else { 5945 panic("unlock_memory_etc(): Failed to unwire page: address " 5946 "space %p, address: %#" B_PRIxADDR, addressSpace, 5947 nextAddress); 5948 error = B_BAD_VALUE; 5949 break; 5950 } 5951 } 5952 5953 map->Unlock(); 5954 5955 // All pages are unwired. Remove the area's wired range as well (to 5956 // avoid deadlocks we delete the range after unlocking the cache). 5957 VMAreaWiredRange* range = area->Unwire(areaStart, 5958 areaEnd - areaStart, writable); 5959 5960 cacheChainLocker.Unlock(); 5961 5962 if (range != NULL) { 5963 range->~VMAreaWiredRange(); 5964 free_etc(range, mallocFlags); 5965 } 5966 5967 if (error != B_OK) 5968 break; 5969 } 5970 5971 // get rid of the address space reference lock_memory_etc() acquired 5972 addressSpace->Put(); 5973 5974 return error; 5975 } 5976 5977 5978 status_t 5979 unlock_memory(void* address, size_t numBytes, uint32 flags) 5980 { 5981 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5982 } 5983 5984 5985 /*! Similar to get_memory_map(), but also allows to specify the address space 5986 for the memory in question and has a saner semantics. 5987 Returns \c B_OK when the complete range could be translated or 5988 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5989 case the actual number of entries is written to \c *_numEntries. Any other 5990 error case indicates complete failure; \c *_numEntries will be set to \c 0 5991 in this case. 
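	A small usage sketch (illustrative; \c buffer and \c length are
	hypothetical placeholders):
	\code
	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &count);
	if (status == B_OK || status == B_BUFFER_OVERFLOW) {
		// entries[0 .. count - 1] now describe the physical ranges
	}
	\endcode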
5992 */ 5993 status_t 5994 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5995 physical_entry* table, uint32* _numEntries) 5996 { 5997 uint32 numEntries = *_numEntries; 5998 *_numEntries = 0; 5999 6000 VMAddressSpace* addressSpace; 6001 addr_t virtualAddress = (addr_t)address; 6002 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 6003 phys_addr_t physicalAddress; 6004 status_t status = B_OK; 6005 int32 index = -1; 6006 addr_t offset = 0; 6007 bool interrupts = are_interrupts_enabled(); 6008 6009 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 6010 "entries)\n", team, address, numBytes, numEntries)); 6011 6012 if (numEntries == 0 || numBytes == 0) 6013 return B_BAD_VALUE; 6014 6015 // in which address space is the address to be found? 6016 if (IS_USER_ADDRESS(virtualAddress)) { 6017 if (team == B_CURRENT_TEAM) 6018 addressSpace = VMAddressSpace::GetCurrent(); 6019 else 6020 addressSpace = VMAddressSpace::Get(team); 6021 } else 6022 addressSpace = VMAddressSpace::GetKernel(); 6023 6024 if (addressSpace == NULL) 6025 return B_ERROR; 6026 6027 VMTranslationMap* map = addressSpace->TranslationMap(); 6028 6029 if (interrupts) 6030 map->Lock(); 6031 6032 while (offset < numBytes) { 6033 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6034 uint32 flags; 6035 6036 if (interrupts) { 6037 status = map->Query((addr_t)address + offset, &physicalAddress, 6038 &flags); 6039 } else { 6040 status = map->QueryInterrupt((addr_t)address + offset, 6041 &physicalAddress, &flags); 6042 } 6043 if (status < B_OK) 6044 break; 6045 if ((flags & PAGE_PRESENT) == 0) { 6046 panic("get_memory_map() called on unmapped memory!"); 6047 return B_BAD_ADDRESS; 6048 } 6049 6050 if (index < 0 && pageOffset > 0) { 6051 physicalAddress += pageOffset; 6052 if (bytes > B_PAGE_SIZE - pageOffset) 6053 bytes = B_PAGE_SIZE - pageOffset; 6054 } 6055 6056 // need to switch to the next physical_entry? 6057 if (index < 0 || table[index].address 6058 != physicalAddress - table[index].size) { 6059 if ((uint32)++index + 1 > numEntries) { 6060 // table too small 6061 break; 6062 } 6063 table[index].address = physicalAddress; 6064 table[index].size = bytes; 6065 } else { 6066 // page fits into the current entry 6067 table[index].size += bytes; 6068 } 6069 6070 offset += bytes; 6071 } 6072 6073 if (interrupts) 6074 map->Unlock(); 6075 6076 if (status != B_OK) 6077 return status; 6078 6079 if ((uint32)index + 1 > numEntries) { 6080 *_numEntries = index; 6081 return B_BUFFER_OVERFLOW; 6082 } 6083 6084 *_numEntries = index + 1; 6085 return B_OK; 6086 } 6087 6088 6089 /*! According to the BeBook, this function should always succeed. 6090 This is no longer the case.
6091 */ 6092 extern "C" int32 6093 __get_memory_map_haiku(const void* address, size_t numBytes, 6094 physical_entry* table, int32 numEntries) 6095 { 6096 uint32 entriesRead = numEntries; 6097 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6098 table, &entriesRead); 6099 if (error != B_OK) 6100 return error; 6101 6102 // close the entry list 6103 6104 // if it's only one entry, we will silently accept the missing ending 6105 if (numEntries == 1) 6106 return B_OK; 6107 6108 if (entriesRead + 1 > (uint32)numEntries) 6109 return B_BUFFER_OVERFLOW; 6110 6111 table[entriesRead].address = 0; 6112 table[entriesRead].size = 0; 6113 6114 return B_OK; 6115 } 6116 6117 6118 area_id 6119 area_for(void* address) 6120 { 6121 return vm_area_for((addr_t)address, true); 6122 } 6123 6124 6125 area_id 6126 find_area(const char* name) 6127 { 6128 return VMAreas::Find(name); 6129 } 6130 6131 6132 status_t 6133 _get_area_info(area_id id, area_info* info, size_t size) 6134 { 6135 if (size != sizeof(area_info) || info == NULL) 6136 return B_BAD_VALUE; 6137 6138 AddressSpaceReadLocker locker; 6139 VMArea* area; 6140 status_t status = locker.SetFromArea(id, area); 6141 if (status != B_OK) 6142 return status; 6143 6144 fill_area_info(area, info, size); 6145 return B_OK; 6146 } 6147 6148 6149 status_t 6150 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6151 { 6152 addr_t nextBase = *(addr_t*)cookie; 6153 6154 // we're already through the list 6155 if (nextBase == (addr_t)-1) 6156 return B_ENTRY_NOT_FOUND; 6157 6158 if (team == B_CURRENT_TEAM) 6159 team = team_get_current_team_id(); 6160 6161 AddressSpaceReadLocker locker(team); 6162 if (!locker.IsLocked()) 6163 return B_BAD_TEAM_ID; 6164 6165 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6166 if (area == NULL) { 6167 nextBase = (addr_t)-1; 6168 return B_ENTRY_NOT_FOUND; 6169 } 6170 6171 fill_area_info(area, info, size); 6172 *cookie = (ssize_t)(area->Base() + 1); 6173 6174 return B_OK; 6175 } 6176 6177 6178 status_t 6179 set_area_protection(area_id area, uint32 newProtection) 6180 { 6181 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6182 newProtection, true); 6183 } 6184 6185 6186 status_t 6187 resize_area(area_id areaID, size_t newSize) 6188 { 6189 return vm_resize_area(areaID, newSize, true); 6190 } 6191 6192 6193 /*! Transfers the specified area to a new team. The caller must be the owner 6194 of the area. 6195 */ 6196 area_id 6197 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6198 bool kernel) 6199 { 6200 area_info info; 6201 status_t status = get_area_info(id, &info); 6202 if (status != B_OK) 6203 return status; 6204 6205 if (info.team != thread_get_current_thread()->team->id) 6206 return B_PERMISSION_DENIED; 6207 6208 // We need to mark the area cloneable so the following operations work. 6209 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6210 if (status != B_OK) 6211 return status; 6212 6213 area_id clonedArea = vm_clone_area(target, info.name, _address, 6214 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6215 if (clonedArea < 0) 6216 return clonedArea; 6217 6218 status = vm_delete_area(info.team, id, kernel); 6219 if (status != B_OK) { 6220 vm_delete_area(target, clonedArea, kernel); 6221 return status; 6222 } 6223 6224 // Now we can reset the protection to whatever it was before. 
6225 set_area_protection(clonedArea, info.protection); 6226 6227 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6228 6229 return clonedArea; 6230 } 6231 6232 6233 extern "C" area_id 6234 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6235 size_t numBytes, uint32 addressSpec, uint32 protection, 6236 void** _virtualAddress) 6237 { 6238 if (!arch_vm_supports_protection(protection)) 6239 return B_NOT_SUPPORTED; 6240 6241 fix_protection(&protection); 6242 6243 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6244 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6245 false); 6246 } 6247 6248 6249 area_id 6250 clone_area(const char* name, void** _address, uint32 addressSpec, 6251 uint32 protection, area_id source) 6252 { 6253 if ((protection & B_KERNEL_PROTECTION) == 0) 6254 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6255 6256 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6257 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6258 } 6259 6260 6261 area_id 6262 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6263 uint32 protection, uint32 flags, uint32 guardSize, 6264 const virtual_address_restrictions* virtualAddressRestrictions, 6265 const physical_address_restrictions* physicalAddressRestrictions, 6266 void** _address) 6267 { 6268 fix_protection(&protection); 6269 6270 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6271 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6272 true, _address); 6273 } 6274 6275 6276 extern "C" area_id 6277 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6278 size_t size, uint32 lock, uint32 protection) 6279 { 6280 fix_protection(&protection); 6281 6282 virtual_address_restrictions virtualRestrictions = {}; 6283 virtualRestrictions.address = *_address; 6284 virtualRestrictions.address_specification = addressSpec; 6285 physical_address_restrictions physicalRestrictions = {}; 6286 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6287 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6288 true, _address); 6289 } 6290 6291 6292 status_t 6293 delete_area(area_id area) 6294 { 6295 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6296 } 6297 6298 6299 // #pragma mark - Userland syscalls 6300 6301 6302 status_t 6303 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6304 addr_t size) 6305 { 6306 // filter out some unavailable values (for userland) 6307 switch (addressSpec) { 6308 case B_ANY_KERNEL_ADDRESS: 6309 case B_ANY_KERNEL_BLOCK_ADDRESS: 6310 return B_BAD_VALUE; 6311 } 6312 6313 addr_t address; 6314 6315 if (!IS_USER_ADDRESS(userAddress) 6316 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6317 return B_BAD_ADDRESS; 6318 6319 status_t status = vm_reserve_address_range( 6320 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6321 RESERVED_AVOID_BASE); 6322 if (status != B_OK) 6323 return status; 6324 6325 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6326 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6327 (void*)address, size); 6328 return B_BAD_ADDRESS; 6329 } 6330 6331 return B_OK; 6332 } 6333 6334 6335 status_t 6336 _user_unreserve_address_range(addr_t address, addr_t size) 6337 { 6338 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6339 (void*)address, size); 6340 } 6341 6342 6343 area_id 
6344 _user_area_for(void* address) 6345 { 6346 return vm_area_for((addr_t)address, false); 6347 } 6348 6349 6350 area_id 6351 _user_find_area(const char* userName) 6352 { 6353 char name[B_OS_NAME_LENGTH]; 6354 6355 if (!IS_USER_ADDRESS(userName) 6356 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6357 return B_BAD_ADDRESS; 6358 6359 return find_area(name); 6360 } 6361 6362 6363 status_t 6364 _user_get_area_info(area_id area, area_info* userInfo) 6365 { 6366 if (!IS_USER_ADDRESS(userInfo)) 6367 return B_BAD_ADDRESS; 6368 6369 area_info info; 6370 status_t status = get_area_info(area, &info); 6371 if (status < B_OK) 6372 return status; 6373 6374 // TODO: do we want to prevent userland from seeing kernel protections? 6375 //info.protection &= B_USER_PROTECTION; 6376 6377 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6378 return B_BAD_ADDRESS; 6379 6380 return status; 6381 } 6382 6383 6384 status_t 6385 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6386 { 6387 ssize_t cookie; 6388 6389 if (!IS_USER_ADDRESS(userCookie) 6390 || !IS_USER_ADDRESS(userInfo) 6391 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6392 return B_BAD_ADDRESS; 6393 6394 area_info info; 6395 status_t status = _get_next_area_info(team, &cookie, &info, 6396 sizeof(area_info)); 6397 if (status != B_OK) 6398 return status; 6399 6400 //info.protection &= B_USER_PROTECTION; 6401 6402 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6403 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6404 return B_BAD_ADDRESS; 6405 6406 return status; 6407 } 6408 6409 6410 status_t 6411 _user_set_area_protection(area_id area, uint32 newProtection) 6412 { 6413 if ((newProtection & ~B_USER_PROTECTION) != 0) 6414 return B_BAD_VALUE; 6415 6416 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6417 newProtection, false); 6418 } 6419 6420 6421 status_t 6422 _user_resize_area(area_id area, size_t newSize) 6423 { 6424 // TODO: Since we restrict deleting of areas to those owned by the team, 6425 // we should also do that for resizing (check other functions, too). 
6426 return vm_resize_area(area, newSize, false); 6427 } 6428 6429 6430 area_id 6431 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6432 team_id target) 6433 { 6434 // filter out some unavailable values (for userland) 6435 switch (addressSpec) { 6436 case B_ANY_KERNEL_ADDRESS: 6437 case B_ANY_KERNEL_BLOCK_ADDRESS: 6438 return B_BAD_VALUE; 6439 } 6440 6441 void* address; 6442 if (!IS_USER_ADDRESS(userAddress) 6443 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6444 return B_BAD_ADDRESS; 6445 6446 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6447 if (newArea < B_OK) 6448 return newArea; 6449 6450 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6451 return B_BAD_ADDRESS; 6452 6453 return newArea; 6454 } 6455 6456 6457 area_id 6458 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6459 uint32 protection, area_id sourceArea) 6460 { 6461 char name[B_OS_NAME_LENGTH]; 6462 void* address; 6463 6464 // filter out some unavailable values (for userland) 6465 switch (addressSpec) { 6466 case B_ANY_KERNEL_ADDRESS: 6467 case B_ANY_KERNEL_BLOCK_ADDRESS: 6468 return B_BAD_VALUE; 6469 } 6470 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6471 return B_BAD_VALUE; 6472 6473 if (!IS_USER_ADDRESS(userName) 6474 || !IS_USER_ADDRESS(userAddress) 6475 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6476 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6477 return B_BAD_ADDRESS; 6478 6479 fix_protection(&protection); 6480 6481 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6482 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6483 false); 6484 if (clonedArea < B_OK) 6485 return clonedArea; 6486 6487 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6488 delete_area(clonedArea); 6489 return B_BAD_ADDRESS; 6490 } 6491 6492 return clonedArea; 6493 } 6494 6495 6496 area_id 6497 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6498 size_t size, uint32 lock, uint32 protection) 6499 { 6500 char name[B_OS_NAME_LENGTH]; 6501 void* address; 6502 6503 // filter out some unavailable values (for userland) 6504 switch (addressSpec) { 6505 case B_ANY_KERNEL_ADDRESS: 6506 case B_ANY_KERNEL_BLOCK_ADDRESS: 6507 return B_BAD_VALUE; 6508 } 6509 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6510 return B_BAD_VALUE; 6511 6512 if (!IS_USER_ADDRESS(userName) 6513 || !IS_USER_ADDRESS(userAddress) 6514 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6515 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6516 return B_BAD_ADDRESS; 6517 6518 if (addressSpec == B_EXACT_ADDRESS 6519 && IS_KERNEL_ADDRESS(address)) 6520 return B_BAD_VALUE; 6521 6522 if (addressSpec == B_ANY_ADDRESS) 6523 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6524 if (addressSpec == B_BASE_ADDRESS) 6525 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6526 6527 fix_protection(&protection); 6528 6529 virtual_address_restrictions virtualRestrictions = {}; 6530 virtualRestrictions.address = address; 6531 virtualRestrictions.address_specification = addressSpec; 6532 physical_address_restrictions physicalRestrictions = {}; 6533 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6534 size, lock, protection, 0, 0, &virtualRestrictions, 6535 &physicalRestrictions, false, &address); 6536 6537 if (area >= B_OK 6538 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6539 delete_area(area); 6540 return B_BAD_ADDRESS; 6541 
} 6542 6543 return area; 6544 } 6545 6546 6547 status_t 6548 _user_delete_area(area_id area) 6549 { 6550 // Unlike the BeOS implementation, you can now only delete areas 6551 // that you have created yourself from userland. 6552 // The documentation to delete_area() explicitly states that this 6553 // will be restricted in the future, and so it will. 6554 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6555 } 6556 6557 6558 // TODO: create a BeOS style call for this! 6559 6560 area_id 6561 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6562 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6563 int fd, off_t offset) 6564 { 6565 char name[B_OS_NAME_LENGTH]; 6566 void* address; 6567 area_id area; 6568 6569 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6570 return B_BAD_VALUE; 6571 6572 fix_protection(&protection); 6573 6574 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6575 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6576 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6577 return B_BAD_ADDRESS; 6578 6579 if (addressSpec == B_EXACT_ADDRESS) { 6580 if ((addr_t)address + size < (addr_t)address 6581 || (addr_t)address % B_PAGE_SIZE != 0) { 6582 return B_BAD_VALUE; 6583 } 6584 if (!IS_USER_ADDRESS(address) 6585 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6586 return B_BAD_ADDRESS; 6587 } 6588 } 6589 6590 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6591 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6592 false); 6593 if (area < B_OK) 6594 return area; 6595 6596 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6597 return B_BAD_ADDRESS; 6598 6599 return area; 6600 } 6601 6602 6603 status_t 6604 _user_unmap_memory(void* _address, size_t size) 6605 { 6606 addr_t address = (addr_t)_address; 6607 6608 // check params 6609 if (size == 0 || (addr_t)address + size < (addr_t)address 6610 || (addr_t)address % B_PAGE_SIZE != 0) { 6611 return B_BAD_VALUE; 6612 } 6613 6614 if (!IS_USER_ADDRESS(address) 6615 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6616 return B_BAD_ADDRESS; 6617 } 6618 6619 // Write lock the address space and ensure the address range is not wired. 6620 AddressSpaceWriteLocker locker; 6621 do { 6622 status_t status = locker.SetTo(team_get_current_team_id()); 6623 if (status != B_OK) 6624 return status; 6625 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6626 size, &locker)); 6627 6628 // unmap 6629 return unmap_address_range(locker.AddressSpace(), address, size, false); 6630 } 6631 6632 6633 status_t 6634 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6635 { 6636 // check address range 6637 addr_t address = (addr_t)_address; 6638 size = PAGE_ALIGN(size); 6639 6640 if ((address % B_PAGE_SIZE) != 0) 6641 return B_BAD_VALUE; 6642 if (!is_user_address_range(_address, size)) { 6643 // weird error code required by POSIX 6644 return ENOMEM; 6645 } 6646 6647 // extend and check protection 6648 if ((protection & ~B_USER_PROTECTION) != 0) 6649 return B_BAD_VALUE; 6650 6651 fix_protection(&protection); 6652 6653 // We need to write lock the address space, since we're going to play with 6654 // the areas. Also make sure that none of the areas is wired and that we're 6655 // actually allowed to change the protection. 
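	// (This is done in two rounds below: the first round only verifies that
	// the whole range is covered by areas we are allowed to modify,
	// restarting whenever it had to wait on a wired range; the second round
	// then applies the new protection per area or per page.)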
6656 AddressSpaceWriteLocker locker; 6657 6658 bool restart; 6659 do { 6660 restart = false; 6661 6662 status_t status = locker.SetTo(team_get_current_team_id()); 6663 if (status != B_OK) 6664 return status; 6665 6666 // First round: Check whether the whole range is covered by areas and we 6667 // are allowed to modify them. 6668 addr_t currentAddress = address; 6669 size_t sizeLeft = size; 6670 while (sizeLeft > 0) { 6671 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6672 if (area == NULL) 6673 return B_NO_MEMORY; 6674 6675 if ((area->protection & B_KERNEL_AREA) != 0) 6676 return B_NOT_ALLOWED; 6677 if (area->protection_max != 0 6678 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6679 return B_NOT_ALLOWED; 6680 } 6681 6682 addr_t offset = currentAddress - area->Base(); 6683 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6684 6685 AreaCacheLocker cacheLocker(area); 6686 6687 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6688 &locker, &cacheLocker)) { 6689 restart = true; 6690 break; 6691 } 6692 6693 cacheLocker.Unlock(); 6694 6695 currentAddress += rangeSize; 6696 sizeLeft -= rangeSize; 6697 } 6698 } while (restart); 6699 6700 // Second round: If the protections differ from that of the area, create a 6701 // page protection array and re-map mapped pages. 6702 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6703 addr_t currentAddress = address; 6704 size_t sizeLeft = size; 6705 while (sizeLeft > 0) { 6706 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6707 if (area == NULL) 6708 return B_NO_MEMORY; 6709 6710 addr_t offset = currentAddress - area->Base(); 6711 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6712 6713 currentAddress += rangeSize; 6714 sizeLeft -= rangeSize; 6715 6716 if (area->page_protections == NULL) { 6717 if (area->protection == protection) 6718 continue; 6719 if (offset == 0 && rangeSize == area->Size()) { 6720 status_t status = vm_set_area_protection(area->address_space->ID(), 6721 area->id, protection, false); 6722 if (status != B_OK) 6723 return status; 6724 continue; 6725 } 6726 6727 status_t status = allocate_area_page_protections(area); 6728 if (status != B_OK) 6729 return status; 6730 } 6731 6732 // We need to lock the complete cache chain, since we potentially unmap 6733 // pages of lower caches. 6734 VMCache* topCache = vm_area_get_locked_cache(area); 6735 VMCacheChainLocker cacheChainLocker(topCache); 6736 cacheChainLocker.LockAllSourceCaches(); 6737 6738 for (addr_t pageAddress = area->Base() + offset; 6739 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6740 map->Lock(); 6741 6742 set_area_page_protection(area, pageAddress, protection); 6743 6744 phys_addr_t physicalAddress; 6745 uint32 flags; 6746 6747 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6748 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6749 map->Unlock(); 6750 continue; 6751 } 6752 6753 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6754 if (page == NULL) { 6755 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6756 "\n", area, physicalAddress); 6757 map->Unlock(); 6758 return B_ERROR; 6759 } 6760 6761 // If the page is not in the topmost cache and write access is 6762 // requested, we have to unmap it. Otherwise we can re-map it with 6763 // the new protection. 
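			// (Unmapping forces the next write to fault, so the page gets
			// copied into the topmost cache instead of the shared page in
			// the lower cache being modified.)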
6764 bool unmapPage = page->Cache() != topCache 6765 && (protection & B_WRITE_AREA) != 0; 6766 6767 if (!unmapPage) 6768 map->ProtectPage(area, pageAddress, protection); 6769 6770 map->Unlock(); 6771 6772 if (unmapPage) { 6773 DEBUG_PAGE_ACCESS_START(page); 6774 unmap_page(area, pageAddress); 6775 DEBUG_PAGE_ACCESS_END(page); 6776 } 6777 } 6778 } 6779 6780 return B_OK; 6781 } 6782 6783 6784 status_t 6785 _user_sync_memory(void* _address, size_t size, uint32 flags) 6786 { 6787 addr_t address = (addr_t)_address; 6788 size = PAGE_ALIGN(size); 6789 6790 // check params 6791 if ((address % B_PAGE_SIZE) != 0) 6792 return B_BAD_VALUE; 6793 if (!is_user_address_range(_address, size)) { 6794 // weird error code required by POSIX 6795 return ENOMEM; 6796 } 6797 6798 bool writeSync = (flags & MS_SYNC) != 0; 6799 bool writeAsync = (flags & MS_ASYNC) != 0; 6800 if (writeSync && writeAsync) 6801 return B_BAD_VALUE; 6802 6803 if (size == 0 || (!writeSync && !writeAsync)) 6804 return B_OK; 6805 6806 // iterate through the range and sync all concerned areas 6807 while (size > 0) { 6808 // read lock the address space 6809 AddressSpaceReadLocker locker; 6810 status_t error = locker.SetTo(team_get_current_team_id()); 6811 if (error != B_OK) 6812 return error; 6813 6814 // get the first area 6815 VMArea* area = locker.AddressSpace()->LookupArea(address); 6816 if (area == NULL) 6817 return B_NO_MEMORY; 6818 6819 uint32 offset = address - area->Base(); 6820 size_t rangeSize = min_c(area->Size() - offset, size); 6821 offset += area->cache_offset; 6822 6823 // lock the cache 6824 AreaCacheLocker cacheLocker(area); 6825 if (!cacheLocker) 6826 return B_BAD_VALUE; 6827 VMCache* cache = area->cache; 6828 6829 locker.Unlock(); 6830 6831 uint32 firstPage = offset >> PAGE_SHIFT; 6832 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6833 6834 // write the pages 6835 if (cache->type == CACHE_TYPE_VNODE) { 6836 if (writeSync) { 6837 // synchronous 6838 error = vm_page_write_modified_page_range(cache, firstPage, 6839 endPage); 6840 if (error != B_OK) 6841 return error; 6842 } else { 6843 // asynchronous 6844 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6845 // TODO: This is probably not quite what is supposed to happen. 6846 // Especially when a lot has to be written, it might take ages 6847 // until it really hits the disk. 6848 } 6849 } 6850 6851 address += rangeSize; 6852 size -= rangeSize; 6853 } 6854 6855 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6856 // synchronize multiple mappings of the same file. In our VM they never get 6857 // out of sync, though, so we don't have to do anything. 6858 6859 return B_OK; 6860 } 6861 6862 6863 status_t 6864 _user_memory_advice(void* _address, size_t size, uint32 advice) 6865 { 6866 addr_t address = (addr_t)_address; 6867 if ((address % B_PAGE_SIZE) != 0) 6868 return B_BAD_VALUE; 6869 6870 size = PAGE_ALIGN(size); 6871 if (!is_user_address_range(_address, size)) { 6872 // weird error code required by POSIX 6873 return B_NO_MEMORY; 6874 } 6875 6876 switch (advice) { 6877 case MADV_NORMAL: 6878 case MADV_SEQUENTIAL: 6879 case MADV_RANDOM: 6880 case MADV_WILLNEED: 6881 case MADV_DONTNEED: 6882 // TODO: Implement! 
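			// (These advice values are currently accepted but act as no-ops;
			// only MADV_FREE below has an actual effect.)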
6883 break; 6884 6885 case MADV_FREE: 6886 { 6887 AddressSpaceWriteLocker locker; 6888 do { 6889 status_t status = locker.SetTo(team_get_current_team_id()); 6890 if (status != B_OK) 6891 return status; 6892 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6893 address, size, &locker)); 6894 6895 discard_address_range(locker.AddressSpace(), address, size, false); 6896 break; 6897 } 6898 6899 default: 6900 return B_BAD_VALUE; 6901 } 6902 6903 return B_OK; 6904 } 6905 6906 6907 status_t 6908 _user_get_memory_properties(team_id teamID, const void* address, 6909 uint32* _protected, uint32* _lock) 6910 { 6911 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6912 return B_BAD_ADDRESS; 6913 6914 AddressSpaceReadLocker locker; 6915 status_t error = locker.SetTo(teamID); 6916 if (error != B_OK) 6917 return error; 6918 6919 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6920 if (area == NULL) 6921 return B_NO_MEMORY; 6922 6923 uint32 protection = get_area_page_protection(area, (addr_t)address); 6924 uint32 wiring = area->wiring; 6925 6926 locker.Unlock(); 6927 6928 error = user_memcpy(_protected, &protection, sizeof(protection)); 6929 if (error != B_OK) 6930 return error; 6931 6932 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6933 6934 return error; 6935 } 6936 6937 6938 static status_t 6939 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 6940 { 6941 #if ENABLE_SWAP_SUPPORT 6942 // check address range 6943 addr_t address = (addr_t)_address; 6944 size = PAGE_ALIGN(size); 6945 6946 if ((address % B_PAGE_SIZE) != 0) 6947 return EINVAL; 6948 if (!is_user_address_range(_address, size)) 6949 return EINVAL; 6950 6951 const addr_t endAddress = address + size; 6952 6953 AddressSpaceReadLocker addressSpaceLocker; 6954 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 6955 if (error != B_OK) 6956 return error; 6957 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 6958 6959 // iterate through all concerned areas 6960 addr_t nextAddress = address; 6961 while (nextAddress != endAddress) { 6962 // get the next area 6963 VMArea* area = addressSpace->LookupArea(nextAddress); 6964 if (area == NULL) { 6965 error = B_BAD_ADDRESS; 6966 break; 6967 } 6968 6969 const addr_t areaStart = nextAddress; 6970 const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size()); 6971 nextAddress = areaEnd; 6972 6973 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6974 if (error != B_OK) { 6975 // We don't need to unset or reset things on failure. 6976 break; 6977 } 6978 6979 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6980 VMAnonymousCache* anonCache = NULL; 6981 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 6982 // This memory will already never be swapped. Nothing to do. 6983 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 6984 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 6985 areaEnd - areaStart, swappable); 6986 } else { 6987 // Some other cache type? We cannot affect anything here. 6988 error = EINVAL; 6989 } 6990 6991 cacheChainLocker.Unlock(); 6992 6993 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6994 if (error != B_OK) 6995 break; 6996 } 6997 6998 return error; 6999 #else 7000 // No swap support? Nothing to do.
7001 return B_OK; 7002 #endif 7003 } 7004 7005 7006 status_t 7007 _user_mlock(const void* _address, size_t size) 7008 { 7009 return user_set_memory_swappable(_address, size, false); 7010 } 7011 7012 7013 status_t 7014 _user_munlock(const void* _address, size_t size) 7015 { 7016 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7017 // if multiple clones of an area had mlock() called on them, 7018 // munlock() must also be called on all of them to actually unlock. 7019 // (At present, the first munlock() will unlock all.) 7020 // TODO: fork() should automatically unlock memory in the child. 7021 return user_set_memory_swappable(_address, size, true); 7022 } 7023 7024 7025 // #pragma mark - compatibility 7026 7027 7028 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7029 7030 7031 struct physical_entry_beos { 7032 uint32 address; 7033 uint32 size; 7034 }; 7035 7036 7037 /*! The physical_entry structure has changed. We need to translate it to the 7038 old one. 7039 */ 7040 extern "C" int32 7041 __get_memory_map_beos(const void* _address, size_t numBytes, 7042 physical_entry_beos* table, int32 numEntries) 7043 { 7044 if (numEntries <= 0) 7045 return B_BAD_VALUE; 7046 7047 const uint8* address = (const uint8*)_address; 7048 7049 int32 count = 0; 7050 while (numBytes > 0 && count < numEntries) { 7051 physical_entry entry; 7052 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7053 if (result < 0) { 7054 if (result != B_BUFFER_OVERFLOW) 7055 return result; 7056 } 7057 7058 if (entry.address >= (phys_addr_t)1 << 32) { 7059 panic("get_memory_map(): Address is greater than 4 GB!"); 7060 return B_ERROR; 7061 } 7062 7063 table[count].address = entry.address; 7064 table[count++].size = entry.size; 7065 7066 address += entry.size; 7067 numBytes -= entry.size; 7068 } 7069 7070 // null-terminate the table, if possible 7071 if (count < numEntries) { 7072 table[count].address = 0; 7073 table[count].size = 0; 7074 } 7075 7076 return B_OK; 7077 } 7078 7079 7080 /*! The type of the \a physicalAddress parameter has changed from void* to 7081 phys_addr_t. 7082 */ 7083 extern "C" area_id 7084 __map_physical_memory_beos(const char* name, void* physicalAddress, 7085 size_t numBytes, uint32 addressSpec, uint32 protection, 7086 void** _virtualAddress) 7087 { 7088 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7089 addressSpec, protection, _virtualAddress); 7090 } 7091 7092 7093 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7094 we meddle with the \a lock parameter to force 32 bit.
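	(\c B_FULL_LOCK and \c B_LAZY_LOCK are mapped to \c B_32_BIT_FULL_LOCK,
	\c B_CONTIGUOUS to \c B_32_BIT_CONTIGUOUS; \c B_NO_LOCK and any other
	value are passed through unchanged.)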
7095 */ 7096 extern "C" area_id 7097 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7098 size_t size, uint32 lock, uint32 protection) 7099 { 7100 switch (lock) { 7101 case B_NO_LOCK: 7102 break; 7103 case B_FULL_LOCK: 7104 case B_LAZY_LOCK: 7105 lock = B_32_BIT_FULL_LOCK; 7106 break; 7107 case B_CONTIGUOUS: 7108 lock = B_32_BIT_CONTIGUOUS; 7109 break; 7110 } 7111 7112 return __create_area_haiku(name, _address, addressSpec, size, lock, 7113 protection); 7114 } 7115 7116 7117 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7118 "BASE"); 7119 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7120 "map_physical_memory@", "BASE"); 7121 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7122 "BASE"); 7123 7124 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7125 "get_memory_map@@", "1_ALPHA3"); 7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7127 "map_physical_memory@@", "1_ALPHA3"); 7128 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7129 "1_ALPHA3"); 7130 7131 7132 #else 7133 7134 7135 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7136 "get_memory_map@@", "BASE"); 7137 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7138 "map_physical_memory@@", "BASE"); 7139 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7140 "BASE"); 7141 7142 7143 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7144