1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_priv.h> 52 #include <vm/VMAddressSpace.h> 53 #include <vm/VMArea.h> 54 #include <vm/VMCache.h> 55 56 #include "VMAddressSpaceLocking.h" 57 #include "VMAnonymousCache.h" 58 #include "VMAnonymousNoSwapCache.h" 59 #include "IORequest.h" 60 61 62 //#define TRACE_VM 63 //#define TRACE_FAULTS 64 #ifdef TRACE_VM 65 # define TRACE(x) dprintf x 66 #else 67 # define TRACE(x) ; 68 #endif 69 #ifdef TRACE_FAULTS 70 # define FTRACE(x) dprintf x 71 #else 72 # define FTRACE(x) ; 73 #endif 74 75 76 namespace { 77 78 class AreaCacheLocking { 79 public: 80 inline bool Lock(VMCache* lockable) 81 { 82 return false; 83 } 84 85 inline void Unlock(VMCache* lockable) 86 { 87 vm_area_put_locked_cache(lockable); 88 } 89 }; 90 91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 92 public: 93 inline AreaCacheLocker(VMCache* cache = NULL) 94 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 95 { 96 } 97 98 inline AreaCacheLocker(VMArea* area) 99 : AutoLocker<VMCache, AreaCacheLocking>() 100 { 101 SetTo(area); 102 } 103 104 inline void SetTo(VMCache* cache, bool alreadyLocked) 105 { 106 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 107 } 108 109 inline void SetTo(VMArea* area) 110 { 111 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 112 area != NULL ? 
			vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache* fTopCache;
	VMCache* fBottomCache;
};

} // namespace


// The memory reserve that an allocation of a certain priority must not touch.
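// One entry per allocation priority: user, system, and VIP.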
238 static const size_t kMemoryReserveForPriority[] = { 239 VM_MEMORY_RESERVE_USER, // user 240 VM_MEMORY_RESERVE_SYSTEM, // system 241 0 // VIP 242 }; 243 244 245 ObjectCache* gPageMappingsObjectCache; 246 247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 248 249 static off_t sAvailableMemory; 250 static off_t sNeededMemory; 251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 252 static uint32 sPageFaults; 253 254 static VMPhysicalPageMapper* sPhysicalPageMapper; 255 256 #if DEBUG_CACHE_LIST 257 258 struct cache_info { 259 VMCache* cache; 260 addr_t page_count; 261 addr_t committed; 262 }; 263 264 static const int kCacheInfoTableCount = 100 * 1024; 265 static cache_info* sCacheInfoTable; 266 267 #endif // DEBUG_CACHE_LIST 268 269 270 // function declarations 271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 272 bool addressSpaceCleanup); 273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 274 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 275 static status_t map_backing_store(VMAddressSpace* addressSpace, 276 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 277 int protection, int protectionMax, int mapping, uint32 flags, 278 const virtual_address_restrictions* addressRestrictions, bool kernel, 279 VMArea** _area, void** _virtualAddress); 280 static void fix_protection(uint32* protection); 281 282 283 // #pragma mark - 284 285 286 #if VM_PAGE_FAULT_TRACING 287 288 namespace VMPageFaultTracing { 289 290 class PageFaultStart : public AbstractTraceEntry { 291 public: 292 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 293 : 294 fAddress(address), 295 fPC(pc), 296 fWrite(write), 297 fUser(user) 298 { 299 Initialized(); 300 } 301 302 virtual void AddDump(TraceOutput& out) 303 { 304 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 305 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 306 } 307 308 private: 309 addr_t fAddress; 310 addr_t fPC; 311 bool fWrite; 312 bool fUser; 313 }; 314 315 316 // page fault errors 317 enum { 318 PAGE_FAULT_ERROR_NO_AREA = 0, 319 PAGE_FAULT_ERROR_KERNEL_ONLY, 320 PAGE_FAULT_ERROR_WRITE_PROTECTED, 321 PAGE_FAULT_ERROR_READ_PROTECTED, 322 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 323 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 324 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 325 }; 326 327 328 class PageFaultError : public AbstractTraceEntry { 329 public: 330 PageFaultError(area_id area, status_t error) 331 : 332 fArea(area), 333 fError(error) 334 { 335 Initialized(); 336 } 337 338 virtual void AddDump(TraceOutput& out) 339 { 340 switch (fError) { 341 case PAGE_FAULT_ERROR_NO_AREA: 342 out.Print("page fault error: no area"); 343 break; 344 case PAGE_FAULT_ERROR_KERNEL_ONLY: 345 out.Print("page fault error: area: %ld, kernel only", fArea); 346 break; 347 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 348 out.Print("page fault error: area: %ld, write protected", 349 fArea); 350 break; 351 case PAGE_FAULT_ERROR_READ_PROTECTED: 352 out.Print("page fault error: area: %ld, read protected", fArea); 353 break; 354 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 355 out.Print("page fault error: area: %ld, execute protected", 356 fArea); 357 break; 358 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 359 out.Print("page fault error: kernel touching bad user memory"); 360 break; 361 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 362 out.Print("page fault error: no address space"); 363 break; 364 default: 365 out.Print("page fault error: area: %ld, error: %s", fArea, 366 strerror(fError)); 367 break; 368 } 369 } 370 371 private: 372 area_id fArea; 373 status_t fError; 374 }; 375 376 377 class PageFaultDone : public AbstractTraceEntry { 378 public: 379 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 380 vm_page* page) 381 : 382 fArea(area), 383 fTopCache(topCache), 384 fCache(cache), 385 fPage(page) 386 { 387 Initialized(); 388 } 389 390 virtual void AddDump(TraceOutput& out) 391 { 392 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 393 "page: %p", fArea, fTopCache, fCache, fPage); 394 } 395 396 private: 397 area_id fArea; 398 VMCache* fTopCache; 399 VMCache* fCache; 400 vm_page* fPage; 401 }; 402 403 } // namespace VMPageFaultTracing 404 405 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 406 #else 407 # define TPF(x) ; 408 #endif // VM_PAGE_FAULT_TRACING 409 410 411 // #pragma mark - 412 413 414 /*! The page's cache must be locked. 415 */ 416 static inline void 417 increment_page_wired_count(vm_page* page) 418 { 419 if (!page->IsMapped()) 420 atomic_add(&gMappedPagesCount, 1); 421 page->IncrementWiredCount(); 422 } 423 424 425 /*! The page's cache must be locked. 426 */ 427 static inline void 428 decrement_page_wired_count(vm_page* page) 429 { 430 page->DecrementWiredCount(); 431 if (!page->IsMapped()) 432 atomic_add(&gMappedPagesCount, -1); 433 } 434 435 436 static inline addr_t 437 virtual_page_address(VMArea* area, vm_page* page) 438 { 439 return area->Base() 440 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 441 } 442 443 444 //! 
You need to have the address space locked when calling this function 445 static VMArea* 446 lookup_area(VMAddressSpace* addressSpace, area_id id) 447 { 448 VMAreaHash::ReadLock(); 449 450 VMArea* area = VMAreaHash::LookupLocked(id); 451 if (area != NULL && area->address_space != addressSpace) 452 area = NULL; 453 454 VMAreaHash::ReadUnlock(); 455 456 return area; 457 } 458 459 460 static status_t 461 allocate_area_page_protections(VMArea* area) 462 { 463 // In the page protections we store only the three user protections, 464 // so we use 4 bits per page. 465 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 466 area->page_protections = (uint8*)malloc_etc(bytes, 467 HEAP_DONT_LOCK_KERNEL_SPACE); 468 if (area->page_protections == NULL) 469 return B_NO_MEMORY; 470 471 // init the page protections for all pages to that of the area 472 uint32 areaProtection = area->protection 473 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 474 memset(area->page_protections, areaProtection | (areaProtection << 4), 475 bytes); 476 return B_OK; 477 } 478 479 480 static inline void 481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 482 { 483 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 484 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 485 uint8& entry = area->page_protections[pageIndex / 2]; 486 if (pageIndex % 2 == 0) 487 entry = (entry & 0xf0) | protection; 488 else 489 entry = (entry & 0x0f) | (protection << 4); 490 } 491 492 493 static inline uint32 494 get_area_page_protection(VMArea* area, addr_t pageAddress) 495 { 496 if (area->page_protections == NULL) 497 return area->protection; 498 499 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 500 uint32 protection = area->page_protections[pageIndex / 2]; 501 if (pageIndex % 2 == 0) 502 protection &= 0x0f; 503 else 504 protection >>= 4; 505 506 // If this is a kernel area we translate the user flags to kernel flags. 507 if (area->address_space == VMAddressSpace::Kernel()) { 508 uint32 kernelProtection = 0; 509 if ((protection & B_READ_AREA) != 0) 510 kernelProtection |= B_KERNEL_READ_AREA; 511 if ((protection & B_WRITE_AREA) != 0) 512 kernelProtection |= B_KERNEL_WRITE_AREA; 513 514 return kernelProtection; 515 } 516 517 return protection | B_KERNEL_READ_AREA 518 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 519 } 520 521 522 /*! The caller must have reserved enough pages the translation map 523 implementation might need to map this page. 524 The page's cache must be locked. 525 */ 526 static status_t 527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 528 vm_page_reservation* reservation) 529 { 530 VMTranslationMap* map = area->address_space->TranslationMap(); 531 532 bool wasMapped = page->IsMapped(); 533 534 if (area->wiring == B_NO_LOCK) { 535 DEBUG_PAGE_ACCESS_CHECK(page); 536 537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 539 gPageMappingsObjectCache, 540 CACHE_DONT_WAIT_FOR_MEMORY 541 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 542 if (mapping == NULL) 543 return B_NO_MEMORY; 544 545 mapping->page = page; 546 mapping->area = area; 547 548 map->Lock(); 549 550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 551 area->MemoryType(), reservation); 552 553 // insert mapping into lists 554 if (!page->IsMapped()) 555 atomic_add(&gMappedPagesCount, 1); 556 557 page->mappings.Add(mapping); 558 area->mappings.Add(mapping); 559 560 map->Unlock(); 561 } else { 562 DEBUG_PAGE_ACCESS_CHECK(page); 563 564 map->Lock(); 565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 566 area->MemoryType(), reservation); 567 map->Unlock(); 568 569 increment_page_wired_count(page); 570 } 571 572 if (!wasMapped) { 573 // The page is mapped now, so we must not remain in the cached queue. 574 // It also makes sense to move it from the inactive to the active, since 575 // otherwise the page daemon wouldn't come to keep track of it (in idle 576 // mode) -- if the page isn't touched, it will be deactivated after a 577 // full iteration through the queue at the latest. 578 if (page->State() == PAGE_STATE_CACHED 579 || page->State() == PAGE_STATE_INACTIVE) { 580 vm_page_set_state(page, PAGE_STATE_ACTIVE); 581 } 582 } 583 584 return B_OK; 585 } 586 587 588 /*! If \a preserveModified is \c true, the caller must hold the lock of the 589 page's cache. 590 */ 591 static inline bool 592 unmap_page(VMArea* area, addr_t virtualAddress) 593 { 594 return area->address_space->TranslationMap()->UnmapPage(area, 595 virtualAddress, true); 596 } 597 598 599 /*! If \a preserveModified is \c true, the caller must hold the lock of all 600 mapped pages' caches. 601 */ 602 static inline void 603 unmap_pages(VMArea* area, addr_t base, size_t size) 604 { 605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 606 } 607 608 609 static inline bool 610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 611 { 612 if (address < area->Base()) { 613 offset = area->Base() - address; 614 if (offset >= size) 615 return false; 616 617 address = area->Base(); 618 size -= offset; 619 offset = 0; 620 if (size > area->Size()) 621 size = area->Size(); 622 623 return true; 624 } 625 626 offset = address - area->Base(); 627 if (offset >= area->Size()) 628 return false; 629 630 if (size >= area->Size() - offset) 631 size = area->Size() - offset; 632 633 return true; 634 } 635 636 637 /*! Cuts a piece out of an area. If the given cut range covers the complete 638 area, it is deleted. If it covers the beginning or the end, the area is 639 resized accordingly. If the range covers some part in the middle of the 640 area, it is split in two; in this case the second area is returned via 641 \a _secondArea (the variable is left untouched in the other cases). 642 The address space must be write locked. 643 The caller must ensure that no part of the given range is wired. 644 */ 645 static status_t 646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 647 addr_t size, VMArea** _secondArea, bool kernel) 648 { 649 addr_t offset; 650 if (!intersect_area(area, address, size, offset)) 651 return B_OK; 652 653 // Is the area fully covered? 
654 if (address == area->Base() && size == area->Size()) { 655 delete_area(addressSpace, area, false); 656 return B_OK; 657 } 658 659 int priority; 660 uint32 allocationFlags; 661 if (addressSpace == VMAddressSpace::Kernel()) { 662 priority = VM_PRIORITY_SYSTEM; 663 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 664 | HEAP_DONT_LOCK_KERNEL_SPACE; 665 } else { 666 priority = VM_PRIORITY_USER; 667 allocationFlags = 0; 668 } 669 670 VMCache* cache = vm_area_get_locked_cache(area); 671 VMCacheChainLocker cacheChainLocker(cache); 672 cacheChainLocker.LockAllSourceCaches(); 673 674 // If no one else uses the area's cache and it's an anonymous cache, we can 675 // resize or split it, too. 676 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 677 && cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM; 678 679 // Cut the end only? 680 if (offset > 0 && size == area->Size() - offset) { 681 status_t error = addressSpace->ShrinkAreaTail(area, offset, 682 allocationFlags); 683 if (error != B_OK) 684 return error; 685 686 // unmap pages 687 unmap_pages(area, address, size); 688 689 if (onlyCacheUser) { 690 // Since VMCache::Resize() can temporarily drop the lock, we must 691 // unlock all lower caches to prevent locking order inversion. 692 cacheChainLocker.Unlock(cache); 693 cache->Resize(cache->virtual_base + offset, priority); 694 cache->ReleaseRefAndUnlock(); 695 } 696 697 return B_OK; 698 } 699 700 // Cut the beginning only? 701 if (area->Base() == address) { 702 // resize the area 703 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 704 allocationFlags); 705 if (error != B_OK) 706 return error; 707 708 // unmap pages 709 unmap_pages(area, address, size); 710 711 if (onlyCacheUser) { 712 // Since VMCache::Rebase() can temporarily drop the lock, we must 713 // unlock all lower caches to prevent locking order inversion. 714 cacheChainLocker.Unlock(cache); 715 cache->Rebase(cache->virtual_base + size, priority); 716 cache->ReleaseRefAndUnlock(); 717 } 718 area->cache_offset += size; 719 720 return B_OK; 721 } 722 723 // The tough part -- cut a piece out of the middle of the area. 724 // We do that by shrinking the area to the begin section and creating a 725 // new area for the end section. 726 addr_t firstNewSize = offset; 727 addr_t secondBase = address + size; 728 addr_t secondSize = area->Size() - offset - size; 729 730 // unmap pages 731 unmap_pages(area, address, area->Size() - firstNewSize); 732 733 // resize the area 734 addr_t oldSize = area->Size(); 735 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 736 allocationFlags); 737 if (error != B_OK) 738 return error; 739 740 virtual_address_restrictions addressRestrictions = {}; 741 addressRestrictions.address = (void*)secondBase; 742 addressRestrictions.address_specification = B_EXACT_ADDRESS; 743 VMArea* secondArea; 744 745 if (onlyCacheUser) { 746 // Create a new cache for the second area. 747 VMCache* secondCache; 748 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 749 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 750 if (error != B_OK) { 751 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 752 return error; 753 } 754 755 secondCache->Lock(); 756 secondCache->temporary = cache->temporary; 757 secondCache->virtual_base = area->cache_offset; 758 secondCache->virtual_end = area->cache_offset + secondSize; 759 760 // Transfer the concerned pages from the first cache. 
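		// adoptOffset is the offset within the original cache that corresponds
		// to secondBase, i.e. where the second area's pages begin.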
761 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 762 error = secondCache->Adopt(cache, adoptOffset, secondSize, 763 area->cache_offset); 764 765 if (error == B_OK) { 766 // Since VMCache::Resize() can temporarily drop the lock, we must 767 // unlock all lower caches to prevent locking order inversion. 768 cacheChainLocker.Unlock(cache); 769 cache->Resize(cache->virtual_base + firstNewSize, priority); 770 // Don't unlock the cache yet because we might have to resize it 771 // back. 772 773 // Map the second area. 774 error = map_backing_store(addressSpace, secondCache, 775 area->cache_offset, area->name, secondSize, area->wiring, 776 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 777 &addressRestrictions, kernel, &secondArea, NULL); 778 } 779 780 if (error != B_OK) { 781 // Restore the original cache. 782 cache->Resize(cache->virtual_base + oldSize, priority); 783 784 // Move the pages back. 785 status_t readoptStatus = cache->Adopt(secondCache, 786 area->cache_offset, secondSize, adoptOffset); 787 if (readoptStatus != B_OK) { 788 // Some (swap) pages have not been moved back and will be lost 789 // once the second cache is deleted. 790 panic("failed to restore cache range: %s", 791 strerror(readoptStatus)); 792 793 // TODO: Handle out of memory cases by freeing memory and 794 // retrying. 795 } 796 797 cache->ReleaseRefAndUnlock(); 798 secondCache->ReleaseRefAndUnlock(); 799 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 800 return error; 801 } 802 803 // Now we can unlock it. 804 cache->ReleaseRefAndUnlock(); 805 secondCache->Unlock(); 806 } else { 807 error = map_backing_store(addressSpace, cache, area->cache_offset 808 + (secondBase - area->Base()), 809 area->name, secondSize, area->wiring, area->protection, 810 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 811 &addressRestrictions, kernel, &secondArea, NULL); 812 if (error != B_OK) { 813 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 814 return error; 815 } 816 // We need a cache reference for the new area. 817 cache->AcquireRefLocked(); 818 } 819 820 if (_secondArea != NULL) 821 *_secondArea = secondArea; 822 823 return B_OK; 824 } 825 826 827 /*! Deletes or cuts all areas in the given address range. 828 The address space must be write-locked. 829 The caller must ensure that no part of the given range is wired. 830 */ 831 static status_t 832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 833 bool kernel) 834 { 835 size = PAGE_ALIGN(size); 836 837 // Check, whether the caller is allowed to modify the concerned areas. 838 if (!kernel) { 839 for (VMAddressSpace::AreaRangeIterator it 840 = addressSpace->GetAreaRangeIterator(address, size); 841 VMArea* area = it.Next();) { 842 843 if ((area->protection & B_KERNEL_AREA) != 0) { 844 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 845 "unmap range of kernel area %" B_PRId32 " (%s)\n", 846 team_get_current_team_id(), area->id, area->name); 847 return B_NOT_ALLOWED; 848 } 849 } 850 } 851 852 for (VMAddressSpace::AreaRangeIterator it 853 = addressSpace->GetAreaRangeIterator(address, size); 854 VMArea* area = it.Next();) { 855 856 status_t error = cut_area(addressSpace, area, address, size, NULL, 857 kernel); 858 if (error != B_OK) 859 return error; 860 // Failing after already messing with areas is ugly, but we 861 // can't do anything about it. 
862 } 863 864 return B_OK; 865 } 866 867 868 static status_t 869 discard_area_range(VMArea* area, addr_t address, addr_t size) 870 { 871 addr_t offset; 872 if (!intersect_area(area, address, size, offset)) 873 return B_OK; 874 875 // If someone else uses the area's cache or it's not an anonymous cache, we 876 // can't discard. 877 VMCache* cache = vm_area_get_locked_cache(area); 878 if (cache->areas != area || area->cache_next != NULL 879 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 880 return B_OK; 881 } 882 883 VMCacheChainLocker cacheChainLocker(cache); 884 cacheChainLocker.LockAllSourceCaches(); 885 886 unmap_pages(area, address, size); 887 888 // Since VMCache::Discard() can temporarily drop the lock, we must 889 // unlock all lower caches to prevent locking order inversion. 890 cacheChainLocker.Unlock(cache); 891 cache->Discard(cache->virtual_base + offset, size); 892 cache->ReleaseRefAndUnlock(); 893 894 return B_OK; 895 } 896 897 898 static status_t 899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 900 bool kernel) 901 { 902 for (VMAddressSpace::AreaRangeIterator it 903 = addressSpace->GetAreaRangeIterator(address, size); 904 VMArea* area = it.Next();) { 905 status_t error = discard_area_range(area, address, size); 906 if (error != B_OK) 907 return error; 908 } 909 910 return B_OK; 911 } 912 913 914 /*! You need to hold the lock of the cache and the write lock of the address 915 space when calling this function. 916 Note, that in case of error your cache will be temporarily unlocked. 917 If \a addressSpec is \c B_EXACT_ADDRESS and the 918 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 919 that no part of the specified address range (base \c *_virtualAddress, size 920 \a size) is wired. 
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// someone is trying to delete this address space now, so we can't
		// insert the area, and have to back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status == B_NO_MEMORY
			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
		// TODO: At present, there is no way to notify the low_resource monitor
		// that kernel address space is fragmented, nor does it check for this
		// automatically.
Due to how many locks are held, we cannot wait here 1018 // for space to be freed up, but it would be good to at least notify 1019 // that we tried and failed to allocate some amount. 1020 } 1021 if (status != B_OK) 1022 goto err2; 1023 1024 // attach the cache to the area 1025 area->cache = cache; 1026 area->cache_offset = offset; 1027 1028 // point the cache back to the area 1029 cache->InsertAreaLocked(area); 1030 if (mapping == REGION_PRIVATE_MAP) 1031 cache->Unlock(); 1032 1033 // insert the area in the global area hash table 1034 VMAreaHash::Insert(area); 1035 1036 // grab a ref to the address space (the area holds this) 1037 addressSpace->Get(); 1038 1039 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1040 // cache, sourceCache, areaName, area); 1041 1042 *_area = area; 1043 return B_OK; 1044 1045 err2: 1046 if (mapping == REGION_PRIVATE_MAP) { 1047 // We created this cache, so we must delete it again. Note, that we 1048 // need to temporarily unlock the source cache or we'll otherwise 1049 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1050 sourceCache->Unlock(); 1051 cache->ReleaseRefAndUnlock(); 1052 sourceCache->Lock(); 1053 } 1054 err1: 1055 addressSpace->DeleteArea(area, allocationFlags); 1056 return status; 1057 } 1058 1059 1060 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1061 locker1, locker2). 1062 */ 1063 template<typename LockerType1, typename LockerType2> 1064 static inline bool 1065 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1066 { 1067 area->cache->AssertLocked(); 1068 1069 VMAreaUnwiredWaiter waiter; 1070 if (!area->AddWaiterIfWired(&waiter)) 1071 return false; 1072 1073 // unlock everything and wait 1074 if (locker1 != NULL) 1075 locker1->Unlock(); 1076 if (locker2 != NULL) 1077 locker2->Unlock(); 1078 1079 waiter.waitEntry.Wait(); 1080 1081 return true; 1082 } 1083 1084 1085 /*! Checks whether the given area has any wired ranges intersecting with the 1086 specified range and waits, if so. 1087 1088 When it has to wait, the function calls \c Unlock() on both \a locker1 1089 and \a locker2, if given. 1090 The area's top cache must be locked and must be unlocked as a side effect 1091 of calling \c Unlock() on either \a locker1 or \a locker2. 1092 1093 If the function does not have to wait it does not modify or unlock any 1094 object. 1095 1096 \param area The area to be checked. 1097 \param base The base address of the range to check. 1098 \param size The size of the address range to check. 1099 \param locker1 An object to be unlocked when before starting to wait (may 1100 be \c NULL). 1101 \param locker2 An object to be unlocked when before starting to wait (may 1102 be \c NULL). 1103 \return \c true, if the function had to wait, \c false otherwise. 1104 */ 1105 template<typename LockerType1, typename LockerType2> 1106 static inline bool 1107 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1108 LockerType1* locker1, LockerType2* locker2) 1109 { 1110 area->cache->AssertLocked(); 1111 1112 VMAreaUnwiredWaiter waiter; 1113 if (!area->AddWaiterIfWired(&waiter, base, size)) 1114 return false; 1115 1116 // unlock everything and wait 1117 if (locker1 != NULL) 1118 locker1->Unlock(); 1119 if (locker2 != NULL) 1120 locker2->Unlock(); 1121 1122 waiter.waitEntry.Wait(); 1123 1124 return true; 1125 } 1126 1127 1128 /*! 
Checks whether the given address space has any wired ranges intersecting 1129 with the specified range and waits, if so. 1130 1131 Similar to wait_if_area_range_is_wired(), with the following differences: 1132 - All areas intersecting with the range are checked (respectively all until 1133 one is found that contains a wired range intersecting with the given 1134 range). 1135 - The given address space must at least be read-locked and must be unlocked 1136 when \c Unlock() is called on \a locker. 1137 - None of the areas' caches are allowed to be locked. 1138 */ 1139 template<typename LockerType> 1140 static inline bool 1141 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1142 size_t size, LockerType* locker) 1143 { 1144 for (VMAddressSpace::AreaRangeIterator it 1145 = addressSpace->GetAreaRangeIterator(base, size); 1146 VMArea* area = it.Next();) { 1147 1148 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1149 1150 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1151 return true; 1152 } 1153 1154 return false; 1155 } 1156 1157 1158 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1159 It must be called in a situation where the kernel address space may be 1160 locked. 1161 */ 1162 status_t 1163 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1164 { 1165 AddressSpaceReadLocker locker; 1166 VMArea* area; 1167 status_t status = locker.SetFromArea(id, area); 1168 if (status != B_OK) 1169 return status; 1170 1171 if (area->page_protections == NULL) { 1172 status = allocate_area_page_protections(area); 1173 if (status != B_OK) 1174 return status; 1175 } 1176 1177 *cookie = (void*)area; 1178 return B_OK; 1179 } 1180 1181 1182 /*! This is a debug helper function that can only be used with very specific 1183 use cases. 1184 Sets protection for the given address range to the protection specified. 1185 If \a protection is 0 then the involved pages will be marked non-present 1186 in the translation map to cause a fault on access. The pages aren't 1187 actually unmapped however so that they can be marked present again with 1188 additional calls to this function. For this to work the area must be 1189 fully locked in memory so that the pages aren't otherwise touched. 1190 This function does not lock the kernel address space and needs to be 1191 supplied with a \a cookie retrieved from a successful call to 1192 vm_prepare_kernel_area_debug_protection(). 1193 */ 1194 status_t 1195 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1196 uint32 protection) 1197 { 1198 // check address range 1199 addr_t address = (addr_t)_address; 1200 size = PAGE_ALIGN(size); 1201 1202 if ((address % B_PAGE_SIZE) != 0 1203 || (addr_t)address + size < (addr_t)address 1204 || !IS_KERNEL_ADDRESS(address) 1205 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1206 return B_BAD_VALUE; 1207 } 1208 1209 // Translate the kernel protection to user protection as we only store that. 
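	// (Only the three user B_*_AREA bits fit into the 4-bit per-page entries;
	// see allocate_area_page_protections().)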
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookups will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
		true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel() ?
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1300 } 1301 1302 1303 status_t 1304 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1305 addr_t size, uint32 flags) 1306 { 1307 if (size == 0) 1308 return B_BAD_VALUE; 1309 1310 AddressSpaceWriteLocker locker(team); 1311 if (!locker.IsLocked()) 1312 return B_BAD_TEAM_ID; 1313 1314 virtual_address_restrictions addressRestrictions = {}; 1315 addressRestrictions.address = *_address; 1316 addressRestrictions.address_specification = addressSpec; 1317 VMAddressSpace* addressSpace = locker.AddressSpace(); 1318 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1319 addressSpace == VMAddressSpace::Kernel() 1320 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1321 _address); 1322 } 1323 1324 1325 area_id 1326 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1327 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1328 const virtual_address_restrictions* virtualAddressRestrictions, 1329 const physical_address_restrictions* physicalAddressRestrictions, 1330 bool kernel, void** _address) 1331 { 1332 VMArea* area; 1333 VMCache* cache; 1334 vm_page* page = NULL; 1335 bool isStack = (protection & B_STACK_AREA) != 0; 1336 page_num_t guardPages; 1337 bool canOvercommit = false; 1338 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1339 ? VM_PAGE_ALLOC_CLEAR : 0; 1340 1341 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1342 team, name, size)); 1343 1344 size = PAGE_ALIGN(size); 1345 guardSize = PAGE_ALIGN(guardSize); 1346 guardPages = guardSize / B_PAGE_SIZE; 1347 1348 if (size == 0 || size < guardSize) 1349 return B_BAD_VALUE; 1350 if (!arch_vm_supports_protection(protection)) 1351 return B_NOT_SUPPORTED; 1352 1353 if (team == B_CURRENT_TEAM) 1354 team = VMAddressSpace::CurrentID(); 1355 if (team < 0) 1356 return B_BAD_TEAM_ID; 1357 1358 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1359 canOvercommit = true; 1360 1361 #ifdef DEBUG_KERNEL_STACKS 1362 if ((protection & B_KERNEL_STACK_AREA) != 0) 1363 isStack = true; 1364 #endif 1365 1366 // check parameters 1367 switch (virtualAddressRestrictions->address_specification) { 1368 case B_ANY_ADDRESS: 1369 case B_EXACT_ADDRESS: 1370 case B_BASE_ADDRESS: 1371 case B_ANY_KERNEL_ADDRESS: 1372 case B_ANY_KERNEL_BLOCK_ADDRESS: 1373 case B_RANDOMIZED_ANY_ADDRESS: 1374 case B_RANDOMIZED_BASE_ADDRESS: 1375 break; 1376 1377 default: 1378 return B_BAD_VALUE; 1379 } 1380 1381 // If low or high physical address restrictions are given, we force 1382 // B_CONTIGUOUS wiring, since only then we'll use 1383 // vm_page_allocate_page_run() which deals with those restrictions. 
	if (physicalAddressRestrictions->low_address != 0
		|| physicalAddressRestrictions->high_address != 0) {
		wiring = B_CONTIGUOUS;
	}

	physical_address_restrictions stackPhysicalRestrictions;
	bool doReserveMemory = false;
	switch (wiring) {
		case B_NO_LOCK:
			break;
		case B_FULL_LOCK:
		case B_LAZY_LOCK:
		case B_CONTIGUOUS:
			doReserveMemory = true;
			break;
		case B_ALREADY_WIRED:
			break;
		case B_LOMEM:
			stackPhysicalRestrictions = *physicalAddressRestrictions;
			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
			physicalAddressRestrictions = &stackPhysicalRestrictions;
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		case B_32_BIT_FULL_LOCK:
			if (B_HAIKU_PHYSICAL_BITS <= 32
				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
				wiring = B_FULL_LOCK;
				doReserveMemory = true;
				break;
			}
			// TODO: We don't really support this mode efficiently. Just fall
			// through for now ...
		case B_32_BIT_CONTIGUOUS:
#if B_HAIKU_PHYSICAL_BITS > 32
			if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
				stackPhysicalRestrictions = *physicalAddressRestrictions;
				stackPhysicalRestrictions.high_address
					= (phys_addr_t)1 << 32;
				physicalAddressRestrictions = &stackPhysicalRestrictions;
			}
#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while we hold the write lock to the address
	// space (if it is the kernel address space, that is), the low memory
	// handler won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to, since we currently don't
		// reclaim them (and probably can't reclaim all of them anyway). Thus
		// there are actually fewer physical pages than there should be, which
		// can get the VM into trouble in low memory situations.
1477 } 1478 1479 AddressSpaceWriteLocker locker; 1480 VMAddressSpace* addressSpace; 1481 status_t status; 1482 1483 // For full lock areas reserve the pages before locking the address 1484 // space. E.g. block caches can't release their memory while we hold the 1485 // address space lock. 1486 page_num_t reservedPages = reservedMapPages; 1487 if (wiring == B_FULL_LOCK) 1488 reservedPages += size / B_PAGE_SIZE; 1489 1490 vm_page_reservation reservation; 1491 if (reservedPages > 0) { 1492 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1493 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1494 priority)) { 1495 reservedPages = 0; 1496 status = B_WOULD_BLOCK; 1497 goto err0; 1498 } 1499 } else 1500 vm_page_reserve_pages(&reservation, reservedPages, priority); 1501 } 1502 1503 if (wiring == B_CONTIGUOUS) { 1504 // we try to allocate the page run here upfront as this may easily 1505 // fail for obvious reasons 1506 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1507 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1508 if (page == NULL) { 1509 status = B_NO_MEMORY; 1510 goto err0; 1511 } 1512 } 1513 1514 // Lock the address space and, if B_EXACT_ADDRESS and 1515 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1516 // is not wired. 1517 do { 1518 status = locker.SetTo(team); 1519 if (status != B_OK) 1520 goto err1; 1521 1522 addressSpace = locker.AddressSpace(); 1523 } while (virtualAddressRestrictions->address_specification 1524 == B_EXACT_ADDRESS 1525 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1526 && wait_if_address_range_is_wired(addressSpace, 1527 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1528 1529 // create an anonymous cache 1530 // if it's a stack, make sure that two pages are available at least 1531 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1532 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1533 wiring == B_NO_LOCK, priority); 1534 if (status != B_OK) 1535 goto err1; 1536 1537 cache->temporary = 1; 1538 cache->virtual_end = size; 1539 cache->committed_size = reservedMemory; 1540 // TODO: This should be done via a method. 
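	// The reservation is now accounted for in the cache's committed_size, so
	// clear reservedMemory to keep the error path below from unreserving it
	// a second time.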
1541 reservedMemory = 0; 1542 1543 cache->Lock(); 1544 1545 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1546 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1547 virtualAddressRestrictions, kernel, &area, _address); 1548 1549 if (status != B_OK) { 1550 cache->ReleaseRefAndUnlock(); 1551 goto err1; 1552 } 1553 1554 locker.DegradeToReadLock(); 1555 1556 switch (wiring) { 1557 case B_NO_LOCK: 1558 case B_LAZY_LOCK: 1559 // do nothing - the pages are mapped in as needed 1560 break; 1561 1562 case B_FULL_LOCK: 1563 { 1564 // Allocate and map all pages for this area 1565 1566 off_t offset = 0; 1567 for (addr_t address = area->Base(); 1568 address < area->Base() + (area->Size() - 1); 1569 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1570 #ifdef DEBUG_KERNEL_STACKS 1571 # ifdef STACK_GROWS_DOWNWARDS 1572 if (isStack && address < area->Base() 1573 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1574 # else 1575 if (isStack && address >= area->Base() + area->Size() 1576 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1577 # endif 1578 continue; 1579 #endif 1580 vm_page* page = vm_page_allocate_page(&reservation, 1581 PAGE_STATE_WIRED | pageAllocFlags); 1582 cache->InsertPage(page, offset); 1583 map_page(area, page, address, protection, &reservation); 1584 1585 DEBUG_PAGE_ACCESS_END(page); 1586 } 1587 1588 break; 1589 } 1590 1591 case B_ALREADY_WIRED: 1592 { 1593 // The pages should already be mapped. This is only really useful 1594 // during boot time. Find the appropriate vm_page objects and stick 1595 // them in the cache object. 1596 VMTranslationMap* map = addressSpace->TranslationMap(); 1597 off_t offset = 0; 1598 1599 if (!gKernelStartup) 1600 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1601 1602 map->Lock(); 1603 1604 for (addr_t virtualAddress = area->Base(); 1605 virtualAddress < area->Base() + (area->Size() - 1); 1606 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1607 phys_addr_t physicalAddress; 1608 uint32 flags; 1609 status = map->Query(virtualAddress, &physicalAddress, &flags); 1610 if (status < B_OK) { 1611 panic("looking up mapping failed for va 0x%lx\n", 1612 virtualAddress); 1613 } 1614 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1615 if (page == NULL) { 1616 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1617 "\n", physicalAddress); 1618 } 1619 1620 DEBUG_PAGE_ACCESS_START(page); 1621 1622 cache->InsertPage(page, offset); 1623 increment_page_wired_count(page); 1624 vm_page_set_state(page, PAGE_STATE_WIRED); 1625 page->busy = false; 1626 1627 DEBUG_PAGE_ACCESS_END(page); 1628 } 1629 1630 map->Unlock(); 1631 break; 1632 } 1633 1634 case B_CONTIGUOUS: 1635 { 1636 // We have already allocated our continuous pages run, so we can now 1637 // just map them in the address space 1638 VMTranslationMap* map = addressSpace->TranslationMap(); 1639 phys_addr_t physicalAddress 1640 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1641 addr_t virtualAddress = area->Base(); 1642 off_t offset = 0; 1643 1644 map->Lock(); 1645 1646 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1647 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1648 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1649 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1650 if (page == NULL) 1651 panic("couldn't lookup physical page just allocated\n"); 1652 1653 status = map->Map(virtualAddress, physicalAddress, protection, 1654 area->MemoryType(), &reservation); 1655 if (status < B_OK) 1656 panic("couldn't map physical page in 
page run\n"); 1657 1658 cache->InsertPage(page, offset); 1659 increment_page_wired_count(page); 1660 1661 DEBUG_PAGE_ACCESS_END(page); 1662 } 1663 1664 map->Unlock(); 1665 break; 1666 } 1667 1668 default: 1669 break; 1670 } 1671 1672 cache->Unlock(); 1673 1674 if (reservedPages > 0) 1675 vm_page_unreserve_pages(&reservation); 1676 1677 TRACE(("vm_create_anonymous_area: done\n")); 1678 1679 area->cache_type = CACHE_TYPE_RAM; 1680 return area->id; 1681 1682 err1: 1683 if (wiring == B_CONTIGUOUS) { 1684 // we had reserved the area space upfront... 1685 phys_addr_t pageNumber = page->physical_page_number; 1686 int32 i; 1687 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1688 page = vm_lookup_page(pageNumber); 1689 if (page == NULL) 1690 panic("couldn't lookup physical page just allocated\n"); 1691 1692 vm_page_set_state(page, PAGE_STATE_FREE); 1693 } 1694 } 1695 1696 err0: 1697 if (reservedPages > 0) 1698 vm_page_unreserve_pages(&reservation); 1699 if (reservedMemory > 0) 1700 vm_unreserve_memory(reservedMemory); 1701 1702 return status; 1703 } 1704 1705 1706 area_id 1707 vm_map_physical_memory(team_id team, const char* name, void** _address, 1708 uint32 addressSpec, addr_t size, uint32 protection, 1709 phys_addr_t physicalAddress, bool alreadyWired) 1710 { 1711 VMArea* area; 1712 VMCache* cache; 1713 addr_t mapOffset; 1714 1715 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1716 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1717 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1718 addressSpec, size, protection, physicalAddress)); 1719 1720 if (!arch_vm_supports_protection(protection)) 1721 return B_NOT_SUPPORTED; 1722 1723 AddressSpaceWriteLocker locker(team); 1724 if (!locker.IsLocked()) 1725 return B_BAD_TEAM_ID; 1726 1727 // if the physical address is somewhat inside a page, 1728 // move the actual area down to align on a page boundary 1729 mapOffset = physicalAddress % B_PAGE_SIZE; 1730 size += mapOffset; 1731 physicalAddress -= mapOffset; 1732 1733 size = PAGE_ALIGN(size); 1734 1735 // create a device cache 1736 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1737 if (status != B_OK) 1738 return status; 1739 1740 cache->virtual_end = size; 1741 1742 cache->Lock(); 1743 1744 virtual_address_restrictions addressRestrictions = {}; 1745 addressRestrictions.address = *_address; 1746 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1747 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1748 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1749 true, &area, _address); 1750 1751 if (status < B_OK) 1752 cache->ReleaseRefLocked(); 1753 1754 cache->Unlock(); 1755 1756 if (status == B_OK) { 1757 // set requested memory type -- use uncached, if not given 1758 uint32 memoryType = addressSpec & B_MTR_MASK; 1759 if (memoryType == 0) 1760 memoryType = B_MTR_UC; 1761 1762 area->SetMemoryType(memoryType); 1763 1764 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1765 if (status != B_OK) 1766 delete_area(locker.AddressSpace(), area, false); 1767 } 1768 1769 if (status != B_OK) 1770 return status; 1771 1772 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1773 1774 if (alreadyWired) { 1775 // The area is already mapped, but possibly not with the right 1776 // memory type. 
1777 map->Lock(); 1778 map->ProtectArea(area, area->protection); 1779 map->Unlock(); 1780 } else { 1781 // Map the area completely. 1782 1783 // reserve pages needed for the mapping 1784 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1785 area->Base() + (size - 1)); 1786 vm_page_reservation reservation; 1787 vm_page_reserve_pages(&reservation, reservePages, 1788 team == VMAddressSpace::KernelID() 1789 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1790 1791 map->Lock(); 1792 1793 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1794 map->Map(area->Base() + offset, physicalAddress + offset, 1795 protection, area->MemoryType(), &reservation); 1796 } 1797 1798 map->Unlock(); 1799 1800 vm_page_unreserve_pages(&reservation); 1801 } 1802 1803 // modify the pointer returned to be offset back into the new area 1804 // the same way the physical address in was offset 1805 *_address = (void*)((addr_t)*_address + mapOffset); 1806 1807 area->cache_type = CACHE_TYPE_DEVICE; 1808 return area->id; 1809 } 1810 1811 1812 /*! Don't use! 1813 TODO: This function was introduced to map physical page vecs to 1814 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1815 use a device cache and does not track vm_page::wired_count! 1816 */ 1817 area_id 1818 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1819 uint32 addressSpec, addr_t* _size, uint32 protection, 1820 struct generic_io_vec* vecs, uint32 vecCount) 1821 { 1822 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1823 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1824 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1825 addressSpec, _size, protection, vecs, vecCount)); 1826 1827 if (!arch_vm_supports_protection(protection) 1828 || (addressSpec & B_MTR_MASK) != 0) { 1829 return B_NOT_SUPPORTED; 1830 } 1831 1832 AddressSpaceWriteLocker locker(team); 1833 if (!locker.IsLocked()) 1834 return B_BAD_TEAM_ID; 1835 1836 if (vecCount == 0) 1837 return B_BAD_VALUE; 1838 1839 addr_t size = 0; 1840 for (uint32 i = 0; i < vecCount; i++) { 1841 if (vecs[i].base % B_PAGE_SIZE != 0 1842 || vecs[i].length % B_PAGE_SIZE != 0) { 1843 return B_BAD_VALUE; 1844 } 1845 1846 size += vecs[i].length; 1847 } 1848 1849 // create a device cache 1850 VMCache* cache; 1851 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1852 if (result != B_OK) 1853 return result; 1854 1855 cache->virtual_end = size; 1856 1857 cache->Lock(); 1858 1859 VMArea* area; 1860 virtual_address_restrictions addressRestrictions = {}; 1861 addressRestrictions.address = *_address; 1862 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1863 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1864 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1865 &addressRestrictions, true, &area, _address); 1866 1867 if (result != B_OK) 1868 cache->ReleaseRefLocked(); 1869 1870 cache->Unlock(); 1871 1872 if (result != B_OK) 1873 return result; 1874 1875 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1876 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1877 area->Base() + (size - 1)); 1878 1879 vm_page_reservation reservation; 1880 vm_page_reserve_pages(&reservation, reservePages, 1881 team == VMAddressSpace::KernelID() 1882 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1883 map->Lock(); 1884 1885 uint32 vecIndex = 0; 1886 size_t vecOffset = 0; 1887 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1888 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1889 vecOffset = 0; 1890 vecIndex++; 1891 } 1892 1893 if (vecIndex >= vecCount) 1894 break; 1895 1896 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1897 protection, area->MemoryType(), &reservation); 1898 1899 vecOffset += B_PAGE_SIZE; 1900 } 1901 1902 map->Unlock(); 1903 vm_page_unreserve_pages(&reservation); 1904 1905 if (_size != NULL) 1906 *_size = size; 1907 1908 area->cache_type = CACHE_TYPE_DEVICE; 1909 return area->id; 1910 } 1911 1912 1913 area_id 1914 vm_create_null_area(team_id team, const char* name, void** address, 1915 uint32 addressSpec, addr_t size, uint32 flags) 1916 { 1917 size = PAGE_ALIGN(size); 1918 1919 // Lock the address space and, if B_EXACT_ADDRESS and 1920 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1921 // is not wired. 1922 AddressSpaceWriteLocker locker; 1923 do { 1924 if (locker.SetTo(team) != B_OK) 1925 return B_BAD_TEAM_ID; 1926 } while (addressSpec == B_EXACT_ADDRESS 1927 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1928 && wait_if_address_range_is_wired(locker.AddressSpace(), 1929 (addr_t)*address, size, &locker)); 1930 1931 // create a null cache 1932 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1933 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1934 VMCache* cache; 1935 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1936 if (status != B_OK) 1937 return status; 1938 1939 cache->temporary = 1; 1940 cache->virtual_end = size; 1941 1942 cache->Lock(); 1943 1944 VMArea* area; 1945 virtual_address_restrictions addressRestrictions = {}; 1946 addressRestrictions.address = *address; 1947 addressRestrictions.address_specification = addressSpec; 1948 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1949 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 1950 REGION_NO_PRIVATE_MAP, flags, 1951 &addressRestrictions, true, &area, address); 1952 1953 if (status < B_OK) { 1954 cache->ReleaseRefAndUnlock(); 1955 return status; 1956 } 1957 1958 cache->Unlock(); 1959 1960 area->cache_type = CACHE_TYPE_NULL; 1961 return area->id; 1962 } 1963 1964 1965 /*! Creates the vnode cache for the specified \a vnode. 1966 The vnode has to be marked busy when calling this function. 1967 */ 1968 status_t 1969 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1970 { 1971 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1972 } 1973 1974 1975 /*! \a cache must be locked. The area's address space must be read-locked. 
1976 */ 1977 static void 1978 pre_map_area_pages(VMArea* area, VMCache* cache, 1979 vm_page_reservation* reservation) 1980 { 1981 addr_t baseAddress = area->Base(); 1982 addr_t cacheOffset = area->cache_offset; 1983 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1984 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1985 1986 for (VMCachePagesTree::Iterator it 1987 = cache->pages.GetIterator(firstPage, true, true); 1988 vm_page* page = it.Next();) { 1989 if (page->cache_offset >= endPage) 1990 break; 1991 1992 // skip busy and inactive pages 1993 if (page->busy || page->usage_count == 0) 1994 continue; 1995 1996 DEBUG_PAGE_ACCESS_START(page); 1997 map_page(area, page, 1998 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1999 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2000 DEBUG_PAGE_ACCESS_END(page); 2001 } 2002 } 2003 2004 2005 /*! Will map the file specified by \a fd to an area in memory. 2006 The file will be mirrored beginning at the specified \a offset. The 2007 \a offset and \a size arguments have to be page aligned. 2008 */ 2009 static area_id 2010 _vm_map_file(team_id team, const char* name, void** _address, 2011 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2012 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2013 { 2014 // TODO: for binary files, we want to make sure that they get the 2015 // copy of a file at a given time, ie. later changes should not 2016 // make it into the mapped copy -- this will need quite some changes 2017 // to be done in a nice way 2018 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2019 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2020 2021 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2022 size = PAGE_ALIGN(size); 2023 2024 if (mapping == REGION_NO_PRIVATE_MAP) 2025 protection |= B_SHARED_AREA; 2026 if (addressSpec != B_EXACT_ADDRESS) 2027 unmapAddressRange = false; 2028 2029 if (fd < 0) { 2030 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2031 virtual_address_restrictions virtualRestrictions = {}; 2032 virtualRestrictions.address = *_address; 2033 virtualRestrictions.address_specification = addressSpec; 2034 physical_address_restrictions physicalRestrictions = {}; 2035 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2036 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2037 _address); 2038 } 2039 2040 // get the open flags of the FD 2041 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2042 if (descriptor == NULL) 2043 return EBADF; 2044 int32 openMode = descriptor->open_mode; 2045 put_fd(descriptor); 2046 2047 // The FD must open for reading at any rate. For shared mapping with write 2048 // access, additionally the FD must be open for writing. 
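	// Note that a private (copy-on-write) mapping may be writable even with a
	// read-only descriptor, since modified pages are never written back to
	// the file.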
2049 if ((openMode & O_ACCMODE) == O_WRONLY 2050 || (mapping == REGION_NO_PRIVATE_MAP 2051 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2052 && (openMode & O_ACCMODE) == O_RDONLY)) { 2053 return EACCES; 2054 } 2055 2056 uint32 protectionMax = 0; 2057 if (mapping != REGION_PRIVATE_MAP) { 2058 protectionMax = protection | B_READ_AREA; 2059 if ((openMode & O_ACCMODE) == O_RDWR) 2060 protectionMax |= B_WRITE_AREA; 2061 } 2062 2063 // get the vnode for the object, this also grabs a ref to it 2064 struct vnode* vnode = NULL; 2065 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2066 if (status < B_OK) 2067 return status; 2068 VnodePutter vnodePutter(vnode); 2069 2070 // If we're going to pre-map pages, we need to reserve the pages needed by 2071 // the mapping backend upfront. 2072 page_num_t reservedPreMapPages = 0; 2073 vm_page_reservation reservation; 2074 if ((protection & B_READ_AREA) != 0) { 2075 AddressSpaceWriteLocker locker; 2076 status = locker.SetTo(team); 2077 if (status != B_OK) 2078 return status; 2079 2080 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2081 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2082 2083 locker.Unlock(); 2084 2085 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2086 team == VMAddressSpace::KernelID() 2087 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2088 } 2089 2090 struct PageUnreserver { 2091 PageUnreserver(vm_page_reservation* reservation) 2092 : 2093 fReservation(reservation) 2094 { 2095 } 2096 2097 ~PageUnreserver() 2098 { 2099 if (fReservation != NULL) 2100 vm_page_unreserve_pages(fReservation); 2101 } 2102 2103 vm_page_reservation* fReservation; 2104 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2105 2106 // Lock the address space and, if the specified address range shall be 2107 // unmapped, ensure it is not wired. 2108 AddressSpaceWriteLocker locker; 2109 do { 2110 if (locker.SetTo(team) != B_OK) 2111 return B_BAD_TEAM_ID; 2112 } while (unmapAddressRange 2113 && wait_if_address_range_is_wired(locker.AddressSpace(), 2114 (addr_t)*_address, size, &locker)); 2115 2116 // TODO: this only works for file systems that use the file cache 2117 VMCache* cache; 2118 status = vfs_get_vnode_cache(vnode, &cache, false); 2119 if (status < B_OK) 2120 return status; 2121 2122 cache->Lock(); 2123 2124 VMArea* area; 2125 virtual_address_restrictions addressRestrictions = {}; 2126 addressRestrictions.address = *_address; 2127 addressRestrictions.address_specification = addressSpec; 2128 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2129 0, protection, protectionMax, mapping, 2130 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2131 &addressRestrictions, kernel, &area, _address); 2132 2133 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2134 // map_backing_store() cannot know we no longer need the ref 2135 cache->ReleaseRefLocked(); 2136 } 2137 2138 if (status == B_OK && (protection & B_READ_AREA) != 0) 2139 pre_map_area_pages(area, cache, &reservation); 2140 2141 cache->Unlock(); 2142 2143 if (status == B_OK) { 2144 // TODO: this probably deserves a smarter solution, ie. don't always 2145 // prefetch stuff, and also, probably don't trigger it at this place. 
2146 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2147 // prefetches at max 10 MB starting from "offset" 2148 } 2149 2150 if (status != B_OK) 2151 return status; 2152 2153 area->cache_type = CACHE_TYPE_VNODE; 2154 return area->id; 2155 } 2156 2157 2158 area_id 2159 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2160 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2161 int fd, off_t offset) 2162 { 2163 if (!arch_vm_supports_protection(protection)) 2164 return B_NOT_SUPPORTED; 2165 2166 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2167 mapping, unmapAddressRange, fd, offset, true); 2168 } 2169 2170 2171 VMCache* 2172 vm_area_get_locked_cache(VMArea* area) 2173 { 2174 rw_lock_read_lock(&sAreaCacheLock); 2175 2176 while (true) { 2177 VMCache* cache = area->cache; 2178 2179 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2180 // cache has been deleted 2181 rw_lock_read_lock(&sAreaCacheLock); 2182 continue; 2183 } 2184 2185 rw_lock_read_lock(&sAreaCacheLock); 2186 2187 if (cache == area->cache) { 2188 cache->AcquireRefLocked(); 2189 rw_lock_read_unlock(&sAreaCacheLock); 2190 return cache; 2191 } 2192 2193 // the cache changed in the meantime 2194 cache->Unlock(); 2195 } 2196 } 2197 2198 2199 void 2200 vm_area_put_locked_cache(VMCache* cache) 2201 { 2202 cache->ReleaseRefAndUnlock(); 2203 } 2204 2205 2206 area_id 2207 vm_clone_area(team_id team, const char* name, void** address, 2208 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2209 bool kernel) 2210 { 2211 VMArea* newArea = NULL; 2212 VMArea* sourceArea; 2213 2214 // Check whether the source area exists and is cloneable. If so, mark it 2215 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2216 { 2217 AddressSpaceWriteLocker locker; 2218 status_t status = locker.SetFromArea(sourceID, sourceArea); 2219 if (status != B_OK) 2220 return status; 2221 2222 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2223 return B_NOT_ALLOWED; 2224 2225 sourceArea->protection |= B_SHARED_AREA; 2226 protection |= B_SHARED_AREA; 2227 } 2228 2229 // Now lock both address spaces and actually do the cloning. 
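	// The MultiAddressSpaceLocker acquires both address space locks in a
	// well-defined order, so concurrent clones in opposite directions cannot
	// deadlock. Since the write lock on the source was given up above, the
	// source area has to be looked up again once everything is locked.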
2230 2231 MultiAddressSpaceLocker locker; 2232 VMAddressSpace* sourceAddressSpace; 2233 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2234 if (status != B_OK) 2235 return status; 2236 2237 VMAddressSpace* targetAddressSpace; 2238 status = locker.AddTeam(team, true, &targetAddressSpace); 2239 if (status != B_OK) 2240 return status; 2241 2242 status = locker.Lock(); 2243 if (status != B_OK) 2244 return status; 2245 2246 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2247 if (sourceArea == NULL) 2248 return B_BAD_VALUE; 2249 2250 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2251 return B_NOT_ALLOWED; 2252 2253 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2254 2255 if (!kernel && sourceAddressSpace != targetAddressSpace 2256 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2257 #if KDEBUG 2258 Team* team = thread_get_current_thread()->team; 2259 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2260 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2261 #endif 2262 status = B_NOT_ALLOWED; 2263 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2264 status = B_NOT_ALLOWED; 2265 } else { 2266 virtual_address_restrictions addressRestrictions = {}; 2267 addressRestrictions.address = *address; 2268 addressRestrictions.address_specification = addressSpec; 2269 status = map_backing_store(targetAddressSpace, cache, 2270 sourceArea->cache_offset, name, sourceArea->Size(), 2271 sourceArea->wiring, protection, sourceArea->protection_max, 2272 mapping, 0, &addressRestrictions, 2273 kernel, &newArea, address); 2274 } 2275 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2276 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2277 // to create a new cache, and has therefore already acquired a reference 2278 // to the source cache - but otherwise it has no idea that we need 2279 // one. 2280 cache->AcquireRefLocked(); 2281 } 2282 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2283 // we need to map in everything at this point 2284 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2285 // we don't have actual pages to map but a physical area 2286 VMTranslationMap* map 2287 = sourceArea->address_space->TranslationMap(); 2288 map->Lock(); 2289 2290 phys_addr_t physicalAddress; 2291 uint32 oldProtection; 2292 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2293 2294 map->Unlock(); 2295 2296 map = targetAddressSpace->TranslationMap(); 2297 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2298 newArea->Base() + (newArea->Size() - 1)); 2299 2300 vm_page_reservation reservation; 2301 vm_page_reserve_pages(&reservation, reservePages, 2302 targetAddressSpace == VMAddressSpace::Kernel() 2303 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2304 map->Lock(); 2305 2306 for (addr_t offset = 0; offset < newArea->Size(); 2307 offset += B_PAGE_SIZE) { 2308 map->Map(newArea->Base() + offset, physicalAddress + offset, 2309 protection, newArea->MemoryType(), &reservation); 2310 } 2311 2312 map->Unlock(); 2313 vm_page_unreserve_pages(&reservation); 2314 } else { 2315 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2316 size_t reservePages = map->MaxPagesNeededToMap( 2317 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2318 vm_page_reservation reservation; 2319 vm_page_reserve_pages(&reservation, reservePages, 2320 targetAddressSpace == VMAddressSpace::Kernel() 2321 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2322 2323 // map in all pages from source 2324 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2325 vm_page* page = it.Next();) { 2326 if (!page->busy) { 2327 DEBUG_PAGE_ACCESS_START(page); 2328 map_page(newArea, page, 2329 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2330 - newArea->cache_offset), 2331 protection, &reservation); 2332 DEBUG_PAGE_ACCESS_END(page); 2333 } 2334 } 2335 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2336 // ensuring that! 2337 2338 vm_page_unreserve_pages(&reservation); 2339 } 2340 } 2341 if (status == B_OK) 2342 newArea->cache_type = sourceArea->cache_type; 2343 2344 vm_area_put_locked_cache(cache); 2345 2346 if (status < B_OK) 2347 return status; 2348 2349 return newArea->id; 2350 } 2351 2352 2353 /*! Deletes the specified area of the given address space. 2354 2355 The address space must be write-locked. 2356 The caller must ensure that the area does not have any wired ranges. 2357 2358 \param addressSpace The address space containing the area. 2359 \param area The area to be deleted. 2360 \param deletingAddressSpace \c true, if the address space is in the process 2361 of being deleted. 2362 */ 2363 static void 2364 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2365 bool deletingAddressSpace) 2366 { 2367 ASSERT(!area->IsWired()); 2368 2369 VMAreaHash::Remove(area); 2370 2371 // At this point the area is removed from the global hash table, but 2372 // still exists in the area list. 2373 2374 // Unmap the virtual address space the area occupied. 2375 { 2376 // We need to lock the complete cache chain. 2377 VMCache* topCache = vm_area_get_locked_cache(area); 2378 VMCacheChainLocker cacheChainLocker(topCache); 2379 cacheChainLocker.LockAllSourceCaches(); 2380 2381 // If the area's top cache is a temporary cache and the area is the only 2382 // one referencing it (besides us currently holding a second reference), 2383 // the unmapping code doesn't need to care about preserving the accessed 2384 // and dirty flags of the top cache page mappings. 2385 bool ignoreTopCachePageFlags 2386 = topCache->temporary && topCache->RefCount() == 2; 2387 2388 area->address_space->TranslationMap()->UnmapArea(area, 2389 deletingAddressSpace, ignoreTopCachePageFlags); 2390 } 2391 2392 if (!area->cache->temporary) 2393 area->cache->WriteModified(); 2394 2395 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2396 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2397 2398 arch_vm_unset_memory_type(area); 2399 addressSpace->RemoveArea(area, allocationFlags); 2400 addressSpace->Put(); 2401 2402 area->cache->RemoveArea(area); 2403 area->cache->ReleaseRef(); 2404 2405 addressSpace->DeleteArea(area, allocationFlags); 2406 } 2407 2408 2409 status_t 2410 vm_delete_area(team_id team, area_id id, bool kernel) 2411 { 2412 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2413 team, id)); 2414 2415 // lock the address space and make sure the area isn't wired 2416 AddressSpaceWriteLocker locker; 2417 VMArea* area; 2418 AreaCacheLocker cacheLocker; 2419 2420 do { 2421 status_t status = locker.SetFromArea(team, id, area); 2422 if (status != B_OK) 2423 return status; 2424 2425 cacheLocker.SetTo(area); 2426 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2427 2428 cacheLocker.Unlock(); 2429 2430 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2431 return B_NOT_ALLOWED; 2432 2433 delete_area(locker.AddressSpace(), area, false); 2434 return B_OK; 2435 } 2436 2437 2438 /*! Creates a new cache on top of given cache, moves all areas from 2439 the old cache to the new one, and changes the protection of all affected 2440 areas' pages to read-only. If requested, wired pages are moved up to the 2441 new cache and copies are added to the old cache in their place. 2442 Preconditions: 2443 - The given cache must be locked. 2444 - All of the cache's areas' address spaces must be read locked. 2445 - Either the cache must not have any wired ranges or a page reservation for 2446 all wired pages must be provided, so they can be copied. 2447 2448 \param lowerCache The cache on top of which a new cache shall be created. 2449 \param wiredPagesReservation If \c NULL there must not be any wired pages 2450 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2451 has wired page. The wired pages are copied in this case. 2452 */ 2453 static status_t 2454 vm_copy_on_write_area(VMCache* lowerCache, 2455 vm_page_reservation* wiredPagesReservation) 2456 { 2457 VMCache* upperCache; 2458 2459 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2460 2461 // We need to separate the cache from its areas. The cache goes one level 2462 // deeper and we create a new cache inbetween. 2463 2464 // create an anonymous cache 2465 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2466 lowerCache->GuardSize() / B_PAGE_SIZE, 2467 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2468 VM_PRIORITY_USER); 2469 if (status != B_OK) 2470 return status; 2471 2472 upperCache->Lock(); 2473 2474 upperCache->temporary = 1; 2475 upperCache->virtual_base = lowerCache->virtual_base; 2476 upperCache->virtual_end = lowerCache->virtual_end; 2477 2478 // transfer the lower cache areas to the upper cache 2479 rw_lock_write_lock(&sAreaCacheLock); 2480 upperCache->TransferAreas(lowerCache); 2481 rw_lock_write_unlock(&sAreaCacheLock); 2482 2483 lowerCache->AddConsumer(upperCache); 2484 2485 // We now need to remap all pages from all of the cache's areas read-only, 2486 // so that a copy will be created on next write access. If there are wired 2487 // pages, we keep their protection, move them to the upper cache and create 2488 // copies for the lower cache. 2489 if (wiredPagesReservation != NULL) { 2490 // We need to handle wired pages -- iterate through the cache's pages. 
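		// A wired page's mapping must not be changed (it may, e.g., be the
		// target of ongoing I/O), so instead of remapping it read-only we
		// leave it with the areas in the upper cache and insert a copy into
		// the lower cache.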
2491 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2492 vm_page* page = it.Next();) { 2493 if (page->WiredCount() > 0) { 2494 // allocate a new page and copy the wired one 2495 vm_page* copiedPage = vm_page_allocate_page( 2496 wiredPagesReservation, PAGE_STATE_ACTIVE); 2497 2498 vm_memcpy_physical_page( 2499 copiedPage->physical_page_number * B_PAGE_SIZE, 2500 page->physical_page_number * B_PAGE_SIZE); 2501 2502 // move the wired page to the upper cache (note: removing is OK 2503 // with the SplayTree iterator) and insert the copy 2504 upperCache->MovePage(page); 2505 lowerCache->InsertPage(copiedPage, 2506 page->cache_offset * B_PAGE_SIZE); 2507 2508 DEBUG_PAGE_ACCESS_END(copiedPage); 2509 } else { 2510 // Change the protection of this page in all areas. 2511 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2512 tempArea = tempArea->cache_next) { 2513 // The area must be readable in the same way it was 2514 // previously writable. 2515 uint32 protection = B_KERNEL_READ_AREA; 2516 if ((tempArea->protection & B_READ_AREA) != 0) 2517 protection |= B_READ_AREA; 2518 2519 VMTranslationMap* map 2520 = tempArea->address_space->TranslationMap(); 2521 map->Lock(); 2522 map->ProtectPage(tempArea, 2523 virtual_page_address(tempArea, page), protection); 2524 map->Unlock(); 2525 } 2526 } 2527 } 2528 } else { 2529 ASSERT(lowerCache->WiredPagesCount() == 0); 2530 2531 // just change the protection of all areas 2532 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2533 tempArea = tempArea->cache_next) { 2534 // The area must be readable in the same way it was previously 2535 // writable. 2536 uint32 protection = B_KERNEL_READ_AREA; 2537 if ((tempArea->protection & B_READ_AREA) != 0) 2538 protection |= B_READ_AREA; 2539 2540 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2541 map->Lock(); 2542 map->ProtectArea(tempArea, protection); 2543 map->Unlock(); 2544 } 2545 } 2546 2547 vm_area_put_locked_cache(upperCache); 2548 2549 return B_OK; 2550 } 2551 2552 2553 area_id 2554 vm_copy_area(team_id team, const char* name, void** _address, 2555 uint32 addressSpec, area_id sourceID) 2556 { 2557 // Do the locking: target address space, all address spaces associated with 2558 // the source cache, and the cache itself. 2559 MultiAddressSpaceLocker locker; 2560 VMAddressSpace* targetAddressSpace; 2561 VMCache* cache; 2562 VMArea* source; 2563 AreaCacheLocker cacheLocker; 2564 status_t status; 2565 bool sharedArea; 2566 2567 page_num_t wiredPages = 0; 2568 vm_page_reservation wiredPagesReservation; 2569 2570 bool restart; 2571 do { 2572 restart = false; 2573 2574 locker.Unset(); 2575 status = locker.AddTeam(team, true, &targetAddressSpace); 2576 if (status == B_OK) { 2577 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2578 &cache); 2579 } 2580 if (status != B_OK) 2581 return status; 2582 2583 cacheLocker.SetTo(cache, true); // already locked 2584 2585 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2586 2587 page_num_t oldWiredPages = wiredPages; 2588 wiredPages = 0; 2589 2590 // If the source area isn't shared, count the number of wired pages in 2591 // the cache and reserve as many pages. 
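	// Reserving the pages can block, which requires dropping the locks, so
	// the loop restarts until the reservation covers the cache's current
	// wired page count.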
2592 if (!sharedArea) { 2593 wiredPages = cache->WiredPagesCount(); 2594 2595 if (wiredPages > oldWiredPages) { 2596 cacheLocker.Unlock(); 2597 locker.Unlock(); 2598 2599 if (oldWiredPages > 0) 2600 vm_page_unreserve_pages(&wiredPagesReservation); 2601 2602 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2603 VM_PRIORITY_USER); 2604 2605 restart = true; 2606 } 2607 } else if (oldWiredPages > 0) 2608 vm_page_unreserve_pages(&wiredPagesReservation); 2609 } while (restart); 2610 2611 // unreserve pages later 2612 struct PagesUnreserver { 2613 PagesUnreserver(vm_page_reservation* reservation) 2614 : 2615 fReservation(reservation) 2616 { 2617 } 2618 2619 ~PagesUnreserver() 2620 { 2621 if (fReservation != NULL) 2622 vm_page_unreserve_pages(fReservation); 2623 } 2624 2625 private: 2626 vm_page_reservation* fReservation; 2627 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2628 2629 bool writableCopy 2630 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2631 uint8* targetPageProtections = NULL; 2632 2633 if (source->page_protections != NULL) { 2634 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2635 targetPageProtections = (uint8*)malloc_etc(bytes, 2636 HEAP_DONT_LOCK_KERNEL_SPACE); 2637 if (targetPageProtections == NULL) 2638 return B_NO_MEMORY; 2639 2640 memcpy(targetPageProtections, source->page_protections, bytes); 2641 2642 if (!writableCopy) { 2643 for (size_t i = 0; i < bytes; i++) { 2644 if ((targetPageProtections[i] 2645 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2646 writableCopy = true; 2647 break; 2648 } 2649 } 2650 } 2651 } 2652 2653 if (addressSpec == B_CLONE_ADDRESS) { 2654 addressSpec = B_EXACT_ADDRESS; 2655 *_address = (void*)source->Base(); 2656 } 2657 2658 // First, create a cache on top of the source area, respectively use the 2659 // existing one, if this is a shared area. 2660 2661 VMArea* target; 2662 virtual_address_restrictions addressRestrictions = {}; 2663 addressRestrictions.address = *_address; 2664 addressRestrictions.address_specification = addressSpec; 2665 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2666 name, source->Size(), source->wiring, source->protection, 2667 source->protection_max, 2668 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2669 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2670 &addressRestrictions, true, &target, _address); 2671 if (status < B_OK) { 2672 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2673 return status; 2674 } 2675 2676 if (targetPageProtections != NULL) 2677 target->page_protections = targetPageProtections; 2678 2679 if (sharedArea) { 2680 // The new area uses the old area's cache, but map_backing_store() 2681 // hasn't acquired a ref. So we have to do that now. 2682 cache->AcquireRefLocked(); 2683 } 2684 2685 // If the source area is writable, we need to move it one layer up as well 2686 2687 if (!sharedArea) { 2688 if (writableCopy) { 2689 // TODO: do something more useful if this fails! 2690 if (vm_copy_on_write_area(cache, 2691 wiredPages > 0 ? 
&wiredPagesReservation : NULL) < B_OK) { 2692 panic("vm_copy_on_write_area() failed!\n"); 2693 } 2694 } 2695 } 2696 2697 // we return the ID of the newly created area 2698 return target->id; 2699 } 2700 2701 2702 status_t 2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2704 bool kernel) 2705 { 2706 fix_protection(&newProtection); 2707 2708 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2709 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2710 2711 if (!arch_vm_supports_protection(newProtection)) 2712 return B_NOT_SUPPORTED; 2713 2714 bool becomesWritable 2715 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2716 2717 // lock address spaces and cache 2718 MultiAddressSpaceLocker locker; 2719 VMCache* cache; 2720 VMArea* area; 2721 status_t status; 2722 AreaCacheLocker cacheLocker; 2723 bool isWritable; 2724 2725 bool restart; 2726 do { 2727 restart = false; 2728 2729 locker.Unset(); 2730 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2731 if (status != B_OK) 2732 return status; 2733 2734 cacheLocker.SetTo(cache, true); // already locked 2735 2736 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2737 || (area->protection & B_KERNEL_AREA) != 0)) { 2738 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2739 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2740 " (%s)\n", team, newProtection, areaID, area->name); 2741 return B_NOT_ALLOWED; 2742 } 2743 if (!kernel && area->protection_max != 0 2744 && (newProtection & area->protection_max) 2745 != (newProtection & B_USER_PROTECTION)) { 2746 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2747 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2748 "area %" B_PRId32 " (%s)\n", team, newProtection, 2749 area->protection_max, areaID, area->name); 2750 return B_NOT_ALLOWED; 2751 } 2752 2753 if (area->protection == newProtection) 2754 return B_OK; 2755 2756 if (team != VMAddressSpace::KernelID() 2757 && area->address_space->ID() != team) { 2758 // unless you're the kernel, you are only allowed to set 2759 // the protection of your own areas 2760 return B_NOT_ALLOWED; 2761 } 2762 2763 isWritable 2764 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2765 2766 // Make sure the area (respectively, if we're going to call 2767 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2768 // wired ranges. 2769 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2770 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2771 otherArea = otherArea->cache_next) { 2772 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2773 restart = true; 2774 break; 2775 } 2776 } 2777 } else { 2778 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2779 restart = true; 2780 } 2781 } while (restart); 2782 2783 bool changePageProtection = true; 2784 bool changeTopCachePagesOnly = false; 2785 2786 if (isWritable && !becomesWritable) { 2787 // writable -> !writable 2788 2789 if (cache->source != NULL && cache->temporary) { 2790 if (cache->CountWritableAreas(area) == 0) { 2791 // Since this cache now lives from the pages in its source cache, 2792 // we can change the cache's commitment to take only those pages 2793 // into account that really are in this cache. 2794 2795 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2796 team == VMAddressSpace::KernelID() 2797 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2798 2799 // TODO: we may be able to join with our source cache, if 2800 // count == 0 2801 } 2802 } 2803 2804 // If only the writability changes, we can just remap the pages of the 2805 // top cache, since the pages of lower caches are mapped read-only 2806 // anyway. That's advantageous only, if the number of pages in the cache 2807 // is significantly smaller than the number of pages in the area, 2808 // though. 2809 if (newProtection 2810 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2811 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2812 changeTopCachePagesOnly = true; 2813 } 2814 } else if (!isWritable && becomesWritable) { 2815 // !writable -> writable 2816 2817 if (!cache->consumers.IsEmpty()) { 2818 // There are consumers -- we have to insert a new cache. Fortunately 2819 // vm_copy_on_write_area() does everything that's needed. 2820 changePageProtection = false; 2821 status = vm_copy_on_write_area(cache, NULL); 2822 } else { 2823 // No consumers, so we don't need to insert a new one. 2824 if (cache->source != NULL && cache->temporary) { 2825 // the cache's commitment must contain all possible pages 2826 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2827 team == VMAddressSpace::KernelID() 2828 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2829 } 2830 2831 if (status == B_OK && cache->source != NULL) { 2832 // There's a source cache, hence we can't just change all pages' 2833 // protection or we might allow writing into pages belonging to 2834 // a lower cache. 2835 changeTopCachePagesOnly = true; 2836 } 2837 } 2838 } else { 2839 // we don't have anything special to do in all other cases 2840 } 2841 2842 if (status == B_OK) { 2843 // remap existing pages in this cache 2844 if (changePageProtection) { 2845 VMTranslationMap* map = area->address_space->TranslationMap(); 2846 map->Lock(); 2847 2848 if (changeTopCachePagesOnly) { 2849 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2850 page_num_t lastPageOffset 2851 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2852 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2853 vm_page* page = it.Next();) { 2854 if (page->cache_offset >= firstPageOffset 2855 && page->cache_offset <= lastPageOffset) { 2856 addr_t address = virtual_page_address(area, page); 2857 map->ProtectPage(area, address, newProtection); 2858 } 2859 } 2860 } else 2861 map->ProtectArea(area, newProtection); 2862 2863 map->Unlock(); 2864 } 2865 2866 area->protection = newProtection; 2867 } 2868 2869 return status; 2870 } 2871 2872 2873 status_t 2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2875 { 2876 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2877 if (addressSpace == NULL) 2878 return B_BAD_TEAM_ID; 2879 2880 VMTranslationMap* map = addressSpace->TranslationMap(); 2881 2882 map->Lock(); 2883 uint32 dummyFlags; 2884 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2885 map->Unlock(); 2886 2887 addressSpace->Put(); 2888 return status; 2889 } 2890 2891 2892 /*! The page's cache must be locked. 
2893 */ 2894 bool 2895 vm_test_map_modification(vm_page* page) 2896 { 2897 if (page->modified) 2898 return true; 2899 2900 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2901 vm_page_mapping* mapping; 2902 while ((mapping = iterator.Next()) != NULL) { 2903 VMArea* area = mapping->area; 2904 VMTranslationMap* map = area->address_space->TranslationMap(); 2905 2906 phys_addr_t physicalAddress; 2907 uint32 flags; 2908 map->Lock(); 2909 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2910 map->Unlock(); 2911 2912 if ((flags & PAGE_MODIFIED) != 0) 2913 return true; 2914 } 2915 2916 return false; 2917 } 2918 2919 2920 /*! The page's cache must be locked. 2921 */ 2922 void 2923 vm_clear_map_flags(vm_page* page, uint32 flags) 2924 { 2925 if ((flags & PAGE_ACCESSED) != 0) 2926 page->accessed = false; 2927 if ((flags & PAGE_MODIFIED) != 0) 2928 page->modified = false; 2929 2930 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2931 vm_page_mapping* mapping; 2932 while ((mapping = iterator.Next()) != NULL) { 2933 VMArea* area = mapping->area; 2934 VMTranslationMap* map = area->address_space->TranslationMap(); 2935 2936 map->Lock(); 2937 map->ClearFlags(virtual_page_address(area, page), flags); 2938 map->Unlock(); 2939 } 2940 } 2941 2942 2943 /*! Removes all mappings from a page. 2944 After you've called this function, the page is unmapped from memory and 2945 the page's \c accessed and \c modified flags have been updated according 2946 to the state of the mappings. 2947 The page's cache must be locked. 2948 */ 2949 void 2950 vm_remove_all_page_mappings(vm_page* page) 2951 { 2952 while (vm_page_mapping* mapping = page->mappings.Head()) { 2953 VMArea* area = mapping->area; 2954 VMTranslationMap* map = area->address_space->TranslationMap(); 2955 addr_t address = virtual_page_address(area, page); 2956 map->UnmapPage(area, address, false); 2957 } 2958 } 2959 2960 2961 int32 2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2963 { 2964 int32 count = 0; 2965 2966 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2967 vm_page_mapping* mapping; 2968 while ((mapping = iterator.Next()) != NULL) { 2969 VMArea* area = mapping->area; 2970 VMTranslationMap* map = area->address_space->TranslationMap(); 2971 2972 bool modified; 2973 if (map->ClearAccessedAndModified(area, 2974 virtual_page_address(area, page), false, modified)) { 2975 count++; 2976 } 2977 2978 page->modified |= modified; 2979 } 2980 2981 2982 if (page->accessed) { 2983 count++; 2984 page->accessed = false; 2985 } 2986 2987 return count; 2988 } 2989 2990 2991 /*! Removes all mappings of a page and/or clears the accessed bits of the 2992 mappings. 2993 The function iterates through the page mappings and removes them until 2994 encountering one that has been accessed. From then on it will continue to 2995 iterate, but only clear the accessed flag of the mapping. The page's 2996 \c modified bit will be updated accordingly, the \c accessed bit will be 2997 cleared. 2998 \return The number of mapping accessed bits encountered, including the 2999 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3000 of the page have been removed. 
3001 */ 3002 int32 3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3004 { 3005 ASSERT(page->WiredCount() == 0); 3006 3007 if (page->accessed) 3008 return vm_clear_page_mapping_accessed_flags(page); 3009 3010 while (vm_page_mapping* mapping = page->mappings.Head()) { 3011 VMArea* area = mapping->area; 3012 VMTranslationMap* map = area->address_space->TranslationMap(); 3013 addr_t address = virtual_page_address(area, page); 3014 bool modified = false; 3015 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3016 page->accessed = true; 3017 page->modified |= modified; 3018 return vm_clear_page_mapping_accessed_flags(page); 3019 } 3020 page->modified |= modified; 3021 } 3022 3023 return 0; 3024 } 3025 3026 3027 static int 3028 display_mem(int argc, char** argv) 3029 { 3030 bool physical = false; 3031 addr_t copyAddress; 3032 int32 displayWidth; 3033 int32 itemSize; 3034 int32 num = -1; 3035 addr_t address; 3036 int i = 1, j; 3037 3038 if (argc > 1 && argv[1][0] == '-') { 3039 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3040 physical = true; 3041 i++; 3042 } else 3043 i = 99; 3044 } 3045 3046 if (argc < i + 1 || argc > i + 2) { 3047 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3048 "\tdl - 8 bytes\n" 3049 "\tdw - 4 bytes\n" 3050 "\tds - 2 bytes\n" 3051 "\tdb - 1 byte\n" 3052 "\tstring - a whole string\n" 3053 " -p or --physical only allows memory from a single page to be " 3054 "displayed.\n"); 3055 return 0; 3056 } 3057 3058 address = parse_expression(argv[i]); 3059 3060 if (argc > i + 1) 3061 num = parse_expression(argv[i + 1]); 3062 3063 // build the format string 3064 if (strcmp(argv[0], "db") == 0) { 3065 itemSize = 1; 3066 displayWidth = 16; 3067 } else if (strcmp(argv[0], "ds") == 0) { 3068 itemSize = 2; 3069 displayWidth = 8; 3070 } else if (strcmp(argv[0], "dw") == 0) { 3071 itemSize = 4; 3072 displayWidth = 4; 3073 } else if (strcmp(argv[0], "dl") == 0) { 3074 itemSize = 8; 3075 displayWidth = 2; 3076 } else if (strcmp(argv[0], "string") == 0) { 3077 itemSize = 1; 3078 displayWidth = -1; 3079 } else { 3080 kprintf("display_mem called in an invalid way!\n"); 3081 return 0; 3082 } 3083 3084 if (num <= 0) 3085 num = displayWidth; 3086 3087 void* physicalPageHandle = NULL; 3088 3089 if (physical) { 3090 int32 offset = address & (B_PAGE_SIZE - 1); 3091 if (num * itemSize + offset > B_PAGE_SIZE) { 3092 num = (B_PAGE_SIZE - offset) / itemSize; 3093 kprintf("NOTE: number of bytes has been cut to page size\n"); 3094 } 3095 3096 address = ROUNDDOWN(address, B_PAGE_SIZE); 3097 3098 if (vm_get_physical_page_debug(address, ©Address, 3099 &physicalPageHandle) != B_OK) { 3100 kprintf("getting the hardware page failed."); 3101 return 0; 3102 } 3103 3104 address += offset; 3105 copyAddress += offset; 3106 } else 3107 copyAddress = address; 3108 3109 if (!strcmp(argv[0], "string")) { 3110 kprintf("%p \"", (char*)copyAddress); 3111 3112 // string mode 3113 for (i = 0; true; i++) { 3114 char c; 3115 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3116 != B_OK 3117 || c == '\0') { 3118 break; 3119 } 3120 3121 if (c == '\n') 3122 kprintf("\\n"); 3123 else if (c == '\t') 3124 kprintf("\\t"); 3125 else { 3126 if (!isprint(c)) 3127 c = '.'; 3128 3129 kprintf("%c", c); 3130 } 3131 } 3132 3133 kprintf("\"\n"); 3134 } else { 3135 // number mode 3136 for (i = 0; i < num; i++) { 3137 uint64 value; 3138 3139 if ((i % displayWidth) == 0) { 3140 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3141 if (i != 0) 
3142 kprintf("\n"); 3143 3144 kprintf("[0x%lx] ", address + i * itemSize); 3145 3146 for (j = 0; j < displayed; j++) { 3147 char c; 3148 if (debug_memcpy(B_CURRENT_TEAM, &c, 3149 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3150 displayed = j; 3151 break; 3152 } 3153 if (!isprint(c)) 3154 c = '.'; 3155 3156 kprintf("%c", c); 3157 } 3158 if (num > displayWidth) { 3159 // make sure the spacing in the last line is correct 3160 for (j = displayed; j < displayWidth * itemSize; j++) 3161 kprintf(" "); 3162 } 3163 kprintf(" "); 3164 } 3165 3166 if (debug_memcpy(B_CURRENT_TEAM, &value, 3167 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3168 kprintf("read fault"); 3169 break; 3170 } 3171 3172 switch (itemSize) { 3173 case 1: 3174 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3175 break; 3176 case 2: 3177 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3178 break; 3179 case 4: 3180 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3181 break; 3182 case 8: 3183 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3184 break; 3185 } 3186 } 3187 3188 kprintf("\n"); 3189 } 3190 3191 if (physical) { 3192 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3193 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3194 } 3195 return 0; 3196 } 3197 3198 3199 static void 3200 dump_cache_tree_recursively(VMCache* cache, int level, 3201 VMCache* highlightCache) 3202 { 3203 // print this cache 3204 for (int i = 0; i < level; i++) 3205 kprintf(" "); 3206 if (cache == highlightCache) 3207 kprintf("%p <--\n", cache); 3208 else 3209 kprintf("%p\n", cache); 3210 3211 // recursively print its consumers 3212 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3213 VMCache* consumer = it.Next();) { 3214 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3215 } 3216 } 3217 3218 3219 static int 3220 dump_cache_tree(int argc, char** argv) 3221 { 3222 if (argc != 2 || !strcmp(argv[1], "--help")) { 3223 kprintf("usage: %s <address>\n", argv[0]); 3224 return 0; 3225 } 3226 3227 addr_t address = parse_expression(argv[1]); 3228 if (address == 0) 3229 return 0; 3230 3231 VMCache* cache = (VMCache*)address; 3232 VMCache* root = cache; 3233 3234 // find the root cache (the transitive source) 3235 while (root->source != NULL) 3236 root = root->source; 3237 3238 dump_cache_tree_recursively(root, 0, cache); 3239 3240 return 0; 3241 } 3242 3243 3244 const char* 3245 vm_cache_type_to_string(int32 type) 3246 { 3247 switch (type) { 3248 case CACHE_TYPE_RAM: 3249 return "RAM"; 3250 case CACHE_TYPE_DEVICE: 3251 return "device"; 3252 case CACHE_TYPE_VNODE: 3253 return "vnode"; 3254 case CACHE_TYPE_NULL: 3255 return "null"; 3256 3257 default: 3258 return "unknown"; 3259 } 3260 } 3261 3262 3263 #if DEBUG_CACHE_LIST 3264 3265 static void 3266 update_cache_info_recursively(VMCache* cache, cache_info& info) 3267 { 3268 info.page_count += cache->page_count; 3269 if (cache->type == CACHE_TYPE_RAM) 3270 info.committed += cache->committed_size; 3271 3272 // recurse 3273 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3274 VMCache* consumer = it.Next();) { 3275 update_cache_info_recursively(consumer, info); 3276 } 3277 } 3278 3279 3280 static int 3281 cache_info_compare_page_count(const void* _a, const void* _b) 3282 { 3283 const cache_info* a = (const cache_info*)_a; 3284 const cache_info* b = (const cache_info*)_b; 3285 if (a->page_count == b->page_count) 3286 return 0; 3287 return a->page_count < b->page_count ? 
1 : -1; 3288 } 3289 3290 3291 static int 3292 cache_info_compare_committed(const void* _a, const void* _b) 3293 { 3294 const cache_info* a = (const cache_info*)_a; 3295 const cache_info* b = (const cache_info*)_b; 3296 if (a->committed == b->committed) 3297 return 0; 3298 return a->committed < b->committed ? 1 : -1; 3299 } 3300 3301 3302 static void 3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3304 { 3305 for (int i = 0; i < level; i++) 3306 kprintf(" "); 3307 3308 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3309 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3310 cache->virtual_base, cache->virtual_end, cache->page_count); 3311 3312 if (level == 0) 3313 kprintf("/%lu", info.page_count); 3314 3315 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3316 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3317 3318 if (level == 0) 3319 kprintf("/%lu", info.committed); 3320 } 3321 3322 // areas 3323 if (cache->areas != NULL) { 3324 VMArea* area = cache->areas; 3325 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3326 area->name, area->address_space->ID()); 3327 3328 while (area->cache_next != NULL) { 3329 area = area->cache_next; 3330 kprintf(", %" B_PRId32, area->id); 3331 } 3332 } 3333 3334 kputs("\n"); 3335 3336 // recurse 3337 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3338 VMCache* consumer = it.Next();) { 3339 dump_caches_recursively(consumer, info, level + 1); 3340 } 3341 } 3342 3343 3344 static int 3345 dump_caches(int argc, char** argv) 3346 { 3347 if (sCacheInfoTable == NULL) { 3348 kprintf("No cache info table!\n"); 3349 return 0; 3350 } 3351 3352 bool sortByPageCount = true; 3353 3354 for (int32 i = 1; i < argc; i++) { 3355 if (strcmp(argv[i], "-c") == 0) { 3356 sortByPageCount = false; 3357 } else { 3358 print_debugger_command_usage(argv[0]); 3359 return 0; 3360 } 3361 } 3362 3363 uint32 totalCount = 0; 3364 uint32 rootCount = 0; 3365 off_t totalCommitted = 0; 3366 page_num_t totalPages = 0; 3367 3368 VMCache* cache = gDebugCacheList; 3369 while (cache) { 3370 totalCount++; 3371 if (cache->source == NULL) { 3372 cache_info stackInfo; 3373 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3374 ? sCacheInfoTable[rootCount] : stackInfo; 3375 rootCount++; 3376 info.cache = cache; 3377 info.page_count = 0; 3378 info.committed = 0; 3379 update_cache_info_recursively(cache, info); 3380 totalCommitted += info.committed; 3381 totalPages += info.page_count; 3382 } 3383 3384 cache = cache->debug_next; 3385 } 3386 3387 if (rootCount <= (uint32)kCacheInfoTableCount) { 3388 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3389 sortByPageCount 3390 ? &cache_info_compare_page_count 3391 : &cache_info_compare_committed); 3392 } 3393 3394 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3395 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3396 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3397 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3398 "page count" : "committed size"); 3399 3400 if (rootCount <= (uint32)kCacheInfoTableCount) { 3401 for (uint32 i = 0; i < rootCount; i++) { 3402 cache_info& info = sCacheInfoTable[i]; 3403 dump_caches_recursively(info.cache, info, 0); 3404 } 3405 } else 3406 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3407 3408 return 0; 3409 } 3410 3411 #endif // DEBUG_CACHE_LIST 3412 3413 3414 static int 3415 dump_cache(int argc, char** argv) 3416 { 3417 VMCache* cache; 3418 bool showPages = false; 3419 int i = 1; 3420 3421 if (argc < 2 || !strcmp(argv[1], "--help")) { 3422 kprintf("usage: %s [-ps] <address>\n" 3423 " if -p is specified, all pages are shown, if -s is used\n" 3424 " only the cache info is shown respectively.\n", argv[0]); 3425 return 0; 3426 } 3427 while (argv[i][0] == '-') { 3428 char* arg = argv[i] + 1; 3429 while (arg[0]) { 3430 if (arg[0] == 'p') 3431 showPages = true; 3432 arg++; 3433 } 3434 i++; 3435 } 3436 if (argv[i] == NULL) { 3437 kprintf("%s: invalid argument, pass address\n", argv[0]); 3438 return 0; 3439 } 3440 3441 addr_t address = parse_expression(argv[i]); 3442 if (address == 0) 3443 return 0; 3444 3445 cache = (VMCache*)address; 3446 3447 cache->Dump(showPages); 3448 3449 set_debug_variable("_sourceCache", (addr_t)cache->source); 3450 3451 return 0; 3452 } 3453 3454 3455 static void 3456 dump_area_struct(VMArea* area, bool mappings) 3457 { 3458 kprintf("AREA: %p\n", area); 3459 kprintf("name:\t\t'%s'\n", area->name); 3460 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3461 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3462 kprintf("base:\t\t0x%lx\n", area->Base()); 3463 kprintf("size:\t\t0x%lx\n", area->Size()); 3464 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3465 kprintf("page_protection:%p\n", area->page_protections); 3466 kprintf("wiring:\t\t0x%x\n", area->wiring); 3467 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3468 kprintf("cache:\t\t%p\n", area->cache); 3469 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3470 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3471 kprintf("cache_next:\t%p\n", area->cache_next); 3472 kprintf("cache_prev:\t%p\n", area->cache_prev); 3473 3474 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3475 if (mappings) { 3476 kprintf("page mappings:\n"); 3477 while (iterator.HasNext()) { 3478 vm_page_mapping* mapping = iterator.Next(); 3479 kprintf(" %p", mapping->page); 3480 } 3481 kprintf("\n"); 3482 } else { 3483 uint32 count = 0; 3484 while (iterator.Next() != NULL) { 3485 count++; 3486 } 3487 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3488 } 3489 } 3490 3491 3492 static int 3493 dump_area(int argc, char** argv) 3494 { 3495 bool mappings = false; 3496 bool found = false; 3497 int32 index = 1; 3498 VMArea* area; 3499 addr_t num; 3500 3501 if (argc < 2 || !strcmp(argv[1], "--help")) { 3502 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3503 "All areas matching either id/address/name are listed. 
You can\n" 3504 "force to check only a specific item by prefixing the specifier\n" 3505 "with the id/contains/address/name keywords.\n" 3506 "-m shows the area's mappings as well.\n"); 3507 return 0; 3508 } 3509 3510 if (!strcmp(argv[1], "-m")) { 3511 mappings = true; 3512 index++; 3513 } 3514 3515 int32 mode = 0xf; 3516 if (!strcmp(argv[index], "id")) 3517 mode = 1; 3518 else if (!strcmp(argv[index], "contains")) 3519 mode = 2; 3520 else if (!strcmp(argv[index], "name")) 3521 mode = 4; 3522 else if (!strcmp(argv[index], "address")) 3523 mode = 0; 3524 if (mode != 0xf) 3525 index++; 3526 3527 if (index >= argc) { 3528 kprintf("No area specifier given.\n"); 3529 return 0; 3530 } 3531 3532 num = parse_expression(argv[index]); 3533 3534 if (mode == 0) { 3535 dump_area_struct((struct VMArea*)num, mappings); 3536 } else { 3537 // walk through the area list, looking for the arguments as a name 3538 3539 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3540 while ((area = it.Next()) != NULL) { 3541 if (((mode & 4) != 0 3542 && !strcmp(argv[index], area->name)) 3543 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3544 || (((mode & 2) != 0 && area->Base() <= num 3545 && area->Base() + area->Size() > num))))) { 3546 dump_area_struct(area, mappings); 3547 found = true; 3548 } 3549 } 3550 3551 if (!found) 3552 kprintf("could not find area %s (%ld)\n", argv[index], num); 3553 } 3554 3555 return 0; 3556 } 3557 3558 3559 static int 3560 dump_area_list(int argc, char** argv) 3561 { 3562 VMArea* area; 3563 const char* name = NULL; 3564 int32 id = 0; 3565 3566 if (argc > 1) { 3567 id = parse_expression(argv[1]); 3568 if (id == 0) 3569 name = argv[1]; 3570 } 3571 3572 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3573 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3574 B_PRINTF_POINTER_WIDTH, "size"); 3575 3576 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3577 while ((area = it.Next()) != NULL) { 3578 if ((id != 0 && area->address_space->ID() != id) 3579 || (name != NULL && strstr(area->name, name) == NULL)) 3580 continue; 3581 3582 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3583 area->id, (void*)area->Base(), (void*)area->Size(), 3584 area->protection, area->wiring, area->name); 3585 } 3586 return 0; 3587 } 3588 3589 3590 static int 3591 dump_available_memory(int argc, char** argv) 3592 { 3593 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3594 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3595 return 0; 3596 } 3597 3598 3599 static int 3600 dump_mapping_info(int argc, char** argv) 3601 { 3602 bool reverseLookup = false; 3603 bool pageLookup = false; 3604 3605 int argi = 1; 3606 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3607 const char* arg = argv[argi]; 3608 if (strcmp(arg, "-r") == 0) { 3609 reverseLookup = true; 3610 } else if (strcmp(arg, "-p") == 0) { 3611 reverseLookup = true; 3612 pageLookup = true; 3613 } else { 3614 print_debugger_command_usage(argv[0]); 3615 return 0; 3616 } 3617 } 3618 3619 // We need at least one argument, the address. Optionally a thread ID can be 3620 // specified. 
3621 if (argi >= argc || argi + 2 < argc) { 3622 print_debugger_command_usage(argv[0]); 3623 return 0; 3624 } 3625 3626 uint64 addressValue; 3627 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3628 return 0; 3629 3630 Team* team = NULL; 3631 if (argi < argc) { 3632 uint64 threadID; 3633 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3634 return 0; 3635 3636 Thread* thread = Thread::GetDebug(threadID); 3637 if (thread == NULL) { 3638 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3639 return 0; 3640 } 3641 3642 team = thread->team; 3643 } 3644 3645 if (reverseLookup) { 3646 phys_addr_t physicalAddress; 3647 if (pageLookup) { 3648 vm_page* page = (vm_page*)(addr_t)addressValue; 3649 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3650 } else { 3651 physicalAddress = (phys_addr_t)addressValue; 3652 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3653 } 3654 3655 kprintf(" Team Virtual Address Area\n"); 3656 kprintf("--------------------------------------\n"); 3657 3658 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3659 Callback() 3660 : 3661 fAddressSpace(NULL) 3662 { 3663 } 3664 3665 void SetAddressSpace(VMAddressSpace* addressSpace) 3666 { 3667 fAddressSpace = addressSpace; 3668 } 3669 3670 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3671 { 3672 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3673 virtualAddress); 3674 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3675 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3676 else 3677 kprintf("\n"); 3678 return false; 3679 } 3680 3681 private: 3682 VMAddressSpace* fAddressSpace; 3683 } callback; 3684 3685 if (team != NULL) { 3686 // team specified -- get its address space 3687 VMAddressSpace* addressSpace = team->address_space; 3688 if (addressSpace == NULL) { 3689 kprintf("Failed to get address space!\n"); 3690 return 0; 3691 } 3692 3693 callback.SetAddressSpace(addressSpace); 3694 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3695 physicalAddress, callback); 3696 } else { 3697 // no team specified -- iterate through all address spaces 3698 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3699 addressSpace != NULL; 3700 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3701 callback.SetAddressSpace(addressSpace); 3702 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3703 physicalAddress, callback); 3704 } 3705 } 3706 } else { 3707 // get the address space 3708 addr_t virtualAddress = (addr_t)addressValue; 3709 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3710 VMAddressSpace* addressSpace; 3711 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3712 addressSpace = VMAddressSpace::Kernel(); 3713 } else if (team != NULL) { 3714 addressSpace = team->address_space; 3715 } else { 3716 Thread* thread = debug_get_debugged_thread(); 3717 if (thread == NULL || thread->team == NULL) { 3718 kprintf("Failed to get team!\n"); 3719 return 0; 3720 } 3721 3722 addressSpace = thread->team->address_space; 3723 } 3724 3725 if (addressSpace == NULL) { 3726 kprintf("Failed to get address space!\n"); 3727 return 0; 3728 } 3729 3730 // let the translation map implementation do the job 3731 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3732 } 3733 3734 return 0; 3735 } 3736 3737 3738 /*! Deletes all areas and reserved regions in the given address space. 3739 3740 The caller must ensure that none of the areas has any wired ranges. 
3741 3742 \param addressSpace The address space. 3743 \param deletingAddressSpace \c true, if the address space is in the process 3744 of being deleted. 3745 */ 3746 void 3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3748 { 3749 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3750 addressSpace->ID())); 3751 3752 addressSpace->WriteLock(); 3753 3754 // remove all reserved areas in this address space 3755 addressSpace->UnreserveAllAddressRanges(0); 3756 3757 // delete all the areas in this address space 3758 while (VMArea* area = addressSpace->FirstArea()) { 3759 ASSERT(!area->IsWired()); 3760 delete_area(addressSpace, area, deletingAddressSpace); 3761 } 3762 3763 addressSpace->WriteUnlock(); 3764 } 3765 3766 3767 static area_id 3768 vm_area_for(addr_t address, bool kernel) 3769 { 3770 team_id team; 3771 if (IS_USER_ADDRESS(address)) { 3772 // we try the user team address space, if any 3773 team = VMAddressSpace::CurrentID(); 3774 if (team < 0) 3775 return team; 3776 } else 3777 team = VMAddressSpace::KernelID(); 3778 3779 AddressSpaceReadLocker locker(team); 3780 if (!locker.IsLocked()) 3781 return B_BAD_TEAM_ID; 3782 3783 VMArea* area = locker.AddressSpace()->LookupArea(address); 3784 if (area != NULL) { 3785 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3786 return B_ERROR; 3787 3788 return area->id; 3789 } 3790 3791 return B_ERROR; 3792 } 3793 3794 3795 /*! Frees physical pages that were used during the boot process. 3796 \a end is inclusive. 3797 */ 3798 static void 3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3800 { 3801 // free all physical pages in the specified range 3802 3803 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3804 phys_addr_t physicalAddress; 3805 uint32 flags; 3806 3807 if (map->Query(current, &physicalAddress, &flags) == B_OK 3808 && (flags & PAGE_PRESENT) != 0) { 3809 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3810 if (page != NULL && page->State() != PAGE_STATE_FREE 3811 && page->State() != PAGE_STATE_CLEAR 3812 && page->State() != PAGE_STATE_UNUSED) { 3813 DEBUG_PAGE_ACCESS_START(page); 3814 vm_page_set_state(page, PAGE_STATE_FREE); 3815 } 3816 } 3817 } 3818 3819 // unmap the memory 3820 map->Unmap(start, end); 3821 } 3822 3823 3824 void 3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3826 { 3827 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3828 addr_t end = start + (size - 1); 3829 addr_t lastEnd = start; 3830 3831 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3832 (void*)start, (void*)end)); 3833 3834 // The areas are sorted in virtual address space order, so 3835 // we just have to find the holes between them that fall 3836 // into the area we should dispose 3837 3838 map->Lock(); 3839 3840 for (VMAddressSpace::AreaIterator it 3841 = VMAddressSpace::Kernel()->GetAreaIterator(); 3842 VMArea* area = it.Next();) { 3843 addr_t areaStart = area->Base(); 3844 addr_t areaEnd = areaStart + (area->Size() - 1); 3845 3846 if (areaEnd < start) 3847 continue; 3848 3849 if (areaStart > end) { 3850 // we are done, the area is already beyond of what we have to free 3851 break; 3852 } 3853 3854 if (areaStart > lastEnd) { 3855 // this is something we can free 3856 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3857 (void*)areaStart)); 3858 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3859 } 3860 3861 if (areaEnd >= end) 
{ 3862 lastEnd = areaEnd; 3863 // no +1 to prevent potential overflow 3864 break; 3865 } 3866 3867 lastEnd = areaEnd + 1; 3868 } 3869 3870 if (lastEnd < end) { 3871 // we can also get rid of some space at the end of the area 3872 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3873 (void*)end)); 3874 unmap_and_free_physical_pages(map, lastEnd, end); 3875 } 3876 3877 map->Unlock(); 3878 } 3879 3880 3881 static void 3882 create_preloaded_image_areas(struct preloaded_image* _image) 3883 { 3884 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3885 char name[B_OS_NAME_LENGTH]; 3886 void* address; 3887 int32 length; 3888 3889 // use file name to create a good area name 3890 char* fileName = strrchr(image->name, '/'); 3891 if (fileName == NULL) 3892 fileName = image->name; 3893 else 3894 fileName++; 3895 3896 length = strlen(fileName); 3897 // make sure there is enough space for the suffix 3898 if (length > 25) 3899 length = 25; 3900 3901 memcpy(name, fileName, length); 3902 strcpy(name + length, "_text"); 3903 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3904 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3905 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3906 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3907 // this will later be remapped read-only/executable by the 3908 // ELF initialization code 3909 3910 strcpy(name + length, "_data"); 3911 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3912 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3913 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3914 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3915 } 3916 3917 3918 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3919 Any boot loader resources contained in that arguments must not be accessed 3920 anymore past this point. 3921 */ 3922 void 3923 vm_free_kernel_args(kernel_args* args) 3924 { 3925 uint32 i; 3926 3927 TRACE(("vm_free_kernel_args()\n")); 3928 3929 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3930 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3931 if (area >= B_OK) 3932 delete_area(area); 3933 } 3934 } 3935 3936 3937 static void 3938 allocate_kernel_args(kernel_args* args) 3939 { 3940 TRACE(("allocate_kernel_args()\n")); 3941 3942 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3943 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3944 3945 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3946 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3947 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3948 } 3949 } 3950 3951 3952 static void 3953 unreserve_boot_loader_ranges(kernel_args* args) 3954 { 3955 TRACE(("unreserve_boot_loader_ranges()\n")); 3956 3957 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3958 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3959 (void*)(addr_t)args->virtual_allocated_range[i].start, 3960 args->virtual_allocated_range[i].size); 3961 } 3962 } 3963 3964 3965 static void 3966 reserve_boot_loader_ranges(kernel_args* args) 3967 { 3968 TRACE(("reserve_boot_loader_ranges()\n")); 3969 3970 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3971 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3972 3973 // If the address is no kernel address, we just skip it. The 3974 // architecture specific code has to deal with it. 
3975 if (!IS_KERNEL_ADDRESS(address)) { 3976 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3977 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3978 continue; 3979 } 3980 3981 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3982 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3983 if (status < B_OK) 3984 panic("could not reserve boot loader ranges\n"); 3985 } 3986 } 3987 3988 3989 static addr_t 3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3991 { 3992 size = PAGE_ALIGN(size); 3993 3994 // find a slot in the virtual allocation addr range 3995 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3996 // check to see if the space between this one and the last is big enough 3997 addr_t rangeStart = args->virtual_allocated_range[i].start; 3998 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3999 + args->virtual_allocated_range[i - 1].size; 4000 4001 addr_t base = alignment > 0 4002 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4003 4004 if (base >= KERNEL_BASE && base < rangeStart 4005 && rangeStart - base >= size) { 4006 args->virtual_allocated_range[i - 1].size 4007 += base + size - previousRangeEnd; 4008 return base; 4009 } 4010 } 4011 4012 // we hadn't found one between allocation ranges. this is ok. 4013 // see if there's a gap after the last one 4014 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4015 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4016 + args->virtual_allocated_range[lastEntryIndex].size; 4017 addr_t base = alignment > 0 4018 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4019 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4020 args->virtual_allocated_range[lastEntryIndex].size 4021 += base + size - lastRangeEnd; 4022 return base; 4023 } 4024 4025 // see if there's a gap before the first one 4026 addr_t rangeStart = args->virtual_allocated_range[0].start; 4027 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4028 base = rangeStart - size; 4029 if (alignment > 0) 4030 base = ROUNDDOWN(base, alignment); 4031 4032 if (base >= KERNEL_BASE) { 4033 args->virtual_allocated_range[0].start = base; 4034 args->virtual_allocated_range[0].size += rangeStart - base; 4035 return base; 4036 } 4037 } 4038 4039 return 0; 4040 } 4041 4042 4043 static bool 4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4045 { 4046 // TODO: horrible brute-force method of determining if the page can be 4047 // allocated 4048 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4049 if (address >= args->physical_memory_range[i].start 4050 && address < args->physical_memory_range[i].start 4051 + args->physical_memory_range[i].size) 4052 return true; 4053 } 4054 return false; 4055 } 4056 4057 4058 page_num_t 4059 vm_allocate_early_physical_page(kernel_args* args) 4060 { 4061 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4062 phys_addr_t nextPage; 4063 4064 nextPage = args->physical_allocated_range[i].start 4065 + args->physical_allocated_range[i].size; 4066 // see if the page after the next allocated paddr run can be allocated 4067 if (i + 1 < args->num_physical_allocated_ranges 4068 && args->physical_allocated_range[i + 1].size != 0) { 4069 // see if the next page will collide with the next allocated range 4070 if (nextPage >= args->physical_allocated_range[i+1].start) 4071 continue; 4072 } 4073 // see if the next physical page 
fits in the memory block 4074 if (is_page_in_physical_memory_range(args, nextPage)) { 4075 // we got one! 4076 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4077 return nextPage / B_PAGE_SIZE; 4078 } 4079 } 4080 4081 // Expanding upwards didn't work, try going downwards. 4082 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4083 phys_addr_t nextPage; 4084 4085 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4086 // see if the page after the prev allocated paddr run can be allocated 4087 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4088 // see if the next page will collide with the next allocated range 4089 if (nextPage < args->physical_allocated_range[i-1].start 4090 + args->physical_allocated_range[i-1].size) 4091 continue; 4092 } 4093 // see if the next physical page fits in the memory block 4094 if (is_page_in_physical_memory_range(args, nextPage)) { 4095 // we got one! 4096 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4097 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4098 return nextPage / B_PAGE_SIZE; 4099 } 4100 } 4101 4102 return 0; 4103 // could not allocate a block 4104 } 4105 4106 4107 /*! This one uses the kernel_args' physical and virtual memory ranges to 4108 allocate some pages before the VM is completely up. 4109 */ 4110 addr_t 4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4112 uint32 attributes, addr_t alignment) 4113 { 4114 if (physicalSize > virtualSize) 4115 physicalSize = virtualSize; 4116 4117 // find the vaddr to allocate at 4118 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4119 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4120 if (virtualBase == 0) { 4121 panic("vm_allocate_early: could not allocate virtual address\n"); 4122 return 0; 4123 } 4124 4125 // map the pages 4126 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4127 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4128 if (physicalAddress == 0) 4129 panic("error allocating early page!\n"); 4130 4131 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4132 4133 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4134 physicalAddress * B_PAGE_SIZE, attributes, 4135 &vm_allocate_early_physical_page); 4136 } 4137 4138 return virtualBase; 4139 } 4140 4141 4142 /*! The main entrance point to initialize the VM. 
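	Sets up the architecture's translation map, the physical page, slab and
	(if used) kernel heap allocators, and the VMCache and VMAddressSpace
	machinery, reserves the boot loader's virtual ranges, creates areas for
	the mappings that already exist (kernel heap, kernel args, preloaded
	images, the CPUs' idle thread kernel stacks), and registers the VM
	related debugger commands.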
*/ 4143 status_t 4144 vm_init(kernel_args* args) 4145 { 4146 struct preloaded_image* image; 4147 void* address; 4148 status_t err = 0; 4149 uint32 i; 4150 4151 TRACE(("vm_init: entry\n")); 4152 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4153 err = arch_vm_init(args); 4154 4155 // initialize some globals 4156 vm_page_init_num_pages(args); 4157 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4158 4159 slab_init(args); 4160 4161 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4162 off_t heapSize = INITIAL_HEAP_SIZE; 4163 // try to accomodate low memory systems 4164 while (heapSize > sAvailableMemory / 8) 4165 heapSize /= 2; 4166 if (heapSize < 1024 * 1024) 4167 panic("vm_init: go buy some RAM please."); 4168 4169 // map in the new heap and initialize it 4170 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4171 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4172 TRACE(("heap at 0x%lx\n", heapBase)); 4173 heap_init(heapBase, heapSize); 4174 #endif 4175 4176 // initialize the free page list and physical page mapper 4177 vm_page_init(args); 4178 4179 // initialize the cache allocators 4180 vm_cache_init(args); 4181 4182 { 4183 status_t error = VMAreaHash::Init(); 4184 if (error != B_OK) 4185 panic("vm_init: error initializing area hash table\n"); 4186 } 4187 4188 VMAddressSpace::Init(); 4189 reserve_boot_loader_ranges(args); 4190 4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4192 heap_init_post_area(); 4193 #endif 4194 4195 // Do any further initialization that the architecture dependant layers may 4196 // need now 4197 arch_vm_translation_map_init_post_area(args); 4198 arch_vm_init_post_area(args); 4199 vm_page_init_post_area(args); 4200 slab_init_post_area(); 4201 4202 // allocate areas to represent stuff that already exists 4203 4204 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4205 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4206 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4207 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4208 #endif 4209 4210 allocate_kernel_args(args); 4211 4212 create_preloaded_image_areas(args->kernel_image); 4213 4214 // allocate areas for preloaded images 4215 for (image = args->preloaded_images; image != NULL; image = image->next) 4216 create_preloaded_image_areas(image); 4217 4218 // allocate kernel stacks 4219 for (i = 0; i < args->num_cpus; i++) { 4220 char name[64]; 4221 4222 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4223 address = (void*)args->cpu_kstack[i].start; 4224 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4225 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4226 } 4227 4228 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4229 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4230 4231 #if PARANOID_KERNEL_MALLOC 4232 vm_block_address_range("uninitialized heap memory", 4233 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4234 #endif 4235 #if PARANOID_KERNEL_FREE 4236 vm_block_address_range("freed heap memory", 4237 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4238 #endif 4239 4240 // create the object cache for the page mappings 4241 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4242 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4243 NULL, NULL); 4244 if (gPageMappingsObjectCache == NULL) 4245 panic("failed to create page mappings object cache"); 4246 4247 
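	// Keep a minimum reserve of mapping objects around: map_page() can only
	// fail when no vm_page_mapping can be allocated, and the page fault code
	// relies on being able to replenish this cache via object_cache_reserve()
	// (see vm_soft_fault() below).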
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4248 4249 #if DEBUG_CACHE_LIST 4250 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4251 virtual_address_restrictions virtualRestrictions = {}; 4252 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4253 physical_address_restrictions physicalRestrictions = {}; 4254 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4255 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4256 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4257 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4258 &physicalRestrictions, (void**)&sCacheInfoTable); 4259 } 4260 #endif // DEBUG_CACHE_LIST 4261 4262 // add some debugger commands 4263 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4264 add_debugger_command("area", &dump_area, 4265 "Dump info about a particular area"); 4266 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4267 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4268 #if DEBUG_CACHE_LIST 4269 if (sCacheInfoTable != NULL) { 4270 add_debugger_command_etc("caches", &dump_caches, 4271 "List all VMCache trees", 4272 "[ \"-c\" ]\n" 4273 "All cache trees are listed sorted in decreasing order by number " 4274 "of\n" 4275 "used pages or, if \"-c\" is specified, by size of committed " 4276 "memory.\n", 4277 0); 4278 } 4279 #endif 4280 add_debugger_command("avail", &dump_available_memory, 4281 "Dump available memory"); 4282 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4283 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4284 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4285 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4286 add_debugger_command("string", &display_mem, "dump strings"); 4287 4288 add_debugger_command_etc("mapping", &dump_mapping_info, 4289 "Print address mapping information", 4290 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4291 "Prints low-level page mapping information for a given address. If\n" 4292 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4293 "address that is looked up in the translation map of the current\n" 4294 "team, respectively the team specified by thread ID <thread ID>. If\n" 4295 "\"-r\" is specified, <address> is a physical address that is\n" 4296 "searched in the translation map of all teams, respectively the team\n" 4297 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4298 "<address> is the address of a vm_page structure. 
The behavior is\n" 4299 "equivalent to specifying \"-r\" with the physical address of that\n" 4300 "page.\n", 4301 0); 4302 4303 TRACE(("vm_init: exit\n")); 4304 4305 vm_cache_init_post_heap(); 4306 4307 return err; 4308 } 4309 4310 4311 status_t 4312 vm_init_post_sem(kernel_args* args) 4313 { 4314 // This frees all unused boot loader resources and makes its space available 4315 // again 4316 arch_vm_init_end(args); 4317 unreserve_boot_loader_ranges(args); 4318 4319 // fill in all of the semaphores that were not allocated before 4320 // since we're still single threaded and only the kernel address space 4321 // exists, it isn't that hard to find all of the ones we need to create 4322 4323 arch_vm_translation_map_init_post_sem(args); 4324 4325 slab_init_post_sem(); 4326 4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4328 heap_init_post_sem(); 4329 #endif 4330 4331 return B_OK; 4332 } 4333 4334 4335 status_t 4336 vm_init_post_thread(kernel_args* args) 4337 { 4338 vm_page_init_post_thread(args); 4339 slab_init_post_thread(); 4340 return heap_init_post_thread(); 4341 } 4342 4343 4344 status_t 4345 vm_init_post_modules(kernel_args* args) 4346 { 4347 return arch_vm_init_post_modules(args); 4348 } 4349 4350 4351 void 4352 permit_page_faults(void) 4353 { 4354 Thread* thread = thread_get_current_thread(); 4355 if (thread != NULL) 4356 atomic_add(&thread->page_faults_allowed, 1); 4357 } 4358 4359 4360 void 4361 forbid_page_faults(void) 4362 { 4363 Thread* thread = thread_get_current_thread(); 4364 if (thread != NULL) 4365 atomic_add(&thread->page_faults_allowed, -1); 4366 } 4367 4368 4369 status_t 4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4371 bool isUser, addr_t* newIP) 4372 { 4373 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4374 faultAddress)); 4375 4376 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4377 4378 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4379 VMAddressSpace* addressSpace = NULL; 4380 4381 status_t status = B_OK; 4382 *newIP = 0; 4383 atomic_add((int32*)&sPageFaults, 1); 4384 4385 if (IS_KERNEL_ADDRESS(pageAddress)) { 4386 addressSpace = VMAddressSpace::GetKernel(); 4387 } else if (IS_USER_ADDRESS(pageAddress)) { 4388 addressSpace = VMAddressSpace::GetCurrent(); 4389 if (addressSpace == NULL) { 4390 if (!isUser) { 4391 dprintf("vm_page_fault: kernel thread accessing invalid user " 4392 "memory!\n"); 4393 status = B_BAD_ADDRESS; 4394 TPF(PageFaultError(-1, 4395 VMPageFaultTracing 4396 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4397 } else { 4398 // XXX weird state. 
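				// (That is, a userland thread faulted on a user address although
				// its team apparently no longer has an address space; the team
				// teardown path is not supposed to let this happen.)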
4399 panic("vm_page_fault: non kernel thread accessing user memory " 4400 "that doesn't exist!\n"); 4401 status = B_BAD_ADDRESS; 4402 } 4403 } 4404 } else { 4405 // the hit was probably in the 64k DMZ between kernel and user space 4406 // this keeps a user space thread from passing a buffer that crosses 4407 // into kernel space 4408 status = B_BAD_ADDRESS; 4409 TPF(PageFaultError(-1, 4410 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4411 } 4412 4413 if (status == B_OK) { 4414 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4415 isUser, NULL); 4416 } 4417 4418 if (status < B_OK) { 4419 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4420 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n", 4421 strerror(status), address, faultAddress, isWrite, isUser, 4422 thread_get_current_thread_id()); 4423 if (!isUser) { 4424 Thread* thread = thread_get_current_thread(); 4425 if (thread != NULL && thread->fault_handler != 0) { 4426 // this will cause the arch dependant page fault handler to 4427 // modify the IP on the interrupt frame or whatever to return 4428 // to this address 4429 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4430 } else { 4431 // unhandled page fault in the kernel 4432 panic("vm_page_fault: unhandled page fault in kernel space at " 4433 "0x%lx, ip 0x%lx\n", address, faultAddress); 4434 } 4435 } else { 4436 Thread* thread = thread_get_current_thread(); 4437 4438 #ifdef TRACE_FAULTS 4439 VMArea* area = NULL; 4440 if (addressSpace != NULL) { 4441 addressSpace->ReadLock(); 4442 area = addressSpace->LookupArea(faultAddress); 4443 } 4444 4445 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4446 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4447 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4448 thread->team->Name(), thread->team->id, 4449 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4450 faultAddress, area ? area->name : "???", faultAddress - (area ? 4451 area->Base() : 0x0)); 4452 4453 if (addressSpace != NULL) 4454 addressSpace->ReadUnlock(); 4455 #endif 4456 4457 // If the thread has a signal handler for SIGSEGV, we simply 4458 // send it the signal. Otherwise we notify the user debugger 4459 // first. 4460 struct sigaction action; 4461 if ((sigaction(SIGSEGV, NULL, &action) == 0 4462 && action.sa_handler != SIG_DFL 4463 && action.sa_handler != SIG_IGN) 4464 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4465 SIGSEGV)) { 4466 Signal signal(SIGSEGV, 4467 status == B_PERMISSION_DENIED 4468 ? 
SEGV_ACCERR : SEGV_MAPERR, 4469 EFAULT, thread->team->id); 4470 signal.SetAddress((void*)address); 4471 send_signal_to_thread(thread, signal, 0); 4472 } 4473 } 4474 } 4475 4476 if (addressSpace != NULL) 4477 addressSpace->Put(); 4478 4479 return B_HANDLED_INTERRUPT; 4480 } 4481 4482 4483 struct PageFaultContext { 4484 AddressSpaceReadLocker addressSpaceLocker; 4485 VMCacheChainLocker cacheChainLocker; 4486 4487 VMTranslationMap* map; 4488 VMCache* topCache; 4489 off_t cacheOffset; 4490 vm_page_reservation reservation; 4491 bool isWrite; 4492 4493 // return values 4494 vm_page* page; 4495 bool restart; 4496 bool pageAllocated; 4497 4498 4499 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4500 : 4501 addressSpaceLocker(addressSpace, true), 4502 map(addressSpace->TranslationMap()), 4503 isWrite(isWrite) 4504 { 4505 } 4506 4507 ~PageFaultContext() 4508 { 4509 UnlockAll(); 4510 vm_page_unreserve_pages(&reservation); 4511 } 4512 4513 void Prepare(VMCache* topCache, off_t cacheOffset) 4514 { 4515 this->topCache = topCache; 4516 this->cacheOffset = cacheOffset; 4517 page = NULL; 4518 restart = false; 4519 pageAllocated = false; 4520 4521 cacheChainLocker.SetTo(topCache); 4522 } 4523 4524 void UnlockAll(VMCache* exceptCache = NULL) 4525 { 4526 topCache = NULL; 4527 addressSpaceLocker.Unlock(); 4528 cacheChainLocker.Unlock(exceptCache); 4529 } 4530 }; 4531 4532 4533 /*! Gets the page that should be mapped into the area. 4534 Returns an error code other than \c B_OK, if the page couldn't be found or 4535 paged in. The locking state of the address space and the caches is undefined 4536 in that case. 4537 Returns \c B_OK with \c context.restart set to \c true, if the functions 4538 had to unlock the address space and all caches and is supposed to be called 4539 again. 4540 Returns \c B_OK with \c context.restart set to \c false, if the page was 4541 found. It is returned in \c context.page. The address space will still be 4542 locked as well as all caches starting from the top cache to at least the 4543 cache the page lives in. 4544 */ 4545 static status_t 4546 fault_get_page(PageFaultContext& context) 4547 { 4548 VMCache* cache = context.topCache; 4549 VMCache* lastCache = NULL; 4550 vm_page* page = NULL; 4551 4552 while (cache != NULL) { 4553 // We already hold the lock of the cache at this point. 4554 4555 lastCache = cache; 4556 4557 page = cache->LookupPage(context.cacheOffset); 4558 if (page != NULL && page->busy) { 4559 // page must be busy -- wait for it to become unbusy 4560 context.UnlockAll(cache); 4561 cache->ReleaseRefLocked(); 4562 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4563 4564 // restart the whole process 4565 context.restart = true; 4566 return B_OK; 4567 } 4568 4569 if (page != NULL) 4570 break; 4571 4572 // The current cache does not contain the page we're looking for. 4573 4574 // see if the backing store has it 4575 if (cache->HasPage(context.cacheOffset)) { 4576 // insert a fresh page and mark it busy -- we're going to read it in 4577 page = vm_page_allocate_page(&context.reservation, 4578 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4579 cache->InsertPage(page, context.cacheOffset); 4580 4581 // We need to unlock all caches and the address space while reading 4582 // the page in. Keep a reference to the cache around. 
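			// (Reading may block on disk I/O, and holding cache or address
			// space locks across that could needlessly stall other threads or
			// the page writer.)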
4583 cache->AcquireRefLocked(); 4584 context.UnlockAll(); 4585 4586 // read the page in 4587 generic_io_vec vec; 4588 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4589 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4590 4591 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4592 B_PHYSICAL_IO_REQUEST, &bytesRead); 4593 4594 cache->Lock(); 4595 4596 if (status < B_OK) { 4597 // on error remove and free the page 4598 dprintf("reading page from cache %p returned: %s!\n", 4599 cache, strerror(status)); 4600 4601 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4602 cache->RemovePage(page); 4603 vm_page_set_state(page, PAGE_STATE_FREE); 4604 4605 cache->ReleaseRefAndUnlock(); 4606 return status; 4607 } 4608 4609 // mark the page unbusy again 4610 cache->MarkPageUnbusy(page); 4611 4612 DEBUG_PAGE_ACCESS_END(page); 4613 4614 // Since we needed to unlock everything temporarily, the area 4615 // situation might have changed. So we need to restart the whole 4616 // process. 4617 cache->ReleaseRefAndUnlock(); 4618 context.restart = true; 4619 return B_OK; 4620 } 4621 4622 cache = context.cacheChainLocker.LockSourceCache(); 4623 } 4624 4625 if (page == NULL) { 4626 // There was no adequate page, determine the cache for a clean one. 4627 // Read-only pages come in the deepest cache, only the top most cache 4628 // may have direct write access. 4629 cache = context.isWrite ? context.topCache : lastCache; 4630 4631 // allocate a clean page 4632 page = vm_page_allocate_page(&context.reservation, 4633 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4634 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4635 page->physical_page_number)); 4636 4637 // insert the new page into our cache 4638 cache->InsertPage(page, context.cacheOffset); 4639 context.pageAllocated = true; 4640 } else if (page->Cache() != context.topCache && context.isWrite) { 4641 // We have a page that has the data we want, but in the wrong cache 4642 // object so we need to copy it and stick it into the top cache. 4643 vm_page* sourcePage = page; 4644 4645 // TODO: If memory is low, it might be a good idea to steal the page 4646 // from our source cache -- if possible, that is. 4647 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4648 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4649 4650 // To not needlessly kill concurrency we unlock all caches but the top 4651 // one while copying the page. Lacking another mechanism to ensure that 4652 // the source page doesn't disappear, we mark it busy. 4653 sourcePage->busy = true; 4654 context.cacheChainLocker.UnlockKeepRefs(true); 4655 4656 // copy the page 4657 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4658 sourcePage->physical_page_number * B_PAGE_SIZE); 4659 4660 context.cacheChainLocker.RelockCaches(true); 4661 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4662 4663 // insert the new page into our cache 4664 context.topCache->InsertPage(page, context.cacheOffset); 4665 context.pageAllocated = true; 4666 } else 4667 DEBUG_PAGE_ACCESS_START(page); 4668 4669 context.page = page; 4670 return B_OK; 4671 } 4672 4673 4674 /*! Makes sure the address in the given address space is mapped. 4675 4676 \param addressSpace The address space. 4677 \param originalAddress The address. Doesn't need to be page aligned. 4678 \param isWrite If \c true the address shall be write-accessible. 4679 \param isUser If \c true the access is requested by a userland team. 
4680 \param wirePage On success, if non \c NULL, the wired count of the page 4681 mapped at the given address is incremented and the page is returned 4682 via this parameter. 4683 \return \c B_OK on success, another error code otherwise. 4684 */ 4685 static status_t 4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4687 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4688 { 4689 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4690 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4691 originalAddress, isWrite, isUser)); 4692 4693 PageFaultContext context(addressSpace, isWrite); 4694 4695 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4696 status_t status = B_OK; 4697 4698 addressSpace->IncrementFaultCount(); 4699 4700 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4701 // the pages upfront makes sure we don't have any cache locked, so that the 4702 // page daemon/thief can do their job without problems. 4703 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4704 originalAddress); 4705 context.addressSpaceLocker.Unlock(); 4706 vm_page_reserve_pages(&context.reservation, reservePages, 4707 addressSpace == VMAddressSpace::Kernel() 4708 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4709 4710 while (true) { 4711 context.addressSpaceLocker.Lock(); 4712 4713 // get the area the fault was in 4714 VMArea* area = addressSpace->LookupArea(address); 4715 if (area == NULL) { 4716 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4717 "space\n", originalAddress); 4718 TPF(PageFaultError(-1, 4719 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4720 status = B_BAD_ADDRESS; 4721 break; 4722 } 4723 4724 // check permissions 4725 uint32 protection = get_area_page_protection(area, address); 4726 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4727 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4728 area->id, (void*)originalAddress); 4729 TPF(PageFaultError(area->id, 4730 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4731 status = B_PERMISSION_DENIED; 4732 break; 4733 } 4734 if (isWrite && (protection 4735 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4736 dprintf("write access attempted on write-protected area 0x%" 4737 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4738 TPF(PageFaultError(area->id, 4739 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4740 status = B_PERMISSION_DENIED; 4741 break; 4742 } else if (isExecute && (protection 4743 & (B_EXECUTE_AREA 4744 | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4745 dprintf("instruction fetch attempted on execute-protected area 0x%" 4746 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4747 TPF(PageFaultError(area->id, 4748 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4749 status = B_PERMISSION_DENIED; 4750 break; 4751 } else if (!isWrite && !isExecute && (protection 4752 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4753 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4754 " at %p\n", area->id, (void*)originalAddress); 4755 TPF(PageFaultError(area->id, 4756 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4757 status = B_PERMISSION_DENIED; 4758 break; 4759 } 4760 4761 // We have the area, it was a valid access, so let's try to resolve the 4762 // page fault now. 4763 // At first, the top most cache from the area is investigated. 
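		// The steps below: lock the area's top cache via Prepare(), give the
		// cache's Fault() hook the first chance to resolve the fault, otherwise
		// let fault_get_page() search the cache chain for (or create) the page,
		// and finally map it -- read-only if it doesn't belong to the top cache
		// and this isn't a write fault (copy-on-write).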
4764 4765 context.Prepare(vm_area_get_locked_cache(area), 4766 address - area->Base() + area->cache_offset); 4767 4768 // See if this cache has a fault handler -- this will do all the work 4769 // for us. 4770 { 4771 // Note, since the page fault is resolved with interrupts enabled, 4772 // the fault handler could be called more than once for the same 4773 // reason -- the store must take this into account. 4774 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4775 if (status != B_BAD_HANDLER) 4776 break; 4777 } 4778 4779 // The top most cache has no fault handler, so let's see if the cache or 4780 // its sources already have the page we're searching for (we're going 4781 // from top to bottom). 4782 status = fault_get_page(context); 4783 if (status != B_OK) { 4784 TPF(PageFaultError(area->id, status)); 4785 break; 4786 } 4787 4788 if (context.restart) 4789 continue; 4790 4791 // All went fine, all there is left to do is to map the page into the 4792 // address space. 4793 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4794 context.page)); 4795 4796 // If the page doesn't reside in the area's cache, we need to make sure 4797 // it's mapped in read-only, so that we cannot overwrite someone else's 4798 // data (copy-on-write) 4799 uint32 newProtection = protection; 4800 if (context.page->Cache() != context.topCache && !isWrite) 4801 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4802 4803 bool unmapPage = false; 4804 bool mapPage = true; 4805 4806 // check whether there's already a page mapped at the address 4807 context.map->Lock(); 4808 4809 phys_addr_t physicalAddress; 4810 uint32 flags; 4811 vm_page* mappedPage = NULL; 4812 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4813 && (flags & PAGE_PRESENT) != 0 4814 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4815 != NULL) { 4816 // Yep there's already a page. If it's ours, we can simply adjust 4817 // its protection. Otherwise we have to unmap it. 4818 if (mappedPage == context.page) { 4819 context.map->ProtectPage(area, address, newProtection); 4820 // Note: We assume that ProtectPage() is atomic (i.e. 4821 // the page isn't temporarily unmapped), otherwise we'd have 4822 // to make sure it isn't wired. 4823 mapPage = false; 4824 } else 4825 unmapPage = true; 4826 } 4827 4828 context.map->Unlock(); 4829 4830 if (unmapPage) { 4831 // If the page is wired, we can't unmap it. Wait until it is unwired 4832 // again and restart. Note that the page cannot be wired for 4833 // writing, since it it isn't in the topmost cache. So we can safely 4834 // ignore ranges wired for writing (our own and other concurrent 4835 // wiring attempts in progress) and in fact have to do that to avoid 4836 // a deadlock. 4837 VMAreaUnwiredWaiter waiter; 4838 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4839 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4840 // unlock everything and wait 4841 if (context.pageAllocated) { 4842 // ... but since we allocated a page and inserted it into 4843 // the top cache, remove and free it first. Otherwise we'd 4844 // have a page from a lower cache mapped while an upper 4845 // cache has a page that would shadow it. 
context.topCache->RemovePage(context.page); 4847 vm_page_free_etc(context.topCache, context.page, 4848 &context.reservation); 4849 } else 4850 DEBUG_PAGE_ACCESS_END(context.page); 4851 4852 context.UnlockAll(); 4853 waiter.waitEntry.Wait(); 4854 continue; 4855 } 4856 4857 // Note: The mapped page is a page of a lower cache. We are 4858 // guaranteed to have that cache locked, our new page is a copy of 4859 // that page, and the page is not busy. The logic for that guarantee 4860 // is as follows: Since the page is mapped, it must live in the top 4861 // cache (ruled out above) or any of its lower caches, and there is 4862 // (was before the new page was inserted) no other page in any 4863 // cache between the top cache and the page's cache (otherwise that 4864 // would be mapped instead). That in turn means that our algorithm 4865 // must have found it and therefore it cannot be busy either. 4866 DEBUG_PAGE_ACCESS_START(mappedPage); 4867 unmap_page(area, address); 4868 DEBUG_PAGE_ACCESS_END(mappedPage); 4869 } 4870 4871 if (mapPage) { 4872 if (map_page(area, context.page, address, newProtection, 4873 &context.reservation) != B_OK) { 4874 // Mapping can only fail when the page mapping object couldn't 4875 // be allocated. Save for the missing mapping everything is 4876 // fine, though. If this was a regular page fault, we'll simply 4877 // leave and probably fault again. To make sure we'll have more 4878 // luck then, we ensure that the minimum object reserve is 4879 // available. 4880 DEBUG_PAGE_ACCESS_END(context.page); 4881 4882 context.UnlockAll(); 4883 4884 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4885 != B_OK) { 4886 // Apparently the situation is serious. Let's get ourselves 4887 // killed. 4888 status = B_NO_MEMORY; 4889 } else if (wirePage != NULL) { 4890 // The caller expects us to wire the page. Since 4891 // object_cache_reserve() succeeded, we should now be able 4892 // to allocate a mapping structure. Restart. 

4893 continue; 4894 } 4895 4896 break; 4897 } 4898 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4899 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4900 4901 // also wire the page, if requested 4902 if (wirePage != NULL && status == B_OK) { 4903 increment_page_wired_count(context.page); 4904 *wirePage = context.page; 4905 } 4906 4907 DEBUG_PAGE_ACCESS_END(context.page); 4908 4909 break; 4910 } 4911 4912 return status; 4913 } 4914 4915 4916 status_t 4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4918 { 4919 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4920 } 4921 4922 status_t 4923 vm_put_physical_page(addr_t vaddr, void* handle) 4924 { 4925 return sPhysicalPageMapper->PutPage(vaddr, handle); 4926 } 4927 4928 4929 status_t 4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4931 void** _handle) 4932 { 4933 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4934 } 4935 4936 status_t 4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4938 { 4939 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4940 } 4941 4942 4943 status_t 4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4945 { 4946 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4947 } 4948 4949 status_t 4950 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4951 { 4952 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4953 } 4954 4955 4956 void 4957 vm_get_info(system_info* info) 4958 { 4959 swap_get_info(info); 4960 4961 MutexLocker locker(sAvailableMemoryLock); 4962 info->needed_memory = sNeededMemory; 4963 info->free_memory = sAvailableMemory; 4964 } 4965 4966 4967 uint32 4968 vm_num_page_faults(void) 4969 { 4970 return sPageFaults; 4971 } 4972 4973 4974 off_t 4975 vm_available_memory(void) 4976 { 4977 MutexLocker locker(sAvailableMemoryLock); 4978 return sAvailableMemory; 4979 } 4980 4981 4982 off_t 4983 vm_available_not_needed_memory(void) 4984 { 4985 MutexLocker locker(sAvailableMemoryLock); 4986 return sAvailableMemory - sNeededMemory; 4987 } 4988 4989 4990 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4991 debugger. 
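	Unlike the non-debug variant it doesn't acquire \c sAvailableMemoryLock,
	so the returned value may be slightly out of date -- acceptable in the
	kernel debugger, where blocking on locks has to be avoided.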
4992 */ 4993 off_t 4994 vm_available_not_needed_memory_debug(void) 4995 { 4996 return sAvailableMemory - sNeededMemory; 4997 } 4998 4999 5000 size_t 5001 vm_kernel_address_space_left(void) 5002 { 5003 return VMAddressSpace::Kernel()->FreeSpace(); 5004 } 5005 5006 5007 void 5008 vm_unreserve_memory(size_t amount) 5009 { 5010 mutex_lock(&sAvailableMemoryLock); 5011 5012 sAvailableMemory += amount; 5013 5014 mutex_unlock(&sAvailableMemoryLock); 5015 } 5016 5017 5018 status_t 5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5020 { 5021 size_t reserve = kMemoryReserveForPriority[priority]; 5022 5023 MutexLocker locker(sAvailableMemoryLock); 5024 5025 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5026 5027 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5028 sAvailableMemory -= amount; 5029 return B_OK; 5030 } 5031 5032 if (timeout <= 0) 5033 return B_NO_MEMORY; 5034 5035 // turn timeout into an absolute timeout 5036 timeout += system_time(); 5037 5038 // loop until we've got the memory or the timeout occurs 5039 do { 5040 sNeededMemory += amount; 5041 5042 // call the low resource manager 5043 locker.Unlock(); 5044 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5045 B_ABSOLUTE_TIMEOUT, timeout); 5046 locker.Lock(); 5047 5048 sNeededMemory -= amount; 5049 5050 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5051 sAvailableMemory -= amount; 5052 return B_OK; 5053 } 5054 } while (timeout > system_time()); 5055 5056 return B_NO_MEMORY; 5057 } 5058 5059 5060 status_t 5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5062 { 5063 // NOTE: The caller is responsible for synchronizing calls to this function! 5064 5065 AddressSpaceReadLocker locker; 5066 VMArea* area; 5067 status_t status = locker.SetFromArea(id, area); 5068 if (status != B_OK) 5069 return status; 5070 5071 // nothing to do, if the type doesn't change 5072 uint32 oldType = area->MemoryType(); 5073 if (type == oldType) 5074 return B_OK; 5075 5076 // set the memory type of the area and the mapped pages 5077 VMTranslationMap* map = area->address_space->TranslationMap(); 5078 map->Lock(); 5079 area->SetMemoryType(type); 5080 map->ProtectArea(area, area->protection); 5081 map->Unlock(); 5082 5083 // set the physical memory type 5084 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5085 if (error != B_OK) { 5086 // reset the memory type of the area and the mapped pages 5087 map->Lock(); 5088 area->SetMemoryType(oldType); 5089 map->ProtectArea(area, area->protection); 5090 map->Unlock(); 5091 return error; 5092 } 5093 5094 return B_OK; 5095 5096 } 5097 5098 5099 /*! This function enforces some protection properties: 5100 - kernel areas must be W^X (after kernel startup) 5101 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5102 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5103 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 5104 and B_KERNEL_WRITE_AREA. 
5105 */ 5106 static void 5107 fix_protection(uint32* protection) 5108 { 5109 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5110 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5111 || (*protection & B_WRITE_AREA) != 0) 5112 && !gKernelStartup) 5113 panic("kernel areas cannot be both writable and executable!"); 5114 5115 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5116 if ((*protection & B_USER_PROTECTION) == 0 5117 || (*protection & B_WRITE_AREA) != 0) 5118 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5119 else 5120 *protection |= B_KERNEL_READ_AREA; 5121 } 5122 } 5123 5124 5125 static void 5126 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5127 { 5128 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5129 info->area = area->id; 5130 info->address = (void*)area->Base(); 5131 info->size = area->Size(); 5132 info->protection = area->protection; 5133 info->lock = area->wiring; 5134 info->team = area->address_space->ID(); 5135 info->copy_count = 0; 5136 info->in_count = 0; 5137 info->out_count = 0; 5138 // TODO: retrieve real values here! 5139 5140 VMCache* cache = vm_area_get_locked_cache(area); 5141 5142 // Note, this is a simplification; the cache could be larger than this area 5143 info->ram_size = cache->page_count * B_PAGE_SIZE; 5144 5145 vm_area_put_locked_cache(cache); 5146 } 5147 5148 5149 static status_t 5150 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5151 { 5152 // is newSize a multiple of B_PAGE_SIZE? 5153 if (newSize & (B_PAGE_SIZE - 1)) 5154 return B_BAD_VALUE; 5155 5156 // lock all affected address spaces and the cache 5157 VMArea* area; 5158 VMCache* cache; 5159 5160 MultiAddressSpaceLocker locker; 5161 AreaCacheLocker cacheLocker; 5162 5163 status_t status; 5164 size_t oldSize; 5165 bool anyKernelArea; 5166 bool restart; 5167 5168 do { 5169 anyKernelArea = false; 5170 restart = false; 5171 5172 locker.Unset(); 5173 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5174 if (status != B_OK) 5175 return status; 5176 cacheLocker.SetTo(cache, true); // already locked 5177 5178 // enforce restrictions 5179 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5180 || (area->protection & B_KERNEL_AREA) != 0)) { 5181 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5182 "resize kernel area %" B_PRId32 " (%s)\n", 5183 team_get_current_team_id(), areaID, area->name); 5184 return B_NOT_ALLOWED; 5185 } 5186 // TODO: Enforce all restrictions (team, etc.)! 5187 5188 oldSize = area->Size(); 5189 if (newSize == oldSize) 5190 return B_OK; 5191 5192 if (cache->type != CACHE_TYPE_RAM) 5193 return B_NOT_ALLOWED; 5194 5195 if (oldSize < newSize) { 5196 // We need to check if all areas of this cache can be resized. 5197 for (VMArea* current = cache->areas; current != NULL; 5198 current = current->cache_next) { 5199 if (!current->address_space->CanResizeArea(current, newSize)) 5200 return B_ERROR; 5201 anyKernelArea 5202 |= current->address_space == VMAddressSpace::Kernel(); 5203 } 5204 } else { 5205 // We're shrinking the areas, so we must make sure the affected 5206 // ranges are not wired. 
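			// (wait_if_area_range_is_wired() may temporarily unlock the address
			// spaces and the cache while waiting, which is why the outer
			// do/while loop re-acquires the locks and restarts in that case.)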
5207 for (VMArea* current = cache->areas; current != NULL; 5208 current = current->cache_next) { 5209 anyKernelArea 5210 |= current->address_space == VMAddressSpace::Kernel(); 5211 5212 if (wait_if_area_range_is_wired(current, 5213 current->Base() + newSize, oldSize - newSize, &locker, 5214 &cacheLocker)) { 5215 restart = true; 5216 break; 5217 } 5218 } 5219 } 5220 } while (restart); 5221 5222 // Okay, looks good so far, so let's do it 5223 5224 int priority = kernel && anyKernelArea 5225 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5226 uint32 allocationFlags = kernel && anyKernelArea 5227 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5228 5229 if (oldSize < newSize) { 5230 // Growing the cache can fail, so we do it first. 5231 status = cache->Resize(cache->virtual_base + newSize, priority); 5232 if (status != B_OK) 5233 return status; 5234 } 5235 5236 for (VMArea* current = cache->areas; current != NULL; 5237 current = current->cache_next) { 5238 status = current->address_space->ResizeArea(current, newSize, 5239 allocationFlags); 5240 if (status != B_OK) 5241 break; 5242 5243 // We also need to unmap all pages beyond the new size, if the area has 5244 // shrunk 5245 if (newSize < oldSize) { 5246 VMCacheChainLocker cacheChainLocker(cache); 5247 cacheChainLocker.LockAllSourceCaches(); 5248 5249 unmap_pages(current, current->Base() + newSize, 5250 oldSize - newSize); 5251 5252 cacheChainLocker.Unlock(cache); 5253 } 5254 } 5255 5256 if (status == B_OK) { 5257 // Shrink or grow individual page protections if in use. 5258 if (area->page_protections != NULL) { 5259 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5260 uint8* newProtections 5261 = (uint8*)realloc(area->page_protections, bytes); 5262 if (newProtections == NULL) 5263 status = B_NO_MEMORY; 5264 else { 5265 area->page_protections = newProtections; 5266 5267 if (oldSize < newSize) { 5268 // init the additional page protections to that of the area 5269 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5270 uint32 areaProtection = area->protection 5271 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5272 memset(area->page_protections + offset, 5273 areaProtection | (areaProtection << 4), bytes - offset); 5274 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5275 uint8& entry = area->page_protections[offset - 1]; 5276 entry = (entry & 0x0f) | (areaProtection << 4); 5277 } 5278 } 5279 } 5280 } 5281 } 5282 5283 // shrinking the cache can't fail, so we do it now 5284 if (status == B_OK && newSize < oldSize) 5285 status = cache->Resize(cache->virtual_base + newSize, priority); 5286 5287 if (status != B_OK) { 5288 // Something failed -- resize the areas back to their original size. 5289 // This can fail, too, in which case we're seriously screwed. 
5290 for (VMArea* current = cache->areas; current != NULL; 5291 current = current->cache_next) { 5292 if (current->address_space->ResizeArea(current, oldSize, 5293 allocationFlags) != B_OK) { 5294 panic("vm_resize_area(): Failed and not being able to restore " 5295 "original state."); 5296 } 5297 } 5298 5299 cache->Resize(cache->virtual_base + oldSize, priority); 5300 } 5301 5302 // TODO: we must honour the lock restrictions of this area 5303 return status; 5304 } 5305 5306 5307 status_t 5308 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5309 { 5310 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5311 } 5312 5313 5314 status_t 5315 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5316 { 5317 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5318 } 5319 5320 5321 status_t 5322 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5323 bool user) 5324 { 5325 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5326 } 5327 5328 5329 void 5330 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5331 { 5332 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5333 } 5334 5335 5336 /*! Copies a range of memory directly from/to a page that might not be mapped 5337 at the moment. 5338 5339 For \a unsafeMemory the current mapping (if any) is ignored. The function 5340 walks through the respective area's cache chain to find the physical page 5341 and copies from/to it directly. 5342 The memory range starting at \a unsafeMemory with a length of \a size bytes 5343 must not cross a page boundary. 5344 5345 \param teamID The team ID identifying the address space \a unsafeMemory is 5346 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5347 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5348 is passed, the address space of the thread returned by 5349 debug_get_debugged_thread() is used. 5350 \param unsafeMemory The start of the unsafe memory range to be copied 5351 from/to. 5352 \param buffer A safely accessible kernel buffer to be copied from/to. 5353 \param size The number of bytes to be copied. 5354 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5355 \a unsafeMemory, the other way around otherwise. 
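	\return \c B_OK on success, \c B_BAD_VALUE if the range is too large or
		crosses a page boundary, \c B_BAD_ADDRESS if no suitable address space
		or area could be found, \c B_UNSUPPORTED if the page could not be
		located (or, when copying to \a unsafeMemory, doesn't live in the
		area's top cache), or the error returned by the physical copy.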
5356 */ 5357 status_t 5358 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5359 size_t size, bool copyToUnsafe) 5360 { 5361 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5362 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5363 return B_BAD_VALUE; 5364 } 5365 5366 // get the address space for the debugged thread 5367 VMAddressSpace* addressSpace; 5368 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5369 addressSpace = VMAddressSpace::Kernel(); 5370 } else if (teamID == B_CURRENT_TEAM) { 5371 Thread* thread = debug_get_debugged_thread(); 5372 if (thread == NULL || thread->team == NULL) 5373 return B_BAD_ADDRESS; 5374 5375 addressSpace = thread->team->address_space; 5376 } else 5377 addressSpace = VMAddressSpace::DebugGet(teamID); 5378 5379 if (addressSpace == NULL) 5380 return B_BAD_ADDRESS; 5381 5382 // get the area 5383 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5384 if (area == NULL) 5385 return B_BAD_ADDRESS; 5386 5387 // search the page 5388 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5389 + area->cache_offset; 5390 VMCache* cache = area->cache; 5391 vm_page* page = NULL; 5392 while (cache != NULL) { 5393 page = cache->DebugLookupPage(cacheOffset); 5394 if (page != NULL) 5395 break; 5396 5397 // Page not found in this cache -- if it is paged out, we must not try 5398 // to get it from lower caches. 5399 if (cache->DebugHasPage(cacheOffset)) 5400 break; 5401 5402 cache = cache->source; 5403 } 5404 5405 if (page == NULL) 5406 return B_UNSUPPORTED; 5407 5408 // copy from/to physical memory 5409 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5410 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5411 5412 if (copyToUnsafe) { 5413 if (page->Cache() != area->cache) 5414 return B_UNSUPPORTED; 5415 5416 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5417 } 5418 5419 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5420 } 5421 5422 5423 static inline bool 5424 validate_user_range(const void* addr, size_t size) 5425 { 5426 addr_t address = (addr_t)addr; 5427 5428 // Check for overflows on all addresses. 5429 if ((address + size) < address) 5430 return false; 5431 5432 // Validate that the address does not cross the kernel/user boundary. 5433 if (IS_USER_ADDRESS(address)) 5434 return IS_USER_ADDRESS(address + size); 5435 else 5436 return !IS_USER_ADDRESS(address + size); 5437 } 5438 5439 5440 // #pragma mark - kernel public API 5441 5442 5443 status_t 5444 user_memcpy(void* to, const void* from, size_t size) 5445 { 5446 if (!validate_user_range(to, size) || !validate_user_range(from, size)) 5447 return B_BAD_ADDRESS; 5448 5449 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5450 return B_BAD_ADDRESS; 5451 5452 return B_OK; 5453 } 5454 5455 5456 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5457 the string in \a to, NULL-terminating the result. 5458 5459 \param to Pointer to the destination C-string. 5460 \param from Pointer to the source C-string. 5461 \param size Size in bytes of the string buffer pointed to by \a to. 5462 5463 \return strlen(\a from). 5464 */ 5465 ssize_t 5466 user_strlcpy(char* to, const char* from, size_t size) 5467 { 5468 if (to == NULL && size != 0) 5469 return B_BAD_VALUE; 5470 if (from == NULL) 5471 return B_BAD_ADDRESS; 5472 5473 // Protect the source address from overflows. 
5474 size_t maxSize = size; 5475 if ((addr_t)from + maxSize < (addr_t)from) 5476 maxSize -= (addr_t)from + maxSize; 5477 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5478 maxSize = USER_TOP - (addr_t)from; 5479 5480 if (!validate_user_range(to, maxSize)) 5481 return B_BAD_ADDRESS; 5482 5483 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5484 if (result < 0) 5485 return result; 5486 5487 // If we hit the address overflow boundary, fail. 5488 if ((size_t)result >= maxSize && maxSize < size) 5489 return B_BAD_ADDRESS; 5490 5491 return result; 5492 } 5493 5494 5495 status_t 5496 user_memset(void* s, char c, size_t count) 5497 { 5498 if (!validate_user_range(s, count)) 5499 return B_BAD_ADDRESS; 5500 5501 if (arch_cpu_user_memset(s, c, count) < B_OK) 5502 return B_BAD_ADDRESS; 5503 5504 return B_OK; 5505 } 5506 5507 5508 /*! Wires a single page at the given address. 5509 5510 \param team The team whose address space the address belongs to. Supports 5511 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5512 parameter is ignored. 5513 \param address address The virtual address to wire down. Does not need to 5514 be page aligned. 5515 \param writable If \c true the page shall be writable. 5516 \param info On success the info is filled in, among other things 5517 containing the physical address the given virtual one translates to. 5518 \return \c B_OK, when the page could be wired, another error code otherwise. 5519 */ 5520 status_t 5521 vm_wire_page(team_id team, addr_t address, bool writable, 5522 VMPageWiringInfo* info) 5523 { 5524 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5525 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5526 5527 // compute the page protection that is required 5528 bool isUser = IS_USER_ADDRESS(address); 5529 uint32 requiredProtection = PAGE_PRESENT 5530 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5531 if (writable) 5532 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5533 5534 // get and read lock the address space 5535 VMAddressSpace* addressSpace = NULL; 5536 if (isUser) { 5537 if (team == B_CURRENT_TEAM) 5538 addressSpace = VMAddressSpace::GetCurrent(); 5539 else 5540 addressSpace = VMAddressSpace::Get(team); 5541 } else 5542 addressSpace = VMAddressSpace::GetKernel(); 5543 if (addressSpace == NULL) 5544 return B_ERROR; 5545 5546 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5547 5548 VMTranslationMap* map = addressSpace->TranslationMap(); 5549 status_t error = B_OK; 5550 5551 // get the area 5552 VMArea* area = addressSpace->LookupArea(pageAddress); 5553 if (area == NULL) { 5554 addressSpace->Put(); 5555 return B_BAD_ADDRESS; 5556 } 5557 5558 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5559 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5560 5561 // mark the area range wired 5562 area->Wire(&info->range); 5563 5564 // Lock the area's cache chain and the translation map. Needed to look 5565 // up the page and play with its wired count. 5566 cacheChainLocker.LockAllSourceCaches(); 5567 map->Lock(); 5568 5569 phys_addr_t physicalAddress; 5570 uint32 flags; 5571 vm_page* page; 5572 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5573 && (flags & requiredProtection) == requiredProtection 5574 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5575 != NULL) { 5576 // Already mapped with the correct permissions -- just increment 5577 // the page's wired count. 
5578 increment_page_wired_count(page); 5579 5580 map->Unlock(); 5581 cacheChainLocker.Unlock(); 5582 addressSpaceLocker.Unlock(); 5583 } else { 5584 // Let vm_soft_fault() map the page for us, if possible. We need 5585 // to fully unlock to avoid deadlocks. Since we have already 5586 // wired the area itself, nothing disturbing will happen with it 5587 // in the meantime. 5588 map->Unlock(); 5589 cacheChainLocker.Unlock(); 5590 addressSpaceLocker.Unlock(); 5591 5592 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5593 isUser, &page); 5594 5595 if (error != B_OK) { 5596 // The page could not be mapped -- clean up. 5597 VMCache* cache = vm_area_get_locked_cache(area); 5598 area->Unwire(&info->range); 5599 cache->ReleaseRefAndUnlock(); 5600 addressSpace->Put(); 5601 return error; 5602 } 5603 } 5604 5605 info->physicalAddress 5606 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5607 + address % B_PAGE_SIZE; 5608 info->page = page; 5609 5610 return B_OK; 5611 } 5612 5613 5614 /*! Unwires a single page previously wired via vm_wire_page(). 5615 5616 \param info The same object passed to vm_wire_page() before. 5617 */ 5618 void 5619 vm_unwire_page(VMPageWiringInfo* info) 5620 { 5621 // lock the address space 5622 VMArea* area = info->range.area; 5623 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5624 // takes over our reference 5625 5626 // lock the top cache 5627 VMCache* cache = vm_area_get_locked_cache(area); 5628 VMCacheChainLocker cacheChainLocker(cache); 5629 5630 if (info->page->Cache() != cache) { 5631 // The page is not in the top cache, so we lock the whole cache chain 5632 // before touching the page's wired count. 5633 cacheChainLocker.LockAllSourceCaches(); 5634 } 5635 5636 decrement_page_wired_count(info->page); 5637 5638 // remove the wired range from the area 5639 area->Unwire(&info->range); 5640 5641 cacheChainLocker.Unlock(); 5642 } 5643 5644 5645 /*! Wires down the given address range in the specified team's address space. 5646 5647 If successful the function 5648 - acquires a reference to the specified team's address space, 5649 - adds respective wired ranges to all areas that intersect with the given 5650 address range, 5651 - makes sure all pages in the given address range are mapped with the 5652 requested access permissions and increments their wired count. 5653 5654 It fails when \a team doesn't specify a valid address space, when any part 5655 of the specified address range is not covered by areas, when the concerned 5656 areas don't allow mapping with the requested permissions, or when mapping 5657 failed for another reason. 5658 5659 When successful the call must be balanced by an unlock_memory_etc() call with 5660 the exact same parameters. 5661 5662 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5663 supported. 5664 \param address The start of the address range to be wired. 5665 \param numBytes The size of the address range to be wired. 5666 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 5667 requests that the range must be wired writable ("read from device 5668 into memory"). 5669 \return \c B_OK on success, another error code otherwise.
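
	A minimal usage sketch, assuming \a team, \a buffer, and \a size stand
	for the caller's actual values:

		status_t status = lock_memory_etc(team, buffer, size, 0);
		if (status == B_OK) {
			// ... perform I/O on the wired range ...
			unlock_memory_etc(team, buffer, size, 0);
		}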
5670 */ 5671 status_t 5672 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5673 { 5674 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5675 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5676 5677 // compute the page protection that is required 5678 bool isUser = IS_USER_ADDRESS(address); 5679 bool writable = (flags & B_READ_DEVICE) == 0; 5680 uint32 requiredProtection = PAGE_PRESENT 5681 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5682 if (writable) 5683 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5684 5685 uint32 mallocFlags = isUser 5686 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5687 5688 // get and read lock the address space 5689 VMAddressSpace* addressSpace = NULL; 5690 if (isUser) { 5691 if (team == B_CURRENT_TEAM) 5692 addressSpace = VMAddressSpace::GetCurrent(); 5693 else 5694 addressSpace = VMAddressSpace::Get(team); 5695 } else 5696 addressSpace = VMAddressSpace::GetKernel(); 5697 if (addressSpace == NULL) 5698 return B_ERROR; 5699 5700 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5701 // We get a new address space reference here. The one we got above will 5702 // be freed by unlock_memory_etc(). 5703 5704 VMTranslationMap* map = addressSpace->TranslationMap(); 5705 status_t error = B_OK; 5706 5707 // iterate through all concerned areas 5708 addr_t nextAddress = lockBaseAddress; 5709 while (nextAddress != lockEndAddress) { 5710 // get the next area 5711 VMArea* area = addressSpace->LookupArea(nextAddress); 5712 if (area == NULL) { 5713 error = B_BAD_ADDRESS; 5714 break; 5715 } 5716 5717 addr_t areaStart = nextAddress; 5718 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5719 5720 // allocate the wired range (do that before locking the cache to avoid 5721 // deadlocks) 5722 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5723 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5724 if (range == NULL) { 5725 error = B_NO_MEMORY; 5726 break; 5727 } 5728 5729 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5730 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5731 5732 // mark the area range wired 5733 area->Wire(range); 5734 5735 // Depending on the area cache type and the wiring, we may not need to 5736 // look at the individual pages. 5737 if (area->cache_type == CACHE_TYPE_NULL 5738 || area->cache_type == CACHE_TYPE_DEVICE 5739 || area->wiring == B_FULL_LOCK 5740 || area->wiring == B_CONTIGUOUS) { 5741 nextAddress = areaEnd; 5742 continue; 5743 } 5744 5745 // Lock the area's cache chain and the translation map. Needed to look 5746 // up pages and play with their wired count. 5747 cacheChainLocker.LockAllSourceCaches(); 5748 map->Lock(); 5749 5750 // iterate through the pages and wire them 5751 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5752 phys_addr_t physicalAddress; 5753 uint32 flags; 5754 5755 vm_page* page; 5756 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5757 && (flags & requiredProtection) == requiredProtection 5758 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5759 != NULL) { 5760 // Already mapped with the correct permissions -- just increment 5761 // the page's wired count. 5762 increment_page_wired_count(page); 5763 } else { 5764 // Let vm_soft_fault() map the page for us, if possible. We need 5765 // to fully unlock to avoid deadlocks. 
Since we have already 5766 // wired the area itself, nothing disturbing will happen with it 5767 // in the meantime. 5768 map->Unlock(); 5769 cacheChainLocker.Unlock(); 5770 addressSpaceLocker.Unlock(); 5771 5772 error = vm_soft_fault(addressSpace, nextAddress, writable, 5773 false, isUser, &page); 5774 5775 addressSpaceLocker.Lock(); 5776 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5777 cacheChainLocker.LockAllSourceCaches(); 5778 map->Lock(); 5779 } 5780 5781 if (error != B_OK) 5782 break; 5783 } 5784 5785 map->Unlock(); 5786 5787 if (error == B_OK) { 5788 cacheChainLocker.Unlock(); 5789 } else { 5790 // An error occurred, so abort right here. If the current address 5791 // is the first in this area, unwire the area, since we won't get 5792 // to it when reverting what we've done so far. 5793 if (nextAddress == areaStart) { 5794 area->Unwire(range); 5795 cacheChainLocker.Unlock(); 5796 range->~VMAreaWiredRange(); 5797 free_etc(range, mallocFlags); 5798 } else 5799 cacheChainLocker.Unlock(); 5800 5801 break; 5802 } 5803 } 5804 5805 if (error != B_OK) { 5806 // An error occurred, so unwire all that we've already wired. Note that 5807 // even if not a single page was wired, unlock_memory_etc() is called 5808 // to put the address space reference. 5809 addressSpaceLocker.Unlock(); 5810 unlock_memory_etc(team, (void*)lockBaseAddress, 5811 nextAddress - lockBaseAddress, flags); 5812 } 5813 5814 return error; 5815 } 5816 5817 5818 status_t 5819 lock_memory(void* address, size_t numBytes, uint32 flags) 5820 { 5821 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5822 } 5823 5824 5825 /*! Unwires an address range previously wired with lock_memory_etc(). 5826 5827 Note that a call to this function must balance a previous lock_memory_etc() 5828 call with exactly the same parameters. 5829 */ 5830 status_t 5831 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5832 { 5833 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5834 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5835 5836 // compute the page protection that is required 5837 bool isUser = IS_USER_ADDRESS(address); 5838 bool writable = (flags & B_READ_DEVICE) == 0; 5839 uint32 requiredProtection = PAGE_PRESENT 5840 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5841 if (writable) 5842 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5843 5844 uint32 mallocFlags = isUser 5845 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5846 5847 // get and read lock the address space 5848 VMAddressSpace* addressSpace = NULL; 5849 if (isUser) { 5850 if (team == B_CURRENT_TEAM) 5851 addressSpace = VMAddressSpace::GetCurrent(); 5852 else 5853 addressSpace = VMAddressSpace::Get(team); 5854 } else 5855 addressSpace = VMAddressSpace::GetKernel(); 5856 if (addressSpace == NULL) 5857 return B_ERROR; 5858 5859 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5860 // Take over the address space reference. We don't unlock until we're 5861 // done. 
5862 5863 VMTranslationMap* map = addressSpace->TranslationMap(); 5864 status_t error = B_OK; 5865 5866 // iterate through all concerned areas 5867 addr_t nextAddress = lockBaseAddress; 5868 while (nextAddress != lockEndAddress) { 5869 // get the next area 5870 VMArea* area = addressSpace->LookupArea(nextAddress); 5871 if (area == NULL) { 5872 error = B_BAD_ADDRESS; 5873 break; 5874 } 5875 5876 addr_t areaStart = nextAddress; 5877 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5878 5879 // Lock the area's top cache. This is a requirement for 5880 // VMArea::Unwire(). 5881 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5882 5883 // Depending on the area cache type and the wiring, we may not need to 5884 // look at the individual pages. 5885 if (area->cache_type == CACHE_TYPE_NULL 5886 || area->cache_type == CACHE_TYPE_DEVICE 5887 || area->wiring == B_FULL_LOCK 5888 || area->wiring == B_CONTIGUOUS) { 5889 // unwire the range (to avoid deadlocks we delete the range after 5890 // unlocking the cache) 5891 nextAddress = areaEnd; 5892 VMAreaWiredRange* range = area->Unwire(areaStart, 5893 areaEnd - areaStart, writable); 5894 cacheChainLocker.Unlock(); 5895 if (range != NULL) { 5896 range->~VMAreaWiredRange(); 5897 free_etc(range, mallocFlags); 5898 } 5899 continue; 5900 } 5901 5902 // Lock the area's cache chain and the translation map. Needed to look 5903 // up pages and play with their wired count. 5904 cacheChainLocker.LockAllSourceCaches(); 5905 map->Lock(); 5906 5907 // iterate through the pages and unwire them 5908 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5909 phys_addr_t physicalAddress; 5910 uint32 flags; 5911 5912 vm_page* page; 5913 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5914 && (flags & PAGE_PRESENT) != 0 5915 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5916 != NULL) { 5917 // The page is still mapped -- just decrement 5918 // its wired count. 5919 decrement_page_wired_count(page); 5920 } else { 5921 panic("unlock_memory_etc(): Failed to unwire page: address " 5922 "space %p, address: %#" B_PRIxADDR, addressSpace, 5923 nextAddress); 5924 error = B_BAD_VALUE; 5925 break; 5926 } 5927 } 5928 5929 map->Unlock(); 5930 5931 // All pages are unwired. Remove the area's wired range as well (to 5932 // avoid deadlocks we delete the range after unlocking the cache). 5933 VMAreaWiredRange* range = area->Unwire(areaStart, 5934 areaEnd - areaStart, writable); 5935 5936 cacheChainLocker.Unlock(); 5937 5938 if (range != NULL) { 5939 range->~VMAreaWiredRange(); 5940 free_etc(range, mallocFlags); 5941 } 5942 5943 if (error != B_OK) 5944 break; 5945 } 5946 5947 // get rid of the address space reference lock_memory_etc() acquired 5948 addressSpace->Put(); 5949 5950 return error; 5951 } 5952 5953 5954 status_t 5955 unlock_memory(void* address, size_t numBytes, uint32 flags) 5956 { 5957 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5958 } 5959 5960 5961 /*! Similar to get_memory_map(), but also allows specifying the address space 5962 for the memory in question and has saner semantics. 5963 Returns \c B_OK when the complete range could be translated or 5964 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either 5965 case the actual number of entries is written to \c *_numEntries. Any other 5966 error case indicates complete failure; \c *_numEntries will be set to \c 0 5967 in this case.
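
	A minimal usage sketch (\a buffer and \a bufferSize are placeholders for
	the range to translate):

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer,
			bufferSize, entries, &count);
		if (status == B_OK || status == B_BUFFER_OVERFLOW) {
			// count now holds the number of valid entries in the table
		}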
5968 */ 5969 status_t 5970 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5971 physical_entry* table, uint32* _numEntries) 5972 { 5973 uint32 numEntries = *_numEntries; 5974 *_numEntries = 0; 5975 5976 VMAddressSpace* addressSpace; 5977 addr_t virtualAddress = (addr_t)address; 5978 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5979 phys_addr_t physicalAddress; 5980 status_t status = B_OK; 5981 int32 index = -1; 5982 addr_t offset = 0; 5983 bool interrupts = are_interrupts_enabled(); 5984 5985 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 5986 "entries)\n", team, address, numBytes, numEntries)); 5987 5988 if (numEntries == 0 || numBytes == 0) 5989 return B_BAD_VALUE; 5990 5991 // in which address space is the address to be found? 5992 if (IS_USER_ADDRESS(virtualAddress)) { 5993 if (team == B_CURRENT_TEAM) 5994 addressSpace = VMAddressSpace::GetCurrent(); 5995 else 5996 addressSpace = VMAddressSpace::Get(team); 5997 } else 5998 addressSpace = VMAddressSpace::GetKernel(); 5999 6000 if (addressSpace == NULL) 6001 return B_ERROR; 6002 6003 VMTranslationMap* map = addressSpace->TranslationMap(); 6004 6005 if (interrupts) 6006 map->Lock(); 6007 6008 while (offset < numBytes) { 6009 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6010 uint32 flags; 6011 6012 if (interrupts) { 6013 status = map->Query((addr_t)address + offset, &physicalAddress, 6014 &flags); 6015 } else { 6016 status = map->QueryInterrupt((addr_t)address + offset, 6017 &physicalAddress, &flags); 6018 } 6019 if (status < B_OK) 6020 break; 6021 if ((flags & PAGE_PRESENT) == 0) { 6022 panic("get_memory_map() called on unmapped memory!"); 6023 return B_BAD_ADDRESS; 6024 } 6025 6026 if (index < 0 && pageOffset > 0) { 6027 physicalAddress += pageOffset; 6028 if (bytes > B_PAGE_SIZE - pageOffset) 6029 bytes = B_PAGE_SIZE - pageOffset; 6030 } 6031 6032 // need to switch to the next physical_entry? 6033 if (index < 0 || table[index].address 6034 != physicalAddress - table[index].size) { 6035 if ((uint32)++index + 1 > numEntries) { 6036 // table too small 6037 break; 6038 } 6039 table[index].address = physicalAddress; 6040 table[index].size = bytes; 6041 } else { 6042 // page fits in the current entry 6043 table[index].size += bytes; 6044 } 6045 6046 offset += bytes; 6047 } 6048 6049 if (interrupts) 6050 map->Unlock(); 6051 6052 if (status != B_OK) 6053 return status; 6054 6055 if ((uint32)index + 1 > numEntries) { 6056 *_numEntries = index; 6057 return B_BUFFER_OVERFLOW; 6058 } 6059 6060 *_numEntries = index + 1; 6061 return B_OK; 6062 } 6063 6064 6065 /*! According to the BeBook, this function should always succeed. 6066 This is no longer the case.
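
	A minimal usage sketch of the public get_memory_map() backed by this
	function (\a buffer and \a bufferSize are placeholders):

		physical_entry table[4];
		if (get_memory_map(buffer, bufferSize, table, 4) == B_OK) {
			// with more than one slot, the list is terminated by an entry
			// whose size is 0
		}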
6067 */ 6068 extern "C" int32 6069 __get_memory_map_haiku(const void* address, size_t numBytes, 6070 physical_entry* table, int32 numEntries) 6071 { 6072 uint32 entriesRead = numEntries; 6073 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6074 table, &entriesRead); 6075 if (error != B_OK) 6076 return error; 6077 6078 // close the entry list 6079 6080 // if it's only one entry, we will silently accept the missing ending 6081 if (numEntries == 1) 6082 return B_OK; 6083 6084 if (entriesRead + 1 > (uint32)numEntries) 6085 return B_BUFFER_OVERFLOW; 6086 6087 table[entriesRead].address = 0; 6088 table[entriesRead].size = 0; 6089 6090 return B_OK; 6091 } 6092 6093 6094 area_id 6095 area_for(void* address) 6096 { 6097 return vm_area_for((addr_t)address, true); 6098 } 6099 6100 6101 area_id 6102 find_area(const char* name) 6103 { 6104 return VMAreaHash::Find(name); 6105 } 6106 6107 6108 status_t 6109 _get_area_info(area_id id, area_info* info, size_t size) 6110 { 6111 if (size != sizeof(area_info) || info == NULL) 6112 return B_BAD_VALUE; 6113 6114 AddressSpaceReadLocker locker; 6115 VMArea* area; 6116 status_t status = locker.SetFromArea(id, area); 6117 if (status != B_OK) 6118 return status; 6119 6120 fill_area_info(area, info, size); 6121 return B_OK; 6122 } 6123 6124 6125 status_t 6126 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6127 { 6128 addr_t nextBase = *(addr_t*)cookie; 6129 6130 // we're already through the list 6131 if (nextBase == (addr_t)-1) 6132 return B_ENTRY_NOT_FOUND; 6133 6134 if (team == B_CURRENT_TEAM) 6135 team = team_get_current_team_id(); 6136 6137 AddressSpaceReadLocker locker(team); 6138 if (!locker.IsLocked()) 6139 return B_BAD_TEAM_ID; 6140 6141 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6142 if (area == NULL) { 6143 nextBase = (addr_t)-1; 6144 return B_ENTRY_NOT_FOUND; 6145 } 6146 6147 fill_area_info(area, info, size); 6148 *cookie = (ssize_t)(area->Base() + 1); 6149 6150 return B_OK; 6151 } 6152 6153 6154 status_t 6155 set_area_protection(area_id area, uint32 newProtection) 6156 { 6157 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6158 newProtection, true); 6159 } 6160 6161 6162 status_t 6163 resize_area(area_id areaID, size_t newSize) 6164 { 6165 return vm_resize_area(areaID, newSize, true); 6166 } 6167 6168 6169 /*! Transfers the specified area to a new team. The caller must be the owner 6170 of the area. 6171 */ 6172 area_id 6173 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6174 bool kernel) 6175 { 6176 area_info info; 6177 status_t status = get_area_info(id, &info); 6178 if (status != B_OK) 6179 return status; 6180 6181 if (info.team != thread_get_current_thread()->team->id) 6182 return B_PERMISSION_DENIED; 6183 6184 // We need to mark the area cloneable so the following operations work. 6185 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6186 if (status != B_OK) 6187 return status; 6188 6189 area_id clonedArea = vm_clone_area(target, info.name, _address, 6190 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6191 if (clonedArea < 0) 6192 return clonedArea; 6193 6194 status = vm_delete_area(info.team, id, kernel); 6195 if (status != B_OK) { 6196 vm_delete_area(target, clonedArea, kernel); 6197 return status; 6198 } 6199 6200 // Now we can reset the protection to whatever it was before. 
6201 set_area_protection(clonedArea, info.protection); 6202 6203 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6204 6205 return clonedArea; 6206 } 6207 6208 6209 extern "C" area_id 6210 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6211 size_t numBytes, uint32 addressSpec, uint32 protection, 6212 void** _virtualAddress) 6213 { 6214 if (!arch_vm_supports_protection(protection)) 6215 return B_NOT_SUPPORTED; 6216 6217 fix_protection(&protection); 6218 6219 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6220 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6221 false); 6222 } 6223 6224 6225 area_id 6226 clone_area(const char* name, void** _address, uint32 addressSpec, 6227 uint32 protection, area_id source) 6228 { 6229 if ((protection & B_KERNEL_PROTECTION) == 0) 6230 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6231 6232 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6233 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6234 } 6235 6236 6237 area_id 6238 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6239 uint32 protection, uint32 flags, uint32 guardSize, 6240 const virtual_address_restrictions* virtualAddressRestrictions, 6241 const physical_address_restrictions* physicalAddressRestrictions, 6242 void** _address) 6243 { 6244 fix_protection(&protection); 6245 6246 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6247 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6248 true, _address); 6249 } 6250 6251 6252 extern "C" area_id 6253 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6254 size_t size, uint32 lock, uint32 protection) 6255 { 6256 fix_protection(&protection); 6257 6258 virtual_address_restrictions virtualRestrictions = {}; 6259 virtualRestrictions.address = *_address; 6260 virtualRestrictions.address_specification = addressSpec; 6261 physical_address_restrictions physicalRestrictions = {}; 6262 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6263 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6264 true, _address); 6265 } 6266 6267 6268 status_t 6269 delete_area(area_id area) 6270 { 6271 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6272 } 6273 6274 6275 // #pragma mark - Userland syscalls 6276 6277 6278 status_t 6279 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6280 addr_t size) 6281 { 6282 // filter out some unavailable values (for userland) 6283 switch (addressSpec) { 6284 case B_ANY_KERNEL_ADDRESS: 6285 case B_ANY_KERNEL_BLOCK_ADDRESS: 6286 return B_BAD_VALUE; 6287 } 6288 6289 addr_t address; 6290 6291 if (!IS_USER_ADDRESS(userAddress) 6292 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6293 return B_BAD_ADDRESS; 6294 6295 status_t status = vm_reserve_address_range( 6296 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6297 RESERVED_AVOID_BASE); 6298 if (status != B_OK) 6299 return status; 6300 6301 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6302 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6303 (void*)address, size); 6304 return B_BAD_ADDRESS; 6305 } 6306 6307 return B_OK; 6308 } 6309 6310 6311 status_t 6312 _user_unreserve_address_range(addr_t address, addr_t size) 6313 { 6314 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6315 (void*)address, size); 6316 } 6317 6318 6319 area_id 
6320 _user_area_for(void* address) 6321 { 6322 return vm_area_for((addr_t)address, false); 6323 } 6324 6325 6326 area_id 6327 _user_find_area(const char* userName) 6328 { 6329 char name[B_OS_NAME_LENGTH]; 6330 6331 if (!IS_USER_ADDRESS(userName) 6332 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6333 return B_BAD_ADDRESS; 6334 6335 return find_area(name); 6336 } 6337 6338 6339 status_t 6340 _user_get_area_info(area_id area, area_info* userInfo) 6341 { 6342 if (!IS_USER_ADDRESS(userInfo)) 6343 return B_BAD_ADDRESS; 6344 6345 area_info info; 6346 status_t status = get_area_info(area, &info); 6347 if (status < B_OK) 6348 return status; 6349 6350 // TODO: do we want to prevent userland from seeing kernel protections? 6351 //info.protection &= B_USER_PROTECTION; 6352 6353 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6354 return B_BAD_ADDRESS; 6355 6356 return status; 6357 } 6358 6359 6360 status_t 6361 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6362 { 6363 ssize_t cookie; 6364 6365 if (!IS_USER_ADDRESS(userCookie) 6366 || !IS_USER_ADDRESS(userInfo) 6367 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6368 return B_BAD_ADDRESS; 6369 6370 area_info info; 6371 status_t status = _get_next_area_info(team, &cookie, &info, 6372 sizeof(area_info)); 6373 if (status != B_OK) 6374 return status; 6375 6376 //info.protection &= B_USER_PROTECTION; 6377 6378 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6379 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6380 return B_BAD_ADDRESS; 6381 6382 return status; 6383 } 6384 6385 6386 status_t 6387 _user_set_area_protection(area_id area, uint32 newProtection) 6388 { 6389 if ((newProtection & ~B_USER_PROTECTION) != 0) 6390 return B_BAD_VALUE; 6391 6392 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6393 newProtection, false); 6394 } 6395 6396 6397 status_t 6398 _user_resize_area(area_id area, size_t newSize) 6399 { 6400 // TODO: Since we restrict deleting of areas to those owned by the team, 6401 // we should also do that for resizing (check other functions, too). 
6402 return vm_resize_area(area, newSize, false); 6403 } 6404 6405 6406 area_id 6407 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6408 team_id target) 6409 { 6410 // filter out some unavailable values (for userland) 6411 switch (addressSpec) { 6412 case B_ANY_KERNEL_ADDRESS: 6413 case B_ANY_KERNEL_BLOCK_ADDRESS: 6414 return B_BAD_VALUE; 6415 } 6416 6417 void* address; 6418 if (!IS_USER_ADDRESS(userAddress) 6419 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6420 return B_BAD_ADDRESS; 6421 6422 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6423 if (newArea < B_OK) 6424 return newArea; 6425 6426 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6427 return B_BAD_ADDRESS; 6428 6429 return newArea; 6430 } 6431 6432 6433 area_id 6434 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6435 uint32 protection, area_id sourceArea) 6436 { 6437 char name[B_OS_NAME_LENGTH]; 6438 void* address; 6439 6440 // filter out some unavailable values (for userland) 6441 switch (addressSpec) { 6442 case B_ANY_KERNEL_ADDRESS: 6443 case B_ANY_KERNEL_BLOCK_ADDRESS: 6444 return B_BAD_VALUE; 6445 } 6446 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6447 return B_BAD_VALUE; 6448 6449 if (!IS_USER_ADDRESS(userName) 6450 || !IS_USER_ADDRESS(userAddress) 6451 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6452 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6453 return B_BAD_ADDRESS; 6454 6455 fix_protection(&protection); 6456 6457 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6458 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6459 false); 6460 if (clonedArea < B_OK) 6461 return clonedArea; 6462 6463 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6464 delete_area(clonedArea); 6465 return B_BAD_ADDRESS; 6466 } 6467 6468 return clonedArea; 6469 } 6470 6471 6472 area_id 6473 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6474 size_t size, uint32 lock, uint32 protection) 6475 { 6476 char name[B_OS_NAME_LENGTH]; 6477 void* address; 6478 6479 // filter out some unavailable values (for userland) 6480 switch (addressSpec) { 6481 case B_ANY_KERNEL_ADDRESS: 6482 case B_ANY_KERNEL_BLOCK_ADDRESS: 6483 return B_BAD_VALUE; 6484 } 6485 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6486 return B_BAD_VALUE; 6487 6488 if (!IS_USER_ADDRESS(userName) 6489 || !IS_USER_ADDRESS(userAddress) 6490 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6491 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6492 return B_BAD_ADDRESS; 6493 6494 if (addressSpec == B_EXACT_ADDRESS 6495 && IS_KERNEL_ADDRESS(address)) 6496 return B_BAD_VALUE; 6497 6498 if (addressSpec == B_ANY_ADDRESS) 6499 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6500 if (addressSpec == B_BASE_ADDRESS) 6501 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6502 6503 fix_protection(&protection); 6504 6505 virtual_address_restrictions virtualRestrictions = {}; 6506 virtualRestrictions.address = address; 6507 virtualRestrictions.address_specification = addressSpec; 6508 physical_address_restrictions physicalRestrictions = {}; 6509 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6510 size, lock, protection, 0, 0, &virtualRestrictions, 6511 &physicalRestrictions, false, &address); 6512 6513 if (area >= B_OK 6514 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6515 delete_area(area); 6516 return B_BAD_ADDRESS; 6517 
} 6518 6519 return area; 6520 } 6521 6522 6523 status_t 6524 _user_delete_area(area_id area) 6525 { 6526 // Unlike the BeOS implementation, you can now only delete areas 6527 // that you have created yourself from userland. 6528 // The documentation to delete_area() explicitly states that this 6529 // will be restricted in the future, and so it will. 6530 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6531 } 6532 6533 6534 // TODO: create a BeOS style call for this! 6535 6536 area_id 6537 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6538 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6539 int fd, off_t offset) 6540 { 6541 char name[B_OS_NAME_LENGTH]; 6542 void* address; 6543 area_id area; 6544 6545 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6546 return B_BAD_VALUE; 6547 6548 fix_protection(&protection); 6549 6550 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6551 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6552 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6553 return B_BAD_ADDRESS; 6554 6555 if (addressSpec == B_EXACT_ADDRESS) { 6556 if ((addr_t)address + size < (addr_t)address 6557 || (addr_t)address % B_PAGE_SIZE != 0) { 6558 return B_BAD_VALUE; 6559 } 6560 if (!IS_USER_ADDRESS(address) 6561 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6562 return B_BAD_ADDRESS; 6563 } 6564 } 6565 6566 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6567 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6568 false); 6569 if (area < B_OK) 6570 return area; 6571 6572 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6573 return B_BAD_ADDRESS; 6574 6575 return area; 6576 } 6577 6578 6579 status_t 6580 _user_unmap_memory(void* _address, size_t size) 6581 { 6582 addr_t address = (addr_t)_address; 6583 6584 // check params 6585 if (size == 0 || (addr_t)address + size < (addr_t)address 6586 || (addr_t)address % B_PAGE_SIZE != 0) { 6587 return B_BAD_VALUE; 6588 } 6589 6590 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6591 return B_BAD_ADDRESS; 6592 6593 // Write lock the address space and ensure the address range is not wired. 6594 AddressSpaceWriteLocker locker; 6595 do { 6596 status_t status = locker.SetTo(team_get_current_team_id()); 6597 if (status != B_OK) 6598 return status; 6599 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6600 size, &locker)); 6601 6602 // unmap 6603 return unmap_address_range(locker.AddressSpace(), address, size, false); 6604 } 6605 6606 6607 status_t 6608 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6609 { 6610 // check address range 6611 addr_t address = (addr_t)_address; 6612 size = PAGE_ALIGN(size); 6613 6614 if ((address % B_PAGE_SIZE) != 0) 6615 return B_BAD_VALUE; 6616 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6617 || !IS_USER_ADDRESS((addr_t)address + size)) { 6618 // weird error code required by POSIX 6619 return ENOMEM; 6620 } 6621 6622 // extend and check protection 6623 if ((protection & ~B_USER_PROTECTION) != 0) 6624 return B_BAD_VALUE; 6625 6626 fix_protection(&protection); 6627 6628 // We need to write lock the address space, since we're going to play with 6629 // the areas. Also make sure that none of the areas is wired and that we're 6630 // actually allowed to change the protection. 
6631 AddressSpaceWriteLocker locker; 6632 6633 bool restart; 6634 do { 6635 restart = false; 6636 6637 status_t status = locker.SetTo(team_get_current_team_id()); 6638 if (status != B_OK) 6639 return status; 6640 6641 // First round: Check whether the whole range is covered by areas and we 6642 // are allowed to modify them. 6643 addr_t currentAddress = address; 6644 size_t sizeLeft = size; 6645 while (sizeLeft > 0) { 6646 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6647 if (area == NULL) 6648 return B_NO_MEMORY; 6649 6650 if ((area->protection & B_KERNEL_AREA) != 0) 6651 return B_NOT_ALLOWED; 6652 if (area->protection_max != 0 6653 && (protection & area->protection_max) != protection) { 6654 return B_NOT_ALLOWED; 6655 } 6656 6657 addr_t offset = currentAddress - area->Base(); 6658 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6659 6660 AreaCacheLocker cacheLocker(area); 6661 6662 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6663 &locker, &cacheLocker)) { 6664 restart = true; 6665 break; 6666 } 6667 6668 cacheLocker.Unlock(); 6669 6670 currentAddress += rangeSize; 6671 sizeLeft -= rangeSize; 6672 } 6673 } while (restart); 6674 6675 // Second round: If the protections differ from that of the area, create a 6676 // page protection array and re-map mapped pages. 6677 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6678 addr_t currentAddress = address; 6679 size_t sizeLeft = size; 6680 while (sizeLeft > 0) { 6681 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6682 if (area == NULL) 6683 return B_NO_MEMORY; 6684 6685 addr_t offset = currentAddress - area->Base(); 6686 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6687 6688 currentAddress += rangeSize; 6689 sizeLeft -= rangeSize; 6690 6691 if (area->page_protections == NULL) { 6692 if (area->protection == protection) 6693 continue; 6694 6695 status_t status = allocate_area_page_protections(area); 6696 if (status != B_OK) 6697 return status; 6698 } 6699 6700 // We need to lock the complete cache chain, since we potentially unmap 6701 // pages of lower caches. 6702 VMCache* topCache = vm_area_get_locked_cache(area); 6703 VMCacheChainLocker cacheChainLocker(topCache); 6704 cacheChainLocker.LockAllSourceCaches(); 6705 6706 for (addr_t pageAddress = area->Base() + offset; 6707 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6708 map->Lock(); 6709 6710 set_area_page_protection(area, pageAddress, protection); 6711 6712 phys_addr_t physicalAddress; 6713 uint32 flags; 6714 6715 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6716 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6717 map->Unlock(); 6718 continue; 6719 } 6720 6721 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6722 if (page == NULL) { 6723 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6724 "\n", area, physicalAddress); 6725 map->Unlock(); 6726 return B_ERROR; 6727 } 6728 6729 // If the page is not in the topmost cache and write access is 6730 // requested, we have to unmap it. Otherwise we can re-map it with 6731 // the new protection. 
6732 bool unmapPage = page->Cache() != topCache 6733 && (protection & B_WRITE_AREA) != 0; 6734 6735 if (!unmapPage) 6736 map->ProtectPage(area, pageAddress, protection); 6737 6738 map->Unlock(); 6739 6740 if (unmapPage) { 6741 DEBUG_PAGE_ACCESS_START(page); 6742 unmap_page(area, pageAddress); 6743 DEBUG_PAGE_ACCESS_END(page); 6744 } 6745 } 6746 } 6747 6748 return B_OK; 6749 } 6750 6751 6752 status_t 6753 _user_sync_memory(void* _address, size_t size, uint32 flags) 6754 { 6755 addr_t address = (addr_t)_address; 6756 size = PAGE_ALIGN(size); 6757 6758 // check params 6759 if ((address % B_PAGE_SIZE) != 0) 6760 return B_BAD_VALUE; 6761 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6762 || !IS_USER_ADDRESS((addr_t)address + size)) { 6763 // weird error code required by POSIX 6764 return ENOMEM; 6765 } 6766 6767 bool writeSync = (flags & MS_SYNC) != 0; 6768 bool writeAsync = (flags & MS_ASYNC) != 0; 6769 if (writeSync && writeAsync) 6770 return B_BAD_VALUE; 6771 6772 if (size == 0 || (!writeSync && !writeAsync)) 6773 return B_OK; 6774 6775 // iterate through the range and sync all concerned areas 6776 while (size > 0) { 6777 // read lock the address space 6778 AddressSpaceReadLocker locker; 6779 status_t error = locker.SetTo(team_get_current_team_id()); 6780 if (error != B_OK) 6781 return error; 6782 6783 // get the first area 6784 VMArea* area = locker.AddressSpace()->LookupArea(address); 6785 if (area == NULL) 6786 return B_NO_MEMORY; 6787 6788 uint32 offset = address - area->Base(); 6789 size_t rangeSize = min_c(area->Size() - offset, size); 6790 offset += area->cache_offset; 6791 6792 // lock the cache 6793 AreaCacheLocker cacheLocker(area); 6794 if (!cacheLocker) 6795 return B_BAD_VALUE; 6796 VMCache* cache = area->cache; 6797 6798 locker.Unlock(); 6799 6800 uint32 firstPage = offset >> PAGE_SHIFT; 6801 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6802 6803 // write the pages 6804 if (cache->type == CACHE_TYPE_VNODE) { 6805 if (writeSync) { 6806 // synchronous 6807 error = vm_page_write_modified_page_range(cache, firstPage, 6808 endPage); 6809 if (error != B_OK) 6810 return error; 6811 } else { 6812 // asynchronous 6813 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6814 // TODO: This is probably not quite what is supposed to happen. 6815 // Especially when a lot has to be written, it might take ages 6816 // until it really hits the disk. 6817 } 6818 } 6819 6820 address += rangeSize; 6821 size -= rangeSize; 6822 } 6823 6824 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6825 // synchronize multiple mappings of the same file. In our VM they never get 6826 // out of sync, though, so we don't have to do anything. 6827 6828 return B_OK; 6829 } 6830 6831 6832 status_t 6833 _user_memory_advice(void* _address, size_t size, uint32 advice) 6834 { 6835 addr_t address = (addr_t)_address; 6836 if ((address % B_PAGE_SIZE) != 0) 6837 return B_BAD_VALUE; 6838 6839 size = PAGE_ALIGN(size); 6840 if (address + size < address || !IS_USER_ADDRESS(address) 6841 || !IS_USER_ADDRESS(address + size)) { 6842 // weird error code required by POSIX 6843 return B_NO_MEMORY; 6844 } 6845 6846 switch (advice) { 6847 case MADV_NORMAL: 6848 case MADV_SEQUENTIAL: 6849 case MADV_RANDOM: 6850 case MADV_WILLNEED: 6851 case MADV_DONTNEED: 6852 // TODO: Implement! 
6853 break; 6854 6855 case MADV_FREE: 6856 { 6857 AddressSpaceWriteLocker locker; 6858 do { 6859 status_t status = locker.SetTo(team_get_current_team_id()); 6860 if (status != B_OK) 6861 return status; 6862 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6863 address, size, &locker)); 6864 6865 discard_address_range(locker.AddressSpace(), address, size, false); 6866 break; 6867 } 6868 6869 default: 6870 return B_BAD_VALUE; 6871 } 6872 6873 return B_OK; 6874 } 6875 6876 6877 status_t 6878 _user_get_memory_properties(team_id teamID, const void* address, 6879 uint32* _protected, uint32* _lock) 6880 { 6881 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6882 return B_BAD_ADDRESS; 6883 6884 AddressSpaceReadLocker locker; 6885 status_t error = locker.SetTo(teamID); 6886 if (error != B_OK) 6887 return error; 6888 6889 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6890 if (area == NULL) 6891 return B_NO_MEMORY; 6892 6893 6894 uint32 protection = area->protection; 6895 if (area->page_protections != NULL) 6896 protection = get_area_page_protection(area, (addr_t)address); 6897 6898 uint32 wiring = area->wiring; 6899 6900 locker.Unlock(); 6901 6902 error = user_memcpy(_protected, &protection, sizeof(protection)); 6903 if (error != B_OK) 6904 return error; 6905 6906 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6907 6908 return error; 6909 } 6910 6911 6912 // An ordered list of non-overlapping ranges to track mlock/munlock locking. 6913 // It is allowed to call mlock/munlock in unbalanced ways (lock a range 6914 // multiple times, unlock a part of it, lock several consecutive ranges and 6915 // unlock them in one go, etc). However the low level lock_memory and 6916 // unlock_memory calls require the locks/unlocks to be balanced (you lock a 6917 // fixed range, and then unlock exactly the same range). This list allows to 6918 // keep track of what was locked exactly so we can unlock the correct things. 6919 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> { 6920 addr_t start; 6921 addr_t end; 6922 6923 status_t LockMemory() 6924 { 6925 return lock_memory((void*)start, end - start, 0); 6926 } 6927 6928 status_t UnlockMemory() 6929 { 6930 return unlock_memory((void*)start, end - start, 0); 6931 } 6932 6933 status_t Move(addr_t start, addr_t end) 6934 { 6935 status_t result = lock_memory((void*)start, end - start, 0); 6936 if (result != B_OK) 6937 return result; 6938 6939 result = UnlockMemory(); 6940 6941 if (result != B_OK) { 6942 // What can we do if the unlock fails? 
6943 panic("Failed to unlock memory: %s", strerror(result)); 6944 return result; 6945 } 6946 6947 this->start = start; 6948 this->end = end; 6949 6950 return B_OK; 6951 } 6952 }; 6953 6954 6955 status_t 6956 _user_mlock(const void* address, size_t size) { 6957 // Maybe there's nothing to do, in which case, do nothing 6958 if (size == 0) 6959 return B_OK; 6960 6961 // Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to 6962 // reject the call otherwise) 6963 if ((addr_t)address % B_PAGE_SIZE != 0) 6964 return EINVAL; 6965 6966 size = ROUNDUP(size, B_PAGE_SIZE); 6967 6968 addr_t endAddress = (addr_t)address + size; 6969 6970 // Pre-allocate a linked list element we may need (it's simpler to do it 6971 // now than run out of memory in the middle of changing things) 6972 LockedPages* newRange = new(std::nothrow) LockedPages(); 6973 if (newRange == NULL) 6974 return ENOMEM; 6975 6976 // Get and lock the team 6977 Team* team = thread_get_current_thread()->team; 6978 TeamLocker teamLocker(team); 6979 teamLocker.Lock(); 6980 6981 status_t error = B_OK; 6982 LockedPagesList* lockedPages = &team->locked_pages_list; 6983 6984 // Locate the first locked range possibly overlapping ours 6985 LockedPages* currentRange = lockedPages->Head(); 6986 while (currentRange != NULL && currentRange->end <= (addr_t)address) 6987 currentRange = lockedPages->GetNext(currentRange); 6988 6989 if (currentRange == NULL || currentRange->start >= endAddress) { 6990 // No existing range is overlapping with ours. We can just lock our 6991 // range and stop here. 6992 newRange->start = (addr_t)address; 6993 newRange->end = endAddress; 6994 error = newRange->LockMemory(); 6995 if (error != B_OK) { 6996 delete newRange; 6997 return error; 6998 } 6999 lockedPages->InsertBefore(currentRange, newRange); 7000 return B_OK; 7001 } 7002 7003 // We get here when there is at least one existing overlapping range. 7004 7005 if (currentRange->start <= (addr_t)address) { 7006 if (currentRange->end >= endAddress) { 7007 // An existing range is already fully covering the pages we need to 7008 // lock. Nothing to do then. 7009 delete newRange; 7010 return B_OK; 7011 } else { 7012 // An existing range covers the start of the area we want to lock. 7013 // Advance our start address to avoid it. 7014 address = (void*)currentRange->end; 7015 7016 // Move on to the next range for the next step 7017 currentRange = lockedPages->GetNext(currentRange); 7018 } 7019 } 7020 7021 // First, lock the new range 7022 newRange->start = (addr_t)address; 7023 newRange->end = endAddress; 7024 error = newRange->LockMemory(); 7025 if (error != B_OK) { 7026 delete newRange; 7027 return error; 7028 } 7029 7030 // Unlock all ranges fully overlapping with the area we need to lock 7031 while (currentRange != NULL && currentRange->end < endAddress) { 7032 // The existing range is fully contained inside the new one we're 7033 // trying to lock.
Delete/unlock it, and replace it with a new one 7034 // (this limits fragmentation of the range list, and is simpler to 7035 // manage) 7036 error = currentRange->UnlockMemory(); 7037 if (error != B_OK) { 7038 panic("Failed to unlock a memory range: %s", strerror(error)); 7039 newRange->UnlockMemory(); 7040 delete newRange; 7041 return error; 7042 } 7043 LockedPages* temp = currentRange; 7044 currentRange = lockedPages->GetNext(currentRange); 7045 lockedPages->Remove(temp); 7046 delete temp; 7047 } 7048 7049 if (currentRange != NULL) { 7050 // One last range may cover the end of the area we're trying to lock 7051 7052 if (currentRange->start == (addr_t)address) { 7053 // In case two overlapping ranges (one at the start and the other 7054 // at the end) already cover the area we're after, there's nothing 7055 // more to do. So we destroy our new extra allocation 7056 error = newRange->UnlockMemory(); 7057 delete newRange; 7058 return error; 7059 } 7060 7061 if (currentRange->start < endAddress) { 7062 // Make sure the last range is not overlapping, by moving its start 7063 error = currentRange->Move(endAddress, currentRange->end); 7064 if (error != B_OK) { 7065 panic("Failed to move a memory range: %s", strerror(error)); 7066 newRange->UnlockMemory(); 7067 delete newRange; 7068 return error; 7069 } 7070 } 7071 } 7072 7073 // Finally, store the new range in the locked list 7074 lockedPages->InsertBefore(currentRange, newRange); 7075 return B_OK; 7076 } 7077 7078 7079 status_t 7080 _user_munlock(const void* address, size_t size) { 7081 // Maybe there's nothing to do, in which case, do nothing 7082 if (size == 0) 7083 return B_OK; 7084 7085 // Make sure the address is multiple of B_PAGE_SIZE (POSIX allows us to 7086 // reject the call otherwise) 7087 if ((addr_t)address % B_PAGE_SIZE != 0) 7088 return EINVAL; 7089 7090 // Round size up to the next page 7091 size = ROUNDUP(size, B_PAGE_SIZE); 7092 7093 addr_t endAddress = (addr_t)address + size; 7094 7095 // Get and lock the team 7096 Team* team = thread_get_current_thread()->team; 7097 TeamLocker teamLocker(team); 7098 teamLocker.Lock(); 7099 LockedPagesList* lockedPages = &team->locked_pages_list; 7100 7101 status_t error = B_OK; 7102 7103 // Locate the first locked range possibly overlapping ours 7104 LockedPages* currentRange = lockedPages->Head(); 7105 while (currentRange != NULL && currentRange->end <= (addr_t)address) 7106 currentRange = lockedPages->GetNext(currentRange); 7107 7108 if (currentRange == NULL || currentRange->start >= endAddress) { 7109 // No range is intersecting, nothing to unlock 7110 return B_OK; 7111 } 7112 7113 if (currentRange->start < (addr_t)address) { 7114 if (currentRange->end > endAddress) { 7115 // There is a range fully covering the area we want to unlock, 7116 // and it extends on both sides. 
We need to split it in two 7117 LockedPages* newRange = new(std::nothrow) LockedPages(); 7118 if (newRange == NULL) 7119 return ENOMEM; 7120 7121 newRange->start = endAddress; 7122 newRange->end = currentRange->end; 7123 7124 error = newRange->LockMemory(); 7125 if (error != B_OK) { 7126 delete newRange; 7127 return error; 7128 } 7129 7130 error = currentRange->Move(currentRange->start, (addr_t)address); 7131 if (error != B_OK) { 7132 delete newRange; 7133 return error; 7134 } 7135 7136 lockedPages->InsertAfter(currentRange, newRange); 7137 return B_OK; 7138 } else { 7139 // There is a range that overlaps and extends before the one we 7140 // want to unlock, we need to shrink it 7141 error = currentRange->Move(currentRange->start, (addr_t)address); 7142 if (error != B_OK) 7143 return error; 7144 } 7145 } 7146 7147 while (currentRange != NULL && currentRange->end <= endAddress) { 7148 // Unlock all fully overlapping ranges 7149 error = currentRange->UnlockMemory(); 7150 if (error != B_OK) 7151 return error; 7152 LockedPages* temp = currentRange; 7153 currentRange = lockedPages->GetNext(currentRange); 7154 lockedPages->Remove(temp); 7155 delete temp; 7156 } 7157 7158 // Finally split the last partially overlapping range if any 7159 if (currentRange != NULL && currentRange->start < endAddress) { 7160 error = currentRange->Move(endAddress, currentRange->end); 7161 if (error != B_OK) 7162 return error; 7163 } 7164 7165 return B_OK; 7166 } 7167 7168 7169 // #pragma mark -- compatibility 7170 7171 7172 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7173 7174 7175 struct physical_entry_beos { 7176 uint32 address; 7177 uint32 size; 7178 }; 7179 7180 7181 /*! The physical_entry structure has changed. We need to translate it to the 7182 old one. 7183 */ 7184 extern "C" int32 7185 __get_memory_map_beos(const void* _address, size_t numBytes, 7186 physical_entry_beos* table, int32 numEntries) 7187 { 7188 if (numEntries <= 0) 7189 return B_BAD_VALUE; 7190 7191 const uint8* address = (const uint8*)_address; 7192 7193 int32 count = 0; 7194 while (numBytes > 0 && count < numEntries) { 7195 physical_entry entry; 7196 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7197 if (result < 0) { 7198 if (result != B_BUFFER_OVERFLOW) 7199 return result; 7200 } 7201 7202 if (entry.address >= (phys_addr_t)1 << 32) { 7203 panic("get_memory_map(): Address is greater 4 GB!"); 7204 return B_ERROR; 7205 } 7206 7207 table[count].address = entry.address; 7208 table[count++].size = entry.size; 7209 7210 address += entry.size; 7211 numBytes -= entry.size; 7212 } 7213 7214 // null-terminate the table, if possible 7215 if (count < numEntries) { 7216 table[count].address = 0; 7217 table[count].size = 0; 7218 } 7219 7220 return B_OK; 7221 } 7222 7223 7224 /*! The type of the \a physicalAddress parameter has changed from void* to 7225 phys_addr_t. 7226 */ 7227 extern "C" area_id 7228 __map_physical_memory_beos(const char* name, void* physicalAddress, 7229 size_t numBytes, uint32 addressSpec, uint32 protection, 7230 void** _virtualAddress) 7231 { 7232 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7233 addressSpec, protection, _virtualAddress); 7234 } 7235 7236 7237 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7238 we meddle with the \a lock parameter to force 32 bit. 
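
	For instance, a legacy (BASE-versioned) call along the lines of

		create_area("dma buffer", &address, B_ANY_ADDRESS, B_PAGE_SIZE,
			B_CONTIGUOUS, B_READ_AREA | B_WRITE_AREA);

	is resolved to this function and transparently uses
	\c B_32_BIT_CONTIGUOUS instead; the name and parameters above are merely
	illustrative.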
7239 */ 7240 extern "C" area_id 7241 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7242 size_t size, uint32 lock, uint32 protection) 7243 { 7244 switch (lock) { 7245 case B_NO_LOCK: 7246 break; 7247 case B_FULL_LOCK: 7248 case B_LAZY_LOCK: 7249 lock = B_32_BIT_FULL_LOCK; 7250 break; 7251 case B_CONTIGUOUS: 7252 lock = B_32_BIT_CONTIGUOUS; 7253 break; 7254 } 7255 7256 return __create_area_haiku(name, _address, addressSpec, size, lock, 7257 protection); 7258 } 7259 7260 7261 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7262 "BASE"); 7263 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7264 "map_physical_memory@", "BASE"); 7265 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7266 "BASE"); 7267 7268 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7269 "get_memory_map@@", "1_ALPHA3"); 7270 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7271 "map_physical_memory@@", "1_ALPHA3"); 7272 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7273 "1_ALPHA3"); 7274 7275 7276 #else 7277 7278 7279 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7280 "get_memory_map@@", "BASE"); 7281 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7282 "map_physical_memory@@", "BASE"); 7283 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7284 "BASE"); 7285 7286 7287 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7288