1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_priv.h> 52 #include <vm/VMAddressSpace.h> 53 #include <vm/VMArea.h> 54 #include <vm/VMCache.h> 55 56 #include "VMAddressSpaceLocking.h" 57 #include "VMAnonymousCache.h" 58 #include "VMAnonymousNoSwapCache.h" 59 #include "IORequest.h" 60 61 62 //#define TRACE_VM 63 //#define TRACE_FAULTS 64 #ifdef TRACE_VM 65 # define TRACE(x) dprintf x 66 #else 67 # define TRACE(x) ; 68 #endif 69 #ifdef TRACE_FAULTS 70 # define FTRACE(x) dprintf x 71 #else 72 # define FTRACE(x) ; 73 #endif 74 75 76 namespace { 77 78 class AreaCacheLocking { 79 public: 80 inline bool Lock(VMCache* lockable) 81 { 82 return false; 83 } 84 85 inline void Unlock(VMCache* lockable) 86 { 87 vm_area_put_locked_cache(lockable); 88 } 89 }; 90 91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 92 public: 93 inline AreaCacheLocker(VMCache* cache = NULL) 94 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 95 { 96 } 97 98 inline AreaCacheLocker(VMArea* area) 99 : AutoLocker<VMCache, AreaCacheLocking>() 100 { 101 SetTo(area); 102 } 103 104 inline void SetTo(VMCache* cache, bool alreadyLocked) 105 { 106 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 107 } 108 109 inline void SetTo(VMArea* area) 110 { 111 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 112 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 113 } 114 }; 115 116 117 class VMCacheChainLocker { 118 public: 119 VMCacheChainLocker() 120 : 121 fTopCache(NULL), 122 fBottomCache(NULL) 123 { 124 } 125 126 VMCacheChainLocker(VMCache* topCache) 127 : 128 fTopCache(topCache), 129 fBottomCache(topCache) 130 { 131 } 132 133 ~VMCacheChainLocker() 134 { 135 Unlock(); 136 } 137 138 void SetTo(VMCache* topCache) 139 { 140 fTopCache = topCache; 141 fBottomCache = topCache; 142 143 if (topCache != NULL) 144 topCache->SetUserData(NULL); 145 } 146 147 VMCache* LockSourceCache() 148 { 149 if (fBottomCache == NULL || fBottomCache->source == NULL) 150 return NULL; 151 152 VMCache* previousCache = fBottomCache; 153 154 fBottomCache = fBottomCache->source; 155 fBottomCache->Lock(); 156 fBottomCache->AcquireRefLocked(); 157 fBottomCache->SetUserData(previousCache); 158 159 return fBottomCache; 160 } 161 162 void LockAllSourceCaches() 163 { 164 while (LockSourceCache() != NULL) { 165 } 166 } 167 168 void Unlock(VMCache* exceptCache = NULL) 169 { 170 if (fTopCache == NULL) 171 return; 172 173 // Unlock caches in source -> consumer direction. This is important to 174 // avoid double-locking and a reversal of locking order in case a cache 175 // is eligable for merging. 176 VMCache* cache = fBottomCache; 177 while (cache != NULL) { 178 VMCache* nextCache = (VMCache*)cache->UserData(); 179 if (cache != exceptCache) 180 cache->ReleaseRefAndUnlock(cache != fTopCache); 181 182 if (cache == fTopCache) 183 break; 184 185 cache = nextCache; 186 } 187 188 fTopCache = NULL; 189 fBottomCache = NULL; 190 } 191 192 void UnlockKeepRefs(bool keepTopCacheLocked) 193 { 194 if (fTopCache == NULL) 195 return; 196 197 VMCache* nextCache = fBottomCache; 198 VMCache* cache = NULL; 199 200 while (keepTopCacheLocked 201 ? nextCache != fTopCache : cache != fTopCache) { 202 cache = nextCache; 203 nextCache = (VMCache*)cache->UserData(); 204 cache->Unlock(cache != fTopCache); 205 } 206 } 207 208 void RelockCaches(bool topCacheLocked) 209 { 210 if (fTopCache == NULL) 211 return; 212 213 VMCache* nextCache = fTopCache; 214 VMCache* cache = NULL; 215 if (topCacheLocked) { 216 cache = nextCache; 217 nextCache = cache->source; 218 } 219 220 while (cache != fBottomCache && nextCache != NULL) { 221 VMCache* consumer = cache; 222 cache = nextCache; 223 nextCache = cache->source; 224 cache->Lock(); 225 cache->SetUserData(consumer); 226 } 227 } 228 229 private: 230 VMCache* fTopCache; 231 VMCache* fBottomCache; 232 }; 233 234 } // namespace 235 236 237 // The memory reserve an allocation of the certain priority must not touch. 
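// Indexed by allocation priority (user, system, VIP); the VIP entry is 0,
// i.e. VIP allocations may dig into the last reserves.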
238 static const size_t kMemoryReserveForPriority[] = { 239 VM_MEMORY_RESERVE_USER, // user 240 VM_MEMORY_RESERVE_SYSTEM, // system 241 0 // VIP 242 }; 243 244 245 ObjectCache* gPageMappingsObjectCache; 246 247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 248 249 static off_t sAvailableMemory; 250 static off_t sNeededMemory; 251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 252 static uint32 sPageFaults; 253 254 static VMPhysicalPageMapper* sPhysicalPageMapper; 255 256 #if DEBUG_CACHE_LIST 257 258 struct cache_info { 259 VMCache* cache; 260 addr_t page_count; 261 addr_t committed; 262 }; 263 264 static const int kCacheInfoTableCount = 100 * 1024; 265 static cache_info* sCacheInfoTable; 266 267 #endif // DEBUG_CACHE_LIST 268 269 270 // function declarations 271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 272 bool addressSpaceCleanup); 273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 274 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 275 static status_t map_backing_store(VMAddressSpace* addressSpace, 276 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 277 int protection, int mapping, uint32 flags, 278 const virtual_address_restrictions* addressRestrictions, bool kernel, 279 VMArea** _area, void** _virtualAddress); 280 static void fix_protection(uint32* protection); 281 282 283 // #pragma mark - 284 285 286 #if VM_PAGE_FAULT_TRACING 287 288 namespace VMPageFaultTracing { 289 290 class PageFaultStart : public AbstractTraceEntry { 291 public: 292 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 293 : 294 fAddress(address), 295 fPC(pc), 296 fWrite(write), 297 fUser(user) 298 { 299 Initialized(); 300 } 301 302 virtual void AddDump(TraceOutput& out) 303 { 304 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 305 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 306 } 307 308 private: 309 addr_t fAddress; 310 addr_t fPC; 311 bool fWrite; 312 bool fUser; 313 }; 314 315 316 // page fault errors 317 enum { 318 PAGE_FAULT_ERROR_NO_AREA = 0, 319 PAGE_FAULT_ERROR_KERNEL_ONLY, 320 PAGE_FAULT_ERROR_WRITE_PROTECTED, 321 PAGE_FAULT_ERROR_READ_PROTECTED, 322 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 323 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 324 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 325 }; 326 327 328 class PageFaultError : public AbstractTraceEntry { 329 public: 330 PageFaultError(area_id area, status_t error) 331 : 332 fArea(area), 333 fError(error) 334 { 335 Initialized(); 336 } 337 338 virtual void AddDump(TraceOutput& out) 339 { 340 switch (fError) { 341 case PAGE_FAULT_ERROR_NO_AREA: 342 out.Print("page fault error: no area"); 343 break; 344 case PAGE_FAULT_ERROR_KERNEL_ONLY: 345 out.Print("page fault error: area: %ld, kernel only", fArea); 346 break; 347 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 348 out.Print("page fault error: area: %ld, write protected", 349 fArea); 350 break; 351 case PAGE_FAULT_ERROR_READ_PROTECTED: 352 out.Print("page fault error: area: %ld, read protected", fArea); 353 break; 354 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 355 out.Print("page fault error: area: %ld, execute protected", 356 fArea); 357 break; 358 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 359 out.Print("page fault error: kernel touching bad user memory"); 360 break; 361 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 362 out.Print("page fault error: no address space"); 363 break; 364 default: 365 out.Print("page fault error: area: %ld, error: %s", fArea, 366 strerror(fError)); 367 break; 368 } 369 } 370 371 private: 372 area_id fArea; 373 status_t fError; 374 }; 375 376 377 class PageFaultDone : public AbstractTraceEntry { 378 public: 379 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 380 vm_page* page) 381 : 382 fArea(area), 383 fTopCache(topCache), 384 fCache(cache), 385 fPage(page) 386 { 387 Initialized(); 388 } 389 390 virtual void AddDump(TraceOutput& out) 391 { 392 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 393 "page: %p", fArea, fTopCache, fCache, fPage); 394 } 395 396 private: 397 area_id fArea; 398 VMCache* fTopCache; 399 VMCache* fCache; 400 vm_page* fPage; 401 }; 402 403 } // namespace VMPageFaultTracing 404 405 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 406 #else 407 # define TPF(x) ; 408 #endif // VM_PAGE_FAULT_TRACING 409 410 411 // #pragma mark - 412 413 414 /*! The page's cache must be locked. 415 */ 416 static inline void 417 increment_page_wired_count(vm_page* page) 418 { 419 if (!page->IsMapped()) 420 atomic_add(&gMappedPagesCount, 1); 421 page->IncrementWiredCount(); 422 } 423 424 425 /*! The page's cache must be locked. 426 */ 427 static inline void 428 decrement_page_wired_count(vm_page* page) 429 { 430 page->DecrementWiredCount(); 431 if (!page->IsMapped()) 432 atomic_add(&gMappedPagesCount, -1); 433 } 434 435 436 static inline addr_t 437 virtual_page_address(VMArea* area, vm_page* page) 438 { 439 return area->Base() 440 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 441 } 442 443 444 //! 
You need to have the address space locked when calling this function 445 static VMArea* 446 lookup_area(VMAddressSpace* addressSpace, area_id id) 447 { 448 VMAreaHash::ReadLock(); 449 450 VMArea* area = VMAreaHash::LookupLocked(id); 451 if (area != NULL && area->address_space != addressSpace) 452 area = NULL; 453 454 VMAreaHash::ReadUnlock(); 455 456 return area; 457 } 458 459 460 static status_t 461 allocate_area_page_protections(VMArea* area) 462 { 463 // In the page protections we store only the three user protections, 464 // so we use 4 bits per page. 465 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 466 area->page_protections = (uint8*)malloc_etc(bytes, 467 HEAP_DONT_LOCK_KERNEL_SPACE); 468 if (area->page_protections == NULL) 469 return B_NO_MEMORY; 470 471 // init the page protections for all pages to that of the area 472 uint32 areaProtection = area->protection 473 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 474 memset(area->page_protections, areaProtection | (areaProtection << 4), 475 bytes); 476 return B_OK; 477 } 478 479 480 static inline void 481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 482 { 483 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 484 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 485 uint8& entry = area->page_protections[pageIndex / 2]; 486 if (pageIndex % 2 == 0) 487 entry = (entry & 0xf0) | protection; 488 else 489 entry = (entry & 0x0f) | (protection << 4); 490 } 491 492 493 static inline uint32 494 get_area_page_protection(VMArea* area, addr_t pageAddress) 495 { 496 if (area->page_protections == NULL) 497 return area->protection; 498 499 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 500 uint32 protection = area->page_protections[pageIndex / 2]; 501 if (pageIndex % 2 == 0) 502 protection &= 0x0f; 503 else 504 protection >>= 4; 505 506 // If this is a kernel area we translate the user flags to kernel flags. 507 if (area->address_space == VMAddressSpace::Kernel()) { 508 uint32 kernelProtection = 0; 509 if ((protection & B_READ_AREA) != 0) 510 kernelProtection |= B_KERNEL_READ_AREA; 511 if ((protection & B_WRITE_AREA) != 0) 512 kernelProtection |= B_KERNEL_WRITE_AREA; 513 514 return kernelProtection; 515 } 516 517 return protection | B_KERNEL_READ_AREA 518 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 519 } 520 521 522 /*! The caller must have reserved enough pages the translation map 523 implementation might need to map this page. 524 The page's cache must be locked. 525 */ 526 static status_t 527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 528 vm_page_reservation* reservation) 529 { 530 VMTranslationMap* map = area->address_space->TranslationMap(); 531 532 bool wasMapped = page->IsMapped(); 533 534 if (area->wiring == B_NO_LOCK) { 535 DEBUG_PAGE_ACCESS_CHECK(page); 536 537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 539 gPageMappingsObjectCache, 540 CACHE_DONT_WAIT_FOR_MEMORY 541 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 542 if (mapping == NULL) 543 return B_NO_MEMORY; 544 545 mapping->page = page; 546 mapping->area = area; 547 548 map->Lock(); 549 550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 551 area->MemoryType(), reservation); 552 553 // insert mapping into lists 554 if (!page->IsMapped()) 555 atomic_add(&gMappedPagesCount, 1); 556 557 page->mappings.Add(mapping); 558 area->mappings.Add(mapping); 559 560 map->Unlock(); 561 } else { 562 DEBUG_PAGE_ACCESS_CHECK(page); 563 564 map->Lock(); 565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 566 area->MemoryType(), reservation); 567 map->Unlock(); 568 569 increment_page_wired_count(page); 570 } 571 572 if (!wasMapped) { 573 // The page is mapped now, so we must not remain in the cached queue. 574 // It also makes sense to move it from the inactive to the active, since 575 // otherwise the page daemon wouldn't come to keep track of it (in idle 576 // mode) -- if the page isn't touched, it will be deactivated after a 577 // full iteration through the queue at the latest. 578 if (page->State() == PAGE_STATE_CACHED 579 || page->State() == PAGE_STATE_INACTIVE) { 580 vm_page_set_state(page, PAGE_STATE_ACTIVE); 581 } 582 } 583 584 return B_OK; 585 } 586 587 588 /*! If \a preserveModified is \c true, the caller must hold the lock of the 589 page's cache. 590 */ 591 static inline bool 592 unmap_page(VMArea* area, addr_t virtualAddress) 593 { 594 return area->address_space->TranslationMap()->UnmapPage(area, 595 virtualAddress, true); 596 } 597 598 599 /*! If \a preserveModified is \c true, the caller must hold the lock of all 600 mapped pages' caches. 601 */ 602 static inline void 603 unmap_pages(VMArea* area, addr_t base, size_t size) 604 { 605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 606 } 607 608 609 static inline bool 610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 611 { 612 if (address < area->Base()) { 613 offset = area->Base() - address; 614 if (offset >= size) 615 return false; 616 617 address = area->Base(); 618 size -= offset; 619 offset = 0; 620 if (size > area->Size()) 621 size = area->Size(); 622 623 return true; 624 } 625 626 offset = address - area->Base(); 627 if (offset >= area->Size()) 628 return false; 629 630 if (size >= area->Size() - offset) 631 size = area->Size() - offset; 632 633 return true; 634 } 635 636 637 /*! Cuts a piece out of an area. If the given cut range covers the complete 638 area, it is deleted. If it covers the beginning or the end, the area is 639 resized accordingly. If the range covers some part in the middle of the 640 area, it is split in two; in this case the second area is returned via 641 \a _secondArea (the variable is left untouched in the other cases). 642 The address space must be write locked. 643 The caller must ensure that no part of the given range is wired. 644 */ 645 static status_t 646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 647 addr_t size, VMArea** _secondArea, bool kernel) 648 { 649 addr_t offset; 650 if (!intersect_area(area, address, size, offset)) 651 return B_OK; 652 653 // Is the area fully covered? 
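// (intersect_area() above has already clamped the range to the area, so a
// matching base and size means the whole area goes away.)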
654 if (address == area->Base() && size == area->Size()) { 655 delete_area(addressSpace, area, false); 656 return B_OK; 657 } 658 659 int priority; 660 uint32 allocationFlags; 661 if (addressSpace == VMAddressSpace::Kernel()) { 662 priority = VM_PRIORITY_SYSTEM; 663 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 664 | HEAP_DONT_LOCK_KERNEL_SPACE; 665 } else { 666 priority = VM_PRIORITY_USER; 667 allocationFlags = 0; 668 } 669 670 VMCache* cache = vm_area_get_locked_cache(area); 671 VMCacheChainLocker cacheChainLocker(cache); 672 cacheChainLocker.LockAllSourceCaches(); 673 674 // If no one else uses the area's cache and it's an anonymous cache, we can 675 // resize or split it, too. 676 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 677 && cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM; 678 679 // Cut the end only? 680 if (offset > 0 && size == area->Size() - offset) { 681 status_t error = addressSpace->ShrinkAreaTail(area, offset, 682 allocationFlags); 683 if (error != B_OK) 684 return error; 685 686 // unmap pages 687 unmap_pages(area, address, size); 688 689 if (onlyCacheUser) { 690 // Since VMCache::Resize() can temporarily drop the lock, we must 691 // unlock all lower caches to prevent locking order inversion. 692 cacheChainLocker.Unlock(cache); 693 cache->Resize(cache->virtual_base + offset, priority); 694 cache->ReleaseRefAndUnlock(); 695 } 696 697 return B_OK; 698 } 699 700 // Cut the beginning only? 701 if (area->Base() == address) { 702 // resize the area 703 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 704 allocationFlags); 705 if (error != B_OK) 706 return error; 707 708 // unmap pages 709 unmap_pages(area, address, size); 710 711 if (onlyCacheUser) { 712 // Since VMCache::Rebase() can temporarily drop the lock, we must 713 // unlock all lower caches to prevent locking order inversion. 714 cacheChainLocker.Unlock(cache); 715 cache->Rebase(cache->virtual_base + size, priority); 716 cache->ReleaseRefAndUnlock(); 717 } 718 area->cache_offset += size; 719 720 return B_OK; 721 } 722 723 // The tough part -- cut a piece out of the middle of the area. 724 // We do that by shrinking the area to the begin section and creating a 725 // new area for the end section. 726 addr_t firstNewSize = offset; 727 addr_t secondBase = address + size; 728 addr_t secondSize = area->Size() - offset - size; 729 730 // unmap pages 731 unmap_pages(area, address, area->Size() - firstNewSize); 732 733 // resize the area 734 addr_t oldSize = area->Size(); 735 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 736 allocationFlags); 737 if (error != B_OK) 738 return error; 739 740 virtual_address_restrictions addressRestrictions = {}; 741 addressRestrictions.address = (void*)secondBase; 742 addressRestrictions.address_specification = B_EXACT_ADDRESS; 743 VMArea* secondArea; 744 745 if (onlyCacheUser) { 746 // Create a new cache for the second area. 747 VMCache* secondCache; 748 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 749 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 750 if (error != B_OK) { 751 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 752 return error; 753 } 754 755 secondCache->Lock(); 756 secondCache->temporary = cache->temporary; 757 secondCache->virtual_base = area->cache_offset; 758 secondCache->virtual_end = area->cache_offset + secondSize; 759 760 // Transfer the concerned pages from the first cache. 
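// adoptOffset is the offset within the original cache at which the second
// area's range starts.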
761 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 762 error = secondCache->Adopt(cache, adoptOffset, secondSize, 763 area->cache_offset); 764 765 if (error == B_OK) { 766 // Since VMCache::Resize() can temporarily drop the lock, we must 767 // unlock all lower caches to prevent locking order inversion. 768 cacheChainLocker.Unlock(cache); 769 cache->Resize(cache->virtual_base + firstNewSize, priority); 770 // Don't unlock the cache yet because we might have to resize it 771 // back. 772 773 // Map the second area. 774 error = map_backing_store(addressSpace, secondCache, 775 area->cache_offset, area->name, secondSize, area->wiring, 776 area->protection, REGION_NO_PRIVATE_MAP, 0, 777 &addressRestrictions, kernel, &secondArea, NULL); 778 } 779 780 if (error != B_OK) { 781 // Restore the original cache. 782 cache->Resize(cache->virtual_base + oldSize, priority); 783 784 // Move the pages back. 785 status_t readoptStatus = cache->Adopt(secondCache, 786 area->cache_offset, secondSize, adoptOffset); 787 if (readoptStatus != B_OK) { 788 // Some (swap) pages have not been moved back and will be lost 789 // once the second cache is deleted. 790 panic("failed to restore cache range: %s", 791 strerror(readoptStatus)); 792 793 // TODO: Handle out of memory cases by freeing memory and 794 // retrying. 795 } 796 797 cache->ReleaseRefAndUnlock(); 798 secondCache->ReleaseRefAndUnlock(); 799 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 800 return error; 801 } 802 803 // Now we can unlock it. 804 cache->ReleaseRefAndUnlock(); 805 secondCache->Unlock(); 806 } else { 807 error = map_backing_store(addressSpace, cache, area->cache_offset 808 + (secondBase - area->Base()), 809 area->name, secondSize, area->wiring, area->protection, 810 REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, kernel, &secondArea, 811 NULL); 812 if (error != B_OK) { 813 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 814 return error; 815 } 816 // We need a cache reference for the new area. 817 cache->AcquireRefLocked(); 818 } 819 820 if (_secondArea != NULL) 821 *_secondArea = secondArea; 822 823 return B_OK; 824 } 825 826 827 /*! Deletes or cuts all areas in the given address range. 828 The address space must be write-locked. 829 The caller must ensure that no part of the given range is wired. 830 */ 831 static status_t 832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 833 bool kernel) 834 { 835 size = PAGE_ALIGN(size); 836 837 // Check, whether the caller is allowed to modify the concerned areas. 838 if (!kernel) { 839 for (VMAddressSpace::AreaRangeIterator it 840 = addressSpace->GetAreaRangeIterator(address, size); 841 VMArea* area = it.Next();) { 842 843 if ((area->protection & B_KERNEL_AREA) != 0) { 844 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 845 "unmap range of kernel area %" B_PRId32 " (%s)\n", 846 team_get_current_team_id(), area->id, area->name); 847 return B_NOT_ALLOWED; 848 } 849 } 850 } 851 852 for (VMAddressSpace::AreaRangeIterator it 853 = addressSpace->GetAreaRangeIterator(address, size); 854 VMArea* area = it.Next();) { 855 856 status_t error = cut_area(addressSpace, area, address, size, NULL, 857 kernel); 858 if (error != B_OK) 859 return error; 860 // Failing after already messing with areas is ugly, but we 861 // can't do anything about it. 
862 } 863 864 return B_OK; 865 } 866 867 868 static status_t 869 discard_area_range(VMArea* area, addr_t address, addr_t size) 870 { 871 addr_t offset; 872 if (!intersect_area(area, address, size, offset)) 873 return B_OK; 874 875 // If someone else uses the area's cache or it's not an anonymous cache, we 876 // can't discard. 877 VMCache* cache = vm_area_get_locked_cache(area); 878 if (cache->areas != area || area->cache_next != NULL 879 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 880 return B_OK; 881 } 882 883 VMCacheChainLocker cacheChainLocker(cache); 884 cacheChainLocker.LockAllSourceCaches(); 885 886 unmap_pages(area, address, size); 887 888 // Since VMCache::Discard() can temporarily drop the lock, we must 889 // unlock all lower caches to prevent locking order inversion. 890 cacheChainLocker.Unlock(cache); 891 cache->Discard(cache->virtual_base + offset, size); 892 cache->ReleaseRefAndUnlock(); 893 894 return B_OK; 895 } 896 897 898 static status_t 899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 900 bool kernel) 901 { 902 for (VMAddressSpace::AreaRangeIterator it 903 = addressSpace->GetAreaRangeIterator(address, size); 904 VMArea* area = it.Next();) { 905 status_t error = discard_area_range(area, address, size); 906 if (error != B_OK) 907 return error; 908 } 909 910 return B_OK; 911 } 912 913 914 /*! You need to hold the lock of the cache and the write lock of the address 915 space when calling this function. 916 Note, that in case of error your cache will be temporarily unlocked. 917 If \a addressSpec is \c B_EXACT_ADDRESS and the 918 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 919 that no part of the specified address range (base \c *_virtualAddress, size 920 \a size) is wired. 
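On success the newly created area is returned via \a _area; if
\a _virtualAddress is not \c NULL, it is set to the area's base address.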
921 */ 922 static status_t 923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 924 const char* areaName, addr_t size, int wiring, int protection, int mapping, 925 uint32 flags, const virtual_address_restrictions* addressRestrictions, 926 bool kernel, VMArea** _area, void** _virtualAddress) 927 { 928 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%" 929 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d" 930 ", protection %d, area %p, areaName '%s'\n", addressSpace, cache, 931 addressRestrictions->address, offset, size, 932 addressRestrictions->address_specification, wiring, protection, 933 _area, areaName)); 934 cache->AssertLocked(); 935 936 if (size == 0) { 937 #if KDEBUG 938 panic("map_backing_store(): called with size=0 for area '%s'!", 939 areaName); 940 #endif 941 return B_BAD_VALUE; 942 } 943 944 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 945 | HEAP_DONT_LOCK_KERNEL_SPACE; 946 int priority; 947 if (addressSpace != VMAddressSpace::Kernel()) { 948 priority = VM_PRIORITY_USER; 949 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 950 priority = VM_PRIORITY_VIP; 951 allocationFlags |= HEAP_PRIORITY_VIP; 952 } else 953 priority = VM_PRIORITY_SYSTEM; 954 955 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 956 allocationFlags); 957 if (area == NULL) 958 return B_NO_MEMORY; 959 960 status_t status; 961 962 // if this is a private map, we need to create a new cache 963 // to handle the private copies of pages as they are written to 964 VMCache* sourceCache = cache; 965 if (mapping == REGION_PRIVATE_MAP) { 966 VMCache* newCache; 967 968 // create an anonymous cache 969 status = VMCacheFactory::CreateAnonymousCache(newCache, 970 (protection & B_STACK_AREA) != 0 971 || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 972 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER); 973 if (status != B_OK) 974 goto err1; 975 976 newCache->Lock(); 977 newCache->temporary = 1; 978 newCache->virtual_base = offset; 979 newCache->virtual_end = offset + size; 980 981 cache->AddConsumer(newCache); 982 983 cache = newCache; 984 } 985 986 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 987 status = cache->SetMinimalCommitment(size, priority); 988 if (status != B_OK) 989 goto err2; 990 } 991 992 // check to see if this address space has entered DELETE state 993 if (addressSpace->IsBeingDeleted()) { 994 // okay, someone is trying to delete this address space now, so we can't 995 // insert the area, so back out 996 status = B_BAD_TEAM_ID; 997 goto err2; 998 } 999 1000 if (addressRestrictions->address_specification == B_EXACT_ADDRESS 1001 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 1002 status = unmap_address_range(addressSpace, 1003 (addr_t)addressRestrictions->address, size, kernel); 1004 if (status != B_OK) 1005 goto err2; 1006 } 1007 1008 status = addressSpace->InsertArea(area, size, addressRestrictions, 1009 allocationFlags, _virtualAddress); 1010 if (status == B_NO_MEMORY 1011 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1012 // TODO: At present, there is no way to notify the low_resource monitor 1013 // that kernel addresss space is fragmented, nor does it check for this 1014 // automatically. Due to how many locks are held, we cannot wait here 1015 // for space to be freed up, but it would be good to at least notify 1016 // that we tried and failed to allocate some amount. 
1017 } 1018 if (status != B_OK) 1019 goto err2; 1020 1021 // attach the cache to the area 1022 area->cache = cache; 1023 area->cache_offset = offset; 1024 1025 // point the cache back to the area 1026 cache->InsertAreaLocked(area); 1027 if (mapping == REGION_PRIVATE_MAP) 1028 cache->Unlock(); 1029 1030 // insert the area in the global area hash table 1031 VMAreaHash::Insert(area); 1032 1033 // grab a ref to the address space (the area holds this) 1034 addressSpace->Get(); 1035 1036 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1037 // cache, sourceCache, areaName, area); 1038 1039 *_area = area; 1040 return B_OK; 1041 1042 err2: 1043 if (mapping == REGION_PRIVATE_MAP) { 1044 // We created this cache, so we must delete it again. Note, that we 1045 // need to temporarily unlock the source cache or we'll otherwise 1046 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1047 sourceCache->Unlock(); 1048 cache->ReleaseRefAndUnlock(); 1049 sourceCache->Lock(); 1050 } 1051 err1: 1052 addressSpace->DeleteArea(area, allocationFlags); 1053 return status; 1054 } 1055 1056 1057 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1058 locker1, locker2). 1059 */ 1060 template<typename LockerType1, typename LockerType2> 1061 static inline bool 1062 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1063 { 1064 area->cache->AssertLocked(); 1065 1066 VMAreaUnwiredWaiter waiter; 1067 if (!area->AddWaiterIfWired(&waiter)) 1068 return false; 1069 1070 // unlock everything and wait 1071 if (locker1 != NULL) 1072 locker1->Unlock(); 1073 if (locker2 != NULL) 1074 locker2->Unlock(); 1075 1076 waiter.waitEntry.Wait(); 1077 1078 return true; 1079 } 1080 1081 1082 /*! Checks whether the given area has any wired ranges intersecting with the 1083 specified range and waits, if so. 1084 1085 When it has to wait, the function calls \c Unlock() on both \a locker1 1086 and \a locker2, if given. 1087 The area's top cache must be locked and must be unlocked as a side effect 1088 of calling \c Unlock() on either \a locker1 or \a locker2. 1089 1090 If the function does not have to wait it does not modify or unlock any 1091 object. 1092 1093 \param area The area to be checked. 1094 \param base The base address of the range to check. 1095 \param size The size of the address range to check. 1096 \param locker1 An object to be unlocked when before starting to wait (may 1097 be \c NULL). 1098 \param locker2 An object to be unlocked when before starting to wait (may 1099 be \c NULL). 1100 \return \c true, if the function had to wait, \c false otherwise. 1101 */ 1102 template<typename LockerType1, typename LockerType2> 1103 static inline bool 1104 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1105 LockerType1* locker1, LockerType2* locker2) 1106 { 1107 area->cache->AssertLocked(); 1108 1109 VMAreaUnwiredWaiter waiter; 1110 if (!area->AddWaiterIfWired(&waiter, base, size)) 1111 return false; 1112 1113 // unlock everything and wait 1114 if (locker1 != NULL) 1115 locker1->Unlock(); 1116 if (locker2 != NULL) 1117 locker2->Unlock(); 1118 1119 waiter.waitEntry.Wait(); 1120 1121 return true; 1122 } 1123 1124 1125 /*! Checks whether the given address space has any wired ranges intersecting 1126 with the specified range and waits, if so. 
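Returns \c true if the function had to wait, \c false otherwise.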
1127 1128 Similar to wait_if_area_range_is_wired(), with the following differences: 1129 - All areas intersecting with the range are checked (respectively all until 1130 one is found that contains a wired range intersecting with the given 1131 range). 1132 - The given address space must at least be read-locked and must be unlocked 1133 when \c Unlock() is called on \a locker. 1134 - None of the areas' caches are allowed to be locked. 1135 */ 1136 template<typename LockerType> 1137 static inline bool 1138 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1139 size_t size, LockerType* locker) 1140 { 1141 for (VMAddressSpace::AreaRangeIterator it 1142 = addressSpace->GetAreaRangeIterator(base, size); 1143 VMArea* area = it.Next();) { 1144 1145 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1146 1147 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1148 return true; 1149 } 1150 1151 return false; 1152 } 1153 1154 1155 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1156 It must be called in a situation where the kernel address space may be 1157 locked. 1158 */ 1159 status_t 1160 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1161 { 1162 AddressSpaceReadLocker locker; 1163 VMArea* area; 1164 status_t status = locker.SetFromArea(id, area); 1165 if (status != B_OK) 1166 return status; 1167 1168 if (area->page_protections == NULL) { 1169 status = allocate_area_page_protections(area); 1170 if (status != B_OK) 1171 return status; 1172 } 1173 1174 *cookie = (void*)area; 1175 return B_OK; 1176 } 1177 1178 1179 /*! This is a debug helper function that can only be used with very specific 1180 use cases. 1181 Sets protection for the given address range to the protection specified. 1182 If \a protection is 0 then the involved pages will be marked non-present 1183 in the translation map to cause a fault on access. The pages aren't 1184 actually unmapped however so that they can be marked present again with 1185 additional calls to this function. For this to work the area must be 1186 fully locked in memory so that the pages aren't otherwise touched. 1187 This function does not lock the kernel address space and needs to be 1188 supplied with a \a cookie retrieved from a successful call to 1189 vm_prepare_kernel_area_debug_protection(). 1190 */ 1191 status_t 1192 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1193 uint32 protection) 1194 { 1195 // check address range 1196 addr_t address = (addr_t)_address; 1197 size = PAGE_ALIGN(size); 1198 1199 if ((address % B_PAGE_SIZE) != 0 1200 || (addr_t)address + size < (addr_t)address 1201 || !IS_KERNEL_ADDRESS(address) 1202 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1203 return B_BAD_VALUE; 1204 } 1205 1206 // Translate the kernel protection to user protection as we only store that. 
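// (area->page_protections stores only the three B_*_AREA user bits, packed
// two pages per byte; cf. allocate_area_page_protections().)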
1207 if ((protection & B_KERNEL_READ_AREA) != 0) 1208 protection |= B_READ_AREA; 1209 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1210 protection |= B_WRITE_AREA; 1211 1212 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1213 VMTranslationMap* map = addressSpace->TranslationMap(); 1214 VMArea* area = (VMArea*)cookie; 1215 1216 addr_t offset = address - area->Base(); 1217 if (area->Size() - offset < size) { 1218 panic("protect range not fully within supplied area"); 1219 return B_BAD_VALUE; 1220 } 1221 1222 if (area->page_protections == NULL) { 1223 panic("area has no page protections"); 1224 return B_BAD_VALUE; 1225 } 1226 1227 // Invalidate the mapping entries so any access to them will fault or 1228 // restore the mapping entries unchanged so that lookup will success again. 1229 map->Lock(); 1230 map->DebugMarkRangePresent(address, address + size, protection != 0); 1231 map->Unlock(); 1232 1233 // And set the proper page protections so that the fault case will actually 1234 // fail and not simply try to map a new page. 1235 for (addr_t pageAddress = address; pageAddress < address + size; 1236 pageAddress += B_PAGE_SIZE) { 1237 set_area_page_protection(area, pageAddress, protection); 1238 } 1239 1240 return B_OK; 1241 } 1242 1243 1244 status_t 1245 vm_block_address_range(const char* name, void* address, addr_t size) 1246 { 1247 if (!arch_vm_supports_protection(0)) 1248 return B_NOT_SUPPORTED; 1249 1250 AddressSpaceWriteLocker locker; 1251 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1252 if (status != B_OK) 1253 return status; 1254 1255 VMAddressSpace* addressSpace = locker.AddressSpace(); 1256 1257 // create an anonymous cache 1258 VMCache* cache; 1259 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1260 VM_PRIORITY_SYSTEM); 1261 if (status != B_OK) 1262 return status; 1263 1264 cache->temporary = 1; 1265 cache->virtual_end = size; 1266 cache->Lock(); 1267 1268 VMArea* area; 1269 virtual_address_restrictions addressRestrictions = {}; 1270 addressRestrictions.address = address; 1271 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1272 status = map_backing_store(addressSpace, cache, 0, name, size, 1273 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1274 true, &area, NULL); 1275 if (status != B_OK) { 1276 cache->ReleaseRefAndUnlock(); 1277 return status; 1278 } 1279 1280 cache->Unlock(); 1281 area->cache_type = CACHE_TYPE_RAM; 1282 return area->id; 1283 } 1284 1285 1286 status_t 1287 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1288 { 1289 AddressSpaceWriteLocker locker(team); 1290 if (!locker.IsLocked()) 1291 return B_BAD_TEAM_ID; 1292 1293 VMAddressSpace* addressSpace = locker.AddressSpace(); 1294 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1295 addressSpace == VMAddressSpace::Kernel() 1296 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1297 } 1298 1299 1300 status_t 1301 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1302 addr_t size, uint32 flags) 1303 { 1304 if (size == 0) 1305 return B_BAD_VALUE; 1306 1307 AddressSpaceWriteLocker locker(team); 1308 if (!locker.IsLocked()) 1309 return B_BAD_TEAM_ID; 1310 1311 virtual_address_restrictions addressRestrictions = {}; 1312 addressRestrictions.address = *_address; 1313 addressRestrictions.address_specification = addressSpec; 1314 VMAddressSpace* addressSpace = locker.AddressSpace(); 1315 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1316 addressSpace == VMAddressSpace::Kernel() 1317 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1318 _address); 1319 } 1320 1321 1322 area_id 1323 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1324 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1325 const virtual_address_restrictions* virtualAddressRestrictions, 1326 const physical_address_restrictions* physicalAddressRestrictions, 1327 bool kernel, void** _address) 1328 { 1329 VMArea* area; 1330 VMCache* cache; 1331 vm_page* page = NULL; 1332 bool isStack = (protection & B_STACK_AREA) != 0; 1333 page_num_t guardPages; 1334 bool canOvercommit = false; 1335 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1336 ? VM_PAGE_ALLOC_CLEAR : 0; 1337 1338 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1339 team, name, size)); 1340 1341 size = PAGE_ALIGN(size); 1342 guardSize = PAGE_ALIGN(guardSize); 1343 guardPages = guardSize / B_PAGE_SIZE; 1344 1345 if (size == 0 || size < guardSize) 1346 return B_BAD_VALUE; 1347 if (!arch_vm_supports_protection(protection)) 1348 return B_NOT_SUPPORTED; 1349 1350 if (team == B_CURRENT_TEAM) 1351 team = VMAddressSpace::CurrentID(); 1352 if (team < 0) 1353 return B_BAD_TEAM_ID; 1354 1355 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1356 canOvercommit = true; 1357 1358 #ifdef DEBUG_KERNEL_STACKS 1359 if ((protection & B_KERNEL_STACK_AREA) != 0) 1360 isStack = true; 1361 #endif 1362 1363 // check parameters 1364 switch (virtualAddressRestrictions->address_specification) { 1365 case B_ANY_ADDRESS: 1366 case B_EXACT_ADDRESS: 1367 case B_BASE_ADDRESS: 1368 case B_ANY_KERNEL_ADDRESS: 1369 case B_ANY_KERNEL_BLOCK_ADDRESS: 1370 case B_RANDOMIZED_ANY_ADDRESS: 1371 case B_RANDOMIZED_BASE_ADDRESS: 1372 break; 1373 1374 default: 1375 return B_BAD_VALUE; 1376 } 1377 1378 // If low or high physical address restrictions are given, we force 1379 // B_CONTIGUOUS wiring, since only then we'll use 1380 // vm_page_allocate_page_run() which deals with those restrictions. 
1381 if (physicalAddressRestrictions->low_address != 0 1382 || physicalAddressRestrictions->high_address != 0) { 1383 wiring = B_CONTIGUOUS; 1384 } 1385 1386 physical_address_restrictions stackPhysicalRestrictions; 1387 bool doReserveMemory = false; 1388 switch (wiring) { 1389 case B_NO_LOCK: 1390 break; 1391 case B_FULL_LOCK: 1392 case B_LAZY_LOCK: 1393 case B_CONTIGUOUS: 1394 doReserveMemory = true; 1395 break; 1396 case B_ALREADY_WIRED: 1397 break; 1398 case B_LOMEM: 1399 stackPhysicalRestrictions = *physicalAddressRestrictions; 1400 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1401 physicalAddressRestrictions = &stackPhysicalRestrictions; 1402 wiring = B_CONTIGUOUS; 1403 doReserveMemory = true; 1404 break; 1405 case B_32_BIT_FULL_LOCK: 1406 if (B_HAIKU_PHYSICAL_BITS <= 32 1407 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1408 wiring = B_FULL_LOCK; 1409 doReserveMemory = true; 1410 break; 1411 } 1412 // TODO: We don't really support this mode efficiently. Just fall 1413 // through for now ... 1414 case B_32_BIT_CONTIGUOUS: 1415 #if B_HAIKU_PHYSICAL_BITS > 32 1416 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1417 stackPhysicalRestrictions = *physicalAddressRestrictions; 1418 stackPhysicalRestrictions.high_address 1419 = (phys_addr_t)1 << 32; 1420 physicalAddressRestrictions = &stackPhysicalRestrictions; 1421 } 1422 #endif 1423 wiring = B_CONTIGUOUS; 1424 doReserveMemory = true; 1425 break; 1426 default: 1427 return B_BAD_VALUE; 1428 } 1429 1430 // Optimization: For a single-page contiguous allocation without low/high 1431 // memory restriction B_FULL_LOCK wiring suffices. 1432 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1433 && physicalAddressRestrictions->low_address == 0 1434 && physicalAddressRestrictions->high_address == 0) { 1435 wiring = B_FULL_LOCK; 1436 } 1437 1438 // For full lock or contiguous areas we're also going to map the pages and 1439 // thus need to reserve pages for the mapping backend upfront. 1440 addr_t reservedMapPages = 0; 1441 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1442 AddressSpaceWriteLocker locker; 1443 status_t status = locker.SetTo(team); 1444 if (status != B_OK) 1445 return status; 1446 1447 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1448 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1449 } 1450 1451 int priority; 1452 if (team != VMAddressSpace::KernelID()) 1453 priority = VM_PRIORITY_USER; 1454 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1455 priority = VM_PRIORITY_VIP; 1456 else 1457 priority = VM_PRIORITY_SYSTEM; 1458 1459 // Reserve memory before acquiring the address space lock. This reduces the 1460 // chances of failure, since while holding the write lock to the address 1461 // space (if it is the kernel address space that is), the low memory handler 1462 // won't be able to free anything for us. 1463 addr_t reservedMemory = 0; 1464 if (doReserveMemory) { 1465 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1466 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1467 return B_NO_MEMORY; 1468 reservedMemory = size; 1469 // TODO: We don't reserve the memory for the pages for the page 1470 // directories/tables. We actually need to do since we currently don't 1471 // reclaim them (and probably can't reclaim all of them anyway). Thus 1472 // there are actually less physical pages than there should be, which 1473 // can get the VM into trouble in low memory situations. 
1474 } 1475 1476 AddressSpaceWriteLocker locker; 1477 VMAddressSpace* addressSpace; 1478 status_t status; 1479 1480 // For full lock areas reserve the pages before locking the address 1481 // space. E.g. block caches can't release their memory while we hold the 1482 // address space lock. 1483 page_num_t reservedPages = reservedMapPages; 1484 if (wiring == B_FULL_LOCK) 1485 reservedPages += size / B_PAGE_SIZE; 1486 1487 vm_page_reservation reservation; 1488 if (reservedPages > 0) { 1489 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1490 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1491 priority)) { 1492 reservedPages = 0; 1493 status = B_WOULD_BLOCK; 1494 goto err0; 1495 } 1496 } else 1497 vm_page_reserve_pages(&reservation, reservedPages, priority); 1498 } 1499 1500 if (wiring == B_CONTIGUOUS) { 1501 // we try to allocate the page run here upfront as this may easily 1502 // fail for obvious reasons 1503 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1504 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1505 if (page == NULL) { 1506 status = B_NO_MEMORY; 1507 goto err0; 1508 } 1509 } 1510 1511 // Lock the address space and, if B_EXACT_ADDRESS and 1512 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1513 // is not wired. 1514 do { 1515 status = locker.SetTo(team); 1516 if (status != B_OK) 1517 goto err1; 1518 1519 addressSpace = locker.AddressSpace(); 1520 } while (virtualAddressRestrictions->address_specification 1521 == B_EXACT_ADDRESS 1522 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1523 && wait_if_address_range_is_wired(addressSpace, 1524 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1525 1526 // create an anonymous cache 1527 // if it's a stack, make sure that two pages are available at least 1528 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1529 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1530 wiring == B_NO_LOCK, priority); 1531 if (status != B_OK) 1532 goto err1; 1533 1534 cache->temporary = 1; 1535 cache->virtual_end = size; 1536 cache->committed_size = reservedMemory; 1537 // TODO: This should be done via a method. 
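// The cache has taken over the reserved memory (committed_size above), so
// clear the local variable to keep the error path from unreserving it again.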
1538 reservedMemory = 0; 1539 1540 cache->Lock(); 1541 1542 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1543 protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions, 1544 kernel, &area, _address); 1545 1546 if (status != B_OK) { 1547 cache->ReleaseRefAndUnlock(); 1548 goto err1; 1549 } 1550 1551 locker.DegradeToReadLock(); 1552 1553 switch (wiring) { 1554 case B_NO_LOCK: 1555 case B_LAZY_LOCK: 1556 // do nothing - the pages are mapped in as needed 1557 break; 1558 1559 case B_FULL_LOCK: 1560 { 1561 // Allocate and map all pages for this area 1562 1563 off_t offset = 0; 1564 for (addr_t address = area->Base(); 1565 address < area->Base() + (area->Size() - 1); 1566 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1567 #ifdef DEBUG_KERNEL_STACKS 1568 # ifdef STACK_GROWS_DOWNWARDS 1569 if (isStack && address < area->Base() 1570 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1571 # else 1572 if (isStack && address >= area->Base() + area->Size() 1573 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1574 # endif 1575 continue; 1576 #endif 1577 vm_page* page = vm_page_allocate_page(&reservation, 1578 PAGE_STATE_WIRED | pageAllocFlags); 1579 cache->InsertPage(page, offset); 1580 map_page(area, page, address, protection, &reservation); 1581 1582 DEBUG_PAGE_ACCESS_END(page); 1583 } 1584 1585 break; 1586 } 1587 1588 case B_ALREADY_WIRED: 1589 { 1590 // The pages should already be mapped. This is only really useful 1591 // during boot time. Find the appropriate vm_page objects and stick 1592 // them in the cache object. 1593 VMTranslationMap* map = addressSpace->TranslationMap(); 1594 off_t offset = 0; 1595 1596 if (!gKernelStartup) 1597 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1598 1599 map->Lock(); 1600 1601 for (addr_t virtualAddress = area->Base(); 1602 virtualAddress < area->Base() + (area->Size() - 1); 1603 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1604 phys_addr_t physicalAddress; 1605 uint32 flags; 1606 status = map->Query(virtualAddress, &physicalAddress, &flags); 1607 if (status < B_OK) { 1608 panic("looking up mapping failed for va 0x%lx\n", 1609 virtualAddress); 1610 } 1611 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1612 if (page == NULL) { 1613 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1614 "\n", physicalAddress); 1615 } 1616 1617 DEBUG_PAGE_ACCESS_START(page); 1618 1619 cache->InsertPage(page, offset); 1620 increment_page_wired_count(page); 1621 vm_page_set_state(page, PAGE_STATE_WIRED); 1622 page->busy = false; 1623 1624 DEBUG_PAGE_ACCESS_END(page); 1625 } 1626 1627 map->Unlock(); 1628 break; 1629 } 1630 1631 case B_CONTIGUOUS: 1632 { 1633 // We have already allocated our continuous pages run, so we can now 1634 // just map them in the address space 1635 VMTranslationMap* map = addressSpace->TranslationMap(); 1636 phys_addr_t physicalAddress 1637 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1638 addr_t virtualAddress = area->Base(); 1639 off_t offset = 0; 1640 1641 map->Lock(); 1642 1643 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1644 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1645 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1646 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1647 if (page == NULL) 1648 panic("couldn't lookup physical page just allocated\n"); 1649 1650 status = map->Map(virtualAddress, physicalAddress, protection, 1651 area->MemoryType(), &reservation); 1652 if (status < B_OK) 1653 panic("couldn't map physical page in page 
run\n"); 1654 1655 cache->InsertPage(page, offset); 1656 increment_page_wired_count(page); 1657 1658 DEBUG_PAGE_ACCESS_END(page); 1659 } 1660 1661 map->Unlock(); 1662 break; 1663 } 1664 1665 default: 1666 break; 1667 } 1668 1669 cache->Unlock(); 1670 1671 if (reservedPages > 0) 1672 vm_page_unreserve_pages(&reservation); 1673 1674 TRACE(("vm_create_anonymous_area: done\n")); 1675 1676 area->cache_type = CACHE_TYPE_RAM; 1677 return area->id; 1678 1679 err1: 1680 if (wiring == B_CONTIGUOUS) { 1681 // we had reserved the area space upfront... 1682 phys_addr_t pageNumber = page->physical_page_number; 1683 int32 i; 1684 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1685 page = vm_lookup_page(pageNumber); 1686 if (page == NULL) 1687 panic("couldn't lookup physical page just allocated\n"); 1688 1689 vm_page_set_state(page, PAGE_STATE_FREE); 1690 } 1691 } 1692 1693 err0: 1694 if (reservedPages > 0) 1695 vm_page_unreserve_pages(&reservation); 1696 if (reservedMemory > 0) 1697 vm_unreserve_memory(reservedMemory); 1698 1699 return status; 1700 } 1701 1702 1703 area_id 1704 vm_map_physical_memory(team_id team, const char* name, void** _address, 1705 uint32 addressSpec, addr_t size, uint32 protection, 1706 phys_addr_t physicalAddress, bool alreadyWired) 1707 { 1708 VMArea* area; 1709 VMCache* cache; 1710 addr_t mapOffset; 1711 1712 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1713 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1714 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1715 addressSpec, size, protection, physicalAddress)); 1716 1717 if (!arch_vm_supports_protection(protection)) 1718 return B_NOT_SUPPORTED; 1719 1720 AddressSpaceWriteLocker locker(team); 1721 if (!locker.IsLocked()) 1722 return B_BAD_TEAM_ID; 1723 1724 // if the physical address is somewhat inside a page, 1725 // move the actual area down to align on a page boundary 1726 mapOffset = physicalAddress % B_PAGE_SIZE; 1727 size += mapOffset; 1728 physicalAddress -= mapOffset; 1729 1730 size = PAGE_ALIGN(size); 1731 1732 // create a device cache 1733 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1734 if (status != B_OK) 1735 return status; 1736 1737 cache->virtual_end = size; 1738 1739 cache->Lock(); 1740 1741 virtual_address_restrictions addressRestrictions = {}; 1742 addressRestrictions.address = *_address; 1743 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1744 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1745 B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1746 true, &area, _address); 1747 1748 if (status < B_OK) 1749 cache->ReleaseRefLocked(); 1750 1751 cache->Unlock(); 1752 1753 if (status == B_OK) { 1754 // set requested memory type -- use uncached, if not given 1755 uint32 memoryType = addressSpec & B_MTR_MASK; 1756 if (memoryType == 0) 1757 memoryType = B_MTR_UC; 1758 1759 area->SetMemoryType(memoryType); 1760 1761 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1762 if (status != B_OK) 1763 delete_area(locker.AddressSpace(), area, false); 1764 } 1765 1766 if (status != B_OK) 1767 return status; 1768 1769 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1770 1771 if (alreadyWired) { 1772 // The area is already mapped, but possibly not with the right 1773 // memory type. 1774 map->Lock(); 1775 map->ProtectArea(area, area->protection); 1776 map->Unlock(); 1777 } else { 1778 // Map the area completely. 
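// Unlike the alreadyWired case above, nothing is mapped yet, so we create
// the mappings for the whole physical range ourselves.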
1779 1780 // reserve pages needed for the mapping 1781 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1782 area->Base() + (size - 1)); 1783 vm_page_reservation reservation; 1784 vm_page_reserve_pages(&reservation, reservePages, 1785 team == VMAddressSpace::KernelID() 1786 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1787 1788 map->Lock(); 1789 1790 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1791 map->Map(area->Base() + offset, physicalAddress + offset, 1792 protection, area->MemoryType(), &reservation); 1793 } 1794 1795 map->Unlock(); 1796 1797 vm_page_unreserve_pages(&reservation); 1798 } 1799 1800 // modify the pointer returned to be offset back into the new area 1801 // the same way the physical address in was offset 1802 *_address = (void*)((addr_t)*_address + mapOffset); 1803 1804 area->cache_type = CACHE_TYPE_DEVICE; 1805 return area->id; 1806 } 1807 1808 1809 /*! Don't use! 1810 TODO: This function was introduced to map physical page vecs to 1811 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1812 use a device cache and does not track vm_page::wired_count! 1813 */ 1814 area_id 1815 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1816 uint32 addressSpec, addr_t* _size, uint32 protection, 1817 struct generic_io_vec* vecs, uint32 vecCount) 1818 { 1819 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1820 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1821 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1822 addressSpec, _size, protection, vecs, vecCount)); 1823 1824 if (!arch_vm_supports_protection(protection) 1825 || (addressSpec & B_MTR_MASK) != 0) { 1826 return B_NOT_SUPPORTED; 1827 } 1828 1829 AddressSpaceWriteLocker locker(team); 1830 if (!locker.IsLocked()) 1831 return B_BAD_TEAM_ID; 1832 1833 if (vecCount == 0) 1834 return B_BAD_VALUE; 1835 1836 addr_t size = 0; 1837 for (uint32 i = 0; i < vecCount; i++) { 1838 if (vecs[i].base % B_PAGE_SIZE != 0 1839 || vecs[i].length % B_PAGE_SIZE != 0) { 1840 return B_BAD_VALUE; 1841 } 1842 1843 size += vecs[i].length; 1844 } 1845 1846 // create a device cache 1847 VMCache* cache; 1848 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1849 if (result != B_OK) 1850 return result; 1851 1852 cache->virtual_end = size; 1853 1854 cache->Lock(); 1855 1856 VMArea* area; 1857 virtual_address_restrictions addressRestrictions = {}; 1858 addressRestrictions.address = *_address; 1859 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1860 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1861 size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, 1862 &addressRestrictions, true, &area, _address); 1863 1864 if (result != B_OK) 1865 cache->ReleaseRefLocked(); 1866 1867 cache->Unlock(); 1868 1869 if (result != B_OK) 1870 return result; 1871 1872 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1873 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1874 area->Base() + (size - 1)); 1875 1876 vm_page_reservation reservation; 1877 vm_page_reserve_pages(&reservation, reservePages, 1878 team == VMAddressSpace::KernelID() 1879 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1880 map->Lock(); 1881 1882 uint32 vecIndex = 0; 1883 size_t vecOffset = 0; 1884 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1885 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1886 vecOffset = 0; 1887 vecIndex++; 1888 } 1889 1890 if (vecIndex >= vecCount) 1891 break; 1892 1893 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1894 protection, area->MemoryType(), &reservation); 1895 1896 vecOffset += B_PAGE_SIZE; 1897 } 1898 1899 map->Unlock(); 1900 vm_page_unreserve_pages(&reservation); 1901 1902 if (_size != NULL) 1903 *_size = size; 1904 1905 area->cache_type = CACHE_TYPE_DEVICE; 1906 return area->id; 1907 } 1908 1909 1910 area_id 1911 vm_create_null_area(team_id team, const char* name, void** address, 1912 uint32 addressSpec, addr_t size, uint32 flags) 1913 { 1914 size = PAGE_ALIGN(size); 1915 1916 // Lock the address space and, if B_EXACT_ADDRESS and 1917 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1918 // is not wired. 1919 AddressSpaceWriteLocker locker; 1920 do { 1921 if (locker.SetTo(team) != B_OK) 1922 return B_BAD_TEAM_ID; 1923 } while (addressSpec == B_EXACT_ADDRESS 1924 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1925 && wait_if_address_range_is_wired(locker.AddressSpace(), 1926 (addr_t)*address, size, &locker)); 1927 1928 // create a null cache 1929 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1930 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1931 VMCache* cache; 1932 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1933 if (status != B_OK) 1934 return status; 1935 1936 cache->temporary = 1; 1937 cache->virtual_end = size; 1938 1939 cache->Lock(); 1940 1941 VMArea* area; 1942 virtual_address_restrictions addressRestrictions = {}; 1943 addressRestrictions.address = *address; 1944 addressRestrictions.address_specification = addressSpec; 1945 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1946 B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags, 1947 &addressRestrictions, true, &area, address); 1948 1949 if (status < B_OK) { 1950 cache->ReleaseRefAndUnlock(); 1951 return status; 1952 } 1953 1954 cache->Unlock(); 1955 1956 area->cache_type = CACHE_TYPE_NULL; 1957 return area->id; 1958 } 1959 1960 1961 /*! Creates the vnode cache for the specified \a vnode. 1962 The vnode has to be marked busy when calling this function. 1963 */ 1964 status_t 1965 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1966 { 1967 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1968 } 1969 1970 1971 /*! \a cache must be locked. The area's address space must be read-locked. 
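Maps the already resident pages of \a cache into the area read-only,
skipping busy and inactive pages.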
1972 */ 1973 static void 1974 pre_map_area_pages(VMArea* area, VMCache* cache, 1975 vm_page_reservation* reservation) 1976 { 1977 addr_t baseAddress = area->Base(); 1978 addr_t cacheOffset = area->cache_offset; 1979 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1980 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1981 1982 for (VMCachePagesTree::Iterator it 1983 = cache->pages.GetIterator(firstPage, true, true); 1984 vm_page* page = it.Next();) { 1985 if (page->cache_offset >= endPage) 1986 break; 1987 1988 // skip busy and inactive pages 1989 if (page->busy || page->usage_count == 0) 1990 continue; 1991 1992 DEBUG_PAGE_ACCESS_START(page); 1993 map_page(area, page, 1994 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1995 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1996 DEBUG_PAGE_ACCESS_END(page); 1997 } 1998 } 1999 2000 2001 /*! Will map the file specified by \a fd to an area in memory. 2002 The file will be mirrored beginning at the specified \a offset. The 2003 \a offset and \a size arguments have to be page aligned. 2004 */ 2005 static area_id 2006 _vm_map_file(team_id team, const char* name, void** _address, 2007 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2008 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2009 { 2010 // TODO: for binary files, we want to make sure that they get the 2011 // copy of a file at a given time, ie. later changes should not 2012 // make it into the mapped copy -- this will need quite some changes 2013 // to be done in a nice way 2014 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2015 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2016 2017 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2018 size = PAGE_ALIGN(size); 2019 2020 if (mapping == REGION_NO_PRIVATE_MAP) 2021 protection |= B_SHARED_AREA; 2022 if (addressSpec != B_EXACT_ADDRESS) 2023 unmapAddressRange = false; 2024 2025 if (fd < 0) { 2026 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2027 virtual_address_restrictions virtualRestrictions = {}; 2028 virtualRestrictions.address = *_address; 2029 virtualRestrictions.address_specification = addressSpec; 2030 physical_address_restrictions physicalRestrictions = {}; 2031 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2032 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2033 _address); 2034 } 2035 2036 // get the open flags of the FD 2037 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2038 if (descriptor == NULL) 2039 return EBADF; 2040 int32 openMode = descriptor->open_mode; 2041 put_fd(descriptor); 2042 2043 // The FD must open for reading at any rate. For shared mapping with write 2044 // access, additionally the FD must be open for writing. 2045 if ((openMode & O_ACCMODE) == O_WRONLY 2046 || (mapping == REGION_NO_PRIVATE_MAP 2047 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2048 && (openMode & O_ACCMODE) == O_RDONLY)) { 2049 return EACCES; 2050 } 2051 2052 // get the vnode for the object, this also grabs a ref to it 2053 struct vnode* vnode = NULL; 2054 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2055 if (status < B_OK) 2056 return status; 2057 VnodePutter vnodePutter(vnode); 2058 2059 // If we're going to pre-map pages, we need to reserve the pages needed by 2060 // the mapping backend upfront. 
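	// Note: this reservation only covers the translation map resources (page
	// tables and the like) needed to map up to `size' bytes, as reported by
	// MaxPagesNeededToMap() -- the file pages themselves already live in the
	// vnode cache and are merely mapped by pre_map_area_pages() further below.
	// The reservation is made with the address space unlocked, since reserving
	// may have to wait for pages to become available.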
2061 page_num_t reservedPreMapPages = 0; 2062 vm_page_reservation reservation; 2063 if ((protection & B_READ_AREA) != 0) { 2064 AddressSpaceWriteLocker locker; 2065 status = locker.SetTo(team); 2066 if (status != B_OK) 2067 return status; 2068 2069 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2070 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2071 2072 locker.Unlock(); 2073 2074 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2075 team == VMAddressSpace::KernelID() 2076 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2077 } 2078 2079 struct PageUnreserver { 2080 PageUnreserver(vm_page_reservation* reservation) 2081 : 2082 fReservation(reservation) 2083 { 2084 } 2085 2086 ~PageUnreserver() 2087 { 2088 if (fReservation != NULL) 2089 vm_page_unreserve_pages(fReservation); 2090 } 2091 2092 vm_page_reservation* fReservation; 2093 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2094 2095 // Lock the address space and, if the specified address range shall be 2096 // unmapped, ensure it is not wired. 2097 AddressSpaceWriteLocker locker; 2098 do { 2099 if (locker.SetTo(team) != B_OK) 2100 return B_BAD_TEAM_ID; 2101 } while (unmapAddressRange 2102 && wait_if_address_range_is_wired(locker.AddressSpace(), 2103 (addr_t)*_address, size, &locker)); 2104 2105 // TODO: this only works for file systems that use the file cache 2106 VMCache* cache; 2107 status = vfs_get_vnode_cache(vnode, &cache, false); 2108 if (status < B_OK) 2109 return status; 2110 2111 cache->Lock(); 2112 2113 VMArea* area; 2114 virtual_address_restrictions addressRestrictions = {}; 2115 addressRestrictions.address = *_address; 2116 addressRestrictions.address_specification = addressSpec; 2117 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2118 0, protection, mapping, 2119 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2120 &addressRestrictions, kernel, &area, _address); 2121 2122 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2123 // map_backing_store() cannot know we no longer need the ref 2124 cache->ReleaseRefLocked(); 2125 } 2126 2127 if (status == B_OK && (protection & B_READ_AREA) != 0) 2128 pre_map_area_pages(area, cache, &reservation); 2129 2130 cache->Unlock(); 2131 2132 if (status == B_OK) { 2133 // TODO: this probably deserves a smarter solution, ie. don't always 2134 // prefetch stuff, and also, probably don't trigger it at this place. 
2135 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2136 // prefetches at max 10 MB starting from "offset" 2137 } 2138 2139 if (status != B_OK) 2140 return status; 2141 2142 area->cache_type = CACHE_TYPE_VNODE; 2143 return area->id; 2144 } 2145 2146 2147 area_id 2148 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2149 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2150 int fd, off_t offset) 2151 { 2152 if (!arch_vm_supports_protection(protection)) 2153 return B_NOT_SUPPORTED; 2154 2155 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2156 mapping, unmapAddressRange, fd, offset, true); 2157 } 2158 2159 2160 VMCache* 2161 vm_area_get_locked_cache(VMArea* area) 2162 { 2163 rw_lock_read_lock(&sAreaCacheLock); 2164 2165 while (true) { 2166 VMCache* cache = area->cache; 2167 2168 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2169 // cache has been deleted 2170 rw_lock_read_lock(&sAreaCacheLock); 2171 continue; 2172 } 2173 2174 rw_lock_read_lock(&sAreaCacheLock); 2175 2176 if (cache == area->cache) { 2177 cache->AcquireRefLocked(); 2178 rw_lock_read_unlock(&sAreaCacheLock); 2179 return cache; 2180 } 2181 2182 // the cache changed in the meantime 2183 cache->Unlock(); 2184 } 2185 } 2186 2187 2188 void 2189 vm_area_put_locked_cache(VMCache* cache) 2190 { 2191 cache->ReleaseRefAndUnlock(); 2192 } 2193 2194 2195 area_id 2196 vm_clone_area(team_id team, const char* name, void** address, 2197 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2198 bool kernel) 2199 { 2200 VMArea* newArea = NULL; 2201 VMArea* sourceArea; 2202 2203 // Check whether the source area exists and is cloneable. If so, mark it 2204 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2205 { 2206 AddressSpaceWriteLocker locker; 2207 status_t status = locker.SetFromArea(sourceID, sourceArea); 2208 if (status != B_OK) 2209 return status; 2210 2211 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2212 return B_NOT_ALLOWED; 2213 2214 sourceArea->protection |= B_SHARED_AREA; 2215 protection |= B_SHARED_AREA; 2216 } 2217 2218 // Now lock both address spaces and actually do the cloning. 
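	// Note: MultiAddressSpaceLocker acquires the two address space locks in a
	// consistent order. Once both are held, the source area has to be looked
	// up again and its permissions re-checked, since it may have been deleted
	// or changed while its address space was unlocked above.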
2219 2220 MultiAddressSpaceLocker locker; 2221 VMAddressSpace* sourceAddressSpace; 2222 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2223 if (status != B_OK) 2224 return status; 2225 2226 VMAddressSpace* targetAddressSpace; 2227 status = locker.AddTeam(team, true, &targetAddressSpace); 2228 if (status != B_OK) 2229 return status; 2230 2231 status = locker.Lock(); 2232 if (status != B_OK) 2233 return status; 2234 2235 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2236 if (sourceArea == NULL) 2237 return B_BAD_VALUE; 2238 2239 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2240 return B_NOT_ALLOWED; 2241 2242 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2243 2244 if (!kernel && sourceAddressSpace != targetAddressSpace 2245 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2246 #if KDEBUG 2247 Team* team = thread_get_current_thread()->team; 2248 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2249 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2250 #endif 2251 status = B_NOT_ALLOWED; 2252 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2253 status = B_NOT_ALLOWED; 2254 } else { 2255 virtual_address_restrictions addressRestrictions = {}; 2256 addressRestrictions.address = *address; 2257 addressRestrictions.address_specification = addressSpec; 2258 status = map_backing_store(targetAddressSpace, cache, 2259 sourceArea->cache_offset, name, sourceArea->Size(), 2260 sourceArea->wiring, protection, mapping, 0, &addressRestrictions, 2261 kernel, &newArea, address); 2262 } 2263 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2264 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2265 // to create a new cache, and has therefore already acquired a reference 2266 // to the source cache - but otherwise it has no idea that we need 2267 // one. 2268 cache->AcquireRefLocked(); 2269 } 2270 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2271 // we need to map in everything at this point 2272 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2273 // we don't have actual pages to map but a physical area 2274 VMTranslationMap* map 2275 = sourceArea->address_space->TranslationMap(); 2276 map->Lock(); 2277 2278 phys_addr_t physicalAddress; 2279 uint32 oldProtection; 2280 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2281 2282 map->Unlock(); 2283 2284 map = targetAddressSpace->TranslationMap(); 2285 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2286 newArea->Base() + (newArea->Size() - 1)); 2287 2288 vm_page_reservation reservation; 2289 vm_page_reserve_pages(&reservation, reservePages, 2290 targetAddressSpace == VMAddressSpace::Kernel() 2291 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2292 map->Lock(); 2293 2294 for (addr_t offset = 0; offset < newArea->Size(); 2295 offset += B_PAGE_SIZE) { 2296 map->Map(newArea->Base() + offset, physicalAddress + offset, 2297 protection, newArea->MemoryType(), &reservation); 2298 } 2299 2300 map->Unlock(); 2301 vm_page_unreserve_pages(&reservation); 2302 } else { 2303 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2304 size_t reservePages = map->MaxPagesNeededToMap( 2305 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2306 vm_page_reservation reservation; 2307 vm_page_reserve_pages(&reservation, reservePages, 2308 targetAddressSpace == VMAddressSpace::Kernel() 2309 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2310 2311 // map in all pages from source 2312 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2313 vm_page* page = it.Next();) { 2314 if (!page->busy) { 2315 DEBUG_PAGE_ACCESS_START(page); 2316 map_page(newArea, page, 2317 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2318 - newArea->cache_offset), 2319 protection, &reservation); 2320 DEBUG_PAGE_ACCESS_END(page); 2321 } 2322 } 2323 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2324 // ensuring that! 2325 2326 vm_page_unreserve_pages(&reservation); 2327 } 2328 } 2329 if (status == B_OK) 2330 newArea->cache_type = sourceArea->cache_type; 2331 2332 vm_area_put_locked_cache(cache); 2333 2334 if (status < B_OK) 2335 return status; 2336 2337 return newArea->id; 2338 } 2339 2340 2341 /*! Deletes the specified area of the given address space. 2342 2343 The address space must be write-locked. 2344 The caller must ensure that the area does not have any wired ranges. 2345 2346 \param addressSpace The address space containing the area. 2347 \param area The area to be deleted. 2348 \param deletingAddressSpace \c true, if the address space is in the process 2349 of being deleted. 2350 */ 2351 static void 2352 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2353 bool deletingAddressSpace) 2354 { 2355 ASSERT(!area->IsWired()); 2356 2357 VMAreaHash::Remove(area); 2358 2359 // At this point the area is removed from the global hash table, but 2360 // still exists in the area list. 2361 2362 // Unmap the virtual address space the area occupied. 2363 { 2364 // We need to lock the complete cache chain. 2365 VMCache* topCache = vm_area_get_locked_cache(area); 2366 VMCacheChainLocker cacheChainLocker(topCache); 2367 cacheChainLocker.LockAllSourceCaches(); 2368 2369 // If the area's top cache is a temporary cache and the area is the only 2370 // one referencing it (besides us currently holding a second reference), 2371 // the unmapping code doesn't need to care about preserving the accessed 2372 // and dirty flags of the top cache page mappings. 2373 bool ignoreTopCachePageFlags 2374 = topCache->temporary && topCache->RefCount() == 2; 2375 2376 area->address_space->TranslationMap()->UnmapArea(area, 2377 deletingAddressSpace, ignoreTopCachePageFlags); 2378 } 2379 2380 if (!area->cache->temporary) 2381 area->cache->WriteModified(); 2382 2383 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2384 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2385 2386 arch_vm_unset_memory_type(area); 2387 addressSpace->RemoveArea(area, allocationFlags); 2388 addressSpace->Put(); 2389 2390 area->cache->RemoveArea(area); 2391 area->cache->ReleaseRef(); 2392 2393 addressSpace->DeleteArea(area, allocationFlags); 2394 } 2395 2396 2397 status_t 2398 vm_delete_area(team_id team, area_id id, bool kernel) 2399 { 2400 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2401 team, id)); 2402 2403 // lock the address space and make sure the area isn't wired 2404 AddressSpaceWriteLocker locker; 2405 VMArea* area; 2406 AreaCacheLocker cacheLocker; 2407 2408 do { 2409 status_t status = locker.SetFromArea(team, id, area); 2410 if (status != B_OK) 2411 return status; 2412 2413 cacheLocker.SetTo(area); 2414 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2415 2416 cacheLocker.Unlock(); 2417 2418 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2419 return B_NOT_ALLOWED; 2420 2421 delete_area(locker.AddressSpace(), area, false); 2422 return B_OK; 2423 } 2424 2425 2426 /*! Creates a new cache on top of given cache, moves all areas from 2427 the old cache to the new one, and changes the protection of all affected 2428 areas' pages to read-only. If requested, wired pages are moved up to the 2429 new cache and copies are added to the old cache in their place. 2430 Preconditions: 2431 - The given cache must be locked. 2432 - All of the cache's areas' address spaces must be read locked. 2433 - Either the cache must not have any wired ranges or a page reservation for 2434 all wired pages must be provided, so they can be copied. 2435 2436 \param lowerCache The cache on top of which a new cache shall be created. 2437 \param wiredPagesReservation If \c NULL there must not be any wired pages 2438 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2439 has wired page. The wired pages are copied in this case. 2440 */ 2441 static status_t 2442 vm_copy_on_write_area(VMCache* lowerCache, 2443 vm_page_reservation* wiredPagesReservation) 2444 { 2445 VMCache* upperCache; 2446 2447 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2448 2449 // We need to separate the cache from its areas. The cache goes one level 2450 // deeper and we create a new cache inbetween. 2451 2452 // create an anonymous cache 2453 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2454 lowerCache->GuardSize() / B_PAGE_SIZE, 2455 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2456 VM_PRIORITY_USER); 2457 if (status != B_OK) 2458 return status; 2459 2460 upperCache->Lock(); 2461 2462 upperCache->temporary = 1; 2463 upperCache->virtual_base = lowerCache->virtual_base; 2464 upperCache->virtual_end = lowerCache->virtual_end; 2465 2466 // transfer the lower cache areas to the upper cache 2467 rw_lock_write_lock(&sAreaCacheLock); 2468 upperCache->TransferAreas(lowerCache); 2469 rw_lock_write_unlock(&sAreaCacheLock); 2470 2471 lowerCache->AddConsumer(upperCache); 2472 2473 // We now need to remap all pages from all of the cache's areas read-only, 2474 // so that a copy will be created on next write access. If there are wired 2475 // pages, we keep their protection, move them to the upper cache and create 2476 // copies for the lower cache. 2477 if (wiredPagesReservation != NULL) { 2478 // We need to handle wired pages -- iterate through the cache's pages. 
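		// A wired page must not be unmapped or made to fault -- its wirer may
		// be accessing it (e.g. for I/O) right now. So instead of re-protecting
		// it read-only, the wired page itself is moved up into the new cache
		// with its mapping left intact, and a copy allocated from the caller's
		// reservation takes its place in the lower cache.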
2479 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2480 vm_page* page = it.Next();) { 2481 if (page->WiredCount() > 0) { 2482 // allocate a new page and copy the wired one 2483 vm_page* copiedPage = vm_page_allocate_page( 2484 wiredPagesReservation, PAGE_STATE_ACTIVE); 2485 2486 vm_memcpy_physical_page( 2487 copiedPage->physical_page_number * B_PAGE_SIZE, 2488 page->physical_page_number * B_PAGE_SIZE); 2489 2490 // move the wired page to the upper cache (note: removing is OK 2491 // with the SplayTree iterator) and insert the copy 2492 upperCache->MovePage(page); 2493 lowerCache->InsertPage(copiedPage, 2494 page->cache_offset * B_PAGE_SIZE); 2495 2496 DEBUG_PAGE_ACCESS_END(copiedPage); 2497 } else { 2498 // Change the protection of this page in all areas. 2499 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2500 tempArea = tempArea->cache_next) { 2501 // The area must be readable in the same way it was 2502 // previously writable. 2503 uint32 protection = B_KERNEL_READ_AREA; 2504 if ((tempArea->protection & B_READ_AREA) != 0) 2505 protection |= B_READ_AREA; 2506 2507 VMTranslationMap* map 2508 = tempArea->address_space->TranslationMap(); 2509 map->Lock(); 2510 map->ProtectPage(tempArea, 2511 virtual_page_address(tempArea, page), protection); 2512 map->Unlock(); 2513 } 2514 } 2515 } 2516 } else { 2517 ASSERT(lowerCache->WiredPagesCount() == 0); 2518 2519 // just change the protection of all areas 2520 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2521 tempArea = tempArea->cache_next) { 2522 // The area must be readable in the same way it was previously 2523 // writable. 2524 uint32 protection = B_KERNEL_READ_AREA; 2525 if ((tempArea->protection & B_READ_AREA) != 0) 2526 protection |= B_READ_AREA; 2527 2528 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2529 map->Lock(); 2530 map->ProtectArea(tempArea, protection); 2531 map->Unlock(); 2532 } 2533 } 2534 2535 vm_area_put_locked_cache(upperCache); 2536 2537 return B_OK; 2538 } 2539 2540 2541 area_id 2542 vm_copy_area(team_id team, const char* name, void** _address, 2543 uint32 addressSpec, area_id sourceID) 2544 { 2545 // Do the locking: target address space, all address spaces associated with 2546 // the source cache, and the cache itself. 2547 MultiAddressSpaceLocker locker; 2548 VMAddressSpace* targetAddressSpace; 2549 VMCache* cache; 2550 VMArea* source; 2551 AreaCacheLocker cacheLocker; 2552 status_t status; 2553 bool sharedArea; 2554 2555 page_num_t wiredPages = 0; 2556 vm_page_reservation wiredPagesReservation; 2557 2558 bool restart; 2559 do { 2560 restart = false; 2561 2562 locker.Unset(); 2563 status = locker.AddTeam(team, true, &targetAddressSpace); 2564 if (status == B_OK) { 2565 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2566 &cache); 2567 } 2568 if (status != B_OK) 2569 return status; 2570 2571 cacheLocker.SetTo(cache, true); // already locked 2572 2573 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2574 2575 page_num_t oldWiredPages = wiredPages; 2576 wiredPages = 0; 2577 2578 // If the source area isn't shared, count the number of wired pages in 2579 // the cache and reserve as many pages. 
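		// Note: counting and reserving cannot be done atomically -- the
		// reservation requires all locks to be dropped, and more pages may get
		// wired in the meantime. The enclosing do/while loop therefore restarts
		// with everything re-locked until the reservation made actually covers
		// the count found while the locks were held.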
2580 if (!sharedArea) { 2581 wiredPages = cache->WiredPagesCount(); 2582 2583 if (wiredPages > oldWiredPages) { 2584 cacheLocker.Unlock(); 2585 locker.Unlock(); 2586 2587 if (oldWiredPages > 0) 2588 vm_page_unreserve_pages(&wiredPagesReservation); 2589 2590 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2591 VM_PRIORITY_USER); 2592 2593 restart = true; 2594 } 2595 } else if (oldWiredPages > 0) 2596 vm_page_unreserve_pages(&wiredPagesReservation); 2597 } while (restart); 2598 2599 // unreserve pages later 2600 struct PagesUnreserver { 2601 PagesUnreserver(vm_page_reservation* reservation) 2602 : 2603 fReservation(reservation) 2604 { 2605 } 2606 2607 ~PagesUnreserver() 2608 { 2609 if (fReservation != NULL) 2610 vm_page_unreserve_pages(fReservation); 2611 } 2612 2613 private: 2614 vm_page_reservation* fReservation; 2615 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2616 2617 bool writableCopy 2618 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2619 uint8* targetPageProtections = NULL; 2620 2621 if (source->page_protections != NULL) { 2622 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2623 targetPageProtections = (uint8*)malloc_etc(bytes, 2624 HEAP_DONT_LOCK_KERNEL_SPACE); 2625 if (targetPageProtections == NULL) 2626 return B_NO_MEMORY; 2627 2628 memcpy(targetPageProtections, source->page_protections, bytes); 2629 2630 if (!writableCopy) { 2631 for (size_t i = 0; i < bytes; i++) { 2632 if ((targetPageProtections[i] 2633 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2634 writableCopy = true; 2635 break; 2636 } 2637 } 2638 } 2639 } 2640 2641 if (addressSpec == B_CLONE_ADDRESS) { 2642 addressSpec = B_EXACT_ADDRESS; 2643 *_address = (void*)source->Base(); 2644 } 2645 2646 // First, create a cache on top of the source area, respectively use the 2647 // existing one, if this is a shared area. 2648 2649 VMArea* target; 2650 virtual_address_restrictions addressRestrictions = {}; 2651 addressRestrictions.address = *_address; 2652 addressRestrictions.address_specification = addressSpec; 2653 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2654 name, source->Size(), source->wiring, source->protection, 2655 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2656 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2657 &addressRestrictions, true, &target, _address); 2658 if (status < B_OK) { 2659 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2660 return status; 2661 } 2662 2663 if (targetPageProtections != NULL) 2664 target->page_protections = targetPageProtections; 2665 2666 if (sharedArea) { 2667 // The new area uses the old area's cache, but map_backing_store() 2668 // hasn't acquired a ref. So we have to do that now. 2669 cache->AcquireRefLocked(); 2670 } 2671 2672 // If the source area is writable, we need to move it one layer up as well 2673 2674 if (!sharedArea) { 2675 if (writableCopy) { 2676 // TODO: do something more useful if this fails! 2677 if (vm_copy_on_write_area(cache, 2678 wiredPages > 0 ? 
&wiredPagesReservation : NULL) < B_OK) { 2679 panic("vm_copy_on_write_area() failed!\n"); 2680 } 2681 } 2682 } 2683 2684 // we return the ID of the newly created area 2685 return target->id; 2686 } 2687 2688 2689 status_t 2690 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2691 bool kernel) 2692 { 2693 fix_protection(&newProtection); 2694 2695 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2696 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2697 2698 if (!arch_vm_supports_protection(newProtection)) 2699 return B_NOT_SUPPORTED; 2700 2701 bool becomesWritable 2702 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2703 2704 // lock address spaces and cache 2705 MultiAddressSpaceLocker locker; 2706 VMCache* cache; 2707 VMArea* area; 2708 status_t status; 2709 AreaCacheLocker cacheLocker; 2710 bool isWritable; 2711 2712 bool restart; 2713 do { 2714 restart = false; 2715 2716 locker.Unset(); 2717 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2718 if (status != B_OK) 2719 return status; 2720 2721 cacheLocker.SetTo(cache, true); // already locked 2722 2723 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2724 || (area->protection & B_KERNEL_AREA) != 0)) { 2725 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2726 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2727 " (%s)\n", team, newProtection, areaID, area->name); 2728 return B_NOT_ALLOWED; 2729 } 2730 2731 if (area->protection == newProtection) 2732 return B_OK; 2733 2734 if (team != VMAddressSpace::KernelID() 2735 && area->address_space->ID() != team) { 2736 // unless you're the kernel, you are only allowed to set 2737 // the protection of your own areas 2738 return B_NOT_ALLOWED; 2739 } 2740 2741 isWritable 2742 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2743 2744 // Make sure the area (respectively, if we're going to call 2745 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2746 // wired ranges. 2747 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2748 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2749 otherArea = otherArea->cache_next) { 2750 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2751 restart = true; 2752 break; 2753 } 2754 } 2755 } else { 2756 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2757 restart = true; 2758 } 2759 } while (restart); 2760 2761 bool changePageProtection = true; 2762 bool changeTopCachePagesOnly = false; 2763 2764 if (isWritable && !becomesWritable) { 2765 // writable -> !writable 2766 2767 if (cache->source != NULL && cache->temporary) { 2768 if (cache->CountWritableAreas(area) == 0) { 2769 // Since this cache now lives from the pages in its source cache, 2770 // we can change the cache's commitment to take only those pages 2771 // into account that really are in this cache. 2772 2773 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2774 team == VMAddressSpace::KernelID() 2775 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2776 2777 // TODO: we may be able to join with our source cache, if 2778 // count == 0 2779 } 2780 } 2781 2782 // If only the writability changes, we can just remap the pages of the 2783 // top cache, since the pages of lower caches are mapped read-only 2784 // anyway. That's advantageous only, if the number of pages in the cache 2785 // is significantly smaller than the number of pages in the area, 2786 // though. 
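		// Here "significantly smaller" means: less than half of the area's
		// pages are resident in the top cache. Additionally the new protection
		// must equal the old one with just the write bits stripped, i.e.
		// nothing but the writability may be changing.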
2787 if (newProtection 2788 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2789 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2790 changeTopCachePagesOnly = true; 2791 } 2792 } else if (!isWritable && becomesWritable) { 2793 // !writable -> writable 2794 2795 if (!cache->consumers.IsEmpty()) { 2796 // There are consumers -- we have to insert a new cache. Fortunately 2797 // vm_copy_on_write_area() does everything that's needed. 2798 changePageProtection = false; 2799 status = vm_copy_on_write_area(cache, NULL); 2800 } else { 2801 // No consumers, so we don't need to insert a new one. 2802 if (cache->source != NULL && cache->temporary) { 2803 // the cache's commitment must contain all possible pages 2804 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2805 team == VMAddressSpace::KernelID() 2806 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2807 } 2808 2809 if (status == B_OK && cache->source != NULL) { 2810 // There's a source cache, hence we can't just change all pages' 2811 // protection or we might allow writing into pages belonging to 2812 // a lower cache. 2813 changeTopCachePagesOnly = true; 2814 } 2815 } 2816 } else { 2817 // we don't have anything special to do in all other cases 2818 } 2819 2820 if (status == B_OK) { 2821 // remap existing pages in this cache 2822 if (changePageProtection) { 2823 VMTranslationMap* map = area->address_space->TranslationMap(); 2824 map->Lock(); 2825 2826 if (changeTopCachePagesOnly) { 2827 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2828 page_num_t lastPageOffset 2829 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2830 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2831 vm_page* page = it.Next();) { 2832 if (page->cache_offset >= firstPageOffset 2833 && page->cache_offset <= lastPageOffset) { 2834 addr_t address = virtual_page_address(area, page); 2835 map->ProtectPage(area, address, newProtection); 2836 } 2837 } 2838 } else 2839 map->ProtectArea(area, newProtection); 2840 2841 map->Unlock(); 2842 } 2843 2844 area->protection = newProtection; 2845 } 2846 2847 return status; 2848 } 2849 2850 2851 status_t 2852 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2853 { 2854 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2855 if (addressSpace == NULL) 2856 return B_BAD_TEAM_ID; 2857 2858 VMTranslationMap* map = addressSpace->TranslationMap(); 2859 2860 map->Lock(); 2861 uint32 dummyFlags; 2862 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2863 map->Unlock(); 2864 2865 addressSpace->Put(); 2866 return status; 2867 } 2868 2869 2870 /*! The page's cache must be locked. 2871 */ 2872 bool 2873 vm_test_map_modification(vm_page* page) 2874 { 2875 if (page->modified) 2876 return true; 2877 2878 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2879 vm_page_mapping* mapping; 2880 while ((mapping = iterator.Next()) != NULL) { 2881 VMArea* area = mapping->area; 2882 VMTranslationMap* map = area->address_space->TranslationMap(); 2883 2884 phys_addr_t physicalAddress; 2885 uint32 flags; 2886 map->Lock(); 2887 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2888 map->Unlock(); 2889 2890 if ((flags & PAGE_MODIFIED) != 0) 2891 return true; 2892 } 2893 2894 return false; 2895 } 2896 2897 2898 /*! The page's cache must be locked. 
2899 */ 2900 void 2901 vm_clear_map_flags(vm_page* page, uint32 flags) 2902 { 2903 if ((flags & PAGE_ACCESSED) != 0) 2904 page->accessed = false; 2905 if ((flags & PAGE_MODIFIED) != 0) 2906 page->modified = false; 2907 2908 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2909 vm_page_mapping* mapping; 2910 while ((mapping = iterator.Next()) != NULL) { 2911 VMArea* area = mapping->area; 2912 VMTranslationMap* map = area->address_space->TranslationMap(); 2913 2914 map->Lock(); 2915 map->ClearFlags(virtual_page_address(area, page), flags); 2916 map->Unlock(); 2917 } 2918 } 2919 2920 2921 /*! Removes all mappings from a page. 2922 After you've called this function, the page is unmapped from memory and 2923 the page's \c accessed and \c modified flags have been updated according 2924 to the state of the mappings. 2925 The page's cache must be locked. 2926 */ 2927 void 2928 vm_remove_all_page_mappings(vm_page* page) 2929 { 2930 while (vm_page_mapping* mapping = page->mappings.Head()) { 2931 VMArea* area = mapping->area; 2932 VMTranslationMap* map = area->address_space->TranslationMap(); 2933 addr_t address = virtual_page_address(area, page); 2934 map->UnmapPage(area, address, false); 2935 } 2936 } 2937 2938 2939 int32 2940 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2941 { 2942 int32 count = 0; 2943 2944 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2945 vm_page_mapping* mapping; 2946 while ((mapping = iterator.Next()) != NULL) { 2947 VMArea* area = mapping->area; 2948 VMTranslationMap* map = area->address_space->TranslationMap(); 2949 2950 bool modified; 2951 if (map->ClearAccessedAndModified(area, 2952 virtual_page_address(area, page), false, modified)) { 2953 count++; 2954 } 2955 2956 page->modified |= modified; 2957 } 2958 2959 2960 if (page->accessed) { 2961 count++; 2962 page->accessed = false; 2963 } 2964 2965 return count; 2966 } 2967 2968 2969 /*! Removes all mappings of a page and/or clears the accessed bits of the 2970 mappings. 2971 The function iterates through the page mappings and removes them until 2972 encountering one that has been accessed. From then on it will continue to 2973 iterate, but only clear the accessed flag of the mapping. The page's 2974 \c modified bit will be updated accordingly, the \c accessed bit will be 2975 cleared. 2976 \return The number of mapping accessed bits encountered, including the 2977 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2978 of the page have been removed. 
2979 */ 2980 int32 2981 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 2982 { 2983 ASSERT(page->WiredCount() == 0); 2984 2985 if (page->accessed) 2986 return vm_clear_page_mapping_accessed_flags(page); 2987 2988 while (vm_page_mapping* mapping = page->mappings.Head()) { 2989 VMArea* area = mapping->area; 2990 VMTranslationMap* map = area->address_space->TranslationMap(); 2991 addr_t address = virtual_page_address(area, page); 2992 bool modified = false; 2993 if (map->ClearAccessedAndModified(area, address, true, modified)) { 2994 page->accessed = true; 2995 page->modified |= modified; 2996 return vm_clear_page_mapping_accessed_flags(page); 2997 } 2998 page->modified |= modified; 2999 } 3000 3001 return 0; 3002 } 3003 3004 3005 static int 3006 display_mem(int argc, char** argv) 3007 { 3008 bool physical = false; 3009 addr_t copyAddress; 3010 int32 displayWidth; 3011 int32 itemSize; 3012 int32 num = -1; 3013 addr_t address; 3014 int i = 1, j; 3015 3016 if (argc > 1 && argv[1][0] == '-') { 3017 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3018 physical = true; 3019 i++; 3020 } else 3021 i = 99; 3022 } 3023 3024 if (argc < i + 1 || argc > i + 2) { 3025 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3026 "\tdl - 8 bytes\n" 3027 "\tdw - 4 bytes\n" 3028 "\tds - 2 bytes\n" 3029 "\tdb - 1 byte\n" 3030 "\tstring - a whole string\n" 3031 " -p or --physical only allows memory from a single page to be " 3032 "displayed.\n"); 3033 return 0; 3034 } 3035 3036 address = parse_expression(argv[i]); 3037 3038 if (argc > i + 1) 3039 num = parse_expression(argv[i + 1]); 3040 3041 // build the format string 3042 if (strcmp(argv[0], "db") == 0) { 3043 itemSize = 1; 3044 displayWidth = 16; 3045 } else if (strcmp(argv[0], "ds") == 0) { 3046 itemSize = 2; 3047 displayWidth = 8; 3048 } else if (strcmp(argv[0], "dw") == 0) { 3049 itemSize = 4; 3050 displayWidth = 4; 3051 } else if (strcmp(argv[0], "dl") == 0) { 3052 itemSize = 8; 3053 displayWidth = 2; 3054 } else if (strcmp(argv[0], "string") == 0) { 3055 itemSize = 1; 3056 displayWidth = -1; 3057 } else { 3058 kprintf("display_mem called in an invalid way!\n"); 3059 return 0; 3060 } 3061 3062 if (num <= 0) 3063 num = displayWidth; 3064 3065 void* physicalPageHandle = NULL; 3066 3067 if (physical) { 3068 int32 offset = address & (B_PAGE_SIZE - 1); 3069 if (num * itemSize + offset > B_PAGE_SIZE) { 3070 num = (B_PAGE_SIZE - offset) / itemSize; 3071 kprintf("NOTE: number of bytes has been cut to page size\n"); 3072 } 3073 3074 address = ROUNDDOWN(address, B_PAGE_SIZE); 3075 3076 if (vm_get_physical_page_debug(address, &copyAddress, 3077 &physicalPageHandle) != B_OK) { 3078 kprintf("getting the hardware page failed."); 3079 return 0; 3080 } 3081 3082 address += offset; 3083 copyAddress += offset; 3084 } else 3085 copyAddress = address; 3086 3087 if (!strcmp(argv[0], "string")) { 3088 kprintf("%p \"", (char*)copyAddress); 3089 3090 // string mode 3091 for (i = 0; true; i++) { 3092 char c; 3093 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3094 != B_OK 3095 || c == '\0') { 3096 break; 3097 } 3098 3099 if (c == '\n') 3100 kprintf("\\n"); 3101 else if (c == '\t') 3102 kprintf("\\t"); 3103 else { 3104 if (!isprint(c)) 3105 c = '.'; 3106 3107 kprintf("%c", c); 3108 } 3109 } 3110 3111 kprintf("\"\n"); 3112 } else { 3113 // number mode 3114 for (i = 0; i < num; i++) { 3115 uint64 value; 3116 3117 if ((i % displayWidth) == 0) { 3118 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3119 if (i != 0)
3120 kprintf("\n"); 3121 3122 kprintf("[0x%lx] ", address + i * itemSize); 3123 3124 for (j = 0; j < displayed; j++) { 3125 char c; 3126 if (debug_memcpy(B_CURRENT_TEAM, &c, 3127 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3128 displayed = j; 3129 break; 3130 } 3131 if (!isprint(c)) 3132 c = '.'; 3133 3134 kprintf("%c", c); 3135 } 3136 if (num > displayWidth) { 3137 // make sure the spacing in the last line is correct 3138 for (j = displayed; j < displayWidth * itemSize; j++) 3139 kprintf(" "); 3140 } 3141 kprintf(" "); 3142 } 3143 3144 if (debug_memcpy(B_CURRENT_TEAM, &value, 3145 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3146 kprintf("read fault"); 3147 break; 3148 } 3149 3150 switch (itemSize) { 3151 case 1: 3152 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3153 break; 3154 case 2: 3155 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3156 break; 3157 case 4: 3158 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3159 break; 3160 case 8: 3161 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3162 break; 3163 } 3164 } 3165 3166 kprintf("\n"); 3167 } 3168 3169 if (physical) { 3170 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3171 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3172 } 3173 return 0; 3174 } 3175 3176 3177 static void 3178 dump_cache_tree_recursively(VMCache* cache, int level, 3179 VMCache* highlightCache) 3180 { 3181 // print this cache 3182 for (int i = 0; i < level; i++) 3183 kprintf(" "); 3184 if (cache == highlightCache) 3185 kprintf("%p <--\n", cache); 3186 else 3187 kprintf("%p\n", cache); 3188 3189 // recursively print its consumers 3190 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3191 VMCache* consumer = it.Next();) { 3192 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3193 } 3194 } 3195 3196 3197 static int 3198 dump_cache_tree(int argc, char** argv) 3199 { 3200 if (argc != 2 || !strcmp(argv[1], "--help")) { 3201 kprintf("usage: %s <address>\n", argv[0]); 3202 return 0; 3203 } 3204 3205 addr_t address = parse_expression(argv[1]); 3206 if (address == 0) 3207 return 0; 3208 3209 VMCache* cache = (VMCache*)address; 3210 VMCache* root = cache; 3211 3212 // find the root cache (the transitive source) 3213 while (root->source != NULL) 3214 root = root->source; 3215 3216 dump_cache_tree_recursively(root, 0, cache); 3217 3218 return 0; 3219 } 3220 3221 3222 const char* 3223 vm_cache_type_to_string(int32 type) 3224 { 3225 switch (type) { 3226 case CACHE_TYPE_RAM: 3227 return "RAM"; 3228 case CACHE_TYPE_DEVICE: 3229 return "device"; 3230 case CACHE_TYPE_VNODE: 3231 return "vnode"; 3232 case CACHE_TYPE_NULL: 3233 return "null"; 3234 3235 default: 3236 return "unknown"; 3237 } 3238 } 3239 3240 3241 #if DEBUG_CACHE_LIST 3242 3243 static void 3244 update_cache_info_recursively(VMCache* cache, cache_info& info) 3245 { 3246 info.page_count += cache->page_count; 3247 if (cache->type == CACHE_TYPE_RAM) 3248 info.committed += cache->committed_size; 3249 3250 // recurse 3251 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3252 VMCache* consumer = it.Next();) { 3253 update_cache_info_recursively(consumer, info); 3254 } 3255 } 3256 3257 3258 static int 3259 cache_info_compare_page_count(const void* _a, const void* _b) 3260 { 3261 const cache_info* a = (const cache_info*)_a; 3262 const cache_info* b = (const cache_info*)_b; 3263 if (a->page_count == b->page_count) 3264 return 0; 3265 return a->page_count < b->page_count ? 
1 : -1; 3266 } 3267 3268 3269 static int 3270 cache_info_compare_committed(const void* _a, const void* _b) 3271 { 3272 const cache_info* a = (const cache_info*)_a; 3273 const cache_info* b = (const cache_info*)_b; 3274 if (a->committed == b->committed) 3275 return 0; 3276 return a->committed < b->committed ? 1 : -1; 3277 } 3278 3279 3280 static void 3281 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3282 { 3283 for (int i = 0; i < level; i++) 3284 kprintf(" "); 3285 3286 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3287 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3288 cache->virtual_base, cache->virtual_end, cache->page_count); 3289 3290 if (level == 0) 3291 kprintf("/%lu", info.page_count); 3292 3293 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3294 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3295 3296 if (level == 0) 3297 kprintf("/%lu", info.committed); 3298 } 3299 3300 // areas 3301 if (cache->areas != NULL) { 3302 VMArea* area = cache->areas; 3303 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3304 area->name, area->address_space->ID()); 3305 3306 while (area->cache_next != NULL) { 3307 area = area->cache_next; 3308 kprintf(", %" B_PRId32, area->id); 3309 } 3310 } 3311 3312 kputs("\n"); 3313 3314 // recurse 3315 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3316 VMCache* consumer = it.Next();) { 3317 dump_caches_recursively(consumer, info, level + 1); 3318 } 3319 } 3320 3321 3322 static int 3323 dump_caches(int argc, char** argv) 3324 { 3325 if (sCacheInfoTable == NULL) { 3326 kprintf("No cache info table!\n"); 3327 return 0; 3328 } 3329 3330 bool sortByPageCount = true; 3331 3332 for (int32 i = 1; i < argc; i++) { 3333 if (strcmp(argv[i], "-c") == 0) { 3334 sortByPageCount = false; 3335 } else { 3336 print_debugger_command_usage(argv[0]); 3337 return 0; 3338 } 3339 } 3340 3341 uint32 totalCount = 0; 3342 uint32 rootCount = 0; 3343 off_t totalCommitted = 0; 3344 page_num_t totalPages = 0; 3345 3346 VMCache* cache = gDebugCacheList; 3347 while (cache) { 3348 totalCount++; 3349 if (cache->source == NULL) { 3350 cache_info stackInfo; 3351 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3352 ? sCacheInfoTable[rootCount] : stackInfo; 3353 rootCount++; 3354 info.cache = cache; 3355 info.page_count = 0; 3356 info.committed = 0; 3357 update_cache_info_recursively(cache, info); 3358 totalCommitted += info.committed; 3359 totalPages += info.page_count; 3360 } 3361 3362 cache = cache->debug_next; 3363 } 3364 3365 if (rootCount <= (uint32)kCacheInfoTableCount) { 3366 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3367 sortByPageCount 3368 ? &cache_info_compare_page_count 3369 : &cache_info_compare_committed); 3370 } 3371 3372 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3373 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3374 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3375 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3376 "page count" : "committed size"); 3377 3378 if (rootCount <= (uint32)kCacheInfoTableCount) { 3379 for (uint32 i = 0; i < rootCount; i++) { 3380 cache_info& info = sCacheInfoTable[i]; 3381 dump_caches_recursively(info.cache, info, 0); 3382 } 3383 } else 3384 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3385 3386 return 0; 3387 } 3388 3389 #endif // DEBUG_CACHE_LIST 3390 3391 3392 static int 3393 dump_cache(int argc, char** argv) 3394 { 3395 VMCache* cache; 3396 bool showPages = false; 3397 int i = 1; 3398 3399 if (argc < 2 || !strcmp(argv[1], "--help")) { 3400 kprintf("usage: %s [-ps] <address>\n" 3401 " if -p is specified, all pages are shown, if -s is used\n" 3402 " only the cache info is shown respectively.\n", argv[0]); 3403 return 0; 3404 } 3405 while (argv[i][0] == '-') { 3406 char* arg = argv[i] + 1; 3407 while (arg[0]) { 3408 if (arg[0] == 'p') 3409 showPages = true; 3410 arg++; 3411 } 3412 i++; 3413 } 3414 if (argv[i] == NULL) { 3415 kprintf("%s: invalid argument, pass address\n", argv[0]); 3416 return 0; 3417 } 3418 3419 addr_t address = parse_expression(argv[i]); 3420 if (address == 0) 3421 return 0; 3422 3423 cache = (VMCache*)address; 3424 3425 cache->Dump(showPages); 3426 3427 set_debug_variable("_sourceCache", (addr_t)cache->source); 3428 3429 return 0; 3430 } 3431 3432 3433 static void 3434 dump_area_struct(VMArea* area, bool mappings) 3435 { 3436 kprintf("AREA: %p\n", area); 3437 kprintf("name:\t\t'%s'\n", area->name); 3438 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3439 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3440 kprintf("base:\t\t0x%lx\n", area->Base()); 3441 kprintf("size:\t\t0x%lx\n", area->Size()); 3442 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3443 kprintf("page_protection:%p\n", area->page_protections); 3444 kprintf("wiring:\t\t0x%x\n", area->wiring); 3445 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3446 kprintf("cache:\t\t%p\n", area->cache); 3447 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3448 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3449 kprintf("cache_next:\t%p\n", area->cache_next); 3450 kprintf("cache_prev:\t%p\n", area->cache_prev); 3451 3452 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3453 if (mappings) { 3454 kprintf("page mappings:\n"); 3455 while (iterator.HasNext()) { 3456 vm_page_mapping* mapping = iterator.Next(); 3457 kprintf(" %p", mapping->page); 3458 } 3459 kprintf("\n"); 3460 } else { 3461 uint32 count = 0; 3462 while (iterator.Next() != NULL) { 3463 count++; 3464 } 3465 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3466 } 3467 } 3468 3469 3470 static int 3471 dump_area(int argc, char** argv) 3472 { 3473 bool mappings = false; 3474 bool found = false; 3475 int32 index = 1; 3476 VMArea* area; 3477 addr_t num; 3478 3479 if (argc < 2 || !strcmp(argv[1], "--help")) { 3480 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3481 "All areas matching either id/address/name are listed. 
You can\n" 3482 "force to check only a specific item by prefixing the specifier\n" 3483 "with the id/contains/address/name keywords.\n" 3484 "-m shows the area's mappings as well.\n"); 3485 return 0; 3486 } 3487 3488 if (!strcmp(argv[1], "-m")) { 3489 mappings = true; 3490 index++; 3491 } 3492 3493 int32 mode = 0xf; 3494 if (!strcmp(argv[index], "id")) 3495 mode = 1; 3496 else if (!strcmp(argv[index], "contains")) 3497 mode = 2; 3498 else if (!strcmp(argv[index], "name")) 3499 mode = 4; 3500 else if (!strcmp(argv[index], "address")) 3501 mode = 0; 3502 if (mode != 0xf) 3503 index++; 3504 3505 if (index >= argc) { 3506 kprintf("No area specifier given.\n"); 3507 return 0; 3508 } 3509 3510 num = parse_expression(argv[index]); 3511 3512 if (mode == 0) { 3513 dump_area_struct((struct VMArea*)num, mappings); 3514 } else { 3515 // walk through the area list, looking for the arguments as a name 3516 3517 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3518 while ((area = it.Next()) != NULL) { 3519 if (((mode & 4) != 0 3520 && !strcmp(argv[index], area->name)) 3521 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3522 || (((mode & 2) != 0 && area->Base() <= num 3523 && area->Base() + area->Size() > num))))) { 3524 dump_area_struct(area, mappings); 3525 found = true; 3526 } 3527 } 3528 3529 if (!found) 3530 kprintf("could not find area %s (%ld)\n", argv[index], num); 3531 } 3532 3533 return 0; 3534 } 3535 3536 3537 static int 3538 dump_area_list(int argc, char** argv) 3539 { 3540 VMArea* area; 3541 const char* name = NULL; 3542 int32 id = 0; 3543 3544 if (argc > 1) { 3545 id = parse_expression(argv[1]); 3546 if (id == 0) 3547 name = argv[1]; 3548 } 3549 3550 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3551 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3552 B_PRINTF_POINTER_WIDTH, "size"); 3553 3554 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3555 while ((area = it.Next()) != NULL) { 3556 if ((id != 0 && area->address_space->ID() != id) 3557 || (name != NULL && strstr(area->name, name) == NULL)) 3558 continue; 3559 3560 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3561 area->id, (void*)area->Base(), (void*)area->Size(), 3562 area->protection, area->wiring, area->name); 3563 } 3564 return 0; 3565 } 3566 3567 3568 static int 3569 dump_available_memory(int argc, char** argv) 3570 { 3571 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3572 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3573 return 0; 3574 } 3575 3576 3577 static int 3578 dump_mapping_info(int argc, char** argv) 3579 { 3580 bool reverseLookup = false; 3581 bool pageLookup = false; 3582 3583 int argi = 1; 3584 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3585 const char* arg = argv[argi]; 3586 if (strcmp(arg, "-r") == 0) { 3587 reverseLookup = true; 3588 } else if (strcmp(arg, "-p") == 0) { 3589 reverseLookup = true; 3590 pageLookup = true; 3591 } else { 3592 print_debugger_command_usage(argv[0]); 3593 return 0; 3594 } 3595 } 3596 3597 // We need at least one argument, the address. Optionally a thread ID can be 3598 // specified. 
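	// I.e. after the options exactly one or two arguments must remain --
	// anything else prints the usage.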
3599 if (argi >= argc || argi + 2 < argc) { 3600 print_debugger_command_usage(argv[0]); 3601 return 0; 3602 } 3603 3604 uint64 addressValue; 3605 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3606 return 0; 3607 3608 Team* team = NULL; 3609 if (argi < argc) { 3610 uint64 threadID; 3611 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3612 return 0; 3613 3614 Thread* thread = Thread::GetDebug(threadID); 3615 if (thread == NULL) { 3616 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3617 return 0; 3618 } 3619 3620 team = thread->team; 3621 } 3622 3623 if (reverseLookup) { 3624 phys_addr_t physicalAddress; 3625 if (pageLookup) { 3626 vm_page* page = (vm_page*)(addr_t)addressValue; 3627 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3628 } else { 3629 physicalAddress = (phys_addr_t)addressValue; 3630 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3631 } 3632 3633 kprintf(" Team Virtual Address Area\n"); 3634 kprintf("--------------------------------------\n"); 3635 3636 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3637 Callback() 3638 : 3639 fAddressSpace(NULL) 3640 { 3641 } 3642 3643 void SetAddressSpace(VMAddressSpace* addressSpace) 3644 { 3645 fAddressSpace = addressSpace; 3646 } 3647 3648 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3649 { 3650 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3651 virtualAddress); 3652 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3653 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3654 else 3655 kprintf("\n"); 3656 return false; 3657 } 3658 3659 private: 3660 VMAddressSpace* fAddressSpace; 3661 } callback; 3662 3663 if (team != NULL) { 3664 // team specified -- get its address space 3665 VMAddressSpace* addressSpace = team->address_space; 3666 if (addressSpace == NULL) { 3667 kprintf("Failed to get address space!\n"); 3668 return 0; 3669 } 3670 3671 callback.SetAddressSpace(addressSpace); 3672 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3673 physicalAddress, callback); 3674 } else { 3675 // no team specified -- iterate through all address spaces 3676 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3677 addressSpace != NULL; 3678 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3679 callback.SetAddressSpace(addressSpace); 3680 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3681 physicalAddress, callback); 3682 } 3683 } 3684 } else { 3685 // get the address space 3686 addr_t virtualAddress = (addr_t)addressValue; 3687 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3688 VMAddressSpace* addressSpace; 3689 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3690 addressSpace = VMAddressSpace::Kernel(); 3691 } else if (team != NULL) { 3692 addressSpace = team->address_space; 3693 } else { 3694 Thread* thread = debug_get_debugged_thread(); 3695 if (thread == NULL || thread->team == NULL) { 3696 kprintf("Failed to get team!\n"); 3697 return 0; 3698 } 3699 3700 addressSpace = thread->team->address_space; 3701 } 3702 3703 if (addressSpace == NULL) { 3704 kprintf("Failed to get address space!\n"); 3705 return 0; 3706 } 3707 3708 // let the translation map implementation do the job 3709 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3710 } 3711 3712 return 0; 3713 } 3714 3715 3716 /*! Deletes all areas and reserved regions in the given address space. 3717 3718 The caller must ensure that none of the areas has any wired ranges. 
3719 3720 \param addressSpace The address space. 3721 \param deletingAddressSpace \c true, if the address space is in the process 3722 of being deleted. 3723 */ 3724 void 3725 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3726 { 3727 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3728 addressSpace->ID())); 3729 3730 addressSpace->WriteLock(); 3731 3732 // remove all reserved areas in this address space 3733 addressSpace->UnreserveAllAddressRanges(0); 3734 3735 // delete all the areas in this address space 3736 while (VMArea* area = addressSpace->FirstArea()) { 3737 ASSERT(!area->IsWired()); 3738 delete_area(addressSpace, area, deletingAddressSpace); 3739 } 3740 3741 addressSpace->WriteUnlock(); 3742 } 3743 3744 3745 static area_id 3746 vm_area_for(addr_t address, bool kernel) 3747 { 3748 team_id team; 3749 if (IS_USER_ADDRESS(address)) { 3750 // we try the user team address space, if any 3751 team = VMAddressSpace::CurrentID(); 3752 if (team < 0) 3753 return team; 3754 } else 3755 team = VMAddressSpace::KernelID(); 3756 3757 AddressSpaceReadLocker locker(team); 3758 if (!locker.IsLocked()) 3759 return B_BAD_TEAM_ID; 3760 3761 VMArea* area = locker.AddressSpace()->LookupArea(address); 3762 if (area != NULL) { 3763 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3764 return B_ERROR; 3765 3766 return area->id; 3767 } 3768 3769 return B_ERROR; 3770 } 3771 3772 3773 /*! Frees physical pages that were used during the boot process. 3774 \a end is inclusive. 3775 */ 3776 static void 3777 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3778 { 3779 // free all physical pages in the specified range 3780 3781 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3782 phys_addr_t physicalAddress; 3783 uint32 flags; 3784 3785 if (map->Query(current, &physicalAddress, &flags) == B_OK 3786 && (flags & PAGE_PRESENT) != 0) { 3787 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3788 if (page != NULL && page->State() != PAGE_STATE_FREE 3789 && page->State() != PAGE_STATE_CLEAR 3790 && page->State() != PAGE_STATE_UNUSED) { 3791 DEBUG_PAGE_ACCESS_START(page); 3792 vm_page_set_state(page, PAGE_STATE_FREE); 3793 } 3794 } 3795 } 3796 3797 // unmap the memory 3798 map->Unmap(start, end); 3799 } 3800 3801 3802 void 3803 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3804 { 3805 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3806 addr_t end = start + (size - 1); 3807 addr_t lastEnd = start; 3808 3809 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3810 (void*)start, (void*)end)); 3811 3812 // The areas are sorted in virtual address space order, so 3813 // we just have to find the holes between them that fall 3814 // into the area we should dispose 3815 3816 map->Lock(); 3817 3818 for (VMAddressSpace::AreaIterator it 3819 = VMAddressSpace::Kernel()->GetAreaIterator(); 3820 VMArea* area = it.Next();) { 3821 addr_t areaStart = area->Base(); 3822 addr_t areaEnd = areaStart + (area->Size() - 1); 3823 3824 if (areaEnd < start) 3825 continue; 3826 3827 if (areaStart > end) { 3828 // we are done, the area is already beyond of what we have to free 3829 break; 3830 } 3831 3832 if (areaStart > lastEnd) { 3833 // this is something we can free 3834 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3835 (void*)areaStart)); 3836 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3837 } 3838 3839 if (areaEnd >= end) 
{ 3840 lastEnd = areaEnd; 3841 // no +1 to prevent potential overflow 3842 break; 3843 } 3844 3845 lastEnd = areaEnd + 1; 3846 } 3847 3848 if (lastEnd < end) { 3849 // we can also get rid of some space at the end of the area 3850 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3851 (void*)end)); 3852 unmap_and_free_physical_pages(map, lastEnd, end); 3853 } 3854 3855 map->Unlock(); 3856 } 3857 3858 3859 static void 3860 create_preloaded_image_areas(struct preloaded_image* _image) 3861 { 3862 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3863 char name[B_OS_NAME_LENGTH]; 3864 void* address; 3865 int32 length; 3866 3867 // use file name to create a good area name 3868 char* fileName = strrchr(image->name, '/'); 3869 if (fileName == NULL) 3870 fileName = image->name; 3871 else 3872 fileName++; 3873 3874 length = strlen(fileName); 3875 // make sure there is enough space for the suffix 3876 if (length > 25) 3877 length = 25; 3878 3879 memcpy(name, fileName, length); 3880 strcpy(name + length, "_text"); 3881 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3882 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3883 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3884 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3885 // this will later be remapped read-only/executable by the 3886 // ELF initialization code 3887 3888 strcpy(name + length, "_data"); 3889 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3890 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3891 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3892 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3893 } 3894 3895 3896 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3897 Any boot loader resources contained in that arguments must not be accessed 3898 anymore past this point. 3899 */ 3900 void 3901 vm_free_kernel_args(kernel_args* args) 3902 { 3903 uint32 i; 3904 3905 TRACE(("vm_free_kernel_args()\n")); 3906 3907 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3908 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3909 if (area >= B_OK) 3910 delete_area(area); 3911 } 3912 } 3913 3914 3915 static void 3916 allocate_kernel_args(kernel_args* args) 3917 { 3918 TRACE(("allocate_kernel_args()\n")); 3919 3920 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3921 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3922 3923 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3924 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3925 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3926 } 3927 } 3928 3929 3930 static void 3931 unreserve_boot_loader_ranges(kernel_args* args) 3932 { 3933 TRACE(("unreserve_boot_loader_ranges()\n")); 3934 3935 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3936 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3937 (void*)(addr_t)args->virtual_allocated_range[i].start, 3938 args->virtual_allocated_range[i].size); 3939 } 3940 } 3941 3942 3943 static void 3944 reserve_boot_loader_ranges(kernel_args* args) 3945 { 3946 TRACE(("reserve_boot_loader_ranges()\n")); 3947 3948 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3949 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3950 3951 // If the address is no kernel address, we just skip it. The 3952 // architecture specific code has to deal with it. 
3953 if (!IS_KERNEL_ADDRESS(address)) { 3954 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3955 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3956 continue; 3957 } 3958 3959 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3960 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3961 if (status < B_OK) 3962 panic("could not reserve boot loader ranges\n"); 3963 } 3964 } 3965 3966 3967 static addr_t 3968 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3969 { 3970 size = PAGE_ALIGN(size); 3971 3972 // find a slot in the virtual allocation addr range 3973 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3974 // check to see if the space between this one and the last is big enough 3975 addr_t rangeStart = args->virtual_allocated_range[i].start; 3976 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3977 + args->virtual_allocated_range[i - 1].size; 3978 3979 addr_t base = alignment > 0 3980 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3981 3982 if (base >= KERNEL_BASE && base < rangeStart 3983 && rangeStart - base >= size) { 3984 args->virtual_allocated_range[i - 1].size 3985 += base + size - previousRangeEnd; 3986 return base; 3987 } 3988 } 3989 3990 // we hadn't found one between allocation ranges. this is ok. 3991 // see if there's a gap after the last one 3992 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3993 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3994 + args->virtual_allocated_range[lastEntryIndex].size; 3995 addr_t base = alignment > 0 3996 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3997 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3998 args->virtual_allocated_range[lastEntryIndex].size 3999 += base + size - lastRangeEnd; 4000 return base; 4001 } 4002 4003 // see if there's a gap before the first one 4004 addr_t rangeStart = args->virtual_allocated_range[0].start; 4005 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4006 base = rangeStart - size; 4007 if (alignment > 0) 4008 base = ROUNDDOWN(base, alignment); 4009 4010 if (base >= KERNEL_BASE) { 4011 args->virtual_allocated_range[0].start = base; 4012 args->virtual_allocated_range[0].size += rangeStart - base; 4013 return base; 4014 } 4015 } 4016 4017 return 0; 4018 } 4019 4020 4021 static bool 4022 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4023 { 4024 // TODO: horrible brute-force method of determining if the page can be 4025 // allocated 4026 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4027 if (address >= args->physical_memory_range[i].start 4028 && address < args->physical_memory_range[i].start 4029 + args->physical_memory_range[i].size) 4030 return true; 4031 } 4032 return false; 4033 } 4034 4035 4036 page_num_t 4037 vm_allocate_early_physical_page(kernel_args* args) 4038 { 4039 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4040 phys_addr_t nextPage; 4041 4042 nextPage = args->physical_allocated_range[i].start 4043 + args->physical_allocated_range[i].size; 4044 // see if the page after the next allocated paddr run can be allocated 4045 if (i + 1 < args->num_physical_allocated_ranges 4046 && args->physical_allocated_range[i + 1].size != 0) { 4047 // see if the next page will collide with the next allocated range 4048 if (nextPage >= args->physical_allocated_range[i+1].start) 4049 continue; 4050 } 4051 // see if the next physical page 
fits in the memory block 4052 if (is_page_in_physical_memory_range(args, nextPage)) { 4053 // we got one! 4054 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4055 return nextPage / B_PAGE_SIZE; 4056 } 4057 } 4058 4059 // Expanding upwards didn't work, try going downwards. 4060 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4061 phys_addr_t nextPage; 4062 4063 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4064 // see if the page after the prev allocated paddr run can be allocated 4065 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4066 // see if the next page will collide with the next allocated range 4067 if (nextPage < args->physical_allocated_range[i-1].start 4068 + args->physical_allocated_range[i-1].size) 4069 continue; 4070 } 4071 // see if the next physical page fits in the memory block 4072 if (is_page_in_physical_memory_range(args, nextPage)) { 4073 // we got one! 4074 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4075 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4076 return nextPage / B_PAGE_SIZE; 4077 } 4078 } 4079 4080 return 0; 4081 // could not allocate a block 4082 } 4083 4084 4085 /*! This one uses the kernel_args' physical and virtual memory ranges to 4086 allocate some pages before the VM is completely up. 4087 */ 4088 addr_t 4089 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4090 uint32 attributes, addr_t alignment) 4091 { 4092 if (physicalSize > virtualSize) 4093 physicalSize = virtualSize; 4094 4095 // find the vaddr to allocate at 4096 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4097 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4098 if (virtualBase == 0) { 4099 panic("vm_allocate_early: could not allocate virtual address\n"); 4100 return 0; 4101 } 4102 4103 // map the pages 4104 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4105 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4106 if (physicalAddress == 0) 4107 panic("error allocating early page!\n"); 4108 4109 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4110 4111 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4112 physicalAddress * B_PAGE_SIZE, attributes, 4113 &vm_allocate_early_physical_page); 4114 } 4115 4116 return virtualBase; 4117 } 4118 4119 4120 /*! The main entrance point to initialize the VM. 
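	Roughly (a summary derived from the function body below): it brings up the
	translation map and the page, slab and heap allocators, initializes the
	kernel address space, creates areas for everything that already exists
	(kernel heap, kernel args, preloaded images, the idle thread kernel
	stacks), and finally registers the VM debugger commands.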
*/ 4121 status_t 4122 vm_init(kernel_args* args) 4123 { 4124 struct preloaded_image* image; 4125 void* address; 4126 status_t err = 0; 4127 uint32 i; 4128 4129 TRACE(("vm_init: entry\n")); 4130 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4131 err = arch_vm_init(args); 4132 4133 // initialize some globals 4134 vm_page_init_num_pages(args); 4135 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4136 4137 slab_init(args); 4138 4139 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4140 off_t heapSize = INITIAL_HEAP_SIZE; 4141 // try to accomodate low memory systems 4142 while (heapSize > sAvailableMemory / 8) 4143 heapSize /= 2; 4144 if (heapSize < 1024 * 1024) 4145 panic("vm_init: go buy some RAM please."); 4146 4147 // map in the new heap and initialize it 4148 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4149 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4150 TRACE(("heap at 0x%lx\n", heapBase)); 4151 heap_init(heapBase, heapSize); 4152 #endif 4153 4154 // initialize the free page list and physical page mapper 4155 vm_page_init(args); 4156 4157 // initialize the cache allocators 4158 vm_cache_init(args); 4159 4160 { 4161 status_t error = VMAreaHash::Init(); 4162 if (error != B_OK) 4163 panic("vm_init: error initializing area hash table\n"); 4164 } 4165 4166 VMAddressSpace::Init(); 4167 reserve_boot_loader_ranges(args); 4168 4169 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4170 heap_init_post_area(); 4171 #endif 4172 4173 // Do any further initialization that the architecture dependant layers may 4174 // need now 4175 arch_vm_translation_map_init_post_area(args); 4176 arch_vm_init_post_area(args); 4177 vm_page_init_post_area(args); 4178 slab_init_post_area(); 4179 4180 // allocate areas to represent stuff that already exists 4181 4182 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4183 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4184 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4185 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4186 #endif 4187 4188 allocate_kernel_args(args); 4189 4190 create_preloaded_image_areas(args->kernel_image); 4191 4192 // allocate areas for preloaded images 4193 for (image = args->preloaded_images; image != NULL; image = image->next) 4194 create_preloaded_image_areas(image); 4195 4196 // allocate kernel stacks 4197 for (i = 0; i < args->num_cpus; i++) { 4198 char name[64]; 4199 4200 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4201 address = (void*)args->cpu_kstack[i].start; 4202 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4203 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4204 } 4205 4206 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4207 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4208 4209 #if PARANOID_KERNEL_MALLOC 4210 vm_block_address_range("uninitialized heap memory", 4211 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4212 #endif 4213 #if PARANOID_KERNEL_FREE 4214 vm_block_address_range("freed heap memory", 4215 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4216 #endif 4217 4218 // create the object cache for the page mappings 4219 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4220 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4221 NULL, NULL); 4222 if (gPageMappingsObjectCache == NULL) 4223 panic("failed to create page mappings object cache"); 4224 4225 
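	// Keep a minimum number of mapping structures in reserve: page mappings
	// are needed to resolve page faults, and when allocating one fails,
	// vm_soft_fault() falls back to object_cache_reserve() on this cache.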
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4226 4227 #if DEBUG_CACHE_LIST 4228 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4229 virtual_address_restrictions virtualRestrictions = {}; 4230 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4231 physical_address_restrictions physicalRestrictions = {}; 4232 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4233 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4234 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4235 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4236 &physicalRestrictions, (void**)&sCacheInfoTable); 4237 } 4238 #endif // DEBUG_CACHE_LIST 4239 4240 // add some debugger commands 4241 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4242 add_debugger_command("area", &dump_area, 4243 "Dump info about a particular area"); 4244 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4245 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4246 #if DEBUG_CACHE_LIST 4247 if (sCacheInfoTable != NULL) { 4248 add_debugger_command_etc("caches", &dump_caches, 4249 "List all VMCache trees", 4250 "[ \"-c\" ]\n" 4251 "All cache trees are listed sorted in decreasing order by number " 4252 "of\n" 4253 "used pages or, if \"-c\" is specified, by size of committed " 4254 "memory.\n", 4255 0); 4256 } 4257 #endif 4258 add_debugger_command("avail", &dump_available_memory, 4259 "Dump available memory"); 4260 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4261 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4262 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4263 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4264 add_debugger_command("string", &display_mem, "dump strings"); 4265 4266 add_debugger_command_etc("mapping", &dump_mapping_info, 4267 "Print address mapping information", 4268 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4269 "Prints low-level page mapping information for a given address. If\n" 4270 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4271 "address that is looked up in the translation map of the current\n" 4272 "team, respectively the team specified by thread ID <thread ID>. If\n" 4273 "\"-r\" is specified, <address> is a physical address that is\n" 4274 "searched in the translation map of all teams, respectively the team\n" 4275 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4276 "<address> is the address of a vm_page structure. 
The behavior is\n" 4277 "equivalent to specifying \"-r\" with the physical address of that\n" 4278 "page.\n", 4279 0); 4280 4281 TRACE(("vm_init: exit\n")); 4282 4283 vm_cache_init_post_heap(); 4284 4285 return err; 4286 } 4287 4288 4289 status_t 4290 vm_init_post_sem(kernel_args* args) 4291 { 4292 // This frees all unused boot loader resources and makes its space available 4293 // again 4294 arch_vm_init_end(args); 4295 unreserve_boot_loader_ranges(args); 4296 4297 // fill in all of the semaphores that were not allocated before 4298 // since we're still single threaded and only the kernel address space 4299 // exists, it isn't that hard to find all of the ones we need to create 4300 4301 arch_vm_translation_map_init_post_sem(args); 4302 4303 slab_init_post_sem(); 4304 4305 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4306 heap_init_post_sem(); 4307 #endif 4308 4309 return B_OK; 4310 } 4311 4312 4313 status_t 4314 vm_init_post_thread(kernel_args* args) 4315 { 4316 vm_page_init_post_thread(args); 4317 slab_init_post_thread(); 4318 return heap_init_post_thread(); 4319 } 4320 4321 4322 status_t 4323 vm_init_post_modules(kernel_args* args) 4324 { 4325 return arch_vm_init_post_modules(args); 4326 } 4327 4328 4329 void 4330 permit_page_faults(void) 4331 { 4332 Thread* thread = thread_get_current_thread(); 4333 if (thread != NULL) 4334 atomic_add(&thread->page_faults_allowed, 1); 4335 } 4336 4337 4338 void 4339 forbid_page_faults(void) 4340 { 4341 Thread* thread = thread_get_current_thread(); 4342 if (thread != NULL) 4343 atomic_add(&thread->page_faults_allowed, -1); 4344 } 4345 4346 4347 status_t 4348 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4349 bool isUser, addr_t* newIP) 4350 { 4351 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4352 faultAddress)); 4353 4354 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4355 4356 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4357 VMAddressSpace* addressSpace = NULL; 4358 4359 status_t status = B_OK; 4360 *newIP = 0; 4361 atomic_add((int32*)&sPageFaults, 1); 4362 4363 if (IS_KERNEL_ADDRESS(pageAddress)) { 4364 addressSpace = VMAddressSpace::GetKernel(); 4365 } else if (IS_USER_ADDRESS(pageAddress)) { 4366 addressSpace = VMAddressSpace::GetCurrent(); 4367 if (addressSpace == NULL) { 4368 if (!isUser) { 4369 dprintf("vm_page_fault: kernel thread accessing invalid user " 4370 "memory!\n"); 4371 status = B_BAD_ADDRESS; 4372 TPF(PageFaultError(-1, 4373 VMPageFaultTracing 4374 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4375 } else { 4376 // XXX weird state. 
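				// A fault from userland, but the current team has no user
				// address space to resolve it in -- this should not happen.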
4377 panic("vm_page_fault: non kernel thread accessing user memory " 4378 "that doesn't exist!\n"); 4379 status = B_BAD_ADDRESS; 4380 } 4381 } 4382 } else { 4383 // the hit was probably in the 64k DMZ between kernel and user space 4384 // this keeps a user space thread from passing a buffer that crosses 4385 // into kernel space 4386 status = B_BAD_ADDRESS; 4387 TPF(PageFaultError(-1, 4388 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4389 } 4390 4391 if (status == B_OK) { 4392 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4393 isUser, NULL); 4394 } 4395 4396 if (status < B_OK) { 4397 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4398 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n", 4399 strerror(status), address, faultAddress, isWrite, isUser, 4400 thread_get_current_thread_id()); 4401 if (!isUser) { 4402 Thread* thread = thread_get_current_thread(); 4403 if (thread != NULL && thread->fault_handler != 0) { 4404 // this will cause the arch dependant page fault handler to 4405 // modify the IP on the interrupt frame or whatever to return 4406 // to this address 4407 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4408 } else { 4409 // unhandled page fault in the kernel 4410 panic("vm_page_fault: unhandled page fault in kernel space at " 4411 "0x%lx, ip 0x%lx\n", address, faultAddress); 4412 } 4413 } else { 4414 Thread* thread = thread_get_current_thread(); 4415 4416 #ifdef TRACE_FAULTS 4417 VMArea* area = NULL; 4418 if (addressSpace != NULL) { 4419 addressSpace->ReadLock(); 4420 area = addressSpace->LookupArea(faultAddress); 4421 } 4422 4423 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4424 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4425 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4426 thread->team->Name(), thread->team->id, 4427 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4428 faultAddress, area ? area->name : "???", faultAddress - (area ? 4429 area->Base() : 0x0)); 4430 4431 if (addressSpace != NULL) 4432 addressSpace->ReadUnlock(); 4433 #endif 4434 4435 // If the thread has a signal handler for SIGSEGV, we simply 4436 // send it the signal. Otherwise we notify the user debugger 4437 // first. 4438 struct sigaction action; 4439 if ((sigaction(SIGSEGV, NULL, &action) == 0 4440 && action.sa_handler != SIG_DFL 4441 && action.sa_handler != SIG_IGN) 4442 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4443 SIGSEGV)) { 4444 Signal signal(SIGSEGV, 4445 status == B_PERMISSION_DENIED 4446 ? 
SEGV_ACCERR : SEGV_MAPERR, 4447 EFAULT, thread->team->id); 4448 signal.SetAddress((void*)address); 4449 send_signal_to_thread(thread, signal, 0); 4450 } 4451 } 4452 } 4453 4454 if (addressSpace != NULL) 4455 addressSpace->Put(); 4456 4457 return B_HANDLED_INTERRUPT; 4458 } 4459 4460 4461 struct PageFaultContext { 4462 AddressSpaceReadLocker addressSpaceLocker; 4463 VMCacheChainLocker cacheChainLocker; 4464 4465 VMTranslationMap* map; 4466 VMCache* topCache; 4467 off_t cacheOffset; 4468 vm_page_reservation reservation; 4469 bool isWrite; 4470 4471 // return values 4472 vm_page* page; 4473 bool restart; 4474 bool pageAllocated; 4475 4476 4477 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4478 : 4479 addressSpaceLocker(addressSpace, true), 4480 map(addressSpace->TranslationMap()), 4481 isWrite(isWrite) 4482 { 4483 } 4484 4485 ~PageFaultContext() 4486 { 4487 UnlockAll(); 4488 vm_page_unreserve_pages(&reservation); 4489 } 4490 4491 void Prepare(VMCache* topCache, off_t cacheOffset) 4492 { 4493 this->topCache = topCache; 4494 this->cacheOffset = cacheOffset; 4495 page = NULL; 4496 restart = false; 4497 pageAllocated = false; 4498 4499 cacheChainLocker.SetTo(topCache); 4500 } 4501 4502 void UnlockAll(VMCache* exceptCache = NULL) 4503 { 4504 topCache = NULL; 4505 addressSpaceLocker.Unlock(); 4506 cacheChainLocker.Unlock(exceptCache); 4507 } 4508 }; 4509 4510 4511 /*! Gets the page that should be mapped into the area. 4512 Returns an error code other than \c B_OK, if the page couldn't be found or 4513 paged in. The locking state of the address space and the caches is undefined 4514 in that case. 4515 Returns \c B_OK with \c context.restart set to \c true, if the functions 4516 had to unlock the address space and all caches and is supposed to be called 4517 again. 4518 Returns \c B_OK with \c context.restart set to \c false, if the page was 4519 found. It is returned in \c context.page. The address space will still be 4520 locked as well as all caches starting from the top cache to at least the 4521 cache the page lives in. 4522 */ 4523 static status_t 4524 fault_get_page(PageFaultContext& context) 4525 { 4526 VMCache* cache = context.topCache; 4527 VMCache* lastCache = NULL; 4528 vm_page* page = NULL; 4529 4530 while (cache != NULL) { 4531 // We already hold the lock of the cache at this point. 4532 4533 lastCache = cache; 4534 4535 page = cache->LookupPage(context.cacheOffset); 4536 if (page != NULL && page->busy) { 4537 // page must be busy -- wait for it to become unbusy 4538 context.UnlockAll(cache); 4539 cache->ReleaseRefLocked(); 4540 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4541 4542 // restart the whole process 4543 context.restart = true; 4544 return B_OK; 4545 } 4546 4547 if (page != NULL) 4548 break; 4549 4550 // The current cache does not contain the page we're looking for. 4551 4552 // see if the backing store has it 4553 if (cache->HasPage(context.cacheOffset)) { 4554 // insert a fresh page and mark it busy -- we're going to read it in 4555 page = vm_page_allocate_page(&context.reservation, 4556 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4557 cache->InsertPage(page, context.cacheOffset); 4558 4559 // We need to unlock all caches and the address space while reading 4560 // the page in. Keep a reference to the cache around. 
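			// The extra reference makes sure the cache stays around while we
			// hold no locks during the read below.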
4561 cache->AcquireRefLocked(); 4562 context.UnlockAll(); 4563 4564 // read the page in 4565 generic_io_vec vec; 4566 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4567 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4568 4569 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4570 B_PHYSICAL_IO_REQUEST, &bytesRead); 4571 4572 cache->Lock(); 4573 4574 if (status < B_OK) { 4575 // on error remove and free the page 4576 dprintf("reading page from cache %p returned: %s!\n", 4577 cache, strerror(status)); 4578 4579 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4580 cache->RemovePage(page); 4581 vm_page_set_state(page, PAGE_STATE_FREE); 4582 4583 cache->ReleaseRefAndUnlock(); 4584 return status; 4585 } 4586 4587 // mark the page unbusy again 4588 cache->MarkPageUnbusy(page); 4589 4590 DEBUG_PAGE_ACCESS_END(page); 4591 4592 // Since we needed to unlock everything temporarily, the area 4593 // situation might have changed. So we need to restart the whole 4594 // process. 4595 cache->ReleaseRefAndUnlock(); 4596 context.restart = true; 4597 return B_OK; 4598 } 4599 4600 cache = context.cacheChainLocker.LockSourceCache(); 4601 } 4602 4603 if (page == NULL) { 4604 // There was no adequate page, determine the cache for a clean one. 4605 // Read-only pages come in the deepest cache, only the top most cache 4606 // may have direct write access. 4607 cache = context.isWrite ? context.topCache : lastCache; 4608 4609 // allocate a clean page 4610 page = vm_page_allocate_page(&context.reservation, 4611 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4612 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4613 page->physical_page_number)); 4614 4615 // insert the new page into our cache 4616 cache->InsertPage(page, context.cacheOffset); 4617 context.pageAllocated = true; 4618 } else if (page->Cache() != context.topCache && context.isWrite) { 4619 // We have a page that has the data we want, but in the wrong cache 4620 // object so we need to copy it and stick it into the top cache. 4621 vm_page* sourcePage = page; 4622 4623 // TODO: If memory is low, it might be a good idea to steal the page 4624 // from our source cache -- if possible, that is. 4625 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4626 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4627 4628 // To not needlessly kill concurrency we unlock all caches but the top 4629 // one while copying the page. Lacking another mechanism to ensure that 4630 // the source page doesn't disappear, we mark it busy. 4631 sourcePage->busy = true; 4632 context.cacheChainLocker.UnlockKeepRefs(true); 4633 4634 // copy the page 4635 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4636 sourcePage->physical_page_number * B_PAGE_SIZE); 4637 4638 context.cacheChainLocker.RelockCaches(true); 4639 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4640 4641 // insert the new page into our cache 4642 context.topCache->InsertPage(page, context.cacheOffset); 4643 context.pageAllocated = true; 4644 } else 4645 DEBUG_PAGE_ACCESS_START(page); 4646 4647 context.page = page; 4648 return B_OK; 4649 } 4650 4651 4652 /*! Makes sure the address in the given address space is mapped. 4653 4654 \param addressSpace The address space. 4655 \param originalAddress The address. Doesn't need to be page aligned. 4656 \param isWrite If \c true the address shall be write-accessible. 4657 \param isUser If \c true the access is requested by a userland team. 
4658 \param wirePage On success, if non \c NULL, the wired count of the page 4659 mapped at the given address is incremented and the page is returned 4660 via this parameter. 4661 \return \c B_OK on success, another error code otherwise. 4662 */ 4663 static status_t 4664 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4665 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4666 { 4667 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4668 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4669 originalAddress, isWrite, isUser)); 4670 4671 PageFaultContext context(addressSpace, isWrite); 4672 4673 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4674 status_t status = B_OK; 4675 4676 addressSpace->IncrementFaultCount(); 4677 4678 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4679 // the pages upfront makes sure we don't have any cache locked, so that the 4680 // page daemon/thief can do their job without problems. 4681 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4682 originalAddress); 4683 context.addressSpaceLocker.Unlock(); 4684 vm_page_reserve_pages(&context.reservation, reservePages, 4685 addressSpace == VMAddressSpace::Kernel() 4686 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4687 4688 while (true) { 4689 context.addressSpaceLocker.Lock(); 4690 4691 // get the area the fault was in 4692 VMArea* area = addressSpace->LookupArea(address); 4693 if (area == NULL) { 4694 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4695 "space\n", originalAddress); 4696 TPF(PageFaultError(-1, 4697 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4698 status = B_BAD_ADDRESS; 4699 break; 4700 } 4701 4702 // check permissions 4703 uint32 protection = get_area_page_protection(area, address); 4704 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4705 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4706 area->id, (void*)originalAddress); 4707 TPF(PageFaultError(area->id, 4708 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4709 status = B_PERMISSION_DENIED; 4710 break; 4711 } 4712 if (isWrite && (protection 4713 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4714 dprintf("write access attempted on write-protected area 0x%" 4715 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4716 TPF(PageFaultError(area->id, 4717 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4718 status = B_PERMISSION_DENIED; 4719 break; 4720 } else if (isExecute && (protection 4721 & (B_EXECUTE_AREA 4722 | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4723 dprintf("instruction fetch attempted on execute-protected area 0x%" 4724 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4725 TPF(PageFaultError(area->id, 4726 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4727 status = B_PERMISSION_DENIED; 4728 break; 4729 } else if (!isWrite && !isExecute && (protection 4730 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4731 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4732 " at %p\n", area->id, (void*)originalAddress); 4733 TPF(PageFaultError(area->id, 4734 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4735 status = B_PERMISSION_DENIED; 4736 break; 4737 } 4738 4739 // We have the area, it was a valid access, so let's try to resolve the 4740 // page fault now. 4741 // At first, the top most cache from the area is investigated. 
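		// Translate the fault address into an offset within the area's cache
		// (relative to the area base, shifted by the area's cache offset).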
4742 4743 context.Prepare(vm_area_get_locked_cache(area), 4744 address - area->Base() + area->cache_offset); 4745 4746 // See if this cache has a fault handler -- this will do all the work 4747 // for us. 4748 { 4749 // Note, since the page fault is resolved with interrupts enabled, 4750 // the fault handler could be called more than once for the same 4751 // reason -- the store must take this into account. 4752 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4753 if (status != B_BAD_HANDLER) 4754 break; 4755 } 4756 4757 // The top most cache has no fault handler, so let's see if the cache or 4758 // its sources already have the page we're searching for (we're going 4759 // from top to bottom). 4760 status = fault_get_page(context); 4761 if (status != B_OK) { 4762 TPF(PageFaultError(area->id, status)); 4763 break; 4764 } 4765 4766 if (context.restart) 4767 continue; 4768 4769 // All went fine, all there is left to do is to map the page into the 4770 // address space. 4771 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4772 context.page)); 4773 4774 // If the page doesn't reside in the area's cache, we need to make sure 4775 // it's mapped in read-only, so that we cannot overwrite someone else's 4776 // data (copy-on-write) 4777 uint32 newProtection = protection; 4778 if (context.page->Cache() != context.topCache && !isWrite) 4779 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4780 4781 bool unmapPage = false; 4782 bool mapPage = true; 4783 4784 // check whether there's already a page mapped at the address 4785 context.map->Lock(); 4786 4787 phys_addr_t physicalAddress; 4788 uint32 flags; 4789 vm_page* mappedPage = NULL; 4790 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4791 && (flags & PAGE_PRESENT) != 0 4792 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4793 != NULL) { 4794 // Yep there's already a page. If it's ours, we can simply adjust 4795 // its protection. Otherwise we have to unmap it. 4796 if (mappedPage == context.page) { 4797 context.map->ProtectPage(area, address, newProtection); 4798 // Note: We assume that ProtectPage() is atomic (i.e. 4799 // the page isn't temporarily unmapped), otherwise we'd have 4800 // to make sure it isn't wired. 4801 mapPage = false; 4802 } else 4803 unmapPage = true; 4804 } 4805 4806 context.map->Unlock(); 4807 4808 if (unmapPage) { 4809 // If the page is wired, we can't unmap it. Wait until it is unwired 4810 // again and restart. Note that the page cannot be wired for 4811 // writing, since it it isn't in the topmost cache. So we can safely 4812 // ignore ranges wired for writing (our own and other concurrent 4813 // wiring attempts in progress) and in fact have to do that to avoid 4814 // a deadlock. 4815 VMAreaUnwiredWaiter waiter; 4816 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4817 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4818 // unlock everything and wait 4819 if (context.pageAllocated) { 4820 // ... but since we allocated a page and inserted it into 4821 // the top cache, remove and free it first. Otherwise we'd 4822 // have a page from a lower cache mapped while an upper 4823 // cache has a page that would shadow it. 
4824 context.topCache->RemovePage(context.page); 4825 vm_page_free_etc(context.topCache, context.page, 4826 &context.reservation); 4827 } else 4828 DEBUG_PAGE_ACCESS_END(context.page); 4829 4830 context.UnlockAll(); 4831 waiter.waitEntry.Wait(); 4832 continue; 4833 } 4834 4835 // Note: The mapped page is a page of a lower cache. We are 4836 // guaranteed to have that cached locked, our new page is a copy of 4837 // that page, and the page is not busy. The logic for that guarantee 4838 // is as follows: Since the page is mapped, it must live in the top 4839 // cache (ruled out above) or any of its lower caches, and there is 4840 // (was before the new page was inserted) no other page in any 4841 // cache between the top cache and the page's cache (otherwise that 4842 // would be mapped instead). That in turn means that our algorithm 4843 // must have found it and therefore it cannot be busy either. 4844 DEBUG_PAGE_ACCESS_START(mappedPage); 4845 unmap_page(area, address); 4846 DEBUG_PAGE_ACCESS_END(mappedPage); 4847 } 4848 4849 if (mapPage) { 4850 if (map_page(area, context.page, address, newProtection, 4851 &context.reservation) != B_OK) { 4852 // Mapping can only fail, when the page mapping object couldn't 4853 // be allocated. Save for the missing mapping everything is 4854 // fine, though. If this was a regular page fault, we'll simply 4855 // leave and probably fault again. To make sure we'll have more 4856 // luck then, we ensure that the minimum object reserve is 4857 // available. 4858 DEBUG_PAGE_ACCESS_END(context.page); 4859 4860 context.UnlockAll(); 4861 4862 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4863 != B_OK) { 4864 // Apparently the situation is serious. Let's get ourselves 4865 // killed. 4866 status = B_NO_MEMORY; 4867 } else if (wirePage != NULL) { 4868 // The caller expects us to wire the page. Since 4869 // object_cache_reserve() succeeded, we should now be able 4870 // to allocate a mapping structure. Restart. 
4871 continue; 4872 } 4873 4874 break; 4875 } 4876 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4877 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4878 4879 // also wire the page, if requested 4880 if (wirePage != NULL && status == B_OK) { 4881 increment_page_wired_count(context.page); 4882 *wirePage = context.page; 4883 } 4884 4885 DEBUG_PAGE_ACCESS_END(context.page); 4886 4887 break; 4888 } 4889 4890 return status; 4891 } 4892 4893 4894 status_t 4895 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4896 { 4897 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4898 } 4899 4900 status_t 4901 vm_put_physical_page(addr_t vaddr, void* handle) 4902 { 4903 return sPhysicalPageMapper->PutPage(vaddr, handle); 4904 } 4905 4906 4907 status_t 4908 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4909 void** _handle) 4910 { 4911 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4912 } 4913 4914 status_t 4915 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4916 { 4917 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4918 } 4919 4920 4921 status_t 4922 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4923 { 4924 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4925 } 4926 4927 status_t 4928 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4929 { 4930 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4931 } 4932 4933 4934 void 4935 vm_get_info(system_info* info) 4936 { 4937 swap_get_info(info); 4938 4939 MutexLocker locker(sAvailableMemoryLock); 4940 info->needed_memory = sNeededMemory; 4941 info->free_memory = sAvailableMemory; 4942 } 4943 4944 4945 uint32 4946 vm_num_page_faults(void) 4947 { 4948 return sPageFaults; 4949 } 4950 4951 4952 off_t 4953 vm_available_memory(void) 4954 { 4955 MutexLocker locker(sAvailableMemoryLock); 4956 return sAvailableMemory; 4957 } 4958 4959 4960 off_t 4961 vm_available_not_needed_memory(void) 4962 { 4963 MutexLocker locker(sAvailableMemoryLock); 4964 return sAvailableMemory - sNeededMemory; 4965 } 4966 4967 4968 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4969 debugger. 
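	Unlike vm_available_not_needed_memory() it reads the counters without
	acquiring \c sAvailableMemoryLock (locking is not an option in the kernel
	debugger context).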
4970 */ 4971 off_t 4972 vm_available_not_needed_memory_debug(void) 4973 { 4974 return sAvailableMemory - sNeededMemory; 4975 } 4976 4977 4978 size_t 4979 vm_kernel_address_space_left(void) 4980 { 4981 return VMAddressSpace::Kernel()->FreeSpace(); 4982 } 4983 4984 4985 void 4986 vm_unreserve_memory(size_t amount) 4987 { 4988 mutex_lock(&sAvailableMemoryLock); 4989 4990 sAvailableMemory += amount; 4991 4992 mutex_unlock(&sAvailableMemoryLock); 4993 } 4994 4995 4996 status_t 4997 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4998 { 4999 size_t reserve = kMemoryReserveForPriority[priority]; 5000 5001 MutexLocker locker(sAvailableMemoryLock); 5002 5003 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5004 5005 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5006 sAvailableMemory -= amount; 5007 return B_OK; 5008 } 5009 5010 if (timeout <= 0) 5011 return B_NO_MEMORY; 5012 5013 // turn timeout into an absolute timeout 5014 timeout += system_time(); 5015 5016 // loop until we've got the memory or the timeout occurs 5017 do { 5018 sNeededMemory += amount; 5019 5020 // call the low resource manager 5021 locker.Unlock(); 5022 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5023 B_ABSOLUTE_TIMEOUT, timeout); 5024 locker.Lock(); 5025 5026 sNeededMemory -= amount; 5027 5028 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5029 sAvailableMemory -= amount; 5030 return B_OK; 5031 } 5032 } while (timeout > system_time()); 5033 5034 return B_NO_MEMORY; 5035 } 5036 5037 5038 status_t 5039 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5040 { 5041 // NOTE: The caller is responsible for synchronizing calls to this function! 5042 5043 AddressSpaceReadLocker locker; 5044 VMArea* area; 5045 status_t status = locker.SetFromArea(id, area); 5046 if (status != B_OK) 5047 return status; 5048 5049 // nothing to do, if the type doesn't change 5050 uint32 oldType = area->MemoryType(); 5051 if (type == oldType) 5052 return B_OK; 5053 5054 // set the memory type of the area and the mapped pages 5055 VMTranslationMap* map = area->address_space->TranslationMap(); 5056 map->Lock(); 5057 area->SetMemoryType(type); 5058 map->ProtectArea(area, area->protection); 5059 map->Unlock(); 5060 5061 // set the physical memory type 5062 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5063 if (error != B_OK) { 5064 // reset the memory type of the area and the mapped pages 5065 map->Lock(); 5066 area->SetMemoryType(oldType); 5067 map->ProtectArea(area, area->protection); 5068 map->Unlock(); 5069 return error; 5070 } 5071 5072 return B_OK; 5073 5074 } 5075 5076 5077 /*! This function enforces some protection properties: 5078 - kernel areas must be W^X (after kernel startup) 5079 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5080 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5081 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 5082 and B_KERNEL_WRITE_AREA. 
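	For example, a requested protection of just \c B_WRITE_AREA ends up as
	\c B_WRITE_AREA | \c B_KERNEL_READ_AREA | \c B_KERNEL_WRITE_AREA.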
5083 */ 5084 static void 5085 fix_protection(uint32* protection) 5086 { 5087 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5088 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5089 || (*protection & B_WRITE_AREA) != 0) 5090 && !gKernelStartup) 5091 panic("kernel areas cannot be both writable and executable!"); 5092 5093 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5094 if ((*protection & B_USER_PROTECTION) == 0 5095 || (*protection & B_WRITE_AREA) != 0) 5096 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5097 else 5098 *protection |= B_KERNEL_READ_AREA; 5099 } 5100 } 5101 5102 5103 static void 5104 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5105 { 5106 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5107 info->area = area->id; 5108 info->address = (void*)area->Base(); 5109 info->size = area->Size(); 5110 info->protection = area->protection; 5111 info->lock = area->wiring; 5112 info->team = area->address_space->ID(); 5113 info->copy_count = 0; 5114 info->in_count = 0; 5115 info->out_count = 0; 5116 // TODO: retrieve real values here! 5117 5118 VMCache* cache = vm_area_get_locked_cache(area); 5119 5120 // Note, this is a simplification; the cache could be larger than this area 5121 info->ram_size = cache->page_count * B_PAGE_SIZE; 5122 5123 vm_area_put_locked_cache(cache); 5124 } 5125 5126 5127 static status_t 5128 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5129 { 5130 // is newSize a multiple of B_PAGE_SIZE? 5131 if (newSize & (B_PAGE_SIZE - 1)) 5132 return B_BAD_VALUE; 5133 5134 // lock all affected address spaces and the cache 5135 VMArea* area; 5136 VMCache* cache; 5137 5138 MultiAddressSpaceLocker locker; 5139 AreaCacheLocker cacheLocker; 5140 5141 status_t status; 5142 size_t oldSize; 5143 bool anyKernelArea; 5144 bool restart; 5145 5146 do { 5147 anyKernelArea = false; 5148 restart = false; 5149 5150 locker.Unset(); 5151 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5152 if (status != B_OK) 5153 return status; 5154 cacheLocker.SetTo(cache, true); // already locked 5155 5156 // enforce restrictions 5157 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5158 || (area->protection & B_KERNEL_AREA) != 0)) { 5159 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5160 "resize kernel area %" B_PRId32 " (%s)\n", 5161 team_get_current_team_id(), areaID, area->name); 5162 return B_NOT_ALLOWED; 5163 } 5164 // TODO: Enforce all restrictions (team, etc.)! 5165 5166 oldSize = area->Size(); 5167 if (newSize == oldSize) 5168 return B_OK; 5169 5170 if (cache->type != CACHE_TYPE_RAM) 5171 return B_NOT_ALLOWED; 5172 5173 if (oldSize < newSize) { 5174 // We need to check if all areas of this cache can be resized. 5175 for (VMArea* current = cache->areas; current != NULL; 5176 current = current->cache_next) { 5177 if (!current->address_space->CanResizeArea(current, newSize)) 5178 return B_ERROR; 5179 anyKernelArea 5180 |= current->address_space == VMAddressSpace::Kernel(); 5181 } 5182 } else { 5183 // We're shrinking the areas, so we must make sure the affected 5184 // ranges are not wired. 
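			// If a range is wired, wait_if_area_range_is_wired() unlocks
			// everything and waits; the locking procedure is then restarted
			// from the top.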
5185 for (VMArea* current = cache->areas; current != NULL; 5186 current = current->cache_next) { 5187 anyKernelArea 5188 |= current->address_space == VMAddressSpace::Kernel(); 5189 5190 if (wait_if_area_range_is_wired(current, 5191 current->Base() + newSize, oldSize - newSize, &locker, 5192 &cacheLocker)) { 5193 restart = true; 5194 break; 5195 } 5196 } 5197 } 5198 } while (restart); 5199 5200 // Okay, looks good so far, so let's do it 5201 5202 int priority = kernel && anyKernelArea 5203 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5204 uint32 allocationFlags = kernel && anyKernelArea 5205 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5206 5207 if (oldSize < newSize) { 5208 // Growing the cache can fail, so we do it first. 5209 status = cache->Resize(cache->virtual_base + newSize, priority); 5210 if (status != B_OK) 5211 return status; 5212 } 5213 5214 for (VMArea* current = cache->areas; current != NULL; 5215 current = current->cache_next) { 5216 status = current->address_space->ResizeArea(current, newSize, 5217 allocationFlags); 5218 if (status != B_OK) 5219 break; 5220 5221 // We also need to unmap all pages beyond the new size, if the area has 5222 // shrunk 5223 if (newSize < oldSize) { 5224 VMCacheChainLocker cacheChainLocker(cache); 5225 cacheChainLocker.LockAllSourceCaches(); 5226 5227 unmap_pages(current, current->Base() + newSize, 5228 oldSize - newSize); 5229 5230 cacheChainLocker.Unlock(cache); 5231 } 5232 } 5233 5234 if (status == B_OK) { 5235 // Shrink or grow individual page protections if in use. 5236 if (area->page_protections != NULL) { 5237 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5238 uint8* newProtections 5239 = (uint8*)realloc(area->page_protections, bytes); 5240 if (newProtections == NULL) 5241 status = B_NO_MEMORY; 5242 else { 5243 area->page_protections = newProtections; 5244 5245 if (oldSize < newSize) { 5246 // init the additional page protections to that of the area 5247 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5248 uint32 areaProtection = area->protection 5249 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5250 memset(area->page_protections + offset, 5251 areaProtection | (areaProtection << 4), bytes - offset); 5252 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5253 uint8& entry = area->page_protections[offset - 1]; 5254 entry = (entry & 0x0f) | (areaProtection << 4); 5255 } 5256 } 5257 } 5258 } 5259 } 5260 5261 // shrinking the cache can't fail, so we do it now 5262 if (status == B_OK && newSize < oldSize) 5263 status = cache->Resize(cache->virtual_base + newSize, priority); 5264 5265 if (status != B_OK) { 5266 // Something failed -- resize the areas back to their original size. 5267 // This can fail, too, in which case we're seriously screwed. 
5268 for (VMArea* current = cache->areas; current != NULL; 5269 current = current->cache_next) { 5270 if (current->address_space->ResizeArea(current, oldSize, 5271 allocationFlags) != B_OK) { 5272 panic("vm_resize_area(): Failed and not being able to restore " 5273 "original state."); 5274 } 5275 } 5276 5277 cache->Resize(cache->virtual_base + oldSize, priority); 5278 } 5279 5280 // TODO: we must honour the lock restrictions of this area 5281 return status; 5282 } 5283 5284 5285 status_t 5286 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5287 { 5288 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5289 } 5290 5291 5292 status_t 5293 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5294 { 5295 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5296 } 5297 5298 5299 status_t 5300 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5301 bool user) 5302 { 5303 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5304 } 5305 5306 5307 void 5308 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5309 { 5310 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5311 } 5312 5313 5314 /*! Copies a range of memory directly from/to a page that might not be mapped 5315 at the moment. 5316 5317 For \a unsafeMemory the current mapping (if any is ignored). The function 5318 walks through the respective area's cache chain to find the physical page 5319 and copies from/to it directly. 5320 The memory range starting at \a unsafeMemory with a length of \a size bytes 5321 must not cross a page boundary. 5322 5323 \param teamID The team ID identifying the address space \a unsafeMemory is 5324 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5325 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5326 is passed, the address space of the thread returned by 5327 debug_get_debugged_thread() is used. 5328 \param unsafeMemory The start of the unsafe memory range to be copied 5329 from/to. 5330 \param buffer A safely accessible kernel buffer to be copied from/to. 5331 \param size The number of bytes to be copied. 5332 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5333 \a unsafeMemory, the other way around otherwise. 
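	\return \c B_OK on success, \c B_BAD_VALUE if the range would cross a page
		boundary, \c B_BAD_ADDRESS if no suitable address space or area could
		be found, \c B_UNSUPPORTED if the page is not resident (or, when
		copying to \a unsafeMemory, does not live in the area's top cache),
		another error code otherwise.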
5334 */ 5335 status_t 5336 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5337 size_t size, bool copyToUnsafe) 5338 { 5339 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5340 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5341 return B_BAD_VALUE; 5342 } 5343 5344 // get the address space for the debugged thread 5345 VMAddressSpace* addressSpace; 5346 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5347 addressSpace = VMAddressSpace::Kernel(); 5348 } else if (teamID == B_CURRENT_TEAM) { 5349 Thread* thread = debug_get_debugged_thread(); 5350 if (thread == NULL || thread->team == NULL) 5351 return B_BAD_ADDRESS; 5352 5353 addressSpace = thread->team->address_space; 5354 } else 5355 addressSpace = VMAddressSpace::DebugGet(teamID); 5356 5357 if (addressSpace == NULL) 5358 return B_BAD_ADDRESS; 5359 5360 // get the area 5361 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5362 if (area == NULL) 5363 return B_BAD_ADDRESS; 5364 5365 // search the page 5366 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5367 + area->cache_offset; 5368 VMCache* cache = area->cache; 5369 vm_page* page = NULL; 5370 while (cache != NULL) { 5371 page = cache->DebugLookupPage(cacheOffset); 5372 if (page != NULL) 5373 break; 5374 5375 // Page not found in this cache -- if it is paged out, we must not try 5376 // to get it from lower caches. 5377 if (cache->DebugHasPage(cacheOffset)) 5378 break; 5379 5380 cache = cache->source; 5381 } 5382 5383 if (page == NULL) 5384 return B_UNSUPPORTED; 5385 5386 // copy from/to physical memory 5387 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5388 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5389 5390 if (copyToUnsafe) { 5391 if (page->Cache() != area->cache) 5392 return B_UNSUPPORTED; 5393 5394 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5395 } 5396 5397 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5398 } 5399 5400 5401 static inline bool 5402 validate_user_range(const void* addr, size_t size) 5403 { 5404 addr_t address = (addr_t)addr; 5405 5406 // Check for overflows on all addresses. 5407 if ((address + size) < address) 5408 return false; 5409 5410 // Validate that the address does not cross the kernel/user boundary. 5411 if (IS_USER_ADDRESS(address)) 5412 return IS_USER_ADDRESS(address + size); 5413 else 5414 return !IS_USER_ADDRESS(address + size); 5415 } 5416 5417 5418 // #pragma mark - kernel public API 5419 5420 5421 status_t 5422 user_memcpy(void* to, const void* from, size_t size) 5423 { 5424 if (!validate_user_range(to, size) || !validate_user_range(from, size)) 5425 return B_BAD_ADDRESS; 5426 5427 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5428 return B_BAD_ADDRESS; 5429 5430 return B_OK; 5431 } 5432 5433 5434 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5435 the string in \a to, NULL-terminating the result. 5436 5437 \param to Pointer to the destination C-string. 5438 \param from Pointer to the source C-string. 5439 \param size Size in bytes of the string buffer pointed to by \a to. 5440 5441 \return strlen(\a from). 5442 */ 5443 ssize_t 5444 user_strlcpy(char* to, const char* from, size_t size) 5445 { 5446 if (to == NULL && size != 0) 5447 return B_BAD_VALUE; 5448 if (from == NULL) 5449 return B_BAD_ADDRESS; 5450 5451 // Protect the source address from overflows. 
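	// Clamp maxSize so that (from + maxSize) neither wraps around the address
	// space nor crosses from user into kernel space.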
5452 size_t maxSize = size; 5453 if ((addr_t)from + maxSize < (addr_t)from) 5454 maxSize -= (addr_t)from + maxSize; 5455 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5456 maxSize = USER_TOP - (addr_t)from; 5457 5458 if (!validate_user_range(to, maxSize)) 5459 return B_BAD_ADDRESS; 5460 5461 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5462 if (result < 0) 5463 return result; 5464 5465 // If we hit the address overflow boundary, fail. 5466 if ((size_t)result >= maxSize && maxSize < size) 5467 return B_BAD_ADDRESS; 5468 5469 return result; 5470 } 5471 5472 5473 status_t 5474 user_memset(void* s, char c, size_t count) 5475 { 5476 if (!validate_user_range(s, count)) 5477 return B_BAD_ADDRESS; 5478 5479 if (arch_cpu_user_memset(s, c, count) < B_OK) 5480 return B_BAD_ADDRESS; 5481 5482 return B_OK; 5483 } 5484 5485 5486 /*! Wires a single page at the given address. 5487 5488 \param team The team whose address space the address belongs to. Supports 5489 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5490 parameter is ignored. 5491 \param address address The virtual address to wire down. Does not need to 5492 be page aligned. 5493 \param writable If \c true the page shall be writable. 5494 \param info On success the info is filled in, among other things 5495 containing the physical address the given virtual one translates to. 5496 \return \c B_OK, when the page could be wired, another error code otherwise. 5497 */ 5498 status_t 5499 vm_wire_page(team_id team, addr_t address, bool writable, 5500 VMPageWiringInfo* info) 5501 { 5502 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5503 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5504 5505 // compute the page protection that is required 5506 bool isUser = IS_USER_ADDRESS(address); 5507 uint32 requiredProtection = PAGE_PRESENT 5508 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5509 if (writable) 5510 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5511 5512 // get and read lock the address space 5513 VMAddressSpace* addressSpace = NULL; 5514 if (isUser) { 5515 if (team == B_CURRENT_TEAM) 5516 addressSpace = VMAddressSpace::GetCurrent(); 5517 else 5518 addressSpace = VMAddressSpace::Get(team); 5519 } else 5520 addressSpace = VMAddressSpace::GetKernel(); 5521 if (addressSpace == NULL) 5522 return B_ERROR; 5523 5524 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5525 5526 VMTranslationMap* map = addressSpace->TranslationMap(); 5527 status_t error = B_OK; 5528 5529 // get the area 5530 VMArea* area = addressSpace->LookupArea(pageAddress); 5531 if (area == NULL) { 5532 addressSpace->Put(); 5533 return B_BAD_ADDRESS; 5534 } 5535 5536 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5537 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5538 5539 // mark the area range wired 5540 area->Wire(&info->range); 5541 5542 // Lock the area's cache chain and the translation map. Needed to look 5543 // up the page and play with its wired count. 5544 cacheChainLocker.LockAllSourceCaches(); 5545 map->Lock(); 5546 5547 phys_addr_t physicalAddress; 5548 uint32 flags; 5549 vm_page* page; 5550 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5551 && (flags & requiredProtection) == requiredProtection 5552 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5553 != NULL) { 5554 // Already mapped with the correct permissions -- just increment 5555 // the page's wired count. 
5556 increment_page_wired_count(page); 5557 5558 map->Unlock(); 5559 cacheChainLocker.Unlock(); 5560 addressSpaceLocker.Unlock(); 5561 } else { 5562 // Let vm_soft_fault() map the page for us, if possible. We need 5563 // to fully unlock to avoid deadlocks. Since we have already 5564 // wired the area itself, nothing disturbing will happen with it 5565 // in the meantime. 5566 map->Unlock(); 5567 cacheChainLocker.Unlock(); 5568 addressSpaceLocker.Unlock(); 5569 5570 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5571 isUser, &page); 5572 5573 if (error != B_OK) { 5574 // The page could not be mapped -- clean up. 5575 VMCache* cache = vm_area_get_locked_cache(area); 5576 area->Unwire(&info->range); 5577 cache->ReleaseRefAndUnlock(); 5578 addressSpace->Put(); 5579 return error; 5580 } 5581 } 5582 5583 info->physicalAddress 5584 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5585 + address % B_PAGE_SIZE; 5586 info->page = page; 5587 5588 return B_OK; 5589 } 5590 5591 5592 /*! Unwires a single page previously wired via vm_wire_page(). 5593 5594 \param info The same object passed to vm_wire_page() before. 5595 */ 5596 void 5597 vm_unwire_page(VMPageWiringInfo* info) 5598 { 5599 // lock the address space 5600 VMArea* area = info->range.area; 5601 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5602 // takes over our reference 5603 5604 // lock the top cache 5605 VMCache* cache = vm_area_get_locked_cache(area); 5606 VMCacheChainLocker cacheChainLocker(cache); 5607 5608 if (info->page->Cache() != cache) { 5609 // The page is not in the top cache, so we lock the whole cache chain 5610 // before touching the page's wired count. 5611 cacheChainLocker.LockAllSourceCaches(); 5612 } 5613 5614 decrement_page_wired_count(info->page); 5615 5616 // remove the wired range from the range 5617 area->Unwire(&info->range); 5618 5619 cacheChainLocker.Unlock(); 5620 } 5621 5622 5623 /*! Wires down the given address range in the specified team's address space. 5624 5625 If successful the function 5626 - acquires a reference to the specified team's address space, 5627 - adds respective wired ranges to all areas that intersect with the given 5628 address range, 5629 - makes sure all pages in the given address range are mapped with the 5630 requested access permissions and increments their wired count. 5631 5632 It fails, when \a team doesn't specify a valid address space, when any part 5633 of the specified address range is not covered by areas, when the concerned 5634 areas don't allow mapping with the requested permissions, or when mapping 5635 failed for another reason. 5636 5637 When successful the call must be balanced by a unlock_memory_etc() call with 5638 the exact same parameters. 5639 5640 \param team Identifies the address (via team ID). \c B_CURRENT_TEAM is 5641 supported. 5642 \param address The start of the address range to be wired. 5643 \param numBytes The size of the address range to be wired. 5644 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 5645 requests that the range must be wired writable ("read from device 5646 into memory"). 5647 \return \c B_OK on success, another error code otherwise. 
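	A typical use is wiring down an I/O buffer for the duration of a device
	transfer: a lock_memory_etc() call before starting the transfer, balanced
	by an unlock_memory_etc() call with the exact same arguments once the
	transfer has completed.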
5648 */ 5649 status_t 5650 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5651 { 5652 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5653 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5654 5655 // compute the page protection that is required 5656 bool isUser = IS_USER_ADDRESS(address); 5657 bool writable = (flags & B_READ_DEVICE) == 0; 5658 uint32 requiredProtection = PAGE_PRESENT 5659 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5660 if (writable) 5661 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5662 5663 uint32 mallocFlags = isUser 5664 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5665 5666 // get and read lock the address space 5667 VMAddressSpace* addressSpace = NULL; 5668 if (isUser) { 5669 if (team == B_CURRENT_TEAM) 5670 addressSpace = VMAddressSpace::GetCurrent(); 5671 else 5672 addressSpace = VMAddressSpace::Get(team); 5673 } else 5674 addressSpace = VMAddressSpace::GetKernel(); 5675 if (addressSpace == NULL) 5676 return B_ERROR; 5677 5678 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5679 // We get a new address space reference here. The one we got above will 5680 // be freed by unlock_memory_etc(). 5681 5682 VMTranslationMap* map = addressSpace->TranslationMap(); 5683 status_t error = B_OK; 5684 5685 // iterate through all concerned areas 5686 addr_t nextAddress = lockBaseAddress; 5687 while (nextAddress != lockEndAddress) { 5688 // get the next area 5689 VMArea* area = addressSpace->LookupArea(nextAddress); 5690 if (area == NULL) { 5691 error = B_BAD_ADDRESS; 5692 break; 5693 } 5694 5695 addr_t areaStart = nextAddress; 5696 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5697 5698 // allocate the wired range (do that before locking the cache to avoid 5699 // deadlocks) 5700 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5701 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5702 if (range == NULL) { 5703 error = B_NO_MEMORY; 5704 break; 5705 } 5706 5707 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5708 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5709 5710 // mark the area range wired 5711 area->Wire(range); 5712 5713 // Depending on the area cache type and the wiring, we may not need to 5714 // look at the individual pages. 5715 if (area->cache_type == CACHE_TYPE_NULL 5716 || area->cache_type == CACHE_TYPE_DEVICE 5717 || area->wiring == B_FULL_LOCK 5718 || area->wiring == B_CONTIGUOUS) { 5719 nextAddress = areaEnd; 5720 continue; 5721 } 5722 5723 // Lock the area's cache chain and the translation map. Needed to look 5724 // up pages and play with their wired count. 5725 cacheChainLocker.LockAllSourceCaches(); 5726 map->Lock(); 5727 5728 // iterate through the pages and wire them 5729 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5730 phys_addr_t physicalAddress; 5731 uint32 flags; 5732 5733 vm_page* page; 5734 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5735 && (flags & requiredProtection) == requiredProtection 5736 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5737 != NULL) { 5738 // Already mapped with the correct permissions -- just increment 5739 // the page's wired count. 5740 increment_page_wired_count(page); 5741 } else { 5742 // Let vm_soft_fault() map the page for us, if possible. We need 5743 // to fully unlock to avoid deadlocks. 
Since we have already 5744 // wired the area itself, nothing disturbing will happen with it 5745 // in the meantime. 5746 map->Unlock(); 5747 cacheChainLocker.Unlock(); 5748 addressSpaceLocker.Unlock(); 5749 5750 error = vm_soft_fault(addressSpace, nextAddress, writable, 5751 false, isUser, &page); 5752 5753 addressSpaceLocker.Lock(); 5754 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5755 cacheChainLocker.LockAllSourceCaches(); 5756 map->Lock(); 5757 } 5758 5759 if (error != B_OK) 5760 break; 5761 } 5762 5763 map->Unlock(); 5764 5765 if (error == B_OK) { 5766 cacheChainLocker.Unlock(); 5767 } else { 5768 // An error occurred, so abort right here. If the current address 5769 // is the first in this area, unwire the area, since we won't get 5770 // to it when reverting what we've done so far. 5771 if (nextAddress == areaStart) { 5772 area->Unwire(range); 5773 cacheChainLocker.Unlock(); 5774 range->~VMAreaWiredRange(); 5775 free_etc(range, mallocFlags); 5776 } else 5777 cacheChainLocker.Unlock(); 5778 5779 break; 5780 } 5781 } 5782 5783 if (error != B_OK) { 5784 // An error occurred, so unwire all that we've already wired. Note that 5785 // even if not a single page was wired, unlock_memory_etc() is called 5786 // to put the address space reference. 5787 addressSpaceLocker.Unlock(); 5788 unlock_memory_etc(team, (void*)lockBaseAddress, 5789 nextAddress - lockBaseAddress, flags); 5790 } 5791 5792 return error; 5793 } 5794 5795 5796 status_t 5797 lock_memory(void* address, size_t numBytes, uint32 flags) 5798 { 5799 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5800 } 5801 5802 5803 /*! Unwires an address range previously wired with lock_memory_etc(). 5804 5805 Note that a call to this function must balance a previous lock_memory_etc() 5806 call with exactly the same parameters. 5807 */ 5808 status_t 5809 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5810 { 5811 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5812 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5813 5814 // compute the page protection that is required 5815 bool isUser = IS_USER_ADDRESS(address); 5816 bool writable = (flags & B_READ_DEVICE) == 0; 5817 uint32 requiredProtection = PAGE_PRESENT 5818 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5819 if (writable) 5820 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5821 5822 uint32 mallocFlags = isUser 5823 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5824 5825 // get and read lock the address space 5826 VMAddressSpace* addressSpace = NULL; 5827 if (isUser) { 5828 if (team == B_CURRENT_TEAM) 5829 addressSpace = VMAddressSpace::GetCurrent(); 5830 else 5831 addressSpace = VMAddressSpace::Get(team); 5832 } else 5833 addressSpace = VMAddressSpace::GetKernel(); 5834 if (addressSpace == NULL) 5835 return B_ERROR; 5836 5837 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5838 // Take over the address space reference. We don't unlock until we're 5839 // done. 
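	// What follows mirrors lock_memory_etc(): we walk all areas intersecting
	// the range; for areas whose pages are wired anyway (null and device
	// caches, B_FULL_LOCK, B_CONTIGUOUS) only the wired range is removed,
	// for all others the cache chain and the translation map are locked and
	// each mapped page's wired count is decremented before the range is
	// removed.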
5840 5841 VMTranslationMap* map = addressSpace->TranslationMap(); 5842 status_t error = B_OK; 5843 5844 // iterate through all concerned areas 5845 addr_t nextAddress = lockBaseAddress; 5846 while (nextAddress != lockEndAddress) { 5847 // get the next area 5848 VMArea* area = addressSpace->LookupArea(nextAddress); 5849 if (area == NULL) { 5850 error = B_BAD_ADDRESS; 5851 break; 5852 } 5853 5854 addr_t areaStart = nextAddress; 5855 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5856 5857 // Lock the area's top cache. This is a requirement for 5858 // VMArea::Unwire(). 5859 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5860 5861 // Depending on the area cache type and the wiring, we may not need to 5862 // look at the individual pages. 5863 if (area->cache_type == CACHE_TYPE_NULL 5864 || area->cache_type == CACHE_TYPE_DEVICE 5865 || area->wiring == B_FULL_LOCK 5866 || area->wiring == B_CONTIGUOUS) { 5867 // unwire the range (to avoid deadlocks we delete the range after 5868 // unlocking the cache) 5869 nextAddress = areaEnd; 5870 VMAreaWiredRange* range = area->Unwire(areaStart, 5871 areaEnd - areaStart, writable); 5872 cacheChainLocker.Unlock(); 5873 if (range != NULL) { 5874 range->~VMAreaWiredRange(); 5875 free_etc(range, mallocFlags); 5876 } 5877 continue; 5878 } 5879 5880 // Lock the area's cache chain and the translation map. Needed to look 5881 // up pages and play with their wired count. 5882 cacheChainLocker.LockAllSourceCaches(); 5883 map->Lock(); 5884 5885 // iterate through the pages and unwire them 5886 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5887 phys_addr_t physicalAddress; 5888 uint32 flags; 5889 5890 vm_page* page; 5891 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5892 && (flags & PAGE_PRESENT) != 0 5893 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5894 != NULL) { 5895 // Already mapped with the correct permissions -- just increment 5896 // the page's wired count. 5897 decrement_page_wired_count(page); 5898 } else { 5899 panic("unlock_memory_etc(): Failed to unwire page: address " 5900 "space %p, address: %#" B_PRIxADDR, addressSpace, 5901 nextAddress); 5902 error = B_BAD_VALUE; 5903 break; 5904 } 5905 } 5906 5907 map->Unlock(); 5908 5909 // All pages are unwired. Remove the area's wired range as well (to 5910 // avoid deadlocks we delete the range after unlocking the cache). 5911 VMAreaWiredRange* range = area->Unwire(areaStart, 5912 areaEnd - areaStart, writable); 5913 5914 cacheChainLocker.Unlock(); 5915 5916 if (range != NULL) { 5917 range->~VMAreaWiredRange(); 5918 free_etc(range, mallocFlags); 5919 } 5920 5921 if (error != B_OK) 5922 break; 5923 } 5924 5925 // get rid of the address space reference lock_memory_etc() acquired 5926 addressSpace->Put(); 5927 5928 return error; 5929 } 5930 5931 5932 status_t 5933 unlock_memory(void* address, size_t numBytes, uint32 flags) 5934 { 5935 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5936 } 5937 5938 5939 /*! Similar to get_memory_map(), but also allows to specify the address space 5940 for the memory in question and has a saner semantics. 5941 Returns \c B_OK when the complete range could be translated or 5942 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5943 case the actual number of entries is written to \c *_numEntries. Any other 5944 error case indicates complete failure; \c *_numEntries will be set to \c 0 5945 in this case. 
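	A (hypothetical) usage sketch with a fixed size table; \c buffer and
	\c bufferSize are illustrative names only:

	\code
	physical_entry table[8];
	uint32 numEntries = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
		table, &numEntries);
	if (error == B_OK) {
		// the whole range is described by table[0 .. numEntries - 1]
	} else if (error == B_BUFFER_OVERFLOW) {
		// only a prefix of the range fit into the table
	} else {
		// complete failure, numEntries is 0
	}
	\endcode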
5946 */ 5947 status_t 5948 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5949 physical_entry* table, uint32* _numEntries) 5950 { 5951 uint32 numEntries = *_numEntries; 5952 *_numEntries = 0; 5953 5954 VMAddressSpace* addressSpace; 5955 addr_t virtualAddress = (addr_t)address; 5956 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5957 phys_addr_t physicalAddress; 5958 status_t status = B_OK; 5959 int32 index = -1; 5960 addr_t offset = 0; 5961 bool interrupts = are_interrupts_enabled(); 5962 5963 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 5964 "entries)\n", team, address, numBytes, numEntries)); 5965 5966 if (numEntries == 0 || numBytes == 0) 5967 return B_BAD_VALUE; 5968 5969 // in which address space is the address to be found? 5970 if (IS_USER_ADDRESS(virtualAddress)) { 5971 if (team == B_CURRENT_TEAM) 5972 addressSpace = VMAddressSpace::GetCurrent(); 5973 else 5974 addressSpace = VMAddressSpace::Get(team); 5975 } else 5976 addressSpace = VMAddressSpace::GetKernel(); 5977 5978 if (addressSpace == NULL) 5979 return B_ERROR; 5980 5981 VMTranslationMap* map = addressSpace->TranslationMap(); 5982 5983 if (interrupts) 5984 map->Lock(); 5985 5986 while (offset < numBytes) { 5987 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5988 uint32 flags; 5989 5990 if (interrupts) { 5991 status = map->Query((addr_t)address + offset, &physicalAddress, 5992 &flags); 5993 } else { 5994 status = map->QueryInterrupt((addr_t)address + offset, 5995 &physicalAddress, &flags); 5996 } 5997 if (status < B_OK) 5998 break; 5999 if ((flags & PAGE_PRESENT) == 0) { 6000 panic("get_memory_map() called on unmapped memory!"); 6001 return B_BAD_ADDRESS; 6002 } 6003 6004 if (index < 0 && pageOffset > 0) { 6005 physicalAddress += pageOffset; 6006 if (bytes > B_PAGE_SIZE - pageOffset) 6007 bytes = B_PAGE_SIZE - pageOffset; 6008 } 6009 6010 // need to switch to the next physical_entry? 6011 if (index < 0 || table[index].address 6012 != physicalAddress - table[index].size) { 6013 if ((uint32)++index + 1 > numEntries) { 6014 // table to small 6015 break; 6016 } 6017 table[index].address = physicalAddress; 6018 table[index].size = bytes; 6019 } else { 6020 // page does fit in current entry 6021 table[index].size += bytes; 6022 } 6023 6024 offset += bytes; 6025 } 6026 6027 if (interrupts) 6028 map->Unlock(); 6029 6030 if (status != B_OK) 6031 return status; 6032 6033 if ((uint32)index + 1 > numEntries) { 6034 *_numEntries = index; 6035 return B_BUFFER_OVERFLOW; 6036 } 6037 6038 *_numEntries = index + 1; 6039 return B_OK; 6040 } 6041 6042 6043 /*! According to the BeBook, this function should always succeed. 6044 This is no longer the case. 
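	Callers therefore have to check the return value. When more than one
	entry is passed in and there is room left, the returned list is
	terminated by an entry with a size of 0. A (hypothetical) usage sketch
	via the public get_memory_map() interface; \c buffer and \c bufferSize
	are illustrative names only:

	\code
	physical_entry table[4];
	if (get_memory_map(buffer, bufferSize, table, 4) == B_OK) {
		for (int32 i = 0; i < 4 && table[i].size != 0; i++) {
			// table[i].address and table[i].size describe one physically
			// contiguous part of the buffer
		}
	}
	\endcode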
6045 */ 6046 extern "C" int32 6047 __get_memory_map_haiku(const void* address, size_t numBytes, 6048 physical_entry* table, int32 numEntries) 6049 { 6050 uint32 entriesRead = numEntries; 6051 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6052 table, &entriesRead); 6053 if (error != B_OK) 6054 return error; 6055 6056 // close the entry list 6057 6058 // if it's only one entry, we will silently accept the missing ending 6059 if (numEntries == 1) 6060 return B_OK; 6061 6062 if (entriesRead + 1 > (uint32)numEntries) 6063 return B_BUFFER_OVERFLOW; 6064 6065 table[entriesRead].address = 0; 6066 table[entriesRead].size = 0; 6067 6068 return B_OK; 6069 } 6070 6071 6072 area_id 6073 area_for(void* address) 6074 { 6075 return vm_area_for((addr_t)address, true); 6076 } 6077 6078 6079 area_id 6080 find_area(const char* name) 6081 { 6082 return VMAreaHash::Find(name); 6083 } 6084 6085 6086 status_t 6087 _get_area_info(area_id id, area_info* info, size_t size) 6088 { 6089 if (size != sizeof(area_info) || info == NULL) 6090 return B_BAD_VALUE; 6091 6092 AddressSpaceReadLocker locker; 6093 VMArea* area; 6094 status_t status = locker.SetFromArea(id, area); 6095 if (status != B_OK) 6096 return status; 6097 6098 fill_area_info(area, info, size); 6099 return B_OK; 6100 } 6101 6102 6103 status_t 6104 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6105 { 6106 addr_t nextBase = *(addr_t*)cookie; 6107 6108 // we're already through the list 6109 if (nextBase == (addr_t)-1) 6110 return B_ENTRY_NOT_FOUND; 6111 6112 if (team == B_CURRENT_TEAM) 6113 team = team_get_current_team_id(); 6114 6115 AddressSpaceReadLocker locker(team); 6116 if (!locker.IsLocked()) 6117 return B_BAD_TEAM_ID; 6118 6119 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6120 if (area == NULL) { 6121 nextBase = (addr_t)-1; 6122 return B_ENTRY_NOT_FOUND; 6123 } 6124 6125 fill_area_info(area, info, size); 6126 *cookie = (ssize_t)(area->Base() + 1); 6127 6128 return B_OK; 6129 } 6130 6131 6132 status_t 6133 set_area_protection(area_id area, uint32 newProtection) 6134 { 6135 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6136 newProtection, true); 6137 } 6138 6139 6140 status_t 6141 resize_area(area_id areaID, size_t newSize) 6142 { 6143 return vm_resize_area(areaID, newSize, true); 6144 } 6145 6146 6147 /*! Transfers the specified area to a new team. The caller must be the owner 6148 of the area. 6149 */ 6150 area_id 6151 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6152 bool kernel) 6153 { 6154 area_info info; 6155 status_t status = get_area_info(id, &info); 6156 if (status != B_OK) 6157 return status; 6158 6159 if (info.team != thread_get_current_thread()->team->id) 6160 return B_PERMISSION_DENIED; 6161 6162 // We need to mark the area cloneable so the following operations work. 6163 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6164 if (status != B_OK) 6165 return status; 6166 6167 area_id clonedArea = vm_clone_area(target, info.name, _address, 6168 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6169 if (clonedArea < 0) 6170 return clonedArea; 6171 6172 status = vm_delete_area(info.team, id, kernel); 6173 if (status != B_OK) { 6174 vm_delete_area(target, clonedArea, kernel); 6175 return status; 6176 } 6177 6178 // Now we can reset the protection to whatever it was before. 
6179 set_area_protection(clonedArea, info.protection); 6180 6181 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6182 6183 return clonedArea; 6184 } 6185 6186 6187 extern "C" area_id 6188 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6189 size_t numBytes, uint32 addressSpec, uint32 protection, 6190 void** _virtualAddress) 6191 { 6192 if (!arch_vm_supports_protection(protection)) 6193 return B_NOT_SUPPORTED; 6194 6195 fix_protection(&protection); 6196 6197 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6198 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6199 false); 6200 } 6201 6202 6203 area_id 6204 clone_area(const char* name, void** _address, uint32 addressSpec, 6205 uint32 protection, area_id source) 6206 { 6207 if ((protection & B_KERNEL_PROTECTION) == 0) 6208 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6209 6210 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6211 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6212 } 6213 6214 6215 area_id 6216 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6217 uint32 protection, uint32 flags, uint32 guardSize, 6218 const virtual_address_restrictions* virtualAddressRestrictions, 6219 const physical_address_restrictions* physicalAddressRestrictions, 6220 void** _address) 6221 { 6222 fix_protection(&protection); 6223 6224 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6225 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6226 true, _address); 6227 } 6228 6229 6230 extern "C" area_id 6231 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6232 size_t size, uint32 lock, uint32 protection) 6233 { 6234 fix_protection(&protection); 6235 6236 virtual_address_restrictions virtualRestrictions = {}; 6237 virtualRestrictions.address = *_address; 6238 virtualRestrictions.address_specification = addressSpec; 6239 physical_address_restrictions physicalRestrictions = {}; 6240 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6241 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6242 true, _address); 6243 } 6244 6245 6246 status_t 6247 delete_area(area_id area) 6248 { 6249 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6250 } 6251 6252 6253 // #pragma mark - Userland syscalls 6254 6255 6256 status_t 6257 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6258 addr_t size) 6259 { 6260 // filter out some unavailable values (for userland) 6261 switch (addressSpec) { 6262 case B_ANY_KERNEL_ADDRESS: 6263 case B_ANY_KERNEL_BLOCK_ADDRESS: 6264 return B_BAD_VALUE; 6265 } 6266 6267 addr_t address; 6268 6269 if (!IS_USER_ADDRESS(userAddress) 6270 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6271 return B_BAD_ADDRESS; 6272 6273 status_t status = vm_reserve_address_range( 6274 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6275 RESERVED_AVOID_BASE); 6276 if (status != B_OK) 6277 return status; 6278 6279 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6280 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6281 (void*)address, size); 6282 return B_BAD_ADDRESS; 6283 } 6284 6285 return B_OK; 6286 } 6287 6288 6289 status_t 6290 _user_unreserve_address_range(addr_t address, addr_t size) 6291 { 6292 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6293 (void*)address, size); 6294 } 6295 6296 6297 area_id 
6298 _user_area_for(void* address) 6299 { 6300 return vm_area_for((addr_t)address, false); 6301 } 6302 6303 6304 area_id 6305 _user_find_area(const char* userName) 6306 { 6307 char name[B_OS_NAME_LENGTH]; 6308 6309 if (!IS_USER_ADDRESS(userName) 6310 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6311 return B_BAD_ADDRESS; 6312 6313 return find_area(name); 6314 } 6315 6316 6317 status_t 6318 _user_get_area_info(area_id area, area_info* userInfo) 6319 { 6320 if (!IS_USER_ADDRESS(userInfo)) 6321 return B_BAD_ADDRESS; 6322 6323 area_info info; 6324 status_t status = get_area_info(area, &info); 6325 if (status < B_OK) 6326 return status; 6327 6328 // TODO: do we want to prevent userland from seeing kernel protections? 6329 //info.protection &= B_USER_PROTECTION; 6330 6331 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6332 return B_BAD_ADDRESS; 6333 6334 return status; 6335 } 6336 6337 6338 status_t 6339 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6340 { 6341 ssize_t cookie; 6342 6343 if (!IS_USER_ADDRESS(userCookie) 6344 || !IS_USER_ADDRESS(userInfo) 6345 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6346 return B_BAD_ADDRESS; 6347 6348 area_info info; 6349 status_t status = _get_next_area_info(team, &cookie, &info, 6350 sizeof(area_info)); 6351 if (status != B_OK) 6352 return status; 6353 6354 //info.protection &= B_USER_PROTECTION; 6355 6356 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6357 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6358 return B_BAD_ADDRESS; 6359 6360 return status; 6361 } 6362 6363 6364 status_t 6365 _user_set_area_protection(area_id area, uint32 newProtection) 6366 { 6367 if ((newProtection & ~B_USER_PROTECTION) != 0) 6368 return B_BAD_VALUE; 6369 6370 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6371 newProtection, false); 6372 } 6373 6374 6375 status_t 6376 _user_resize_area(area_id area, size_t newSize) 6377 { 6378 // TODO: Since we restrict deleting of areas to those owned by the team, 6379 // we should also do that for resizing (check other functions, too). 
6380 return vm_resize_area(area, newSize, false); 6381 } 6382 6383 6384 area_id 6385 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6386 team_id target) 6387 { 6388 // filter out some unavailable values (for userland) 6389 switch (addressSpec) { 6390 case B_ANY_KERNEL_ADDRESS: 6391 case B_ANY_KERNEL_BLOCK_ADDRESS: 6392 return B_BAD_VALUE; 6393 } 6394 6395 void* address; 6396 if (!IS_USER_ADDRESS(userAddress) 6397 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6398 return B_BAD_ADDRESS; 6399 6400 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6401 if (newArea < B_OK) 6402 return newArea; 6403 6404 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6405 return B_BAD_ADDRESS; 6406 6407 return newArea; 6408 } 6409 6410 6411 area_id 6412 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6413 uint32 protection, area_id sourceArea) 6414 { 6415 char name[B_OS_NAME_LENGTH]; 6416 void* address; 6417 6418 // filter out some unavailable values (for userland) 6419 switch (addressSpec) { 6420 case B_ANY_KERNEL_ADDRESS: 6421 case B_ANY_KERNEL_BLOCK_ADDRESS: 6422 return B_BAD_VALUE; 6423 } 6424 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6425 return B_BAD_VALUE; 6426 6427 if (!IS_USER_ADDRESS(userName) 6428 || !IS_USER_ADDRESS(userAddress) 6429 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6430 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6431 return B_BAD_ADDRESS; 6432 6433 fix_protection(&protection); 6434 6435 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6436 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6437 false); 6438 if (clonedArea < B_OK) 6439 return clonedArea; 6440 6441 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6442 delete_area(clonedArea); 6443 return B_BAD_ADDRESS; 6444 } 6445 6446 return clonedArea; 6447 } 6448 6449 6450 area_id 6451 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6452 size_t size, uint32 lock, uint32 protection) 6453 { 6454 char name[B_OS_NAME_LENGTH]; 6455 void* address; 6456 6457 // filter out some unavailable values (for userland) 6458 switch (addressSpec) { 6459 case B_ANY_KERNEL_ADDRESS: 6460 case B_ANY_KERNEL_BLOCK_ADDRESS: 6461 return B_BAD_VALUE; 6462 } 6463 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6464 return B_BAD_VALUE; 6465 6466 if (!IS_USER_ADDRESS(userName) 6467 || !IS_USER_ADDRESS(userAddress) 6468 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6469 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6470 return B_BAD_ADDRESS; 6471 6472 if (addressSpec == B_EXACT_ADDRESS 6473 && IS_KERNEL_ADDRESS(address)) 6474 return B_BAD_VALUE; 6475 6476 if (addressSpec == B_ANY_ADDRESS) 6477 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6478 if (addressSpec == B_BASE_ADDRESS) 6479 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6480 6481 fix_protection(&protection); 6482 6483 virtual_address_restrictions virtualRestrictions = {}; 6484 virtualRestrictions.address = address; 6485 virtualRestrictions.address_specification = addressSpec; 6486 physical_address_restrictions physicalRestrictions = {}; 6487 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6488 size, lock, protection, 0, 0, &virtualRestrictions, 6489 &physicalRestrictions, false, &address); 6490 6491 if (area >= B_OK 6492 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6493 delete_area(area); 6494 return B_BAD_ADDRESS; 6495 
} 6496 6497 return area; 6498 } 6499 6500 6501 status_t 6502 _user_delete_area(area_id area) 6503 { 6504 // Unlike the BeOS implementation, you can now only delete areas 6505 // that you have created yourself from userland. 6506 // The documentation to delete_area() explicitly states that this 6507 // will be restricted in the future, and so it will. 6508 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6509 } 6510 6511 6512 // TODO: create a BeOS style call for this! 6513 6514 area_id 6515 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6516 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6517 int fd, off_t offset) 6518 { 6519 char name[B_OS_NAME_LENGTH]; 6520 void* address; 6521 area_id area; 6522 6523 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6524 return B_BAD_VALUE; 6525 6526 fix_protection(&protection); 6527 6528 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6529 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6530 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6531 return B_BAD_ADDRESS; 6532 6533 if (addressSpec == B_EXACT_ADDRESS) { 6534 if ((addr_t)address + size < (addr_t)address 6535 || (addr_t)address % B_PAGE_SIZE != 0) { 6536 return B_BAD_VALUE; 6537 } 6538 if (!IS_USER_ADDRESS(address) 6539 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6540 return B_BAD_ADDRESS; 6541 } 6542 } 6543 6544 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6545 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6546 false); 6547 if (area < B_OK) 6548 return area; 6549 6550 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6551 return B_BAD_ADDRESS; 6552 6553 return area; 6554 } 6555 6556 6557 status_t 6558 _user_unmap_memory(void* _address, size_t size) 6559 { 6560 addr_t address = (addr_t)_address; 6561 6562 // check params 6563 if (size == 0 || (addr_t)address + size < (addr_t)address 6564 || (addr_t)address % B_PAGE_SIZE != 0) { 6565 return B_BAD_VALUE; 6566 } 6567 6568 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6569 return B_BAD_ADDRESS; 6570 6571 // Write lock the address space and ensure the address range is not wired. 6572 AddressSpaceWriteLocker locker; 6573 do { 6574 status_t status = locker.SetTo(team_get_current_team_id()); 6575 if (status != B_OK) 6576 return status; 6577 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6578 size, &locker)); 6579 6580 // unmap 6581 return unmap_address_range(locker.AddressSpace(), address, size, false); 6582 } 6583 6584 6585 status_t 6586 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6587 { 6588 // check address range 6589 addr_t address = (addr_t)_address; 6590 size = PAGE_ALIGN(size); 6591 6592 if ((address % B_PAGE_SIZE) != 0) 6593 return B_BAD_VALUE; 6594 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6595 || !IS_USER_ADDRESS((addr_t)address + size)) { 6596 // weird error code required by POSIX 6597 return ENOMEM; 6598 } 6599 6600 // extend and check protection 6601 if ((protection & ~B_USER_PROTECTION) != 0) 6602 return B_BAD_VALUE; 6603 6604 fix_protection(&protection); 6605 6606 // We need to write lock the address space, since we're going to play with 6607 // the areas. Also make sure that none of the areas is wired and that we're 6608 // actually allowed to change the protection. 
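	// This is done in two rounds: the first one only validates the range
	// (complete coverage by areas, no B_KERNEL_AREA areas, nothing wired --
	// restarting whenever we had to wait for a wired range to disappear),
	// the second one allocates the per-page protection array where necessary
	// and adjusts the mappings page by page.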
6609 AddressSpaceWriteLocker locker; 6610 6611 bool restart; 6612 do { 6613 restart = false; 6614 6615 status_t status = locker.SetTo(team_get_current_team_id()); 6616 if (status != B_OK) 6617 return status; 6618 6619 // First round: Check whether the whole range is covered by areas and we 6620 // are allowed to modify them. 6621 addr_t currentAddress = address; 6622 size_t sizeLeft = size; 6623 while (sizeLeft > 0) { 6624 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6625 if (area == NULL) 6626 return B_NO_MEMORY; 6627 6628 if ((area->protection & B_KERNEL_AREA) != 0) 6629 return B_NOT_ALLOWED; 6630 6631 // TODO: For (shared) mapped files we should check whether the new 6632 // protections are compatible with the file permissions. We don't 6633 // have a way to do that yet, though. 6634 6635 addr_t offset = currentAddress - area->Base(); 6636 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6637 6638 AreaCacheLocker cacheLocker(area); 6639 6640 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6641 &locker, &cacheLocker)) { 6642 restart = true; 6643 break; 6644 } 6645 6646 cacheLocker.Unlock(); 6647 6648 currentAddress += rangeSize; 6649 sizeLeft -= rangeSize; 6650 } 6651 } while (restart); 6652 6653 // Second round: If the protections differ from that of the area, create a 6654 // page protection array and re-map mapped pages. 6655 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6656 addr_t currentAddress = address; 6657 size_t sizeLeft = size; 6658 while (sizeLeft > 0) { 6659 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6660 if (area == NULL) 6661 return B_NO_MEMORY; 6662 6663 addr_t offset = currentAddress - area->Base(); 6664 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6665 6666 currentAddress += rangeSize; 6667 sizeLeft -= rangeSize; 6668 6669 if (area->page_protections == NULL) { 6670 if (area->protection == protection) 6671 continue; 6672 6673 status_t status = allocate_area_page_protections(area); 6674 if (status != B_OK) 6675 return status; 6676 } 6677 6678 // We need to lock the complete cache chain, since we potentially unmap 6679 // pages of lower caches. 6680 VMCache* topCache = vm_area_get_locked_cache(area); 6681 VMCacheChainLocker cacheChainLocker(topCache); 6682 cacheChainLocker.LockAllSourceCaches(); 6683 6684 for (addr_t pageAddress = area->Base() + offset; 6685 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6686 map->Lock(); 6687 6688 set_area_page_protection(area, pageAddress, protection); 6689 6690 phys_addr_t physicalAddress; 6691 uint32 flags; 6692 6693 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6694 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6695 map->Unlock(); 6696 continue; 6697 } 6698 6699 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6700 if (page == NULL) { 6701 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6702 "\n", area, physicalAddress); 6703 map->Unlock(); 6704 return B_ERROR; 6705 } 6706 6707 // If the page is not in the topmost cache and write access is 6708 // requested, we have to unmap it. Otherwise we can re-map it with 6709 // the new protection. 
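	// Leaving such a page mapped writable would allow writes to modify the
	// shared page in the lower cache directly, instead of triggering the
	// copy-on-write fault that copies it into topCache first.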
6710 bool unmapPage = page->Cache() != topCache 6711 && (protection & B_WRITE_AREA) != 0; 6712 6713 if (!unmapPage) 6714 map->ProtectPage(area, pageAddress, protection); 6715 6716 map->Unlock(); 6717 6718 if (unmapPage) { 6719 DEBUG_PAGE_ACCESS_START(page); 6720 unmap_page(area, pageAddress); 6721 DEBUG_PAGE_ACCESS_END(page); 6722 } 6723 } 6724 } 6725 6726 return B_OK; 6727 } 6728 6729 6730 status_t 6731 _user_sync_memory(void* _address, size_t size, uint32 flags) 6732 { 6733 addr_t address = (addr_t)_address; 6734 size = PAGE_ALIGN(size); 6735 6736 // check params 6737 if ((address % B_PAGE_SIZE) != 0) 6738 return B_BAD_VALUE; 6739 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6740 || !IS_USER_ADDRESS((addr_t)address + size)) { 6741 // weird error code required by POSIX 6742 return ENOMEM; 6743 } 6744 6745 bool writeSync = (flags & MS_SYNC) != 0; 6746 bool writeAsync = (flags & MS_ASYNC) != 0; 6747 if (writeSync && writeAsync) 6748 return B_BAD_VALUE; 6749 6750 if (size == 0 || (!writeSync && !writeAsync)) 6751 return B_OK; 6752 6753 // iterate through the range and sync all concerned areas 6754 while (size > 0) { 6755 // read lock the address space 6756 AddressSpaceReadLocker locker; 6757 status_t error = locker.SetTo(team_get_current_team_id()); 6758 if (error != B_OK) 6759 return error; 6760 6761 // get the first area 6762 VMArea* area = locker.AddressSpace()->LookupArea(address); 6763 if (area == NULL) 6764 return B_NO_MEMORY; 6765 6766 uint32 offset = address - area->Base(); 6767 size_t rangeSize = min_c(area->Size() - offset, size); 6768 offset += area->cache_offset; 6769 6770 // lock the cache 6771 AreaCacheLocker cacheLocker(area); 6772 if (!cacheLocker) 6773 return B_BAD_VALUE; 6774 VMCache* cache = area->cache; 6775 6776 locker.Unlock(); 6777 6778 uint32 firstPage = offset >> PAGE_SHIFT; 6779 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6780 6781 // write the pages 6782 if (cache->type == CACHE_TYPE_VNODE) { 6783 if (writeSync) { 6784 // synchronous 6785 error = vm_page_write_modified_page_range(cache, firstPage, 6786 endPage); 6787 if (error != B_OK) 6788 return error; 6789 } else { 6790 // asynchronous 6791 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6792 // TODO: This is probably not quite what is supposed to happen. 6793 // Especially when a lot has to be written, it might take ages 6794 // until it really hits the disk. 6795 } 6796 } 6797 6798 address += rangeSize; 6799 size -= rangeSize; 6800 } 6801 6802 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6803 // synchronize multiple mappings of the same file. In our VM they never get 6804 // out of sync, though, so we don't have to do anything. 6805 6806 return B_OK; 6807 } 6808 6809 6810 status_t 6811 _user_memory_advice(void* _address, size_t size, uint32 advice) 6812 { 6813 addr_t address = (addr_t)_address; 6814 if ((address % B_PAGE_SIZE) != 0) 6815 return B_BAD_VALUE; 6816 6817 size = PAGE_ALIGN(size); 6818 if (address + size < address || !IS_USER_ADDRESS(address) 6819 || !IS_USER_ADDRESS(address + size)) { 6820 // weird error code required by POSIX 6821 return B_NO_MEMORY; 6822 } 6823 6824 switch (advice) { 6825 case MADV_NORMAL: 6826 case MADV_SEQUENTIAL: 6827 case MADV_RANDOM: 6828 case MADV_WILLNEED: 6829 case MADV_DONTNEED: 6830 // TODO: Implement! 
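			// A full implementation would feed these hints to the caches and
			// the page daemon -- e.g. adjust read-ahead for MADV_SEQUENTIAL
			// and MADV_RANDOM, prefetch for MADV_WILLNEED, and make the pages
			// preferred eviction candidates for MADV_DONTNEED. Ignoring the
			// advice is permissible, since these calls are purely advisory.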
6831 break; 6832 6833 case MADV_FREE: 6834 { 6835 AddressSpaceWriteLocker locker; 6836 do { 6837 status_t status = locker.SetTo(team_get_current_team_id()); 6838 if (status != B_OK) 6839 return status; 6840 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6841 address, size, &locker)); 6842 6843 discard_address_range(locker.AddressSpace(), address, size, false); 6844 break; 6845 } 6846 6847 default: 6848 return B_BAD_VALUE; 6849 } 6850 6851 return B_OK; 6852 } 6853 6854 6855 status_t 6856 _user_get_memory_properties(team_id teamID, const void* address, 6857 uint32* _protected, uint32* _lock) 6858 { 6859 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6860 return B_BAD_ADDRESS; 6861 6862 AddressSpaceReadLocker locker; 6863 status_t error = locker.SetTo(teamID); 6864 if (error != B_OK) 6865 return error; 6866 6867 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6868 if (area == NULL) 6869 return B_NO_MEMORY; 6870 6871 6872 uint32 protection = area->protection; 6873 if (area->page_protections != NULL) 6874 protection = get_area_page_protection(area, (addr_t)address); 6875 6876 uint32 wiring = area->wiring; 6877 6878 locker.Unlock(); 6879 6880 error = user_memcpy(_protected, &protection, sizeof(protection)); 6881 if (error != B_OK) 6882 return error; 6883 6884 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6885 6886 return error; 6887 } 6888 6889 6890 // An ordered list of non-overlapping ranges to track mlock/munlock locking. 6891 // It is allowed to call mlock/munlock in unbalanced ways (lock a range 6892 // multiple times, unlock a part of it, lock several consecutive ranges and 6893 // unlock them in one go, etc). However the low level lock_memory and 6894 // unlock_memory calls require the locks/unlocks to be balanced (you lock a 6895 // fixed range, and then unlock exactly the same range). This list allows to 6896 // keep track of what was locked exactly so we can unlock the correct things. 6897 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> { 6898 addr_t start; 6899 addr_t end; 6900 6901 status_t LockMemory() 6902 { 6903 return lock_memory((void*)start, end - start, 0); 6904 } 6905 6906 status_t UnlockMemory() 6907 { 6908 return unlock_memory((void*)start, end - start, 0); 6909 } 6910 6911 status_t Move(addr_t start, addr_t end) 6912 { 6913 status_t result = lock_memory((void*)start, end - start, 0); 6914 if (result != B_OK) 6915 return result; 6916 6917 result = UnlockMemory(); 6918 6919 if (result != B_OK) { 6920 // What can we do if the unlock fails? 
6921 panic("Failed to unlock memory: %s", strerror(result)); 6922 return result; 6923 } 6924 6925 this->start = start; 6926 this->end = end; 6927 6928 return B_OK; 6929 } 6930 }; 6931 6932 6933 status_t 6934 _user_mlock(const void* address, size_t size) { 6935 // Maybe there's nothing to do, in which case, do nothing 6936 if (size == 0) 6937 return B_OK; 6938 6939 // Make sure the address is multiple of B_PAGE_SIZE (POSIX allows us to 6940 // reject the call otherwise) 6941 if ((addr_t)address % B_PAGE_SIZE != 0) 6942 return EINVAL; 6943 6944 size = ROUNDUP(size, B_PAGE_SIZE); 6945 6946 addr_t endAddress = (addr_t)address + size; 6947 6948 // Pre-allocate a linked list element we may need (it's simpler to do it 6949 // now than run out of memory in the midle of changing things) 6950 LockedPages* newRange = new(std::nothrow) LockedPages(); 6951 if (newRange == NULL) 6952 return ENOMEM; 6953 6954 // Get and lock the team 6955 Team* team = thread_get_current_thread()->team; 6956 TeamLocker teamLocker(team); 6957 teamLocker.Lock(); 6958 6959 status_t error = B_OK; 6960 LockedPagesList* lockedPages = &team->locked_pages_list; 6961 6962 // Locate the first locked range possibly overlapping ours 6963 LockedPages* currentRange = lockedPages->Head(); 6964 while (currentRange != NULL && currentRange->end <= (addr_t)address) 6965 currentRange = lockedPages->GetNext(currentRange); 6966 6967 if (currentRange == NULL || currentRange->start >= endAddress) { 6968 // No existing range is overlapping with ours. We can just lock our 6969 // range and stop here. 6970 newRange->start = (addr_t)address; 6971 newRange->end = endAddress; 6972 error = newRange->LockMemory(); 6973 if (error != B_OK) { 6974 delete newRange; 6975 return error; 6976 } 6977 lockedPages->InsertBefore(currentRange, newRange); 6978 return B_OK; 6979 } 6980 6981 // We get here when there is at least one existing overlapping range. 6982 6983 if (currentRange->start <= (addr_t)address) { 6984 if (currentRange->end >= endAddress) { 6985 // An existing range is already fully covering the pages we need to 6986 // lock. Nothing to do then. 6987 delete newRange; 6988 return B_OK; 6989 } else { 6990 // An existing range covers the start of the area we want to lock. 6991 // Advance our start address to avoid it. 6992 address = (void*)currentRange->end; 6993 6994 // Move on to the next range for the next step 6995 currentRange = lockedPages->GetNext(currentRange); 6996 } 6997 } 6998 6999 // First, lock the new range 7000 newRange->start = (addr_t)address; 7001 newRange->end = endAddress; 7002 error = newRange->LockMemory(); 7003 if (error != B_OK) { 7004 delete newRange; 7005 return error; 7006 } 7007 7008 // Unlock all ranges fully overlapping with the area we need to lock 7009 while (currentRange != NULL && currentRange->end < endAddress) { 7010 // The existing range is fully contained inside the new one we're 7011 // trying to lock. 
Delete/unlock it, and replace it with a new one 7012 // (this limits fragmentation of the range list, and is simpler to 7013 // manage) 7014 error = currentRange->UnlockMemory(); 7015 if (error != B_OK) { 7016 panic("Failed to unlock a memory range: %s", strerror(error)); 7017 newRange->UnlockMemory(); 7018 delete newRange; 7019 return error; 7020 } 7021 LockedPages* temp = currentRange; 7022 currentRange = lockedPages->GetNext(currentRange); 7023 lockedPages->Remove(temp); 7024 delete temp; 7025 } 7026 7027 if (currentRange != NULL) { 7028 // One last range may cover the end of the area we're trying to lock 7029 7030 if (currentRange->start == (addr_t)address) { 7031 // In case two overlapping ranges (one at the start and the other 7032 // at the end) already cover the area we're after, there's nothing 7033 // more to do. So we destroy our new extra allocation 7034 error = newRange->UnlockMemory(); 7035 delete newRange; 7036 return error; 7037 } 7038 7039 if (currentRange->start < endAddress) { 7040 // Make sure the last range is not overlapping, by moving its start 7041 error = currentRange->Move(endAddress, currentRange->end); 7042 if (error != B_OK) { 7043 panic("Failed to move a memory range: %s", strerror(error)); 7044 newRange->UnlockMemory(); 7045 delete newRange; 7046 return error; 7047 } 7048 } 7049 } 7050 7051 // Finally, store the new range in the locked list 7052 lockedPages->InsertBefore(currentRange, newRange); 7053 return B_OK; 7054 } 7055 7056 7057 status_t 7058 _user_munlock(const void* address, size_t size) { 7059 // Maybe there's nothing to do, in which case, do nothing 7060 if (size == 0) 7061 return B_OK; 7062 7063 // Make sure the address is multiple of B_PAGE_SIZE (POSIX allows us to 7064 // reject the call otherwise) 7065 if ((addr_t)address % B_PAGE_SIZE != 0) 7066 return EINVAL; 7067 7068 // Round size up to the next page 7069 size = ROUNDUP(size, B_PAGE_SIZE); 7070 7071 addr_t endAddress = (addr_t)address + size; 7072 7073 // Get and lock the team 7074 Team* team = thread_get_current_thread()->team; 7075 TeamLocker teamLocker(team); 7076 teamLocker.Lock(); 7077 LockedPagesList* lockedPages = &team->locked_pages_list; 7078 7079 status_t error = B_OK; 7080 7081 // Locate the first locked range possibly overlapping ours 7082 LockedPages* currentRange = lockedPages->Head(); 7083 while (currentRange != NULL && currentRange->end <= (addr_t)address) 7084 currentRange = lockedPages->GetNext(currentRange); 7085 7086 if (currentRange == NULL || currentRange->start >= endAddress) { 7087 // No range is intersecting, nothing to unlock 7088 return B_OK; 7089 } 7090 7091 if (currentRange->start < (addr_t)address) { 7092 if (currentRange->end > endAddress) { 7093 // There is a range fully covering the area we want to unlock, 7094 // and it extends on both sides. 
We need to split it in two 7095 LockedPages* newRange = new(std::nothrow) LockedPages(); 7096 if (newRange == NULL) 7097 return ENOMEM; 7098 7099 newRange->start = endAddress; 7100 newRange->end = currentRange->end; 7101 7102 error = newRange->LockMemory(); 7103 if (error != B_OK) { 7104 delete newRange; 7105 return error; 7106 } 7107 7108 error = currentRange->Move(currentRange->start, (addr_t)address); 7109 if (error != B_OK) { 7110 delete newRange; 7111 return error; 7112 } 7113 7114 lockedPages->InsertAfter(currentRange, newRange); 7115 return B_OK; 7116 } else { 7117 // There is a range that overlaps and extends before the one we 7118 // want to unlock, we need to shrink it 7119 error = currentRange->Move(currentRange->start, (addr_t)address); 7120 if (error != B_OK) 7121 return error; 7122 } 7123 } 7124 7125 while (currentRange != NULL && currentRange->end <= endAddress) { 7126 // Unlock all fully overlapping ranges 7127 error = currentRange->UnlockMemory(); 7128 if (error != B_OK) 7129 return error; 7130 LockedPages* temp = currentRange; 7131 currentRange = lockedPages->GetNext(currentRange); 7132 lockedPages->Remove(temp); 7133 delete temp; 7134 } 7135 7136 // Finally split the last partially overlapping range if any 7137 if (currentRange != NULL && currentRange->start < endAddress) { 7138 error = currentRange->Move(endAddress, currentRange->end); 7139 if (error != B_OK) 7140 return error; 7141 } 7142 7143 return B_OK; 7144 } 7145 7146 7147 // #pragma mark -- compatibility 7148 7149 7150 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7151 7152 7153 struct physical_entry_beos { 7154 uint32 address; 7155 uint32 size; 7156 }; 7157 7158 7159 /*! The physical_entry structure has changed. We need to translate it to the 7160 old one. 7161 */ 7162 extern "C" int32 7163 __get_memory_map_beos(const void* _address, size_t numBytes, 7164 physical_entry_beos* table, int32 numEntries) 7165 { 7166 if (numEntries <= 0) 7167 return B_BAD_VALUE; 7168 7169 const uint8* address = (const uint8*)_address; 7170 7171 int32 count = 0; 7172 while (numBytes > 0 && count < numEntries) { 7173 physical_entry entry; 7174 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7175 if (result < 0) { 7176 if (result != B_BUFFER_OVERFLOW) 7177 return result; 7178 } 7179 7180 if (entry.address >= (phys_addr_t)1 << 32) { 7181 panic("get_memory_map(): Address is greater 4 GB!"); 7182 return B_ERROR; 7183 } 7184 7185 table[count].address = entry.address; 7186 table[count++].size = entry.size; 7187 7188 address += entry.size; 7189 numBytes -= entry.size; 7190 } 7191 7192 // null-terminate the table, if possible 7193 if (count < numEntries) { 7194 table[count].address = 0; 7195 table[count].size = 0; 7196 } 7197 7198 return B_OK; 7199 } 7200 7201 7202 /*! The type of the \a physicalAddress parameter has changed from void* to 7203 phys_addr_t. 7204 */ 7205 extern "C" area_id 7206 __map_physical_memory_beos(const char* name, void* physicalAddress, 7207 size_t numBytes, uint32 addressSpec, uint32 protection, 7208 void** _virtualAddress) 7209 { 7210 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7211 addressSpec, protection, _virtualAddress); 7212 } 7213 7214 7215 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7216 we meddle with the \a lock parameter to force 32 bit. 
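	B_FULL_LOCK and B_LAZY_LOCK are therefore mapped to B_32_BIT_FULL_LOCK,
	and B_CONTIGUOUS to B_32_BIT_CONTIGUOUS, so that all physical pages of
	the area are allocated below the 4 GB boundary; B_NO_LOCK is passed
	through unchanged.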
7217 */ 7218 extern "C" area_id 7219 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7220 size_t size, uint32 lock, uint32 protection) 7221 { 7222 switch (lock) { 7223 case B_NO_LOCK: 7224 break; 7225 case B_FULL_LOCK: 7226 case B_LAZY_LOCK: 7227 lock = B_32_BIT_FULL_LOCK; 7228 break; 7229 case B_CONTIGUOUS: 7230 lock = B_32_BIT_CONTIGUOUS; 7231 break; 7232 } 7233 7234 return __create_area_haiku(name, _address, addressSpec, size, lock, 7235 protection); 7236 } 7237 7238 7239 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7240 "BASE"); 7241 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7242 "map_physical_memory@", "BASE"); 7243 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7244 "BASE"); 7245 7246 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7247 "get_memory_map@@", "1_ALPHA3"); 7248 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7249 "map_physical_memory@@", "1_ALPHA3"); 7250 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7251 "1_ALPHA3"); 7252 7253 7254 #else 7255 7256 7257 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7258 "get_memory_map@@", "BASE"); 7259 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7260 "map_physical_memory@@", "BASE"); 7261 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7262 "BASE"); 7263 7264 7265 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7266
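

// The following, non-compiled sketch illustrates the invariant behind the
// LockedPages list maintained by _user_mlock()/_user_munlock() above: the
// low-level lock_memory()/unlock_memory() calls must always be balanced on
// exactly the ranges that were locked before, so shrinking a tracked range
// locks the part that is kept first and only then unlocks the old range
// (compare LockedPages::Move()). The "fake_" functions are illustrative
// stand-ins, not kernel API.
#if 0

#include <cstdio>

typedef unsigned long addr;

static void
fake_lock_memory(addr start, addr end)
{
	printf("lock_memory(%#lx, %#lx bytes)\n", start, end - start);
}

static void
fake_unlock_memory(addr start, addr end)
{
	printf("unlock_memory(%#lx, %#lx bytes)\n", start, end - start);
}

struct TrackedRange {
	addr start;
	addr end;

	// Shrink/move the tracked range to [newStart, newEnd): wire the new
	// range first, then release the old one, so that every unlock matches
	// a previous lock exactly.
	void MoveTo(addr newStart, addr newEnd)
	{
		fake_lock_memory(newStart, newEnd);
		fake_unlock_memory(start, end);
		start = newStart;
		end = newEnd;
	}
};

int
main()
{
	// mlock() of [0x1000, 0x4000): one tracked range, one lock call
	TrackedRange range = { 0x1000, 0x4000 };
	fake_lock_memory(range.start, range.end);

	// munlock() of [0x1000, 0x2000): keep the tail -- lock it first, then
	// unlock the full old range
	range.MoveTo(0x2000, 0x4000);

	// munlock() of the rest: the tracked range goes away entirely
	fake_unlock_memory(range.start, range.end);
	return 0;
}

#endif	// 0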