/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <vm/vm.h>

#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#include <algorithm>

#include <OS.h>
#include <KernelExport.h>

#include <AutoDeleter.h>

#include <symbol_versioning.h>

#include <arch/cpu.h>
#include <arch/vm.h>
#include <boot/elf.h>
#include <boot/stage2.h>
#include <condition_variable.h>
#include <console.h>
#include <debug.h>
#include <file_cache.h>
#include <fs/fd.h>
#include <heap.h>
#include <kernel.h>
#include <int.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <smp.h>
#include <system_info.h>
#include <thread.h>
#include <team.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/khash.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "VMAddressSpaceLocking.h"
#include "VMAnonymousCache.h"
#include "VMAnonymousNoSwapCache.h"
#include "IORequest.h"


//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif


class AreaCacheLocking {
public:
	inline bool Lock(VMCache* lockable)
	{
		return false;
	}

	inline void Unlock(VMCache* lockable)
	{
		vm_area_put_locked_cache(lockable);
	}
};

class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
public:
	inline AreaCacheLocker(VMCache* cache = NULL)
		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
	{
	}

	inline AreaCacheLocker(VMArea* area)
		: AutoLocker<VMCache, AreaCacheLocking>()
	{
		SetTo(area);
	}

	inline void SetTo(VMCache* cache, bool alreadyLocked)
	{
		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
	}

	inline void SetTo(VMArea* area)
	{
		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};
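
// Illustrative use of VMCacheChainLocker, mirroring what cut_area() below
// does: lock an area's top cache, hand it to the chain locker, then lock the
// whole source chain; the destructor releases the chain in source -> consumer
// order.
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the locked chain; it is unlocked when the locker goes out
//	// of scope (or when Unlock() is called explicitly).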


// The memory reserve an allocation of a certain priority must not touch.
static const size_t kMemoryReserveForPriority[] = {
	VM_MEMORY_RESERVE_USER,		// user
	VM_MEMORY_RESERVE_SYSTEM,	// system
	0							// VIP
};


ObjectCache* gPageMappingsObjectCache;

static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");

static off_t sAvailableMemory;
static off_t sNeededMemory;
static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
static uint32 sPageFaults;

static VMPhysicalPageMapper* sPhysicalPageMapper;

#if DEBUG_CACHE_LIST

struct cache_info {
	VMCache*	cache;
	addr_t		page_count;
	addr_t		committed;
};

static const int kCacheInfoTableCount = 100 * 1024;
static cache_info* sCacheInfoTable;

#endif	// DEBUG_CACHE_LIST


// function declarations
static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
	bool addressSpaceCleanup);
static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage,
	VMAreaWiredRange* wiredRange = NULL);
static status_t map_backing_store(VMAddressSpace* addressSpace,
	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
	int protection, int mapping, uint32 flags,
	const virtual_address_restrictions* addressRestrictions, bool kernel,
	VMArea** _area, void** _virtualAddress);
static void fix_protection(uint32* protection);


// #pragma mark -


#if VM_PAGE_FAULT_TRACING

namespace VMPageFaultTracing {

class PageFaultStart : public AbstractTraceEntry {
public:
	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
		:
		fAddress(address),
		fPC(pc),
		fWrite(write),
		fUser(user)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
	}

private:
	addr_t	fAddress;
	addr_t	fPC;
	bool	fWrite;
	bool	fUser;
};


// page fault errors
enum {
	PAGE_FAULT_ERROR_NO_AREA = 0,
	PAGE_FAULT_ERROR_KERNEL_ONLY,
	PAGE_FAULT_ERROR_WRITE_PROTECTED,
	PAGE_FAULT_ERROR_READ_PROTECTED,
	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
};


class PageFaultError : public AbstractTraceEntry {
public:
	PageFaultError(area_id area, status_t error)
		:
		fArea(area),
		fError(error)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		switch (fError) {
			case PAGE_FAULT_ERROR_NO_AREA:
				out.Print("page fault error: no area");
				break;
			case PAGE_FAULT_ERROR_KERNEL_ONLY:
				out.Print("page fault error: area: %ld, kernel only", fArea);
				break;
			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
				out.Print("page fault error: area: %ld, write protected",
					fArea);
				break;
			case PAGE_FAULT_ERROR_READ_PROTECTED:
				out.Print("page fault error: area: %ld, read protected", fArea);
				break;
			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
				out.Print("page fault error: area: %ld, execute protected",
					fArea);
				break;
			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
				out.Print("page fault error: kernel touching bad user memory");
				break;
			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
				out.Print("page fault error: no address space");
				break;
			default:
				out.Print("page fault error: area: %ld, error: %s", fArea,
					strerror(fError));
				break;
		}
	}

private:
	area_id		fArea;
	status_t	fError;
};


class PageFaultDone : public AbstractTraceEntry {
public:
	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
			vm_page* page)
		:
		fArea(area),
		fTopCache(topCache),
		fCache(cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
			"page: %p", fArea, fTopCache, fCache, fPage);
	}

private:
	area_id		fArea;
	VMCache*	fTopCache;
	VMCache*	fCache;
	vm_page*	fPage;
};

}	// namespace VMPageFaultTracing

#	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
#else
#	define TPF(x) ;
#endif	// VM_PAGE_FAULT_TRACING


// #pragma mark -


/*!	The page's cache must be locked.
*/
static inline void
increment_page_wired_count(vm_page* page)
{
	if (!page->IsMapped())
		atomic_add(&gMappedPagesCount, 1);
	page->IncrementWiredCount();
}


/*!	The page's cache must be locked.
*/
static inline void
decrement_page_wired_count(vm_page* page)
{
	page->DecrementWiredCount();
	if (!page->IsMapped())
		atomic_add(&gMappedPagesCount, -1);
}


static inline addr_t
virtual_page_address(VMArea* area, vm_page* page)
{
	return area->Base()
		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
}


//!	You need to have the address space locked when calling this function
static VMArea*
lookup_area(VMAddressSpace* addressSpace, area_id id)
{
	VMAreaHash::ReadLock();

	VMArea* area = VMAreaHash::LookupLocked(id);
	if (area != NULL && area->address_space != addressSpace)
		area = NULL;

	VMAreaHash::ReadUnlock();

	return area;
}


static status_t
allocate_area_page_protections(VMArea* area)
{
	// In the page protections we store only the three user protections,
	// so we use 4 bits per page.
	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
	area->page_protections = (uint8*)malloc_etc(bytes,
		HEAP_DONT_LOCK_KERNEL_SPACE);
	if (area->page_protections == NULL)
		return B_NO_MEMORY;

	// init the page protections for all pages to that of the area
	uint32 areaProtection = area->protection
		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
	memset(area->page_protections, areaProtection | (areaProtection << 4),
		bytes);
	return B_OK;
}


static inline void
set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
{
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
	uint8& entry = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		entry = (entry & 0xf0) | protection;
	else
		entry = (entry & 0x0f) | (protection << 4);
}


static inline uint32
get_area_page_protection(VMArea* area, addr_t pageAddress)
{
	if (area->page_protections == NULL)
		return area->protection;

	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
	uint32 protection = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		protection &= 0x0f;
	else
		protection >>= 4;

	// If this is a kernel area we translate the user flags to kernel flags.
	if (area->address_space == VMAddressSpace::Kernel()) {
		uint32 kernelProtection = 0;
		if ((protection & B_READ_AREA) != 0)
			kernelProtection |= B_KERNEL_READ_AREA;
		if ((protection & B_WRITE_AREA) != 0)
			kernelProtection |= B_KERNEL_WRITE_AREA;

		return kernelProtection;
	}

	return protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
}
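
// Worked example of the nibble packing above (illustrative; assumes the usual
// flag values B_READ_AREA == 0x1 and B_WRITE_AREA == 0x2): protections are
// stored two pages per byte -- the even page in the low nibble, the odd page
// in the high nibble. Setting page 0 of an area to B_READ_AREA and page 1 to
// B_READ_AREA | B_WRITE_AREA thus leaves area->page_protections[0] == 0x31.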


/*!	The caller must have reserved enough pages that the translation map
	implementation might need to map this page.
	The page's cache must be locked.
*/
static status_t
map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
	vm_page_reservation* reservation)
{
	VMTranslationMap* map = area->address_space->TranslationMap();

	bool wasMapped = page->IsMapped();

	if (area->wiring == B_NO_LOCK) {
		DEBUG_PAGE_ACCESS_CHECK(page);

		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
			gPageMappingsObjectCache,
			CACHE_DONT_WAIT_FOR_MEMORY
				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
		if (mapping == NULL)
			return B_NO_MEMORY;

		mapping->page = page;
		mapping->area = area;

		map->Lock();

		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);

		// insert mapping into lists
		if (!page->IsMapped())
			atomic_add(&gMappedPagesCount, 1);

		page->mappings.Add(mapping);
		area->mappings.Add(mapping);

		map->Unlock();
	} else {
		DEBUG_PAGE_ACCESS_CHECK(page);

		map->Lock();
		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);
		map->Unlock();

		increment_page_wired_count(page);
	}

	if (!wasMapped) {
		// The page is mapped now, so we must not remain in the cached queue.
		// It also makes sense to move it from the inactive to the active
		// queue, since otherwise the page daemon wouldn't come to keep track
		// of it (in idle mode) -- if the page isn't touched, it will be
		// deactivated after a full iteration through the queue at the latest.
		if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_INACTIVE) {
			vm_page_set_state(page, PAGE_STATE_ACTIVE);
		}
	}

	return B_OK;
}


/*!	If \a preserveModified is \c true, the caller must hold the lock of the
	page's cache.
*/
static inline bool
unmap_page(VMArea* area, addr_t virtualAddress)
{
	return area->address_space->TranslationMap()->UnmapPage(area,
		virtualAddress, true);
}


/*!	If \a preserveModified is \c true, the caller must hold the lock of all
	mapped pages' caches.
*/
static inline void
unmap_pages(VMArea* area, addr_t base, size_t size)
{
	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
}


/*!	Cuts a piece out of an area. If the given cut range covers the complete
	area, it is deleted. If it covers the beginning or the end, the area is
	resized accordingly. If the range covers some part in the middle of the
	area, it is split in two; in this case the second area is returned via
	\a _secondArea (the variable is left untouched in the other cases).
	The address space must be write locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
	addr_t lastAddress, VMArea** _secondArea, bool kernel)
{
	// Does the cut range intersect with the area at all?
	addr_t areaLast = area->Base() + (area->Size() - 1);
	if (area->Base() > lastAddress || areaLast < address)
		return B_OK;

	// Is the area fully covered?
	if (area->Base() >= address && areaLast <= lastAddress) {
		delete_area(addressSpace, area, false);
		return B_OK;
	}

	int priority;
	uint32 allocationFlags;
	if (addressSpace == VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_SYSTEM;
		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
			| HEAP_DONT_LOCK_KERNEL_SPACE;
	} else {
		priority = VM_PRIORITY_USER;
		allocationFlags = 0;
	}

	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	// Cut the end only?
	if (areaLast <= lastAddress) {
		size_t oldSize = area->Size();
		size_t newSize = address - area->Base();

		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
			allocationFlags);
		if (error != B_OK)
			return error;

		// unmap pages
		unmap_pages(area, address, oldSize - newSize);

		// If no one else uses the area's cache, we can resize it, too.
		if (cache->areas == area && area->cache_next == NULL
			&& cache->consumers.IsEmpty()
			&& cache->type == CACHE_TYPE_RAM) {
			// Since VMCache::Resize() can temporarily drop the lock, we must
			// unlock all lower caches to prevent locking order inversion.
			cacheChainLocker.Unlock(cache);
			cache->Resize(cache->virtual_base + newSize, priority);
			cache->ReleaseRefAndUnlock();
		}

		return B_OK;
	}

	// Cut the beginning only?
	if (area->Base() >= address) {
		addr_t oldBase = area->Base();
		addr_t newBase = lastAddress + 1;
		size_t newSize = areaLast - lastAddress;

		// unmap pages
		unmap_pages(area, oldBase, newBase - oldBase);

		// resize the area
		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
			allocationFlags);
		if (error != B_OK)
			return error;

		// TODO: If no one else uses the area's cache, we should resize it, too!

		area->cache_offset += newBase - oldBase;

		return B_OK;
	}

	// The tough part -- cut a piece out of the middle of the area.
	// We do that by shrinking the area to the beginning section and creating a
	// new area for the end section.

	addr_t firstNewSize = address - area->Base();
	addr_t secondBase = lastAddress + 1;
	addr_t secondSize = areaLast - lastAddress;

	// unmap pages
	unmap_pages(area, address, area->Size() - firstNewSize);

	// resize the area
	addr_t oldSize = area->Size();
	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
		allocationFlags);
	if (error != B_OK)
		return error;

	// TODO: If no one else uses the area's cache, we might want to create a
	// new cache for the second area, transfer the concerned pages from the
	// first cache to it and resize the first cache.

	// map the second area
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = (void*)secondBase;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	VMArea* secondArea;
	error = map_backing_store(addressSpace, cache,
		area->cache_offset + (secondBase - area->Base()), area->name,
		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, kernel, &secondArea, NULL);
	if (error != B_OK) {
		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
		return error;
	}

	// We need a cache reference for the new area.
	cache->AcquireRefLocked();

	if (_secondArea != NULL)
		*_secondArea = secondArea;

	return B_OK;
}


/*!	Deletes all areas in the given address range.
	The address space must be write-locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
	bool kernel)
{
	size = PAGE_ALIGN(size);
	addr_t lastAddress = address + (size - 1);

	// Check whether the caller is allowed to modify the concerned areas.
	if (!kernel) {
		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
				VMArea* area = it.Next();) {
			addr_t areaLast = area->Base() + (area->Size() - 1);
			if (area->Base() < lastAddress && address < areaLast) {
				if ((area->protection & B_KERNEL_AREA) != 0)
					return B_NOT_ALLOWED;
			}
		}
	}

	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
			VMArea* area = it.Next();) {
		addr_t areaLast = area->Base() + (area->Size() - 1);
		if (area->Base() < lastAddress && address < areaLast) {
			status_t error = cut_area(addressSpace, area, address,
				lastAddress, NULL, kernel);
			if (error != B_OK)
				return error;
				// Failing after already messing with areas is ugly, but we
				// can't do anything about it.
		}
	}

	return B_OK;
}


/*!	You need to hold the lock of the cache and the write lock of the address
	space when calling this function.
	Note that in case of error your cache will be temporarily unlocked.
	If \a addressSpec is \c B_EXACT_ADDRESS and the
	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
	that no part of the specified address range (base \c *_virtualAddress, size
	\a size) is wired.
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
		addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		_area, areaName));
	cache->AssertLocked();

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area, and have to back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status != B_OK) {
		// TODO: wait and try again once this is working in the backend
#if 0
		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
				0, 0);
		}
#endif
		goto err2;
	}

	// attach the cache to the area
	area->cache = cache;
	area->cache_offset = offset;

	// point the cache back to the area
	cache->InsertAreaLocked(area);
	if (mapping == REGION_PRIVATE_MAP)
		cache->Unlock();

	// insert the area in the global area hash table
	VMAreaHash::Insert(area);

	// grab a ref to the address space (the area holds this)
	addressSpace->Get();

//	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
//		cache, sourceCache, areaName, area);

	*_area = area;
	return B_OK;

err2:
	if (mapping == REGION_PRIVATE_MAP) {
		// We created this cache, so we must delete it again. Note that we
		// need to temporarily unlock the source cache or we'll otherwise
		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
		sourceCache->Unlock();
		cache->ReleaseRefAndUnlock();
		sourceCache->Lock();
	}
err1:
	addressSpace->DeleteArea(area, allocationFlags);
	return status;
}


/*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
	locker1, locker2).
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*!	Checks whether the given area has any wired ranges intersecting with the
	specified range and waits, if so.

	When it has to wait, the function calls \c Unlock() on both \a locker1
	and \a locker2, if given.
	The area's top cache must be locked and must be unlocked as a side effect
	of calling \c Unlock() on either \a locker1 or \a locker2.

	If the function does not have to wait it does not modify or unlock any
	object.

	\param area The area to be checked.
	\param base The base address of the range to check.
	\param size The size of the address range to check.
	\param locker1 An object to be unlocked before starting to wait (may
		be \c NULL).
	\param locker2 An object to be unlocked before starting to wait (may
		be \c NULL).
	\return \c true, if the function had to wait, \c false otherwise.
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
	LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter, base, size))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*!	Checks whether the given address space has any wired ranges intersecting
	with the specified range and waits, if so.

	Similar to wait_if_area_range_is_wired(), with the following differences:
	- All areas intersecting with the range are checked (respectively all until
	  one is found that contains a wired range intersecting with the given
	  range).
	- The given address space must at least be read-locked and must be unlocked
	  when \c Unlock() is called on \a locker.
	- None of the areas' caches are allowed to be locked.
*/
template<typename LockerType>
static inline bool
wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
	size_t size, LockerType* locker)
{
	addr_t end = base + size - 1;
	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
			VMArea* area = it.Next();) {
		// TODO: Introduce a VMAddressSpace method to get a close iterator!
		if (area->Base() > end)
			return false;

		if (base >= area->Base() + area->Size() - 1)
			continue;

		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));

		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
			return true;
	}

	return false;
}


/*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
	It must be called in a situation where the kernel address space may be
	locked.
*/
status_t
vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
{
	AddressSpaceReadLocker locker;
	VMArea* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	if (area->page_protections == NULL) {
		status = allocate_area_page_protections(area);
		if (status != B_OK)
			return status;
	}

	*cookie = (void*)area;
	return B_OK;
}


/*!	This is a debug helper function that can only be used with very specific
	use cases.
	Sets protection for the given address range to the protection specified.
	If \a protection is 0 then the involved pages will be marked non-present
	in the translation map to cause a fault on access. The pages aren't
	actually unmapped however so that they can be marked present again with
	additional calls to this function. For this to work the area must be
	fully locked in memory so that the pages aren't otherwise touched.
	This function does not lock the kernel address space and needs to be
	supplied with a \a cookie retrieved from a successful call to
	vm_prepare_kernel_area_debug_protection().
*/
status_t
vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
	uint32 protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0
		|| (addr_t)address + size < (addr_t)address
		|| !IS_KERNEL_ADDRESS(address)
		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
		return B_BAD_VALUE;
	}

	// Translate the kernel protection to user protection as we only store that.
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault or
	// restore the mapping entries unchanged so that lookup will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}
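
// Illustrative two-step use of the debug protection helpers above (variable
// names are hypothetical; the area is assumed to be fully locked in memory,
// as required):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the page fault on any access ...
//		vm_set_kernel_area_debug_protection(cookie, pageAddress,
//			B_PAGE_SIZE, 0);
//		// ... and later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, pageAddress,
//			B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}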


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
		true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel()
			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
}


status_t
vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
	addr_t size, uint32 flags)
{
	if (size == 0)
		return B_BAD_VALUE;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec;
	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
		addressSpace == VMAddressSpace::Kernel()
			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
		_address);
}


area_id
vm_create_anonymous_area(team_id team, const char *name, addr_t size,
	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
	const virtual_address_restrictions* virtualAddressRestrictions,
	const physical_address_restrictions* physicalAddressRestrictions,
	bool kernel, void** _address)
{
	VMArea* area;
	VMCache* cache;
	vm_page* page = NULL;
	bool isStack = (protection & B_STACK_AREA) != 0;
	page_num_t guardPages;
	bool canOvercommit = false;
	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
		? VM_PAGE_ALLOC_CLEAR : 0;

	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
		team, name, size));

	size = PAGE_ALIGN(size);
	guardSize = PAGE_ALIGN(guardSize);
	guardPages = guardSize / B_PAGE_SIZE;

	if (size == 0 || size < guardSize)
		return B_BAD_VALUE;
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
		canOvercommit = true;

#ifdef DEBUG_KERNEL_STACKS
	if ((protection & B_KERNEL_STACK_AREA) != 0)
		isStack = true;
#endif

	// check parameters
	switch (virtualAddressRestrictions->address_specification) {
		case B_ANY_ADDRESS:
		case B_EXACT_ADDRESS:
		case B_BASE_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
		case B_RANDOMIZED_ANY_ADDRESS:
		case B_RANDOMIZED_BASE_ADDRESS:
			break;

		default:
			return B_BAD_VALUE;
	}

	// If low or high physical address restrictions are given, we force
	// B_CONTIGUOUS wiring, since only then we'll use
	// vm_page_allocate_page_run() which deals with those restrictions.
	if (physicalAddressRestrictions->low_address != 0
		|| physicalAddressRestrictions->high_address != 0) {
		wiring = B_CONTIGUOUS;
	}

	physical_address_restrictions stackPhysicalRestrictions;
	bool doReserveMemory = false;
	switch (wiring) {
		case B_NO_LOCK:
			break;
		case B_FULL_LOCK:
		case B_LAZY_LOCK:
		case B_CONTIGUOUS:
			doReserveMemory = true;
			break;
		case B_ALREADY_WIRED:
			break;
		case B_LOMEM:
			stackPhysicalRestrictions = *physicalAddressRestrictions;
			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
			physicalAddressRestrictions = &stackPhysicalRestrictions;
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		case B_32_BIT_FULL_LOCK:
			if (B_HAIKU_PHYSICAL_BITS <= 32
				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
				wiring = B_FULL_LOCK;
				doReserveMemory = true;
				break;
			}
			// TODO: We don't really support this mode efficiently. Just fall
			// through for now ...
		case B_32_BIT_CONTIGUOUS:
			#if B_HAIKU_PHYSICAL_BITS > 32
				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
					stackPhysicalRestrictions = *physicalAddressRestrictions;
					stackPhysicalRestrictions.high_address
						= (phys_addr_t)1 << 32;
					physicalAddressRestrictions = &stackPhysicalRestrictions;
				}
			#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory handler
	// won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to do so, since we currently
		// don't reclaim them (and probably can't reclaim all of them anyway).
		// Thus there are actually fewer physical pages than there should be,
		// which can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	VMAddressSpace* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
	page_num_t reservedPages = reservedMapPages;
	if (wiring == B_FULL_LOCK)
		reservedPages += size / B_PAGE_SIZE;

	vm_page_reservation reservation;
	if (reservedPages > 0) {
		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
					priority)) {
				reservedPages = 0;
				status = B_WOULD_BLOCK;
				goto err0;
			}
		} else
			vm_page_reserve_pages(&reservation, reservedPages, priority);
	}

	if (wiring == B_CONTIGUOUS) {
		// we try to allocate the page run here upfront as this may easily
		// fail for obvious reasons
		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
		if (page == NULL) {
			status = B_NO_MEMORY;
			goto err0;
		}
	}

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	do {
		status = locker.SetTo(team);
		if (status != B_OK)
			goto err1;

		addressSpace = locker.AddressSpace();
	} while (virtualAddressRestrictions->address_specification
			== B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(addressSpace,
			(addr_t)virtualAddressRestrictions->address, size, &locker));

	// create an anonymous cache
	// if it's a stack, make sure that two pages are available at least
	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
		wiring == B_NO_LOCK, priority);
	if (status != B_OK)
		goto err1;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->committed_size = reservedMemory;
		// TODO: This should be done via a method.
	reservedMemory = 0;

	cache->Lock();

	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
		kernel, &area, _address);

	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		goto err1;
	}

	locker.DegradeToReadLock();

	switch (wiring) {
		case B_NO_LOCK:
		case B_LAZY_LOCK:
			// do nothing - the pages are mapped in as needed
			break;

		case B_FULL_LOCK:
		{
			// Allocate and map all pages for this area

			off_t offset = 0;
			for (addr_t address = area->Base();
					address < area->Base() + (area->Size() - 1);
					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
				if (isStack && address < area->Base()
						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	else
				if (isStack && address >= area->Base() + area->Size()
						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	endif
					continue;
#endif
				vm_page* page = vm_page_allocate_page(&reservation,
					PAGE_STATE_WIRED | pageAllocFlags);
				cache->InsertPage(page, offset);
				map_page(area, page, address, protection, &reservation);

				DEBUG_PAGE_ACCESS_END(page);
			}

			break;
		}

		case B_ALREADY_WIRED:
		{
			// The pages should already be mapped. This is only really useful
			// during boot time. Find the appropriate vm_page objects and stick
			// them in the cache object.
			VMTranslationMap* map = addressSpace->TranslationMap();
			off_t offset = 0;

			if (!gKernelStartup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			map->Lock();

			for (addr_t virtualAddress = area->Base();
					virtualAddress < area->Base() + (area->Size() - 1);
					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
				phys_addr_t physicalAddress;
				uint32 flags;
				status = map->Query(virtualAddress, &physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
						"\n", physicalAddress);
				}

				DEBUG_PAGE_ACCESS_START(page);

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);
				vm_page_set_state(page, PAGE_STATE_WIRED);
				page->busy = false;

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can now
			// just map them in the address space
			VMTranslationMap* map = addressSpace->TranslationMap();
			phys_addr_t physicalAddress
				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress = area->Base();
			off_t offset = 0;

			map->Lock();

			for (virtualAddress = area->Base(); virtualAddress < area->Base()
					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->Map(virtualAddress, physicalAddress, protection,
					area->MemoryType(), &reservation);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		default:
			break;
	}

	cache->Unlock();

	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
		phys_addr_t pageNumber = page->physical_page_number;
		int32 i;
		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
			page = vm_lookup_page(pageNumber);
			if (page == NULL)
				panic("couldn't lookup physical page just allocated\n");

			vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

err0:
	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);
	if (reservedMemory > 0)
		vm_unreserve_memory(reservedMemory);

	return status;
}
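
// Illustrative caller sketch for vm_create_anonymous_area() (hypothetical
// names; the pattern follows the fd < 0 path of _vm_map_file() below): only
// the two restriction structures and the target team need to be filled in.
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id id = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"example buffer", B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, true, &address);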


area_id
vm_map_physical_memory(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t size, uint32 protection,
	phys_addr_t physicalAddress, bool alreadyWired)
{
	VMArea* area;
	VMCache* cache;
	addr_t mapOffset;

	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
		addressSpec, size, protection, physicalAddress));

	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	// if the physical address is somewhere inside a page, move the actual
	// area down to align on a page boundary
	mapOffset = physicalAddress % B_PAGE_SIZE;
	size += mapOffset;
	physicalAddress -= mapOffset;

	size = PAGE_ALIGN(size);

	// create a device cache
	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
	if (status != B_OK)
		return status;

	cache->virtual_end = size;

	cache->Lock();

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
		true, &area, _address);

	if (status < B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (status == B_OK) {
		// set requested memory type -- use uncached, if not given
		uint32 memoryType = addressSpec & B_MTR_MASK;
		if (memoryType == 0)
			memoryType = B_MTR_UC;

		area->SetMemoryType(memoryType);

		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
		if (status != B_OK)
			delete_area(locker.AddressSpace(), area, false);
	}

	if (status != B_OK)
		return status;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();

	if (alreadyWired) {
		// The area is already mapped, but possibly not with the right
		// memory type.
		map->Lock();
		map->ProtectArea(area, area->protection);
		map->Unlock();
	} else {
		// Map the area completely.

		// reserve pages needed for the mapping
		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
			area->Base() + (size - 1));
		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, reservePages,
			team == VMAddressSpace::KernelID()
				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);

		map->Lock();

		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
			map->Map(area->Base() + offset, physicalAddress + offset,
				protection, area->MemoryType(), &reservation);
		}

		map->Unlock();

		vm_page_unreserve_pages(&reservation);
	}

	// modify the pointer returned to be offset back into the new area
	// the same way the physical address passed in was offset
	*_address = (void*)((addr_t)*_address + mapOffset);

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


/*!	Don't use!
	TODO: This function was introduced to map physical page vecs to
	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
	use a device cache and does not track vm_page::wired_count!
*/
area_id
vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t* _size, uint32 protection,
	struct generic_io_vec* vecs, uint32 vecCount)
{
	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
		addressSpec, _size, protection, vecs, vecCount));

	if (!arch_vm_supports_protection(protection)
		|| (addressSpec & B_MTR_MASK) != 0) {
		return B_NOT_SUPPORTED;
	}

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	if (vecCount == 0)
		return B_BAD_VALUE;

	addr_t size = 0;
	for (uint32 i = 0; i < vecCount; i++) {
		if (vecs[i].base % B_PAGE_SIZE != 0
			|| vecs[i].length % B_PAGE_SIZE != 0) {
			return B_BAD_VALUE;
		}

		size += vecs[i].length;
	}

	// create a device cache
	VMCache* cache;
	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
	if (result != B_OK)
		return result;

	cache->virtual_end = size;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, _address);

	if (result != B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (result != B_OK)
		return result;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
		area->Base() + (size - 1));

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages,
		team == VMAddressSpace::KernelID()
			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
	map->Lock();

	uint32 vecIndex = 0;
	size_t vecOffset = 0;
	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
			vecOffset = 0;
			vecIndex++;
		}

		if (vecIndex >= vecCount)
			break;

		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
			protection, area->MemoryType(), &reservation);

		vecOffset += B_PAGE_SIZE;
	}

	map->Unlock();
	vm_page_unreserve_pages(&reservation);

	if (_size != NULL)
		*_size = size;

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


area_id
vm_create_null_area(team_id team, const char* name, void** address,
	uint32 addressSpec, addr_t size, uint32 flags)
{
	size = PAGE_ALIGN(size);

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	AddressSpaceWriteLocker locker;
	do {
		if (locker.SetTo(team) != B_OK)
			return B_BAD_TEAM_ID;
	} while (addressSpec == B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(locker.AddressSpace(),
			(addr_t)*address, size, &locker));

	// create a null cache
	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
	VMCache* cache;
	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *address;
	addressRestrictions.address_specification = addressSpec;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
		&addressRestrictions, true, &area, address);

	if (status < B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();

	area->cache_type = CACHE_TYPE_NULL;
	return area->id;
}


/*!	Creates the vnode cache for the specified \a vnode.
	The vnode has to be marked busy when calling this function.
*/
status_t
vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
{
	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
}


/*!	\a cache must be locked. The area's address space must be read-locked.
*/
static void
pre_map_area_pages(VMArea* area, VMCache* cache,
	vm_page_reservation* reservation)
{
	addr_t baseAddress = area->Base();
	addr_t cacheOffset = area->cache_offset;
	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;

	for (VMCachePagesTree::Iterator it
			= cache->pages.GetIterator(firstPage, true, true);
			vm_page* page = it.Next();) {
		if (page->cache_offset >= endPage)
			break;

		// skip busy and inactive pages
		if (page->busy || page->usage_count == 0)
			continue;

		DEBUG_PAGE_ACCESS_START(page);
		map_page(area, page,
			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
		DEBUG_PAGE_ACCESS_END(page);
	}
}


/*!	Will map the file specified by \a fd to an area in memory.
	The file will be mirrored beginning at the specified \a offset. The
	\a offset and \a size arguments have to be page aligned.
*/
static area_id
_vm_map_file(team_id team, const char* name, void** _address,
	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
	bool unmapAddressRange, int fd, off_t offset, bool kernel)
{
	// TODO: for binary files, we want to make sure that they get the
	// copy of a file at a given time, ie. later changes should not
	// make it into the mapped copy -- this will need quite some changes
	// to be done in a nice way
	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
		"%" B_PRIu32 ")\n", fd, offset, size, mapping));

	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = PAGE_ALIGN(size);

	if (mapping == REGION_NO_PRIVATE_MAP)
		protection |= B_SHARED_AREA;
	if (addressSpec != B_EXACT_ADDRESS)
		unmapAddressRange = false;

	if (fd < 0) {
		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address = *_address;
		virtualRestrictions.address_specification = addressSpec;
		physical_address_restrictions physicalRestrictions = {};
		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
			_address);
	}

	// get the open flags of the FD
	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return EBADF;
	int32 openMode = descriptor->open_mode;
	put_fd(descriptor);

	// The FD must be open for reading at any rate. For shared mapping with
	// write access, additionally the FD must be open for writing.
	if ((openMode & O_ACCMODE) == O_WRONLY
		|| (mapping == REGION_NO_PRIVATE_MAP
			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
			&& (openMode & O_ACCMODE) == O_RDONLY)) {
		return EACCES;
	}

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode = NULL;
	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
	if (status < B_OK)
		return status;
	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);

	// If we're going to pre-map pages, we need to reserve the pages needed by
	// the mapping backend upfront.
1919 page_num_t reservedPreMapPages = 0; 1920 vm_page_reservation reservation; 1921 if ((protection & B_READ_AREA) != 0) { 1922 AddressSpaceWriteLocker locker; 1923 status = locker.SetTo(team); 1924 if (status != B_OK) 1925 return status; 1926 1927 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1928 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 1929 1930 locker.Unlock(); 1931 1932 vm_page_reserve_pages(&reservation, reservedPreMapPages, 1933 team == VMAddressSpace::KernelID() 1934 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1935 } 1936 1937 struct PageUnreserver { 1938 PageUnreserver(vm_page_reservation* reservation) 1939 : 1940 fReservation(reservation) 1941 { 1942 } 1943 1944 ~PageUnreserver() 1945 { 1946 if (fReservation != NULL) 1947 vm_page_unreserve_pages(fReservation); 1948 } 1949 1950 vm_page_reservation* fReservation; 1951 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 1952 1953 // Lock the address space and, if the specified address range shall be 1954 // unmapped, ensure it is not wired. 1955 AddressSpaceWriteLocker locker; 1956 do { 1957 if (locker.SetTo(team) != B_OK) 1958 return B_BAD_TEAM_ID; 1959 } while (unmapAddressRange 1960 && wait_if_address_range_is_wired(locker.AddressSpace(), 1961 (addr_t)*_address, size, &locker)); 1962 1963 // TODO: this only works for file systems that use the file cache 1964 VMCache* cache; 1965 status = vfs_get_vnode_cache(vnode, &cache, false); 1966 if (status < B_OK) 1967 return status; 1968 1969 cache->Lock(); 1970 1971 VMArea* area; 1972 virtual_address_restrictions addressRestrictions = {}; 1973 addressRestrictions.address = *_address; 1974 addressRestrictions.address_specification = addressSpec; 1975 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 1976 0, protection, mapping, 1977 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 1978 &addressRestrictions, kernel, &area, _address); 1979 1980 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 1981 // map_backing_store() cannot know we no longer need the ref 1982 cache->ReleaseRefLocked(); 1983 } 1984 1985 if (status == B_OK && (protection & B_READ_AREA) != 0) 1986 pre_map_area_pages(area, cache, &reservation); 1987 1988 cache->Unlock(); 1989 1990 if (status == B_OK) { 1991 // TODO: this probably deserves a smarter solution, ie. don't always 1992 // prefetch stuff, and also, probably don't trigger it at this place. 
1993 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1994 // prefetches at max 10 MB starting from "offset" 1995 } 1996 1997 if (status != B_OK) 1998 return status; 1999 2000 area->cache_type = CACHE_TYPE_VNODE; 2001 return area->id; 2002 } 2003 2004 2005 area_id 2006 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2007 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2008 int fd, off_t offset) 2009 { 2010 if (!arch_vm_supports_protection(protection)) 2011 return B_NOT_SUPPORTED; 2012 2013 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2014 mapping, unmapAddressRange, fd, offset, true); 2015 } 2016 2017 2018 VMCache* 2019 vm_area_get_locked_cache(VMArea* area) 2020 { 2021 rw_lock_read_lock(&sAreaCacheLock); 2022 2023 while (true) { 2024 VMCache* cache = area->cache; 2025 2026 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2027 // cache has been deleted 2028 rw_lock_read_lock(&sAreaCacheLock); 2029 continue; 2030 } 2031 2032 rw_lock_read_lock(&sAreaCacheLock); 2033 2034 if (cache == area->cache) { 2035 cache->AcquireRefLocked(); 2036 rw_lock_read_unlock(&sAreaCacheLock); 2037 return cache; 2038 } 2039 2040 // the cache changed in the meantime 2041 cache->Unlock(); 2042 } 2043 } 2044 2045 2046 void 2047 vm_area_put_locked_cache(VMCache* cache) 2048 { 2049 cache->ReleaseRefAndUnlock(); 2050 } 2051 2052 2053 area_id 2054 vm_clone_area(team_id team, const char* name, void** address, 2055 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2056 bool kernel) 2057 { 2058 VMArea* newArea = NULL; 2059 VMArea* sourceArea; 2060 2061 // Check whether the source area exists and is cloneable. If so, mark it 2062 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2063 { 2064 AddressSpaceWriteLocker locker; 2065 status_t status = locker.SetFromArea(sourceID, sourceArea); 2066 if (status != B_OK) 2067 return status; 2068 2069 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2070 return B_NOT_ALLOWED; 2071 2072 sourceArea->protection |= B_SHARED_AREA; 2073 protection |= B_SHARED_AREA; 2074 } 2075 2076 // Now lock both address spaces and actually do the cloning. 2077 2078 MultiAddressSpaceLocker locker; 2079 VMAddressSpace* sourceAddressSpace; 2080 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2081 if (status != B_OK) 2082 return status; 2083 2084 VMAddressSpace* targetAddressSpace; 2085 status = locker.AddTeam(team, true, &targetAddressSpace); 2086 if (status != B_OK) 2087 return status; 2088 2089 status = locker.Lock(); 2090 if (status != B_OK) 2091 return status; 2092 2093 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2094 if (sourceArea == NULL) 2095 return B_BAD_VALUE; 2096 2097 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2098 return B_NOT_ALLOWED; 2099 2100 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2101 2102 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2103 // have been adapted. Maybe it should be part of the kernel settings, 2104 // anyway (so that old drivers can always work). 
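// The disabled block below would refuse to clone a kernel area into a
// userland address space unless the area had been created with
// B_USER_CLONEABLE_AREA.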
2105 #if 0 2106 if (sourceArea->aspace == VMAddressSpace::Kernel() 2107 && addressSpace != VMAddressSpace::Kernel() 2108 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2109 // kernel areas must not be cloned in userland, unless explicitly 2110 // declared user-cloneable upon construction 2111 status = B_NOT_ALLOWED; 2112 } else 2113 #endif 2114 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2115 status = B_NOT_ALLOWED; 2116 else { 2117 virtual_address_restrictions addressRestrictions = {}; 2118 addressRestrictions.address = *address; 2119 addressRestrictions.address_specification = addressSpec; 2120 status = map_backing_store(targetAddressSpace, cache, 2121 sourceArea->cache_offset, name, sourceArea->Size(), 2122 sourceArea->wiring, protection, mapping, 0, &addressRestrictions, 2123 kernel, &newArea, address); 2124 } 2125 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2126 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2127 // to create a new cache, and has therefore already acquired a reference 2128 // to the source cache - but otherwise it has no idea that we need 2129 // one. 2130 cache->AcquireRefLocked(); 2131 } 2132 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2133 // we need to map in everything at this point 2134 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2135 // we don't have actual pages to map but a physical area 2136 VMTranslationMap* map 2137 = sourceArea->address_space->TranslationMap(); 2138 map->Lock(); 2139 2140 phys_addr_t physicalAddress; 2141 uint32 oldProtection; 2142 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2143 2144 map->Unlock(); 2145 2146 map = targetAddressSpace->TranslationMap(); 2147 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2148 newArea->Base() + (newArea->Size() - 1)); 2149 2150 vm_page_reservation reservation; 2151 vm_page_reserve_pages(&reservation, reservePages, 2152 targetAddressSpace == VMAddressSpace::Kernel() 2153 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2154 map->Lock(); 2155 2156 for (addr_t offset = 0; offset < newArea->Size(); 2157 offset += B_PAGE_SIZE) { 2158 map->Map(newArea->Base() + offset, physicalAddress + offset, 2159 protection, newArea->MemoryType(), &reservation); 2160 } 2161 2162 map->Unlock(); 2163 vm_page_unreserve_pages(&reservation); 2164 } else { 2165 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2166 size_t reservePages = map->MaxPagesNeededToMap( 2167 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2168 vm_page_reservation reservation; 2169 vm_page_reserve_pages(&reservation, reservePages, 2170 targetAddressSpace == VMAddressSpace::Kernel() 2171 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2172 2173 // map in all pages from source 2174 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2175 vm_page* page = it.Next();) { 2176 if (!page->busy) { 2177 DEBUG_PAGE_ACCESS_START(page); 2178 map_page(newArea, page, 2179 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2180 - newArea->cache_offset), 2181 protection, &reservation); 2182 DEBUG_PAGE_ACCESS_END(page); 2183 } 2184 } 2185 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2186 // ensuring that! 2187 2188 vm_page_unreserve_pages(&reservation); 2189 } 2190 } 2191 if (status == B_OK) 2192 newArea->cache_type = sourceArea->cache_type; 2193 2194 vm_area_put_locked_cache(cache); 2195 2196 if (status < B_OK) 2197 return status; 2198 2199 return newArea->id; 2200 } 2201 2202 2203 /*! 
Deletes the specified area of the given address space. 2204 2205 The address space must be write-locked. 2206 The caller must ensure that the area does not have any wired ranges. 2207 2208 \param addressSpace The address space containing the area. 2209 \param area The area to be deleted. 2210 \param deletingAddressSpace \c true, if the address space is in the process 2211 of being deleted. 2212 */ 2213 static void 2214 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2215 bool deletingAddressSpace) 2216 { 2217 ASSERT(!area->IsWired()); 2218 2219 VMAreaHash::Remove(area); 2220 2221 // At this point the area is removed from the global hash table, but 2222 // still exists in the area list. 2223 2224 // Unmap the virtual address space the area occupied. 2225 { 2226 // We need to lock the complete cache chain. 2227 VMCache* topCache = vm_area_get_locked_cache(area); 2228 VMCacheChainLocker cacheChainLocker(topCache); 2229 cacheChainLocker.LockAllSourceCaches(); 2230 2231 // If the area's top cache is a temporary cache and the area is the only 2232 // one referencing it (besides us currently holding a second reference), 2233 // the unmapping code doesn't need to care about preserving the accessed 2234 // and dirty flags of the top cache page mappings. 2235 bool ignoreTopCachePageFlags 2236 = topCache->temporary && topCache->RefCount() == 2; 2237 2238 area->address_space->TranslationMap()->UnmapArea(area, 2239 deletingAddressSpace, ignoreTopCachePageFlags); 2240 } 2241 2242 if (!area->cache->temporary) 2243 area->cache->WriteModified(); 2244 2245 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2246 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2247 2248 arch_vm_unset_memory_type(area); 2249 addressSpace->RemoveArea(area, allocationFlags); 2250 addressSpace->Put(); 2251 2252 area->cache->RemoveArea(area); 2253 area->cache->ReleaseRef(); 2254 2255 addressSpace->DeleteArea(area, allocationFlags); 2256 } 2257 2258 2259 status_t 2260 vm_delete_area(team_id team, area_id id, bool kernel) 2261 { 2262 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2263 team, id)); 2264 2265 // lock the address space and make sure the area isn't wired 2266 AddressSpaceWriteLocker locker; 2267 VMArea* area; 2268 AreaCacheLocker cacheLocker; 2269 2270 do { 2271 status_t status = locker.SetFromArea(team, id, area); 2272 if (status != B_OK) 2273 return status; 2274 2275 cacheLocker.SetTo(area); 2276 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2277 2278 cacheLocker.Unlock(); 2279 2280 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2281 return B_NOT_ALLOWED; 2282 2283 delete_area(locker.AddressSpace(), area, false); 2284 return B_OK; 2285 } 2286 2287 2288 /*! Creates a new cache on top of given cache, moves all areas from 2289 the old cache to the new one, and changes the protection of all affected 2290 areas' pages to read-only. If requested, wired pages are moved up to the 2291 new cache and copies are added to the old cache in their place. 2292 Preconditions: 2293 - The given cache must be locked. 2294 - All of the cache's areas' address spaces must be read locked. 2295 - Either the cache must not have any wired ranges or a page reservation for 2296 all wired pages must be provided, so they can be copied. 2297 2298 \param lowerCache The cache on top of which a new cache shall be created. 2299 \param wiredPagesReservation If \c NULL there must not be any wired pages 2300 in \a lowerCache. 
Otherwise as many pages must be reserved as the cache
2301 has wired pages. The wired pages are copied in this case.
2302 */
2303 static status_t
2304 vm_copy_on_write_area(VMCache* lowerCache,
2305 vm_page_reservation* wiredPagesReservation)
2306 {
2307 VMCache* upperCache; 2308
2309 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2310
2311 // We need to separate the cache from its areas. The cache goes one level
2312 // deeper and we create a new cache in between. 2313
2314 // create an anonymous cache
2315 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2316 lowerCache->GuardSize() / B_PAGE_SIZE,
2317 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2318 VM_PRIORITY_USER);
2319 if (status != B_OK)
2320 return status; 2321
2322 upperCache->Lock(); 2323
2324 upperCache->temporary = 1;
2325 upperCache->virtual_base = lowerCache->virtual_base;
2326 upperCache->virtual_end = lowerCache->virtual_end; 2327
2328 // transfer the lower cache areas to the upper cache
2329 rw_lock_write_lock(&sAreaCacheLock);
2330 upperCache->TransferAreas(lowerCache);
2331 rw_lock_write_unlock(&sAreaCacheLock); 2332
2333 lowerCache->AddConsumer(upperCache); 2334
2335 // We now need to remap all pages from all of the cache's areas read-only,
2336 // so that a copy will be created on next write access. If there are wired
2337 // pages, we keep their protection, move them to the upper cache and create
2338 // copies for the lower cache.
2339 if (wiredPagesReservation != NULL) {
2340 // We need to handle wired pages -- iterate through the cache's pages.
2341 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2342 vm_page* page = it.Next();) {
2343 if (page->WiredCount() > 0) {
2344 // allocate a new page and copy the wired one
2345 vm_page* copiedPage = vm_page_allocate_page(
2346 wiredPagesReservation, PAGE_STATE_ACTIVE); 2347
2348 vm_memcpy_physical_page(
2349 copiedPage->physical_page_number * B_PAGE_SIZE,
2350 page->physical_page_number * B_PAGE_SIZE); 2351
2352 // move the wired page to the upper cache (note: removing is OK
2353 // with the SplayTree iterator) and insert the copy
2354 upperCache->MovePage(page);
2355 lowerCache->InsertPage(copiedPage,
2356 page->cache_offset * B_PAGE_SIZE); 2357
2358 DEBUG_PAGE_ACCESS_END(copiedPage);
2359 } else {
2360 // Change the protection of this page in all areas.
2361 for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2362 tempArea = tempArea->cache_next) {
2363 // The area must be readable in the same way it was
2364 // previously writable.
2365 uint32 protection = B_KERNEL_READ_AREA;
2366 if ((tempArea->protection & B_READ_AREA) != 0)
2367 protection |= B_READ_AREA; 2368
2369 VMTranslationMap* map
2370 = tempArea->address_space->TranslationMap();
2371 map->Lock();
2372 map->ProtectPage(tempArea,
2373 virtual_page_address(tempArea, page), protection);
2374 map->Unlock();
2375 }
2376 }
2377 }
2378 } else {
2379 ASSERT(lowerCache->WiredPagesCount() == 0); 2380
2381 // just change the protection of all areas
2382 for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2383 tempArea = tempArea->cache_next) {
2384 // The area must be readable in the same way it was previously
2385 // writable.
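// (Kernel read access is always kept; userland read access is preserved
// only if the area already had it.)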
2386 uint32 protection = B_KERNEL_READ_AREA; 2387 if ((tempArea->protection & B_READ_AREA) != 0) 2388 protection |= B_READ_AREA; 2389 2390 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2391 map->Lock(); 2392 map->ProtectArea(tempArea, protection); 2393 map->Unlock(); 2394 } 2395 } 2396 2397 vm_area_put_locked_cache(upperCache); 2398 2399 return B_OK; 2400 } 2401 2402 2403 area_id 2404 vm_copy_area(team_id team, const char* name, void** _address, 2405 uint32 addressSpec, uint32 protection, area_id sourceID) 2406 { 2407 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2408 2409 if ((protection & B_KERNEL_PROTECTION) == 0) { 2410 // set the same protection for the kernel as for userland 2411 protection |= B_KERNEL_READ_AREA; 2412 if (writableCopy) 2413 protection |= B_KERNEL_WRITE_AREA; 2414 } 2415 2416 // Do the locking: target address space, all address spaces associated with 2417 // the source cache, and the cache itself. 2418 MultiAddressSpaceLocker locker; 2419 VMAddressSpace* targetAddressSpace; 2420 VMCache* cache; 2421 VMArea* source; 2422 AreaCacheLocker cacheLocker; 2423 status_t status; 2424 bool sharedArea; 2425 2426 page_num_t wiredPages = 0; 2427 vm_page_reservation wiredPagesReservation; 2428 2429 bool restart; 2430 do { 2431 restart = false; 2432 2433 locker.Unset(); 2434 status = locker.AddTeam(team, true, &targetAddressSpace); 2435 if (status == B_OK) { 2436 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2437 &cache); 2438 } 2439 if (status != B_OK) 2440 return status; 2441 2442 cacheLocker.SetTo(cache, true); // already locked 2443 2444 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2445 2446 page_num_t oldWiredPages = wiredPages; 2447 wiredPages = 0; 2448 2449 // If the source area isn't shared, count the number of wired pages in 2450 // the cache and reserve as many pages. 2451 if (!sharedArea) { 2452 wiredPages = cache->WiredPagesCount(); 2453 2454 if (wiredPages > oldWiredPages) { 2455 cacheLocker.Unlock(); 2456 locker.Unlock(); 2457 2458 if (oldWiredPages > 0) 2459 vm_page_unreserve_pages(&wiredPagesReservation); 2460 2461 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2462 VM_PRIORITY_USER); 2463 2464 restart = true; 2465 } 2466 } else if (oldWiredPages > 0) 2467 vm_page_unreserve_pages(&wiredPagesReservation); 2468 } while (restart); 2469 2470 // unreserve pages later 2471 struct PagesUnreserver { 2472 PagesUnreserver(vm_page_reservation* reservation) 2473 : 2474 fReservation(reservation) 2475 { 2476 } 2477 2478 ~PagesUnreserver() 2479 { 2480 if (fReservation != NULL) 2481 vm_page_unreserve_pages(fReservation); 2482 } 2483 2484 private: 2485 vm_page_reservation* fReservation; 2486 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2487 2488 if (addressSpec == B_CLONE_ADDRESS) { 2489 addressSpec = B_EXACT_ADDRESS; 2490 *_address = (void*)source->Base(); 2491 } 2492 2493 // First, create a cache on top of the source area, respectively use the 2494 // existing one, if this is a shared area. 2495 2496 VMArea* target; 2497 virtual_address_restrictions addressRestrictions = {}; 2498 addressRestrictions.address = *_address; 2499 addressRestrictions.address_specification = addressSpec; 2500 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2501 name, source->Size(), source->wiring, protection, 2502 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2503 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2504 &addressRestrictions, true, &target, _address); 2505 if (status < B_OK) 2506 return status; 2507 2508 if (sharedArea) { 2509 // The new area uses the old area's cache, but map_backing_store() 2510 // hasn't acquired a ref. So we have to do that now. 2511 cache->AcquireRefLocked(); 2512 } 2513 2514 // If the source area is writable, we need to move it one layer up as well 2515 2516 if (!sharedArea) { 2517 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2518 // TODO: do something more useful if this fails! 2519 if (vm_copy_on_write_area(cache, 2520 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2521 panic("vm_copy_on_write_area() failed!\n"); 2522 } 2523 } 2524 } 2525 2526 // we return the ID of the newly created area 2527 return target->id; 2528 } 2529 2530 2531 status_t 2532 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2533 bool kernel) 2534 { 2535 fix_protection(&newProtection); 2536 2537 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2538 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2539 2540 if (!arch_vm_supports_protection(newProtection)) 2541 return B_NOT_SUPPORTED; 2542 2543 bool becomesWritable 2544 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2545 2546 // lock address spaces and cache 2547 MultiAddressSpaceLocker locker; 2548 VMCache* cache; 2549 VMArea* area; 2550 status_t status; 2551 AreaCacheLocker cacheLocker; 2552 bool isWritable; 2553 2554 bool restart; 2555 do { 2556 restart = false; 2557 2558 locker.Unset(); 2559 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2560 if (status != B_OK) 2561 return status; 2562 2563 cacheLocker.SetTo(cache, true); // already locked 2564 2565 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2566 return B_NOT_ALLOWED; 2567 2568 if (area->protection == newProtection) 2569 return B_OK; 2570 2571 if (team != VMAddressSpace::KernelID() 2572 && area->address_space->ID() != team) { 2573 // unless you're the kernel, you are only allowed to set 2574 // the protection of your own areas 2575 return B_NOT_ALLOWED; 2576 } 2577 2578 isWritable 2579 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2580 2581 // Make sure the area (respectively, if we're going to call 2582 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2583 // wired ranges. 2584 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2585 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2586 otherArea = otherArea->cache_next) { 2587 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2588 restart = true; 2589 break; 2590 } 2591 } 2592 } else { 2593 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2594 restart = true; 2595 } 2596 } while (restart); 2597 2598 bool changePageProtection = true; 2599 bool changeTopCachePagesOnly = false; 2600 2601 if (isWritable && !becomesWritable) { 2602 // writable -> !writable 2603 2604 if (cache->source != NULL && cache->temporary) { 2605 if (cache->CountWritableAreas(area) == 0) { 2606 // Since this cache now lives from the pages in its source cache, 2607 // we can change the cache's commitment to take only those pages 2608 // into account that really are in this cache. 2609 2610 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2611 team == VMAddressSpace::KernelID() 2612 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2613 2614 // TODO: we may be able to join with our source cache, if 2615 // count == 0 2616 } 2617 } 2618 2619 // If only the writability changes, we can just remap the pages of the 2620 // top cache, since the pages of lower caches are mapped read-only 2621 // anyway. That's advantageous only, if the number of pages in the cache 2622 // is significantly smaller than the number of pages in the area, 2623 // though. 2624 if (newProtection 2625 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2626 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2627 changeTopCachePagesOnly = true; 2628 } 2629 } else if (!isWritable && becomesWritable) { 2630 // !writable -> writable 2631 2632 if (!cache->consumers.IsEmpty()) { 2633 // There are consumers -- we have to insert a new cache. Fortunately 2634 // vm_copy_on_write_area() does everything that's needed. 2635 changePageProtection = false; 2636 status = vm_copy_on_write_area(cache, NULL); 2637 } else { 2638 // No consumers, so we don't need to insert a new one. 2639 if (cache->source != NULL && cache->temporary) { 2640 // the cache's commitment must contain all possible pages 2641 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2642 team == VMAddressSpace::KernelID() 2643 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2644 } 2645 2646 if (status == B_OK && cache->source != NULL) { 2647 // There's a source cache, hence we can't just change all pages' 2648 // protection or we might allow writing into pages belonging to 2649 // a lower cache. 2650 changeTopCachePagesOnly = true; 2651 } 2652 } 2653 } else { 2654 // we don't have anything special to do in all other cases 2655 } 2656 2657 if (status == B_OK) { 2658 // remap existing pages in this cache 2659 if (changePageProtection) { 2660 VMTranslationMap* map = area->address_space->TranslationMap(); 2661 map->Lock(); 2662 2663 if (changeTopCachePagesOnly) { 2664 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2665 page_num_t lastPageOffset 2666 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2667 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2668 vm_page* page = it.Next();) { 2669 if (page->cache_offset >= firstPageOffset 2670 && page->cache_offset <= lastPageOffset) { 2671 addr_t address = virtual_page_address(area, page); 2672 map->ProtectPage(area, address, newProtection); 2673 } 2674 } 2675 } else 2676 map->ProtectArea(area, newProtection); 2677 2678 map->Unlock(); 2679 } 2680 2681 area->protection = newProtection; 2682 } 2683 2684 return status; 2685 } 2686 2687 2688 status_t 2689 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2690 { 2691 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2692 if (addressSpace == NULL) 2693 return B_BAD_TEAM_ID; 2694 2695 VMTranslationMap* map = addressSpace->TranslationMap(); 2696 2697 map->Lock(); 2698 uint32 dummyFlags; 2699 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2700 map->Unlock(); 2701 2702 addressSpace->Put(); 2703 return status; 2704 } 2705 2706 2707 /*! The page's cache must be locked. 
2708 */ 2709 bool 2710 vm_test_map_modification(vm_page* page) 2711 { 2712 if (page->modified) 2713 return true; 2714 2715 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2716 vm_page_mapping* mapping; 2717 while ((mapping = iterator.Next()) != NULL) { 2718 VMArea* area = mapping->area; 2719 VMTranslationMap* map = area->address_space->TranslationMap(); 2720 2721 phys_addr_t physicalAddress; 2722 uint32 flags; 2723 map->Lock(); 2724 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2725 map->Unlock(); 2726 2727 if ((flags & PAGE_MODIFIED) != 0) 2728 return true; 2729 } 2730 2731 return false; 2732 } 2733 2734 2735 /*! The page's cache must be locked. 2736 */ 2737 void 2738 vm_clear_map_flags(vm_page* page, uint32 flags) 2739 { 2740 if ((flags & PAGE_ACCESSED) != 0) 2741 page->accessed = false; 2742 if ((flags & PAGE_MODIFIED) != 0) 2743 page->modified = false; 2744 2745 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2746 vm_page_mapping* mapping; 2747 while ((mapping = iterator.Next()) != NULL) { 2748 VMArea* area = mapping->area; 2749 VMTranslationMap* map = area->address_space->TranslationMap(); 2750 2751 map->Lock(); 2752 map->ClearFlags(virtual_page_address(area, page), flags); 2753 map->Unlock(); 2754 } 2755 } 2756 2757 2758 /*! Removes all mappings from a page. 2759 After you've called this function, the page is unmapped from memory and 2760 the page's \c accessed and \c modified flags have been updated according 2761 to the state of the mappings. 2762 The page's cache must be locked. 2763 */ 2764 void 2765 vm_remove_all_page_mappings(vm_page* page) 2766 { 2767 while (vm_page_mapping* mapping = page->mappings.Head()) { 2768 VMArea* area = mapping->area; 2769 VMTranslationMap* map = area->address_space->TranslationMap(); 2770 addr_t address = virtual_page_address(area, page); 2771 map->UnmapPage(area, address, false); 2772 } 2773 } 2774 2775 2776 int32 2777 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2778 { 2779 int32 count = 0; 2780 2781 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2782 vm_page_mapping* mapping; 2783 while ((mapping = iterator.Next()) != NULL) { 2784 VMArea* area = mapping->area; 2785 VMTranslationMap* map = area->address_space->TranslationMap(); 2786 2787 bool modified; 2788 if (map->ClearAccessedAndModified(area, 2789 virtual_page_address(area, page), false, modified)) { 2790 count++; 2791 } 2792 2793 page->modified |= modified; 2794 } 2795 2796 2797 if (page->accessed) { 2798 count++; 2799 page->accessed = false; 2800 } 2801 2802 return count; 2803 } 2804 2805 2806 /*! Removes all mappings of a page and/or clears the accessed bits of the 2807 mappings. 2808 The function iterates through the page mappings and removes them until 2809 encountering one that has been accessed. From then on it will continue to 2810 iterate, but only clear the accessed flag of the mapping. The page's 2811 \c modified bit will be updated accordingly, the \c accessed bit will be 2812 cleared. 2813 \return The number of mapping accessed bits encountered, including the 2814 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2815 of the page have been removed. 
2816 */ 2817 int32 2818 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 2819 { 2820 ASSERT(page->WiredCount() == 0); 2821 2822 if (page->accessed) 2823 return vm_clear_page_mapping_accessed_flags(page); 2824 2825 while (vm_page_mapping* mapping = page->mappings.Head()) { 2826 VMArea* area = mapping->area; 2827 VMTranslationMap* map = area->address_space->TranslationMap(); 2828 addr_t address = virtual_page_address(area, page); 2829 bool modified = false; 2830 if (map->ClearAccessedAndModified(area, address, true, modified)) { 2831 page->accessed = true; 2832 page->modified |= modified; 2833 return vm_clear_page_mapping_accessed_flags(page); 2834 } 2835 page->modified |= modified; 2836 } 2837 2838 return 0; 2839 } 2840 2841 2842 static int 2843 display_mem(int argc, char** argv) 2844 { 2845 bool physical = false; 2846 addr_t copyAddress; 2847 int32 displayWidth; 2848 int32 itemSize; 2849 int32 num = -1; 2850 addr_t address; 2851 int i = 1, j; 2852 2853 if (argc > 1 && argv[1][0] == '-') { 2854 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2855 physical = true; 2856 i++; 2857 } else 2858 i = 99; 2859 } 2860 2861 if (argc < i + 1 || argc > i + 2) { 2862 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 2863 "\tdl - 8 bytes\n" 2864 "\tdw - 4 bytes\n" 2865 "\tds - 2 bytes\n" 2866 "\tdb - 1 byte\n" 2867 "\tstring - a whole string\n" 2868 " -p or --physical only allows memory from a single page to be " 2869 "displayed.\n"); 2870 return 0; 2871 } 2872 2873 address = parse_expression(argv[i]); 2874 2875 if (argc > i + 1) 2876 num = parse_expression(argv[i + 1]); 2877 2878 // build the format string 2879 if (strcmp(argv[0], "db") == 0) { 2880 itemSize = 1; 2881 displayWidth = 16; 2882 } else if (strcmp(argv[0], "ds") == 0) { 2883 itemSize = 2; 2884 displayWidth = 8; 2885 } else if (strcmp(argv[0], "dw") == 0) { 2886 itemSize = 4; 2887 displayWidth = 4; 2888 } else if (strcmp(argv[0], "dl") == 0) { 2889 itemSize = 8; 2890 displayWidth = 2; 2891 } else if (strcmp(argv[0], "string") == 0) { 2892 itemSize = 1; 2893 displayWidth = -1; 2894 } else { 2895 kprintf("display_mem called in an invalid way!\n"); 2896 return 0; 2897 } 2898 2899 if (num <= 0) 2900 num = displayWidth; 2901 2902 void* physicalPageHandle = NULL; 2903 2904 if (physical) { 2905 int32 offset = address & (B_PAGE_SIZE - 1); 2906 if (num * itemSize + offset > B_PAGE_SIZE) { 2907 num = (B_PAGE_SIZE - offset) / itemSize; 2908 kprintf("NOTE: number of bytes has been cut to page size\n"); 2909 } 2910 2911 address = ROUNDDOWN(address, B_PAGE_SIZE); 2912 2913 if (vm_get_physical_page_debug(address, ©Address, 2914 &physicalPageHandle) != B_OK) { 2915 kprintf("getting the hardware page failed."); 2916 return 0; 2917 } 2918 2919 address += offset; 2920 copyAddress += offset; 2921 } else 2922 copyAddress = address; 2923 2924 if (!strcmp(argv[0], "string")) { 2925 kprintf("%p \"", (char*)copyAddress); 2926 2927 // string mode 2928 for (i = 0; true; i++) { 2929 char c; 2930 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 2931 != B_OK 2932 || c == '\0') { 2933 break; 2934 } 2935 2936 if (c == '\n') 2937 kprintf("\\n"); 2938 else if (c == '\t') 2939 kprintf("\\t"); 2940 else { 2941 if (!isprint(c)) 2942 c = '.'; 2943 2944 kprintf("%c", c); 2945 } 2946 } 2947 2948 kprintf("\"\n"); 2949 } else { 2950 // number mode 2951 for (i = 0; i < num; i++) { 2952 uint32 value; 2953 2954 if ((i % displayWidth) == 0) { 2955 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 2956 if (i != 0) 
2957 kprintf("\n"); 2958 2959 kprintf("[0x%lx] ", address + i * itemSize); 2960 2961 for (j = 0; j < displayed; j++) { 2962 char c; 2963 if (debug_memcpy(B_CURRENT_TEAM, &c, 2964 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 2965 displayed = j; 2966 break; 2967 } 2968 if (!isprint(c)) 2969 c = '.'; 2970 2971 kprintf("%c", c); 2972 } 2973 if (num > displayWidth) { 2974 // make sure the spacing in the last line is correct 2975 for (j = displayed; j < displayWidth * itemSize; j++) 2976 kprintf(" "); 2977 } 2978 kprintf(" "); 2979 } 2980 2981 if (debug_memcpy(B_CURRENT_TEAM, &value, 2982 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 2983 kprintf("read fault"); 2984 break; 2985 } 2986 2987 switch (itemSize) { 2988 case 1: 2989 kprintf(" %02" B_PRIx8, *(uint8*)&value); 2990 break; 2991 case 2: 2992 kprintf(" %04" B_PRIx16, *(uint16*)&value); 2993 break; 2994 case 4: 2995 kprintf(" %08" B_PRIx32, *(uint32*)&value); 2996 break; 2997 case 8: 2998 kprintf(" %016" B_PRIx64, *(uint64*)&value); 2999 break; 3000 } 3001 } 3002 3003 kprintf("\n"); 3004 } 3005 3006 if (physical) { 3007 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3008 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3009 } 3010 return 0; 3011 } 3012 3013 3014 static void 3015 dump_cache_tree_recursively(VMCache* cache, int level, 3016 VMCache* highlightCache) 3017 { 3018 // print this cache 3019 for (int i = 0; i < level; i++) 3020 kprintf(" "); 3021 if (cache == highlightCache) 3022 kprintf("%p <--\n", cache); 3023 else 3024 kprintf("%p\n", cache); 3025 3026 // recursively print its consumers 3027 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3028 VMCache* consumer = it.Next();) { 3029 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3030 } 3031 } 3032 3033 3034 static int 3035 dump_cache_tree(int argc, char** argv) 3036 { 3037 if (argc != 2 || !strcmp(argv[1], "--help")) { 3038 kprintf("usage: %s <address>\n", argv[0]); 3039 return 0; 3040 } 3041 3042 addr_t address = parse_expression(argv[1]); 3043 if (address == 0) 3044 return 0; 3045 3046 VMCache* cache = (VMCache*)address; 3047 VMCache* root = cache; 3048 3049 // find the root cache (the transitive source) 3050 while (root->source != NULL) 3051 root = root->source; 3052 3053 dump_cache_tree_recursively(root, 0, cache); 3054 3055 return 0; 3056 } 3057 3058 3059 const char* 3060 vm_cache_type_to_string(int32 type) 3061 { 3062 switch (type) { 3063 case CACHE_TYPE_RAM: 3064 return "RAM"; 3065 case CACHE_TYPE_DEVICE: 3066 return "device"; 3067 case CACHE_TYPE_VNODE: 3068 return "vnode"; 3069 case CACHE_TYPE_NULL: 3070 return "null"; 3071 3072 default: 3073 return "unknown"; 3074 } 3075 } 3076 3077 3078 #if DEBUG_CACHE_LIST 3079 3080 static void 3081 update_cache_info_recursively(VMCache* cache, cache_info& info) 3082 { 3083 info.page_count += cache->page_count; 3084 if (cache->type == CACHE_TYPE_RAM) 3085 info.committed += cache->committed_size; 3086 3087 // recurse 3088 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3089 VMCache* consumer = it.Next();) { 3090 update_cache_info_recursively(consumer, info); 3091 } 3092 } 3093 3094 3095 static int 3096 cache_info_compare_page_count(const void* _a, const void* _b) 3097 { 3098 const cache_info* a = (const cache_info*)_a; 3099 const cache_info* b = (const cache_info*)_b; 3100 if (a->page_count == b->page_count) 3101 return 0; 3102 return a->page_count < b->page_count ? 
1 : -1; 3103 } 3104 3105 3106 static int 3107 cache_info_compare_committed(const void* _a, const void* _b) 3108 { 3109 const cache_info* a = (const cache_info*)_a; 3110 const cache_info* b = (const cache_info*)_b; 3111 if (a->committed == b->committed) 3112 return 0; 3113 return a->committed < b->committed ? 1 : -1; 3114 } 3115 3116 3117 static void 3118 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3119 { 3120 for (int i = 0; i < level; i++) 3121 kprintf(" "); 3122 3123 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3124 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3125 cache->virtual_base, cache->virtual_end, cache->page_count); 3126 3127 if (level == 0) 3128 kprintf("/%lu", info.page_count); 3129 3130 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3131 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3132 3133 if (level == 0) 3134 kprintf("/%lu", info.committed); 3135 } 3136 3137 // areas 3138 if (cache->areas != NULL) { 3139 VMArea* area = cache->areas; 3140 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3141 area->name, area->address_space->ID()); 3142 3143 while (area->cache_next != NULL) { 3144 area = area->cache_next; 3145 kprintf(", %" B_PRId32, area->id); 3146 } 3147 } 3148 3149 kputs("\n"); 3150 3151 // recurse 3152 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3153 VMCache* consumer = it.Next();) { 3154 dump_caches_recursively(consumer, info, level + 1); 3155 } 3156 } 3157 3158 3159 static int 3160 dump_caches(int argc, char** argv) 3161 { 3162 if (sCacheInfoTable == NULL) { 3163 kprintf("No cache info table!\n"); 3164 return 0; 3165 } 3166 3167 bool sortByPageCount = true; 3168 3169 for (int32 i = 1; i < argc; i++) { 3170 if (strcmp(argv[i], "-c") == 0) { 3171 sortByPageCount = false; 3172 } else { 3173 print_debugger_command_usage(argv[0]); 3174 return 0; 3175 } 3176 } 3177 3178 uint32 totalCount = 0; 3179 uint32 rootCount = 0; 3180 off_t totalCommitted = 0; 3181 page_num_t totalPages = 0; 3182 3183 VMCache* cache = gDebugCacheList; 3184 while (cache) { 3185 totalCount++; 3186 if (cache->source == NULL) { 3187 cache_info stackInfo; 3188 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3189 ? sCacheInfoTable[rootCount] : stackInfo; 3190 rootCount++; 3191 info.cache = cache; 3192 info.page_count = 0; 3193 info.committed = 0; 3194 update_cache_info_recursively(cache, info); 3195 totalCommitted += info.committed; 3196 totalPages += info.page_count; 3197 } 3198 3199 cache = cache->debug_next; 3200 } 3201 3202 if (rootCount <= (uint32)kCacheInfoTableCount) { 3203 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3204 sortByPageCount 3205 ? &cache_info_compare_page_count 3206 : &cache_info_compare_committed); 3207 } 3208 3209 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3210 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3211 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3212 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3213 "page count" : "committed size"); 3214 3215 if (rootCount <= (uint32)kCacheInfoTableCount) { 3216 for (uint32 i = 0; i < rootCount; i++) { 3217 cache_info& info = sCacheInfoTable[i]; 3218 dump_caches_recursively(info.cache, info, 0); 3219 } 3220 } else 3221 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3222 3223 return 0; 3224 } 3225 3226 #endif // DEBUG_CACHE_LIST 3227 3228 3229 static int 3230 dump_cache(int argc, char** argv) 3231 { 3232 VMCache* cache; 3233 bool showPages = false; 3234 int i = 1; 3235 3236 if (argc < 2 || !strcmp(argv[1], "--help")) { 3237 kprintf("usage: %s [-ps] <address>\n" 3238 " if -p is specified, all pages are shown, if -s is used\n" 3239 " only the cache info is shown respectively.\n", argv[0]); 3240 return 0; 3241 } 3242 while (argv[i][0] == '-') { 3243 char* arg = argv[i] + 1; 3244 while (arg[0]) { 3245 if (arg[0] == 'p') 3246 showPages = true; 3247 arg++; 3248 } 3249 i++; 3250 } 3251 if (argv[i] == NULL) { 3252 kprintf("%s: invalid argument, pass address\n", argv[0]); 3253 return 0; 3254 } 3255 3256 addr_t address = parse_expression(argv[i]); 3257 if (address == 0) 3258 return 0; 3259 3260 cache = (VMCache*)address; 3261 3262 cache->Dump(showPages); 3263 3264 set_debug_variable("_sourceCache", (addr_t)cache->source); 3265 3266 return 0; 3267 } 3268 3269 3270 static void 3271 dump_area_struct(VMArea* area, bool mappings) 3272 { 3273 kprintf("AREA: %p\n", area); 3274 kprintf("name:\t\t'%s'\n", area->name); 3275 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3276 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3277 kprintf("base:\t\t0x%lx\n", area->Base()); 3278 kprintf("size:\t\t0x%lx\n", area->Size()); 3279 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3280 kprintf("wiring:\t\t0x%x\n", area->wiring); 3281 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3282 kprintf("cache:\t\t%p\n", area->cache); 3283 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3284 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3285 kprintf("cache_next:\t%p\n", area->cache_next); 3286 kprintf("cache_prev:\t%p\n", area->cache_prev); 3287 3288 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3289 if (mappings) { 3290 kprintf("page mappings:\n"); 3291 while (iterator.HasNext()) { 3292 vm_page_mapping* mapping = iterator.Next(); 3293 kprintf(" %p", mapping->page); 3294 } 3295 kprintf("\n"); 3296 } else { 3297 uint32 count = 0; 3298 while (iterator.Next() != NULL) { 3299 count++; 3300 } 3301 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3302 } 3303 } 3304 3305 3306 static int 3307 dump_area(int argc, char** argv) 3308 { 3309 bool mappings = false; 3310 bool found = false; 3311 int32 index = 1; 3312 VMArea* area; 3313 addr_t num; 3314 3315 if (argc < 2 || !strcmp(argv[1], "--help")) { 3316 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3317 "All areas matching either id/address/name are listed. 
You can\n" 3318 "force to check only a specific item by prefixing the specifier\n" 3319 "with the id/contains/address/name keywords.\n" 3320 "-m shows the area's mappings as well.\n"); 3321 return 0; 3322 } 3323 3324 if (!strcmp(argv[1], "-m")) { 3325 mappings = true; 3326 index++; 3327 } 3328 3329 int32 mode = 0xf; 3330 if (!strcmp(argv[index], "id")) 3331 mode = 1; 3332 else if (!strcmp(argv[index], "contains")) 3333 mode = 2; 3334 else if (!strcmp(argv[index], "name")) 3335 mode = 4; 3336 else if (!strcmp(argv[index], "address")) 3337 mode = 0; 3338 if (mode != 0xf) 3339 index++; 3340 3341 if (index >= argc) { 3342 kprintf("No area specifier given.\n"); 3343 return 0; 3344 } 3345 3346 num = parse_expression(argv[index]); 3347 3348 if (mode == 0) { 3349 dump_area_struct((struct VMArea*)num, mappings); 3350 } else { 3351 // walk through the area list, looking for the arguments as a name 3352 3353 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3354 while ((area = it.Next()) != NULL) { 3355 if (((mode & 4) != 0 && area->name != NULL 3356 && !strcmp(argv[index], area->name)) 3357 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3358 || (((mode & 2) != 0 && area->Base() <= num 3359 && area->Base() + area->Size() > num))))) { 3360 dump_area_struct(area, mappings); 3361 found = true; 3362 } 3363 } 3364 3365 if (!found) 3366 kprintf("could not find area %s (%ld)\n", argv[index], num); 3367 } 3368 3369 return 0; 3370 } 3371 3372 3373 static int 3374 dump_area_list(int argc, char** argv) 3375 { 3376 VMArea* area; 3377 const char* name = NULL; 3378 int32 id = 0; 3379 3380 if (argc > 1) { 3381 id = parse_expression(argv[1]); 3382 if (id == 0) 3383 name = argv[1]; 3384 } 3385 3386 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3387 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3388 B_PRINTF_POINTER_WIDTH, "size"); 3389 3390 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3391 while ((area = it.Next()) != NULL) { 3392 if ((id != 0 && area->address_space->ID() != id) 3393 || (name != NULL && strstr(area->name, name) == NULL)) 3394 continue; 3395 3396 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3397 area->id, (void*)area->Base(), (void*)area->Size(), 3398 area->protection, area->wiring, area->name); 3399 } 3400 return 0; 3401 } 3402 3403 3404 static int 3405 dump_available_memory(int argc, char** argv) 3406 { 3407 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3408 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3409 return 0; 3410 } 3411 3412 3413 /*! Deletes all areas and reserved regions in the given address space. 3414 3415 The caller must ensure that none of the areas has any wired ranges. 3416 3417 \param addressSpace The address space. 3418 \param deletingAddressSpace \c true, if the address space is in the process 3419 of being deleted. 
3420 */ 3421 void 3422 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3423 { 3424 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3425 addressSpace->ID())); 3426 3427 addressSpace->WriteLock(); 3428 3429 // remove all reserved areas in this address space 3430 addressSpace->UnreserveAllAddressRanges(0); 3431 3432 // delete all the areas in this address space 3433 while (VMArea* area = addressSpace->FirstArea()) { 3434 ASSERT(!area->IsWired()); 3435 delete_area(addressSpace, area, deletingAddressSpace); 3436 } 3437 3438 addressSpace->WriteUnlock(); 3439 } 3440 3441 3442 static area_id 3443 vm_area_for(addr_t address, bool kernel) 3444 { 3445 team_id team; 3446 if (IS_USER_ADDRESS(address)) { 3447 // we try the user team address space, if any 3448 team = VMAddressSpace::CurrentID(); 3449 if (team < 0) 3450 return team; 3451 } else 3452 team = VMAddressSpace::KernelID(); 3453 3454 AddressSpaceReadLocker locker(team); 3455 if (!locker.IsLocked()) 3456 return B_BAD_TEAM_ID; 3457 3458 VMArea* area = locker.AddressSpace()->LookupArea(address); 3459 if (area != NULL) { 3460 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3461 return B_ERROR; 3462 3463 return area->id; 3464 } 3465 3466 return B_ERROR; 3467 } 3468 3469 3470 /*! Frees physical pages that were used during the boot process. 3471 \a end is inclusive. 3472 */ 3473 static void 3474 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3475 { 3476 // free all physical pages in the specified range 3477 3478 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3479 phys_addr_t physicalAddress; 3480 uint32 flags; 3481 3482 if (map->Query(current, &physicalAddress, &flags) == B_OK 3483 && (flags & PAGE_PRESENT) != 0) { 3484 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3485 if (page != NULL && page->State() != PAGE_STATE_FREE 3486 && page->State() != PAGE_STATE_CLEAR 3487 && page->State() != PAGE_STATE_UNUSED) { 3488 DEBUG_PAGE_ACCESS_START(page); 3489 vm_page_set_state(page, PAGE_STATE_FREE); 3490 } 3491 } 3492 } 3493 3494 // unmap the memory 3495 map->Unmap(start, end); 3496 } 3497 3498 3499 void 3500 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3501 { 3502 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3503 addr_t end = start + (size - 1); 3504 addr_t lastEnd = start; 3505 3506 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3507 (void*)start, (void*)end)); 3508 3509 // The areas are sorted in virtual address space order, so 3510 // we just have to find the holes between them that fall 3511 // into the area we should dispose 3512 3513 map->Lock(); 3514 3515 for (VMAddressSpace::AreaIterator it 3516 = VMAddressSpace::Kernel()->GetAreaIterator(); 3517 VMArea* area = it.Next();) { 3518 addr_t areaStart = area->Base(); 3519 addr_t areaEnd = areaStart + (area->Size() - 1); 3520 3521 if (areaEnd < start) 3522 continue; 3523 3524 if (areaStart > end) { 3525 // we are done, the area is already beyond of what we have to free 3526 break; 3527 } 3528 3529 if (areaStart > lastEnd) { 3530 // this is something we can free 3531 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3532 (void*)areaStart)); 3533 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3534 } 3535 3536 if (areaEnd >= end) { 3537 lastEnd = areaEnd; 3538 // no +1 to prevent potential overflow 3539 break; 3540 } 3541 3542 lastEnd = areaEnd + 1; 3543 } 3544 3545 if (lastEnd < 
end) {
3546 // we can also get rid of some space at the end of the area
3547 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3548 (void*)end));
3549 unmap_and_free_physical_pages(map, lastEnd, end);
3550 } 3551
3552 map->Unlock();
3553 } 3554 3555
3556 static void
3557 create_preloaded_image_areas(struct preloaded_image* _image)
3558 {
3559 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3560 char name[B_OS_NAME_LENGTH];
3561 void* address;
3562 int32 length; 3563
3564 // use file name to create a good area name
3565 char* fileName = strrchr(image->name, '/');
3566 if (fileName == NULL)
3567 fileName = image->name;
3568 else
3569 fileName++; 3570
3571 length = strlen(fileName);
3572 // make sure there is enough space for the suffix
3573 if (length > 25)
3574 length = 25; 3575
3576 memcpy(name, fileName, length);
3577 strcpy(name + length, "_text");
3578 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3579 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3580 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3581 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3582 // this will later be remapped read-only/executable by the
3583 // ELF initialization code 3584
3585 strcpy(name + length, "_data");
3586 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3587 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3588 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3589 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3590 } 3591 3592
3593 /*! Frees all previously allocated kernel arguments areas from the kernel_args structure.
3594 Any boot loader resources contained in those arguments must not be accessed
3595 anymore past this point.
3596 */
3597 void
3598 vm_free_kernel_args(kernel_args* args)
3599 {
3600 uint32 i; 3601
3602 TRACE(("vm_free_kernel_args()\n")); 3603
3604 for (i = 0; i < args->num_kernel_args_ranges; i++) {
3605 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3606 if (area >= B_OK)
3607 delete_area(area);
3608 }
3609 } 3610 3611
3612 static void
3613 allocate_kernel_args(kernel_args* args)
3614 {
3615 TRACE(("allocate_kernel_args()\n")); 3616
3617 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3618 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3619
3620 create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3621 args->kernel_args_range[i].size, B_ALREADY_WIRED,
3622 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3623 }
3624 } 3625 3626
3627 static void
3628 unreserve_boot_loader_ranges(kernel_args* args)
3629 {
3630 TRACE(("unreserve_boot_loader_ranges()\n")); 3631
3632 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3633 vm_unreserve_address_range(VMAddressSpace::KernelID(),
3634 (void*)(addr_t)args->virtual_allocated_range[i].start,
3635 args->virtual_allocated_range[i].size);
3636 }
3637 } 3638 3639
3640 static void
3641 reserve_boot_loader_ranges(kernel_args* args)
3642 {
3643 TRACE(("reserve_boot_loader_ranges()\n")); 3644
3645 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3646 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3647
3648 // If the address is not a kernel address, we just skip it. The
3649 // architecture-specific code has to deal with it.
3650 if (!IS_KERNEL_ADDRESS(address)) { 3651 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3652 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3653 continue; 3654 } 3655 3656 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3657 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3658 if (status < B_OK) 3659 panic("could not reserve boot loader ranges\n"); 3660 } 3661 } 3662 3663 3664 static addr_t 3665 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3666 { 3667 size = PAGE_ALIGN(size); 3668 3669 // find a slot in the virtual allocation addr range 3670 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3671 // check to see if the space between this one and the last is big enough 3672 addr_t rangeStart = args->virtual_allocated_range[i].start; 3673 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3674 + args->virtual_allocated_range[i - 1].size; 3675 3676 addr_t base = alignment > 0 3677 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3678 3679 if (base >= KERNEL_BASE && base < rangeStart 3680 && rangeStart - base >= size) { 3681 args->virtual_allocated_range[i - 1].size 3682 += base + size - previousRangeEnd; 3683 return base; 3684 } 3685 } 3686 3687 // we hadn't found one between allocation ranges. this is ok. 3688 // see if there's a gap after the last one 3689 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3690 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3691 + args->virtual_allocated_range[lastEntryIndex].size; 3692 addr_t base = alignment > 0 3693 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3694 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3695 args->virtual_allocated_range[lastEntryIndex].size 3696 += base + size - lastRangeEnd; 3697 return base; 3698 } 3699 3700 // see if there's a gap before the first one 3701 addr_t rangeStart = args->virtual_allocated_range[0].start; 3702 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3703 base = rangeStart - size; 3704 if (alignment > 0) 3705 base = ROUNDDOWN(base, alignment); 3706 3707 if (base >= KERNEL_BASE) { 3708 args->virtual_allocated_range[0].start = base; 3709 args->virtual_allocated_range[0].size += rangeStart - base; 3710 return base; 3711 } 3712 } 3713 3714 return 0; 3715 } 3716 3717 3718 static bool 3719 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 3720 { 3721 // TODO: horrible brute-force method of determining if the page can be 3722 // allocated 3723 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3724 if (address >= args->physical_memory_range[i].start 3725 && address < args->physical_memory_range[i].start 3726 + args->physical_memory_range[i].size) 3727 return true; 3728 } 3729 return false; 3730 } 3731 3732 3733 page_num_t 3734 vm_allocate_early_physical_page(kernel_args* args) 3735 { 3736 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3737 phys_addr_t nextPage; 3738 3739 nextPage = args->physical_allocated_range[i].start 3740 + args->physical_allocated_range[i].size; 3741 // see if the page after the next allocated paddr run can be allocated 3742 if (i + 1 < args->num_physical_allocated_ranges 3743 && args->physical_allocated_range[i + 1].size != 0) { 3744 // see if the next page will collide with the next allocated range 3745 if (nextPage >= args->physical_allocated_range[i+1].start) 3746 continue; 3747 } 3748 // see if the next physical page 
fits in the memory block 3749 if (is_page_in_physical_memory_range(args, nextPage)) { 3750 // we got one! 3751 args->physical_allocated_range[i].size += B_PAGE_SIZE; 3752 return nextPage / B_PAGE_SIZE; 3753 } 3754 } 3755 3756 return 0; 3757 // could not allocate a block 3758 } 3759 3760 3761 /*! This one uses the kernel_args' physical and virtual memory ranges to 3762 allocate some pages before the VM is completely up. 3763 */ 3764 addr_t 3765 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 3766 uint32 attributes, addr_t alignment) 3767 { 3768 if (physicalSize > virtualSize) 3769 physicalSize = virtualSize; 3770 3771 // find the vaddr to allocate at 3772 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 3773 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 3774 3775 // map the pages 3776 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 3777 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 3778 if (physicalAddress == 0) 3779 panic("error allocating early page!\n"); 3780 3781 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 3782 3783 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 3784 physicalAddress * B_PAGE_SIZE, attributes, 3785 &vm_allocate_early_physical_page); 3786 } 3787 3788 return virtualBase; 3789 } 3790 3791 3792 /*! The main entrance point to initialize the VM. */ 3793 status_t 3794 vm_init(kernel_args* args) 3795 { 3796 struct preloaded_image* image; 3797 void* address; 3798 status_t err = 0; 3799 uint32 i; 3800 3801 TRACE(("vm_init: entry\n")); 3802 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 3803 err = arch_vm_init(args); 3804 3805 // initialize some globals 3806 vm_page_init_num_pages(args); 3807 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 3808 3809 slab_init(args); 3810 3811 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3812 size_t heapSize = INITIAL_HEAP_SIZE; 3813 // try to accomodate low memory systems 3814 while (heapSize > sAvailableMemory / 8) 3815 heapSize /= 2; 3816 if (heapSize < 1024 * 1024) 3817 panic("vm_init: go buy some RAM please."); 3818 3819 // map in the new heap and initialize it 3820 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 3821 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3822 TRACE(("heap at 0x%lx\n", heapBase)); 3823 heap_init(heapBase, heapSize); 3824 #endif 3825 3826 // initialize the free page list and physical page mapper 3827 vm_page_init(args); 3828 3829 // initialize the cache allocators 3830 vm_cache_init(args); 3831 3832 { 3833 status_t error = VMAreaHash::Init(); 3834 if (error != B_OK) 3835 panic("vm_init: error initializing area hash table\n"); 3836 } 3837 3838 VMAddressSpace::Init(); 3839 reserve_boot_loader_ranges(args); 3840 3841 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3842 heap_init_post_area(); 3843 #endif 3844 3845 // Do any further initialization that the architecture dependant layers may 3846 // need now 3847 arch_vm_translation_map_init_post_area(args); 3848 arch_vm_init_post_area(args); 3849 vm_page_init_post_area(args); 3850 slab_init_post_area(); 3851 3852 // allocate areas to represent stuff that already exists 3853 3854 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3855 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 3856 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 3857 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3858 #endif 3859 3860 
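// create areas for the kernel_args ranges, so that the memory they occupy
// is represented in the VM and won't be reused before vm_free_kernel_args()
// releases it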
allocate_kernel_args(args); 3861 3862 create_preloaded_image_areas(args->kernel_image); 3863 3864 // allocate areas for preloaded images 3865 for (image = args->preloaded_images; image != NULL; image = image->next) 3866 create_preloaded_image_areas(image); 3867 3868 // allocate kernel stacks 3869 for (i = 0; i < args->num_cpus; i++) { 3870 char name[64]; 3871 3872 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 3873 address = (void*)args->cpu_kstack[i].start; 3874 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3875 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3876 } 3877 3878 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3879 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3880 3881 #if PARANOID_KERNEL_MALLOC 3882 vm_block_address_range("uninitialized heap memory", 3883 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3884 #endif 3885 #if PARANOID_KERNEL_FREE 3886 vm_block_address_range("freed heap memory", 3887 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3888 #endif 3889 3890 // create the object cache for the page mappings 3891 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3892 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3893 NULL, NULL); 3894 if (gPageMappingsObjectCache == NULL) 3895 panic("failed to create page mappings object cache"); 3896 3897 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3898 3899 #if DEBUG_CACHE_LIST 3900 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 3901 virtual_address_restrictions virtualRestrictions = {}; 3902 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 3903 physical_address_restrictions physicalRestrictions = {}; 3904 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 3905 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3906 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 3907 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 3908 &physicalRestrictions, (void**)&sCacheInfoTable); 3909 } 3910 #endif // DEBUG_CACHE_LIST 3911 3912 // add some debugger commands 3913 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3914 add_debugger_command("area", &dump_area, 3915 "Dump info about a particular area"); 3916 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3917 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3918 #if DEBUG_CACHE_LIST 3919 if (sCacheInfoTable != NULL) { 3920 add_debugger_command_etc("caches", &dump_caches, 3921 "List all VMCache trees", 3922 "[ \"-c\" ]\n" 3923 "All cache trees are listed sorted in decreasing order by number " 3924 "of\n" 3925 "used pages or, if \"-c\" is specified, by size of committed " 3926 "memory.\n", 3927 0); 3928 } 3929 #endif 3930 add_debugger_command("avail", &dump_available_memory, 3931 "Dump available memory"); 3932 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3933 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3934 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3935 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3936 add_debugger_command("string", &display_mem, "dump strings"); 3937 3938 TRACE(("vm_init: exit\n")); 3939 3940 vm_cache_init_post_heap(); 3941 3942 return err; 3943 } 3944 3945 3946 status_t 3947 vm_init_post_sem(kernel_args* args) 3948 { 3949 // This frees all unused boot loader 
resources and makes its space available 3950 // again 3951 arch_vm_init_end(args); 3952 unreserve_boot_loader_ranges(args); 3953 3954 // fill in all of the semaphores that were not allocated before 3955 // since we're still single threaded and only the kernel address space 3956 // exists, it isn't that hard to find all of the ones we need to create 3957 3958 arch_vm_translation_map_init_post_sem(args); 3959 3960 slab_init_post_sem(); 3961 3962 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3963 heap_init_post_sem(); 3964 #endif 3965 3966 return B_OK; 3967 } 3968 3969 3970 status_t 3971 vm_init_post_thread(kernel_args* args) 3972 { 3973 vm_page_init_post_thread(args); 3974 slab_init_post_thread(); 3975 return heap_init_post_thread(); 3976 } 3977 3978 3979 status_t 3980 vm_init_post_modules(kernel_args* args) 3981 { 3982 return arch_vm_init_post_modules(args); 3983 } 3984 3985 3986 void 3987 permit_page_faults(void) 3988 { 3989 Thread* thread = thread_get_current_thread(); 3990 if (thread != NULL) 3991 atomic_add(&thread->page_faults_allowed, 1); 3992 } 3993 3994 3995 void 3996 forbid_page_faults(void) 3997 { 3998 Thread* thread = thread_get_current_thread(); 3999 if (thread != NULL) 4000 atomic_add(&thread->page_faults_allowed, -1); 4001 } 4002 4003 4004 status_t 4005 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4006 bool isUser, addr_t* newIP) 4007 { 4008 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4009 faultAddress)); 4010 4011 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4012 4013 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4014 VMAddressSpace* addressSpace = NULL; 4015 4016 status_t status = B_OK; 4017 *newIP = 0; 4018 atomic_add((int32*)&sPageFaults, 1); 4019 4020 if (IS_KERNEL_ADDRESS(pageAddress)) { 4021 addressSpace = VMAddressSpace::GetKernel(); 4022 } else if (IS_USER_ADDRESS(pageAddress)) { 4023 addressSpace = VMAddressSpace::GetCurrent(); 4024 if (addressSpace == NULL) { 4025 if (!isUser) { 4026 dprintf("vm_page_fault: kernel thread accessing invalid user " 4027 "memory!\n"); 4028 status = B_BAD_ADDRESS; 4029 TPF(PageFaultError(-1, 4030 VMPageFaultTracing 4031 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4032 } else { 4033 // XXX weird state. 
4034 panic("vm_page_fault: non kernel thread accessing user memory " 4035 "that doesn't exist!\n"); 4036 status = B_BAD_ADDRESS; 4037 } 4038 } 4039 } else { 4040 // the hit was probably in the 64k DMZ between kernel and user space 4041 // this keeps a user space thread from passing a buffer that crosses 4042 // into kernel space 4043 status = B_BAD_ADDRESS; 4044 TPF(PageFaultError(-1, 4045 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4046 } 4047 4048 if (status == B_OK) { 4049 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4050 isUser, NULL); 4051 } 4052 4053 if (status < B_OK) { 4054 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4055 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n", 4056 strerror(status), address, faultAddress, isWrite, isUser, 4057 thread_get_current_thread_id()); 4058 if (!isUser) { 4059 Thread* thread = thread_get_current_thread(); 4060 if (thread != NULL && thread->fault_handler != 0) { 4061 // this will cause the arch dependant page fault handler to 4062 // modify the IP on the interrupt frame or whatever to return 4063 // to this address 4064 *newIP = thread->fault_handler; 4065 } else { 4066 // unhandled page fault in the kernel 4067 panic("vm_page_fault: unhandled page fault in kernel space at " 4068 "0x%lx, ip 0x%lx\n", address, faultAddress); 4069 } 4070 } else { 4071 #if 1 4072 // TODO: remove me once we have proper userland debugging support 4073 // (and tools) 4074 VMArea* area = NULL; 4075 if (addressSpace != NULL) { 4076 addressSpace->ReadLock(); 4077 area = addressSpace->LookupArea(faultAddress); 4078 } 4079 4080 Thread* thread = thread_get_current_thread(); 4081 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4082 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4083 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4084 thread->team->Name(), thread->team->id, 4085 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4086 faultAddress, area ? area->name : "???", faultAddress - (area ? 4087 area->Base() : 0x0)); 4088 4089 // We can print a stack trace of the userland thread here. 4090 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4091 // fault and someone is already waiting for a write lock on the same address 4092 // space. This thread will then try to acquire the lock again and will 4093 // be queued after the writer. 4094 # if 0 4095 if (area) { 4096 struct stack_frame { 4097 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4098 struct stack_frame* previous; 4099 void* return_address; 4100 #else 4101 // ... 
4102 #warning writeme 4103 #endif 4104 } frame; 4105 # ifdef __INTEL__ 4106 struct iframe* iframe = x86_get_user_iframe(); 4107 if (iframe == NULL) 4108 panic("iframe is NULL!"); 4109 4110 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4111 sizeof(struct stack_frame)); 4112 # elif defined(__POWERPC__) 4113 struct iframe* iframe = ppc_get_user_iframe(); 4114 if (iframe == NULL) 4115 panic("iframe is NULL!"); 4116 4117 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4118 sizeof(struct stack_frame)); 4119 # else 4120 # warning "vm_page_fault() stack trace won't work" 4121 status = B_ERROR; 4122 # endif 4123 4124 dprintf("stack trace:\n"); 4125 int32 maxFrames = 50; 4126 while (status == B_OK && --maxFrames >= 0 4127 && frame.return_address != NULL) { 4128 dprintf(" %p", frame.return_address); 4129 area = addressSpace->LookupArea( 4130 (addr_t)frame.return_address); 4131 if (area) { 4132 dprintf(" (%s + %#lx)", area->name, 4133 (addr_t)frame.return_address - area->Base()); 4134 } 4135 dprintf("\n"); 4136 4137 status = user_memcpy(&frame, frame.previous, 4138 sizeof(struct stack_frame)); 4139 } 4140 } 4141 # endif // 0 (stack trace) 4142 4143 if (addressSpace != NULL) 4144 addressSpace->ReadUnlock(); 4145 #endif 4146 4147 // If the thread has a signal handler for SIGSEGV, we simply 4148 // send it the signal. Otherwise we notify the user debugger 4149 // first. 4150 struct sigaction action; 4151 if ((sigaction(SIGSEGV, NULL, &action) == 0 4152 && action.sa_handler != SIG_DFL 4153 && action.sa_handler != SIG_IGN) 4154 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4155 SIGSEGV)) { 4156 Signal signal(SIGSEGV, 4157 status == B_PERMISSION_DENIED 4158 ? SEGV_ACCERR : SEGV_MAPERR, 4159 EFAULT, thread->team->id); 4160 signal.SetAddress((void*)address); 4161 send_signal_to_thread(thread, signal, 0); 4162 } 4163 } 4164 } 4165 4166 if (addressSpace != NULL) 4167 addressSpace->Put(); 4168 4169 return B_HANDLED_INTERRUPT; 4170 } 4171 4172 4173 struct PageFaultContext { 4174 AddressSpaceReadLocker addressSpaceLocker; 4175 VMCacheChainLocker cacheChainLocker; 4176 4177 VMTranslationMap* map; 4178 VMCache* topCache; 4179 off_t cacheOffset; 4180 vm_page_reservation reservation; 4181 bool isWrite; 4182 4183 // return values 4184 vm_page* page; 4185 bool restart; 4186 4187 4188 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4189 : 4190 addressSpaceLocker(addressSpace, true), 4191 map(addressSpace->TranslationMap()), 4192 isWrite(isWrite) 4193 { 4194 } 4195 4196 ~PageFaultContext() 4197 { 4198 UnlockAll(); 4199 vm_page_unreserve_pages(&reservation); 4200 } 4201 4202 void Prepare(VMCache* topCache, off_t cacheOffset) 4203 { 4204 this->topCache = topCache; 4205 this->cacheOffset = cacheOffset; 4206 page = NULL; 4207 restart = false; 4208 4209 cacheChainLocker.SetTo(topCache); 4210 } 4211 4212 void UnlockAll(VMCache* exceptCache = NULL) 4213 { 4214 topCache = NULL; 4215 addressSpaceLocker.Unlock(); 4216 cacheChainLocker.Unlock(exceptCache); 4217 } 4218 }; 4219 4220 4221 /*! Gets the page that should be mapped into the area. 4222 Returns an error code other than \c B_OK, if the page couldn't be found or 4223 paged in. The locking state of the address space and the caches is undefined 4224 in that case. 4225 Returns \c B_OK with \c context.restart set to \c true, if the functions 4226 had to unlock the address space and all caches and is supposed to be called 4227 again. 4228 Returns \c B_OK with \c context.restart set to \c false, if the page was 4229 found. 
It is returned in \c context.page. The address space will still be 4230 locked as well as all caches starting from the top cache to at least the 4231 cache the page lives in. 4232 */ 4233 static status_t 4234 fault_get_page(PageFaultContext& context) 4235 { 4236 VMCache* cache = context.topCache; 4237 VMCache* lastCache = NULL; 4238 vm_page* page = NULL; 4239 4240 while (cache != NULL) { 4241 // We already hold the lock of the cache at this point. 4242 4243 lastCache = cache; 4244 4245 page = cache->LookupPage(context.cacheOffset); 4246 if (page != NULL && page->busy) { 4247 // page must be busy -- wait for it to become unbusy 4248 context.UnlockAll(cache); 4249 cache->ReleaseRefLocked(); 4250 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4251 4252 // restart the whole process 4253 context.restart = true; 4254 return B_OK; 4255 } 4256 4257 if (page != NULL) 4258 break; 4259 4260 // The current cache does not contain the page we're looking for. 4261 4262 // see if the backing store has it 4263 if (cache->HasPage(context.cacheOffset)) { 4264 // insert a fresh page and mark it busy -- we're going to read it in 4265 page = vm_page_allocate_page(&context.reservation, 4266 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4267 cache->InsertPage(page, context.cacheOffset); 4268 4269 // We need to unlock all caches and the address space while reading 4270 // the page in. Keep a reference to the cache around. 4271 cache->AcquireRefLocked(); 4272 context.UnlockAll(); 4273 4274 // read the page in 4275 generic_io_vec vec; 4276 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4277 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4278 4279 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4280 B_PHYSICAL_IO_REQUEST, &bytesRead); 4281 4282 cache->Lock(); 4283 4284 if (status < B_OK) { 4285 // on error remove and free the page 4286 dprintf("reading page from cache %p returned: %s!\n", 4287 cache, strerror(status)); 4288 4289 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4290 cache->RemovePage(page); 4291 vm_page_set_state(page, PAGE_STATE_FREE); 4292 4293 cache->ReleaseRefAndUnlock(); 4294 return status; 4295 } 4296 4297 // mark the page unbusy again 4298 cache->MarkPageUnbusy(page); 4299 4300 DEBUG_PAGE_ACCESS_END(page); 4301 4302 // Since we needed to unlock everything temporarily, the area 4303 // situation might have changed. So we need to restart the whole 4304 // process. 4305 cache->ReleaseRefAndUnlock(); 4306 context.restart = true; 4307 return B_OK; 4308 } 4309 4310 cache = context.cacheChainLocker.LockSourceCache(); 4311 } 4312 4313 if (page == NULL) { 4314 // There was no adequate page, determine the cache for a clean one. 4315 // Read-only pages come in the deepest cache, only the top most cache 4316 // may have direct write access. 4317 cache = context.isWrite ? context.topCache : lastCache; 4318 4319 // allocate a clean page 4320 page = vm_page_allocate_page(&context.reservation, 4321 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4322 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4323 page->physical_page_number)); 4324 4325 // insert the new page into our cache 4326 cache->InsertPage(page, context.cacheOffset); 4327 } else if (page->Cache() != context.topCache && context.isWrite) { 4328 // We have a page that has the data we want, but in the wrong cache 4329 // object so we need to copy it and stick it into the top cache. 
4330 vm_page* sourcePage = page; 4331 4332 // TODO: If memory is low, it might be a good idea to steal the page 4333 // from our source cache -- if possible, that is. 4334 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4335 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4336 4337 // To not needlessly kill concurrency we unlock all caches but the top 4338 // one while copying the page. Lacking another mechanism to ensure that 4339 // the source page doesn't disappear, we mark it busy. 4340 sourcePage->busy = true; 4341 context.cacheChainLocker.UnlockKeepRefs(true); 4342 4343 // copy the page 4344 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4345 sourcePage->physical_page_number * B_PAGE_SIZE); 4346 4347 context.cacheChainLocker.RelockCaches(true); 4348 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4349 4350 // insert the new page into our cache 4351 context.topCache->InsertPage(page, context.cacheOffset); 4352 } else 4353 DEBUG_PAGE_ACCESS_START(page); 4354 4355 context.page = page; 4356 return B_OK; 4357 } 4358 4359 4360 /*! Makes sure the address in the given address space is mapped. 4361 4362 \param addressSpace The address space. 4363 \param originalAddress The address. Doesn't need to be page aligned. 4364 \param isWrite If \c true the address shall be write-accessible. 4365 \param isUser If \c true the access is requested by a userland team. 4366 \param wirePage On success, if non \c NULL, the wired count of the page 4367 mapped at the given address is incremented and the page is returned 4368 via this parameter. 4369 \param wiredRange If given, this wiredRange is ignored when checking whether 4370 an already mapped page at the virtual address can be unmapped. 4371 \return \c B_OK on success, another error code otherwise. 4372 */ 4373 static status_t 4374 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4375 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage, 4376 VMAreaWiredRange* wiredRange) 4377 { 4378 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4379 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4380 originalAddress, isWrite, isUser)); 4381 4382 PageFaultContext context(addressSpace, isWrite); 4383 4384 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4385 status_t status = B_OK; 4386 4387 addressSpace->IncrementFaultCount(); 4388 4389 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4390 // the pages upfront makes sure we don't have any cache locked, so that the 4391 // page daemon/thief can do their job without problems. 4392 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4393 originalAddress); 4394 context.addressSpaceLocker.Unlock(); 4395 vm_page_reserve_pages(&context.reservation, reservePages, 4396 addressSpace == VMAddressSpace::Kernel() 4397 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4398 4399 while (true) { 4400 context.addressSpaceLocker.Lock(); 4401 4402 // get the area the fault was in 4403 VMArea* area = addressSpace->LookupArea(address); 4404 if (area == NULL) { 4405 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4406 "space\n", originalAddress); 4407 TPF(PageFaultError(-1, 4408 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4409 status = B_BAD_ADDRESS; 4410 break; 4411 } 4412 4413 // check permissions 4414 uint32 protection = get_area_page_protection(area, address); 4415 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4416 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4417 area->id, (void*)originalAddress); 4418 TPF(PageFaultError(area->id, 4419 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4420 status = B_PERMISSION_DENIED; 4421 break; 4422 } 4423 if (isWrite && (protection 4424 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4425 dprintf("write access attempted on write-protected area 0x%" 4426 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4427 TPF(PageFaultError(area->id, 4428 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4429 status = B_PERMISSION_DENIED; 4430 break; 4431 } else if (isExecute && (protection 4432 & (B_EXECUTE_AREA 4433 | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4434 dprintf("instruction fetch attempted on execute-protected area 0x%" 4435 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4436 TPF(PageFaultError(area->id, 4437 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4438 status = B_PERMISSION_DENIED; 4439 break; 4440 } else if (!isWrite && !isExecute && (protection 4441 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4442 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4443 " at %p\n", area->id, (void*)originalAddress); 4444 TPF(PageFaultError(area->id, 4445 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4446 status = B_PERMISSION_DENIED; 4447 break; 4448 } 4449 4450 // We have the area, it was a valid access, so let's try to resolve the 4451 // page fault now. 4452 // At first, the top most cache from the area is investigated. 4453 4454 context.Prepare(vm_area_get_locked_cache(area), 4455 address - area->Base() + area->cache_offset); 4456 4457 // See if this cache has a fault handler -- this will do all the work 4458 // for us. 4459 { 4460 // Note, since the page fault is resolved with interrupts enabled, 4461 // the fault handler could be called more than once for the same 4462 // reason -- the store must take this into account. 4463 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4464 if (status != B_BAD_HANDLER) 4465 break; 4466 } 4467 4468 // The top most cache has no fault handler, so let's see if the cache or 4469 // its sources already have the page we're searching for (we're going 4470 // from top to bottom). 4471 status = fault_get_page(context); 4472 if (status != B_OK) { 4473 TPF(PageFaultError(area->id, status)); 4474 break; 4475 } 4476 4477 if (context.restart) 4478 continue; 4479 4480 // All went fine, all there is left to do is to map the page into the 4481 // address space. 
4482 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4483 context.page));
4484
4485 // If the page doesn't reside in the area's cache, we need to make sure
4486 // it's mapped in read-only, so that we cannot overwrite someone else's
4487 // data (copy-on-write)
4488 uint32 newProtection = protection;
4489 if (context.page->Cache() != context.topCache && !isWrite)
4490 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4491
4492 bool unmapPage = false;
4493 bool mapPage = true;
4494
4495 // check whether there's already a page mapped at the address
4496 context.map->Lock();
4497
4498 phys_addr_t physicalAddress;
4499 uint32 flags;
4500 vm_page* mappedPage = NULL;
4501 if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4502 && (flags & PAGE_PRESENT) != 0
4503 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4504 != NULL) {
4505 // Yep there's already a page. If it's ours, we can simply adjust
4506 // its protection. Otherwise we have to unmap it.
4507 if (mappedPage == context.page) {
4508 context.map->ProtectPage(area, address, newProtection);
4509 // Note: We assume that ProtectPage() is atomic (i.e.
4510 // the page isn't temporarily unmapped), otherwise we'd have
4511 // to make sure it isn't wired.
4512 mapPage = false;
4513 } else
4514 unmapPage = true;
4515 }
4516
4517 context.map->Unlock();
4518
4519 if (unmapPage) {
4520 // If the page is wired, we can't unmap it. Wait until it is unwired
4521 // again and restart.
4522 VMAreaUnwiredWaiter waiter;
4523 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4524 wiredRange)) {
4525 // unlock everything and wait
4526 context.UnlockAll();
4527 waiter.waitEntry.Wait();
4528 continue;
4529 }
4530
4531 // Note: The mapped page is a page of a lower cache. We are
4532 // guaranteed to have that cache locked, our new page is a copy of
4533 // that page, and the page is not busy. The logic for that guarantee
4534 // is as follows: Since the page is mapped, it must live in the top
4535 // cache (ruled out above) or any of its lower caches, and there is
4536 // (was before the new page was inserted) no other page in any
4537 // cache between the top cache and the page's cache (otherwise that
4538 // would be mapped instead). That in turn means that our algorithm
4539 // must have found it and therefore it cannot be busy either.
4540 DEBUG_PAGE_ACCESS_START(mappedPage);
4541 unmap_page(area, address);
4542 DEBUG_PAGE_ACCESS_END(mappedPage);
4543 }
4544
4545 if (mapPage) {
4546 if (map_page(area, context.page, address, newProtection,
4547 &context.reservation) != B_OK) {
4548 // Mapping can only fail when the page mapping object couldn't
4549 // be allocated. Save for the missing mapping everything is
4550 // fine, though. If this was a regular page fault, we'll simply
4551 // leave and probably fault again. To make sure we'll have more
4552 // luck then, we ensure that the minimum object reserve is
4553 // available.
4554 DEBUG_PAGE_ACCESS_END(context.page);
4555
4556 context.UnlockAll();
4557
4558 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4559 != B_OK) {
4560 // Apparently the situation is serious. Let's get ourselves
4561 // killed.
4562 status = B_NO_MEMORY;
4563 } else if (wirePage != NULL) {
4564 // The caller expects us to wire the page. Since
4565 // object_cache_reserve() succeeded, we should now be able
4566 // to allocate a mapping structure. Restart.
4567 continue; 4568 } 4569 4570 break; 4571 } 4572 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4573 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4574 4575 // also wire the page, if requested 4576 if (wirePage != NULL && status == B_OK) { 4577 increment_page_wired_count(context.page); 4578 *wirePage = context.page; 4579 } 4580 4581 DEBUG_PAGE_ACCESS_END(context.page); 4582 4583 break; 4584 } 4585 4586 return status; 4587 } 4588 4589 4590 status_t 4591 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4592 { 4593 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4594 } 4595 4596 status_t 4597 vm_put_physical_page(addr_t vaddr, void* handle) 4598 { 4599 return sPhysicalPageMapper->PutPage(vaddr, handle); 4600 } 4601 4602 4603 status_t 4604 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4605 void** _handle) 4606 { 4607 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4608 } 4609 4610 status_t 4611 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4612 { 4613 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4614 } 4615 4616 4617 status_t 4618 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4619 { 4620 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4621 } 4622 4623 status_t 4624 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4625 { 4626 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4627 } 4628 4629 4630 void 4631 vm_get_info(system_memory_info* info) 4632 { 4633 swap_get_info(info); 4634 4635 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4636 info->page_faults = sPageFaults; 4637 4638 MutexLocker locker(sAvailableMemoryLock); 4639 info->free_memory = sAvailableMemory; 4640 info->needed_memory = sNeededMemory; 4641 } 4642 4643 4644 uint32 4645 vm_num_page_faults(void) 4646 { 4647 return sPageFaults; 4648 } 4649 4650 4651 off_t 4652 vm_available_memory(void) 4653 { 4654 MutexLocker locker(sAvailableMemoryLock); 4655 return sAvailableMemory; 4656 } 4657 4658 4659 off_t 4660 vm_available_not_needed_memory(void) 4661 { 4662 MutexLocker locker(sAvailableMemoryLock); 4663 return sAvailableMemory - sNeededMemory; 4664 } 4665 4666 4667 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4668 debugger. 
4669 */ 4670 off_t 4671 vm_available_not_needed_memory_debug(void) 4672 { 4673 return sAvailableMemory - sNeededMemory; 4674 } 4675 4676 4677 size_t 4678 vm_kernel_address_space_left(void) 4679 { 4680 return VMAddressSpace::Kernel()->FreeSpace(); 4681 } 4682 4683 4684 void 4685 vm_unreserve_memory(size_t amount) 4686 { 4687 mutex_lock(&sAvailableMemoryLock); 4688 4689 sAvailableMemory += amount; 4690 4691 mutex_unlock(&sAvailableMemoryLock); 4692 } 4693 4694 4695 status_t 4696 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4697 { 4698 size_t reserve = kMemoryReserveForPriority[priority]; 4699 4700 MutexLocker locker(sAvailableMemoryLock); 4701 4702 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4703 4704 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4705 sAvailableMemory -= amount; 4706 return B_OK; 4707 } 4708 4709 if (timeout <= 0) 4710 return B_NO_MEMORY; 4711 4712 // turn timeout into an absolute timeout 4713 timeout += system_time(); 4714 4715 // loop until we've got the memory or the timeout occurs 4716 do { 4717 sNeededMemory += amount; 4718 4719 // call the low resource manager 4720 locker.Unlock(); 4721 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4722 B_ABSOLUTE_TIMEOUT, timeout); 4723 locker.Lock(); 4724 4725 sNeededMemory -= amount; 4726 4727 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4728 sAvailableMemory -= amount; 4729 return B_OK; 4730 } 4731 } while (timeout > system_time()); 4732 4733 return B_NO_MEMORY; 4734 } 4735 4736 4737 status_t 4738 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 4739 { 4740 // NOTE: The caller is responsible for synchronizing calls to this function! 4741 4742 AddressSpaceReadLocker locker; 4743 VMArea* area; 4744 status_t status = locker.SetFromArea(id, area); 4745 if (status != B_OK) 4746 return status; 4747 4748 // nothing to do, if the type doesn't change 4749 uint32 oldType = area->MemoryType(); 4750 if (type == oldType) 4751 return B_OK; 4752 4753 // set the memory type of the area and the mapped pages 4754 VMTranslationMap* map = area->address_space->TranslationMap(); 4755 map->Lock(); 4756 area->SetMemoryType(type); 4757 map->ProtectArea(area, area->protection); 4758 map->Unlock(); 4759 4760 // set the physical memory type 4761 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 4762 if (error != B_OK) { 4763 // reset the memory type of the area and the mapped pages 4764 map->Lock(); 4765 area->SetMemoryType(oldType); 4766 map->ProtectArea(area, area->protection); 4767 map->Unlock(); 4768 return error; 4769 } 4770 4771 return B_OK; 4772 4773 } 4774 4775 4776 /*! This function enforces some protection properties: 4777 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 4778 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well 4779 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 4780 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 4781 and B_KERNEL_WRITE_AREA. 
4782 */ 4783 static void 4784 fix_protection(uint32* protection) 4785 { 4786 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4787 if ((*protection & B_USER_PROTECTION) == 0 4788 || (*protection & B_WRITE_AREA) != 0) 4789 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4790 else 4791 *protection |= B_KERNEL_READ_AREA; 4792 if ((*protection & B_EXECUTE_AREA) != 0) 4793 *protection |= B_KERNEL_EXECUTE_AREA; 4794 } 4795 } 4796 4797 4798 static void 4799 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4800 { 4801 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4802 info->area = area->id; 4803 info->address = (void*)area->Base(); 4804 info->size = area->Size(); 4805 info->protection = area->protection; 4806 info->lock = B_FULL_LOCK; 4807 info->team = area->address_space->ID(); 4808 info->copy_count = 0; 4809 info->in_count = 0; 4810 info->out_count = 0; 4811 // TODO: retrieve real values here! 4812 4813 VMCache* cache = vm_area_get_locked_cache(area); 4814 4815 // Note, this is a simplification; the cache could be larger than this area 4816 info->ram_size = cache->page_count * B_PAGE_SIZE; 4817 4818 vm_area_put_locked_cache(cache); 4819 } 4820 4821 4822 static status_t 4823 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4824 { 4825 // is newSize a multiple of B_PAGE_SIZE? 4826 if (newSize & (B_PAGE_SIZE - 1)) 4827 return B_BAD_VALUE; 4828 4829 // lock all affected address spaces and the cache 4830 VMArea* area; 4831 VMCache* cache; 4832 4833 MultiAddressSpaceLocker locker; 4834 AreaCacheLocker cacheLocker; 4835 4836 status_t status; 4837 size_t oldSize; 4838 bool anyKernelArea; 4839 bool restart; 4840 4841 do { 4842 anyKernelArea = false; 4843 restart = false; 4844 4845 locker.Unset(); 4846 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4847 if (status != B_OK) 4848 return status; 4849 cacheLocker.SetTo(cache, true); // already locked 4850 4851 // enforce restrictions 4852 if (!kernel) { 4853 if ((area->protection & B_KERNEL_AREA) != 0) 4854 return B_NOT_ALLOWED; 4855 // TODO: Enforce all restrictions (team, etc.)! 4856 } 4857 4858 oldSize = area->Size(); 4859 if (newSize == oldSize) 4860 return B_OK; 4861 4862 if (cache->type != CACHE_TYPE_RAM) 4863 return B_NOT_ALLOWED; 4864 4865 if (oldSize < newSize) { 4866 // We need to check if all areas of this cache can be resized. 4867 for (VMArea* current = cache->areas; current != NULL; 4868 current = current->cache_next) { 4869 if (!current->address_space->CanResizeArea(current, newSize)) 4870 return B_ERROR; 4871 anyKernelArea 4872 |= current->address_space == VMAddressSpace::Kernel(); 4873 } 4874 } else { 4875 // We're shrinking the areas, so we must make sure the affected 4876 // ranges are not wired. 4877 for (VMArea* current = cache->areas; current != NULL; 4878 current = current->cache_next) { 4879 anyKernelArea 4880 |= current->address_space == VMAddressSpace::Kernel(); 4881 4882 if (wait_if_area_range_is_wired(current, 4883 current->Base() + newSize, oldSize - newSize, &locker, 4884 &cacheLocker)) { 4885 restart = true; 4886 break; 4887 } 4888 } 4889 } 4890 } while (restart); 4891 4892 // Okay, looks good so far, so let's do it 4893 4894 int priority = kernel && anyKernelArea 4895 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4896 uint32 allocationFlags = kernel && anyKernelArea 4897 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4898 4899 if (oldSize < newSize) { 4900 // Growing the cache can fail, so we do it first. 
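// (The value passed to Resize() is the cache's virtual_base plus the new
// area size, i.e. the new end offset of the cache.)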
4901 status = cache->Resize(cache->virtual_base + newSize, priority); 4902 if (status != B_OK) 4903 return status; 4904 } 4905 4906 for (VMArea* current = cache->areas; current != NULL; 4907 current = current->cache_next) { 4908 status = current->address_space->ResizeArea(current, newSize, 4909 allocationFlags); 4910 if (status != B_OK) 4911 break; 4912 4913 // We also need to unmap all pages beyond the new size, if the area has 4914 // shrunk 4915 if (newSize < oldSize) { 4916 VMCacheChainLocker cacheChainLocker(cache); 4917 cacheChainLocker.LockAllSourceCaches(); 4918 4919 unmap_pages(current, current->Base() + newSize, 4920 oldSize - newSize); 4921 4922 cacheChainLocker.Unlock(cache); 4923 } 4924 } 4925 4926 if (status == B_OK) { 4927 // Shrink or grow individual page protections if in use. 4928 if (area->page_protections != NULL) { 4929 uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2; 4930 uint8* newProtections 4931 = (uint8*)realloc(area->page_protections, bytes); 4932 if (newProtections == NULL) 4933 status = B_NO_MEMORY; 4934 else { 4935 area->page_protections = newProtections; 4936 4937 if (oldSize < newSize) { 4938 // init the additional page protections to that of the area 4939 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 4940 uint32 areaProtection = area->protection 4941 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 4942 memset(area->page_protections + offset, 4943 areaProtection | (areaProtection << 4), bytes - offset); 4944 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 4945 uint8& entry = area->page_protections[offset - 1]; 4946 entry = (entry & 0x0f) | (areaProtection << 4); 4947 } 4948 } 4949 } 4950 } 4951 } 4952 4953 // shrinking the cache can't fail, so we do it now 4954 if (status == B_OK && newSize < oldSize) 4955 status = cache->Resize(cache->virtual_base + newSize, priority); 4956 4957 if (status != B_OK) { 4958 // Something failed -- resize the areas back to their original size. 4959 // This can fail, too, in which case we're seriously screwed. 4960 for (VMArea* current = cache->areas; current != NULL; 4961 current = current->cache_next) { 4962 if (current->address_space->ResizeArea(current, oldSize, 4963 allocationFlags) != B_OK) { 4964 panic("vm_resize_area(): Failed and not being able to restore " 4965 "original state."); 4966 } 4967 } 4968 4969 cache->Resize(cache->virtual_base + oldSize, priority); 4970 } 4971 4972 // TODO: we must honour the lock restrictions of this area 4973 return status; 4974 } 4975 4976 4977 status_t 4978 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 4979 { 4980 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4981 } 4982 4983 4984 status_t 4985 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 4986 { 4987 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4988 } 4989 4990 4991 status_t 4992 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 4993 bool user) 4994 { 4995 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4996 } 4997 4998 4999 void 5000 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5001 { 5002 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5003 } 5004 5005 5006 /*! Copies a range of memory directly from/to a page that might not be mapped 5007 at the moment. 5008 5009 For \a unsafeMemory the current mapping (if any is ignored). The function 5010 walks through the respective area's cache chain to find the physical page 5011 and copies from/to it directly. 
5012 The memory range starting at \a unsafeMemory with a length of \a size bytes 5013 must not cross a page boundary. 5014 5015 \param teamID The team ID identifying the address space \a unsafeMemory is 5016 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5017 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5018 is passed, the address space of the thread returned by 5019 debug_get_debugged_thread() is used. 5020 \param unsafeMemory The start of the unsafe memory range to be copied 5021 from/to. 5022 \param buffer A safely accessible kernel buffer to be copied from/to. 5023 \param size The number of bytes to be copied. 5024 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5025 \a unsafeMemory, the other way around otherwise. 5026 */ 5027 status_t 5028 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5029 size_t size, bool copyToUnsafe) 5030 { 5031 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5032 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5033 return B_BAD_VALUE; 5034 } 5035 5036 // get the address space for the debugged thread 5037 VMAddressSpace* addressSpace; 5038 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5039 addressSpace = VMAddressSpace::Kernel(); 5040 } else if (teamID == B_CURRENT_TEAM) { 5041 Thread* thread = debug_get_debugged_thread(); 5042 if (thread == NULL || thread->team == NULL) 5043 return B_BAD_ADDRESS; 5044 5045 addressSpace = thread->team->address_space; 5046 } else 5047 addressSpace = VMAddressSpace::DebugGet(teamID); 5048 5049 if (addressSpace == NULL) 5050 return B_BAD_ADDRESS; 5051 5052 // get the area 5053 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5054 if (area == NULL) 5055 return B_BAD_ADDRESS; 5056 5057 // search the page 5058 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5059 + area->cache_offset; 5060 VMCache* cache = area->cache; 5061 vm_page* page = NULL; 5062 while (cache != NULL) { 5063 page = cache->DebugLookupPage(cacheOffset); 5064 if (page != NULL) 5065 break; 5066 5067 // Page not found in this cache -- if it is paged out, we must not try 5068 // to get it from lower caches. 5069 if (cache->DebugHasPage(cacheOffset)) 5070 break; 5071 5072 cache = cache->source; 5073 } 5074 5075 if (page == NULL) 5076 return B_UNSUPPORTED; 5077 5078 // copy from/to physical memory 5079 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5080 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5081 5082 if (copyToUnsafe) { 5083 if (page->Cache() != area->cache) 5084 return B_UNSUPPORTED; 5085 5086 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5087 } 5088 5089 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5090 } 5091 5092 5093 // #pragma mark - kernel public API 5094 5095 5096 status_t 5097 user_memcpy(void* to, const void* from, size_t size) 5098 { 5099 // don't allow address overflows 5100 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5101 return B_BAD_ADDRESS; 5102 5103 if (arch_cpu_user_memcpy(to, from, size, 5104 &thread_get_current_thread()->fault_handler) < B_OK) 5105 return B_BAD_ADDRESS; 5106 5107 return B_OK; 5108 } 5109 5110 5111 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5112 the string in \a to, NULL-terminating the result. 5113 5114 \param to Pointer to the destination C-string. 5115 \param from Pointer to the source C-string. 
5116 \param size Size in bytes of the string buffer pointed to by \a to.
5117
5118 \return strlen(\a from), or a negative error code if an address was invalid.
5119 */
5120 ssize_t
5121 user_strlcpy(char* to, const char* from, size_t size)
5122 {
5123 if (to == NULL && size != 0)
5124 return B_BAD_VALUE;
5125 if (from == NULL)
5126 return B_BAD_ADDRESS;
5127
5128 // limit size to avoid address overflows
5129 size_t maxSize = std::min(size,
5130 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5131 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5132 // the source address might still overflow.
5133
5134 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5135 &thread_get_current_thread()->fault_handler);
5136
5137 // If we hit the address overflow boundary, fail.
5138 if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5139 && maxSize < size)) {
5140 return B_BAD_ADDRESS;
5141 }
5142
5143 return result;
5144 }
5145
5146
5147 status_t
5148 user_memset(void* s, char c, size_t count)
5149 {
5150 // don't allow address overflows
5151 if ((addr_t)s + count < (addr_t)s)
5152 return B_BAD_ADDRESS;
5153
5154 if (arch_cpu_user_memset(s, c, count,
5155 &thread_get_current_thread()->fault_handler) < B_OK)
5156 return B_BAD_ADDRESS;
5157
5158 return B_OK;
5159 }
5160
5161
5162 /*! Wires a single page at the given address.
5163
5164 \param team The team whose address space the address belongs to. Also
5165 supports \c B_CURRENT_TEAM. If the given address is a kernel address, the
5166 parameter is ignored.
5167 \param address The virtual address to wire down. Does not need to
5168 be page aligned.
5169 \param writable If \c true the page shall be writable.
5170 \param info On success the info is filled in, among other things
5171 containing the physical address the given virtual one translates to.
5172 \return \c B_OK when the page could be wired, another error code otherwise.
5173 */
5174 status_t
5175 vm_wire_page(team_id team, addr_t address, bool writable,
5176 VMPageWiringInfo* info)
5177 {
5178 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5179 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5180
5181 // compute the page protection that is required
5182 bool isUser = IS_USER_ADDRESS(address);
5183 uint32 requiredProtection = PAGE_PRESENT
5184 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5185 if (writable)
5186 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5187
5188 // get and read lock the address space
5189 VMAddressSpace* addressSpace = NULL;
5190 if (isUser) {
5191 if (team == B_CURRENT_TEAM)
5192 addressSpace = VMAddressSpace::GetCurrent();
5193 else
5194 addressSpace = VMAddressSpace::Get(team);
5195 } else
5196 addressSpace = VMAddressSpace::GetKernel();
5197 if (addressSpace == NULL)
5198 return B_ERROR;
5199
5200 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5201
5202 VMTranslationMap* map = addressSpace->TranslationMap();
5203 status_t error = B_OK;
5204
5205 // get the area
5206 VMArea* area = addressSpace->LookupArea(pageAddress);
5207 if (area == NULL) {
5208 addressSpace->Put();
5209 return B_BAD_ADDRESS;
5210 }
5211
5212 // Lock the area's top cache. This is a requirement for VMArea::Wire().
5213 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5214
5215 // mark the area range wired
5216 area->Wire(&info->range);
5217
5218 // Lock the area's cache chain and the translation map. Needed to look
5219 // up the page and play with its wired count.
5220 cacheChainLocker.LockAllSourceCaches(); 5221 map->Lock(); 5222 5223 phys_addr_t physicalAddress; 5224 uint32 flags; 5225 vm_page* page; 5226 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5227 && (flags & requiredProtection) == requiredProtection 5228 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5229 != NULL) { 5230 // Already mapped with the correct permissions -- just increment 5231 // the page's wired count. 5232 increment_page_wired_count(page); 5233 5234 map->Unlock(); 5235 cacheChainLocker.Unlock(); 5236 addressSpaceLocker.Unlock(); 5237 } else { 5238 // Let vm_soft_fault() map the page for us, if possible. We need 5239 // to fully unlock to avoid deadlocks. Since we have already 5240 // wired the area itself, nothing disturbing will happen with it 5241 // in the meantime. 5242 map->Unlock(); 5243 cacheChainLocker.Unlock(); 5244 addressSpaceLocker.Unlock(); 5245 5246 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5247 isUser, &page, &info->range); 5248 5249 if (error != B_OK) { 5250 // The page could not be mapped -- clean up. 5251 VMCache* cache = vm_area_get_locked_cache(area); 5252 area->Unwire(&info->range); 5253 cache->ReleaseRefAndUnlock(); 5254 addressSpace->Put(); 5255 return error; 5256 } 5257 } 5258 5259 info->physicalAddress 5260 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5261 + address % B_PAGE_SIZE; 5262 info->page = page; 5263 5264 return B_OK; 5265 } 5266 5267 5268 /*! Unwires a single page previously wired via vm_wire_page(). 5269 5270 \param info The same object passed to vm_wire_page() before. 5271 */ 5272 void 5273 vm_unwire_page(VMPageWiringInfo* info) 5274 { 5275 // lock the address space 5276 VMArea* area = info->range.area; 5277 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5278 // takes over our reference 5279 5280 // lock the top cache 5281 VMCache* cache = vm_area_get_locked_cache(area); 5282 VMCacheChainLocker cacheChainLocker(cache); 5283 5284 if (info->page->Cache() != cache) { 5285 // The page is not in the top cache, so we lock the whole cache chain 5286 // before touching the page's wired count. 5287 cacheChainLocker.LockAllSourceCaches(); 5288 } 5289 5290 decrement_page_wired_count(info->page); 5291 5292 // remove the wired range from the range 5293 area->Unwire(&info->range); 5294 5295 cacheChainLocker.Unlock(); 5296 } 5297 5298 5299 /*! Wires down the given address range in the specified team's address space. 5300 5301 If successful the function 5302 - acquires a reference to the specified team's address space, 5303 - adds respective wired ranges to all areas that intersect with the given 5304 address range, 5305 - makes sure all pages in the given address range are mapped with the 5306 requested access permissions and increments their wired count. 5307 5308 It fails, when \a team doesn't specify a valid address space, when any part 5309 of the specified address range is not covered by areas, when the concerned 5310 areas don't allow mapping with the requested permissions, or when mapping 5311 failed for another reason. 5312 5313 When successful the call must be balanced by a unlock_memory_etc() call with 5314 the exact same parameters. 5315 5316 \param team Identifies the address (via team ID). \c B_CURRENT_TEAM is 5317 supported. 5318 \param address The start of the address range to be wired. 5319 \param numBytes The size of the address range to be wired. 5320 \param flags Flags. 
Currently only \c B_READ_DEVICE is defined, which 5321 requests that the range must be wired writable ("read from device 5322 into memory"). 5323 \return \c B_OK on success, another error code otherwise. 5324 */ 5325 status_t 5326 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5327 { 5328 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5329 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5330 5331 // compute the page protection that is required 5332 bool isUser = IS_USER_ADDRESS(address); 5333 bool writable = (flags & B_READ_DEVICE) == 0; 5334 uint32 requiredProtection = PAGE_PRESENT 5335 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5336 if (writable) 5337 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5338 5339 uint32 mallocFlags = isUser 5340 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5341 5342 // get and read lock the address space 5343 VMAddressSpace* addressSpace = NULL; 5344 if (isUser) { 5345 if (team == B_CURRENT_TEAM) 5346 addressSpace = VMAddressSpace::GetCurrent(); 5347 else 5348 addressSpace = VMAddressSpace::Get(team); 5349 } else 5350 addressSpace = VMAddressSpace::GetKernel(); 5351 if (addressSpace == NULL) 5352 return B_ERROR; 5353 5354 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5355 // We get a new address space reference here. The one we got above will 5356 // be freed by unlock_memory_etc(). 5357 5358 VMTranslationMap* map = addressSpace->TranslationMap(); 5359 status_t error = B_OK; 5360 5361 // iterate through all concerned areas 5362 addr_t nextAddress = lockBaseAddress; 5363 while (nextAddress != lockEndAddress) { 5364 // get the next area 5365 VMArea* area = addressSpace->LookupArea(nextAddress); 5366 if (area == NULL) { 5367 error = B_BAD_ADDRESS; 5368 break; 5369 } 5370 5371 addr_t areaStart = nextAddress; 5372 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5373 5374 // allocate the wired range (do that before locking the cache to avoid 5375 // deadlocks) 5376 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5377 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5378 if (range == NULL) { 5379 error = B_NO_MEMORY; 5380 break; 5381 } 5382 5383 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5384 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5385 5386 // mark the area range wired 5387 area->Wire(range); 5388 5389 // Depending on the area cache type and the wiring, we may not need to 5390 // look at the individual pages. 5391 if (area->cache_type == CACHE_TYPE_NULL 5392 || area->cache_type == CACHE_TYPE_DEVICE 5393 || area->wiring == B_FULL_LOCK 5394 || area->wiring == B_CONTIGUOUS) { 5395 nextAddress = areaEnd; 5396 continue; 5397 } 5398 5399 // Lock the area's cache chain and the translation map. Needed to look 5400 // up pages and play with their wired count. 5401 cacheChainLocker.LockAllSourceCaches(); 5402 map->Lock(); 5403 5404 // iterate through the pages and wire them 5405 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5406 phys_addr_t physicalAddress; 5407 uint32 flags; 5408 5409 vm_page* page; 5410 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5411 && (flags & requiredProtection) == requiredProtection 5412 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5413 != NULL) { 5414 // Already mapped with the correct permissions -- just increment 5415 // the page's wired count. 
5416 increment_page_wired_count(page); 5417 } else { 5418 // Let vm_soft_fault() map the page for us, if possible. We need 5419 // to fully unlock to avoid deadlocks. Since we have already 5420 // wired the area itself, nothing disturbing will happen with it 5421 // in the meantime. 5422 map->Unlock(); 5423 cacheChainLocker.Unlock(); 5424 addressSpaceLocker.Unlock(); 5425 5426 error = vm_soft_fault(addressSpace, nextAddress, writable, 5427 false, isUser, &page, range); 5428 5429 addressSpaceLocker.Lock(); 5430 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5431 cacheChainLocker.LockAllSourceCaches(); 5432 map->Lock(); 5433 } 5434 5435 if (error != B_OK) 5436 break; 5437 } 5438 5439 map->Unlock(); 5440 5441 if (error == B_OK) { 5442 cacheChainLocker.Unlock(); 5443 } else { 5444 // An error occurred, so abort right here. If the current address 5445 // is the first in this area, unwire the area, since we won't get 5446 // to it when reverting what we've done so far. 5447 if (nextAddress == areaStart) { 5448 area->Unwire(range); 5449 cacheChainLocker.Unlock(); 5450 range->~VMAreaWiredRange(); 5451 free_etc(range, mallocFlags); 5452 } else 5453 cacheChainLocker.Unlock(); 5454 5455 break; 5456 } 5457 } 5458 5459 if (error != B_OK) { 5460 // An error occurred, so unwire all that we've already wired. Note that 5461 // even if not a single page was wired, unlock_memory_etc() is called 5462 // to put the address space reference. 5463 addressSpaceLocker.Unlock(); 5464 unlock_memory_etc(team, (void*)lockBaseAddress, 5465 nextAddress - lockBaseAddress, flags); 5466 } 5467 5468 return error; 5469 } 5470 5471 5472 status_t 5473 lock_memory(void* address, size_t numBytes, uint32 flags) 5474 { 5475 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5476 } 5477 5478 5479 /*! Unwires an address range previously wired with lock_memory_etc(). 5480 5481 Note that a call to this function must balance a previous lock_memory_etc() 5482 call with exactly the same parameters. 5483 */ 5484 status_t 5485 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5486 { 5487 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5488 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5489 5490 // compute the page protection that is required 5491 bool isUser = IS_USER_ADDRESS(address); 5492 bool writable = (flags & B_READ_DEVICE) == 0; 5493 uint32 requiredProtection = PAGE_PRESENT 5494 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5495 if (writable) 5496 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5497 5498 uint32 mallocFlags = isUser 5499 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5500 5501 // get and read lock the address space 5502 VMAddressSpace* addressSpace = NULL; 5503 if (isUser) { 5504 if (team == B_CURRENT_TEAM) 5505 addressSpace = VMAddressSpace::GetCurrent(); 5506 else 5507 addressSpace = VMAddressSpace::Get(team); 5508 } else 5509 addressSpace = VMAddressSpace::GetKernel(); 5510 if (addressSpace == NULL) 5511 return B_ERROR; 5512 5513 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5514 // Take over the address space reference. We don't unlock until we're 5515 // done. 
5516
5517 VMTranslationMap* map = addressSpace->TranslationMap();
5518 status_t error = B_OK;
5519
5520 // iterate through all concerned areas
5521 addr_t nextAddress = lockBaseAddress;
5522 while (nextAddress != lockEndAddress) {
5523 // get the next area
5524 VMArea* area = addressSpace->LookupArea(nextAddress);
5525 if (area == NULL) {
5526 error = B_BAD_ADDRESS;
5527 break;
5528 }
5529
5530 addr_t areaStart = nextAddress;
5531 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5532
5533 // Lock the area's top cache. This is a requirement for
5534 // VMArea::Unwire().
5535 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5536
5537 // Depending on the area cache type and the wiring, we may not need to
5538 // look at the individual pages.
5539 if (area->cache_type == CACHE_TYPE_NULL
5540 || area->cache_type == CACHE_TYPE_DEVICE
5541 || area->wiring == B_FULL_LOCK
5542 || area->wiring == B_CONTIGUOUS) {
5543 // unwire the range (to avoid deadlocks we delete the range after
5544 // unlocking the cache)
5545 nextAddress = areaEnd;
5546 VMAreaWiredRange* range = area->Unwire(areaStart,
5547 areaEnd - areaStart, writable);
5548 cacheChainLocker.Unlock();
5549 if (range != NULL) {
5550 range->~VMAreaWiredRange();
5551 free_etc(range, mallocFlags);
5552 }
5553 continue;
5554 }
5555
5556 // Lock the area's cache chain and the translation map. Needed to look
5557 // up pages and play with their wired count.
5558 cacheChainLocker.LockAllSourceCaches();
5559 map->Lock();
5560
5561 // iterate through the pages and unwire them
5562 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5563 phys_addr_t physicalAddress;
5564 uint32 flags;
5565
5566 vm_page* page;
5567 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5568 && (flags & PAGE_PRESENT) != 0
5569 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5570 != NULL) {
5571 // The page is still mapped -- just decrement
5572 // its wired count.
5573 decrement_page_wired_count(page);
5574 } else {
5575 panic("unlock_memory_etc(): Failed to unwire page: address "
5576 "space %p, address: %#" B_PRIxADDR, addressSpace,
5577 nextAddress);
5578 error = B_BAD_VALUE;
5579 break;
5580 }
5581 }
5582
5583 map->Unlock();
5584
5585 // All pages are unwired. Remove the area's wired range as well (to
5586 // avoid deadlocks we delete the range after unlocking the cache).
5587 VMAreaWiredRange* range = area->Unwire(areaStart,
5588 areaEnd - areaStart, writable);
5589
5590 cacheChainLocker.Unlock();
5591
5592 if (range != NULL) {
5593 range->~VMAreaWiredRange();
5594 free_etc(range, mallocFlags);
5595 }
5596
5597 if (error != B_OK)
5598 break;
5599 }
5600
5601 // get rid of the address space reference lock_memory_etc() acquired
5602 addressSpace->Put();
5603
5604 return error;
5605 }
5606
5607
5608 status_t
5609 unlock_memory(void* address, size_t numBytes, uint32 flags)
5610 {
5611 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5612 }
5613
5614
5615 /*! Similar to get_memory_map(), but also allows specifying the address space
5616 for the memory in question and has saner semantics.
5617 Returns \c B_OK when the complete range could be translated or
5618 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5619 case the actual number of entries is written to \c *_numEntries. Any other
5620 error case indicates complete failure; \c *_numEntries will be set to \c 0
5621 in this case.
5622 */
5623 status_t
5624 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5625 	physical_entry* table, uint32* _numEntries)
5626 {
5627 	uint32 numEntries = *_numEntries;
5628 	*_numEntries = 0;
5629
5630 	VMAddressSpace* addressSpace;
5631 	addr_t virtualAddress = (addr_t)address;
5632 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5633 	phys_addr_t physicalAddress;
5634 	status_t status = B_OK;
5635 	int32 index = -1;
5636 	addr_t offset = 0;
5637 	bool interrupts = are_interrupts_enabled();
5638
5639 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5640 		"entries)\n", team, address, numBytes, numEntries));
5641
5642 	if (numEntries == 0 || numBytes == 0)
5643 		return B_BAD_VALUE;
5644
5645 	// in which address space is the address to be found?
5646 	if (IS_USER_ADDRESS(virtualAddress)) {
5647 		if (team == B_CURRENT_TEAM)
5648 			addressSpace = VMAddressSpace::GetCurrent();
5649 		else
5650 			addressSpace = VMAddressSpace::Get(team);
5651 	} else
5652 		addressSpace = VMAddressSpace::GetKernel();
5653
5654 	if (addressSpace == NULL)
5655 		return B_ERROR;
5656
5657 	VMTranslationMap* map = addressSpace->TranslationMap();
5658
5659 	if (interrupts)
5660 		map->Lock();
5661
5662 	while (offset < numBytes) {
5663 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5664 		uint32 flags;
5665
5666 		if (interrupts) {
5667 			status = map->Query((addr_t)address + offset, &physicalAddress,
5668 				&flags);
5669 		} else {
5670 			status = map->QueryInterrupt((addr_t)address + offset,
5671 				&physicalAddress, &flags);
5672 		}
5673 		if (status < B_OK)
5674 			break;
5675 		if ((flags & PAGE_PRESENT) == 0) {
5676 			panic("get_memory_map() called on unmapped memory!");
5677 			return B_BAD_ADDRESS;
5678 		}
5679
5680 		if (index < 0 && pageOffset > 0) {
5681 			physicalAddress += pageOffset;
5682 			if (bytes > B_PAGE_SIZE - pageOffset)
5683 				bytes = B_PAGE_SIZE - pageOffset;
5684 		}
5685
5686 		// need to switch to the next physical_entry?
5687 		if (index < 0 || table[index].address
5688 				!= physicalAddress - table[index].size) {
5689 			if ((uint32)++index + 1 > numEntries) {
5690 				// table too small
5691 				break;
5692 			}
5693 			table[index].address = physicalAddress;
5694 			table[index].size = bytes;
5695 		} else {
5696 			// the page fits into the current entry
5697 			table[index].size += bytes;
5698 		}
5699
5700 		offset += bytes;
5701 	}
5702
5703 	if (interrupts)
5704 		map->Unlock();
5705
5706 	if (status != B_OK)
5707 		return status;
5708
5709 	if ((uint32)index + 1 > numEntries) {
5710 		*_numEntries = index;
5711 		return B_BUFFER_OVERFLOW;
5712 	}
5713
5714 	*_numEntries = index + 1;
5715 	return B_OK;
5716 }
5717
5718
5719 /*!	According to the BeBook, this function should always succeed.
5720 	This is no longer the case.
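	Callers should therefore check the result, e.g. (editor's illustration,
	with \c buffer and \c size as hypothetical placeholders):
		physical_entry entry;
		if (get_memory_map(buffer, size, &entry, 1) != B_OK)
			; // handle the failure instead of assuming success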
5721 */ 5722 extern "C" int32 5723 __get_memory_map_haiku(const void* address, size_t numBytes, 5724 physical_entry* table, int32 numEntries) 5725 { 5726 uint32 entriesRead = numEntries; 5727 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5728 table, &entriesRead); 5729 if (error != B_OK) 5730 return error; 5731 5732 // close the entry list 5733 5734 // if it's only one entry, we will silently accept the missing ending 5735 if (numEntries == 1) 5736 return B_OK; 5737 5738 if (entriesRead + 1 > (uint32)numEntries) 5739 return B_BUFFER_OVERFLOW; 5740 5741 table[entriesRead].address = 0; 5742 table[entriesRead].size = 0; 5743 5744 return B_OK; 5745 } 5746 5747 5748 area_id 5749 area_for(void* address) 5750 { 5751 return vm_area_for((addr_t)address, true); 5752 } 5753 5754 5755 area_id 5756 find_area(const char* name) 5757 { 5758 return VMAreaHash::Find(name); 5759 } 5760 5761 5762 status_t 5763 _get_area_info(area_id id, area_info* info, size_t size) 5764 { 5765 if (size != sizeof(area_info) || info == NULL) 5766 return B_BAD_VALUE; 5767 5768 AddressSpaceReadLocker locker; 5769 VMArea* area; 5770 status_t status = locker.SetFromArea(id, area); 5771 if (status != B_OK) 5772 return status; 5773 5774 fill_area_info(area, info, size); 5775 return B_OK; 5776 } 5777 5778 5779 status_t 5780 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 5781 { 5782 addr_t nextBase = *(addr_t*)cookie; 5783 5784 // we're already through the list 5785 if (nextBase == (addr_t)-1) 5786 return B_ENTRY_NOT_FOUND; 5787 5788 if (team == B_CURRENT_TEAM) 5789 team = team_get_current_team_id(); 5790 5791 AddressSpaceReadLocker locker(team); 5792 if (!locker.IsLocked()) 5793 return B_BAD_TEAM_ID; 5794 5795 VMArea* area; 5796 for (VMAddressSpace::AreaIterator it 5797 = locker.AddressSpace()->GetAreaIterator(); 5798 (area = it.Next()) != NULL;) { 5799 if (area->Base() > nextBase) 5800 break; 5801 } 5802 5803 if (area == NULL) { 5804 nextBase = (addr_t)-1; 5805 return B_ENTRY_NOT_FOUND; 5806 } 5807 5808 fill_area_info(area, info, size); 5809 *cookie = (ssize_t)(area->Base()); 5810 5811 return B_OK; 5812 } 5813 5814 5815 status_t 5816 set_area_protection(area_id area, uint32 newProtection) 5817 { 5818 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5819 newProtection, true); 5820 } 5821 5822 5823 status_t 5824 resize_area(area_id areaID, size_t newSize) 5825 { 5826 return vm_resize_area(areaID, newSize, true); 5827 } 5828 5829 5830 /*! Transfers the specified area to a new team. The caller must be the owner 5831 of the area. 5832 */ 5833 area_id 5834 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5835 bool kernel) 5836 { 5837 area_info info; 5838 status_t status = get_area_info(id, &info); 5839 if (status != B_OK) 5840 return status; 5841 5842 if (info.team != thread_get_current_thread()->team->id) 5843 return B_PERMISSION_DENIED; 5844 5845 area_id clonedArea = vm_clone_area(target, info.name, _address, 5846 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5847 if (clonedArea < 0) 5848 return clonedArea; 5849 5850 status = vm_delete_area(info.team, id, kernel); 5851 if (status != B_OK) { 5852 vm_delete_area(target, clonedArea, kernel); 5853 return status; 5854 } 5855 5856 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
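	// (Editor's note: the transfer is implemented as "clone into the target
	// team, then delete the original", so the area ID returned to the caller
	// is that of the new clone, not the ID that was passed in.)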
5857 5858 return clonedArea; 5859 } 5860 5861 5862 extern "C" area_id 5863 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 5864 size_t numBytes, uint32 addressSpec, uint32 protection, 5865 void** _virtualAddress) 5866 { 5867 if (!arch_vm_supports_protection(protection)) 5868 return B_NOT_SUPPORTED; 5869 5870 fix_protection(&protection); 5871 5872 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5873 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 5874 false); 5875 } 5876 5877 5878 area_id 5879 clone_area(const char* name, void** _address, uint32 addressSpec, 5880 uint32 protection, area_id source) 5881 { 5882 if ((protection & B_KERNEL_PROTECTION) == 0) 5883 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5884 5885 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5886 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5887 } 5888 5889 5890 area_id 5891 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock, 5892 uint32 protection, uint32 flags, uint32 guardSize, 5893 const virtual_address_restrictions* virtualAddressRestrictions, 5894 const physical_address_restrictions* physicalAddressRestrictions, 5895 void** _address) 5896 { 5897 fix_protection(&protection); 5898 5899 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 5900 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 5901 true, _address); 5902 } 5903 5904 5905 extern "C" area_id 5906 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 5907 size_t size, uint32 lock, uint32 protection) 5908 { 5909 fix_protection(&protection); 5910 5911 virtual_address_restrictions virtualRestrictions = {}; 5912 virtualRestrictions.address = *_address; 5913 virtualRestrictions.address_specification = addressSpec; 5914 physical_address_restrictions physicalRestrictions = {}; 5915 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 5916 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 5917 true, _address); 5918 } 5919 5920 5921 status_t 5922 delete_area(area_id area) 5923 { 5924 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5925 } 5926 5927 5928 // #pragma mark - Userland syscalls 5929 5930 5931 status_t 5932 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5933 addr_t size) 5934 { 5935 // filter out some unavailable values (for userland) 5936 switch (addressSpec) { 5937 case B_ANY_KERNEL_ADDRESS: 5938 case B_ANY_KERNEL_BLOCK_ADDRESS: 5939 return B_BAD_VALUE; 5940 } 5941 5942 addr_t address; 5943 5944 if (!IS_USER_ADDRESS(userAddress) 5945 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5946 return B_BAD_ADDRESS; 5947 5948 status_t status = vm_reserve_address_range( 5949 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5950 RESERVED_AVOID_BASE); 5951 if (status != B_OK) 5952 return status; 5953 5954 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5955 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5956 (void*)address, size); 5957 return B_BAD_ADDRESS; 5958 } 5959 5960 return B_OK; 5961 } 5962 5963 5964 status_t 5965 _user_unreserve_address_range(addr_t address, addr_t size) 5966 { 5967 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5968 (void*)address, size); 5969 } 5970 5971 5972 area_id 5973 _user_area_for(void* address) 5974 { 5975 return vm_area_for((addr_t)address, false); 5976 } 5977 5978 5979 area_id 5980 
_user_find_area(const char* userName) 5981 { 5982 char name[B_OS_NAME_LENGTH]; 5983 5984 if (!IS_USER_ADDRESS(userName) 5985 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5986 return B_BAD_ADDRESS; 5987 5988 return find_area(name); 5989 } 5990 5991 5992 status_t 5993 _user_get_area_info(area_id area, area_info* userInfo) 5994 { 5995 if (!IS_USER_ADDRESS(userInfo)) 5996 return B_BAD_ADDRESS; 5997 5998 area_info info; 5999 status_t status = get_area_info(area, &info); 6000 if (status < B_OK) 6001 return status; 6002 6003 // TODO: do we want to prevent userland from seeing kernel protections? 6004 //info.protection &= B_USER_PROTECTION; 6005 6006 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6007 return B_BAD_ADDRESS; 6008 6009 return status; 6010 } 6011 6012 6013 status_t 6014 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6015 { 6016 ssize_t cookie; 6017 6018 if (!IS_USER_ADDRESS(userCookie) 6019 || !IS_USER_ADDRESS(userInfo) 6020 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6021 return B_BAD_ADDRESS; 6022 6023 area_info info; 6024 status_t status = _get_next_area_info(team, &cookie, &info, 6025 sizeof(area_info)); 6026 if (status != B_OK) 6027 return status; 6028 6029 //info.protection &= B_USER_PROTECTION; 6030 6031 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6032 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6033 return B_BAD_ADDRESS; 6034 6035 return status; 6036 } 6037 6038 6039 status_t 6040 _user_set_area_protection(area_id area, uint32 newProtection) 6041 { 6042 if ((newProtection & ~B_USER_PROTECTION) != 0) 6043 return B_BAD_VALUE; 6044 6045 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6046 newProtection, false); 6047 } 6048 6049 6050 status_t 6051 _user_resize_area(area_id area, size_t newSize) 6052 { 6053 // TODO: Since we restrict deleting of areas to those owned by the team, 6054 // we should also do that for resizing (check other functions, too). 
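	// (Editor's sketch for the TODO above: such a check could compare the
	// area's owning team against the current team before resizing, analogous
	// to what the delete path enforces for userland callers. Not implemented
	// here.)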
6055 return vm_resize_area(area, newSize, false); 6056 } 6057 6058 6059 area_id 6060 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6061 team_id target) 6062 { 6063 // filter out some unavailable values (for userland) 6064 switch (addressSpec) { 6065 case B_ANY_KERNEL_ADDRESS: 6066 case B_ANY_KERNEL_BLOCK_ADDRESS: 6067 return B_BAD_VALUE; 6068 } 6069 6070 void* address; 6071 if (!IS_USER_ADDRESS(userAddress) 6072 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6073 return B_BAD_ADDRESS; 6074 6075 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6076 if (newArea < B_OK) 6077 return newArea; 6078 6079 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6080 return B_BAD_ADDRESS; 6081 6082 return newArea; 6083 } 6084 6085 6086 area_id 6087 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6088 uint32 protection, area_id sourceArea) 6089 { 6090 char name[B_OS_NAME_LENGTH]; 6091 void* address; 6092 6093 // filter out some unavailable values (for userland) 6094 switch (addressSpec) { 6095 case B_ANY_KERNEL_ADDRESS: 6096 case B_ANY_KERNEL_BLOCK_ADDRESS: 6097 return B_BAD_VALUE; 6098 } 6099 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6100 return B_BAD_VALUE; 6101 6102 if (!IS_USER_ADDRESS(userName) 6103 || !IS_USER_ADDRESS(userAddress) 6104 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6105 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6106 return B_BAD_ADDRESS; 6107 6108 fix_protection(&protection); 6109 6110 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6111 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6112 false); 6113 if (clonedArea < B_OK) 6114 return clonedArea; 6115 6116 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6117 delete_area(clonedArea); 6118 return B_BAD_ADDRESS; 6119 } 6120 6121 return clonedArea; 6122 } 6123 6124 6125 area_id 6126 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6127 size_t size, uint32 lock, uint32 protection) 6128 { 6129 char name[B_OS_NAME_LENGTH]; 6130 void* address; 6131 6132 // filter out some unavailable values (for userland) 6133 switch (addressSpec) { 6134 case B_ANY_KERNEL_ADDRESS: 6135 case B_ANY_KERNEL_BLOCK_ADDRESS: 6136 return B_BAD_VALUE; 6137 } 6138 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6139 return B_BAD_VALUE; 6140 6141 if (!IS_USER_ADDRESS(userName) 6142 || !IS_USER_ADDRESS(userAddress) 6143 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6144 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6145 return B_BAD_ADDRESS; 6146 6147 if (addressSpec == B_EXACT_ADDRESS 6148 && IS_KERNEL_ADDRESS(address)) 6149 return B_BAD_VALUE; 6150 6151 if (addressSpec == B_ANY_ADDRESS) 6152 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6153 if (addressSpec == B_BASE_ADDRESS) 6154 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6155 6156 fix_protection(&protection); 6157 6158 virtual_address_restrictions virtualRestrictions = {}; 6159 virtualRestrictions.address = address; 6160 virtualRestrictions.address_specification = addressSpec; 6161 physical_address_restrictions physicalRestrictions = {}; 6162 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6163 size, lock, protection, 0, 0, &virtualRestrictions, 6164 &physicalRestrictions, false, &address); 6165 6166 if (area >= B_OK 6167 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6168 delete_area(area); 6169 return B_BAD_ADDRESS; 6170 
} 6171 6172 return area; 6173 } 6174 6175 6176 status_t 6177 _user_delete_area(area_id area) 6178 { 6179 // Unlike the BeOS implementation, you can now only delete areas 6180 // that you have created yourself from userland. 6181 // The documentation to delete_area() explicitly states that this 6182 // will be restricted in the future, and so it will. 6183 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6184 } 6185 6186 6187 // TODO: create a BeOS style call for this! 6188 6189 area_id 6190 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6191 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6192 int fd, off_t offset) 6193 { 6194 char name[B_OS_NAME_LENGTH]; 6195 void* address; 6196 area_id area; 6197 6198 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6199 return B_BAD_VALUE; 6200 6201 fix_protection(&protection); 6202 6203 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6204 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6205 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6206 return B_BAD_ADDRESS; 6207 6208 if (addressSpec == B_EXACT_ADDRESS) { 6209 if ((addr_t)address + size < (addr_t)address 6210 || (addr_t)address % B_PAGE_SIZE != 0) { 6211 return B_BAD_VALUE; 6212 } 6213 if (!IS_USER_ADDRESS(address) 6214 || !IS_USER_ADDRESS((addr_t)address + size)) { 6215 return B_BAD_ADDRESS; 6216 } 6217 } 6218 6219 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6220 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6221 false); 6222 if (area < B_OK) 6223 return area; 6224 6225 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6226 return B_BAD_ADDRESS; 6227 6228 return area; 6229 } 6230 6231 6232 status_t 6233 _user_unmap_memory(void* _address, size_t size) 6234 { 6235 addr_t address = (addr_t)_address; 6236 6237 // check params 6238 if (size == 0 || (addr_t)address + size < (addr_t)address 6239 || (addr_t)address % B_PAGE_SIZE != 0) { 6240 return B_BAD_VALUE; 6241 } 6242 6243 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6244 return B_BAD_ADDRESS; 6245 6246 // Write lock the address space and ensure the address range is not wired. 6247 AddressSpaceWriteLocker locker; 6248 do { 6249 status_t status = locker.SetTo(team_get_current_team_id()); 6250 if (status != B_OK) 6251 return status; 6252 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6253 size, &locker)); 6254 6255 // unmap 6256 return unmap_address_range(locker.AddressSpace(), address, size, false); 6257 } 6258 6259 6260 status_t 6261 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6262 { 6263 // check address range 6264 addr_t address = (addr_t)_address; 6265 size = PAGE_ALIGN(size); 6266 6267 if ((address % B_PAGE_SIZE) != 0) 6268 return B_BAD_VALUE; 6269 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6270 || !IS_USER_ADDRESS((addr_t)address + size)) { 6271 // weird error code required by POSIX 6272 return ENOMEM; 6273 } 6274 6275 // extend and check protection 6276 if ((protection & ~B_USER_PROTECTION) != 0) 6277 return B_BAD_VALUE; 6278 6279 fix_protection(&protection); 6280 6281 // We need to write lock the address space, since we're going to play with 6282 // the areas. Also make sure that none of the areas is wired and that we're 6283 // actually allowed to change the protection. 
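	// (Editor's overview: this is done in two passes -- the loop below only
	// validates that the whole range is covered by modifiable, unwired areas,
	// restarting whenever it had to wait for a wired range; the second pass
	// further down then installs the per-page protections and re-maps or
	// unmaps the affected pages.)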
6284 AddressSpaceWriteLocker locker; 6285 6286 bool restart; 6287 do { 6288 restart = false; 6289 6290 status_t status = locker.SetTo(team_get_current_team_id()); 6291 if (status != B_OK) 6292 return status; 6293 6294 // First round: Check whether the whole range is covered by areas and we 6295 // are allowed to modify them. 6296 addr_t currentAddress = address; 6297 size_t sizeLeft = size; 6298 while (sizeLeft > 0) { 6299 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6300 if (area == NULL) 6301 return B_NO_MEMORY; 6302 6303 if ((area->protection & B_KERNEL_AREA) != 0) 6304 return B_NOT_ALLOWED; 6305 6306 // TODO: For (shared) mapped files we should check whether the new 6307 // protections are compatible with the file permissions. We don't 6308 // have a way to do that yet, though. 6309 6310 addr_t offset = currentAddress - area->Base(); 6311 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6312 6313 AreaCacheLocker cacheLocker(area); 6314 6315 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6316 &locker, &cacheLocker)) { 6317 restart = true; 6318 break; 6319 } 6320 6321 cacheLocker.Unlock(); 6322 6323 currentAddress += rangeSize; 6324 sizeLeft -= rangeSize; 6325 } 6326 } while (restart); 6327 6328 // Second round: If the protections differ from that of the area, create a 6329 // page protection array and re-map mapped pages. 6330 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6331 addr_t currentAddress = address; 6332 size_t sizeLeft = size; 6333 while (sizeLeft > 0) { 6334 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6335 if (area == NULL) 6336 return B_NO_MEMORY; 6337 6338 addr_t offset = currentAddress - area->Base(); 6339 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6340 6341 currentAddress += rangeSize; 6342 sizeLeft -= rangeSize; 6343 6344 if (area->page_protections == NULL) { 6345 if (area->protection == protection) 6346 continue; 6347 6348 status_t status = allocate_area_page_protections(area); 6349 if (status != B_OK) 6350 return status; 6351 } 6352 6353 // We need to lock the complete cache chain, since we potentially unmap 6354 // pages of lower caches. 6355 VMCache* topCache = vm_area_get_locked_cache(area); 6356 VMCacheChainLocker cacheChainLocker(topCache); 6357 cacheChainLocker.LockAllSourceCaches(); 6358 6359 for (addr_t pageAddress = area->Base() + offset; 6360 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6361 map->Lock(); 6362 6363 set_area_page_protection(area, pageAddress, protection); 6364 6365 phys_addr_t physicalAddress; 6366 uint32 flags; 6367 6368 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6369 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6370 map->Unlock(); 6371 continue; 6372 } 6373 6374 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6375 if (page == NULL) { 6376 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6377 "\n", area, physicalAddress); 6378 map->Unlock(); 6379 return B_ERROR; 6380 } 6381 6382 // If the page is not in the topmost cache and write access is 6383 // requested, we have to unmap it. Otherwise we can re-map it with 6384 // the new protection. 
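			// (Editor's note: unmapping here means the next write access will
			// fault, and the fault handler then copies the page into the
			// topmost cache before mapping it writable -- the usual
			// copy-on-write path.)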
6385 bool unmapPage = page->Cache() != topCache 6386 && (protection & B_WRITE_AREA) != 0; 6387 6388 if (!unmapPage) 6389 map->ProtectPage(area, pageAddress, protection); 6390 6391 map->Unlock(); 6392 6393 if (unmapPage) { 6394 DEBUG_PAGE_ACCESS_START(page); 6395 unmap_page(area, pageAddress); 6396 DEBUG_PAGE_ACCESS_END(page); 6397 } 6398 } 6399 } 6400 6401 return B_OK; 6402 } 6403 6404 6405 status_t 6406 _user_sync_memory(void* _address, size_t size, uint32 flags) 6407 { 6408 addr_t address = (addr_t)_address; 6409 size = PAGE_ALIGN(size); 6410 6411 // check params 6412 if ((address % B_PAGE_SIZE) != 0) 6413 return B_BAD_VALUE; 6414 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6415 || !IS_USER_ADDRESS((addr_t)address + size)) { 6416 // weird error code required by POSIX 6417 return ENOMEM; 6418 } 6419 6420 bool writeSync = (flags & MS_SYNC) != 0; 6421 bool writeAsync = (flags & MS_ASYNC) != 0; 6422 if (writeSync && writeAsync) 6423 return B_BAD_VALUE; 6424 6425 if (size == 0 || (!writeSync && !writeAsync)) 6426 return B_OK; 6427 6428 // iterate through the range and sync all concerned areas 6429 while (size > 0) { 6430 // read lock the address space 6431 AddressSpaceReadLocker locker; 6432 status_t error = locker.SetTo(team_get_current_team_id()); 6433 if (error != B_OK) 6434 return error; 6435 6436 // get the first area 6437 VMArea* area = locker.AddressSpace()->LookupArea(address); 6438 if (area == NULL) 6439 return B_NO_MEMORY; 6440 6441 uint32 offset = address - area->Base(); 6442 size_t rangeSize = min_c(area->Size() - offset, size); 6443 offset += area->cache_offset; 6444 6445 // lock the cache 6446 AreaCacheLocker cacheLocker(area); 6447 if (!cacheLocker) 6448 return B_BAD_VALUE; 6449 VMCache* cache = area->cache; 6450 6451 locker.Unlock(); 6452 6453 uint32 firstPage = offset >> PAGE_SHIFT; 6454 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6455 6456 // write the pages 6457 if (cache->type == CACHE_TYPE_VNODE) { 6458 if (writeSync) { 6459 // synchronous 6460 error = vm_page_write_modified_page_range(cache, firstPage, 6461 endPage); 6462 if (error != B_OK) 6463 return error; 6464 } else { 6465 // asynchronous 6466 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6467 // TODO: This is probably not quite what is supposed to happen. 6468 // Especially when a lot has to be written, it might take ages 6469 // until it really hits the disk. 6470 } 6471 } 6472 6473 address += rangeSize; 6474 size -= rangeSize; 6475 } 6476 6477 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6478 // synchronize multiple mappings of the same file. In our VM they never get 6479 // out of sync, though, so we don't have to do anything. 6480 6481 return B_OK; 6482 } 6483 6484 6485 status_t 6486 _user_memory_advice(void* address, size_t size, uint32 advice) 6487 { 6488 // TODO: Implement! 
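	// (Editor's note: until this is implemented, every advice value is
	// accepted and silently ignored -- the call always reports success.)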
6489 	return B_OK;
6490 }
6491
6492
6493 status_t
6494 _user_get_memory_properties(team_id teamID, const void* address,
6495 	uint32* _protected, uint32* _lock)
6496 {
6497 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6498 		return B_BAD_ADDRESS;
6499
6500 	AddressSpaceReadLocker locker;
6501 	status_t error = locker.SetTo(teamID);
6502 	if (error != B_OK)
6503 		return error;
6504
6505 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6506 	if (area == NULL)
6507 		return B_NO_MEMORY;
6508
6509
6510 	uint32 protection = area->protection;
6511 	if (area->page_protections != NULL)
6512 		protection = get_area_page_protection(area, (addr_t)address);
6513
6514 	uint32 wiring = area->wiring;
6515
6516 	locker.Unlock();
6517
6518 	error = user_memcpy(_protected, &protection, sizeof(protection));
6519 	if (error != B_OK)
6520 		return error;
6521
6522 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6523
6524 	return error;
6525 }
6526
6527
6528 // #pragma mark - compatibility
6529
6530
6531 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6532
6533
6534 struct physical_entry_beos {
6535 	uint32	address;
6536 	uint32	size;
6537 };
6538
6539
6540 /*!	The physical_entry structure has changed. We need to translate it to the
6541 	old one.
6542 */
6543 extern "C" int32
6544 __get_memory_map_beos(const void* _address, size_t numBytes,
6545 	physical_entry_beos* table, int32 numEntries)
6546 {
6547 	if (numEntries <= 0)
6548 		return B_BAD_VALUE;
6549
6550 	const uint8* address = (const uint8*)_address;
6551
6552 	int32 count = 0;
6553 	while (numBytes > 0 && count < numEntries) {
6554 		physical_entry entry;
6555 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6556 		if (result < 0) {
6557 			if (result != B_BUFFER_OVERFLOW)
6558 				return result;
6559 		}
6560
6561 		if (entry.address >= (phys_addr_t)1 << 32) {
6562 			panic("get_memory_map(): Address is greater than 4 GB!");
6563 			return B_ERROR;
6564 		}
6565
6566 		table[count].address = entry.address;
6567 		table[count++].size = entry.size;
6568
6569 		address += entry.size;
6570 		numBytes -= entry.size;
6571 	}
6572
6573 	// null-terminate the table, if possible
6574 	if (count < numEntries) {
6575 		table[count].address = 0;
6576 		table[count].size = 0;
6577 	}
6578
6579 	return B_OK;
6580 }
6581
6582
6583 /*!	The type of the \a physicalAddress parameter has changed from void* to
6584 	phys_addr_t.
6585 */
6586 extern "C" area_id
6587 __map_physical_memory_beos(const char* name, void* physicalAddress,
6588 	size_t numBytes, uint32 addressSpec, uint32 protection,
6589 	void** _virtualAddress)
6590 {
6591 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6592 		addressSpec, protection, _virtualAddress);
6593 }
6594
6595
6596 /*!	The caller might not be able to deal with physical addresses >= 4 GB, so
6597 	we meddle with the \a lock parameter to force 32 bit.
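	In practice a legacy caller asking for \c B_FULL_LOCK or \c B_LAZY_LOCK
	gets \c B_32_BIT_FULL_LOCK, and one asking for \c B_CONTIGUOUS gets
	\c B_32_BIT_CONTIGUOUS (see the switch below).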
6598 */ 6599 extern "C" area_id 6600 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 6601 size_t size, uint32 lock, uint32 protection) 6602 { 6603 switch (lock) { 6604 case B_NO_LOCK: 6605 break; 6606 case B_FULL_LOCK: 6607 case B_LAZY_LOCK: 6608 lock = B_32_BIT_FULL_LOCK; 6609 break; 6610 case B_CONTIGUOUS: 6611 lock = B_32_BIT_CONTIGUOUS; 6612 break; 6613 } 6614 6615 return __create_area_haiku(name, _address, addressSpec, size, lock, 6616 protection); 6617 } 6618 6619 6620 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 6621 "BASE"); 6622 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 6623 "map_physical_memory@", "BASE"); 6624 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 6625 "BASE"); 6626 6627 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6628 "get_memory_map@@", "1_ALPHA3"); 6629 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6630 "map_physical_memory@@", "1_ALPHA3"); 6631 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6632 "1_ALPHA3"); 6633 6634 6635 #else 6636 6637 6638 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6639 "get_memory_map@@", "BASE"); 6640 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6641 "map_physical_memory@@", "BASE"); 6642 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6643 "BASE"); 6644 6645 6646 #endif // defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6647
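// (Editor's note on the symbol versioning above: in ELF terms, "name@version"
// exports an additional, non-default compatibility version of a symbol, while
// "name@@version" marks the default one. On the 32-bit x86 build this lets
// binaries linked against the old BeOS ABI keep resolving get_memory_map(),
// map_physical_memory() and create_area() to the *_beos wrappers, while newly
// linked code picks up the Haiku variants; other configurations only export
// the default Haiku versions.)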