/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <vm/vm.h>

#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#include <algorithm>

#include <OS.h>
#include <KernelExport.h>

#include <AutoDeleter.h>

#include <symbol_versioning.h>

#include <arch/cpu.h>
#include <arch/vm.h>
#include <boot/elf.h>
#include <boot/stage2.h>
#include <condition_variable.h>
#include <console.h>
#include <debug.h>
#include <file_cache.h>
#include <fs/fd.h>
#include <heap.h>
#include <kernel.h>
#include <int.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <smp.h>
#include <system_info.h>
#include <thread.h>
#include <team.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/khash.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "VMAddressSpaceLocking.h"
#include "VMAnonymousCache.h"
#include "VMAnonymousNoSwapCache.h"
#include "IORequest.h"


//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif


class AreaCacheLocking {
public:
	inline bool Lock(VMCache* lockable)
	{
		return false;
	}

	inline void Unlock(VMCache* lockable)
	{
		vm_area_put_locked_cache(lockable);
	}
};

class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
public:
	inline AreaCacheLocker(VMCache* cache = NULL)
		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
	{
	}

	inline AreaCacheLocker(VMArea* area)
		: AutoLocker<VMCache, AreaCacheLocking>()
	{
		SetTo(area);
	}

	inline void SetTo(VMCache* cache, bool alreadyLocked)
	{
		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
	}

	inline void SetTo(VMArea* area)
	{
		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

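
// Illustrative sketch (not part of the original file): how the locker classes
// above are typically used together. cut_area() below follows this pattern.
// The parameter `area` is assumed to be valid and its address space to be
// write-locked by the caller.
#if 0
static void
example_lock_cache_chain(VMArea* area)
{
	// Lock the area's top cache and take a reference to it.
	VMCache* cache = vm_area_get_locked_cache(area);

	// Lock the whole source chain below it. All locks and references are
	// released automatically when the locker goes out of scope.
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	// ... operate on the cache chain ...
}
#endif
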
// The memory reserve an allocation of a certain priority must not touch.
static const size_t kMemoryReserveForPriority[] = {
	VM_MEMORY_RESERVE_USER,		// user
	VM_MEMORY_RESERVE_SYSTEM,	// system
	0							// VIP
};


ObjectCache* gPageMappingsObjectCache;

static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");

static off_t sAvailableMemory;
static off_t sNeededMemory;
static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
static uint32 sPageFaults;

static VMPhysicalPageMapper* sPhysicalPageMapper;

#if DEBUG_CACHE_LIST

struct cache_info {
	VMCache*	cache;
	addr_t		page_count;
	addr_t		committed;
};

static const int kCacheInfoTableCount = 100 * 1024;
static cache_info* sCacheInfoTable;

#endif	// DEBUG_CACHE_LIST


// function declarations
static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
	bool addressSpaceCleanup);
static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
	bool isWrite, bool isUser, vm_page** wirePage,
	VMAreaWiredRange* wiredRange = NULL);
static status_t map_backing_store(VMAddressSpace* addressSpace,
	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
	int protection, int mapping, uint32 flags,
	const virtual_address_restrictions* addressRestrictions, bool kernel,
	VMArea** _area, void** _virtualAddress);


//	#pragma mark -


#if VM_PAGE_FAULT_TRACING

namespace VMPageFaultTracing {

class PageFaultStart : public AbstractTraceEntry {
public:
	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
		:
		fAddress(address),
		fPC(pc),
		fWrite(write),
		fUser(user)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
	}

private:
	addr_t	fAddress;
	addr_t	fPC;
	bool	fWrite;
	bool	fUser;
};


// page fault errors
enum {
	PAGE_FAULT_ERROR_NO_AREA = 0,
	PAGE_FAULT_ERROR_KERNEL_ONLY,
	PAGE_FAULT_ERROR_WRITE_PROTECTED,
	PAGE_FAULT_ERROR_READ_PROTECTED,
	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
};


class PageFaultError : public AbstractTraceEntry {
public:
	PageFaultError(area_id area, status_t error)
		:
		fArea(area),
		fError(error)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		switch (fError) {
			case PAGE_FAULT_ERROR_NO_AREA:
				out.Print("page fault error: no area");
				break;
			case PAGE_FAULT_ERROR_KERNEL_ONLY:
				out.Print("page fault error: area: %ld, kernel only", fArea);
				break;
			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
				out.Print("page fault error: area: %ld, write protected",
					fArea);
				break;
			case PAGE_FAULT_ERROR_READ_PROTECTED:
				out.Print("page fault error: area: %ld, read protected", fArea);
				break;
			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
				out.Print("page fault error: kernel touching bad user memory");
				break;
			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
				out.Print("page fault error: no address space");
				break;
			default:
				out.Print("page fault error: area: %ld, error: %s", fArea,
					strerror(fError));
				break;
		}
	}

private:
	area_id		fArea;
	status_t	fError;
};


class PageFaultDone : public AbstractTraceEntry {
public:
	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
			vm_page* page)
		:
		fArea(area),
		fTopCache(topCache),
		fCache(cache),
		fPage(page)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
			"page: %p", fArea, fTopCache, fCache, fPage);
	}

private:
	area_id		fArea;
	VMCache*	fTopCache;
	VMCache*	fCache;
	vm_page*	fPage;
};

}	// namespace VMPageFaultTracing

#	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
#else
#	define TPF(x) ;
#endif	// VM_PAGE_FAULT_TRACING


//	#pragma mark -


/*!	The page's cache must be locked.
*/
static inline void
increment_page_wired_count(vm_page* page)
{
	if (!page->IsMapped())
		atomic_add(&gMappedPagesCount, 1);
	page->IncrementWiredCount();
}


/*!	The page's cache must be locked.
*/
static inline void
decrement_page_wired_count(vm_page* page)
{
	page->DecrementWiredCount();
	if (!page->IsMapped())
		atomic_add(&gMappedPagesCount, -1);
}


static inline addr_t
virtual_page_address(VMArea* area, vm_page* page)
{
	return area->Base()
		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
}

//!	You need to have the address space locked when calling this function
static VMArea*
lookup_area(VMAddressSpace* addressSpace, area_id id)
{
	VMAreaHash::ReadLock();

	VMArea* area = VMAreaHash::LookupLocked(id);
	if (area != NULL && area->address_space != addressSpace)
		area = NULL;

	VMAreaHash::ReadUnlock();

	return area;
}


static status_t
allocate_area_page_protections(VMArea* area)
{
	// In the page protections we store only the three user protections,
	// so we use 4 bits per page.
	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
	area->page_protections = (uint8*)malloc_etc(bytes,
		HEAP_DONT_LOCK_KERNEL_SPACE);
	if (area->page_protections == NULL)
		return B_NO_MEMORY;

	// init the page protections for all pages to that of the area
	uint32 areaProtection = area->protection
		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
	memset(area->page_protections, areaProtection | (areaProtection << 4),
		bytes);
	return B_OK;
}


static inline void
set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
{
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
	uint8& entry = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		entry = (entry & 0xf0) | protection;
	else
		entry = (entry & 0x0f) | (protection << 4);
}


static inline uint32
get_area_page_protection(VMArea* area, addr_t pageAddress)
{
	if (area->page_protections == NULL)
		return area->protection;

	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
	uint32 protection = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		protection &= 0x0f;
	else
		protection >>= 4;

	// If this is a kernel area we translate the user flags to kernel flags.
	if (area->address_space == VMAddressSpace::Kernel()) {
		uint32 kernelProtection = 0;
		if ((protection & B_READ_AREA) != 0)
			kernelProtection |= B_KERNEL_READ_AREA;
		if ((protection & B_WRITE_AREA) != 0)
			kernelProtection |= B_KERNEL_WRITE_AREA;

		return kernelProtection;
	}

	return protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
}


/*!	The caller must have reserved enough pages that the translation map
	implementation might need to map this page.
	The page's cache must be locked.
*/
static status_t
map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
	vm_page_reservation* reservation)
{
	VMTranslationMap* map = area->address_space->TranslationMap();

	bool wasMapped = page->IsMapped();

	if (area->wiring == B_NO_LOCK) {
		DEBUG_PAGE_ACCESS_CHECK(page);

		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
			gPageMappingsObjectCache,
			CACHE_DONT_WAIT_FOR_MEMORY
				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
		if (mapping == NULL)
			return B_NO_MEMORY;

		mapping->page = page;
		mapping->area = area;

		map->Lock();

		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);

		// insert mapping into lists
		if (!page->IsMapped())
			atomic_add(&gMappedPagesCount, 1);

		page->mappings.Add(mapping);
		area->mappings.Add(mapping);

		map->Unlock();
	} else {
		DEBUG_PAGE_ACCESS_CHECK(page);

		map->Lock();
		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);
		map->Unlock();

		increment_page_wired_count(page);
	}

	if (!wasMapped) {
		// The page is mapped now, so it must not remain in the cached queue.
		// It also makes sense to move it from the inactive to the active
		// queue, since otherwise the page daemon wouldn't come to keep track
		// of it (in idle mode) -- if the page isn't touched, it will be
		// deactivated after a full iteration through the queue at the latest.
		if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_INACTIVE) {
			vm_page_set_state(page, PAGE_STATE_ACTIVE);
		}
	}

	return B_OK;
}


/*!	If \a preserveModified is \c true, the caller must hold the lock of the
	page's cache.
*/
static inline bool
unmap_page(VMArea* area, addr_t virtualAddress)
{
	return area->address_space->TranslationMap()->UnmapPage(area,
		virtualAddress, true);
}


/*!	If \a preserveModified is \c true, the caller must hold the lock of all
	mapped pages' caches.
*/
static inline void
unmap_pages(VMArea* area, addr_t base, size_t size)
{
	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
}


/*!	Cuts a piece out of an area. If the given cut range covers the complete
	area, it is deleted. If it covers the beginning or the end, the area is
	resized accordingly. If the range covers some part in the middle of the
	area, it is split in two; in this case the second area is returned via
	\a _secondArea (the variable is left untouched in the other cases).
	The address space must be write locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
	addr_t lastAddress, VMArea** _secondArea, bool kernel)
{
	// Does the cut range intersect with the area at all?
	addr_t areaLast = area->Base() + (area->Size() - 1);
	if (area->Base() > lastAddress || areaLast < address)
		return B_OK;

	// Is the area fully covered?
	if (area->Base() >= address && areaLast <= lastAddress) {
		delete_area(addressSpace, area, false);
		return B_OK;
	}

	int priority;
	uint32 allocationFlags;
	if (addressSpace == VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_SYSTEM;
		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
			| HEAP_DONT_LOCK_KERNEL_SPACE;
	} else {
		priority = VM_PRIORITY_USER;
		allocationFlags = 0;
	}

	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

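	// Illustrative worked example (not part of the original file), assuming
	// B_PAGE_SIZE == 0x1000 and an area with Base() == 0x100000 and
	// Size() == 0x10000 (so areaLast == 0x10ffff):
	//  - cut [0x10c000, 0x10ffff]: "cut the end" below; the area shrinks to
	//    newSize == 0xc000;
	//  - cut [0x100000, 0x103fff]: "cut the beginning" below; the area moves
	//    to newBase == 0x104000 with newSize == 0xc000;
	//  - cut [0x104000, 0x107fff]: the "tough part" below; the area shrinks
	//    to firstNewSize == 0x4000 and a second area is created at
	//    secondBase == 0x108000 with secondSize == 0x8000.
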
	// Cut the end only?
	if (areaLast <= lastAddress) {
		size_t oldSize = area->Size();
		size_t newSize = address - area->Base();

		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
			allocationFlags);
		if (error != B_OK)
			return error;

		// unmap pages
		unmap_pages(area, address, oldSize - newSize);

		// If no one else uses the area's cache, we can resize it, too.
		if (cache->areas == area && area->cache_next == NULL
				&& cache->consumers.IsEmpty()
				&& cache->type == CACHE_TYPE_RAM) {
			// Since VMCache::Resize() can temporarily drop the lock, we must
			// unlock all lower caches to prevent locking order inversion.
			cacheChainLocker.Unlock(cache);
			cache->Resize(cache->virtual_base + newSize, priority);
			cache->ReleaseRefAndUnlock();
		}

		return B_OK;
	}

	// Cut the beginning only?
	if (area->Base() >= address) {
		addr_t oldBase = area->Base();
		addr_t newBase = lastAddress + 1;
		size_t newSize = areaLast - lastAddress;

		// unmap pages
		unmap_pages(area, oldBase, newBase - oldBase);

		// resize the area
		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
			allocationFlags);
		if (error != B_OK)
			return error;

		// TODO: If no one else uses the area's cache, we should resize it, too!

		area->cache_offset += newBase - oldBase;

		return B_OK;
	}

	// The tough part -- cut a piece out of the middle of the area.
	// We do that by shrinking the area to the beginning section and creating a
	// new area for the end section.

	addr_t firstNewSize = address - area->Base();
	addr_t secondBase = lastAddress + 1;
	addr_t secondSize = areaLast - lastAddress;

	// unmap pages
	unmap_pages(area, address, area->Size() - firstNewSize);

	// resize the area
	addr_t oldSize = area->Size();
	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
		allocationFlags);
	if (error != B_OK)
		return error;

	// TODO: If no one else uses the area's cache, we might want to create a
	// new cache for the second area, transfer the concerned pages from the
	// first cache to it and resize the first cache.

	// map the second area
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = (void*)secondBase;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	VMArea* secondArea;
	error = map_backing_store(addressSpace, cache,
		area->cache_offset + (secondBase - area->Base()), area->name,
		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, kernel, &secondArea, NULL);
	if (error != B_OK) {
		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
		return error;
	}

	// We need a cache reference for the new area.
	cache->AcquireRefLocked();

	if (_secondArea != NULL)
		*_secondArea = secondArea;

	return B_OK;
}


/*!	Deletes all areas in the given address range.
	The address space must be write-locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
	bool kernel)
{
	size = PAGE_ALIGN(size);
	addr_t lastAddress = address + (size - 1);

	// Check whether the caller is allowed to modify the concerned areas.
	if (!kernel) {
		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
				VMArea* area = it.Next();) {
			addr_t areaLast = area->Base() + (area->Size() - 1);
			if (area->Base() < lastAddress && address < areaLast) {
				if ((area->protection & B_KERNEL_AREA) != 0)
					return B_NOT_ALLOWED;
			}
		}
	}

	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
			VMArea* area = it.Next();) {
		addr_t areaLast = area->Base() + (area->Size() - 1);
		if (area->Base() < lastAddress && address < areaLast) {
			status_t error = cut_area(addressSpace, area, address,
				lastAddress, NULL, kernel);
			if (error != B_OK)
				return error;
				// Failing after already messing with areas is ugly, but we
				// can't do anything about it.
		}
	}

	return B_OK;
}


/*!	You need to hold the lock of the cache and the write lock of the address
	space when calling this function.
	Note, that in case of error your cache will be temporarily unlocked.
	If \a addressSpec is \c B_EXACT_ADDRESS and the
	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
	that no part of the specified address range (base \c *_virtualAddress, size
	\a size) is wired.
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
		addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		_area, areaName));
	cache->AssertLocked();

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		bool isStack = (protection & B_STACK_AREA) != 0;
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area, and must back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status != B_OK) {
		// TODO: wait and try again once this is working in the backend
#if 0
		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
				0, 0);
		}
#endif
		goto err2;
	}

	// attach the cache to the area
	area->cache = cache;
	area->cache_offset = offset;

	// point the cache back to the area
	cache->InsertAreaLocked(area);
	if (mapping == REGION_PRIVATE_MAP)
		cache->Unlock();

	// insert the area in the global area hash table
	VMAreaHash::Insert(area);

	// grab a ref to the address space (the area holds this)
	addressSpace->Get();

	// ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
	//	cache, sourceCache, areaName, area);

	*_area = area;
	return B_OK;

err2:
	if (mapping == REGION_PRIVATE_MAP) {
		// We created this cache, so we must delete it again. Note, that we
		// need to temporarily unlock the source cache or we'll otherwise
		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
		sourceCache->Unlock();
		cache->ReleaseRefAndUnlock();
		sourceCache->Lock();
	}
err1:
	addressSpace->DeleteArea(area, allocationFlags);
	return status;
}


/*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
	locker1, locker2).
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*!	Checks whether the given area has any wired ranges intersecting with the
	specified range and waits, if so.

	When it has to wait, the function calls \c Unlock() on both \a locker1
	and \a locker2, if given.
	The area's top cache must be locked and must be unlocked as a side effect
	of calling \c Unlock() on either \a locker1 or \a locker2.

	If the function does not have to wait it does not modify or unlock any
	object.

	\param area The area to be checked.
	\param base The base address of the range to check.
	\param size The size of the address range to check.
	\param locker1 An object to be unlocked before starting to wait (may
		be \c NULL).
	\param locker2 An object to be unlocked before starting to wait (may
		be \c NULL).
	\return \c true, if the function had to wait, \c false otherwise.
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
	LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter, base, size))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*!	Checks whether the given address space has any wired ranges intersecting
	with the specified range and waits, if so.

	Similar to wait_if_area_range_is_wired(), with the following differences:
	- All areas intersecting with the range are checked (respectively all until
	  one is found that contains a wired range intersecting with the given
	  range).
	- The given address space must at least be read-locked and must be unlocked
	  when \c Unlock() is called on \a locker.
	- None of the areas' caches are allowed to be locked.
*/
template<typename LockerType>
static inline bool
wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
	size_t size, LockerType* locker)
{
	addr_t end = base + size - 1;
	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
			VMArea* area = it.Next();) {
		// TODO: Introduce a VMAddressSpace method to get a close iterator!
		if (area->Base() > end)
			return false;

		if (base >= area->Base() + area->Size() - 1)
			continue;

		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));

		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
			return true;
	}

	return false;
}


/*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
	It must be called in a situation where the kernel address space may be
	locked.
*/
status_t
vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
{
	AddressSpaceReadLocker locker;
	VMArea* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	if (area->page_protections == NULL) {
		status = allocate_area_page_protections(area);
		if (status != B_OK)
			return status;
	}

	*cookie = (void*)area;
	return B_OK;
}

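
// Illustrative sketch (not part of the original file): typical use of the
// debug protection pair above and below. The area id and the address/size
// values are assumptions for the example.
#if 0
static void
example_debug_protect(area_id area, void* address, size_t size)
{
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(area, &cookie) != B_OK)
		return;

	// Mark the range non-present, so any access faults...
	vm_set_kernel_area_debug_protection(cookie, address, size, 0);

	// ...and later make it accessible again.
	vm_set_kernel_area_debug_protection(cookie, address, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif
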
/*!	This is a debug helper function that can only be used with very specific
	use cases.
	Sets protection for the given address range to the protection specified.
	If \a protection is 0 then the involved pages will be marked non-present
	in the translation map to cause a fault on access. The pages aren't
	actually unmapped however so that they can be marked present again with
	additional calls to this function. For this to work the area must be
	fully locked in memory so that the pages aren't otherwise touched.
	This function does not lock the kernel address space and needs to be
	supplied with a \a cookie retrieved from a successful call to
	vm_prepare_kernel_area_debug_protection().
*/
status_t
vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
	uint32 protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0
		|| (addr_t)address + size < (addr_t)address
		|| !IS_KERNEL_ADDRESS(address)
		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
		return B_BAD_VALUE;
	}

	// Translate the kernel protection to user protection as we only store
	// that.
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookup will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel()
			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
}


status_t
vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
	addr_t size, uint32 flags)
{
	if (size == 0)
		return B_BAD_VALUE;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec;
	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
		addressSpace == VMAddressSpace::Kernel()
			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
		_address);
}


area_id
vm_create_anonymous_area(team_id team, const char *name, addr_t size,
	uint32 wiring, uint32 protection, uint32 flags,
	const virtual_address_restrictions* virtualAddressRestrictions,
	const physical_address_restrictions* physicalAddressRestrictions,
	bool kernel, void** _address)
{
	VMArea* area;
	VMCache* cache;
	vm_page* page = NULL;
	bool isStack = (protection & B_STACK_AREA) != 0;
	page_num_t guardPages;
	bool canOvercommit = false;
	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
		? VM_PAGE_ALLOC_CLEAR : 0;

	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
		team, name, size));

	size = PAGE_ALIGN(size);

	if (size == 0)
		return B_BAD_VALUE;
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
		canOvercommit = true;

#ifdef DEBUG_KERNEL_STACKS
	if ((protection & B_KERNEL_STACK_AREA) != 0)
		isStack = true;
#endif

	// check parameters
	switch (virtualAddressRestrictions->address_specification) {
		case B_ANY_ADDRESS:
		case B_EXACT_ADDRESS:
		case B_BASE_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			break;

		default:
			return B_BAD_VALUE;
	}

	// If low or high physical address restrictions are given, we force
	// B_CONTIGUOUS wiring, since only then we'll use
	// vm_page_allocate_page_run() which deals with those restrictions.
	if (physicalAddressRestrictions->low_address != 0
		|| physicalAddressRestrictions->high_address != 0) {
		wiring = B_CONTIGUOUS;
	}

	physical_address_restrictions stackPhysicalRestrictions;
	bool doReserveMemory = false;
	switch (wiring) {
		case B_NO_LOCK:
			break;
		case B_FULL_LOCK:
		case B_LAZY_LOCK:
		case B_CONTIGUOUS:
			doReserveMemory = true;
			break;
		case B_ALREADY_WIRED:
			break;
		case B_LOMEM:
			stackPhysicalRestrictions = *physicalAddressRestrictions;
			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
			physicalAddressRestrictions = &stackPhysicalRestrictions;
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		case B_32_BIT_FULL_LOCK:
			if (B_HAIKU_PHYSICAL_BITS <= 32
				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
				wiring = B_FULL_LOCK;
				doReserveMemory = true;
				break;
			}
			// TODO: We don't really support this mode efficiently. Just fall
			// through for now ...
		case B_32_BIT_CONTIGUOUS:
#if B_HAIKU_PHYSICAL_BITS > 32
			if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
				stackPhysicalRestrictions = *physicalAddressRestrictions;
				stackPhysicalRestrictions.high_address
					= (phys_addr_t)1 << 32;
				physicalAddressRestrictions = &stackPhysicalRestrictions;
			}
#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory
	// handler won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to do so, since we currently
		// don't reclaim them (and probably can't reclaim all of them anyway).
		// Thus there are actually fewer physical pages than there should be,
		// which can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	VMAddressSpace* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
	page_num_t reservedPages = reservedMapPages;
	if (wiring == B_FULL_LOCK)
		reservedPages += size / B_PAGE_SIZE;

	vm_page_reservation reservation;
	if (reservedPages > 0) {
		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
					priority)) {
				reservedPages = 0;
				status = B_WOULD_BLOCK;
				goto err0;
			}
		} else
			vm_page_reserve_pages(&reservation, reservedPages, priority);
	}

	if (wiring == B_CONTIGUOUS) {
		// we try to allocate the page run here upfront as this may easily
		// fail for obvious reasons
		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
		if (page == NULL) {
			status = B_NO_MEMORY;
			goto err0;
		}
	}

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	do {
		status = locker.SetTo(team);
		if (status != B_OK)
			goto err1;

		addressSpace = locker.AddressSpace();
	} while (virtualAddressRestrictions->address_specification
			== B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(addressSpace,
			(addr_t)virtualAddressRestrictions->address, size, &locker));

	// create an anonymous cache
	// if it's a stack, make sure that two pages are available at least
	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
		wiring == B_NO_LOCK, priority);
	if (status != B_OK)
		goto err1;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->committed_size = reservedMemory;
		// TODO: This should be done via a method.
	reservedMemory = 0;

	cache->Lock();

	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
		kernel, &area, _address);

	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		goto err1;
	}

	locker.DegradeToReadLock();

	switch (wiring) {
		case B_NO_LOCK:
		case B_LAZY_LOCK:
			// do nothing - the pages are mapped in as needed
			break;

		case B_FULL_LOCK:
		{
			// Allocate and map all pages for this area

			off_t offset = 0;
			for (addr_t address = area->Base();
					address < area->Base() + (area->Size() - 1);
					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
				if (isStack && address < area->Base()
						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	else
				if (isStack && address >= area->Base() + area->Size()
						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	endif
					continue;
#endif
				vm_page* page = vm_page_allocate_page(&reservation,
					PAGE_STATE_WIRED | pageAllocFlags);
				cache->InsertPage(page, offset);
				map_page(area, page, address, protection, &reservation);

				DEBUG_PAGE_ACCESS_END(page);
			}

			break;
		}

		case B_ALREADY_WIRED:
		{
			// The pages should already be mapped. This is only really useful
			// during boot time. Find the appropriate vm_page objects and stick
			// them in the cache object.
			VMTranslationMap* map = addressSpace->TranslationMap();
			off_t offset = 0;

			if (!gKernelStartup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			map->Lock();

			for (addr_t virtualAddress = area->Base();
					virtualAddress < area->Base() + (area->Size() - 1);
					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
				phys_addr_t physicalAddress;
				uint32 flags;
				status = map->Query(virtualAddress, &physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
						"\n", physicalAddress);
				}

				DEBUG_PAGE_ACCESS_START(page);

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);
				vm_page_set_state(page, PAGE_STATE_WIRED);
				page->busy = false;

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can now
			// just map the pages in the address space.
			VMTranslationMap* map = addressSpace->TranslationMap();
			phys_addr_t physicalAddress
				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress = area->Base();
			off_t offset = 0;

			map->Lock();

			for (virtualAddress = area->Base(); virtualAddress < area->Base()
					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->Map(virtualAddress, physicalAddress, protection,
					area->MemoryType(), &reservation);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		default:
			break;
	}

	cache->Unlock();

	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
		phys_addr_t pageNumber = page->physical_page_number;
		int32 i;
		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
			page = vm_lookup_page(pageNumber);
			if (page == NULL)
				panic("couldn't lookup physical page just allocated\n");

			vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

err0:
	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);
	if (reservedMemory > 0)
		vm_unreserve_memory(reservedMemory);

	return status;
}


area_id
vm_map_physical_memory(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t size, uint32 protection,
	phys_addr_t physicalAddress, bool alreadyWired)
{
	VMArea* area;
	VMCache* cache;
	addr_t mapOffset;

	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
		addressSpec, size, protection, physicalAddress));

	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	// if the physical address is somewhat inside a page,
	// move the actual area down to align on a page boundary
	mapOffset = physicalAddress % B_PAGE_SIZE;
	size += mapOffset;
	physicalAddress -= mapOffset;

	size = PAGE_ALIGN(size);

	// create a device cache
	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
	if (status != B_OK)
		return status;

	cache->virtual_end = size;

	cache->Lock();

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
		true, &area, _address);

	if (status < B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (status == B_OK) {
		// set requested memory type -- use uncached, if not given
		uint32 memoryType = addressSpec & B_MTR_MASK;
		if (memoryType == 0)
			memoryType = B_MTR_UC;

		area->SetMemoryType(memoryType);

		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
		if (status != B_OK)
			delete_area(locker.AddressSpace(), area, false);
	}

	if (status != B_OK)
		return status;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();

	if (alreadyWired) {
		// The area is already mapped, but possibly not with the right
		// memory type.
		map->Lock();
		map->ProtectArea(area, area->protection);
		map->Unlock();
	} else {
		// Map the area completely.

		// reserve pages needed for the mapping
		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
			area->Base() + (size - 1));
		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, reservePages,
			team == VMAddressSpace::KernelID()
				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);

		map->Lock();

		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
			map->Map(area->Base() + offset, physicalAddress + offset,
				protection, area->MemoryType(), &reservation);
		}

		map->Unlock();

		vm_page_unreserve_pages(&reservation);
	}

	// modify the pointer returned to be offset back into the new area
	// the same way the physical address in was offset
	*_address = (void*)((addr_t)*_address + mapOffset);

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


/*!	Don't use!
	TODO: This function was introduced to map physical page vecs to
	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
	use a device cache and does not track vm_page::wired_count!
*/
area_id
vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t* _size, uint32 protection,
	struct generic_io_vec* vecs, uint32 vecCount)
{
	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
		addressSpec, _size, protection, vecs, vecCount));

	if (!arch_vm_supports_protection(protection)
		|| (addressSpec & B_MTR_MASK) != 0) {
		return B_NOT_SUPPORTED;
	}

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	if (vecCount == 0)
		return B_BAD_VALUE;

	addr_t size = 0;
	for (uint32 i = 0; i < vecCount; i++) {
		if (vecs[i].base % B_PAGE_SIZE != 0
			|| vecs[i].length % B_PAGE_SIZE != 0) {
			return B_BAD_VALUE;
		}

		size += vecs[i].length;
	}

	// create a device cache
	VMCache* cache;
	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
	if (result != B_OK)
		return result;

	cache->virtual_end = size;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, _address);

	if (result != B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (result != B_OK)
		return result;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
		area->Base() + (size - 1));

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages,
		team == VMAddressSpace::KernelID()
			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
	map->Lock();

	uint32 vecIndex = 0;
	size_t vecOffset = 0;
	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
			vecOffset = 0;
			vecIndex++;
		}

		if (vecIndex >= vecCount)
			break;

		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
			protection, area->MemoryType(), &reservation);

		vecOffset += B_PAGE_SIZE;
	}

	map->Unlock();
	vm_page_unreserve_pages(&reservation);

	if (_size != NULL)
		*_size = size;

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


area_id
vm_create_null_area(team_id team, const char* name, void** address,
	uint32 addressSpec, addr_t size, uint32 flags)
{
	size = PAGE_ALIGN(size);

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	AddressSpaceWriteLocker locker;
	do {
		if (locker.SetTo(team) != B_OK)
			return B_BAD_TEAM_ID;
	} while (addressSpec == B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(locker.AddressSpace(),
			(addr_t)*address, size, &locker));

	// create a null cache
	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
	VMCache* cache;
	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *address;
	addressRestrictions.address_specification = addressSpec;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
		&addressRestrictions, true, &area, address);

	if (status < B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();

	area->cache_type = CACHE_TYPE_NULL;
	return area->id;
}


/*!	Creates the vnode cache for the specified \a vnode.
	The vnode has to be marked busy when calling this function.
*/
status_t
vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
{
	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
}

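
// Illustrative sketch (not part of the original file): how a driver-style
// caller might map a physical register range into the kernel address space
// using vm_map_physical_memory() above. The physical base address and size
// are assumptions for the example.
#if 0
static area_id
example_map_device_registers(phys_addr_t physicalBase, size_t size)
{
	void* virtualBase;
	return vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example device registers", &virtualBase, B_ANY_KERNEL_ADDRESS, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase, false);
}
#endif
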
1822 */ 1823 static void 1824 pre_map_area_pages(VMArea* area, VMCache* cache, 1825 vm_page_reservation* reservation) 1826 { 1827 addr_t baseAddress = area->Base(); 1828 addr_t cacheOffset = area->cache_offset; 1829 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1830 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1831 1832 for (VMCachePagesTree::Iterator it 1833 = cache->pages.GetIterator(firstPage, true, true); 1834 vm_page* page = it.Next();) { 1835 if (page->cache_offset >= endPage) 1836 break; 1837 1838 // skip busy and inactive pages 1839 if (page->busy || page->usage_count == 0) 1840 continue; 1841 1842 DEBUG_PAGE_ACCESS_START(page); 1843 map_page(area, page, 1844 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1845 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1846 DEBUG_PAGE_ACCESS_END(page); 1847 } 1848 } 1849 1850 1851 /*! Will map the file specified by \a fd to an area in memory. 1852 The file will be mirrored beginning at the specified \a offset. The 1853 \a offset and \a size arguments have to be page aligned. 1854 */ 1855 static area_id 1856 _vm_map_file(team_id team, const char* name, void** _address, 1857 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 1858 bool unmapAddressRange, int fd, off_t offset, bool kernel) 1859 { 1860 // TODO: for binary files, we want to make sure that they get the 1861 // copy of a file at a given time, ie. later changes should not 1862 // make it into the mapped copy -- this will need quite some changes 1863 // to be done in a nice way 1864 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 1865 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 1866 1867 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 1868 size = PAGE_ALIGN(size); 1869 1870 if (mapping == REGION_NO_PRIVATE_MAP) 1871 protection |= B_SHARED_AREA; 1872 if (addressSpec != B_EXACT_ADDRESS) 1873 unmapAddressRange = false; 1874 1875 if (fd < 0) { 1876 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 1877 virtual_address_restrictions virtualRestrictions = {}; 1878 virtualRestrictions.address = *_address; 1879 virtualRestrictions.address_specification = addressSpec; 1880 physical_address_restrictions physicalRestrictions = {}; 1881 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 1882 flags, &virtualRestrictions, &physicalRestrictions, kernel, 1883 _address); 1884 } 1885 1886 // get the open flags of the FD 1887 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 1888 if (descriptor == NULL) 1889 return EBADF; 1890 int32 openMode = descriptor->open_mode; 1891 put_fd(descriptor); 1892 1893 // The FD must open for reading at any rate. For shared mapping with write 1894 // access, additionally the FD must be open for writing. 1895 if ((openMode & O_ACCMODE) == O_WRONLY 1896 || (mapping == REGION_NO_PRIVATE_MAP 1897 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 1898 && (openMode & O_ACCMODE) == O_RDONLY)) { 1899 return EACCES; 1900 } 1901 1902 // get the vnode for the object, this also grabs a ref to it 1903 struct vnode* vnode = NULL; 1904 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 1905 if (status < B_OK) 1906 return status; 1907 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 1908 1909 // If we're going to pre-map pages, we need to reserve the pages needed by 1910 // the mapping backend upfront. 
1911 page_num_t reservedPreMapPages = 0; 1912 vm_page_reservation reservation; 1913 if ((protection & B_READ_AREA) != 0) { 1914 AddressSpaceWriteLocker locker; 1915 status = locker.SetTo(team); 1916 if (status != B_OK) 1917 return status; 1918 1919 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1920 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 1921 1922 locker.Unlock(); 1923 1924 vm_page_reserve_pages(&reservation, reservedPreMapPages, 1925 team == VMAddressSpace::KernelID() 1926 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1927 } 1928 1929 struct PageUnreserver { 1930 PageUnreserver(vm_page_reservation* reservation) 1931 : 1932 fReservation(reservation) 1933 { 1934 } 1935 1936 ~PageUnreserver() 1937 { 1938 if (fReservation != NULL) 1939 vm_page_unreserve_pages(fReservation); 1940 } 1941 1942 vm_page_reservation* fReservation; 1943 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 1944 1945 // Lock the address space and, if the specified address range shall be 1946 // unmapped, ensure it is not wired. 1947 AddressSpaceWriteLocker locker; 1948 do { 1949 if (locker.SetTo(team) != B_OK) 1950 return B_BAD_TEAM_ID; 1951 } while (unmapAddressRange 1952 && wait_if_address_range_is_wired(locker.AddressSpace(), 1953 (addr_t)*_address, size, &locker)); 1954 1955 // TODO: this only works for file systems that use the file cache 1956 VMCache* cache; 1957 status = vfs_get_vnode_cache(vnode, &cache, false); 1958 if (status < B_OK) 1959 return status; 1960 1961 cache->Lock(); 1962 1963 VMArea* area; 1964 virtual_address_restrictions addressRestrictions = {}; 1965 addressRestrictions.address = *_address; 1966 addressRestrictions.address_specification = addressSpec; 1967 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 1968 0, protection, mapping, 1969 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 1970 &addressRestrictions, kernel, &area, _address); 1971 1972 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 1973 // map_backing_store() cannot know we no longer need the ref 1974 cache->ReleaseRefLocked(); 1975 } 1976 1977 if (status == B_OK && (protection & B_READ_AREA) != 0) 1978 pre_map_area_pages(area, cache, &reservation); 1979 1980 cache->Unlock(); 1981 1982 if (status == B_OK) { 1983 // TODO: this probably deserves a smarter solution, ie. don't always 1984 // prefetch stuff, and also, probably don't trigger it at this place. 
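// The prefetch below is a best-effort heuristic: asking the file cache to
// read ahead makes it more likely that the first faults on the new mapping
// find their pages already in memory.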
1985 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1986 // prefetches at max 10 MB starting from "offset" 1987 } 1988 1989 if (status != B_OK) 1990 return status; 1991 1992 area->cache_type = CACHE_TYPE_VNODE; 1993 return area->id; 1994 } 1995 1996 1997 area_id 1998 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 1999 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2000 int fd, off_t offset) 2001 { 2002 if (!arch_vm_supports_protection(protection)) 2003 return B_NOT_SUPPORTED; 2004 2005 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2006 mapping, unmapAddressRange, fd, offset, true); 2007 } 2008 2009 2010 VMCache* 2011 vm_area_get_locked_cache(VMArea* area) 2012 { 2013 rw_lock_read_lock(&sAreaCacheLock); 2014 2015 while (true) { 2016 VMCache* cache = area->cache; 2017 2018 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2019 // cache has been deleted 2020 rw_lock_read_lock(&sAreaCacheLock); 2021 continue; 2022 } 2023 2024 rw_lock_read_lock(&sAreaCacheLock); 2025 2026 if (cache == area->cache) { 2027 cache->AcquireRefLocked(); 2028 rw_lock_read_unlock(&sAreaCacheLock); 2029 return cache; 2030 } 2031 2032 // the cache changed in the meantime 2033 cache->Unlock(); 2034 } 2035 } 2036 2037 2038 void 2039 vm_area_put_locked_cache(VMCache* cache) 2040 { 2041 cache->ReleaseRefAndUnlock(); 2042 } 2043 2044 2045 area_id 2046 vm_clone_area(team_id team, const char* name, void** address, 2047 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2048 bool kernel) 2049 { 2050 VMArea* newArea = NULL; 2051 VMArea* sourceArea; 2052 2053 // Check whether the source area exists and is cloneable. If so, mark it 2054 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2055 { 2056 AddressSpaceWriteLocker locker; 2057 status_t status = locker.SetFromArea(sourceID, sourceArea); 2058 if (status != B_OK) 2059 return status; 2060 2061 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2062 return B_NOT_ALLOWED; 2063 2064 sourceArea->protection |= B_SHARED_AREA; 2065 protection |= B_SHARED_AREA; 2066 } 2067 2068 // Now lock both address spaces and actually do the cloning. 2069 2070 MultiAddressSpaceLocker locker; 2071 VMAddressSpace* sourceAddressSpace; 2072 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2073 if (status != B_OK) 2074 return status; 2075 2076 VMAddressSpace* targetAddressSpace; 2077 status = locker.AddTeam(team, true, &targetAddressSpace); 2078 if (status != B_OK) 2079 return status; 2080 2081 status = locker.Lock(); 2082 if (status != B_OK) 2083 return status; 2084 2085 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2086 if (sourceArea == NULL) 2087 return B_BAD_VALUE; 2088 2089 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2090 return B_NOT_ALLOWED; 2091 2092 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2093 2094 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2095 // have been adapted. Maybe it should be part of the kernel settings, 2096 // anyway (so that old drivers can always work). 
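// With the check below compiled out, the only rejection left at this point
// is for null areas; areas with B_KERNEL_AREA set have already been filtered
// out above for userland callers.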
2097 #if 0 2098 if (sourceArea->aspace == VMAddressSpace::Kernel() 2099 && addressSpace != VMAddressSpace::Kernel() 2100 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2101 // kernel areas must not be cloned in userland, unless explicitly 2102 // declared user-cloneable upon construction 2103 status = B_NOT_ALLOWED; 2104 } else 2105 #endif 2106 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2107 status = B_NOT_ALLOWED; 2108 else { 2109 virtual_address_restrictions addressRestrictions = {}; 2110 addressRestrictions.address = *address; 2111 addressRestrictions.address_specification = addressSpec; 2112 status = map_backing_store(targetAddressSpace, cache, 2113 sourceArea->cache_offset, name, sourceArea->Size(), 2114 sourceArea->wiring, protection, mapping, 0, &addressRestrictions, 2115 kernel, &newArea, address); 2116 } 2117 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2118 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2119 // to create a new cache, and has therefore already acquired a reference 2120 // to the source cache - but otherwise it has no idea that we need 2121 // one. 2122 cache->AcquireRefLocked(); 2123 } 2124 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2125 // we need to map in everything at this point 2126 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2127 // we don't have actual pages to map but a physical area 2128 VMTranslationMap* map 2129 = sourceArea->address_space->TranslationMap(); 2130 map->Lock(); 2131 2132 phys_addr_t physicalAddress; 2133 uint32 oldProtection; 2134 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2135 2136 map->Unlock(); 2137 2138 map = targetAddressSpace->TranslationMap(); 2139 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2140 newArea->Base() + (newArea->Size() - 1)); 2141 2142 vm_page_reservation reservation; 2143 vm_page_reserve_pages(&reservation, reservePages, 2144 targetAddressSpace == VMAddressSpace::Kernel() 2145 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2146 map->Lock(); 2147 2148 for (addr_t offset = 0; offset < newArea->Size(); 2149 offset += B_PAGE_SIZE) { 2150 map->Map(newArea->Base() + offset, physicalAddress + offset, 2151 protection, newArea->MemoryType(), &reservation); 2152 } 2153 2154 map->Unlock(); 2155 vm_page_unreserve_pages(&reservation); 2156 } else { 2157 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2158 size_t reservePages = map->MaxPagesNeededToMap( 2159 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2160 vm_page_reservation reservation; 2161 vm_page_reserve_pages(&reservation, reservePages, 2162 targetAddressSpace == VMAddressSpace::Kernel() 2163 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2164 2165 // map in all pages from source 2166 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2167 vm_page* page = it.Next();) { 2168 if (!page->busy) { 2169 DEBUG_PAGE_ACCESS_START(page); 2170 map_page(newArea, page, 2171 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2172 - newArea->cache_offset), 2173 protection, &reservation); 2174 DEBUG_PAGE_ACCESS_END(page); 2175 } 2176 } 2177 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2178 // ensuring that! 2179 2180 vm_page_unreserve_pages(&reservation); 2181 } 2182 } 2183 if (status == B_OK) 2184 newArea->cache_type = sourceArea->cache_type; 2185 2186 vm_area_put_locked_cache(cache); 2187 2188 if (status < B_OK) 2189 return status; 2190 2191 return newArea->id; 2192 } 2193 2194 2195 /*! 
Deletes the specified area of the given address space. 2196 2197 The address space must be write-locked. 2198 The caller must ensure that the area does not have any wired ranges. 2199 2200 \param addressSpace The address space containing the area. 2201 \param area The area to be deleted. 2202 \param deletingAddressSpace \c true, if the address space is in the process 2203 of being deleted. 2204 */ 2205 static void 2206 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2207 bool deletingAddressSpace) 2208 { 2209 ASSERT(!area->IsWired()); 2210 2211 VMAreaHash::Remove(area); 2212 2213 // At this point the area is removed from the global hash table, but 2214 // still exists in the area list. 2215 2216 // Unmap the virtual address space the area occupied. 2217 { 2218 // We need to lock the complete cache chain. 2219 VMCache* topCache = vm_area_get_locked_cache(area); 2220 VMCacheChainLocker cacheChainLocker(topCache); 2221 cacheChainLocker.LockAllSourceCaches(); 2222 2223 // If the area's top cache is a temporary cache and the area is the only 2224 // one referencing it (besides us currently holding a second reference), 2225 // the unmapping code doesn't need to care about preserving the accessed 2226 // and dirty flags of the top cache page mappings. 2227 bool ignoreTopCachePageFlags 2228 = topCache->temporary && topCache->RefCount() == 2; 2229 2230 area->address_space->TranslationMap()->UnmapArea(area, 2231 deletingAddressSpace, ignoreTopCachePageFlags); 2232 } 2233 2234 if (!area->cache->temporary) 2235 area->cache->WriteModified(); 2236 2237 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2238 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2239 2240 arch_vm_unset_memory_type(area); 2241 addressSpace->RemoveArea(area, allocationFlags); 2242 addressSpace->Put(); 2243 2244 area->cache->RemoveArea(area); 2245 area->cache->ReleaseRef(); 2246 2247 addressSpace->DeleteArea(area, allocationFlags); 2248 } 2249 2250 2251 status_t 2252 vm_delete_area(team_id team, area_id id, bool kernel) 2253 { 2254 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2255 team, id)); 2256 2257 // lock the address space and make sure the area isn't wired 2258 AddressSpaceWriteLocker locker; 2259 VMArea* area; 2260 AreaCacheLocker cacheLocker; 2261 2262 do { 2263 status_t status = locker.SetFromArea(team, id, area); 2264 if (status != B_OK) 2265 return status; 2266 2267 cacheLocker.SetTo(area); 2268 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2269 2270 cacheLocker.Unlock(); 2271 2272 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2273 return B_NOT_ALLOWED; 2274 2275 delete_area(locker.AddressSpace(), area, false); 2276 return B_OK; 2277 } 2278 2279 2280 /*! Creates a new cache on top of given cache, moves all areas from 2281 the old cache to the new one, and changes the protection of all affected 2282 areas' pages to read-only. If requested, wired pages are moved up to the 2283 new cache and copies are added to the old cache in their place. 2284 Preconditions: 2285 - The given cache must be locked. 2286 - All of the cache's areas' address spaces must be read locked. 2287 - Either the cache must not have any wired ranges or a page reservation for 2288 all wired pages must be provided, so they can be copied. 2289 2290 \param lowerCache The cache on top of which a new cache shall be created. 2291 \param wiredPagesReservation If \c NULL there must not be any wired pages 2292 in \a lowerCache. 
Otherwise as many pages must be reserved as the cache 2293 has wired pages. The wired pages are copied in this case. 2294 */ 2295 static status_t 2296 vm_copy_on_write_area(VMCache* lowerCache, 2297 vm_page_reservation* wiredPagesReservation) 2298 { 2299 VMCache* upperCache; 2300 2301 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2302 2303 // We need to separate the cache from its areas. The cache goes one level 2304 // deeper and we create a new cache in between. 2305 2306 // create an anonymous cache 2307 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2308 0, dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2309 VM_PRIORITY_USER); 2310 if (status != B_OK) 2311 return status; 2312 2313 upperCache->Lock(); 2314 2315 upperCache->temporary = 1; 2316 upperCache->virtual_base = lowerCache->virtual_base; 2317 upperCache->virtual_end = lowerCache->virtual_end; 2318 2319 // transfer the lower cache areas to the upper cache 2320 rw_lock_write_lock(&sAreaCacheLock); 2321 upperCache->TransferAreas(lowerCache); 2322 rw_lock_write_unlock(&sAreaCacheLock); 2323 2324 lowerCache->AddConsumer(upperCache); 2325 2326 // We now need to remap all pages from all of the cache's areas read-only, 2327 // so that a copy will be created on next write access. If there are wired 2328 // pages, we keep their protection, move them to the upper cache and create 2329 // copies for the lower cache. 2330 if (wiredPagesReservation != NULL) { 2331 // We need to handle wired pages -- iterate through the cache's pages. 2332 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2333 vm_page* page = it.Next();) { 2334 if (page->WiredCount() > 0) { 2335 // allocate a new page and copy the wired one 2336 vm_page* copiedPage = vm_page_allocate_page( 2337 wiredPagesReservation, PAGE_STATE_ACTIVE); 2338 2339 vm_memcpy_physical_page( 2340 copiedPage->physical_page_number * B_PAGE_SIZE, 2341 page->physical_page_number * B_PAGE_SIZE); 2342 2343 // move the wired page to the upper cache (note: removing is OK 2344 // with the SplayTree iterator) and insert the copy 2345 upperCache->MovePage(page); 2346 lowerCache->InsertPage(copiedPage, 2347 page->cache_offset * B_PAGE_SIZE); 2348 2349 DEBUG_PAGE_ACCESS_END(copiedPage); 2350 } else { 2351 // Change the protection of this page in all areas. 2352 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2353 tempArea = tempArea->cache_next) { 2354 // The area must be readable in the same way it was 2355 // previously writable. 2356 uint32 protection = B_KERNEL_READ_AREA; 2357 if ((tempArea->protection & B_READ_AREA) != 0) 2358 protection |= B_READ_AREA; 2359 2360 VMTranslationMap* map 2361 = tempArea->address_space->TranslationMap(); 2362 map->Lock(); 2363 map->ProtectPage(tempArea, 2364 virtual_page_address(tempArea, page), protection); 2365 map->Unlock(); 2366 } 2367 } 2368 } 2369 } else { 2370 ASSERT(lowerCache->WiredPagesCount() == 0); 2371 2372 // just change the protection of all areas 2373 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2374 tempArea = tempArea->cache_next) { 2375 // The area must be readable in the same way it was previously 2376 // writable.
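// (Dropping the write permission here is what forces the next write access
// to fault, at which point the faulted page can be copied into the upper
// cache.)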
2377 uint32 protection = B_KERNEL_READ_AREA; 2378 if ((tempArea->protection & B_READ_AREA) != 0) 2379 protection |= B_READ_AREA; 2380 2381 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2382 map->Lock(); 2383 map->ProtectArea(tempArea, protection); 2384 map->Unlock(); 2385 } 2386 } 2387 2388 vm_area_put_locked_cache(upperCache); 2389 2390 return B_OK; 2391 } 2392 2393 2394 area_id 2395 vm_copy_area(team_id team, const char* name, void** _address, 2396 uint32 addressSpec, uint32 protection, area_id sourceID) 2397 { 2398 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2399 2400 if ((protection & B_KERNEL_PROTECTION) == 0) { 2401 // set the same protection for the kernel as for userland 2402 protection |= B_KERNEL_READ_AREA; 2403 if (writableCopy) 2404 protection |= B_KERNEL_WRITE_AREA; 2405 } 2406 2407 // Do the locking: target address space, all address spaces associated with 2408 // the source cache, and the cache itself. 2409 MultiAddressSpaceLocker locker; 2410 VMAddressSpace* targetAddressSpace; 2411 VMCache* cache; 2412 VMArea* source; 2413 AreaCacheLocker cacheLocker; 2414 status_t status; 2415 bool sharedArea; 2416 2417 page_num_t wiredPages = 0; 2418 vm_page_reservation wiredPagesReservation; 2419 2420 bool restart; 2421 do { 2422 restart = false; 2423 2424 locker.Unset(); 2425 status = locker.AddTeam(team, true, &targetAddressSpace); 2426 if (status == B_OK) { 2427 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2428 &cache); 2429 } 2430 if (status != B_OK) 2431 return status; 2432 2433 cacheLocker.SetTo(cache, true); // already locked 2434 2435 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2436 2437 page_num_t oldWiredPages = wiredPages; 2438 wiredPages = 0; 2439 2440 // If the source area isn't shared, count the number of wired pages in 2441 // the cache and reserve as many pages. 2442 if (!sharedArea) { 2443 wiredPages = cache->WiredPagesCount(); 2444 2445 if (wiredPages > oldWiredPages) { 2446 cacheLocker.Unlock(); 2447 locker.Unlock(); 2448 2449 if (oldWiredPages > 0) 2450 vm_page_unreserve_pages(&wiredPagesReservation); 2451 2452 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2453 VM_PRIORITY_USER); 2454 2455 restart = true; 2456 } 2457 } else if (oldWiredPages > 0) 2458 vm_page_unreserve_pages(&wiredPagesReservation); 2459 } while (restart); 2460 2461 // unreserve pages later 2462 struct PagesUnreserver { 2463 PagesUnreserver(vm_page_reservation* reservation) 2464 : 2465 fReservation(reservation) 2466 { 2467 } 2468 2469 ~PagesUnreserver() 2470 { 2471 if (fReservation != NULL) 2472 vm_page_unreserve_pages(fReservation); 2473 } 2474 2475 private: 2476 vm_page_reservation* fReservation; 2477 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2478 2479 if (addressSpec == B_CLONE_ADDRESS) { 2480 addressSpec = B_EXACT_ADDRESS; 2481 *_address = (void*)source->Base(); 2482 } 2483 2484 // First, create a cache on top of the source area, respectively use the 2485 // existing one, if this is a shared area. 2486 2487 VMArea* target; 2488 virtual_address_restrictions addressRestrictions = {}; 2489 addressRestrictions.address = *_address; 2490 addressRestrictions.address_specification = addressSpec; 2491 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2492 name, source->Size(), source->wiring, protection, 2493 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2494 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2495 &addressRestrictions, true, &target, _address); 2496 if (status < B_OK) 2497 return status; 2498 2499 if (sharedArea) { 2500 // The new area uses the old area's cache, but map_backing_store() 2501 // hasn't acquired a ref. So we have to do that now. 2502 cache->AcquireRefLocked(); 2503 } 2504 2505 // If the source area is writable, we need to move it one layer up as well 2506 2507 if (!sharedArea) { 2508 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2509 // TODO: do something more useful if this fails! 2510 if (vm_copy_on_write_area(cache, 2511 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2512 panic("vm_copy_on_write_area() failed!\n"); 2513 } 2514 } 2515 } 2516 2517 // we return the ID of the newly created area 2518 return target->id; 2519 } 2520 2521 2522 static status_t 2523 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2524 bool kernel) 2525 { 2526 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2527 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2528 2529 if (!arch_vm_supports_protection(newProtection)) 2530 return B_NOT_SUPPORTED; 2531 2532 bool becomesWritable 2533 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2534 2535 // lock address spaces and cache 2536 MultiAddressSpaceLocker locker; 2537 VMCache* cache; 2538 VMArea* area; 2539 status_t status; 2540 AreaCacheLocker cacheLocker; 2541 bool isWritable; 2542 2543 bool restart; 2544 do { 2545 restart = false; 2546 2547 locker.Unset(); 2548 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2549 if (status != B_OK) 2550 return status; 2551 2552 cacheLocker.SetTo(cache, true); // already locked 2553 2554 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2555 return B_NOT_ALLOWED; 2556 2557 if (area->protection == newProtection) 2558 return B_OK; 2559 2560 if (team != VMAddressSpace::KernelID() 2561 && area->address_space->ID() != team) { 2562 // unless you're the kernel, you are only allowed to set 2563 // the protection of your own areas 2564 return B_NOT_ALLOWED; 2565 } 2566 2567 isWritable 2568 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2569 2570 // Make sure the area (respectively, if we're going to call 2571 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2572 // wired ranges. 2573 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2574 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2575 otherArea = otherArea->cache_next) { 2576 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2577 restart = true; 2578 break; 2579 } 2580 } 2581 } else { 2582 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2583 restart = true; 2584 } 2585 } while (restart); 2586 2587 bool changePageProtection = true; 2588 bool changeTopCachePagesOnly = false; 2589 2590 if (isWritable && !becomesWritable) { 2591 // writable -> !writable 2592 2593 if (cache->source != NULL && cache->temporary) { 2594 if (cache->CountWritableAreas(area) == 0) { 2595 // Since this cache now lives from the pages in its source cache, 2596 // we can change the cache's commitment to take only those pages 2597 // into account that really are in this cache. 2598 2599 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2600 team == VMAddressSpace::KernelID() 2601 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2602 2603 // TODO: we may be able to join with our source cache, if 2604 // count == 0 2605 } 2606 } 2607 2608 // If only the writability changes, we can just remap the pages of the 2609 // top cache, since the pages of lower caches are mapped read-only 2610 // anyway. That's advantageous only, if the number of pages in the cache 2611 // is significantly smaller than the number of pages in the area, 2612 // though. 2613 if (newProtection 2614 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2615 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2616 changeTopCachePagesOnly = true; 2617 } 2618 } else if (!isWritable && becomesWritable) { 2619 // !writable -> writable 2620 2621 if (!cache->consumers.IsEmpty()) { 2622 // There are consumers -- we have to insert a new cache. Fortunately 2623 // vm_copy_on_write_area() does everything that's needed. 2624 changePageProtection = false; 2625 status = vm_copy_on_write_area(cache, NULL); 2626 } else { 2627 // No consumers, so we don't need to insert a new one. 2628 if (cache->source != NULL && cache->temporary) { 2629 // the cache's commitment must contain all possible pages 2630 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2631 team == VMAddressSpace::KernelID() 2632 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2633 } 2634 2635 if (status == B_OK && cache->source != NULL) { 2636 // There's a source cache, hence we can't just change all pages' 2637 // protection or we might allow writing into pages belonging to 2638 // a lower cache. 2639 changeTopCachePagesOnly = true; 2640 } 2641 } 2642 } else { 2643 // we don't have anything special to do in all other cases 2644 } 2645 2646 if (status == B_OK) { 2647 // remap existing pages in this cache 2648 if (changePageProtection) { 2649 VMTranslationMap* map = area->address_space->TranslationMap(); 2650 map->Lock(); 2651 2652 if (changeTopCachePagesOnly) { 2653 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2654 page_num_t lastPageOffset 2655 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2656 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2657 vm_page* page = it.Next();) { 2658 if (page->cache_offset >= firstPageOffset 2659 && page->cache_offset <= lastPageOffset) { 2660 addr_t address = virtual_page_address(area, page); 2661 map->ProtectPage(area, address, newProtection); 2662 } 2663 } 2664 } else 2665 map->ProtectArea(area, newProtection); 2666 2667 map->Unlock(); 2668 } 2669 2670 area->protection = newProtection; 2671 } 2672 2673 return status; 2674 } 2675 2676 2677 status_t 2678 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2679 { 2680 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2681 if (addressSpace == NULL) 2682 return B_BAD_TEAM_ID; 2683 2684 VMTranslationMap* map = addressSpace->TranslationMap(); 2685 2686 map->Lock(); 2687 uint32 dummyFlags; 2688 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2689 map->Unlock(); 2690 2691 addressSpace->Put(); 2692 return status; 2693 } 2694 2695 2696 /*! The page's cache must be locked. 
2697 */ 2698 bool 2699 vm_test_map_modification(vm_page* page) 2700 { 2701 if (page->modified) 2702 return true; 2703 2704 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2705 vm_page_mapping* mapping; 2706 while ((mapping = iterator.Next()) != NULL) { 2707 VMArea* area = mapping->area; 2708 VMTranslationMap* map = area->address_space->TranslationMap(); 2709 2710 phys_addr_t physicalAddress; 2711 uint32 flags; 2712 map->Lock(); 2713 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2714 map->Unlock(); 2715 2716 if ((flags & PAGE_MODIFIED) != 0) 2717 return true; 2718 } 2719 2720 return false; 2721 } 2722 2723 2724 /*! The page's cache must be locked. 2725 */ 2726 void 2727 vm_clear_map_flags(vm_page* page, uint32 flags) 2728 { 2729 if ((flags & PAGE_ACCESSED) != 0) 2730 page->accessed = false; 2731 if ((flags & PAGE_MODIFIED) != 0) 2732 page->modified = false; 2733 2734 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2735 vm_page_mapping* mapping; 2736 while ((mapping = iterator.Next()) != NULL) { 2737 VMArea* area = mapping->area; 2738 VMTranslationMap* map = area->address_space->TranslationMap(); 2739 2740 map->Lock(); 2741 map->ClearFlags(virtual_page_address(area, page), flags); 2742 map->Unlock(); 2743 } 2744 } 2745 2746 2747 /*! Removes all mappings from a page. 2748 After you've called this function, the page is unmapped from memory and 2749 the page's \c accessed and \c modified flags have been updated according 2750 to the state of the mappings. 2751 The page's cache must be locked. 2752 */ 2753 void 2754 vm_remove_all_page_mappings(vm_page* page) 2755 { 2756 while (vm_page_mapping* mapping = page->mappings.Head()) { 2757 VMArea* area = mapping->area; 2758 VMTranslationMap* map = area->address_space->TranslationMap(); 2759 addr_t address = virtual_page_address(area, page); 2760 map->UnmapPage(area, address, false); 2761 } 2762 } 2763 2764 2765 int32 2766 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2767 { 2768 int32 count = 0; 2769 2770 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2771 vm_page_mapping* mapping; 2772 while ((mapping = iterator.Next()) != NULL) { 2773 VMArea* area = mapping->area; 2774 VMTranslationMap* map = area->address_space->TranslationMap(); 2775 2776 bool modified; 2777 if (map->ClearAccessedAndModified(area, 2778 virtual_page_address(area, page), false, modified)) { 2779 count++; 2780 } 2781 2782 page->modified |= modified; 2783 } 2784 2785 2786 if (page->accessed) { 2787 count++; 2788 page->accessed = false; 2789 } 2790 2791 return count; 2792 } 2793 2794 2795 /*! Removes all mappings of a page and/or clears the accessed bits of the 2796 mappings. 2797 The function iterates through the page mappings and removes them until 2798 encountering one that has been accessed. From then on it will continue to 2799 iterate, but only clear the accessed flag of the mapping. The page's 2800 \c modified bit will be updated accordingly, the \c accessed bit will be 2801 cleared. 2802 \return The number of mapping accessed bits encountered, including the 2803 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2804 of the page have been removed. 
2805 */ 2806 int32 2807 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 2808 { 2809 ASSERT(page->WiredCount() == 0); 2810 2811 if (page->accessed) 2812 return vm_clear_page_mapping_accessed_flags(page); 2813 2814 while (vm_page_mapping* mapping = page->mappings.Head()) { 2815 VMArea* area = mapping->area; 2816 VMTranslationMap* map = area->address_space->TranslationMap(); 2817 addr_t address = virtual_page_address(area, page); 2818 bool modified = false; 2819 if (map->ClearAccessedAndModified(area, address, true, modified)) { 2820 page->accessed = true; 2821 page->modified |= modified; 2822 return vm_clear_page_mapping_accessed_flags(page); 2823 } 2824 page->modified |= modified; 2825 } 2826 2827 return 0; 2828 } 2829 2830 2831 static int 2832 display_mem(int argc, char** argv) 2833 { 2834 bool physical = false; 2835 addr_t copyAddress; 2836 int32 displayWidth; 2837 int32 itemSize; 2838 int32 num = -1; 2839 addr_t address; 2840 int i = 1, j; 2841 2842 if (argc > 1 && argv[1][0] == '-') { 2843 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2844 physical = true; 2845 i++; 2846 } else 2847 i = 99; 2848 } 2849 2850 if (argc < i + 1 || argc > i + 2) { 2851 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 2852 "\tdl - 8 bytes\n" 2853 "\tdw - 4 bytes\n" 2854 "\tds - 2 bytes\n" 2855 "\tdb - 1 byte\n" 2856 "\tstring - a whole string\n" 2857 " -p or --physical only allows memory from a single page to be " 2858 "displayed.\n"); 2859 return 0; 2860 } 2861 2862 address = parse_expression(argv[i]); 2863 2864 if (argc > i + 1) 2865 num = parse_expression(argv[i + 1]); 2866 2867 // build the format string 2868 if (strcmp(argv[0], "db") == 0) { 2869 itemSize = 1; 2870 displayWidth = 16; 2871 } else if (strcmp(argv[0], "ds") == 0) { 2872 itemSize = 2; 2873 displayWidth = 8; 2874 } else if (strcmp(argv[0], "dw") == 0) { 2875 itemSize = 4; 2876 displayWidth = 4; 2877 } else if (strcmp(argv[0], "dl") == 0) { 2878 itemSize = 8; 2879 displayWidth = 2; 2880 } else if (strcmp(argv[0], "string") == 0) { 2881 itemSize = 1; 2882 displayWidth = -1; 2883 } else { 2884 kprintf("display_mem called in an invalid way!\n"); 2885 return 0; 2886 } 2887 2888 if (num <= 0) 2889 num = displayWidth; 2890 2891 void* physicalPageHandle = NULL; 2892 2893 if (physical) { 2894 int32 offset = address & (B_PAGE_SIZE - 1); 2895 if (num * itemSize + offset > B_PAGE_SIZE) { 2896 num = (B_PAGE_SIZE - offset) / itemSize; 2897 kprintf("NOTE: number of bytes has been cut to page size\n"); 2898 } 2899 2900 address = ROUNDDOWN(address, B_PAGE_SIZE); 2901 2902 if (vm_get_physical_page_debug(address, &copyAddress, 2903 &physicalPageHandle) != B_OK) { 2904 kprintf("getting the hardware page failed."); 2905 return 0; 2906 } 2907 2908 address += offset; 2909 copyAddress += offset; 2910 } else 2911 copyAddress = address; 2912 2913 if (!strcmp(argv[0], "string")) { 2914 kprintf("%p \"", (char*)copyAddress); 2915 2916 // string mode 2917 for (i = 0; true; i++) { 2918 char c; 2919 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 2920 != B_OK 2921 || c == '\0') { 2922 break; 2923 } 2924 2925 if (c == '\n') 2926 kprintf("\\n"); 2927 else if (c == '\t') 2928 kprintf("\\t"); 2929 else { 2930 if (!isprint(c)) 2931 c = '.'; 2932 2933 kprintf("%c", c); 2934 } 2935 } 2936 2937 kprintf("\"\n"); 2938 } else { 2939 // number mode 2940 for (i = 0; i < num; i++) { 2941 uint32 value; 2942 2943 if ((i % displayWidth) == 0) { 2944 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 2945 if (i != 0)
2946 kprintf("\n"); 2947 2948 kprintf("[0x%lx] ", address + i * itemSize); 2949 2950 for (j = 0; j < displayed; j++) { 2951 char c; 2952 if (debug_memcpy(B_CURRENT_TEAM, &c, 2953 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 2954 displayed = j; 2955 break; 2956 } 2957 if (!isprint(c)) 2958 c = '.'; 2959 2960 kprintf("%c", c); 2961 } 2962 if (num > displayWidth) { 2963 // make sure the spacing in the last line is correct 2964 for (j = displayed; j < displayWidth * itemSize; j++) 2965 kprintf(" "); 2966 } 2967 kprintf(" "); 2968 } 2969 2970 if (debug_memcpy(B_CURRENT_TEAM, &value, 2971 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 2972 kprintf("read fault"); 2973 break; 2974 } 2975 2976 switch (itemSize) { 2977 case 1: 2978 kprintf(" %02" B_PRIx8, *(uint8*)&value); 2979 break; 2980 case 2: 2981 kprintf(" %04" B_PRIx16, *(uint16*)&value); 2982 break; 2983 case 4: 2984 kprintf(" %08" B_PRIx32, *(uint32*)&value); 2985 break; 2986 case 8: 2987 kprintf(" %016" B_PRIx64, *(uint64*)&value); 2988 break; 2989 } 2990 } 2991 2992 kprintf("\n"); 2993 } 2994 2995 if (physical) { 2996 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 2997 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 2998 } 2999 return 0; 3000 } 3001 3002 3003 static void 3004 dump_cache_tree_recursively(VMCache* cache, int level, 3005 VMCache* highlightCache) 3006 { 3007 // print this cache 3008 for (int i = 0; i < level; i++) 3009 kprintf(" "); 3010 if (cache == highlightCache) 3011 kprintf("%p <--\n", cache); 3012 else 3013 kprintf("%p\n", cache); 3014 3015 // recursively print its consumers 3016 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3017 VMCache* consumer = it.Next();) { 3018 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3019 } 3020 } 3021 3022 3023 static int 3024 dump_cache_tree(int argc, char** argv) 3025 { 3026 if (argc != 2 || !strcmp(argv[1], "--help")) { 3027 kprintf("usage: %s <address>\n", argv[0]); 3028 return 0; 3029 } 3030 3031 addr_t address = parse_expression(argv[1]); 3032 if (address == 0) 3033 return 0; 3034 3035 VMCache* cache = (VMCache*)address; 3036 VMCache* root = cache; 3037 3038 // find the root cache (the transitive source) 3039 while (root->source != NULL) 3040 root = root->source; 3041 3042 dump_cache_tree_recursively(root, 0, cache); 3043 3044 return 0; 3045 } 3046 3047 3048 const char* 3049 vm_cache_type_to_string(int32 type) 3050 { 3051 switch (type) { 3052 case CACHE_TYPE_RAM: 3053 return "RAM"; 3054 case CACHE_TYPE_DEVICE: 3055 return "device"; 3056 case CACHE_TYPE_VNODE: 3057 return "vnode"; 3058 case CACHE_TYPE_NULL: 3059 return "null"; 3060 3061 default: 3062 return "unknown"; 3063 } 3064 } 3065 3066 3067 #if DEBUG_CACHE_LIST 3068 3069 static void 3070 update_cache_info_recursively(VMCache* cache, cache_info& info) 3071 { 3072 info.page_count += cache->page_count; 3073 if (cache->type == CACHE_TYPE_RAM) 3074 info.committed += cache->committed_size; 3075 3076 // recurse 3077 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3078 VMCache* consumer = it.Next();) { 3079 update_cache_info_recursively(consumer, info); 3080 } 3081 } 3082 3083 3084 static int 3085 cache_info_compare_page_count(const void* _a, const void* _b) 3086 { 3087 const cache_info* a = (const cache_info*)_a; 3088 const cache_info* b = (const cache_info*)_b; 3089 if (a->page_count == b->page_count) 3090 return 0; 3091 return a->page_count < b->page_count ? 
1 : -1; 3092 } 3093 3094 3095 static int 3096 cache_info_compare_committed(const void* _a, const void* _b) 3097 { 3098 const cache_info* a = (const cache_info*)_a; 3099 const cache_info* b = (const cache_info*)_b; 3100 if (a->committed == b->committed) 3101 return 0; 3102 return a->committed < b->committed ? 1 : -1; 3103 } 3104 3105 3106 static void 3107 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3108 { 3109 for (int i = 0; i < level; i++) 3110 kprintf(" "); 3111 3112 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3113 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3114 cache->virtual_base, cache->virtual_end, cache->page_count); 3115 3116 if (level == 0) 3117 kprintf("/%lu", info.page_count); 3118 3119 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3120 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3121 3122 if (level == 0) 3123 kprintf("/%lu", info.committed); 3124 } 3125 3126 // areas 3127 if (cache->areas != NULL) { 3128 VMArea* area = cache->areas; 3129 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3130 area->name, area->address_space->ID()); 3131 3132 while (area->cache_next != NULL) { 3133 area = area->cache_next; 3134 kprintf(", %" B_PRId32, area->id); 3135 } 3136 } 3137 3138 kputs("\n"); 3139 3140 // recurse 3141 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3142 VMCache* consumer = it.Next();) { 3143 dump_caches_recursively(consumer, info, level + 1); 3144 } 3145 } 3146 3147 3148 static int 3149 dump_caches(int argc, char** argv) 3150 { 3151 if (sCacheInfoTable == NULL) { 3152 kprintf("No cache info table!\n"); 3153 return 0; 3154 } 3155 3156 bool sortByPageCount = true; 3157 3158 for (int32 i = 1; i < argc; i++) { 3159 if (strcmp(argv[i], "-c") == 0) { 3160 sortByPageCount = false; 3161 } else { 3162 print_debugger_command_usage(argv[0]); 3163 return 0; 3164 } 3165 } 3166 3167 uint32 totalCount = 0; 3168 uint32 rootCount = 0; 3169 off_t totalCommitted = 0; 3170 page_num_t totalPages = 0; 3171 3172 VMCache* cache = gDebugCacheList; 3173 while (cache) { 3174 totalCount++; 3175 if (cache->source == NULL) { 3176 cache_info stackInfo; 3177 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3178 ? sCacheInfoTable[rootCount] : stackInfo; 3179 rootCount++; 3180 info.cache = cache; 3181 info.page_count = 0; 3182 info.committed = 0; 3183 update_cache_info_recursively(cache, info); 3184 totalCommitted += info.committed; 3185 totalPages += info.page_count; 3186 } 3187 3188 cache = cache->debug_next; 3189 } 3190 3191 if (rootCount <= (uint32)kCacheInfoTableCount) { 3192 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3193 sortByPageCount 3194 ? &cache_info_compare_page_count 3195 : &cache_info_compare_committed); 3196 } 3197 3198 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3199 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3200 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3201 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3202 "page count" : "committed size"); 3203 3204 if (rootCount <= (uint32)kCacheInfoTableCount) { 3205 for (uint32 i = 0; i < rootCount; i++) { 3206 cache_info& info = sCacheInfoTable[i]; 3207 dump_caches_recursively(info.cache, info, 0); 3208 } 3209 } else 3210 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3211 3212 return 0; 3213 } 3214 3215 #endif // DEBUG_CACHE_LIST 3216 3217 3218 static int 3219 dump_cache(int argc, char** argv) 3220 { 3221 VMCache* cache; 3222 bool showPages = false; 3223 int i = 1; 3224 3225 if (argc < 2 || !strcmp(argv[1], "--help")) { 3226 kprintf("usage: %s [-ps] <address>\n" 3227 " if -p is specified, all pages are shown, if -s is used\n" 3228 " only the cache info is shown respectively.\n", argv[0]); 3229 return 0; 3230 } 3231 while (argv[i][0] == '-') { 3232 char* arg = argv[i] + 1; 3233 while (arg[0]) { 3234 if (arg[0] == 'p') 3235 showPages = true; 3236 arg++; 3237 } 3238 i++; 3239 } 3240 if (argv[i] == NULL) { 3241 kprintf("%s: invalid argument, pass address\n", argv[0]); 3242 return 0; 3243 } 3244 3245 addr_t address = parse_expression(argv[i]); 3246 if (address == 0) 3247 return 0; 3248 3249 cache = (VMCache*)address; 3250 3251 cache->Dump(showPages); 3252 3253 set_debug_variable("_sourceCache", (addr_t)cache->source); 3254 3255 return 0; 3256 } 3257 3258 3259 static void 3260 dump_area_struct(VMArea* area, bool mappings) 3261 { 3262 kprintf("AREA: %p\n", area); 3263 kprintf("name:\t\t'%s'\n", area->name); 3264 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3265 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3266 kprintf("base:\t\t0x%lx\n", area->Base()); 3267 kprintf("size:\t\t0x%lx\n", area->Size()); 3268 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3269 kprintf("wiring:\t\t0x%x\n", area->wiring); 3270 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3271 kprintf("cache:\t\t%p\n", area->cache); 3272 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3273 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3274 kprintf("cache_next:\t%p\n", area->cache_next); 3275 kprintf("cache_prev:\t%p\n", area->cache_prev); 3276 3277 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3278 if (mappings) { 3279 kprintf("page mappings:\n"); 3280 while (iterator.HasNext()) { 3281 vm_page_mapping* mapping = iterator.Next(); 3282 kprintf(" %p", mapping->page); 3283 } 3284 kprintf("\n"); 3285 } else { 3286 uint32 count = 0; 3287 while (iterator.Next() != NULL) { 3288 count++; 3289 } 3290 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3291 } 3292 } 3293 3294 3295 static int 3296 dump_area(int argc, char** argv) 3297 { 3298 bool mappings = false; 3299 bool found = false; 3300 int32 index = 1; 3301 VMArea* area; 3302 addr_t num; 3303 3304 if (argc < 2 || !strcmp(argv[1], "--help")) { 3305 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3306 "All areas matching either id/address/name are listed. 
You can\n" 3307 "force to check only a specific item by prefixing the specifier\n" 3308 "with the id/contains/address/name keywords.\n" 3309 "-m shows the area's mappings as well.\n"); 3310 return 0; 3311 } 3312 3313 if (!strcmp(argv[1], "-m")) { 3314 mappings = true; 3315 index++; 3316 } 3317 3318 int32 mode = 0xf; 3319 if (!strcmp(argv[index], "id")) 3320 mode = 1; 3321 else if (!strcmp(argv[index], "contains")) 3322 mode = 2; 3323 else if (!strcmp(argv[index], "name")) 3324 mode = 4; 3325 else if (!strcmp(argv[index], "address")) 3326 mode = 0; 3327 if (mode != 0xf) 3328 index++; 3329 3330 if (index >= argc) { 3331 kprintf("No area specifier given.\n"); 3332 return 0; 3333 } 3334 3335 num = parse_expression(argv[index]); 3336 3337 if (mode == 0) { 3338 dump_area_struct((struct VMArea*)num, mappings); 3339 } else { 3340 // walk through the area list, looking for the arguments as a name 3341 3342 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3343 while ((area = it.Next()) != NULL) { 3344 if (((mode & 4) != 0 && area->name != NULL 3345 && !strcmp(argv[index], area->name)) 3346 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3347 || (((mode & 2) != 0 && area->Base() <= num 3348 && area->Base() + area->Size() > num))))) { 3349 dump_area_struct(area, mappings); 3350 found = true; 3351 } 3352 } 3353 3354 if (!found) 3355 kprintf("could not find area %s (%ld)\n", argv[index], num); 3356 } 3357 3358 return 0; 3359 } 3360 3361 3362 static int 3363 dump_area_list(int argc, char** argv) 3364 { 3365 VMArea* area; 3366 const char* name = NULL; 3367 int32 id = 0; 3368 3369 if (argc > 1) { 3370 id = parse_expression(argv[1]); 3371 if (id == 0) 3372 name = argv[1]; 3373 } 3374 3375 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3376 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3377 B_PRINTF_POINTER_WIDTH, "size"); 3378 3379 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3380 while ((area = it.Next()) != NULL) { 3381 if ((id != 0 && area->address_space->ID() != id) 3382 || (name != NULL && strstr(area->name, name) == NULL)) 3383 continue; 3384 3385 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3386 area->id, (void*)area->Base(), (void*)area->Size(), 3387 area->protection, area->wiring, area->name); 3388 } 3389 return 0; 3390 } 3391 3392 3393 static int 3394 dump_available_memory(int argc, char** argv) 3395 { 3396 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3397 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3398 return 0; 3399 } 3400 3401 3402 /*! Deletes all areas and reserved regions in the given address space. 3403 3404 The caller must ensure that none of the areas has any wired ranges. 3405 3406 \param addressSpace The address space. 3407 \param deletingAddressSpace \c true, if the address space is in the process 3408 of being deleted. 
3409 */ 3410 void 3411 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3412 { 3413 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3414 addressSpace->ID())); 3415 3416 addressSpace->WriteLock(); 3417 3418 // remove all reserved areas in this address space 3419 addressSpace->UnreserveAllAddressRanges(0); 3420 3421 // delete all the areas in this address space 3422 while (VMArea* area = addressSpace->FirstArea()) { 3423 ASSERT(!area->IsWired()); 3424 delete_area(addressSpace, area, deletingAddressSpace); 3425 } 3426 3427 addressSpace->WriteUnlock(); 3428 } 3429 3430 3431 static area_id 3432 vm_area_for(addr_t address, bool kernel) 3433 { 3434 team_id team; 3435 if (IS_USER_ADDRESS(address)) { 3436 // we try the user team address space, if any 3437 team = VMAddressSpace::CurrentID(); 3438 if (team < 0) 3439 return team; 3440 } else 3441 team = VMAddressSpace::KernelID(); 3442 3443 AddressSpaceReadLocker locker(team); 3444 if (!locker.IsLocked()) 3445 return B_BAD_TEAM_ID; 3446 3447 VMArea* area = locker.AddressSpace()->LookupArea(address); 3448 if (area != NULL) { 3449 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3450 return B_ERROR; 3451 3452 return area->id; 3453 } 3454 3455 return B_ERROR; 3456 } 3457 3458 3459 /*! Frees physical pages that were used during the boot process. 3460 \a end is inclusive. 3461 */ 3462 static void 3463 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3464 { 3465 // free all physical pages in the specified range 3466 3467 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3468 phys_addr_t physicalAddress; 3469 uint32 flags; 3470 3471 if (map->Query(current, &physicalAddress, &flags) == B_OK 3472 && (flags & PAGE_PRESENT) != 0) { 3473 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3474 if (page != NULL && page->State() != PAGE_STATE_FREE 3475 && page->State() != PAGE_STATE_CLEAR 3476 && page->State() != PAGE_STATE_UNUSED) { 3477 DEBUG_PAGE_ACCESS_START(page); 3478 vm_page_set_state(page, PAGE_STATE_FREE); 3479 } 3480 } 3481 } 3482 3483 // unmap the memory 3484 map->Unmap(start, end); 3485 } 3486 3487 3488 void 3489 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3490 { 3491 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3492 addr_t end = start + (size - 1); 3493 addr_t lastEnd = start; 3494 3495 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3496 (void*)start, (void*)end)); 3497 3498 // The areas are sorted in virtual address space order, so 3499 // we just have to find the holes between them that fall 3500 // into the area we should dispose 3501 3502 map->Lock(); 3503 3504 for (VMAddressSpace::AreaIterator it 3505 = VMAddressSpace::Kernel()->GetAreaIterator(); 3506 VMArea* area = it.Next();) { 3507 addr_t areaStart = area->Base(); 3508 addr_t areaEnd = areaStart + (area->Size() - 1); 3509 3510 if (areaEnd < start) 3511 continue; 3512 3513 if (areaStart > end) { 3514 // we are done, the area is already beyond of what we have to free 3515 break; 3516 } 3517 3518 if (areaStart > lastEnd) { 3519 // this is something we can free 3520 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3521 (void*)areaStart)); 3522 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3523 } 3524 3525 if (areaEnd >= end) { 3526 lastEnd = areaEnd; 3527 // no +1 to prevent potential overflow 3528 break; 3529 } 3530 3531 lastEnd = areaEnd + 1; 3532 } 3533 3534 if (lastEnd < 
end) { 3535 // we can also get rid of some space at the end of the area 3536 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3537 (void*)end)); 3538 unmap_and_free_physical_pages(map, lastEnd, end); 3539 } 3540 3541 map->Unlock(); 3542 } 3543 3544 3545 static void 3546 create_preloaded_image_areas(struct preloaded_image* _image) 3547 { 3548 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3549 char name[B_OS_NAME_LENGTH]; 3550 void* address; 3551 int32 length; 3552 3553 // use file name to create a good area name 3554 char* fileName = strrchr(image->name, '/'); 3555 if (fileName == NULL) 3556 fileName = image->name; 3557 else 3558 fileName++; 3559 3560 length = strlen(fileName); 3561 // make sure there is enough space for the suffix 3562 if (length > 25) 3563 length = 25; 3564 3565 memcpy(name, fileName, length); 3566 strcpy(name + length, "_text"); 3567 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3568 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3569 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3570 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3571 // this will later be remapped read-only/executable by the 3572 // ELF initialization code 3573 3574 strcpy(name + length, "_data"); 3575 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3576 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3577 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3578 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3579 } 3580 3581 3582 /*! Frees all previously allocated kernel argument areas from the kernel_args structure. 3583 Any boot loader resources contained in those arguments must not be accessed 3584 anymore past this point. 3585 */ 3586 void 3587 vm_free_kernel_args(kernel_args* args) 3588 { 3589 uint32 i; 3590 3591 TRACE(("vm_free_kernel_args()\n")); 3592 3593 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3594 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3595 if (area >= B_OK) 3596 delete_area(area); 3597 } 3598 } 3599 3600 3601 static void 3602 allocate_kernel_args(kernel_args* args) 3603 { 3604 TRACE(("allocate_kernel_args()\n")); 3605 3606 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3607 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3608 3609 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3610 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3611 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3612 } 3613 } 3614 3615 3616 static void 3617 unreserve_boot_loader_ranges(kernel_args* args) 3618 { 3619 TRACE(("unreserve_boot_loader_ranges()\n")); 3620 3621 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3622 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3623 (void*)(addr_t)args->virtual_allocated_range[i].start, 3624 args->virtual_allocated_range[i].size); 3625 } 3626 } 3627 3628 3629 static void 3630 reserve_boot_loader_ranges(kernel_args* args) 3631 { 3632 TRACE(("reserve_boot_loader_ranges()\n")); 3633 3634 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3635 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3636 3637 // If the address is not a kernel address, we just skip it. The 3638 // architecture-specific code has to deal with it.
3639 if (!IS_KERNEL_ADDRESS(address)) { 3640 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3641 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3642 continue; 3643 } 3644 3645 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3646 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3647 if (status < B_OK) 3648 panic("could not reserve boot loader ranges\n"); 3649 } 3650 } 3651 3652 3653 static addr_t 3654 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3655 { 3656 size = PAGE_ALIGN(size); 3657 3658 // find a slot in the virtual allocation addr range 3659 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3660 // check to see if the space between this one and the last is big enough 3661 addr_t rangeStart = args->virtual_allocated_range[i].start; 3662 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3663 + args->virtual_allocated_range[i - 1].size; 3664 3665 addr_t base = alignment > 0 3666 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3667 3668 if (base >= KERNEL_BASE && base < rangeStart 3669 && rangeStart - base >= size) { 3670 args->virtual_allocated_range[i - 1].size 3671 += base + size - previousRangeEnd; 3672 return base; 3673 } 3674 } 3675 3676 // we hadn't found one between allocation ranges. this is ok. 3677 // see if there's a gap after the last one 3678 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3679 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3680 + args->virtual_allocated_range[lastEntryIndex].size; 3681 addr_t base = alignment > 0 3682 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3683 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3684 args->virtual_allocated_range[lastEntryIndex].size 3685 += base + size - lastRangeEnd; 3686 return base; 3687 } 3688 3689 // see if there's a gap before the first one 3690 addr_t rangeStart = args->virtual_allocated_range[0].start; 3691 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3692 base = rangeStart - size; 3693 if (alignment > 0) 3694 base = ROUNDDOWN(base, alignment); 3695 3696 if (base >= KERNEL_BASE) { 3697 args->virtual_allocated_range[0].start = base; 3698 args->virtual_allocated_range[0].size += rangeStart - base; 3699 return base; 3700 } 3701 } 3702 3703 return 0; 3704 } 3705 3706 3707 static bool 3708 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 3709 { 3710 // TODO: horrible brute-force method of determining if the page can be 3711 // allocated 3712 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3713 if (address >= args->physical_memory_range[i].start 3714 && address < args->physical_memory_range[i].start 3715 + args->physical_memory_range[i].size) 3716 return true; 3717 } 3718 return false; 3719 } 3720 3721 3722 page_num_t 3723 vm_allocate_early_physical_page(kernel_args* args) 3724 { 3725 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3726 phys_addr_t nextPage; 3727 3728 nextPage = args->physical_allocated_range[i].start 3729 + args->physical_allocated_range[i].size; 3730 // see if the page after the next allocated paddr run can be allocated 3731 if (i + 1 < args->num_physical_allocated_ranges 3732 && args->physical_allocated_range[i + 1].size != 0) { 3733 // see if the next page will collide with the next allocated range 3734 if (nextPage >= args->physical_allocated_range[i+1].start) 3735 continue; 3736 } 3737 // see if the next physical page 
// fits in the memory block 3738 if (is_page_in_physical_memory_range(args, nextPage)) { 3739 // we got one! 3740 args->physical_allocated_range[i].size += B_PAGE_SIZE; 3741 return nextPage / B_PAGE_SIZE; 3742 } 3743 } 3744 3745 return 0; 3746 // could not allocate a block 3747 } 3748 3749 3750 /*! This one uses the kernel_args' physical and virtual memory ranges to 3751 allocate some pages before the VM is completely up. 3752 */ 3753 addr_t 3754 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 3755 uint32 attributes, addr_t alignment) 3756 { 3757 if (physicalSize > virtualSize) 3758 physicalSize = virtualSize; 3759 3760 // find the vaddr to allocate at 3761 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 3762 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 3763 3764 // map the pages 3765 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 3766 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 3767 if (physicalAddress == 0) 3768 panic("error allocating early page!\n"); 3769 3770 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 3771 3772 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 3773 physicalAddress * B_PAGE_SIZE, attributes, 3774 &vm_allocate_early_physical_page); 3775 } 3776 3777 return virtualBase; 3778 } 3779 3780 3781 /*! The main entry point to initialize the VM. */ 3782 status_t 3783 vm_init(kernel_args* args) 3784 { 3785 struct preloaded_image* image; 3786 void* address; 3787 status_t err = 0; 3788 uint32 i; 3789 3790 TRACE(("vm_init: entry\n")); 3791 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 3792 err = arch_vm_init(args); 3793 3794 // initialize some globals 3795 vm_page_init_num_pages(args); 3796 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 3797 3798 slab_init(args); 3799 3800 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3801 size_t heapSize = INITIAL_HEAP_SIZE; 3802 // try to accommodate low memory systems 3803 while (heapSize > sAvailableMemory / 8) 3804 heapSize /= 2; 3805 if (heapSize < 1024 * 1024) 3806 panic("vm_init: go buy some RAM please."); 3807 3808 // map in the new heap and initialize it 3809 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 3810 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3811 TRACE(("heap at 0x%lx\n", heapBase)); 3812 heap_init(heapBase, heapSize); 3813 #endif 3814 3815 // initialize the free page list and physical page mapper 3816 vm_page_init(args); 3817 3818 // initialize the cache allocators 3819 vm_cache_init(args); 3820 3821 { 3822 status_t error = VMAreaHash::Init(); 3823 if (error != B_OK) 3824 panic("vm_init: error initializing area hash table\n"); 3825 } 3826 3827 VMAddressSpace::Init(); 3828 reserve_boot_loader_ranges(args); 3829 3830 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3831 heap_init_post_area(); 3832 #endif 3833 3834 // Do any further initialization that the architecture dependent layers may 3835 // need now 3836 arch_vm_translation_map_init_post_area(args); 3837 arch_vm_init_post_area(args); 3838 vm_page_init_post_area(args); 3839 slab_init_post_area(); 3840 3841 // allocate areas to represent stuff that already exists 3842 3843 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3844 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 3845 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 3846 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3847 #endif 3848 3849
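// Also wrap the memory the boot loader already set up (the kernel args
// ranges, the kernel image, the preloaded images, and the per-CPU idle
// thread stacks) in areas, so that this already-wired memory is known to
// the VM from now on.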
allocate_kernel_args(args); 3850 3851 create_preloaded_image_areas(args->kernel_image); 3852 3853 // allocate areas for preloaded images 3854 for (image = args->preloaded_images; image != NULL; image = image->next) 3855 create_preloaded_image_areas(image); 3856 3857 // allocate kernel stacks 3858 for (i = 0; i < args->num_cpus; i++) { 3859 char name[64]; 3860 3861 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 3862 address = (void*)args->cpu_kstack[i].start; 3863 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3864 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3865 } 3866 3867 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3868 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3869 3870 #if PARANOID_KERNEL_MALLOC 3871 vm_block_address_range("uninitialized heap memory", 3872 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3873 #endif 3874 #if PARANOID_KERNEL_FREE 3875 vm_block_address_range("freed heap memory", 3876 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3877 #endif 3878 3879 // create the object cache for the page mappings 3880 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3881 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3882 NULL, NULL); 3883 if (gPageMappingsObjectCache == NULL) 3884 panic("failed to create page mappings object cache"); 3885 3886 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3887 3888 #if DEBUG_CACHE_LIST 3889 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 3890 virtual_address_restrictions virtualRestrictions = {}; 3891 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 3892 physical_address_restrictions physicalRestrictions = {}; 3893 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 3894 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3895 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 3896 CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions, 3897 (void**)&sCacheInfoTable); 3898 } 3899 #endif // DEBUG_CACHE_LIST 3900 3901 // add some debugger commands 3902 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3903 add_debugger_command("area", &dump_area, 3904 "Dump info about a particular area"); 3905 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3906 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3907 #if DEBUG_CACHE_LIST 3908 if (sCacheInfoTable != NULL) { 3909 add_debugger_command_etc("caches", &dump_caches, 3910 "List all VMCache trees", 3911 "[ \"-c\" ]\n" 3912 "All cache trees are listed sorted in decreasing order by number " 3913 "of\n" 3914 "used pages or, if \"-c\" is specified, by size of committed " 3915 "memory.\n", 3916 0); 3917 } 3918 #endif 3919 add_debugger_command("avail", &dump_available_memory, 3920 "Dump available memory"); 3921 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3922 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3923 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3924 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3925 add_debugger_command("string", &display_mem, "dump strings"); 3926 3927 TRACE(("vm_init: exit\n")); 3928 3929 vm_cache_init_post_heap(); 3930 3931 return err; 3932 } 3933 3934 3935 status_t 3936 vm_init_post_sem(kernel_args* args) 3937 { 3938 // This frees all unused boot loader 
resources and makes its space available 3939 // again 3940 arch_vm_init_end(args); 3941 unreserve_boot_loader_ranges(args); 3942 3943 // fill in all of the semaphores that were not allocated before 3944 // since we're still single threaded and only the kernel address space 3945 // exists, it isn't that hard to find all of the ones we need to create 3946 3947 arch_vm_translation_map_init_post_sem(args); 3948 3949 slab_init_post_sem(); 3950 3951 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3952 heap_init_post_sem(); 3953 #endif 3954 3955 return B_OK; 3956 } 3957 3958 3959 status_t 3960 vm_init_post_thread(kernel_args* args) 3961 { 3962 vm_page_init_post_thread(args); 3963 slab_init_post_thread(); 3964 return heap_init_post_thread(); 3965 } 3966 3967 3968 status_t 3969 vm_init_post_modules(kernel_args* args) 3970 { 3971 return arch_vm_init_post_modules(args); 3972 } 3973 3974 3975 void 3976 permit_page_faults(void) 3977 { 3978 Thread* thread = thread_get_current_thread(); 3979 if (thread != NULL) 3980 atomic_add(&thread->page_faults_allowed, 1); 3981 } 3982 3983 3984 void 3985 forbid_page_faults(void) 3986 { 3987 Thread* thread = thread_get_current_thread(); 3988 if (thread != NULL) 3989 atomic_add(&thread->page_faults_allowed, -1); 3990 } 3991 3992 3993 status_t 3994 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3995 addr_t* newIP) 3996 { 3997 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3998 faultAddress)); 3999 4000 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4001 4002 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4003 VMAddressSpace* addressSpace = NULL; 4004 4005 status_t status = B_OK; 4006 *newIP = 0; 4007 atomic_add((int32*)&sPageFaults, 1); 4008 4009 if (IS_KERNEL_ADDRESS(pageAddress)) { 4010 addressSpace = VMAddressSpace::GetKernel(); 4011 } else if (IS_USER_ADDRESS(pageAddress)) { 4012 addressSpace = VMAddressSpace::GetCurrent(); 4013 if (addressSpace == NULL) { 4014 if (!isUser) { 4015 dprintf("vm_page_fault: kernel thread accessing invalid user " 4016 "memory!\n"); 4017 status = B_BAD_ADDRESS; 4018 TPF(PageFaultError(-1, 4019 VMPageFaultTracing 4020 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4021 } else { 4022 // XXX weird state. 
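// Editor's note (added comment, not in the original source): reaching this
// point means a user-mode thread faulted on a user address although
// VMAddressSpace::GetCurrent() found no user address space for its team.
// Running user code should imply that such an address space exists, so this
// state is presumed impossible -- hence the panic below instead of a plain
// error return.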
4023 panic("vm_page_fault: non kernel thread accessing user memory " 4024 "that doesn't exist!\n"); 4025 status = B_BAD_ADDRESS; 4026 } 4027 } 4028 } else { 4029 // the hit was probably in the 64k DMZ between kernel and user space 4030 // this keeps a user space thread from passing a buffer that crosses 4031 // into kernel space 4032 status = B_BAD_ADDRESS; 4033 TPF(PageFaultError(-1, 4034 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4035 } 4036 4037 if (status == B_OK) { 4038 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser, 4039 NULL); 4040 } 4041 4042 if (status < B_OK) { 4043 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4044 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n", 4045 strerror(status), address, faultAddress, isWrite, isUser, 4046 thread_get_current_thread_id()); 4047 if (!isUser) { 4048 Thread* thread = thread_get_current_thread(); 4049 if (thread != NULL && thread->fault_handler != 0) { 4050 // this will cause the arch dependant page fault handler to 4051 // modify the IP on the interrupt frame or whatever to return 4052 // to this address 4053 *newIP = thread->fault_handler; 4054 } else { 4055 // unhandled page fault in the kernel 4056 panic("vm_page_fault: unhandled page fault in kernel space at " 4057 "0x%lx, ip 0x%lx\n", address, faultAddress); 4058 } 4059 } else { 4060 #if 1 4061 // TODO: remove me once we have proper userland debugging support 4062 // (and tools) 4063 VMArea* area = NULL; 4064 if (addressSpace != NULL) { 4065 addressSpace->ReadLock(); 4066 area = addressSpace->LookupArea(faultAddress); 4067 } 4068 4069 Thread* thread = thread_get_current_thread(); 4070 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4071 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4072 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4073 thread->team->Name(), thread->team->id, 4074 isWrite ? "write" : "read", address, faultAddress, 4075 area ? area->name : "???", faultAddress - (area ? 4076 area->Base() : 0x0)); 4077 4078 // We can print a stack trace of the userland thread here. 4079 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4080 // fault and someone is already waiting for a write lock on the same address 4081 // space. This thread will then try to acquire the lock again and will 4082 // be queued after the writer. 4083 # if 0 4084 if (area) { 4085 struct stack_frame { 4086 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4087 struct stack_frame* previous; 4088 void* return_address; 4089 #else 4090 // ... 
4091 #warning writeme 4092 #endif 4093 } frame; 4094 # ifdef __INTEL__ 4095 struct iframe* iframe = x86_get_user_iframe(); 4096 if (iframe == NULL) 4097 panic("iframe is NULL!"); 4098 4099 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4100 sizeof(struct stack_frame)); 4101 # elif defined(__POWERPC__) 4102 struct iframe* iframe = ppc_get_user_iframe(); 4103 if (iframe == NULL) 4104 panic("iframe is NULL!"); 4105 4106 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4107 sizeof(struct stack_frame)); 4108 # else 4109 # warning "vm_page_fault() stack trace won't work" 4110 status = B_ERROR; 4111 # endif 4112 4113 dprintf("stack trace:\n"); 4114 int32 maxFrames = 50; 4115 while (status == B_OK && --maxFrames >= 0 4116 && frame.return_address != NULL) { 4117 dprintf(" %p", frame.return_address); 4118 area = addressSpace->LookupArea( 4119 (addr_t)frame.return_address); 4120 if (area) { 4121 dprintf(" (%s + %#lx)", area->name, 4122 (addr_t)frame.return_address - area->Base()); 4123 } 4124 dprintf("\n"); 4125 4126 status = user_memcpy(&frame, frame.previous, 4127 sizeof(struct stack_frame)); 4128 } 4129 } 4130 # endif // 0 (stack trace) 4131 4132 if (addressSpace != NULL) 4133 addressSpace->ReadUnlock(); 4134 #endif 4135 4136 // If the thread has a signal handler for SIGSEGV, we simply 4137 // send it the signal. Otherwise we notify the user debugger 4138 // first. 4139 struct sigaction action; 4140 if ((sigaction(SIGSEGV, NULL, &action) == 0 4141 && action.sa_handler != SIG_DFL 4142 && action.sa_handler != SIG_IGN) 4143 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4144 SIGSEGV)) { 4145 Signal signal(SIGSEGV, 4146 status == B_PERMISSION_DENIED 4147 ? SEGV_ACCERR : SEGV_MAPERR, 4148 EFAULT, thread->team->id); 4149 signal.SetAddress((void*)address); 4150 send_signal_to_thread(thread, signal, 0); 4151 } 4152 } 4153 } 4154 4155 if (addressSpace != NULL) 4156 addressSpace->Put(); 4157 4158 return B_HANDLED_INTERRUPT; 4159 } 4160 4161 4162 struct PageFaultContext { 4163 AddressSpaceReadLocker addressSpaceLocker; 4164 VMCacheChainLocker cacheChainLocker; 4165 4166 VMTranslationMap* map; 4167 VMCache* topCache; 4168 off_t cacheOffset; 4169 vm_page_reservation reservation; 4170 bool isWrite; 4171 4172 // return values 4173 vm_page* page; 4174 bool restart; 4175 4176 4177 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4178 : 4179 addressSpaceLocker(addressSpace, true), 4180 map(addressSpace->TranslationMap()), 4181 isWrite(isWrite) 4182 { 4183 } 4184 4185 ~PageFaultContext() 4186 { 4187 UnlockAll(); 4188 vm_page_unreserve_pages(&reservation); 4189 } 4190 4191 void Prepare(VMCache* topCache, off_t cacheOffset) 4192 { 4193 this->topCache = topCache; 4194 this->cacheOffset = cacheOffset; 4195 page = NULL; 4196 restart = false; 4197 4198 cacheChainLocker.SetTo(topCache); 4199 } 4200 4201 void UnlockAll(VMCache* exceptCache = NULL) 4202 { 4203 topCache = NULL; 4204 addressSpaceLocker.Unlock(); 4205 cacheChainLocker.Unlock(exceptCache); 4206 } 4207 }; 4208 4209 4210 /*! Gets the page that should be mapped into the area. 4211 Returns an error code other than \c B_OK, if the page couldn't be found or 4212 paged in. The locking state of the address space and the caches is undefined 4213 in that case. 4214 Returns \c B_OK with \c context.restart set to \c true, if the functions 4215 had to unlock the address space and all caches and is supposed to be called 4216 again. 4217 Returns \c B_OK with \c context.restart set to \c false, if the page was 4218 found. 
It is returned in \c context.page. The address space will still be 4219 locked as well as all caches starting from the top cache to at least the 4220 cache the page lives in. 4221 */ 4222 static status_t 4223 fault_get_page(PageFaultContext& context) 4224 { 4225 VMCache* cache = context.topCache; 4226 VMCache* lastCache = NULL; 4227 vm_page* page = NULL; 4228 4229 while (cache != NULL) { 4230 // We already hold the lock of the cache at this point. 4231 4232 lastCache = cache; 4233 4234 page = cache->LookupPage(context.cacheOffset); 4235 if (page != NULL && page->busy) { 4236 // page must be busy -- wait for it to become unbusy 4237 context.UnlockAll(cache); 4238 cache->ReleaseRefLocked(); 4239 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4240 4241 // restart the whole process 4242 context.restart = true; 4243 return B_OK; 4244 } 4245 4246 if (page != NULL) 4247 break; 4248 4249 // The current cache does not contain the page we're looking for. 4250 4251 // see if the backing store has it 4252 if (cache->HasPage(context.cacheOffset)) { 4253 // insert a fresh page and mark it busy -- we're going to read it in 4254 page = vm_page_allocate_page(&context.reservation, 4255 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4256 cache->InsertPage(page, context.cacheOffset); 4257 4258 // We need to unlock all caches and the address space while reading 4259 // the page in. Keep a reference to the cache around. 4260 cache->AcquireRefLocked(); 4261 context.UnlockAll(); 4262 4263 // read the page in 4264 generic_io_vec vec; 4265 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4266 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4267 4268 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4269 B_PHYSICAL_IO_REQUEST, &bytesRead); 4270 4271 cache->Lock(); 4272 4273 if (status < B_OK) { 4274 // on error remove and free the page 4275 dprintf("reading page from cache %p returned: %s!\n", 4276 cache, strerror(status)); 4277 4278 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4279 cache->RemovePage(page); 4280 vm_page_set_state(page, PAGE_STATE_FREE); 4281 4282 cache->ReleaseRefAndUnlock(); 4283 return status; 4284 } 4285 4286 // mark the page unbusy again 4287 cache->MarkPageUnbusy(page); 4288 4289 DEBUG_PAGE_ACCESS_END(page); 4290 4291 // Since we needed to unlock everything temporarily, the area 4292 // situation might have changed. So we need to restart the whole 4293 // process. 4294 cache->ReleaseRefAndUnlock(); 4295 context.restart = true; 4296 return B_OK; 4297 } 4298 4299 cache = context.cacheChainLocker.LockSourceCache(); 4300 } 4301 4302 if (page == NULL) { 4303 // There was no adequate page, determine the cache for a clean one. 4304 // Read-only pages come in the deepest cache, only the top most cache 4305 // may have direct write access. 4306 cache = context.isWrite ? context.topCache : lastCache; 4307 4308 // allocate a clean page 4309 page = vm_page_allocate_page(&context.reservation, 4310 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4311 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4312 page->physical_page_number)); 4313 4314 // insert the new page into our cache 4315 cache->InsertPage(page, context.cacheOffset); 4316 } else if (page->Cache() != context.topCache && context.isWrite) { 4317 // We have a page that has the data we want, but in the wrong cache 4318 // object so we need to copy it and stick it into the top cache. 
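// Editor's illustration (not in the original source): with a typical chain
//
//	top cache (anonymous, area-private) --source--> lower cache (e.g. file cache)
//
// a read fault is satisfied by mapping the lower cache's page read-only,
// whereas a write fault must give the area a page owned by the top cache --
// either a freshly allocated one (handled above) or, as in this branch, a
// private copy of the lower cache's page (copy-on-write). A condensed
// restatement of that placement rule, purely for illustration:
#if 0
static inline VMCache*
example_cache_for_new_page(bool isWrite, VMCache* topCache,
	VMCache* deepestSearchedCache)
{
	// Writes must be backed by the top cache; reads may share the deepest copy.
	return isWrite ? topCache : deepestSearchedCache;
}
#endif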
4319 vm_page* sourcePage = page; 4320 4321 // TODO: If memory is low, it might be a good idea to steal the page 4322 // from our source cache -- if possible, that is. 4323 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4324 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4325 4326 // To not needlessly kill concurrency we unlock all caches but the top 4327 // one while copying the page. Lacking another mechanism to ensure that 4328 // the source page doesn't disappear, we mark it busy. 4329 sourcePage->busy = true; 4330 context.cacheChainLocker.UnlockKeepRefs(true); 4331 4332 // copy the page 4333 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4334 sourcePage->physical_page_number * B_PAGE_SIZE); 4335 4336 context.cacheChainLocker.RelockCaches(true); 4337 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4338 4339 // insert the new page into our cache 4340 context.topCache->InsertPage(page, context.cacheOffset); 4341 } else 4342 DEBUG_PAGE_ACCESS_START(page); 4343 4344 context.page = page; 4345 return B_OK; 4346 } 4347 4348 4349 /*! Makes sure the address in the given address space is mapped. 4350 4351 \param addressSpace The address space. 4352 \param originalAddress The address. Doesn't need to be page aligned. 4353 \param isWrite If \c true the address shall be write-accessible. 4354 \param isUser If \c true the access is requested by a userland team. 4355 \param wirePage On success, if non \c NULL, the wired count of the page 4356 mapped at the given address is incremented and the page is returned 4357 via this parameter. 4358 \param wiredRange If given, this wiredRange is ignored when checking whether 4359 an already mapped page at the virtual address can be unmapped. 4360 \return \c B_OK on success, another error code otherwise. 4361 */ 4362 static status_t 4363 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4364 bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange) 4365 { 4366 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4367 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4368 originalAddress, isWrite, isUser)); 4369 4370 PageFaultContext context(addressSpace, isWrite); 4371 4372 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4373 status_t status = B_OK; 4374 4375 addressSpace->IncrementFaultCount(); 4376 4377 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4378 // the pages upfront makes sure we don't have any cache locked, so that the 4379 // page daemon/thief can do their job without problems. 4380 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4381 originalAddress); 4382 context.addressSpaceLocker.Unlock(); 4383 vm_page_reserve_pages(&context.reservation, reservePages, 4384 addressSpace == VMAddressSpace::Kernel() 4385 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4386 4387 while (true) { 4388 context.addressSpaceLocker.Lock(); 4389 4390 // get the area the fault was in 4391 VMArea* area = addressSpace->LookupArea(address); 4392 if (area == NULL) { 4393 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4394 "space\n", originalAddress); 4395 TPF(PageFaultError(-1, 4396 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4397 status = B_BAD_ADDRESS; 4398 break; 4399 } 4400 4401 // check permissions 4402 uint32 protection = get_area_page_protection(area, address); 4403 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4404 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4405 area->id, (void*)originalAddress); 4406 TPF(PageFaultError(area->id, 4407 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4408 status = B_PERMISSION_DENIED; 4409 break; 4410 } 4411 if (isWrite && (protection 4412 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4413 dprintf("write access attempted on write-protected area 0x%" 4414 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4415 TPF(PageFaultError(area->id, 4416 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4417 status = B_PERMISSION_DENIED; 4418 break; 4419 } else if (!isWrite && (protection 4420 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4421 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4422 " at %p\n", area->id, (void*)originalAddress); 4423 TPF(PageFaultError(area->id, 4424 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4425 status = B_PERMISSION_DENIED; 4426 break; 4427 } 4428 4429 // We have the area, it was a valid access, so let's try to resolve the 4430 // page fault now. 4431 // At first, the top most cache from the area is investigated. 4432 4433 context.Prepare(vm_area_get_locked_cache(area), 4434 address - area->Base() + area->cache_offset); 4435 4436 // See if this cache has a fault handler -- this will do all the work 4437 // for us. 4438 { 4439 // Note, since the page fault is resolved with interrupts enabled, 4440 // the fault handler could be called more than once for the same 4441 // reason -- the store must take this into account. 4442 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4443 if (status != B_BAD_HANDLER) 4444 break; 4445 } 4446 4447 // The top most cache has no fault handler, so let's see if the cache or 4448 // its sources already have the page we're searching for (we're going 4449 // from top to bottom). 4450 status = fault_get_page(context); 4451 if (status != B_OK) { 4452 TPF(PageFaultError(area->id, status)); 4453 break; 4454 } 4455 4456 if (context.restart) 4457 continue; 4458 4459 // All went fine, all there is left to do is to map the page into the 4460 // address space. 
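// Editor's illustration (not part of the original source): the permission
// checks above combine user and kernel protection bits. A condensed, purely
// illustrative restatement of the write test -- a write is allowed if
// B_WRITE_AREA is set or, for kernel-mode accesses, if B_KERNEL_WRITE_AREA is
// set:
#if 0
static inline bool
example_write_permitted(uint32 protection, bool isUser)
{
	return (protection
		& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) != 0;
}
#endif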
4461 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4462 context.page)); 4463 4464 // If the page doesn't reside in the area's cache, we need to make sure 4465 // it's mapped in read-only, so that we cannot overwrite someone else's 4466 // data (copy-on-write) 4467 uint32 newProtection = protection; 4468 if (context.page->Cache() != context.topCache && !isWrite) 4469 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4470 4471 bool unmapPage = false; 4472 bool mapPage = true; 4473 4474 // check whether there's already a page mapped at the address 4475 context.map->Lock(); 4476 4477 phys_addr_t physicalAddress; 4478 uint32 flags; 4479 vm_page* mappedPage = NULL; 4480 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4481 && (flags & PAGE_PRESENT) != 0 4482 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4483 != NULL) { 4484 // Yep there's already a page. If it's ours, we can simply adjust 4485 // its protection. Otherwise we have to unmap it. 4486 if (mappedPage == context.page) { 4487 context.map->ProtectPage(area, address, newProtection); 4488 // Note: We assume that ProtectPage() is atomic (i.e. 4489 // the page isn't temporarily unmapped), otherwise we'd have 4490 // to make sure it isn't wired. 4491 mapPage = false; 4492 } else 4493 unmapPage = true; 4494 } 4495 4496 context.map->Unlock(); 4497 4498 if (unmapPage) { 4499 // If the page is wired, we can't unmap it. Wait until it is unwired 4500 // again and restart. 4501 VMAreaUnwiredWaiter waiter; 4502 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4503 wiredRange)) { 4504 // unlock everything and wait 4505 context.UnlockAll(); 4506 waiter.waitEntry.Wait(); 4507 continue; 4508 } 4509 4510 // Note: The mapped page is a page of a lower cache. We are 4511 // guaranteed to have that cached locked, our new page is a copy of 4512 // that page, and the page is not busy. The logic for that guarantee 4513 // is as follows: Since the page is mapped, it must live in the top 4514 // cache (ruled out above) or any of its lower caches, and there is 4515 // (was before the new page was inserted) no other page in any 4516 // cache between the top cache and the page's cache (otherwise that 4517 // would be mapped instead). That in turn means that our algorithm 4518 // must have found it and therefore it cannot be busy either. 4519 DEBUG_PAGE_ACCESS_START(mappedPage); 4520 unmap_page(area, address); 4521 DEBUG_PAGE_ACCESS_END(mappedPage); 4522 } 4523 4524 if (mapPage) { 4525 if (map_page(area, context.page, address, newProtection, 4526 &context.reservation) != B_OK) { 4527 // Mapping can only fail, when the page mapping object couldn't 4528 // be allocated. Save for the missing mapping everything is 4529 // fine, though. If this was a regular page fault, we'll simply 4530 // leave and probably fault again. To make sure we'll have more 4531 // luck then, we ensure that the minimum object reserve is 4532 // available. 4533 DEBUG_PAGE_ACCESS_END(context.page); 4534 4535 context.UnlockAll(); 4536 4537 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4538 != B_OK) { 4539 // Apparently the situation is serious. Let's get ourselves 4540 // killed. 4541 status = B_NO_MEMORY; 4542 } else if (wirePage != NULL) { 4543 // The caller expects us to wire the page. Since 4544 // object_cache_reserve() succeeded, we should now be able 4545 // to allocate a mapping structure. Restart. 
4546 continue; 4547 } 4548 4549 break; 4550 } 4551 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4552 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4553 4554 // also wire the page, if requested 4555 if (wirePage != NULL && status == B_OK) { 4556 increment_page_wired_count(context.page); 4557 *wirePage = context.page; 4558 } 4559 4560 DEBUG_PAGE_ACCESS_END(context.page); 4561 4562 break; 4563 } 4564 4565 return status; 4566 } 4567 4568 4569 status_t 4570 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4571 { 4572 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4573 } 4574 4575 status_t 4576 vm_put_physical_page(addr_t vaddr, void* handle) 4577 { 4578 return sPhysicalPageMapper->PutPage(vaddr, handle); 4579 } 4580 4581 4582 status_t 4583 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4584 void** _handle) 4585 { 4586 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4587 } 4588 4589 status_t 4590 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4591 { 4592 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4593 } 4594 4595 4596 status_t 4597 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4598 { 4599 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4600 } 4601 4602 status_t 4603 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4604 { 4605 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4606 } 4607 4608 4609 void 4610 vm_get_info(system_memory_info* info) 4611 { 4612 swap_get_info(info); 4613 4614 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4615 info->page_faults = sPageFaults; 4616 4617 MutexLocker locker(sAvailableMemoryLock); 4618 info->free_memory = sAvailableMemory; 4619 info->needed_memory = sNeededMemory; 4620 } 4621 4622 4623 uint32 4624 vm_num_page_faults(void) 4625 { 4626 return sPageFaults; 4627 } 4628 4629 4630 off_t 4631 vm_available_memory(void) 4632 { 4633 MutexLocker locker(sAvailableMemoryLock); 4634 return sAvailableMemory; 4635 } 4636 4637 4638 off_t 4639 vm_available_not_needed_memory(void) 4640 { 4641 MutexLocker locker(sAvailableMemoryLock); 4642 return sAvailableMemory - sNeededMemory; 4643 } 4644 4645 4646 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4647 debugger. 
4648 */ 4649 off_t 4650 vm_available_not_needed_memory_debug(void) 4651 { 4652 return sAvailableMemory - sNeededMemory; 4653 } 4654 4655 4656 size_t 4657 vm_kernel_address_space_left(void) 4658 { 4659 return VMAddressSpace::Kernel()->FreeSpace(); 4660 } 4661 4662 4663 void 4664 vm_unreserve_memory(size_t amount) 4665 { 4666 mutex_lock(&sAvailableMemoryLock); 4667 4668 sAvailableMemory += amount; 4669 4670 mutex_unlock(&sAvailableMemoryLock); 4671 } 4672 4673 4674 status_t 4675 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4676 { 4677 size_t reserve = kMemoryReserveForPriority[priority]; 4678 4679 MutexLocker locker(sAvailableMemoryLock); 4680 4681 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4682 4683 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4684 sAvailableMemory -= amount; 4685 return B_OK; 4686 } 4687 4688 if (timeout <= 0) 4689 return B_NO_MEMORY; 4690 4691 // turn timeout into an absolute timeout 4692 timeout += system_time(); 4693 4694 // loop until we've got the memory or the timeout occurs 4695 do { 4696 sNeededMemory += amount; 4697 4698 // call the low resource manager 4699 locker.Unlock(); 4700 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4701 B_ABSOLUTE_TIMEOUT, timeout); 4702 locker.Lock(); 4703 4704 sNeededMemory -= amount; 4705 4706 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4707 sAvailableMemory -= amount; 4708 return B_OK; 4709 } 4710 } while (timeout > system_time()); 4711 4712 return B_NO_MEMORY; 4713 } 4714 4715 4716 status_t 4717 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 4718 { 4719 // NOTE: The caller is responsible for synchronizing calls to this function! 4720 4721 AddressSpaceReadLocker locker; 4722 VMArea* area; 4723 status_t status = locker.SetFromArea(id, area); 4724 if (status != B_OK) 4725 return status; 4726 4727 // nothing to do if the type doesn't change 4728 uint32 oldType = area->MemoryType(); 4729 if (type == oldType) 4730 return B_OK; 4731 4732 // set the memory type of the area and the mapped pages 4733 VMTranslationMap* map = area->address_space->TranslationMap(); 4734 map->Lock(); 4735 area->SetMemoryType(type); 4736 map->ProtectArea(area, area->protection); 4737 map->Unlock(); 4738 4739 // set the physical memory type 4740 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 4741 if (error != B_OK) { 4742 // reset the memory type of the area and the mapped pages 4743 map->Lock(); 4744 area->SetMemoryType(oldType); 4745 map->ProtectArea(area, area->protection); 4746 map->Unlock(); 4747 return error; 4748 } 4749 4750 return B_OK; 4751 4752 } 4753 4754 4755 /*! This function enforces some protection properties: 4756 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 4757 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 4758 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 4759 and B_KERNEL_WRITE_AREA.
4760 */ 4761 static void 4762 fix_protection(uint32* protection) 4763 { 4764 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4765 if ((*protection & B_USER_PROTECTION) == 0 4766 || (*protection & B_WRITE_AREA) != 0) 4767 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4768 else 4769 *protection |= B_KERNEL_READ_AREA; 4770 } 4771 } 4772 4773 4774 static void 4775 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4776 { 4777 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4778 info->area = area->id; 4779 info->address = (void*)area->Base(); 4780 info->size = area->Size(); 4781 info->protection = area->protection; 4782 info->lock = B_FULL_LOCK; 4783 info->team = area->address_space->ID(); 4784 info->copy_count = 0; 4785 info->in_count = 0; 4786 info->out_count = 0; 4787 // TODO: retrieve real values here! 4788 4789 VMCache* cache = vm_area_get_locked_cache(area); 4790 4791 // Note, this is a simplification; the cache could be larger than this area 4792 info->ram_size = cache->page_count * B_PAGE_SIZE; 4793 4794 vm_area_put_locked_cache(cache); 4795 } 4796 4797 4798 static status_t 4799 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4800 { 4801 // is newSize a multiple of B_PAGE_SIZE? 4802 if (newSize & (B_PAGE_SIZE - 1)) 4803 return B_BAD_VALUE; 4804 4805 // lock all affected address spaces and the cache 4806 VMArea* area; 4807 VMCache* cache; 4808 4809 MultiAddressSpaceLocker locker; 4810 AreaCacheLocker cacheLocker; 4811 4812 status_t status; 4813 size_t oldSize; 4814 bool anyKernelArea; 4815 bool restart; 4816 4817 do { 4818 anyKernelArea = false; 4819 restart = false; 4820 4821 locker.Unset(); 4822 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4823 if (status != B_OK) 4824 return status; 4825 cacheLocker.SetTo(cache, true); // already locked 4826 4827 // enforce restrictions 4828 if (!kernel) { 4829 if ((area->protection & B_KERNEL_AREA) != 0) 4830 return B_NOT_ALLOWED; 4831 // TODO: Enforce all restrictions (team, etc.)! 4832 } 4833 4834 oldSize = area->Size(); 4835 if (newSize == oldSize) 4836 return B_OK; 4837 4838 if (cache->type != CACHE_TYPE_RAM) 4839 return B_NOT_ALLOWED; 4840 4841 if (oldSize < newSize) { 4842 // We need to check if all areas of this cache can be resized. 4843 for (VMArea* current = cache->areas; current != NULL; 4844 current = current->cache_next) { 4845 if (!current->address_space->CanResizeArea(current, newSize)) 4846 return B_ERROR; 4847 anyKernelArea 4848 |= current->address_space == VMAddressSpace::Kernel(); 4849 } 4850 } else { 4851 // We're shrinking the areas, so we must make sure the affected 4852 // ranges are not wired. 4853 for (VMArea* current = cache->areas; current != NULL; 4854 current = current->cache_next) { 4855 anyKernelArea 4856 |= current->address_space == VMAddressSpace::Kernel(); 4857 4858 if (wait_if_area_range_is_wired(current, 4859 current->Base() + newSize, oldSize - newSize, &locker, 4860 &cacheLocker)) { 4861 restart = true; 4862 break; 4863 } 4864 } 4865 } 4866 } while (restart); 4867 4868 // Okay, looks good so far, so let's do it 4869 4870 int priority = kernel && anyKernelArea 4871 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4872 uint32 allocationFlags = kernel && anyKernelArea 4873 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4874 4875 if (oldSize < newSize) { 4876 // Growing the cache can fail, so we do it first. 
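// Editor's usage sketch (not part of the original source): the public
// resize_area() entry point (defined further below) funnels into
// vm_resize_area(). The helper name below is made up for illustration.
#if 0
static status_t
example_grow_area_by_one_page(area_id id)
{
	area_info info;
	status_t status = get_area_info(id, &info);
	if (status != B_OK)
		return status;

	// The new size must be a multiple of B_PAGE_SIZE (B_BAD_VALUE otherwise),
	// and only areas backed by a CACHE_TYPE_RAM cache may be resized.
	return resize_area(id, info.size + B_PAGE_SIZE);
}
#endif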
4877 status = cache->Resize(cache->virtual_base + newSize, priority); 4878 if (status != B_OK) 4879 return status; 4880 } 4881 4882 for (VMArea* current = cache->areas; current != NULL; 4883 current = current->cache_next) { 4884 status = current->address_space->ResizeArea(current, newSize, 4885 allocationFlags); 4886 if (status != B_OK) 4887 break; 4888 4889 // We also need to unmap all pages beyond the new size, if the area has 4890 // shrunk 4891 if (newSize < oldSize) { 4892 VMCacheChainLocker cacheChainLocker(cache); 4893 cacheChainLocker.LockAllSourceCaches(); 4894 4895 unmap_pages(current, current->Base() + newSize, 4896 oldSize - newSize); 4897 4898 cacheChainLocker.Unlock(cache); 4899 } 4900 } 4901 4902 if (status == B_OK) { 4903 // Shrink or grow individual page protections if in use. 4904 if (area->page_protections != NULL) { 4905 uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2; 4906 uint8* newProtections 4907 = (uint8*)realloc(area->page_protections, bytes); 4908 if (newProtections == NULL) 4909 status = B_NO_MEMORY; 4910 else { 4911 area->page_protections = newProtections; 4912 4913 if (oldSize < newSize) { 4914 // init the additional page protections to that of the area 4915 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 4916 uint32 areaProtection = area->protection 4917 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 4918 memset(area->page_protections + offset, 4919 areaProtection | (areaProtection << 4), bytes - offset); 4920 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 4921 uint8& entry = area->page_protections[offset - 1]; 4922 entry = (entry & 0x0f) | (areaProtection << 4); 4923 } 4924 } 4925 } 4926 } 4927 } 4928 4929 // shrinking the cache can't fail, so we do it now 4930 if (status == B_OK && newSize < oldSize) 4931 status = cache->Resize(cache->virtual_base + newSize, priority); 4932 4933 if (status != B_OK) { 4934 // Something failed -- resize the areas back to their original size. 4935 // This can fail, too, in which case we're seriously screwed. 4936 for (VMArea* current = cache->areas; current != NULL; 4937 current = current->cache_next) { 4938 if (current->address_space->ResizeArea(current, oldSize, 4939 allocationFlags) != B_OK) { 4940 panic("vm_resize_area(): Failed and not being able to restore " 4941 "original state."); 4942 } 4943 } 4944 4945 cache->Resize(cache->virtual_base + oldSize, priority); 4946 } 4947 4948 // TODO: we must honour the lock restrictions of this area 4949 return status; 4950 } 4951 4952 4953 status_t 4954 vm_memset_physical(phys_addr_t address, int value, size_t length) 4955 { 4956 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4957 } 4958 4959 4960 status_t 4961 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 4962 { 4963 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4964 } 4965 4966 4967 status_t 4968 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 4969 bool user) 4970 { 4971 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4972 } 4973 4974 4975 void 4976 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 4977 { 4978 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4979 } 4980 4981 4982 /*! Copies a range of memory directly from/to a page that might not be mapped 4983 at the moment. 4984 4985 For \a unsafeMemory the current mapping (if any is ignored). The function 4986 walks through the respective area's cache chain to find the physical page 4987 and copies from/to it directly. 
4988 The memory range starting at \a unsafeMemory with a length of \a size bytes 4989 must not cross a page boundary. 4990 4991 \param teamID The team ID identifying the address space \a unsafeMemory is 4992 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 4993 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 4994 is passed, the address space of the thread returned by 4995 debug_get_debugged_thread() is used. 4996 \param unsafeMemory The start of the unsafe memory range to be copied 4997 from/to. 4998 \param buffer A safely accessible kernel buffer to be copied from/to. 4999 \param size The number of bytes to be copied. 5000 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5001 \a unsafeMemory, the other way around otherwise. 5002 */ 5003 status_t 5004 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5005 size_t size, bool copyToUnsafe) 5006 { 5007 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5008 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5009 return B_BAD_VALUE; 5010 } 5011 5012 // get the address space for the debugged thread 5013 VMAddressSpace* addressSpace; 5014 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5015 addressSpace = VMAddressSpace::Kernel(); 5016 } else if (teamID == B_CURRENT_TEAM) { 5017 Thread* thread = debug_get_debugged_thread(); 5018 if (thread == NULL || thread->team == NULL) 5019 return B_BAD_ADDRESS; 5020 5021 addressSpace = thread->team->address_space; 5022 } else 5023 addressSpace = VMAddressSpace::DebugGet(teamID); 5024 5025 if (addressSpace == NULL) 5026 return B_BAD_ADDRESS; 5027 5028 // get the area 5029 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5030 if (area == NULL) 5031 return B_BAD_ADDRESS; 5032 5033 // search the page 5034 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5035 + area->cache_offset; 5036 VMCache* cache = area->cache; 5037 vm_page* page = NULL; 5038 while (cache != NULL) { 5039 page = cache->DebugLookupPage(cacheOffset); 5040 if (page != NULL) 5041 break; 5042 5043 // Page not found in this cache -- if it is paged out, we must not try 5044 // to get it from lower caches. 5045 if (cache->DebugHasPage(cacheOffset)) 5046 break; 5047 5048 cache = cache->source; 5049 } 5050 5051 if (page == NULL) 5052 return B_UNSUPPORTED; 5053 5054 // copy from/to physical memory 5055 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5056 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5057 5058 if (copyToUnsafe) { 5059 if (page->Cache() != area->cache) 5060 return B_UNSUPPORTED; 5061 5062 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5063 } 5064 5065 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5066 } 5067 5068 5069 // #pragma mark - kernel public API 5070 5071 5072 status_t 5073 user_memcpy(void* to, const void* from, size_t size) 5074 { 5075 // don't allow address overflows 5076 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5077 return B_BAD_ADDRESS; 5078 5079 if (arch_cpu_user_memcpy(to, from, size, 5080 &thread_get_current_thread()->fault_handler) < B_OK) 5081 return B_BAD_ADDRESS; 5082 5083 return B_OK; 5084 } 5085 5086 5087 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5088 the string in \a to, NULL-terminating the result. 5089 5090 \param to Pointer to the destination C-string. 5091 \param from Pointer to the source C-string. 
5092 \param size Size in bytes of the string buffer pointed to by \a to. 5093 5094 \return strlen(\a from). 5095 */ 5096 ssize_t 5097 user_strlcpy(char* to, const char* from, size_t size) 5098 { 5099 if (to == NULL && size != 0) 5100 return B_BAD_VALUE; 5101 if (from == NULL) 5102 return B_BAD_ADDRESS; 5103 5104 // limit size to avoid address overflows 5105 size_t maxSize = std::min(size, 5106 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 5107 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 5108 // the source address might still overflow. 5109 5110 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 5111 &thread_get_current_thread()->fault_handler); 5112 5113 // If we hit the address overflow boundary, fail. 5114 if (result < 0 || (result >= 0 && (size_t)result >= maxSize 5115 && maxSize < size)) { 5116 return B_BAD_ADDRESS; 5117 } 5118 5119 return result; 5120 } 5121 5122 5123 status_t 5124 user_memset(void* s, char c, size_t count) 5125 { 5126 // don't allow address overflows 5127 if ((addr_t)s + count < (addr_t)s) 5128 return B_BAD_ADDRESS; 5129 5130 if (arch_cpu_user_memset(s, c, count, 5131 &thread_get_current_thread()->fault_handler) < B_OK) 5132 return B_BAD_ADDRESS; 5133 5134 return B_OK; 5135 } 5136 5137 5138 /*! Wires a single page at the given address. 5139 5140 \param team The team whose address space the address belongs to. Supports 5141 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5142 parameter is ignored. 5143 \param address address The virtual address to wire down. Does not need to 5144 be page aligned. 5145 \param writable If \c true the page shall be writable. 5146 \param info On success the info is filled in, among other things 5147 containing the physical address the given virtual one translates to. 5148 \return \c B_OK, when the page could be wired, another error code otherwise. 5149 */ 5150 status_t 5151 vm_wire_page(team_id team, addr_t address, bool writable, 5152 VMPageWiringInfo* info) 5153 { 5154 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5155 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5156 5157 // compute the page protection that is required 5158 bool isUser = IS_USER_ADDRESS(address); 5159 uint32 requiredProtection = PAGE_PRESENT 5160 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5161 if (writable) 5162 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5163 5164 // get and read lock the address space 5165 VMAddressSpace* addressSpace = NULL; 5166 if (isUser) { 5167 if (team == B_CURRENT_TEAM) 5168 addressSpace = VMAddressSpace::GetCurrent(); 5169 else 5170 addressSpace = VMAddressSpace::Get(team); 5171 } else 5172 addressSpace = VMAddressSpace::GetKernel(); 5173 if (addressSpace == NULL) 5174 return B_ERROR; 5175 5176 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5177 5178 VMTranslationMap* map = addressSpace->TranslationMap(); 5179 status_t error = B_OK; 5180 5181 // get the area 5182 VMArea* area = addressSpace->LookupArea(pageAddress); 5183 if (area == NULL) { 5184 addressSpace->Put(); 5185 return B_BAD_ADDRESS; 5186 } 5187 5188 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5189 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5190 5191 // mark the area range wired 5192 area->Wire(&info->range); 5193 5194 // Lock the area's cache chain and the translation map. Needed to look 5195 // up the page and play with its wired count. 
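// Editor's usage sketch (not part of the original source): how a caller might
// use vm_wire_page()/vm_unwire_page() to pin a single page and inspect its
// physical address. The helper name is made up; the translation is only
// guaranteed to stay valid while the page remains wired.
#if 0
static status_t
example_with_wired_page(team_id team, addr_t address)
{
	VMPageWiringInfo info;
	status_t error = vm_wire_page(team, address, false, &info);
	if (error != B_OK)
		return error;

	// While wired, the page cannot be unmapped or paged out.
	dprintf("address %#" B_PRIxADDR " -> physical %#" B_PRIxPHYSADDR "\n",
		address, info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
#endif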
5196 cacheChainLocker.LockAllSourceCaches(); 5197 map->Lock(); 5198 5199 phys_addr_t physicalAddress; 5200 uint32 flags; 5201 vm_page* page; 5202 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5203 && (flags & requiredProtection) == requiredProtection 5204 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5205 != NULL) { 5206 // Already mapped with the correct permissions -- just increment 5207 // the page's wired count. 5208 increment_page_wired_count(page); 5209 5210 map->Unlock(); 5211 cacheChainLocker.Unlock(); 5212 addressSpaceLocker.Unlock(); 5213 } else { 5214 // Let vm_soft_fault() map the page for us, if possible. We need 5215 // to fully unlock to avoid deadlocks. Since we have already 5216 // wired the area itself, nothing disturbing will happen with it 5217 // in the meantime. 5218 map->Unlock(); 5219 cacheChainLocker.Unlock(); 5220 addressSpaceLocker.Unlock(); 5221 5222 error = vm_soft_fault(addressSpace, pageAddress, writable, isUser, 5223 &page, &info->range); 5224 5225 if (error != B_OK) { 5226 // The page could not be mapped -- clean up. 5227 VMCache* cache = vm_area_get_locked_cache(area); 5228 area->Unwire(&info->range); 5229 cache->ReleaseRefAndUnlock(); 5230 addressSpace->Put(); 5231 return error; 5232 } 5233 } 5234 5235 info->physicalAddress 5236 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5237 + address % B_PAGE_SIZE; 5238 info->page = page; 5239 5240 return B_OK; 5241 } 5242 5243 5244 /*! Unwires a single page previously wired via vm_wire_page(). 5245 5246 \param info The same object passed to vm_wire_page() before. 5247 */ 5248 void 5249 vm_unwire_page(VMPageWiringInfo* info) 5250 { 5251 // lock the address space 5252 VMArea* area = info->range.area; 5253 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5254 // takes over our reference 5255 5256 // lock the top cache 5257 VMCache* cache = vm_area_get_locked_cache(area); 5258 VMCacheChainLocker cacheChainLocker(cache); 5259 5260 if (info->page->Cache() != cache) { 5261 // The page is not in the top cache, so we lock the whole cache chain 5262 // before touching the page's wired count. 5263 cacheChainLocker.LockAllSourceCaches(); 5264 } 5265 5266 decrement_page_wired_count(info->page); 5267 5268 // remove the wired range from the range 5269 area->Unwire(&info->range); 5270 5271 cacheChainLocker.Unlock(); 5272 } 5273 5274 5275 /*! Wires down the given address range in the specified team's address space. 5276 5277 If successful the function 5278 - acquires a reference to the specified team's address space, 5279 - adds respective wired ranges to all areas that intersect with the given 5280 address range, 5281 - makes sure all pages in the given address range are mapped with the 5282 requested access permissions and increments their wired count. 5283 5284 It fails, when \a team doesn't specify a valid address space, when any part 5285 of the specified address range is not covered by areas, when the concerned 5286 areas don't allow mapping with the requested permissions, or when mapping 5287 failed for another reason. 5288 5289 When successful the call must be balanced by a unlock_memory_etc() call with 5290 the exact same parameters. 5291 5292 \param team Identifies the address (via team ID). \c B_CURRENT_TEAM is 5293 supported. 5294 \param address The start of the address range to be wired. 5295 \param numBytes The size of the address range to be wired. 5296 \param flags Flags. 
Currently only \c B_READ_DEVICE is defined, which 5297 requests that the range must be wired writable ("read from device 5298 into memory"). 5299 \return \c B_OK on success, another error code otherwise. 5300 */ 5301 status_t 5302 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5303 { 5304 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5305 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5306 5307 // compute the page protection that is required 5308 bool isUser = IS_USER_ADDRESS(address); 5309 bool writable = (flags & B_READ_DEVICE) == 0; 5310 uint32 requiredProtection = PAGE_PRESENT 5311 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5312 if (writable) 5313 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5314 5315 uint32 mallocFlags = isUser 5316 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5317 5318 // get and read lock the address space 5319 VMAddressSpace* addressSpace = NULL; 5320 if (isUser) { 5321 if (team == B_CURRENT_TEAM) 5322 addressSpace = VMAddressSpace::GetCurrent(); 5323 else 5324 addressSpace = VMAddressSpace::Get(team); 5325 } else 5326 addressSpace = VMAddressSpace::GetKernel(); 5327 if (addressSpace == NULL) 5328 return B_ERROR; 5329 5330 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5331 5332 VMTranslationMap* map = addressSpace->TranslationMap(); 5333 status_t error = B_OK; 5334 5335 // iterate through all concerned areas 5336 addr_t nextAddress = lockBaseAddress; 5337 while (nextAddress != lockEndAddress) { 5338 // get the next area 5339 VMArea* area = addressSpace->LookupArea(nextAddress); 5340 if (area == NULL) { 5341 error = B_BAD_ADDRESS; 5342 break; 5343 } 5344 5345 addr_t areaStart = nextAddress; 5346 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5347 5348 // allocate the wired range (do that before locking the cache to avoid 5349 // deadlocks) 5350 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5351 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5352 if (range == NULL) { 5353 error = B_NO_MEMORY; 5354 break; 5355 } 5356 5357 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5358 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5359 5360 // mark the area range wired 5361 area->Wire(range); 5362 5363 // Depending on the area cache type and the wiring, we may not need to 5364 // look at the individual pages. 5365 if (area->cache_type == CACHE_TYPE_NULL 5366 || area->cache_type == CACHE_TYPE_DEVICE 5367 || area->wiring == B_FULL_LOCK 5368 || area->wiring == B_CONTIGUOUS) { 5369 nextAddress = areaEnd; 5370 continue; 5371 } 5372 5373 // Lock the area's cache chain and the translation map. Needed to look 5374 // up pages and play with their wired count. 5375 cacheChainLocker.LockAllSourceCaches(); 5376 map->Lock(); 5377 5378 // iterate through the pages and wire them 5379 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5380 phys_addr_t physicalAddress; 5381 uint32 flags; 5382 5383 vm_page* page; 5384 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5385 && (flags & requiredProtection) == requiredProtection 5386 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5387 != NULL) { 5388 // Already mapped with the correct permissions -- just increment 5389 // the page's wired count. 5390 increment_page_wired_count(page); 5391 } else { 5392 // Let vm_soft_fault() map the page for us, if possible. 
We need 5393 // to fully unlock to avoid deadlocks. Since we have already 5394 // wired the area itself, nothing disturbing will happen with it 5395 // in the meantime. 5396 map->Unlock(); 5397 cacheChainLocker.Unlock(); 5398 addressSpaceLocker.Unlock(); 5399 5400 error = vm_soft_fault(addressSpace, nextAddress, writable, 5401 isUser, &page, range); 5402 5403 addressSpaceLocker.Lock(); 5404 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5405 cacheChainLocker.LockAllSourceCaches(); 5406 map->Lock(); 5407 } 5408 5409 if (error != B_OK) 5410 break; 5411 } 5412 5413 map->Unlock(); 5414 5415 if (error == B_OK) { 5416 cacheChainLocker.Unlock(); 5417 } else { 5418 // An error occurred, so abort right here. If the current address 5419 // is the first in this area, unwire the area, since we won't get 5420 // to it when reverting what we've done so far. 5421 if (nextAddress == areaStart) { 5422 area->Unwire(range); 5423 cacheChainLocker.Unlock(); 5424 range->~VMAreaWiredRange(); 5425 free_etc(range, mallocFlags); 5426 } else 5427 cacheChainLocker.Unlock(); 5428 5429 break; 5430 } 5431 } 5432 5433 if (error != B_OK) { 5434 // An error occurred, so unwire all that we've already wired. Note that 5435 // even if not a single page was wired, unlock_memory_etc() is called 5436 // to put the address space reference. 5437 addressSpaceLocker.Unlock(); 5438 unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress, 5439 flags); 5440 } 5441 5442 return error; 5443 } 5444 5445 5446 status_t 5447 lock_memory(void* address, size_t numBytes, uint32 flags) 5448 { 5449 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5450 } 5451 5452 5453 /*! Unwires an address range previously wired with lock_memory_etc(). 5454 5455 Note that a call to this function must balance a previous lock_memory_etc() 5456 call with exactly the same parameters. 5457 */ 5458 status_t 5459 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5460 { 5461 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5462 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5463 5464 // compute the page protection that is required 5465 bool isUser = IS_USER_ADDRESS(address); 5466 bool writable = (flags & B_READ_DEVICE) == 0; 5467 uint32 requiredProtection = PAGE_PRESENT 5468 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5469 if (writable) 5470 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5471 5472 uint32 mallocFlags = isUser 5473 ? 
0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5474 5475 // get and read lock the address space 5476 VMAddressSpace* addressSpace = NULL; 5477 if (isUser) { 5478 if (team == B_CURRENT_TEAM) 5479 addressSpace = VMAddressSpace::GetCurrent(); 5480 else 5481 addressSpace = VMAddressSpace::Get(team); 5482 } else 5483 addressSpace = VMAddressSpace::GetKernel(); 5484 if (addressSpace == NULL) 5485 return B_ERROR; 5486 5487 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5488 5489 VMTranslationMap* map = addressSpace->TranslationMap(); 5490 status_t error = B_OK; 5491 5492 // iterate through all concerned areas 5493 addr_t nextAddress = lockBaseAddress; 5494 while (nextAddress != lockEndAddress) { 5495 // get the next area 5496 VMArea* area = addressSpace->LookupArea(nextAddress); 5497 if (area == NULL) { 5498 error = B_BAD_ADDRESS; 5499 break; 5500 } 5501 5502 addr_t areaStart = nextAddress; 5503 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5504 5505 // Lock the area's top cache. This is a requirement for 5506 // VMArea::Unwire(). 5507 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5508 5509 // Depending on the area cache type and the wiring, we may not need to 5510 // look at the individual pages. 5511 if (area->cache_type == CACHE_TYPE_NULL 5512 || area->cache_type == CACHE_TYPE_DEVICE 5513 || area->wiring == B_FULL_LOCK 5514 || area->wiring == B_CONTIGUOUS) { 5515 // unwire the range (to avoid deadlocks we delete the range after 5516 // unlocking the cache) 5517 nextAddress = areaEnd; 5518 VMAreaWiredRange* range = area->Unwire(areaStart, 5519 areaEnd - areaStart, writable); 5520 cacheChainLocker.Unlock(); 5521 if (range != NULL) { 5522 range->~VMAreaWiredRange(); 5523 free_etc(range, mallocFlags); 5524 } 5525 continue; 5526 } 5527 5528 // Lock the area's cache chain and the translation map. Needed to look 5529 // up pages and play with their wired count. 5530 cacheChainLocker.LockAllSourceCaches(); 5531 map->Lock(); 5532 5533 // iterate through the pages and unwire them 5534 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5535 phys_addr_t physicalAddress; 5536 uint32 flags; 5537 5538 vm_page* page; 5539 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5540 && (flags & PAGE_PRESENT) != 0 5541 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5542 != NULL) { 5543 // Already mapped with the correct permissions -- just increment 5544 // the page's wired count. 5545 decrement_page_wired_count(page); 5546 } else { 5547 panic("unlock_memory_etc(): Failed to unwire page: address " 5548 "space %p, address: %#" B_PRIxADDR, addressSpace, 5549 nextAddress); 5550 error = B_BAD_VALUE; 5551 break; 5552 } 5553 } 5554 5555 map->Unlock(); 5556 5557 // All pages are unwired. Remove the area's wired range as well (to 5558 // avoid deadlocks we delete the range after unlocking the cache). 5559 VMAreaWiredRange* range = area->Unwire(areaStart, 5560 areaEnd - areaStart, writable); 5561 5562 cacheChainLocker.Unlock(); 5563 5564 if (range != NULL) { 5565 range->~VMAreaWiredRange(); 5566 free_etc(range, mallocFlags); 5567 } 5568 5569 if (error != B_OK) 5570 break; 5571 } 5572 5573 // get rid of the address space reference 5574 addressSpace->Put(); 5575 5576 return error; 5577 } 5578 5579 5580 status_t 5581 unlock_memory(void* address, size_t numBytes, uint32 flags) 5582 { 5583 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5584 } 5585 5586 5587 /*! 
Similar to get_memory_map(), but also allows specifying the address space
5588 for the memory in question and has saner semantics.
5589 Returns \c B_OK when the complete range could be translated or
5590 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5591 case the actual number of entries is written to \c *_numEntries. Any other
5592 error case indicates complete failure; \c *_numEntries will be set to \c 0
5593 in this case.
5594 */
5595 status_t
5596 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5597 physical_entry* table, uint32* _numEntries)
5598 {
5599 uint32 numEntries = *_numEntries;
5600 *_numEntries = 0;
5601
5602 VMAddressSpace* addressSpace;
5603 addr_t virtualAddress = (addr_t)address;
5604 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5605 phys_addr_t physicalAddress;
5606 status_t status = B_OK;
5607 int32 index = -1;
5608 addr_t offset = 0;
5609 bool interrupts = are_interrupts_enabled();
5610
5611 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5612 "entries)\n", team, address, numBytes, numEntries));
5613
5614 if (numEntries == 0 || numBytes == 0)
5615 return B_BAD_VALUE;
5616
5617 // in which address space is the address to be found?
5618 if (IS_USER_ADDRESS(virtualAddress)) {
5619 if (team == B_CURRENT_TEAM)
5620 addressSpace = VMAddressSpace::GetCurrent();
5621 else
5622 addressSpace = VMAddressSpace::Get(team);
5623 } else
5624 addressSpace = VMAddressSpace::GetKernel();
5625
5626 if (addressSpace == NULL)
5627 return B_ERROR;
5628
5629 VMTranslationMap* map = addressSpace->TranslationMap();
5630
5631 if (interrupts)
5632 map->Lock();
5633
5634 while (offset < numBytes) {
5635 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5636 uint32 flags;
5637
5638 if (interrupts) {
5639 status = map->Query((addr_t)address + offset, &physicalAddress,
5640 &flags);
5641 } else {
5642 status = map->QueryInterrupt((addr_t)address + offset,
5643 &physicalAddress, &flags);
5644 }
5645 if (status < B_OK)
5646 break;
5647 if ((flags & PAGE_PRESENT) == 0) {
5648 panic("get_memory_map() called on unmapped memory!");
5649 return B_BAD_ADDRESS;
5650 }
5651
5652 if (index < 0 && pageOffset > 0) {
5653 physicalAddress += pageOffset;
5654 if (bytes > B_PAGE_SIZE - pageOffset)
5655 bytes = B_PAGE_SIZE - pageOffset;
5656 }
5657
5658 // need to switch to the next physical_entry?
5659 if (index < 0 || table[index].address
5660 != physicalAddress - table[index].size) {
5661 if ((uint32)++index + 1 > numEntries) {
5662 // table too small
5663 break;
5664 }
5665 table[index].address = physicalAddress;
5666 table[index].size = bytes;
5667 } else {
5668 // page does fit in current entry
5669 table[index].size += bytes;
5670 }
5671
5672 offset += bytes;
5673 }
5674
5675 if (interrupts)
5676 map->Unlock();
5677
5678 if (status != B_OK)
5679 return status;
5680
5681 if ((uint32)index + 1 > numEntries) {
5682 *_numEntries = index;
5683 return B_BUFFER_OVERFLOW;
5684 }
5685
5686 *_numEntries = index + 1;
5687 return B_OK;
5688 }
5689
5690
5691 /*! According to the BeBook, this function should always succeed.
5692 This is no longer the case.
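
	A minimal usage sketch (illustrative only; "buffer" and "bufferSize"
	stand for a hypothetical, already locked/wired range):

		physical_entry table[4];
		if (get_memory_map(buffer, bufferSize, table, 4) == B_OK) {
			// table[] now describes the physical runs backing the range;
			// since more than one entry was passed in, the list is
			// terminated by an entry with size == 0.
		}

	New code should prefer get_memory_map_etc() above, which reports the
	number of valid entries directly.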
5693 */ 5694 extern "C" int32 5695 __get_memory_map_haiku(const void* address, size_t numBytes, 5696 physical_entry* table, int32 numEntries) 5697 { 5698 uint32 entriesRead = numEntries; 5699 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5700 table, &entriesRead); 5701 if (error != B_OK) 5702 return error; 5703 5704 // close the entry list 5705 5706 // if it's only one entry, we will silently accept the missing ending 5707 if (numEntries == 1) 5708 return B_OK; 5709 5710 if (entriesRead + 1 > (uint32)numEntries) 5711 return B_BUFFER_OVERFLOW; 5712 5713 table[entriesRead].address = 0; 5714 table[entriesRead].size = 0; 5715 5716 return B_OK; 5717 } 5718 5719 5720 area_id 5721 area_for(void* address) 5722 { 5723 return vm_area_for((addr_t)address, true); 5724 } 5725 5726 5727 area_id 5728 find_area(const char* name) 5729 { 5730 return VMAreaHash::Find(name); 5731 } 5732 5733 5734 status_t 5735 _get_area_info(area_id id, area_info* info, size_t size) 5736 { 5737 if (size != sizeof(area_info) || info == NULL) 5738 return B_BAD_VALUE; 5739 5740 AddressSpaceReadLocker locker; 5741 VMArea* area; 5742 status_t status = locker.SetFromArea(id, area); 5743 if (status != B_OK) 5744 return status; 5745 5746 fill_area_info(area, info, size); 5747 return B_OK; 5748 } 5749 5750 5751 status_t 5752 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 5753 { 5754 addr_t nextBase = *(addr_t*)cookie; 5755 5756 // we're already through the list 5757 if (nextBase == (addr_t)-1) 5758 return B_ENTRY_NOT_FOUND; 5759 5760 if (team == B_CURRENT_TEAM) 5761 team = team_get_current_team_id(); 5762 5763 AddressSpaceReadLocker locker(team); 5764 if (!locker.IsLocked()) 5765 return B_BAD_TEAM_ID; 5766 5767 VMArea* area; 5768 for (VMAddressSpace::AreaIterator it 5769 = locker.AddressSpace()->GetAreaIterator(); 5770 (area = it.Next()) != NULL;) { 5771 if (area->Base() > nextBase) 5772 break; 5773 } 5774 5775 if (area == NULL) { 5776 nextBase = (addr_t)-1; 5777 return B_ENTRY_NOT_FOUND; 5778 } 5779 5780 fill_area_info(area, info, size); 5781 *cookie = (ssize_t)(area->Base()); 5782 5783 return B_OK; 5784 } 5785 5786 5787 status_t 5788 set_area_protection(area_id area, uint32 newProtection) 5789 { 5790 fix_protection(&newProtection); 5791 5792 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5793 newProtection, true); 5794 } 5795 5796 5797 status_t 5798 resize_area(area_id areaID, size_t newSize) 5799 { 5800 return vm_resize_area(areaID, newSize, true); 5801 } 5802 5803 5804 /*! Transfers the specified area to a new team. The caller must be the owner 5805 of the area. 5806 */ 5807 area_id 5808 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5809 bool kernel) 5810 { 5811 area_info info; 5812 status_t status = get_area_info(id, &info); 5813 if (status != B_OK) 5814 return status; 5815 5816 if (info.team != thread_get_current_thread()->team->id) 5817 return B_PERMISSION_DENIED; 5818 5819 area_id clonedArea = vm_clone_area(target, info.name, _address, 5820 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5821 if (clonedArea < 0) 5822 return clonedArea; 5823 5824 status = vm_delete_area(info.team, id, kernel); 5825 if (status != B_OK) { 5826 vm_delete_area(target, clonedArea, kernel); 5827 return status; 5828 } 5829 5830 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
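	// Note: the transfer is implemented above as vm_clone_area() into the
	// target team followed by vm_delete_area() of the original, which is why
	// the surviving area ends up as a (shared) clone rather than the original
	// area simply changing teams.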
5831 5832 return clonedArea; 5833 } 5834 5835 5836 extern "C" area_id 5837 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 5838 size_t numBytes, uint32 addressSpec, uint32 protection, 5839 void** _virtualAddress) 5840 { 5841 if (!arch_vm_supports_protection(protection)) 5842 return B_NOT_SUPPORTED; 5843 5844 fix_protection(&protection); 5845 5846 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5847 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 5848 false); 5849 } 5850 5851 5852 area_id 5853 clone_area(const char* name, void** _address, uint32 addressSpec, 5854 uint32 protection, area_id source) 5855 { 5856 if ((protection & B_KERNEL_PROTECTION) == 0) 5857 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5858 5859 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5860 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5861 } 5862 5863 5864 area_id 5865 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock, 5866 uint32 protection, uint32 flags, 5867 const virtual_address_restrictions* virtualAddressRestrictions, 5868 const physical_address_restrictions* physicalAddressRestrictions, 5869 void** _address) 5870 { 5871 fix_protection(&protection); 5872 5873 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 5874 virtualAddressRestrictions, physicalAddressRestrictions, true, 5875 _address); 5876 } 5877 5878 5879 extern "C" area_id 5880 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 5881 size_t size, uint32 lock, uint32 protection) 5882 { 5883 fix_protection(&protection); 5884 5885 virtual_address_restrictions virtualRestrictions = {}; 5886 virtualRestrictions.address = *_address; 5887 virtualRestrictions.address_specification = addressSpec; 5888 physical_address_restrictions physicalRestrictions = {}; 5889 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 5890 lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true, 5891 _address); 5892 } 5893 5894 5895 status_t 5896 delete_area(area_id area) 5897 { 5898 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5899 } 5900 5901 5902 // #pragma mark - Userland syscalls 5903 5904 5905 status_t 5906 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5907 addr_t size) 5908 { 5909 // filter out some unavailable values (for userland) 5910 switch (addressSpec) { 5911 case B_ANY_KERNEL_ADDRESS: 5912 case B_ANY_KERNEL_BLOCK_ADDRESS: 5913 return B_BAD_VALUE; 5914 } 5915 5916 addr_t address; 5917 5918 if (!IS_USER_ADDRESS(userAddress) 5919 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5920 return B_BAD_ADDRESS; 5921 5922 status_t status = vm_reserve_address_range( 5923 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5924 RESERVED_AVOID_BASE); 5925 if (status != B_OK) 5926 return status; 5927 5928 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5929 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5930 (void*)address, size); 5931 return B_BAD_ADDRESS; 5932 } 5933 5934 return B_OK; 5935 } 5936 5937 5938 status_t 5939 _user_unreserve_address_range(addr_t address, addr_t size) 5940 { 5941 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5942 (void*)address, size); 5943 } 5944 5945 5946 area_id 5947 _user_area_for(void* address) 5948 { 5949 return vm_area_for((addr_t)address, false); 5950 } 5951 5952 5953 area_id 5954 _user_find_area(const char* userName) 5955 
{ 5956 char name[B_OS_NAME_LENGTH]; 5957 5958 if (!IS_USER_ADDRESS(userName) 5959 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5960 return B_BAD_ADDRESS; 5961 5962 return find_area(name); 5963 } 5964 5965 5966 status_t 5967 _user_get_area_info(area_id area, area_info* userInfo) 5968 { 5969 if (!IS_USER_ADDRESS(userInfo)) 5970 return B_BAD_ADDRESS; 5971 5972 area_info info; 5973 status_t status = get_area_info(area, &info); 5974 if (status < B_OK) 5975 return status; 5976 5977 // TODO: do we want to prevent userland from seeing kernel protections? 5978 //info.protection &= B_USER_PROTECTION; 5979 5980 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5981 return B_BAD_ADDRESS; 5982 5983 return status; 5984 } 5985 5986 5987 status_t 5988 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 5989 { 5990 ssize_t cookie; 5991 5992 if (!IS_USER_ADDRESS(userCookie) 5993 || !IS_USER_ADDRESS(userInfo) 5994 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 5995 return B_BAD_ADDRESS; 5996 5997 area_info info; 5998 status_t status = _get_next_area_info(team, &cookie, &info, 5999 sizeof(area_info)); 6000 if (status != B_OK) 6001 return status; 6002 6003 //info.protection &= B_USER_PROTECTION; 6004 6005 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6006 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6007 return B_BAD_ADDRESS; 6008 6009 return status; 6010 } 6011 6012 6013 status_t 6014 _user_set_area_protection(area_id area, uint32 newProtection) 6015 { 6016 if ((newProtection & ~B_USER_PROTECTION) != 0) 6017 return B_BAD_VALUE; 6018 6019 fix_protection(&newProtection); 6020 6021 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6022 newProtection, false); 6023 } 6024 6025 6026 status_t 6027 _user_resize_area(area_id area, size_t newSize) 6028 { 6029 // TODO: Since we restrict deleting of areas to those owned by the team, 6030 // we should also do that for resizing (check other functions, too). 
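	// The trailing "false" marks this as a userland request (resize_area()
	// above passes "true" for kernel callers).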
6031 return vm_resize_area(area, newSize, false); 6032 } 6033 6034 6035 area_id 6036 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6037 team_id target) 6038 { 6039 // filter out some unavailable values (for userland) 6040 switch (addressSpec) { 6041 case B_ANY_KERNEL_ADDRESS: 6042 case B_ANY_KERNEL_BLOCK_ADDRESS: 6043 return B_BAD_VALUE; 6044 } 6045 6046 void* address; 6047 if (!IS_USER_ADDRESS(userAddress) 6048 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6049 return B_BAD_ADDRESS; 6050 6051 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6052 if (newArea < B_OK) 6053 return newArea; 6054 6055 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6056 return B_BAD_ADDRESS; 6057 6058 return newArea; 6059 } 6060 6061 6062 area_id 6063 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6064 uint32 protection, area_id sourceArea) 6065 { 6066 char name[B_OS_NAME_LENGTH]; 6067 void* address; 6068 6069 // filter out some unavailable values (for userland) 6070 switch (addressSpec) { 6071 case B_ANY_KERNEL_ADDRESS: 6072 case B_ANY_KERNEL_BLOCK_ADDRESS: 6073 return B_BAD_VALUE; 6074 } 6075 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6076 return B_BAD_VALUE; 6077 6078 if (!IS_USER_ADDRESS(userName) 6079 || !IS_USER_ADDRESS(userAddress) 6080 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6081 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6082 return B_BAD_ADDRESS; 6083 6084 fix_protection(&protection); 6085 6086 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6087 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6088 false); 6089 if (clonedArea < B_OK) 6090 return clonedArea; 6091 6092 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6093 delete_area(clonedArea); 6094 return B_BAD_ADDRESS; 6095 } 6096 6097 return clonedArea; 6098 } 6099 6100 6101 area_id 6102 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6103 size_t size, uint32 lock, uint32 protection) 6104 { 6105 char name[B_OS_NAME_LENGTH]; 6106 void* address; 6107 6108 // filter out some unavailable values (for userland) 6109 switch (addressSpec) { 6110 case B_ANY_KERNEL_ADDRESS: 6111 case B_ANY_KERNEL_BLOCK_ADDRESS: 6112 return B_BAD_VALUE; 6113 } 6114 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6115 return B_BAD_VALUE; 6116 6117 if (!IS_USER_ADDRESS(userName) 6118 || !IS_USER_ADDRESS(userAddress) 6119 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6120 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6121 return B_BAD_ADDRESS; 6122 6123 if (addressSpec == B_EXACT_ADDRESS 6124 && IS_KERNEL_ADDRESS(address)) 6125 return B_BAD_VALUE; 6126 6127 fix_protection(&protection); 6128 6129 virtual_address_restrictions virtualRestrictions = {}; 6130 virtualRestrictions.address = address; 6131 virtualRestrictions.address_specification = addressSpec; 6132 physical_address_restrictions physicalRestrictions = {}; 6133 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6134 size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions, 6135 false, &address); 6136 6137 if (area >= B_OK 6138 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6139 delete_area(area); 6140 return B_BAD_ADDRESS; 6141 } 6142 6143 return area; 6144 } 6145 6146 6147 status_t 6148 _user_delete_area(area_id area) 6149 { 6150 // Unlike the BeOS implementation, you can now only delete areas 6151 // 
that you have created yourself from userland. 6152 // The documentation to delete_area() explicitly states that this 6153 // will be restricted in the future, and so it will. 6154 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6155 } 6156 6157 6158 // TODO: create a BeOS style call for this! 6159 6160 area_id 6161 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6162 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6163 int fd, off_t offset) 6164 { 6165 char name[B_OS_NAME_LENGTH]; 6166 void* address; 6167 area_id area; 6168 6169 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6170 return B_BAD_VALUE; 6171 6172 fix_protection(&protection); 6173 6174 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6175 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6176 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6177 return B_BAD_ADDRESS; 6178 6179 if (addressSpec == B_EXACT_ADDRESS) { 6180 if ((addr_t)address + size < (addr_t)address 6181 || (addr_t)address % B_PAGE_SIZE != 0) { 6182 return B_BAD_VALUE; 6183 } 6184 if (!IS_USER_ADDRESS(address) 6185 || !IS_USER_ADDRESS((addr_t)address + size)) { 6186 return B_BAD_ADDRESS; 6187 } 6188 } 6189 6190 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6191 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6192 false); 6193 if (area < B_OK) 6194 return area; 6195 6196 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6197 return B_BAD_ADDRESS; 6198 6199 return area; 6200 } 6201 6202 6203 status_t 6204 _user_unmap_memory(void* _address, size_t size) 6205 { 6206 addr_t address = (addr_t)_address; 6207 6208 // check params 6209 if (size == 0 || (addr_t)address + size < (addr_t)address 6210 || (addr_t)address % B_PAGE_SIZE != 0) { 6211 return B_BAD_VALUE; 6212 } 6213 6214 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6215 return B_BAD_ADDRESS; 6216 6217 // Write lock the address space and ensure the address range is not wired. 6218 AddressSpaceWriteLocker locker; 6219 do { 6220 status_t status = locker.SetTo(team_get_current_team_id()); 6221 if (status != B_OK) 6222 return status; 6223 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6224 size, &locker)); 6225 6226 // unmap 6227 return unmap_address_range(locker.AddressSpace(), address, size, false); 6228 } 6229 6230 6231 status_t 6232 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6233 { 6234 // check address range 6235 addr_t address = (addr_t)_address; 6236 size = PAGE_ALIGN(size); 6237 6238 if ((address % B_PAGE_SIZE) != 0) 6239 return B_BAD_VALUE; 6240 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6241 || !IS_USER_ADDRESS((addr_t)address + size)) { 6242 // weird error code required by POSIX 6243 return ENOMEM; 6244 } 6245 6246 // extend and check protection 6247 if ((protection & ~B_USER_PROTECTION) != 0) 6248 return B_BAD_VALUE; 6249 6250 fix_protection(&protection); 6251 6252 // We need to write lock the address space, since we're going to play with 6253 // the areas. Also make sure that none of the areas is wired and that we're 6254 // actually allowed to change the protection. 
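	// Strategy: the first pass below only validates that the whole range is
	// covered by areas we are allowed to modify (restarting whenever waiting
	// for a wired range forced us to drop the lock); the second pass then
	// applies the new protection page by page.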
6255 AddressSpaceWriteLocker locker; 6256 6257 bool restart; 6258 do { 6259 restart = false; 6260 6261 status_t status = locker.SetTo(team_get_current_team_id()); 6262 if (status != B_OK) 6263 return status; 6264 6265 // First round: Check whether the whole range is covered by areas and we 6266 // are allowed to modify them. 6267 addr_t currentAddress = address; 6268 size_t sizeLeft = size; 6269 while (sizeLeft > 0) { 6270 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6271 if (area == NULL) 6272 return B_NO_MEMORY; 6273 6274 if ((area->protection & B_KERNEL_AREA) != 0) 6275 return B_NOT_ALLOWED; 6276 6277 // TODO: For (shared) mapped files we should check whether the new 6278 // protections are compatible with the file permissions. We don't 6279 // have a way to do that yet, though. 6280 6281 addr_t offset = currentAddress - area->Base(); 6282 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6283 6284 AreaCacheLocker cacheLocker(area); 6285 6286 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6287 &locker, &cacheLocker)) { 6288 restart = true; 6289 break; 6290 } 6291 6292 cacheLocker.Unlock(); 6293 6294 currentAddress += rangeSize; 6295 sizeLeft -= rangeSize; 6296 } 6297 } while (restart); 6298 6299 // Second round: If the protections differ from that of the area, create a 6300 // page protection array and re-map mapped pages. 6301 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6302 addr_t currentAddress = address; 6303 size_t sizeLeft = size; 6304 while (sizeLeft > 0) { 6305 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6306 if (area == NULL) 6307 return B_NO_MEMORY; 6308 6309 addr_t offset = currentAddress - area->Base(); 6310 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6311 6312 currentAddress += rangeSize; 6313 sizeLeft -= rangeSize; 6314 6315 if (area->page_protections == NULL) { 6316 if (area->protection == protection) 6317 continue; 6318 6319 status_t status = allocate_area_page_protections(area); 6320 if (status != B_OK) 6321 return status; 6322 } 6323 6324 // We need to lock the complete cache chain, since we potentially unmap 6325 // pages of lower caches. 6326 VMCache* topCache = vm_area_get_locked_cache(area); 6327 VMCacheChainLocker cacheChainLocker(topCache); 6328 cacheChainLocker.LockAllSourceCaches(); 6329 6330 for (addr_t pageAddress = area->Base() + offset; 6331 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6332 map->Lock(); 6333 6334 set_area_page_protection(area, pageAddress, protection); 6335 6336 phys_addr_t physicalAddress; 6337 uint32 flags; 6338 6339 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6340 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6341 map->Unlock(); 6342 continue; 6343 } 6344 6345 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6346 if (page == NULL) { 6347 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6348 "\n", area, physicalAddress); 6349 map->Unlock(); 6350 return B_ERROR; 6351 } 6352 6353 // If the page is not in the topmost cache and write access is 6354 // requested, we have to unmap it. Otherwise we can re-map it with 6355 // the new protection. 
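	// (Re-mapping such a page writable would let writes bypass the
	// copy-on-write fault, so the next write must fault and pull a private
	// copy into the top cache first.)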
6356 bool unmapPage = page->Cache() != topCache 6357 && (protection & B_WRITE_AREA) != 0; 6358 6359 if (!unmapPage) 6360 map->ProtectPage(area, pageAddress, protection); 6361 6362 map->Unlock(); 6363 6364 if (unmapPage) { 6365 DEBUG_PAGE_ACCESS_START(page); 6366 unmap_page(area, pageAddress); 6367 DEBUG_PAGE_ACCESS_END(page); 6368 } 6369 } 6370 } 6371 6372 return B_OK; 6373 } 6374 6375 6376 status_t 6377 _user_sync_memory(void* _address, size_t size, uint32 flags) 6378 { 6379 addr_t address = (addr_t)_address; 6380 size = PAGE_ALIGN(size); 6381 6382 // check params 6383 if ((address % B_PAGE_SIZE) != 0) 6384 return B_BAD_VALUE; 6385 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6386 || !IS_USER_ADDRESS((addr_t)address + size)) { 6387 // weird error code required by POSIX 6388 return ENOMEM; 6389 } 6390 6391 bool writeSync = (flags & MS_SYNC) != 0; 6392 bool writeAsync = (flags & MS_ASYNC) != 0; 6393 if (writeSync && writeAsync) 6394 return B_BAD_VALUE; 6395 6396 if (size == 0 || (!writeSync && !writeAsync)) 6397 return B_OK; 6398 6399 // iterate through the range and sync all concerned areas 6400 while (size > 0) { 6401 // read lock the address space 6402 AddressSpaceReadLocker locker; 6403 status_t error = locker.SetTo(team_get_current_team_id()); 6404 if (error != B_OK) 6405 return error; 6406 6407 // get the first area 6408 VMArea* area = locker.AddressSpace()->LookupArea(address); 6409 if (area == NULL) 6410 return B_NO_MEMORY; 6411 6412 uint32 offset = address - area->Base(); 6413 size_t rangeSize = min_c(area->Size() - offset, size); 6414 offset += area->cache_offset; 6415 6416 // lock the cache 6417 AreaCacheLocker cacheLocker(area); 6418 if (!cacheLocker) 6419 return B_BAD_VALUE; 6420 VMCache* cache = area->cache; 6421 6422 locker.Unlock(); 6423 6424 uint32 firstPage = offset >> PAGE_SHIFT; 6425 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6426 6427 // write the pages 6428 if (cache->type == CACHE_TYPE_VNODE) { 6429 if (writeSync) { 6430 // synchronous 6431 error = vm_page_write_modified_page_range(cache, firstPage, 6432 endPage); 6433 if (error != B_OK) 6434 return error; 6435 } else { 6436 // asynchronous 6437 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6438 // TODO: This is probably not quite what is supposed to happen. 6439 // Especially when a lot has to be written, it might take ages 6440 // until it really hits the disk. 6441 } 6442 } 6443 6444 address += rangeSize; 6445 size -= rangeSize; 6446 } 6447 6448 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6449 // synchronize multiple mappings of the same file. In our VM they never get 6450 // out of sync, though, so we don't have to do anything. 6451 6452 return B_OK; 6453 } 6454 6455 6456 status_t 6457 _user_memory_advice(void* address, size_t size, uint32 advice) 6458 { 6459 // TODO: Implement! 
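	// The advice hint is currently ignored; the call always reports success.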
6460 return B_OK;
6461 }
6462
6463
6464 status_t
6465 _user_get_memory_properties(team_id teamID, const void* address,
6466 uint32* _protected, uint32* _lock)
6467 {
6468 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6469 return B_BAD_ADDRESS;
6470
6471 AddressSpaceReadLocker locker;
6472 status_t error = locker.SetTo(teamID);
6473 if (error != B_OK)
6474 return error;
6475
6476 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6477 if (area == NULL)
6478 return B_NO_MEMORY;
6479
6480
6481 uint32 protection = area->protection;
6482 if (area->page_protections != NULL)
6483 protection = get_area_page_protection(area, (addr_t)address);
6484
6485 uint32 wiring = area->wiring;
6486
6487 locker.Unlock();
6488
6489 error = user_memcpy(_protected, &protection, sizeof(protection));
6490 if (error != B_OK)
6491 return error;
6492
6493 error = user_memcpy(_lock, &wiring, sizeof(wiring));
6494
6495 return error;
6496 }
6497
6498
6499 // #pragma mark -- compatibility
6500
6501
6502 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6503
6504
6505 struct physical_entry_beos {
6506 uint32 address;
6507 uint32 size;
6508 };
6509
6510
6511 /*! The physical_entry structure has changed. We need to translate it to the
6512 old one.
6513 */
6514 extern "C" int32
6515 __get_memory_map_beos(const void* _address, size_t numBytes,
6516 physical_entry_beos* table, int32 numEntries)
6517 {
6518 if (numEntries <= 0)
6519 return B_BAD_VALUE;
6520
6521 const uint8* address = (const uint8*)_address;
6522
6523 int32 count = 0;
6524 while (numBytes > 0 && count < numEntries) {
6525 physical_entry entry;
6526 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6527 if (result < 0) {
6528 if (result != B_BUFFER_OVERFLOW)
6529 return result;
6530 }
6531
6532 if (entry.address >= (phys_addr_t)1 << 32) {
6533 panic("get_memory_map(): Address is greater than 4 GB!");
6534 return B_ERROR;
6535 }
6536
6537 table[count].address = entry.address;
6538 table[count++].size = entry.size;
6539
6540 address += entry.size;
6541 numBytes -= entry.size;
6542 }
6543
6544 // null-terminate the table, if possible
6545 if (count < numEntries) {
6546 table[count].address = 0;
6547 table[count].size = 0;
6548 }
6549
6550 return B_OK;
6551 }
6552
6553
6554 /*! The type of the \a physicalAddress parameter has changed from void* to
6555 phys_addr_t.
6556 */
6557 extern "C" area_id
6558 __map_physical_memory_beos(const char* name, void* physicalAddress,
6559 size_t numBytes, uint32 addressSpec, uint32 protection,
6560 void** _virtualAddress)
6561 {
6562 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6563 addressSpec, protection, _virtualAddress);
6564 }
6565
6566
6567 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6568 we meddle with the \a lock parameter to force 32 bit.
6569 */ 6570 extern "C" area_id 6571 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 6572 size_t size, uint32 lock, uint32 protection) 6573 { 6574 switch (lock) { 6575 case B_NO_LOCK: 6576 break; 6577 case B_FULL_LOCK: 6578 case B_LAZY_LOCK: 6579 lock = B_32_BIT_FULL_LOCK; 6580 break; 6581 case B_CONTIGUOUS: 6582 lock = B_32_BIT_CONTIGUOUS; 6583 break; 6584 } 6585 6586 return __create_area_haiku(name, _address, addressSpec, size, lock, 6587 protection); 6588 } 6589 6590 6591 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 6592 "BASE"); 6593 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 6594 "map_physical_memory@", "BASE"); 6595 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 6596 "BASE"); 6597 6598 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6599 "get_memory_map@@", "1_ALPHA3"); 6600 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6601 "map_physical_memory@@", "1_ALPHA3"); 6602 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6603 "1_ALPHA3"); 6604 6605 6606 #else 6607 6608 6609 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6610 "get_memory_map@@", "BASE"); 6611 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6612 "map_physical_memory@@", "BASE"); 6613 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6614 "BASE"); 6615 6616 6617 #endif // defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6618