1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <boot/elf.h> 31 #include <boot/stage2.h> 32 #include <condition_variable.h> 33 #include <console.h> 34 #include <debug.h> 35 #include <file_cache.h> 36 #include <fs/fd.h> 37 #include <heap.h> 38 #include <kernel.h> 39 #include <int.h> 40 #include <lock.h> 41 #include <low_resource_manager.h> 42 #include <slab/Slab.h> 43 #include <smp.h> 44 #include <system_info.h> 45 #include <thread.h> 46 #include <team.h> 47 #include <tracing.h> 48 #include <util/AutoLock.h> 49 #include <util/khash.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_priv.h> 52 #include <vm/VMAddressSpace.h> 53 #include <vm/VMArea.h> 54 #include <vm/VMCache.h> 55 56 #include "VMAddressSpaceLocking.h" 57 #include "VMAnonymousCache.h" 58 #include "VMAnonymousNoSwapCache.h" 59 #include "IORequest.h" 60 61 62 //#define TRACE_VM 63 //#define TRACE_FAULTS 64 #ifdef TRACE_VM 65 # define TRACE(x) dprintf x 66 #else 67 # define TRACE(x) ; 68 #endif 69 #ifdef TRACE_FAULTS 70 # define FTRACE(x) dprintf x 71 #else 72 # define FTRACE(x) ; 73 #endif 74 75 76 class AreaCacheLocking { 77 public: 78 inline bool Lock(VMCache* lockable) 79 { 80 return false; 81 } 82 83 inline void Unlock(VMCache* lockable) 84 { 85 vm_area_put_locked_cache(lockable); 86 } 87 }; 88 89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 90 public: 91 inline AreaCacheLocker(VMCache* cache = NULL) 92 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 93 { 94 } 95 96 inline AreaCacheLocker(VMArea* area) 97 : AutoLocker<VMCache, AreaCacheLocking>() 98 { 99 SetTo(area); 100 } 101 102 inline void SetTo(VMCache* cache, bool alreadyLocked) 103 { 104 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 105 } 106 107 inline void SetTo(VMArea* area) 108 { 109 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 110 area != NULL ? 
			vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};


// The memory reserve an allocation of a certain priority must not touch.
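// The table below is presumably indexed by the allocation priority
// (VM_PRIORITY_USER, VM_PRIORITY_SYSTEM, VM_PRIORITY_VIP): user allocations
// have to leave the largest reserve untouched, system allocations a smaller
// one, and VIP allocations may use up everything. A minimal sketch of how a
// reservation check along these lines could look -- memory_reserve_allows()
// is a hypothetical helper, not part of this file:
//
//	static bool
//	memory_reserve_allows(size_t available, size_t requested, int priority)
//	{
//		// the request may not eat into the reserve of its priority class
//		return available >= requested + kMemoryReserveForPriority[priority];
//	}
//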
static const size_t kMemoryReserveForPriority[] = {
	VM_MEMORY_RESERVE_USER,		// user
	VM_MEMORY_RESERVE_SYSTEM,	// system
	0							// VIP
};


ObjectCache* gPageMappingsObjectCache;

static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");

static off_t sAvailableMemory;
static off_t sNeededMemory;
static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
static uint32 sPageFaults;

static VMPhysicalPageMapper* sPhysicalPageMapper;

#if DEBUG_CACHE_LIST

struct cache_info {
	VMCache*	cache;
	addr_t		page_count;
	addr_t		committed;
};

static const int kCacheInfoTableCount = 100 * 1024;
static cache_info* sCacheInfoTable;

#endif	// DEBUG_CACHE_LIST


// function declarations
static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
	bool addressSpaceCleanup);
static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
	bool isWrite, bool isUser, vm_page** wirePage,
	VMAreaWiredRange* wiredRange = NULL);
static status_t map_backing_store(VMAddressSpace* addressSpace,
	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
	int protection, int mapping, uint32 flags,
	const virtual_address_restrictions* addressRestrictions, bool kernel,
	VMArea** _area, void** _virtualAddress);


// #pragma mark -


#if VM_PAGE_FAULT_TRACING

namespace VMPageFaultTracing {

class PageFaultStart : public AbstractTraceEntry {
public:
	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
		:
		fAddress(address),
		fPC(pc),
		fWrite(write),
		fUser(user)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
			fWrite ? "write" : "read", fUser ?
"user" : "kernel", fPC); 302 } 303 304 private: 305 addr_t fAddress; 306 addr_t fPC; 307 bool fWrite; 308 bool fUser; 309 }; 310 311 312 // page fault errors 313 enum { 314 PAGE_FAULT_ERROR_NO_AREA = 0, 315 PAGE_FAULT_ERROR_KERNEL_ONLY, 316 PAGE_FAULT_ERROR_WRITE_PROTECTED, 317 PAGE_FAULT_ERROR_READ_PROTECTED, 318 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 319 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 320 }; 321 322 323 class PageFaultError : public AbstractTraceEntry { 324 public: 325 PageFaultError(area_id area, status_t error) 326 : 327 fArea(area), 328 fError(error) 329 { 330 Initialized(); 331 } 332 333 virtual void AddDump(TraceOutput& out) 334 { 335 switch (fError) { 336 case PAGE_FAULT_ERROR_NO_AREA: 337 out.Print("page fault error: no area"); 338 break; 339 case PAGE_FAULT_ERROR_KERNEL_ONLY: 340 out.Print("page fault error: area: %ld, kernel only", fArea); 341 break; 342 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 343 out.Print("page fault error: area: %ld, write protected", 344 fArea); 345 break; 346 case PAGE_FAULT_ERROR_READ_PROTECTED: 347 out.Print("page fault error: area: %ld, read protected", fArea); 348 break; 349 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 350 out.Print("page fault error: kernel touching bad user memory"); 351 break; 352 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 353 out.Print("page fault error: no address space"); 354 break; 355 default: 356 out.Print("page fault error: area: %ld, error: %s", fArea, 357 strerror(fError)); 358 break; 359 } 360 } 361 362 private: 363 area_id fArea; 364 status_t fError; 365 }; 366 367 368 class PageFaultDone : public AbstractTraceEntry { 369 public: 370 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 371 vm_page* page) 372 : 373 fArea(area), 374 fTopCache(topCache), 375 fCache(cache), 376 fPage(page) 377 { 378 Initialized(); 379 } 380 381 virtual void AddDump(TraceOutput& out) 382 { 383 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 384 "page: %p", fArea, fTopCache, fCache, fPage); 385 } 386 387 private: 388 area_id fArea; 389 VMCache* fTopCache; 390 VMCache* fCache; 391 vm_page* fPage; 392 }; 393 394 } // namespace VMPageFaultTracing 395 396 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 397 #else 398 # define TPF(x) ; 399 #endif // VM_PAGE_FAULT_TRACING 400 401 402 // #pragma mark - 403 404 405 /*! The page's cache must be locked. 406 */ 407 static inline void 408 increment_page_wired_count(vm_page* page) 409 { 410 if (!page->IsMapped()) 411 atomic_add(&gMappedPagesCount, 1); 412 page->IncrementWiredCount(); 413 } 414 415 416 /*! The page's cache must be locked. 417 */ 418 static inline void 419 decrement_page_wired_count(vm_page* page) 420 { 421 page->DecrementWiredCount(); 422 if (!page->IsMapped()) 423 atomic_add(&gMappedPagesCount, -1); 424 } 425 426 427 static inline addr_t 428 virtual_page_address(VMArea* area, vm_page* page) 429 { 430 return area->Base() 431 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 432 } 433 434 435 //! 
You need to have the address space locked when calling this function 436 static VMArea* 437 lookup_area(VMAddressSpace* addressSpace, area_id id) 438 { 439 VMAreaHash::ReadLock(); 440 441 VMArea* area = VMAreaHash::LookupLocked(id); 442 if (area != NULL && area->address_space != addressSpace) 443 area = NULL; 444 445 VMAreaHash::ReadUnlock(); 446 447 return area; 448 } 449 450 451 static status_t 452 allocate_area_page_protections(VMArea* area) 453 { 454 // In the page protections we store only the three user protections, 455 // so we use 4 bits per page. 456 uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 457 area->page_protections = (uint8*)malloc_etc(bytes, 458 HEAP_DONT_LOCK_KERNEL_SPACE); 459 if (area->page_protections == NULL) 460 return B_NO_MEMORY; 461 462 // init the page protections for all pages to that of the area 463 uint32 areaProtection = area->protection 464 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 465 memset(area->page_protections, areaProtection | (areaProtection << 4), 466 bytes); 467 return B_OK; 468 } 469 470 471 static inline void 472 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 473 { 474 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 475 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 476 uint8& entry = area->page_protections[pageIndex / 2]; 477 if (pageIndex % 2 == 0) 478 entry = (entry & 0xf0) | protection; 479 else 480 entry = (entry & 0x0f) | (protection << 4); 481 } 482 483 484 static inline uint32 485 get_area_page_protection(VMArea* area, addr_t pageAddress) 486 { 487 if (area->page_protections == NULL) 488 return area->protection; 489 490 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 491 uint32 protection = area->page_protections[pageIndex / 2]; 492 if (pageIndex % 2 == 0) 493 protection &= 0x0f; 494 else 495 protection >>= 4; 496 497 // If this is a kernel area we translate the user flags to kernel flags. 498 if (area->address_space == VMAddressSpace::Kernel()) { 499 uint32 kernelProtection = 0; 500 if ((protection & B_READ_AREA) != 0) 501 kernelProtection |= B_KERNEL_READ_AREA; 502 if ((protection & B_WRITE_AREA) != 0) 503 kernelProtection |= B_KERNEL_WRITE_AREA; 504 505 return kernelProtection; 506 } 507 508 return protection | B_KERNEL_READ_AREA 509 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 510 } 511 512 513 /*! The caller must have reserved enough pages the translation map 514 implementation might need to map this page. 515 The page's cache must be locked. 516 */ 517 static status_t 518 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 519 vm_page_reservation* reservation) 520 { 521 VMTranslationMap* map = area->address_space->TranslationMap(); 522 523 bool wasMapped = page->IsMapped(); 524 525 if (area->wiring == B_NO_LOCK) { 526 DEBUG_PAGE_ACCESS_CHECK(page); 527 528 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 529 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 530 gPageMappingsObjectCache, 531 CACHE_DONT_WAIT_FOR_MEMORY 532 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 533 if (mapping == NULL) 534 return B_NO_MEMORY; 535 536 mapping->page = page; 537 mapping->area = area; 538 539 map->Lock(); 540 541 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 542 area->MemoryType(), reservation); 543 544 // insert mapping into lists 545 if (!page->IsMapped()) 546 atomic_add(&gMappedPagesCount, 1); 547 548 page->mappings.Add(mapping); 549 area->mappings.Add(mapping); 550 551 map->Unlock(); 552 } else { 553 DEBUG_PAGE_ACCESS_CHECK(page); 554 555 map->Lock(); 556 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 557 area->MemoryType(), reservation); 558 map->Unlock(); 559 560 increment_page_wired_count(page); 561 } 562 563 if (!wasMapped) { 564 // The page is mapped now, so we must not remain in the cached queue. 565 // It also makes sense to move it from the inactive to the active, since 566 // otherwise the page daemon wouldn't come to keep track of it (in idle 567 // mode) -- if the page isn't touched, it will be deactivated after a 568 // full iteration through the queue at the latest. 569 if (page->State() == PAGE_STATE_CACHED 570 || page->State() == PAGE_STATE_INACTIVE) { 571 vm_page_set_state(page, PAGE_STATE_ACTIVE); 572 } 573 } 574 575 return B_OK; 576 } 577 578 579 /*! If \a preserveModified is \c true, the caller must hold the lock of the 580 page's cache. 581 */ 582 static inline bool 583 unmap_page(VMArea* area, addr_t virtualAddress) 584 { 585 return area->address_space->TranslationMap()->UnmapPage(area, 586 virtualAddress, true); 587 } 588 589 590 /*! If \a preserveModified is \c true, the caller must hold the lock of all 591 mapped pages' caches. 592 */ 593 static inline void 594 unmap_pages(VMArea* area, addr_t base, size_t size) 595 { 596 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 597 } 598 599 600 /*! Cuts a piece out of an area. If the given cut range covers the complete 601 area, it is deleted. If it covers the beginning or the end, the area is 602 resized accordingly. If the range covers some part in the middle of the 603 area, it is split in two; in this case the second area is returned via 604 \a _secondArea (the variable is left untouched in the other cases). 605 The address space must be write locked. 606 The caller must ensure that no part of the given range is wired. 607 */ 608 static status_t 609 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 610 addr_t lastAddress, VMArea** _secondArea, bool kernel) 611 { 612 // Does the cut range intersect with the area at all? 613 addr_t areaLast = area->Base() + (area->Size() - 1); 614 if (area->Base() > lastAddress || areaLast < address) 615 return B_OK; 616 617 // Is the area fully covered? 618 if (area->Base() >= address && areaLast <= lastAddress) { 619 delete_area(addressSpace, area, false); 620 return B_OK; 621 } 622 623 int priority; 624 uint32 allocationFlags; 625 if (addressSpace == VMAddressSpace::Kernel()) { 626 priority = VM_PRIORITY_SYSTEM; 627 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 628 | HEAP_DONT_LOCK_KERNEL_SPACE; 629 } else { 630 priority = VM_PRIORITY_USER; 631 allocationFlags = 0; 632 } 633 634 VMCache* cache = vm_area_get_locked_cache(area); 635 VMCacheChainLocker cacheChainLocker(cache); 636 cacheChainLocker.LockAllSourceCaches(); 637 638 // Cut the end only? 
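	// (i.e. the cut range reaches to or beyond the end of the area, so only
	// the tail of the area has to be removed and, if nobody else uses the
	// cache, the cache can be shrunk along with it)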
639 if (areaLast <= lastAddress) { 640 size_t oldSize = area->Size(); 641 size_t newSize = address - area->Base(); 642 643 status_t error = addressSpace->ShrinkAreaTail(area, newSize, 644 allocationFlags); 645 if (error != B_OK) 646 return error; 647 648 // unmap pages 649 unmap_pages(area, address, oldSize - newSize); 650 651 // If no one else uses the area's cache, we can resize it, too. 652 if (cache->areas == area && area->cache_next == NULL 653 && cache->consumers.IsEmpty() 654 && cache->type == CACHE_TYPE_RAM) { 655 // Since VMCache::Resize() can temporarily drop the lock, we must 656 // unlock all lower caches to prevent locking order inversion. 657 cacheChainLocker.Unlock(cache); 658 cache->Resize(cache->virtual_base + newSize, priority); 659 cache->ReleaseRefAndUnlock(); 660 } 661 662 return B_OK; 663 } 664 665 // Cut the beginning only? 666 if (area->Base() >= address) { 667 addr_t oldBase = area->Base(); 668 addr_t newBase = lastAddress + 1; 669 size_t newSize = areaLast - lastAddress; 670 671 // unmap pages 672 unmap_pages(area, oldBase, newBase - oldBase); 673 674 // resize the area 675 status_t error = addressSpace->ShrinkAreaHead(area, newSize, 676 allocationFlags); 677 if (error != B_OK) 678 return error; 679 680 // TODO: If no one else uses the area's cache, we should resize it, too! 681 682 area->cache_offset += newBase - oldBase; 683 684 return B_OK; 685 } 686 687 // The tough part -- cut a piece out of the middle of the area. 688 // We do that by shrinking the area to the begin section and creating a 689 // new area for the end section. 690 691 addr_t firstNewSize = address - area->Base(); 692 addr_t secondBase = lastAddress + 1; 693 addr_t secondSize = areaLast - lastAddress; 694 695 // unmap pages 696 unmap_pages(area, address, area->Size() - firstNewSize); 697 698 // resize the area 699 addr_t oldSize = area->Size(); 700 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 701 allocationFlags); 702 if (error != B_OK) 703 return error; 704 705 // TODO: If no one else uses the area's cache, we might want to create a 706 // new cache for the second area, transfer the concerned pages from the 707 // first cache to it and resize the first cache. 708 709 // map the second area 710 virtual_address_restrictions addressRestrictions = {}; 711 addressRestrictions.address = (void*)secondBase; 712 addressRestrictions.address_specification = B_EXACT_ADDRESS; 713 VMArea* secondArea; 714 error = map_backing_store(addressSpace, cache, 715 area->cache_offset + (secondBase - area->Base()), area->name, 716 secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0, 717 &addressRestrictions, kernel, &secondArea, NULL); 718 if (error != B_OK) { 719 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 720 return error; 721 } 722 723 // We need a cache reference for the new area. 724 cache->AcquireRefLocked(); 725 726 if (_secondArea != NULL) 727 *_secondArea = secondArea; 728 729 return B_OK; 730 } 731 732 733 /*! Deletes all areas in the given address range. 734 The address space must be write-locked. 735 The caller must ensure that no part of the given range is wired. 736 */ 737 static status_t 738 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 739 bool kernel) 740 { 741 size = PAGE_ALIGN(size); 742 addr_t lastAddress = address + (size - 1); 743 744 // Check, whether the caller is allowed to modify the concerned areas. 
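	// Userland callers may not touch areas that are marked B_KERNEL_AREA. The
	// whole range is checked up front, so that the request fails before any
	// area has been modified.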
745 if (!kernel) { 746 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 747 VMArea* area = it.Next();) { 748 addr_t areaLast = area->Base() + (area->Size() - 1); 749 if (area->Base() < lastAddress && address < areaLast) { 750 if ((area->protection & B_KERNEL_AREA) != 0) 751 return B_NOT_ALLOWED; 752 } 753 } 754 } 755 756 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 757 VMArea* area = it.Next();) { 758 addr_t areaLast = area->Base() + (area->Size() - 1); 759 if (area->Base() < lastAddress && address < areaLast) { 760 status_t error = cut_area(addressSpace, area, address, 761 lastAddress, NULL, kernel); 762 if (error != B_OK) 763 return error; 764 // Failing after already messing with areas is ugly, but we 765 // can't do anything about it. 766 } 767 } 768 769 return B_OK; 770 } 771 772 773 /*! You need to hold the lock of the cache and the write lock of the address 774 space when calling this function. 775 Note, that in case of error your cache will be temporarily unlocked. 776 If \a addressSpec is \c B_EXACT_ADDRESS and the 777 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 778 that no part of the specified address range (base \c *_virtualAddress, size 779 \a size) is wired. 780 */ 781 static status_t 782 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 783 const char* areaName, addr_t size, int wiring, int protection, int mapping, 784 uint32 flags, const virtual_address_restrictions* addressRestrictions, 785 bool kernel, VMArea** _area, void** _virtualAddress) 786 { 787 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%" 788 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d" 789 ", protection %d, area %p, areaName '%s'\n", addressSpace, cache, 790 addressRestrictions->address, offset, size, 791 addressRestrictions->address_specification, wiring, protection, 792 _area, areaName)); 793 cache->AssertLocked(); 794 795 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 796 | HEAP_DONT_LOCK_KERNEL_SPACE; 797 int priority; 798 if (addressSpace != VMAddressSpace::Kernel()) { 799 priority = VM_PRIORITY_USER; 800 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 801 priority = VM_PRIORITY_VIP; 802 allocationFlags |= HEAP_PRIORITY_VIP; 803 } else 804 priority = VM_PRIORITY_SYSTEM; 805 806 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 807 allocationFlags); 808 if (area == NULL) 809 return B_NO_MEMORY; 810 811 status_t status; 812 813 // if this is a private map, we need to create a new cache 814 // to handle the private copies of pages as they are written to 815 VMCache* sourceCache = cache; 816 if (mapping == REGION_PRIVATE_MAP) { 817 VMCache* newCache; 818 819 // create an anonymous cache 820 status = VMCacheFactory::CreateAnonymousCache(newCache, 821 (protection & B_STACK_AREA) != 0 822 || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 823 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER); 824 if (status != B_OK) 825 goto err1; 826 827 newCache->Lock(); 828 newCache->temporary = 1; 829 newCache->virtual_base = offset; 830 newCache->virtual_end = offset + size; 831 832 cache->AddConsumer(newCache); 833 834 cache = newCache; 835 } 836 837 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 838 status = cache->SetMinimalCommitment(size, priority); 839 if (status != B_OK) 840 goto err2; 841 } 842 843 // check to see if this address space has entered DELETE state 844 if (addressSpace->IsBeingDeleted()) { 845 // okay, 
		// someone is trying to delete this address space now, so we can't
		// insert the area, so back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status != B_OK) {
		// TODO: wait and try again once this is working in the backend
#if 0
		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
				0, 0);
		}
#endif
		goto err2;
	}

	// attach the cache to the area
	area->cache = cache;
	area->cache_offset = offset;

	// point the cache back to the area
	cache->InsertAreaLocked(area);
	if (mapping == REGION_PRIVATE_MAP)
		cache->Unlock();

	// insert the area in the global area hash table
	VMAreaHash::Insert(area);

	// grab a ref to the address space (the area holds this)
	addressSpace->Get();

//	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
//		cache, sourceCache, areaName, area);

	*_area = area;
	return B_OK;

err2:
	if (mapping == REGION_PRIVATE_MAP) {
		// We created this cache, so we must delete it again. Note that we
		// need to temporarily unlock the source cache or we'll otherwise
		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
		sourceCache->Unlock();
		cache->ReleaseRefAndUnlock();
		sourceCache->Lock();
	}
err1:
	addressSpace->DeleteArea(area, allocationFlags);
	return status;
}


/*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
	locker1, locker2).
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*! Checks whether the given area has any wired ranges intersecting with the
	specified range and waits, if so.

	When it has to wait, the function calls \c Unlock() on both \a locker1
	and \a locker2, if given.
	The area's top cache must be locked and must be unlocked as a side effect
	of calling \c Unlock() on either \a locker1 or \a locker2.

	If the function does not have to wait it does not modify or unlock any
	object.

	\param area The area to be checked.
	\param base The base address of the range to check.
	\param size The size of the address range to check.
	\param locker1 An object to be unlocked before starting to wait (may
		be \c NULL).
	\param locker2 An object to be unlocked before starting to wait (may
		be \c NULL).
	\return \c true, if the function had to wait, \c false otherwise.
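
	Callers typically retry in a loop, re-acquiring their locks whenever this
	function returns \c true, since areas and caches may have changed while
	waiting. A sketch of the usual pattern (the locker names are illustrative
	only):

		do {
			// (re)lock the address space and the area's cache
		} while (wait_if_area_range_is_wired(area, base, size,
			&addressSpaceLocker, &cacheLocker));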
952 */ 953 template<typename LockerType1, typename LockerType2> 954 static inline bool 955 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 956 LockerType1* locker1, LockerType2* locker2) 957 { 958 area->cache->AssertLocked(); 959 960 VMAreaUnwiredWaiter waiter; 961 if (!area->AddWaiterIfWired(&waiter, base, size)) 962 return false; 963 964 // unlock everything and wait 965 if (locker1 != NULL) 966 locker1->Unlock(); 967 if (locker2 != NULL) 968 locker2->Unlock(); 969 970 waiter.waitEntry.Wait(); 971 972 return true; 973 } 974 975 976 /*! Checks whether the given address space has any wired ranges intersecting 977 with the specified range and waits, if so. 978 979 Similar to wait_if_area_range_is_wired(), with the following differences: 980 - All areas intersecting with the range are checked (respectively all until 981 one is found that contains a wired range intersecting with the given 982 range). 983 - The given address space must at least be read-locked and must be unlocked 984 when \c Unlock() is called on \a locker. 985 - None of the areas' caches are allowed to be locked. 986 */ 987 template<typename LockerType> 988 static inline bool 989 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 990 size_t size, LockerType* locker) 991 { 992 addr_t end = base + size - 1; 993 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 994 VMArea* area = it.Next();) { 995 // TODO: Introduce a VMAddressSpace method to get a close iterator! 996 if (area->Base() > end) 997 return false; 998 999 if (base >= area->Base() + area->Size() - 1) 1000 continue; 1001 1002 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1003 1004 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1005 return true; 1006 } 1007 1008 return false; 1009 } 1010 1011 1012 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1013 It must be called in a situation where the kernel address space may be 1014 locked. 1015 */ 1016 status_t 1017 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1018 { 1019 AddressSpaceReadLocker locker; 1020 VMArea* area; 1021 status_t status = locker.SetFromArea(id, area); 1022 if (status != B_OK) 1023 return status; 1024 1025 if (area->page_protections == NULL) { 1026 status = allocate_area_page_protections(area); 1027 if (status != B_OK) 1028 return status; 1029 } 1030 1031 *cookie = (void*)area; 1032 return B_OK; 1033 } 1034 1035 1036 /*! This is a debug helper function that can only be used with very specific 1037 use cases. 1038 Sets protection for the given address range to the protection specified. 1039 If \a protection is 0 then the involved pages will be marked non-present 1040 in the translation map to cause a fault on access. The pages aren't 1041 actually unmapped however so that they can be marked present again with 1042 additional calls to this function. For this to work the area must be 1043 fully locked in memory so that the pages aren't otherwise touched. 1044 This function does not lock the kernel address space and needs to be 1045 supplied with a \a cookie retrieved from a successful call to 1046 vm_prepare_kernel_area_debug_protection(). 
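
	A rough usage sketch (the variable names and the protection values used
	for restoring are only an example):

		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
			// let any access to the range fault
			vm_set_kernel_area_debug_protection(cookie, address, size, 0);
			// ... and later make it accessible again
			vm_set_kernel_area_debug_protection(cookie, address, size,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}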
*/
status_t
vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
	uint32 protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0
		|| (addr_t)address + size < (addr_t)address
		|| !IS_KERNEL_ADDRESS(address)
		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
		return B_BAD_VALUE;
	}

	// Translate the kernel protection to user protection as we only store that.
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault or
	// restore the mapping entries unchanged so that lookup will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->UnreserveAddressRange((addr_t)address, size,
		addressSpace == VMAddressSpace::Kernel()
			?
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1154 } 1155 1156 1157 status_t 1158 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1159 addr_t size, uint32 flags) 1160 { 1161 if (size == 0) 1162 return B_BAD_VALUE; 1163 1164 AddressSpaceWriteLocker locker(team); 1165 if (!locker.IsLocked()) 1166 return B_BAD_TEAM_ID; 1167 1168 virtual_address_restrictions addressRestrictions = {}; 1169 addressRestrictions.address = *_address; 1170 addressRestrictions.address_specification = addressSpec; 1171 VMAddressSpace* addressSpace = locker.AddressSpace(); 1172 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1173 addressSpace == VMAddressSpace::Kernel() 1174 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1175 _address); 1176 } 1177 1178 1179 area_id 1180 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1181 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1182 const virtual_address_restrictions* virtualAddressRestrictions, 1183 const physical_address_restrictions* physicalAddressRestrictions, 1184 bool kernel, void** _address) 1185 { 1186 VMArea* area; 1187 VMCache* cache; 1188 vm_page* page = NULL; 1189 bool isStack = (protection & B_STACK_AREA) != 0; 1190 page_num_t guardPages; 1191 bool canOvercommit = false; 1192 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1193 ? VM_PAGE_ALLOC_CLEAR : 0; 1194 1195 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1196 team, name, size)); 1197 1198 size = PAGE_ALIGN(size); 1199 guardSize = PAGE_ALIGN(guardSize); 1200 guardPages = guardSize / B_PAGE_SIZE; 1201 1202 if (size == 0 || size < guardSize) 1203 return B_BAD_VALUE; 1204 if (!arch_vm_supports_protection(protection)) 1205 return B_NOT_SUPPORTED; 1206 1207 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1208 canOvercommit = true; 1209 1210 #ifdef DEBUG_KERNEL_STACKS 1211 if ((protection & B_KERNEL_STACK_AREA) != 0) 1212 isStack = true; 1213 #endif 1214 1215 // check parameters 1216 switch (virtualAddressRestrictions->address_specification) { 1217 case B_ANY_ADDRESS: 1218 case B_EXACT_ADDRESS: 1219 case B_BASE_ADDRESS: 1220 case B_ANY_KERNEL_ADDRESS: 1221 case B_ANY_KERNEL_BLOCK_ADDRESS: 1222 break; 1223 1224 default: 1225 return B_BAD_VALUE; 1226 } 1227 1228 // If low or high physical address restrictions are given, we force 1229 // B_CONTIGUOUS wiring, since only then we'll use 1230 // vm_page_allocate_page_run() which deals with those restrictions. 1231 if (physicalAddressRestrictions->low_address != 0 1232 || physicalAddressRestrictions->high_address != 0) { 1233 wiring = B_CONTIGUOUS; 1234 } 1235 1236 physical_address_restrictions stackPhysicalRestrictions; 1237 bool doReserveMemory = false; 1238 switch (wiring) { 1239 case B_NO_LOCK: 1240 break; 1241 case B_FULL_LOCK: 1242 case B_LAZY_LOCK: 1243 case B_CONTIGUOUS: 1244 doReserveMemory = true; 1245 break; 1246 case B_ALREADY_WIRED: 1247 break; 1248 case B_LOMEM: 1249 stackPhysicalRestrictions = *physicalAddressRestrictions; 1250 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1251 physicalAddressRestrictions = &stackPhysicalRestrictions; 1252 wiring = B_CONTIGUOUS; 1253 doReserveMemory = true; 1254 break; 1255 case B_32_BIT_FULL_LOCK: 1256 if (B_HAIKU_PHYSICAL_BITS <= 32 1257 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1258 wiring = B_FULL_LOCK; 1259 doReserveMemory = true; 1260 break; 1261 } 1262 // TODO: We don't really support this mode efficiently. 
			// Just fall through for now ...
		case B_32_BIT_CONTIGUOUS:
#if B_HAIKU_PHYSICAL_BITS > 32
			if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
				stackPhysicalRestrictions = *physicalAddressRestrictions;
				stackPhysicalRestrictions.high_address
					= (phys_addr_t)1 << 32;
				physicalAddressRestrictions = &stackPhysicalRestrictions;
			}
#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory handler
	// won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to, since we currently don't
		// reclaim them (and probably can't reclaim all of them anyway). Thus
		// there are actually fewer physical pages than there should be, which
		// can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	VMAddressSpace* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
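	// In addition to the pages the translation map may need (reserved above),
	// a B_FULL_LOCK area needs one physical page per page of its size, since
	// all of its pages are allocated and mapped right away.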
1333 page_num_t reservedPages = reservedMapPages; 1334 if (wiring == B_FULL_LOCK) 1335 reservedPages += size / B_PAGE_SIZE; 1336 1337 vm_page_reservation reservation; 1338 if (reservedPages > 0) { 1339 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1340 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1341 priority)) { 1342 reservedPages = 0; 1343 status = B_WOULD_BLOCK; 1344 goto err0; 1345 } 1346 } else 1347 vm_page_reserve_pages(&reservation, reservedPages, priority); 1348 } 1349 1350 if (wiring == B_CONTIGUOUS) { 1351 // we try to allocate the page run here upfront as this may easily 1352 // fail for obvious reasons 1353 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1354 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1355 if (page == NULL) { 1356 status = B_NO_MEMORY; 1357 goto err0; 1358 } 1359 } 1360 1361 // Lock the address space and, if B_EXACT_ADDRESS and 1362 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1363 // is not wired. 1364 do { 1365 status = locker.SetTo(team); 1366 if (status != B_OK) 1367 goto err1; 1368 1369 addressSpace = locker.AddressSpace(); 1370 } while (virtualAddressRestrictions->address_specification 1371 == B_EXACT_ADDRESS 1372 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1373 && wait_if_address_range_is_wired(addressSpace, 1374 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1375 1376 // create an anonymous cache 1377 // if it's a stack, make sure that two pages are available at least 1378 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1379 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1380 wiring == B_NO_LOCK, priority); 1381 if (status != B_OK) 1382 goto err1; 1383 1384 cache->temporary = 1; 1385 cache->virtual_end = size; 1386 cache->committed_size = reservedMemory; 1387 // TODO: This should be done via a method. 1388 reservedMemory = 0; 1389 1390 cache->Lock(); 1391 1392 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1393 protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions, 1394 kernel, &area, _address); 1395 1396 if (status != B_OK) { 1397 cache->ReleaseRefAndUnlock(); 1398 goto err1; 1399 } 1400 1401 locker.DegradeToReadLock(); 1402 1403 switch (wiring) { 1404 case B_NO_LOCK: 1405 case B_LAZY_LOCK: 1406 // do nothing - the pages are mapped in as needed 1407 break; 1408 1409 case B_FULL_LOCK: 1410 { 1411 // Allocate and map all pages for this area 1412 1413 off_t offset = 0; 1414 for (addr_t address = area->Base(); 1415 address < area->Base() + (area->Size() - 1); 1416 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1417 #ifdef DEBUG_KERNEL_STACKS 1418 # ifdef STACK_GROWS_DOWNWARDS 1419 if (isStack && address < area->Base() 1420 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1421 # else 1422 if (isStack && address >= area->Base() + area->Size() 1423 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1424 # endif 1425 continue; 1426 #endif 1427 vm_page* page = vm_page_allocate_page(&reservation, 1428 PAGE_STATE_WIRED | pageAllocFlags); 1429 cache->InsertPage(page, offset); 1430 map_page(area, page, address, protection, &reservation); 1431 1432 DEBUG_PAGE_ACCESS_END(page); 1433 } 1434 1435 break; 1436 } 1437 1438 case B_ALREADY_WIRED: 1439 { 1440 // The pages should already be mapped. This is only really useful 1441 // during boot time. Find the appropriate vm_page objects and stick 1442 // them in the cache object. 
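			// For each virtual page the translation map is asked which
			// physical page it already points to; the corresponding vm_page
			// is then inserted into the cache and its wired count is bumped,
			// so the rest of the VM knows about the mapping.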
			VMTranslationMap* map = addressSpace->TranslationMap();
			off_t offset = 0;

			if (!gKernelStartup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			map->Lock();

			for (addr_t virtualAddress = area->Base();
					virtualAddress < area->Base() + (area->Size() - 1);
					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
				phys_addr_t physicalAddress;
				uint32 flags;
				status = map->Query(virtualAddress, &physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
						"\n", physicalAddress);
				}

				DEBUG_PAGE_ACCESS_START(page);

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);
				vm_page_set_state(page, PAGE_STATE_WIRED);
				page->busy = false;

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can
			// now just map them in the address space
			VMTranslationMap* map = addressSpace->TranslationMap();
			phys_addr_t physicalAddress
				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress = area->Base();
			off_t offset = 0;

			map->Lock();

			for (virtualAddress = area->Base(); virtualAddress < area->Base()
					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->Map(virtualAddress, physicalAddress, protection,
					area->MemoryType(), &reservation);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		default:
			break;
	}

	cache->Unlock();

	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
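		// ... so the contiguous page run allocated above has to be freed
		// again, page by page, starting at its first physical page number.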
1532 phys_addr_t pageNumber = page->physical_page_number; 1533 int32 i; 1534 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1535 page = vm_lookup_page(pageNumber); 1536 if (page == NULL) 1537 panic("couldn't lookup physical page just allocated\n"); 1538 1539 vm_page_set_state(page, PAGE_STATE_FREE); 1540 } 1541 } 1542 1543 err0: 1544 if (reservedPages > 0) 1545 vm_page_unreserve_pages(&reservation); 1546 if (reservedMemory > 0) 1547 vm_unreserve_memory(reservedMemory); 1548 1549 return status; 1550 } 1551 1552 1553 area_id 1554 vm_map_physical_memory(team_id team, const char* name, void** _address, 1555 uint32 addressSpec, addr_t size, uint32 protection, 1556 phys_addr_t physicalAddress, bool alreadyWired) 1557 { 1558 VMArea* area; 1559 VMCache* cache; 1560 addr_t mapOffset; 1561 1562 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1563 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1564 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1565 addressSpec, size, protection, physicalAddress)); 1566 1567 if (!arch_vm_supports_protection(protection)) 1568 return B_NOT_SUPPORTED; 1569 1570 AddressSpaceWriteLocker locker(team); 1571 if (!locker.IsLocked()) 1572 return B_BAD_TEAM_ID; 1573 1574 // if the physical address is somewhat inside a page, 1575 // move the actual area down to align on a page boundary 1576 mapOffset = physicalAddress % B_PAGE_SIZE; 1577 size += mapOffset; 1578 physicalAddress -= mapOffset; 1579 1580 size = PAGE_ALIGN(size); 1581 1582 // create a device cache 1583 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1584 if (status != B_OK) 1585 return status; 1586 1587 cache->virtual_end = size; 1588 1589 cache->Lock(); 1590 1591 virtual_address_restrictions addressRestrictions = {}; 1592 addressRestrictions.address = *_address; 1593 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1594 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1595 B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1596 true, &area, _address); 1597 1598 if (status < B_OK) 1599 cache->ReleaseRefLocked(); 1600 1601 cache->Unlock(); 1602 1603 if (status == B_OK) { 1604 // set requested memory type -- use uncached, if not given 1605 uint32 memoryType = addressSpec & B_MTR_MASK; 1606 if (memoryType == 0) 1607 memoryType = B_MTR_UC; 1608 1609 area->SetMemoryType(memoryType); 1610 1611 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1612 if (status != B_OK) 1613 delete_area(locker.AddressSpace(), area, false); 1614 } 1615 1616 if (status != B_OK) 1617 return status; 1618 1619 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1620 1621 if (alreadyWired) { 1622 // The area is already mapped, but possibly not with the right 1623 // memory type. 1624 map->Lock(); 1625 map->ProtectArea(area, area->protection); 1626 map->Unlock(); 1627 } else { 1628 // Map the area completely. 1629 1630 // reserve pages needed for the mapping 1631 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1632 area->Base() + (size - 1)); 1633 vm_page_reservation reservation; 1634 vm_page_reserve_pages(&reservation, reservePages, 1635 team == VMAddressSpace::KernelID() 1636 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1637 1638 map->Lock(); 1639 1640 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1641 map->Map(area->Base() + offset, physicalAddress + offset, 1642 protection, area->MemoryType(), &reservation); 1643 } 1644 1645 map->Unlock(); 1646 1647 vm_page_unreserve_pages(&reservation); 1648 } 1649 1650 // modify the pointer returned to be offset back into the new area 1651 // the same way the physical address in was offset 1652 *_address = (void*)((addr_t)*_address + mapOffset); 1653 1654 area->cache_type = CACHE_TYPE_DEVICE; 1655 return area->id; 1656 } 1657 1658 1659 /*! Don't use! 1660 TODO: This function was introduced to map physical page vecs to 1661 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1662 use a device cache and does not track vm_page::wired_count! 1663 */ 1664 area_id 1665 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1666 uint32 addressSpec, addr_t* _size, uint32 protection, 1667 struct generic_io_vec* vecs, uint32 vecCount) 1668 { 1669 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1670 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1671 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1672 addressSpec, _size, protection, vecs, vecCount)); 1673 1674 if (!arch_vm_supports_protection(protection) 1675 || (addressSpec & B_MTR_MASK) != 0) { 1676 return B_NOT_SUPPORTED; 1677 } 1678 1679 AddressSpaceWriteLocker locker(team); 1680 if (!locker.IsLocked()) 1681 return B_BAD_TEAM_ID; 1682 1683 if (vecCount == 0) 1684 return B_BAD_VALUE; 1685 1686 addr_t size = 0; 1687 for (uint32 i = 0; i < vecCount; i++) { 1688 if (vecs[i].base % B_PAGE_SIZE != 0 1689 || vecs[i].length % B_PAGE_SIZE != 0) { 1690 return B_BAD_VALUE; 1691 } 1692 1693 size += vecs[i].length; 1694 } 1695 1696 // create a device cache 1697 VMCache* cache; 1698 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1699 if (result != B_OK) 1700 return result; 1701 1702 cache->virtual_end = size; 1703 1704 cache->Lock(); 1705 1706 VMArea* area; 1707 virtual_address_restrictions addressRestrictions = {}; 1708 addressRestrictions.address = *_address; 1709 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1710 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1711 size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, 1712 &addressRestrictions, true, &area, _address); 1713 1714 if (result != B_OK) 1715 cache->ReleaseRefLocked(); 1716 1717 cache->Unlock(); 1718 1719 if (result != B_OK) 1720 return result; 1721 1722 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1723 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1724 area->Base() + (size - 1)); 1725 1726 vm_page_reservation reservation; 1727 vm_page_reserve_pages(&reservation, reservePages, 1728 team == VMAddressSpace::KernelID() 1729 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1730 map->Lock(); 1731 1732 uint32 vecIndex = 0; 1733 size_t vecOffset = 0; 1734 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1735 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1736 vecOffset = 0; 1737 vecIndex++; 1738 } 1739 1740 if (vecIndex >= vecCount) 1741 break; 1742 1743 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1744 protection, area->MemoryType(), &reservation); 1745 1746 vecOffset += B_PAGE_SIZE; 1747 } 1748 1749 map->Unlock(); 1750 vm_page_unreserve_pages(&reservation); 1751 1752 if (_size != NULL) 1753 *_size = size; 1754 1755 area->cache_type = CACHE_TYPE_DEVICE; 1756 return area->id; 1757 } 1758 1759 1760 area_id 1761 vm_create_null_area(team_id team, const char* name, void** address, 1762 uint32 addressSpec, addr_t size, uint32 flags) 1763 { 1764 size = PAGE_ALIGN(size); 1765 1766 // Lock the address space and, if B_EXACT_ADDRESS and 1767 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1768 // is not wired. 1769 AddressSpaceWriteLocker locker; 1770 do { 1771 if (locker.SetTo(team) != B_OK) 1772 return B_BAD_TEAM_ID; 1773 } while (addressSpec == B_EXACT_ADDRESS 1774 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1775 && wait_if_address_range_is_wired(locker.AddressSpace(), 1776 (addr_t)*address, size, &locker)); 1777 1778 // create a null cache 1779 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1780 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1781 VMCache* cache; 1782 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1783 if (status != B_OK) 1784 return status; 1785 1786 cache->temporary = 1; 1787 cache->virtual_end = size; 1788 1789 cache->Lock(); 1790 1791 VMArea* area; 1792 virtual_address_restrictions addressRestrictions = {}; 1793 addressRestrictions.address = *address; 1794 addressRestrictions.address_specification = addressSpec; 1795 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1796 B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags, 1797 &addressRestrictions, true, &area, address); 1798 1799 if (status < B_OK) { 1800 cache->ReleaseRefAndUnlock(); 1801 return status; 1802 } 1803 1804 cache->Unlock(); 1805 1806 area->cache_type = CACHE_TYPE_NULL; 1807 return area->id; 1808 } 1809 1810 1811 /*! Creates the vnode cache for the specified \a vnode. 1812 The vnode has to be marked busy when calling this function. 1813 */ 1814 status_t 1815 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1816 { 1817 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1818 } 1819 1820 1821 /*! \a cache must be locked. The area's address space must be read-locked. 
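	Maps those pages of \a cache that are already resident and neither busy
	nor inactive into \a area, so that the first accesses to them do not have
	to go through the page fault path.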
*/
static void
pre_map_area_pages(VMArea* area, VMCache* cache,
	vm_page_reservation* reservation)
{
	addr_t baseAddress = area->Base();
	addr_t cacheOffset = area->cache_offset;
	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;

	for (VMCachePagesTree::Iterator it
			= cache->pages.GetIterator(firstPage, true, true);
			vm_page* page = it.Next();) {
		if (page->cache_offset >= endPage)
			break;

		// skip busy and inactive pages
		if (page->busy || page->usage_count == 0)
			continue;

		DEBUG_PAGE_ACCESS_START(page);
		map_page(area, page,
			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
		DEBUG_PAGE_ACCESS_END(page);
	}
}


/*! Will map the file specified by \a fd to an area in memory.
	The file will be mirrored beginning at the specified \a offset. The
	\a offset and \a size arguments have to be page aligned.
*/
static area_id
_vm_map_file(team_id team, const char* name, void** _address,
	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
	bool unmapAddressRange, int fd, off_t offset, bool kernel)
{
	// TODO: for binary files, we want to make sure that they get the
	// copy of a file at a given time, i.e. later changes should not
	// make it into the mapped copy -- this will need quite some changes
	// to be done in a nice way
	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
		"%" B_PRIu32 ")\n", fd, offset, size, mapping));

	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = PAGE_ALIGN(size);

	if (mapping == REGION_NO_PRIVATE_MAP)
		protection |= B_SHARED_AREA;
	if (addressSpec != B_EXACT_ADDRESS)
		unmapAddressRange = false;

	if (fd < 0) {
		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address = *_address;
		virtualRestrictions.address_specification = addressSpec;
		physical_address_restrictions physicalRestrictions = {};
		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
			_address);
	}

	// get the open flags of the FD
	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return EBADF;
	int32 openMode = descriptor->open_mode;
	put_fd(descriptor);

	// The FD must be open for reading at any rate. For a shared mapping with
	// write access, the FD must additionally be open for writing.
	if ((openMode & O_ACCMODE) == O_WRONLY
		|| (mapping == REGION_NO_PRIVATE_MAP
			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
			&& (openMode & O_ACCMODE) == O_RDONLY)) {
		return EACCES;
	}

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode = NULL;
	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
	if (status < B_OK)
		return status;
	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);

	// If we're going to pre-map pages, we need to reserve the pages needed by
	// the mapping backend upfront.
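	// Pre-mapping is only done for readable mappings. The address space is
	// locked just long enough to ask the translation map how many pages it
	// may need to map the whole range; the reservation is given back by the
	// PageUnreserver below once the pages have been pre-mapped.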
1911 page_num_t reservedPreMapPages = 0; 1912 vm_page_reservation reservation; 1913 if ((protection & B_READ_AREA) != 0) { 1914 AddressSpaceWriteLocker locker; 1915 status = locker.SetTo(team); 1916 if (status != B_OK) 1917 return status; 1918 1919 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1920 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 1921 1922 locker.Unlock(); 1923 1924 vm_page_reserve_pages(&reservation, reservedPreMapPages, 1925 team == VMAddressSpace::KernelID() 1926 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1927 } 1928 1929 struct PageUnreserver { 1930 PageUnreserver(vm_page_reservation* reservation) 1931 : 1932 fReservation(reservation) 1933 { 1934 } 1935 1936 ~PageUnreserver() 1937 { 1938 if (fReservation != NULL) 1939 vm_page_unreserve_pages(fReservation); 1940 } 1941 1942 vm_page_reservation* fReservation; 1943 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 1944 1945 // Lock the address space and, if the specified address range shall be 1946 // unmapped, ensure it is not wired. 1947 AddressSpaceWriteLocker locker; 1948 do { 1949 if (locker.SetTo(team) != B_OK) 1950 return B_BAD_TEAM_ID; 1951 } while (unmapAddressRange 1952 && wait_if_address_range_is_wired(locker.AddressSpace(), 1953 (addr_t)*_address, size, &locker)); 1954 1955 // TODO: this only works for file systems that use the file cache 1956 VMCache* cache; 1957 status = vfs_get_vnode_cache(vnode, &cache, false); 1958 if (status < B_OK) 1959 return status; 1960 1961 cache->Lock(); 1962 1963 VMArea* area; 1964 virtual_address_restrictions addressRestrictions = {}; 1965 addressRestrictions.address = *_address; 1966 addressRestrictions.address_specification = addressSpec; 1967 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 1968 0, protection, mapping, 1969 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 1970 &addressRestrictions, kernel, &area, _address); 1971 1972 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 1973 // map_backing_store() cannot know we no longer need the ref 1974 cache->ReleaseRefLocked(); 1975 } 1976 1977 if (status == B_OK && (protection & B_READ_AREA) != 0) 1978 pre_map_area_pages(area, cache, &reservation); 1979 1980 cache->Unlock(); 1981 1982 if (status == B_OK) { 1983 // TODO: this probably deserves a smarter solution, ie. don't always 1984 // prefetch stuff, and also, probably don't trigger it at this place. 
1985 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1986 // prefetches at max 10 MB starting from "offset" 1987 } 1988 1989 if (status != B_OK) 1990 return status; 1991 1992 area->cache_type = CACHE_TYPE_VNODE; 1993 return area->id; 1994 } 1995 1996 1997 area_id 1998 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 1999 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2000 int fd, off_t offset) 2001 { 2002 if (!arch_vm_supports_protection(protection)) 2003 return B_NOT_SUPPORTED; 2004 2005 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2006 mapping, unmapAddressRange, fd, offset, true); 2007 } 2008 2009 2010 VMCache* 2011 vm_area_get_locked_cache(VMArea* area) 2012 { 2013 rw_lock_read_lock(&sAreaCacheLock); 2014 2015 while (true) { 2016 VMCache* cache = area->cache; 2017 2018 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2019 // cache has been deleted 2020 rw_lock_read_lock(&sAreaCacheLock); 2021 continue; 2022 } 2023 2024 rw_lock_read_lock(&sAreaCacheLock); 2025 2026 if (cache == area->cache) { 2027 cache->AcquireRefLocked(); 2028 rw_lock_read_unlock(&sAreaCacheLock); 2029 return cache; 2030 } 2031 2032 // the cache changed in the meantime 2033 cache->Unlock(); 2034 } 2035 } 2036 2037 2038 void 2039 vm_area_put_locked_cache(VMCache* cache) 2040 { 2041 cache->ReleaseRefAndUnlock(); 2042 } 2043 2044 2045 area_id 2046 vm_clone_area(team_id team, const char* name, void** address, 2047 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2048 bool kernel) 2049 { 2050 VMArea* newArea = NULL; 2051 VMArea* sourceArea; 2052 2053 // Check whether the source area exists and is cloneable. If so, mark it 2054 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2055 { 2056 AddressSpaceWriteLocker locker; 2057 status_t status = locker.SetFromArea(sourceID, sourceArea); 2058 if (status != B_OK) 2059 return status; 2060 2061 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2062 return B_NOT_ALLOWED; 2063 2064 sourceArea->protection |= B_SHARED_AREA; 2065 protection |= B_SHARED_AREA; 2066 } 2067 2068 // Now lock both address spaces and actually do the cloning. 2069 2070 MultiAddressSpaceLocker locker; 2071 VMAddressSpace* sourceAddressSpace; 2072 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2073 if (status != B_OK) 2074 return status; 2075 2076 VMAddressSpace* targetAddressSpace; 2077 status = locker.AddTeam(team, true, &targetAddressSpace); 2078 if (status != B_OK) 2079 return status; 2080 2081 status = locker.Lock(); 2082 if (status != B_OK) 2083 return status; 2084 2085 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2086 if (sourceArea == NULL) 2087 return B_BAD_VALUE; 2088 2089 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2090 return B_NOT_ALLOWED; 2091 2092 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2093 2094 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2095 // have been adapted. Maybe it should be part of the kernel settings, 2096 // anyway (so that old drivers can always work). 
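// (Note that the disabled check below still refers to sourceArea->aspace and
// an addressSpace variable that no longer exist in this function, so it would
// have to be updated before it could be re-enabled.)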
2097 #if 0 2098 if (sourceArea->aspace == VMAddressSpace::Kernel() 2099 && addressSpace != VMAddressSpace::Kernel() 2100 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2101 // kernel areas must not be cloned in userland, unless explicitly 2102 // declared user-cloneable upon construction 2103 status = B_NOT_ALLOWED; 2104 } else 2105 #endif 2106 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2107 status = B_NOT_ALLOWED; 2108 else { 2109 virtual_address_restrictions addressRestrictions = {}; 2110 addressRestrictions.address = *address; 2111 addressRestrictions.address_specification = addressSpec; 2112 status = map_backing_store(targetAddressSpace, cache, 2113 sourceArea->cache_offset, name, sourceArea->Size(), 2114 sourceArea->wiring, protection, mapping, 0, &addressRestrictions, 2115 kernel, &newArea, address); 2116 } 2117 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2118 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2119 // to create a new cache, and has therefore already acquired a reference 2120 // to the source cache - but otherwise it has no idea that we need 2121 // one. 2122 cache->AcquireRefLocked(); 2123 } 2124 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2125 // we need to map in everything at this point 2126 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2127 // we don't have actual pages to map but a physical area 2128 VMTranslationMap* map 2129 = sourceArea->address_space->TranslationMap(); 2130 map->Lock(); 2131 2132 phys_addr_t physicalAddress; 2133 uint32 oldProtection; 2134 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2135 2136 map->Unlock(); 2137 2138 map = targetAddressSpace->TranslationMap(); 2139 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2140 newArea->Base() + (newArea->Size() - 1)); 2141 2142 vm_page_reservation reservation; 2143 vm_page_reserve_pages(&reservation, reservePages, 2144 targetAddressSpace == VMAddressSpace::Kernel() 2145 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2146 map->Lock(); 2147 2148 for (addr_t offset = 0; offset < newArea->Size(); 2149 offset += B_PAGE_SIZE) { 2150 map->Map(newArea->Base() + offset, physicalAddress + offset, 2151 protection, newArea->MemoryType(), &reservation); 2152 } 2153 2154 map->Unlock(); 2155 vm_page_unreserve_pages(&reservation); 2156 } else { 2157 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2158 size_t reservePages = map->MaxPagesNeededToMap( 2159 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2160 vm_page_reservation reservation; 2161 vm_page_reserve_pages(&reservation, reservePages, 2162 targetAddressSpace == VMAddressSpace::Kernel() 2163 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2164 2165 // map in all pages from source 2166 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2167 vm_page* page = it.Next();) { 2168 if (!page->busy) { 2169 DEBUG_PAGE_ACCESS_START(page); 2170 map_page(newArea, page, 2171 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2172 - newArea->cache_offset), 2173 protection, &reservation); 2174 DEBUG_PAGE_ACCESS_END(page); 2175 } 2176 } 2177 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2178 // ensuring that! 2179 2180 vm_page_unreserve_pages(&reservation); 2181 } 2182 } 2183 if (status == B_OK) 2184 newArea->cache_type = sourceArea->cache_type; 2185 2186 vm_area_put_locked_cache(cache); 2187 2188 if (status < B_OK) 2189 return status; 2190 2191 return newArea->id; 2192 } 2193 2194 2195 /*! 
Deletes the specified area of the given address space. 2196 2197 The address space must be write-locked. 2198 The caller must ensure that the area does not have any wired ranges. 2199 2200 \param addressSpace The address space containing the area. 2201 \param area The area to be deleted. 2202 \param deletingAddressSpace \c true, if the address space is in the process 2203 of being deleted. 2204 */ 2205 static void 2206 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2207 bool deletingAddressSpace) 2208 { 2209 ASSERT(!area->IsWired()); 2210 2211 VMAreaHash::Remove(area); 2212 2213 // At this point the area is removed from the global hash table, but 2214 // still exists in the area list. 2215 2216 // Unmap the virtual address space the area occupied. 2217 { 2218 // We need to lock the complete cache chain. 2219 VMCache* topCache = vm_area_get_locked_cache(area); 2220 VMCacheChainLocker cacheChainLocker(topCache); 2221 cacheChainLocker.LockAllSourceCaches(); 2222 2223 // If the area's top cache is a temporary cache and the area is the only 2224 // one referencing it (besides us currently holding a second reference), 2225 // the unmapping code doesn't need to care about preserving the accessed 2226 // and dirty flags of the top cache page mappings. 2227 bool ignoreTopCachePageFlags 2228 = topCache->temporary && topCache->RefCount() == 2; 2229 2230 area->address_space->TranslationMap()->UnmapArea(area, 2231 deletingAddressSpace, ignoreTopCachePageFlags); 2232 } 2233 2234 if (!area->cache->temporary) 2235 area->cache->WriteModified(); 2236 2237 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2238 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2239 2240 arch_vm_unset_memory_type(area); 2241 addressSpace->RemoveArea(area, allocationFlags); 2242 addressSpace->Put(); 2243 2244 area->cache->RemoveArea(area); 2245 area->cache->ReleaseRef(); 2246 2247 addressSpace->DeleteArea(area, allocationFlags); 2248 } 2249 2250 2251 status_t 2252 vm_delete_area(team_id team, area_id id, bool kernel) 2253 { 2254 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2255 team, id)); 2256 2257 // lock the address space and make sure the area isn't wired 2258 AddressSpaceWriteLocker locker; 2259 VMArea* area; 2260 AreaCacheLocker cacheLocker; 2261 2262 do { 2263 status_t status = locker.SetFromArea(team, id, area); 2264 if (status != B_OK) 2265 return status; 2266 2267 cacheLocker.SetTo(area); 2268 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2269 2270 cacheLocker.Unlock(); 2271 2272 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2273 return B_NOT_ALLOWED; 2274 2275 delete_area(locker.AddressSpace(), area, false); 2276 return B_OK; 2277 } 2278 2279 2280 /*! Creates a new cache on top of given cache, moves all areas from 2281 the old cache to the new one, and changes the protection of all affected 2282 areas' pages to read-only. If requested, wired pages are moved up to the 2283 new cache and copies are added to the old cache in their place. 2284 Preconditions: 2285 - The given cache must be locked. 2286 - All of the cache's areas' address spaces must be read locked. 2287 - Either the cache must not have any wired ranges or a page reservation for 2288 all wired pages must be provided, so they can be copied. 2289 2290 \param lowerCache The cache on top of which a new cache shall be created. 2291 \param wiredPagesReservation If \c NULL there must not be any wired pages 2292 in \a lowerCache. 
Otherwise as many pages must be reserved as the cache 2293 has wired page. The wired pages are copied in this case. 2294 */ 2295 static status_t 2296 vm_copy_on_write_area(VMCache* lowerCache, 2297 vm_page_reservation* wiredPagesReservation) 2298 { 2299 VMCache* upperCache; 2300 2301 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2302 2303 // We need to separate the cache from its areas. The cache goes one level 2304 // deeper and we create a new cache inbetween. 2305 2306 // create an anonymous cache 2307 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2308 lowerCache->GuardSize() / B_PAGE_SIZE, 2309 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2310 VM_PRIORITY_USER); 2311 if (status != B_OK) 2312 return status; 2313 2314 upperCache->Lock(); 2315 2316 upperCache->temporary = 1; 2317 upperCache->virtual_base = lowerCache->virtual_base; 2318 upperCache->virtual_end = lowerCache->virtual_end; 2319 2320 // transfer the lower cache areas to the upper cache 2321 rw_lock_write_lock(&sAreaCacheLock); 2322 upperCache->TransferAreas(lowerCache); 2323 rw_lock_write_unlock(&sAreaCacheLock); 2324 2325 lowerCache->AddConsumer(upperCache); 2326 2327 // We now need to remap all pages from all of the cache's areas read-only, 2328 // so that a copy will be created on next write access. If there are wired 2329 // pages, we keep their protection, move them to the upper cache and create 2330 // copies for the lower cache. 2331 if (wiredPagesReservation != NULL) { 2332 // We need to handle wired pages -- iterate through the cache's pages. 2333 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2334 vm_page* page = it.Next();) { 2335 if (page->WiredCount() > 0) { 2336 // allocate a new page and copy the wired one 2337 vm_page* copiedPage = vm_page_allocate_page( 2338 wiredPagesReservation, PAGE_STATE_ACTIVE); 2339 2340 vm_memcpy_physical_page( 2341 copiedPage->physical_page_number * B_PAGE_SIZE, 2342 page->physical_page_number * B_PAGE_SIZE); 2343 2344 // move the wired page to the upper cache (note: removing is OK 2345 // with the SplayTree iterator) and insert the copy 2346 upperCache->MovePage(page); 2347 lowerCache->InsertPage(copiedPage, 2348 page->cache_offset * B_PAGE_SIZE); 2349 2350 DEBUG_PAGE_ACCESS_END(copiedPage); 2351 } else { 2352 // Change the protection of this page in all areas. 2353 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2354 tempArea = tempArea->cache_next) { 2355 // The area must be readable in the same way it was 2356 // previously writable. 2357 uint32 protection = B_KERNEL_READ_AREA; 2358 if ((tempArea->protection & B_READ_AREA) != 0) 2359 protection |= B_READ_AREA; 2360 2361 VMTranslationMap* map 2362 = tempArea->address_space->TranslationMap(); 2363 map->Lock(); 2364 map->ProtectPage(tempArea, 2365 virtual_page_address(tempArea, page), protection); 2366 map->Unlock(); 2367 } 2368 } 2369 } 2370 } else { 2371 ASSERT(lowerCache->WiredPagesCount() == 0); 2372 2373 // just change the protection of all areas 2374 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2375 tempArea = tempArea->cache_next) { 2376 // The area must be readable in the same way it was previously 2377 // writable. 
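// In other words: keep the user-readable bit if it was set, but drop all
// write permissions, so that the next write access faults and triggers the
// actual copy-on-write.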
2378 uint32 protection = B_KERNEL_READ_AREA; 2379 if ((tempArea->protection & B_READ_AREA) != 0) 2380 protection |= B_READ_AREA; 2381 2382 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2383 map->Lock(); 2384 map->ProtectArea(tempArea, protection); 2385 map->Unlock(); 2386 } 2387 } 2388 2389 vm_area_put_locked_cache(upperCache); 2390 2391 return B_OK; 2392 } 2393 2394 2395 area_id 2396 vm_copy_area(team_id team, const char* name, void** _address, 2397 uint32 addressSpec, uint32 protection, area_id sourceID) 2398 { 2399 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2400 2401 if ((protection & B_KERNEL_PROTECTION) == 0) { 2402 // set the same protection for the kernel as for userland 2403 protection |= B_KERNEL_READ_AREA; 2404 if (writableCopy) 2405 protection |= B_KERNEL_WRITE_AREA; 2406 } 2407 2408 // Do the locking: target address space, all address spaces associated with 2409 // the source cache, and the cache itself. 2410 MultiAddressSpaceLocker locker; 2411 VMAddressSpace* targetAddressSpace; 2412 VMCache* cache; 2413 VMArea* source; 2414 AreaCacheLocker cacheLocker; 2415 status_t status; 2416 bool sharedArea; 2417 2418 page_num_t wiredPages = 0; 2419 vm_page_reservation wiredPagesReservation; 2420 2421 bool restart; 2422 do { 2423 restart = false; 2424 2425 locker.Unset(); 2426 status = locker.AddTeam(team, true, &targetAddressSpace); 2427 if (status == B_OK) { 2428 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2429 &cache); 2430 } 2431 if (status != B_OK) 2432 return status; 2433 2434 cacheLocker.SetTo(cache, true); // already locked 2435 2436 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2437 2438 page_num_t oldWiredPages = wiredPages; 2439 wiredPages = 0; 2440 2441 // If the source area isn't shared, count the number of wired pages in 2442 // the cache and reserve as many pages. 2443 if (!sharedArea) { 2444 wiredPages = cache->WiredPagesCount(); 2445 2446 if (wiredPages > oldWiredPages) { 2447 cacheLocker.Unlock(); 2448 locker.Unlock(); 2449 2450 if (oldWiredPages > 0) 2451 vm_page_unreserve_pages(&wiredPagesReservation); 2452 2453 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2454 VM_PRIORITY_USER); 2455 2456 restart = true; 2457 } 2458 } else if (oldWiredPages > 0) 2459 vm_page_unreserve_pages(&wiredPagesReservation); 2460 } while (restart); 2461 2462 // unreserve pages later 2463 struct PagesUnreserver { 2464 PagesUnreserver(vm_page_reservation* reservation) 2465 : 2466 fReservation(reservation) 2467 { 2468 } 2469 2470 ~PagesUnreserver() 2471 { 2472 if (fReservation != NULL) 2473 vm_page_unreserve_pages(fReservation); 2474 } 2475 2476 private: 2477 vm_page_reservation* fReservation; 2478 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2479 2480 if (addressSpec == B_CLONE_ADDRESS) { 2481 addressSpec = B_EXACT_ADDRESS; 2482 *_address = (void*)source->Base(); 2483 } 2484 2485 // First, create a cache on top of the source area, respectively use the 2486 // existing one, if this is a shared area. 2487 2488 VMArea* target; 2489 virtual_address_restrictions addressRestrictions = {}; 2490 addressRestrictions.address = *_address; 2491 addressRestrictions.address_specification = addressSpec; 2492 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2493 name, source->Size(), source->wiring, protection, 2494 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2495 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2496 &addressRestrictions, true, &target, _address); 2497 if (status < B_OK) 2498 return status; 2499 2500 if (sharedArea) { 2501 // The new area uses the old area's cache, but map_backing_store() 2502 // hasn't acquired a ref. So we have to do that now. 2503 cache->AcquireRefLocked(); 2504 } 2505 2506 // If the source area is writable, we need to move it one layer up as well 2507 2508 if (!sharedArea) { 2509 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2510 // TODO: do something more useful if this fails! 2511 if (vm_copy_on_write_area(cache, 2512 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2513 panic("vm_copy_on_write_area() failed!\n"); 2514 } 2515 } 2516 } 2517 2518 // we return the ID of the newly created area 2519 return target->id; 2520 } 2521 2522 2523 static status_t 2524 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2525 bool kernel) 2526 { 2527 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2528 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2529 2530 if (!arch_vm_supports_protection(newProtection)) 2531 return B_NOT_SUPPORTED; 2532 2533 bool becomesWritable 2534 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2535 2536 // lock address spaces and cache 2537 MultiAddressSpaceLocker locker; 2538 VMCache* cache; 2539 VMArea* area; 2540 status_t status; 2541 AreaCacheLocker cacheLocker; 2542 bool isWritable; 2543 2544 bool restart; 2545 do { 2546 restart = false; 2547 2548 locker.Unset(); 2549 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2550 if (status != B_OK) 2551 return status; 2552 2553 cacheLocker.SetTo(cache, true); // already locked 2554 2555 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2556 return B_NOT_ALLOWED; 2557 2558 if (area->protection == newProtection) 2559 return B_OK; 2560 2561 if (team != VMAddressSpace::KernelID() 2562 && area->address_space->ID() != team) { 2563 // unless you're the kernel, you are only allowed to set 2564 // the protection of your own areas 2565 return B_NOT_ALLOWED; 2566 } 2567 2568 isWritable 2569 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2570 2571 // Make sure the area (respectively, if we're going to call 2572 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2573 // wired ranges. 2574 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2575 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2576 otherArea = otherArea->cache_next) { 2577 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2578 restart = true; 2579 break; 2580 } 2581 } 2582 } else { 2583 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2584 restart = true; 2585 } 2586 } while (restart); 2587 2588 bool changePageProtection = true; 2589 bool changeTopCachePagesOnly = false; 2590 2591 if (isWritable && !becomesWritable) { 2592 // writable -> !writable 2593 2594 if (cache->source != NULL && cache->temporary) { 2595 if (cache->CountWritableAreas(area) == 0) { 2596 // Since this cache now lives from the pages in its source cache, 2597 // we can change the cache's commitment to take only those pages 2598 // into account that really are in this cache. 2599 2600 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2601 team == VMAddressSpace::KernelID() 2602 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2603 2604 // TODO: we may be able to join with our source cache, if 2605 // count == 0 2606 } 2607 } 2608 2609 // If only the writability changes, we can just remap the pages of the 2610 // top cache, since the pages of lower caches are mapped read-only 2611 // anyway. That's advantageous only, if the number of pages in the cache 2612 // is significantly smaller than the number of pages in the area, 2613 // though. 2614 if (newProtection 2615 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2616 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2617 changeTopCachePagesOnly = true; 2618 } 2619 } else if (!isWritable && becomesWritable) { 2620 // !writable -> writable 2621 2622 if (!cache->consumers.IsEmpty()) { 2623 // There are consumers -- we have to insert a new cache. Fortunately 2624 // vm_copy_on_write_area() does everything that's needed. 2625 changePageProtection = false; 2626 status = vm_copy_on_write_area(cache, NULL); 2627 } else { 2628 // No consumers, so we don't need to insert a new one. 2629 if (cache->source != NULL && cache->temporary) { 2630 // the cache's commitment must contain all possible pages 2631 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2632 team == VMAddressSpace::KernelID() 2633 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2634 } 2635 2636 if (status == B_OK && cache->source != NULL) { 2637 // There's a source cache, hence we can't just change all pages' 2638 // protection or we might allow writing into pages belonging to 2639 // a lower cache. 2640 changeTopCachePagesOnly = true; 2641 } 2642 } 2643 } else { 2644 // we don't have anything special to do in all other cases 2645 } 2646 2647 if (status == B_OK) { 2648 // remap existing pages in this cache 2649 if (changePageProtection) { 2650 VMTranslationMap* map = area->address_space->TranslationMap(); 2651 map->Lock(); 2652 2653 if (changeTopCachePagesOnly) { 2654 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2655 page_num_t lastPageOffset 2656 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2657 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2658 vm_page* page = it.Next();) { 2659 if (page->cache_offset >= firstPageOffset 2660 && page->cache_offset <= lastPageOffset) { 2661 addr_t address = virtual_page_address(area, page); 2662 map->ProtectPage(area, address, newProtection); 2663 } 2664 } 2665 } else 2666 map->ProtectArea(area, newProtection); 2667 2668 map->Unlock(); 2669 } 2670 2671 area->protection = newProtection; 2672 } 2673 2674 return status; 2675 } 2676 2677 2678 status_t 2679 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2680 { 2681 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2682 if (addressSpace == NULL) 2683 return B_BAD_TEAM_ID; 2684 2685 VMTranslationMap* map = addressSpace->TranslationMap(); 2686 2687 map->Lock(); 2688 uint32 dummyFlags; 2689 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2690 map->Unlock(); 2691 2692 addressSpace->Put(); 2693 return status; 2694 } 2695 2696 2697 /*! The page's cache must be locked. 
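Returns \c true if the page itself or any of its mappings has been modified.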
2698 */ 2699 bool 2700 vm_test_map_modification(vm_page* page) 2701 { 2702 if (page->modified) 2703 return true; 2704 2705 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2706 vm_page_mapping* mapping; 2707 while ((mapping = iterator.Next()) != NULL) { 2708 VMArea* area = mapping->area; 2709 VMTranslationMap* map = area->address_space->TranslationMap(); 2710 2711 phys_addr_t physicalAddress; 2712 uint32 flags; 2713 map->Lock(); 2714 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2715 map->Unlock(); 2716 2717 if ((flags & PAGE_MODIFIED) != 0) 2718 return true; 2719 } 2720 2721 return false; 2722 } 2723 2724 2725 /*! The page's cache must be locked. 2726 */ 2727 void 2728 vm_clear_map_flags(vm_page* page, uint32 flags) 2729 { 2730 if ((flags & PAGE_ACCESSED) != 0) 2731 page->accessed = false; 2732 if ((flags & PAGE_MODIFIED) != 0) 2733 page->modified = false; 2734 2735 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2736 vm_page_mapping* mapping; 2737 while ((mapping = iterator.Next()) != NULL) { 2738 VMArea* area = mapping->area; 2739 VMTranslationMap* map = area->address_space->TranslationMap(); 2740 2741 map->Lock(); 2742 map->ClearFlags(virtual_page_address(area, page), flags); 2743 map->Unlock(); 2744 } 2745 } 2746 2747 2748 /*! Removes all mappings from a page. 2749 After you've called this function, the page is unmapped from memory and 2750 the page's \c accessed and \c modified flags have been updated according 2751 to the state of the mappings. 2752 The page's cache must be locked. 2753 */ 2754 void 2755 vm_remove_all_page_mappings(vm_page* page) 2756 { 2757 while (vm_page_mapping* mapping = page->mappings.Head()) { 2758 VMArea* area = mapping->area; 2759 VMTranslationMap* map = area->address_space->TranslationMap(); 2760 addr_t address = virtual_page_address(area, page); 2761 map->UnmapPage(area, address, false); 2762 } 2763 } 2764 2765 2766 int32 2767 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2768 { 2769 int32 count = 0; 2770 2771 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2772 vm_page_mapping* mapping; 2773 while ((mapping = iterator.Next()) != NULL) { 2774 VMArea* area = mapping->area; 2775 VMTranslationMap* map = area->address_space->TranslationMap(); 2776 2777 bool modified; 2778 if (map->ClearAccessedAndModified(area, 2779 virtual_page_address(area, page), false, modified)) { 2780 count++; 2781 } 2782 2783 page->modified |= modified; 2784 } 2785 2786 2787 if (page->accessed) { 2788 count++; 2789 page->accessed = false; 2790 } 2791 2792 return count; 2793 } 2794 2795 2796 /*! Removes all mappings of a page and/or clears the accessed bits of the 2797 mappings. 2798 The function iterates through the page mappings and removes them until 2799 encountering one that has been accessed. From then on it will continue to 2800 iterate, but only clear the accessed flag of the mapping. The page's 2801 \c modified bit will be updated accordingly, the \c accessed bit will be 2802 cleared. 2803 \return The number of mapping accessed bits encountered, including the 2804 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2805 of the page have been removed. 
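The page must not be wired; as with the other mapping functions above, the
page's cache is expected to be locked.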
2806 */
2807 int32
2808 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2809 {
2810 ASSERT(page->WiredCount() == 0);
2811 
2812 if (page->accessed)
2813 return vm_clear_page_mapping_accessed_flags(page);
2814 
2815 while (vm_page_mapping* mapping = page->mappings.Head()) {
2816 VMArea* area = mapping->area;
2817 VMTranslationMap* map = area->address_space->TranslationMap();
2818 addr_t address = virtual_page_address(area, page);
2819 bool modified = false;
2820 if (map->ClearAccessedAndModified(area, address, true, modified)) {
2821 page->accessed = true;
2822 page->modified |= modified;
2823 return vm_clear_page_mapping_accessed_flags(page);
2824 }
2825 page->modified |= modified;
2826 }
2827 
2828 return 0;
2829 }
2830 
2831 
2832 static int
2833 display_mem(int argc, char** argv)
2834 {
2835 bool physical = false;
2836 addr_t copyAddress;
2837 int32 displayWidth;
2838 int32 itemSize;
2839 int32 num = -1;
2840 addr_t address;
2841 int i = 1, j;
2842 
2843 if (argc > 1 && argv[1][0] == '-') {
2844 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2845 physical = true;
2846 i++;
2847 } else
2848 i = 99;
2849 }
2850 
2851 if (argc < i + 1 || argc > i + 2) {
2852 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2853 "\tdl - 8 bytes\n"
2854 "\tdw - 4 bytes\n"
2855 "\tds - 2 bytes\n"
2856 "\tdb - 1 byte\n"
2857 "\tstring - a whole string\n"
2858 " -p or --physical only allows memory from a single page to be "
2859 "displayed.\n");
2860 return 0;
2861 }
2862 
2863 address = parse_expression(argv[i]);
2864 
2865 if (argc > i + 1)
2866 num = parse_expression(argv[i + 1]);
2867 
2868 // build the format string
2869 if (strcmp(argv[0], "db") == 0) {
2870 itemSize = 1;
2871 displayWidth = 16;
2872 } else if (strcmp(argv[0], "ds") == 0) {
2873 itemSize = 2;
2874 displayWidth = 8;
2875 } else if (strcmp(argv[0], "dw") == 0) {
2876 itemSize = 4;
2877 displayWidth = 4;
2878 } else if (strcmp(argv[0], "dl") == 0) {
2879 itemSize = 8;
2880 displayWidth = 2;
2881 } else if (strcmp(argv[0], "string") == 0) {
2882 itemSize = 1;
2883 displayWidth = -1;
2884 } else {
2885 kprintf("display_mem called in an invalid way!\n");
2886 return 0;
2887 }
2888 
2889 if (num <= 0)
2890 num = displayWidth;
2891 
2892 void* physicalPageHandle = NULL;
2893 
2894 if (physical) {
2895 int32 offset = address & (B_PAGE_SIZE - 1);
2896 if (num * itemSize + offset > B_PAGE_SIZE) {
2897 num = (B_PAGE_SIZE - offset) / itemSize;
2898 kprintf("NOTE: number of bytes has been cut to page size\n");
2899 }
2900 
2901 address = ROUNDDOWN(address, B_PAGE_SIZE);
2902 
2903 if (vm_get_physical_page_debug(address, &copyAddress,
2904 &physicalPageHandle) != B_OK) {
2905 kprintf("getting the hardware page failed.");
2906 return 0;
2907 }
2908 
2909 address += offset;
2910 copyAddress += offset;
2911 } else
2912 copyAddress = address;
2913 
2914 if (!strcmp(argv[0], "string")) {
2915 kprintf("%p \"", (char*)copyAddress);
2916 
2917 // string mode
2918 for (i = 0; true; i++) {
2919 char c;
2920 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2921 != B_OK
2922 || c == '\0') {
2923 break;
2924 }
2925 
2926 if (c == '\n')
2927 kprintf("\\n");
2928 else if (c == '\t')
2929 kprintf("\\t");
2930 else {
2931 if (!isprint(c))
2932 c = '.';
2933 
2934 kprintf("%c", c);
2935 }
2936 }
2937 
2938 kprintf("\"\n");
2939 } else {
2940 // number mode
2941 for (i = 0; i < num; i++) {
2942 uint64 value;
2943 
2944 if ((i % displayWidth) == 0) {
2945 int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2946 if (i != 0)
2947 kprintf("\n"); 2948 2949 kprintf("[0x%lx] ", address + i * itemSize); 2950 2951 for (j = 0; j < displayed; j++) { 2952 char c; 2953 if (debug_memcpy(B_CURRENT_TEAM, &c, 2954 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 2955 displayed = j; 2956 break; 2957 } 2958 if (!isprint(c)) 2959 c = '.'; 2960 2961 kprintf("%c", c); 2962 } 2963 if (num > displayWidth) { 2964 // make sure the spacing in the last line is correct 2965 for (j = displayed; j < displayWidth * itemSize; j++) 2966 kprintf(" "); 2967 } 2968 kprintf(" "); 2969 } 2970 2971 if (debug_memcpy(B_CURRENT_TEAM, &value, 2972 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 2973 kprintf("read fault"); 2974 break; 2975 } 2976 2977 switch (itemSize) { 2978 case 1: 2979 kprintf(" %02" B_PRIx8, *(uint8*)&value); 2980 break; 2981 case 2: 2982 kprintf(" %04" B_PRIx16, *(uint16*)&value); 2983 break; 2984 case 4: 2985 kprintf(" %08" B_PRIx32, *(uint32*)&value); 2986 break; 2987 case 8: 2988 kprintf(" %016" B_PRIx64, *(uint64*)&value); 2989 break; 2990 } 2991 } 2992 2993 kprintf("\n"); 2994 } 2995 2996 if (physical) { 2997 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 2998 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 2999 } 3000 return 0; 3001 } 3002 3003 3004 static void 3005 dump_cache_tree_recursively(VMCache* cache, int level, 3006 VMCache* highlightCache) 3007 { 3008 // print this cache 3009 for (int i = 0; i < level; i++) 3010 kprintf(" "); 3011 if (cache == highlightCache) 3012 kprintf("%p <--\n", cache); 3013 else 3014 kprintf("%p\n", cache); 3015 3016 // recursively print its consumers 3017 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3018 VMCache* consumer = it.Next();) { 3019 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3020 } 3021 } 3022 3023 3024 static int 3025 dump_cache_tree(int argc, char** argv) 3026 { 3027 if (argc != 2 || !strcmp(argv[1], "--help")) { 3028 kprintf("usage: %s <address>\n", argv[0]); 3029 return 0; 3030 } 3031 3032 addr_t address = parse_expression(argv[1]); 3033 if (address == 0) 3034 return 0; 3035 3036 VMCache* cache = (VMCache*)address; 3037 VMCache* root = cache; 3038 3039 // find the root cache (the transitive source) 3040 while (root->source != NULL) 3041 root = root->source; 3042 3043 dump_cache_tree_recursively(root, 0, cache); 3044 3045 return 0; 3046 } 3047 3048 3049 const char* 3050 vm_cache_type_to_string(int32 type) 3051 { 3052 switch (type) { 3053 case CACHE_TYPE_RAM: 3054 return "RAM"; 3055 case CACHE_TYPE_DEVICE: 3056 return "device"; 3057 case CACHE_TYPE_VNODE: 3058 return "vnode"; 3059 case CACHE_TYPE_NULL: 3060 return "null"; 3061 3062 default: 3063 return "unknown"; 3064 } 3065 } 3066 3067 3068 #if DEBUG_CACHE_LIST 3069 3070 static void 3071 update_cache_info_recursively(VMCache* cache, cache_info& info) 3072 { 3073 info.page_count += cache->page_count; 3074 if (cache->type == CACHE_TYPE_RAM) 3075 info.committed += cache->committed_size; 3076 3077 // recurse 3078 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3079 VMCache* consumer = it.Next();) { 3080 update_cache_info_recursively(consumer, info); 3081 } 3082 } 3083 3084 3085 static int 3086 cache_info_compare_page_count(const void* _a, const void* _b) 3087 { 3088 const cache_info* a = (const cache_info*)_a; 3089 const cache_info* b = (const cache_info*)_b; 3090 if (a->page_count == b->page_count) 3091 return 0; 3092 return a->page_count < b->page_count ? 
1 : -1; 3093 } 3094 3095 3096 static int 3097 cache_info_compare_committed(const void* _a, const void* _b) 3098 { 3099 const cache_info* a = (const cache_info*)_a; 3100 const cache_info* b = (const cache_info*)_b; 3101 if (a->committed == b->committed) 3102 return 0; 3103 return a->committed < b->committed ? 1 : -1; 3104 } 3105 3106 3107 static void 3108 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3109 { 3110 for (int i = 0; i < level; i++) 3111 kprintf(" "); 3112 3113 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3114 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3115 cache->virtual_base, cache->virtual_end, cache->page_count); 3116 3117 if (level == 0) 3118 kprintf("/%lu", info.page_count); 3119 3120 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3121 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3122 3123 if (level == 0) 3124 kprintf("/%lu", info.committed); 3125 } 3126 3127 // areas 3128 if (cache->areas != NULL) { 3129 VMArea* area = cache->areas; 3130 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3131 area->name, area->address_space->ID()); 3132 3133 while (area->cache_next != NULL) { 3134 area = area->cache_next; 3135 kprintf(", %" B_PRId32, area->id); 3136 } 3137 } 3138 3139 kputs("\n"); 3140 3141 // recurse 3142 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3143 VMCache* consumer = it.Next();) { 3144 dump_caches_recursively(consumer, info, level + 1); 3145 } 3146 } 3147 3148 3149 static int 3150 dump_caches(int argc, char** argv) 3151 { 3152 if (sCacheInfoTable == NULL) { 3153 kprintf("No cache info table!\n"); 3154 return 0; 3155 } 3156 3157 bool sortByPageCount = true; 3158 3159 for (int32 i = 1; i < argc; i++) { 3160 if (strcmp(argv[i], "-c") == 0) { 3161 sortByPageCount = false; 3162 } else { 3163 print_debugger_command_usage(argv[0]); 3164 return 0; 3165 } 3166 } 3167 3168 uint32 totalCount = 0; 3169 uint32 rootCount = 0; 3170 off_t totalCommitted = 0; 3171 page_num_t totalPages = 0; 3172 3173 VMCache* cache = gDebugCacheList; 3174 while (cache) { 3175 totalCount++; 3176 if (cache->source == NULL) { 3177 cache_info stackInfo; 3178 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3179 ? sCacheInfoTable[rootCount] : stackInfo; 3180 rootCount++; 3181 info.cache = cache; 3182 info.page_count = 0; 3183 info.committed = 0; 3184 update_cache_info_recursively(cache, info); 3185 totalCommitted += info.committed; 3186 totalPages += info.page_count; 3187 } 3188 3189 cache = cache->debug_next; 3190 } 3191 3192 if (rootCount <= (uint32)kCacheInfoTableCount) { 3193 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3194 sortByPageCount 3195 ? &cache_info_compare_page_count 3196 : &cache_info_compare_committed); 3197 } 3198 3199 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3200 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3201 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3202 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3203 "page count" : "committed size"); 3204 3205 if (rootCount <= (uint32)kCacheInfoTableCount) { 3206 for (uint32 i = 0; i < rootCount; i++) { 3207 cache_info& info = sCacheInfoTable[i]; 3208 dump_caches_recursively(info.cache, info, 0); 3209 } 3210 } else 3211 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3212 3213 return 0; 3214 } 3215 3216 #endif // DEBUG_CACHE_LIST 3217 3218 3219 static int 3220 dump_cache(int argc, char** argv) 3221 { 3222 VMCache* cache; 3223 bool showPages = false; 3224 int i = 1; 3225 3226 if (argc < 2 || !strcmp(argv[1], "--help")) { 3227 kprintf("usage: %s [-ps] <address>\n" 3228 " if -p is specified, all pages are shown, if -s is used\n" 3229 " only the cache info is shown respectively.\n", argv[0]); 3230 return 0; 3231 } 3232 while (argv[i][0] == '-') { 3233 char* arg = argv[i] + 1; 3234 while (arg[0]) { 3235 if (arg[0] == 'p') 3236 showPages = true; 3237 arg++; 3238 } 3239 i++; 3240 } 3241 if (argv[i] == NULL) { 3242 kprintf("%s: invalid argument, pass address\n", argv[0]); 3243 return 0; 3244 } 3245 3246 addr_t address = parse_expression(argv[i]); 3247 if (address == 0) 3248 return 0; 3249 3250 cache = (VMCache*)address; 3251 3252 cache->Dump(showPages); 3253 3254 set_debug_variable("_sourceCache", (addr_t)cache->source); 3255 3256 return 0; 3257 } 3258 3259 3260 static void 3261 dump_area_struct(VMArea* area, bool mappings) 3262 { 3263 kprintf("AREA: %p\n", area); 3264 kprintf("name:\t\t'%s'\n", area->name); 3265 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3266 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3267 kprintf("base:\t\t0x%lx\n", area->Base()); 3268 kprintf("size:\t\t0x%lx\n", area->Size()); 3269 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3270 kprintf("wiring:\t\t0x%x\n", area->wiring); 3271 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3272 kprintf("cache:\t\t%p\n", area->cache); 3273 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3274 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3275 kprintf("cache_next:\t%p\n", area->cache_next); 3276 kprintf("cache_prev:\t%p\n", area->cache_prev); 3277 3278 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3279 if (mappings) { 3280 kprintf("page mappings:\n"); 3281 while (iterator.HasNext()) { 3282 vm_page_mapping* mapping = iterator.Next(); 3283 kprintf(" %p", mapping->page); 3284 } 3285 kprintf("\n"); 3286 } else { 3287 uint32 count = 0; 3288 while (iterator.Next() != NULL) { 3289 count++; 3290 } 3291 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3292 } 3293 } 3294 3295 3296 static int 3297 dump_area(int argc, char** argv) 3298 { 3299 bool mappings = false; 3300 bool found = false; 3301 int32 index = 1; 3302 VMArea* area; 3303 addr_t num; 3304 3305 if (argc < 2 || !strcmp(argv[1], "--help")) { 3306 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3307 "All areas matching either id/address/name are listed. 
You can\n" 3308 "force to check only a specific item by prefixing the specifier\n" 3309 "with the id/contains/address/name keywords.\n" 3310 "-m shows the area's mappings as well.\n"); 3311 return 0; 3312 } 3313 3314 if (!strcmp(argv[1], "-m")) { 3315 mappings = true; 3316 index++; 3317 } 3318 3319 int32 mode = 0xf; 3320 if (!strcmp(argv[index], "id")) 3321 mode = 1; 3322 else if (!strcmp(argv[index], "contains")) 3323 mode = 2; 3324 else if (!strcmp(argv[index], "name")) 3325 mode = 4; 3326 else if (!strcmp(argv[index], "address")) 3327 mode = 0; 3328 if (mode != 0xf) 3329 index++; 3330 3331 if (index >= argc) { 3332 kprintf("No area specifier given.\n"); 3333 return 0; 3334 } 3335 3336 num = parse_expression(argv[index]); 3337 3338 if (mode == 0) { 3339 dump_area_struct((struct VMArea*)num, mappings); 3340 } else { 3341 // walk through the area list, looking for the arguments as a name 3342 3343 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3344 while ((area = it.Next()) != NULL) { 3345 if (((mode & 4) != 0 && area->name != NULL 3346 && !strcmp(argv[index], area->name)) 3347 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3348 || (((mode & 2) != 0 && area->Base() <= num 3349 && area->Base() + area->Size() > num))))) { 3350 dump_area_struct(area, mappings); 3351 found = true; 3352 } 3353 } 3354 3355 if (!found) 3356 kprintf("could not find area %s (%ld)\n", argv[index], num); 3357 } 3358 3359 return 0; 3360 } 3361 3362 3363 static int 3364 dump_area_list(int argc, char** argv) 3365 { 3366 VMArea* area; 3367 const char* name = NULL; 3368 int32 id = 0; 3369 3370 if (argc > 1) { 3371 id = parse_expression(argv[1]); 3372 if (id == 0) 3373 name = argv[1]; 3374 } 3375 3376 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3377 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3378 B_PRINTF_POINTER_WIDTH, "size"); 3379 3380 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3381 while ((area = it.Next()) != NULL) { 3382 if ((id != 0 && area->address_space->ID() != id) 3383 || (name != NULL && strstr(area->name, name) == NULL)) 3384 continue; 3385 3386 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3387 area->id, (void*)area->Base(), (void*)area->Size(), 3388 area->protection, area->wiring, area->name); 3389 } 3390 return 0; 3391 } 3392 3393 3394 static int 3395 dump_available_memory(int argc, char** argv) 3396 { 3397 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3398 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3399 return 0; 3400 } 3401 3402 3403 /*! Deletes all areas and reserved regions in the given address space. 3404 3405 The caller must ensure that none of the areas has any wired ranges. 3406 3407 \param addressSpace The address space. 3408 \param deletingAddressSpace \c true, if the address space is in the process 3409 of being deleted. 
3410 */ 3411 void 3412 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3413 { 3414 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3415 addressSpace->ID())); 3416 3417 addressSpace->WriteLock(); 3418 3419 // remove all reserved areas in this address space 3420 addressSpace->UnreserveAllAddressRanges(0); 3421 3422 // delete all the areas in this address space 3423 while (VMArea* area = addressSpace->FirstArea()) { 3424 ASSERT(!area->IsWired()); 3425 delete_area(addressSpace, area, deletingAddressSpace); 3426 } 3427 3428 addressSpace->WriteUnlock(); 3429 } 3430 3431 3432 static area_id 3433 vm_area_for(addr_t address, bool kernel) 3434 { 3435 team_id team; 3436 if (IS_USER_ADDRESS(address)) { 3437 // we try the user team address space, if any 3438 team = VMAddressSpace::CurrentID(); 3439 if (team < 0) 3440 return team; 3441 } else 3442 team = VMAddressSpace::KernelID(); 3443 3444 AddressSpaceReadLocker locker(team); 3445 if (!locker.IsLocked()) 3446 return B_BAD_TEAM_ID; 3447 3448 VMArea* area = locker.AddressSpace()->LookupArea(address); 3449 if (area != NULL) { 3450 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3451 return B_ERROR; 3452 3453 return area->id; 3454 } 3455 3456 return B_ERROR; 3457 } 3458 3459 3460 /*! Frees physical pages that were used during the boot process. 3461 \a end is inclusive. 3462 */ 3463 static void 3464 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3465 { 3466 // free all physical pages in the specified range 3467 3468 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3469 phys_addr_t physicalAddress; 3470 uint32 flags; 3471 3472 if (map->Query(current, &physicalAddress, &flags) == B_OK 3473 && (flags & PAGE_PRESENT) != 0) { 3474 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3475 if (page != NULL && page->State() != PAGE_STATE_FREE 3476 && page->State() != PAGE_STATE_CLEAR 3477 && page->State() != PAGE_STATE_UNUSED) { 3478 DEBUG_PAGE_ACCESS_START(page); 3479 vm_page_set_state(page, PAGE_STATE_FREE); 3480 } 3481 } 3482 } 3483 3484 // unmap the memory 3485 map->Unmap(start, end); 3486 } 3487 3488 3489 void 3490 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3491 { 3492 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3493 addr_t end = start + (size - 1); 3494 addr_t lastEnd = start; 3495 3496 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3497 (void*)start, (void*)end)); 3498 3499 // The areas are sorted in virtual address space order, so 3500 // we just have to find the holes between them that fall 3501 // into the area we should dispose 3502 3503 map->Lock(); 3504 3505 for (VMAddressSpace::AreaIterator it 3506 = VMAddressSpace::Kernel()->GetAreaIterator(); 3507 VMArea* area = it.Next();) { 3508 addr_t areaStart = area->Base(); 3509 addr_t areaEnd = areaStart + (area->Size() - 1); 3510 3511 if (areaEnd < start) 3512 continue; 3513 3514 if (areaStart > end) { 3515 // we are done, the area is already beyond of what we have to free 3516 break; 3517 } 3518 3519 if (areaStart > lastEnd) { 3520 // this is something we can free 3521 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3522 (void*)areaStart)); 3523 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3524 } 3525 3526 if (areaEnd >= end) { 3527 lastEnd = areaEnd; 3528 // no +1 to prevent potential overflow 3529 break; 3530 } 3531 3532 lastEnd = areaEnd + 1; 3533 } 3534 3535 if (lastEnd < 
end) { 3536 // we can also get rid of some space at the end of the area 3537 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3538 (void*)end)); 3539 unmap_and_free_physical_pages(map, lastEnd, end); 3540 } 3541 3542 map->Unlock(); 3543 } 3544 3545 3546 static void 3547 create_preloaded_image_areas(struct preloaded_image* _image) 3548 { 3549 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3550 char name[B_OS_NAME_LENGTH]; 3551 void* address; 3552 int32 length; 3553 3554 // use file name to create a good area name 3555 char* fileName = strrchr(image->name, '/'); 3556 if (fileName == NULL) 3557 fileName = image->name; 3558 else 3559 fileName++; 3560 3561 length = strlen(fileName); 3562 // make sure there is enough space for the suffix 3563 if (length > 25) 3564 length = 25; 3565 3566 memcpy(name, fileName, length); 3567 strcpy(name + length, "_text"); 3568 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3569 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3570 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3571 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3572 // this will later be remapped read-only/executable by the 3573 // ELF initialization code 3574 3575 strcpy(name + length, "_data"); 3576 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3577 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3578 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3579 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3580 } 3581 3582 3583 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3584 Any boot loader resources contained in that arguments must not be accessed 3585 anymore past this point. 3586 */ 3587 void 3588 vm_free_kernel_args(kernel_args* args) 3589 { 3590 uint32 i; 3591 3592 TRACE(("vm_free_kernel_args()\n")); 3593 3594 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3595 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3596 if (area >= B_OK) 3597 delete_area(area); 3598 } 3599 } 3600 3601 3602 static void 3603 allocate_kernel_args(kernel_args* args) 3604 { 3605 TRACE(("allocate_kernel_args()\n")); 3606 3607 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3608 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3609 3610 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3611 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3612 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3613 } 3614 } 3615 3616 3617 static void 3618 unreserve_boot_loader_ranges(kernel_args* args) 3619 { 3620 TRACE(("unreserve_boot_loader_ranges()\n")); 3621 3622 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3623 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3624 (void*)(addr_t)args->virtual_allocated_range[i].start, 3625 args->virtual_allocated_range[i].size); 3626 } 3627 } 3628 3629 3630 static void 3631 reserve_boot_loader_ranges(kernel_args* args) 3632 { 3633 TRACE(("reserve_boot_loader_ranges()\n")); 3634 3635 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3636 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3637 3638 // If the address is no kernel address, we just skip it. The 3639 // architecture specific code has to deal with it. 
3640 if (!IS_KERNEL_ADDRESS(address)) { 3641 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3642 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 3643 continue; 3644 } 3645 3646 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3647 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3648 if (status < B_OK) 3649 panic("could not reserve boot loader ranges\n"); 3650 } 3651 } 3652 3653 3654 static addr_t 3655 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3656 { 3657 size = PAGE_ALIGN(size); 3658 3659 // find a slot in the virtual allocation addr range 3660 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3661 // check to see if the space between this one and the last is big enough 3662 addr_t rangeStart = args->virtual_allocated_range[i].start; 3663 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3664 + args->virtual_allocated_range[i - 1].size; 3665 3666 addr_t base = alignment > 0 3667 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3668 3669 if (base >= KERNEL_BASE && base < rangeStart 3670 && rangeStart - base >= size) { 3671 args->virtual_allocated_range[i - 1].size 3672 += base + size - previousRangeEnd; 3673 return base; 3674 } 3675 } 3676 3677 // we hadn't found one between allocation ranges. this is ok. 3678 // see if there's a gap after the last one 3679 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3680 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3681 + args->virtual_allocated_range[lastEntryIndex].size; 3682 addr_t base = alignment > 0 3683 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3684 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3685 args->virtual_allocated_range[lastEntryIndex].size 3686 += base + size - lastRangeEnd; 3687 return base; 3688 } 3689 3690 // see if there's a gap before the first one 3691 addr_t rangeStart = args->virtual_allocated_range[0].start; 3692 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3693 base = rangeStart - size; 3694 if (alignment > 0) 3695 base = ROUNDDOWN(base, alignment); 3696 3697 if (base >= KERNEL_BASE) { 3698 args->virtual_allocated_range[0].start = base; 3699 args->virtual_allocated_range[0].size += rangeStart - base; 3700 return base; 3701 } 3702 } 3703 3704 return 0; 3705 } 3706 3707 3708 static bool 3709 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 3710 { 3711 // TODO: horrible brute-force method of determining if the page can be 3712 // allocated 3713 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3714 if (address >= args->physical_memory_range[i].start 3715 && address < args->physical_memory_range[i].start 3716 + args->physical_memory_range[i].size) 3717 return true; 3718 } 3719 return false; 3720 } 3721 3722 3723 page_num_t 3724 vm_allocate_early_physical_page(kernel_args* args) 3725 { 3726 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3727 phys_addr_t nextPage; 3728 3729 nextPage = args->physical_allocated_range[i].start 3730 + args->physical_allocated_range[i].size; 3731 // see if the page after the next allocated paddr run can be allocated 3732 if (i + 1 < args->num_physical_allocated_ranges 3733 && args->physical_allocated_range[i + 1].size != 0) { 3734 // see if the next page will collide with the next allocated range 3735 if (nextPage >= args->physical_allocated_range[i+1].start) 3736 continue; 3737 } 3738 // see if the next physical page 
fits in the memory block 3739 if (is_page_in_physical_memory_range(args, nextPage)) { 3740 // we got one! 3741 args->physical_allocated_range[i].size += B_PAGE_SIZE; 3742 return nextPage / B_PAGE_SIZE; 3743 } 3744 } 3745 3746 return 0; 3747 // could not allocate a block 3748 } 3749 3750 3751 /*! This one uses the kernel_args' physical and virtual memory ranges to 3752 allocate some pages before the VM is completely up. 3753 */ 3754 addr_t 3755 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 3756 uint32 attributes, addr_t alignment) 3757 { 3758 if (physicalSize > virtualSize) 3759 physicalSize = virtualSize; 3760 3761 // find the vaddr to allocate at 3762 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 3763 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 3764 3765 // map the pages 3766 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 3767 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 3768 if (physicalAddress == 0) 3769 panic("error allocating early page!\n"); 3770 3771 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 3772 3773 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 3774 physicalAddress * B_PAGE_SIZE, attributes, 3775 &vm_allocate_early_physical_page); 3776 } 3777 3778 return virtualBase; 3779 } 3780 3781 3782 /*! The main entrance point to initialize the VM. */ 3783 status_t 3784 vm_init(kernel_args* args) 3785 { 3786 struct preloaded_image* image; 3787 void* address; 3788 status_t err = 0; 3789 uint32 i; 3790 3791 TRACE(("vm_init: entry\n")); 3792 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 3793 err = arch_vm_init(args); 3794 3795 // initialize some globals 3796 vm_page_init_num_pages(args); 3797 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 3798 3799 slab_init(args); 3800 3801 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3802 size_t heapSize = INITIAL_HEAP_SIZE; 3803 // try to accomodate low memory systems 3804 while (heapSize > sAvailableMemory / 8) 3805 heapSize /= 2; 3806 if (heapSize < 1024 * 1024) 3807 panic("vm_init: go buy some RAM please."); 3808 3809 // map in the new heap and initialize it 3810 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 3811 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3812 TRACE(("heap at 0x%lx\n", heapBase)); 3813 heap_init(heapBase, heapSize); 3814 #endif 3815 3816 // initialize the free page list and physical page mapper 3817 vm_page_init(args); 3818 3819 // initialize the cache allocators 3820 vm_cache_init(args); 3821 3822 { 3823 status_t error = VMAreaHash::Init(); 3824 if (error != B_OK) 3825 panic("vm_init: error initializing area hash table\n"); 3826 } 3827 3828 VMAddressSpace::Init(); 3829 reserve_boot_loader_ranges(args); 3830 3831 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3832 heap_init_post_area(); 3833 #endif 3834 3835 // Do any further initialization that the architecture dependant layers may 3836 // need now 3837 arch_vm_translation_map_init_post_area(args); 3838 arch_vm_init_post_area(args); 3839 vm_page_init_post_area(args); 3840 slab_init_post_area(); 3841 3842 // allocate areas to represent stuff that already exists 3843 3844 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3845 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 3846 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 3847 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3848 #endif 3849 3850 
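// The following areas are created B_ALREADY_WIRED: their memory was already
// allocated and mapped by the boot loader, so the VM only needs to record it.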
allocate_kernel_args(args); 3851 3852 create_preloaded_image_areas(args->kernel_image); 3853 3854 // allocate areas for preloaded images 3855 for (image = args->preloaded_images; image != NULL; image = image->next) 3856 create_preloaded_image_areas(image); 3857 3858 // allocate kernel stacks 3859 for (i = 0; i < args->num_cpus; i++) { 3860 char name[64]; 3861 3862 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 3863 address = (void*)args->cpu_kstack[i].start; 3864 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3865 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3866 } 3867 3868 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3869 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3870 3871 #if PARANOID_KERNEL_MALLOC 3872 vm_block_address_range("uninitialized heap memory", 3873 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3874 #endif 3875 #if PARANOID_KERNEL_FREE 3876 vm_block_address_range("freed heap memory", 3877 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3878 #endif 3879 3880 // create the object cache for the page mappings 3881 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3882 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3883 NULL, NULL); 3884 if (gPageMappingsObjectCache == NULL) 3885 panic("failed to create page mappings object cache"); 3886 3887 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3888 3889 #if DEBUG_CACHE_LIST 3890 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 3891 virtual_address_restrictions virtualRestrictions = {}; 3892 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 3893 physical_address_restrictions physicalRestrictions = {}; 3894 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 3895 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3896 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 3897 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 3898 &physicalRestrictions, (void**)&sCacheInfoTable); 3899 } 3900 #endif // DEBUG_CACHE_LIST 3901 3902 // add some debugger commands 3903 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3904 add_debugger_command("area", &dump_area, 3905 "Dump info about a particular area"); 3906 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3907 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3908 #if DEBUG_CACHE_LIST 3909 if (sCacheInfoTable != NULL) { 3910 add_debugger_command_etc("caches", &dump_caches, 3911 "List all VMCache trees", 3912 "[ \"-c\" ]\n" 3913 "All cache trees are listed sorted in decreasing order by number " 3914 "of\n" 3915 "used pages or, if \"-c\" is specified, by size of committed " 3916 "memory.\n", 3917 0); 3918 } 3919 #endif 3920 add_debugger_command("avail", &dump_available_memory, 3921 "Dump available memory"); 3922 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3923 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3924 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3925 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3926 add_debugger_command("string", &display_mem, "dump strings"); 3927 3928 TRACE(("vm_init: exit\n")); 3929 3930 vm_cache_init_post_heap(); 3931 3932 return err; 3933 } 3934 3935 3936 status_t 3937 vm_init_post_sem(kernel_args* args) 3938 { 3939 // This frees all unused boot loader 
resources and makes its space available 3940 // again 3941 arch_vm_init_end(args); 3942 unreserve_boot_loader_ranges(args); 3943 3944 // fill in all of the semaphores that were not allocated before 3945 // since we're still single threaded and only the kernel address space 3946 // exists, it isn't that hard to find all of the ones we need to create 3947 3948 arch_vm_translation_map_init_post_sem(args); 3949 3950 slab_init_post_sem(); 3951 3952 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3953 heap_init_post_sem(); 3954 #endif 3955 3956 return B_OK; 3957 } 3958 3959 3960 status_t 3961 vm_init_post_thread(kernel_args* args) 3962 { 3963 vm_page_init_post_thread(args); 3964 slab_init_post_thread(); 3965 return heap_init_post_thread(); 3966 } 3967 3968 3969 status_t 3970 vm_init_post_modules(kernel_args* args) 3971 { 3972 return arch_vm_init_post_modules(args); 3973 } 3974 3975 3976 void 3977 permit_page_faults(void) 3978 { 3979 Thread* thread = thread_get_current_thread(); 3980 if (thread != NULL) 3981 atomic_add(&thread->page_faults_allowed, 1); 3982 } 3983 3984 3985 void 3986 forbid_page_faults(void) 3987 { 3988 Thread* thread = thread_get_current_thread(); 3989 if (thread != NULL) 3990 atomic_add(&thread->page_faults_allowed, -1); 3991 } 3992 3993 3994 status_t 3995 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3996 addr_t* newIP) 3997 { 3998 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3999 faultAddress)); 4000 4001 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4002 4003 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4004 VMAddressSpace* addressSpace = NULL; 4005 4006 status_t status = B_OK; 4007 *newIP = 0; 4008 atomic_add((int32*)&sPageFaults, 1); 4009 4010 if (IS_KERNEL_ADDRESS(pageAddress)) { 4011 addressSpace = VMAddressSpace::GetKernel(); 4012 } else if (IS_USER_ADDRESS(pageAddress)) { 4013 addressSpace = VMAddressSpace::GetCurrent(); 4014 if (addressSpace == NULL) { 4015 if (!isUser) { 4016 dprintf("vm_page_fault: kernel thread accessing invalid user " 4017 "memory!\n"); 4018 status = B_BAD_ADDRESS; 4019 TPF(PageFaultError(-1, 4020 VMPageFaultTracing 4021 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4022 } else { 4023 // XXX weird state. 
4024 panic("vm_page_fault: non kernel thread accessing user memory " 4025 "that doesn't exist!\n"); 4026 status = B_BAD_ADDRESS; 4027 } 4028 } 4029 } else { 4030 // the hit was probably in the 64k DMZ between kernel and user space 4031 // this keeps a user space thread from passing a buffer that crosses 4032 // into kernel space 4033 status = B_BAD_ADDRESS; 4034 TPF(PageFaultError(-1, 4035 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4036 } 4037 4038 if (status == B_OK) { 4039 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser, 4040 NULL); 4041 } 4042 4043 if (status < B_OK) { 4044 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4045 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n", 4046 strerror(status), address, faultAddress, isWrite, isUser, 4047 thread_get_current_thread_id()); 4048 if (!isUser) { 4049 Thread* thread = thread_get_current_thread(); 4050 if (thread != NULL && thread->fault_handler != 0) { 4051 // this will cause the arch dependant page fault handler to 4052 // modify the IP on the interrupt frame or whatever to return 4053 // to this address 4054 *newIP = thread->fault_handler; 4055 } else { 4056 // unhandled page fault in the kernel 4057 panic("vm_page_fault: unhandled page fault in kernel space at " 4058 "0x%lx, ip 0x%lx\n", address, faultAddress); 4059 } 4060 } else { 4061 #if 1 4062 // TODO: remove me once we have proper userland debugging support 4063 // (and tools) 4064 VMArea* area = NULL; 4065 if (addressSpace != NULL) { 4066 addressSpace->ReadLock(); 4067 area = addressSpace->LookupArea(faultAddress); 4068 } 4069 4070 Thread* thread = thread_get_current_thread(); 4071 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4072 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4073 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4074 thread->team->Name(), thread->team->id, 4075 isWrite ? "write" : "read", address, faultAddress, 4076 area ? area->name : "???", faultAddress - (area ? 4077 area->Base() : 0x0)); 4078 4079 // We can print a stack trace of the userland thread here. 4080 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4081 // fault and someone is already waiting for a write lock on the same address 4082 // space. This thread will then try to acquire the lock again and will 4083 // be queued after the writer. 4084 # if 0 4085 if (area) { 4086 struct stack_frame { 4087 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4088 struct stack_frame* previous; 4089 void* return_address; 4090 #else 4091 // ... 
4092 #warning writeme 4093 #endif 4094 } frame; 4095 # ifdef __INTEL__ 4096 struct iframe* iframe = x86_get_user_iframe(); 4097 if (iframe == NULL) 4098 panic("iframe is NULL!"); 4099 4100 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4101 sizeof(struct stack_frame)); 4102 # elif defined(__POWERPC__) 4103 struct iframe* iframe = ppc_get_user_iframe(); 4104 if (iframe == NULL) 4105 panic("iframe is NULL!"); 4106 4107 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4108 sizeof(struct stack_frame)); 4109 # else 4110 # warning "vm_page_fault() stack trace won't work" 4111 status = B_ERROR; 4112 # endif 4113 4114 dprintf("stack trace:\n"); 4115 int32 maxFrames = 50; 4116 while (status == B_OK && --maxFrames >= 0 4117 && frame.return_address != NULL) { 4118 dprintf(" %p", frame.return_address); 4119 area = addressSpace->LookupArea( 4120 (addr_t)frame.return_address); 4121 if (area) { 4122 dprintf(" (%s + %#lx)", area->name, 4123 (addr_t)frame.return_address - area->Base()); 4124 } 4125 dprintf("\n"); 4126 4127 status = user_memcpy(&frame, frame.previous, 4128 sizeof(struct stack_frame)); 4129 } 4130 } 4131 # endif // 0 (stack trace) 4132 4133 if (addressSpace != NULL) 4134 addressSpace->ReadUnlock(); 4135 #endif 4136 4137 // If the thread has a signal handler for SIGSEGV, we simply 4138 // send it the signal. Otherwise we notify the user debugger 4139 // first. 4140 struct sigaction action; 4141 if ((sigaction(SIGSEGV, NULL, &action) == 0 4142 && action.sa_handler != SIG_DFL 4143 && action.sa_handler != SIG_IGN) 4144 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4145 SIGSEGV)) { 4146 Signal signal(SIGSEGV, 4147 status == B_PERMISSION_DENIED 4148 ? SEGV_ACCERR : SEGV_MAPERR, 4149 EFAULT, thread->team->id); 4150 signal.SetAddress((void*)address); 4151 send_signal_to_thread(thread, signal, 0); 4152 } 4153 } 4154 } 4155 4156 if (addressSpace != NULL) 4157 addressSpace->Put(); 4158 4159 return B_HANDLED_INTERRUPT; 4160 } 4161 4162 4163 struct PageFaultContext { 4164 AddressSpaceReadLocker addressSpaceLocker; 4165 VMCacheChainLocker cacheChainLocker; 4166 4167 VMTranslationMap* map; 4168 VMCache* topCache; 4169 off_t cacheOffset; 4170 vm_page_reservation reservation; 4171 bool isWrite; 4172 4173 // return values 4174 vm_page* page; 4175 bool restart; 4176 4177 4178 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4179 : 4180 addressSpaceLocker(addressSpace, true), 4181 map(addressSpace->TranslationMap()), 4182 isWrite(isWrite) 4183 { 4184 } 4185 4186 ~PageFaultContext() 4187 { 4188 UnlockAll(); 4189 vm_page_unreserve_pages(&reservation); 4190 } 4191 4192 void Prepare(VMCache* topCache, off_t cacheOffset) 4193 { 4194 this->topCache = topCache; 4195 this->cacheOffset = cacheOffset; 4196 page = NULL; 4197 restart = false; 4198 4199 cacheChainLocker.SetTo(topCache); 4200 } 4201 4202 void UnlockAll(VMCache* exceptCache = NULL) 4203 { 4204 topCache = NULL; 4205 addressSpaceLocker.Unlock(); 4206 cacheChainLocker.Unlock(exceptCache); 4207 } 4208 }; 4209 4210 4211 /*! Gets the page that should be mapped into the area. 4212 Returns an error code other than \c B_OK, if the page couldn't be found or 4213 paged in. The locking state of the address space and the caches is undefined 4214 in that case. 4215 Returns \c B_OK with \c context.restart set to \c true, if the functions 4216 had to unlock the address space and all caches and is supposed to be called 4217 again. 4218 Returns \c B_OK with \c context.restart set to \c false, if the page was 4219 found. 
It is returned in \c context.page. The address space will still be 4220 locked as well as all caches starting from the top cache to at least the 4221 cache the page lives in. 4222 */ 4223 static status_t 4224 fault_get_page(PageFaultContext& context) 4225 { 4226 VMCache* cache = context.topCache; 4227 VMCache* lastCache = NULL; 4228 vm_page* page = NULL; 4229 4230 while (cache != NULL) { 4231 // We already hold the lock of the cache at this point. 4232 4233 lastCache = cache; 4234 4235 page = cache->LookupPage(context.cacheOffset); 4236 if (page != NULL && page->busy) { 4237 // page must be busy -- wait for it to become unbusy 4238 context.UnlockAll(cache); 4239 cache->ReleaseRefLocked(); 4240 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4241 4242 // restart the whole process 4243 context.restart = true; 4244 return B_OK; 4245 } 4246 4247 if (page != NULL) 4248 break; 4249 4250 // The current cache does not contain the page we're looking for. 4251 4252 // see if the backing store has it 4253 if (cache->HasPage(context.cacheOffset)) { 4254 // insert a fresh page and mark it busy -- we're going to read it in 4255 page = vm_page_allocate_page(&context.reservation, 4256 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4257 cache->InsertPage(page, context.cacheOffset); 4258 4259 // We need to unlock all caches and the address space while reading 4260 // the page in. Keep a reference to the cache around. 4261 cache->AcquireRefLocked(); 4262 context.UnlockAll(); 4263 4264 // read the page in 4265 generic_io_vec vec; 4266 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4267 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4268 4269 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4270 B_PHYSICAL_IO_REQUEST, &bytesRead); 4271 4272 cache->Lock(); 4273 4274 if (status < B_OK) { 4275 // on error remove and free the page 4276 dprintf("reading page from cache %p returned: %s!\n", 4277 cache, strerror(status)); 4278 4279 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4280 cache->RemovePage(page); 4281 vm_page_set_state(page, PAGE_STATE_FREE); 4282 4283 cache->ReleaseRefAndUnlock(); 4284 return status; 4285 } 4286 4287 // mark the page unbusy again 4288 cache->MarkPageUnbusy(page); 4289 4290 DEBUG_PAGE_ACCESS_END(page); 4291 4292 // Since we needed to unlock everything temporarily, the area 4293 // situation might have changed. So we need to restart the whole 4294 // process. 4295 cache->ReleaseRefAndUnlock(); 4296 context.restart = true; 4297 return B_OK; 4298 } 4299 4300 cache = context.cacheChainLocker.LockSourceCache(); 4301 } 4302 4303 if (page == NULL) { 4304 // There was no adequate page, determine the cache for a clean one. 4305 // Read-only pages come in the deepest cache, only the top most cache 4306 // may have direct write access. 4307 cache = context.isWrite ? context.topCache : lastCache; 4308 4309 // allocate a clean page 4310 page = vm_page_allocate_page(&context.reservation, 4311 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4312 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4313 page->physical_page_number)); 4314 4315 // insert the new page into our cache 4316 cache->InsertPage(page, context.cacheOffset); 4317 } else if (page->Cache() != context.topCache && context.isWrite) { 4318 // We have a page that has the data we want, but in the wrong cache 4319 // object so we need to copy it and stick it into the top cache. 
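// (Rough picture of the common copy-on-write case, for orientation: the
// chain examined above is typically
//     topCache (the area's anonymous cache, no page at this offset yet)
//         -> source cache (holds the shared page that was just found),
// and the code below allocates a fresh page, copies the shared page's
// contents into it, and inserts the copy into the top cache, leaving the
// original page untouched for the other consumers of the source cache.)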
4320 vm_page* sourcePage = page; 4321 4322 // TODO: If memory is low, it might be a good idea to steal the page 4323 // from our source cache -- if possible, that is. 4324 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4325 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4326 4327 // To not needlessly kill concurrency we unlock all caches but the top 4328 // one while copying the page. Lacking another mechanism to ensure that 4329 // the source page doesn't disappear, we mark it busy. 4330 sourcePage->busy = true; 4331 context.cacheChainLocker.UnlockKeepRefs(true); 4332 4333 // copy the page 4334 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4335 sourcePage->physical_page_number * B_PAGE_SIZE); 4336 4337 context.cacheChainLocker.RelockCaches(true); 4338 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4339 4340 // insert the new page into our cache 4341 context.topCache->InsertPage(page, context.cacheOffset); 4342 } else 4343 DEBUG_PAGE_ACCESS_START(page); 4344 4345 context.page = page; 4346 return B_OK; 4347 } 4348 4349 4350 /*! Makes sure the address in the given address space is mapped. 4351 4352 \param addressSpace The address space. 4353 \param originalAddress The address. Doesn't need to be page aligned. 4354 \param isWrite If \c true the address shall be write-accessible. 4355 \param isUser If \c true the access is requested by a userland team. 4356 \param wirePage On success, if non \c NULL, the wired count of the page 4357 mapped at the given address is incremented and the page is returned 4358 via this parameter. 4359 \param wiredRange If given, this wiredRange is ignored when checking whether 4360 an already mapped page at the virtual address can be unmapped. 4361 \return \c B_OK on success, another error code otherwise. 4362 */ 4363 static status_t 4364 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4365 bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange) 4366 { 4367 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4368 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4369 originalAddress, isWrite, isUser)); 4370 4371 PageFaultContext context(addressSpace, isWrite); 4372 4373 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4374 status_t status = B_OK; 4375 4376 addressSpace->IncrementFaultCount(); 4377 4378 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4379 // the pages upfront makes sure we don't have any cache locked, so that the 4380 // page daemon/thief can do their job without problems. 4381 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4382 originalAddress); 4383 context.addressSpaceLocker.Unlock(); 4384 vm_page_reserve_pages(&context.reservation, reservePages, 4385 addressSpace == VMAddressSpace::Kernel() 4386 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4387 4388 while (true) { 4389 context.addressSpaceLocker.Lock(); 4390 4391 // get the area the fault was in 4392 VMArea* area = addressSpace->LookupArea(address); 4393 if (area == NULL) { 4394 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4395 "space\n", originalAddress); 4396 TPF(PageFaultError(-1, 4397 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4398 status = B_BAD_ADDRESS; 4399 break; 4400 } 4401 4402 // check permissions 4403 uint32 protection = get_area_page_protection(area, address); 4404 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4405 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4406 area->id, (void*)originalAddress); 4407 TPF(PageFaultError(area->id, 4408 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4409 status = B_PERMISSION_DENIED; 4410 break; 4411 } 4412 if (isWrite && (protection 4413 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4414 dprintf("write access attempted on write-protected area 0x%" 4415 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4416 TPF(PageFaultError(area->id, 4417 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4418 status = B_PERMISSION_DENIED; 4419 break; 4420 } else if (!isWrite && (protection 4421 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4422 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4423 " at %p\n", area->id, (void*)originalAddress); 4424 TPF(PageFaultError(area->id, 4425 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4426 status = B_PERMISSION_DENIED; 4427 break; 4428 } 4429 4430 // We have the area, it was a valid access, so let's try to resolve the 4431 // page fault now. 4432 // At first, the top most cache from the area is investigated. 4433 4434 context.Prepare(vm_area_get_locked_cache(area), 4435 address - area->Base() + area->cache_offset); 4436 4437 // See if this cache has a fault handler -- this will do all the work 4438 // for us. 4439 { 4440 // Note, since the page fault is resolved with interrupts enabled, 4441 // the fault handler could be called more than once for the same 4442 // reason -- the store must take this into account. 4443 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4444 if (status != B_BAD_HANDLER) 4445 break; 4446 } 4447 4448 // The top most cache has no fault handler, so let's see if the cache or 4449 // its sources already have the page we're searching for (we're going 4450 // from top to bottom). 4451 status = fault_get_page(context); 4452 if (status != B_OK) { 4453 TPF(PageFaultError(area->id, status)); 4454 break; 4455 } 4456 4457 if (context.restart) 4458 continue; 4459 4460 // All went fine, all there is left to do is to map the page into the 4461 // address space. 
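// (Roughly, the remaining steps are: downgrade the protection to
// read-only if the page belongs to a lower cache and this was only a
// read fault, check whether some other page is currently mapped at the
// address and unmap it if necessary, and finally map context.page.)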
4462 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4463 context.page)); 4464 4465 // If the page doesn't reside in the area's cache, we need to make sure 4466 // it's mapped in read-only, so that we cannot overwrite someone else's 4467 // data (copy-on-write) 4468 uint32 newProtection = protection; 4469 if (context.page->Cache() != context.topCache && !isWrite) 4470 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4471 4472 bool unmapPage = false; 4473 bool mapPage = true; 4474 4475 // check whether there's already a page mapped at the address 4476 context.map->Lock(); 4477 4478 phys_addr_t physicalAddress; 4479 uint32 flags; 4480 vm_page* mappedPage = NULL; 4481 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4482 && (flags & PAGE_PRESENT) != 0 4483 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4484 != NULL) { 4485 // Yep there's already a page. If it's ours, we can simply adjust 4486 // its protection. Otherwise we have to unmap it. 4487 if (mappedPage == context.page) { 4488 context.map->ProtectPage(area, address, newProtection); 4489 // Note: We assume that ProtectPage() is atomic (i.e. 4490 // the page isn't temporarily unmapped), otherwise we'd have 4491 // to make sure it isn't wired. 4492 mapPage = false; 4493 } else 4494 unmapPage = true; 4495 } 4496 4497 context.map->Unlock(); 4498 4499 if (unmapPage) { 4500 // If the page is wired, we can't unmap it. Wait until it is unwired 4501 // again and restart. 4502 VMAreaUnwiredWaiter waiter; 4503 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4504 wiredRange)) { 4505 // unlock everything and wait 4506 context.UnlockAll(); 4507 waiter.waitEntry.Wait(); 4508 continue; 4509 } 4510 4511 // Note: The mapped page is a page of a lower cache. We are 4512 // guaranteed to have that cache locked, our new page is a copy of 4513 // that page, and the page is not busy. The logic for that guarantee 4514 // is as follows: Since the page is mapped, it must live in the top 4515 // cache (ruled out above) or any of its lower caches, and there is 4516 // (was before the new page was inserted) no other page in any 4517 // cache between the top cache and the page's cache (otherwise that 4518 // would be mapped instead). That in turn means that our algorithm 4519 // must have found it and therefore it cannot be busy either. 4520 DEBUG_PAGE_ACCESS_START(mappedPage); 4521 unmap_page(area, address); 4522 DEBUG_PAGE_ACCESS_END(mappedPage); 4523 } 4524 4525 if (mapPage) { 4526 if (map_page(area, context.page, address, newProtection, 4527 &context.reservation) != B_OK) { 4528 // Mapping can only fail when the page mapping object couldn't 4529 // be allocated. Save for the missing mapping everything is 4530 // fine, though. If this was a regular page fault, we'll simply 4531 // leave and probably fault again. To make sure we'll have more 4532 // luck then, we ensure that the minimum object reserve is 4533 // available. 4534 DEBUG_PAGE_ACCESS_END(context.page); 4535 4536 context.UnlockAll(); 4537 4538 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4539 != B_OK) { 4540 // Apparently the situation is serious. Let's get ourselves 4541 // killed. 4542 status = B_NO_MEMORY; 4543 } else if (wirePage != NULL) { 4544 // The caller expects us to wire the page. Since 4545 // object_cache_reserve() succeeded, we should now be able 4546 // to allocate a mapping structure. Restart.
4547 continue; 4548 } 4549 4550 break; 4551 } 4552 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4553 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4554 4555 // also wire the page, if requested 4556 if (wirePage != NULL && status == B_OK) { 4557 increment_page_wired_count(context.page); 4558 *wirePage = context.page; 4559 } 4560 4561 DEBUG_PAGE_ACCESS_END(context.page); 4562 4563 break; 4564 } 4565 4566 return status; 4567 } 4568 4569 4570 status_t 4571 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4572 { 4573 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4574 } 4575 4576 status_t 4577 vm_put_physical_page(addr_t vaddr, void* handle) 4578 { 4579 return sPhysicalPageMapper->PutPage(vaddr, handle); 4580 } 4581 4582 4583 status_t 4584 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4585 void** _handle) 4586 { 4587 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4588 } 4589 4590 status_t 4591 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4592 { 4593 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4594 } 4595 4596 4597 status_t 4598 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4599 { 4600 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4601 } 4602 4603 status_t 4604 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4605 { 4606 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4607 } 4608 4609 4610 void 4611 vm_get_info(system_memory_info* info) 4612 { 4613 swap_get_info(info); 4614 4615 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4616 info->page_faults = sPageFaults; 4617 4618 MutexLocker locker(sAvailableMemoryLock); 4619 info->free_memory = sAvailableMemory; 4620 info->needed_memory = sNeededMemory; 4621 } 4622 4623 4624 uint32 4625 vm_num_page_faults(void) 4626 { 4627 return sPageFaults; 4628 } 4629 4630 4631 off_t 4632 vm_available_memory(void) 4633 { 4634 MutexLocker locker(sAvailableMemoryLock); 4635 return sAvailableMemory; 4636 } 4637 4638 4639 off_t 4640 vm_available_not_needed_memory(void) 4641 { 4642 MutexLocker locker(sAvailableMemoryLock); 4643 return sAvailableMemory - sNeededMemory; 4644 } 4645 4646 4647 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4648 debugger. 
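Unlike vm_available_not_needed_memory() it does not acquire
sAvailableMemoryLock, since blocking locks are not an option while in the
kernel debugger.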
4649 */ 4650 off_t 4651 vm_available_not_needed_memory_debug(void) 4652 { 4653 return sAvailableMemory - sNeededMemory; 4654 } 4655 4656 4657 size_t 4658 vm_kernel_address_space_left(void) 4659 { 4660 return VMAddressSpace::Kernel()->FreeSpace(); 4661 } 4662 4663 4664 void 4665 vm_unreserve_memory(size_t amount) 4666 { 4667 mutex_lock(&sAvailableMemoryLock); 4668 4669 sAvailableMemory += amount; 4670 4671 mutex_unlock(&sAvailableMemoryLock); 4672 } 4673 4674 4675 status_t 4676 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4677 { 4678 size_t reserve = kMemoryReserveForPriority[priority]; 4679 4680 MutexLocker locker(sAvailableMemoryLock); 4681 4682 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4683 4684 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4685 sAvailableMemory -= amount; 4686 return B_OK; 4687 } 4688 4689 if (timeout <= 0) 4690 return B_NO_MEMORY; 4691 4692 // turn timeout into an absolute timeout 4693 timeout += system_time(); 4694 4695 // loop until we've got the memory or the timeout occurs 4696 do { 4697 sNeededMemory += amount; 4698 4699 // call the low resource manager 4700 locker.Unlock(); 4701 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4702 B_ABSOLUTE_TIMEOUT, timeout); 4703 locker.Lock(); 4704 4705 sNeededMemory -= amount; 4706 4707 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4708 sAvailableMemory -= amount; 4709 return B_OK; 4710 } 4711 } while (timeout > system_time()); 4712 4713 return B_NO_MEMORY; 4714 } 4715 4716 4717 status_t 4718 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 4719 { 4720 // NOTE: The caller is responsible for synchronizing calls to this function! 4721 4722 AddressSpaceReadLocker locker; 4723 VMArea* area; 4724 status_t status = locker.SetFromArea(id, area); 4725 if (status != B_OK) 4726 return status; 4727 4728 // nothing to do if the type doesn't change 4729 uint32 oldType = area->MemoryType(); 4730 if (type == oldType) 4731 return B_OK; 4732 4733 // set the memory type of the area and the mapped pages 4734 VMTranslationMap* map = area->address_space->TranslationMap(); 4735 map->Lock(); 4736 area->SetMemoryType(type); 4737 map->ProtectArea(area, area->protection); 4738 map->Unlock(); 4739 4740 // set the physical memory type 4741 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 4742 if (error != B_OK) { 4743 // reset the memory type of the area and the mapped pages 4744 map->Lock(); 4745 area->SetMemoryType(oldType); 4746 map->ProtectArea(area, area->protection); 4747 map->Unlock(); 4748 return error; 4749 } 4750 4751 return B_OK; 4752 4753 } 4754 4755 4756 /*! This function enforces some protection properties: 4757 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 4758 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 4759 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 4760 and B_KERNEL_WRITE_AREA.
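For example, a request of just B_READ_AREA | B_WRITE_AREA is expanded to
B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
and a request of 0 to B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.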
4761 */ 4762 static void 4763 fix_protection(uint32* protection) 4764 { 4765 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4766 if ((*protection & B_USER_PROTECTION) == 0 4767 || (*protection & B_WRITE_AREA) != 0) 4768 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4769 else 4770 *protection |= B_KERNEL_READ_AREA; 4771 } 4772 } 4773 4774 4775 static void 4776 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4777 { 4778 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4779 info->area = area->id; 4780 info->address = (void*)area->Base(); 4781 info->size = area->Size(); 4782 info->protection = area->protection; 4783 info->lock = B_FULL_LOCK; 4784 info->team = area->address_space->ID(); 4785 info->copy_count = 0; 4786 info->in_count = 0; 4787 info->out_count = 0; 4788 // TODO: retrieve real values here! 4789 4790 VMCache* cache = vm_area_get_locked_cache(area); 4791 4792 // Note, this is a simplification; the cache could be larger than this area 4793 info->ram_size = cache->page_count * B_PAGE_SIZE; 4794 4795 vm_area_put_locked_cache(cache); 4796 } 4797 4798 4799 static status_t 4800 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4801 { 4802 // is newSize a multiple of B_PAGE_SIZE? 4803 if (newSize & (B_PAGE_SIZE - 1)) 4804 return B_BAD_VALUE; 4805 4806 // lock all affected address spaces and the cache 4807 VMArea* area; 4808 VMCache* cache; 4809 4810 MultiAddressSpaceLocker locker; 4811 AreaCacheLocker cacheLocker; 4812 4813 status_t status; 4814 size_t oldSize; 4815 bool anyKernelArea; 4816 bool restart; 4817 4818 do { 4819 anyKernelArea = false; 4820 restart = false; 4821 4822 locker.Unset(); 4823 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4824 if (status != B_OK) 4825 return status; 4826 cacheLocker.SetTo(cache, true); // already locked 4827 4828 // enforce restrictions 4829 if (!kernel) { 4830 if ((area->protection & B_KERNEL_AREA) != 0) 4831 return B_NOT_ALLOWED; 4832 // TODO: Enforce all restrictions (team, etc.)! 4833 } 4834 4835 oldSize = area->Size(); 4836 if (newSize == oldSize) 4837 return B_OK; 4838 4839 if (cache->type != CACHE_TYPE_RAM) 4840 return B_NOT_ALLOWED; 4841 4842 if (oldSize < newSize) { 4843 // We need to check if all areas of this cache can be resized. 4844 for (VMArea* current = cache->areas; current != NULL; 4845 current = current->cache_next) { 4846 if (!current->address_space->CanResizeArea(current, newSize)) 4847 return B_ERROR; 4848 anyKernelArea 4849 |= current->address_space == VMAddressSpace::Kernel(); 4850 } 4851 } else { 4852 // We're shrinking the areas, so we must make sure the affected 4853 // ranges are not wired. 4854 for (VMArea* current = cache->areas; current != NULL; 4855 current = current->cache_next) { 4856 anyKernelArea 4857 |= current->address_space == VMAddressSpace::Kernel(); 4858 4859 if (wait_if_area_range_is_wired(current, 4860 current->Base() + newSize, oldSize - newSize, &locker, 4861 &cacheLocker)) { 4862 restart = true; 4863 break; 4864 } 4865 } 4866 } 4867 } while (restart); 4868 4869 // Okay, looks good so far, so let's do it 4870 4871 int priority = kernel && anyKernelArea 4872 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4873 uint32 allocationFlags = kernel && anyKernelArea 4874 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4875 4876 if (oldSize < newSize) { 4877 // Growing the cache can fail, so we do it first. 
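// (Note: Resize() takes the cache's new end offset -- hence
// virtual_base + newSize -- rather than a size delta. Growing first and
// shrinking only after all areas have been resized (see below) keeps the
// cache at least as large as every area that references it, no matter
// where a failure occurs.)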
4878 status = cache->Resize(cache->virtual_base + newSize, priority); 4879 if (status != B_OK) 4880 return status; 4881 } 4882 4883 for (VMArea* current = cache->areas; current != NULL; 4884 current = current->cache_next) { 4885 status = current->address_space->ResizeArea(current, newSize, 4886 allocationFlags); 4887 if (status != B_OK) 4888 break; 4889 4890 // We also need to unmap all pages beyond the new size, if the area has 4891 // shrunk 4892 if (newSize < oldSize) { 4893 VMCacheChainLocker cacheChainLocker(cache); 4894 cacheChainLocker.LockAllSourceCaches(); 4895 4896 unmap_pages(current, current->Base() + newSize, 4897 oldSize - newSize); 4898 4899 cacheChainLocker.Unlock(cache); 4900 } 4901 } 4902 4903 if (status == B_OK) { 4904 // Shrink or grow individual page protections if in use. 4905 if (area->page_protections != NULL) { 4906 uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2; 4907 uint8* newProtections 4908 = (uint8*)realloc(area->page_protections, bytes); 4909 if (newProtections == NULL) 4910 status = B_NO_MEMORY; 4911 else { 4912 area->page_protections = newProtections; 4913 4914 if (oldSize < newSize) { 4915 // init the additional page protections to that of the area 4916 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 4917 uint32 areaProtection = area->protection 4918 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 4919 memset(area->page_protections + offset, 4920 areaProtection | (areaProtection << 4), bytes - offset); 4921 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 4922 uint8& entry = area->page_protections[offset - 1]; 4923 entry = (entry & 0x0f) | (areaProtection << 4); 4924 } 4925 } 4926 } 4927 } 4928 } 4929 4930 // shrinking the cache can't fail, so we do it now 4931 if (status == B_OK && newSize < oldSize) 4932 status = cache->Resize(cache->virtual_base + newSize, priority); 4933 4934 if (status != B_OK) { 4935 // Something failed -- resize the areas back to their original size. 4936 // This can fail, too, in which case we're seriously screwed. 4937 for (VMArea* current = cache->areas; current != NULL; 4938 current = current->cache_next) { 4939 if (current->address_space->ResizeArea(current, oldSize, 4940 allocationFlags) != B_OK) { 4941 panic("vm_resize_area(): Failed and unable to restore the " 4942 "original state."); 4943 } 4944 } 4945 4946 cache->Resize(cache->virtual_base + oldSize, priority); 4947 } 4948 4949 // TODO: we must honour the lock restrictions of this area 4950 return status; 4951 } 4952 4953 4954 status_t 4955 vm_memset_physical(phys_addr_t address, int value, size_t length) 4956 { 4957 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4958 } 4959 4960 4961 status_t 4962 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 4963 { 4964 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4965 } 4966 4967 4968 status_t 4969 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 4970 bool user) 4971 { 4972 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4973 } 4974 4975 4976 void 4977 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 4978 { 4979 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4980 } 4981 4982 4983 /*! Copies a range of memory directly from/to a page that might not be mapped 4984 at the moment. 4985 4986 For \a unsafeMemory the current mapping (if any) is ignored. The function 4987 walks through the respective area's cache chain to find the physical page 4988 and copies from/to it directly.
4989 The memory range starting at \a unsafeMemory with a length of \a size bytes 4990 must not cross a page boundary. 4991 4992 \param teamID The team ID identifying the address space \a unsafeMemory is 4993 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 4994 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 4995 is passed, the address space of the thread returned by 4996 debug_get_debugged_thread() is used. 4997 \param unsafeMemory The start of the unsafe memory range to be copied 4998 from/to. 4999 \param buffer A safely accessible kernel buffer to be copied from/to. 5000 \param size The number of bytes to be copied. 5001 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5002 \a unsafeMemory, the other way around otherwise. 5003 */ 5004 status_t 5005 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5006 size_t size, bool copyToUnsafe) 5007 { 5008 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5009 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5010 return B_BAD_VALUE; 5011 } 5012 5013 // get the address space for the debugged thread 5014 VMAddressSpace* addressSpace; 5015 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5016 addressSpace = VMAddressSpace::Kernel(); 5017 } else if (teamID == B_CURRENT_TEAM) { 5018 Thread* thread = debug_get_debugged_thread(); 5019 if (thread == NULL || thread->team == NULL) 5020 return B_BAD_ADDRESS; 5021 5022 addressSpace = thread->team->address_space; 5023 } else 5024 addressSpace = VMAddressSpace::DebugGet(teamID); 5025 5026 if (addressSpace == NULL) 5027 return B_BAD_ADDRESS; 5028 5029 // get the area 5030 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5031 if (area == NULL) 5032 return B_BAD_ADDRESS; 5033 5034 // search the page 5035 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5036 + area->cache_offset; 5037 VMCache* cache = area->cache; 5038 vm_page* page = NULL; 5039 while (cache != NULL) { 5040 page = cache->DebugLookupPage(cacheOffset); 5041 if (page != NULL) 5042 break; 5043 5044 // Page not found in this cache -- if it is paged out, we must not try 5045 // to get it from lower caches. 5046 if (cache->DebugHasPage(cacheOffset)) 5047 break; 5048 5049 cache = cache->source; 5050 } 5051 5052 if (page == NULL) 5053 return B_UNSUPPORTED; 5054 5055 // copy from/to physical memory 5056 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5057 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5058 5059 if (copyToUnsafe) { 5060 if (page->Cache() != area->cache) 5061 return B_UNSUPPORTED; 5062 5063 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5064 } 5065 5066 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5067 } 5068 5069 5070 // #pragma mark - kernel public API 5071 5072 5073 status_t 5074 user_memcpy(void* to, const void* from, size_t size) 5075 { 5076 // don't allow address overflows 5077 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5078 return B_BAD_ADDRESS; 5079 5080 if (arch_cpu_user_memcpy(to, from, size, 5081 &thread_get_current_thread()->fault_handler) < B_OK) 5082 return B_BAD_ADDRESS; 5083 5084 return B_OK; 5085 } 5086 5087 5088 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5089 the string in \a to, NULL-terminating the result. 5090 5091 \param to Pointer to the destination C-string. 5092 \param from Pointer to the source C-string. 
5093 \param size Size in bytes of the string buffer pointed to by \a to. 5094 5095 \return strlen(\a from). 5096 */ 5097 ssize_t 5098 user_strlcpy(char* to, const char* from, size_t size) 5099 { 5100 if (to == NULL && size != 0) 5101 return B_BAD_VALUE; 5102 if (from == NULL) 5103 return B_BAD_ADDRESS; 5104 5105 // limit size to avoid address overflows 5106 size_t maxSize = std::min(size, 5107 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 5108 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 5109 // the source address might still overflow. 5110 5111 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 5112 &thread_get_current_thread()->fault_handler); 5113 5114 // If we hit the address overflow boundary, fail. 5115 if (result < 0 || (result >= 0 && (size_t)result >= maxSize 5116 && maxSize < size)) { 5117 return B_BAD_ADDRESS; 5118 } 5119 5120 return result; 5121 } 5122 5123 5124 status_t 5125 user_memset(void* s, char c, size_t count) 5126 { 5127 // don't allow address overflows 5128 if ((addr_t)s + count < (addr_t)s) 5129 return B_BAD_ADDRESS; 5130 5131 if (arch_cpu_user_memset(s, c, count, 5132 &thread_get_current_thread()->fault_handler) < B_OK) 5133 return B_BAD_ADDRESS; 5134 5135 return B_OK; 5136 } 5137 5138 5139 /*! Wires a single page at the given address. 5140 5141 \param team The team whose address space the address belongs to. Supports 5142 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5143 parameter is ignored. 5144 \param address address The virtual address to wire down. Does not need to 5145 be page aligned. 5146 \param writable If \c true the page shall be writable. 5147 \param info On success the info is filled in, among other things 5148 containing the physical address the given virtual one translates to. 5149 \return \c B_OK, when the page could be wired, another error code otherwise. 5150 */ 5151 status_t 5152 vm_wire_page(team_id team, addr_t address, bool writable, 5153 VMPageWiringInfo* info) 5154 { 5155 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5156 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5157 5158 // compute the page protection that is required 5159 bool isUser = IS_USER_ADDRESS(address); 5160 uint32 requiredProtection = PAGE_PRESENT 5161 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5162 if (writable) 5163 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5164 5165 // get and read lock the address space 5166 VMAddressSpace* addressSpace = NULL; 5167 if (isUser) { 5168 if (team == B_CURRENT_TEAM) 5169 addressSpace = VMAddressSpace::GetCurrent(); 5170 else 5171 addressSpace = VMAddressSpace::Get(team); 5172 } else 5173 addressSpace = VMAddressSpace::GetKernel(); 5174 if (addressSpace == NULL) 5175 return B_ERROR; 5176 5177 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5178 5179 VMTranslationMap* map = addressSpace->TranslationMap(); 5180 status_t error = B_OK; 5181 5182 // get the area 5183 VMArea* area = addressSpace->LookupArea(pageAddress); 5184 if (area == NULL) { 5185 addressSpace->Put(); 5186 return B_BAD_ADDRESS; 5187 } 5188 5189 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5190 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5191 5192 // mark the area range wired 5193 area->Wire(&info->range); 5194 5195 // Lock the area's cache chain and the translation map. Needed to look 5196 // up the page and play with its wired count. 
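// (The locking order used here, and again in lock_memory_etc() below, is:
// address space read lock -> area's top cache -> source caches (top to
// bottom) -> translation map. The wired count is only touched while the
// page's cache and the translation map are locked, so the page can
// neither be unmapped nor move to another cache in the meantime.)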
5197 cacheChainLocker.LockAllSourceCaches(); 5198 map->Lock(); 5199 5200 phys_addr_t physicalAddress; 5201 uint32 flags; 5202 vm_page* page; 5203 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5204 && (flags & requiredProtection) == requiredProtection 5205 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5206 != NULL) { 5207 // Already mapped with the correct permissions -- just increment 5208 // the page's wired count. 5209 increment_page_wired_count(page); 5210 5211 map->Unlock(); 5212 cacheChainLocker.Unlock(); 5213 addressSpaceLocker.Unlock(); 5214 } else { 5215 // Let vm_soft_fault() map the page for us, if possible. We need 5216 // to fully unlock to avoid deadlocks. Since we have already 5217 // wired the area itself, nothing disturbing will happen with it 5218 // in the meantime. 5219 map->Unlock(); 5220 cacheChainLocker.Unlock(); 5221 addressSpaceLocker.Unlock(); 5222 5223 error = vm_soft_fault(addressSpace, pageAddress, writable, isUser, 5224 &page, &info->range); 5225 5226 if (error != B_OK) { 5227 // The page could not be mapped -- clean up. 5228 VMCache* cache = vm_area_get_locked_cache(area); 5229 area->Unwire(&info->range); 5230 cache->ReleaseRefAndUnlock(); 5231 addressSpace->Put(); 5232 return error; 5233 } 5234 } 5235 5236 info->physicalAddress 5237 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5238 + address % B_PAGE_SIZE; 5239 info->page = page; 5240 5241 return B_OK; 5242 } 5243 5244 5245 /*! Unwires a single page previously wired via vm_wire_page(). 5246 5247 \param info The same object passed to vm_wire_page() before. 5248 */ 5249 void 5250 vm_unwire_page(VMPageWiringInfo* info) 5251 { 5252 // lock the address space 5253 VMArea* area = info->range.area; 5254 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5255 // takes over our reference 5256 5257 // lock the top cache 5258 VMCache* cache = vm_area_get_locked_cache(area); 5259 VMCacheChainLocker cacheChainLocker(cache); 5260 5261 if (info->page->Cache() != cache) { 5262 // The page is not in the top cache, so we lock the whole cache chain 5263 // before touching the page's wired count. 5264 cacheChainLocker.LockAllSourceCaches(); 5265 } 5266 5267 decrement_page_wired_count(info->page); 5268 5269 // remove the wired range from the area 5270 area->Unwire(&info->range); 5271 5272 cacheChainLocker.Unlock(); 5273 } 5274 5275 5276 /*! Wires down the given address range in the specified team's address space. 5277 5278 If successful the function 5279 - acquires a reference to the specified team's address space, 5280 - adds respective wired ranges to all areas that intersect with the given 5281 address range, 5282 - makes sure all pages in the given address range are mapped with the 5283 requested access permissions and increments their wired count. 5284 5285 It fails when \a team doesn't specify a valid address space, when any part 5286 of the specified address range is not covered by areas, when the concerned 5287 areas don't allow mapping with the requested permissions, or when mapping 5288 failed for another reason. 5289 5290 When successful the call must be balanced by an unlock_memory_etc() call with 5291 the exact same parameters. 5292 5293 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5294 supported. 5295 \param address The start of the address range to be wired. 5296 \param numBytes The size of the address range to be wired. 5297 \param flags Flags.
Currently only \c B_READ_DEVICE is defined, which 5298 requests that the range must be wired writable ("read from device 5299 into memory"). 5300 \return \c B_OK on success, another error code otherwise. 5301 */ 5302 status_t 5303 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5304 { 5305 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5306 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5307 5308 // compute the page protection that is required 5309 bool isUser = IS_USER_ADDRESS(address); 5310 bool writable = (flags & B_READ_DEVICE) == 0; 5311 uint32 requiredProtection = PAGE_PRESENT 5312 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5313 if (writable) 5314 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5315 5316 uint32 mallocFlags = isUser 5317 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5318 5319 // get and read lock the address space 5320 VMAddressSpace* addressSpace = NULL; 5321 if (isUser) { 5322 if (team == B_CURRENT_TEAM) 5323 addressSpace = VMAddressSpace::GetCurrent(); 5324 else 5325 addressSpace = VMAddressSpace::Get(team); 5326 } else 5327 addressSpace = VMAddressSpace::GetKernel(); 5328 if (addressSpace == NULL) 5329 return B_ERROR; 5330 5331 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5332 5333 VMTranslationMap* map = addressSpace->TranslationMap(); 5334 status_t error = B_OK; 5335 5336 // iterate through all concerned areas 5337 addr_t nextAddress = lockBaseAddress; 5338 while (nextAddress != lockEndAddress) { 5339 // get the next area 5340 VMArea* area = addressSpace->LookupArea(nextAddress); 5341 if (area == NULL) { 5342 error = B_BAD_ADDRESS; 5343 break; 5344 } 5345 5346 addr_t areaStart = nextAddress; 5347 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5348 5349 // allocate the wired range (do that before locking the cache to avoid 5350 // deadlocks) 5351 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5352 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5353 if (range == NULL) { 5354 error = B_NO_MEMORY; 5355 break; 5356 } 5357 5358 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5359 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5360 5361 // mark the area range wired 5362 area->Wire(range); 5363 5364 // Depending on the area cache type and the wiring, we may not need to 5365 // look at the individual pages. 5366 if (area->cache_type == CACHE_TYPE_NULL 5367 || area->cache_type == CACHE_TYPE_DEVICE 5368 || area->wiring == B_FULL_LOCK 5369 || area->wiring == B_CONTIGUOUS) { 5370 nextAddress = areaEnd; 5371 continue; 5372 } 5373 5374 // Lock the area's cache chain and the translation map. Needed to look 5375 // up pages and play with their wired count. 5376 cacheChainLocker.LockAllSourceCaches(); 5377 map->Lock(); 5378 5379 // iterate through the pages and wire them 5380 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5381 phys_addr_t physicalAddress; 5382 uint32 flags; 5383 5384 vm_page* page; 5385 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5386 && (flags & requiredProtection) == requiredProtection 5387 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5388 != NULL) { 5389 // Already mapped with the correct permissions -- just increment 5390 // the page's wired count. 5391 increment_page_wired_count(page); 5392 } else { 5393 // Let vm_soft_fault() map the page for us, if possible. 
We need 5394 // to fully unlock to avoid deadlocks. Since we have already 5395 // wired the area itself, nothing disturbing will happen with it 5396 // in the meantime. 5397 map->Unlock(); 5398 cacheChainLocker.Unlock(); 5399 addressSpaceLocker.Unlock(); 5400 5401 error = vm_soft_fault(addressSpace, nextAddress, writable, 5402 isUser, &page, range); 5403 5404 addressSpaceLocker.Lock(); 5405 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5406 cacheChainLocker.LockAllSourceCaches(); 5407 map->Lock(); 5408 } 5409 5410 if (error != B_OK) 5411 break; 5412 } 5413 5414 map->Unlock(); 5415 5416 if (error == B_OK) { 5417 cacheChainLocker.Unlock(); 5418 } else { 5419 // An error occurred, so abort right here. If the current address 5420 // is the first in this area, unwire the area, since we won't get 5421 // to it when reverting what we've done so far. 5422 if (nextAddress == areaStart) { 5423 area->Unwire(range); 5424 cacheChainLocker.Unlock(); 5425 range->~VMAreaWiredRange(); 5426 free_etc(range, mallocFlags); 5427 } else 5428 cacheChainLocker.Unlock(); 5429 5430 break; 5431 } 5432 } 5433 5434 if (error != B_OK) { 5435 // An error occurred, so unwire all that we've already wired. Note that 5436 // even if not a single page was wired, unlock_memory_etc() is called 5437 // to put the address space reference. 5438 addressSpaceLocker.Unlock(); 5439 unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress, 5440 flags); 5441 } 5442 5443 return error; 5444 } 5445 5446 5447 status_t 5448 lock_memory(void* address, size_t numBytes, uint32 flags) 5449 { 5450 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5451 } 5452 5453 5454 /*! Unwires an address range previously wired with lock_memory_etc(). 5455 5456 Note that a call to this function must balance a previous lock_memory_etc() 5457 call with exactly the same parameters. 5458 */ 5459 status_t 5460 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5461 { 5462 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5463 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5464 5465 // compute the page protection that is required 5466 bool isUser = IS_USER_ADDRESS(address); 5467 bool writable = (flags & B_READ_DEVICE) == 0; 5468 uint32 requiredProtection = PAGE_PRESENT 5469 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5470 if (writable) 5471 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5472 5473 uint32 mallocFlags = isUser 5474 ? 
0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5475 5476 // get and read lock the address space 5477 VMAddressSpace* addressSpace = NULL; 5478 if (isUser) { 5479 if (team == B_CURRENT_TEAM) 5480 addressSpace = VMAddressSpace::GetCurrent(); 5481 else 5482 addressSpace = VMAddressSpace::Get(team); 5483 } else 5484 addressSpace = VMAddressSpace::GetKernel(); 5485 if (addressSpace == NULL) 5486 return B_ERROR; 5487 5488 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5489 5490 VMTranslationMap* map = addressSpace->TranslationMap(); 5491 status_t error = B_OK; 5492 5493 // iterate through all concerned areas 5494 addr_t nextAddress = lockBaseAddress; 5495 while (nextAddress != lockEndAddress) { 5496 // get the next area 5497 VMArea* area = addressSpace->LookupArea(nextAddress); 5498 if (area == NULL) { 5499 error = B_BAD_ADDRESS; 5500 break; 5501 } 5502 5503 addr_t areaStart = nextAddress; 5504 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5505 5506 // Lock the area's top cache. This is a requirement for 5507 // VMArea::Unwire(). 5508 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5509 5510 // Depending on the area cache type and the wiring, we may not need to 5511 // look at the individual pages. 5512 if (area->cache_type == CACHE_TYPE_NULL 5513 || area->cache_type == CACHE_TYPE_DEVICE 5514 || area->wiring == B_FULL_LOCK 5515 || area->wiring == B_CONTIGUOUS) { 5516 // unwire the range (to avoid deadlocks we delete the range after 5517 // unlocking the cache) 5518 nextAddress = areaEnd; 5519 VMAreaWiredRange* range = area->Unwire(areaStart, 5520 areaEnd - areaStart, writable); 5521 cacheChainLocker.Unlock(); 5522 if (range != NULL) { 5523 range->~VMAreaWiredRange(); 5524 free_etc(range, mallocFlags); 5525 } 5526 continue; 5527 } 5528 5529 // Lock the area's cache chain and the translation map. Needed to look 5530 // up pages and play with their wired count. 5531 cacheChainLocker.LockAllSourceCaches(); 5532 map->Lock(); 5533 5534 // iterate through the pages and unwire them 5535 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5536 phys_addr_t physicalAddress; 5537 uint32 flags; 5538 5539 vm_page* page; 5540 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5541 && (flags & PAGE_PRESENT) != 0 5542 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5543 != NULL) { 5544 // The page is still mapped -- just decrement 5545 // the page's wired count. 5546 decrement_page_wired_count(page); 5547 } else { 5548 panic("unlock_memory_etc(): Failed to unwire page: address " 5549 "space %p, address: %#" B_PRIxADDR, addressSpace, 5550 nextAddress); 5551 error = B_BAD_VALUE; 5552 break; 5553 } 5554 } 5555 5556 map->Unlock(); 5557 5558 // All pages are unwired. Remove the area's wired range as well (to 5559 // avoid deadlocks we delete the range after unlocking the cache). 5560 VMAreaWiredRange* range = area->Unwire(areaStart, 5561 areaEnd - areaStart, writable); 5562 5563 cacheChainLocker.Unlock(); 5564 5565 if (range != NULL) { 5566 range->~VMAreaWiredRange(); 5567 free_etc(range, mallocFlags); 5568 } 5569 5570 if (error != B_OK) 5571 break; 5572 } 5573 5574 // get rid of the address space reference 5575 addressSpace->Put(); 5576 5577 return error; 5578 } 5579 5580 5581 status_t 5582 unlock_memory(void* address, size_t numBytes, uint32 flags) 5583 { 5584 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5585 } 5586 5587 5588 /*!
Similar to get_memory_map(), but also allows to specify the address space 5589 for the memory in question and has a saner semantics. 5590 Returns \c B_OK when the complete range could be translated or 5591 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5592 case the actual number of entries is written to \c *_numEntries. Any other 5593 error case indicates complete failure; \c *_numEntries will be set to \c 0 5594 in this case. 5595 */ 5596 status_t 5597 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5598 physical_entry* table, uint32* _numEntries) 5599 { 5600 uint32 numEntries = *_numEntries; 5601 *_numEntries = 0; 5602 5603 VMAddressSpace* addressSpace; 5604 addr_t virtualAddress = (addr_t)address; 5605 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5606 phys_addr_t physicalAddress; 5607 status_t status = B_OK; 5608 int32 index = -1; 5609 addr_t offset = 0; 5610 bool interrupts = are_interrupts_enabled(); 5611 5612 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 5613 "entries)\n", team, address, numBytes, numEntries)); 5614 5615 if (numEntries == 0 || numBytes == 0) 5616 return B_BAD_VALUE; 5617 5618 // in which address space is the address to be found? 5619 if (IS_USER_ADDRESS(virtualAddress)) { 5620 if (team == B_CURRENT_TEAM) 5621 addressSpace = VMAddressSpace::GetCurrent(); 5622 else 5623 addressSpace = VMAddressSpace::Get(team); 5624 } else 5625 addressSpace = VMAddressSpace::GetKernel(); 5626 5627 if (addressSpace == NULL) 5628 return B_ERROR; 5629 5630 VMTranslationMap* map = addressSpace->TranslationMap(); 5631 5632 if (interrupts) 5633 map->Lock(); 5634 5635 while (offset < numBytes) { 5636 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5637 uint32 flags; 5638 5639 if (interrupts) { 5640 status = map->Query((addr_t)address + offset, &physicalAddress, 5641 &flags); 5642 } else { 5643 status = map->QueryInterrupt((addr_t)address + offset, 5644 &physicalAddress, &flags); 5645 } 5646 if (status < B_OK) 5647 break; 5648 if ((flags & PAGE_PRESENT) == 0) { 5649 panic("get_memory_map() called on unmapped memory!"); 5650 return B_BAD_ADDRESS; 5651 } 5652 5653 if (index < 0 && pageOffset > 0) { 5654 physicalAddress += pageOffset; 5655 if (bytes > B_PAGE_SIZE - pageOffset) 5656 bytes = B_PAGE_SIZE - pageOffset; 5657 } 5658 5659 // need to switch to the next physical_entry? 5660 if (index < 0 || table[index].address 5661 != physicalAddress - table[index].size) { 5662 if ((uint32)++index + 1 > numEntries) { 5663 // table to small 5664 break; 5665 } 5666 table[index].address = physicalAddress; 5667 table[index].size = bytes; 5668 } else { 5669 // page does fit in current entry 5670 table[index].size += bytes; 5671 } 5672 5673 offset += bytes; 5674 } 5675 5676 if (interrupts) 5677 map->Unlock(); 5678 5679 if (status != B_OK) 5680 return status; 5681 5682 if ((uint32)index + 1 > numEntries) { 5683 *_numEntries = index; 5684 return B_BUFFER_OVERFLOW; 5685 } 5686 5687 *_numEntries = index + 1; 5688 return B_OK; 5689 } 5690 5691 5692 /*! According to the BeBook, this function should always succeed. 5693 This is no longer the case. 
5694 */ 5695 extern "C" int32 5696 __get_memory_map_haiku(const void* address, size_t numBytes, 5697 physical_entry* table, int32 numEntries) 5698 { 5699 uint32 entriesRead = numEntries; 5700 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5701 table, &entriesRead); 5702 if (error != B_OK) 5703 return error; 5704 5705 // close the entry list 5706 5707 // if it's only one entry, we will silently accept the missing ending 5708 if (numEntries == 1) 5709 return B_OK; 5710 5711 if (entriesRead + 1 > (uint32)numEntries) 5712 return B_BUFFER_OVERFLOW; 5713 5714 table[entriesRead].address = 0; 5715 table[entriesRead].size = 0; 5716 5717 return B_OK; 5718 } 5719 5720 5721 area_id 5722 area_for(void* address) 5723 { 5724 return vm_area_for((addr_t)address, true); 5725 } 5726 5727 5728 area_id 5729 find_area(const char* name) 5730 { 5731 return VMAreaHash::Find(name); 5732 } 5733 5734 5735 status_t 5736 _get_area_info(area_id id, area_info* info, size_t size) 5737 { 5738 if (size != sizeof(area_info) || info == NULL) 5739 return B_BAD_VALUE; 5740 5741 AddressSpaceReadLocker locker; 5742 VMArea* area; 5743 status_t status = locker.SetFromArea(id, area); 5744 if (status != B_OK) 5745 return status; 5746 5747 fill_area_info(area, info, size); 5748 return B_OK; 5749 } 5750 5751 5752 status_t 5753 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 5754 { 5755 addr_t nextBase = *(addr_t*)cookie; 5756 5757 // we're already through the list 5758 if (nextBase == (addr_t)-1) 5759 return B_ENTRY_NOT_FOUND; 5760 5761 if (team == B_CURRENT_TEAM) 5762 team = team_get_current_team_id(); 5763 5764 AddressSpaceReadLocker locker(team); 5765 if (!locker.IsLocked()) 5766 return B_BAD_TEAM_ID; 5767 5768 VMArea* area; 5769 for (VMAddressSpace::AreaIterator it 5770 = locker.AddressSpace()->GetAreaIterator(); 5771 (area = it.Next()) != NULL;) { 5772 if (area->Base() > nextBase) 5773 break; 5774 } 5775 5776 if (area == NULL) { 5777 nextBase = (addr_t)-1; 5778 return B_ENTRY_NOT_FOUND; 5779 } 5780 5781 fill_area_info(area, info, size); 5782 *cookie = (ssize_t)(area->Base()); 5783 5784 return B_OK; 5785 } 5786 5787 5788 status_t 5789 set_area_protection(area_id area, uint32 newProtection) 5790 { 5791 fix_protection(&newProtection); 5792 5793 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5794 newProtection, true); 5795 } 5796 5797 5798 status_t 5799 resize_area(area_id areaID, size_t newSize) 5800 { 5801 return vm_resize_area(areaID, newSize, true); 5802 } 5803 5804 5805 /*! Transfers the specified area to a new team. The caller must be the owner 5806 of the area. 5807 */ 5808 area_id 5809 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5810 bool kernel) 5811 { 5812 area_info info; 5813 status_t status = get_area_info(id, &info); 5814 if (status != B_OK) 5815 return status; 5816 5817 if (info.team != thread_get_current_thread()->team->id) 5818 return B_PERMISSION_DENIED; 5819 5820 area_id clonedArea = vm_clone_area(target, info.name, _address, 5821 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5822 if (clonedArea < 0) 5823 return clonedArea; 5824 5825 status = vm_delete_area(info.team, id, kernel); 5826 if (status != B_OK) { 5827 vm_delete_area(target, clonedArea, kernel); 5828 return status; 5829 } 5830 5831 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
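	// Note that the transfer is implemented as a clone into the target team
	// followed by deletion of the original area, so the caller gets back a
	// new area ID rather than the one that was passed in.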
5832 5833 return clonedArea; 5834 } 5835 5836 5837 extern "C" area_id 5838 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 5839 size_t numBytes, uint32 addressSpec, uint32 protection, 5840 void** _virtualAddress) 5841 { 5842 if (!arch_vm_supports_protection(protection)) 5843 return B_NOT_SUPPORTED; 5844 5845 fix_protection(&protection); 5846 5847 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5848 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 5849 false); 5850 } 5851 5852 5853 area_id 5854 clone_area(const char* name, void** _address, uint32 addressSpec, 5855 uint32 protection, area_id source) 5856 { 5857 if ((protection & B_KERNEL_PROTECTION) == 0) 5858 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5859 5860 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5861 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5862 } 5863 5864 5865 area_id 5866 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock, 5867 uint32 protection, uint32 flags, uint32 guardSize, 5868 const virtual_address_restrictions* virtualAddressRestrictions, 5869 const physical_address_restrictions* physicalAddressRestrictions, 5870 void** _address) 5871 { 5872 fix_protection(&protection); 5873 5874 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 5875 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 5876 true, _address); 5877 } 5878 5879 5880 extern "C" area_id 5881 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 5882 size_t size, uint32 lock, uint32 protection) 5883 { 5884 fix_protection(&protection); 5885 5886 virtual_address_restrictions virtualRestrictions = {}; 5887 virtualRestrictions.address = *_address; 5888 virtualRestrictions.address_specification = addressSpec; 5889 physical_address_restrictions physicalRestrictions = {}; 5890 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 5891 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 5892 true, _address); 5893 } 5894 5895 5896 status_t 5897 delete_area(area_id area) 5898 { 5899 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5900 } 5901 5902 5903 // #pragma mark - Userland syscalls 5904 5905 5906 status_t 5907 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5908 addr_t size) 5909 { 5910 // filter out some unavailable values (for userland) 5911 switch (addressSpec) { 5912 case B_ANY_KERNEL_ADDRESS: 5913 case B_ANY_KERNEL_BLOCK_ADDRESS: 5914 return B_BAD_VALUE; 5915 } 5916 5917 addr_t address; 5918 5919 if (!IS_USER_ADDRESS(userAddress) 5920 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5921 return B_BAD_ADDRESS; 5922 5923 status_t status = vm_reserve_address_range( 5924 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5925 RESERVED_AVOID_BASE); 5926 if (status != B_OK) 5927 return status; 5928 5929 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5930 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5931 (void*)address, size); 5932 return B_BAD_ADDRESS; 5933 } 5934 5935 return B_OK; 5936 } 5937 5938 5939 status_t 5940 _user_unreserve_address_range(addr_t address, addr_t size) 5941 { 5942 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5943 (void*)address, size); 5944 } 5945 5946 5947 area_id 5948 _user_area_for(void* address) 5949 { 5950 return vm_area_for((addr_t)address, false); 5951 } 5952 5953 5954 area_id 5955 
_user_find_area(const char* userName) 5956 { 5957 char name[B_OS_NAME_LENGTH]; 5958 5959 if (!IS_USER_ADDRESS(userName) 5960 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5961 return B_BAD_ADDRESS; 5962 5963 return find_area(name); 5964 } 5965 5966 5967 status_t 5968 _user_get_area_info(area_id area, area_info* userInfo) 5969 { 5970 if (!IS_USER_ADDRESS(userInfo)) 5971 return B_BAD_ADDRESS; 5972 5973 area_info info; 5974 status_t status = get_area_info(area, &info); 5975 if (status < B_OK) 5976 return status; 5977 5978 // TODO: do we want to prevent userland from seeing kernel protections? 5979 //info.protection &= B_USER_PROTECTION; 5980 5981 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5982 return B_BAD_ADDRESS; 5983 5984 return status; 5985 } 5986 5987 5988 status_t 5989 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 5990 { 5991 ssize_t cookie; 5992 5993 if (!IS_USER_ADDRESS(userCookie) 5994 || !IS_USER_ADDRESS(userInfo) 5995 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 5996 return B_BAD_ADDRESS; 5997 5998 area_info info; 5999 status_t status = _get_next_area_info(team, &cookie, &info, 6000 sizeof(area_info)); 6001 if (status != B_OK) 6002 return status; 6003 6004 //info.protection &= B_USER_PROTECTION; 6005 6006 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6007 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6008 return B_BAD_ADDRESS; 6009 6010 return status; 6011 } 6012 6013 6014 status_t 6015 _user_set_area_protection(area_id area, uint32 newProtection) 6016 { 6017 if ((newProtection & ~B_USER_PROTECTION) != 0) 6018 return B_BAD_VALUE; 6019 6020 fix_protection(&newProtection); 6021 6022 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6023 newProtection, false); 6024 } 6025 6026 6027 status_t 6028 _user_resize_area(area_id area, size_t newSize) 6029 { 6030 // TODO: Since we restrict deleting of areas to those owned by the team, 6031 // we should also do that for resizing (check other functions, too). 
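	// The trailing "false" marks this as a userland request; compare
	// resize_area() above, which passes "true" for kernel callers.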
6032 return vm_resize_area(area, newSize, false); 6033 } 6034 6035 6036 area_id 6037 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6038 team_id target) 6039 { 6040 // filter out some unavailable values (for userland) 6041 switch (addressSpec) { 6042 case B_ANY_KERNEL_ADDRESS: 6043 case B_ANY_KERNEL_BLOCK_ADDRESS: 6044 return B_BAD_VALUE; 6045 } 6046 6047 void* address; 6048 if (!IS_USER_ADDRESS(userAddress) 6049 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6050 return B_BAD_ADDRESS; 6051 6052 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6053 if (newArea < B_OK) 6054 return newArea; 6055 6056 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6057 return B_BAD_ADDRESS; 6058 6059 return newArea; 6060 } 6061 6062 6063 area_id 6064 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6065 uint32 protection, area_id sourceArea) 6066 { 6067 char name[B_OS_NAME_LENGTH]; 6068 void* address; 6069 6070 // filter out some unavailable values (for userland) 6071 switch (addressSpec) { 6072 case B_ANY_KERNEL_ADDRESS: 6073 case B_ANY_KERNEL_BLOCK_ADDRESS: 6074 return B_BAD_VALUE; 6075 } 6076 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6077 return B_BAD_VALUE; 6078 6079 if (!IS_USER_ADDRESS(userName) 6080 || !IS_USER_ADDRESS(userAddress) 6081 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6082 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6083 return B_BAD_ADDRESS; 6084 6085 fix_protection(&protection); 6086 6087 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6088 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6089 false); 6090 if (clonedArea < B_OK) 6091 return clonedArea; 6092 6093 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6094 delete_area(clonedArea); 6095 return B_BAD_ADDRESS; 6096 } 6097 6098 return clonedArea; 6099 } 6100 6101 6102 area_id 6103 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6104 size_t size, uint32 lock, uint32 protection) 6105 { 6106 char name[B_OS_NAME_LENGTH]; 6107 void* address; 6108 6109 // filter out some unavailable values (for userland) 6110 switch (addressSpec) { 6111 case B_ANY_KERNEL_ADDRESS: 6112 case B_ANY_KERNEL_BLOCK_ADDRESS: 6113 return B_BAD_VALUE; 6114 } 6115 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6116 return B_BAD_VALUE; 6117 6118 if (!IS_USER_ADDRESS(userName) 6119 || !IS_USER_ADDRESS(userAddress) 6120 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6121 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6122 return B_BAD_ADDRESS; 6123 6124 if (addressSpec == B_EXACT_ADDRESS 6125 && IS_KERNEL_ADDRESS(address)) 6126 return B_BAD_VALUE; 6127 6128 fix_protection(&protection); 6129 6130 virtual_address_restrictions virtualRestrictions = {}; 6131 virtualRestrictions.address = address; 6132 virtualRestrictions.address_specification = addressSpec; 6133 physical_address_restrictions physicalRestrictions = {}; 6134 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6135 size, lock, protection, 0, 0, &virtualRestrictions, 6136 &physicalRestrictions, false, &address); 6137 6138 if (area >= B_OK 6139 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6140 delete_area(area); 6141 return B_BAD_ADDRESS; 6142 } 6143 6144 return area; 6145 } 6146 6147 6148 status_t 6149 _user_delete_area(area_id area) 6150 { 6151 // Unlike the BeOS implementation, you can now only delete areas 6152 
// that you have created yourself from userland. 6153 // The documentation to delete_area() explicitly states that this 6154 // will be restricted in the future, and so it will. 6155 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6156 } 6157 6158 6159 // TODO: create a BeOS style call for this! 6160 6161 area_id 6162 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6163 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6164 int fd, off_t offset) 6165 { 6166 char name[B_OS_NAME_LENGTH]; 6167 void* address; 6168 area_id area; 6169 6170 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6171 return B_BAD_VALUE; 6172 6173 fix_protection(&protection); 6174 6175 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6176 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6177 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6178 return B_BAD_ADDRESS; 6179 6180 if (addressSpec == B_EXACT_ADDRESS) { 6181 if ((addr_t)address + size < (addr_t)address 6182 || (addr_t)address % B_PAGE_SIZE != 0) { 6183 return B_BAD_VALUE; 6184 } 6185 if (!IS_USER_ADDRESS(address) 6186 || !IS_USER_ADDRESS((addr_t)address + size)) { 6187 return B_BAD_ADDRESS; 6188 } 6189 } 6190 6191 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6192 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6193 false); 6194 if (area < B_OK) 6195 return area; 6196 6197 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6198 return B_BAD_ADDRESS; 6199 6200 return area; 6201 } 6202 6203 6204 status_t 6205 _user_unmap_memory(void* _address, size_t size) 6206 { 6207 addr_t address = (addr_t)_address; 6208 6209 // check params 6210 if (size == 0 || (addr_t)address + size < (addr_t)address 6211 || (addr_t)address % B_PAGE_SIZE != 0) { 6212 return B_BAD_VALUE; 6213 } 6214 6215 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6216 return B_BAD_ADDRESS; 6217 6218 // Write lock the address space and ensure the address range is not wired. 6219 AddressSpaceWriteLocker locker; 6220 do { 6221 status_t status = locker.SetTo(team_get_current_team_id()); 6222 if (status != B_OK) 6223 return status; 6224 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6225 size, &locker)); 6226 6227 // unmap 6228 return unmap_address_range(locker.AddressSpace(), address, size, false); 6229 } 6230 6231 6232 status_t 6233 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6234 { 6235 // check address range 6236 addr_t address = (addr_t)_address; 6237 size = PAGE_ALIGN(size); 6238 6239 if ((address % B_PAGE_SIZE) != 0) 6240 return B_BAD_VALUE; 6241 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6242 || !IS_USER_ADDRESS((addr_t)address + size)) { 6243 // weird error code required by POSIX 6244 return ENOMEM; 6245 } 6246 6247 // extend and check protection 6248 if ((protection & ~B_USER_PROTECTION) != 0) 6249 return B_BAD_VALUE; 6250 6251 fix_protection(&protection); 6252 6253 // We need to write lock the address space, since we're going to play with 6254 // the areas. Also make sure that none of the areas is wired and that we're 6255 // actually allowed to change the protection. 
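	// This happens in two rounds below: the first verifies that the whole
	// range is covered by areas we are allowed to modify (waiting for wired
	// ranges to disappear), the second installs the per-page protections and
	// adjusts the existing mappings.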
6256 AddressSpaceWriteLocker locker; 6257 6258 bool restart; 6259 do { 6260 restart = false; 6261 6262 status_t status = locker.SetTo(team_get_current_team_id()); 6263 if (status != B_OK) 6264 return status; 6265 6266 // First round: Check whether the whole range is covered by areas and we 6267 // are allowed to modify them. 6268 addr_t currentAddress = address; 6269 size_t sizeLeft = size; 6270 while (sizeLeft > 0) { 6271 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6272 if (area == NULL) 6273 return B_NO_MEMORY; 6274 6275 if ((area->protection & B_KERNEL_AREA) != 0) 6276 return B_NOT_ALLOWED; 6277 6278 // TODO: For (shared) mapped files we should check whether the new 6279 // protections are compatible with the file permissions. We don't 6280 // have a way to do that yet, though. 6281 6282 addr_t offset = currentAddress - area->Base(); 6283 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6284 6285 AreaCacheLocker cacheLocker(area); 6286 6287 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6288 &locker, &cacheLocker)) { 6289 restart = true; 6290 break; 6291 } 6292 6293 cacheLocker.Unlock(); 6294 6295 currentAddress += rangeSize; 6296 sizeLeft -= rangeSize; 6297 } 6298 } while (restart); 6299 6300 // Second round: If the protections differ from that of the area, create a 6301 // page protection array and re-map mapped pages. 6302 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6303 addr_t currentAddress = address; 6304 size_t sizeLeft = size; 6305 while (sizeLeft > 0) { 6306 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6307 if (area == NULL) 6308 return B_NO_MEMORY; 6309 6310 addr_t offset = currentAddress - area->Base(); 6311 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6312 6313 currentAddress += rangeSize; 6314 sizeLeft -= rangeSize; 6315 6316 if (area->page_protections == NULL) { 6317 if (area->protection == protection) 6318 continue; 6319 6320 status_t status = allocate_area_page_protections(area); 6321 if (status != B_OK) 6322 return status; 6323 } 6324 6325 // We need to lock the complete cache chain, since we potentially unmap 6326 // pages of lower caches. 6327 VMCache* topCache = vm_area_get_locked_cache(area); 6328 VMCacheChainLocker cacheChainLocker(topCache); 6329 cacheChainLocker.LockAllSourceCaches(); 6330 6331 for (addr_t pageAddress = area->Base() + offset; 6332 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6333 map->Lock(); 6334 6335 set_area_page_protection(area, pageAddress, protection); 6336 6337 phys_addr_t physicalAddress; 6338 uint32 flags; 6339 6340 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6341 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6342 map->Unlock(); 6343 continue; 6344 } 6345 6346 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6347 if (page == NULL) { 6348 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6349 "\n", area, physicalAddress); 6350 map->Unlock(); 6351 return B_ERROR; 6352 } 6353 6354 // If the page is not in the topmost cache and write access is 6355 // requested, we have to unmap it. Otherwise we can re-map it with 6356 // the new protection. 
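			// Unmapping the page here means the next write access faults and
			// the page is first copied into the topmost cache (copy-on-write),
			// so the lower cache's page is never exposed writable.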
6357 bool unmapPage = page->Cache() != topCache 6358 && (protection & B_WRITE_AREA) != 0; 6359 6360 if (!unmapPage) 6361 map->ProtectPage(area, pageAddress, protection); 6362 6363 map->Unlock(); 6364 6365 if (unmapPage) { 6366 DEBUG_PAGE_ACCESS_START(page); 6367 unmap_page(area, pageAddress); 6368 DEBUG_PAGE_ACCESS_END(page); 6369 } 6370 } 6371 } 6372 6373 return B_OK; 6374 } 6375 6376 6377 status_t 6378 _user_sync_memory(void* _address, size_t size, uint32 flags) 6379 { 6380 addr_t address = (addr_t)_address; 6381 size = PAGE_ALIGN(size); 6382 6383 // check params 6384 if ((address % B_PAGE_SIZE) != 0) 6385 return B_BAD_VALUE; 6386 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6387 || !IS_USER_ADDRESS((addr_t)address + size)) { 6388 // weird error code required by POSIX 6389 return ENOMEM; 6390 } 6391 6392 bool writeSync = (flags & MS_SYNC) != 0; 6393 bool writeAsync = (flags & MS_ASYNC) != 0; 6394 if (writeSync && writeAsync) 6395 return B_BAD_VALUE; 6396 6397 if (size == 0 || (!writeSync && !writeAsync)) 6398 return B_OK; 6399 6400 // iterate through the range and sync all concerned areas 6401 while (size > 0) { 6402 // read lock the address space 6403 AddressSpaceReadLocker locker; 6404 status_t error = locker.SetTo(team_get_current_team_id()); 6405 if (error != B_OK) 6406 return error; 6407 6408 // get the first area 6409 VMArea* area = locker.AddressSpace()->LookupArea(address); 6410 if (area == NULL) 6411 return B_NO_MEMORY; 6412 6413 uint32 offset = address - area->Base(); 6414 size_t rangeSize = min_c(area->Size() - offset, size); 6415 offset += area->cache_offset; 6416 6417 // lock the cache 6418 AreaCacheLocker cacheLocker(area); 6419 if (!cacheLocker) 6420 return B_BAD_VALUE; 6421 VMCache* cache = area->cache; 6422 6423 locker.Unlock(); 6424 6425 uint32 firstPage = offset >> PAGE_SHIFT; 6426 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6427 6428 // write the pages 6429 if (cache->type == CACHE_TYPE_VNODE) { 6430 if (writeSync) { 6431 // synchronous 6432 error = vm_page_write_modified_page_range(cache, firstPage, 6433 endPage); 6434 if (error != B_OK) 6435 return error; 6436 } else { 6437 // asynchronous 6438 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6439 // TODO: This is probably not quite what is supposed to happen. 6440 // Especially when a lot has to be written, it might take ages 6441 // until it really hits the disk. 6442 } 6443 } 6444 6445 address += rangeSize; 6446 size -= rangeSize; 6447 } 6448 6449 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6450 // synchronize multiple mappings of the same file. In our VM they never get 6451 // out of sync, though, so we don't have to do anything. 6452 6453 return B_OK; 6454 } 6455 6456 6457 status_t 6458 _user_memory_advice(void* address, size_t size, uint32 advice) 6459 { 6460 // TODO: Implement! 
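	// Memory advice is only a hint, so returning B_OK keeps madvise()-style
	// callers working until this is actually implemented.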
6461 return B_OK; 6462 } 6463 6464 6465 status_t 6466 _user_get_memory_properties(team_id teamID, const void* address, 6467 uint32* _protected, uint32* _lock) 6468 { 6469 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6470 return B_BAD_ADDRESS; 6471 6472 AddressSpaceReadLocker locker; 6473 status_t error = locker.SetTo(teamID); 6474 if (error != B_OK) 6475 return error; 6476 6477 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6478 if (area == NULL) 6479 return B_NO_MEMORY; 6480 6481 6482 uint32 protection = area->protection; 6483 if (area->page_protections != NULL) 6484 protection = get_area_page_protection(area, (addr_t)address); 6485 6486 uint32 wiring = area->wiring; 6487 6488 locker.Unlock(); 6489 6490 error = user_memcpy(_protected, &protection, sizeof(protection)); 6491 if (error != B_OK) 6492 return error; 6493 6494 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6495 6496 return error; 6497 } 6498 6499 6500 // #pragma mark -- compatibility 6501 6502 6503 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6504 6505 6506 struct physical_entry_beos { 6507 uint32 address; 6508 uint32 size; 6509 }; 6510 6511 6512 /*! The physical_entry structure has changed. We need to translate it to the 6513 old one. 6514 */ 6515 extern "C" int32 6516 __get_memory_map_beos(const void* _address, size_t numBytes, 6517 physical_entry_beos* table, int32 numEntries) 6518 { 6519 if (numEntries <= 0) 6520 return B_BAD_VALUE; 6521 6522 const uint8* address = (const uint8*)_address; 6523 6524 int32 count = 0; 6525 while (numBytes > 0 && count < numEntries) { 6526 physical_entry entry; 6527 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 6528 if (result < 0) { 6529 if (result != B_BUFFER_OVERFLOW) 6530 return result; 6531 } 6532 6533 if (entry.address >= (phys_addr_t)1 << 32) { 6534 panic("get_memory_map(): Address is greater 4 GB!"); 6535 return B_ERROR; 6536 } 6537 6538 table[count].address = entry.address; 6539 table[count++].size = entry.size; 6540 6541 address += entry.size; 6542 numBytes -= entry.size; 6543 } 6544 6545 // null-terminate the table, if possible 6546 if (count < numEntries) { 6547 table[count].address = 0; 6548 table[count].size = 0; 6549 } 6550 6551 return B_OK; 6552 } 6553 6554 6555 /*! The type of the \a physicalAddress parameter has changed from void* to 6556 phys_addr_t. 6557 */ 6558 extern "C" area_id 6559 __map_physical_memory_beos(const char* name, void* physicalAddress, 6560 size_t numBytes, uint32 addressSpec, uint32 protection, 6561 void** _virtualAddress) 6562 { 6563 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 6564 addressSpec, protection, _virtualAddress); 6565 } 6566 6567 6568 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 6569 we meddle with the \a lock parameter to force 32 bit. 
6570 */ 6571 extern "C" area_id 6572 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 6573 size_t size, uint32 lock, uint32 protection) 6574 { 6575 switch (lock) { 6576 case B_NO_LOCK: 6577 break; 6578 case B_FULL_LOCK: 6579 case B_LAZY_LOCK: 6580 lock = B_32_BIT_FULL_LOCK; 6581 break; 6582 case B_CONTIGUOUS: 6583 lock = B_32_BIT_CONTIGUOUS; 6584 break; 6585 } 6586 6587 return __create_area_haiku(name, _address, addressSpec, size, lock, 6588 protection); 6589 } 6590 6591 6592 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 6593 "BASE"); 6594 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 6595 "map_physical_memory@", "BASE"); 6596 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 6597 "BASE"); 6598 6599 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6600 "get_memory_map@@", "1_ALPHA3"); 6601 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6602 "map_physical_memory@@", "1_ALPHA3"); 6603 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6604 "1_ALPHA3"); 6605 6606 6607 #else 6608 6609 6610 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6611 "get_memory_map@@", "BASE"); 6612 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6613 "map_physical_memory@@", "BASE"); 6614 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6615 "BASE"); 6616 6617 6618 #endif // defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6619
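

// Illustrative sketch (not part of the build): how a driver might combine the
// wiring and translation functions implemented above for a DMA transfer. The
// function name prepare_dma() and the fixed table size are made up for this
// example; the unlock call must mirror the lock call exactly.
//
//	static status_t
//	prepare_dma(team_id team, void* buffer, size_t length)
//	{
//		status_t status = lock_memory_etc(team, buffer, length, B_READ_DEVICE);
//		if (status != B_OK)
//			return status;
//
//		physical_entry table[8];
//		uint32 entries = 8;
//		status = get_memory_map_etc(team, buffer, length, table, &entries);
//		if (status != B_OK) {
//			// B_BUFFER_OVERFLOW means the range needs more than 8 runs; a
//			// real driver would retry with a larger table.
//			unlock_memory_etc(team, buffer, length, B_READ_DEVICE);
//			return status;
//		}
//
//		// ... program the device with table[0 .. entries - 1] ...
//
//		return unlock_memory_etc(team, buffer, length, B_READ_DEVICE);
//	}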