1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <boot/elf.h> 31 #include <boot/stage2.h> 32 #include <condition_variable.h> 33 #include <console.h> 34 #include <debug.h> 35 #include <file_cache.h> 36 #include <fs/fd.h> 37 #include <heap.h> 38 #include <kernel.h> 39 #include <int.h> 40 #include <lock.h> 41 #include <low_resource_manager.h> 42 #include <slab/Slab.h> 43 #include <smp.h> 44 #include <system_info.h> 45 #include <thread.h> 46 #include <team.h> 47 #include <tracing.h> 48 #include <util/AutoLock.h> 49 #include <util/khash.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_priv.h> 52 #include <vm/VMAddressSpace.h> 53 #include <vm/VMArea.h> 54 #include <vm/VMCache.h> 55 56 #include "VMAddressSpaceLocking.h" 57 #include "VMAnonymousCache.h" 58 #include "VMAnonymousNoSwapCache.h" 59 #include "IORequest.h" 60 61 62 //#define TRACE_VM 63 //#define TRACE_FAULTS 64 #ifdef TRACE_VM 65 # define TRACE(x) dprintf x 66 #else 67 # define TRACE(x) ; 68 #endif 69 #ifdef TRACE_FAULTS 70 # define FTRACE(x) dprintf x 71 #else 72 # define FTRACE(x) ; 73 #endif 74 75 76 class AreaCacheLocking { 77 public: 78 inline bool Lock(VMCache* lockable) 79 { 80 return false; 81 } 82 83 inline void Unlock(VMCache* lockable) 84 { 85 vm_area_put_locked_cache(lockable); 86 } 87 }; 88 89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 90 public: 91 inline AreaCacheLocker(VMCache* cache = NULL) 92 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 93 { 94 } 95 96 inline AreaCacheLocker(VMArea* area) 97 : AutoLocker<VMCache, AreaCacheLocking>() 98 { 99 SetTo(area); 100 } 101 102 inline void SetTo(VMCache* cache, bool alreadyLocked) 103 { 104 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 105 } 106 107 inline void SetTo(VMArea* area) 108 { 109 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 110 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 111 } 112 }; 113 114 115 class VMCacheChainLocker { 116 public: 117 VMCacheChainLocker() 118 : 119 fTopCache(NULL), 120 fBottomCache(NULL) 121 { 122 } 123 124 VMCacheChainLocker(VMCache* topCache) 125 : 126 fTopCache(topCache), 127 fBottomCache(topCache) 128 { 129 } 130 131 ~VMCacheChainLocker() 132 { 133 Unlock(); 134 } 135 136 void SetTo(VMCache* topCache) 137 { 138 fTopCache = topCache; 139 fBottomCache = topCache; 140 141 if (topCache != NULL) 142 topCache->SetUserData(NULL); 143 } 144 145 VMCache* LockSourceCache() 146 { 147 if (fBottomCache == NULL || fBottomCache->source == NULL) 148 return NULL; 149 150 VMCache* previousCache = fBottomCache; 151 152 fBottomCache = fBottomCache->source; 153 fBottomCache->Lock(); 154 fBottomCache->AcquireRefLocked(); 155 fBottomCache->SetUserData(previousCache); 156 157 return fBottomCache; 158 } 159 160 void LockAllSourceCaches() 161 { 162 while (LockSourceCache() != NULL) { 163 } 164 } 165 166 void Unlock(VMCache* exceptCache = NULL) 167 { 168 if (fTopCache == NULL) 169 return; 170 171 // Unlock caches in source -> consumer direction. This is important to 172 // avoid double-locking and a reversal of locking order in case a cache 173 // is eligable for merging. 174 VMCache* cache = fBottomCache; 175 while (cache != NULL) { 176 VMCache* nextCache = (VMCache*)cache->UserData(); 177 if (cache != exceptCache) 178 cache->ReleaseRefAndUnlock(cache != fTopCache); 179 180 if (cache == fTopCache) 181 break; 182 183 cache = nextCache; 184 } 185 186 fTopCache = NULL; 187 fBottomCache = NULL; 188 } 189 190 void UnlockKeepRefs(bool keepTopCacheLocked) 191 { 192 if (fTopCache == NULL) 193 return; 194 195 VMCache* nextCache = fBottomCache; 196 VMCache* cache = NULL; 197 198 while (keepTopCacheLocked 199 ? nextCache != fTopCache : cache != fTopCache) { 200 cache = nextCache; 201 nextCache = (VMCache*)cache->UserData(); 202 cache->Unlock(cache != fTopCache); 203 } 204 } 205 206 void RelockCaches(bool topCacheLocked) 207 { 208 if (fTopCache == NULL) 209 return; 210 211 VMCache* nextCache = fTopCache; 212 VMCache* cache = NULL; 213 if (topCacheLocked) { 214 cache = nextCache; 215 nextCache = cache->source; 216 } 217 218 while (cache != fBottomCache && nextCache != NULL) { 219 VMCache* consumer = cache; 220 cache = nextCache; 221 nextCache = cache->source; 222 cache->Lock(); 223 cache->SetUserData(consumer); 224 } 225 } 226 227 private: 228 VMCache* fTopCache; 229 VMCache* fBottomCache; 230 }; 231 232 233 // The memory reserve an allocation of the certain priority must not touch. 
234 static const size_t kMemoryReserveForPriority[] = { 235 VM_MEMORY_RESERVE_USER, // user 236 VM_MEMORY_RESERVE_SYSTEM, // system 237 0 // VIP 238 }; 239 240 241 ObjectCache* gPageMappingsObjectCache; 242 243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 244 245 static off_t sAvailableMemory; 246 static off_t sNeededMemory; 247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 248 static uint32 sPageFaults; 249 250 static VMPhysicalPageMapper* sPhysicalPageMapper; 251 252 #if DEBUG_CACHE_LIST 253 254 struct cache_info { 255 VMCache* cache; 256 addr_t page_count; 257 addr_t committed; 258 }; 259 260 static const int kCacheInfoTableCount = 100 * 1024; 261 static cache_info* sCacheInfoTable; 262 263 #endif // DEBUG_CACHE_LIST 264 265 266 // function declarations 267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 268 bool addressSpaceCleanup); 269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 270 bool isWrite, bool isUser, vm_page** wirePage, 271 VMAreaWiredRange* wiredRange = NULL); 272 static status_t map_backing_store(VMAddressSpace* addressSpace, 273 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 274 int protection, int mapping, uint32 flags, 275 const virtual_address_restrictions* addressRestrictions, bool kernel, 276 VMArea** _area, void** _virtualAddress); 277 278 279 // #pragma mark - 280 281 282 #if VM_PAGE_FAULT_TRACING 283 284 namespace VMPageFaultTracing { 285 286 class PageFaultStart : public AbstractTraceEntry { 287 public: 288 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 289 : 290 fAddress(address), 291 fPC(pc), 292 fWrite(write), 293 fUser(user) 294 { 295 Initialized(); 296 } 297 298 virtual void AddDump(TraceOutput& out) 299 { 300 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 301 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 302 } 303 304 private: 305 addr_t fAddress; 306 addr_t fPC; 307 bool fWrite; 308 bool fUser; 309 }; 310 311 312 // page fault errors 313 enum { 314 PAGE_FAULT_ERROR_NO_AREA = 0, 315 PAGE_FAULT_ERROR_KERNEL_ONLY, 316 PAGE_FAULT_ERROR_WRITE_PROTECTED, 317 PAGE_FAULT_ERROR_READ_PROTECTED, 318 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 319 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 320 }; 321 322 323 class PageFaultError : public AbstractTraceEntry { 324 public: 325 PageFaultError(area_id area, status_t error) 326 : 327 fArea(area), 328 fError(error) 329 { 330 Initialized(); 331 } 332 333 virtual void AddDump(TraceOutput& out) 334 { 335 switch (fError) { 336 case PAGE_FAULT_ERROR_NO_AREA: 337 out.Print("page fault error: no area"); 338 break; 339 case PAGE_FAULT_ERROR_KERNEL_ONLY: 340 out.Print("page fault error: area: %ld, kernel only", fArea); 341 break; 342 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 343 out.Print("page fault error: area: %ld, write protected", 344 fArea); 345 break; 346 case PAGE_FAULT_ERROR_READ_PROTECTED: 347 out.Print("page fault error: area: %ld, read protected", fArea); 348 break; 349 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 350 out.Print("page fault error: kernel touching bad user memory"); 351 break; 352 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 353 out.Print("page fault error: no address space"); 354 break; 355 default: 356 out.Print("page fault error: area: %ld, error: %s", fArea, 357 strerror(fError)); 358 break; 359 } 360 } 361 362 private: 363 area_id fArea; 364 status_t fError; 365 }; 366 367 368 class PageFaultDone : public AbstractTraceEntry { 369 public: 370 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 371 vm_page* page) 372 : 373 fArea(area), 374 fTopCache(topCache), 375 fCache(cache), 376 fPage(page) 377 { 378 Initialized(); 379 } 380 381 virtual void AddDump(TraceOutput& out) 382 { 383 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 384 "page: %p", fArea, fTopCache, fCache, fPage); 385 } 386 387 private: 388 area_id fArea; 389 VMCache* fTopCache; 390 VMCache* fCache; 391 vm_page* fPage; 392 }; 393 394 } // namespace VMPageFaultTracing 395 396 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 397 #else 398 # define TPF(x) ; 399 #endif // VM_PAGE_FAULT_TRACING 400 401 402 // #pragma mark - 403 404 405 /*! The page's cache must be locked. 406 */ 407 static inline void 408 increment_page_wired_count(vm_page* page) 409 { 410 if (!page->IsMapped()) 411 atomic_add(&gMappedPagesCount, 1); 412 page->IncrementWiredCount(); 413 } 414 415 416 /*! The page's cache must be locked. 417 */ 418 static inline void 419 decrement_page_wired_count(vm_page* page) 420 { 421 page->DecrementWiredCount(); 422 if (!page->IsMapped()) 423 atomic_add(&gMappedPagesCount, -1); 424 } 425 426 427 static inline addr_t 428 virtual_page_address(VMArea* area, vm_page* page) 429 { 430 return area->Base() 431 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 432 } 433 434 435 //! 
You need to have the address space locked when calling this function 436 static VMArea* 437 lookup_area(VMAddressSpace* addressSpace, area_id id) 438 { 439 VMAreaHash::ReadLock(); 440 441 VMArea* area = VMAreaHash::LookupLocked(id); 442 if (area != NULL && area->address_space != addressSpace) 443 area = NULL; 444 445 VMAreaHash::ReadUnlock(); 446 447 return area; 448 } 449 450 451 static status_t 452 allocate_area_page_protections(VMArea* area) 453 { 454 // In the page protections we store only the three user protections, 455 // so we use 4 bits per page. 456 uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 457 area->page_protections = (uint8*)malloc_etc(bytes, 458 HEAP_DONT_LOCK_KERNEL_SPACE); 459 if (area->page_protections == NULL) 460 return B_NO_MEMORY; 461 462 // init the page protections for all pages to that of the area 463 uint32 areaProtection = area->protection 464 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 465 memset(area->page_protections, areaProtection | (areaProtection << 4), 466 bytes); 467 return B_OK; 468 } 469 470 471 static inline void 472 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 473 { 474 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 475 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 476 uint8& entry = area->page_protections[pageIndex / 2]; 477 if (pageIndex % 2 == 0) 478 entry = (entry & 0xf0) | protection; 479 else 480 entry = (entry & 0x0f) | (protection << 4); 481 } 482 483 484 static inline uint32 485 get_area_page_protection(VMArea* area, addr_t pageAddress) 486 { 487 if (area->page_protections == NULL) 488 return area->protection; 489 490 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 491 uint32 protection = area->page_protections[pageIndex / 2]; 492 if (pageIndex % 2 == 0) 493 protection &= 0x0f; 494 else 495 protection >>= 4; 496 497 // If this is a kernel area we translate the user flags to kernel flags. 498 if (area->address_space == VMAddressSpace::Kernel()) { 499 uint32 kernelProtection = 0; 500 if ((protection & B_READ_AREA) != 0) 501 kernelProtection |= B_KERNEL_READ_AREA; 502 if ((protection & B_WRITE_AREA) != 0) 503 kernelProtection |= B_KERNEL_WRITE_AREA; 504 505 return kernelProtection; 506 } 507 508 return protection | B_KERNEL_READ_AREA 509 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 510 } 511 512 513 /*! The caller must have reserved enough pages the translation map 514 implementation might need to map this page. 515 The page's cache must be locked. 516 */ 517 static status_t 518 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 519 vm_page_reservation* reservation) 520 { 521 VMTranslationMap* map = area->address_space->TranslationMap(); 522 523 bool wasMapped = page->IsMapped(); 524 525 if (area->wiring == B_NO_LOCK) { 526 DEBUG_PAGE_ACCESS_CHECK(page); 527 528 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 529 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 530 gPageMappingsObjectCache, 531 CACHE_DONT_WAIT_FOR_MEMORY 532 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 533 if (mapping == NULL) 534 return B_NO_MEMORY; 535 536 mapping->page = page; 537 mapping->area = area; 538 539 map->Lock(); 540 541 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 542 area->MemoryType(), reservation); 543 544 // insert mapping into lists 545 if (!page->IsMapped()) 546 atomic_add(&gMappedPagesCount, 1); 547 548 page->mappings.Add(mapping); 549 area->mappings.Add(mapping); 550 551 map->Unlock(); 552 } else { 553 DEBUG_PAGE_ACCESS_CHECK(page); 554 555 map->Lock(); 556 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 557 area->MemoryType(), reservation); 558 map->Unlock(); 559 560 increment_page_wired_count(page); 561 } 562 563 if (!wasMapped) { 564 // The page is mapped now, so we must not remain in the cached queue. 565 // It also makes sense to move it from the inactive to the active, since 566 // otherwise the page daemon wouldn't come to keep track of it (in idle 567 // mode) -- if the page isn't touched, it will be deactivated after a 568 // full iteration through the queue at the latest. 569 if (page->State() == PAGE_STATE_CACHED 570 || page->State() == PAGE_STATE_INACTIVE) { 571 vm_page_set_state(page, PAGE_STATE_ACTIVE); 572 } 573 } 574 575 return B_OK; 576 } 577 578 579 /*! If \a preserveModified is \c true, the caller must hold the lock of the 580 page's cache. 581 */ 582 static inline bool 583 unmap_page(VMArea* area, addr_t virtualAddress) 584 { 585 return area->address_space->TranslationMap()->UnmapPage(area, 586 virtualAddress, true); 587 } 588 589 590 /*! If \a preserveModified is \c true, the caller must hold the lock of all 591 mapped pages' caches. 592 */ 593 static inline void 594 unmap_pages(VMArea* area, addr_t base, size_t size) 595 { 596 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 597 } 598 599 600 /*! Cuts a piece out of an area. If the given cut range covers the complete 601 area, it is deleted. If it covers the beginning or the end, the area is 602 resized accordingly. If the range covers some part in the middle of the 603 area, it is split in two; in this case the second area is returned via 604 \a _secondArea (the variable is left untouched in the other cases). 605 The address space must be write locked. 606 The caller must ensure that no part of the given range is wired. 607 */ 608 static status_t 609 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 610 addr_t lastAddress, VMArea** _secondArea, bool kernel) 611 { 612 // Does the cut range intersect with the area at all? 613 addr_t areaLast = area->Base() + (area->Size() - 1); 614 if (area->Base() > lastAddress || areaLast < address) 615 return B_OK; 616 617 // Is the area fully covered? 618 if (area->Base() >= address && areaLast <= lastAddress) { 619 delete_area(addressSpace, area, false); 620 return B_OK; 621 } 622 623 int priority; 624 uint32 allocationFlags; 625 if (addressSpace == VMAddressSpace::Kernel()) { 626 priority = VM_PRIORITY_SYSTEM; 627 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 628 | HEAP_DONT_LOCK_KERNEL_SPACE; 629 } else { 630 priority = VM_PRIORITY_USER; 631 allocationFlags = 0; 632 } 633 634 VMCache* cache = vm_area_get_locked_cache(area); 635 VMCacheChainLocker cacheChainLocker(cache); 636 cacheChainLocker.LockAllSourceCaches(); 637 638 // Cut the end only? 
639 if (areaLast <= lastAddress) { 640 size_t oldSize = area->Size(); 641 size_t newSize = address - area->Base(); 642 643 status_t error = addressSpace->ShrinkAreaTail(area, newSize, 644 allocationFlags); 645 if (error != B_OK) 646 return error; 647 648 // unmap pages 649 unmap_pages(area, address, oldSize - newSize); 650 651 // If no one else uses the area's cache, we can resize it, too. 652 if (cache->areas == area && area->cache_next == NULL 653 && cache->consumers.IsEmpty() 654 && cache->type == CACHE_TYPE_RAM) { 655 // Since VMCache::Resize() can temporarily drop the lock, we must 656 // unlock all lower caches to prevent locking order inversion. 657 cacheChainLocker.Unlock(cache); 658 cache->Resize(cache->virtual_base + newSize, priority); 659 cache->ReleaseRefAndUnlock(); 660 } 661 662 return B_OK; 663 } 664 665 // Cut the beginning only? 666 if (area->Base() >= address) { 667 addr_t oldBase = area->Base(); 668 addr_t newBase = lastAddress + 1; 669 size_t newSize = areaLast - lastAddress; 670 671 // unmap pages 672 unmap_pages(area, oldBase, newBase - oldBase); 673 674 // resize the area 675 status_t error = addressSpace->ShrinkAreaHead(area, newSize, 676 allocationFlags); 677 if (error != B_OK) 678 return error; 679 680 // TODO: If no one else uses the area's cache, we should resize it, too! 681 682 area->cache_offset += newBase - oldBase; 683 684 return B_OK; 685 } 686 687 // The tough part -- cut a piece out of the middle of the area. 688 // We do that by shrinking the area to the begin section and creating a 689 // new area for the end section. 690 691 addr_t firstNewSize = address - area->Base(); 692 addr_t secondBase = lastAddress + 1; 693 addr_t secondSize = areaLast - lastAddress; 694 695 // unmap pages 696 unmap_pages(area, address, area->Size() - firstNewSize); 697 698 // resize the area 699 addr_t oldSize = area->Size(); 700 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 701 allocationFlags); 702 if (error != B_OK) 703 return error; 704 705 // TODO: If no one else uses the area's cache, we might want to create a 706 // new cache for the second area, transfer the concerned pages from the 707 // first cache to it and resize the first cache. 708 709 // map the second area 710 virtual_address_restrictions addressRestrictions = {}; 711 addressRestrictions.address = (void*)secondBase; 712 addressRestrictions.address_specification = B_EXACT_ADDRESS; 713 VMArea* secondArea; 714 error = map_backing_store(addressSpace, cache, 715 area->cache_offset + (secondBase - area->Base()), area->name, 716 secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0, 717 &addressRestrictions, kernel, &secondArea, NULL); 718 if (error != B_OK) { 719 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 720 return error; 721 } 722 723 // We need a cache reference for the new area. 724 cache->AcquireRefLocked(); 725 726 if (_secondArea != NULL) 727 *_secondArea = secondArea; 728 729 return B_OK; 730 } 731 732 733 /*! Deletes all areas in the given address range. 734 The address space must be write-locked. 735 The caller must ensure that no part of the given range is wired. 736 */ 737 static status_t 738 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 739 bool kernel) 740 { 741 size = PAGE_ALIGN(size); 742 addr_t lastAddress = address + (size - 1); 743 744 // Check, whether the caller is allowed to modify the concerned areas. 
745 if (!kernel) { 746 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 747 VMArea* area = it.Next();) { 748 addr_t areaLast = area->Base() + (area->Size() - 1); 749 if (area->Base() < lastAddress && address < areaLast) { 750 if ((area->protection & B_KERNEL_AREA) != 0) 751 return B_NOT_ALLOWED; 752 } 753 } 754 } 755 756 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 757 VMArea* area = it.Next();) { 758 addr_t areaLast = area->Base() + (area->Size() - 1); 759 if (area->Base() < lastAddress && address < areaLast) { 760 status_t error = cut_area(addressSpace, area, address, 761 lastAddress, NULL, kernel); 762 if (error != B_OK) 763 return error; 764 // Failing after already messing with areas is ugly, but we 765 // can't do anything about it. 766 } 767 } 768 769 return B_OK; 770 } 771 772 773 /*! You need to hold the lock of the cache and the write lock of the address 774 space when calling this function. 775 Note, that in case of error your cache will be temporarily unlocked. 776 If \a addressSpec is \c B_EXACT_ADDRESS and the 777 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 778 that no part of the specified address range (base \c *_virtualAddress, size 779 \a size) is wired. 780 */ 781 static status_t 782 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 783 const char* areaName, addr_t size, int wiring, int protection, int mapping, 784 uint32 flags, const virtual_address_restrictions* addressRestrictions, 785 bool kernel, VMArea** _area, void** _virtualAddress) 786 { 787 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, " 788 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 789 "'%s'\n", addressSpace, cache, addressRestrictions->address, offset, 790 size, addressRestrictions->address_specification, wiring, protection, 791 _area, areaName)); 792 cache->AssertLocked(); 793 794 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 795 | HEAP_DONT_LOCK_KERNEL_SPACE; 796 int priority; 797 if (addressSpace != VMAddressSpace::Kernel()) { 798 priority = VM_PRIORITY_USER; 799 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 800 priority = VM_PRIORITY_VIP; 801 allocationFlags |= HEAP_PRIORITY_VIP; 802 } else 803 priority = VM_PRIORITY_SYSTEM; 804 805 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 806 allocationFlags); 807 if (area == NULL) 808 return B_NO_MEMORY; 809 810 status_t status; 811 812 // if this is a private map, we need to create a new cache 813 // to handle the private copies of pages as they are written to 814 VMCache* sourceCache = cache; 815 if (mapping == REGION_PRIVATE_MAP) { 816 VMCache* newCache; 817 818 // create an anonymous cache 819 bool isStack = (protection & B_STACK_AREA) != 0; 820 status = VMCacheFactory::CreateAnonymousCache(newCache, 821 isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 822 isStack ? 
USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER); 823 if (status != B_OK) 824 goto err1; 825 826 newCache->Lock(); 827 newCache->temporary = 1; 828 newCache->virtual_base = offset; 829 newCache->virtual_end = offset + size; 830 831 cache->AddConsumer(newCache); 832 833 cache = newCache; 834 } 835 836 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 837 status = cache->SetMinimalCommitment(size, priority); 838 if (status != B_OK) 839 goto err2; 840 } 841 842 // check to see if this address space has entered DELETE state 843 if (addressSpace->IsBeingDeleted()) { 844 // okay, someone is trying to delete this address space now, so we can't 845 // insert the area, so back out 846 status = B_BAD_TEAM_ID; 847 goto err2; 848 } 849 850 if (addressRestrictions->address_specification == B_EXACT_ADDRESS 851 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 852 status = unmap_address_range(addressSpace, 853 (addr_t)addressRestrictions->address, size, kernel); 854 if (status != B_OK) 855 goto err2; 856 } 857 858 status = addressSpace->InsertArea(area, size, addressRestrictions, 859 allocationFlags, _virtualAddress); 860 if (status != B_OK) { 861 // TODO: wait and try again once this is working in the backend 862 #if 0 863 if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) { 864 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, 865 0, 0); 866 } 867 #endif 868 goto err2; 869 } 870 871 // attach the cache to the area 872 area->cache = cache; 873 area->cache_offset = offset; 874 875 // point the cache back to the area 876 cache->InsertAreaLocked(area); 877 if (mapping == REGION_PRIVATE_MAP) 878 cache->Unlock(); 879 880 // insert the area in the global area hash table 881 VMAreaHash::Insert(area); 882 883 // grab a ref to the address space (the area holds this) 884 addressSpace->Get(); 885 886 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 887 // cache, sourceCache, areaName, area); 888 889 *_area = area; 890 return B_OK; 891 892 err2: 893 if (mapping == REGION_PRIVATE_MAP) { 894 // We created this cache, so we must delete it again. Note, that we 895 // need to temporarily unlock the source cache or we'll otherwise 896 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 897 sourceCache->Unlock(); 898 cache->ReleaseRefAndUnlock(); 899 sourceCache->Lock(); 900 } 901 err1: 902 addressSpace->DeleteArea(area, allocationFlags); 903 return status; 904 } 905 906 907 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 908 locker1, locker2). 909 */ 910 template<typename LockerType1, typename LockerType2> 911 static inline bool 912 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 913 { 914 area->cache->AssertLocked(); 915 916 VMAreaUnwiredWaiter waiter; 917 if (!area->AddWaiterIfWired(&waiter)) 918 return false; 919 920 // unlock everything and wait 921 if (locker1 != NULL) 922 locker1->Unlock(); 923 if (locker2 != NULL) 924 locker2->Unlock(); 925 926 waiter.waitEntry.Wait(); 927 928 return true; 929 } 930 931 932 /*! Checks whether the given area has any wired ranges intersecting with the 933 specified range and waits, if so. 934 935 When it has to wait, the function calls \c Unlock() on both \a locker1 936 and \a locker2, if given. 937 The area's top cache must be locked and must be unlocked as a side effect 938 of calling \c Unlock() on either \a locker1 or \a locker2. 939 940 If the function does not have to wait it does not modify or unlock any 941 object. 
942 943 \param area The area to be checked. 944 \param base The base address of the range to check. 945 \param size The size of the address range to check. 946 \param locker1 An object to be unlocked when before starting to wait (may 947 be \c NULL). 948 \param locker2 An object to be unlocked when before starting to wait (may 949 be \c NULL). 950 \return \c true, if the function had to wait, \c false otherwise. 951 */ 952 template<typename LockerType1, typename LockerType2> 953 static inline bool 954 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 955 LockerType1* locker1, LockerType2* locker2) 956 { 957 area->cache->AssertLocked(); 958 959 VMAreaUnwiredWaiter waiter; 960 if (!area->AddWaiterIfWired(&waiter, base, size)) 961 return false; 962 963 // unlock everything and wait 964 if (locker1 != NULL) 965 locker1->Unlock(); 966 if (locker2 != NULL) 967 locker2->Unlock(); 968 969 waiter.waitEntry.Wait(); 970 971 return true; 972 } 973 974 975 /*! Checks whether the given address space has any wired ranges intersecting 976 with the specified range and waits, if so. 977 978 Similar to wait_if_area_range_is_wired(), with the following differences: 979 - All areas intersecting with the range are checked (respectively all until 980 one is found that contains a wired range intersecting with the given 981 range). 982 - The given address space must at least be read-locked and must be unlocked 983 when \c Unlock() is called on \a locker. 984 - None of the areas' caches are allowed to be locked. 985 */ 986 template<typename LockerType> 987 static inline bool 988 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 989 size_t size, LockerType* locker) 990 { 991 addr_t end = base + size - 1; 992 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 993 VMArea* area = it.Next();) { 994 // TODO: Introduce a VMAddressSpace method to get a close iterator! 995 if (area->Base() > end) 996 return false; 997 998 if (base >= area->Base() + area->Size() - 1) 999 continue; 1000 1001 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1002 1003 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1004 return true; 1005 } 1006 1007 return false; 1008 } 1009 1010 1011 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1012 It must be called in a situation where the kernel address space may be 1013 locked. 1014 */ 1015 status_t 1016 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1017 { 1018 AddressSpaceReadLocker locker; 1019 VMArea* area; 1020 status_t status = locker.SetFromArea(id, area); 1021 if (status != B_OK) 1022 return status; 1023 1024 if (area->page_protections == NULL) { 1025 status = allocate_area_page_protections(area); 1026 if (status != B_OK) 1027 return status; 1028 } 1029 1030 *cookie = (void*)area; 1031 return B_OK; 1032 } 1033 1034 1035 /*! This is a debug helper function that can only be used with very specific 1036 use cases. 1037 Sets protection for the given address range to the protection specified. 1038 If \a protection is 0 then the involved pages will be marked non-present 1039 in the translation map to cause a fault on access. The pages aren't 1040 actually unmapped however so that they can be marked present again with 1041 additional calls to this function. For this to work the area must be 1042 fully locked in memory so that the pages aren't otherwise touched. 
1043 This function does not lock the kernel address space and needs to be 1044 supplied with a \a cookie retrieved from a successful call to 1045 vm_prepare_kernel_area_debug_protection(). 1046 */ 1047 status_t 1048 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1049 uint32 protection) 1050 { 1051 // check address range 1052 addr_t address = (addr_t)_address; 1053 size = PAGE_ALIGN(size); 1054 1055 if ((address % B_PAGE_SIZE) != 0 1056 || (addr_t)address + size < (addr_t)address 1057 || !IS_KERNEL_ADDRESS(address) 1058 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1059 return B_BAD_VALUE; 1060 } 1061 1062 // Translate the kernel protection to user protection as we only store that. 1063 if ((protection & B_KERNEL_READ_AREA) != 0) 1064 protection |= B_READ_AREA; 1065 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1066 protection |= B_WRITE_AREA; 1067 1068 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1069 VMTranslationMap* map = addressSpace->TranslationMap(); 1070 VMArea* area = (VMArea*)cookie; 1071 1072 addr_t offset = address - area->Base(); 1073 if (area->Size() - offset < size) { 1074 panic("protect range not fully within supplied area"); 1075 return B_BAD_VALUE; 1076 } 1077 1078 if (area->page_protections == NULL) { 1079 panic("area has no page protections"); 1080 return B_BAD_VALUE; 1081 } 1082 1083 // Invalidate the mapping entries so any access to them will fault or 1084 // restore the mapping entries unchanged so that lookup will success again. 1085 map->Lock(); 1086 map->DebugMarkRangePresent(address, address + size, protection != 0); 1087 map->Unlock(); 1088 1089 // And set the proper page protections so that the fault case will actually 1090 // fail and not simply try to map a new page. 1091 for (addr_t pageAddress = address; pageAddress < address + size; 1092 pageAddress += B_PAGE_SIZE) { 1093 set_area_page_protection(area, pageAddress, protection); 1094 } 1095 1096 return B_OK; 1097 } 1098 1099 1100 status_t 1101 vm_block_address_range(const char* name, void* address, addr_t size) 1102 { 1103 if (!arch_vm_supports_protection(0)) 1104 return B_NOT_SUPPORTED; 1105 1106 AddressSpaceWriteLocker locker; 1107 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1108 if (status != B_OK) 1109 return status; 1110 1111 VMAddressSpace* addressSpace = locker.AddressSpace(); 1112 1113 // create an anonymous cache 1114 VMCache* cache; 1115 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1116 VM_PRIORITY_SYSTEM); 1117 if (status != B_OK) 1118 return status; 1119 1120 cache->temporary = 1; 1121 cache->virtual_end = size; 1122 cache->Lock(); 1123 1124 VMArea* area; 1125 virtual_address_restrictions addressRestrictions = {}; 1126 addressRestrictions.address = address; 1127 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1128 status = map_backing_store(addressSpace, cache, 0, name, size, 1129 B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0, 1130 &addressRestrictions, true, &area, NULL); 1131 if (status != B_OK) { 1132 cache->ReleaseRefAndUnlock(); 1133 return status; 1134 } 1135 1136 cache->Unlock(); 1137 area->cache_type = CACHE_TYPE_RAM; 1138 return area->id; 1139 } 1140 1141 1142 status_t 1143 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1144 { 1145 AddressSpaceWriteLocker locker(team); 1146 if (!locker.IsLocked()) 1147 return B_BAD_TEAM_ID; 1148 1149 VMAddressSpace* addressSpace = locker.AddressSpace(); 1150 return 
addressSpace->UnreserveAddressRange((addr_t)address, size, 1151 addressSpace == VMAddressSpace::Kernel() 1152 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1153 } 1154 1155 1156 status_t 1157 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1158 addr_t size, uint32 flags) 1159 { 1160 if (size == 0) 1161 return B_BAD_VALUE; 1162 1163 AddressSpaceWriteLocker locker(team); 1164 if (!locker.IsLocked()) 1165 return B_BAD_TEAM_ID; 1166 1167 virtual_address_restrictions addressRestrictions = {}; 1168 addressRestrictions.address = *_address; 1169 addressRestrictions.address_specification = addressSpec; 1170 VMAddressSpace* addressSpace = locker.AddressSpace(); 1171 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1172 addressSpace == VMAddressSpace::Kernel() 1173 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1174 _address); 1175 } 1176 1177 1178 area_id 1179 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1180 uint32 wiring, uint32 protection, uint32 flags, 1181 const virtual_address_restrictions* virtualAddressRestrictions, 1182 const physical_address_restrictions* physicalAddressRestrictions, 1183 bool kernel, void** _address) 1184 { 1185 VMArea* area; 1186 VMCache* cache; 1187 vm_page* page = NULL; 1188 bool isStack = (protection & B_STACK_AREA) != 0; 1189 page_num_t guardPages; 1190 bool canOvercommit = false; 1191 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1192 ? VM_PAGE_ALLOC_CLEAR : 0; 1193 1194 TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size)); 1195 1196 size = PAGE_ALIGN(size); 1197 1198 if (size == 0) 1199 return B_BAD_VALUE; 1200 if (!arch_vm_supports_protection(protection)) 1201 return B_NOT_SUPPORTED; 1202 1203 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1204 canOvercommit = true; 1205 1206 #ifdef DEBUG_KERNEL_STACKS 1207 if ((protection & B_KERNEL_STACK_AREA) != 0) 1208 isStack = true; 1209 #endif 1210 1211 // check parameters 1212 switch (virtualAddressRestrictions->address_specification) { 1213 case B_ANY_ADDRESS: 1214 case B_EXACT_ADDRESS: 1215 case B_BASE_ADDRESS: 1216 case B_ANY_KERNEL_ADDRESS: 1217 case B_ANY_KERNEL_BLOCK_ADDRESS: 1218 break; 1219 1220 default: 1221 return B_BAD_VALUE; 1222 } 1223 1224 // If low or high physical address restrictions are given, we force 1225 // B_CONTIGUOUS wiring, since only then we'll use 1226 // vm_page_allocate_page_run() which deals with those restrictions. 1227 if (physicalAddressRestrictions->low_address != 0 1228 || physicalAddressRestrictions->high_address != 0) { 1229 wiring = B_CONTIGUOUS; 1230 } 1231 1232 physical_address_restrictions stackPhysicalRestrictions; 1233 bool doReserveMemory = false; 1234 switch (wiring) { 1235 case B_NO_LOCK: 1236 break; 1237 case B_FULL_LOCK: 1238 case B_LAZY_LOCK: 1239 case B_CONTIGUOUS: 1240 doReserveMemory = true; 1241 break; 1242 case B_ALREADY_WIRED: 1243 break; 1244 case B_LOMEM: 1245 stackPhysicalRestrictions = *physicalAddressRestrictions; 1246 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1247 physicalAddressRestrictions = &stackPhysicalRestrictions; 1248 wiring = B_CONTIGUOUS; 1249 doReserveMemory = true; 1250 break; 1251 case B_32_BIT_FULL_LOCK: 1252 if (B_HAIKU_PHYSICAL_BITS <= 32 1253 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1254 wiring = B_FULL_LOCK; 1255 doReserveMemory = true; 1256 break; 1257 } 1258 // TODO: We don't really support this mode efficiently. Just fall 1259 // through for now ... 
1260 case B_32_BIT_CONTIGUOUS: 1261 #if B_HAIKU_PHYSICAL_BITS > 32 1262 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1263 stackPhysicalRestrictions = *physicalAddressRestrictions; 1264 stackPhysicalRestrictions.high_address 1265 = (phys_addr_t)1 << 32; 1266 physicalAddressRestrictions = &stackPhysicalRestrictions; 1267 } 1268 #endif 1269 wiring = B_CONTIGUOUS; 1270 doReserveMemory = true; 1271 break; 1272 default: 1273 return B_BAD_VALUE; 1274 } 1275 1276 // Optimization: For a single-page contiguous allocation without low/high 1277 // memory restriction B_FULL_LOCK wiring suffices. 1278 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1279 && physicalAddressRestrictions->low_address == 0 1280 && physicalAddressRestrictions->high_address == 0) { 1281 wiring = B_FULL_LOCK; 1282 } 1283 1284 // For full lock or contiguous areas we're also going to map the pages and 1285 // thus need to reserve pages for the mapping backend upfront. 1286 addr_t reservedMapPages = 0; 1287 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1288 AddressSpaceWriteLocker locker; 1289 status_t status = locker.SetTo(team); 1290 if (status != B_OK) 1291 return status; 1292 1293 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1294 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1295 } 1296 1297 int priority; 1298 if (team != VMAddressSpace::KernelID()) 1299 priority = VM_PRIORITY_USER; 1300 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1301 priority = VM_PRIORITY_VIP; 1302 else 1303 priority = VM_PRIORITY_SYSTEM; 1304 1305 // Reserve memory before acquiring the address space lock. This reduces the 1306 // chances of failure, since while holding the write lock to the address 1307 // space (if it is the kernel address space that is), the low memory handler 1308 // won't be able to free anything for us. 1309 addr_t reservedMemory = 0; 1310 if (doReserveMemory) { 1311 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1312 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1313 return B_NO_MEMORY; 1314 reservedMemory = size; 1315 // TODO: We don't reserve the memory for the pages for the page 1316 // directories/tables. We actually need to do since we currently don't 1317 // reclaim them (and probably can't reclaim all of them anyway). Thus 1318 // there are actually less physical pages than there should be, which 1319 // can get the VM into trouble in low memory situations. 1320 } 1321 1322 AddressSpaceWriteLocker locker; 1323 VMAddressSpace* addressSpace; 1324 status_t status; 1325 1326 // For full lock areas reserve the pages before locking the address 1327 // space. E.g. block caches can't release their memory while we hold the 1328 // address space lock. 
1329 page_num_t reservedPages = reservedMapPages; 1330 if (wiring == B_FULL_LOCK) 1331 reservedPages += size / B_PAGE_SIZE; 1332 1333 vm_page_reservation reservation; 1334 if (reservedPages > 0) { 1335 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1336 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1337 priority)) { 1338 reservedPages = 0; 1339 status = B_WOULD_BLOCK; 1340 goto err0; 1341 } 1342 } else 1343 vm_page_reserve_pages(&reservation, reservedPages, priority); 1344 } 1345 1346 if (wiring == B_CONTIGUOUS) { 1347 // we try to allocate the page run here upfront as this may easily 1348 // fail for obvious reasons 1349 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1350 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1351 if (page == NULL) { 1352 status = B_NO_MEMORY; 1353 goto err0; 1354 } 1355 } 1356 1357 // Lock the address space and, if B_EXACT_ADDRESS and 1358 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1359 // is not wired. 1360 do { 1361 status = locker.SetTo(team); 1362 if (status != B_OK) 1363 goto err1; 1364 1365 addressSpace = locker.AddressSpace(); 1366 } while (virtualAddressRestrictions->address_specification 1367 == B_EXACT_ADDRESS 1368 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1369 && wait_if_address_range_is_wired(addressSpace, 1370 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1371 1372 // create an anonymous cache 1373 // if it's a stack, make sure that two pages are available at least 1374 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1375 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1376 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1377 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1378 wiring == B_NO_LOCK, priority); 1379 if (status != B_OK) 1380 goto err1; 1381 1382 cache->temporary = 1; 1383 cache->virtual_end = size; 1384 cache->committed_size = reservedMemory; 1385 // TODO: This should be done via a method. 1386 reservedMemory = 0; 1387 1388 cache->Lock(); 1389 1390 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1391 protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions, 1392 kernel, &area, _address); 1393 1394 if (status != B_OK) { 1395 cache->ReleaseRefAndUnlock(); 1396 goto err1; 1397 } 1398 1399 locker.DegradeToReadLock(); 1400 1401 switch (wiring) { 1402 case B_NO_LOCK: 1403 case B_LAZY_LOCK: 1404 // do nothing - the pages are mapped in as needed 1405 break; 1406 1407 case B_FULL_LOCK: 1408 { 1409 // Allocate and map all pages for this area 1410 1411 off_t offset = 0; 1412 for (addr_t address = area->Base(); 1413 address < area->Base() + (area->Size() - 1); 1414 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1415 #ifdef DEBUG_KERNEL_STACKS 1416 # ifdef STACK_GROWS_DOWNWARDS 1417 if (isStack && address < area->Base() 1418 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1419 # else 1420 if (isStack && address >= area->Base() + area->Size() 1421 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1422 # endif 1423 continue; 1424 #endif 1425 vm_page* page = vm_page_allocate_page(&reservation, 1426 PAGE_STATE_WIRED | pageAllocFlags); 1427 cache->InsertPage(page, offset); 1428 map_page(area, page, address, protection, &reservation); 1429 1430 DEBUG_PAGE_ACCESS_END(page); 1431 } 1432 1433 break; 1434 } 1435 1436 case B_ALREADY_WIRED: 1437 { 1438 // The pages should already be mapped. This is only really useful 1439 // during boot time. 
Find the appropriate vm_page objects and stick 1440 // them in the cache object. 1441 VMTranslationMap* map = addressSpace->TranslationMap(); 1442 off_t offset = 0; 1443 1444 if (!gKernelStartup) 1445 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1446 1447 map->Lock(); 1448 1449 for (addr_t virtualAddress = area->Base(); 1450 virtualAddress < area->Base() + (area->Size() - 1); 1451 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1452 phys_addr_t physicalAddress; 1453 uint32 flags; 1454 status = map->Query(virtualAddress, &physicalAddress, &flags); 1455 if (status < B_OK) { 1456 panic("looking up mapping failed for va 0x%lx\n", 1457 virtualAddress); 1458 } 1459 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1460 if (page == NULL) { 1461 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1462 "\n", physicalAddress); 1463 } 1464 1465 DEBUG_PAGE_ACCESS_START(page); 1466 1467 cache->InsertPage(page, offset); 1468 increment_page_wired_count(page); 1469 vm_page_set_state(page, PAGE_STATE_WIRED); 1470 page->busy = false; 1471 1472 DEBUG_PAGE_ACCESS_END(page); 1473 } 1474 1475 map->Unlock(); 1476 break; 1477 } 1478 1479 case B_CONTIGUOUS: 1480 { 1481 // We have already allocated our continuous pages run, so we can now 1482 // just map them in the address space 1483 VMTranslationMap* map = addressSpace->TranslationMap(); 1484 phys_addr_t physicalAddress 1485 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1486 addr_t virtualAddress = area->Base(); 1487 off_t offset = 0; 1488 1489 map->Lock(); 1490 1491 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1492 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1493 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1494 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1495 if (page == NULL) 1496 panic("couldn't lookup physical page just allocated\n"); 1497 1498 status = map->Map(virtualAddress, physicalAddress, protection, 1499 area->MemoryType(), &reservation); 1500 if (status < B_OK) 1501 panic("couldn't map physical page in page run\n"); 1502 1503 cache->InsertPage(page, offset); 1504 increment_page_wired_count(page); 1505 1506 DEBUG_PAGE_ACCESS_END(page); 1507 } 1508 1509 map->Unlock(); 1510 break; 1511 } 1512 1513 default: 1514 break; 1515 } 1516 1517 cache->Unlock(); 1518 1519 if (reservedPages > 0) 1520 vm_page_unreserve_pages(&reservation); 1521 1522 TRACE(("vm_create_anonymous_area: done\n")); 1523 1524 area->cache_type = CACHE_TYPE_RAM; 1525 return area->id; 1526 1527 err1: 1528 if (wiring == B_CONTIGUOUS) { 1529 // we had reserved the area space upfront... 
1530 phys_addr_t pageNumber = page->physical_page_number; 1531 int32 i; 1532 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1533 page = vm_lookup_page(pageNumber); 1534 if (page == NULL) 1535 panic("couldn't lookup physical page just allocated\n"); 1536 1537 vm_page_set_state(page, PAGE_STATE_FREE); 1538 } 1539 } 1540 1541 err0: 1542 if (reservedPages > 0) 1543 vm_page_unreserve_pages(&reservation); 1544 if (reservedMemory > 0) 1545 vm_unreserve_memory(reservedMemory); 1546 1547 return status; 1548 } 1549 1550 1551 area_id 1552 vm_map_physical_memory(team_id team, const char* name, void** _address, 1553 uint32 addressSpec, addr_t size, uint32 protection, 1554 phys_addr_t physicalAddress, bool alreadyWired) 1555 { 1556 VMArea* area; 1557 VMCache* cache; 1558 addr_t mapOffset; 1559 1560 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 1561 "spec = %ld, size = %lu, protection = %ld, phys = %#" B_PRIxPHYSADDR 1562 ")\n", team, name, *_address, addressSpec, size, protection, 1563 physicalAddress)); 1564 1565 if (!arch_vm_supports_protection(protection)) 1566 return B_NOT_SUPPORTED; 1567 1568 AddressSpaceWriteLocker locker(team); 1569 if (!locker.IsLocked()) 1570 return B_BAD_TEAM_ID; 1571 1572 // if the physical address is somewhat inside a page, 1573 // move the actual area down to align on a page boundary 1574 mapOffset = physicalAddress % B_PAGE_SIZE; 1575 size += mapOffset; 1576 physicalAddress -= mapOffset; 1577 1578 size = PAGE_ALIGN(size); 1579 1580 // create a device cache 1581 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1582 if (status != B_OK) 1583 return status; 1584 1585 cache->virtual_end = size; 1586 1587 cache->Lock(); 1588 1589 virtual_address_restrictions addressRestrictions = {}; 1590 addressRestrictions.address = *_address; 1591 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1592 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1593 B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1594 true, &area, _address); 1595 1596 if (status < B_OK) 1597 cache->ReleaseRefLocked(); 1598 1599 cache->Unlock(); 1600 1601 if (status == B_OK) { 1602 // set requested memory type -- use uncached, if not given 1603 uint32 memoryType = addressSpec & B_MTR_MASK; 1604 if (memoryType == 0) 1605 memoryType = B_MTR_UC; 1606 1607 area->SetMemoryType(memoryType); 1608 1609 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1610 if (status != B_OK) 1611 delete_area(locker.AddressSpace(), area, false); 1612 } 1613 1614 if (status != B_OK) 1615 return status; 1616 1617 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1618 1619 if (alreadyWired) { 1620 // The area is already mapped, but possibly not with the right 1621 // memory type. 1622 map->Lock(); 1623 map->ProtectArea(area, area->protection); 1624 map->Unlock(); 1625 } else { 1626 // Map the area completely. 1627 1628 // reserve pages needed for the mapping 1629 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1630 area->Base() + (size - 1)); 1631 vm_page_reservation reservation; 1632 vm_page_reserve_pages(&reservation, reservePages, 1633 team == VMAddressSpace::KernelID() 1634 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1635 1636 map->Lock(); 1637 1638 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1639 map->Map(area->Base() + offset, physicalAddress + offset, 1640 protection, area->MemoryType(), &reservation); 1641 } 1642 1643 map->Unlock(); 1644 1645 vm_page_unreserve_pages(&reservation); 1646 } 1647 1648 // modify the pointer returned to be offset back into the new area 1649 // the same way the physical address in was offset 1650 *_address = (void*)((addr_t)*_address + mapOffset); 1651 1652 area->cache_type = CACHE_TYPE_DEVICE; 1653 return area->id; 1654 } 1655 1656 1657 /*! Don't use! 1658 TODO: This function was introduced to map physical page vecs to 1659 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1660 use a device cache and does not track vm_page::wired_count! 1661 */ 1662 area_id 1663 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1664 uint32 addressSpec, addr_t* _size, uint32 protection, 1665 struct generic_io_vec* vecs, uint32 vecCount) 1666 { 1667 TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, " 1668 "spec = %ld, _size = %p, protection = %ld, vecs = %p, " 1669 "vecCount = %ld)\n", team, name, *_address, addressSpec, _size, 1670 protection, vecs, vecCount)); 1671 1672 if (!arch_vm_supports_protection(protection) 1673 || (addressSpec & B_MTR_MASK) != 0) { 1674 return B_NOT_SUPPORTED; 1675 } 1676 1677 AddressSpaceWriteLocker locker(team); 1678 if (!locker.IsLocked()) 1679 return B_BAD_TEAM_ID; 1680 1681 if (vecCount == 0) 1682 return B_BAD_VALUE; 1683 1684 addr_t size = 0; 1685 for (uint32 i = 0; i < vecCount; i++) { 1686 if (vecs[i].base % B_PAGE_SIZE != 0 1687 || vecs[i].length % B_PAGE_SIZE != 0) { 1688 return B_BAD_VALUE; 1689 } 1690 1691 size += vecs[i].length; 1692 } 1693 1694 // create a device cache 1695 VMCache* cache; 1696 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1697 if (result != B_OK) 1698 return result; 1699 1700 cache->virtual_end = size; 1701 1702 cache->Lock(); 1703 1704 VMArea* area; 1705 virtual_address_restrictions addressRestrictions = {}; 1706 addressRestrictions.address = *_address; 1707 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1708 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1709 size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, 1710 &addressRestrictions, true, &area, _address); 1711 1712 if (result != B_OK) 1713 cache->ReleaseRefLocked(); 1714 1715 cache->Unlock(); 1716 1717 if (result != B_OK) 1718 return result; 1719 1720 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1721 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1722 area->Base() + (size - 1)); 1723 1724 vm_page_reservation reservation; 1725 vm_page_reserve_pages(&reservation, reservePages, 1726 team == VMAddressSpace::KernelID() 1727 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1728 map->Lock(); 1729 1730 uint32 vecIndex = 0; 1731 size_t vecOffset = 0; 1732 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1733 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1734 vecOffset = 0; 1735 vecIndex++; 1736 } 1737 1738 if (vecIndex >= vecCount) 1739 break; 1740 1741 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1742 protection, area->MemoryType(), &reservation); 1743 1744 vecOffset += B_PAGE_SIZE; 1745 } 1746 1747 map->Unlock(); 1748 vm_page_unreserve_pages(&reservation); 1749 1750 if (_size != NULL) 1751 *_size = size; 1752 1753 area->cache_type = CACHE_TYPE_DEVICE; 1754 return area->id; 1755 } 1756 1757 1758 area_id 1759 vm_create_null_area(team_id team, const char* name, void** address, 1760 uint32 addressSpec, addr_t size, uint32 flags) 1761 { 1762 size = PAGE_ALIGN(size); 1763 1764 // Lock the address space and, if B_EXACT_ADDRESS and 1765 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1766 // is not wired. 1767 AddressSpaceWriteLocker locker; 1768 do { 1769 if (locker.SetTo(team) != B_OK) 1770 return B_BAD_TEAM_ID; 1771 } while (addressSpec == B_EXACT_ADDRESS 1772 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1773 && wait_if_address_range_is_wired(locker.AddressSpace(), 1774 (addr_t)*address, size, &locker)); 1775 1776 // create a null cache 1777 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1778 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1779 VMCache* cache; 1780 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1781 if (status != B_OK) 1782 return status; 1783 1784 cache->temporary = 1; 1785 cache->virtual_end = size; 1786 1787 cache->Lock(); 1788 1789 VMArea* area; 1790 virtual_address_restrictions addressRestrictions = {}; 1791 addressRestrictions.address = *address; 1792 addressRestrictions.address_specification = addressSpec; 1793 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1794 B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags, 1795 &addressRestrictions, true, &area, address); 1796 1797 if (status < B_OK) { 1798 cache->ReleaseRefAndUnlock(); 1799 return status; 1800 } 1801 1802 cache->Unlock(); 1803 1804 area->cache_type = CACHE_TYPE_NULL; 1805 return area->id; 1806 } 1807 1808 1809 /*! Creates the vnode cache for the specified \a vnode. 1810 The vnode has to be marked busy when calling this function. 1811 */ 1812 status_t 1813 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1814 { 1815 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1816 } 1817 1818 1819 /*! \a cache must be locked. The area's address space must be read-locked. 
1820 */ 1821 static void 1822 pre_map_area_pages(VMArea* area, VMCache* cache, 1823 vm_page_reservation* reservation) 1824 { 1825 addr_t baseAddress = area->Base(); 1826 addr_t cacheOffset = area->cache_offset; 1827 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1828 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1829 1830 for (VMCachePagesTree::Iterator it 1831 = cache->pages.GetIterator(firstPage, true, true); 1832 vm_page* page = it.Next();) { 1833 if (page->cache_offset >= endPage) 1834 break; 1835 1836 // skip busy and inactive pages 1837 if (page->busy || page->usage_count == 0) 1838 continue; 1839 1840 DEBUG_PAGE_ACCESS_START(page); 1841 map_page(area, page, 1842 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1843 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1844 DEBUG_PAGE_ACCESS_END(page); 1845 } 1846 } 1847 1848 1849 /*! Will map the file specified by \a fd to an area in memory. 1850 The file will be mirrored beginning at the specified \a offset. The 1851 \a offset and \a size arguments have to be page aligned. 1852 */ 1853 static area_id 1854 _vm_map_file(team_id team, const char* name, void** _address, 1855 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 1856 bool unmapAddressRange, int fd, off_t offset, bool kernel) 1857 { 1858 // TODO: for binary files, we want to make sure that they get the 1859 // copy of a file at a given time, ie. later changes should not 1860 // make it into the mapped copy -- this will need quite some changes 1861 // to be done in a nice way 1862 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 1863 fd, offset, size, mapping)); 1864 1865 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 1866 size = PAGE_ALIGN(size); 1867 1868 if (mapping == REGION_NO_PRIVATE_MAP) 1869 protection |= B_SHARED_AREA; 1870 if (addressSpec != B_EXACT_ADDRESS) 1871 unmapAddressRange = false; 1872 1873 if (fd < 0) { 1874 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 1875 virtual_address_restrictions virtualRestrictions = {}; 1876 virtualRestrictions.address = *_address; 1877 virtualRestrictions.address_specification = addressSpec; 1878 physical_address_restrictions physicalRestrictions = {}; 1879 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 1880 flags, &virtualRestrictions, &physicalRestrictions, kernel, 1881 _address); 1882 } 1883 1884 // get the open flags of the FD 1885 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 1886 if (descriptor == NULL) 1887 return EBADF; 1888 int32 openMode = descriptor->open_mode; 1889 put_fd(descriptor); 1890 1891 // The FD must open for reading at any rate. For shared mapping with write 1892 // access, additionally the FD must be open for writing. 1893 if ((openMode & O_ACCMODE) == O_WRONLY 1894 || (mapping == REGION_NO_PRIVATE_MAP 1895 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 1896 && (openMode & O_ACCMODE) == O_RDONLY)) { 1897 return EACCES; 1898 } 1899 1900 // get the vnode for the object, this also grabs a ref to it 1901 struct vnode* vnode = NULL; 1902 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 1903 if (status < B_OK) 1904 return status; 1905 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 1906 1907 // If we're going to pre-map pages, we need to reserve the pages needed by 1908 // the mapping backend upfront. 
1909 page_num_t reservedPreMapPages = 0; 1910 vm_page_reservation reservation; 1911 if ((protection & B_READ_AREA) != 0) { 1912 AddressSpaceWriteLocker locker; 1913 status = locker.SetTo(team); 1914 if (status != B_OK) 1915 return status; 1916 1917 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1918 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 1919 1920 locker.Unlock(); 1921 1922 vm_page_reserve_pages(&reservation, reservedPreMapPages, 1923 team == VMAddressSpace::KernelID() 1924 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1925 } 1926 1927 struct PageUnreserver { 1928 PageUnreserver(vm_page_reservation* reservation) 1929 : 1930 fReservation(reservation) 1931 { 1932 } 1933 1934 ~PageUnreserver() 1935 { 1936 if (fReservation != NULL) 1937 vm_page_unreserve_pages(fReservation); 1938 } 1939 1940 vm_page_reservation* fReservation; 1941 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 1942 1943 // Lock the address space and, if the specified address range shall be 1944 // unmapped, ensure it is not wired. 1945 AddressSpaceWriteLocker locker; 1946 do { 1947 if (locker.SetTo(team) != B_OK) 1948 return B_BAD_TEAM_ID; 1949 } while (unmapAddressRange 1950 && wait_if_address_range_is_wired(locker.AddressSpace(), 1951 (addr_t)*_address, size, &locker)); 1952 1953 // TODO: this only works for file systems that use the file cache 1954 VMCache* cache; 1955 status = vfs_get_vnode_cache(vnode, &cache, false); 1956 if (status < B_OK) 1957 return status; 1958 1959 cache->Lock(); 1960 1961 VMArea* area; 1962 virtual_address_restrictions addressRestrictions = {}; 1963 addressRestrictions.address = *_address; 1964 addressRestrictions.address_specification = addressSpec; 1965 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 1966 0, protection, mapping, 1967 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 1968 &addressRestrictions, kernel, &area, _address); 1969 1970 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 1971 // map_backing_store() cannot know we no longer need the ref 1972 cache->ReleaseRefLocked(); 1973 } 1974 1975 if (status == B_OK && (protection & B_READ_AREA) != 0) 1976 pre_map_area_pages(area, cache, &reservation); 1977 1978 cache->Unlock(); 1979 1980 if (status == B_OK) { 1981 // TODO: this probably deserves a smarter solution, ie. don't always 1982 // prefetch stuff, and also, probably don't trigger it at this place. 
1983 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1984 // prefetches at max 10 MB starting from "offset" 1985 } 1986 1987 if (status != B_OK) 1988 return status; 1989 1990 area->cache_type = CACHE_TYPE_VNODE; 1991 return area->id; 1992 } 1993 1994 1995 area_id 1996 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 1997 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 1998 int fd, off_t offset) 1999 { 2000 if (!arch_vm_supports_protection(protection)) 2001 return B_NOT_SUPPORTED; 2002 2003 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2004 mapping, unmapAddressRange, fd, offset, true); 2005 } 2006 2007 2008 VMCache* 2009 vm_area_get_locked_cache(VMArea* area) 2010 { 2011 rw_lock_read_lock(&sAreaCacheLock); 2012 2013 while (true) { 2014 VMCache* cache = area->cache; 2015 2016 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2017 // cache has been deleted 2018 rw_lock_read_lock(&sAreaCacheLock); 2019 continue; 2020 } 2021 2022 rw_lock_read_lock(&sAreaCacheLock); 2023 2024 if (cache == area->cache) { 2025 cache->AcquireRefLocked(); 2026 rw_lock_read_unlock(&sAreaCacheLock); 2027 return cache; 2028 } 2029 2030 // the cache changed in the meantime 2031 cache->Unlock(); 2032 } 2033 } 2034 2035 2036 void 2037 vm_area_put_locked_cache(VMCache* cache) 2038 { 2039 cache->ReleaseRefAndUnlock(); 2040 } 2041 2042 2043 area_id 2044 vm_clone_area(team_id team, const char* name, void** address, 2045 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2046 bool kernel) 2047 { 2048 VMArea* newArea = NULL; 2049 VMArea* sourceArea; 2050 2051 // Check whether the source area exists and is cloneable. If so, mark it 2052 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2053 { 2054 AddressSpaceWriteLocker locker; 2055 status_t status = locker.SetFromArea(sourceID, sourceArea); 2056 if (status != B_OK) 2057 return status; 2058 2059 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2060 return B_NOT_ALLOWED; 2061 2062 sourceArea->protection |= B_SHARED_AREA; 2063 protection |= B_SHARED_AREA; 2064 } 2065 2066 // Now lock both address spaces and actually do the cloning. 2067 2068 MultiAddressSpaceLocker locker; 2069 VMAddressSpace* sourceAddressSpace; 2070 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2071 if (status != B_OK) 2072 return status; 2073 2074 VMAddressSpace* targetAddressSpace; 2075 status = locker.AddTeam(team, true, &targetAddressSpace); 2076 if (status != B_OK) 2077 return status; 2078 2079 status = locker.Lock(); 2080 if (status != B_OK) 2081 return status; 2082 2083 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2084 if (sourceArea == NULL) 2085 return B_BAD_VALUE; 2086 2087 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2088 return B_NOT_ALLOWED; 2089 2090 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2091 2092 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2093 // have been adapted. Maybe it should be part of the kernel settings, 2094 // anyway (so that old drivers can always work). 
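	// (As long as the block below stays disabled, the B_KERNEL_AREA check
	// above and the CACHE_TYPE_NULL check below are the only restrictions on
	// cloning kernel areas into userland.)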
2095 #if 0 2096 if (sourceArea->aspace == VMAddressSpace::Kernel() 2097 && addressSpace != VMAddressSpace::Kernel() 2098 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2099 // kernel areas must not be cloned in userland, unless explicitly 2100 // declared user-cloneable upon construction 2101 status = B_NOT_ALLOWED; 2102 } else 2103 #endif 2104 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2105 status = B_NOT_ALLOWED; 2106 else { 2107 virtual_address_restrictions addressRestrictions = {}; 2108 addressRestrictions.address = *address; 2109 addressRestrictions.address_specification = addressSpec; 2110 status = map_backing_store(targetAddressSpace, cache, 2111 sourceArea->cache_offset, name, sourceArea->Size(), 2112 sourceArea->wiring, protection, mapping, 0, &addressRestrictions, 2113 kernel, &newArea, address); 2114 } 2115 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2116 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2117 // to create a new cache, and has therefore already acquired a reference 2118 // to the source cache - but otherwise it has no idea that we need 2119 // one. 2120 cache->AcquireRefLocked(); 2121 } 2122 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2123 // we need to map in everything at this point 2124 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2125 // we don't have actual pages to map but a physical area 2126 VMTranslationMap* map 2127 = sourceArea->address_space->TranslationMap(); 2128 map->Lock(); 2129 2130 phys_addr_t physicalAddress; 2131 uint32 oldProtection; 2132 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2133 2134 map->Unlock(); 2135 2136 map = targetAddressSpace->TranslationMap(); 2137 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2138 newArea->Base() + (newArea->Size() - 1)); 2139 2140 vm_page_reservation reservation; 2141 vm_page_reserve_pages(&reservation, reservePages, 2142 targetAddressSpace == VMAddressSpace::Kernel() 2143 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2144 map->Lock(); 2145 2146 for (addr_t offset = 0; offset < newArea->Size(); 2147 offset += B_PAGE_SIZE) { 2148 map->Map(newArea->Base() + offset, physicalAddress + offset, 2149 protection, newArea->MemoryType(), &reservation); 2150 } 2151 2152 map->Unlock(); 2153 vm_page_unreserve_pages(&reservation); 2154 } else { 2155 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2156 size_t reservePages = map->MaxPagesNeededToMap( 2157 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2158 vm_page_reservation reservation; 2159 vm_page_reserve_pages(&reservation, reservePages, 2160 targetAddressSpace == VMAddressSpace::Kernel() 2161 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2162 2163 // map in all pages from source 2164 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2165 vm_page* page = it.Next();) { 2166 if (!page->busy) { 2167 DEBUG_PAGE_ACCESS_START(page); 2168 map_page(newArea, page, 2169 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2170 - newArea->cache_offset), 2171 protection, &reservation); 2172 DEBUG_PAGE_ACCESS_END(page); 2173 } 2174 } 2175 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2176 // ensuring that! 2177 2178 vm_page_unreserve_pages(&reservation); 2179 } 2180 } 2181 if (status == B_OK) 2182 newArea->cache_type = sourceArea->cache_type; 2183 2184 vm_area_put_locked_cache(cache); 2185 2186 if (status < B_OK) 2187 return status; 2188 2189 return newArea->id; 2190 } 2191 2192 2193 /*! 
Deletes the specified area of the given address space. 2194 2195 The address space must be write-locked. 2196 The caller must ensure that the area does not have any wired ranges. 2197 2198 \param addressSpace The address space containing the area. 2199 \param area The area to be deleted. 2200 \param deletingAddressSpace \c true, if the address space is in the process 2201 of being deleted. 2202 */ 2203 static void 2204 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2205 bool deletingAddressSpace) 2206 { 2207 ASSERT(!area->IsWired()); 2208 2209 VMAreaHash::Remove(area); 2210 2211 // At this point the area is removed from the global hash table, but 2212 // still exists in the area list. 2213 2214 // Unmap the virtual address space the area occupied. 2215 { 2216 // We need to lock the complete cache chain. 2217 VMCache* topCache = vm_area_get_locked_cache(area); 2218 VMCacheChainLocker cacheChainLocker(topCache); 2219 cacheChainLocker.LockAllSourceCaches(); 2220 2221 // If the area's top cache is a temporary cache and the area is the only 2222 // one referencing it (besides us currently holding a second reference), 2223 // the unmapping code doesn't need to care about preserving the accessed 2224 // and dirty flags of the top cache page mappings. 2225 bool ignoreTopCachePageFlags 2226 = topCache->temporary && topCache->RefCount() == 2; 2227 2228 area->address_space->TranslationMap()->UnmapArea(area, 2229 deletingAddressSpace, ignoreTopCachePageFlags); 2230 } 2231 2232 if (!area->cache->temporary) 2233 area->cache->WriteModified(); 2234 2235 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2236 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2237 2238 arch_vm_unset_memory_type(area); 2239 addressSpace->RemoveArea(area, allocationFlags); 2240 addressSpace->Put(); 2241 2242 area->cache->RemoveArea(area); 2243 area->cache->ReleaseRef(); 2244 2245 addressSpace->DeleteArea(area, allocationFlags); 2246 } 2247 2248 2249 status_t 2250 vm_delete_area(team_id team, area_id id, bool kernel) 2251 { 2252 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2253 2254 // lock the address space and make sure the area isn't wired 2255 AddressSpaceWriteLocker locker; 2256 VMArea* area; 2257 AreaCacheLocker cacheLocker; 2258 2259 do { 2260 status_t status = locker.SetFromArea(team, id, area); 2261 if (status != B_OK) 2262 return status; 2263 2264 cacheLocker.SetTo(area); 2265 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2266 2267 cacheLocker.Unlock(); 2268 2269 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2270 return B_NOT_ALLOWED; 2271 2272 delete_area(locker.AddressSpace(), area, false); 2273 return B_OK; 2274 } 2275 2276 2277 /*! Creates a new cache on top of given cache, moves all areas from 2278 the old cache to the new one, and changes the protection of all affected 2279 areas' pages to read-only. If requested, wired pages are moved up to the 2280 new cache and copies are added to the old cache in their place. 2281 Preconditions: 2282 - The given cache must be locked. 2283 - All of the cache's areas' address spaces must be read locked. 2284 - Either the cache must not have any wired ranges or a page reservation for 2285 all wired pages must be provided, so they can be copied. 2286 2287 \param lowerCache The cache on top of which a new cache shall be created. 2288 \param wiredPagesReservation If \c NULL there must not be any wired pages 2289 in \a lowerCache. 
	Otherwise as many pages must be reserved as the cache has wired pages.
	The wired pages are copied in this case.
*/
static status_t
vm_copy_on_write_area(VMCache* lowerCache,
	vm_page_reservation* wiredPagesReservation)
{
	VMCache* upperCache;

	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));

	// We need to separate the cache from its areas. The cache goes one level
	// deeper and we create a new cache in between.

	// create an anonymous cache
	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
		0, dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
		VM_PRIORITY_USER);
	if (status != B_OK)
		return status;

	upperCache->Lock();

	upperCache->temporary = 1;
	upperCache->virtual_base = lowerCache->virtual_base;
	upperCache->virtual_end = lowerCache->virtual_end;

	// transfer the lower cache areas to the upper cache
	rw_lock_write_lock(&sAreaCacheLock);
	upperCache->TransferAreas(lowerCache);
	rw_lock_write_unlock(&sAreaCacheLock);

	lowerCache->AddConsumer(upperCache);

	// We now need to remap all pages from all of the cache's areas read-only,
	// so that a copy will be created on next write access. If there are wired
	// pages, we keep their protection, move them to the upper cache and create
	// copies for the lower cache.
	if (wiredPagesReservation != NULL) {
		// We need to handle wired pages -- iterate through the cache's pages.
		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
				vm_page* page = it.Next();) {
			if (page->WiredCount() > 0) {
				// allocate a new page and copy the wired one
				vm_page* copiedPage = vm_page_allocate_page(
					wiredPagesReservation, PAGE_STATE_ACTIVE);

				vm_memcpy_physical_page(
					copiedPage->physical_page_number * B_PAGE_SIZE,
					page->physical_page_number * B_PAGE_SIZE);

				// move the wired page to the upper cache (note: removing is OK
				// with the SplayTree iterator) and insert the copy
				upperCache->MovePage(page);
				lowerCache->InsertPage(copiedPage,
					page->cache_offset * B_PAGE_SIZE);

				DEBUG_PAGE_ACCESS_END(copiedPage);
			} else {
				// Change the protection of this page in all areas.
				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
						tempArea = tempArea->cache_next) {
					// The area must be readable in the same way it was
					// previously writable.
					uint32 protection = B_KERNEL_READ_AREA;
					if ((tempArea->protection & B_READ_AREA) != 0)
						protection |= B_READ_AREA;

					VMTranslationMap* map
						= tempArea->address_space->TranslationMap();
					map->Lock();
					map->ProtectPage(tempArea,
						virtual_page_address(tempArea, page), protection);
					map->Unlock();
				}
			}
		}
	} else {
		ASSERT(lowerCache->WiredPagesCount() == 0);

		// just change the protection of all areas
		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
				tempArea = tempArea->cache_next) {
			// The area must be readable in the same way it was previously
			// writable.
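			// (That is: keep B_READ_AREA if it was set, but drop the write
			// bits, so that the next write access faults and triggers the
			// actual copy-on-write.)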
2374 uint32 protection = B_KERNEL_READ_AREA; 2375 if ((tempArea->protection & B_READ_AREA) != 0) 2376 protection |= B_READ_AREA; 2377 2378 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2379 map->Lock(); 2380 map->ProtectArea(tempArea, protection); 2381 map->Unlock(); 2382 } 2383 } 2384 2385 vm_area_put_locked_cache(upperCache); 2386 2387 return B_OK; 2388 } 2389 2390 2391 area_id 2392 vm_copy_area(team_id team, const char* name, void** _address, 2393 uint32 addressSpec, uint32 protection, area_id sourceID) 2394 { 2395 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2396 2397 if ((protection & B_KERNEL_PROTECTION) == 0) { 2398 // set the same protection for the kernel as for userland 2399 protection |= B_KERNEL_READ_AREA; 2400 if (writableCopy) 2401 protection |= B_KERNEL_WRITE_AREA; 2402 } 2403 2404 // Do the locking: target address space, all address spaces associated with 2405 // the source cache, and the cache itself. 2406 MultiAddressSpaceLocker locker; 2407 VMAddressSpace* targetAddressSpace; 2408 VMCache* cache; 2409 VMArea* source; 2410 AreaCacheLocker cacheLocker; 2411 status_t status; 2412 bool sharedArea; 2413 2414 page_num_t wiredPages = 0; 2415 vm_page_reservation wiredPagesReservation; 2416 2417 bool restart; 2418 do { 2419 restart = false; 2420 2421 locker.Unset(); 2422 status = locker.AddTeam(team, true, &targetAddressSpace); 2423 if (status == B_OK) { 2424 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2425 &cache); 2426 } 2427 if (status != B_OK) 2428 return status; 2429 2430 cacheLocker.SetTo(cache, true); // already locked 2431 2432 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2433 2434 page_num_t oldWiredPages = wiredPages; 2435 wiredPages = 0; 2436 2437 // If the source area isn't shared, count the number of wired pages in 2438 // the cache and reserve as many pages. 2439 if (!sharedArea) { 2440 wiredPages = cache->WiredPagesCount(); 2441 2442 if (wiredPages > oldWiredPages) { 2443 cacheLocker.Unlock(); 2444 locker.Unlock(); 2445 2446 if (oldWiredPages > 0) 2447 vm_page_unreserve_pages(&wiredPagesReservation); 2448 2449 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2450 VM_PRIORITY_USER); 2451 2452 restart = true; 2453 } 2454 } else if (oldWiredPages > 0) 2455 vm_page_unreserve_pages(&wiredPagesReservation); 2456 } while (restart); 2457 2458 // unreserve pages later 2459 struct PagesUnreserver { 2460 PagesUnreserver(vm_page_reservation* reservation) 2461 : 2462 fReservation(reservation) 2463 { 2464 } 2465 2466 ~PagesUnreserver() 2467 { 2468 if (fReservation != NULL) 2469 vm_page_unreserve_pages(fReservation); 2470 } 2471 2472 private: 2473 vm_page_reservation* fReservation; 2474 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2475 2476 if (addressSpec == B_CLONE_ADDRESS) { 2477 addressSpec = B_EXACT_ADDRESS; 2478 *_address = (void*)source->Base(); 2479 } 2480 2481 // First, create a cache on top of the source area, respectively use the 2482 // existing one, if this is a shared area. 2483 2484 VMArea* target; 2485 virtual_address_restrictions addressRestrictions = {}; 2486 addressRestrictions.address = *_address; 2487 addressRestrictions.address_specification = addressSpec; 2488 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2489 name, source->Size(), source->wiring, protection, 2490 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2491 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2492 &addressRestrictions, true, &target, _address); 2493 if (status < B_OK) 2494 return status; 2495 2496 if (sharedArea) { 2497 // The new area uses the old area's cache, but map_backing_store() 2498 // hasn't acquired a ref. So we have to do that now. 2499 cache->AcquireRefLocked(); 2500 } 2501 2502 // If the source area is writable, we need to move it one layer up as well 2503 2504 if (!sharedArea) { 2505 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2506 // TODO: do something more useful if this fails! 2507 if (vm_copy_on_write_area(cache, 2508 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2509 panic("vm_copy_on_write_area() failed!\n"); 2510 } 2511 } 2512 } 2513 2514 // we return the ID of the newly created area 2515 return target->id; 2516 } 2517 2518 2519 static status_t 2520 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2521 bool kernel) 2522 { 2523 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2524 "%#lx)\n", team, areaID, newProtection)); 2525 2526 if (!arch_vm_supports_protection(newProtection)) 2527 return B_NOT_SUPPORTED; 2528 2529 bool becomesWritable 2530 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2531 2532 // lock address spaces and cache 2533 MultiAddressSpaceLocker locker; 2534 VMCache* cache; 2535 VMArea* area; 2536 status_t status; 2537 AreaCacheLocker cacheLocker; 2538 bool isWritable; 2539 2540 bool restart; 2541 do { 2542 restart = false; 2543 2544 locker.Unset(); 2545 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2546 if (status != B_OK) 2547 return status; 2548 2549 cacheLocker.SetTo(cache, true); // already locked 2550 2551 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2552 return B_NOT_ALLOWED; 2553 2554 if (area->protection == newProtection) 2555 return B_OK; 2556 2557 if (team != VMAddressSpace::KernelID() 2558 && area->address_space->ID() != team) { 2559 // unless you're the kernel, you are only allowed to set 2560 // the protection of your own areas 2561 return B_NOT_ALLOWED; 2562 } 2563 2564 isWritable 2565 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2566 2567 // Make sure the area (respectively, if we're going to call 2568 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2569 // wired ranges. 2570 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2571 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2572 otherArea = otherArea->cache_next) { 2573 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2574 restart = true; 2575 break; 2576 } 2577 } 2578 } else { 2579 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2580 restart = true; 2581 } 2582 } while (restart); 2583 2584 bool changePageProtection = true; 2585 bool changeTopCachePagesOnly = false; 2586 2587 if (isWritable && !becomesWritable) { 2588 // writable -> !writable 2589 2590 if (cache->source != NULL && cache->temporary) { 2591 if (cache->CountWritableAreas(area) == 0) { 2592 // Since this cache now lives from the pages in its source cache, 2593 // we can change the cache's commitment to take only those pages 2594 // into account that really are in this cache. 2595 2596 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2597 team == VMAddressSpace::KernelID() 2598 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2599 2600 // TODO: we may be able to join with our source cache, if 2601 // count == 0 2602 } 2603 } 2604 2605 // If only the writability changes, we can just remap the pages of the 2606 // top cache, since the pages of lower caches are mapped read-only 2607 // anyway. That's advantageous only, if the number of pages in the cache 2608 // is significantly smaller than the number of pages in the area, 2609 // though. 2610 if (newProtection 2611 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2612 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2613 changeTopCachePagesOnly = true; 2614 } 2615 } else if (!isWritable && becomesWritable) { 2616 // !writable -> writable 2617 2618 if (!cache->consumers.IsEmpty()) { 2619 // There are consumers -- we have to insert a new cache. Fortunately 2620 // vm_copy_on_write_area() does everything that's needed. 2621 changePageProtection = false; 2622 status = vm_copy_on_write_area(cache, NULL); 2623 } else { 2624 // No consumers, so we don't need to insert a new one. 2625 if (cache->source != NULL && cache->temporary) { 2626 // the cache's commitment must contain all possible pages 2627 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2628 team == VMAddressSpace::KernelID() 2629 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2630 } 2631 2632 if (status == B_OK && cache->source != NULL) { 2633 // There's a source cache, hence we can't just change all pages' 2634 // protection or we might allow writing into pages belonging to 2635 // a lower cache. 2636 changeTopCachePagesOnly = true; 2637 } 2638 } 2639 } else { 2640 // we don't have anything special to do in all other cases 2641 } 2642 2643 if (status == B_OK) { 2644 // remap existing pages in this cache 2645 if (changePageProtection) { 2646 VMTranslationMap* map = area->address_space->TranslationMap(); 2647 map->Lock(); 2648 2649 if (changeTopCachePagesOnly) { 2650 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2651 page_num_t lastPageOffset 2652 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2653 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2654 vm_page* page = it.Next();) { 2655 if (page->cache_offset >= firstPageOffset 2656 && page->cache_offset <= lastPageOffset) { 2657 addr_t address = virtual_page_address(area, page); 2658 map->ProtectPage(area, address, newProtection); 2659 } 2660 } 2661 } else 2662 map->ProtectArea(area, newProtection); 2663 2664 map->Unlock(); 2665 } 2666 2667 area->protection = newProtection; 2668 } 2669 2670 return status; 2671 } 2672 2673 2674 status_t 2675 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2676 { 2677 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2678 if (addressSpace == NULL) 2679 return B_BAD_TEAM_ID; 2680 2681 VMTranslationMap* map = addressSpace->TranslationMap(); 2682 2683 map->Lock(); 2684 uint32 dummyFlags; 2685 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2686 map->Unlock(); 2687 2688 addressSpace->Put(); 2689 return status; 2690 } 2691 2692 2693 /*! The page's cache must be locked. 
2694 */ 2695 bool 2696 vm_test_map_modification(vm_page* page) 2697 { 2698 if (page->modified) 2699 return true; 2700 2701 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2702 vm_page_mapping* mapping; 2703 while ((mapping = iterator.Next()) != NULL) { 2704 VMArea* area = mapping->area; 2705 VMTranslationMap* map = area->address_space->TranslationMap(); 2706 2707 phys_addr_t physicalAddress; 2708 uint32 flags; 2709 map->Lock(); 2710 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2711 map->Unlock(); 2712 2713 if ((flags & PAGE_MODIFIED) != 0) 2714 return true; 2715 } 2716 2717 return false; 2718 } 2719 2720 2721 /*! The page's cache must be locked. 2722 */ 2723 void 2724 vm_clear_map_flags(vm_page* page, uint32 flags) 2725 { 2726 if ((flags & PAGE_ACCESSED) != 0) 2727 page->accessed = false; 2728 if ((flags & PAGE_MODIFIED) != 0) 2729 page->modified = false; 2730 2731 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2732 vm_page_mapping* mapping; 2733 while ((mapping = iterator.Next()) != NULL) { 2734 VMArea* area = mapping->area; 2735 VMTranslationMap* map = area->address_space->TranslationMap(); 2736 2737 map->Lock(); 2738 map->ClearFlags(virtual_page_address(area, page), flags); 2739 map->Unlock(); 2740 } 2741 } 2742 2743 2744 /*! Removes all mappings from a page. 2745 After you've called this function, the page is unmapped from memory and 2746 the page's \c accessed and \c modified flags have been updated according 2747 to the state of the mappings. 2748 The page's cache must be locked. 2749 */ 2750 void 2751 vm_remove_all_page_mappings(vm_page* page) 2752 { 2753 while (vm_page_mapping* mapping = page->mappings.Head()) { 2754 VMArea* area = mapping->area; 2755 VMTranslationMap* map = area->address_space->TranslationMap(); 2756 addr_t address = virtual_page_address(area, page); 2757 map->UnmapPage(area, address, false); 2758 } 2759 } 2760 2761 2762 int32 2763 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2764 { 2765 int32 count = 0; 2766 2767 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2768 vm_page_mapping* mapping; 2769 while ((mapping = iterator.Next()) != NULL) { 2770 VMArea* area = mapping->area; 2771 VMTranslationMap* map = area->address_space->TranslationMap(); 2772 2773 bool modified; 2774 if (map->ClearAccessedAndModified(area, 2775 virtual_page_address(area, page), false, modified)) { 2776 count++; 2777 } 2778 2779 page->modified |= modified; 2780 } 2781 2782 2783 if (page->accessed) { 2784 count++; 2785 page->accessed = false; 2786 } 2787 2788 return count; 2789 } 2790 2791 2792 /*! Removes all mappings of a page and/or clears the accessed bits of the 2793 mappings. 2794 The function iterates through the page mappings and removes them until 2795 encountering one that has been accessed. From then on it will continue to 2796 iterate, but only clear the accessed flag of the mapping. The page's 2797 \c modified bit will be updated accordingly, the \c accessed bit will be 2798 cleared. 2799 \return The number of mapping accessed bits encountered, including the 2800 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2801 of the page have been removed. 
*/
int32
vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
{
	ASSERT(page->WiredCount() == 0);

	if (page->accessed)
		return vm_clear_page_mapping_accessed_flags(page);

	while (vm_page_mapping* mapping = page->mappings.Head()) {
		VMArea* area = mapping->area;
		VMTranslationMap* map = area->address_space->TranslationMap();
		addr_t address = virtual_page_address(area, page);
		bool modified = false;
		if (map->ClearAccessedAndModified(area, address, true, modified)) {
			page->accessed = true;
			page->modified |= modified;
			return vm_clear_page_mapping_accessed_flags(page);
		}
		page->modified |= modified;
	}

	return 0;
}


static int
display_mem(int argc, char** argv)
{
	bool physical = false;
	addr_t copyAddress;
	int32 displayWidth;
	int32 itemSize;
	int32 num = -1;
	addr_t address;
	int i = 1, j;

	if (argc > 1 && argv[1][0] == '-') {
		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
			physical = true;
			i++;
		} else
			i = 99;
	}

	if (argc < i + 1 || argc > i + 2) {
		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
			"\tdl - 8 bytes\n"
			"\tdw - 4 bytes\n"
			"\tds - 2 bytes\n"
			"\tdb - 1 byte\n"
			"\tstring - a whole string\n"
			" -p or --physical only allows memory from a single page to be "
			"displayed.\n");
		return 0;
	}

	address = parse_expression(argv[i]);

	if (argc > i + 1)
		num = parse_expression(argv[i + 1]);

	// build the format string
	if (strcmp(argv[0], "db") == 0) {
		itemSize = 1;
		displayWidth = 16;
	} else if (strcmp(argv[0], "ds") == 0) {
		itemSize = 2;
		displayWidth = 8;
	} else if (strcmp(argv[0], "dw") == 0) {
		itemSize = 4;
		displayWidth = 4;
	} else if (strcmp(argv[0], "dl") == 0) {
		itemSize = 8;
		displayWidth = 2;
	} else if (strcmp(argv[0], "string") == 0) {
		itemSize = 1;
		displayWidth = -1;
	} else {
		kprintf("display_mem called in an invalid way!\n");
		return 0;
	}

	if (num <= 0)
		num = displayWidth;

	void* physicalPageHandle = NULL;

	if (physical) {
		int32 offset = address & (B_PAGE_SIZE - 1);
		if (num * itemSize + offset > B_PAGE_SIZE) {
			num = (B_PAGE_SIZE - offset) / itemSize;
			kprintf("NOTE: number of bytes has been cut to page size\n");
		}

		address = ROUNDDOWN(address, B_PAGE_SIZE);

		if (vm_get_physical_page_debug(address, &copyAddress,
				&physicalPageHandle) != B_OK) {
			kprintf("getting the hardware page failed.");
			return 0;
		}

		address += offset;
		copyAddress += offset;
	} else
		copyAddress = address;

	if (!strcmp(argv[0], "string")) {
		kprintf("%p \"", (char*)copyAddress);

		// string mode
		for (i = 0; true; i++) {
			char c;
			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
					!= B_OK
				|| c == '\0') {
				break;
			}

			if (c == '\n')
				kprintf("\\n");
			else if (c == '\t')
				kprintf("\\t");
			else {
				if (!isprint(c))
					c = '.';

				kprintf("%c", c);
			}
		}

		kprintf("\"\n");
	} else {
		// number mode
		for (i = 0; i < num; i++) {
			uint32 value;

			if ((i % displayWidth) == 0) {
				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
				if (i != 0)
2943 kprintf("\n"); 2944 2945 kprintf("[0x%lx] ", address + i * itemSize); 2946 2947 for (j = 0; j < displayed; j++) { 2948 char c; 2949 if (debug_memcpy(B_CURRENT_TEAM, &c, 2950 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 2951 displayed = j; 2952 break; 2953 } 2954 if (!isprint(c)) 2955 c = '.'; 2956 2957 kprintf("%c", c); 2958 } 2959 if (num > displayWidth) { 2960 // make sure the spacing in the last line is correct 2961 for (j = displayed; j < displayWidth * itemSize; j++) 2962 kprintf(" "); 2963 } 2964 kprintf(" "); 2965 } 2966 2967 if (debug_memcpy(B_CURRENT_TEAM, &value, 2968 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 2969 kprintf("read fault"); 2970 break; 2971 } 2972 2973 switch (itemSize) { 2974 case 1: 2975 kprintf(" %02x", *(uint8*)&value); 2976 break; 2977 case 2: 2978 kprintf(" %04x", *(uint16*)&value); 2979 break; 2980 case 4: 2981 kprintf(" %08lx", *(uint32*)&value); 2982 break; 2983 case 8: 2984 kprintf(" %016Lx", *(uint64*)&value); 2985 break; 2986 } 2987 } 2988 2989 kprintf("\n"); 2990 } 2991 2992 if (physical) { 2993 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 2994 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 2995 } 2996 return 0; 2997 } 2998 2999 3000 static void 3001 dump_cache_tree_recursively(VMCache* cache, int level, 3002 VMCache* highlightCache) 3003 { 3004 // print this cache 3005 for (int i = 0; i < level; i++) 3006 kprintf(" "); 3007 if (cache == highlightCache) 3008 kprintf("%p <--\n", cache); 3009 else 3010 kprintf("%p\n", cache); 3011 3012 // recursively print its consumers 3013 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3014 VMCache* consumer = it.Next();) { 3015 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3016 } 3017 } 3018 3019 3020 static int 3021 dump_cache_tree(int argc, char** argv) 3022 { 3023 if (argc != 2 || !strcmp(argv[1], "--help")) { 3024 kprintf("usage: %s <address>\n", argv[0]); 3025 return 0; 3026 } 3027 3028 addr_t address = parse_expression(argv[1]); 3029 if (address == 0) 3030 return 0; 3031 3032 VMCache* cache = (VMCache*)address; 3033 VMCache* root = cache; 3034 3035 // find the root cache (the transitive source) 3036 while (root->source != NULL) 3037 root = root->source; 3038 3039 dump_cache_tree_recursively(root, 0, cache); 3040 3041 return 0; 3042 } 3043 3044 3045 const char* 3046 vm_cache_type_to_string(int32 type) 3047 { 3048 switch (type) { 3049 case CACHE_TYPE_RAM: 3050 return "RAM"; 3051 case CACHE_TYPE_DEVICE: 3052 return "device"; 3053 case CACHE_TYPE_VNODE: 3054 return "vnode"; 3055 case CACHE_TYPE_NULL: 3056 return "null"; 3057 3058 default: 3059 return "unknown"; 3060 } 3061 } 3062 3063 3064 #if DEBUG_CACHE_LIST 3065 3066 static void 3067 update_cache_info_recursively(VMCache* cache, cache_info& info) 3068 { 3069 info.page_count += cache->page_count; 3070 if (cache->type == CACHE_TYPE_RAM) 3071 info.committed += cache->committed_size; 3072 3073 // recurse 3074 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3075 VMCache* consumer = it.Next();) { 3076 update_cache_info_recursively(consumer, info); 3077 } 3078 } 3079 3080 3081 static int 3082 cache_info_compare_page_count(const void* _a, const void* _b) 3083 { 3084 const cache_info* a = (const cache_info*)_a; 3085 const cache_info* b = (const cache_info*)_b; 3086 if (a->page_count == b->page_count) 3087 return 0; 3088 return a->page_count < b->page_count ? 
1 : -1; 3089 } 3090 3091 3092 static int 3093 cache_info_compare_committed(const void* _a, const void* _b) 3094 { 3095 const cache_info* a = (const cache_info*)_a; 3096 const cache_info* b = (const cache_info*)_b; 3097 if (a->committed == b->committed) 3098 return 0; 3099 return a->committed < b->committed ? 1 : -1; 3100 } 3101 3102 3103 static void 3104 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3105 { 3106 for (int i = 0; i < level; i++) 3107 kprintf(" "); 3108 3109 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3110 vm_cache_type_to_string(cache->type), cache->virtual_base, 3111 cache->virtual_end, cache->page_count); 3112 3113 if (level == 0) 3114 kprintf("/%lu", info.page_count); 3115 3116 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3117 kprintf(", committed: %lld", cache->committed_size); 3118 3119 if (level == 0) 3120 kprintf("/%lu", info.committed); 3121 } 3122 3123 // areas 3124 if (cache->areas != NULL) { 3125 VMArea* area = cache->areas; 3126 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3127 area->address_space->ID()); 3128 3129 while (area->cache_next != NULL) { 3130 area = area->cache_next; 3131 kprintf(", %ld", area->id); 3132 } 3133 } 3134 3135 kputs("\n"); 3136 3137 // recurse 3138 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3139 VMCache* consumer = it.Next();) { 3140 dump_caches_recursively(consumer, info, level + 1); 3141 } 3142 } 3143 3144 3145 static int 3146 dump_caches(int argc, char** argv) 3147 { 3148 if (sCacheInfoTable == NULL) { 3149 kprintf("No cache info table!\n"); 3150 return 0; 3151 } 3152 3153 bool sortByPageCount = true; 3154 3155 for (int32 i = 1; i < argc; i++) { 3156 if (strcmp(argv[i], "-c") == 0) { 3157 sortByPageCount = false; 3158 } else { 3159 print_debugger_command_usage(argv[0]); 3160 return 0; 3161 } 3162 } 3163 3164 uint32 totalCount = 0; 3165 uint32 rootCount = 0; 3166 off_t totalCommitted = 0; 3167 page_num_t totalPages = 0; 3168 3169 VMCache* cache = gDebugCacheList; 3170 while (cache) { 3171 totalCount++; 3172 if (cache->source == NULL) { 3173 cache_info stackInfo; 3174 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3175 ? sCacheInfoTable[rootCount] : stackInfo; 3176 rootCount++; 3177 info.cache = cache; 3178 info.page_count = 0; 3179 info.committed = 0; 3180 update_cache_info_recursively(cache, info); 3181 totalCommitted += info.committed; 3182 totalPages += info.page_count; 3183 } 3184 3185 cache = cache->debug_next; 3186 } 3187 3188 if (rootCount <= (uint32)kCacheInfoTableCount) { 3189 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3190 sortByPageCount 3191 ? &cache_info_compare_page_count 3192 : &cache_info_compare_committed); 3193 } 3194 3195 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3196 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3197 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3198 "tree...\n\n", totalCount, rootCount, 3199 sortByPageCount ? "page count" : "committed size"); 3200 3201 if (rootCount <= (uint32)kCacheInfoTableCount) { 3202 for (uint32 i = 0; i < rootCount; i++) { 3203 cache_info& info = sCacheInfoTable[i]; 3204 dump_caches_recursively(info.cache, info, 0); 3205 } 3206 } else 3207 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3208 3209 return 0; 3210 } 3211 3212 #endif // DEBUG_CACHE_LIST 3213 3214 3215 static int 3216 dump_cache(int argc, char** argv) 3217 { 3218 VMCache* cache; 3219 bool showPages = false; 3220 int i = 1; 3221 3222 if (argc < 2 || !strcmp(argv[1], "--help")) { 3223 kprintf("usage: %s [-ps] <address>\n" 3224 " if -p is specified, all pages are shown, if -s is used\n" 3225 " only the cache info is shown respectively.\n", argv[0]); 3226 return 0; 3227 } 3228 while (argv[i][0] == '-') { 3229 char* arg = argv[i] + 1; 3230 while (arg[0]) { 3231 if (arg[0] == 'p') 3232 showPages = true; 3233 arg++; 3234 } 3235 i++; 3236 } 3237 if (argv[i] == NULL) { 3238 kprintf("%s: invalid argument, pass address\n", argv[0]); 3239 return 0; 3240 } 3241 3242 addr_t address = parse_expression(argv[i]); 3243 if (address == 0) 3244 return 0; 3245 3246 cache = (VMCache*)address; 3247 3248 cache->Dump(showPages); 3249 3250 set_debug_variable("_sourceCache", (addr_t)cache->source); 3251 3252 return 0; 3253 } 3254 3255 3256 static void 3257 dump_area_struct(VMArea* area, bool mappings) 3258 { 3259 kprintf("AREA: %p\n", area); 3260 kprintf("name:\t\t'%s'\n", area->name); 3261 kprintf("owner:\t\t0x%lx\n", area->address_space->ID()); 3262 kprintf("id:\t\t0x%lx\n", area->id); 3263 kprintf("base:\t\t0x%lx\n", area->Base()); 3264 kprintf("size:\t\t0x%lx\n", area->Size()); 3265 kprintf("protection:\t0x%lx\n", area->protection); 3266 kprintf("wiring:\t\t0x%x\n", area->wiring); 3267 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3268 kprintf("cache:\t\t%p\n", area->cache); 3269 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3270 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3271 kprintf("cache_next:\t%p\n", area->cache_next); 3272 kprintf("cache_prev:\t%p\n", area->cache_prev); 3273 3274 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3275 if (mappings) { 3276 kprintf("page mappings:\n"); 3277 while (iterator.HasNext()) { 3278 vm_page_mapping* mapping = iterator.Next(); 3279 kprintf(" %p", mapping->page); 3280 } 3281 kprintf("\n"); 3282 } else { 3283 uint32 count = 0; 3284 while (iterator.Next() != NULL) { 3285 count++; 3286 } 3287 kprintf("page mappings:\t%lu\n", count); 3288 } 3289 } 3290 3291 3292 static int 3293 dump_area(int argc, char** argv) 3294 { 3295 bool mappings = false; 3296 bool found = false; 3297 int32 index = 1; 3298 VMArea* area; 3299 addr_t num; 3300 3301 if (argc < 2 || !strcmp(argv[1], "--help")) { 3302 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3303 "All areas matching either id/address/name are listed. 
You can\n" 3304 "force to check only a specific item by prefixing the specifier\n" 3305 "with the id/contains/address/name keywords.\n" 3306 "-m shows the area's mappings as well.\n"); 3307 return 0; 3308 } 3309 3310 if (!strcmp(argv[1], "-m")) { 3311 mappings = true; 3312 index++; 3313 } 3314 3315 int32 mode = 0xf; 3316 if (!strcmp(argv[index], "id")) 3317 mode = 1; 3318 else if (!strcmp(argv[index], "contains")) 3319 mode = 2; 3320 else if (!strcmp(argv[index], "name")) 3321 mode = 4; 3322 else if (!strcmp(argv[index], "address")) 3323 mode = 0; 3324 if (mode != 0xf) 3325 index++; 3326 3327 if (index >= argc) { 3328 kprintf("No area specifier given.\n"); 3329 return 0; 3330 } 3331 3332 num = parse_expression(argv[index]); 3333 3334 if (mode == 0) { 3335 dump_area_struct((struct VMArea*)num, mappings); 3336 } else { 3337 // walk through the area list, looking for the arguments as a name 3338 3339 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3340 while ((area = it.Next()) != NULL) { 3341 if (((mode & 4) != 0 && area->name != NULL 3342 && !strcmp(argv[index], area->name)) 3343 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3344 || (((mode & 2) != 0 && area->Base() <= num 3345 && area->Base() + area->Size() > num))))) { 3346 dump_area_struct(area, mappings); 3347 found = true; 3348 } 3349 } 3350 3351 if (!found) 3352 kprintf("could not find area %s (%ld)\n", argv[index], num); 3353 } 3354 3355 return 0; 3356 } 3357 3358 3359 static int 3360 dump_area_list(int argc, char** argv) 3361 { 3362 VMArea* area; 3363 const char* name = NULL; 3364 int32 id = 0; 3365 3366 if (argc > 1) { 3367 id = parse_expression(argv[1]); 3368 if (id == 0) 3369 name = argv[1]; 3370 } 3371 3372 kprintf("addr id base\t\tsize protect lock name\n"); 3373 3374 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3375 while ((area = it.Next()) != NULL) { 3376 if ((id != 0 && area->address_space->ID() != id) 3377 || (name != NULL && strstr(area->name, name) == NULL)) 3378 continue; 3379 3380 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 3381 (void*)area->Base(), (void*)area->Size(), area->protection, 3382 area->wiring, area->name); 3383 } 3384 return 0; 3385 } 3386 3387 3388 static int 3389 dump_available_memory(int argc, char** argv) 3390 { 3391 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3392 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3393 return 0; 3394 } 3395 3396 3397 /*! Deletes all areas and reserved regions in the given address space. 3398 3399 The caller must ensure that none of the areas has any wired ranges. 3400 3401 \param addressSpace The address space. 3402 \param deletingAddressSpace \c true, if the address space is in the process 3403 of being deleted. 
3404 */ 3405 void 3406 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3407 { 3408 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 3409 addressSpace->ID())); 3410 3411 addressSpace->WriteLock(); 3412 3413 // remove all reserved areas in this address space 3414 addressSpace->UnreserveAllAddressRanges(0); 3415 3416 // delete all the areas in this address space 3417 while (VMArea* area = addressSpace->FirstArea()) { 3418 ASSERT(!area->IsWired()); 3419 delete_area(addressSpace, area, deletingAddressSpace); 3420 } 3421 3422 addressSpace->WriteUnlock(); 3423 } 3424 3425 3426 static area_id 3427 vm_area_for(addr_t address, bool kernel) 3428 { 3429 team_id team; 3430 if (IS_USER_ADDRESS(address)) { 3431 // we try the user team address space, if any 3432 team = VMAddressSpace::CurrentID(); 3433 if (team < 0) 3434 return team; 3435 } else 3436 team = VMAddressSpace::KernelID(); 3437 3438 AddressSpaceReadLocker locker(team); 3439 if (!locker.IsLocked()) 3440 return B_BAD_TEAM_ID; 3441 3442 VMArea* area = locker.AddressSpace()->LookupArea(address); 3443 if (area != NULL) { 3444 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3445 return B_ERROR; 3446 3447 return area->id; 3448 } 3449 3450 return B_ERROR; 3451 } 3452 3453 3454 /*! Frees physical pages that were used during the boot process. 3455 \a end is inclusive. 3456 */ 3457 static void 3458 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3459 { 3460 // free all physical pages in the specified range 3461 3462 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3463 phys_addr_t physicalAddress; 3464 uint32 flags; 3465 3466 if (map->Query(current, &physicalAddress, &flags) == B_OK 3467 && (flags & PAGE_PRESENT) != 0) { 3468 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3469 if (page != NULL && page->State() != PAGE_STATE_FREE 3470 && page->State() != PAGE_STATE_CLEAR 3471 && page->State() != PAGE_STATE_UNUSED) { 3472 DEBUG_PAGE_ACCESS_START(page); 3473 vm_page_set_state(page, PAGE_STATE_FREE); 3474 } 3475 } 3476 } 3477 3478 // unmap the memory 3479 map->Unmap(start, end); 3480 } 3481 3482 3483 void 3484 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3485 { 3486 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3487 addr_t end = start + (size - 1); 3488 addr_t lastEnd = start; 3489 3490 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3491 (void*)start, (void*)end)); 3492 3493 // The areas are sorted in virtual address space order, so 3494 // we just have to find the holes between them that fall 3495 // into the area we should dispose 3496 3497 map->Lock(); 3498 3499 for (VMAddressSpace::AreaIterator it 3500 = VMAddressSpace::Kernel()->GetAreaIterator(); 3501 VMArea* area = it.Next();) { 3502 addr_t areaStart = area->Base(); 3503 addr_t areaEnd = areaStart + (area->Size() - 1); 3504 3505 if (areaEnd < start) 3506 continue; 3507 3508 if (areaStart > end) { 3509 // we are done, the area is already beyond of what we have to free 3510 break; 3511 } 3512 3513 if (areaStart > lastEnd) { 3514 // this is something we can free 3515 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3516 (void*)areaStart)); 3517 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3518 } 3519 3520 if (areaEnd >= end) { 3521 lastEnd = areaEnd; 3522 // no +1 to prevent potential overflow 3523 break; 3524 } 3525 3526 lastEnd = areaEnd + 1; 3527 } 3528 3529 if (lastEnd < end) { 
		// we can also get rid of some space at the end of the area
		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
			(void*)end));
		unmap_and_free_physical_pages(map, lastEnd, end);
	}

	map->Unlock();
}


static void
create_preloaded_image_areas(struct preloaded_image* image)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	int32 length;

	// use file name to create a good area name
	char* fileName = strrchr(image->name, '/');
	if (fileName == NULL)
		fileName = image->name;
	else
		fileName++;

	length = strlen(fileName);
	// make sure there is enough space for the suffix
	if (length > 25)
		length = 25;

	memcpy(name, fileName, length);
	strcpy(name + length, "_text");
	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		// this will later be remapped read-only/executable by the
		// ELF initialization code

	strcpy(name + length, "_data");
	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}


/*!	Frees all previously allocated kernel args areas from the kernel_args
	structure. Any boot loader resources contained in those arguments must not
	be accessed anymore past this point.
*/
void
vm_free_kernel_args(kernel_args* args)
{
	uint32 i;

	TRACE(("vm_free_kernel_args()\n"));

	for (i = 0; i < args->num_kernel_args_ranges; i++) {
		area_id area = area_for((void*)args->kernel_args_range[i].start);
		if (area >= B_OK)
			delete_area(area);
	}
}


static void
allocate_kernel_args(kernel_args* args)
{
	TRACE(("allocate_kernel_args()\n"));

	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
		void* address = (void*)args->kernel_args_range[i].start;

		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
			args->kernel_args_range[i].size, B_ALREADY_WIRED,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}
}


static void
unreserve_boot_loader_ranges(kernel_args* args)
{
	TRACE(("unreserve_boot_loader_ranges()\n"));

	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
		vm_unreserve_address_range(VMAddressSpace::KernelID(),
			(void*)args->virtual_allocated_range[i].start,
			args->virtual_allocated_range[i].size);
	}
}


static void
reserve_boot_loader_ranges(kernel_args* args)
{
	TRACE(("reserve_boot_loader_ranges()\n"));

	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
		void* address = (void*)args->virtual_allocated_range[i].start;

		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
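		// (The reservation below keeps these ranges out of the kernel address
		// space allocator until unreserve_boot_loader_ranges() releases them
		// again.)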
3633 if (!IS_KERNEL_ADDRESS(address)) { 3634 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 3635 address, args->virtual_allocated_range[i].size); 3636 continue; 3637 } 3638 3639 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3640 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3641 if (status < B_OK) 3642 panic("could not reserve boot loader ranges\n"); 3643 } 3644 } 3645 3646 3647 static addr_t 3648 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3649 { 3650 size = PAGE_ALIGN(size); 3651 3652 // find a slot in the virtual allocation addr range 3653 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3654 // check to see if the space between this one and the last is big enough 3655 addr_t rangeStart = args->virtual_allocated_range[i].start; 3656 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3657 + args->virtual_allocated_range[i - 1].size; 3658 3659 addr_t base = alignment > 0 3660 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3661 3662 if (base >= KERNEL_BASE && base < rangeStart 3663 && rangeStart - base >= size) { 3664 args->virtual_allocated_range[i - 1].size 3665 += base + size - previousRangeEnd; 3666 return base; 3667 } 3668 } 3669 3670 // we hadn't found one between allocation ranges. this is ok. 3671 // see if there's a gap after the last one 3672 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3673 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3674 + args->virtual_allocated_range[lastEntryIndex].size; 3675 addr_t base = alignment > 0 3676 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3677 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3678 args->virtual_allocated_range[lastEntryIndex].size 3679 += base + size - lastRangeEnd; 3680 return base; 3681 } 3682 3683 // see if there's a gap before the first one 3684 addr_t rangeStart = args->virtual_allocated_range[0].start; 3685 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3686 base = rangeStart - size; 3687 if (alignment > 0) 3688 base = ROUNDDOWN(base, alignment); 3689 3690 if (base >= KERNEL_BASE) { 3691 args->virtual_allocated_range[0].start = base; 3692 args->virtual_allocated_range[0].size += rangeStart - base; 3693 return base; 3694 } 3695 } 3696 3697 return 0; 3698 } 3699 3700 3701 static bool 3702 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 3703 { 3704 // TODO: horrible brute-force method of determining if the page can be 3705 // allocated 3706 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3707 if (address >= args->physical_memory_range[i].start 3708 && address < args->physical_memory_range[i].start 3709 + args->physical_memory_range[i].size) 3710 return true; 3711 } 3712 return false; 3713 } 3714 3715 3716 page_num_t 3717 vm_allocate_early_physical_page(kernel_args* args) 3718 { 3719 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3720 phys_addr_t nextPage; 3721 3722 nextPage = args->physical_allocated_range[i].start 3723 + args->physical_allocated_range[i].size; 3724 // see if the page after the next allocated paddr run can be allocated 3725 if (i + 1 < args->num_physical_allocated_ranges 3726 && args->physical_allocated_range[i + 1].size != 0) { 3727 // see if the next page will collide with the next allocated range 3728 if (nextPage >= args->physical_allocated_range[i+1].start) 3729 continue; 3730 } 3731 // see if the next physical page fits in 
the memory block
		if (is_page_in_physical_memory_range(args, nextPage)) {
			// we got one!
			args->physical_allocated_range[i].size += B_PAGE_SIZE;
			return nextPage / B_PAGE_SIZE;
		}
	}

	return 0;
		// could not allocate a block
}


/*!	This one uses the kernel_args' physical and virtual memory ranges to
	allocate some pages before the VM is completely up.
*/
addr_t
vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
	uint32 attributes, addr_t alignment)
{
	if (physicalSize > virtualSize)
		physicalSize = virtualSize;

	// find the vaddr to allocate at
	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);

	// map the pages
	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
		if (physicalAddress == 0)
			panic("error allocating early page!\n");

		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);

		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
			physicalAddress * B_PAGE_SIZE, attributes,
			&vm_allocate_early_physical_page);
	}

	return virtualBase;
}


/*!	The main entry point to initialize the VM. */
status_t
vm_init(kernel_args* args)
{
	struct preloaded_image* image;
	void* address;
	status_t err = 0;
	uint32 i;

	TRACE(("vm_init: entry\n"));
	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
	err = arch_vm_init(args);

	// initialize some globals
	vm_page_init_num_pages(args);
	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;

	slab_init(args);

#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
	size_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
	while (heapSize > sAvailableMemory / 8)
		heapSize /= 2;
	if (heapSize < 1024 * 1024)
		panic("vm_init: go buy some RAM please.");

	// map in the new heap and initialize it
	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
	TRACE(("heap at 0x%lx\n", heapBase));
	heap_init(heapBase, heapSize);
#endif

	// initialize the free page list and physical page mapper
	vm_page_init(args);

	// initialize the cache allocators
	vm_cache_init(args);

	{
		status_t error = VMAreaHash::Init();
		if (error != B_OK)
			panic("vm_init: error initializing area hash table\n");
	}

	VMAddressSpace::Init();
	reserve_boot_loader_ranges(args);

#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
	heap_init_post_area();
#endif

	// Do any further initialization that the architecture dependent layers may
	// need now
	arch_vm_translation_map_init_post_area(args);
	arch_vm_init_post_area(args);
	vm_page_init_post_area(args);
	slab_init_post_area();

	// allocate areas to represent stuff that already exists

#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
#endif

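	// Create areas for the memory the boot loader handed over: the
	// kernel_args ranges, the preloaded images, and the idle threads' kernel
	// stacks.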
allocate_kernel_args(args); 3844 3845 create_preloaded_image_areas(&args->kernel_image); 3846 3847 // allocate areas for preloaded images 3848 for (image = args->preloaded_images; image != NULL; image = image->next) 3849 create_preloaded_image_areas(image); 3850 3851 // allocate kernel stacks 3852 for (i = 0; i < args->num_cpus; i++) { 3853 char name[64]; 3854 3855 sprintf(name, "idle thread %lu kstack", i + 1); 3856 address = (void*)args->cpu_kstack[i].start; 3857 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3858 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3859 } 3860 3861 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3862 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3863 3864 #if PARANOID_KERNEL_MALLOC 3865 vm_block_address_range("uninitialized heap memory", 3866 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3867 #endif 3868 #if PARANOID_KERNEL_FREE 3869 vm_block_address_range("freed heap memory", 3870 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3871 #endif 3872 3873 // create the object cache for the page mappings 3874 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3875 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3876 NULL, NULL); 3877 if (gPageMappingsObjectCache == NULL) 3878 panic("failed to create page mappings object cache"); 3879 3880 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3881 3882 #if DEBUG_CACHE_LIST 3883 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 3884 virtual_address_restrictions virtualRestrictions = {}; 3885 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 3886 physical_address_restrictions physicalRestrictions = {}; 3887 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 3888 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3889 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 3890 CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions, 3891 (void**)&sCacheInfoTable); 3892 } 3893 #endif // DEBUG_CACHE_LIST 3894 3895 // add some debugger commands 3896 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3897 add_debugger_command("area", &dump_area, 3898 "Dump info about a particular area"); 3899 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3900 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3901 #if DEBUG_CACHE_LIST 3902 if (sCacheInfoTable != NULL) { 3903 add_debugger_command_etc("caches", &dump_caches, 3904 "List all VMCache trees", 3905 "[ \"-c\" ]\n" 3906 "All cache trees are listed sorted in decreasing order by number " 3907 "of\n" 3908 "used pages or, if \"-c\" is specified, by size of committed " 3909 "memory.\n", 3910 0); 3911 } 3912 #endif 3913 add_debugger_command("avail", &dump_available_memory, 3914 "Dump available memory"); 3915 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3916 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3917 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3918 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3919 add_debugger_command("string", &display_mem, "dump strings"); 3920 3921 TRACE(("vm_init: exit\n")); 3922 3923 vm_cache_init_post_heap(); 3924 3925 return err; 3926 } 3927 3928 3929 status_t 3930 vm_init_post_sem(kernel_args* args) 3931 { 3932 // This frees all unused boot loader resources and 
makes its space available 3933 // again 3934 arch_vm_init_end(args); 3935 unreserve_boot_loader_ranges(args); 3936 3937 // fill in all of the semaphores that were not allocated before 3938 // since we're still single threaded and only the kernel address space 3939 // exists, it isn't that hard to find all of the ones we need to create 3940 3941 arch_vm_translation_map_init_post_sem(args); 3942 3943 slab_init_post_sem(); 3944 3945 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3946 heap_init_post_sem(); 3947 #endif 3948 3949 return B_OK; 3950 } 3951 3952 3953 status_t 3954 vm_init_post_thread(kernel_args* args) 3955 { 3956 vm_page_init_post_thread(args); 3957 slab_init_post_thread(); 3958 return heap_init_post_thread(); 3959 } 3960 3961 3962 status_t 3963 vm_init_post_modules(kernel_args* args) 3964 { 3965 return arch_vm_init_post_modules(args); 3966 } 3967 3968 3969 void 3970 permit_page_faults(void) 3971 { 3972 Thread* thread = thread_get_current_thread(); 3973 if (thread != NULL) 3974 atomic_add(&thread->page_faults_allowed, 1); 3975 } 3976 3977 3978 void 3979 forbid_page_faults(void) 3980 { 3981 Thread* thread = thread_get_current_thread(); 3982 if (thread != NULL) 3983 atomic_add(&thread->page_faults_allowed, -1); 3984 } 3985 3986 3987 status_t 3988 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3989 addr_t* newIP) 3990 { 3991 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3992 faultAddress)); 3993 3994 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 3995 3996 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 3997 VMAddressSpace* addressSpace = NULL; 3998 3999 status_t status = B_OK; 4000 *newIP = 0; 4001 atomic_add((int32*)&sPageFaults, 1); 4002 4003 if (IS_KERNEL_ADDRESS(pageAddress)) { 4004 addressSpace = VMAddressSpace::GetKernel(); 4005 } else if (IS_USER_ADDRESS(pageAddress)) { 4006 addressSpace = VMAddressSpace::GetCurrent(); 4007 if (addressSpace == NULL) { 4008 if (!isUser) { 4009 dprintf("vm_page_fault: kernel thread accessing invalid user " 4010 "memory!\n"); 4011 status = B_BAD_ADDRESS; 4012 TPF(PageFaultError(-1, 4013 VMPageFaultTracing 4014 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4015 } else { 4016 // XXX weird state. 
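				// A userland thread faulted on a userland address, yet no
				// userland address space exists for the current team.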
4017 panic("vm_page_fault: non kernel thread accessing user memory " 4018 "that doesn't exist!\n"); 4019 status = B_BAD_ADDRESS; 4020 } 4021 } 4022 } else { 4023 // the hit was probably in the 64k DMZ between kernel and user space 4024 // this keeps a user space thread from passing a buffer that crosses 4025 // into kernel space 4026 status = B_BAD_ADDRESS; 4027 TPF(PageFaultError(-1, 4028 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4029 } 4030 4031 if (status == B_OK) { 4032 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser, 4033 NULL); 4034 } 4035 4036 if (status < B_OK) { 4037 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4038 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4039 strerror(status), address, faultAddress, isWrite, isUser, 4040 thread_get_current_thread_id()); 4041 if (!isUser) { 4042 Thread* thread = thread_get_current_thread(); 4043 if (thread != NULL && thread->fault_handler != 0) { 4044 // this will cause the arch dependant page fault handler to 4045 // modify the IP on the interrupt frame or whatever to return 4046 // to this address 4047 *newIP = thread->fault_handler; 4048 } else { 4049 // unhandled page fault in the kernel 4050 panic("vm_page_fault: unhandled page fault in kernel space at " 4051 "0x%lx, ip 0x%lx\n", address, faultAddress); 4052 } 4053 } else { 4054 #if 1 4055 addressSpace->ReadLock(); 4056 4057 // TODO: remove me once we have proper userland debugging support 4058 // (and tools) 4059 VMArea* area = addressSpace->LookupArea(faultAddress); 4060 4061 Thread* thread = thread_get_current_thread(); 4062 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4063 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4064 thread->name, thread->id, thread->team->Name(), 4065 thread->team->id, isWrite ? "write" : "read", address, 4066 faultAddress, area ? area->name : "???", 4067 faultAddress - (area ? area->Base() : 0x0)); 4068 4069 // We can print a stack trace of the userland thread here. 4070 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4071 // fault and someone is already waiting for a write lock on the same address 4072 // space. This thread will then try to acquire the lock again and will 4073 // be queued after the writer. 4074 # if 0 4075 if (area) { 4076 struct stack_frame { 4077 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4078 struct stack_frame* previous; 4079 void* return_address; 4080 #else 4081 // ... 
4082 #warning writeme 4083 #endif 4084 } frame; 4085 # ifdef __INTEL__ 4086 struct iframe* iframe = i386_get_user_iframe(); 4087 if (iframe == NULL) 4088 panic("iframe is NULL!"); 4089 4090 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4091 sizeof(struct stack_frame)); 4092 # elif defined(__POWERPC__) 4093 struct iframe* iframe = ppc_get_user_iframe(); 4094 if (iframe == NULL) 4095 panic("iframe is NULL!"); 4096 4097 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4098 sizeof(struct stack_frame)); 4099 # else 4100 # warning "vm_page_fault() stack trace won't work" 4101 status = B_ERROR; 4102 # endif 4103 4104 dprintf("stack trace:\n"); 4105 int32 maxFrames = 50; 4106 while (status == B_OK && --maxFrames >= 0 4107 && frame.return_address != NULL) { 4108 dprintf(" %p", frame.return_address); 4109 area = addressSpace->LookupArea( 4110 (addr_t)frame.return_address); 4111 if (area) { 4112 dprintf(" (%s + %#lx)", area->name, 4113 (addr_t)frame.return_address - area->Base()); 4114 } 4115 dprintf("\n"); 4116 4117 status = user_memcpy(&frame, frame.previous, 4118 sizeof(struct stack_frame)); 4119 } 4120 } 4121 # endif // 0 (stack trace) 4122 4123 addressSpace->ReadUnlock(); 4124 #endif 4125 4126 // TODO: the fault_callback is a temporary solution for vm86 4127 if (thread->fault_callback == NULL 4128 || thread->fault_callback(address, faultAddress, isWrite)) { 4129 // If the thread has a signal handler for SIGSEGV, we simply 4130 // send it the signal. Otherwise we notify the user debugger 4131 // first. 4132 struct sigaction action; 4133 if ((sigaction(SIGSEGV, NULL, &action) == 0 4134 && action.sa_handler != SIG_DFL 4135 && action.sa_handler != SIG_IGN) 4136 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4137 SIGSEGV)) { 4138 Signal signal(SIGSEGV, 4139 status == B_PERMISSION_DENIED 4140 ? SEGV_ACCERR : SEGV_MAPERR, 4141 EFAULT, thread->team->id); 4142 signal.SetAddress((void*)address); 4143 send_signal_to_thread(thread, signal, 0); 4144 } 4145 } 4146 } 4147 } 4148 4149 if (addressSpace != NULL) 4150 addressSpace->Put(); 4151 4152 return B_HANDLED_INTERRUPT; 4153 } 4154 4155 4156 struct PageFaultContext { 4157 AddressSpaceReadLocker addressSpaceLocker; 4158 VMCacheChainLocker cacheChainLocker; 4159 4160 VMTranslationMap* map; 4161 VMCache* topCache; 4162 off_t cacheOffset; 4163 vm_page_reservation reservation; 4164 bool isWrite; 4165 4166 // return values 4167 vm_page* page; 4168 bool restart; 4169 4170 4171 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4172 : 4173 addressSpaceLocker(addressSpace, true), 4174 map(addressSpace->TranslationMap()), 4175 isWrite(isWrite) 4176 { 4177 } 4178 4179 ~PageFaultContext() 4180 { 4181 UnlockAll(); 4182 vm_page_unreserve_pages(&reservation); 4183 } 4184 4185 void Prepare(VMCache* topCache, off_t cacheOffset) 4186 { 4187 this->topCache = topCache; 4188 this->cacheOffset = cacheOffset; 4189 page = NULL; 4190 restart = false; 4191 4192 cacheChainLocker.SetTo(topCache); 4193 } 4194 4195 void UnlockAll(VMCache* exceptCache = NULL) 4196 { 4197 topCache = NULL; 4198 addressSpaceLocker.Unlock(); 4199 cacheChainLocker.Unlock(exceptCache); 4200 } 4201 }; 4202 4203 4204 /*! Gets the page that should be mapped into the area. 4205 Returns an error code other than \c B_OK, if the page couldn't be found or 4206 paged in. The locking state of the address space and the caches is undefined 4207 in that case. 
4208 Returns \c B_OK with \c context.restart set to \c true, if the functions 4209 had to unlock the address space and all caches and is supposed to be called 4210 again. 4211 Returns \c B_OK with \c context.restart set to \c false, if the page was 4212 found. It is returned in \c context.page. The address space will still be 4213 locked as well as all caches starting from the top cache to at least the 4214 cache the page lives in. 4215 */ 4216 static status_t 4217 fault_get_page(PageFaultContext& context) 4218 { 4219 VMCache* cache = context.topCache; 4220 VMCache* lastCache = NULL; 4221 vm_page* page = NULL; 4222 4223 while (cache != NULL) { 4224 // We already hold the lock of the cache at this point. 4225 4226 lastCache = cache; 4227 4228 page = cache->LookupPage(context.cacheOffset); 4229 if (page != NULL && page->busy) { 4230 // page must be busy -- wait for it to become unbusy 4231 context.UnlockAll(cache); 4232 cache->ReleaseRefLocked(); 4233 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4234 4235 // restart the whole process 4236 context.restart = true; 4237 return B_OK; 4238 } 4239 4240 if (page != NULL) 4241 break; 4242 4243 // The current cache does not contain the page we're looking for. 4244 4245 // see if the backing store has it 4246 if (cache->HasPage(context.cacheOffset)) { 4247 // insert a fresh page and mark it busy -- we're going to read it in 4248 page = vm_page_allocate_page(&context.reservation, 4249 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4250 cache->InsertPage(page, context.cacheOffset); 4251 4252 // We need to unlock all caches and the address space while reading 4253 // the page in. Keep a reference to the cache around. 4254 cache->AcquireRefLocked(); 4255 context.UnlockAll(); 4256 4257 // read the page in 4258 generic_io_vec vec; 4259 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4260 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4261 4262 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4263 B_PHYSICAL_IO_REQUEST, &bytesRead); 4264 4265 cache->Lock(); 4266 4267 if (status < B_OK) { 4268 // on error remove and free the page 4269 dprintf("reading page from cache %p returned: %s!\n", 4270 cache, strerror(status)); 4271 4272 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4273 cache->RemovePage(page); 4274 vm_page_set_state(page, PAGE_STATE_FREE); 4275 4276 cache->ReleaseRefAndUnlock(); 4277 return status; 4278 } 4279 4280 // mark the page unbusy again 4281 cache->MarkPageUnbusy(page); 4282 4283 DEBUG_PAGE_ACCESS_END(page); 4284 4285 // Since we needed to unlock everything temporarily, the area 4286 // situation might have changed. So we need to restart the whole 4287 // process. 4288 cache->ReleaseRefAndUnlock(); 4289 context.restart = true; 4290 return B_OK; 4291 } 4292 4293 cache = context.cacheChainLocker.LockSourceCache(); 4294 } 4295 4296 if (page == NULL) { 4297 // There was no adequate page, determine the cache for a clean one. 4298 // Read-only pages come in the deepest cache, only the top most cache 4299 // may have direct write access. 4300 cache = context.isWrite ? 
context.topCache : lastCache; 4301 4302 // allocate a clean page 4303 page = vm_page_allocate_page(&context.reservation, 4304 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4305 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4306 page->physical_page_number)); 4307 4308 // insert the new page into our cache 4309 cache->InsertPage(page, context.cacheOffset); 4310 } else if (page->Cache() != context.topCache && context.isWrite) { 4311 // We have a page that has the data we want, but in the wrong cache 4312 // object so we need to copy it and stick it into the top cache. 4313 vm_page* sourcePage = page; 4314 4315 // TODO: If memory is low, it might be a good idea to steal the page 4316 // from our source cache -- if possible, that is. 4317 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4318 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4319 4320 // To not needlessly kill concurrency we unlock all caches but the top 4321 // one while copying the page. Lacking another mechanism to ensure that 4322 // the source page doesn't disappear, we mark it busy. 4323 sourcePage->busy = true; 4324 context.cacheChainLocker.UnlockKeepRefs(true); 4325 4326 // copy the page 4327 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4328 sourcePage->physical_page_number * B_PAGE_SIZE); 4329 4330 context.cacheChainLocker.RelockCaches(true); 4331 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4332 4333 // insert the new page into our cache 4334 context.topCache->InsertPage(page, context.cacheOffset); 4335 } else 4336 DEBUG_PAGE_ACCESS_START(page); 4337 4338 context.page = page; 4339 return B_OK; 4340 } 4341 4342 4343 /*! Makes sure the address in the given address space is mapped. 4344 4345 \param addressSpace The address space. 4346 \param originalAddress The address. Doesn't need to be page aligned. 4347 \param isWrite If \c true the address shall be write-accessible. 4348 \param isUser If \c true the access is requested by a userland team. 4349 \param wirePage On success, if non \c NULL, the wired count of the page 4350 mapped at the given address is incremented and the page is returned 4351 via this parameter. 4352 \param wiredRange If given, this wiredRange is ignored when checking whether 4353 an already mapped page at the virtual address can be unmapped. 4354 \return \c B_OK on success, another error code otherwise. 4355 */ 4356 static status_t 4357 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4358 bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange) 4359 { 4360 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 4361 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 4362 4363 PageFaultContext context(addressSpace, isWrite); 4364 4365 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4366 status_t status = B_OK; 4367 4368 addressSpace->IncrementFaultCount(); 4369 4370 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4371 // the pages upfront makes sure we don't have any cache locked, so that the 4372 // page daemon/thief can do their job without problems. 4373 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4374 originalAddress); 4375 context.addressSpaceLocker.Unlock(); 4376 vm_page_reserve_pages(&context.reservation, reservePages, 4377 addressSpace == VMAddressSpace::Kernel() 4378 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4379 4380 while (true) { 4381 context.addressSpaceLocker.Lock(); 4382 4383 // get the area the fault was in 4384 VMArea* area = addressSpace->LookupArea(address); 4385 if (area == NULL) { 4386 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4387 "space\n", originalAddress); 4388 TPF(PageFaultError(-1, 4389 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4390 status = B_BAD_ADDRESS; 4391 break; 4392 } 4393 4394 // check permissions 4395 uint32 protection = get_area_page_protection(area, address); 4396 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4397 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 4398 (void*)originalAddress); 4399 TPF(PageFaultError(area->id, 4400 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4401 status = B_PERMISSION_DENIED; 4402 break; 4403 } 4404 if (isWrite && (protection 4405 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4406 dprintf("write access attempted on write-protected area 0x%lx at" 4407 " %p\n", area->id, (void*)originalAddress); 4408 TPF(PageFaultError(area->id, 4409 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4410 status = B_PERMISSION_DENIED; 4411 break; 4412 } else if (!isWrite && (protection 4413 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4414 dprintf("read access attempted on read-protected area 0x%lx at" 4415 " %p\n", area->id, (void*)originalAddress); 4416 TPF(PageFaultError(area->id, 4417 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4418 status = B_PERMISSION_DENIED; 4419 break; 4420 } 4421 4422 // We have the area, it was a valid access, so let's try to resolve the 4423 // page fault now. 4424 // At first, the top most cache from the area is investigated. 4425 4426 context.Prepare(vm_area_get_locked_cache(area), 4427 address - area->Base() + area->cache_offset); 4428 4429 // See if this cache has a fault handler -- this will do all the work 4430 // for us. 4431 { 4432 // Note, since the page fault is resolved with interrupts enabled, 4433 // the fault handler could be called more than once for the same 4434 // reason -- the store must take this into account. 4435 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4436 if (status != B_BAD_HANDLER) 4437 break; 4438 } 4439 4440 // The top most cache has no fault handler, so let's see if the cache or 4441 // its sources already have the page we're searching for (we're going 4442 // from top to bottom). 4443 status = fault_get_page(context); 4444 if (status != B_OK) { 4445 TPF(PageFaultError(area->id, status)); 4446 break; 4447 } 4448 4449 if (context.restart) 4450 continue; 4451 4452 // All went fine, all there is left to do is to map the page into the 4453 // address space. 
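		// Three steps remain: adjust the protection if the page comes from a
		// lower cache, get rid of any other page currently mapped at the
		// fault address, and finally map (and possibly wire) the new page.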
4454 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4455 context.page)); 4456 4457 // If the page doesn't reside in the area's cache, we need to make sure 4458 // it's mapped in read-only, so that we cannot overwrite someone else's 4459 // data (copy-on-write) 4460 uint32 newProtection = protection; 4461 if (context.page->Cache() != context.topCache && !isWrite) 4462 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4463 4464 bool unmapPage = false; 4465 bool mapPage = true; 4466 4467 // check whether there's already a page mapped at the address 4468 context.map->Lock(); 4469 4470 phys_addr_t physicalAddress; 4471 uint32 flags; 4472 vm_page* mappedPage = NULL; 4473 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4474 && (flags & PAGE_PRESENT) != 0 4475 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4476 != NULL) { 4477 // Yep there's already a page. If it's ours, we can simply adjust 4478 // its protection. Otherwise we have to unmap it. 4479 if (mappedPage == context.page) { 4480 context.map->ProtectPage(area, address, newProtection); 4481 // Note: We assume that ProtectPage() is atomic (i.e. 4482 // the page isn't temporarily unmapped), otherwise we'd have 4483 // to make sure it isn't wired. 4484 mapPage = false; 4485 } else 4486 unmapPage = true; 4487 } 4488 4489 context.map->Unlock(); 4490 4491 if (unmapPage) { 4492 // If the page is wired, we can't unmap it. Wait until it is unwired 4493 // again and restart. 4494 VMAreaUnwiredWaiter waiter; 4495 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4496 wiredRange)) { 4497 // unlock everything and wait 4498 context.UnlockAll(); 4499 waiter.waitEntry.Wait(); 4500 continue; 4501 } 4502 4503 // Note: The mapped page is a page of a lower cache. We are 4504 // guaranteed to have that cached locked, our new page is a copy of 4505 // that page, and the page is not busy. The logic for that guarantee 4506 // is as follows: Since the page is mapped, it must live in the top 4507 // cache (ruled out above) or any of its lower caches, and there is 4508 // (was before the new page was inserted) no other page in any 4509 // cache between the top cache and the page's cache (otherwise that 4510 // would be mapped instead). That in turn means that our algorithm 4511 // must have found it and therefore it cannot be busy either. 4512 DEBUG_PAGE_ACCESS_START(mappedPage); 4513 unmap_page(area, address); 4514 DEBUG_PAGE_ACCESS_END(mappedPage); 4515 } 4516 4517 if (mapPage) { 4518 if (map_page(area, context.page, address, newProtection, 4519 &context.reservation) != B_OK) { 4520 // Mapping can only fail, when the page mapping object couldn't 4521 // be allocated. Save for the missing mapping everything is 4522 // fine, though. If this was a regular page fault, we'll simply 4523 // leave and probably fault again. To make sure we'll have more 4524 // luck then, we ensure that the minimum object reserve is 4525 // available. 4526 DEBUG_PAGE_ACCESS_END(context.page); 4527 4528 context.UnlockAll(); 4529 4530 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4531 != B_OK) { 4532 // Apparently the situation is serious. Let's get ourselves 4533 // killed. 4534 status = B_NO_MEMORY; 4535 } else if (wirePage != NULL) { 4536 // The caller expects us to wire the page. Since 4537 // object_cache_reserve() succeeded, we should now be able 4538 // to allocate a mapping structure. Restart. 
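					// (A regular fault, in contrast, is simply retried by the
					// faulting instruction, which is why the non-wiring case
					// only breaks out below.)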
4539 continue; 4540 } 4541 4542 break; 4543 } 4544 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4545 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4546 4547 // also wire the page, if requested 4548 if (wirePage != NULL && status == B_OK) { 4549 increment_page_wired_count(context.page); 4550 *wirePage = context.page; 4551 } 4552 4553 DEBUG_PAGE_ACCESS_END(context.page); 4554 4555 break; 4556 } 4557 4558 return status; 4559 } 4560 4561 4562 status_t 4563 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4564 { 4565 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4566 } 4567 4568 status_t 4569 vm_put_physical_page(addr_t vaddr, void* handle) 4570 { 4571 return sPhysicalPageMapper->PutPage(vaddr, handle); 4572 } 4573 4574 4575 status_t 4576 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4577 void** _handle) 4578 { 4579 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4580 } 4581 4582 status_t 4583 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4584 { 4585 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4586 } 4587 4588 4589 status_t 4590 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4591 { 4592 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4593 } 4594 4595 status_t 4596 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4597 { 4598 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4599 } 4600 4601 4602 void 4603 vm_get_info(system_memory_info* info) 4604 { 4605 swap_get_info(info); 4606 4607 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4608 info->page_faults = sPageFaults; 4609 4610 MutexLocker locker(sAvailableMemoryLock); 4611 info->free_memory = sAvailableMemory; 4612 info->needed_memory = sNeededMemory; 4613 } 4614 4615 4616 uint32 4617 vm_num_page_faults(void) 4618 { 4619 return sPageFaults; 4620 } 4621 4622 4623 off_t 4624 vm_available_memory(void) 4625 { 4626 MutexLocker locker(sAvailableMemoryLock); 4627 return sAvailableMemory; 4628 } 4629 4630 4631 off_t 4632 vm_available_not_needed_memory(void) 4633 { 4634 MutexLocker locker(sAvailableMemoryLock); 4635 return sAvailableMemory - sNeededMemory; 4636 } 4637 4638 4639 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4640 debugger. 
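	Unlike the non-debug variant it reads the counters without acquiring
	sAvailableMemoryLock.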
*/
off_t
vm_available_not_needed_memory_debug(void)
{
	return sAvailableMemory - sNeededMemory;
}


size_t
vm_kernel_address_space_left(void)
{
	return VMAddressSpace::Kernel()->FreeSpace();
}


void
vm_unreserve_memory(size_t amount)
{
	mutex_lock(&sAvailableMemoryLock);

	sAvailableMemory += amount;

	mutex_unlock(&sAvailableMemoryLock);
}


status_t
vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
{
	size_t reserve = kMemoryReserveForPriority[priority];

	MutexLocker locker(sAvailableMemoryLock);

	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);

	if (sAvailableMemory >= amount + reserve) {
		sAvailableMemory -= amount;
		return B_OK;
	}

	if (timeout <= 0)
		return B_NO_MEMORY;

	// turn timeout into an absolute timeout
	timeout += system_time();

	// loop until we've got the memory or the timeout occurs
	do {
		sNeededMemory += amount;

		// call the low resource manager
		locker.Unlock();
		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
			B_ABSOLUTE_TIMEOUT, timeout);
		locker.Lock();

		sNeededMemory -= amount;

		if (sAvailableMemory >= amount + reserve) {
			sAvailableMemory -= amount;
			return B_OK;
		}
	} while (timeout > system_time());

	return B_NO_MEMORY;
}


status_t
vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
{
	// NOTE: The caller is responsible for synchronizing calls to this function!

	AddressSpaceReadLocker locker;
	VMArea* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	// nothing to do, if the type doesn't change
	uint32 oldType = area->MemoryType();
	if (type == oldType)
		return B_OK;

	// set the memory type of the area and the mapped pages
	VMTranslationMap* map = area->address_space->TranslationMap();
	map->Lock();
	area->SetMemoryType(type);
	map->ProtectArea(area, area->protection);
	map->Unlock();

	// set the physical memory type
	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
	if (error != B_OK) {
		// reset the memory type of the area and the mapped pages
		map->Lock();
		area->SetMemoryType(oldType);
		map->ProtectArea(area, area->protection);
		map->Unlock();
		return error;
	}

	return B_OK;
}


/*!	This function enforces some protection properties:
	- if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
	- if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
	- if no protection is specified, it defaults to B_KERNEL_READ_AREA
	  and B_KERNEL_WRITE_AREA.
4753 */ 4754 static void 4755 fix_protection(uint32* protection) 4756 { 4757 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4758 if ((*protection & B_USER_PROTECTION) == 0 4759 || (*protection & B_WRITE_AREA) != 0) 4760 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4761 else 4762 *protection |= B_KERNEL_READ_AREA; 4763 } 4764 } 4765 4766 4767 static void 4768 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4769 { 4770 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4771 info->area = area->id; 4772 info->address = (void*)area->Base(); 4773 info->size = area->Size(); 4774 info->protection = area->protection; 4775 info->lock = B_FULL_LOCK; 4776 info->team = area->address_space->ID(); 4777 info->copy_count = 0; 4778 info->in_count = 0; 4779 info->out_count = 0; 4780 // TODO: retrieve real values here! 4781 4782 VMCache* cache = vm_area_get_locked_cache(area); 4783 4784 // Note, this is a simplification; the cache could be larger than this area 4785 info->ram_size = cache->page_count * B_PAGE_SIZE; 4786 4787 vm_area_put_locked_cache(cache); 4788 } 4789 4790 4791 static status_t 4792 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4793 { 4794 // is newSize a multiple of B_PAGE_SIZE? 4795 if (newSize & (B_PAGE_SIZE - 1)) 4796 return B_BAD_VALUE; 4797 4798 // lock all affected address spaces and the cache 4799 VMArea* area; 4800 VMCache* cache; 4801 4802 MultiAddressSpaceLocker locker; 4803 AreaCacheLocker cacheLocker; 4804 4805 status_t status; 4806 size_t oldSize; 4807 bool anyKernelArea; 4808 bool restart; 4809 4810 do { 4811 anyKernelArea = false; 4812 restart = false; 4813 4814 locker.Unset(); 4815 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4816 if (status != B_OK) 4817 return status; 4818 cacheLocker.SetTo(cache, true); // already locked 4819 4820 // enforce restrictions 4821 if (!kernel) { 4822 if ((area->protection & B_KERNEL_AREA) != 0) 4823 return B_NOT_ALLOWED; 4824 // TODO: Enforce all restrictions (team, etc.)! 4825 } 4826 4827 oldSize = area->Size(); 4828 if (newSize == oldSize) 4829 return B_OK; 4830 4831 if (cache->type != CACHE_TYPE_RAM) 4832 return B_NOT_ALLOWED; 4833 4834 if (oldSize < newSize) { 4835 // We need to check if all areas of this cache can be resized. 4836 for (VMArea* current = cache->areas; current != NULL; 4837 current = current->cache_next) { 4838 if (!current->address_space->CanResizeArea(current, newSize)) 4839 return B_ERROR; 4840 anyKernelArea 4841 |= current->address_space == VMAddressSpace::Kernel(); 4842 } 4843 } else { 4844 // We're shrinking the areas, so we must make sure the affected 4845 // ranges are not wired. 4846 for (VMArea* current = cache->areas; current != NULL; 4847 current = current->cache_next) { 4848 anyKernelArea 4849 |= current->address_space == VMAddressSpace::Kernel(); 4850 4851 if (wait_if_area_range_is_wired(current, 4852 current->Base() + newSize, oldSize - newSize, &locker, 4853 &cacheLocker)) { 4854 restart = true; 4855 break; 4856 } 4857 } 4858 } 4859 } while (restart); 4860 4861 // Okay, looks good so far, so let's do it 4862 4863 int priority = kernel && anyKernelArea 4864 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4865 uint32 allocationFlags = kernel && anyKernelArea 4866 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4867 4868 if (oldSize < newSize) { 4869 // Growing the cache can fail, so we do it first. 
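		// (Shrinking, in contrast, cannot fail and is therefore done only
		// after all areas have been resized successfully -- see below.)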
4870 status = cache->Resize(cache->virtual_base + newSize, priority); 4871 if (status != B_OK) 4872 return status; 4873 } 4874 4875 for (VMArea* current = cache->areas; current != NULL; 4876 current = current->cache_next) { 4877 status = current->address_space->ResizeArea(current, newSize, 4878 allocationFlags); 4879 if (status != B_OK) 4880 break; 4881 4882 // We also need to unmap all pages beyond the new size, if the area has 4883 // shrunk 4884 if (newSize < oldSize) { 4885 VMCacheChainLocker cacheChainLocker(cache); 4886 cacheChainLocker.LockAllSourceCaches(); 4887 4888 unmap_pages(current, current->Base() + newSize, 4889 oldSize - newSize); 4890 4891 cacheChainLocker.Unlock(cache); 4892 } 4893 } 4894 4895 if (status == B_OK) { 4896 // Shrink or grow individual page protections if in use. 4897 if (area->page_protections != NULL) { 4898 uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2; 4899 uint8* newProtections 4900 = (uint8*)realloc(area->page_protections, bytes); 4901 if (newProtections == NULL) 4902 status = B_NO_MEMORY; 4903 else { 4904 area->page_protections = newProtections; 4905 4906 if (oldSize < newSize) { 4907 // init the additional page protections to that of the area 4908 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 4909 uint32 areaProtection = area->protection 4910 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 4911 memset(area->page_protections + offset, 4912 areaProtection | (areaProtection << 4), bytes - offset); 4913 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 4914 uint8& entry = area->page_protections[offset - 1]; 4915 entry = (entry & 0x0f) | (areaProtection << 4); 4916 } 4917 } 4918 } 4919 } 4920 } 4921 4922 // shrinking the cache can't fail, so we do it now 4923 if (status == B_OK && newSize < oldSize) 4924 status = cache->Resize(cache->virtual_base + newSize, priority); 4925 4926 if (status != B_OK) { 4927 // Something failed -- resize the areas back to their original size. 4928 // This can fail, too, in which case we're seriously screwed. 4929 for (VMArea* current = cache->areas; current != NULL; 4930 current = current->cache_next) { 4931 if (current->address_space->ResizeArea(current, oldSize, 4932 allocationFlags) != B_OK) { 4933 panic("vm_resize_area(): Failed and not being able to restore " 4934 "original state."); 4935 } 4936 } 4937 4938 cache->Resize(cache->virtual_base + oldSize, priority); 4939 } 4940 4941 // TODO: we must honour the lock restrictions of this area 4942 return status; 4943 } 4944 4945 4946 status_t 4947 vm_memset_physical(phys_addr_t address, int value, size_t length) 4948 { 4949 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4950 } 4951 4952 4953 status_t 4954 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 4955 { 4956 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4957 } 4958 4959 4960 status_t 4961 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 4962 bool user) 4963 { 4964 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4965 } 4966 4967 4968 void 4969 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 4970 { 4971 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4972 } 4973 4974 4975 /*! Copies a range of memory directly from/to a page that might not be mapped 4976 at the moment. 4977 4978 For \a unsafeMemory the current mapping (if any is ignored). The function 4979 walks through the respective area's cache chain to find the physical page 4980 and copies from/to it directly. 
4981 The memory range starting at \a unsafeMemory with a length of \a size bytes 4982 must not cross a page boundary. 4983 4984 \param teamID The team ID identifying the address space \a unsafeMemory is 4985 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 4986 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 4987 is passed, the address space of the thread returned by 4988 debug_get_debugged_thread() is used. 4989 \param unsafeMemory The start of the unsafe memory range to be copied 4990 from/to. 4991 \param buffer A safely accessible kernel buffer to be copied from/to. 4992 \param size The number of bytes to be copied. 4993 \param copyToUnsafe If \c true, memory is copied from \a buffer to 4994 \a unsafeMemory, the other way around otherwise. 4995 */ 4996 status_t 4997 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 4998 size_t size, bool copyToUnsafe) 4999 { 5000 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5001 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5002 return B_BAD_VALUE; 5003 } 5004 5005 // get the address space for the debugged thread 5006 VMAddressSpace* addressSpace; 5007 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5008 addressSpace = VMAddressSpace::Kernel(); 5009 } else if (teamID == B_CURRENT_TEAM) { 5010 Thread* thread = debug_get_debugged_thread(); 5011 if (thread == NULL || thread->team == NULL) 5012 return B_BAD_ADDRESS; 5013 5014 addressSpace = thread->team->address_space; 5015 } else 5016 addressSpace = VMAddressSpace::DebugGet(teamID); 5017 5018 if (addressSpace == NULL) 5019 return B_BAD_ADDRESS; 5020 5021 // get the area 5022 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5023 if (area == NULL) 5024 return B_BAD_ADDRESS; 5025 5026 // search the page 5027 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5028 + area->cache_offset; 5029 VMCache* cache = area->cache; 5030 vm_page* page = NULL; 5031 while (cache != NULL) { 5032 page = cache->DebugLookupPage(cacheOffset); 5033 if (page != NULL) 5034 break; 5035 5036 // Page not found in this cache -- if it is paged out, we must not try 5037 // to get it from lower caches. 5038 if (cache->DebugHasPage(cacheOffset)) 5039 break; 5040 5041 cache = cache->source; 5042 } 5043 5044 if (page == NULL) 5045 return B_UNSUPPORTED; 5046 5047 // copy from/to physical memory 5048 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5049 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5050 5051 if (copyToUnsafe) { 5052 if (page->Cache() != area->cache) 5053 return B_UNSUPPORTED; 5054 5055 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5056 } 5057 5058 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5059 } 5060 5061 5062 // #pragma mark - kernel public API 5063 5064 5065 status_t 5066 user_memcpy(void* to, const void* from, size_t size) 5067 { 5068 // don't allow address overflows 5069 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5070 return B_BAD_ADDRESS; 5071 5072 if (arch_cpu_user_memcpy(to, from, size, 5073 &thread_get_current_thread()->fault_handler) < B_OK) 5074 return B_BAD_ADDRESS; 5075 5076 return B_OK; 5077 } 5078 5079 5080 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5081 the string in \a to, NULL-terminating the result. 5082 5083 \param to Pointer to the destination C-string. 5084 \param from Pointer to the source C-string. 
5085 \param size Size in bytes of the string buffer pointed to by \a to. 5086 5087 \return strlen(\a from). 5088 */ 5089 ssize_t 5090 user_strlcpy(char* to, const char* from, size_t size) 5091 { 5092 if (to == NULL && size != 0) 5093 return B_BAD_VALUE; 5094 if (from == NULL) 5095 return B_BAD_ADDRESS; 5096 5097 // limit size to avoid address overflows 5098 size_t maxSize = std::min(size, 5099 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 5100 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 5101 // the source address might still overflow. 5102 5103 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 5104 &thread_get_current_thread()->fault_handler); 5105 5106 // If we hit the address overflow boundary, fail. 5107 if (result >= 0 && (size_t)result >= maxSize && maxSize < size) 5108 return B_BAD_ADDRESS; 5109 5110 return result; 5111 } 5112 5113 5114 status_t 5115 user_memset(void* s, char c, size_t count) 5116 { 5117 // don't allow address overflows 5118 if ((addr_t)s + count < (addr_t)s) 5119 return B_BAD_ADDRESS; 5120 5121 if (arch_cpu_user_memset(s, c, count, 5122 &thread_get_current_thread()->fault_handler) < B_OK) 5123 return B_BAD_ADDRESS; 5124 5125 return B_OK; 5126 } 5127 5128 5129 /*! Wires a single page at the given address. 5130 5131 \param team The team whose address space the address belongs to. Supports 5132 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5133 parameter is ignored. 5134 \param address address The virtual address to wire down. Does not need to 5135 be page aligned. 5136 \param writable If \c true the page shall be writable. 5137 \param info On success the info is filled in, among other things 5138 containing the physical address the given virtual one translates to. 5139 \return \c B_OK, when the page could be wired, another error code otherwise. 5140 */ 5141 status_t 5142 vm_wire_page(team_id team, addr_t address, bool writable, 5143 VMPageWiringInfo* info) 5144 { 5145 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5146 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5147 5148 // compute the page protection that is required 5149 bool isUser = IS_USER_ADDRESS(address); 5150 uint32 requiredProtection = PAGE_PRESENT 5151 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5152 if (writable) 5153 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5154 5155 // get and read lock the address space 5156 VMAddressSpace* addressSpace = NULL; 5157 if (isUser) { 5158 if (team == B_CURRENT_TEAM) 5159 addressSpace = VMAddressSpace::GetCurrent(); 5160 else 5161 addressSpace = VMAddressSpace::Get(team); 5162 } else 5163 addressSpace = VMAddressSpace::GetKernel(); 5164 if (addressSpace == NULL) 5165 return B_ERROR; 5166 5167 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5168 5169 VMTranslationMap* map = addressSpace->TranslationMap(); 5170 status_t error = B_OK; 5171 5172 // get the area 5173 VMArea* area = addressSpace->LookupArea(pageAddress); 5174 if (area == NULL) { 5175 addressSpace->Put(); 5176 return B_BAD_ADDRESS; 5177 } 5178 5179 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5180 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5181 5182 // mark the area range wired 5183 area->Wire(&info->range); 5184 5185 // Lock the area's cache chain and the translation map. Needed to look 5186 // up the page and play with its wired count. 
5187 cacheChainLocker.LockAllSourceCaches(); 5188 map->Lock(); 5189 5190 phys_addr_t physicalAddress; 5191 uint32 flags; 5192 vm_page* page; 5193 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5194 && (flags & requiredProtection) == requiredProtection 5195 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5196 != NULL) { 5197 // Already mapped with the correct permissions -- just increment 5198 // the page's wired count. 5199 increment_page_wired_count(page); 5200 5201 map->Unlock(); 5202 cacheChainLocker.Unlock(); 5203 addressSpaceLocker.Unlock(); 5204 } else { 5205 // Let vm_soft_fault() map the page for us, if possible. We need 5206 // to fully unlock to avoid deadlocks. Since we have already 5207 // wired the area itself, nothing disturbing will happen with it 5208 // in the meantime. 5209 map->Unlock(); 5210 cacheChainLocker.Unlock(); 5211 addressSpaceLocker.Unlock(); 5212 5213 error = vm_soft_fault(addressSpace, pageAddress, writable, isUser, 5214 &page, &info->range); 5215 5216 if (error != B_OK) { 5217 // The page could not be mapped -- clean up. 5218 VMCache* cache = vm_area_get_locked_cache(area); 5219 area->Unwire(&info->range); 5220 cache->ReleaseRefAndUnlock(); 5221 addressSpace->Put(); 5222 return error; 5223 } 5224 } 5225 5226 info->physicalAddress 5227 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5228 + address % B_PAGE_SIZE; 5229 info->page = page; 5230 5231 return B_OK; 5232 } 5233 5234 5235 /*! Unwires a single page previously wired via vm_wire_page(). 5236 5237 \param info The same object passed to vm_wire_page() before. 5238 */ 5239 void 5240 vm_unwire_page(VMPageWiringInfo* info) 5241 { 5242 // lock the address space 5243 VMArea* area = info->range.area; 5244 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5245 // takes over our reference 5246 5247 // lock the top cache 5248 VMCache* cache = vm_area_get_locked_cache(area); 5249 VMCacheChainLocker cacheChainLocker(cache); 5250 5251 if (info->page->Cache() != cache) { 5252 // The page is not in the top cache, so we lock the whole cache chain 5253 // before touching the page's wired count. 5254 cacheChainLocker.LockAllSourceCaches(); 5255 } 5256 5257 decrement_page_wired_count(info->page); 5258 5259 // remove the wired range from the range 5260 area->Unwire(&info->range); 5261 5262 cacheChainLocker.Unlock(); 5263 } 5264 5265 5266 /*! Wires down the given address range in the specified team's address space. 5267 5268 If successful the function 5269 - acquires a reference to the specified team's address space, 5270 - adds respective wired ranges to all areas that intersect with the given 5271 address range, 5272 - makes sure all pages in the given address range are mapped with the 5273 requested access permissions and increments their wired count. 5274 5275 It fails, when \a team doesn't specify a valid address space, when any part 5276 of the specified address range is not covered by areas, when the concerned 5277 areas don't allow mapping with the requested permissions, or when mapping 5278 failed for another reason. 5279 5280 When successful the call must be balanced by a unlock_memory_etc() call with 5281 the exact same parameters. 5282 5283 \param team Identifies the address (via team ID). \c B_CURRENT_TEAM is 5284 supported. 5285 \param address The start of the address range to be wired. 5286 \param numBytes The size of the address range to be wired. 5287 \param flags Flags. 
Currently only \c B_READ_DEVICE is defined, which 5288 requests that the range must be wired writable ("read from device 5289 into memory"). 5290 \return \c B_OK on success, another error code otherwise. 5291 */ 5292 status_t 5293 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5294 { 5295 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5296 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5297 5298 // compute the page protection that is required 5299 bool isUser = IS_USER_ADDRESS(address); 5300 bool writable = (flags & B_READ_DEVICE) == 0; 5301 uint32 requiredProtection = PAGE_PRESENT 5302 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5303 if (writable) 5304 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5305 5306 uint32 mallocFlags = isUser 5307 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5308 5309 // get and read lock the address space 5310 VMAddressSpace* addressSpace = NULL; 5311 if (isUser) { 5312 if (team == B_CURRENT_TEAM) 5313 addressSpace = VMAddressSpace::GetCurrent(); 5314 else 5315 addressSpace = VMAddressSpace::Get(team); 5316 } else 5317 addressSpace = VMAddressSpace::GetKernel(); 5318 if (addressSpace == NULL) 5319 return B_ERROR; 5320 5321 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5322 5323 VMTranslationMap* map = addressSpace->TranslationMap(); 5324 status_t error = B_OK; 5325 5326 // iterate through all concerned areas 5327 addr_t nextAddress = lockBaseAddress; 5328 while (nextAddress != lockEndAddress) { 5329 // get the next area 5330 VMArea* area = addressSpace->LookupArea(nextAddress); 5331 if (area == NULL) { 5332 error = B_BAD_ADDRESS; 5333 break; 5334 } 5335 5336 addr_t areaStart = nextAddress; 5337 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5338 5339 // allocate the wired range (do that before locking the cache to avoid 5340 // deadlocks) 5341 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5342 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5343 if (range == NULL) { 5344 error = B_NO_MEMORY; 5345 break; 5346 } 5347 5348 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5349 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5350 5351 // mark the area range wired 5352 area->Wire(range); 5353 5354 // Depending on the area cache type and the wiring, we may not need to 5355 // look at the individual pages. 5356 if (area->cache_type == CACHE_TYPE_NULL 5357 || area->cache_type == CACHE_TYPE_DEVICE 5358 || area->wiring == B_FULL_LOCK 5359 || area->wiring == B_CONTIGUOUS) { 5360 nextAddress = areaEnd; 5361 continue; 5362 } 5363 5364 // Lock the area's cache chain and the translation map. Needed to look 5365 // up pages and play with their wired count. 5366 cacheChainLocker.LockAllSourceCaches(); 5367 map->Lock(); 5368 5369 // iterate through the pages and wire them 5370 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5371 phys_addr_t physicalAddress; 5372 uint32 flags; 5373 5374 vm_page* page; 5375 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5376 && (flags & requiredProtection) == requiredProtection 5377 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5378 != NULL) { 5379 // Already mapped with the correct permissions -- just increment 5380 // the page's wired count. 5381 increment_page_wired_count(page); 5382 } else { 5383 // Let vm_soft_fault() map the page for us, if possible. 
We need 5384 // to fully unlock to avoid deadlocks. Since we have already 5385 // wired the area itself, nothing disturbing will happen with it 5386 // in the meantime. 5387 map->Unlock(); 5388 cacheChainLocker.Unlock(); 5389 addressSpaceLocker.Unlock(); 5390 5391 error = vm_soft_fault(addressSpace, nextAddress, writable, 5392 isUser, &page, range); 5393 5394 addressSpaceLocker.Lock(); 5395 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5396 cacheChainLocker.LockAllSourceCaches(); 5397 map->Lock(); 5398 } 5399 5400 if (error != B_OK) 5401 break; 5402 } 5403 5404 map->Unlock(); 5405 5406 if (error == B_OK) { 5407 cacheChainLocker.Unlock(); 5408 } else { 5409 // An error occurred, so abort right here. If the current address 5410 // is the first in this area, unwire the area, since we won't get 5411 // to it when reverting what we've done so far. 5412 if (nextAddress == areaStart) { 5413 area->Unwire(range); 5414 cacheChainLocker.Unlock(); 5415 range->~VMAreaWiredRange(); 5416 free_etc(range, mallocFlags); 5417 } else 5418 cacheChainLocker.Unlock(); 5419 5420 break; 5421 } 5422 } 5423 5424 if (error != B_OK) { 5425 // An error occurred, so unwire all that we've already wired. Note that 5426 // even if not a single page was wired, unlock_memory_etc() is called 5427 // to put the address space reference. 5428 addressSpaceLocker.Unlock(); 5429 unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress, 5430 flags); 5431 } 5432 5433 return error; 5434 } 5435 5436 5437 status_t 5438 lock_memory(void* address, size_t numBytes, uint32 flags) 5439 { 5440 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5441 } 5442 5443 5444 /*! Unwires an address range previously wired with lock_memory_etc(). 5445 5446 Note that a call to this function must balance a previous lock_memory_etc() 5447 call with exactly the same parameters. 5448 */ 5449 status_t 5450 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5451 { 5452 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5453 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5454 5455 // compute the page protection that is required 5456 bool isUser = IS_USER_ADDRESS(address); 5457 bool writable = (flags & B_READ_DEVICE) == 0; 5458 uint32 requiredProtection = PAGE_PRESENT 5459 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5460 if (writable) 5461 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5462 5463 uint32 mallocFlags = isUser 5464 ? 
		0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;

	// get and read lock the address space
	VMAddressSpace* addressSpace = NULL;
	if (isUser) {
		if (team == B_CURRENT_TEAM)
			addressSpace = VMAddressSpace::GetCurrent();
		else
			addressSpace = VMAddressSpace::Get(team);
	} else
		addressSpace = VMAddressSpace::GetKernel();
	if (addressSpace == NULL)
		return B_ERROR;

	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);

	VMTranslationMap* map = addressSpace->TranslationMap();
	status_t error = B_OK;

	// iterate through all concerned areas
	addr_t nextAddress = lockBaseAddress;
	while (nextAddress != lockEndAddress) {
		// get the next area
		VMArea* area = addressSpace->LookupArea(nextAddress);
		if (area == NULL) {
			error = B_BAD_ADDRESS;
			break;
		}

		addr_t areaStart = nextAddress;
		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());

		// Lock the area's top cache. This is a requirement for
		// VMArea::Unwire().
		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));

		// Depending on the area cache type and the wiring, we may not need to
		// look at the individual pages.
		if (area->cache_type == CACHE_TYPE_NULL
			|| area->cache_type == CACHE_TYPE_DEVICE
			|| area->wiring == B_FULL_LOCK
			|| area->wiring == B_CONTIGUOUS) {
			// unwire the range (to avoid deadlocks we delete the range after
			// unlocking the cache)
			nextAddress = areaEnd;
			VMAreaWiredRange* range = area->Unwire(areaStart,
				areaEnd - areaStart, writable);
			cacheChainLocker.Unlock();
			if (range != NULL) {
				range->~VMAreaWiredRange();
				free_etc(range, mallocFlags);
			}
			continue;
		}

		// Lock the area's cache chain and the translation map. Needed to look
		// up pages and play with their wired count.
		cacheChainLocker.LockAllSourceCaches();
		map->Lock();

		// iterate through the pages and unwire them
		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
			phys_addr_t physicalAddress;
			uint32 flags;

			vm_page* page;
			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
				&& (flags & PAGE_PRESENT) != 0
				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
					!= NULL) {
				// The page is still mapped -- just decrement its wired count.
				decrement_page_wired_count(page);
			} else {
				panic("unlock_memory_etc(): Failed to unwire page: address "
					"space %p, address: %#" B_PRIxADDR, addressSpace,
					nextAddress);
				error = B_BAD_VALUE;
				break;
			}
		}

		map->Unlock();

		// All pages are unwired. Remove the area's wired range as well (to
		// avoid deadlocks we delete the range after unlocking the cache).
		VMAreaWiredRange* range = area->Unwire(areaStart,
			areaEnd - areaStart, writable);

		cacheChainLocker.Unlock();

		if (range != NULL) {
			range->~VMAreaWiredRange();
			free_etc(range, mallocFlags);
		}

		if (error != B_OK)
			break;
	}

	// get rid of the address space reference
	addressSpace->Put();

	return error;
}


status_t
unlock_memory(void* address, size_t numBytes, uint32 flags)
{
	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
}


/*!	Similar to get_memory_map(), but also allows specifying the address space
	for the memory in question and has saner semantics.
	Returns \c B_OK when the complete range could be translated or
	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
	case the actual number of entries is written to \c *_numEntries. Any other
	error case indicates complete failure; \c *_numEntries will be set to \c 0
	in this case.
*/
status_t
get_memory_map_etc(team_id team, const void* address, size_t numBytes,
	physical_entry* table, uint32* _numEntries)
{
	uint32 numEntries = *_numEntries;
	*_numEntries = 0;

	VMAddressSpace* addressSpace;
	addr_t virtualAddress = (addr_t)address;
	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
	phys_addr_t physicalAddress;
	status_t status = B_OK;
	int32 index = -1;
	addr_t offset = 0;
	bool interrupts = are_interrupts_enabled();

	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
		address, numBytes, numEntries));

	if (numEntries == 0 || numBytes == 0)
		return B_BAD_VALUE;

	// in which address space is the address to be found?
	if (IS_USER_ADDRESS(virtualAddress)) {
		if (team == B_CURRENT_TEAM)
			addressSpace = VMAddressSpace::GetCurrent();
		else
			addressSpace = VMAddressSpace::Get(team);
	} else
		addressSpace = VMAddressSpace::GetKernel();

	if (addressSpace == NULL)
		return B_ERROR;

	VMTranslationMap* map = addressSpace->TranslationMap();

	if (interrupts)
		map->Lock();

	while (offset < numBytes) {
		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
		uint32 flags;

		if (interrupts) {
			status = map->Query((addr_t)address + offset, &physicalAddress,
				&flags);
		} else {
			status = map->QueryInterrupt((addr_t)address + offset,
				&physicalAddress, &flags);
		}
		if (status < B_OK)
			break;
		if ((flags & PAGE_PRESENT) == 0) {
			panic("get_memory_map() called on unmapped memory!");
			return B_BAD_ADDRESS;
		}

		if (index < 0 && pageOffset > 0) {
			physicalAddress += pageOffset;
			if (bytes > B_PAGE_SIZE - pageOffset)
				bytes = B_PAGE_SIZE - pageOffset;
		}

		// need to switch to the next physical_entry?
		if (index < 0 || table[index].address
				!= physicalAddress - table[index].size) {
			if ((uint32)++index + 1 > numEntries) {
				// table too small
				break;
			}
			table[index].address = physicalAddress;
			table[index].size = bytes;
		} else {
			// page fits into the current entry
			table[index].size += bytes;
		}

		offset += bytes;
	}

	if (interrupts)
		map->Unlock();

	if (status != B_OK)
		return status;

	if ((uint32)index + 1 > numEntries) {
		*_numEntries = index;
		return B_BUFFER_OVERFLOW;
	}

	*_numEntries = index + 1;
	return B_OK;
}


/*!	According to the BeBook, this function should always succeed.
	This is no longer the case.
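	It now returns an error code if the range cannot be translated completely
	or the provided table turns out to be too small.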
5684 */ 5685 extern "C" int32 5686 __get_memory_map_haiku(const void* address, size_t numBytes, 5687 physical_entry* table, int32 numEntries) 5688 { 5689 uint32 entriesRead = numEntries; 5690 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5691 table, &entriesRead); 5692 if (error != B_OK) 5693 return error; 5694 5695 // close the entry list 5696 5697 // if it's only one entry, we will silently accept the missing ending 5698 if (numEntries == 1) 5699 return B_OK; 5700 5701 if (entriesRead + 1 > (uint32)numEntries) 5702 return B_BUFFER_OVERFLOW; 5703 5704 table[entriesRead].address = 0; 5705 table[entriesRead].size = 0; 5706 5707 return B_OK; 5708 } 5709 5710 5711 area_id 5712 area_for(void* address) 5713 { 5714 return vm_area_for((addr_t)address, true); 5715 } 5716 5717 5718 area_id 5719 find_area(const char* name) 5720 { 5721 return VMAreaHash::Find(name); 5722 } 5723 5724 5725 status_t 5726 _get_area_info(area_id id, area_info* info, size_t size) 5727 { 5728 if (size != sizeof(area_info) || info == NULL) 5729 return B_BAD_VALUE; 5730 5731 AddressSpaceReadLocker locker; 5732 VMArea* area; 5733 status_t status = locker.SetFromArea(id, area); 5734 if (status != B_OK) 5735 return status; 5736 5737 fill_area_info(area, info, size); 5738 return B_OK; 5739 } 5740 5741 5742 status_t 5743 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 5744 { 5745 addr_t nextBase = *(addr_t*)cookie; 5746 5747 // we're already through the list 5748 if (nextBase == (addr_t)-1) 5749 return B_ENTRY_NOT_FOUND; 5750 5751 if (team == B_CURRENT_TEAM) 5752 team = team_get_current_team_id(); 5753 5754 AddressSpaceReadLocker locker(team); 5755 if (!locker.IsLocked()) 5756 return B_BAD_TEAM_ID; 5757 5758 VMArea* area; 5759 for (VMAddressSpace::AreaIterator it 5760 = locker.AddressSpace()->GetAreaIterator(); 5761 (area = it.Next()) != NULL;) { 5762 if (area->Base() > nextBase) 5763 break; 5764 } 5765 5766 if (area == NULL) { 5767 nextBase = (addr_t)-1; 5768 return B_ENTRY_NOT_FOUND; 5769 } 5770 5771 fill_area_info(area, info, size); 5772 *cookie = (int32)(area->Base()); 5773 // TODO: Not 64 bit safe! 5774 5775 return B_OK; 5776 } 5777 5778 5779 status_t 5780 set_area_protection(area_id area, uint32 newProtection) 5781 { 5782 fix_protection(&newProtection); 5783 5784 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5785 newProtection, true); 5786 } 5787 5788 5789 status_t 5790 resize_area(area_id areaID, size_t newSize) 5791 { 5792 return vm_resize_area(areaID, newSize, true); 5793 } 5794 5795 5796 /*! Transfers the specified area to a new team. The caller must be the owner 5797 of the area. 5798 */ 5799 area_id 5800 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5801 bool kernel) 5802 { 5803 area_info info; 5804 status_t status = get_area_info(id, &info); 5805 if (status != B_OK) 5806 return status; 5807 5808 if (info.team != thread_get_current_thread()->team->id) 5809 return B_PERMISSION_DENIED; 5810 5811 area_id clonedArea = vm_clone_area(target, info.name, _address, 5812 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5813 if (clonedArea < 0) 5814 return clonedArea; 5815 5816 status = vm_delete_area(info.team, id, kernel); 5817 if (status != B_OK) { 5818 vm_delete_area(target, clonedArea, kernel); 5819 return status; 5820 } 5821 5822 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
5823 5824 return clonedArea; 5825 } 5826 5827 5828 extern "C" area_id 5829 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 5830 size_t numBytes, uint32 addressSpec, uint32 protection, 5831 void** _virtualAddress) 5832 { 5833 if (!arch_vm_supports_protection(protection)) 5834 return B_NOT_SUPPORTED; 5835 5836 fix_protection(&protection); 5837 5838 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5839 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 5840 false); 5841 } 5842 5843 5844 area_id 5845 clone_area(const char* name, void** _address, uint32 addressSpec, 5846 uint32 protection, area_id source) 5847 { 5848 if ((protection & B_KERNEL_PROTECTION) == 0) 5849 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5850 5851 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5852 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5853 } 5854 5855 5856 area_id 5857 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock, 5858 uint32 protection, uint32 flags, 5859 const virtual_address_restrictions* virtualAddressRestrictions, 5860 const physical_address_restrictions* physicalAddressRestrictions, 5861 void** _address) 5862 { 5863 fix_protection(&protection); 5864 5865 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 5866 virtualAddressRestrictions, physicalAddressRestrictions, true, 5867 _address); 5868 } 5869 5870 5871 extern "C" area_id 5872 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 5873 size_t size, uint32 lock, uint32 protection) 5874 { 5875 fix_protection(&protection); 5876 5877 virtual_address_restrictions virtualRestrictions = {}; 5878 virtualRestrictions.address = *_address; 5879 virtualRestrictions.address_specification = addressSpec; 5880 physical_address_restrictions physicalRestrictions = {}; 5881 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 5882 lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true, 5883 _address); 5884 } 5885 5886 5887 status_t 5888 delete_area(area_id area) 5889 { 5890 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5891 } 5892 5893 5894 // #pragma mark - Userland syscalls 5895 5896 5897 status_t 5898 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5899 addr_t size) 5900 { 5901 // filter out some unavailable values (for userland) 5902 switch (addressSpec) { 5903 case B_ANY_KERNEL_ADDRESS: 5904 case B_ANY_KERNEL_BLOCK_ADDRESS: 5905 return B_BAD_VALUE; 5906 } 5907 5908 addr_t address; 5909 5910 if (!IS_USER_ADDRESS(userAddress) 5911 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5912 return B_BAD_ADDRESS; 5913 5914 status_t status = vm_reserve_address_range( 5915 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5916 RESERVED_AVOID_BASE); 5917 if (status != B_OK) 5918 return status; 5919 5920 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5921 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5922 (void*)address, size); 5923 return B_BAD_ADDRESS; 5924 } 5925 5926 return B_OK; 5927 } 5928 5929 5930 status_t 5931 _user_unreserve_address_range(addr_t address, addr_t size) 5932 { 5933 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5934 (void*)address, size); 5935 } 5936 5937 5938 area_id 5939 _user_area_for(void* address) 5940 { 5941 return vm_area_for((addr_t)address, false); 5942 } 5943 5944 5945 area_id 5946 _user_find_area(const char* userName) 5947 
{ 5948 char name[B_OS_NAME_LENGTH]; 5949 5950 if (!IS_USER_ADDRESS(userName) 5951 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5952 return B_BAD_ADDRESS; 5953 5954 return find_area(name); 5955 } 5956 5957 5958 status_t 5959 _user_get_area_info(area_id area, area_info* userInfo) 5960 { 5961 if (!IS_USER_ADDRESS(userInfo)) 5962 return B_BAD_ADDRESS; 5963 5964 area_info info; 5965 status_t status = get_area_info(area, &info); 5966 if (status < B_OK) 5967 return status; 5968 5969 // TODO: do we want to prevent userland from seeing kernel protections? 5970 //info.protection &= B_USER_PROTECTION; 5971 5972 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5973 return B_BAD_ADDRESS; 5974 5975 return status; 5976 } 5977 5978 5979 status_t 5980 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo) 5981 { 5982 int32 cookie; 5983 5984 if (!IS_USER_ADDRESS(userCookie) 5985 || !IS_USER_ADDRESS(userInfo) 5986 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 5987 return B_BAD_ADDRESS; 5988 5989 area_info info; 5990 status_t status = _get_next_area_info(team, &cookie, &info, 5991 sizeof(area_info)); 5992 if (status != B_OK) 5993 return status; 5994 5995 //info.protection &= B_USER_PROTECTION; 5996 5997 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 5998 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5999 return B_BAD_ADDRESS; 6000 6001 return status; 6002 } 6003 6004 6005 status_t 6006 _user_set_area_protection(area_id area, uint32 newProtection) 6007 { 6008 if ((newProtection & ~B_USER_PROTECTION) != 0) 6009 return B_BAD_VALUE; 6010 6011 fix_protection(&newProtection); 6012 6013 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6014 newProtection, false); 6015 } 6016 6017 6018 status_t 6019 _user_resize_area(area_id area, size_t newSize) 6020 { 6021 // TODO: Since we restrict deleting of areas to those owned by the team, 6022 // we should also do that for resizing (check other functions, too). 
6023 return vm_resize_area(area, newSize, false); 6024 } 6025 6026 6027 area_id 6028 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6029 team_id target) 6030 { 6031 // filter out some unavailable values (for userland) 6032 switch (addressSpec) { 6033 case B_ANY_KERNEL_ADDRESS: 6034 case B_ANY_KERNEL_BLOCK_ADDRESS: 6035 return B_BAD_VALUE; 6036 } 6037 6038 void* address; 6039 if (!IS_USER_ADDRESS(userAddress) 6040 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6041 return B_BAD_ADDRESS; 6042 6043 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6044 if (newArea < B_OK) 6045 return newArea; 6046 6047 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6048 return B_BAD_ADDRESS; 6049 6050 return newArea; 6051 } 6052 6053 6054 area_id 6055 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6056 uint32 protection, area_id sourceArea) 6057 { 6058 char name[B_OS_NAME_LENGTH]; 6059 void* address; 6060 6061 // filter out some unavailable values (for userland) 6062 switch (addressSpec) { 6063 case B_ANY_KERNEL_ADDRESS: 6064 case B_ANY_KERNEL_BLOCK_ADDRESS: 6065 return B_BAD_VALUE; 6066 } 6067 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6068 return B_BAD_VALUE; 6069 6070 if (!IS_USER_ADDRESS(userName) 6071 || !IS_USER_ADDRESS(userAddress) 6072 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6073 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6074 return B_BAD_ADDRESS; 6075 6076 fix_protection(&protection); 6077 6078 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6079 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6080 false); 6081 if (clonedArea < B_OK) 6082 return clonedArea; 6083 6084 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6085 delete_area(clonedArea); 6086 return B_BAD_ADDRESS; 6087 } 6088 6089 return clonedArea; 6090 } 6091 6092 6093 area_id 6094 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6095 size_t size, uint32 lock, uint32 protection) 6096 { 6097 char name[B_OS_NAME_LENGTH]; 6098 void* address; 6099 6100 // filter out some unavailable values (for userland) 6101 switch (addressSpec) { 6102 case B_ANY_KERNEL_ADDRESS: 6103 case B_ANY_KERNEL_BLOCK_ADDRESS: 6104 return B_BAD_VALUE; 6105 } 6106 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6107 return B_BAD_VALUE; 6108 6109 if (!IS_USER_ADDRESS(userName) 6110 || !IS_USER_ADDRESS(userAddress) 6111 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6112 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6113 return B_BAD_ADDRESS; 6114 6115 if (addressSpec == B_EXACT_ADDRESS 6116 && IS_KERNEL_ADDRESS(address)) 6117 return B_BAD_VALUE; 6118 6119 fix_protection(&protection); 6120 6121 virtual_address_restrictions virtualRestrictions = {}; 6122 virtualRestrictions.address = address; 6123 virtualRestrictions.address_specification = addressSpec; 6124 physical_address_restrictions physicalRestrictions = {}; 6125 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6126 size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions, 6127 false, &address); 6128 6129 if (area >= B_OK 6130 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6131 delete_area(area); 6132 return B_BAD_ADDRESS; 6133 } 6134 6135 return area; 6136 } 6137 6138 6139 status_t 6140 _user_delete_area(area_id area) 6141 { 6142 // Unlike the BeOS implementation, you can now only delete areas 6143 // 
that you have created yourself from userland. 6144 // The documentation to delete_area() explicitly states that this 6145 // will be restricted in the future, and so it will. 6146 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6147 } 6148 6149 6150 // TODO: create a BeOS style call for this! 6151 6152 area_id 6153 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6154 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6155 int fd, off_t offset) 6156 { 6157 char name[B_OS_NAME_LENGTH]; 6158 void* address; 6159 area_id area; 6160 6161 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6162 return B_BAD_VALUE; 6163 6164 fix_protection(&protection); 6165 6166 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6167 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6168 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6169 return B_BAD_ADDRESS; 6170 6171 if (addressSpec == B_EXACT_ADDRESS) { 6172 if ((addr_t)address + size < (addr_t)address 6173 || (addr_t)address % B_PAGE_SIZE != 0) { 6174 return B_BAD_VALUE; 6175 } 6176 if (!IS_USER_ADDRESS(address) 6177 || !IS_USER_ADDRESS((addr_t)address + size)) { 6178 return B_BAD_ADDRESS; 6179 } 6180 } 6181 6182 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6183 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6184 false); 6185 if (area < B_OK) 6186 return area; 6187 6188 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6189 return B_BAD_ADDRESS; 6190 6191 return area; 6192 } 6193 6194 6195 status_t 6196 _user_unmap_memory(void* _address, size_t size) 6197 { 6198 addr_t address = (addr_t)_address; 6199 6200 // check params 6201 if (size == 0 || (addr_t)address + size < (addr_t)address 6202 || (addr_t)address % B_PAGE_SIZE != 0) { 6203 return B_BAD_VALUE; 6204 } 6205 6206 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6207 return B_BAD_ADDRESS; 6208 6209 // Write lock the address space and ensure the address range is not wired. 6210 AddressSpaceWriteLocker locker; 6211 do { 6212 status_t status = locker.SetTo(team_get_current_team_id()); 6213 if (status != B_OK) 6214 return status; 6215 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6216 size, &locker)); 6217 6218 // unmap 6219 return unmap_address_range(locker.AddressSpace(), address, size, false); 6220 } 6221 6222 6223 status_t 6224 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6225 { 6226 // check address range 6227 addr_t address = (addr_t)_address; 6228 size = PAGE_ALIGN(size); 6229 6230 if ((address % B_PAGE_SIZE) != 0) 6231 return B_BAD_VALUE; 6232 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6233 || !IS_USER_ADDRESS((addr_t)address + size)) { 6234 // weird error code required by POSIX 6235 return ENOMEM; 6236 } 6237 6238 // extend and check protection 6239 if ((protection & ~B_USER_PROTECTION) != 0) 6240 return B_BAD_VALUE; 6241 6242 fix_protection(&protection); 6243 6244 // We need to write lock the address space, since we're going to play with 6245 // the areas. Also make sure that none of the areas is wired and that we're 6246 // actually allowed to change the protection. 
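	// For orientation (assumed rather than shown in this file): userland
	// normally reaches this syscall through POSIX mprotect() in libroot,
	// which translates PROT_* flags into the corresponding B_*_AREA bits
	// before invoking _kern_set_memory_protection().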
6247 AddressSpaceWriteLocker locker; 6248 6249 bool restart; 6250 do { 6251 restart = false; 6252 6253 status_t status = locker.SetTo(team_get_current_team_id()); 6254 if (status != B_OK) 6255 return status; 6256 6257 // First round: Check whether the whole range is covered by areas and we 6258 // are allowed to modify them. 6259 addr_t currentAddress = address; 6260 size_t sizeLeft = size; 6261 while (sizeLeft > 0) { 6262 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6263 if (area == NULL) 6264 return B_NO_MEMORY; 6265 6266 if ((area->protection & B_KERNEL_AREA) != 0) 6267 return B_NOT_ALLOWED; 6268 6269 // TODO: For (shared) mapped files we should check whether the new 6270 // protections are compatible with the file permissions. We don't 6271 // have a way to do that yet, though. 6272 6273 addr_t offset = currentAddress - area->Base(); 6274 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6275 6276 AreaCacheLocker cacheLocker(area); 6277 6278 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6279 &locker, &cacheLocker)) { 6280 restart = true; 6281 break; 6282 } 6283 6284 cacheLocker.Unlock(); 6285 6286 currentAddress += rangeSize; 6287 sizeLeft -= rangeSize; 6288 } 6289 } while (restart); 6290 6291 // Second round: If the protections differ from that of the area, create a 6292 // page protection array and re-map mapped pages. 6293 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6294 addr_t currentAddress = address; 6295 size_t sizeLeft = size; 6296 while (sizeLeft > 0) { 6297 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6298 if (area == NULL) 6299 return B_NO_MEMORY; 6300 6301 addr_t offset = currentAddress - area->Base(); 6302 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6303 6304 currentAddress += rangeSize; 6305 sizeLeft -= rangeSize; 6306 6307 if (area->page_protections == NULL) { 6308 if (area->protection == protection) 6309 continue; 6310 6311 status_t status = allocate_area_page_protections(area); 6312 if (status != B_OK) 6313 return status; 6314 } 6315 6316 // We need to lock the complete cache chain, since we potentially unmap 6317 // pages of lower caches. 6318 VMCache* topCache = vm_area_get_locked_cache(area); 6319 VMCacheChainLocker cacheChainLocker(topCache); 6320 cacheChainLocker.LockAllSourceCaches(); 6321 6322 for (addr_t pageAddress = area->Base() + offset; 6323 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6324 map->Lock(); 6325 6326 set_area_page_protection(area, pageAddress, protection); 6327 6328 phys_addr_t physicalAddress; 6329 uint32 flags; 6330 6331 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6332 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6333 map->Unlock(); 6334 continue; 6335 } 6336 6337 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6338 if (page == NULL) { 6339 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6340 "\n", area, physicalAddress); 6341 map->Unlock(); 6342 return B_ERROR; 6343 } 6344 6345 // If the page is not in the topmost cache and write access is 6346 // requested, we have to unmap it. Otherwise we can re-map it with 6347 // the new protection. 
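			// (Unmapping rather than re-protecting means the next write
			// faults, giving the fault handler a chance to copy the page into
			// the topmost cache first, instead of letting the shared
			// lower-cache page be modified in place.)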
6348 bool unmapPage = page->Cache() != topCache 6349 && (protection & B_WRITE_AREA) != 0; 6350 6351 if (!unmapPage) 6352 map->ProtectPage(area, pageAddress, protection); 6353 6354 map->Unlock(); 6355 6356 if (unmapPage) { 6357 DEBUG_PAGE_ACCESS_START(page); 6358 unmap_page(area, pageAddress); 6359 DEBUG_PAGE_ACCESS_END(page); 6360 } 6361 } 6362 } 6363 6364 return B_OK; 6365 } 6366 6367 6368 status_t 6369 _user_sync_memory(void* _address, size_t size, uint32 flags) 6370 { 6371 addr_t address = (addr_t)_address; 6372 size = PAGE_ALIGN(size); 6373 6374 // check params 6375 if ((address % B_PAGE_SIZE) != 0) 6376 return B_BAD_VALUE; 6377 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6378 || !IS_USER_ADDRESS((addr_t)address + size)) { 6379 // weird error code required by POSIX 6380 return ENOMEM; 6381 } 6382 6383 bool writeSync = (flags & MS_SYNC) != 0; 6384 bool writeAsync = (flags & MS_ASYNC) != 0; 6385 if (writeSync && writeAsync) 6386 return B_BAD_VALUE; 6387 6388 if (size == 0 || (!writeSync && !writeAsync)) 6389 return B_OK; 6390 6391 // iterate through the range and sync all concerned areas 6392 while (size > 0) { 6393 // read lock the address space 6394 AddressSpaceReadLocker locker; 6395 status_t error = locker.SetTo(team_get_current_team_id()); 6396 if (error != B_OK) 6397 return error; 6398 6399 // get the first area 6400 VMArea* area = locker.AddressSpace()->LookupArea(address); 6401 if (area == NULL) 6402 return B_NO_MEMORY; 6403 6404 uint32 offset = address - area->Base(); 6405 size_t rangeSize = min_c(area->Size() - offset, size); 6406 offset += area->cache_offset; 6407 6408 // lock the cache 6409 AreaCacheLocker cacheLocker(area); 6410 if (!cacheLocker) 6411 return B_BAD_VALUE; 6412 VMCache* cache = area->cache; 6413 6414 locker.Unlock(); 6415 6416 uint32 firstPage = offset >> PAGE_SHIFT; 6417 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6418 6419 // write the pages 6420 if (cache->type == CACHE_TYPE_VNODE) { 6421 if (writeSync) { 6422 // synchronous 6423 error = vm_page_write_modified_page_range(cache, firstPage, 6424 endPage); 6425 if (error != B_OK) 6426 return error; 6427 } else { 6428 // asynchronous 6429 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6430 // TODO: This is probably not quite what is supposed to happen. 6431 // Especially when a lot has to be written, it might take ages 6432 // until it really hits the disk. 6433 } 6434 } 6435 6436 address += rangeSize; 6437 size -= rangeSize; 6438 } 6439 6440 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6441 // synchronize multiple mappings of the same file. In our VM they never get 6442 // out of sync, though, so we don't have to do anything. 6443 6444 return B_OK; 6445 } 6446 6447 6448 status_t 6449 _user_memory_advice(void* address, size_t size, uint32 advice) 6450 { 6451 // TODO: Implement! 
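	// For now every advice value is accepted and silently ignored.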
6452 return B_OK; 6453 } 6454 6455 6456 status_t 6457 _user_get_memory_properties(team_id teamID, const void* address, 6458 uint32* _protected, uint32* _lock) 6459 { 6460 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6461 return B_BAD_ADDRESS; 6462 6463 AddressSpaceReadLocker locker; 6464 status_t error = locker.SetTo(teamID); 6465 if (error != B_OK) 6466 return error; 6467 6468 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6469 if (area == NULL) 6470 return B_NO_MEMORY; 6471 6472 6473 uint32 protection = area->protection; 6474 if (area->page_protections != NULL) 6475 protection = get_area_page_protection(area, (addr_t)address); 6476 6477 uint32 wiring = area->wiring; 6478 6479 locker.Unlock(); 6480 6481 error = user_memcpy(_protected, &protection, sizeof(protection)); 6482 if (error != B_OK) 6483 return error; 6484 6485 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6486 6487 return error; 6488 } 6489 6490 6491 // #pragma mark -- compatibility 6492 6493 6494 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6495 6496 6497 struct physical_entry_beos { 6498 uint32 address; 6499 uint32 size; 6500 }; 6501 6502 6503 /*! The physical_entry structure has changed. We need to translate it to the 6504 old one. 6505 */ 6506 extern "C" int32 6507 __get_memory_map_beos(const void* _address, size_t numBytes, 6508 physical_entry_beos* table, int32 numEntries) 6509 { 6510 if (numEntries <= 0) 6511 return B_BAD_VALUE; 6512 6513 const uint8* address = (const uint8*)_address; 6514 6515 int32 count = 0; 6516 while (numBytes > 0 && count < numEntries) { 6517 physical_entry entry; 6518 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 6519 if (result < 0) { 6520 if (result != B_BUFFER_OVERFLOW) 6521 return result; 6522 } 6523 6524 if (entry.address >= (phys_addr_t)1 << 32) { 6525 panic("get_memory_map(): Address is greater than 4 GB!"); 6526 return B_ERROR; 6527 } 6528 6529 table[count].address = entry.address; 6530 table[count++].size = entry.size; 6531 6532 address += entry.size; 6533 numBytes -= entry.size; 6534 } 6535 6536 // null-terminate the table, if possible 6537 if (count < numEntries) { 6538 table[count].address = 0; 6539 table[count].size = 0; 6540 } 6541 6542 return B_OK; 6543 } 6544 6545 6546 /*! The type of the \a physicalAddress parameter has changed from void* to 6547 phys_addr_t. 6548 */ 6549 extern "C" area_id 6550 __map_physical_memory_beos(const char* name, void* physicalAddress, 6551 size_t numBytes, uint32 addressSpec, uint32 protection, 6552 void** _virtualAddress) 6553 { 6554 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 6555 addressSpec, protection, _virtualAddress); 6556 } 6557 6558 6559 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 6560 we meddle with the \a lock parameter to force 32 bit.
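	Concretely, \c B_FULL_LOCK and \c B_LAZY_LOCK are both mapped to
	\c B_32_BIT_FULL_LOCK and \c B_CONTIGUOUS to \c B_32_BIT_CONTIGUOUS below,
	while \c B_NO_LOCK is passed through unchanged.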
6561 */ 6562 extern "C" area_id 6563 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 6564 size_t size, uint32 lock, uint32 protection) 6565 { 6566 switch (lock) { 6567 case B_NO_LOCK: 6568 break; 6569 case B_FULL_LOCK: 6570 case B_LAZY_LOCK: 6571 lock = B_32_BIT_FULL_LOCK; 6572 break; 6573 case B_CONTIGUOUS: 6574 lock = B_32_BIT_CONTIGUOUS; 6575 break; 6576 } 6577 6578 return __create_area_haiku(name, _address, addressSpec, size, lock, 6579 protection); 6580 } 6581 6582 6583 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 6584 "BASE"); 6585 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 6586 "map_physical_memory@", "BASE"); 6587 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 6588 "BASE"); 6589 6590 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6591 "get_memory_map@@", "1_ALPHA3"); 6592 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6593 "map_physical_memory@@", "1_ALPHA3"); 6594 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6595 "1_ALPHA3"); 6596 6597 6598 #else 6599 6600 6601 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6602 "get_memory_map@@", "BASE"); 6603 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6604 "map_physical_memory@@", "BASE"); 6605 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6606 "BASE"); 6607 6608 6609 #endif // defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6610