1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <boot/elf.h> 31 #include <boot/stage2.h> 32 #include <condition_variable.h> 33 #include <console.h> 34 #include <debug.h> 35 #include <file_cache.h> 36 #include <fs/fd.h> 37 #include <heap.h> 38 #include <kernel.h> 39 #include <int.h> 40 #include <lock.h> 41 #include <low_resource_manager.h> 42 #include <slab/Slab.h> 43 #include <smp.h> 44 #include <system_info.h> 45 #include <thread.h> 46 #include <team.h> 47 #include <tracing.h> 48 #include <util/AutoLock.h> 49 #include <util/khash.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_priv.h> 52 #include <vm/VMAddressSpace.h> 53 #include <vm/VMArea.h> 54 #include <vm/VMCache.h> 55 56 #include "VMAddressSpaceLocking.h" 57 #include "VMAnonymousCache.h" 58 #include "IORequest.h" 59 60 61 //#define TRACE_VM 62 //#define TRACE_FAULTS 63 #ifdef TRACE_VM 64 # define TRACE(x) dprintf x 65 #else 66 # define TRACE(x) ; 67 #endif 68 #ifdef TRACE_FAULTS 69 # define FTRACE(x) dprintf x 70 #else 71 # define FTRACE(x) ; 72 #endif 73 74 75 class AreaCacheLocking { 76 public: 77 inline bool Lock(VMCache* lockable) 78 { 79 return false; 80 } 81 82 inline void Unlock(VMCache* lockable) 83 { 84 vm_area_put_locked_cache(lockable); 85 } 86 }; 87 88 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 89 public: 90 inline AreaCacheLocker(VMCache* cache = NULL) 91 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 92 { 93 } 94 95 inline AreaCacheLocker(VMArea* area) 96 : AutoLocker<VMCache, AreaCacheLocking>() 97 { 98 SetTo(area); 99 } 100 101 inline void SetTo(VMCache* cache, bool alreadyLocked) 102 { 103 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 104 } 105 106 inline void SetTo(VMArea* area) 107 { 108 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 109 area != NULL ? 
			vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};


// The memory reserve an allocation of a given priority must not touch.
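// Presumably indexed by the VM_PRIORITY_* constants (user, system, VIP, in
// that order): e.g. an allocation at user priority must leave
// VM_MEMORY_RESERVE_USER bytes untouched, while a VIP allocation may use up
// the entire reserve.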
233 static const size_t kMemoryReserveForPriority[] = { 234 VM_MEMORY_RESERVE_USER, // user 235 VM_MEMORY_RESERVE_SYSTEM, // system 236 0 // VIP 237 }; 238 239 240 ObjectCache* gPageMappingsObjectCache; 241 242 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 243 244 static off_t sAvailableMemory; 245 static off_t sNeededMemory; 246 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 247 static uint32 sPageFaults; 248 249 static VMPhysicalPageMapper* sPhysicalPageMapper; 250 251 #if DEBUG_CACHE_LIST 252 253 struct cache_info { 254 VMCache* cache; 255 addr_t page_count; 256 addr_t committed; 257 }; 258 259 static const int kCacheInfoTableCount = 100 * 1024; 260 static cache_info* sCacheInfoTable; 261 262 #endif // DEBUG_CACHE_LIST 263 264 265 // function declarations 266 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 267 bool addressSpaceCleanup); 268 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 269 bool isWrite, bool isUser, vm_page** wirePage, 270 VMAreaWiredRange* wiredRange = NULL); 271 static status_t map_backing_store(VMAddressSpace* addressSpace, 272 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 273 int protection, int mapping, uint32 flags, 274 const virtual_address_restrictions* addressRestrictions, bool kernel, 275 VMArea** _area, void** _virtualAddress); 276 277 278 // #pragma mark - 279 280 281 #if VM_PAGE_FAULT_TRACING 282 283 namespace VMPageFaultTracing { 284 285 class PageFaultStart : public AbstractTraceEntry { 286 public: 287 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 288 : 289 fAddress(address), 290 fPC(pc), 291 fWrite(write), 292 fUser(user) 293 { 294 Initialized(); 295 } 296 297 virtual void AddDump(TraceOutput& out) 298 { 299 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 300 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 301 } 302 303 private: 304 addr_t fAddress; 305 addr_t fPC; 306 bool fWrite; 307 bool fUser; 308 }; 309 310 311 // page fault errors 312 enum { 313 PAGE_FAULT_ERROR_NO_AREA = 0, 314 PAGE_FAULT_ERROR_KERNEL_ONLY, 315 PAGE_FAULT_ERROR_WRITE_PROTECTED, 316 PAGE_FAULT_ERROR_READ_PROTECTED, 317 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 318 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 319 }; 320 321 322 class PageFaultError : public AbstractTraceEntry { 323 public: 324 PageFaultError(area_id area, status_t error) 325 : 326 fArea(area), 327 fError(error) 328 { 329 Initialized(); 330 } 331 332 virtual void AddDump(TraceOutput& out) 333 { 334 switch (fError) { 335 case PAGE_FAULT_ERROR_NO_AREA: 336 out.Print("page fault error: no area"); 337 break; 338 case PAGE_FAULT_ERROR_KERNEL_ONLY: 339 out.Print("page fault error: area: %ld, kernel only", fArea); 340 break; 341 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 342 out.Print("page fault error: area: %ld, write protected", 343 fArea); 344 break; 345 case PAGE_FAULT_ERROR_READ_PROTECTED: 346 out.Print("page fault error: area: %ld, read protected", fArea); 347 break; 348 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 349 out.Print("page fault error: kernel touching bad user memory"); 350 break; 351 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 352 out.Print("page fault error: no address space"); 353 break; 354 default: 355 out.Print("page fault error: area: %ld, error: %s", fArea, 356 strerror(fError)); 357 break; 358 } 359 } 360 361 private: 362 area_id fArea; 363 status_t fError; 364 }; 365 366 367 class PageFaultDone : public AbstractTraceEntry { 368 public: 369 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 370 vm_page* page) 371 : 372 fArea(area), 373 fTopCache(topCache), 374 fCache(cache), 375 fPage(page) 376 { 377 Initialized(); 378 } 379 380 virtual void AddDump(TraceOutput& out) 381 { 382 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 383 "page: %p", fArea, fTopCache, fCache, fPage); 384 } 385 386 private: 387 area_id fArea; 388 VMCache* fTopCache; 389 VMCache* fCache; 390 vm_page* fPage; 391 }; 392 393 } // namespace VMPageFaultTracing 394 395 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 396 #else 397 # define TPF(x) ; 398 #endif // VM_PAGE_FAULT_TRACING 399 400 401 // #pragma mark - 402 403 404 /*! The page's cache must be locked. 405 */ 406 static inline void 407 increment_page_wired_count(vm_page* page) 408 { 409 if (!page->IsMapped()) 410 atomic_add(&gMappedPagesCount, 1); 411 page->IncrementWiredCount(); 412 } 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 decrement_page_wired_count(vm_page* page) 419 { 420 page->DecrementWiredCount(); 421 if (!page->IsMapped()) 422 atomic_add(&gMappedPagesCount, -1); 423 } 424 425 426 static inline addr_t 427 virtual_page_address(VMArea* area, vm_page* page) 428 { 429 return area->Base() 430 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 431 } 432 433 434 //! 
You need to have the address space locked when calling this function 435 static VMArea* 436 lookup_area(VMAddressSpace* addressSpace, area_id id) 437 { 438 VMAreaHash::ReadLock(); 439 440 VMArea* area = VMAreaHash::LookupLocked(id); 441 if (area != NULL && area->address_space != addressSpace) 442 area = NULL; 443 444 VMAreaHash::ReadUnlock(); 445 446 return area; 447 } 448 449 450 static status_t 451 allocate_area_page_protections(VMArea* area) 452 { 453 // In the page protections we store only the three user protections, 454 // so we use 4 bits per page. 455 uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 456 area->page_protections = (uint8*)malloc_etc(bytes, 457 HEAP_DONT_LOCK_KERNEL_SPACE); 458 if (area->page_protections == NULL) 459 return B_NO_MEMORY; 460 461 // init the page protections for all pages to that of the area 462 uint32 areaProtection = area->protection 463 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 464 memset(area->page_protections, areaProtection | (areaProtection << 4), 465 bytes); 466 return B_OK; 467 } 468 469 470 static inline void 471 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 472 { 473 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 474 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 475 uint8& entry = area->page_protections[pageIndex / 2]; 476 if (pageIndex % 2 == 0) 477 entry = (entry & 0xf0) | protection; 478 else 479 entry = (entry & 0x0f) | (protection << 4); 480 } 481 482 483 static inline uint32 484 get_area_page_protection(VMArea* area, addr_t pageAddress) 485 { 486 if (area->page_protections == NULL) 487 return area->protection; 488 489 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 490 uint32 protection = area->page_protections[pageIndex / 2]; 491 if (pageIndex % 2 == 0) 492 protection &= 0x0f; 493 else 494 protection >>= 4; 495 496 // If this is a kernel area we translate the user flags to kernel flags. 497 if (area->address_space == VMAddressSpace::Kernel()) { 498 uint32 kernelProtection = 0; 499 if ((protection & B_READ_AREA) != 0) 500 kernelProtection |= B_KERNEL_READ_AREA; 501 if ((protection & B_WRITE_AREA) != 0) 502 kernelProtection |= B_KERNEL_WRITE_AREA; 503 504 return kernelProtection; 505 } 506 507 return protection | B_KERNEL_READ_AREA 508 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 509 } 510 511 512 /*! The caller must have reserved enough pages the translation map 513 implementation might need to map this page. 514 The page's cache must be locked. 515 */ 516 static status_t 517 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 518 vm_page_reservation* reservation) 519 { 520 VMTranslationMap* map = area->address_space->TranslationMap(); 521 522 bool wasMapped = page->IsMapped(); 523 524 if (area->wiring == B_NO_LOCK) { 525 DEBUG_PAGE_ACCESS_CHECK(page); 526 527 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 528 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 529 gPageMappingsObjectCache, 530 CACHE_DONT_WAIT_FOR_MEMORY 531 | (isKernelSpace ? 
				CACHE_DONT_LOCK_KERNEL_SPACE : 0));
		if (mapping == NULL)
			return B_NO_MEMORY;

		mapping->page = page;
		mapping->area = area;

		map->Lock();

		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);

		// insert mapping into lists
		if (!page->IsMapped())
			atomic_add(&gMappedPagesCount, 1);

		page->mappings.Add(mapping);
		area->mappings.Add(mapping);

		map->Unlock();
	} else {
		DEBUG_PAGE_ACCESS_CHECK(page);

		map->Lock();
		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
			area->MemoryType(), reservation);
		map->Unlock();

		increment_page_wired_count(page);
	}

	if (!wasMapped) {
		// The page is mapped now, so we must not remain in the cached queue.
		// It also makes sense to move it from the inactive to the active, since
		// otherwise the page daemon wouldn't come to keep track of it (in idle
		// mode) -- if the page isn't touched, it will be deactivated after a
		// full iteration through the queue at the latest.
		if (page->State() == PAGE_STATE_CACHED
				|| page->State() == PAGE_STATE_INACTIVE) {
			vm_page_set_state(page, PAGE_STATE_ACTIVE);
		}
	}

	return B_OK;
}


/*! The caller must hold the lock of the page's cache.
*/
static inline bool
unmap_page(VMArea* area, addr_t virtualAddress)
{
	return area->address_space->TranslationMap()->UnmapPage(area,
		virtualAddress, true);
}


/*! The caller must hold the lock of all mapped pages' caches.
*/
static inline void
unmap_pages(VMArea* area, addr_t base, size_t size)
{
	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
}


/*! Cuts a piece out of an area. If the given cut range covers the complete
	area, it is deleted. If it covers the beginning or the end, the area is
	resized accordingly. If the range covers some part in the middle of the
	area, it is split in two; in this case the second area is returned via
	\a _secondArea (the variable is left untouched in the other cases).
	The address space must be write locked.
	The caller must ensure that no part of the given range is wired.
*/
static status_t
cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
	addr_t lastAddress, VMArea** _secondArea, bool kernel)
{
	// Does the cut range intersect with the area at all?
	addr_t areaLast = area->Base() + (area->Size() - 1);
	if (area->Base() > lastAddress || areaLast < address)
		return B_OK;

	// Is the area fully covered?
	if (area->Base() >= address && areaLast <= lastAddress) {
		delete_area(addressSpace, area, false);
		return B_OK;
	}

	int priority;
	uint32 allocationFlags;
	if (addressSpace == VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_SYSTEM;
		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
			| HEAP_DONT_LOCK_KERNEL_SPACE;
	} else {
		priority = VM_PRIORITY_USER;
		allocationFlags = 0;
	}

	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	// Cut the end only?
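	// In this branch only the area's tail is cut off: the area is shrunk to
	// the part below \a address, the now unused pages are unmapped, and, if
	// this area is the only user of a RAM cache, the cache is shrunk as well.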
638 if (areaLast <= lastAddress) { 639 size_t oldSize = area->Size(); 640 size_t newSize = address - area->Base(); 641 642 status_t error = addressSpace->ShrinkAreaTail(area, newSize, 643 allocationFlags); 644 if (error != B_OK) 645 return error; 646 647 // unmap pages 648 unmap_pages(area, address, oldSize - newSize); 649 650 // If no one else uses the area's cache, we can resize it, too. 651 if (cache->areas == area && area->cache_next == NULL 652 && cache->consumers.IsEmpty() 653 && cache->type == CACHE_TYPE_RAM) { 654 // Since VMCache::Resize() can temporarily drop the lock, we must 655 // unlock all lower caches to prevent locking order inversion. 656 cacheChainLocker.Unlock(cache); 657 cache->Resize(cache->virtual_base + newSize, priority); 658 cache->ReleaseRefAndUnlock(); 659 } 660 661 return B_OK; 662 } 663 664 // Cut the beginning only? 665 if (area->Base() >= address) { 666 addr_t oldBase = area->Base(); 667 addr_t newBase = lastAddress + 1; 668 size_t newSize = areaLast - lastAddress; 669 670 // unmap pages 671 unmap_pages(area, oldBase, newBase - oldBase); 672 673 // resize the area 674 status_t error = addressSpace->ShrinkAreaHead(area, newSize, 675 allocationFlags); 676 if (error != B_OK) 677 return error; 678 679 // TODO: If no one else uses the area's cache, we should resize it, too! 680 681 area->cache_offset += newBase - oldBase; 682 683 return B_OK; 684 } 685 686 // The tough part -- cut a piece out of the middle of the area. 687 // We do that by shrinking the area to the begin section and creating a 688 // new area for the end section. 689 690 addr_t firstNewSize = address - area->Base(); 691 addr_t secondBase = lastAddress + 1; 692 addr_t secondSize = areaLast - lastAddress; 693 694 // unmap pages 695 unmap_pages(area, address, area->Size() - firstNewSize); 696 697 // resize the area 698 addr_t oldSize = area->Size(); 699 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 700 allocationFlags); 701 if (error != B_OK) 702 return error; 703 704 // TODO: If no one else uses the area's cache, we might want to create a 705 // new cache for the second area, transfer the concerned pages from the 706 // first cache to it and resize the first cache. 707 708 // map the second area 709 virtual_address_restrictions addressRestrictions = {}; 710 addressRestrictions.address = (void*)secondBase; 711 addressRestrictions.address_specification = B_EXACT_ADDRESS; 712 VMArea* secondArea; 713 error = map_backing_store(addressSpace, cache, 714 area->cache_offset + (secondBase - area->Base()), area->name, 715 secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0, 716 &addressRestrictions, kernel, &secondArea, NULL); 717 if (error != B_OK) { 718 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 719 return error; 720 } 721 722 // We need a cache reference for the new area. 723 cache->AcquireRefLocked(); 724 725 if (_secondArea != NULL) 726 *_secondArea = secondArea; 727 728 return B_OK; 729 } 730 731 732 /*! Deletes all areas in the given address range. 733 The address space must be write-locked. 734 The caller must ensure that no part of the given range is wired. 735 */ 736 static status_t 737 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 738 bool kernel) 739 { 740 size = PAGE_ALIGN(size); 741 addr_t lastAddress = address + (size - 1); 742 743 // Check, whether the caller is allowed to modify the concerned areas. 
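	// (A userland caller may not touch areas carrying B_KERNEL_AREA; the loop
	// below only verifies this, the actual unmapping happens in the second
	// pass.)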
744 if (!kernel) { 745 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 746 VMArea* area = it.Next();) { 747 addr_t areaLast = area->Base() + (area->Size() - 1); 748 if (area->Base() < lastAddress && address < areaLast) { 749 if ((area->protection & B_KERNEL_AREA) != 0) 750 return B_NOT_ALLOWED; 751 } 752 } 753 } 754 755 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 756 VMArea* area = it.Next();) { 757 addr_t areaLast = area->Base() + (area->Size() - 1); 758 if (area->Base() < lastAddress && address < areaLast) { 759 status_t error = cut_area(addressSpace, area, address, 760 lastAddress, NULL, kernel); 761 if (error != B_OK) 762 return error; 763 // Failing after already messing with areas is ugly, but we 764 // can't do anything about it. 765 } 766 } 767 768 return B_OK; 769 } 770 771 772 /*! You need to hold the lock of the cache and the write lock of the address 773 space when calling this function. 774 Note, that in case of error your cache will be temporarily unlocked. 775 If \a addressSpec is \c B_EXACT_ADDRESS and the 776 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 777 that no part of the specified address range (base \c *_virtualAddress, size 778 \a size) is wired. 779 */ 780 static status_t 781 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 782 const char* areaName, addr_t size, int wiring, int protection, int mapping, 783 uint32 flags, const virtual_address_restrictions* addressRestrictions, 784 bool kernel, VMArea** _area, void** _virtualAddress) 785 { 786 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, " 787 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 788 "'%s'\n", addressSpace, cache, addressRestrictions->address, offset, 789 size, addressRestrictions->address_specification, wiring, protection, 790 _area, areaName)); 791 cache->AssertLocked(); 792 793 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 794 | HEAP_DONT_LOCK_KERNEL_SPACE; 795 int priority; 796 if (addressSpace != VMAddressSpace::Kernel()) { 797 priority = VM_PRIORITY_USER; 798 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 799 priority = VM_PRIORITY_VIP; 800 allocationFlags |= HEAP_PRIORITY_VIP; 801 } else 802 priority = VM_PRIORITY_SYSTEM; 803 804 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 805 allocationFlags); 806 if (area == NULL) 807 return B_NO_MEMORY; 808 809 status_t status; 810 811 // if this is a private map, we need to create a new cache 812 // to handle the private copies of pages as they are written to 813 VMCache* sourceCache = cache; 814 if (mapping == REGION_PRIVATE_MAP) { 815 VMCache* newCache; 816 817 // create an anonymous cache 818 bool isStack = (protection & B_STACK_AREA) != 0; 819 status = VMCacheFactory::CreateAnonymousCache(newCache, 820 isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 821 isStack ? 
USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER); 822 if (status != B_OK) 823 goto err1; 824 825 newCache->Lock(); 826 newCache->temporary = 1; 827 newCache->virtual_base = offset; 828 newCache->virtual_end = offset + size; 829 830 cache->AddConsumer(newCache); 831 832 cache = newCache; 833 } 834 835 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 836 status = cache->SetMinimalCommitment(size, priority); 837 if (status != B_OK) 838 goto err2; 839 } 840 841 // check to see if this address space has entered DELETE state 842 if (addressSpace->IsBeingDeleted()) { 843 // okay, someone is trying to delete this address space now, so we can't 844 // insert the area, so back out 845 status = B_BAD_TEAM_ID; 846 goto err2; 847 } 848 849 if (addressRestrictions->address_specification == B_EXACT_ADDRESS 850 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 851 status = unmap_address_range(addressSpace, 852 (addr_t)addressRestrictions->address, size, kernel); 853 if (status != B_OK) 854 goto err2; 855 } 856 857 status = addressSpace->InsertArea(area, size, addressRestrictions, 858 allocationFlags, _virtualAddress); 859 if (status != B_OK) { 860 // TODO: wait and try again once this is working in the backend 861 #if 0 862 if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) { 863 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, 864 0, 0); 865 } 866 #endif 867 goto err2; 868 } 869 870 // attach the cache to the area 871 area->cache = cache; 872 area->cache_offset = offset; 873 874 // point the cache back to the area 875 cache->InsertAreaLocked(area); 876 if (mapping == REGION_PRIVATE_MAP) 877 cache->Unlock(); 878 879 // insert the area in the global area hash table 880 VMAreaHash::Insert(area); 881 882 // grab a ref to the address space (the area holds this) 883 addressSpace->Get(); 884 885 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 886 // cache, sourceCache, areaName, area); 887 888 *_area = area; 889 return B_OK; 890 891 err2: 892 if (mapping == REGION_PRIVATE_MAP) { 893 // We created this cache, so we must delete it again. Note, that we 894 // need to temporarily unlock the source cache or we'll otherwise 895 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 896 sourceCache->Unlock(); 897 cache->ReleaseRefAndUnlock(); 898 sourceCache->Lock(); 899 } 900 err1: 901 addressSpace->DeleteArea(area, allocationFlags); 902 return status; 903 } 904 905 906 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 907 locker1, locker2). 908 */ 909 template<typename LockerType1, typename LockerType2> 910 static inline bool 911 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 912 { 913 area->cache->AssertLocked(); 914 915 VMAreaUnwiredWaiter waiter; 916 if (!area->AddWaiterIfWired(&waiter)) 917 return false; 918 919 // unlock everything and wait 920 if (locker1 != NULL) 921 locker1->Unlock(); 922 if (locker2 != NULL) 923 locker2->Unlock(); 924 925 waiter.waitEntry.Wait(); 926 927 return true; 928 } 929 930 931 /*! Checks whether the given area has any wired ranges intersecting with the 932 specified range and waits, if so. 933 934 When it has to wait, the function calls \c Unlock() on both \a locker1 935 and \a locker2, if given. 936 The area's top cache must be locked and must be unlocked as a side effect 937 of calling \c Unlock() on either \a locker1 or \a locker2. 938 939 If the function does not have to wait it does not modify or unlock any 940 object. 

	\param area The area to be checked.
	\param base The base address of the range to check.
	\param size The size of the address range to check.
	\param locker1 An object to be unlocked before starting to wait (may
		be \c NULL).
	\param locker2 An object to be unlocked before starting to wait (may
		be \c NULL).
	\return \c true, if the function had to wait, \c false otherwise.
*/
template<typename LockerType1, typename LockerType2>
static inline bool
wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
	LockerType1* locker1, LockerType2* locker2)
{
	area->cache->AssertLocked();

	VMAreaUnwiredWaiter waiter;
	if (!area->AddWaiterIfWired(&waiter, base, size))
		return false;

	// unlock everything and wait
	if (locker1 != NULL)
		locker1->Unlock();
	if (locker2 != NULL)
		locker2->Unlock();

	waiter.waitEntry.Wait();

	return true;
}


/*! Checks whether the given address space has any wired ranges intersecting
	with the specified range and waits, if so.

	Similar to wait_if_area_range_is_wired(), with the following differences:
	- All areas intersecting with the range are checked (or rather, all until
	  one is found that contains a wired range intersecting with the given
	  range).
	- The given address space must at least be read-locked and must be unlocked
	  when \c Unlock() is called on \a locker.
	- None of the areas' caches are allowed to be locked.
*/
template<typename LockerType>
static inline bool
wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
	size_t size, LockerType* locker)
{
	addr_t end = base + size - 1;
	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
			VMArea* area = it.Next();) {
		// TODO: Introduce a VMAddressSpace method to get a close iterator!
		if (area->Base() > end)
			return false;

		if (base >= area->Base() + area->Size() - 1)
			continue;

		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));

		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
			return true;
	}

	return false;
}


/*! Prepares an area to be used for vm_set_kernel_area_debug_protection().
	It must be called in a situation where the kernel address space may be
	locked.
*/
status_t
vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
{
	AddressSpaceReadLocker locker;
	VMArea* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	if (area->page_protections == NULL) {
		status = allocate_area_page_protections(area);
		if (status != B_OK)
			return status;
	}

	*cookie = (void*)area;
	return B_OK;
}


/*! This is a debug helper function that can only be used with very specific
	use cases.
	Sets protection for the given address range to the protection specified.
	If \a protection is 0 then the involved pages will be marked non-present
	in the translation map to cause a fault on access. The pages aren't
	actually unmapped however so that they can be marked present again with
	additional calls to this function. For this to work the area must be
	fully locked in memory so that the pages aren't otherwise touched.
	This function does not lock the kernel address space and needs to be
	supplied with a \a cookie retrieved from a successful call to
	vm_prepare_kernel_area_debug_protection().
*/
status_t
vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
	uint32 protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0
		|| (addr_t)address + size < (addr_t)address
		|| !IS_KERNEL_ADDRESS(address)
		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
		return B_BAD_VALUE;
	}

	// Translate the kernel protection to user protection as we only store that.
	if ((protection & B_KERNEL_READ_AREA) != 0)
		protection |= B_READ_AREA;
	if ((protection & B_KERNEL_WRITE_AREA) != 0)
		protection |= B_WRITE_AREA;

	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
	VMTranslationMap* map = addressSpace->TranslationMap();
	VMArea* area = (VMArea*)cookie;

	addr_t offset = address - area->Base();
	if (area->Size() - offset < size) {
		panic("protect range not fully within supplied area");
		return B_BAD_VALUE;
	}

	if (area->page_protections == NULL) {
		panic("area has no page protections");
		return B_BAD_VALUE;
	}

	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookup will succeed again.
	map->Lock();
	map->DebugMarkRangePresent(address, address + size, protection != 0);
	map->Unlock();

	// And set the proper page protections so that the fault case will actually
	// fail and not simply try to map a new page.
	for (addr_t pageAddress = address; pageAddress < address + size;
			pageAddress += B_PAGE_SIZE) {
		set_area_page_protection(area, pageAddress, protection);
	}

	return B_OK;
}


status_t
vm_block_address_range(const char* name, void* address, addr_t size)
{
	if (!arch_vm_supports_protection(0))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(VMAddressSpace::KernelID());
	if (status != B_OK)
		return status;

	VMAddressSpace* addressSpace = locker.AddressSpace();

	// create an anonymous cache
	VMCache* cache;
	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
		VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}


status_t
vm_unreserve_address_range(team_id team, void* address, addr_t size)
{
	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	VMAddressSpace* addressSpace = locker.AddressSpace();
	return
addressSpace->UnreserveAddressRange((addr_t)address, size, 1150 addressSpace == VMAddressSpace::Kernel() 1151 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1152 } 1153 1154 1155 status_t 1156 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1157 addr_t size, uint32 flags) 1158 { 1159 if (size == 0) 1160 return B_BAD_VALUE; 1161 1162 AddressSpaceWriteLocker locker(team); 1163 if (!locker.IsLocked()) 1164 return B_BAD_TEAM_ID; 1165 1166 virtual_address_restrictions addressRestrictions = {}; 1167 addressRestrictions.address = *_address; 1168 addressRestrictions.address_specification = addressSpec; 1169 VMAddressSpace* addressSpace = locker.AddressSpace(); 1170 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1171 addressSpace == VMAddressSpace::Kernel() 1172 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1173 _address); 1174 } 1175 1176 1177 area_id 1178 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1179 uint32 wiring, uint32 protection, uint32 flags, 1180 const virtual_address_restrictions* virtualAddressRestrictions, 1181 const physical_address_restrictions* physicalAddressRestrictions, 1182 bool kernel, void** _address) 1183 { 1184 VMArea* area; 1185 VMCache* cache; 1186 vm_page* page = NULL; 1187 bool isStack = (protection & B_STACK_AREA) != 0; 1188 page_num_t guardPages; 1189 bool canOvercommit = false; 1190 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1191 ? VM_PAGE_ALLOC_CLEAR : 0; 1192 1193 TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size)); 1194 1195 size = PAGE_ALIGN(size); 1196 1197 if (size == 0) 1198 return B_BAD_VALUE; 1199 if (!arch_vm_supports_protection(protection)) 1200 return B_NOT_SUPPORTED; 1201 1202 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1203 canOvercommit = true; 1204 1205 #ifdef DEBUG_KERNEL_STACKS 1206 if ((protection & B_KERNEL_STACK_AREA) != 0) 1207 isStack = true; 1208 #endif 1209 1210 // check parameters 1211 switch (virtualAddressRestrictions->address_specification) { 1212 case B_ANY_ADDRESS: 1213 case B_EXACT_ADDRESS: 1214 case B_BASE_ADDRESS: 1215 case B_ANY_KERNEL_ADDRESS: 1216 case B_ANY_KERNEL_BLOCK_ADDRESS: 1217 break; 1218 1219 default: 1220 return B_BAD_VALUE; 1221 } 1222 1223 // If low or high physical address restrictions are given, we force 1224 // B_CONTIGUOUS wiring, since only then we'll use 1225 // vm_page_allocate_page_run() which deals with those restrictions. 1226 if (physicalAddressRestrictions->low_address != 0 1227 || physicalAddressRestrictions->high_address != 0) { 1228 wiring = B_CONTIGUOUS; 1229 } 1230 1231 physical_address_restrictions stackPhysicalRestrictions; 1232 bool doReserveMemory = false; 1233 switch (wiring) { 1234 case B_NO_LOCK: 1235 break; 1236 case B_FULL_LOCK: 1237 case B_LAZY_LOCK: 1238 case B_CONTIGUOUS: 1239 doReserveMemory = true; 1240 break; 1241 case B_ALREADY_WIRED: 1242 break; 1243 case B_LOMEM: 1244 stackPhysicalRestrictions = *physicalAddressRestrictions; 1245 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1246 physicalAddressRestrictions = &stackPhysicalRestrictions; 1247 wiring = B_CONTIGUOUS; 1248 doReserveMemory = true; 1249 break; 1250 case B_32_BIT_FULL_LOCK: 1251 if (B_HAIKU_PHYSICAL_BITS <= 32 1252 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1253 wiring = B_FULL_LOCK; 1254 doReserveMemory = true; 1255 break; 1256 } 1257 // TODO: We don't really support this mode efficiently. Just fall 1258 // through for now ... 
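			// (If all physical memory fits into 32 bits, the request was
			// already downgraded to B_FULL_LOCK above; otherwise it is
			// treated like a below-4-GB contiguous allocation.)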
		case B_32_BIT_CONTIGUOUS:
			#if B_HAIKU_PHYSICAL_BITS > 32
				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
					stackPhysicalRestrictions = *physicalAddressRestrictions;
					stackPhysicalRestrictions.high_address
						= (phys_addr_t)1 << 32;
					physicalAddressRestrictions = &stackPhysicalRestrictions;
				}
			#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory handler
	// won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to do so, since we currently
		// don't reclaim them (and probably can't reclaim all of them anyway).
		// Thus there are actually fewer physical pages than there should be,
		// which can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	VMAddressSpace* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
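	// (The reservation below covers both the pages needed by the translation
	// map backend and, for B_FULL_LOCK, one page per B_PAGE_SIZE of the area
	// itself.)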
1328 page_num_t reservedPages = reservedMapPages; 1329 if (wiring == B_FULL_LOCK) 1330 reservedPages += size / B_PAGE_SIZE; 1331 1332 vm_page_reservation reservation; 1333 if (reservedPages > 0) { 1334 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1335 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1336 priority)) { 1337 reservedPages = 0; 1338 status = B_WOULD_BLOCK; 1339 goto err0; 1340 } 1341 } else 1342 vm_page_reserve_pages(&reservation, reservedPages, priority); 1343 } 1344 1345 if (wiring == B_CONTIGUOUS) { 1346 // we try to allocate the page run here upfront as this may easily 1347 // fail for obvious reasons 1348 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1349 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1350 if (page == NULL) { 1351 status = B_NO_MEMORY; 1352 goto err0; 1353 } 1354 } 1355 1356 // Lock the address space and, if B_EXACT_ADDRESS and 1357 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1358 // is not wired. 1359 do { 1360 status = locker.SetTo(team); 1361 if (status != B_OK) 1362 goto err1; 1363 1364 addressSpace = locker.AddressSpace(); 1365 } while (virtualAddressRestrictions->address_specification 1366 == B_EXACT_ADDRESS 1367 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1368 && wait_if_address_range_is_wired(addressSpace, 1369 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1370 1371 // create an anonymous cache 1372 // if it's a stack, make sure that two pages are available at least 1373 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1374 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1375 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1376 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1377 wiring == B_NO_LOCK, priority); 1378 if (status != B_OK) 1379 goto err1; 1380 1381 cache->temporary = 1; 1382 cache->virtual_end = size; 1383 cache->committed_size = reservedMemory; 1384 // TODO: This should be done via a method. 1385 reservedMemory = 0; 1386 1387 cache->Lock(); 1388 1389 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1390 protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions, 1391 kernel, &area, _address); 1392 1393 if (status != B_OK) { 1394 cache->ReleaseRefAndUnlock(); 1395 goto err1; 1396 } 1397 1398 locker.DegradeToReadLock(); 1399 1400 switch (wiring) { 1401 case B_NO_LOCK: 1402 case B_LAZY_LOCK: 1403 // do nothing - the pages are mapped in as needed 1404 break; 1405 1406 case B_FULL_LOCK: 1407 { 1408 // Allocate and map all pages for this area 1409 1410 off_t offset = 0; 1411 for (addr_t address = area->Base(); 1412 address < area->Base() + (area->Size() - 1); 1413 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1414 #ifdef DEBUG_KERNEL_STACKS 1415 # ifdef STACK_GROWS_DOWNWARDS 1416 if (isStack && address < area->Base() 1417 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1418 # else 1419 if (isStack && address >= area->Base() + area->Size() 1420 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1421 # endif 1422 continue; 1423 #endif 1424 vm_page* page = vm_page_allocate_page(&reservation, 1425 PAGE_STATE_WIRED | pageAllocFlags); 1426 cache->InsertPage(page, offset); 1427 map_page(area, page, address, protection, &reservation); 1428 1429 DEBUG_PAGE_ACCESS_END(page); 1430 } 1431 1432 break; 1433 } 1434 1435 case B_ALREADY_WIRED: 1436 { 1437 // The pages should already be mapped. This is only really useful 1438 // during boot time. 
Find the appropriate vm_page objects and stick
			// them in the cache object.
			VMTranslationMap* map = addressSpace->TranslationMap();
			off_t offset = 0;

			if (!gKernelStartup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			map->Lock();

			for (addr_t virtualAddress = area->Base();
					virtualAddress < area->Base() + (area->Size() - 1);
					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
				phys_addr_t physicalAddress;
				uint32 flags;
				status = map->Query(virtualAddress, &physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
						"\n", physicalAddress);
				}

				DEBUG_PAGE_ACCESS_START(page);

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);
				vm_page_set_state(page, PAGE_STATE_WIRED);
				page->busy = false;

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can now
			// just map the pages in the address space
			VMTranslationMap* map = addressSpace->TranslationMap();
			phys_addr_t physicalAddress
				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress = area->Base();
			off_t offset = 0;

			map->Lock();

			for (virtualAddress = area->Base(); virtualAddress < area->Base()
					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->Map(virtualAddress, physicalAddress, protection,
					area->MemoryType(), &reservation);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		default:
			break;
	}

	cache->Unlock();

	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
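		// (The contiguous page run allocated earlier is released again here,
		// page by page, since the area could not be created.)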
1529 phys_addr_t pageNumber = page->physical_page_number; 1530 int32 i; 1531 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1532 page = vm_lookup_page(pageNumber); 1533 if (page == NULL) 1534 panic("couldn't lookup physical page just allocated\n"); 1535 1536 vm_page_set_state(page, PAGE_STATE_FREE); 1537 } 1538 } 1539 1540 err0: 1541 if (reservedPages > 0) 1542 vm_page_unreserve_pages(&reservation); 1543 if (reservedMemory > 0) 1544 vm_unreserve_memory(reservedMemory); 1545 1546 return status; 1547 } 1548 1549 1550 area_id 1551 vm_map_physical_memory(team_id team, const char* name, void** _address, 1552 uint32 addressSpec, addr_t size, uint32 protection, 1553 phys_addr_t physicalAddress, bool alreadyWired) 1554 { 1555 VMArea* area; 1556 VMCache* cache; 1557 addr_t mapOffset; 1558 1559 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 1560 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 1561 name, *_address, addressSpec, size, protection, physicalAddress)); 1562 1563 if (!arch_vm_supports_protection(protection)) 1564 return B_NOT_SUPPORTED; 1565 1566 AddressSpaceWriteLocker locker(team); 1567 if (!locker.IsLocked()) 1568 return B_BAD_TEAM_ID; 1569 1570 // if the physical address is somewhat inside a page, 1571 // move the actual area down to align on a page boundary 1572 mapOffset = physicalAddress % B_PAGE_SIZE; 1573 size += mapOffset; 1574 physicalAddress -= mapOffset; 1575 1576 size = PAGE_ALIGN(size); 1577 1578 // create a device cache 1579 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1580 if (status != B_OK) 1581 return status; 1582 1583 cache->virtual_end = size; 1584 1585 cache->Lock(); 1586 1587 virtual_address_restrictions addressRestrictions = {}; 1588 addressRestrictions.address = *_address; 1589 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1590 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1591 B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1592 true, &area, _address); 1593 1594 if (status < B_OK) 1595 cache->ReleaseRefLocked(); 1596 1597 cache->Unlock(); 1598 1599 if (status == B_OK) { 1600 // set requested memory type -- use uncached, if not given 1601 uint32 memoryType = addressSpec & B_MTR_MASK; 1602 if (memoryType == 0) 1603 memoryType = B_MTR_UC; 1604 1605 area->SetMemoryType(memoryType); 1606 1607 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1608 if (status != B_OK) 1609 delete_area(locker.AddressSpace(), area, false); 1610 } 1611 1612 if (status != B_OK) 1613 return status; 1614 1615 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1616 1617 if (alreadyWired) { 1618 // The area is already mapped, but possibly not with the right 1619 // memory type. 1620 map->Lock(); 1621 map->ProtectArea(area, area->protection); 1622 map->Unlock(); 1623 } else { 1624 // Map the area completely. 1625 1626 // reserve pages needed for the mapping 1627 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1628 area->Base() + (size - 1)); 1629 vm_page_reservation reservation; 1630 vm_page_reserve_pages(&reservation, reservePages, 1631 team == VMAddressSpace::KernelID() 1632 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1633 1634 map->Lock(); 1635 1636 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1637 map->Map(area->Base() + offset, physicalAddress + offset, 1638 protection, area->MemoryType(), &reservation); 1639 } 1640 1641 map->Unlock(); 1642 1643 vm_page_unreserve_pages(&reservation); 1644 } 1645 1646 // modify the pointer returned to be offset back into the new area 1647 // the same way the physical address in was offset 1648 *_address = (void*)((addr_t)*_address + mapOffset); 1649 1650 area->cache_type = CACHE_TYPE_DEVICE; 1651 return area->id; 1652 } 1653 1654 1655 /*! Don't use! 1656 TODO: This function was introduced to map physical page vecs to 1657 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1658 use a device cache and does not track vm_page::wired_count! 1659 */ 1660 area_id 1661 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1662 uint32 addressSpec, addr_t* _size, uint32 protection, 1663 struct generic_io_vec* vecs, uint32 vecCount) 1664 { 1665 TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, " 1666 "spec = %ld, _size = %p, protection = %ld, vecs = %p, " 1667 "vecCount = %ld)\n", team, name, *_address, addressSpec, _size, 1668 protection, vecs, vecCount)); 1669 1670 if (!arch_vm_supports_protection(protection) 1671 || (addressSpec & B_MTR_MASK) != 0) { 1672 return B_NOT_SUPPORTED; 1673 } 1674 1675 AddressSpaceWriteLocker locker(team); 1676 if (!locker.IsLocked()) 1677 return B_BAD_TEAM_ID; 1678 1679 if (vecCount == 0) 1680 return B_BAD_VALUE; 1681 1682 addr_t size = 0; 1683 for (uint32 i = 0; i < vecCount; i++) { 1684 if (vecs[i].base % B_PAGE_SIZE != 0 1685 || vecs[i].length % B_PAGE_SIZE != 0) { 1686 return B_BAD_VALUE; 1687 } 1688 1689 size += vecs[i].length; 1690 } 1691 1692 // create a device cache 1693 VMCache* cache; 1694 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1695 if (result != B_OK) 1696 return result; 1697 1698 cache->virtual_end = size; 1699 1700 cache->Lock(); 1701 1702 VMArea* area; 1703 virtual_address_restrictions addressRestrictions = {}; 1704 addressRestrictions.address = *_address; 1705 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1706 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1707 size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, 1708 &addressRestrictions, true, &area, _address); 1709 1710 if (result != B_OK) 1711 cache->ReleaseRefLocked(); 1712 1713 cache->Unlock(); 1714 1715 if (result != B_OK) 1716 return result; 1717 1718 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1719 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1720 area->Base() + (size - 1)); 1721 1722 vm_page_reservation reservation; 1723 vm_page_reserve_pages(&reservation, reservePages, 1724 team == VMAddressSpace::KernelID() 1725 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1726 map->Lock(); 1727 1728 uint32 vecIndex = 0; 1729 size_t vecOffset = 0; 1730 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1731 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1732 vecOffset = 0; 1733 vecIndex++; 1734 } 1735 1736 if (vecIndex >= vecCount) 1737 break; 1738 1739 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1740 protection, area->MemoryType(), &reservation); 1741 1742 vecOffset += B_PAGE_SIZE; 1743 } 1744 1745 map->Unlock(); 1746 vm_page_unreserve_pages(&reservation); 1747 1748 if (_size != NULL) 1749 *_size = size; 1750 1751 area->cache_type = CACHE_TYPE_DEVICE; 1752 return area->id; 1753 } 1754 1755 1756 area_id 1757 vm_create_null_area(team_id team, const char* name, void** address, 1758 uint32 addressSpec, addr_t size, uint32 flags) 1759 { 1760 size = PAGE_ALIGN(size); 1761 1762 // Lock the address space and, if B_EXACT_ADDRESS and 1763 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1764 // is not wired. 1765 AddressSpaceWriteLocker locker; 1766 do { 1767 if (locker.SetTo(team) != B_OK) 1768 return B_BAD_TEAM_ID; 1769 } while (addressSpec == B_EXACT_ADDRESS 1770 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1771 && wait_if_address_range_is_wired(locker.AddressSpace(), 1772 (addr_t)*address, size, &locker)); 1773 1774 // create a null cache 1775 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1776 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1777 VMCache* cache; 1778 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1779 if (status != B_OK) 1780 return status; 1781 1782 cache->temporary = 1; 1783 cache->virtual_end = size; 1784 1785 cache->Lock(); 1786 1787 VMArea* area; 1788 virtual_address_restrictions addressRestrictions = {}; 1789 addressRestrictions.address = *address; 1790 addressRestrictions.address_specification = addressSpec; 1791 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1792 B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags, 1793 &addressRestrictions, true, &area, address); 1794 1795 if (status < B_OK) { 1796 cache->ReleaseRefAndUnlock(); 1797 return status; 1798 } 1799 1800 cache->Unlock(); 1801 1802 area->cache_type = CACHE_TYPE_NULL; 1803 return area->id; 1804 } 1805 1806 1807 /*! Creates the vnode cache for the specified \a vnode. 1808 The vnode has to be marked busy when calling this function. 1809 */ 1810 status_t 1811 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1812 { 1813 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1814 } 1815 1816 1817 /*! \a cache must be locked. The area's address space must be read-locked. 
*/
static void
pre_map_area_pages(VMArea* area, VMCache* cache,
	vm_page_reservation* reservation)
{
	addr_t baseAddress = area->Base();
	addr_t cacheOffset = area->cache_offset;
	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;

	for (VMCachePagesTree::Iterator it
			= cache->pages.GetIterator(firstPage, true, true);
			vm_page* page = it.Next();) {
		if (page->cache_offset >= endPage)
			break;

		// skip busy and inactive pages
		if (page->busy || page->usage_count == 0)
			continue;

		DEBUG_PAGE_ACCESS_START(page);
		map_page(area, page,
			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
		DEBUG_PAGE_ACCESS_END(page);
	}
}


/*! Will map the file specified by \a fd to an area in memory.
	The file will be mirrored beginning at the specified \a offset. The
	\a offset and \a size arguments have to be page aligned.
*/
static area_id
_vm_map_file(team_id team, const char* name, void** _address,
	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
	bool unmapAddressRange, int fd, off_t offset, bool kernel)
{
	// TODO: for binary files, we want to make sure that they get the
	// copy of a file at a given time, i.e. later changes should not
	// make it into the mapped copy -- this will need quite some changes
	// to be done in a nice way
	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
		fd, offset, size, mapping));

	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = PAGE_ALIGN(size);

	if (mapping == REGION_NO_PRIVATE_MAP)
		protection |= B_SHARED_AREA;
	if (addressSpec != B_EXACT_ADDRESS)
		unmapAddressRange = false;

	if (fd < 0) {
		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address = *_address;
		virtualRestrictions.address_specification = addressSpec;
		physical_address_restrictions physicalRestrictions = {};
		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
			flags, &virtualRestrictions, &physicalRestrictions, kernel,
			_address);
	}

	// get the open flags of the FD
	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return EBADF;
	int32 openMode = descriptor->open_mode;
	put_fd(descriptor);

	// The FD must be open for reading in any case. For a shared mapping with
	// write access, the FD must additionally be open for writing.
	if ((openMode & O_ACCMODE) == O_WRONLY
		|| (mapping == REGION_NO_PRIVATE_MAP
			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
			&& (openMode & O_ACCMODE) == O_RDONLY)) {
		return EACCES;
	}

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode = NULL;
	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
	if (status < B_OK)
		return status;
	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);

	// If we're going to pre-map pages, we need to reserve the pages needed by
	// the mapping backend upfront.
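	// (Pre-mapping is only done for readable mappings; the reservation made
	// here is released again by the PageUnreserver defined below.)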
1907 page_num_t reservedPreMapPages = 0; 1908 vm_page_reservation reservation; 1909 if ((protection & B_READ_AREA) != 0) { 1910 AddressSpaceWriteLocker locker; 1911 status = locker.SetTo(team); 1912 if (status != B_OK) 1913 return status; 1914 1915 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1916 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 1917 1918 locker.Unlock(); 1919 1920 vm_page_reserve_pages(&reservation, reservedPreMapPages, 1921 team == VMAddressSpace::KernelID() 1922 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1923 } 1924 1925 struct PageUnreserver { 1926 PageUnreserver(vm_page_reservation* reservation) 1927 : 1928 fReservation(reservation) 1929 { 1930 } 1931 1932 ~PageUnreserver() 1933 { 1934 if (fReservation != NULL) 1935 vm_page_unreserve_pages(fReservation); 1936 } 1937 1938 vm_page_reservation* fReservation; 1939 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 1940 1941 // Lock the address space and, if the specified address range shall be 1942 // unmapped, ensure it is not wired. 1943 AddressSpaceWriteLocker locker; 1944 do { 1945 if (locker.SetTo(team) != B_OK) 1946 return B_BAD_TEAM_ID; 1947 } while (unmapAddressRange 1948 && wait_if_address_range_is_wired(locker.AddressSpace(), 1949 (addr_t)*_address, size, &locker)); 1950 1951 // TODO: this only works for file systems that use the file cache 1952 VMCache* cache; 1953 status = vfs_get_vnode_cache(vnode, &cache, false); 1954 if (status < B_OK) 1955 return status; 1956 1957 cache->Lock(); 1958 1959 VMArea* area; 1960 virtual_address_restrictions addressRestrictions = {}; 1961 addressRestrictions.address = *_address; 1962 addressRestrictions.address_specification = addressSpec; 1963 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 1964 0, protection, mapping, 1965 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 1966 &addressRestrictions, kernel, &area, _address); 1967 1968 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 1969 // map_backing_store() cannot know we no longer need the ref 1970 cache->ReleaseRefLocked(); 1971 } 1972 1973 if (status == B_OK && (protection & B_READ_AREA) != 0) 1974 pre_map_area_pages(area, cache, &reservation); 1975 1976 cache->Unlock(); 1977 1978 if (status == B_OK) { 1979 // TODO: this probably deserves a smarter solution, ie. don't always 1980 // prefetch stuff, and also, probably don't trigger it at this place. 
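		// The prefetch below is capped at 10 MB, presumably so that mapping a
		// very large file does not immediately pull all of it into the file
		// cache.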
1981 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1982 // prefetches at max 10 MB starting from "offset" 1983 } 1984 1985 if (status != B_OK) 1986 return status; 1987 1988 area->cache_type = CACHE_TYPE_VNODE; 1989 return area->id; 1990 } 1991 1992 1993 area_id 1994 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 1995 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 1996 int fd, off_t offset) 1997 { 1998 if (!arch_vm_supports_protection(protection)) 1999 return B_NOT_SUPPORTED; 2000 2001 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2002 mapping, unmapAddressRange, fd, offset, true); 2003 } 2004 2005 2006 VMCache* 2007 vm_area_get_locked_cache(VMArea* area) 2008 { 2009 rw_lock_read_lock(&sAreaCacheLock); 2010 2011 while (true) { 2012 VMCache* cache = area->cache; 2013 2014 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2015 // cache has been deleted 2016 rw_lock_read_lock(&sAreaCacheLock); 2017 continue; 2018 } 2019 2020 rw_lock_read_lock(&sAreaCacheLock); 2021 2022 if (cache == area->cache) { 2023 cache->AcquireRefLocked(); 2024 rw_lock_read_unlock(&sAreaCacheLock); 2025 return cache; 2026 } 2027 2028 // the cache changed in the meantime 2029 cache->Unlock(); 2030 } 2031 } 2032 2033 2034 void 2035 vm_area_put_locked_cache(VMCache* cache) 2036 { 2037 cache->ReleaseRefAndUnlock(); 2038 } 2039 2040 2041 area_id 2042 vm_clone_area(team_id team, const char* name, void** address, 2043 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2044 bool kernel) 2045 { 2046 VMArea* newArea = NULL; 2047 VMArea* sourceArea; 2048 2049 // Check whether the source area exists and is cloneable. If so, mark it 2050 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2051 { 2052 AddressSpaceWriteLocker locker; 2053 status_t status = locker.SetFromArea(sourceID, sourceArea); 2054 if (status != B_OK) 2055 return status; 2056 2057 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2058 return B_NOT_ALLOWED; 2059 2060 sourceArea->protection |= B_SHARED_AREA; 2061 protection |= B_SHARED_AREA; 2062 } 2063 2064 // Now lock both address spaces and actually do the cloning. 2065 2066 MultiAddressSpaceLocker locker; 2067 VMAddressSpace* sourceAddressSpace; 2068 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2069 if (status != B_OK) 2070 return status; 2071 2072 VMAddressSpace* targetAddressSpace; 2073 status = locker.AddTeam(team, true, &targetAddressSpace); 2074 if (status != B_OK) 2075 return status; 2076 2077 status = locker.Lock(); 2078 if (status != B_OK) 2079 return status; 2080 2081 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2082 if (sourceArea == NULL) 2083 return B_BAD_VALUE; 2084 2085 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2086 return B_NOT_ALLOWED; 2087 2088 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2089 2090 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2091 // have been adapted. Maybe it should be part of the kernel settings, 2092 // anyway (so that old drivers can always work). 
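	// The check below is compiled out accordingly; the only restriction still
	// enforced at this point is that null areas (CACHE_TYPE_NULL) cannot be
	// cloned.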
2093 #if 0 2094 if (sourceArea->aspace == VMAddressSpace::Kernel() 2095 && addressSpace != VMAddressSpace::Kernel() 2096 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2097 // kernel areas must not be cloned in userland, unless explicitly 2098 // declared user-cloneable upon construction 2099 status = B_NOT_ALLOWED; 2100 } else 2101 #endif 2102 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2103 status = B_NOT_ALLOWED; 2104 else { 2105 virtual_address_restrictions addressRestrictions = {}; 2106 addressRestrictions.address = *address; 2107 addressRestrictions.address_specification = addressSpec; 2108 status = map_backing_store(targetAddressSpace, cache, 2109 sourceArea->cache_offset, name, sourceArea->Size(), 2110 sourceArea->wiring, protection, mapping, 0, &addressRestrictions, 2111 kernel, &newArea, address); 2112 } 2113 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2114 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2115 // to create a new cache, and has therefore already acquired a reference 2116 // to the source cache - but otherwise it has no idea that we need 2117 // one. 2118 cache->AcquireRefLocked(); 2119 } 2120 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2121 // we need to map in everything at this point 2122 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2123 // we don't have actual pages to map but a physical area 2124 VMTranslationMap* map 2125 = sourceArea->address_space->TranslationMap(); 2126 map->Lock(); 2127 2128 phys_addr_t physicalAddress; 2129 uint32 oldProtection; 2130 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2131 2132 map->Unlock(); 2133 2134 map = targetAddressSpace->TranslationMap(); 2135 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2136 newArea->Base() + (newArea->Size() - 1)); 2137 2138 vm_page_reservation reservation; 2139 vm_page_reserve_pages(&reservation, reservePages, 2140 targetAddressSpace == VMAddressSpace::Kernel() 2141 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2142 map->Lock(); 2143 2144 for (addr_t offset = 0; offset < newArea->Size(); 2145 offset += B_PAGE_SIZE) { 2146 map->Map(newArea->Base() + offset, physicalAddress + offset, 2147 protection, newArea->MemoryType(), &reservation); 2148 } 2149 2150 map->Unlock(); 2151 vm_page_unreserve_pages(&reservation); 2152 } else { 2153 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2154 size_t reservePages = map->MaxPagesNeededToMap( 2155 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2156 vm_page_reservation reservation; 2157 vm_page_reserve_pages(&reservation, reservePages, 2158 targetAddressSpace == VMAddressSpace::Kernel() 2159 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2160 2161 // map in all pages from source 2162 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2163 vm_page* page = it.Next();) { 2164 if (!page->busy) { 2165 DEBUG_PAGE_ACCESS_START(page); 2166 map_page(newArea, page, 2167 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2168 - newArea->cache_offset), 2169 protection, &reservation); 2170 DEBUG_PAGE_ACCESS_END(page); 2171 } 2172 } 2173 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2174 // ensuring that! 2175 2176 vm_page_unreserve_pages(&reservation); 2177 } 2178 } 2179 if (status == B_OK) 2180 newArea->cache_type = sourceArea->cache_type; 2181 2182 vm_area_put_locked_cache(cache); 2183 2184 if (status < B_OK) 2185 return status; 2186 2187 return newArea->id; 2188 } 2189 2190 2191 /*! 
Deletes the specified area of the given address space. 2192 2193 The address space must be write-locked. 2194 The caller must ensure that the area does not have any wired ranges. 2195 2196 \param addressSpace The address space containing the area. 2197 \param area The area to be deleted. 2198 \param deletingAddressSpace \c true, if the address space is in the process 2199 of being deleted. 2200 */ 2201 static void 2202 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2203 bool deletingAddressSpace) 2204 { 2205 ASSERT(!area->IsWired()); 2206 2207 VMAreaHash::Remove(area); 2208 2209 // At this point the area is removed from the global hash table, but 2210 // still exists in the area list. 2211 2212 // Unmap the virtual address space the area occupied. 2213 { 2214 // We need to lock the complete cache chain. 2215 VMCache* topCache = vm_area_get_locked_cache(area); 2216 VMCacheChainLocker cacheChainLocker(topCache); 2217 cacheChainLocker.LockAllSourceCaches(); 2218 2219 // If the area's top cache is a temporary cache and the area is the only 2220 // one referencing it (besides us currently holding a second reference), 2221 // the unmapping code doesn't need to care about preserving the accessed 2222 // and dirty flags of the top cache page mappings. 2223 bool ignoreTopCachePageFlags 2224 = topCache->temporary && topCache->RefCount() == 2; 2225 2226 area->address_space->TranslationMap()->UnmapArea(area, 2227 deletingAddressSpace, ignoreTopCachePageFlags); 2228 } 2229 2230 if (!area->cache->temporary) 2231 area->cache->WriteModified(); 2232 2233 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2234 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2235 2236 arch_vm_unset_memory_type(area); 2237 addressSpace->RemoveArea(area, allocationFlags); 2238 addressSpace->Put(); 2239 2240 area->cache->RemoveArea(area); 2241 area->cache->ReleaseRef(); 2242 2243 addressSpace->DeleteArea(area, allocationFlags); 2244 } 2245 2246 2247 status_t 2248 vm_delete_area(team_id team, area_id id, bool kernel) 2249 { 2250 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2251 2252 // lock the address space and make sure the area isn't wired 2253 AddressSpaceWriteLocker locker; 2254 VMArea* area; 2255 AreaCacheLocker cacheLocker; 2256 2257 do { 2258 status_t status = locker.SetFromArea(team, id, area); 2259 if (status != B_OK) 2260 return status; 2261 2262 cacheLocker.SetTo(area); 2263 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2264 2265 cacheLocker.Unlock(); 2266 2267 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2268 return B_NOT_ALLOWED; 2269 2270 delete_area(locker.AddressSpace(), area, false); 2271 return B_OK; 2272 } 2273 2274 2275 /*! Creates a new cache on top of given cache, moves all areas from 2276 the old cache to the new one, and changes the protection of all affected 2277 areas' pages to read-only. If requested, wired pages are moved up to the 2278 new cache and copies are added to the old cache in their place. 2279 Preconditions: 2280 - The given cache must be locked. 2281 - All of the cache's areas' address spaces must be read locked. 2282 - Either the cache must not have any wired ranges or a page reservation for 2283 all wired pages must be provided, so they can be copied. 2284 2285 \param lowerCache The cache on top of which a new cache shall be created. 2286 \param wiredPagesReservation If \c NULL there must not be any wired pages 2287 in \a lowerCache. 
Otherwise as many pages must be reserved as the cache 2288 has wired pages. The wired pages are copied in this case. 2289 */ 2290 static status_t 2291 vm_copy_on_write_area(VMCache* lowerCache, 2292 vm_page_reservation* wiredPagesReservation) 2293 { 2294 VMCache* upperCache; 2295 2296 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2297 2298 // We need to separate the cache from its areas. The cache goes one level 2299 // deeper and we create a new cache in between. 2300 2301 // create an anonymous cache 2302 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2303 0, true, VM_PRIORITY_USER); 2304 if (status != B_OK) 2305 return status; 2306 2307 upperCache->Lock(); 2308 2309 upperCache->temporary = 1; 2310 upperCache->virtual_base = lowerCache->virtual_base; 2311 upperCache->virtual_end = lowerCache->virtual_end; 2312 2313 // transfer the lower cache areas to the upper cache 2314 rw_lock_write_lock(&sAreaCacheLock); 2315 upperCache->TransferAreas(lowerCache); 2316 rw_lock_write_unlock(&sAreaCacheLock); 2317 2318 lowerCache->AddConsumer(upperCache); 2319 2320 // We now need to remap all pages from all of the cache's areas read-only, 2321 // so that a copy will be created on next write access. If there are wired 2322 // pages, we keep their protection, move them to the upper cache and create 2323 // copies for the lower cache. 2324 if (wiredPagesReservation != NULL) { 2325 // We need to handle wired pages -- iterate through the cache's pages. 2326 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2327 vm_page* page = it.Next();) { 2328 if (page->WiredCount() > 0) { 2329 // allocate a new page and copy the wired one 2330 vm_page* copiedPage = vm_page_allocate_page( 2331 wiredPagesReservation, PAGE_STATE_ACTIVE); 2332 2333 vm_memcpy_physical_page( 2334 copiedPage->physical_page_number * B_PAGE_SIZE, 2335 page->physical_page_number * B_PAGE_SIZE); 2336 2337 // move the wired page to the upper cache (note: removing is OK 2338 // with the SplayTree iterator) and insert the copy 2339 upperCache->MovePage(page); 2340 lowerCache->InsertPage(copiedPage, 2341 page->cache_offset * B_PAGE_SIZE); 2342 2343 DEBUG_PAGE_ACCESS_END(copiedPage); 2344 } else { 2345 // Change the protection of this page in all areas. 2346 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2347 tempArea = tempArea->cache_next) { 2348 // The area must be readable in the same way it was 2349 // previously writable. 2350 uint32 protection = B_KERNEL_READ_AREA; 2351 if ((tempArea->protection & B_READ_AREA) != 0) 2352 protection |= B_READ_AREA; 2353 2354 VMTranslationMap* map 2355 = tempArea->address_space->TranslationMap(); 2356 map->Lock(); 2357 map->ProtectPage(tempArea, 2358 virtual_page_address(tempArea, page), protection); 2359 map->Unlock(); 2360 } 2361 } 2362 } 2363 } else { 2364 // just change the protection of all areas 2365 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2366 tempArea = tempArea->cache_next) { 2367 // The area must be readable in the same way it was previously 2368 // writable.
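			// That is, the kernel always keeps read access, while userland
			// keeps read access only if the area was user readable before.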
2369 uint32 protection = B_KERNEL_READ_AREA; 2370 if ((tempArea->protection & B_READ_AREA) != 0) 2371 protection |= B_READ_AREA; 2372 2373 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2374 map->Lock(); 2375 map->ProtectArea(tempArea, protection); 2376 map->Unlock(); 2377 } 2378 } 2379 2380 vm_area_put_locked_cache(upperCache); 2381 2382 return B_OK; 2383 } 2384 2385 2386 area_id 2387 vm_copy_area(team_id team, const char* name, void** _address, 2388 uint32 addressSpec, uint32 protection, area_id sourceID) 2389 { 2390 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2391 2392 if ((protection & B_KERNEL_PROTECTION) == 0) { 2393 // set the same protection for the kernel as for userland 2394 protection |= B_KERNEL_READ_AREA; 2395 if (writableCopy) 2396 protection |= B_KERNEL_WRITE_AREA; 2397 } 2398 2399 // Do the locking: target address space, all address spaces associated with 2400 // the source cache, and the cache itself. 2401 MultiAddressSpaceLocker locker; 2402 VMAddressSpace* targetAddressSpace; 2403 VMCache* cache; 2404 VMArea* source; 2405 AreaCacheLocker cacheLocker; 2406 status_t status; 2407 bool sharedArea; 2408 2409 page_num_t wiredPages = 0; 2410 vm_page_reservation wiredPagesReservation; 2411 2412 bool restart; 2413 do { 2414 restart = false; 2415 2416 locker.Unset(); 2417 status = locker.AddTeam(team, true, &targetAddressSpace); 2418 if (status == B_OK) { 2419 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2420 &cache); 2421 } 2422 if (status != B_OK) 2423 return status; 2424 2425 cacheLocker.SetTo(cache, true); // already locked 2426 2427 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2428 2429 page_num_t oldWiredPages = wiredPages; 2430 wiredPages = 0; 2431 2432 // If the source area isn't shared, count the number of wired pages in 2433 // the cache and reserve as many pages. 2434 if (!sharedArea) { 2435 wiredPages = cache->WiredPagesCount(); 2436 2437 if (wiredPages > oldWiredPages) { 2438 cacheLocker.Unlock(); 2439 locker.Unlock(); 2440 2441 if (oldWiredPages > 0) 2442 vm_page_unreserve_pages(&wiredPagesReservation); 2443 2444 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2445 VM_PRIORITY_USER); 2446 2447 restart = true; 2448 } 2449 } else if (oldWiredPages > 0) 2450 vm_page_unreserve_pages(&wiredPagesReservation); 2451 } while (restart); 2452 2453 // unreserve pages later 2454 struct PagesUnreserver { 2455 PagesUnreserver(vm_page_reservation* reservation) 2456 : 2457 fReservation(reservation) 2458 { 2459 } 2460 2461 ~PagesUnreserver() 2462 { 2463 if (fReservation != NULL) 2464 vm_page_unreserve_pages(fReservation); 2465 } 2466 2467 private: 2468 vm_page_reservation* fReservation; 2469 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2470 2471 if (addressSpec == B_CLONE_ADDRESS) { 2472 addressSpec = B_EXACT_ADDRESS; 2473 *_address = (void*)source->Base(); 2474 } 2475 2476 // First, create a cache on top of the source area, respectively use the 2477 // existing one, if this is a shared area. 2478 2479 VMArea* target; 2480 virtual_address_restrictions addressRestrictions = {}; 2481 addressRestrictions.address = *_address; 2482 addressRestrictions.address_specification = addressSpec; 2483 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2484 name, source->Size(), source->wiring, protection, 2485 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2486 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2487 &addressRestrictions, true, &target, _address); 2488 if (status < B_OK) 2489 return status; 2490 2491 if (sharedArea) { 2492 // The new area uses the old area's cache, but map_backing_store() 2493 // hasn't acquired a ref. So we have to do that now. 2494 cache->AcquireRefLocked(); 2495 } 2496 2497 // If the source area is writable, we need to move it one layer up as well 2498 2499 if (!sharedArea) { 2500 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2501 // TODO: do something more useful if this fails! 2502 if (vm_copy_on_write_area(cache, 2503 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2504 panic("vm_copy_on_write_area() failed!\n"); 2505 } 2506 } 2507 } 2508 2509 // we return the ID of the newly created area 2510 return target->id; 2511 } 2512 2513 2514 static status_t 2515 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2516 bool kernel) 2517 { 2518 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2519 "%#lx)\n", team, areaID, newProtection)); 2520 2521 if (!arch_vm_supports_protection(newProtection)) 2522 return B_NOT_SUPPORTED; 2523 2524 bool becomesWritable 2525 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2526 2527 // lock address spaces and cache 2528 MultiAddressSpaceLocker locker; 2529 VMCache* cache; 2530 VMArea* area; 2531 status_t status; 2532 AreaCacheLocker cacheLocker; 2533 bool isWritable; 2534 2535 bool restart; 2536 do { 2537 restart = false; 2538 2539 locker.Unset(); 2540 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2541 if (status != B_OK) 2542 return status; 2543 2544 cacheLocker.SetTo(cache, true); // already locked 2545 2546 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2547 return B_NOT_ALLOWED; 2548 2549 if (area->protection == newProtection) 2550 return B_OK; 2551 2552 if (team != VMAddressSpace::KernelID() 2553 && area->address_space->ID() != team) { 2554 // unless you're the kernel, you are only allowed to set 2555 // the protection of your own areas 2556 return B_NOT_ALLOWED; 2557 } 2558 2559 isWritable 2560 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2561 2562 // Make sure the area (respectively, if we're going to call 2563 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2564 // wired ranges. 2565 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2566 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2567 otherArea = otherArea->cache_next) { 2568 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2569 restart = true; 2570 break; 2571 } 2572 } 2573 } else { 2574 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2575 restart = true; 2576 } 2577 } while (restart); 2578 2579 bool changePageProtection = true; 2580 bool changeTopCachePagesOnly = false; 2581 2582 if (isWritable && !becomesWritable) { 2583 // writable -> !writable 2584 2585 if (cache->source != NULL && cache->temporary) { 2586 if (cache->CountWritableAreas(area) == 0) { 2587 // Since this cache now lives from the pages in its source cache, 2588 // we can change the cache's commitment to take only those pages 2589 // into account that really are in this cache. 2590 2591 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2592 team == VMAddressSpace::KernelID() 2593 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2594 2595 // TODO: we may be able to join with our source cache, if 2596 // count == 0 2597 } 2598 } 2599 2600 // If only the writability changes, we can just remap the pages of the 2601 // top cache, since the pages of lower caches are mapped read-only 2602 // anyway. That's advantageous only, if the number of pages in the cache 2603 // is significantly smaller than the number of pages in the area, 2604 // though. 2605 if (newProtection 2606 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2607 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2608 changeTopCachePagesOnly = true; 2609 } 2610 } else if (!isWritable && becomesWritable) { 2611 // !writable -> writable 2612 2613 if (!cache->consumers.IsEmpty()) { 2614 // There are consumers -- we have to insert a new cache. Fortunately 2615 // vm_copy_on_write_area() does everything that's needed. 2616 changePageProtection = false; 2617 status = vm_copy_on_write_area(cache, NULL); 2618 } else { 2619 // No consumers, so we don't need to insert a new one. 2620 if (cache->source != NULL && cache->temporary) { 2621 // the cache's commitment must contain all possible pages 2622 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2623 team == VMAddressSpace::KernelID() 2624 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2625 } 2626 2627 if (status == B_OK && cache->source != NULL) { 2628 // There's a source cache, hence we can't just change all pages' 2629 // protection or we might allow writing into pages belonging to 2630 // a lower cache. 2631 changeTopCachePagesOnly = true; 2632 } 2633 } 2634 } else { 2635 // we don't have anything special to do in all other cases 2636 } 2637 2638 if (status == B_OK) { 2639 // remap existing pages in this cache 2640 if (changePageProtection) { 2641 VMTranslationMap* map = area->address_space->TranslationMap(); 2642 map->Lock(); 2643 2644 if (changeTopCachePagesOnly) { 2645 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2646 page_num_t lastPageOffset 2647 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2648 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2649 vm_page* page = it.Next();) { 2650 if (page->cache_offset >= firstPageOffset 2651 && page->cache_offset <= lastPageOffset) { 2652 addr_t address = virtual_page_address(area, page); 2653 map->ProtectPage(area, address, newProtection); 2654 } 2655 } 2656 } else 2657 map->ProtectArea(area, newProtection); 2658 2659 map->Unlock(); 2660 } 2661 2662 area->protection = newProtection; 2663 } 2664 2665 return status; 2666 } 2667 2668 2669 status_t 2670 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2671 { 2672 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2673 if (addressSpace == NULL) 2674 return B_BAD_TEAM_ID; 2675 2676 VMTranslationMap* map = addressSpace->TranslationMap(); 2677 2678 map->Lock(); 2679 uint32 dummyFlags; 2680 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2681 map->Unlock(); 2682 2683 addressSpace->Put(); 2684 return status; 2685 } 2686 2687 2688 /*! The page's cache must be locked. 
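	Returns \c true if the page itself or any of its mappings has been marked
	modified.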
2689 */ 2690 bool 2691 vm_test_map_modification(vm_page* page) 2692 { 2693 if (page->modified) 2694 return true; 2695 2696 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2697 vm_page_mapping* mapping; 2698 while ((mapping = iterator.Next()) != NULL) { 2699 VMArea* area = mapping->area; 2700 VMTranslationMap* map = area->address_space->TranslationMap(); 2701 2702 phys_addr_t physicalAddress; 2703 uint32 flags; 2704 map->Lock(); 2705 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2706 map->Unlock(); 2707 2708 if ((flags & PAGE_MODIFIED) != 0) 2709 return true; 2710 } 2711 2712 return false; 2713 } 2714 2715 2716 /*! The page's cache must be locked. 2717 */ 2718 void 2719 vm_clear_map_flags(vm_page* page, uint32 flags) 2720 { 2721 if ((flags & PAGE_ACCESSED) != 0) 2722 page->accessed = false; 2723 if ((flags & PAGE_MODIFIED) != 0) 2724 page->modified = false; 2725 2726 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2727 vm_page_mapping* mapping; 2728 while ((mapping = iterator.Next()) != NULL) { 2729 VMArea* area = mapping->area; 2730 VMTranslationMap* map = area->address_space->TranslationMap(); 2731 2732 map->Lock(); 2733 map->ClearFlags(virtual_page_address(area, page), flags); 2734 map->Unlock(); 2735 } 2736 } 2737 2738 2739 /*! Removes all mappings from a page. 2740 After you've called this function, the page is unmapped from memory and 2741 the page's \c accessed and \c modified flags have been updated according 2742 to the state of the mappings. 2743 The page's cache must be locked. 2744 */ 2745 void 2746 vm_remove_all_page_mappings(vm_page* page) 2747 { 2748 while (vm_page_mapping* mapping = page->mappings.Head()) { 2749 VMArea* area = mapping->area; 2750 VMTranslationMap* map = area->address_space->TranslationMap(); 2751 addr_t address = virtual_page_address(area, page); 2752 map->UnmapPage(area, address, false); 2753 } 2754 } 2755 2756 2757 int32 2758 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2759 { 2760 int32 count = 0; 2761 2762 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2763 vm_page_mapping* mapping; 2764 while ((mapping = iterator.Next()) != NULL) { 2765 VMArea* area = mapping->area; 2766 VMTranslationMap* map = area->address_space->TranslationMap(); 2767 2768 bool modified; 2769 if (map->ClearAccessedAndModified(area, 2770 virtual_page_address(area, page), false, modified)) { 2771 count++; 2772 } 2773 2774 page->modified |= modified; 2775 } 2776 2777 2778 if (page->accessed) { 2779 count++; 2780 page->accessed = false; 2781 } 2782 2783 return count; 2784 } 2785 2786 2787 /*! Removes all mappings of a page and/or clears the accessed bits of the 2788 mappings. 2789 The function iterates through the page mappings and removes them until 2790 encountering one that has been accessed. From then on it will continue to 2791 iterate, but only clear the accessed flag of the mapping. The page's 2792 \c modified bit will be updated accordingly, the \c accessed bit will be 2793 cleared. 2794 \return The number of mapping accessed bits encountered, including the 2795 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2796 of the page have been removed. 
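	The page must not be wired (this is asserted).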
2797 */ 2798 int32 2799 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 2800 { 2801 ASSERT(page->WiredCount() == 0); 2802 2803 if (page->accessed) 2804 return vm_clear_page_mapping_accessed_flags(page); 2805 2806 while (vm_page_mapping* mapping = page->mappings.Head()) { 2807 VMArea* area = mapping->area; 2808 VMTranslationMap* map = area->address_space->TranslationMap(); 2809 addr_t address = virtual_page_address(area, page); 2810 bool modified = false; 2811 if (map->ClearAccessedAndModified(area, address, true, modified)) { 2812 page->accessed = true; 2813 page->modified |= modified; 2814 return vm_clear_page_mapping_accessed_flags(page); 2815 } 2816 page->modified |= modified; 2817 } 2818 2819 return 0; 2820 } 2821 2822 2823 static int 2824 display_mem(int argc, char** argv) 2825 { 2826 bool physical = false; 2827 addr_t copyAddress; 2828 int32 displayWidth; 2829 int32 itemSize; 2830 int32 num = -1; 2831 addr_t address; 2832 int i = 1, j; 2833 2834 if (argc > 1 && argv[1][0] == '-') { 2835 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2836 physical = true; 2837 i++; 2838 } else 2839 i = 99; 2840 } 2841 2842 if (argc < i + 1 || argc > i + 2) { 2843 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 2844 "\tdl - 8 bytes\n" 2845 "\tdw - 4 bytes\n" 2846 "\tds - 2 bytes\n" 2847 "\tdb - 1 byte\n" 2848 "\tstring - a whole string\n" 2849 " -p or --physical only allows memory from a single page to be " 2850 "displayed.\n"); 2851 return 0; 2852 } 2853 2854 address = parse_expression(argv[i]); 2855 2856 if (argc > i + 1) 2857 num = parse_expression(argv[i + 1]); 2858 2859 // build the format string 2860 if (strcmp(argv[0], "db") == 0) { 2861 itemSize = 1; 2862 displayWidth = 16; 2863 } else if (strcmp(argv[0], "ds") == 0) { 2864 itemSize = 2; 2865 displayWidth = 8; 2866 } else if (strcmp(argv[0], "dw") == 0) { 2867 itemSize = 4; 2868 displayWidth = 4; 2869 } else if (strcmp(argv[0], "dl") == 0) { 2870 itemSize = 8; 2871 displayWidth = 2; 2872 } else if (strcmp(argv[0], "string") == 0) { 2873 itemSize = 1; 2874 displayWidth = -1; 2875 } else { 2876 kprintf("display_mem called in an invalid way!\n"); 2877 return 0; 2878 } 2879 2880 if (num <= 0) 2881 num = displayWidth; 2882 2883 void* physicalPageHandle = NULL; 2884 2885 if (physical) { 2886 int32 offset = address & (B_PAGE_SIZE - 1); 2887 if (num * itemSize + offset > B_PAGE_SIZE) { 2888 num = (B_PAGE_SIZE - offset) / itemSize; 2889 kprintf("NOTE: number of bytes has been cut to page size\n"); 2890 } 2891 2892 address = ROUNDDOWN(address, B_PAGE_SIZE); 2893 2894 if (vm_get_physical_page_debug(address, &copyAddress, 2895 &physicalPageHandle) != B_OK) { 2896 kprintf("getting the hardware page failed."); 2897 return 0; 2898 } 2899 2900 address += offset; 2901 copyAddress += offset; 2902 } else 2903 copyAddress = address; 2904 2905 if (!strcmp(argv[0], "string")) { 2906 kprintf("%p \"", (char*)copyAddress); 2907 2908 // string mode 2909 for (i = 0; true; i++) { 2910 char c; 2911 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 2912 != B_OK 2913 || c == '\0') { 2914 break; 2915 } 2916 2917 if (c == '\n') 2918 kprintf("\\n"); 2919 else if (c == '\t') 2920 kprintf("\\t"); 2921 else { 2922 if (!isprint(c)) 2923 c = '.'; 2924 2925 kprintf("%c", c); 2926 } 2927 } 2928 2929 kprintf("\"\n"); 2930 } else { 2931 // number mode 2932 for (i = 0; i < num; i++) { 2933 uint32 value; 2934 2935 if ((i % displayWidth) == 0) { 2936 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 2937 if (i != 0)
2938 kprintf("\n"); 2939 2940 kprintf("[0x%lx] ", address + i * itemSize); 2941 2942 for (j = 0; j < displayed; j++) { 2943 char c; 2944 if (debug_memcpy(B_CURRENT_TEAM, &c, 2945 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 2946 displayed = j; 2947 break; 2948 } 2949 if (!isprint(c)) 2950 c = '.'; 2951 2952 kprintf("%c", c); 2953 } 2954 if (num > displayWidth) { 2955 // make sure the spacing in the last line is correct 2956 for (j = displayed; j < displayWidth * itemSize; j++) 2957 kprintf(" "); 2958 } 2959 kprintf(" "); 2960 } 2961 2962 if (debug_memcpy(B_CURRENT_TEAM, &value, 2963 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 2964 kprintf("read fault"); 2965 break; 2966 } 2967 2968 switch (itemSize) { 2969 case 1: 2970 kprintf(" %02x", *(uint8*)&value); 2971 break; 2972 case 2: 2973 kprintf(" %04x", *(uint16*)&value); 2974 break; 2975 case 4: 2976 kprintf(" %08lx", *(uint32*)&value); 2977 break; 2978 case 8: 2979 kprintf(" %016Lx", *(uint64*)&value); 2980 break; 2981 } 2982 } 2983 2984 kprintf("\n"); 2985 } 2986 2987 if (physical) { 2988 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 2989 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 2990 } 2991 return 0; 2992 } 2993 2994 2995 static void 2996 dump_cache_tree_recursively(VMCache* cache, int level, 2997 VMCache* highlightCache) 2998 { 2999 // print this cache 3000 for (int i = 0; i < level; i++) 3001 kprintf(" "); 3002 if (cache == highlightCache) 3003 kprintf("%p <--\n", cache); 3004 else 3005 kprintf("%p\n", cache); 3006 3007 // recursively print its consumers 3008 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3009 VMCache* consumer = it.Next();) { 3010 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3011 } 3012 } 3013 3014 3015 static int 3016 dump_cache_tree(int argc, char** argv) 3017 { 3018 if (argc != 2 || !strcmp(argv[1], "--help")) { 3019 kprintf("usage: %s <address>\n", argv[0]); 3020 return 0; 3021 } 3022 3023 addr_t address = parse_expression(argv[1]); 3024 if (address == 0) 3025 return 0; 3026 3027 VMCache* cache = (VMCache*)address; 3028 VMCache* root = cache; 3029 3030 // find the root cache (the transitive source) 3031 while (root->source != NULL) 3032 root = root->source; 3033 3034 dump_cache_tree_recursively(root, 0, cache); 3035 3036 return 0; 3037 } 3038 3039 3040 const char* 3041 vm_cache_type_to_string(int32 type) 3042 { 3043 switch (type) { 3044 case CACHE_TYPE_RAM: 3045 return "RAM"; 3046 case CACHE_TYPE_DEVICE: 3047 return "device"; 3048 case CACHE_TYPE_VNODE: 3049 return "vnode"; 3050 case CACHE_TYPE_NULL: 3051 return "null"; 3052 3053 default: 3054 return "unknown"; 3055 } 3056 } 3057 3058 3059 #if DEBUG_CACHE_LIST 3060 3061 static void 3062 update_cache_info_recursively(VMCache* cache, cache_info& info) 3063 { 3064 info.page_count += cache->page_count; 3065 if (cache->type == CACHE_TYPE_RAM) 3066 info.committed += cache->committed_size; 3067 3068 // recurse 3069 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3070 VMCache* consumer = it.Next();) { 3071 update_cache_info_recursively(consumer, info); 3072 } 3073 } 3074 3075 3076 static int 3077 cache_info_compare_page_count(const void* _a, const void* _b) 3078 { 3079 const cache_info* a = (const cache_info*)_a; 3080 const cache_info* b = (const cache_info*)_b; 3081 if (a->page_count == b->page_count) 3082 return 0; 3083 return a->page_count < b->page_count ? 
1 : -1; 3084 } 3085 3086 3087 static int 3088 cache_info_compare_committed(const void* _a, const void* _b) 3089 { 3090 const cache_info* a = (const cache_info*)_a; 3091 const cache_info* b = (const cache_info*)_b; 3092 if (a->committed == b->committed) 3093 return 0; 3094 return a->committed < b->committed ? 1 : -1; 3095 } 3096 3097 3098 static void 3099 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3100 { 3101 for (int i = 0; i < level; i++) 3102 kprintf(" "); 3103 3104 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3105 vm_cache_type_to_string(cache->type), cache->virtual_base, 3106 cache->virtual_end, cache->page_count); 3107 3108 if (level == 0) 3109 kprintf("/%lu", info.page_count); 3110 3111 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3112 kprintf(", committed: %lld", cache->committed_size); 3113 3114 if (level == 0) 3115 kprintf("/%lu", info.committed); 3116 } 3117 3118 // areas 3119 if (cache->areas != NULL) { 3120 VMArea* area = cache->areas; 3121 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3122 area->address_space->ID()); 3123 3124 while (area->cache_next != NULL) { 3125 area = area->cache_next; 3126 kprintf(", %ld", area->id); 3127 } 3128 } 3129 3130 kputs("\n"); 3131 3132 // recurse 3133 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3134 VMCache* consumer = it.Next();) { 3135 dump_caches_recursively(consumer, info, level + 1); 3136 } 3137 } 3138 3139 3140 static int 3141 dump_caches(int argc, char** argv) 3142 { 3143 if (sCacheInfoTable == NULL) { 3144 kprintf("No cache info table!\n"); 3145 return 0; 3146 } 3147 3148 bool sortByPageCount = true; 3149 3150 for (int32 i = 1; i < argc; i++) { 3151 if (strcmp(argv[i], "-c") == 0) { 3152 sortByPageCount = false; 3153 } else { 3154 print_debugger_command_usage(argv[0]); 3155 return 0; 3156 } 3157 } 3158 3159 uint32 totalCount = 0; 3160 uint32 rootCount = 0; 3161 off_t totalCommitted = 0; 3162 page_num_t totalPages = 0; 3163 3164 VMCache* cache = gDebugCacheList; 3165 while (cache) { 3166 totalCount++; 3167 if (cache->source == NULL) { 3168 cache_info stackInfo; 3169 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3170 ? sCacheInfoTable[rootCount] : stackInfo; 3171 rootCount++; 3172 info.cache = cache; 3173 info.page_count = 0; 3174 info.committed = 0; 3175 update_cache_info_recursively(cache, info); 3176 totalCommitted += info.committed; 3177 totalPages += info.page_count; 3178 } 3179 3180 cache = cache->debug_next; 3181 } 3182 3183 if (rootCount <= (uint32)kCacheInfoTableCount) { 3184 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3185 sortByPageCount 3186 ? &cache_info_compare_page_count 3187 : &cache_info_compare_committed); 3188 } 3189 3190 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3191 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3192 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3193 "tree...\n\n", totalCount, rootCount, 3194 sortByPageCount ? "page count" : "committed size"); 3195 3196 if (rootCount <= (uint32)kCacheInfoTableCount) { 3197 for (uint32 i = 0; i < rootCount; i++) { 3198 cache_info& info = sCacheInfoTable[i]; 3199 dump_caches_recursively(info.cache, info, 0); 3200 } 3201 } else 3202 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3203 3204 return 0; 3205 } 3206 3207 #endif // DEBUG_CACHE_LIST 3208 3209 3210 static int 3211 dump_cache(int argc, char** argv) 3212 { 3213 VMCache* cache; 3214 bool showPages = false; 3215 int i = 1; 3216 3217 if (argc < 2 || !strcmp(argv[1], "--help")) { 3218 kprintf("usage: %s [-ps] <address>\n" 3219 " if -p is specified, all pages are shown, if -s is used\n" 3220 " only the cache info is shown respectively.\n", argv[0]); 3221 return 0; 3222 } 3223 while (argv[i][0] == '-') { 3224 char* arg = argv[i] + 1; 3225 while (arg[0]) { 3226 if (arg[0] == 'p') 3227 showPages = true; 3228 arg++; 3229 } 3230 i++; 3231 } 3232 if (argv[i] == NULL) { 3233 kprintf("%s: invalid argument, pass address\n", argv[0]); 3234 return 0; 3235 } 3236 3237 addr_t address = parse_expression(argv[i]); 3238 if (address == 0) 3239 return 0; 3240 3241 cache = (VMCache*)address; 3242 3243 cache->Dump(showPages); 3244 3245 set_debug_variable("_sourceCache", (addr_t)cache->source); 3246 3247 return 0; 3248 } 3249 3250 3251 static void 3252 dump_area_struct(VMArea* area, bool mappings) 3253 { 3254 kprintf("AREA: %p\n", area); 3255 kprintf("name:\t\t'%s'\n", area->name); 3256 kprintf("owner:\t\t0x%lx\n", area->address_space->ID()); 3257 kprintf("id:\t\t0x%lx\n", area->id); 3258 kprintf("base:\t\t0x%lx\n", area->Base()); 3259 kprintf("size:\t\t0x%lx\n", area->Size()); 3260 kprintf("protection:\t0x%lx\n", area->protection); 3261 kprintf("wiring:\t\t0x%x\n", area->wiring); 3262 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3263 kprintf("cache:\t\t%p\n", area->cache); 3264 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3265 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3266 kprintf("cache_next:\t%p\n", area->cache_next); 3267 kprintf("cache_prev:\t%p\n", area->cache_prev); 3268 3269 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3270 if (mappings) { 3271 kprintf("page mappings:\n"); 3272 while (iterator.HasNext()) { 3273 vm_page_mapping* mapping = iterator.Next(); 3274 kprintf(" %p", mapping->page); 3275 } 3276 kprintf("\n"); 3277 } else { 3278 uint32 count = 0; 3279 while (iterator.Next() != NULL) { 3280 count++; 3281 } 3282 kprintf("page mappings:\t%lu\n", count); 3283 } 3284 } 3285 3286 3287 static int 3288 dump_area(int argc, char** argv) 3289 { 3290 bool mappings = false; 3291 bool found = false; 3292 int32 index = 1; 3293 VMArea* area; 3294 addr_t num; 3295 3296 if (argc < 2 || !strcmp(argv[1], "--help")) { 3297 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3298 "All areas matching either id/address/name are listed. 
You can\n" 3299 "force to check only a specific item by prefixing the specifier\n" 3300 "with the id/contains/address/name keywords.\n" 3301 "-m shows the area's mappings as well.\n"); 3302 return 0; 3303 } 3304 3305 if (!strcmp(argv[1], "-m")) { 3306 mappings = true; 3307 index++; 3308 } 3309 3310 int32 mode = 0xf; 3311 if (!strcmp(argv[index], "id")) 3312 mode = 1; 3313 else if (!strcmp(argv[index], "contains")) 3314 mode = 2; 3315 else if (!strcmp(argv[index], "name")) 3316 mode = 4; 3317 else if (!strcmp(argv[index], "address")) 3318 mode = 0; 3319 if (mode != 0xf) 3320 index++; 3321 3322 if (index >= argc) { 3323 kprintf("No area specifier given.\n"); 3324 return 0; 3325 } 3326 3327 num = parse_expression(argv[index]); 3328 3329 if (mode == 0) { 3330 dump_area_struct((struct VMArea*)num, mappings); 3331 } else { 3332 // walk through the area list, looking for the arguments as a name 3333 3334 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3335 while ((area = it.Next()) != NULL) { 3336 if (((mode & 4) != 0 && area->name != NULL 3337 && !strcmp(argv[index], area->name)) 3338 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3339 || (((mode & 2) != 0 && area->Base() <= num 3340 && area->Base() + area->Size() > num))))) { 3341 dump_area_struct(area, mappings); 3342 found = true; 3343 } 3344 } 3345 3346 if (!found) 3347 kprintf("could not find area %s (%ld)\n", argv[index], num); 3348 } 3349 3350 return 0; 3351 } 3352 3353 3354 static int 3355 dump_area_list(int argc, char** argv) 3356 { 3357 VMArea* area; 3358 const char* name = NULL; 3359 int32 id = 0; 3360 3361 if (argc > 1) { 3362 id = parse_expression(argv[1]); 3363 if (id == 0) 3364 name = argv[1]; 3365 } 3366 3367 kprintf("addr id base\t\tsize protect lock name\n"); 3368 3369 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3370 while ((area = it.Next()) != NULL) { 3371 if ((id != 0 && area->address_space->ID() != id) 3372 || (name != NULL && strstr(area->name, name) == NULL)) 3373 continue; 3374 3375 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 3376 (void*)area->Base(), (void*)area->Size(), area->protection, 3377 area->wiring, area->name); 3378 } 3379 return 0; 3380 } 3381 3382 3383 static int 3384 dump_available_memory(int argc, char** argv) 3385 { 3386 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3387 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3388 return 0; 3389 } 3390 3391 3392 /*! Deletes all areas and reserved regions in the given address space. 3393 3394 The caller must ensure that none of the areas has any wired ranges. 3395 3396 \param addressSpace The address space. 3397 \param deletingAddressSpace \c true, if the address space is in the process 3398 of being deleted. 
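	The address space itself is write-locked by this function.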
3399 */ 3400 void 3401 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3402 { 3403 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 3404 addressSpace->ID())); 3405 3406 addressSpace->WriteLock(); 3407 3408 // remove all reserved areas in this address space 3409 addressSpace->UnreserveAllAddressRanges(0); 3410 3411 // delete all the areas in this address space 3412 while (VMArea* area = addressSpace->FirstArea()) { 3413 ASSERT(!area->IsWired()); 3414 delete_area(addressSpace, area, deletingAddressSpace); 3415 } 3416 3417 addressSpace->WriteUnlock(); 3418 } 3419 3420 3421 static area_id 3422 vm_area_for(addr_t address, bool kernel) 3423 { 3424 team_id team; 3425 if (IS_USER_ADDRESS(address)) { 3426 // we try the user team address space, if any 3427 team = VMAddressSpace::CurrentID(); 3428 if (team < 0) 3429 return team; 3430 } else 3431 team = VMAddressSpace::KernelID(); 3432 3433 AddressSpaceReadLocker locker(team); 3434 if (!locker.IsLocked()) 3435 return B_BAD_TEAM_ID; 3436 3437 VMArea* area = locker.AddressSpace()->LookupArea(address); 3438 if (area != NULL) { 3439 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3440 return B_ERROR; 3441 3442 return area->id; 3443 } 3444 3445 return B_ERROR; 3446 } 3447 3448 3449 /*! Frees physical pages that were used during the boot process. 3450 \a end is inclusive. 3451 */ 3452 static void 3453 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3454 { 3455 // free all physical pages in the specified range 3456 3457 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3458 phys_addr_t physicalAddress; 3459 uint32 flags; 3460 3461 if (map->Query(current, &physicalAddress, &flags) == B_OK 3462 && (flags & PAGE_PRESENT) != 0) { 3463 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3464 if (page != NULL && page->State() != PAGE_STATE_FREE 3465 && page->State() != PAGE_STATE_CLEAR 3466 && page->State() != PAGE_STATE_UNUSED) { 3467 DEBUG_PAGE_ACCESS_START(page); 3468 vm_page_set_state(page, PAGE_STATE_FREE); 3469 } 3470 } 3471 } 3472 3473 // unmap the memory 3474 map->Unmap(start, end); 3475 } 3476 3477 3478 void 3479 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3480 { 3481 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3482 addr_t end = start + (size - 1); 3483 addr_t lastEnd = start; 3484 3485 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3486 (void*)start, (void*)end)); 3487 3488 // The areas are sorted in virtual address space order, so 3489 // we just have to find the holes between them that fall 3490 // into the area we should dispose 3491 3492 map->Lock(); 3493 3494 for (VMAddressSpace::AreaIterator it 3495 = VMAddressSpace::Kernel()->GetAreaIterator(); 3496 VMArea* area = it.Next();) { 3497 addr_t areaStart = area->Base(); 3498 addr_t areaEnd = areaStart + (area->Size() - 1); 3499 3500 if (areaEnd < start) 3501 continue; 3502 3503 if (areaStart > end) { 3504 // we are done, the area is already beyond of what we have to free 3505 break; 3506 } 3507 3508 if (areaStart > lastEnd) { 3509 // this is something we can free 3510 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3511 (void*)areaStart)); 3512 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3513 } 3514 3515 if (areaEnd >= end) { 3516 lastEnd = areaEnd; 3517 // no +1 to prevent potential overflow 3518 break; 3519 } 3520 3521 lastEnd = areaEnd + 1; 3522 } 3523 3524 if (lastEnd < end) { 
3525 // we can also get rid of some space at the end of the area 3526 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3527 (void*)end)); 3528 unmap_and_free_physical_pages(map, lastEnd, end); 3529 } 3530 3531 map->Unlock(); 3532 } 3533 3534 3535 static void 3536 create_preloaded_image_areas(struct preloaded_image* image) 3537 { 3538 char name[B_OS_NAME_LENGTH]; 3539 void* address; 3540 int32 length; 3541 3542 // use file name to create a good area name 3543 char* fileName = strrchr(image->name, '/'); 3544 if (fileName == NULL) 3545 fileName = image->name; 3546 else 3547 fileName++; 3548 3549 length = strlen(fileName); 3550 // make sure there is enough space for the suffix 3551 if (length > 25) 3552 length = 25; 3553 3554 memcpy(name, fileName, length); 3555 strcpy(name + length, "_text"); 3556 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3557 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3558 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3559 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3560 // this will later be remapped read-only/executable by the 3561 // ELF initialization code 3562 3563 strcpy(name + length, "_data"); 3564 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3565 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3566 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3567 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3568 } 3569 3570 3571 /*! Frees all previously allocated kernel arguments areas from the kernel_args 3572 structure. Any boot loader resources contained in those arguments must not 3573 be accessed anymore past this point. 3574 */ 3575 void 3576 vm_free_kernel_args(kernel_args* args) 3577 { 3578 uint32 i; 3579 3580 TRACE(("vm_free_kernel_args()\n")); 3581 3582 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3583 area_id area = area_for((void*)args->kernel_args_range[i].start); 3584 if (area >= B_OK) 3585 delete_area(area); 3586 } 3587 } 3588 3589 3590 static void 3591 allocate_kernel_args(kernel_args* args) 3592 { 3593 TRACE(("allocate_kernel_args()\n")); 3594 3595 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3596 void* address = (void*)args->kernel_args_range[i].start; 3597 3598 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3599 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3600 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3601 } 3602 } 3603 3604 3605 static void 3606 unreserve_boot_loader_ranges(kernel_args* args) 3607 { 3608 TRACE(("unreserve_boot_loader_ranges()\n")); 3609 3610 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3611 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3612 (void*)args->virtual_allocated_range[i].start, 3613 args->virtual_allocated_range[i].size); 3614 } 3615 } 3616 3617 3618 static void 3619 reserve_boot_loader_ranges(kernel_args* args) 3620 { 3621 TRACE(("reserve_boot_loader_ranges()\n")); 3622 3623 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3624 void* address = (void*)args->virtual_allocated_range[i].start; 3625 3626 // If the address is not a kernel address, we just skip it. The 3627 // architecture specific code has to deal with it.
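		// Such ranges are merely logged here; nothing is reserved for them in
		// the kernel address space.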
3628 if (!IS_KERNEL_ADDRESS(address)) { 3629 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 3630 address, args->virtual_allocated_range[i].size); 3631 continue; 3632 } 3633 3634 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3635 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3636 if (status < B_OK) 3637 panic("could not reserve boot loader ranges\n"); 3638 } 3639 } 3640 3641 3642 static addr_t 3643 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3644 { 3645 size = PAGE_ALIGN(size); 3646 3647 // find a slot in the virtual allocation addr range 3648 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3649 // check to see if the space between this one and the last is big enough 3650 addr_t rangeStart = args->virtual_allocated_range[i].start; 3651 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3652 + args->virtual_allocated_range[i - 1].size; 3653 3654 addr_t base = alignment > 0 3655 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3656 3657 if (base >= KERNEL_BASE && base < rangeStart 3658 && rangeStart - base >= size) { 3659 args->virtual_allocated_range[i - 1].size 3660 += base + size - previousRangeEnd; 3661 return base; 3662 } 3663 } 3664 3665 // we hadn't found one between allocation ranges. this is ok. 3666 // see if there's a gap after the last one 3667 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3668 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3669 + args->virtual_allocated_range[lastEntryIndex].size; 3670 addr_t base = alignment > 0 3671 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3672 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3673 args->virtual_allocated_range[lastEntryIndex].size 3674 += base + size - lastRangeEnd; 3675 return base; 3676 } 3677 3678 // see if there's a gap before the first one 3679 addr_t rangeStart = args->virtual_allocated_range[0].start; 3680 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3681 base = rangeStart - size; 3682 if (alignment > 0) 3683 base = ROUNDDOWN(base, alignment); 3684 3685 if (base >= KERNEL_BASE) { 3686 args->virtual_allocated_range[0].start = base; 3687 args->virtual_allocated_range[0].size += rangeStart - base; 3688 return base; 3689 } 3690 } 3691 3692 return 0; 3693 } 3694 3695 3696 static bool 3697 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 3698 { 3699 // TODO: horrible brute-force method of determining if the page can be 3700 // allocated 3701 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3702 if (address >= args->physical_memory_range[i].start 3703 && address < args->physical_memory_range[i].start 3704 + args->physical_memory_range[i].size) 3705 return true; 3706 } 3707 return false; 3708 } 3709 3710 3711 page_num_t 3712 vm_allocate_early_physical_page(kernel_args* args) 3713 { 3714 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3715 phys_addr_t nextPage; 3716 3717 nextPage = args->physical_allocated_range[i].start 3718 + args->physical_allocated_range[i].size; 3719 // see if the page after the next allocated paddr run can be allocated 3720 if (i + 1 < args->num_physical_allocated_ranges 3721 && args->physical_allocated_range[i + 1].size != 0) { 3722 // see if the next page will collide with the next allocated range 3723 if (nextPage >= args->physical_allocated_range[i+1].start) 3724 continue; 3725 } 3726 // see if the next physical page fits in 
the memory block 3727 if (is_page_in_physical_memory_range(args, nextPage)) { 3728 // we got one! 3729 args->physical_allocated_range[i].size += B_PAGE_SIZE; 3730 return nextPage / B_PAGE_SIZE; 3731 } 3732 } 3733 3734 return 0; 3735 // could not allocate a block 3736 } 3737 3738 3739 /*! This one uses the kernel_args' physical and virtual memory ranges to 3740 allocate some pages before the VM is completely up. 3741 */ 3742 addr_t 3743 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 3744 uint32 attributes, addr_t alignment) 3745 { 3746 if (physicalSize > virtualSize) 3747 physicalSize = virtualSize; 3748 3749 // find the vaddr to allocate at 3750 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 3751 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 3752 3753 // map the pages 3754 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 3755 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 3756 if (physicalAddress == 0) 3757 panic("error allocating early page!\n"); 3758 3759 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 3760 3761 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 3762 physicalAddress * B_PAGE_SIZE, attributes, 3763 &vm_allocate_early_physical_page); 3764 } 3765 3766 return virtualBase; 3767 } 3768 3769 3770 /*! The main entry point to initialize the VM. */ 3771 status_t 3772 vm_init(kernel_args* args) 3773 { 3774 struct preloaded_image* image; 3775 void* address; 3776 status_t err = 0; 3777 uint32 i; 3778 3779 TRACE(("vm_init: entry\n")); 3780 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 3781 err = arch_vm_init(args); 3782 3783 // initialize some globals 3784 vm_page_init_num_pages(args); 3785 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 3786 3787 slab_init(args); 3788 3789 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3790 size_t heapSize = INITIAL_HEAP_SIZE; 3791 // try to accommodate low memory systems 3792 while (heapSize > sAvailableMemory / 8) 3793 heapSize /= 2; 3794 if (heapSize < 1024 * 1024) 3795 panic("vm_init: go buy some RAM please."); 3796 3797 // map in the new heap and initialize it 3798 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 3799 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3800 TRACE(("heap at 0x%lx\n", heapBase)); 3801 heap_init(heapBase, heapSize); 3802 #endif 3803 3804 // initialize the free page list and physical page mapper 3805 vm_page_init(args); 3806 3807 // initialize the cache allocators 3808 vm_cache_init(args); 3809 3810 { 3811 status_t error = VMAreaHash::Init(); 3812 if (error != B_OK) 3813 panic("vm_init: error initializing area hash table\n"); 3814 } 3815 3816 VMAddressSpace::Init(); 3817 reserve_boot_loader_ranges(args); 3818 3819 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3820 heap_init_post_area(); 3821 #endif 3822 3823 // Do any further initialization that the architecture dependent layers may 3824 // need now 3825 arch_vm_translation_map_init_post_area(args); 3826 arch_vm_init_post_area(args); 3827 vm_page_init_post_area(args); 3828 slab_init_post_area(); 3829 3830 // allocate areas to represent stuff that already exists 3831 3832 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3833 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 3834 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 3835 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3836 #endif 3837 3838
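	// Create areas covering the ranges that hold the kernel_args data, so it
	// remains accessible until vm_free_kernel_args() releases it.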
allocate_kernel_args(args); 3839 3840 create_preloaded_image_areas(&args->kernel_image); 3841 3842 // allocate areas for preloaded images 3843 for (image = args->preloaded_images; image != NULL; image = image->next) 3844 create_preloaded_image_areas(image); 3845 3846 // allocate kernel stacks 3847 for (i = 0; i < args->num_cpus; i++) { 3848 char name[64]; 3849 3850 sprintf(name, "idle thread %lu kstack", i + 1); 3851 address = (void*)args->cpu_kstack[i].start; 3852 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3853 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3854 } 3855 3856 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3857 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3858 3859 #if PARANOID_KERNEL_MALLOC 3860 vm_block_address_range("uninitialized heap memory", 3861 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3862 #endif 3863 #if PARANOID_KERNEL_FREE 3864 vm_block_address_range("freed heap memory", 3865 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3866 #endif 3867 3868 // create the object cache for the page mappings 3869 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3870 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3871 NULL, NULL); 3872 if (gPageMappingsObjectCache == NULL) 3873 panic("failed to create page mappings object cache"); 3874 3875 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3876 3877 #if DEBUG_CACHE_LIST 3878 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 3879 virtual_address_restrictions virtualRestrictions = {}; 3880 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 3881 physical_address_restrictions physicalRestrictions = {}; 3882 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 3883 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3884 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 3885 CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions, 3886 (void**)&sCacheInfoTable); 3887 } 3888 #endif // DEBUG_CACHE_LIST 3889 3890 // add some debugger commands 3891 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3892 add_debugger_command("area", &dump_area, 3893 "Dump info about a particular area"); 3894 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3895 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3896 #if DEBUG_CACHE_LIST 3897 if (sCacheInfoTable != NULL) { 3898 add_debugger_command_etc("caches", &dump_caches, 3899 "List all VMCache trees", 3900 "[ \"-c\" ]\n" 3901 "All cache trees are listed sorted in decreasing order by number " 3902 "of\n" 3903 "used pages or, if \"-c\" is specified, by size of committed " 3904 "memory.\n", 3905 0); 3906 } 3907 #endif 3908 add_debugger_command("avail", &dump_available_memory, 3909 "Dump available memory"); 3910 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3911 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3912 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3913 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3914 add_debugger_command("string", &display_mem, "dump strings"); 3915 3916 TRACE(("vm_init: exit\n")); 3917 3918 vm_cache_init_post_heap(); 3919 3920 return err; 3921 } 3922 3923 3924 status_t 3925 vm_init_post_sem(kernel_args* args) 3926 { 3927 // This frees all unused boot loader resources and 
makes its space available 3928 // again 3929 arch_vm_init_end(args); 3930 unreserve_boot_loader_ranges(args); 3931 3932 // fill in all of the semaphores that were not allocated before 3933 // since we're still single threaded and only the kernel address space 3934 // exists, it isn't that hard to find all of the ones we need to create 3935 3936 arch_vm_translation_map_init_post_sem(args); 3937 3938 slab_init_post_sem(); 3939 3940 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3941 heap_init_post_sem(); 3942 #endif 3943 3944 return B_OK; 3945 } 3946 3947 3948 status_t 3949 vm_init_post_thread(kernel_args* args) 3950 { 3951 vm_page_init_post_thread(args); 3952 slab_init_post_thread(); 3953 return heap_init_post_thread(); 3954 } 3955 3956 3957 status_t 3958 vm_init_post_modules(kernel_args* args) 3959 { 3960 return arch_vm_init_post_modules(args); 3961 } 3962 3963 3964 void 3965 permit_page_faults(void) 3966 { 3967 Thread* thread = thread_get_current_thread(); 3968 if (thread != NULL) 3969 atomic_add(&thread->page_faults_allowed, 1); 3970 } 3971 3972 3973 void 3974 forbid_page_faults(void) 3975 { 3976 Thread* thread = thread_get_current_thread(); 3977 if (thread != NULL) 3978 atomic_add(&thread->page_faults_allowed, -1); 3979 } 3980 3981 3982 status_t 3983 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3984 addr_t* newIP) 3985 { 3986 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3987 faultAddress)); 3988 3989 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 3990 3991 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 3992 VMAddressSpace* addressSpace = NULL; 3993 3994 status_t status = B_OK; 3995 *newIP = 0; 3996 atomic_add((int32*)&sPageFaults, 1); 3997 3998 if (IS_KERNEL_ADDRESS(pageAddress)) { 3999 addressSpace = VMAddressSpace::GetKernel(); 4000 } else if (IS_USER_ADDRESS(pageAddress)) { 4001 addressSpace = VMAddressSpace::GetCurrent(); 4002 if (addressSpace == NULL) { 4003 if (!isUser) { 4004 dprintf("vm_page_fault: kernel thread accessing invalid user " 4005 "memory!\n"); 4006 status = B_BAD_ADDRESS; 4007 TPF(PageFaultError(-1, 4008 VMPageFaultTracing 4009 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4010 } else { 4011 // XXX weird state. 
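// (Presumably this "weird state" means the current team no longer has an
// address space -- i.e. it is already being torn down -- while one of its
// threads still faults on a user address; that is not expected to happen,
// hence the panic below.)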
4012 panic("vm_page_fault: non kernel thread accessing user memory " 4013 "that doesn't exist!\n"); 4014 status = B_BAD_ADDRESS; 4015 } 4016 } 4017 } else { 4018 // the hit was probably in the 64k DMZ between kernel and user space 4019 // this keeps a user space thread from passing a buffer that crosses 4020 // into kernel space 4021 status = B_BAD_ADDRESS; 4022 TPF(PageFaultError(-1, 4023 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4024 } 4025 4026 if (status == B_OK) { 4027 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser, 4028 NULL); 4029 } 4030 4031 if (status < B_OK) { 4032 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4033 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4034 strerror(status), address, faultAddress, isWrite, isUser, 4035 thread_get_current_thread_id()); 4036 if (!isUser) { 4037 Thread* thread = thread_get_current_thread(); 4038 if (thread != NULL && thread->fault_handler != 0) { 4039 // this will cause the arch dependant page fault handler to 4040 // modify the IP on the interrupt frame or whatever to return 4041 // to this address 4042 *newIP = thread->fault_handler; 4043 } else { 4044 // unhandled page fault in the kernel 4045 panic("vm_page_fault: unhandled page fault in kernel space at " 4046 "0x%lx, ip 0x%lx\n", address, faultAddress); 4047 } 4048 } else { 4049 #if 1 4050 addressSpace->ReadLock(); 4051 4052 // TODO: remove me once we have proper userland debugging support 4053 // (and tools) 4054 VMArea* area = addressSpace->LookupArea(faultAddress); 4055 4056 Thread* thread = thread_get_current_thread(); 4057 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4058 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4059 thread->name, thread->id, thread->team->Name(), 4060 thread->team->id, isWrite ? "write" : "read", address, 4061 faultAddress, area ? area->name : "???", 4062 faultAddress - (area ? area->Base() : 0x0)); 4063 4064 // We can print a stack trace of the userland thread here. 4065 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4066 // fault and someone is already waiting for a write lock on the same address 4067 // space. This thread will then try to acquire the lock again and will 4068 // be queued after the writer. 4069 # if 0 4070 if (area) { 4071 struct stack_frame { 4072 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4073 struct stack_frame* previous; 4074 void* return_address; 4075 #else 4076 // ... 
4077 #warning writeme 4078 #endif 4079 } frame; 4080 # ifdef __INTEL__ 4081 struct iframe* iframe = i386_get_user_iframe(); 4082 if (iframe == NULL) 4083 panic("iframe is NULL!"); 4084 4085 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4086 sizeof(struct stack_frame)); 4087 # elif defined(__POWERPC__) 4088 struct iframe* iframe = ppc_get_user_iframe(); 4089 if (iframe == NULL) 4090 panic("iframe is NULL!"); 4091 4092 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4093 sizeof(struct stack_frame)); 4094 # else 4095 # warning "vm_page_fault() stack trace won't work" 4096 status = B_ERROR; 4097 # endif 4098 4099 dprintf("stack trace:\n"); 4100 int32 maxFrames = 50; 4101 while (status == B_OK && --maxFrames >= 0 4102 && frame.return_address != NULL) { 4103 dprintf(" %p", frame.return_address); 4104 area = addressSpace->LookupArea( 4105 (addr_t)frame.return_address); 4106 if (area) { 4107 dprintf(" (%s + %#lx)", area->name, 4108 (addr_t)frame.return_address - area->Base()); 4109 } 4110 dprintf("\n"); 4111 4112 status = user_memcpy(&frame, frame.previous, 4113 sizeof(struct stack_frame)); 4114 } 4115 } 4116 # endif // 0 (stack trace) 4117 4118 addressSpace->ReadUnlock(); 4119 #endif 4120 4121 // TODO: the fault_callback is a temporary solution for vm86 4122 if (thread->fault_callback == NULL 4123 || thread->fault_callback(address, faultAddress, isWrite)) { 4124 // If the thread has a signal handler for SIGSEGV, we simply 4125 // send it the signal. Otherwise we notify the user debugger 4126 // first. 4127 struct sigaction action; 4128 if ((sigaction(SIGSEGV, NULL, &action) == 0 4129 && action.sa_handler != SIG_DFL 4130 && action.sa_handler != SIG_IGN) 4131 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4132 SIGSEGV)) { 4133 Signal signal(SIGSEGV, 4134 status == B_PERMISSION_DENIED 4135 ? SEGV_ACCERR : SEGV_MAPERR, 4136 EFAULT, thread->team->id); 4137 signal.SetAddress((void*)address); 4138 send_signal_to_thread(thread, signal, 0); 4139 } 4140 } 4141 } 4142 } 4143 4144 if (addressSpace != NULL) 4145 addressSpace->Put(); 4146 4147 return B_HANDLED_INTERRUPT; 4148 } 4149 4150 4151 struct PageFaultContext { 4152 AddressSpaceReadLocker addressSpaceLocker; 4153 VMCacheChainLocker cacheChainLocker; 4154 4155 VMTranslationMap* map; 4156 VMCache* topCache; 4157 off_t cacheOffset; 4158 vm_page_reservation reservation; 4159 bool isWrite; 4160 4161 // return values 4162 vm_page* page; 4163 bool restart; 4164 4165 4166 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4167 : 4168 addressSpaceLocker(addressSpace, true), 4169 map(addressSpace->TranslationMap()), 4170 isWrite(isWrite) 4171 { 4172 } 4173 4174 ~PageFaultContext() 4175 { 4176 UnlockAll(); 4177 vm_page_unreserve_pages(&reservation); 4178 } 4179 4180 void Prepare(VMCache* topCache, off_t cacheOffset) 4181 { 4182 this->topCache = topCache; 4183 this->cacheOffset = cacheOffset; 4184 page = NULL; 4185 restart = false; 4186 4187 cacheChainLocker.SetTo(topCache); 4188 } 4189 4190 void UnlockAll(VMCache* exceptCache = NULL) 4191 { 4192 topCache = NULL; 4193 addressSpaceLocker.Unlock(); 4194 cacheChainLocker.Unlock(exceptCache); 4195 } 4196 }; 4197 4198 4199 /*! Gets the page that should be mapped into the area. 4200 Returns an error code other than \c B_OK, if the page couldn't be found or 4201 paged in. The locking state of the address space and the caches is undefined 4202 in that case. 
4203 Returns \c B_OK with \c context.restart set to \c true, if the functions 4204 had to unlock the address space and all caches and is supposed to be called 4205 again. 4206 Returns \c B_OK with \c context.restart set to \c false, if the page was 4207 found. It is returned in \c context.page. The address space will still be 4208 locked as well as all caches starting from the top cache to at least the 4209 cache the page lives in. 4210 */ 4211 static status_t 4212 fault_get_page(PageFaultContext& context) 4213 { 4214 VMCache* cache = context.topCache; 4215 VMCache* lastCache = NULL; 4216 vm_page* page = NULL; 4217 4218 while (cache != NULL) { 4219 // We already hold the lock of the cache at this point. 4220 4221 lastCache = cache; 4222 4223 page = cache->LookupPage(context.cacheOffset); 4224 if (page != NULL && page->busy) { 4225 // page must be busy -- wait for it to become unbusy 4226 context.UnlockAll(cache); 4227 cache->ReleaseRefLocked(); 4228 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4229 4230 // restart the whole process 4231 context.restart = true; 4232 return B_OK; 4233 } 4234 4235 if (page != NULL) 4236 break; 4237 4238 // The current cache does not contain the page we're looking for. 4239 4240 // see if the backing store has it 4241 if (cache->HasPage(context.cacheOffset)) { 4242 // insert a fresh page and mark it busy -- we're going to read it in 4243 page = vm_page_allocate_page(&context.reservation, 4244 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4245 cache->InsertPage(page, context.cacheOffset); 4246 4247 // We need to unlock all caches and the address space while reading 4248 // the page in. Keep a reference to the cache around. 4249 cache->AcquireRefLocked(); 4250 context.UnlockAll(); 4251 4252 // read the page in 4253 generic_io_vec vec; 4254 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4255 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4256 4257 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4258 B_PHYSICAL_IO_REQUEST, &bytesRead); 4259 4260 cache->Lock(); 4261 4262 if (status < B_OK) { 4263 // on error remove and free the page 4264 dprintf("reading page from cache %p returned: %s!\n", 4265 cache, strerror(status)); 4266 4267 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4268 cache->RemovePage(page); 4269 vm_page_set_state(page, PAGE_STATE_FREE); 4270 4271 cache->ReleaseRefAndUnlock(); 4272 return status; 4273 } 4274 4275 // mark the page unbusy again 4276 cache->MarkPageUnbusy(page); 4277 4278 DEBUG_PAGE_ACCESS_END(page); 4279 4280 // Since we needed to unlock everything temporarily, the area 4281 // situation might have changed. So we need to restart the whole 4282 // process. 4283 cache->ReleaseRefAndUnlock(); 4284 context.restart = true; 4285 return B_OK; 4286 } 4287 4288 cache = context.cacheChainLocker.LockSourceCache(); 4289 } 4290 4291 if (page == NULL) { 4292 // There was no adequate page, determine the cache for a clean one. 4293 // Read-only pages come in the deepest cache, only the top most cache 4294 // may have direct write access. 4295 cache = context.isWrite ? 
context.topCache : lastCache; 4296 4297 // allocate a clean page 4298 page = vm_page_allocate_page(&context.reservation, 4299 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4300 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 4301 page->physical_page_number)); 4302 4303 // insert the new page into our cache 4304 cache->InsertPage(page, context.cacheOffset); 4305 } else if (page->Cache() != context.topCache && context.isWrite) { 4306 // We have a page that has the data we want, but in the wrong cache 4307 // object so we need to copy it and stick it into the top cache. 4308 vm_page* sourcePage = page; 4309 4310 // TODO: If memory is low, it might be a good idea to steal the page 4311 // from our source cache -- if possible, that is. 4312 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4313 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4314 4315 // To not needlessly kill concurrency we unlock all caches but the top 4316 // one while copying the page. Lacking another mechanism to ensure that 4317 // the source page doesn't disappear, we mark it busy. 4318 sourcePage->busy = true; 4319 context.cacheChainLocker.UnlockKeepRefs(true); 4320 4321 // copy the page 4322 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4323 sourcePage->physical_page_number * B_PAGE_SIZE); 4324 4325 context.cacheChainLocker.RelockCaches(true); 4326 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4327 4328 // insert the new page into our cache 4329 context.topCache->InsertPage(page, context.cacheOffset); 4330 } else 4331 DEBUG_PAGE_ACCESS_START(page); 4332 4333 context.page = page; 4334 return B_OK; 4335 } 4336 4337 4338 /*! Makes sure the address in the given address space is mapped. 4339 4340 \param addressSpace The address space. 4341 \param originalAddress The address. Doesn't need to be page aligned. 4342 \param isWrite If \c true the address shall be write-accessible. 4343 \param isUser If \c true the access is requested by a userland team. 4344 \param wirePage On success, if non \c NULL, the wired count of the page 4345 mapped at the given address is incremented and the page is returned 4346 via this parameter. 4347 \param wiredRange If given, this wiredRange is ignored when checking whether 4348 an already mapped page at the virtual address can be unmapped. 4349 \return \c B_OK on success, another error code otherwise. 4350 */ 4351 static status_t 4352 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4353 bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange) 4354 { 4355 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 4356 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 4357 4358 PageFaultContext context(addressSpace, isWrite); 4359 4360 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4361 status_t status = B_OK; 4362 4363 addressSpace->IncrementFaultCount(); 4364 4365 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4366 // the pages upfront makes sure we don't have any cache locked, so that the 4367 // page daemon/thief can do their job without problems. 4368 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4369 originalAddress); 4370 context.addressSpaceLocker.Unlock(); 4371 vm_page_reserve_pages(&context.reservation, reservePages, 4372 addressSpace == VMAddressSpace::Kernel() 4373 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4374 4375 while (true) { 4376 context.addressSpaceLocker.Lock(); 4377 4378 // get the area the fault was in 4379 VMArea* area = addressSpace->LookupArea(address); 4380 if (area == NULL) { 4381 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4382 "space\n", originalAddress); 4383 TPF(PageFaultError(-1, 4384 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4385 status = B_BAD_ADDRESS; 4386 break; 4387 } 4388 4389 // check permissions 4390 uint32 protection = get_area_page_protection(area, address); 4391 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4392 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 4393 (void*)originalAddress); 4394 TPF(PageFaultError(area->id, 4395 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4396 status = B_PERMISSION_DENIED; 4397 break; 4398 } 4399 if (isWrite && (protection 4400 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4401 dprintf("write access attempted on write-protected area 0x%lx at" 4402 " %p\n", area->id, (void*)originalAddress); 4403 TPF(PageFaultError(area->id, 4404 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4405 status = B_PERMISSION_DENIED; 4406 break; 4407 } else if (!isWrite && (protection 4408 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4409 dprintf("read access attempted on read-protected area 0x%lx at" 4410 " %p\n", area->id, (void*)originalAddress); 4411 TPF(PageFaultError(area->id, 4412 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4413 status = B_PERMISSION_DENIED; 4414 break; 4415 } 4416 4417 // We have the area, it was a valid access, so let's try to resolve the 4418 // page fault now. 4419 // At first, the top most cache from the area is investigated. 4420 4421 context.Prepare(vm_area_get_locked_cache(area), 4422 address - area->Base() + area->cache_offset); 4423 4424 // See if this cache has a fault handler -- this will do all the work 4425 // for us. 4426 { 4427 // Note, since the page fault is resolved with interrupts enabled, 4428 // the fault handler could be called more than once for the same 4429 // reason -- the store must take this into account. 4430 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4431 if (status != B_BAD_HANDLER) 4432 break; 4433 } 4434 4435 // The top most cache has no fault handler, so let's see if the cache or 4436 // its sources already have the page we're searching for (we're going 4437 // from top to bottom). 4438 status = fault_get_page(context); 4439 if (status != B_OK) { 4440 TPF(PageFaultError(area->id, status)); 4441 break; 4442 } 4443 4444 if (context.restart) 4445 continue; 4446 4447 // All went fine, all there is left to do is to map the page into the 4448 // address space. 
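// (What follows: if the page comes from a lower cache and this is a read
// fault, it is mapped read-only so that copy-on-write is preserved; if a
// different page is already mapped at the address, that mapping is removed
// first -- waiting for the page to become unwired, if necessary.)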
4449 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4450 context.page)); 4451 4452 // If the page doesn't reside in the area's cache, we need to make sure 4453 // it's mapped in read-only, so that we cannot overwrite someone else's 4454 // data (copy-on-write) 4455 uint32 newProtection = protection; 4456 if (context.page->Cache() != context.topCache && !isWrite) 4457 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4458 4459 bool unmapPage = false; 4460 bool mapPage = true; 4461 4462 // check whether there's already a page mapped at the address 4463 context.map->Lock(); 4464 4465 phys_addr_t physicalAddress; 4466 uint32 flags; 4467 vm_page* mappedPage = NULL; 4468 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4469 && (flags & PAGE_PRESENT) != 0 4470 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4471 != NULL) { 4472 // Yep there's already a page. If it's ours, we can simply adjust 4473 // its protection. Otherwise we have to unmap it. 4474 if (mappedPage == context.page) { 4475 context.map->ProtectPage(area, address, newProtection); 4476 // Note: We assume that ProtectPage() is atomic (i.e. 4477 // the page isn't temporarily unmapped), otherwise we'd have 4478 // to make sure it isn't wired. 4479 mapPage = false; 4480 } else 4481 unmapPage = true; 4482 } 4483 4484 context.map->Unlock(); 4485 4486 if (unmapPage) { 4487 // If the page is wired, we can't unmap it. Wait until it is unwired 4488 // again and restart. 4489 VMAreaUnwiredWaiter waiter; 4490 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4491 wiredRange)) { 4492 // unlock everything and wait 4493 context.UnlockAll(); 4494 waiter.waitEntry.Wait(); 4495 continue; 4496 } 4497 4498 // Note: The mapped page is a page of a lower cache. We are 4499 // guaranteed to have that cached locked, our new page is a copy of 4500 // that page, and the page is not busy. The logic for that guarantee 4501 // is as follows: Since the page is mapped, it must live in the top 4502 // cache (ruled out above) or any of its lower caches, and there is 4503 // (was before the new page was inserted) no other page in any 4504 // cache between the top cache and the page's cache (otherwise that 4505 // would be mapped instead). That in turn means that our algorithm 4506 // must have found it and therefore it cannot be busy either. 4507 DEBUG_PAGE_ACCESS_START(mappedPage); 4508 unmap_page(area, address); 4509 DEBUG_PAGE_ACCESS_END(mappedPage); 4510 } 4511 4512 if (mapPage) { 4513 if (map_page(area, context.page, address, newProtection, 4514 &context.reservation) != B_OK) { 4515 // Mapping can only fail, when the page mapping object couldn't 4516 // be allocated. Save for the missing mapping everything is 4517 // fine, though. If this was a regular page fault, we'll simply 4518 // leave and probably fault again. To make sure we'll have more 4519 // luck then, we ensure that the minimum object reserve is 4520 // available. 4521 DEBUG_PAGE_ACCESS_END(context.page); 4522 4523 context.UnlockAll(); 4524 4525 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4526 != B_OK) { 4527 // Apparently the situation is serious. Let's get ourselves 4528 // killed. 4529 status = B_NO_MEMORY; 4530 } else if (wirePage != NULL) { 4531 // The caller expects us to wire the page. Since 4532 // object_cache_reserve() succeeded, we should now be able 4533 // to allocate a mapping structure. Restart. 
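// (The page itself already sits in its cache at this point, so the
// retried fault only needs to allocate the mapping structure.)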
4534 continue; 4535 } 4536 4537 break; 4538 } 4539 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4540 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4541 4542 // also wire the page, if requested 4543 if (wirePage != NULL && status == B_OK) { 4544 increment_page_wired_count(context.page); 4545 *wirePage = context.page; 4546 } 4547 4548 DEBUG_PAGE_ACCESS_END(context.page); 4549 4550 break; 4551 } 4552 4553 return status; 4554 } 4555 4556 4557 status_t 4558 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4559 { 4560 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4561 } 4562 4563 status_t 4564 vm_put_physical_page(addr_t vaddr, void* handle) 4565 { 4566 return sPhysicalPageMapper->PutPage(vaddr, handle); 4567 } 4568 4569 4570 status_t 4571 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4572 void** _handle) 4573 { 4574 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4575 } 4576 4577 status_t 4578 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4579 { 4580 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4581 } 4582 4583 4584 status_t 4585 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4586 { 4587 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4588 } 4589 4590 status_t 4591 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4592 { 4593 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4594 } 4595 4596 4597 void 4598 vm_get_info(system_memory_info* info) 4599 { 4600 swap_get_info(info); 4601 4602 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4603 info->page_faults = sPageFaults; 4604 4605 MutexLocker locker(sAvailableMemoryLock); 4606 info->free_memory = sAvailableMemory; 4607 info->needed_memory = sNeededMemory; 4608 } 4609 4610 4611 uint32 4612 vm_num_page_faults(void) 4613 { 4614 return sPageFaults; 4615 } 4616 4617 4618 off_t 4619 vm_available_memory(void) 4620 { 4621 MutexLocker locker(sAvailableMemoryLock); 4622 return sAvailableMemory; 4623 } 4624 4625 4626 off_t 4627 vm_available_not_needed_memory(void) 4628 { 4629 MutexLocker locker(sAvailableMemoryLock); 4630 return sAvailableMemory - sNeededMemory; 4631 } 4632 4633 4634 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4635 debugger. 
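	It reads \c sAvailableMemory and \c sNeededMemory without acquiring
	\c sAvailableMemoryLock and therefore must not be used outside the kernel
	debugger.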
4636 */ 4637 off_t 4638 vm_available_not_needed_memory_debug(void) 4639 { 4640 return sAvailableMemory - sNeededMemory; 4641 } 4642 4643 4644 size_t 4645 vm_kernel_address_space_left(void) 4646 { 4647 return VMAddressSpace::Kernel()->FreeSpace(); 4648 } 4649 4650 4651 void 4652 vm_unreserve_memory(size_t amount) 4653 { 4654 mutex_lock(&sAvailableMemoryLock); 4655 4656 sAvailableMemory += amount; 4657 4658 mutex_unlock(&sAvailableMemoryLock); 4659 } 4660 4661 4662 status_t 4663 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4664 { 4665 size_t reserve = kMemoryReserveForPriority[priority]; 4666 4667 MutexLocker locker(sAvailableMemoryLock); 4668 4669 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4670 4671 if (sAvailableMemory >= amount + reserve) { 4672 sAvailableMemory -= amount; 4673 return B_OK; 4674 } 4675 4676 if (timeout <= 0) 4677 return B_NO_MEMORY; 4678 4679 // turn timeout into an absolute timeout 4680 timeout += system_time(); 4681 4682 // loop until we've got the memory or the timeout occurs 4683 do { 4684 sNeededMemory += amount; 4685 4686 // call the low resource manager 4687 locker.Unlock(); 4688 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4689 B_ABSOLUTE_TIMEOUT, timeout); 4690 locker.Lock(); 4691 4692 sNeededMemory -= amount; 4693 4694 if (sAvailableMemory >= amount + reserve) { 4695 sAvailableMemory -= amount; 4696 return B_OK; 4697 } 4698 } while (timeout > system_time()); 4699 4700 return B_NO_MEMORY; 4701 } 4702 4703 4704 status_t 4705 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 4706 { 4707 // NOTE: The caller is responsible for synchronizing calls to this function! 4708 4709 AddressSpaceReadLocker locker; 4710 VMArea* area; 4711 status_t status = locker.SetFromArea(id, area); 4712 if (status != B_OK) 4713 return status; 4714 4715 // nothing to do, if the type doesn't change 4716 uint32 oldType = area->MemoryType(); 4717 if (type == oldType) 4718 return B_OK; 4719 4720 // set the memory type of the area and the mapped pages 4721 VMTranslationMap* map = area->address_space->TranslationMap(); 4722 map->Lock(); 4723 area->SetMemoryType(type); 4724 map->ProtectArea(area, area->protection); 4725 map->Unlock(); 4726 4727 // set the physical memory type 4728 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 4729 if (error != B_OK) { 4730 // reset the memory type of the area and the mapped pages 4731 map->Lock(); 4732 area->SetMemoryType(oldType); 4733 map->ProtectArea(area, area->protection); 4734 map->Unlock(); 4735 return error; 4736 } 4737 4738 return B_OK; 4739 4740 } 4741 4742 4743 /*! This function enforces some protection properties: 4744 - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well 4745 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 4746 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 4747 and B_KERNEL_WRITE_AREA. 
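	For example: passing just \c B_READ_AREA results in
	\c B_READ_AREA | \c B_KERNEL_READ_AREA, while passing
	\c B_READ_AREA | \c B_WRITE_AREA results in
	\c B_READ_AREA | \c B_WRITE_AREA | \c B_KERNEL_READ_AREA
	| \c B_KERNEL_WRITE_AREA.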
4748 */ 4749 static void 4750 fix_protection(uint32* protection) 4751 { 4752 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4753 if ((*protection & B_USER_PROTECTION) == 0 4754 || (*protection & B_WRITE_AREA) != 0) 4755 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4756 else 4757 *protection |= B_KERNEL_READ_AREA; 4758 } 4759 } 4760 4761 4762 static void 4763 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4764 { 4765 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4766 info->area = area->id; 4767 info->address = (void*)area->Base(); 4768 info->size = area->Size(); 4769 info->protection = area->protection; 4770 info->lock = B_FULL_LOCK; 4771 info->team = area->address_space->ID(); 4772 info->copy_count = 0; 4773 info->in_count = 0; 4774 info->out_count = 0; 4775 // TODO: retrieve real values here! 4776 4777 VMCache* cache = vm_area_get_locked_cache(area); 4778 4779 // Note, this is a simplification; the cache could be larger than this area 4780 info->ram_size = cache->page_count * B_PAGE_SIZE; 4781 4782 vm_area_put_locked_cache(cache); 4783 } 4784 4785 4786 static status_t 4787 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4788 { 4789 // is newSize a multiple of B_PAGE_SIZE? 4790 if (newSize & (B_PAGE_SIZE - 1)) 4791 return B_BAD_VALUE; 4792 4793 // lock all affected address spaces and the cache 4794 VMArea* area; 4795 VMCache* cache; 4796 4797 MultiAddressSpaceLocker locker; 4798 AreaCacheLocker cacheLocker; 4799 4800 status_t status; 4801 size_t oldSize; 4802 bool anyKernelArea; 4803 bool restart; 4804 4805 do { 4806 anyKernelArea = false; 4807 restart = false; 4808 4809 locker.Unset(); 4810 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4811 if (status != B_OK) 4812 return status; 4813 cacheLocker.SetTo(cache, true); // already locked 4814 4815 // enforce restrictions 4816 if (!kernel) { 4817 if ((area->protection & B_KERNEL_AREA) != 0) 4818 return B_NOT_ALLOWED; 4819 // TODO: Enforce all restrictions (team, etc.)! 4820 } 4821 4822 oldSize = area->Size(); 4823 if (newSize == oldSize) 4824 return B_OK; 4825 4826 if (cache->type != CACHE_TYPE_RAM) 4827 return B_NOT_ALLOWED; 4828 4829 if (oldSize < newSize) { 4830 // We need to check if all areas of this cache can be resized. 4831 for (VMArea* current = cache->areas; current != NULL; 4832 current = current->cache_next) { 4833 if (!current->address_space->CanResizeArea(current, newSize)) 4834 return B_ERROR; 4835 anyKernelArea 4836 |= current->address_space == VMAddressSpace::Kernel(); 4837 } 4838 } else { 4839 // We're shrinking the areas, so we must make sure the affected 4840 // ranges are not wired. 4841 for (VMArea* current = cache->areas; current != NULL; 4842 current = current->cache_next) { 4843 anyKernelArea 4844 |= current->address_space == VMAddressSpace::Kernel(); 4845 4846 if (wait_if_area_range_is_wired(current, 4847 current->Base() + newSize, oldSize - newSize, &locker, 4848 &cacheLocker)) { 4849 restart = true; 4850 break; 4851 } 4852 } 4853 } 4854 } while (restart); 4855 4856 // Okay, looks good so far, so let's do it 4857 4858 int priority = kernel && anyKernelArea 4859 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4860 uint32 allocationFlags = kernel && anyKernelArea 4861 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4862 4863 if (oldSize < newSize) { 4864 // Growing the cache can fail, so we do it first. 
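// (Shrinking, in contrast, cannot fail, which is why it is done only after
// all areas have been resized successfully -- see below.)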
4865 status = cache->Resize(cache->virtual_base + newSize, priority); 4866 if (status != B_OK) 4867 return status; 4868 } 4869 4870 for (VMArea* current = cache->areas; current != NULL; 4871 current = current->cache_next) { 4872 status = current->address_space->ResizeArea(current, newSize, 4873 allocationFlags); 4874 if (status != B_OK) 4875 break; 4876 4877 // We also need to unmap all pages beyond the new size, if the area has 4878 // shrunk 4879 if (newSize < oldSize) { 4880 VMCacheChainLocker cacheChainLocker(cache); 4881 cacheChainLocker.LockAllSourceCaches(); 4882 4883 unmap_pages(current, current->Base() + newSize, 4884 oldSize - newSize); 4885 4886 cacheChainLocker.Unlock(cache); 4887 } 4888 } 4889 4890 if (status == B_OK) { 4891 // Shrink or grow individual page protections if in use. 4892 if (area->page_protections != NULL) { 4893 uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2; 4894 uint8* newProtections 4895 = (uint8*)realloc(area->page_protections, bytes); 4896 if (newProtections == NULL) 4897 status = B_NO_MEMORY; 4898 else { 4899 area->page_protections = newProtections; 4900 4901 if (oldSize < newSize) { 4902 // init the additional page protections to that of the area 4903 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 4904 uint32 areaProtection = area->protection 4905 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 4906 memset(area->page_protections + offset, 4907 areaProtection | (areaProtection << 4), bytes - offset); 4908 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 4909 uint8& entry = area->page_protections[offset - 1]; 4910 entry = (entry & 0x0f) | (areaProtection << 4); 4911 } 4912 } 4913 } 4914 } 4915 } 4916 4917 // shrinking the cache can't fail, so we do it now 4918 if (status == B_OK && newSize < oldSize) 4919 status = cache->Resize(cache->virtual_base + newSize, priority); 4920 4921 if (status != B_OK) { 4922 // Something failed -- resize the areas back to their original size. 4923 // This can fail, too, in which case we're seriously screwed. 4924 for (VMArea* current = cache->areas; current != NULL; 4925 current = current->cache_next) { 4926 if (current->address_space->ResizeArea(current, oldSize, 4927 allocationFlags) != B_OK) { 4928 panic("vm_resize_area(): Failed and not being able to restore " 4929 "original state."); 4930 } 4931 } 4932 4933 cache->Resize(cache->virtual_base + oldSize, priority); 4934 } 4935 4936 // TODO: we must honour the lock restrictions of this area 4937 return status; 4938 } 4939 4940 4941 status_t 4942 vm_memset_physical(phys_addr_t address, int value, size_t length) 4943 { 4944 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4945 } 4946 4947 4948 status_t 4949 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 4950 { 4951 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4952 } 4953 4954 4955 status_t 4956 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 4957 bool user) 4958 { 4959 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4960 } 4961 4962 4963 void 4964 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 4965 { 4966 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4967 } 4968 4969 4970 /*! Copies a range of memory directly from/to a page that might not be mapped 4971 at the moment. 4972 4973 For \a unsafeMemory the current mapping (if any is ignored). The function 4974 walks through the respective area's cache chain to find the physical page 4975 and copies from/to it directly. 
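	If the page is not resident in any cache of the chain (e.g. because it has
	been paged out), it is not paged in; the function fails with
	\c B_UNSUPPORTED instead.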
4976 The memory range starting at \a unsafeMemory with a length of \a size bytes 4977 must not cross a page boundary. 4978 4979 \param teamID The team ID identifying the address space \a unsafeMemory is 4980 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 4981 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 4982 is passed, the address space of the thread returned by 4983 debug_get_debugged_thread() is used. 4984 \param unsafeMemory The start of the unsafe memory range to be copied 4985 from/to. 4986 \param buffer A safely accessible kernel buffer to be copied from/to. 4987 \param size The number of bytes to be copied. 4988 \param copyToUnsafe If \c true, memory is copied from \a buffer to 4989 \a unsafeMemory, the other way around otherwise. 4990 */ 4991 status_t 4992 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 4993 size_t size, bool copyToUnsafe) 4994 { 4995 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 4996 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 4997 return B_BAD_VALUE; 4998 } 4999 5000 // get the address space for the debugged thread 5001 VMAddressSpace* addressSpace; 5002 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5003 addressSpace = VMAddressSpace::Kernel(); 5004 } else if (teamID == B_CURRENT_TEAM) { 5005 Thread* thread = debug_get_debugged_thread(); 5006 if (thread == NULL || thread->team == NULL) 5007 return B_BAD_ADDRESS; 5008 5009 addressSpace = thread->team->address_space; 5010 } else 5011 addressSpace = VMAddressSpace::DebugGet(teamID); 5012 5013 if (addressSpace == NULL) 5014 return B_BAD_ADDRESS; 5015 5016 // get the area 5017 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5018 if (area == NULL) 5019 return B_BAD_ADDRESS; 5020 5021 // search the page 5022 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5023 + area->cache_offset; 5024 VMCache* cache = area->cache; 5025 vm_page* page = NULL; 5026 while (cache != NULL) { 5027 page = cache->DebugLookupPage(cacheOffset); 5028 if (page != NULL) 5029 break; 5030 5031 // Page not found in this cache -- if it is paged out, we must not try 5032 // to get it from lower caches. 5033 if (cache->DebugHasPage(cacheOffset)) 5034 break; 5035 5036 cache = cache->source; 5037 } 5038 5039 if (page == NULL) 5040 return B_UNSUPPORTED; 5041 5042 // copy from/to physical memory 5043 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5044 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5045 5046 if (copyToUnsafe) { 5047 if (page->Cache() != area->cache) 5048 return B_UNSUPPORTED; 5049 5050 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5051 } 5052 5053 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5054 } 5055 5056 5057 // #pragma mark - kernel public API 5058 5059 5060 status_t 5061 user_memcpy(void* to, const void* from, size_t size) 5062 { 5063 // don't allow address overflows 5064 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5065 return B_BAD_ADDRESS; 5066 5067 if (arch_cpu_user_memcpy(to, from, size, 5068 &thread_get_current_thread()->fault_handler) < B_OK) 5069 return B_BAD_ADDRESS; 5070 5071 return B_OK; 5072 } 5073 5074 5075 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5076 the string in \a to, NULL-terminating the result. 5077 5078 \param to Pointer to the destination C-string. 5079 \param from Pointer to the source C-string. 
5080 \param size Size in bytes of the string buffer pointed to by \a to. 5081 5082 \return strlen(\a from). 5083 */ 5084 ssize_t 5085 user_strlcpy(char* to, const char* from, size_t size) 5086 { 5087 if (to == NULL && size != 0) 5088 return B_BAD_VALUE; 5089 if (from == NULL) 5090 return B_BAD_ADDRESS; 5091 5092 // limit size to avoid address overflows 5093 size_t maxSize = std::min(size, 5094 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 5095 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 5096 // the source address might still overflow. 5097 5098 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 5099 &thread_get_current_thread()->fault_handler); 5100 5101 // If we hit the address overflow boundary, fail. 5102 if (result >= 0 && (size_t)result >= maxSize && maxSize < size) 5103 return B_BAD_ADDRESS; 5104 5105 return result; 5106 } 5107 5108 5109 status_t 5110 user_memset(void* s, char c, size_t count) 5111 { 5112 // don't allow address overflows 5113 if ((addr_t)s + count < (addr_t)s) 5114 return B_BAD_ADDRESS; 5115 5116 if (arch_cpu_user_memset(s, c, count, 5117 &thread_get_current_thread()->fault_handler) < B_OK) 5118 return B_BAD_ADDRESS; 5119 5120 return B_OK; 5121 } 5122 5123 5124 /*! Wires a single page at the given address. 5125 5126 \param team The team whose address space the address belongs to. Supports 5127 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5128 parameter is ignored. 5129 \param address address The virtual address to wire down. Does not need to 5130 be page aligned. 5131 \param writable If \c true the page shall be writable. 5132 \param info On success the info is filled in, among other things 5133 containing the physical address the given virtual one translates to. 5134 \return \c B_OK, when the page could be wired, another error code otherwise. 5135 */ 5136 status_t 5137 vm_wire_page(team_id team, addr_t address, bool writable, 5138 VMPageWiringInfo* info) 5139 { 5140 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5141 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5142 5143 // compute the page protection that is required 5144 bool isUser = IS_USER_ADDRESS(address); 5145 uint32 requiredProtection = PAGE_PRESENT 5146 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5147 if (writable) 5148 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5149 5150 // get and read lock the address space 5151 VMAddressSpace* addressSpace = NULL; 5152 if (isUser) { 5153 if (team == B_CURRENT_TEAM) 5154 addressSpace = VMAddressSpace::GetCurrent(); 5155 else 5156 addressSpace = VMAddressSpace::Get(team); 5157 } else 5158 addressSpace = VMAddressSpace::GetKernel(); 5159 if (addressSpace == NULL) 5160 return B_ERROR; 5161 5162 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5163 5164 VMTranslationMap* map = addressSpace->TranslationMap(); 5165 status_t error = B_OK; 5166 5167 // get the area 5168 VMArea* area = addressSpace->LookupArea(pageAddress); 5169 if (area == NULL) { 5170 addressSpace->Put(); 5171 return B_BAD_ADDRESS; 5172 } 5173 5174 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5175 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5176 5177 // mark the area range wired 5178 area->Wire(&info->range); 5179 5180 // Lock the area's cache chain and the translation map. Needed to look 5181 // up the page and play with its wired count. 
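	// (The mapped page may live in a lower cache of the chain -- e.g. for a
	// not-yet-written copy-on-write mapping -- so all source caches are
	// locked here, not just the top cache.)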
5182 cacheChainLocker.LockAllSourceCaches(); 5183 map->Lock(); 5184 5185 phys_addr_t physicalAddress; 5186 uint32 flags; 5187 vm_page* page; 5188 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5189 && (flags & requiredProtection) == requiredProtection 5190 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5191 != NULL) { 5192 // Already mapped with the correct permissions -- just increment 5193 // the page's wired count. 5194 increment_page_wired_count(page); 5195 5196 map->Unlock(); 5197 cacheChainLocker.Unlock(); 5198 addressSpaceLocker.Unlock(); 5199 } else { 5200 // Let vm_soft_fault() map the page for us, if possible. We need 5201 // to fully unlock to avoid deadlocks. Since we have already 5202 // wired the area itself, nothing disturbing will happen with it 5203 // in the meantime. 5204 map->Unlock(); 5205 cacheChainLocker.Unlock(); 5206 addressSpaceLocker.Unlock(); 5207 5208 error = vm_soft_fault(addressSpace, pageAddress, writable, isUser, 5209 &page, &info->range); 5210 5211 if (error != B_OK) { 5212 // The page could not be mapped -- clean up. 5213 VMCache* cache = vm_area_get_locked_cache(area); 5214 area->Unwire(&info->range); 5215 cache->ReleaseRefAndUnlock(); 5216 addressSpace->Put(); 5217 return error; 5218 } 5219 } 5220 5221 info->physicalAddress 5222 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5223 + address % B_PAGE_SIZE; 5224 info->page = page; 5225 5226 return B_OK; 5227 } 5228 5229 5230 /*! Unwires a single page previously wired via vm_wire_page(). 5231 5232 \param info The same object passed to vm_wire_page() before. 5233 */ 5234 void 5235 vm_unwire_page(VMPageWiringInfo* info) 5236 { 5237 // lock the address space 5238 VMArea* area = info->range.area; 5239 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5240 // takes over our reference 5241 5242 // lock the top cache 5243 VMCache* cache = vm_area_get_locked_cache(area); 5244 VMCacheChainLocker cacheChainLocker(cache); 5245 5246 if (info->page->Cache() != cache) { 5247 // The page is not in the top cache, so we lock the whole cache chain 5248 // before touching the page's wired count. 5249 cacheChainLocker.LockAllSourceCaches(); 5250 } 5251 5252 decrement_page_wired_count(info->page); 5253 5254 // remove the wired range from the range 5255 area->Unwire(&info->range); 5256 5257 cacheChainLocker.Unlock(); 5258 } 5259 5260 5261 /*! Wires down the given address range in the specified team's address space. 5262 5263 If successful the function 5264 - acquires a reference to the specified team's address space, 5265 - adds respective wired ranges to all areas that intersect with the given 5266 address range, 5267 - makes sure all pages in the given address range are mapped with the 5268 requested access permissions and increments their wired count. 5269 5270 It fails, when \a team doesn't specify a valid address space, when any part 5271 of the specified address range is not covered by areas, when the concerned 5272 areas don't allow mapping with the requested permissions, or when mapping 5273 failed for another reason. 5274 5275 When successful the call must be balanced by a unlock_memory_etc() call with 5276 the exact same parameters. 5277 5278 \param team Identifies the address (via team ID). \c B_CURRENT_TEAM is 5279 supported. 5280 \param address The start of the address range to be wired. 5281 \param numBytes The size of the address range to be wired. 5282 \param flags Flags. 
Currently only \c B_READ_DEVICE is defined, which 5283 requests that the range must be wired writable ("read from device 5284 into memory"). 5285 \return \c B_OK on success, another error code otherwise. 5286 */ 5287 status_t 5288 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5289 { 5290 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5291 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5292 5293 // compute the page protection that is required 5294 bool isUser = IS_USER_ADDRESS(address); 5295 bool writable = (flags & B_READ_DEVICE) == 0; 5296 uint32 requiredProtection = PAGE_PRESENT 5297 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5298 if (writable) 5299 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5300 5301 uint32 mallocFlags = isUser 5302 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5303 5304 // get and read lock the address space 5305 VMAddressSpace* addressSpace = NULL; 5306 if (isUser) { 5307 if (team == B_CURRENT_TEAM) 5308 addressSpace = VMAddressSpace::GetCurrent(); 5309 else 5310 addressSpace = VMAddressSpace::Get(team); 5311 } else 5312 addressSpace = VMAddressSpace::GetKernel(); 5313 if (addressSpace == NULL) 5314 return B_ERROR; 5315 5316 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5317 5318 VMTranslationMap* map = addressSpace->TranslationMap(); 5319 status_t error = B_OK; 5320 5321 // iterate through all concerned areas 5322 addr_t nextAddress = lockBaseAddress; 5323 while (nextAddress != lockEndAddress) { 5324 // get the next area 5325 VMArea* area = addressSpace->LookupArea(nextAddress); 5326 if (area == NULL) { 5327 error = B_BAD_ADDRESS; 5328 break; 5329 } 5330 5331 addr_t areaStart = nextAddress; 5332 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5333 5334 // allocate the wired range (do that before locking the cache to avoid 5335 // deadlocks) 5336 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5337 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5338 if (range == NULL) { 5339 error = B_NO_MEMORY; 5340 break; 5341 } 5342 5343 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5344 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5345 5346 // mark the area range wired 5347 area->Wire(range); 5348 5349 // Depending on the area cache type and the wiring, we may not need to 5350 // look at the individual pages. 5351 if (area->cache_type == CACHE_TYPE_NULL 5352 || area->cache_type == CACHE_TYPE_DEVICE 5353 || area->wiring == B_FULL_LOCK 5354 || area->wiring == B_CONTIGUOUS) { 5355 nextAddress = areaEnd; 5356 continue; 5357 } 5358 5359 // Lock the area's cache chain and the translation map. Needed to look 5360 // up pages and play with their wired count. 5361 cacheChainLocker.LockAllSourceCaches(); 5362 map->Lock(); 5363 5364 // iterate through the pages and wire them 5365 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5366 phys_addr_t physicalAddress; 5367 uint32 flags; 5368 5369 vm_page* page; 5370 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5371 && (flags & requiredProtection) == requiredProtection 5372 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5373 != NULL) { 5374 // Already mapped with the correct permissions -- just increment 5375 // the page's wired count. 5376 increment_page_wired_count(page); 5377 } else { 5378 // Let vm_soft_fault() map the page for us, if possible. 
We need 5379 // to fully unlock to avoid deadlocks. Since we have already 5380 // wired the area itself, nothing disturbing will happen with it 5381 // in the meantime. 5382 map->Unlock(); 5383 cacheChainLocker.Unlock(); 5384 addressSpaceLocker.Unlock(); 5385 5386 error = vm_soft_fault(addressSpace, nextAddress, writable, 5387 isUser, &page, range); 5388 5389 addressSpaceLocker.Lock(); 5390 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5391 cacheChainLocker.LockAllSourceCaches(); 5392 map->Lock(); 5393 } 5394 5395 if (error != B_OK) 5396 break; 5397 } 5398 5399 map->Unlock(); 5400 5401 if (error == B_OK) { 5402 cacheChainLocker.Unlock(); 5403 } else { 5404 // An error occurred, so abort right here. If the current address 5405 // is the first in this area, unwire the area, since we won't get 5406 // to it when reverting what we've done so far. 5407 if (nextAddress == areaStart) { 5408 area->Unwire(range); 5409 cacheChainLocker.Unlock(); 5410 range->~VMAreaWiredRange(); 5411 free_etc(range, mallocFlags); 5412 } else 5413 cacheChainLocker.Unlock(); 5414 5415 break; 5416 } 5417 } 5418 5419 if (error != B_OK) { 5420 // An error occurred, so unwire all that we've already wired. Note that 5421 // even if not a single page was wired, unlock_memory_etc() is called 5422 // to put the address space reference. 5423 addressSpaceLocker.Unlock(); 5424 unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress, 5425 flags); 5426 } 5427 5428 return error; 5429 } 5430 5431 5432 status_t 5433 lock_memory(void* address, size_t numBytes, uint32 flags) 5434 { 5435 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5436 } 5437 5438 5439 /*! Unwires an address range previously wired with lock_memory_etc(). 5440 5441 Note that a call to this function must balance a previous lock_memory_etc() 5442 call with exactly the same parameters. 5443 */ 5444 status_t 5445 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5446 { 5447 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5448 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5449 5450 // compute the page protection that is required 5451 bool isUser = IS_USER_ADDRESS(address); 5452 bool writable = (flags & B_READ_DEVICE) == 0; 5453 uint32 requiredProtection = PAGE_PRESENT 5454 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5455 if (writable) 5456 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5457 5458 uint32 mallocFlags = isUser 5459 ? 
0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5460 5461 // get and read lock the address space 5462 VMAddressSpace* addressSpace = NULL; 5463 if (isUser) { 5464 if (team == B_CURRENT_TEAM) 5465 addressSpace = VMAddressSpace::GetCurrent(); 5466 else 5467 addressSpace = VMAddressSpace::Get(team); 5468 } else 5469 addressSpace = VMAddressSpace::GetKernel(); 5470 if (addressSpace == NULL) 5471 return B_ERROR; 5472 5473 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5474 5475 VMTranslationMap* map = addressSpace->TranslationMap(); 5476 status_t error = B_OK; 5477 5478 // iterate through all concerned areas 5479 addr_t nextAddress = lockBaseAddress; 5480 while (nextAddress != lockEndAddress) { 5481 // get the next area 5482 VMArea* area = addressSpace->LookupArea(nextAddress); 5483 if (area == NULL) { 5484 error = B_BAD_ADDRESS; 5485 break; 5486 } 5487 5488 addr_t areaStart = nextAddress; 5489 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5490 5491 // Lock the area's top cache. This is a requirement for 5492 // VMArea::Unwire(). 5493 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5494 5495 // Depending on the area cache type and the wiring, we may not need to 5496 // look at the individual pages. 5497 if (area->cache_type == CACHE_TYPE_NULL 5498 || area->cache_type == CACHE_TYPE_DEVICE 5499 || area->wiring == B_FULL_LOCK 5500 || area->wiring == B_CONTIGUOUS) { 5501 // unwire the range (to avoid deadlocks we delete the range after 5502 // unlocking the cache) 5503 nextAddress = areaEnd; 5504 VMAreaWiredRange* range = area->Unwire(areaStart, 5505 areaEnd - areaStart, writable); 5506 cacheChainLocker.Unlock(); 5507 if (range != NULL) { 5508 range->~VMAreaWiredRange(); 5509 free_etc(range, mallocFlags); 5510 } 5511 continue; 5512 } 5513 5514 // Lock the area's cache chain and the translation map. Needed to look 5515 // up pages and play with their wired count. 5516 cacheChainLocker.LockAllSourceCaches(); 5517 map->Lock(); 5518 5519 // iterate through the pages and unwire them 5520 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5521 phys_addr_t physicalAddress; 5522 uint32 flags; 5523 5524 vm_page* page; 5525 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5526 && (flags & PAGE_PRESENT) != 0 5527 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5528 != NULL) { 5529 // Already mapped with the correct permissions -- just increment 5530 // the page's wired count. 5531 decrement_page_wired_count(page); 5532 } else { 5533 panic("unlock_memory_etc(): Failed to unwire page: address " 5534 "space %p, address: %#" B_PRIxADDR, addressSpace, 5535 nextAddress); 5536 error = B_BAD_VALUE; 5537 break; 5538 } 5539 } 5540 5541 map->Unlock(); 5542 5543 // All pages are unwired. Remove the area's wired range as well (to 5544 // avoid deadlocks we delete the range after unlocking the cache). 5545 VMAreaWiredRange* range = area->Unwire(areaStart, 5546 areaEnd - areaStart, writable); 5547 5548 cacheChainLocker.Unlock(); 5549 5550 if (range != NULL) { 5551 range->~VMAreaWiredRange(); 5552 free_etc(range, mallocFlags); 5553 } 5554 5555 if (error != B_OK) 5556 break; 5557 } 5558 5559 // get rid of the address space reference 5560 addressSpace->Put(); 5561 5562 return error; 5563 } 5564 5565 5566 status_t 5567 unlock_memory(void* address, size_t numBytes, uint32 flags) 5568 { 5569 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5570 } 5571 5572 5573 /*! 
Similar to get_memory_map(), but also allows to specify the address space 5574 for the memory in question and has a saner semantics. 5575 Returns \c B_OK when the complete range could be translated or 5576 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5577 case the actual number of entries is written to \c *_numEntries. Any other 5578 error case indicates complete failure; \c *_numEntries will be set to \c 0 5579 in this case. 5580 */ 5581 status_t 5582 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5583 physical_entry* table, uint32* _numEntries) 5584 { 5585 uint32 numEntries = *_numEntries; 5586 *_numEntries = 0; 5587 5588 VMAddressSpace* addressSpace; 5589 addr_t virtualAddress = (addr_t)address; 5590 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5591 phys_addr_t physicalAddress; 5592 status_t status = B_OK; 5593 int32 index = -1; 5594 addr_t offset = 0; 5595 bool interrupts = are_interrupts_enabled(); 5596 5597 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5598 address, numBytes, numEntries)); 5599 5600 if (numEntries == 0 || numBytes == 0) 5601 return B_BAD_VALUE; 5602 5603 // in which address space is the address to be found? 5604 if (IS_USER_ADDRESS(virtualAddress)) { 5605 if (team == B_CURRENT_TEAM) 5606 addressSpace = VMAddressSpace::GetCurrent(); 5607 else 5608 addressSpace = VMAddressSpace::Get(team); 5609 } else 5610 addressSpace = VMAddressSpace::GetKernel(); 5611 5612 if (addressSpace == NULL) 5613 return B_ERROR; 5614 5615 VMTranslationMap* map = addressSpace->TranslationMap(); 5616 5617 if (interrupts) 5618 map->Lock(); 5619 5620 while (offset < numBytes) { 5621 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5622 uint32 flags; 5623 5624 if (interrupts) { 5625 status = map->Query((addr_t)address + offset, &physicalAddress, 5626 &flags); 5627 } else { 5628 status = map->QueryInterrupt((addr_t)address + offset, 5629 &physicalAddress, &flags); 5630 } 5631 if (status < B_OK) 5632 break; 5633 if ((flags & PAGE_PRESENT) == 0) { 5634 panic("get_memory_map() called on unmapped memory!"); 5635 return B_BAD_ADDRESS; 5636 } 5637 5638 if (index < 0 && pageOffset > 0) { 5639 physicalAddress += pageOffset; 5640 if (bytes > B_PAGE_SIZE - pageOffset) 5641 bytes = B_PAGE_SIZE - pageOffset; 5642 } 5643 5644 // need to switch to the next physical_entry? 5645 if (index < 0 || table[index].address 5646 != physicalAddress - table[index].size) { 5647 if ((uint32)++index + 1 > numEntries) { 5648 // table to small 5649 break; 5650 } 5651 table[index].address = physicalAddress; 5652 table[index].size = bytes; 5653 } else { 5654 // page does fit in current entry 5655 table[index].size += bytes; 5656 } 5657 5658 offset += bytes; 5659 } 5660 5661 if (interrupts) 5662 map->Unlock(); 5663 5664 if (status != B_OK) 5665 return status; 5666 5667 if ((uint32)index + 1 > numEntries) { 5668 *_numEntries = index; 5669 return B_BUFFER_OVERFLOW; 5670 } 5671 5672 *_numEntries = index + 1; 5673 return B_OK; 5674 } 5675 5676 5677 /*! According to the BeBook, this function should always succeed. 5678 This is no longer the case. 
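	It may, for instance, return \c B_BUFFER_OVERFLOW when the provided table
	is too small, or pass on an error from get_memory_map_etc().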
5679 */ 5680 extern "C" int32 5681 __get_memory_map_haiku(const void* address, size_t numBytes, 5682 physical_entry* table, int32 numEntries) 5683 { 5684 uint32 entriesRead = numEntries; 5685 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5686 table, &entriesRead); 5687 if (error != B_OK) 5688 return error; 5689 5690 // close the entry list 5691 5692 // if it's only one entry, we will silently accept the missing ending 5693 if (numEntries == 1) 5694 return B_OK; 5695 5696 if (entriesRead + 1 > (uint32)numEntries) 5697 return B_BUFFER_OVERFLOW; 5698 5699 table[entriesRead].address = 0; 5700 table[entriesRead].size = 0; 5701 5702 return B_OK; 5703 } 5704 5705 5706 area_id 5707 area_for(void* address) 5708 { 5709 return vm_area_for((addr_t)address, true); 5710 } 5711 5712 5713 area_id 5714 find_area(const char* name) 5715 { 5716 return VMAreaHash::Find(name); 5717 } 5718 5719 5720 status_t 5721 _get_area_info(area_id id, area_info* info, size_t size) 5722 { 5723 if (size != sizeof(area_info) || info == NULL) 5724 return B_BAD_VALUE; 5725 5726 AddressSpaceReadLocker locker; 5727 VMArea* area; 5728 status_t status = locker.SetFromArea(id, area); 5729 if (status != B_OK) 5730 return status; 5731 5732 fill_area_info(area, info, size); 5733 return B_OK; 5734 } 5735 5736 5737 status_t 5738 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 5739 { 5740 addr_t nextBase = *(addr_t*)cookie; 5741 5742 // we're already through the list 5743 if (nextBase == (addr_t)-1) 5744 return B_ENTRY_NOT_FOUND; 5745 5746 if (team == B_CURRENT_TEAM) 5747 team = team_get_current_team_id(); 5748 5749 AddressSpaceReadLocker locker(team); 5750 if (!locker.IsLocked()) 5751 return B_BAD_TEAM_ID; 5752 5753 VMArea* area; 5754 for (VMAddressSpace::AreaIterator it 5755 = locker.AddressSpace()->GetAreaIterator(); 5756 (area = it.Next()) != NULL;) { 5757 if (area->Base() > nextBase) 5758 break; 5759 } 5760 5761 if (area == NULL) { 5762 nextBase = (addr_t)-1; 5763 return B_ENTRY_NOT_FOUND; 5764 } 5765 5766 fill_area_info(area, info, size); 5767 *cookie = (int32)(area->Base()); 5768 // TODO: Not 64 bit safe! 5769 5770 return B_OK; 5771 } 5772 5773 5774 status_t 5775 set_area_protection(area_id area, uint32 newProtection) 5776 { 5777 fix_protection(&newProtection); 5778 5779 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5780 newProtection, true); 5781 } 5782 5783 5784 status_t 5785 resize_area(area_id areaID, size_t newSize) 5786 { 5787 return vm_resize_area(areaID, newSize, true); 5788 } 5789 5790 5791 /*! Transfers the specified area to a new team. The caller must be the owner 5792 of the area. 5793 */ 5794 area_id 5795 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5796 bool kernel) 5797 { 5798 area_info info; 5799 status_t status = get_area_info(id, &info); 5800 if (status != B_OK) 5801 return status; 5802 5803 if (info.team != thread_get_current_thread()->team->id) 5804 return B_PERMISSION_DENIED; 5805 5806 area_id clonedArea = vm_clone_area(target, info.name, _address, 5807 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5808 if (clonedArea < 0) 5809 return clonedArea; 5810 5811 status = vm_delete_area(info.team, id, kernel); 5812 if (status != B_OK) { 5813 vm_delete_area(target, clonedArea, kernel); 5814 return status; 5815 } 5816 5817 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
5818 5819 return clonedArea; 5820 } 5821 5822 5823 extern "C" area_id 5824 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 5825 size_t numBytes, uint32 addressSpec, uint32 protection, 5826 void** _virtualAddress) 5827 { 5828 if (!arch_vm_supports_protection(protection)) 5829 return B_NOT_SUPPORTED; 5830 5831 fix_protection(&protection); 5832 5833 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5834 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 5835 false); 5836 } 5837 5838 5839 area_id 5840 clone_area(const char* name, void** _address, uint32 addressSpec, 5841 uint32 protection, area_id source) 5842 { 5843 if ((protection & B_KERNEL_PROTECTION) == 0) 5844 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5845 5846 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5847 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5848 } 5849 5850 5851 area_id 5852 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock, 5853 uint32 protection, uint32 flags, 5854 const virtual_address_restrictions* virtualAddressRestrictions, 5855 const physical_address_restrictions* physicalAddressRestrictions, 5856 void** _address) 5857 { 5858 fix_protection(&protection); 5859 5860 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 5861 virtualAddressRestrictions, physicalAddressRestrictions, true, 5862 _address); 5863 } 5864 5865 5866 extern "C" area_id 5867 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 5868 size_t size, uint32 lock, uint32 protection) 5869 { 5870 fix_protection(&protection); 5871 5872 virtual_address_restrictions virtualRestrictions = {}; 5873 virtualRestrictions.address = *_address; 5874 virtualRestrictions.address_specification = addressSpec; 5875 physical_address_restrictions physicalRestrictions = {}; 5876 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 5877 lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true, 5878 _address); 5879 } 5880 5881 5882 status_t 5883 delete_area(area_id area) 5884 { 5885 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5886 } 5887 5888 5889 // #pragma mark - Userland syscalls 5890 5891 5892 status_t 5893 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5894 addr_t size) 5895 { 5896 // filter out some unavailable values (for userland) 5897 switch (addressSpec) { 5898 case B_ANY_KERNEL_ADDRESS: 5899 case B_ANY_KERNEL_BLOCK_ADDRESS: 5900 return B_BAD_VALUE; 5901 } 5902 5903 addr_t address; 5904 5905 if (!IS_USER_ADDRESS(userAddress) 5906 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5907 return B_BAD_ADDRESS; 5908 5909 status_t status = vm_reserve_address_range( 5910 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5911 RESERVED_AVOID_BASE); 5912 if (status != B_OK) 5913 return status; 5914 5915 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5916 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5917 (void*)address, size); 5918 return B_BAD_ADDRESS; 5919 } 5920 5921 return B_OK; 5922 } 5923 5924 5925 status_t 5926 _user_unreserve_address_range(addr_t address, addr_t size) 5927 { 5928 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5929 (void*)address, size); 5930 } 5931 5932 5933 area_id 5934 _user_area_for(void* address) 5935 { 5936 return vm_area_for((addr_t)address, false); 5937 } 5938 5939 5940 area_id 5941 _user_find_area(const char* userName) 5942 
{ 5943 char name[B_OS_NAME_LENGTH]; 5944 5945 if (!IS_USER_ADDRESS(userName) 5946 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5947 return B_BAD_ADDRESS; 5948 5949 return find_area(name); 5950 } 5951 5952 5953 status_t 5954 _user_get_area_info(area_id area, area_info* userInfo) 5955 { 5956 if (!IS_USER_ADDRESS(userInfo)) 5957 return B_BAD_ADDRESS; 5958 5959 area_info info; 5960 status_t status = get_area_info(area, &info); 5961 if (status < B_OK) 5962 return status; 5963 5964 // TODO: do we want to prevent userland from seeing kernel protections? 5965 //info.protection &= B_USER_PROTECTION; 5966 5967 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5968 return B_BAD_ADDRESS; 5969 5970 return status; 5971 } 5972 5973 5974 status_t 5975 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo) 5976 { 5977 int32 cookie; 5978 5979 if (!IS_USER_ADDRESS(userCookie) 5980 || !IS_USER_ADDRESS(userInfo) 5981 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 5982 return B_BAD_ADDRESS; 5983 5984 area_info info; 5985 status_t status = _get_next_area_info(team, &cookie, &info, 5986 sizeof(area_info)); 5987 if (status != B_OK) 5988 return status; 5989 5990 //info.protection &= B_USER_PROTECTION; 5991 5992 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 5993 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5994 return B_BAD_ADDRESS; 5995 5996 return status; 5997 } 5998 5999 6000 status_t 6001 _user_set_area_protection(area_id area, uint32 newProtection) 6002 { 6003 if ((newProtection & ~B_USER_PROTECTION) != 0) 6004 return B_BAD_VALUE; 6005 6006 fix_protection(&newProtection); 6007 6008 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6009 newProtection, false); 6010 } 6011 6012 6013 status_t 6014 _user_resize_area(area_id area, size_t newSize) 6015 { 6016 // TODO: Since we restrict deleting of areas to those owned by the team, 6017 // we should also do that for resizing (check other functions, too). 
6018 return vm_resize_area(area, newSize, false); 6019 } 6020 6021 6022 area_id 6023 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6024 team_id target) 6025 { 6026 // filter out some unavailable values (for userland) 6027 switch (addressSpec) { 6028 case B_ANY_KERNEL_ADDRESS: 6029 case B_ANY_KERNEL_BLOCK_ADDRESS: 6030 return B_BAD_VALUE; 6031 } 6032 6033 void* address; 6034 if (!IS_USER_ADDRESS(userAddress) 6035 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6036 return B_BAD_ADDRESS; 6037 6038 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6039 if (newArea < B_OK) 6040 return newArea; 6041 6042 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6043 return B_BAD_ADDRESS; 6044 6045 return newArea; 6046 } 6047 6048 6049 area_id 6050 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6051 uint32 protection, area_id sourceArea) 6052 { 6053 char name[B_OS_NAME_LENGTH]; 6054 void* address; 6055 6056 // filter out some unavailable values (for userland) 6057 switch (addressSpec) { 6058 case B_ANY_KERNEL_ADDRESS: 6059 case B_ANY_KERNEL_BLOCK_ADDRESS: 6060 return B_BAD_VALUE; 6061 } 6062 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6063 return B_BAD_VALUE; 6064 6065 if (!IS_USER_ADDRESS(userName) 6066 || !IS_USER_ADDRESS(userAddress) 6067 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6068 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6069 return B_BAD_ADDRESS; 6070 6071 fix_protection(&protection); 6072 6073 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6074 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6075 false); 6076 if (clonedArea < B_OK) 6077 return clonedArea; 6078 6079 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6080 delete_area(clonedArea); 6081 return B_BAD_ADDRESS; 6082 } 6083 6084 return clonedArea; 6085 } 6086 6087 6088 area_id 6089 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6090 size_t size, uint32 lock, uint32 protection) 6091 { 6092 char name[B_OS_NAME_LENGTH]; 6093 void* address; 6094 6095 // filter out some unavailable values (for userland) 6096 switch (addressSpec) { 6097 case B_ANY_KERNEL_ADDRESS: 6098 case B_ANY_KERNEL_BLOCK_ADDRESS: 6099 return B_BAD_VALUE; 6100 } 6101 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6102 return B_BAD_VALUE; 6103 6104 if (!IS_USER_ADDRESS(userName) 6105 || !IS_USER_ADDRESS(userAddress) 6106 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6107 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6108 return B_BAD_ADDRESS; 6109 6110 if (addressSpec == B_EXACT_ADDRESS 6111 && IS_KERNEL_ADDRESS(address)) 6112 return B_BAD_VALUE; 6113 6114 fix_protection(&protection); 6115 6116 virtual_address_restrictions virtualRestrictions = {}; 6117 virtualRestrictions.address = address; 6118 virtualRestrictions.address_specification = addressSpec; 6119 physical_address_restrictions physicalRestrictions = {}; 6120 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6121 size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions, 6122 false, &address); 6123 6124 if (area >= B_OK 6125 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6126 delete_area(area); 6127 return B_BAD_ADDRESS; 6128 } 6129 6130 return area; 6131 } 6132 6133 6134 status_t 6135 _user_delete_area(area_id area) 6136 { 6137 // Unlike the BeOS implementation, you can now only delete areas 6138 // 
that you have created yourself from userland. 6139 // The documentation to delete_area() explicitly states that this 6140 // will be restricted in the future, and so it will. 6141 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6142 } 6143 6144 6145 // TODO: create a BeOS style call for this! 6146 6147 area_id 6148 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6149 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6150 int fd, off_t offset) 6151 { 6152 char name[B_OS_NAME_LENGTH]; 6153 void* address; 6154 area_id area; 6155 6156 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6157 return B_BAD_VALUE; 6158 6159 fix_protection(&protection); 6160 6161 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6162 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6163 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6164 return B_BAD_ADDRESS; 6165 6166 if (addressSpec == B_EXACT_ADDRESS) { 6167 if ((addr_t)address + size < (addr_t)address 6168 || (addr_t)address % B_PAGE_SIZE != 0) { 6169 return B_BAD_VALUE; 6170 } 6171 if (!IS_USER_ADDRESS(address) 6172 || !IS_USER_ADDRESS((addr_t)address + size)) { 6173 return B_BAD_ADDRESS; 6174 } 6175 } 6176 6177 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6178 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6179 false); 6180 if (area < B_OK) 6181 return area; 6182 6183 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6184 return B_BAD_ADDRESS; 6185 6186 return area; 6187 } 6188 6189 6190 status_t 6191 _user_unmap_memory(void* _address, size_t size) 6192 { 6193 addr_t address = (addr_t)_address; 6194 6195 // check params 6196 if (size == 0 || (addr_t)address + size < (addr_t)address 6197 || (addr_t)address % B_PAGE_SIZE != 0) { 6198 return B_BAD_VALUE; 6199 } 6200 6201 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 6202 return B_BAD_ADDRESS; 6203 6204 // Write lock the address space and ensure the address range is not wired. 6205 AddressSpaceWriteLocker locker; 6206 do { 6207 status_t status = locker.SetTo(team_get_current_team_id()); 6208 if (status != B_OK) 6209 return status; 6210 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6211 size, &locker)); 6212 6213 // unmap 6214 return unmap_address_range(locker.AddressSpace(), address, size, false); 6215 } 6216 6217 6218 status_t 6219 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6220 { 6221 // check address range 6222 addr_t address = (addr_t)_address; 6223 size = PAGE_ALIGN(size); 6224 6225 if ((address % B_PAGE_SIZE) != 0) 6226 return B_BAD_VALUE; 6227 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6228 || !IS_USER_ADDRESS((addr_t)address + size)) { 6229 // weird error code required by POSIX 6230 return ENOMEM; 6231 } 6232 6233 // extend and check protection 6234 if ((protection & ~B_USER_PROTECTION) != 0) 6235 return B_BAD_VALUE; 6236 6237 fix_protection(&protection); 6238 6239 // We need to write lock the address space, since we're going to play with 6240 // the areas. Also make sure that none of the areas is wired and that we're 6241 // actually allowed to change the protection. 
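// The work is split into two passes: the first one only verifies that the
// whole range is covered by areas we are allowed to modify, restarting
// whenever we had to unlock and wait for a wired range to go away; the
// second one installs the per-page protections and adjusts the mappings.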
6242 AddressSpaceWriteLocker locker; 6243 6244 bool restart; 6245 do { 6246 restart = false; 6247 6248 status_t status = locker.SetTo(team_get_current_team_id()); 6249 if (status != B_OK) 6250 return status; 6251 6252 // First round: Check whether the whole range is covered by areas and we 6253 // are allowed to modify them. 6254 addr_t currentAddress = address; 6255 size_t sizeLeft = size; 6256 while (sizeLeft > 0) { 6257 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6258 if (area == NULL) 6259 return B_NO_MEMORY; 6260 6261 if ((area->protection & B_KERNEL_AREA) != 0) 6262 return B_NOT_ALLOWED; 6263 6264 AreaCacheLocker cacheLocker(area); 6265 6266 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) { 6267 restart = true; 6268 break; 6269 } 6270 6271 cacheLocker.Unlock(); 6272 6273 // TODO: For (shared) mapped files we should check whether the new 6274 // protections are compatible with the file permissions. We don't 6275 // have a way to do that yet, though. 6276 6277 addr_t offset = currentAddress - area->Base(); 6278 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6279 6280 currentAddress += rangeSize; 6281 sizeLeft -= rangeSize; 6282 } 6283 } while (restart); 6284 6285 // Second round: If the protections differ from that of the area, create a 6286 // page protection array and re-map mapped pages. 6287 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6288 addr_t currentAddress = address; 6289 size_t sizeLeft = size; 6290 while (sizeLeft > 0) { 6291 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6292 if (area == NULL) 6293 return B_NO_MEMORY; 6294 6295 addr_t offset = currentAddress - area->Base(); 6296 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6297 6298 currentAddress += rangeSize; 6299 sizeLeft -= rangeSize; 6300 6301 if (area->page_protections == NULL) { 6302 if (area->protection == protection) 6303 continue; 6304 6305 status_t status = allocate_area_page_protections(area); 6306 if (status != B_OK) 6307 return status; 6308 } 6309 6310 // We need to lock the complete cache chain, since we potentially unmap 6311 // pages of lower caches. 6312 VMCache* topCache = vm_area_get_locked_cache(area); 6313 VMCacheChainLocker cacheChainLocker(topCache); 6314 cacheChainLocker.LockAllSourceCaches(); 6315 6316 for (addr_t pageAddress = area->Base() + offset; 6317 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6318 map->Lock(); 6319 6320 set_area_page_protection(area, pageAddress, protection); 6321 6322 phys_addr_t physicalAddress; 6323 uint32 flags; 6324 6325 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6326 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6327 map->Unlock(); 6328 continue; 6329 } 6330 6331 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6332 if (page == NULL) { 6333 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6334 "\n", area, physicalAddress); 6335 map->Unlock(); 6336 return B_ERROR; 6337 } 6338 6339 // If the page is not in the topmost cache and write access is 6340 // requested, we have to unmap it. Otherwise we can re-map it with 6341 // the new protection. 
6342 bool unmapPage = page->Cache() != topCache 6343 && (protection & B_WRITE_AREA) != 0; 6344 6345 if (!unmapPage) 6346 map->ProtectPage(area, pageAddress, protection); 6347 6348 map->Unlock(); 6349 6350 if (unmapPage) { 6351 DEBUG_PAGE_ACCESS_START(page); 6352 unmap_page(area, pageAddress); 6353 DEBUG_PAGE_ACCESS_END(page); 6354 } 6355 } 6356 } 6357 6358 return B_OK; 6359 } 6360 6361 6362 status_t 6363 _user_sync_memory(void* _address, size_t size, uint32 flags) 6364 { 6365 addr_t address = (addr_t)_address; 6366 size = PAGE_ALIGN(size); 6367 6368 // check params 6369 if ((address % B_PAGE_SIZE) != 0) 6370 return B_BAD_VALUE; 6371 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 6372 || !IS_USER_ADDRESS((addr_t)address + size)) { 6373 // weird error code required by POSIX 6374 return ENOMEM; 6375 } 6376 6377 bool writeSync = (flags & MS_SYNC) != 0; 6378 bool writeAsync = (flags & MS_ASYNC) != 0; 6379 if (writeSync && writeAsync) 6380 return B_BAD_VALUE; 6381 6382 if (size == 0 || (!writeSync && !writeAsync)) 6383 return B_OK; 6384 6385 // iterate through the range and sync all concerned areas 6386 while (size > 0) { 6387 // read lock the address space 6388 AddressSpaceReadLocker locker; 6389 status_t error = locker.SetTo(team_get_current_team_id()); 6390 if (error != B_OK) 6391 return error; 6392 6393 // get the first area 6394 VMArea* area = locker.AddressSpace()->LookupArea(address); 6395 if (area == NULL) 6396 return B_NO_MEMORY; 6397 6398 uint32 offset = address - area->Base(); 6399 size_t rangeSize = min_c(area->Size() - offset, size); 6400 offset += area->cache_offset; 6401 6402 // lock the cache 6403 AreaCacheLocker cacheLocker(area); 6404 if (!cacheLocker) 6405 return B_BAD_VALUE; 6406 VMCache* cache = area->cache; 6407 6408 locker.Unlock(); 6409 6410 uint32 firstPage = offset >> PAGE_SHIFT; 6411 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6412 6413 // write the pages 6414 if (cache->type == CACHE_TYPE_VNODE) { 6415 if (writeSync) { 6416 // synchronous 6417 error = vm_page_write_modified_page_range(cache, firstPage, 6418 endPage); 6419 if (error != B_OK) 6420 return error; 6421 } else { 6422 // asynchronous 6423 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6424 // TODO: This is probably not quite what is supposed to happen. 6425 // Especially when a lot has to be written, it might take ages 6426 // until it really hits the disk. 6427 } 6428 } 6429 6430 address += rangeSize; 6431 size -= rangeSize; 6432 } 6433 6434 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6435 // synchronize multiple mappings of the same file. In our VM they never get 6436 // out of sync, though, so we don't have to do anything. 6437 6438 return B_OK; 6439 } 6440 6441 6442 status_t 6443 _user_memory_advice(void* address, size_t size, uint32 advice) 6444 { 6445 // TODO: Implement! 
6446 return B_OK; 6447 } 6448 6449 6450 status_t 6451 _user_get_memory_properties(team_id teamID, const void* address, 6452 uint32* _protected, uint32* _lock) 6453 { 6454 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6455 return B_BAD_ADDRESS; 6456 6457 AddressSpaceReadLocker locker; 6458 status_t error = locker.SetTo(teamID); 6459 if (error != B_OK) 6460 return error; 6461 6462 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6463 if (area == NULL) 6464 return B_NO_MEMORY; 6465 6466 6467 uint32 protection = area->protection; 6468 if (area->page_protections != NULL) 6469 protection = get_area_page_protection(area, (addr_t)address); 6470 6471 uint32 wiring = area->wiring; 6472 6473 locker.Unlock(); 6474 6475 error = user_memcpy(_protected, &protection, sizeof(protection)); 6476 if (error != B_OK) 6477 return error; 6478 6479 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6480 6481 return error; 6482 } 6483 6484 6485 // #pragma mark -- compatibility 6486 6487 6488 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6489 6490 6491 struct physical_entry_beos { 6492 uint32 address; 6493 uint32 size; 6494 }; 6495 6496 6497 /*! The physical_entry structure has changed. We need to translate it to the 6498 old one. 6499 */ 6500 extern "C" int32 6501 __get_memory_map_beos(const void* _address, size_t numBytes, 6502 physical_entry_beos* table, int32 numEntries) 6503 { 6504 if (numEntries <= 0) 6505 return B_BAD_VALUE; 6506 6507 const uint8* address = (const uint8*)_address; 6508 6509 int32 count = 0; 6510 while (numBytes > 0 && count < numEntries) { 6511 physical_entry entry; 6512 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 6513 if (result < 0) { 6514 if (result != B_BUFFER_OVERFLOW) 6515 return result; 6516 } 6517 6518 if (entry.address >= (phys_addr_t)1 << 32) { 6519 panic("get_memory_map(): Address is greater 4 GB!"); 6520 return B_ERROR; 6521 } 6522 6523 table[count].address = entry.address; 6524 table[count++].size = entry.size; 6525 6526 address += entry.size; 6527 numBytes -= entry.size; 6528 } 6529 6530 // null-terminate the table, if possible 6531 if (count < numEntries) { 6532 table[count].address = 0; 6533 table[count].size = 0; 6534 } 6535 6536 return B_OK; 6537 } 6538 6539 6540 /*! The type of the \a physicalAddress parameter has changed from void* to 6541 phys_addr_t. 6542 */ 6543 extern "C" area_id 6544 __map_physical_memory_beos(const char* name, void* physicalAddress, 6545 size_t numBytes, uint32 addressSpec, uint32 protection, 6546 void** _virtualAddress) 6547 { 6548 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 6549 addressSpec, protection, _virtualAddress); 6550 } 6551 6552 6553 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 6554 we meddle with the \a lock parameter to force 32 bit. 
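(\c B_FULL_LOCK and \c B_LAZY_LOCK are mapped to \c B_32_BIT_FULL_LOCK,
\c B_CONTIGUOUS to \c B_32_BIT_CONTIGUOUS; \c B_NO_LOCK is passed through
unchanged.)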
6555 */ 6556 extern "C" area_id 6557 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 6558 size_t size, uint32 lock, uint32 protection) 6559 { 6560 switch (lock) { 6561 case B_NO_LOCK: 6562 break; 6563 case B_FULL_LOCK: 6564 case B_LAZY_LOCK: 6565 lock = B_32_BIT_FULL_LOCK; 6566 break; 6567 case B_CONTIGUOUS: 6568 lock = B_32_BIT_CONTIGUOUS; 6569 break; 6570 } 6571 6572 return __create_area_haiku(name, _address, addressSpec, size, lock, 6573 protection); 6574 } 6575 6576 6577 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 6578 "BASE"); 6579 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 6580 "map_physical_memory@", "BASE"); 6581 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 6582 "BASE"); 6583 6584 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6585 "get_memory_map@@", "1_ALPHA3"); 6586 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6587 "map_physical_memory@@", "1_ALPHA3"); 6588 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6589 "1_ALPHA3"); 6590 6591 6592 #else 6593 6594 6595 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6596 "get_memory_map@@", "BASE"); 6597 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6598 "map_physical_memory@@", "BASE"); 6599 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6600 "BASE"); 6601 6602 6603 #endif // defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32 6604