1 /* 2 * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <arch/cpu.h> 27 #include <arch/vm.h> 28 #include <boot/elf.h> 29 #include <boot/stage2.h> 30 #include <condition_variable.h> 31 #include <console.h> 32 #include <debug.h> 33 #include <file_cache.h> 34 #include <fs/fd.h> 35 #include <heap.h> 36 #include <kernel.h> 37 #include <int.h> 38 #include <lock.h> 39 #include <low_resource_manager.h> 40 #include <slab/Slab.h> 41 #include <smp.h> 42 #include <system_info.h> 43 #include <thread.h> 44 #include <team.h> 45 #include <tracing.h> 46 #include <util/AutoLock.h> 47 #include <util/khash.h> 48 #include <vm/vm_page.h> 49 #include <vm/vm_priv.h> 50 #include <vm/VMAddressSpace.h> 51 #include <vm/VMArea.h> 52 #include <vm/VMCache.h> 53 54 #include "VMAddressSpaceLocking.h" 55 #include "VMAnonymousCache.h" 56 #include "IORequest.h" 57 58 59 //#define TRACE_VM 60 //#define TRACE_FAULTS 61 #ifdef TRACE_VM 62 # define TRACE(x) dprintf x 63 #else 64 # define TRACE(x) ; 65 #endif 66 #ifdef TRACE_FAULTS 67 # define FTRACE(x) dprintf x 68 #else 69 # define FTRACE(x) ; 70 #endif 71 72 73 class AreaCacheLocking { 74 public: 75 inline bool Lock(VMCache* lockable) 76 { 77 return false; 78 } 79 80 inline void Unlock(VMCache* lockable) 81 { 82 vm_area_put_locked_cache(lockable); 83 } 84 }; 85 86 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 87 public: 88 inline AreaCacheLocker(VMCache* cache = NULL) 89 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 90 { 91 } 92 93 inline AreaCacheLocker(VMArea* area) 94 : AutoLocker<VMCache, AreaCacheLocking>() 95 { 96 SetTo(area); 97 } 98 99 inline void SetTo(VMCache* cache, bool alreadyLocked) 100 { 101 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 102 } 103 104 inline void SetTo(VMArea* area) 105 { 106 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 107 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 108 } 109 }; 110 111 112 class VMCacheChainLocker { 113 public: 114 VMCacheChainLocker() 115 : 116 fTopCache(NULL), 117 fBottomCache(NULL) 118 { 119 } 120 121 VMCacheChainLocker(VMCache* topCache) 122 : 123 fTopCache(topCache), 124 fBottomCache(topCache) 125 { 126 } 127 128 ~VMCacheChainLocker() 129 { 130 Unlock(); 131 } 132 133 void SetTo(VMCache* topCache) 134 { 135 fTopCache = topCache; 136 fBottomCache = topCache; 137 138 if (topCache != NULL) 139 topCache->SetUserData(NULL); 140 } 141 142 VMCache* LockSourceCache() 143 { 144 if (fBottomCache == NULL || fBottomCache->source == NULL) 145 return NULL; 146 147 VMCache* previousCache = fBottomCache; 148 149 fBottomCache = fBottomCache->source; 150 fBottomCache->Lock(); 151 fBottomCache->AcquireRefLocked(); 152 fBottomCache->SetUserData(previousCache); 153 154 return fBottomCache; 155 } 156 157 void LockAllSourceCaches() 158 { 159 while (LockSourceCache() != NULL) { 160 } 161 } 162 163 void Unlock(VMCache* exceptCache = NULL) 164 { 165 if (fTopCache == NULL) 166 return; 167 168 // Unlock caches in source -> consumer direction. 
This is important to 169 // avoid double-locking and a reversal of locking order in case a cache 170 // is eligable for merging. 171 VMCache* cache = fBottomCache; 172 while (cache != NULL) { 173 VMCache* nextCache = (VMCache*)cache->UserData(); 174 if (cache != exceptCache) 175 cache->ReleaseRefAndUnlock(cache != fTopCache); 176 177 if (cache == fTopCache) 178 break; 179 180 cache = nextCache; 181 } 182 183 fTopCache = NULL; 184 fBottomCache = NULL; 185 } 186 187 void UnlockKeepRefs(bool keepTopCacheLocked) 188 { 189 if (fTopCache == NULL) 190 return; 191 192 VMCache* nextCache = fBottomCache; 193 VMCache* cache = NULL; 194 195 while (keepTopCacheLocked 196 ? nextCache != fTopCache : cache != fTopCache) { 197 cache = nextCache; 198 nextCache = (VMCache*)cache->UserData(); 199 cache->Unlock(cache != fTopCache); 200 } 201 } 202 203 void RelockCaches(bool topCacheLocked) 204 { 205 if (fTopCache == NULL) 206 return; 207 208 VMCache* nextCache = fTopCache; 209 VMCache* cache = NULL; 210 if (topCacheLocked) { 211 cache = nextCache; 212 nextCache = cache->source; 213 } 214 215 while (cache != fBottomCache && nextCache != NULL) { 216 VMCache* consumer = cache; 217 cache = nextCache; 218 nextCache = cache->source; 219 cache->Lock(); 220 cache->SetUserData(consumer); 221 } 222 } 223 224 private: 225 VMCache* fTopCache; 226 VMCache* fBottomCache; 227 }; 228 229 230 // The memory reserve an allocation of the certain priority must not touch. 231 static const size_t kMemoryReserveForPriority[] = { 232 VM_MEMORY_RESERVE_USER, // user 233 VM_MEMORY_RESERVE_SYSTEM, // system 234 0 // VIP 235 }; 236 237 238 ObjectCache* gPageMappingsObjectCache; 239 240 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 241 242 static off_t sAvailableMemory; 243 static off_t sNeededMemory; 244 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 245 static uint32 sPageFaults; 246 247 static VMPhysicalPageMapper* sPhysicalPageMapper; 248 249 #if DEBUG_CACHE_LIST 250 251 struct cache_info { 252 VMCache* cache; 253 addr_t page_count; 254 addr_t committed; 255 }; 256 257 static const int kCacheInfoTableCount = 100 * 1024; 258 static cache_info* sCacheInfoTable; 259 260 #endif // DEBUG_CACHE_LIST 261 262 263 // function declarations 264 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 265 bool addressSpaceCleanup); 266 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 267 bool isWrite, bool isUser, vm_page** wirePage, 268 VMAreaWiredRange* wiredRange = NULL); 269 static status_t map_backing_store(VMAddressSpace* addressSpace, 270 VMCache* cache, void** _virtualAddress, off_t offset, addr_t size, 271 uint32 addressSpec, int wiring, int protection, int mapping, 272 VMArea** _area, const char* areaName, uint32 flags, bool kernel); 273 274 275 // #pragma mark - 276 277 278 #if VM_PAGE_FAULT_TRACING 279 280 namespace VMPageFaultTracing { 281 282 class PageFaultStart : public AbstractTraceEntry { 283 public: 284 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 285 : 286 fAddress(address), 287 fPC(pc), 288 fWrite(write), 289 fUser(user) 290 { 291 Initialized(); 292 } 293 294 virtual void AddDump(TraceOutput& out) 295 { 296 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 297 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 298 } 299 300 private: 301 addr_t fAddress; 302 addr_t fPC; 303 bool fWrite; 304 bool fUser; 305 }; 306 307 308 // page fault errors 309 enum { 310 PAGE_FAULT_ERROR_NO_AREA = 0, 311 PAGE_FAULT_ERROR_KERNEL_ONLY, 312 PAGE_FAULT_ERROR_WRITE_PROTECTED, 313 PAGE_FAULT_ERROR_READ_PROTECTED, 314 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 315 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 316 }; 317 318 319 class PageFaultError : public AbstractTraceEntry { 320 public: 321 PageFaultError(area_id area, status_t error) 322 : 323 fArea(area), 324 fError(error) 325 { 326 Initialized(); 327 } 328 329 virtual void AddDump(TraceOutput& out) 330 { 331 switch (fError) { 332 case PAGE_FAULT_ERROR_NO_AREA: 333 out.Print("page fault error: no area"); 334 break; 335 case PAGE_FAULT_ERROR_KERNEL_ONLY: 336 out.Print("page fault error: area: %ld, kernel only", fArea); 337 break; 338 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 339 out.Print("page fault error: area: %ld, write protected", 340 fArea); 341 break; 342 case PAGE_FAULT_ERROR_READ_PROTECTED: 343 out.Print("page fault error: area: %ld, read protected", fArea); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 346 out.Print("page fault error: kernel touching bad user memory"); 347 break; 348 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 349 out.Print("page fault error: no address space"); 350 break; 351 default: 352 out.Print("page fault error: area: %ld, error: %s", fArea, 353 strerror(fError)); 354 break; 355 } 356 } 357 358 private: 359 area_id fArea; 360 status_t fError; 361 }; 362 363 364 class PageFaultDone : public AbstractTraceEntry { 365 public: 366 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 367 vm_page* page) 368 : 369 fArea(area), 370 fTopCache(topCache), 371 fCache(cache), 372 fPage(page) 373 { 374 Initialized(); 375 } 376 377 virtual void AddDump(TraceOutput& out) 378 { 379 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 380 "page: %p", fArea, fTopCache, fCache, fPage); 381 } 382 383 private: 384 area_id fArea; 385 VMCache* fTopCache; 386 VMCache* fCache; 387 vm_page* fPage; 388 }; 389 390 } // namespace VMPageFaultTracing 391 392 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 393 #else 394 # define TPF(x) ; 395 #endif // VM_PAGE_FAULT_TRACING 396 397 398 // #pragma mark - 399 400 401 /*! The page's cache must be locked. 402 */ 403 static inline void 404 increment_page_wired_count(vm_page* page) 405 { 406 if (page->wired_count++ == 0 && page->mappings.IsEmpty()) 407 atomic_add(&gMappedPagesCount, 1); 408 } 409 410 411 /*! The page's cache must be locked. 412 */ 413 static inline void 414 decrement_page_wired_count(vm_page* page) 415 { 416 if (--page->wired_count == 0 && page->mappings.IsEmpty()) 417 atomic_add(&gMappedPagesCount, -1); 418 } 419 420 421 static inline addr_t 422 virtual_page_address(VMArea* area, vm_page* page) 423 { 424 return area->Base() 425 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 426 } 427 428 429 //! 
You need to have the address space locked when calling this function 430 static VMArea* 431 lookup_area(VMAddressSpace* addressSpace, area_id id) 432 { 433 VMAreaHash::ReadLock(); 434 435 VMArea* area = VMAreaHash::LookupLocked(id); 436 if (area != NULL && area->address_space != addressSpace) 437 area = NULL; 438 439 VMAreaHash::ReadUnlock(); 440 441 return area; 442 } 443 444 445 static inline void 446 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 447 { 448 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 449 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 450 uint8& entry = area->page_protections[pageIndex / 2]; 451 if (pageIndex % 2 == 0) 452 entry = (entry & 0xf0) | protection; 453 else 454 entry = (entry & 0x0f) | (protection << 4); 455 } 456 457 458 static inline uint32 459 get_area_page_protection(VMArea* area, addr_t pageAddress) 460 { 461 if (area->page_protections == NULL) 462 return area->protection; 463 464 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 465 uint32 protection = area->page_protections[pageIndex / 2]; 466 if (pageIndex % 2 == 0) 467 protection &= 0x0f; 468 else 469 protection >>= 4; 470 471 return protection | B_KERNEL_READ_AREA 472 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 473 } 474 475 476 /*! The caller must have reserved enough pages the translation map 477 implementation might need to map this page. 478 The page's cache must be locked. 479 */ 480 static status_t 481 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 482 vm_page_reservation* reservation) 483 { 484 VMTranslationMap* map = area->address_space->TranslationMap(); 485 486 bool wasMapped = page->wired_count > 0 || !page->mappings.IsEmpty(); 487 488 if (area->wiring == B_NO_LOCK) { 489 DEBUG_PAGE_ACCESS_CHECK(page); 490 491 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 492 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 493 gPageMappingsObjectCache, 494 CACHE_DONT_WAIT_FOR_MEMORY 495 | (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 496 if (mapping == NULL) 497 return B_NO_MEMORY; 498 499 mapping->page = page; 500 mapping->area = area; 501 502 map->Lock(); 503 504 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 505 reservation); 506 507 // insert mapping into lists 508 if (page->mappings.IsEmpty() && page->wired_count == 0) 509 atomic_add(&gMappedPagesCount, 1); 510 511 page->mappings.Add(mapping); 512 area->mappings.Add(mapping); 513 514 map->Unlock(); 515 } else { 516 DEBUG_PAGE_ACCESS_CHECK(page); 517 518 map->Lock(); 519 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 520 reservation); 521 map->Unlock(); 522 523 increment_page_wired_count(page); 524 } 525 526 if (!wasMapped) { 527 // The page is mapped now, so we must not remain in the cached queue. 528 // It also makes sense to move it from the inactive to the active, since 529 // otherwise the page daemon wouldn't come to keep track of it (in idle 530 // mode) -- if the page isn't touched, it will be deactivated after a 531 // full iteration through the queue at the latest. 532 if (page->State() == PAGE_STATE_CACHED 533 || page->State() == PAGE_STATE_INACTIVE) { 534 vm_page_set_state(page, PAGE_STATE_ACTIVE); 535 } 536 } 537 538 return B_OK; 539 } 540 541 542 /*! If \a preserveModified is \c true, the caller must hold the lock of the 543 page's cache. 
544 */ 545 static inline bool 546 unmap_page(VMArea* area, addr_t virtualAddress) 547 { 548 return area->address_space->TranslationMap()->UnmapPage(area, 549 virtualAddress, true); 550 } 551 552 553 /*! If \a preserveModified is \c true, the caller must hold the lock of all 554 mapped pages' caches. 555 */ 556 static inline void 557 unmap_pages(VMArea* area, addr_t base, size_t size) 558 { 559 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 560 } 561 562 563 /*! Cuts a piece out of an area. If the given cut range covers the complete 564 area, it is deleted. If it covers the beginning or the end, the area is 565 resized accordingly. If the range covers some part in the middle of the 566 area, it is split in two; in this case the second area is returned via 567 \a _secondArea (the variable is left untouched in the other cases). 568 The address space must be write locked. 569 The caller must ensure that no part of the given range is wired. 570 */ 571 static status_t 572 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 573 addr_t lastAddress, VMArea** _secondArea, bool kernel) 574 { 575 // Does the cut range intersect with the area at all? 576 addr_t areaLast = area->Base() + (area->Size() - 1); 577 if (area->Base() > lastAddress || areaLast < address) 578 return B_OK; 579 580 // Is the area fully covered? 581 if (area->Base() >= address && areaLast <= lastAddress) { 582 delete_area(addressSpace, area, false); 583 return B_OK; 584 } 585 586 int priority; 587 uint32 allocationFlags; 588 if (addressSpace == VMAddressSpace::Kernel()) { 589 priority = VM_PRIORITY_SYSTEM; 590 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 591 | HEAP_DONT_LOCK_KERNEL_SPACE; 592 } else { 593 priority = VM_PRIORITY_USER; 594 allocationFlags = 0; 595 } 596 597 VMCache* cache = vm_area_get_locked_cache(area); 598 VMCacheChainLocker cacheChainLocker(cache); 599 cacheChainLocker.LockAllSourceCaches(); 600 601 // Cut the end only? 602 if (areaLast <= lastAddress) { 603 size_t oldSize = area->Size(); 604 size_t newSize = address - area->Base(); 605 606 status_t error = addressSpace->ShrinkAreaTail(area, newSize, 607 allocationFlags); 608 if (error != B_OK) 609 return error; 610 611 // unmap pages 612 unmap_pages(area, address, oldSize - newSize); 613 614 // If no one else uses the area's cache, we can resize it, too. 615 if (cache->areas == area && area->cache_next == NULL 616 && list_is_empty(&cache->consumers)) { 617 // Since VMCache::Resize() can temporarily drop the lock, we must 618 // unlock all lower caches to prevent locking order inversion. 619 cacheChainLocker.Unlock(cache); 620 cache->Resize(cache->virtual_base + newSize, priority); 621 cache->ReleaseRefAndUnlock(); 622 } 623 624 return B_OK; 625 } 626 627 // Cut the beginning only? 628 if (area->Base() >= address) { 629 addr_t oldBase = area->Base(); 630 addr_t newBase = lastAddress + 1; 631 size_t newSize = areaLast - lastAddress; 632 633 // unmap pages 634 unmap_pages(area, oldBase, newBase - oldBase); 635 636 // resize the area 637 status_t error = addressSpace->ShrinkAreaHead(area, newSize, 638 allocationFlags); 639 if (error != B_OK) 640 return error; 641 642 // TODO: If no one else uses the area's cache, we should resize it, too! 643 644 area->cache_offset += newBase - oldBase; 645 646 return B_OK; 647 } 648 649 // The tough part -- cut a piece out of the middle of the area. 650 // We do that by shrinking the area to the begin section and creating a 651 // new area for the end section. 
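	// For illustration (hypothetical numbers, not from the original source):
	// cutting [0x14000, 0x17fff] out of an area at 0x10000 with size 0x10000
	// (areaLast = 0x1ffff) gives
	//   firstNewSize = 0x14000 - 0x10000 = 0x4000  -> area keeps [0x10000, 0x13fff]
	//   secondBase   = 0x17fff + 1       = 0x18000
	//   secondSize   = 0x1ffff - 0x17fff = 0x8000  -> new area [0x18000, 0x1ffff]
	// Both areas keep referencing the same cache; the second area's
	// cache_offset is simply shifted by (secondBase - area->Base()).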
652 653 addr_t firstNewSize = address - area->Base(); 654 addr_t secondBase = lastAddress + 1; 655 addr_t secondSize = areaLast - lastAddress; 656 657 // unmap pages 658 unmap_pages(area, address, area->Size() - firstNewSize); 659 660 // resize the area 661 addr_t oldSize = area->Size(); 662 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 663 allocationFlags); 664 if (error != B_OK) 665 return error; 666 667 // TODO: If no one else uses the area's cache, we might want to create a 668 // new cache for the second area, transfer the concerned pages from the 669 // first cache to it and resize the first cache. 670 671 // map the second area 672 VMArea* secondArea; 673 void* secondBaseAddress = (void*)secondBase; 674 error = map_backing_store(addressSpace, cache, &secondBaseAddress, 675 area->cache_offset + (secondBase - area->Base()), secondSize, 676 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 677 &secondArea, area->name, 0, kernel); 678 if (error != B_OK) { 679 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 680 return error; 681 } 682 683 // We need a cache reference for the new area. 684 cache->AcquireRefLocked(); 685 686 if (_secondArea != NULL) 687 *_secondArea = secondArea; 688 689 return B_OK; 690 } 691 692 693 /*! Deletes all areas in the given address range. 694 The address space must be write-locked. 695 The caller must ensure that no part of the given range is wired. 696 */ 697 static status_t 698 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 699 bool kernel) 700 { 701 size = PAGE_ALIGN(size); 702 addr_t lastAddress = address + (size - 1); 703 704 // Check, whether the caller is allowed to modify the concerned areas. 705 if (!kernel) { 706 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 707 VMArea* area = it.Next();) { 708 addr_t areaLast = area->Base() + (area->Size() - 1); 709 if (area->Base() < lastAddress && address < areaLast) { 710 if ((area->protection & B_KERNEL_AREA) != 0) 711 return B_NOT_ALLOWED; 712 } 713 } 714 } 715 716 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 717 VMArea* area = it.Next();) { 718 addr_t areaLast = area->Base() + (area->Size() - 1); 719 if (area->Base() < lastAddress && address < areaLast) { 720 status_t error = cut_area(addressSpace, area, address, 721 lastAddress, NULL, kernel); 722 if (error != B_OK) 723 return error; 724 // Failing after already messing with areas is ugly, but we 725 // can't do anything about it. 726 } 727 } 728 729 return B_OK; 730 } 731 732 733 /*! You need to hold the lock of the cache and the write lock of the address 734 space when calling this function. 735 Note, that in case of error your cache will be temporarily unlocked. 736 If \a addressSpec is \c B_EXACT_ADDRESS and the 737 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 738 that no part of the specified address range (base \c *_virtualAddress, size 739 \a size) is wired. 
740 */ 741 static status_t 742 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, 743 void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 744 int wiring, int protection, int mapping, VMArea** _area, 745 const char* areaName, uint32 flags, bool kernel) 746 { 747 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, " 748 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 749 "'%s'\n", addressSpace, cache, *_virtualAddress, offset, size, 750 addressSpec, wiring, protection, _area, areaName)); 751 cache->AssertLocked(); 752 753 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 754 | HEAP_DONT_LOCK_KERNEL_SPACE; 755 int priority; 756 if (addressSpace != VMAddressSpace::Kernel()) { 757 priority = VM_PRIORITY_USER; 758 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 759 priority = VM_PRIORITY_VIP; 760 allocationFlags |= HEAP_PRIORITY_VIP; 761 } else 762 priority = VM_PRIORITY_SYSTEM; 763 764 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 765 allocationFlags); 766 if (area == NULL) 767 return B_NO_MEMORY; 768 769 status_t status; 770 771 // if this is a private map, we need to create a new cache 772 // to handle the private copies of pages as they are written to 773 VMCache* sourceCache = cache; 774 if (mapping == REGION_PRIVATE_MAP) { 775 VMCache* newCache; 776 777 // create an anonymous cache 778 status = VMCacheFactory::CreateAnonymousCache(newCache, 779 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true, 780 VM_PRIORITY_USER); 781 if (status != B_OK) 782 goto err1; 783 784 newCache->Lock(); 785 newCache->temporary = 1; 786 newCache->scan_skip = cache->scan_skip; 787 newCache->virtual_base = offset; 788 newCache->virtual_end = offset + size; 789 790 cache->AddConsumer(newCache); 791 792 cache = newCache; 793 } 794 795 status = cache->SetMinimalCommitment(size, priority); 796 if (status != B_OK) 797 goto err2; 798 799 // check to see if this address space has entered DELETE state 800 if (addressSpace->IsBeingDeleted()) { 801 // okay, someone is trying to delete this address space now, so we can't 802 // insert the area, so back out 803 status = B_BAD_TEAM_ID; 804 goto err2; 805 } 806 807 if (addressSpec == B_EXACT_ADDRESS 808 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 809 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 810 size, kernel); 811 if (status != B_OK) 812 goto err2; 813 } 814 815 status = addressSpace->InsertArea(_virtualAddress, addressSpec, size, area, 816 allocationFlags); 817 if (status != B_OK) { 818 // TODO: wait and try again once this is working in the backend 819 #if 0 820 if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) { 821 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, 822 0, 0); 823 } 824 #endif 825 goto err2; 826 } 827 828 // attach the cache to the area 829 area->cache = cache; 830 area->cache_offset = offset; 831 832 // point the cache back to the area 833 cache->InsertAreaLocked(area); 834 if (mapping == REGION_PRIVATE_MAP) 835 cache->Unlock(); 836 837 // insert the area in the global area hash table 838 VMAreaHash::Insert(area); 839 840 // grab a ref to the address space (the area holds this) 841 addressSpace->Get(); 842 843 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 844 // cache, sourceCache, areaName, area); 845 846 *_area = area; 847 return B_OK; 848 849 err2: 850 if (mapping == REGION_PRIVATE_MAP) { 851 // We created this cache, so we must delete it again. 
Note, that we 852 // need to temporarily unlock the source cache or we'll otherwise 853 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 854 sourceCache->Unlock(); 855 cache->ReleaseRefAndUnlock(); 856 sourceCache->Lock(); 857 } 858 err1: 859 addressSpace->DeleteArea(area, allocationFlags); 860 return status; 861 } 862 863 864 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 865 locker1, locker2). 866 */ 867 template<typename LockerType1, typename LockerType2> 868 static inline bool 869 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 870 { 871 area->cache->AssertLocked(); 872 873 VMAreaUnwiredWaiter waiter; 874 if (!area->AddWaiterIfWired(&waiter)) 875 return false; 876 877 // unlock everything and wait 878 if (locker1 != NULL) 879 locker1->Unlock(); 880 if (locker2 != NULL) 881 locker2->Unlock(); 882 883 waiter.waitEntry.Wait(); 884 885 return true; 886 } 887 888 889 /*! Checks whether the given area has any wired ranges intersecting with the 890 specified range and waits, if so. 891 892 When it has to wait, the function calls \c Unlock() on both \a locker1 893 and \a locker2, if given. 894 The area's top cache must be locked and must be unlocked as a side effect 895 of calling \c Unlock() on either \a locker1 or \a locker2. 896 897 If the function does not have to wait it does not modify or unlock any 898 object. 899 900 \param area The area to be checked. 901 \param base The base address of the range to check. 902 \param size The size of the address range to check. 903 \param locker1 An object to be unlocked when before starting to wait (may 904 be \c NULL). 905 \param locker2 An object to be unlocked when before starting to wait (may 906 be \c NULL). 907 \return \c true, if the function had to wait, \c false otherwise. 908 */ 909 template<typename LockerType1, typename LockerType2> 910 static inline bool 911 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 912 LockerType1* locker1, LockerType2* locker2) 913 { 914 area->cache->AssertLocked(); 915 916 VMAreaUnwiredWaiter waiter; 917 if (!area->AddWaiterIfWired(&waiter, base, size)) 918 return false; 919 920 // unlock everything and wait 921 if (locker1 != NULL) 922 locker1->Unlock(); 923 if (locker2 != NULL) 924 locker2->Unlock(); 925 926 waiter.waitEntry.Wait(); 927 928 return true; 929 } 930 931 932 /*! Checks whether the given address space has any wired ranges intersecting 933 with the specified range and waits, if so. 934 935 Similar to wait_if_area_range_is_wired(), with the following differences: 936 - All areas intersecting with the range are checked (respectively all until 937 one is found that contains a wired range intersecting with the given 938 range). 939 - The given address space must at least be read-locked and must be unlocked 940 when \c Unlock() is called on \a locker. 941 - None of the areas' caches are allowed to be locked. 942 */ 943 template<typename LockerType> 944 static inline bool 945 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 946 size_t size, LockerType* locker) 947 { 948 addr_t end = base + size - 1; 949 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 950 VMArea* area = it.Next();) { 951 // TODO: Introduce a VMAddressSpace method to get a close iterator! 
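		// The iterator visits areas in ascending base-address order, which is
		// what makes the early return below safe: once an area starts beyond
		// the end of the requested range, no later area can intersect it.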
952 if (area->Base() > end) 953 return false; 954 955 if (base >= area->Base() + area->Size() - 1) 956 continue; 957 958 VMCache* cache = vm_area_get_locked_cache(area); 959 960 if (wait_if_area_range_is_wired(area, base, size, locker, cache)) 961 return true; 962 963 cache->Unlock(); 964 } 965 966 return false; 967 } 968 969 970 status_t 971 vm_block_address_range(const char* name, void* address, addr_t size) 972 { 973 if (!arch_vm_supports_protection(0)) 974 return B_NOT_SUPPORTED; 975 976 AddressSpaceWriteLocker locker; 977 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 978 if (status != B_OK) 979 return status; 980 981 VMAddressSpace* addressSpace = locker.AddressSpace(); 982 983 // create an anonymous cache 984 VMCache* cache; 985 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 986 VM_PRIORITY_SYSTEM); 987 if (status != B_OK) 988 return status; 989 990 cache->temporary = 1; 991 cache->virtual_end = size; 992 cache->scan_skip = 1; 993 cache->Lock(); 994 995 VMArea* area; 996 void* areaAddress = address; 997 status = map_backing_store(addressSpace, cache, &areaAddress, 0, size, 998 B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name, 999 0, true); 1000 if (status != B_OK) { 1001 cache->ReleaseRefAndUnlock(); 1002 return status; 1003 } 1004 1005 cache->Unlock(); 1006 area->cache_type = CACHE_TYPE_RAM; 1007 return area->id; 1008 } 1009 1010 1011 status_t 1012 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1013 { 1014 AddressSpaceWriteLocker locker(team); 1015 if (!locker.IsLocked()) 1016 return B_BAD_TEAM_ID; 1017 1018 VMAddressSpace* addressSpace = locker.AddressSpace(); 1019 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1020 addressSpace == VMAddressSpace::Kernel() 1021 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1022 } 1023 1024 1025 status_t 1026 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1027 addr_t size, uint32 flags) 1028 { 1029 if (size == 0) 1030 return B_BAD_VALUE; 1031 1032 AddressSpaceWriteLocker locker(team); 1033 if (!locker.IsLocked()) 1034 return B_BAD_TEAM_ID; 1035 1036 VMAddressSpace* addressSpace = locker.AddressSpace(); 1037 return addressSpace->ReserveAddressRange(_address, addressSpec, 1038 size, flags, 1039 addressSpace == VMAddressSpace::Kernel() 1040 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1041 } 1042 1043 1044 area_id 1045 vm_create_anonymous_area(team_id team, const char* name, void** address, 1046 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 1047 addr_t physicalAddress, uint32 flags, bool kernel) 1048 { 1049 VMArea* area; 1050 VMCache* cache; 1051 vm_page* page = NULL; 1052 bool isStack = (protection & B_STACK_AREA) != 0; 1053 page_num_t guardPages; 1054 bool canOvercommit = false; 1055 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1056 ? 
VM_PAGE_ALLOC_CLEAR : 0; 1057 1058 TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size)); 1059 1060 size = PAGE_ALIGN(size); 1061 1062 if (size == 0) 1063 return B_BAD_VALUE; 1064 if (!arch_vm_supports_protection(protection)) 1065 return B_NOT_SUPPORTED; 1066 1067 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1068 canOvercommit = true; 1069 1070 #ifdef DEBUG_KERNEL_STACKS 1071 if ((protection & B_KERNEL_STACK_AREA) != 0) 1072 isStack = true; 1073 #endif 1074 1075 // check parameters 1076 switch (addressSpec) { 1077 case B_ANY_ADDRESS: 1078 case B_EXACT_ADDRESS: 1079 case B_BASE_ADDRESS: 1080 case B_ANY_KERNEL_ADDRESS: 1081 case B_ANY_KERNEL_BLOCK_ADDRESS: 1082 break; 1083 case B_PHYSICAL_BASE_ADDRESS: 1084 physicalAddress = (addr_t)*address; 1085 addressSpec = B_ANY_KERNEL_ADDRESS; 1086 break; 1087 1088 default: 1089 return B_BAD_VALUE; 1090 } 1091 1092 if (physicalAddress != 0) 1093 wiring = B_CONTIGUOUS; 1094 1095 bool doReserveMemory = false; 1096 switch (wiring) { 1097 case B_NO_LOCK: 1098 break; 1099 case B_FULL_LOCK: 1100 case B_LAZY_LOCK: 1101 case B_CONTIGUOUS: 1102 doReserveMemory = true; 1103 break; 1104 case B_ALREADY_WIRED: 1105 break; 1106 case B_LOMEM: 1107 //case B_SLOWMEM: 1108 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 1109 wiring = B_FULL_LOCK; 1110 doReserveMemory = true; 1111 break; 1112 default: 1113 return B_BAD_VALUE; 1114 } 1115 1116 // For full lock or contiguous areas we're also going to map the pages and 1117 // thus need to reserve pages for the mapping backend upfront. 1118 addr_t reservedMapPages = 0; 1119 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1120 AddressSpaceWriteLocker locker; 1121 status_t status = locker.SetTo(team); 1122 if (status != B_OK) 1123 return status; 1124 1125 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1126 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1127 } 1128 1129 int priority; 1130 if (team != VMAddressSpace::KernelID()) 1131 priority = VM_PRIORITY_USER; 1132 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1133 priority = VM_PRIORITY_VIP; 1134 else 1135 priority = VM_PRIORITY_SYSTEM; 1136 1137 // Reserve memory before acquiring the address space lock. This reduces the 1138 // chances of failure, since while holding the write lock to the address 1139 // space (if it is the kernel address space that is), the low memory handler 1140 // won't be able to free anything for us. 1141 addr_t reservedMemory = 0; 1142 if (doReserveMemory) { 1143 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1144 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1145 return B_NO_MEMORY; 1146 reservedMemory = size; 1147 // TODO: We don't reserve the memory for the pages for the page 1148 // directories/tables. We actually need to do since we currently don't 1149 // reclaim them (and probably can't reclaim all of them anyway). Thus 1150 // there are actually less physical pages than there should be, which 1151 // can get the VM into trouble in low memory situations. 1152 } 1153 1154 AddressSpaceWriteLocker locker; 1155 VMAddressSpace* addressSpace; 1156 status_t status; 1157 1158 // For full lock areas reserve the pages before locking the address 1159 // space. E.g. block caches can't release their memory while we hold the 1160 // address space lock. 
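	// For illustration (hypothetical numbers, not from the original source),
	// assuming 4 KB pages: a 1 MB B_FULL_LOCK area reserves
	//   reservedMapPages              -- pages the translation map may need for
	//                                    page tables (MaxPagesNeededToMap())
	//   + 1 MB / B_PAGE_SIZE = 256    -- one physical page per page of the area.
	// A B_CONTIGUOUS area only reserves reservedMapPages here, since its data
	// pages come from vm_page_allocate_page_run() below; the remaining wiring
	// modes reserve no pages at this point.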
1161 page_num_t reservedPages = reservedMapPages; 1162 if (wiring == B_FULL_LOCK) 1163 reservedPages += size / B_PAGE_SIZE; 1164 1165 vm_page_reservation reservation; 1166 if (reservedPages > 0) { 1167 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1168 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1169 priority)) { 1170 reservedPages = 0; 1171 status = B_WOULD_BLOCK; 1172 goto err0; 1173 } 1174 } else 1175 vm_page_reserve_pages(&reservation, reservedPages, priority); 1176 } 1177 1178 // Lock the address space and, if B_EXACT_ADDRESS and 1179 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1180 // is not wired. 1181 do { 1182 status = locker.SetTo(team); 1183 if (status != B_OK) 1184 goto err0; 1185 1186 addressSpace = locker.AddressSpace(); 1187 } while (addressSpec == B_EXACT_ADDRESS 1188 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1189 && wait_if_address_range_is_wired(addressSpace, (addr_t)*address, size, 1190 &locker)); 1191 1192 if (wiring == B_CONTIGUOUS) { 1193 // we try to allocate the page run here upfront as this may easily 1194 // fail for obvious reasons 1195 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1196 physicalAddress, size / B_PAGE_SIZE, priority); 1197 if (page == NULL) { 1198 status = B_NO_MEMORY; 1199 goto err0; 1200 } 1201 } 1202 1203 // create an anonymous cache 1204 // if it's a stack, make sure that two pages are available at least 1205 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1206 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1207 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1208 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1209 wiring == B_NO_LOCK, priority); 1210 if (status != B_OK) 1211 goto err1; 1212 1213 cache->temporary = 1; 1214 cache->virtual_end = size; 1215 cache->committed_size = reservedMemory; 1216 // TODO: This should be done via a method. 
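	// reservedMemory has been turned into the cache's commitment above, so it
	// is cleared here to keep the error paths below (err0) from unreserving
	// the same memory a second time.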
1217 reservedMemory = 0; 1218 1219 switch (wiring) { 1220 case B_LAZY_LOCK: 1221 case B_FULL_LOCK: 1222 case B_CONTIGUOUS: 1223 case B_ALREADY_WIRED: 1224 cache->scan_skip = 1; 1225 break; 1226 case B_NO_LOCK: 1227 cache->scan_skip = 0; 1228 break; 1229 } 1230 1231 cache->Lock(); 1232 1233 status = map_backing_store(addressSpace, cache, address, 0, size, 1234 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 1235 flags, kernel); 1236 1237 if (status != B_OK) { 1238 cache->ReleaseRefAndUnlock(); 1239 goto err1; 1240 } 1241 1242 locker.DegradeToReadLock(); 1243 1244 switch (wiring) { 1245 case B_NO_LOCK: 1246 case B_LAZY_LOCK: 1247 // do nothing - the pages are mapped in as needed 1248 break; 1249 1250 case B_FULL_LOCK: 1251 { 1252 // Allocate and map all pages for this area 1253 1254 off_t offset = 0; 1255 for (addr_t address = area->Base(); 1256 address < area->Base() + (area->Size() - 1); 1257 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1258 #ifdef DEBUG_KERNEL_STACKS 1259 # ifdef STACK_GROWS_DOWNWARDS 1260 if (isStack && address < area->Base() 1261 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1262 # else 1263 if (isStack && address >= area->Base() + area->Size() 1264 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1265 # endif 1266 continue; 1267 #endif 1268 vm_page* page = vm_page_allocate_page(&reservation, 1269 PAGE_STATE_WIRED | pageAllocFlags); 1270 cache->InsertPage(page, offset); 1271 map_page(area, page, address, protection, &reservation); 1272 1273 DEBUG_PAGE_ACCESS_END(page); 1274 } 1275 1276 break; 1277 } 1278 1279 case B_ALREADY_WIRED: 1280 { 1281 // The pages should already be mapped. This is only really useful 1282 // during boot time. Find the appropriate vm_page objects and stick 1283 // them in the cache object. 1284 VMTranslationMap* map = addressSpace->TranslationMap(); 1285 off_t offset = 0; 1286 1287 if (!gKernelStartup) 1288 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1289 1290 map->Lock(); 1291 1292 for (addr_t virtualAddress = area->Base(); 1293 virtualAddress < area->Base() + (area->Size() - 1); 1294 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1295 addr_t physicalAddress; 1296 uint32 flags; 1297 status = map->Query(virtualAddress, &physicalAddress, &flags); 1298 if (status < B_OK) { 1299 panic("looking up mapping failed for va 0x%lx\n", 1300 virtualAddress); 1301 } 1302 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1303 if (page == NULL) { 1304 panic("looking up page failed for pa 0x%lx\n", 1305 physicalAddress); 1306 } 1307 1308 DEBUG_PAGE_ACCESS_START(page); 1309 1310 increment_page_wired_count(page); 1311 cache->InsertPage(page, offset); 1312 vm_page_set_state(page, PAGE_STATE_WIRED); 1313 page->busy = false; 1314 1315 DEBUG_PAGE_ACCESS_END(page); 1316 } 1317 1318 map->Unlock(); 1319 break; 1320 } 1321 1322 case B_CONTIGUOUS: 1323 { 1324 // We have already allocated our continuous pages run, so we can now 1325 // just map them in the address space 1326 VMTranslationMap* map = addressSpace->TranslationMap(); 1327 addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE; 1328 addr_t virtualAddress = area->Base(); 1329 off_t offset = 0; 1330 1331 map->Lock(); 1332 1333 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1334 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1335 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1336 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1337 if (page == NULL) 1338 panic("couldn't lookup physical page just allocated\n"); 1339 1340 
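			// Map each page of the run at the next virtual address, bump its
			// wired count, and insert it into the cache at the matching offset.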
status = map->Map(virtualAddress, physicalAddress, protection, 1341 &reservation); 1342 if (status < B_OK) 1343 panic("couldn't map physical page in page run\n"); 1344 1345 increment_page_wired_count(page); 1346 cache->InsertPage(page, offset); 1347 1348 DEBUG_PAGE_ACCESS_END(page); 1349 } 1350 1351 map->Unlock(); 1352 break; 1353 } 1354 1355 default: 1356 break; 1357 } 1358 1359 cache->Unlock(); 1360 1361 if (reservedPages > 0) 1362 vm_page_unreserve_pages(&reservation); 1363 1364 TRACE(("vm_create_anonymous_area: done\n")); 1365 1366 area->cache_type = CACHE_TYPE_RAM; 1367 return area->id; 1368 1369 err1: 1370 if (wiring == B_CONTIGUOUS) { 1371 // we had reserved the area space upfront... 1372 addr_t pageNumber = page->physical_page_number; 1373 int32 i; 1374 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1375 page = vm_lookup_page(pageNumber); 1376 if (page == NULL) 1377 panic("couldn't lookup physical page just allocated\n"); 1378 1379 vm_page_set_state(page, PAGE_STATE_FREE); 1380 } 1381 } 1382 1383 err0: 1384 if (reservedPages > 0) 1385 vm_page_unreserve_pages(&reservation); 1386 if (reservedMemory > 0) 1387 vm_unreserve_memory(reservedMemory); 1388 1389 return status; 1390 } 1391 1392 1393 area_id 1394 vm_map_physical_memory(team_id team, const char* name, void** _address, 1395 uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress, 1396 bool alreadyWired) 1397 { 1398 VMArea* area; 1399 VMCache* cache; 1400 addr_t mapOffset; 1401 1402 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 1403 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 1404 name, _address, addressSpec, size, protection, physicalAddress)); 1405 1406 if (!arch_vm_supports_protection(protection)) 1407 return B_NOT_SUPPORTED; 1408 1409 AddressSpaceWriteLocker locker(team); 1410 if (!locker.IsLocked()) 1411 return B_BAD_TEAM_ID; 1412 1413 // if the physical address is somewhat inside a page, 1414 // move the actual area down to align on a page boundary 1415 mapOffset = physicalAddress % B_PAGE_SIZE; 1416 size += mapOffset; 1417 physicalAddress -= mapOffset; 1418 1419 size = PAGE_ALIGN(size); 1420 1421 // create a device cache 1422 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1423 if (status != B_OK) 1424 return status; 1425 1426 // tell the page scanner to skip over this area, it's pages are special 1427 cache->scan_skip = 1; 1428 cache->virtual_end = size; 1429 1430 cache->Lock(); 1431 1432 status = map_backing_store(locker.AddressSpace(), cache, _address, 1433 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 1434 REGION_NO_PRIVATE_MAP, &area, name, 0, true); 1435 1436 if (status < B_OK) 1437 cache->ReleaseRefLocked(); 1438 1439 cache->Unlock(); 1440 1441 if (status == B_OK) { 1442 // set requested memory type -- use uncached, if not given 1443 uint32 memoryType = addressSpec & B_MTR_MASK; 1444 if (memoryType == 0) 1445 memoryType = B_MTR_UC; 1446 1447 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1448 if (status != B_OK) 1449 delete_area(locker.AddressSpace(), area, false); 1450 } 1451 1452 if (status >= B_OK && !alreadyWired) { 1453 // make sure our area is mapped in completely 1454 1455 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1456 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1457 area->Base() + (size - 1)); 1458 1459 vm_page_reservation reservation; 1460 vm_page_reserve_pages(&reservation, reservePages, 1461 team == VMAddressSpace::KernelID() 1462 
? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1463 map->Lock(); 1464 1465 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1466 map->Map(area->Base() + offset, physicalAddress + offset, 1467 protection, &reservation); 1468 } 1469 1470 map->Unlock(); 1471 vm_page_unreserve_pages(&reservation); 1472 } 1473 1474 if (status < B_OK) 1475 return status; 1476 1477 // modify the pointer returned to be offset back into the new area 1478 // the same way the physical address in was offset 1479 *_address = (void*)((addr_t)*_address + mapOffset); 1480 1481 area->cache_type = CACHE_TYPE_DEVICE; 1482 return area->id; 1483 } 1484 1485 1486 /*! Don't use! 1487 TODO: This function was introduced to map physical page vecs to 1488 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1489 use a device cache and does not track vm_page::wired_count! 1490 */ 1491 area_id 1492 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1493 uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs, 1494 uint32 vecCount) 1495 { 1496 TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, " 1497 "spec = %ld, _size = %p, protection = %ld, vecs = %p, " 1498 "vecCount = %ld)\n", team, name, _address, addressSpec, _size, 1499 protection, vecs, vecCount)); 1500 1501 if (!arch_vm_supports_protection(protection) 1502 || (addressSpec & B_MTR_MASK) != 0) { 1503 return B_NOT_SUPPORTED; 1504 } 1505 1506 AddressSpaceWriteLocker locker(team); 1507 if (!locker.IsLocked()) 1508 return B_BAD_TEAM_ID; 1509 1510 if (vecCount == 0) 1511 return B_BAD_VALUE; 1512 1513 addr_t size = 0; 1514 for (uint32 i = 0; i < vecCount; i++) { 1515 if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0 1516 || vecs[i].iov_len % B_PAGE_SIZE != 0) { 1517 return B_BAD_VALUE; 1518 } 1519 1520 size += vecs[i].iov_len; 1521 } 1522 1523 // create a device cache 1524 VMCache* cache; 1525 status_t result = VMCacheFactory::CreateDeviceCache(cache, 1526 (addr_t)vecs[0].iov_base); 1527 if (result != B_OK) 1528 return result; 1529 1530 // tell the page scanner to skip over this area, it's pages are special 1531 cache->scan_skip = 1; 1532 cache->virtual_end = size; 1533 1534 cache->Lock(); 1535 1536 VMArea* area; 1537 result = map_backing_store(locker.AddressSpace(), cache, _address, 1538 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 1539 REGION_NO_PRIVATE_MAP, &area, name, 0, true); 1540 1541 if (result != B_OK) 1542 cache->ReleaseRefLocked(); 1543 1544 cache->Unlock(); 1545 1546 if (result != B_OK) 1547 return result; 1548 1549 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1550 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1551 area->Base() + (size - 1)); 1552 1553 vm_page_reservation reservation; 1554 vm_page_reserve_pages(&reservation, reservePages, 1555 team == VMAddressSpace::KernelID() 1556 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1557 map->Lock(); 1558 1559 uint32 vecIndex = 0; 1560 size_t vecOffset = 0; 1561 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1562 while (vecOffset >= vecs[vecIndex].iov_len && vecIndex < vecCount) { 1563 vecOffset = 0; 1564 vecIndex++; 1565 } 1566 1567 if (vecIndex >= vecCount) 1568 break; 1569 1570 map->Map(area->Base() + offset, 1571 (addr_t)vecs[vecIndex].iov_base + vecOffset, protection, 1572 &reservation); 1573 1574 vecOffset += B_PAGE_SIZE; 1575 } 1576 1577 map->Unlock(); 1578 vm_page_unreserve_pages(&reservation); 1579 1580 if (_size != NULL) 1581 *_size = size; 1582 1583 area->cache_type = CACHE_TYPE_DEVICE; 1584 return area->id; 1585 } 1586 1587 1588 area_id 1589 vm_create_null_area(team_id team, const char* name, void** address, 1590 uint32 addressSpec, addr_t size, uint32 flags) 1591 { 1592 size = PAGE_ALIGN(size); 1593 1594 // Lock the address space and, if B_EXACT_ADDRESS and 1595 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1596 // is not wired. 1597 AddressSpaceWriteLocker locker; 1598 do { 1599 if (locker.SetTo(team) != B_OK) 1600 return B_BAD_TEAM_ID; 1601 } while (addressSpec == B_EXACT_ADDRESS 1602 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1603 && wait_if_address_range_is_wired(locker.AddressSpace(), 1604 (addr_t)*address, size, &locker)); 1605 1606 // create a null cache 1607 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1608 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1609 VMCache* cache; 1610 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1611 if (status != B_OK) 1612 return status; 1613 1614 // tell the page scanner to skip over this area, no pages will be mapped 1615 // here 1616 cache->scan_skip = 1; 1617 cache->virtual_end = size; 1618 1619 cache->Lock(); 1620 1621 VMArea* area; 1622 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 1623 addressSpec, B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, 1624 &area, name, flags, true); 1625 1626 if (status < B_OK) { 1627 cache->ReleaseRefAndUnlock(); 1628 return status; 1629 } 1630 1631 cache->Unlock(); 1632 1633 area->cache_type = CACHE_TYPE_NULL; 1634 return area->id; 1635 } 1636 1637 1638 /*! Creates the vnode cache for the specified \a vnode. 1639 The vnode has to be marked busy when calling this function. 1640 */ 1641 status_t 1642 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1643 { 1644 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1645 } 1646 1647 1648 /*! \a cache must be locked. The area's address space must be read-locked. 1649 */ 1650 static void 1651 pre_map_area_pages(VMArea* area, VMCache* cache, 1652 vm_page_reservation* reservation) 1653 { 1654 addr_t baseAddress = area->Base(); 1655 addr_t cacheOffset = area->cache_offset; 1656 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1657 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1658 1659 for (VMCachePagesTree::Iterator it 1660 = cache->pages.GetIterator(firstPage, true, true); 1661 vm_page* page = it.Next();) { 1662 if (page->cache_offset >= endPage) 1663 break; 1664 1665 // skip busy and inactive pages 1666 if (page->busy || page->usage_count == 0) 1667 continue; 1668 1669 DEBUG_PAGE_ACCESS_START(page); 1670 map_page(area, page, 1671 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1672 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1673 DEBUG_PAGE_ACCESS_END(page); 1674 } 1675 } 1676 1677 1678 /*! 
Will map the file specified by \a fd to an area in memory. 1679 The file will be mirrored beginning at the specified \a offset. The 1680 \a offset and \a size arguments have to be page aligned. 1681 */ 1682 static area_id 1683 _vm_map_file(team_id team, const char* name, void** _address, 1684 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 1685 bool unmapAddressRange, int fd, off_t offset, bool kernel) 1686 { 1687 // TODO: for binary files, we want to make sure that they get the 1688 // copy of a file at a given time, ie. later changes should not 1689 // make it into the mapped copy -- this will need quite some changes 1690 // to be done in a nice way 1691 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 1692 fd, offset, size, mapping)); 1693 1694 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 1695 size = PAGE_ALIGN(size); 1696 1697 if (mapping == REGION_NO_PRIVATE_MAP) 1698 protection |= B_SHARED_AREA; 1699 if (addressSpec != B_EXACT_ADDRESS) 1700 unmapAddressRange = false; 1701 1702 if (fd < 0) { 1703 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 1704 return vm_create_anonymous_area(team, name, _address, addressSpec, size, 1705 B_NO_LOCK, protection, 0, flags, kernel); 1706 } 1707 1708 // get the open flags of the FD 1709 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 1710 if (descriptor == NULL) 1711 return EBADF; 1712 int32 openMode = descriptor->open_mode; 1713 put_fd(descriptor); 1714 1715 // The FD must open for reading at any rate. For shared mapping with write 1716 // access, additionally the FD must be open for writing. 1717 if ((openMode & O_ACCMODE) == O_WRONLY 1718 || (mapping == REGION_NO_PRIVATE_MAP 1719 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 1720 && (openMode & O_ACCMODE) == O_RDONLY)) { 1721 return EACCES; 1722 } 1723 1724 // get the vnode for the object, this also grabs a ref to it 1725 struct vnode* vnode = NULL; 1726 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 1727 if (status < B_OK) 1728 return status; 1729 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 1730 1731 // If we're going to pre-map pages, we need to reserve the pages needed by 1732 // the mapping backend upfront. 1733 page_num_t reservedPreMapPages = 0; 1734 vm_page_reservation reservation; 1735 if ((protection & B_READ_AREA) != 0) { 1736 AddressSpaceWriteLocker locker; 1737 status = locker.SetTo(team); 1738 if (status != B_OK) 1739 return status; 1740 1741 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1742 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 1743 1744 locker.Unlock(); 1745 1746 vm_page_reserve_pages(&reservation, reservedPreMapPages, 1747 team == VMAddressSpace::KernelID() 1748 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1749 } 1750 1751 struct PageUnreserver { 1752 PageUnreserver(vm_page_reservation* reservation) 1753 : 1754 fReservation(reservation) 1755 { 1756 } 1757 1758 ~PageUnreserver() 1759 { 1760 if (fReservation != NULL) 1761 vm_page_unreserve_pages(fReservation); 1762 } 1763 1764 vm_page_reservation* fReservation; 1765 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 1766 1767 // Lock the address space and, if the specified address range shall be 1768 // unmapped, ensure it is not wired. 
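	// The loop is necessary because wait_if_address_range_is_wired() unlocks
	// the address space before waiting; after waking up, the lock has to be
	// re-acquired and the check repeated until no intersecting wired range is
	// left.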
1769 AddressSpaceWriteLocker locker; 1770 do { 1771 if (locker.SetTo(team) != B_OK) 1772 return B_BAD_TEAM_ID; 1773 } while (unmapAddressRange 1774 && wait_if_address_range_is_wired(locker.AddressSpace(), 1775 (addr_t)*_address, size, &locker)); 1776 1777 // TODO: this only works for file systems that use the file cache 1778 VMCache* cache; 1779 status = vfs_get_vnode_cache(vnode, &cache, false); 1780 if (status < B_OK) 1781 return status; 1782 1783 cache->Lock(); 1784 1785 VMArea* area; 1786 status = map_backing_store(locker.AddressSpace(), cache, _address, 1787 offset, size, addressSpec, 0, protection, mapping, &area, name, 1788 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, kernel); 1789 1790 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 1791 // map_backing_store() cannot know we no longer need the ref 1792 cache->ReleaseRefLocked(); 1793 } 1794 1795 if (status == B_OK && (protection & B_READ_AREA) != 0) 1796 pre_map_area_pages(area, cache, &reservation); 1797 1798 cache->Unlock(); 1799 1800 if (status == B_OK) { 1801 // TODO: this probably deserves a smarter solution, ie. don't always 1802 // prefetch stuff, and also, probably don't trigger it at this place. 1803 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1804 // prefetches at max 10 MB starting from "offset" 1805 } 1806 1807 if (status != B_OK) 1808 return status; 1809 1810 area->cache_type = CACHE_TYPE_VNODE; 1811 return area->id; 1812 } 1813 1814 1815 area_id 1816 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 1817 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 1818 int fd, off_t offset) 1819 { 1820 if (!arch_vm_supports_protection(protection)) 1821 return B_NOT_SUPPORTED; 1822 1823 return _vm_map_file(aid, name, address, addressSpec, size, protection, 1824 mapping, unmapAddressRange, fd, offset, true); 1825 } 1826 1827 1828 VMCache* 1829 vm_area_get_locked_cache(VMArea* area) 1830 { 1831 rw_lock_read_lock(&sAreaCacheLock); 1832 1833 while (true) { 1834 VMCache* cache = area->cache; 1835 1836 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 1837 // cache has been deleted 1838 rw_lock_read_lock(&sAreaCacheLock); 1839 continue; 1840 } 1841 1842 rw_lock_read_lock(&sAreaCacheLock); 1843 1844 if (cache == area->cache) { 1845 cache->AcquireRefLocked(); 1846 rw_lock_read_unlock(&sAreaCacheLock); 1847 return cache; 1848 } 1849 1850 // the cache changed in the meantime 1851 cache->Unlock(); 1852 } 1853 } 1854 1855 1856 void 1857 vm_area_put_locked_cache(VMCache* cache) 1858 { 1859 cache->ReleaseRefAndUnlock(); 1860 } 1861 1862 1863 area_id 1864 vm_clone_area(team_id team, const char* name, void** address, 1865 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 1866 bool kernel) 1867 { 1868 VMArea* newArea = NULL; 1869 VMArea* sourceArea; 1870 1871 // Check whether the source area exists and is cloneable. If so, mark it 1872 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 1873 { 1874 AddressSpaceWriteLocker locker; 1875 status_t status = locker.SetFromArea(sourceID, sourceArea); 1876 if (status != B_OK) 1877 return status; 1878 1879 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 1880 return B_NOT_ALLOWED; 1881 1882 sourceArea->protection |= B_SHARED_AREA; 1883 protection |= B_SHARED_AREA; 1884 } 1885 1886 // Now lock both address spaces and actually do the cloning. 
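	// Both address spaces are registered with the locker first and then locked
	// together by a single Lock() call, presumably so that they are acquired in
	// a consistent order. Since nothing was locked in the meantime, the source
	// area has to be looked up again and its protection re-checked below.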
1887 1888 MultiAddressSpaceLocker locker; 1889 VMAddressSpace* sourceAddressSpace; 1890 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 1891 if (status != B_OK) 1892 return status; 1893 1894 VMAddressSpace* targetAddressSpace; 1895 status = locker.AddTeam(team, true, &targetAddressSpace); 1896 if (status != B_OK) 1897 return status; 1898 1899 status = locker.Lock(); 1900 if (status != B_OK) 1901 return status; 1902 1903 sourceArea = lookup_area(sourceAddressSpace, sourceID); 1904 if (sourceArea == NULL) 1905 return B_BAD_VALUE; 1906 1907 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 1908 return B_NOT_ALLOWED; 1909 1910 VMCache* cache = vm_area_get_locked_cache(sourceArea); 1911 1912 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 1913 // have been adapted. Maybe it should be part of the kernel settings, 1914 // anyway (so that old drivers can always work). 1915 #if 0 1916 if (sourceArea->aspace == VMAddressSpace::Kernel() 1917 && addressSpace != VMAddressSpace::Kernel() 1918 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 1919 // kernel areas must not be cloned in userland, unless explicitly 1920 // declared user-cloneable upon construction 1921 status = B_NOT_ALLOWED; 1922 } else 1923 #endif 1924 if (sourceArea->cache_type == CACHE_TYPE_NULL) 1925 status = B_NOT_ALLOWED; 1926 else { 1927 status = map_backing_store(targetAddressSpace, cache, address, 1928 sourceArea->cache_offset, sourceArea->Size(), addressSpec, 1929 sourceArea->wiring, protection, mapping, &newArea, name, 0, kernel); 1930 } 1931 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 1932 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 1933 // to create a new cache, and has therefore already acquired a reference 1934 // to the source cache - but otherwise it has no idea that we need 1935 // one. 1936 cache->AcquireRefLocked(); 1937 } 1938 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 1939 // we need to map in everything at this point 1940 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 1941 // we don't have actual pages to map but a physical area 1942 VMTranslationMap* map 1943 = sourceArea->address_space->TranslationMap(); 1944 map->Lock(); 1945 1946 addr_t physicalAddress; 1947 uint32 oldProtection; 1948 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 1949 1950 map->Unlock(); 1951 1952 map = targetAddressSpace->TranslationMap(); 1953 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 1954 newArea->Base() + (newArea->Size() - 1)); 1955 1956 vm_page_reservation reservation; 1957 vm_page_reserve_pages(&reservation, reservePages, 1958 targetAddressSpace == VMAddressSpace::Kernel() 1959 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1960 map->Lock(); 1961 1962 for (addr_t offset = 0; offset < newArea->Size(); 1963 offset += B_PAGE_SIZE) { 1964 map->Map(newArea->Base() + offset, physicalAddress + offset, 1965 protection, &reservation); 1966 } 1967 1968 map->Unlock(); 1969 vm_page_unreserve_pages(&reservation); 1970 } else { 1971 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 1972 size_t reservePages = map->MaxPagesNeededToMap( 1973 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 1974 vm_page_reservation reservation; 1975 vm_page_reserve_pages(&reservation, reservePages, 1976 targetAddressSpace == VMAddressSpace::Kernel() 1977 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1978 1979 // map in all pages from source 1980 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 1981 vm_page* page = it.Next();) { 1982 if (!page->busy) { 1983 DEBUG_PAGE_ACCESS_START(page); 1984 map_page(newArea, page, 1985 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 1986 - newArea->cache_offset), 1987 protection, &reservation); 1988 DEBUG_PAGE_ACCESS_END(page); 1989 } 1990 } 1991 // TODO: B_FULL_LOCK means that all pages are locked. We are not 1992 // ensuring that! 1993 1994 vm_page_unreserve_pages(&reservation); 1995 } 1996 } 1997 if (status == B_OK) 1998 newArea->cache_type = sourceArea->cache_type; 1999 2000 vm_area_put_locked_cache(cache); 2001 2002 if (status < B_OK) 2003 return status; 2004 2005 return newArea->id; 2006 } 2007 2008 2009 /*! Deletes the specified area of the given address space. 2010 2011 The address space must be write-locked. 2012 The caller must ensure that the area does not have any wired ranges. 2013 2014 \param addressSpace The address space containing the area. 2015 \param area The area to be deleted. 2016 \param deletingAddressSpace \c true, if the address space is in the process 2017 of being deleted. 2018 */ 2019 static void 2020 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2021 bool deletingAddressSpace) 2022 { 2023 ASSERT(!area->IsWired()); 2024 2025 VMAreaHash::Remove(area); 2026 2027 // At this point the area is removed from the global hash table, but 2028 // still exists in the area list. 2029 2030 // Unmap the virtual address space the area occupied. 2031 { 2032 // We need to lock the complete cache chain. 2033 VMCache* topCache = vm_area_get_locked_cache(area); 2034 VMCacheChainLocker cacheChainLocker(topCache); 2035 cacheChainLocker.LockAllSourceCaches(); 2036 2037 // If the area's top cache is a temporary cache and the area is the only 2038 // one referencing it (besides us currently holding a second reference), 2039 // the unmapping code doesn't need to care about preserving the accessed 2040 // and dirty flags of the top cache page mappings. 2041 bool ignoreTopCachePageFlags 2042 = topCache->temporary && topCache->RefCount() == 2; 2043 2044 area->address_space->TranslationMap()->UnmapArea(area, 2045 deletingAddressSpace, ignoreTopCachePageFlags); 2046 } 2047 2048 if (!area->cache->temporary) 2049 area->cache->WriteModified(); 2050 2051 arch_vm_unset_memory_type(area); 2052 addressSpace->RemoveArea(area, 0); 2053 addressSpace->Put(); 2054 2055 area->cache->RemoveArea(area); 2056 area->cache->ReleaseRef(); 2057 2058 addressSpace->DeleteArea(area, 0); 2059 } 2060 2061 2062 status_t 2063 vm_delete_area(team_id team, area_id id, bool kernel) 2064 { 2065 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2066 2067 // lock the address space and make sure the area isn't wired 2068 AddressSpaceWriteLocker locker; 2069 VMArea* area; 2070 AreaCacheLocker cacheLocker; 2071 2072 do { 2073 status_t status = locker.SetFromArea(team, id, area); 2074 if (status != B_OK) 2075 return status; 2076 2077 cacheLocker.SetTo(area); 2078 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2079 2080 cacheLocker.Unlock(); 2081 2082 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2083 return B_NOT_ALLOWED; 2084 2085 delete_area(locker.AddressSpace(), area, false); 2086 return B_OK; 2087 } 2088 2089 2090 /*! 
Creates a new cache on top of given cache, moves all areas from 2091 the old cache to the new one, and changes the protection of all affected 2092 areas' pages to read-only. 2093 Preconditions: 2094 - The given cache must be locked. 2095 - All of the cache's areas' address spaces must be read locked. 2096 - None of the cache's areas must have any wired ranges. 2097 */ 2098 static status_t 2099 vm_copy_on_write_area(VMCache* lowerCache) 2100 { 2101 VMCache* upperCache; 2102 2103 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2104 2105 // We need to separate the cache from its areas. The cache goes one level 2106 // deeper and we create a new cache inbetween. 2107 2108 // create an anonymous cache 2109 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2110 0, true, VM_PRIORITY_USER); 2111 if (status != B_OK) 2112 return status; 2113 2114 upperCache->Lock(); 2115 2116 upperCache->temporary = 1; 2117 upperCache->scan_skip = lowerCache->scan_skip; 2118 upperCache->virtual_base = lowerCache->virtual_base; 2119 upperCache->virtual_end = lowerCache->virtual_end; 2120 2121 // transfer the lower cache areas to the upper cache 2122 rw_lock_write_lock(&sAreaCacheLock); 2123 upperCache->TransferAreas(lowerCache); 2124 rw_lock_write_unlock(&sAreaCacheLock); 2125 2126 lowerCache->AddConsumer(upperCache); 2127 2128 // We now need to remap all pages from all of the cache's areas read-only, so 2129 // that a copy will be created on next write access 2130 2131 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2132 tempArea = tempArea->cache_next) { 2133 // The area must be readable in the same way it was previously writable 2134 uint32 protection = B_KERNEL_READ_AREA; 2135 if ((tempArea->protection & B_READ_AREA) != 0) 2136 protection |= B_READ_AREA; 2137 2138 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2139 map->Lock(); 2140 map->ProtectArea(tempArea, protection); 2141 map->Unlock(); 2142 } 2143 2144 vm_area_put_locked_cache(upperCache); 2145 2146 return B_OK; 2147 } 2148 2149 2150 area_id 2151 vm_copy_area(team_id team, const char* name, void** _address, 2152 uint32 addressSpec, uint32 protection, area_id sourceID) 2153 { 2154 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2155 2156 if ((protection & B_KERNEL_PROTECTION) == 0) { 2157 // set the same protection for the kernel as for userland 2158 protection |= B_KERNEL_READ_AREA; 2159 if (writableCopy) 2160 protection |= B_KERNEL_WRITE_AREA; 2161 } 2162 2163 // Do the locking: target address space, all address spaces associated with 2164 // the source cache, and the cache itself. 2165 MultiAddressSpaceLocker locker; 2166 VMAddressSpace* targetAddressSpace; 2167 VMCache* cache; 2168 VMArea* source; 2169 AreaCacheLocker cacheLocker; 2170 status_t status; 2171 bool sharedArea; 2172 2173 bool restart; 2174 do { 2175 restart = false; 2176 2177 locker.Unset(); 2178 status = locker.AddTeam(team, true, &targetAddressSpace); 2179 if (status == B_OK) { 2180 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2181 &cache); 2182 } 2183 if (status != B_OK) 2184 return status; 2185 2186 cacheLocker.SetTo(cache, true); // already locked 2187 2188 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2189 2190 // Make sure the source area (respectively, if not shared, all areas of 2191 // the cache) doesn't have any wired ranges. 
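	// If one of the wait_if_area_is_wired() calls below had to wait, the locks
	// were given up in the meantime, so the whole locking procedure is
	// restarted from the top of this loop.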
2192 if (sharedArea) { 2193 if (wait_if_area_is_wired(source, &locker, &cacheLocker)) 2194 restart = true; 2195 } else { 2196 for (VMArea* area = cache->areas; area != NULL; 2197 area = area->cache_next) { 2198 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) { 2199 restart = true; 2200 break; 2201 } 2202 } 2203 } 2204 } while (restart); 2205 2206 if (addressSpec == B_CLONE_ADDRESS) { 2207 addressSpec = B_EXACT_ADDRESS; 2208 *_address = (void*)source->Base(); 2209 } 2210 2211 // First, create a cache on top of the source area, respectively use the 2212 // existing one, if this is a shared area. 2213 2214 VMArea* target; 2215 status = map_backing_store(targetAddressSpace, cache, _address, 2216 source->cache_offset, source->Size(), addressSpec, source->wiring, 2217 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2218 &target, name, 0, true); 2219 if (status < B_OK) 2220 return status; 2221 2222 if (sharedArea) { 2223 // The new area uses the old area's cache, but map_backing_store() 2224 // hasn't acquired a ref. So we have to do that now. 2225 cache->AcquireRefLocked(); 2226 } 2227 2228 // If the source area is writable, we need to move it one layer up as well 2229 2230 if (!sharedArea) { 2231 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2232 // TODO: do something more useful if this fails! 2233 if (vm_copy_on_write_area(cache) < B_OK) 2234 panic("vm_copy_on_write_area() failed!\n"); 2235 } 2236 } 2237 2238 // we return the ID of the newly created area 2239 return target->id; 2240 } 2241 2242 2243 static status_t 2244 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2245 bool kernel) 2246 { 2247 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2248 "%#lx)\n", team, areaID, newProtection)); 2249 2250 if (!arch_vm_supports_protection(newProtection)) 2251 return B_NOT_SUPPORTED; 2252 2253 bool becomesWritable 2254 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2255 2256 // lock address spaces and cache 2257 MultiAddressSpaceLocker locker; 2258 VMCache* cache; 2259 VMArea* area; 2260 status_t status; 2261 AreaCacheLocker cacheLocker; 2262 bool isWritable; 2263 2264 bool restart; 2265 do { 2266 restart = false; 2267 2268 locker.Unset(); 2269 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2270 if (status != B_OK) 2271 return status; 2272 2273 cacheLocker.SetTo(cache, true); // already locked 2274 2275 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2276 return B_NOT_ALLOWED; 2277 2278 if (area->protection == newProtection) 2279 return B_OK; 2280 2281 if (team != VMAddressSpace::KernelID() 2282 && area->address_space->ID() != team) { 2283 // unless you're the kernel, you are only allowed to set 2284 // the protection of your own areas 2285 return B_NOT_ALLOWED; 2286 } 2287 2288 isWritable 2289 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2290 2291 // Make sure the area (respectively, if we're going to call 2292 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2293 // wired ranges. 
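	// If the area is about to become writable and the cache has consumers,
	// vm_copy_on_write_area() will be called further below; since that affects
	// all areas of the cache, each of them is checked here.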
2294 if (!isWritable && becomesWritable && !list_is_empty(&cache->consumers)) { 2295 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2296 otherArea = otherArea->cache_next) { 2297 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2298 restart = true; 2299 break; 2300 } 2301 } 2302 } else { 2303 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2304 restart = true; 2305 } 2306 } while (restart); 2307 2308 bool changePageProtection = true; 2309 bool changeTopCachePagesOnly = false; 2310 2311 if (isWritable && !becomesWritable) { 2312 // writable -> !writable 2313 2314 if (cache->source != NULL && cache->temporary) { 2315 if (cache->CountWritableAreas(area) == 0) { 2316 // Since this cache now lives from the pages in its source cache, 2317 // we can change the cache's commitment to take only those pages 2318 // into account that really are in this cache. 2319 2320 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2321 team == VMAddressSpace::KernelID() 2322 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2323 2324 // TODO: we may be able to join with our source cache, if 2325 // count == 0 2326 } 2327 } 2328 2329 // If only the writability changes, we can just remap the pages of the 2330 // top cache, since the pages of lower caches are mapped read-only 2331 // anyway. That's advantageous only, if the number of pages in the cache 2332 // is significantly smaller than the number of pages in the area, 2333 // though. 2334 if (newProtection 2335 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2336 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2337 changeTopCachePagesOnly = true; 2338 } 2339 } else if (!isWritable && becomesWritable) { 2340 // !writable -> writable 2341 2342 if (!list_is_empty(&cache->consumers)) { 2343 // There are consumers -- we have to insert a new cache. Fortunately 2344 // vm_copy_on_write_area() does everything that's needed. 2345 changePageProtection = false; 2346 status = vm_copy_on_write_area(cache); 2347 } else { 2348 // No consumers, so we don't need to insert a new one. 2349 if (cache->source != NULL && cache->temporary) { 2350 // the cache's commitment must contain all possible pages 2351 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2352 team == VMAddressSpace::KernelID() 2353 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2354 } 2355 2356 if (status == B_OK && cache->source != NULL) { 2357 // There's a source cache, hence we can't just change all pages' 2358 // protection or we might allow writing into pages belonging to 2359 // a lower cache. 
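	// Restricting the remapping to the top cache's pages keeps pages that
	// still live in a lower cache mapped read-only, so a write access to them
	// still faults and copies them into the top cache first.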
2360 changeTopCachePagesOnly = true; 2361 } 2362 } 2363 } else { 2364 // we don't have anything special to do in all other cases 2365 } 2366 2367 if (status == B_OK) { 2368 // remap existing pages in this cache 2369 if (changePageProtection) { 2370 VMTranslationMap* map = area->address_space->TranslationMap(); 2371 map->Lock(); 2372 2373 if (changeTopCachePagesOnly) { 2374 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2375 page_num_t lastPageOffset 2376 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2377 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2378 vm_page* page = it.Next();) { 2379 if (page->cache_offset >= firstPageOffset 2380 && page->cache_offset <= lastPageOffset) { 2381 addr_t address = virtual_page_address(area, page); 2382 map->ProtectPage(area, address, newProtection); 2383 } 2384 } 2385 } else 2386 map->ProtectArea(area, newProtection); 2387 2388 map->Unlock(); 2389 } 2390 2391 area->protection = newProtection; 2392 } 2393 2394 return status; 2395 } 2396 2397 2398 status_t 2399 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr) 2400 { 2401 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2402 if (addressSpace == NULL) 2403 return B_BAD_TEAM_ID; 2404 2405 VMTranslationMap* map = addressSpace->TranslationMap(); 2406 2407 map->Lock(); 2408 uint32 dummyFlags; 2409 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2410 map->Unlock(); 2411 2412 addressSpace->Put(); 2413 return status; 2414 } 2415 2416 2417 /*! The page's cache must be locked. 2418 */ 2419 bool 2420 vm_test_map_modification(vm_page* page) 2421 { 2422 if (page->modified) 2423 return true; 2424 2425 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2426 vm_page_mapping* mapping; 2427 while ((mapping = iterator.Next()) != NULL) { 2428 VMArea* area = mapping->area; 2429 VMTranslationMap* map = area->address_space->TranslationMap(); 2430 2431 addr_t physicalAddress; 2432 uint32 flags; 2433 map->Lock(); 2434 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2435 map->Unlock(); 2436 2437 if ((flags & PAGE_MODIFIED) != 0) 2438 return true; 2439 } 2440 2441 return false; 2442 } 2443 2444 2445 /*! The page's cache must be locked. 2446 */ 2447 void 2448 vm_clear_map_flags(vm_page* page, uint32 flags) 2449 { 2450 if ((flags & PAGE_ACCESSED) != 0) 2451 page->accessed = false; 2452 if ((flags & PAGE_MODIFIED) != 0) 2453 page->modified = false; 2454 2455 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2456 vm_page_mapping* mapping; 2457 while ((mapping = iterator.Next()) != NULL) { 2458 VMArea* area = mapping->area; 2459 VMTranslationMap* map = area->address_space->TranslationMap(); 2460 2461 map->Lock(); 2462 map->ClearFlags(virtual_page_address(area, page), flags); 2463 map->Unlock(); 2464 } 2465 } 2466 2467 2468 /*! Removes all mappings from a page. 2469 After you've called this function, the page is unmapped from memory and 2470 the page's \c accessed and \c modified flags have been updated according 2471 to the state of the mappings. 2472 The page's cache must be locked. 
2473 */ 2474 void 2475 vm_remove_all_page_mappings(vm_page* page) 2476 { 2477 while (vm_page_mapping* mapping = page->mappings.Head()) { 2478 VMArea* area = mapping->area; 2479 VMTranslationMap* map = area->address_space->TranslationMap(); 2480 addr_t address = virtual_page_address(area, page); 2481 map->UnmapPage(area, address, false); 2482 } 2483 } 2484 2485 2486 int32 2487 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2488 { 2489 int32 count = 0; 2490 2491 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2492 vm_page_mapping* mapping; 2493 while ((mapping = iterator.Next()) != NULL) { 2494 VMArea* area = mapping->area; 2495 VMTranslationMap* map = area->address_space->TranslationMap(); 2496 2497 bool modified; 2498 if (map->ClearAccessedAndModified(area, 2499 virtual_page_address(area, page), false, modified)) { 2500 count++; 2501 } 2502 2503 page->modified |= modified; 2504 } 2505 2506 2507 if (page->accessed) { 2508 count++; 2509 page->accessed = false; 2510 } 2511 2512 return count; 2513 } 2514 2515 2516 /*! Removes all mappings of a page and/or clears the accessed bits of the 2517 mappings. 2518 The function iterates through the page mappings and removes them until 2519 encountering one that has been accessed. From then on it will continue to 2520 iterate, but only clear the accessed flag of the mapping. The page's 2521 \c modified bit will be updated accordingly, the \c accessed bit will be 2522 cleared. 2523 \return The number of mapping accessed bits encountered, including the 2524 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2525 of the page have been removed. 2526 */ 2527 int32 2528 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 2529 { 2530 ASSERT(page->wired_count == 0); 2531 2532 if (page->accessed) 2533 return vm_clear_page_mapping_accessed_flags(page); 2534 2535 while (vm_page_mapping* mapping = page->mappings.Head()) { 2536 VMArea* area = mapping->area; 2537 VMTranslationMap* map = area->address_space->TranslationMap(); 2538 addr_t address = virtual_page_address(area, page); 2539 bool modified = false; 2540 if (map->ClearAccessedAndModified(area, address, true, modified)) { 2541 page->accessed = true; 2542 page->modified |= modified; 2543 return vm_clear_page_mapping_accessed_flags(page); 2544 } 2545 page->modified |= modified; 2546 } 2547 2548 return 0; 2549 } 2550 2551 2552 static int 2553 display_mem(int argc, char** argv) 2554 { 2555 bool physical = false; 2556 addr_t copyAddress; 2557 int32 displayWidth; 2558 int32 itemSize; 2559 int32 num = -1; 2560 addr_t address; 2561 int i = 1, j; 2562 2563 if (argc > 1 && argv[1][0] == '-') { 2564 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2565 physical = true; 2566 i++; 2567 } else 2568 i = 99; 2569 } 2570 2571 if (argc < i + 1 || argc > i + 2) { 2572 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 2573 "\tdl - 8 bytes\n" 2574 "\tdw - 4 bytes\n" 2575 "\tds - 2 bytes\n" 2576 "\tdb - 1 byte\n" 2577 "\tstring - a whole string\n" 2578 " -p or --physical only allows memory from a single page to be " 2579 "displayed.\n"); 2580 return 0; 2581 } 2582 2583 address = parse_expression(argv[i]); 2584 2585 if (argc > i + 1) 2586 num = parse_expression(argv[i + 1]); 2587 2588 // build the format string 2589 if (strcmp(argv[0], "db") == 0) { 2590 itemSize = 1; 2591 displayWidth = 16; 2592 } else if (strcmp(argv[0], "ds") == 0) { 2593 itemSize = 2; 2594 displayWidth = 8; 2595 } else if (strcmp(argv[0], "dw") == 0) { 2596 itemSize 
= 4; 2597 displayWidth = 4; 2598 } else if (strcmp(argv[0], "dl") == 0) { 2599 itemSize = 8; 2600 displayWidth = 2; 2601 } else if (strcmp(argv[0], "string") == 0) { 2602 itemSize = 1; 2603 displayWidth = -1; 2604 } else { 2605 kprintf("display_mem called in an invalid way!\n"); 2606 return 0; 2607 } 2608 2609 if (num <= 0) 2610 num = displayWidth; 2611 2612 void* physicalPageHandle = NULL; 2613 2614 if (physical) { 2615 int32 offset = address & (B_PAGE_SIZE - 1); 2616 if (num * itemSize + offset > B_PAGE_SIZE) { 2617 num = (B_PAGE_SIZE - offset) / itemSize; 2618 kprintf("NOTE: number of bytes has been cut to page size\n"); 2619 } 2620 2621 address = ROUNDDOWN(address, B_PAGE_SIZE); 2622 2623 if (vm_get_physical_page_debug(address, &copyAddress, 2624 &physicalPageHandle) != B_OK) { 2625 kprintf("getting the hardware page failed."); 2626 return 0; 2627 } 2628 2629 address += offset; 2630 copyAddress += offset; 2631 } else 2632 copyAddress = address; 2633 2634 if (!strcmp(argv[0], "string")) { 2635 kprintf("%p \"", (char*)copyAddress); 2636 2637 // string mode 2638 for (i = 0; true; i++) { 2639 char c; 2640 if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK 2641 || c == '\0') 2642 break; 2643 2644 if (c == '\n') 2645 kprintf("\\n"); 2646 else if (c == '\t') 2647 kprintf("\\t"); 2648 else { 2649 if (!isprint(c)) 2650 c = '.'; 2651 2652 kprintf("%c", c); 2653 } 2654 } 2655 2656 kprintf("\"\n"); 2657 } else { 2658 // number mode 2659 for (i = 0; i < num; i++) { 2660 uint32 value; 2661 2662 if ((i % displayWidth) == 0) { 2663 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 2664 if (i != 0) 2665 kprintf("\n"); 2666 2667 kprintf("[0x%lx] ", address + i * itemSize); 2668 2669 for (j = 0; j < displayed; j++) { 2670 char c; 2671 if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j, 2672 1) != B_OK) { 2673 displayed = j; 2674 break; 2675 } 2676 if (!isprint(c)) 2677 c = '.'; 2678 2679 kprintf("%c", c); 2680 } 2681 if (num > displayWidth) { 2682 // make sure the spacing in the last line is correct 2683 for (j = displayed; j < displayWidth * itemSize; j++) 2684 kprintf(" "); 2685 } 2686 kprintf(" "); 2687 } 2688 2689 if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize, 2690 itemSize) != B_OK) { 2691 kprintf("read fault"); 2692 break; 2693 } 2694 2695 switch (itemSize) { 2696 case 1: 2697 kprintf(" %02x", *(uint8*)&value); 2698 break; 2699 case 2: 2700 kprintf(" %04x", *(uint16*)&value); 2701 break; 2702 case 4: 2703 kprintf(" %08lx", *(uint32*)&value); 2704 break; 2705 case 8: 2706 kprintf(" %016Lx", *(uint64*)&value); 2707 break; 2708 } 2709 } 2710 2711 kprintf("\n"); 2712 } 2713 2714 if (physical) { 2715 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 2716 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 2717 } 2718 return 0; 2719 } 2720 2721 2722 static void 2723 dump_cache_tree_recursively(VMCache* cache, int level, 2724 VMCache* highlightCache) 2725 { 2726 // print this cache 2727 for (int i = 0; i < level; i++) 2728 kprintf(" "); 2729 if (cache == highlightCache) 2730 kprintf("%p <--\n", cache); 2731 else 2732 kprintf("%p\n", cache); 2733 2734 // recursively print its consumers 2735 VMCache* consumer = NULL; 2736 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2737 consumer)) != NULL) { 2738 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 2739 } 2740 } 2741 2742 2743 static int 2744 dump_cache_tree(int argc, char** argv) 2745 { 2746 if (argc != 2 || !strcmp(argv[1], "--help")) { 2747 kprintf("usage: %s <address>\n",
argv[0]); 2748 return 0; 2749 } 2750 2751 addr_t address = parse_expression(argv[1]); 2752 if (address == 0) 2753 return 0; 2754 2755 VMCache* cache = (VMCache*)address; 2756 VMCache* root = cache; 2757 2758 // find the root cache (the transitive source) 2759 while (root->source != NULL) 2760 root = root->source; 2761 2762 dump_cache_tree_recursively(root, 0, cache); 2763 2764 return 0; 2765 } 2766 2767 2768 static const char* 2769 cache_type_to_string(int32 type) 2770 { 2771 switch (type) { 2772 case CACHE_TYPE_RAM: 2773 return "RAM"; 2774 case CACHE_TYPE_DEVICE: 2775 return "device"; 2776 case CACHE_TYPE_VNODE: 2777 return "vnode"; 2778 case CACHE_TYPE_NULL: 2779 return "null"; 2780 2781 default: 2782 return "unknown"; 2783 } 2784 } 2785 2786 2787 #if DEBUG_CACHE_LIST 2788 2789 static void 2790 update_cache_info_recursively(VMCache* cache, cache_info& info) 2791 { 2792 info.page_count += cache->page_count; 2793 if (cache->type == CACHE_TYPE_RAM) 2794 info.committed += cache->committed_size; 2795 2796 // recurse 2797 VMCache* consumer = NULL; 2798 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2799 consumer)) != NULL) { 2800 update_cache_info_recursively(consumer, info); 2801 } 2802 } 2803 2804 2805 static int 2806 cache_info_compare_page_count(const void* _a, const void* _b) 2807 { 2808 const cache_info* a = (const cache_info*)_a; 2809 const cache_info* b = (const cache_info*)_b; 2810 if (a->page_count == b->page_count) 2811 return 0; 2812 return a->page_count < b->page_count ? 1 : -1; 2813 } 2814 2815 2816 static int 2817 cache_info_compare_committed(const void* _a, const void* _b) 2818 { 2819 const cache_info* a = (const cache_info*)_a; 2820 const cache_info* b = (const cache_info*)_b; 2821 if (a->committed == b->committed) 2822 return 0; 2823 return a->committed < b->committed ? 
1 : -1; 2824 } 2825 2826 2827 static void 2828 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 2829 { 2830 for (int i = 0; i < level; i++) 2831 kprintf(" "); 2832 2833 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 2834 cache_type_to_string(cache->type), cache->virtual_base, 2835 cache->virtual_end, cache->page_count); 2836 2837 if (level == 0) 2838 kprintf("/%lu", info.page_count); 2839 2840 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 2841 kprintf(", committed: %lld", cache->committed_size); 2842 2843 if (level == 0) 2844 kprintf("/%lu", info.committed); 2845 } 2846 2847 // areas 2848 if (cache->areas != NULL) { 2849 VMArea* area = cache->areas; 2850 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 2851 area->address_space->ID()); 2852 2853 while (area->cache_next != NULL) { 2854 area = area->cache_next; 2855 kprintf(", %ld", area->id); 2856 } 2857 } 2858 2859 kputs("\n"); 2860 2861 // recurse 2862 VMCache* consumer = NULL; 2863 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2864 consumer)) != NULL) { 2865 dump_caches_recursively(consumer, info, level + 1); 2866 } 2867 } 2868 2869 2870 static int 2871 dump_caches(int argc, char** argv) 2872 { 2873 if (sCacheInfoTable == NULL) { 2874 kprintf("No cache info table!\n"); 2875 return 0; 2876 } 2877 2878 bool sortByPageCount = true; 2879 2880 for (int32 i = 1; i < argc; i++) { 2881 if (strcmp(argv[i], "-c") == 0) { 2882 sortByPageCount = false; 2883 } else { 2884 print_debugger_command_usage(argv[0]); 2885 return 0; 2886 } 2887 } 2888 2889 uint32 totalCount = 0; 2890 uint32 rootCount = 0; 2891 off_t totalCommitted = 0; 2892 page_num_t totalPages = 0; 2893 2894 VMCache* cache = gDebugCacheList; 2895 while (cache) { 2896 totalCount++; 2897 if (cache->source == NULL) { 2898 cache_info stackInfo; 2899 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 2900 ? sCacheInfoTable[rootCount] : stackInfo; 2901 rootCount++; 2902 info.cache = cache; 2903 info.page_count = 0; 2904 info.committed = 0; 2905 update_cache_info_recursively(cache, info); 2906 totalCommitted += info.committed; 2907 totalPages += info.page_count; 2908 } 2909 2910 cache = cache->debug_next; 2911 } 2912 2913 if (rootCount <= (uint32)kCacheInfoTableCount) { 2914 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 2915 sortByPageCount 2916 ? &cache_info_compare_page_count 2917 : &cache_info_compare_committed); 2918 } 2919 2920 kprintf("total committed memory: %lld, total used pages: %lu\n", 2921 totalCommitted, totalPages); 2922 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 2923 "tree...\n\n", totalCount, rootCount, 2924 sortByPageCount ? "page count" : "committed size"); 2925 2926 if (rootCount <= (uint32)kCacheInfoTableCount) { 2927 for (uint32 i = 0; i < rootCount; i++) { 2928 cache_info& info = sCacheInfoTable[i]; 2929 dump_caches_recursively(info.cache, info, 0); 2930 } 2931 } else 2932 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 2933 2934 return 0; 2935 } 2936 2937 #endif // DEBUG_CACHE_LIST 2938 2939 2940 static int 2941 dump_cache(int argc, char** argv) 2942 { 2943 VMCache* cache; 2944 bool showPages = false; 2945 int i = 1; 2946 2947 if (argc < 2 || !strcmp(argv[1], "--help")) { 2948 kprintf("usage: %s [-ps] <address>\n" 2949 " if -p is specified, all pages are shown, if -s is used\n" 2950 " only the cache info is shown respectively.\n", argv[0]); 2951 return 0; 2952 } 2953 while (argv[i][0] == '-') { 2954 char* arg = argv[i] + 1; 2955 while (arg[0]) { 2956 if (arg[0] == 'p') 2957 showPages = true; 2958 arg++; 2959 } 2960 i++; 2961 } 2962 if (argv[i] == NULL) { 2963 kprintf("%s: invalid argument, pass address\n", argv[0]); 2964 return 0; 2965 } 2966 2967 addr_t address = parse_expression(argv[i]); 2968 if (address == 0) 2969 return 0; 2970 2971 cache = (VMCache*)address; 2972 2973 kprintf("CACHE %p:\n", cache); 2974 kprintf(" ref_count: %ld\n", cache->RefCount()); 2975 kprintf(" source: %p\n", cache->source); 2976 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 2977 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 2978 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 2979 kprintf(" temporary: %ld\n", cache->temporary); 2980 kprintf(" scan_skip: %ld\n", cache->scan_skip); 2981 kprintf(" lock: %p\n", cache->GetLock()); 2982 #if KDEBUG 2983 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 2984 #endif 2985 kprintf(" areas:\n"); 2986 2987 for (VMArea* area = cache->areas; area != NULL; area = area->cache_next) { 2988 kprintf(" area 0x%lx, %s\n", area->id, area->name); 2989 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->Base(), 2990 area->Size()); 2991 kprintf("\tprotection: 0x%lx\n", area->protection); 2992 kprintf("\towner: 0x%lx\n", area->address_space->ID()); 2993 } 2994 2995 kprintf(" consumers:\n"); 2996 VMCache* consumer = NULL; 2997 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2998 consumer)) != NULL) { 2999 kprintf("\t%p\n", consumer); 3000 } 3001 3002 kprintf(" pages:\n"); 3003 if (showPages) { 3004 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3005 vm_page* page = it.Next();) { 3006 if (!vm_page_is_dummy(page)) { 3007 kprintf("\t%p ppn 0x%lx offset 0x%lx state %u (%s) " 3008 "wired_count %u\n", page, page->physical_page_number, 3009 page->cache_offset, page->State(), 3010 page_state_to_string(page->State()), page->wired_count); 3011 } else { 3012 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 3013 page, page->State(), page_state_to_string(page->State())); 3014 } 3015 } 3016 } else 3017 kprintf("\t%ld in cache\n", cache->page_count); 3018 3019 set_debug_variable("_sourceCache", (addr_t)cache->source); 3020 3021 return 0; 3022 } 3023 3024 3025 static void 3026 dump_area_struct(VMArea* area, bool mappings) 3027 { 3028 kprintf("AREA: %p\n", area); 3029 kprintf("name:\t\t'%s'\n", area->name); 3030 kprintf("owner:\t\t0x%lx\n", area->address_space->ID()); 3031 kprintf("id:\t\t0x%lx\n", area->id); 3032 kprintf("base:\t\t0x%lx\n", area->Base()); 3033 kprintf("size:\t\t0x%lx\n", area->Size()); 3034 kprintf("protection:\t0x%lx\n", area->protection); 3035 kprintf("wiring:\t\t0x%x\n", area->wiring); 3036 kprintf("memory_type:\t0x%x\n", area->memory_type); 3037 kprintf("cache:\t\t%p\n", area->cache); 3038 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 3039 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3040 kprintf("cache_next:\t%p\n", area->cache_next); 3041 
kprintf("cache_prev:\t%p\n", area->cache_prev); 3042 3043 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3044 if (mappings) { 3045 kprintf("page mappings:\n"); 3046 while (iterator.HasNext()) { 3047 vm_page_mapping* mapping = iterator.Next(); 3048 kprintf(" %p", mapping->page); 3049 } 3050 kprintf("\n"); 3051 } else { 3052 uint32 count = 0; 3053 while (iterator.Next() != NULL) { 3054 count++; 3055 } 3056 kprintf("page mappings:\t%lu\n", count); 3057 } 3058 } 3059 3060 3061 static int 3062 dump_area(int argc, char** argv) 3063 { 3064 bool mappings = false; 3065 bool found = false; 3066 int32 index = 1; 3067 VMArea* area; 3068 addr_t num; 3069 3070 if (argc < 2 || !strcmp(argv[1], "--help")) { 3071 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3072 "All areas matching either id/address/name are listed. You can\n" 3073 "force to check only a specific item by prefixing the specifier\n" 3074 "with the id/contains/address/name keywords.\n" 3075 "-m shows the area's mappings as well.\n"); 3076 return 0; 3077 } 3078 3079 if (!strcmp(argv[1], "-m")) { 3080 mappings = true; 3081 index++; 3082 } 3083 3084 int32 mode = 0xf; 3085 if (!strcmp(argv[index], "id")) 3086 mode = 1; 3087 else if (!strcmp(argv[index], "contains")) 3088 mode = 2; 3089 else if (!strcmp(argv[index], "name")) 3090 mode = 4; 3091 else if (!strcmp(argv[index], "address")) 3092 mode = 0; 3093 if (mode != 0xf) 3094 index++; 3095 3096 if (index >= argc) { 3097 kprintf("No area specifier given.\n"); 3098 return 0; 3099 } 3100 3101 num = parse_expression(argv[index]); 3102 3103 if (mode == 0) { 3104 dump_area_struct((struct VMArea*)num, mappings); 3105 } else { 3106 // walk through the area list, looking for the arguments as a name 3107 3108 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3109 while ((area = it.Next()) != NULL) { 3110 if (((mode & 4) != 0 && area->name != NULL 3111 && !strcmp(argv[index], area->name)) 3112 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3113 || (((mode & 2) != 0 && area->Base() <= num 3114 && area->Base() + area->Size() > num))))) { 3115 dump_area_struct(area, mappings); 3116 found = true; 3117 } 3118 } 3119 3120 if (!found) 3121 kprintf("could not find area %s (%ld)\n", argv[index], num); 3122 } 3123 3124 return 0; 3125 } 3126 3127 3128 static int 3129 dump_area_list(int argc, char** argv) 3130 { 3131 VMArea* area; 3132 const char* name = NULL; 3133 int32 id = 0; 3134 3135 if (argc > 1) { 3136 id = parse_expression(argv[1]); 3137 if (id == 0) 3138 name = argv[1]; 3139 } 3140 3141 kprintf("addr id base\t\tsize protect lock name\n"); 3142 3143 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3144 while ((area = it.Next()) != NULL) { 3145 if ((id != 0 && area->address_space->ID() != id) 3146 || (name != NULL && strstr(area->name, name) == NULL)) 3147 continue; 3148 3149 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 3150 (void*)area->Base(), (void*)area->Size(), area->protection, 3151 area->wiring, area->name); 3152 } 3153 return 0; 3154 } 3155 3156 3157 static int 3158 dump_available_memory(int argc, char** argv) 3159 { 3160 kprintf("Available memory: %Ld/%lu bytes\n", 3161 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 3162 return 0; 3163 } 3164 3165 3166 /*! Deletes all areas and reserved regions in the given address space. 3167 3168 The caller must ensure that none of the areas has any wired ranges. 3169 3170 \param addressSpace The address space. 
3171 \param deletingAddressSpace \c true, if the address space is in the process 3172 of being deleted. 3173 */ 3174 void 3175 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3176 { 3177 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 3178 addressSpace->ID())); 3179 3180 addressSpace->WriteLock(); 3181 3182 // remove all reserved areas in this address space 3183 addressSpace->UnreserveAllAddressRanges(0); 3184 3185 // delete all the areas in this address space 3186 while (VMArea* area = addressSpace->FirstArea()) { 3187 ASSERT(!area->IsWired()); 3188 delete_area(addressSpace, area, deletingAddressSpace); 3189 } 3190 3191 addressSpace->WriteUnlock(); 3192 } 3193 3194 3195 static area_id 3196 vm_area_for(addr_t address, bool kernel) 3197 { 3198 team_id team; 3199 if (IS_USER_ADDRESS(address)) { 3200 // we try the user team address space, if any 3201 team = VMAddressSpace::CurrentID(); 3202 if (team < 0) 3203 return team; 3204 } else 3205 team = VMAddressSpace::KernelID(); 3206 3207 AddressSpaceReadLocker locker(team); 3208 if (!locker.IsLocked()) 3209 return B_BAD_TEAM_ID; 3210 3211 VMArea* area = locker.AddressSpace()->LookupArea(address); 3212 if (area != NULL) { 3213 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3214 return B_ERROR; 3215 3216 return area->id; 3217 } 3218 3219 return B_ERROR; 3220 } 3221 3222 3223 /*! Frees physical pages that were used during the boot process. 3224 \a end is inclusive. 3225 */ 3226 static void 3227 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3228 { 3229 // free all physical pages in the specified range 3230 3231 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3232 addr_t physicalAddress; 3233 uint32 flags; 3234 3235 if (map->Query(current, &physicalAddress, &flags) == B_OK 3236 && (flags & PAGE_PRESENT) != 0) { 3237 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3238 if (page != NULL && page->State() != PAGE_STATE_FREE 3239 && page->State() != PAGE_STATE_CLEAR 3240 && page->State() != PAGE_STATE_UNUSED) { 3241 DEBUG_PAGE_ACCESS_START(page); 3242 vm_page_set_state(page, PAGE_STATE_FREE); 3243 } 3244 } 3245 } 3246 3247 // unmap the memory 3248 map->Unmap(start, end); 3249 } 3250 3251 3252 void 3253 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3254 { 3255 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3256 addr_t end = start + (size - 1); 3257 addr_t lastEnd = start; 3258 3259 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3260 (void*)start, (void*)end)); 3261 3262 // The areas are sorted in virtual address space order, so 3263 // we just have to find the holes between them that fall 3264 // into the area we should dispose 3265 3266 map->Lock(); 3267 3268 for (VMAddressSpace::AreaIterator it 3269 = VMAddressSpace::Kernel()->GetAreaIterator(); 3270 VMArea* area = it.Next();) { 3271 addr_t areaStart = area->Base(); 3272 addr_t areaEnd = areaStart + (area->Size() - 1); 3273 3274 if (areaEnd < start) 3275 continue; 3276 3277 if (areaStart > end) { 3278 // we are done, the area is already beyond of what we have to free 3279 end = areaStart - 1; 3280 break; 3281 } 3282 3283 if (areaStart > lastEnd) { 3284 // this is something we can free 3285 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3286 (void*)areaStart)); 3287 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3288 } 3289 3290 if (areaEnd >= end) { 3291 lastEnd = areaEnd; 3292 // no 
+1 to prevent potential overflow 3293 break; 3294 } 3295 3296 lastEnd = areaEnd + 1; 3297 } 3298 3299 if (lastEnd < end) { 3300 // we can also get rid of some space at the end of the area 3301 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3302 (void*)end)); 3303 unmap_and_free_physical_pages(map, lastEnd, end); 3304 } 3305 3306 map->Unlock(); 3307 } 3308 3309 3310 static void 3311 create_preloaded_image_areas(struct preloaded_image* image) 3312 { 3313 char name[B_OS_NAME_LENGTH]; 3314 void* address; 3315 int32 length; 3316 3317 // use file name to create a good area name 3318 char* fileName = strrchr(image->name, '/'); 3319 if (fileName == NULL) 3320 fileName = image->name; 3321 else 3322 fileName++; 3323 3324 length = strlen(fileName); 3325 // make sure there is enough space for the suffix 3326 if (length > 25) 3327 length = 25; 3328 3329 memcpy(name, fileName, length); 3330 strcpy(name + length, "_text"); 3331 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3332 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3333 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3334 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3335 // this will later be remapped read-only/executable by the 3336 // ELF initialization code 3337 3338 strcpy(name + length, "_data"); 3339 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3340 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3341 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3342 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3343 } 3344 3345 3346 /*! Frees all previously allocated kernel arguments areas from the kernel_args structure. 3347 Any boot loader resources contained in these arguments must not be accessed 3348 anymore past this point. 3349 */ 3350 void 3351 vm_free_kernel_args(kernel_args* args) 3352 { 3353 uint32 i; 3354 3355 TRACE(("vm_free_kernel_args()\n")); 3356 3357 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3358 area_id area = area_for((void*)args->kernel_args_range[i].start); 3359 if (area >= B_OK) 3360 delete_area(area); 3361 } 3362 } 3363 3364 3365 static void 3366 allocate_kernel_args(kernel_args* args) 3367 { 3368 TRACE(("allocate_kernel_args()\n")); 3369 3370 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3371 void* address = (void*)args->kernel_args_range[i].start; 3372 3373 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3374 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3375 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3376 } 3377 } 3378 3379 3380 static void 3381 unreserve_boot_loader_ranges(kernel_args* args) 3382 { 3383 TRACE(("unreserve_boot_loader_ranges()\n")); 3384 3385 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3386 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3387 (void*)args->virtual_allocated_range[i].start, 3388 args->virtual_allocated_range[i].size); 3389 } 3390 } 3391 3392 3393 static void 3394 reserve_boot_loader_ranges(kernel_args* args) 3395 { 3396 TRACE(("reserve_boot_loader_ranges()\n")); 3397 3398 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3399 void* address = (void*)args->virtual_allocated_range[i].start; 3400 3401 // If the address is not a kernel address, we just skip it. The 3402 // architecture-specific code has to deal with it.
3403 if (!IS_KERNEL_ADDRESS(address)) { 3404 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 3405 address, args->virtual_allocated_range[i].size); 3406 continue; 3407 } 3408 3409 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3410 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3411 if (status < B_OK) 3412 panic("could not reserve boot loader ranges\n"); 3413 } 3414 } 3415 3416 3417 static addr_t 3418 allocate_early_virtual(kernel_args* args, size_t size, bool blockAlign) 3419 { 3420 size = PAGE_ALIGN(size); 3421 3422 // find a slot in the virtual allocation addr range 3423 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3424 // check to see if the space between this one and the last is big enough 3425 addr_t rangeStart = args->virtual_allocated_range[i].start; 3426 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3427 + args->virtual_allocated_range[i - 1].size; 3428 3429 addr_t base = blockAlign 3430 ? ROUNDUP(previousRangeEnd, size) : previousRangeEnd; 3431 3432 if (base >= KERNEL_BASE && base < rangeStart 3433 && rangeStart - base >= size) { 3434 args->virtual_allocated_range[i - 1].size 3435 += base + size - previousRangeEnd; 3436 return base; 3437 } 3438 } 3439 3440 // we hadn't found one between allocation ranges. this is ok. 3441 // see if there's a gap after the last one 3442 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3443 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3444 + args->virtual_allocated_range[lastEntryIndex].size; 3445 addr_t base = blockAlign ? ROUNDUP(lastRangeEnd, size) : lastRangeEnd; 3446 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3447 args->virtual_allocated_range[lastEntryIndex].size 3448 += base + size - lastRangeEnd; 3449 return base; 3450 } 3451 3452 // see if there's a gap before the first one 3453 addr_t rangeStart = args->virtual_allocated_range[0].start; 3454 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3455 base = rangeStart - size; 3456 if (blockAlign) 3457 base = ROUNDDOWN(base, size); 3458 3459 if (base >= KERNEL_BASE) { 3460 args->virtual_allocated_range[0].start = base; 3461 args->virtual_allocated_range[0].size += rangeStart - base; 3462 return base; 3463 } 3464 } 3465 3466 return 0; 3467 } 3468 3469 3470 static bool 3471 is_page_in_physical_memory_range(kernel_args* args, addr_t address) 3472 { 3473 // TODO: horrible brute-force method of determining if the page can be 3474 // allocated 3475 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3476 if (address >= args->physical_memory_range[i].start 3477 && address < args->physical_memory_range[i].start 3478 + args->physical_memory_range[i].size) 3479 return true; 3480 } 3481 return false; 3482 } 3483 3484 3485 static addr_t 3486 allocate_early_physical_page(kernel_args* args) 3487 { 3488 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3489 addr_t nextPage; 3490 3491 nextPage = args->physical_allocated_range[i].start 3492 + args->physical_allocated_range[i].size; 3493 // see if the page after the next allocated paddr run can be allocated 3494 if (i + 1 < args->num_physical_allocated_ranges 3495 && args->physical_allocated_range[i + 1].size != 0) { 3496 // see if the next page will collide with the next allocated range 3497 if (nextPage >= args->physical_allocated_range[i+1].start) 3498 continue; 3499 } 3500 // see if the next physical page fits in the memory block 3501 if 
(is_page_in_physical_memory_range(args, nextPage)) { 3502 // we got one! 3503 args->physical_allocated_range[i].size += B_PAGE_SIZE; 3504 return nextPage / B_PAGE_SIZE; 3505 } 3506 } 3507 3508 return 0; 3509 // could not allocate a block 3510 } 3511 3512 3513 /*! This one uses the kernel_args' physical and virtual memory ranges to 3514 allocate some pages before the VM is completely up. 3515 */ 3516 addr_t 3517 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 3518 uint32 attributes, bool blockAlign) 3519 { 3520 if (physicalSize > virtualSize) 3521 physicalSize = virtualSize; 3522 3523 // find the vaddr to allocate at 3524 addr_t virtualBase = allocate_early_virtual(args, virtualSize, blockAlign); 3525 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 3526 3527 // map the pages 3528 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 3529 addr_t physicalAddress = allocate_early_physical_page(args); 3530 if (physicalAddress == 0) 3531 panic("error allocating early page!\n"); 3532 3533 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 3534 3535 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 3536 physicalAddress * B_PAGE_SIZE, attributes, 3537 &allocate_early_physical_page); 3538 } 3539 3540 return virtualBase; 3541 } 3542 3543 3544 /*! The main entrance point to initialize the VM. */ 3545 status_t 3546 vm_init(kernel_args* args) 3547 { 3548 struct preloaded_image* image; 3549 void* address; 3550 status_t err = 0; 3551 uint32 i; 3552 3553 TRACE(("vm_init: entry\n")); 3554 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 3555 err = arch_vm_init(args); 3556 3557 // initialize some globals 3558 vm_page_init_num_pages(args); 3559 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 3560 3561 size_t heapSize = INITIAL_HEAP_SIZE; 3562 // try to accomodate low memory systems 3563 while (heapSize > sAvailableMemory / 8) 3564 heapSize /= 2; 3565 if (heapSize < 1024 * 1024) 3566 panic("vm_init: go buy some RAM please."); 3567 3568 slab_init(args); 3569 3570 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC 3571 // map in the new heap and initialize it 3572 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 3573 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false); 3574 TRACE(("heap at 0x%lx\n", heapBase)); 3575 heap_init(heapBase, heapSize); 3576 #endif 3577 3578 // initialize the free page list and physical page mapper 3579 vm_page_init(args); 3580 3581 // initialize the hash table that stores the pages mapped to caches 3582 vm_cache_init(args); 3583 3584 { 3585 status_t error = VMAreaHash::Init(); 3586 if (error != B_OK) 3587 panic("vm_init: error initializing area hash table\n"); 3588 } 3589 3590 VMAddressSpace::Init(); 3591 reserve_boot_loader_ranges(args); 3592 3593 // Do any further initialization that the architecture dependant layers may 3594 // need now 3595 arch_vm_translation_map_init_post_area(args); 3596 arch_vm_init_post_area(args); 3597 vm_page_init_post_area(args); 3598 slab_init_post_area(); 3599 3600 // allocate areas to represent stuff that already exists 3601 3602 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC 3603 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 3604 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 3605 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3606 #endif 3607 3608 allocate_kernel_args(args); 3609 3610 create_preloaded_image_areas(&args->kernel_image); 3611 3612 // allocate areas for preloaded images 3613 for (image = 
args->preloaded_images; image != NULL; image = image->next) 3614 create_preloaded_image_areas(image); 3615 3616 // allocate kernel stacks 3617 for (i = 0; i < args->num_cpus; i++) { 3618 char name[64]; 3619 3620 sprintf(name, "idle thread %lu kstack", i + 1); 3621 address = (void*)args->cpu_kstack[i].start; 3622 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3623 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3624 } 3625 3626 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3627 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3628 3629 // create the object cache for the page mappings 3630 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3631 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3632 NULL, NULL); 3633 if (gPageMappingsObjectCache == NULL) 3634 panic("failed to create page mappings object cache"); 3635 3636 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3637 3638 #if DEBUG_CACHE_LIST 3639 create_area("cache info table", (void**)&sCacheInfoTable, 3640 B_ANY_KERNEL_ADDRESS, 3641 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3642 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3643 #endif // DEBUG_CACHE_LIST 3644 3645 // add some debugger commands 3646 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3647 add_debugger_command("area", &dump_area, 3648 "Dump info about a particular area"); 3649 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3650 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3651 #if DEBUG_CACHE_LIST 3652 add_debugger_command_etc("caches", &dump_caches, 3653 "List all VMCache trees", 3654 "[ \"-c\" ]\n" 3655 "All cache trees are listed sorted in decreasing order by number of\n" 3656 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 3657 0); 3658 #endif 3659 add_debugger_command("avail", &dump_available_memory, 3660 "Dump available memory"); 3661 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3662 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3663 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3664 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3665 add_debugger_command("string", &display_mem, "dump strings"); 3666 3667 TRACE(("vm_init: exit\n")); 3668 3669 vm_cache_init_post_heap(); 3670 3671 return err; 3672 } 3673 3674 3675 status_t 3676 vm_init_post_sem(kernel_args* args) 3677 { 3678 // This frees all unused boot loader resources and makes its space available 3679 // again 3680 arch_vm_init_end(args); 3681 unreserve_boot_loader_ranges(args); 3682 3683 // fill in all of the semaphores that were not allocated before 3684 // since we're still single threaded and only the kernel address space 3685 // exists, it isn't that hard to find all of the ones we need to create 3686 3687 arch_vm_translation_map_init_post_sem(args); 3688 VMAddressSpace::InitPostSem(); 3689 3690 slab_init_post_sem(); 3691 3692 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC 3693 heap_init_post_sem(); 3694 #endif 3695 3696 return B_OK; 3697 } 3698 3699 3700 status_t 3701 vm_init_post_thread(kernel_args* args) 3702 { 3703 vm_page_init_post_thread(args); 3704 slab_init_post_thread(); 3705 return heap_init_post_thread(); 3706 } 3707 3708 3709 status_t 3710 vm_init_post_modules(kernel_args* args) 3711 { 3712 return arch_vm_init_post_modules(args); 3713 } 3714 3715 3716 
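/*!	permit_page_faults() and forbid_page_faults() below increment respectively
	decrement the calling thread's \c page_faults_allowed counter. Judging by
	the implementation they are meant to be used in matched, nested pairs
	around code whose tolerance for page faults differs from that of the
	surrounding code.
*/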
void 3717 permit_page_faults(void) 3718 { 3719 struct thread* thread = thread_get_current_thread(); 3720 if (thread != NULL) 3721 atomic_add(&thread->page_faults_allowed, 1); 3722 } 3723 3724 3725 void 3726 forbid_page_faults(void) 3727 { 3728 struct thread* thread = thread_get_current_thread(); 3729 if (thread != NULL) 3730 atomic_add(&thread->page_faults_allowed, -1); 3731 } 3732 3733 3734 status_t 3735 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3736 addr_t* newIP) 3737 { 3738 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3739 faultAddress)); 3740 3741 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 3742 3743 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 3744 VMAddressSpace* addressSpace = NULL; 3745 3746 status_t status = B_OK; 3747 *newIP = 0; 3748 atomic_add((int32*)&sPageFaults, 1); 3749 3750 if (IS_KERNEL_ADDRESS(pageAddress)) { 3751 addressSpace = VMAddressSpace::GetKernel(); 3752 } else if (IS_USER_ADDRESS(pageAddress)) { 3753 addressSpace = VMAddressSpace::GetCurrent(); 3754 if (addressSpace == NULL) { 3755 if (!isUser) { 3756 dprintf("vm_page_fault: kernel thread accessing invalid user " 3757 "memory!\n"); 3758 status = B_BAD_ADDRESS; 3759 TPF(PageFaultError(-1, 3760 VMPageFaultTracing 3761 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 3762 } else { 3763 // XXX weird state. 3764 panic("vm_page_fault: non kernel thread accessing user memory " 3765 "that doesn't exist!\n"); 3766 status = B_BAD_ADDRESS; 3767 } 3768 } 3769 } else { 3770 // the hit was probably in the 64k DMZ between kernel and user space 3771 // this keeps a user space thread from passing a buffer that crosses 3772 // into kernel space 3773 status = B_BAD_ADDRESS; 3774 TPF(PageFaultError(-1, 3775 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 3776 } 3777 3778 if (status == B_OK) { 3779 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser, 3780 NULL); 3781 } 3782 3783 if (status < B_OK) { 3784 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 3785 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 3786 strerror(status), address, faultAddress, isWrite, isUser, 3787 thread_get_current_thread_id()); 3788 if (!isUser) { 3789 struct thread* thread = thread_get_current_thread(); 3790 if (thread != NULL && thread->fault_handler != 0) { 3791 // this will cause the arch dependant page fault handler to 3792 // modify the IP on the interrupt frame or whatever to return 3793 // to this address 3794 *newIP = thread->fault_handler; 3795 } else { 3796 // unhandled page fault in the kernel 3797 panic("vm_page_fault: unhandled page fault in kernel space at " 3798 "0x%lx, ip 0x%lx\n", address, faultAddress); 3799 } 3800 } else { 3801 #if 1 3802 addressSpace->ReadLock(); 3803 3804 // TODO: remove me once we have proper userland debugging support 3805 // (and tools) 3806 VMArea* area = addressSpace->LookupArea(faultAddress); 3807 3808 struct thread* thread = thread_get_current_thread(); 3809 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 3810 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 3811 thread->name, thread->id, thread->team->name, thread->team->id, 3812 isWrite ? "write" : "read", address, faultAddress, 3813 area ? area->name : "???", 3814 faultAddress - (area ? area->Base() : 0x0)); 3815 3816 // We can print a stack trace of the userland thread here. 
3817 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 3818 // fault and someone is already waiting for a write lock on the same address 3819 // space. This thread will then try to acquire the lock again and will 3820 // be queued after the writer. 3821 # if 0 3822 if (area) { 3823 struct stack_frame { 3824 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 3825 struct stack_frame* previous; 3826 void* return_address; 3827 #else 3828 // ... 3829 #warning writeme 3830 #endif 3831 } frame; 3832 # ifdef __INTEL__ 3833 struct iframe* iframe = i386_get_user_iframe(); 3834 if (iframe == NULL) 3835 panic("iframe is NULL!"); 3836 3837 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 3838 sizeof(struct stack_frame)); 3839 # elif defined(__POWERPC__) 3840 struct iframe* iframe = ppc_get_user_iframe(); 3841 if (iframe == NULL) 3842 panic("iframe is NULL!"); 3843 3844 status_t status = user_memcpy(&frame, (void*)iframe->r1, 3845 sizeof(struct stack_frame)); 3846 # else 3847 # warning "vm_page_fault() stack trace won't work" 3848 status = B_ERROR; 3849 # endif 3850 3851 dprintf("stack trace:\n"); 3852 int32 maxFrames = 50; 3853 while (status == B_OK && --maxFrames >= 0 3854 && frame.return_address != NULL) { 3855 dprintf(" %p", frame.return_address); 3856 area = addressSpace->LookupArea( 3857 (addr_t)frame.return_address); 3858 if (area) { 3859 dprintf(" (%s + %#lx)", area->name, 3860 (addr_t)frame.return_address - area->Base()); 3861 } 3862 dprintf("\n"); 3863 3864 status = user_memcpy(&frame, frame.previous, 3865 sizeof(struct stack_frame)); 3866 } 3867 } 3868 # endif // 0 (stack trace) 3869 3870 addressSpace->ReadUnlock(); 3871 #endif 3872 3873 // TODO: the fault_callback is a temporary solution for vm86 3874 if (thread->fault_callback == NULL 3875 || thread->fault_callback(address, faultAddress, isWrite)) { 3876 // If the thread has a signal handler for SIGSEGV, we simply 3877 // send it the signal. Otherwise we notify the user debugger 3878 // first. 3879 struct sigaction action; 3880 if (sigaction(SIGSEGV, NULL, &action) == 0 3881 && action.sa_handler != SIG_DFL 3882 && action.sa_handler != SIG_IGN) { 3883 send_signal(thread->id, SIGSEGV); 3884 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 3885 SIGSEGV)) { 3886 send_signal(thread->id, SIGSEGV); 3887 } 3888 } 3889 } 3890 } 3891 3892 if (addressSpace != NULL) 3893 addressSpace->Put(); 3894 3895 return B_HANDLED_INTERRUPT; 3896 } 3897 3898 3899 struct PageFaultContext { 3900 AddressSpaceReadLocker addressSpaceLocker; 3901 VMCacheChainLocker cacheChainLocker; 3902 3903 VMTranslationMap* map; 3904 VMCache* topCache; 3905 off_t cacheOffset; 3906 vm_page_reservation reservation; 3907 bool isWrite; 3908 3909 // return values 3910 vm_page* page; 3911 bool restart; 3912 3913 3914 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 3915 : 3916 addressSpaceLocker(addressSpace, true), 3917 map(addressSpace->TranslationMap()), 3918 isWrite(isWrite) 3919 { 3920 } 3921 3922 ~PageFaultContext() 3923 { 3924 UnlockAll(); 3925 vm_page_unreserve_pages(&reservation); 3926 } 3927 3928 void Prepare(VMCache* topCache, off_t cacheOffset) 3929 { 3930 this->topCache = topCache; 3931 this->cacheOffset = cacheOffset; 3932 page = NULL; 3933 restart = false; 3934 3935 cacheChainLocker.SetTo(topCache); 3936 } 3937 3938 void UnlockAll(VMCache* exceptCache = NULL) 3939 { 3940 topCache = NULL; 3941 addressSpaceLocker.Unlock(); 3942 cacheChainLocker.Unlock(exceptCache); 3943 } 3944 }; 3945 3946 3947 /*! 
Gets the page that should be mapped into the area. 3948 Returns an error code other than \c B_OK, if the page couldn't be found or 3949 paged in. The locking state of the address space and the caches is undefined 3950 in that case. 3951 Returns \c B_OK with \c context.restart set to \c true, if the function 3952 had to unlock the address space and all caches and is supposed to be called 3953 again. 3954 Returns \c B_OK with \c context.restart set to \c false, if the page was 3955 found. It is returned in \c context.page. The address space will still be 3956 locked as well as all caches starting from the top cache to at least the 3957 cache the page lives in. 3958 */ 3959 static status_t 3960 fault_get_page(PageFaultContext& context) 3961 { 3962 VMCache* cache = context.topCache; 3963 VMCache* lastCache = NULL; 3964 vm_page* page = NULL; 3965 3966 while (cache != NULL) { 3967 // We already hold the lock of the cache at this point. 3968 3969 lastCache = cache; 3970 3971 for (;;) { 3972 page = cache->LookupPage(context.cacheOffset); 3973 if (page == NULL || !page->busy) { 3974 // Either there is no page or there is one and it is not busy. 3975 break; 3976 } 3977 3978 // page must be busy -- wait for it to become unbusy 3979 context.UnlockAll(cache); 3980 cache->ReleaseRefLocked(); 3981 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 3982 3983 // restart the whole process 3984 context.restart = true; 3985 return B_OK; 3986 } 3987 3988 if (page != NULL) 3989 break; 3990 3991 // The current cache does not contain the page we're looking for. 3992 3993 // see if the backing store has it 3994 if (cache->HasPage(context.cacheOffset)) { 3995 // insert a fresh page and mark it busy -- we're going to read it in 3996 page = vm_page_allocate_page(&context.reservation, 3997 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 3998 cache->InsertPage(page, context.cacheOffset); 3999 4000 // We need to unlock all caches and the address space while reading 4001 // the page in. Keep a reference to the cache around. 4002 cache->AcquireRefLocked(); 4003 context.UnlockAll(); 4004 4005 // read the page in 4006 iovec vec; 4007 vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE); 4008 size_t bytesRead = vec.iov_len = B_PAGE_SIZE; 4009 4010 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4011 B_PHYSICAL_IO_REQUEST, &bytesRead); 4012 4013 cache->Lock(); 4014 4015 if (status < B_OK) { 4016 // on error remove and free the page 4017 dprintf("reading page from cache %p returned: %s!\n", 4018 cache, strerror(status)); 4019 4020 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4021 cache->RemovePage(page); 4022 vm_page_set_state(page, PAGE_STATE_FREE); 4023 4024 cache->ReleaseRefAndUnlock(); 4025 return status; 4026 } 4027 4028 // mark the page unbusy again 4029 cache->MarkPageUnbusy(page); 4030 4031 DEBUG_PAGE_ACCESS_END(page); 4032 4033 // Since we needed to unlock everything temporarily, the area 4034 // situation might have changed. So we need to restart the whole 4035 // process. 4036 cache->ReleaseRefAndUnlock(); 4037 context.restart = true; 4038 return B_OK; 4039 } 4040 4041 cache = context.cacheChainLocker.LockSourceCache(); 4042 } 4043 4044 if (page == NULL) { 4045 // There was no adequate page, determine the cache for a clean one. 4046 // Read-only pages come in the deepest cache, only the top most cache 4047 // may have direct write access. 4048 cache = context.isWrite ?
context.topCache : lastCache; 4049 4050 // allocate a clean page 4051 page = vm_page_allocate_page(&context.reservation, 4052 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4053 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 4054 page->physical_page_number)); 4055 4056 // insert the new page into our cache 4057 cache->InsertPage(page, context.cacheOffset); 4058 } else if (page->Cache() != context.topCache && context.isWrite) { 4059 // We have a page that has the data we want, but in the wrong cache 4060 // object so we need to copy it and stick it into the top cache. 4061 vm_page* sourcePage = page; 4062 4063 // TODO: If memory is low, it might be a good idea to steal the page 4064 // from our source cache -- if possible, that is. 4065 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4066 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4067 4068 // To not needlessly kill concurrency we unlock all caches but the top 4069 // one while copying the page. Lacking another mechanism to ensure that 4070 // the source page doesn't disappear, we mark it busy. 4071 sourcePage->busy = true; 4072 context.cacheChainLocker.UnlockKeepRefs(true); 4073 4074 // copy the page 4075 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4076 sourcePage->physical_page_number * B_PAGE_SIZE); 4077 4078 context.cacheChainLocker.RelockCaches(true); 4079 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4080 4081 // insert the new page into our cache 4082 context.topCache->InsertPage(page, context.cacheOffset); 4083 } else 4084 DEBUG_PAGE_ACCESS_START(page); 4085 4086 context.page = page; 4087 return B_OK; 4088 } 4089 4090 4091 /*! Makes sure the address in the given address space is mapped. 4092 4093 \param addressSpace The address space. 4094 \param originalAddress The address. Doesn't need to be page aligned. 4095 \param isWrite If \c true the address shall be write-accessible. 4096 \param isUser If \c true the access is requested by a userland team. 4097 \param wirePage On success, if non \c NULL, the wired count of the page 4098 mapped at the given address is incremented and the page is returned 4099 via this parameter. 4100 \param wiredRange If given, this wiredRange is ignored when checking whether 4101 an already mapped page at the virtual address can be unmapped. 4102 \return \c B_OK on success, another error code otherwise. 4103 */ 4104 static status_t 4105 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4106 bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange) 4107 { 4108 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 4109 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 4110 4111 PageFaultContext context(addressSpace, isWrite); 4112 4113 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4114 status_t status = B_OK; 4115 4116 addressSpace->IncrementFaultCount(); 4117 4118 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4119 // the pages upfront makes sure we don't have any cache locked, so that the 4120 // page daemon/thief can do their job without problems. 4121 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4122 originalAddress); 4123 context.addressSpaceLocker.Unlock(); 4124 vm_page_reserve_pages(&context.reservation, reservePages, 4125 addressSpace == VMAddressSpace::Kernel() 4126 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4127 4128 while (true) { 4129 context.addressSpaceLocker.Lock(); 4130 4131 // get the area the fault was in 4132 VMArea* area = addressSpace->LookupArea(address); 4133 if (area == NULL) { 4134 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4135 "space\n", originalAddress); 4136 TPF(PageFaultError(-1, 4137 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4138 status = B_BAD_ADDRESS; 4139 break; 4140 } 4141 4142 // check permissions 4143 uint32 protection = get_area_page_protection(area, address); 4144 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4145 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 4146 (void*)originalAddress); 4147 TPF(PageFaultError(area->id, 4148 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4149 status = B_PERMISSION_DENIED; 4150 break; 4151 } 4152 if (isWrite && (protection 4153 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4154 dprintf("write access attempted on write-protected area 0x%lx at" 4155 " %p\n", area->id, (void*)originalAddress); 4156 TPF(PageFaultError(area->id, 4157 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4158 status = B_PERMISSION_DENIED; 4159 break; 4160 } else if (!isWrite && (protection 4161 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4162 dprintf("read access attempted on read-protected area 0x%lx at" 4163 " %p\n", area->id, (void*)originalAddress); 4164 TPF(PageFaultError(area->id, 4165 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4166 status = B_PERMISSION_DENIED; 4167 break; 4168 } 4169 4170 // We have the area, it was a valid access, so let's try to resolve the 4171 // page fault now. 4172 // At first, the top most cache from the area is investigated. 4173 4174 context.Prepare(vm_area_get_locked_cache(area), 4175 address - area->Base() + area->cache_offset); 4176 4177 // See if this cache has a fault handler -- this will do all the work 4178 // for us. 4179 { 4180 // Note, since the page fault is resolved with interrupts enabled, 4181 // the fault handler could be called more than once for the same 4182 // reason -- the store must take this into account. 4183 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4184 if (status != B_BAD_HANDLER) 4185 break; 4186 } 4187 4188 // The top most cache has no fault handler, so let's see if the cache or 4189 // its sources already have the page we're searching for (we're going 4190 // from top to bottom). 4191 status = fault_get_page(context); 4192 if (status != B_OK) { 4193 TPF(PageFaultError(area->id, status)); 4194 break; 4195 } 4196 4197 if (context.restart) 4198 continue; 4199 4200 // All went fine, all there is left to do is to map the page into the 4201 // address space. 
4202 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4203 context.page)); 4204 4205 // If the page doesn't reside in the area's cache, we need to make sure 4206 // it's mapped in read-only, so that we cannot overwrite someone else's 4207 // data (copy-on-write) 4208 uint32 newProtection = protection; 4209 if (context.page->Cache() != context.topCache && !isWrite) 4210 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4211 4212 bool unmapPage = false; 4213 bool mapPage = true; 4214 4215 // check whether there's already a page mapped at the address 4216 context.map->Lock(); 4217 4218 addr_t physicalAddress; 4219 uint32 flags; 4220 vm_page* mappedPage = NULL; 4221 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4222 && (flags & PAGE_PRESENT) != 0 4223 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4224 != NULL) { 4225 // Yep there's already a page. If it's ours, we can simply adjust 4226 // its protection. Otherwise we have to unmap it. 4227 if (mappedPage == context.page) { 4228 context.map->ProtectPage(area, address, newProtection); 4229 // Note: We assume that ProtectPage() is atomic (i.e. 4230 // the page isn't temporarily unmapped), otherwise we'd have 4231 // to make sure it isn't wired. 4232 mapPage = false; 4233 } else 4234 unmapPage = true; 4235 } 4236 4237 context.map->Unlock(); 4238 4239 if (unmapPage) { 4240 // If the page is wired, we can't unmap it. Wait until it is unwired 4241 // again and restart. 4242 VMAreaUnwiredWaiter waiter; 4243 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4244 wiredRange)) { 4245 // unlock everything and wait 4246 context.UnlockAll(); 4247 waiter.waitEntry.Wait(); 4248 continue; 4249 } 4250 4251 // Note: The mapped page is a page of a lower cache. We are 4252 // guaranteed to have that cached locked, our new page is a copy of 4253 // that page, and the page is not busy. The logic for that guarantee 4254 // is as follows: Since the page is mapped, it must live in the top 4255 // cache (ruled out above) or any of its lower caches, and there is 4256 // (was before the new page was inserted) no other page in any 4257 // cache between the top cache and the page's cache (otherwise that 4258 // would be mapped instead). That in turn means that our algorithm 4259 // must have found it and therefore it cannot be busy either. 4260 DEBUG_PAGE_ACCESS_START(mappedPage); 4261 unmap_page(area, address); 4262 DEBUG_PAGE_ACCESS_END(mappedPage); 4263 } 4264 4265 if (mapPage) { 4266 if (map_page(area, context.page, address, newProtection, 4267 &context.reservation) != B_OK) { 4268 // Mapping can only fail, when the page mapping object couldn't 4269 // be allocated. Save for the missing mapping everything is 4270 // fine, though. If this was a regular page fault, we'll simply 4271 // leave and probably fault again. To make sure we'll have more 4272 // luck then, we ensure that the minimum object reserve is 4273 // available. 4274 DEBUG_PAGE_ACCESS_END(context.page); 4275 4276 context.UnlockAll(); 4277 4278 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4279 != B_OK) { 4280 // Apparently the situation is serious. Let's get ourselves 4281 // killed. 4282 status = B_NO_MEMORY; 4283 } else if (wirePage != NULL) { 4284 // The caller expects us to wire the page. Since 4285 // object_cache_reserve() succeeded, we should now be able 4286 // to allocate a mapping structure. Restart. 
4287 continue; 4288 } 4289 4290 break; 4291 } 4292 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4293 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4294 4295 // also wire the page, if requested 4296 if (wirePage != NULL && status == B_OK) { 4297 increment_page_wired_count(context.page); 4298 *wirePage = context.page; 4299 } 4300 4301 DEBUG_PAGE_ACCESS_END(context.page); 4302 4303 break; 4304 } 4305 4306 return status; 4307 } 4308 4309 4310 status_t 4311 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 4312 { 4313 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4314 } 4315 4316 status_t 4317 vm_put_physical_page(addr_t vaddr, void* handle) 4318 { 4319 return sPhysicalPageMapper->PutPage(vaddr, handle); 4320 } 4321 4322 4323 status_t 4324 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 4325 { 4326 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4327 } 4328 4329 status_t 4330 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4331 { 4332 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4333 } 4334 4335 4336 status_t 4337 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 4338 { 4339 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4340 } 4341 4342 status_t 4343 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4344 { 4345 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4346 } 4347 4348 4349 void 4350 vm_get_info(system_memory_info* info) 4351 { 4352 swap_get_info(info); 4353 4354 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4355 info->page_faults = sPageFaults; 4356 4357 MutexLocker locker(sAvailableMemoryLock); 4358 info->free_memory = sAvailableMemory; 4359 info->needed_memory = sNeededMemory; 4360 } 4361 4362 4363 uint32 4364 vm_num_page_faults(void) 4365 { 4366 return sPageFaults; 4367 } 4368 4369 4370 off_t 4371 vm_available_memory(void) 4372 { 4373 MutexLocker locker(sAvailableMemoryLock); 4374 return sAvailableMemory; 4375 } 4376 4377 4378 off_t 4379 vm_available_not_needed_memory(void) 4380 { 4381 MutexLocker locker(sAvailableMemoryLock); 4382 return sAvailableMemory - sNeededMemory; 4383 } 4384 4385 4386 size_t 4387 vm_kernel_address_space_left(void) 4388 { 4389 return VMAddressSpace::Kernel()->FreeSpace(); 4390 } 4391 4392 4393 void 4394 vm_unreserve_memory(size_t amount) 4395 { 4396 mutex_lock(&sAvailableMemoryLock); 4397 4398 sAvailableMemory += amount; 4399 4400 mutex_unlock(&sAvailableMemoryLock); 4401 } 4402 4403 4404 status_t 4405 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4406 { 4407 size_t reserve = kMemoryReserveForPriority[priority]; 4408 4409 MutexLocker locker(sAvailableMemoryLock); 4410 4411 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4412 4413 if (sAvailableMemory >= amount + reserve) { 4414 sAvailableMemory -= amount; 4415 return B_OK; 4416 } 4417 4418 if (timeout <= 0) 4419 return B_NO_MEMORY; 4420 4421 // turn timeout into an absolute timeout 4422 timeout += system_time(); 4423 4424 // loop until we've got the memory or the timeout occurs 4425 do { 4426 sNeededMemory += amount; 4427 4428 // call the low resource manager 4429 locker.Unlock(); 4430 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4431 B_ABSOLUTE_TIMEOUT, timeout); 4432 locker.Lock(); 4433 4434 sNeededMemory -= amount; 4435 4436 if (sAvailableMemory >= amount + reserve) { 4437 sAvailableMemory -= amount; 4438 return B_OK; 4439 } 4440 } 
while (timeout > system_time()); 4441 4442 return B_NO_MEMORY; 4443 } 4444 4445 4446 status_t 4447 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type) 4448 { 4449 AddressSpaceReadLocker locker; 4450 VMArea* area; 4451 status_t status = locker.SetFromArea(id, area); 4452 if (status != B_OK) 4453 return status; 4454 4455 return arch_vm_set_memory_type(area, physicalBase, type); 4456 } 4457 4458 4459 /*! This function enforces some protection properties: 4460 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 4461 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 4462 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 4463 and B_KERNEL_WRITE_AREA. 4464 */ 4465 static void 4466 fix_protection(uint32* protection) 4467 { 4468 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4469 if ((*protection & B_USER_PROTECTION) == 0 4470 || (*protection & B_WRITE_AREA) != 0) 4471 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4472 else 4473 *protection |= B_KERNEL_READ_AREA; 4474 } 4475 } 4476 4477 4478 static void 4479 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4480 { 4481 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4482 info->area = area->id; 4483 info->address = (void*)area->Base(); 4484 info->size = area->Size(); 4485 info->protection = area->protection; 4486 info->lock = B_FULL_LOCK; 4487 info->team = area->address_space->ID(); 4488 info->copy_count = 0; 4489 info->in_count = 0; 4490 info->out_count = 0; 4491 // TODO: retrieve real values here! 4492 4493 VMCache* cache = vm_area_get_locked_cache(area); 4494 4495 // Note, this is a simplification; the cache could be larger than this area 4496 info->ram_size = cache->page_count * B_PAGE_SIZE; 4497 4498 vm_area_put_locked_cache(cache); 4499 } 4500 4501 4502 static status_t 4503 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4504 { 4505 // is newSize a multiple of B_PAGE_SIZE? 4506 if (newSize & (B_PAGE_SIZE - 1)) 4507 return B_BAD_VALUE; 4508 4509 // lock all affected address spaces and the cache 4510 VMArea* area; 4511 VMCache* cache; 4512 4513 MultiAddressSpaceLocker locker; 4514 AreaCacheLocker cacheLocker; 4515 4516 status_t status; 4517 size_t oldSize; 4518 bool anyKernelArea; 4519 bool restart; 4520 4521 do { 4522 anyKernelArea = false; 4523 restart = false; 4524 4525 locker.Unset(); 4526 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4527 if (status != B_OK) 4528 return status; 4529 cacheLocker.SetTo(cache, true); // already locked 4530 4531 // enforce restrictions 4532 if (!kernel) { 4533 if ((area->protection & B_KERNEL_AREA) != 0) 4534 return B_NOT_ALLOWED; 4535 // TODO: Enforce all restrictions (team, etc.)! 4536 } 4537 4538 oldSize = area->Size(); 4539 if (newSize == oldSize) 4540 return B_OK; 4541 4542 if (cache->type != CACHE_TYPE_RAM) 4543 return B_NOT_ALLOWED; 4544 4545 if (oldSize < newSize) { 4546 // We need to check if all areas of this cache can be resized. 4547 for (VMArea* current = cache->areas; current != NULL; 4548 current = current->cache_next) { 4549 if (!current->address_space->CanResizeArea(current, newSize)) 4550 return B_ERROR; 4551 anyKernelArea 4552 |= current->address_space == VMAddressSpace::Kernel(); 4553 } 4554 } else { 4555 // We're shrinking the areas, so we must make sure the affected 4556 // ranges are not wired.
4557 for (VMArea* current = cache->areas; current != NULL; 4558 current = current->cache_next) { 4559 anyKernelArea 4560 |= current->address_space == VMAddressSpace::Kernel(); 4561 4562 if (wait_if_area_range_is_wired(current, 4563 current->Base() + newSize, oldSize - newSize, &locker, 4564 &cacheLocker)) { 4565 restart = true; 4566 break; 4567 } 4568 } 4569 } 4570 } while (restart); 4571 4572 // Okay, looks good so far, so let's do it 4573 4574 int priority = kernel && anyKernelArea 4575 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4576 uint32 allocationFlags = kernel && anyKernelArea 4577 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4578 4579 if (oldSize < newSize) { 4580 // Growing the cache can fail, so we do it first. 4581 status = cache->Resize(cache->virtual_base + newSize, priority); 4582 if (status != B_OK) 4583 return status; 4584 } 4585 4586 for (VMArea* current = cache->areas; current != NULL; 4587 current = current->cache_next) { 4588 status = current->address_space->ResizeArea(current, newSize, 4589 allocationFlags); 4590 if (status != B_OK) 4591 break; 4592 4593 // We also need to unmap all pages beyond the new size, if the area has 4594 // shrunk 4595 if (newSize < oldSize) { 4596 VMCacheChainLocker cacheChainLocker(cache); 4597 cacheChainLocker.LockAllSourceCaches(); 4598 4599 unmap_pages(current, current->Base() + newSize, 4600 oldSize - newSize); 4601 4602 cacheChainLocker.Unlock(cache); 4603 } 4604 } 4605 4606 // shrinking the cache can't fail, so we do it now 4607 if (status == B_OK && newSize < oldSize) 4608 status = cache->Resize(cache->virtual_base + newSize, priority); 4609 4610 if (status != B_OK) { 4611 // Something failed -- resize the areas back to their original size. 4612 // This can fail, too, in which case we're seriously screwed. 4613 for (VMArea* current = cache->areas; current != NULL; 4614 current = current->cache_next) { 4615 if (current->address_space->ResizeArea(current, oldSize, 4616 allocationFlags) != B_OK) { 4617 panic("vm_resize_area(): Failed and not being able to restore " 4618 "original state."); 4619 } 4620 } 4621 4622 cache->Resize(cache->virtual_base + oldSize, priority); 4623 } 4624 4625 // TODO: we must honour the lock restrictions of this area 4626 return status; 4627 } 4628 4629 4630 status_t 4631 vm_memset_physical(addr_t address, int value, size_t length) 4632 { 4633 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4634 } 4635 4636 4637 status_t 4638 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 4639 { 4640 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4641 } 4642 4643 4644 status_t 4645 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 4646 { 4647 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4648 } 4649 4650 4651 void 4652 vm_memcpy_physical_page(addr_t to, addr_t from) 4653 { 4654 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4655 } 4656 4657 4658 // #pragma mark - kernel public API 4659 4660 4661 status_t 4662 user_memcpy(void* to, const void* from, size_t size) 4663 { 4664 // don't allow address overflows 4665 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 4666 return B_BAD_ADDRESS; 4667 4668 if (arch_cpu_user_memcpy(to, from, size, 4669 &thread_get_current_thread()->fault_handler) < B_OK) 4670 return B_BAD_ADDRESS; 4671 4672 return B_OK; 4673 } 4674 4675 4676 /*! 
\brief Copies at most (\a size - 1) characters from the string in \a from to 4677 the string in \a to, NULL-terminating the result. 4678 4679 \param to Pointer to the destination C-string. 4680 \param from Pointer to the source C-string. 4681 \param size Size in bytes of the string buffer pointed to by \a to. 4682 4683 \return strlen(\a from). 4684 */ 4685 ssize_t 4686 user_strlcpy(char* to, const char* from, size_t size) 4687 { 4688 if (to == NULL && size != 0) 4689 return B_BAD_VALUE; 4690 if (from == NULL) 4691 return B_BAD_ADDRESS; 4692 4693 // limit size to avoid address overflows 4694 size_t maxSize = std::min(size, 4695 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 4696 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 4697 // the source address might still overflow. 4698 4699 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 4700 &thread_get_current_thread()->fault_handler); 4701 4702 // If we hit the address overflow boundary, fail. 4703 if (result >= 0 && (size_t)result >= maxSize && maxSize < size) 4704 return B_BAD_ADDRESS; 4705 4706 return result; 4707 } 4708 4709 4710 status_t 4711 user_memset(void* s, char c, size_t count) 4712 { 4713 // don't allow address overflows 4714 if ((addr_t)s + count < (addr_t)s) 4715 return B_BAD_ADDRESS; 4716 4717 if (arch_cpu_user_memset(s, c, count, 4718 &thread_get_current_thread()->fault_handler) < B_OK) 4719 return B_BAD_ADDRESS; 4720 4721 return B_OK; 4722 } 4723 4724 4725 /*! Wires down the given address range in the specified team's address space. 4726 4727 If successful the function 4728 - acquires a reference to the specified team's address space, 4729 - adds respective wired ranges to all areas that intersect with the given 4730 address range, 4731 - makes sure all pages in the given address range are mapped with the 4732 requested access permissions and increments their wired count. 4733 4734 It fails, when \a team doesn't specify a valid address space, when any part 4735 of the specified address range is not covered by areas, when the concerned 4736 areas don't allow mapping with the requested permissions, or when mapping 4737 failed for another reason. 4738 4739 When successful the call must be balanced by an unlock_memory_etc() call with 4740 the exact same parameters. 4741 4742 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 4743 supported. 4744 \param address The start of the address range to be wired. 4745 \param numBytes The size of the address range to be wired. 4746 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 4747 requests that the range must be wired writable ("read from device 4748 into memory"). 4749 \return \c B_OK on success, another error code otherwise. 4750 */ 4751 status_t 4752 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 4753 { 4754 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 4755 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 4756 4757 // compute the page protection that is required 4758 bool isUser = IS_USER_ADDRESS(address); 4759 bool writable = (flags & B_READ_DEVICE) == 0; 4760 uint32 requiredProtection = PAGE_PRESENT 4761 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 4762 if (writable) 4763 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 4764 4765 uint32 mallocFlags = isUser 4766 ?
0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 4767 4768 // get and read lock the address space 4769 VMAddressSpace* addressSpace = NULL; 4770 if (isUser) { 4771 if (team == B_CURRENT_TEAM) 4772 addressSpace = VMAddressSpace::GetCurrent(); 4773 else 4774 addressSpace = VMAddressSpace::Get(team); 4775 } else 4776 addressSpace = VMAddressSpace::GetKernel(); 4777 if (addressSpace == NULL) 4778 return B_ERROR; 4779 4780 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 4781 4782 VMTranslationMap* map = addressSpace->TranslationMap(); 4783 status_t error = B_OK; 4784 4785 // iterate through all concerned areas 4786 addr_t nextAddress = lockBaseAddress; 4787 while (nextAddress != lockEndAddress) { 4788 // get the next area 4789 VMArea* area = addressSpace->LookupArea(nextAddress); 4790 if (area == NULL) { 4791 error = B_BAD_ADDRESS; 4792 break; 4793 } 4794 4795 addr_t areaStart = nextAddress; 4796 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 4797 4798 // allocate the wired range (do that before locking the cache to avoid 4799 // deadlocks) 4800 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 4801 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 4802 if (range == NULL) { 4803 error = B_NO_MEMORY; 4804 break; 4805 } 4806 4807 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 4808 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 4809 4810 // mark the area range wired 4811 area->Wire(range); 4812 4813 // Depending on the area cache type and the wiring, we may not need to 4814 // look at the individual pages. 4815 if (area->cache_type == CACHE_TYPE_NULL 4816 || area->cache_type == CACHE_TYPE_DEVICE 4817 || area->wiring == B_FULL_LOCK 4818 || area->wiring == B_CONTIGUOUS) { 4819 nextAddress = areaEnd; 4820 continue; 4821 } 4822 4823 // Lock the area's cache chain and the translation map. Needed to look 4824 // up pages and play with their wired count. 4825 cacheChainLocker.LockAllSourceCaches(); 4826 map->Lock(); 4827 4828 // iterate through the pages and wire them 4829 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 4830 addr_t physicalAddress; 4831 uint32 flags; 4832 4833 vm_page* page; 4834 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 4835 && (flags & requiredProtection) == requiredProtection 4836 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4837 != NULL) { 4838 // Already mapped with the correct permissions -- just increment 4839 // the page's wired count. 4840 increment_page_wired_count(page); 4841 } else { 4842 // Let vm_soft_fault() map the page for us, if possible. We need 4843 // to fully unlock to avoid deadlocks. Since we have already 4844 // wired the area itself, nothing disturbing will happen with it 4845 // in the meantime. 4846 map->Unlock(); 4847 cacheChainLocker.Unlock(); 4848 addressSpaceLocker.Unlock(); 4849 4850 error = vm_soft_fault(addressSpace, nextAddress, writable, 4851 isUser, &page, range); 4852 4853 addressSpaceLocker.Lock(); 4854 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 4855 cacheChainLocker.LockAllSourceCaches(); 4856 map->Lock(); 4857 } 4858 4859 if (error != B_OK) 4860 break; 4861 } 4862 4863 map->Unlock(); 4864 4865 if (error == B_OK) { 4866 cacheChainLocker.Unlock(); 4867 } else { 4868 // An error occurred, so abort right here. If the current address 4869 // is the first in this area, unwire the area, since we won't get 4870 // to it when reverting what we've done so far. 
4871 if (nextAddress == areaStart) { 4872 area->Unwire(range); 4873 cacheChainLocker.Unlock(); 4874 range->~VMAreaWiredRange(); 4875 free_etc(range, mallocFlags); 4876 } else 4877 cacheChainLocker.Unlock(); 4878 4879 break; 4880 } 4881 } 4882 4883 if (error != B_OK) { 4884 // An error occurred, so unwire all that we've already wired. Note that 4885 // even if not a single page was wired, unlock_memory_etc() is called 4886 // to put the address space reference. 4887 addressSpaceLocker.Unlock(); 4888 unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress, 4889 flags); 4890 } 4891 4892 return error; 4893 } 4894 4895 4896 status_t 4897 lock_memory(void* address, size_t numBytes, uint32 flags) 4898 { 4899 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 4900 } 4901 4902 4903 /*! Unwires an address range previously wired with lock_memory_etc(). 4904 4905 Note that a call to this function must balance a previous lock_memory_etc() 4906 call with exactly the same parameters. 4907 */ 4908 status_t 4909 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 4910 { 4911 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 4912 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 4913 4914 // compute the page protection that is required 4915 bool isUser = IS_USER_ADDRESS(address); 4916 bool writable = (flags & B_READ_DEVICE) == 0; 4917 uint32 requiredProtection = PAGE_PRESENT 4918 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 4919 if (writable) 4920 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 4921 4922 uint32 mallocFlags = isUser 4923 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 4924 4925 // get and read lock the address space 4926 VMAddressSpace* addressSpace = NULL; 4927 if (isUser) { 4928 if (team == B_CURRENT_TEAM) 4929 addressSpace = VMAddressSpace::GetCurrent(); 4930 else 4931 addressSpace = VMAddressSpace::Get(team); 4932 } else 4933 addressSpace = VMAddressSpace::GetKernel(); 4934 if (addressSpace == NULL) 4935 return B_ERROR; 4936 4937 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 4938 4939 VMTranslationMap* map = addressSpace->TranslationMap(); 4940 status_t error = B_OK; 4941 4942 // iterate through all concerned areas 4943 addr_t nextAddress = lockBaseAddress; 4944 while (nextAddress != lockEndAddress) { 4945 // get the next area 4946 VMArea* area = addressSpace->LookupArea(nextAddress); 4947 if (area == NULL) { 4948 error = B_BAD_ADDRESS; 4949 break; 4950 } 4951 4952 addr_t areaStart = nextAddress; 4953 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 4954 4955 // Lock the area's top cache. This is a requirement for 4956 // VMArea::Unwire(). 4957 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 4958 4959 // Depending on the area cache type and the wiring, we may not need to 4960 // look at the individual pages. 
4961 if (area->cache_type == CACHE_TYPE_NULL 4962 || area->cache_type == CACHE_TYPE_DEVICE 4963 || area->wiring == B_FULL_LOCK 4964 || area->wiring == B_CONTIGUOUS) { 4965 // unwire the range (to avoid deadlocks we delete the range after 4966 // unlocking the cache) 4967 nextAddress = areaEnd; 4968 VMAreaWiredRange* range = area->Unwire(areaStart, 4969 areaEnd - areaStart, writable); 4970 cacheChainLocker.Unlock(); 4971 if (range != NULL) { 4972 range->~VMAreaWiredRange(); 4973 free_etc(range, mallocFlags); 4974 } 4975 continue; 4976 } 4977 4978 // Lock the area's cache chain and the translation map. Needed to look 4979 // up pages and play with their wired count. 4980 cacheChainLocker.LockAllSourceCaches(); 4981 map->Lock(); 4982 4983 // iterate through the pages and unwire them 4984 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 4985 addr_t physicalAddress; 4986 uint32 flags; 4987 4988 vm_page* page; 4989 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 4990 && (flags & PAGE_PRESENT) != 0 4991 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4992 != NULL) { 4993 // The page is still mapped -- just decrement 4994 // the page's wired count. 4995 decrement_page_wired_count(page); 4996 } else { 4997 panic("unlock_memory_etc(): Failed to unwire page: address " 4998 "space %p, address: %#" B_PRIxADDR, addressSpace, 4999 nextAddress); 5000 error = B_BAD_VALUE; 5001 break; 5002 } 5003 } 5004 5005 map->Unlock(); 5006 5007 // All pages are unwired. Remove the area's wired range as well (to 5008 // avoid deadlocks we delete the range after unlocking the cache). 5009 VMAreaWiredRange* range = area->Unwire(areaStart, 5010 areaEnd - areaStart, writable); 5011 5012 cacheChainLocker.Unlock(); 5013 5014 if (range != NULL) { 5015 range->~VMAreaWiredRange(); 5016 free_etc(range, mallocFlags); 5017 } 5018 5019 if (error != B_OK) 5020 break; 5021 } 5022 5023 // get rid of the address space reference 5024 addressSpace->Put(); 5025 5026 return error; 5027 } 5028 5029 5030 status_t 5031 unlock_memory(void* address, size_t numBytes, uint32 flags) 5032 { 5033 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5034 } 5035 5036 5037 /*! Similar to get_memory_map(), but also allows specifying the address space 5038 for the memory in question and has saner semantics. 5039 Returns \c B_OK when the complete range could be translated or 5040 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5041 case the actual number of entries is written to \c *_numEntries. Any other 5042 error case indicates complete failure; \c *_numEntries will be set to \c 0 5043 in this case. 5044 */ 5045 status_t 5046 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5047 physical_entry* table, uint32* _numEntries) 5048 { 5049 uint32 numEntries = *_numEntries; 5050 *_numEntries = 0; 5051 5052 VMAddressSpace* addressSpace; 5053 addr_t virtualAddress = (addr_t)address; 5054 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5055 addr_t physicalAddress; 5056 status_t status = B_OK; 5057 int32 index = -1; 5058 addr_t offset = 0; 5059 bool interrupts = are_interrupts_enabled(); 5060 5061 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5062 address, numBytes, numEntries)); 5063 5064 if (numEntries == 0 || numBytes == 0) 5065 return B_BAD_VALUE; 5066 5067 // in which address space is the address to be found?
5068 if (IS_USER_ADDRESS(virtualAddress)) { 5069 if (team == B_CURRENT_TEAM) 5070 addressSpace = VMAddressSpace::GetCurrent(); 5071 else 5072 addressSpace = VMAddressSpace::Get(team); 5073 } else 5074 addressSpace = VMAddressSpace::GetKernel(); 5075 5076 if (addressSpace == NULL) 5077 return B_ERROR; 5078 5079 VMTranslationMap* map = addressSpace->TranslationMap(); 5080 5081 if (interrupts) 5082 map->Lock(); 5083 5084 while (offset < numBytes) { 5085 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5086 uint32 flags; 5087 5088 if (interrupts) { 5089 status = map->Query((addr_t)address + offset, &physicalAddress, 5090 &flags); 5091 } else { 5092 status = map->QueryInterrupt((addr_t)address + offset, 5093 &physicalAddress, &flags); 5094 } 5095 if (status < B_OK) 5096 break; 5097 if ((flags & PAGE_PRESENT) == 0) { 5098 panic("get_memory_map() called on unmapped memory!"); 5099 return B_BAD_ADDRESS; 5100 } 5101 5102 if (index < 0 && pageOffset > 0) { 5103 physicalAddress += pageOffset; 5104 if (bytes > B_PAGE_SIZE - pageOffset) 5105 bytes = B_PAGE_SIZE - pageOffset; 5106 } 5107 5108 // need to switch to the next physical_entry? 5109 if (index < 0 || (addr_t)table[index].address 5110 != physicalAddress - table[index].size) { 5111 if ((uint32)++index + 1 > numEntries) { 5112 // table too small 5113 status = B_BUFFER_OVERFLOW; 5114 break; 5115 } 5116 table[index].address = (void*)physicalAddress; 5117 table[index].size = bytes; 5118 } else { 5119 // page fits in the current entry 5120 table[index].size += bytes; 5121 } 5122 5123 offset += bytes; 5124 } 5125 5126 if (interrupts) 5127 map->Unlock(); 5128 5129 if (status != B_OK) 5130 return status; 5131 5132 if ((uint32)index + 1 > numEntries) { 5133 *_numEntries = index; 5134 return B_BUFFER_OVERFLOW; 5135 } 5136 5137 *_numEntries = index + 1; 5138 return B_OK; 5139 } 5140 5141 5142 /*! According to the BeBook, this function should always succeed. 5143 This is no longer the case.
5144 */ 5145 long 5146 get_memory_map(const void* address, ulong numBytes, physical_entry* table, 5147 long numEntries) 5148 { 5149 uint32 entriesRead = numEntries; 5150 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5151 table, &entriesRead); 5152 if (error != B_OK) 5153 return error; 5154 5155 // close the entry list 5156 5157 // if it's only one entry, we will silently accept the missing ending 5158 if (numEntries == 1) 5159 return B_OK; 5160 5161 if (entriesRead + 1 > (uint32)numEntries) 5162 return B_BUFFER_OVERFLOW; 5163 5164 table[entriesRead].address = NULL; 5165 table[entriesRead].size = 0; 5166 5167 return B_OK; 5168 } 5169 5170 5171 area_id 5172 area_for(void* address) 5173 { 5174 return vm_area_for((addr_t)address, true); 5175 } 5176 5177 5178 area_id 5179 find_area(const char* name) 5180 { 5181 return VMAreaHash::Find(name); 5182 } 5183 5184 5185 status_t 5186 _get_area_info(area_id id, area_info* info, size_t size) 5187 { 5188 if (size != sizeof(area_info) || info == NULL) 5189 return B_BAD_VALUE; 5190 5191 AddressSpaceReadLocker locker; 5192 VMArea* area; 5193 status_t status = locker.SetFromArea(id, area); 5194 if (status != B_OK) 5195 return status; 5196 5197 fill_area_info(area, info, size); 5198 return B_OK; 5199 } 5200 5201 5202 status_t 5203 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 5204 { 5205 addr_t nextBase = *(addr_t*)cookie; 5206 5207 // we're already through the list 5208 if (nextBase == (addr_t)-1) 5209 return B_ENTRY_NOT_FOUND; 5210 5211 if (team == B_CURRENT_TEAM) 5212 team = team_get_current_team_id(); 5213 5214 AddressSpaceReadLocker locker(team); 5215 if (!locker.IsLocked()) 5216 return B_BAD_TEAM_ID; 5217 5218 VMArea* area; 5219 for (VMAddressSpace::AreaIterator it 5220 = locker.AddressSpace()->GetAreaIterator(); 5221 (area = it.Next()) != NULL;) { 5222 if (area->Base() > nextBase) 5223 break; 5224 } 5225 5226 if (area == NULL) { 5227 nextBase = (addr_t)-1; 5228 return B_ENTRY_NOT_FOUND; 5229 } 5230 5231 fill_area_info(area, info, size); 5232 *cookie = (int32)(area->Base()); 5233 // TODO: Not 64 bit safe! 5234 5235 return B_OK; 5236 } 5237 5238 5239 status_t 5240 set_area_protection(area_id area, uint32 newProtection) 5241 { 5242 fix_protection(&newProtection); 5243 5244 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5245 newProtection, true); 5246 } 5247 5248 5249 status_t 5250 resize_area(area_id areaID, size_t newSize) 5251 { 5252 return vm_resize_area(areaID, newSize, true); 5253 } 5254 5255 5256 /*! Transfers the specified area to a new team. The caller must be the owner 5257 of the area. 5258 */ 5259 area_id 5260 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5261 bool kernel) 5262 { 5263 area_info info; 5264 status_t status = get_area_info(id, &info); 5265 if (status != B_OK) 5266 return status; 5267 5268 if (info.team != thread_get_current_thread()->team->id) 5269 return B_PERMISSION_DENIED; 5270 5271 area_id clonedArea = vm_clone_area(target, info.name, _address, 5272 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5273 if (clonedArea < 0) 5274 return clonedArea; 5275 5276 status = vm_delete_area(info.team, id, kernel); 5277 if (status != B_OK) { 5278 vm_delete_area(target, clonedArea, kernel); 5279 return status; 5280 } 5281 5282 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
5283 5284 return clonedArea; 5285 } 5286 5287 5288 area_id 5289 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes, 5290 uint32 addressSpec, uint32 protection, void** _virtualAddress) 5291 { 5292 if (!arch_vm_supports_protection(protection)) 5293 return B_NOT_SUPPORTED; 5294 5295 fix_protection(&protection); 5296 5297 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5298 _virtualAddress, addressSpec, numBytes, protection, 5299 (addr_t)physicalAddress, false); 5300 } 5301 5302 5303 area_id 5304 clone_area(const char* name, void** _address, uint32 addressSpec, 5305 uint32 protection, area_id source) 5306 { 5307 if ((protection & B_KERNEL_PROTECTION) == 0) 5308 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5309 5310 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5311 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5312 } 5313 5314 5315 area_id 5316 create_area_etc(team_id team, const char* name, void** address, 5317 uint32 addressSpec, uint32 size, uint32 lock, uint32 protection, 5318 addr_t physicalAddress, uint32 flags) 5319 { 5320 fix_protection(&protection); 5321 5322 return vm_create_anonymous_area(team, (char*)name, address, addressSpec, 5323 size, lock, protection, physicalAddress, flags, true); 5324 } 5325 5326 5327 area_id 5328 create_area(const char* name, void** _address, uint32 addressSpec, size_t size, 5329 uint32 lock, uint32 protection) 5330 { 5331 fix_protection(&protection); 5332 5333 return vm_create_anonymous_area(VMAddressSpace::KernelID(), (char*)name, 5334 _address, addressSpec, size, lock, protection, 0, 0, true); 5335 } 5336 5337 5338 status_t 5339 delete_area(area_id area) 5340 { 5341 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5342 } 5343 5344 5345 // #pragma mark - Userland syscalls 5346 5347 5348 status_t 5349 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5350 addr_t size) 5351 { 5352 // filter out some unavailable values (for userland) 5353 switch (addressSpec) { 5354 case B_ANY_KERNEL_ADDRESS: 5355 case B_ANY_KERNEL_BLOCK_ADDRESS: 5356 return B_BAD_VALUE; 5357 } 5358 5359 addr_t address; 5360 5361 if (!IS_USER_ADDRESS(userAddress) 5362 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5363 return B_BAD_ADDRESS; 5364 5365 status_t status = vm_reserve_address_range( 5366 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5367 RESERVED_AVOID_BASE); 5368 if (status != B_OK) 5369 return status; 5370 5371 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5372 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5373 (void*)address, size); 5374 return B_BAD_ADDRESS; 5375 } 5376 5377 return B_OK; 5378 } 5379 5380 5381 status_t 5382 _user_unreserve_address_range(addr_t address, addr_t size) 5383 { 5384 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5385 (void*)address, size); 5386 } 5387 5388 5389 area_id 5390 _user_area_for(void* address) 5391 { 5392 return vm_area_for((addr_t)address, false); 5393 } 5394 5395 5396 area_id 5397 _user_find_area(const char* userName) 5398 { 5399 char name[B_OS_NAME_LENGTH]; 5400 5401 if (!IS_USER_ADDRESS(userName) 5402 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5403 return B_BAD_ADDRESS; 5404 5405 return find_area(name); 5406 } 5407 5408 5409 status_t 5410 _user_get_area_info(area_id area, area_info* userInfo) 5411 { 5412 if (!IS_USER_ADDRESS(userInfo)) 5413 return B_BAD_ADDRESS; 5414 5415 area_info info; 5416 status_t status = 
get_area_info(area, &info); 5417 if (status < B_OK) 5418 return status; 5419 5420 // TODO: do we want to prevent userland from seeing kernel protections? 5421 //info.protection &= B_USER_PROTECTION; 5422 5423 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5424 return B_BAD_ADDRESS; 5425 5426 return status; 5427 } 5428 5429 5430 status_t 5431 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo) 5432 { 5433 int32 cookie; 5434 5435 if (!IS_USER_ADDRESS(userCookie) 5436 || !IS_USER_ADDRESS(userInfo) 5437 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 5438 return B_BAD_ADDRESS; 5439 5440 area_info info; 5441 status_t status = _get_next_area_info(team, &cookie, &info, 5442 sizeof(area_info)); 5443 if (status != B_OK) 5444 return status; 5445 5446 //info.protection &= B_USER_PROTECTION; 5447 5448 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 5449 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5450 return B_BAD_ADDRESS; 5451 5452 return status; 5453 } 5454 5455 5456 status_t 5457 _user_set_area_protection(area_id area, uint32 newProtection) 5458 { 5459 if ((newProtection & ~B_USER_PROTECTION) != 0) 5460 return B_BAD_VALUE; 5461 5462 fix_protection(&newProtection); 5463 5464 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 5465 newProtection, false); 5466 } 5467 5468 5469 status_t 5470 _user_resize_area(area_id area, size_t newSize) 5471 { 5472 // TODO: Since we restrict deleting of areas to those owned by the team, 5473 // we should also do that for resizing (check other functions, too). 5474 return vm_resize_area(area, newSize, false); 5475 } 5476 5477 5478 area_id 5479 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 5480 team_id target) 5481 { 5482 // filter out some unavailable values (for userland) 5483 switch (addressSpec) { 5484 case B_ANY_KERNEL_ADDRESS: 5485 case B_ANY_KERNEL_BLOCK_ADDRESS: 5486 return B_BAD_VALUE; 5487 } 5488 5489 void* address; 5490 if (!IS_USER_ADDRESS(userAddress) 5491 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5492 return B_BAD_ADDRESS; 5493 5494 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 5495 if (newArea < B_OK) 5496 return newArea; 5497 5498 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 5499 return B_BAD_ADDRESS; 5500 5501 return newArea; 5502 } 5503 5504 5505 area_id 5506 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 5507 uint32 protection, area_id sourceArea) 5508 { 5509 char name[B_OS_NAME_LENGTH]; 5510 void* address; 5511 5512 // filter out some unavailable values (for userland) 5513 switch (addressSpec) { 5514 case B_ANY_KERNEL_ADDRESS: 5515 case B_ANY_KERNEL_BLOCK_ADDRESS: 5516 return B_BAD_VALUE; 5517 } 5518 if ((protection & ~B_USER_PROTECTION) != 0) 5519 return B_BAD_VALUE; 5520 5521 if (!IS_USER_ADDRESS(userName) 5522 || !IS_USER_ADDRESS(userAddress) 5523 || user_strlcpy(name, userName, sizeof(name)) < B_OK 5524 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5525 return B_BAD_ADDRESS; 5526 5527 fix_protection(&protection); 5528 5529 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 5530 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 5531 false); 5532 if (clonedArea < B_OK) 5533 return clonedArea; 5534 5535 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 5536 delete_area(clonedArea); 5537 return B_BAD_ADDRESS; 5538 } 5539 5540 return clonedArea; 5541 } 5542 5543 
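// The syscalls in this section all follow the same copy-in/copy-out pattern:
// every userland pointer is checked with IS_USER_ADDRESS(), strings and
// pointer-sized out-parameters are copied into kernel buffers via
// user_strlcpy()/user_memcpy(), the actual work is delegated to the
// corresponding vm_*() function with kernel == false, and results are copied
// back to userland before returning. The following condensed sketch of that
// pattern is illustrative only (vm_example_operation() is a placeholder, not
// a real function) and is therefore compiled out.
#if 0
area_id
_user_example_operation(const char* userName, void** userAddress)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// copy-in: validate and fetch all userland arguments first
	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	// delegate to the kernel implementation (kernel == false)
	area_id area = vm_example_operation(VMAddressSpace::CurrentID(), name,
		&address, false);
		// hypothetical delegate -- stands in for vm_clone_area() etc.
	if (area < B_OK)
		return area;

	// copy-out: write back out-parameters; undo the operation on failure
	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(area);
		return B_BAD_ADDRESS;
	}

	return area;
}
#endif	// illustrative sketch only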
5544 area_id 5545 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 5546 size_t size, uint32 lock, uint32 protection) 5547 { 5548 char name[B_OS_NAME_LENGTH]; 5549 void* address; 5550 5551 // filter out some unavailable values (for userland) 5552 switch (addressSpec) { 5553 case B_ANY_KERNEL_ADDRESS: 5554 case B_ANY_KERNEL_BLOCK_ADDRESS: 5555 return B_BAD_VALUE; 5556 } 5557 if ((protection & ~B_USER_PROTECTION) != 0) 5558 return B_BAD_VALUE; 5559 5560 if (!IS_USER_ADDRESS(userName) 5561 || !IS_USER_ADDRESS(userAddress) 5562 || user_strlcpy(name, userName, sizeof(name)) < B_OK 5563 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5564 return B_BAD_ADDRESS; 5565 5566 if (addressSpec == B_EXACT_ADDRESS 5567 && IS_KERNEL_ADDRESS(address)) 5568 return B_BAD_VALUE; 5569 5570 fix_protection(&protection); 5571 5572 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), 5573 (char*)name, &address, addressSpec, size, lock, protection, 0, 0, 5574 false); 5575 5576 if (area >= B_OK 5577 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 5578 delete_area(area); 5579 return B_BAD_ADDRESS; 5580 } 5581 5582 return area; 5583 } 5584 5585 5586 status_t 5587 _user_delete_area(area_id area) 5588 { 5589 // Unlike the BeOS implementation, you can now only delete areas 5590 // that you have created yourself from userland. 5591 // The documentation to delete_area() explicitly states that this 5592 // will be restricted in the future, and so it will. 5593 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 5594 } 5595 5596 5597 // TODO: create a BeOS style call for this! 5598 5599 area_id 5600 _user_map_file(const char* userName, void** userAddress, int addressSpec, 5601 size_t size, int protection, int mapping, bool unmapAddressRange, int fd, 5602 off_t offset) 5603 { 5604 char name[B_OS_NAME_LENGTH]; 5605 void* address; 5606 area_id area; 5607 5608 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 5609 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 5610 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5611 return B_BAD_ADDRESS; 5612 5613 if (addressSpec == B_EXACT_ADDRESS) { 5614 if ((addr_t)address + size < (addr_t)address 5615 || (addr_t)address % B_PAGE_SIZE != 0) { 5616 return B_BAD_VALUE; 5617 } 5618 if (!IS_USER_ADDRESS(address) 5619 || !IS_USER_ADDRESS((addr_t)address + size)) { 5620 return B_BAD_ADDRESS; 5621 } 5622 } 5623 5624 // userland created areas can always be accessed by the kernel 5625 protection |= B_KERNEL_READ_AREA 5626 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 5627 5628 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 5629 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 5630 false); 5631 if (area < B_OK) 5632 return area; 5633 5634 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 5635 return B_BAD_ADDRESS; 5636 5637 return area; 5638 } 5639 5640 5641 status_t 5642 _user_unmap_memory(void* _address, size_t size) 5643 { 5644 addr_t address = (addr_t)_address; 5645 5646 // check params 5647 if (size == 0 || (addr_t)address + size < (addr_t)address 5648 || (addr_t)address % B_PAGE_SIZE != 0) { 5649 return B_BAD_VALUE; 5650 } 5651 5652 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 5653 return B_BAD_ADDRESS; 5654 5655 // Write lock the address space and ensure the address range is not wired. 
5656 AddressSpaceWriteLocker locker; 5657 do { 5658 status_t status = locker.SetTo(team_get_current_team_id()); 5659 if (status != B_OK) 5660 return status; 5661 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 5662 size, &locker)); 5663 5664 // unmap 5665 return unmap_address_range(locker.AddressSpace(), address, size, false); 5666 } 5667 5668 5669 status_t 5670 _user_set_memory_protection(void* _address, size_t size, int protection) 5671 { 5672 // check address range 5673 addr_t address = (addr_t)_address; 5674 size = PAGE_ALIGN(size); 5675 5676 if ((address % B_PAGE_SIZE) != 0) 5677 return B_BAD_VALUE; 5678 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 5679 || !IS_USER_ADDRESS((addr_t)address + size)) { 5680 // weird error code required by POSIX 5681 return ENOMEM; 5682 } 5683 5684 // extend and check protection 5685 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 5686 uint32 actualProtection = protection | B_KERNEL_READ_AREA 5687 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 5688 5689 if (!arch_vm_supports_protection(actualProtection)) 5690 return B_NOT_SUPPORTED; 5691 5692 // We need to write lock the address space, since we're going to play with 5693 // the areas. Also make sure that none of the areas is wired and that we're 5694 // actually allowed to change the protection. 5695 AddressSpaceWriteLocker locker; 5696 5697 bool restart; 5698 do { 5699 restart = false; 5700 5701 status_t status = locker.SetTo(team_get_current_team_id()); 5702 if (status != B_OK) 5703 return status; 5704 5705 // First round: Check whether the whole range is covered by areas and we 5706 // are allowed to modify them. 5707 addr_t currentAddress = address; 5708 size_t sizeLeft = size; 5709 while (sizeLeft > 0) { 5710 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 5711 if (area == NULL) 5712 return B_NO_MEMORY; 5713 5714 if ((area->protection & B_KERNEL_AREA) != 0) 5715 return B_NOT_ALLOWED; 5716 5717 AreaCacheLocker cacheLocker(area); 5718 5719 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) { 5720 restart = true; 5721 break; 5722 } 5723 5724 cacheLocker.Unlock(); 5725 5726 // TODO: For (shared) mapped files we should check whether the new 5727 // protections are compatible with the file permissions. We don't 5728 // have a way to do that yet, though. 5729 5730 addr_t offset = currentAddress - area->Base(); 5731 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 5732 5733 currentAddress += rangeSize; 5734 sizeLeft -= rangeSize; 5735 } 5736 } while (restart); 5737 5738 // Second round: If the protections differ from that of the area, create a 5739 // page protection array and re-map mapped pages. 5740 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 5741 addr_t currentAddress = address; 5742 size_t sizeLeft = size; 5743 while (sizeLeft > 0) { 5744 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 5745 if (area == NULL) 5746 return B_NO_MEMORY; 5747 5748 addr_t offset = currentAddress - area->Base(); 5749 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 5750 5751 currentAddress += rangeSize; 5752 sizeLeft -= rangeSize; 5753 5754 if (area->page_protections == NULL) { 5755 if (area->protection == actualProtection) 5756 continue; 5757 5758 // In the page protections we store only the three user protections, 5759 // so we use 4 bits per page. 
5760 uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 5761 area->page_protections = (uint8*)malloc(bytes); 5762 if (area->page_protections == NULL) 5763 return B_NO_MEMORY; 5764 5765 // init the page protections for all pages to that of the area 5766 uint32 areaProtection = area->protection 5767 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5768 memset(area->page_protections, 5769 areaProtection | (areaProtection << 4), bytes); 5770 } 5771 5772 // We need to lock the complete cache chain, since we potentially unmap 5773 // pages of lower caches. 5774 VMCache* topCache = vm_area_get_locked_cache(area); 5775 VMCacheChainLocker cacheChainLocker(topCache); 5776 cacheChainLocker.LockAllSourceCaches(); 5777 5778 for (addr_t pageAddress = area->Base() + offset; 5779 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 5780 map->Lock(); 5781 5782 set_area_page_protection(area, pageAddress, protection); 5783 5784 addr_t physicalAddress; 5785 uint32 flags; 5786 5787 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 5788 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 5789 map->Unlock(); 5790 continue; 5791 } 5792 5793 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5794 if (page == NULL) { 5795 panic("area %p looking up page failed for pa 0x%lx\n", area, 5796 physicalAddress); 5797 map->Unlock(); 5798 return B_ERROR; 5799 } 5800 5801 // If the page is not in the topmost cache and write access is 5802 // requested, we have to unmap it. Otherwise we can re-map it with 5803 // the new protection. 5804 bool unmapPage = page->Cache() != topCache 5805 && (protection & B_WRITE_AREA) != 0; 5806 5807 if (!unmapPage) 5808 map->ProtectPage(area, pageAddress, actualProtection); 5809 5810 map->Unlock(); 5811 5812 if (unmapPage) { 5813 DEBUG_PAGE_ACCESS_START(page); 5814 unmap_page(area, pageAddress); 5815 DEBUG_PAGE_ACCESS_END(page); 5816 } 5817 } 5818 } 5819 5820 return B_OK; 5821 } 5822 5823 5824 status_t 5825 _user_sync_memory(void* _address, size_t size, int flags) 5826 { 5827 addr_t address = (addr_t)_address; 5828 size = PAGE_ALIGN(size); 5829 5830 // check params 5831 if ((address % B_PAGE_SIZE) != 0) 5832 return B_BAD_VALUE; 5833 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 5834 || !IS_USER_ADDRESS((addr_t)address + size)) { 5835 // weird error code required by POSIX 5836 return ENOMEM; 5837 } 5838 5839 bool writeSync = (flags & MS_SYNC) != 0; 5840 bool writeAsync = (flags & MS_ASYNC) != 0; 5841 if (writeSync && writeAsync) 5842 return B_BAD_VALUE; 5843 5844 if (size == 0 || (!writeSync && !writeAsync)) 5845 return B_OK; 5846 5847 // iterate through the range and sync all concerned areas 5848 while (size > 0) { 5849 // read lock the address space 5850 AddressSpaceReadLocker locker; 5851 status_t error = locker.SetTo(team_get_current_team_id()); 5852 if (error != B_OK) 5853 return error; 5854 5855 // get the first area 5856 VMArea* area = locker.AddressSpace()->LookupArea(address); 5857 if (area == NULL) 5858 return B_NO_MEMORY; 5859 5860 uint32 offset = address - area->Base(); 5861 size_t rangeSize = min_c(area->Size() - offset, size); 5862 offset += area->cache_offset; 5863 5864 // lock the cache 5865 AreaCacheLocker cacheLocker(area); 5866 if (!cacheLocker) 5867 return B_BAD_VALUE; 5868 VMCache* cache = area->cache; 5869 5870 locker.Unlock(); 5871 5872 uint32 firstPage = offset >> PAGE_SHIFT; 5873 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 5874 5875 // write the pages 5876 if (cache->type == CACHE_TYPE_VNODE) { 
5877 if (writeSync) { 5878 // synchronous 5879 error = vm_page_write_modified_page_range(cache, firstPage, 5880 endPage); 5881 if (error != B_OK) 5882 return error; 5883 } else { 5884 // asynchronous 5885 vm_page_schedule_write_page_range(cache, firstPage, endPage); 5886 // TODO: This is probably not quite what is supposed to happen. 5887 // Especially when a lot has to be written, it might take ages 5888 // until it really hits the disk. 5889 } 5890 } 5891 5892 address += rangeSize; 5893 size -= rangeSize; 5894 } 5895 5896 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 5897 // synchronize multiple mappings of the same file. In our VM they never get 5898 // out of sync, though, so we don't have to do anything. 5899 5900 return B_OK; 5901 } 5902 5903 5904 status_t 5905 _user_memory_advice(void* address, size_t size, int advice) 5906 { 5907 // TODO: Implement! 5908 return B_OK; 5909 } 5910
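// The sketch below illustrates how a driver might combine the public kernel
// API implemented in this file: wiring a buffer with lock_memory() before a
// device transfer, translating it into physical runs with get_memory_map(),
// and balancing the wiring with unlock_memory() afterwards. It is an
// illustrative example only (the function name and the fixed 16-entry table
// are arbitrary choices, not part of this API), so it is compiled out.
#if 0
static status_t
example_prepare_device_transfer(void* buffer, size_t length)
{
	// wire the pages so they stay mapped and their wired count is raised for
	// the duration of the transfer (flags == 0 wires the range writable, cf.
	// lock_memory_etc() above)
	status_t status = lock_memory(buffer, length, 0);
	if (status != B_OK)
		return status;

	// translate the wired range into physical entries for the controller
	physical_entry table[16];
	status = get_memory_map(buffer, length, table, 16);
	if (status == B_OK) {
		// ... program the device's scatter/gather list from table[] here ...
	}

	// a successful lock_memory() must be balanced by unlock_memory() with
	// the exact same parameters
	unlock_memory(buffer, length, 0);
	return status;
}
#endif	// illustrative sketch only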