1 /* 2 * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <arch/cpu.h> 27 #include <arch/vm.h> 28 #include <boot/elf.h> 29 #include <boot/stage2.h> 30 #include <condition_variable.h> 31 #include <console.h> 32 #include <debug.h> 33 #include <file_cache.h> 34 #include <fs/fd.h> 35 #include <heap.h> 36 #include <kernel.h> 37 #include <int.h> 38 #include <lock.h> 39 #include <low_resource_manager.h> 40 #include <slab/Slab.h> 41 #include <smp.h> 42 #include <system_info.h> 43 #include <thread.h> 44 #include <team.h> 45 #include <tracing.h> 46 #include <util/AutoLock.h> 47 #include <util/khash.h> 48 #include <vm/vm_page.h> 49 #include <vm/vm_priv.h> 50 #include <vm/VMAddressSpace.h> 51 #include <vm/VMArea.h> 52 #include <vm/VMCache.h> 53 54 #include "VMAddressSpaceLocking.h" 55 #include "VMAnonymousCache.h" 56 #include "IORequest.h" 57 58 59 //#define TRACE_VM 60 //#define TRACE_FAULTS 61 #ifdef TRACE_VM 62 # define TRACE(x) dprintf x 63 #else 64 # define TRACE(x) ; 65 #endif 66 #ifdef TRACE_FAULTS 67 # define FTRACE(x) dprintf x 68 #else 69 # define FTRACE(x) ; 70 #endif 71 72 73 class AreaCacheLocking { 74 public: 75 inline bool Lock(VMCache* lockable) 76 { 77 return false; 78 } 79 80 inline void Unlock(VMCache* lockable) 81 { 82 vm_area_put_locked_cache(lockable); 83 } 84 }; 85 86 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 87 public: 88 inline AreaCacheLocker(VMCache* cache = NULL) 89 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 90 { 91 } 92 93 inline AreaCacheLocker(VMArea* area) 94 : AutoLocker<VMCache, AreaCacheLocking>() 95 { 96 SetTo(area); 97 } 98 99 inline void SetTo(VMArea* area) 100 { 101 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 102 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 103 } 104 }; 105 106 107 class VMCacheChainLocker { 108 public: 109 VMCacheChainLocker() 110 : 111 fTopCache(NULL), 112 fBottomCache(NULL) 113 { 114 } 115 116 VMCacheChainLocker(VMCache* topCache) 117 : 118 fTopCache(topCache), 119 fBottomCache(topCache) 120 { 121 } 122 123 ~VMCacheChainLocker() 124 { 125 Unlock(); 126 } 127 128 void SetTo(VMCache* topCache) 129 { 130 fTopCache = topCache; 131 fBottomCache = topCache; 132 133 if (topCache != NULL) 134 topCache->SetUserData(NULL); 135 } 136 137 VMCache* LockSourceCache() 138 { 139 if (fBottomCache == NULL || fBottomCache->source == NULL) 140 return NULL; 141 142 VMCache* previousCache = fBottomCache; 143 144 fBottomCache = fBottomCache->source; 145 fBottomCache->Lock(); 146 fBottomCache->AcquireRefLocked(); 147 fBottomCache->SetUserData(previousCache); 148 149 return fBottomCache; 150 } 151 152 void LockAllSourceCaches() 153 { 154 while (LockSourceCache() != NULL) { 155 } 156 } 157 158 void Unlock(VMCache* exceptCache = NULL) 159 { 160 if (fTopCache == NULL) 161 return; 162 163 // Unlock caches in source -> consumer direction. This is important to 164 // avoid double-locking and a reversal of locking order in case a cache 165 // is eligable for merging. 
166 VMCache* cache = fBottomCache; 167 while (cache != NULL) { 168 VMCache* nextCache = (VMCache*)cache->UserData(); 169 if (cache != exceptCache) 170 cache->ReleaseRefAndUnlock(cache != fTopCache); 171 172 if (cache == fTopCache) 173 break; 174 175 cache = nextCache; 176 } 177 178 fTopCache = NULL; 179 fBottomCache = NULL; 180 } 181 182 void UnlockKeepRefs(bool keepTopCacheLocked) 183 { 184 if (fTopCache == NULL) 185 return; 186 187 VMCache* nextCache = fBottomCache; 188 VMCache* cache = NULL; 189 190 while (keepTopCacheLocked 191 ? nextCache != fTopCache : cache != fTopCache) { 192 cache = nextCache; 193 nextCache = (VMCache*)cache->UserData(); 194 cache->Unlock(cache != fTopCache); 195 } 196 } 197 198 void RelockCaches(bool topCacheLocked) 199 { 200 if (fTopCache == NULL) 201 return; 202 203 VMCache* nextCache = fTopCache; 204 VMCache* cache = NULL; 205 if (topCacheLocked) { 206 cache = nextCache; 207 nextCache = cache->source; 208 } 209 210 while (cache != fBottomCache && nextCache != NULL) { 211 VMCache* consumer = cache; 212 cache = nextCache; 213 nextCache = cache->source; 214 cache->Lock(); 215 cache->SetUserData(consumer); 216 } 217 } 218 219 private: 220 VMCache* fTopCache; 221 VMCache* fBottomCache; 222 }; 223 224 225 // The memory reserve an allocation of the certain priority must not touch. 226 static const size_t kMemoryReserveForPriority[] = { 227 VM_MEMORY_RESERVE_USER, // user 228 VM_MEMORY_RESERVE_SYSTEM, // system 229 0 // VIP 230 }; 231 232 233 ObjectCache* gPageMappingsObjectCache; 234 235 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 236 237 static off_t sAvailableMemory; 238 static off_t sNeededMemory; 239 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 240 static uint32 sPageFaults; 241 242 static VMPhysicalPageMapper* sPhysicalPageMapper; 243 244 #if DEBUG_CACHE_LIST 245 246 struct cache_info { 247 VMCache* cache; 248 addr_t page_count; 249 addr_t committed; 250 }; 251 252 static const int kCacheInfoTableCount = 100 * 1024; 253 static cache_info* sCacheInfoTable; 254 255 #endif // DEBUG_CACHE_LIST 256 257 258 // function declarations 259 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 260 bool addressSpaceCleanup); 261 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 262 bool isWrite, bool isUser); 263 static status_t map_backing_store(VMAddressSpace* addressSpace, 264 VMCache* cache, void** _virtualAddress, off_t offset, addr_t size, 265 uint32 addressSpec, int wiring, int protection, int mapping, 266 VMArea** _area, const char* areaName, uint32 flags, bool kernel); 267 268 269 // #pragma mark - 270 271 272 #if VM_PAGE_FAULT_TRACING 273 274 namespace VMPageFaultTracing { 275 276 class PageFaultStart : public AbstractTraceEntry { 277 public: 278 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 279 : 280 fAddress(address), 281 fPC(pc), 282 fWrite(write), 283 fUser(user) 284 { 285 Initialized(); 286 } 287 288 virtual void AddDump(TraceOutput& out) 289 { 290 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 291 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 292 } 293 294 private: 295 addr_t fAddress; 296 addr_t fPC; 297 bool fWrite; 298 bool fUser; 299 }; 300 301 302 // page fault errors 303 enum { 304 PAGE_FAULT_ERROR_NO_AREA = 0, 305 PAGE_FAULT_ERROR_KERNEL_ONLY, 306 PAGE_FAULT_ERROR_WRITE_PROTECTED, 307 PAGE_FAULT_ERROR_READ_PROTECTED, 308 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 309 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 310 }; 311 312 313 class PageFaultError : public AbstractTraceEntry { 314 public: 315 PageFaultError(area_id area, status_t error) 316 : 317 fArea(area), 318 fError(error) 319 { 320 Initialized(); 321 } 322 323 virtual void AddDump(TraceOutput& out) 324 { 325 switch (fError) { 326 case PAGE_FAULT_ERROR_NO_AREA: 327 out.Print("page fault error: no area"); 328 break; 329 case PAGE_FAULT_ERROR_KERNEL_ONLY: 330 out.Print("page fault error: area: %ld, kernel only", fArea); 331 break; 332 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 333 out.Print("page fault error: area: %ld, write protected", 334 fArea); 335 break; 336 case PAGE_FAULT_ERROR_READ_PROTECTED: 337 out.Print("page fault error: area: %ld, read protected", fArea); 338 break; 339 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 340 out.Print("page fault error: kernel touching bad user memory"); 341 break; 342 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 343 out.Print("page fault error: no address space"); 344 break; 345 default: 346 out.Print("page fault error: area: %ld, error: %s", fArea, 347 strerror(fError)); 348 break; 349 } 350 } 351 352 private: 353 area_id fArea; 354 status_t fError; 355 }; 356 357 358 class PageFaultDone : public AbstractTraceEntry { 359 public: 360 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 361 vm_page* page) 362 : 363 fArea(area), 364 fTopCache(topCache), 365 fCache(cache), 366 fPage(page) 367 { 368 Initialized(); 369 } 370 371 virtual void AddDump(TraceOutput& out) 372 { 373 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 374 "page: %p", fArea, fTopCache, fCache, fPage); 375 } 376 377 private: 378 area_id fArea; 379 VMCache* fTopCache; 380 VMCache* fCache; 381 vm_page* fPage; 382 }; 383 384 } // namespace VMPageFaultTracing 385 386 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 387 #else 388 # define TPF(x) ; 389 #endif // VM_PAGE_FAULT_TRACING 390 391 392 // #pragma mark - 393 394 395 /*! The page's cache must be locked. 396 */ 397 static inline void 398 increment_page_wired_count(vm_page* page) 399 { 400 if (page->wired_count++ == 0 && page->mappings.IsEmpty()) 401 atomic_add(&gMappedPagesCount, 1); 402 } 403 404 405 /*! The page's cache must be locked. 406 */ 407 static inline void 408 decrement_page_wired_count(vm_page* page) 409 { 410 if (--page->wired_count == 0 && page->mappings.IsEmpty()) 411 atomic_add(&gMappedPagesCount, -1); 412 } 413 414 415 static inline addr_t 416 virtual_page_address(VMArea* area, vm_page* page) 417 { 418 return area->Base() 419 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 420 } 421 422 423 //! 
You need to have the address space locked when calling this function 424 static VMArea* 425 lookup_area(VMAddressSpace* addressSpace, area_id id) 426 { 427 VMAreaHash::ReadLock(); 428 429 VMArea* area = VMAreaHash::LookupLocked(id); 430 if (area != NULL && area->address_space != addressSpace) 431 area = NULL; 432 433 VMAreaHash::ReadUnlock(); 434 435 return area; 436 } 437 438 439 static inline void 440 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 441 { 442 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 443 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 444 uint8& entry = area->page_protections[pageIndex / 2]; 445 if (pageIndex % 2 == 0) 446 entry = (entry & 0xf0) | protection; 447 else 448 entry = (entry & 0x0f) | (protection << 4); 449 } 450 451 452 static inline uint32 453 get_area_page_protection(VMArea* area, addr_t pageAddress) 454 { 455 if (area->page_protections == NULL) 456 return area->protection; 457 458 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 459 uint32 protection = area->page_protections[pageIndex / 2]; 460 if (pageIndex % 2 == 0) 461 protection &= 0x0f; 462 else 463 protection >>= 4; 464 465 return protection | B_KERNEL_READ_AREA 466 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 467 } 468 469 470 /*! The caller must have reserved enough pages the translation map 471 implementation might need to map this page. 472 The page's cache must be locked. 473 */ 474 static status_t 475 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 476 vm_page_reservation* reservation) 477 { 478 VMTranslationMap* map = area->address_space->TranslationMap(); 479 480 bool wasMapped = page->wired_count > 0 || !page->mappings.IsEmpty(); 481 482 if (area->wiring == B_NO_LOCK) { 483 DEBUG_PAGE_ACCESS_CHECK(page); 484 485 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 486 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 487 gPageMappingsObjectCache, 488 CACHE_DONT_WAIT_FOR_MEMORY 489 | (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 490 if (mapping == NULL) 491 return B_NO_MEMORY; 492 493 mapping->page = page; 494 mapping->area = area; 495 496 map->Lock(); 497 498 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 499 reservation); 500 501 // insert mapping into lists 502 if (page->mappings.IsEmpty() && page->wired_count == 0) 503 atomic_add(&gMappedPagesCount, 1); 504 505 page->mappings.Add(mapping); 506 area->mappings.Add(mapping); 507 508 map->Unlock(); 509 } else { 510 DEBUG_PAGE_ACCESS_CHECK(page); 511 512 map->Lock(); 513 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 514 reservation); 515 map->Unlock(); 516 517 increment_page_wired_count(page); 518 } 519 520 if (!wasMapped) { 521 // The page is mapped now, so we must not remain in the cached queue. 522 // It also makes sense to move it from the inactive to the active, since 523 // otherwise the page daemon wouldn't come to keep track of it (in idle 524 // mode) -- if the page isn't touched, it will be deactivated after a 525 // full iteration through the queue at the latest. 526 if (page->State() == PAGE_STATE_CACHED 527 || page->State() == PAGE_STATE_INACTIVE) { 528 vm_page_set_state(page, PAGE_STATE_ACTIVE); 529 } 530 } 531 532 return B_OK; 533 } 534 535 536 /*! If \a preserveModified is \c true, the caller must hold the lock of the 537 page's cache. 
538 */ 539 static inline bool 540 unmap_page(VMArea* area, addr_t virtualAddress) 541 { 542 return area->address_space->TranslationMap()->UnmapPage(area, 543 virtualAddress, true); 544 } 545 546 547 /*! If \a preserveModified is \c true, the caller must hold the lock of all 548 mapped pages' caches. 549 */ 550 static inline void 551 unmap_pages(VMArea* area, addr_t base, size_t size) 552 { 553 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 554 } 555 556 557 /*! Cuts a piece out of an area. If the given cut range covers the complete 558 area, it is deleted. If it covers the beginning or the end, the area is 559 resized accordingly. If the range covers some part in the middle of the 560 area, it is split in two; in this case the second area is returned via 561 \a _secondArea (the variable is left untouched in the other cases). 562 The address space must be write locked. 563 */ 564 static status_t 565 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 566 addr_t lastAddress, VMArea** _secondArea, bool kernel) 567 { 568 // Does the cut range intersect with the area at all? 569 addr_t areaLast = area->Base() + (area->Size() - 1); 570 if (area->Base() > lastAddress || areaLast < address) 571 return B_OK; 572 573 // Is the area fully covered? 574 if (area->Base() >= address && areaLast <= lastAddress) { 575 delete_area(addressSpace, area, false); 576 return B_OK; 577 } 578 579 int priority; 580 uint32 allocationFlags; 581 if (addressSpace == VMAddressSpace::Kernel()) { 582 priority = VM_PRIORITY_SYSTEM; 583 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 584 | HEAP_DONT_LOCK_KERNEL_SPACE; 585 } else { 586 priority = VM_PRIORITY_USER; 587 allocationFlags = 0; 588 } 589 590 VMCache* cache = vm_area_get_locked_cache(area); 591 VMCacheChainLocker cacheChainLocker(cache); 592 cacheChainLocker.LockAllSourceCaches(); 593 594 // Cut the end only? 595 if (areaLast <= lastAddress) { 596 size_t oldSize = area->Size(); 597 size_t newSize = address - area->Base(); 598 599 status_t error = addressSpace->ShrinkAreaTail(area, newSize, 600 allocationFlags); 601 if (error != B_OK) 602 return error; 603 604 // unmap pages 605 unmap_pages(area, address, oldSize - newSize); 606 607 // If no one else uses the area's cache, we can resize it, too. 608 if (cache->areas == area && area->cache_next == NULL 609 && list_is_empty(&cache->consumers)) { 610 // Since VMCache::Resize() can temporarily drop the lock, we must 611 // unlock all lower caches to prevent locking order inversion. 612 cacheChainLocker.Unlock(cache); 613 cache->Resize(cache->virtual_base + newSize, priority); 614 cache->ReleaseRefAndUnlock(); 615 } 616 617 return B_OK; 618 } 619 620 // Cut the beginning only? 621 if (area->Base() >= address) { 622 addr_t oldBase = area->Base(); 623 addr_t newBase = lastAddress + 1; 624 size_t newSize = areaLast - lastAddress; 625 626 // unmap pages 627 unmap_pages(area, oldBase, newBase - oldBase); 628 629 // resize the area 630 status_t error = addressSpace->ShrinkAreaHead(area, newSize, 631 allocationFlags); 632 if (error != B_OK) 633 return error; 634 635 // TODO: If no one else uses the area's cache, we should resize it, too! 636 637 area->cache_offset += newBase - oldBase; 638 639 return B_OK; 640 } 641 642 // The tough part -- cut a piece out of the middle of the area. 643 // We do that by shrinking the area to the begin section and creating a 644 // new area for the end section. 
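    // Illustrative example (addresses made up, not from the original source):
    // cutting the range [0x3000, 0x4fff] out of an area spanning
    // [0x1000, 0x8fff] shrinks the area to [0x1000, 0x2fff] and creates a
    // second area covering [0x5000, 0x8fff]; both areas keep referring to the
    // same cache.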
645 646 addr_t firstNewSize = address - area->Base(); 647 addr_t secondBase = lastAddress + 1; 648 addr_t secondSize = areaLast - lastAddress; 649 650 // unmap pages 651 unmap_pages(area, address, area->Size() - firstNewSize); 652 653 // resize the area 654 addr_t oldSize = area->Size(); 655 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 656 allocationFlags); 657 if (error != B_OK) 658 return error; 659 660 // TODO: If no one else uses the area's cache, we might want to create a 661 // new cache for the second area, transfer the concerned pages from the 662 // first cache to it and resize the first cache. 663 664 // map the second area 665 VMArea* secondArea; 666 void* secondBaseAddress = (void*)secondBase; 667 error = map_backing_store(addressSpace, cache, &secondBaseAddress, 668 area->cache_offset + (secondBase - area->Base()), secondSize, 669 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 670 &secondArea, area->name, 0, kernel); 671 if (error != B_OK) { 672 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 673 return error; 674 } 675 676 // We need a cache reference for the new area. 677 cache->AcquireRefLocked(); 678 679 if (_secondArea != NULL) 680 *_secondArea = secondArea; 681 682 return B_OK; 683 } 684 685 686 /*! Deletes all areas in the given address range. 687 The address space must be write-locked. 688 */ 689 static status_t 690 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 691 bool kernel) 692 { 693 size = PAGE_ALIGN(size); 694 addr_t lastAddress = address + (size - 1); 695 696 // Check, whether the caller is allowed to modify the concerned areas. 697 if (!kernel) { 698 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 699 VMArea* area = it.Next();) { 700 addr_t areaLast = area->Base() + (area->Size() - 1); 701 if (area->Base() < lastAddress && address < areaLast) { 702 if ((area->protection & B_KERNEL_AREA) != 0) 703 return B_NOT_ALLOWED; 704 } 705 } 706 } 707 708 for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 709 VMArea* area = it.Next();) { 710 addr_t areaLast = area->Base() + (area->Size() - 1); 711 if (area->Base() < lastAddress && address < areaLast) { 712 status_t error = cut_area(addressSpace, area, address, 713 lastAddress, NULL, kernel); 714 if (error != B_OK) 715 return error; 716 // Failing after already messing with areas is ugly, but we 717 // can't do anything about it. 718 } 719 } 720 721 return B_OK; 722 } 723 724 725 /*! You need to hold the lock of the cache and the write lock of the address 726 space when calling this function. 727 Note, that in case of error your cache will be temporarily unlocked. 
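    If \a mapping is \c REGION_PRIVATE_MAP, a new anonymous cache is created
    on top of the given cache to hold the private copies of pages as they are
    written to.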
728 */ 729 static status_t 730 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, 731 void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 732 int wiring, int protection, int mapping, VMArea** _area, 733 const char* areaName, uint32 flags, bool kernel) 734 { 735 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, " 736 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 737 "'%s'\n", addressSpace, cache, *_virtualAddress, offset, size, 738 addressSpec, wiring, protection, _area, areaName)); 739 cache->AssertLocked(); 740 741 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 742 | HEAP_DONT_LOCK_KERNEL_SPACE; 743 int priority; 744 if (addressSpace != VMAddressSpace::Kernel()) { 745 priority = VM_PRIORITY_USER; 746 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 747 priority = VM_PRIORITY_VIP; 748 allocationFlags |= HEAP_PRIORITY_VIP; 749 } else 750 priority = VM_PRIORITY_SYSTEM; 751 752 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 753 allocationFlags); 754 if (area == NULL) 755 return B_NO_MEMORY; 756 757 status_t status; 758 759 // if this is a private map, we need to create a new cache 760 // to handle the private copies of pages as they are written to 761 VMCache* sourceCache = cache; 762 if (mapping == REGION_PRIVATE_MAP) { 763 VMCache* newCache; 764 765 // create an anonymous cache 766 status = VMCacheFactory::CreateAnonymousCache(newCache, 767 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true, 768 VM_PRIORITY_USER); 769 if (status != B_OK) 770 goto err1; 771 772 newCache->Lock(); 773 newCache->temporary = 1; 774 newCache->scan_skip = cache->scan_skip; 775 newCache->virtual_base = offset; 776 newCache->virtual_end = offset + size; 777 778 cache->AddConsumer(newCache); 779 780 cache = newCache; 781 } 782 783 status = cache->SetMinimalCommitment(size, priority); 784 if (status != B_OK) 785 goto err2; 786 787 // check to see if this address space has entered DELETE state 788 if (addressSpace->IsBeingDeleted()) { 789 // okay, someone is trying to delete this address space now, so we can't 790 // insert the area, so back out 791 status = B_BAD_TEAM_ID; 792 goto err2; 793 } 794 795 if (addressSpec == B_EXACT_ADDRESS 796 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 797 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 798 size, kernel); 799 if (status != B_OK) 800 goto err2; 801 } 802 803 status = addressSpace->InsertArea(_virtualAddress, addressSpec, size, area, 804 allocationFlags); 805 if (status != B_OK) { 806 // TODO: wait and try again once this is working in the backend 807 #if 0 808 if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) { 809 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, 810 0, 0); 811 } 812 #endif 813 goto err2; 814 } 815 816 // attach the cache to the area 817 area->cache = cache; 818 area->cache_offset = offset; 819 820 // point the cache back to the area 821 cache->InsertAreaLocked(area); 822 if (mapping == REGION_PRIVATE_MAP) 823 cache->Unlock(); 824 825 // insert the area in the global area hash table 826 VMAreaHash::Insert(area); 827 828 // grab a ref to the address space (the area holds this) 829 addressSpace->Get(); 830 831 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 832 // cache, sourceCache, areaName, area); 833 834 *_area = area; 835 return B_OK; 836 837 err2: 838 if (mapping == REGION_PRIVATE_MAP) { 839 // We created this cache, so we must delete it again. 
Note, that we 840 // need to temporarily unlock the source cache or we'll otherwise 841 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 842 sourceCache->Unlock(); 843 cache->ReleaseRefAndUnlock(); 844 sourceCache->Lock(); 845 } 846 err1: 847 addressSpace->DeleteArea(area, allocationFlags); 848 return status; 849 } 850 851 852 status_t 853 vm_block_address_range(const char* name, void* address, addr_t size) 854 { 855 if (!arch_vm_supports_protection(0)) 856 return B_NOT_SUPPORTED; 857 858 AddressSpaceWriteLocker locker; 859 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 860 if (status != B_OK) 861 return status; 862 863 VMAddressSpace* addressSpace = locker.AddressSpace(); 864 865 // create an anonymous cache 866 VMCache* cache; 867 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 868 VM_PRIORITY_SYSTEM); 869 if (status != B_OK) 870 return status; 871 872 cache->temporary = 1; 873 cache->virtual_end = size; 874 cache->scan_skip = 1; 875 cache->Lock(); 876 877 VMArea* area; 878 void* areaAddress = address; 879 status = map_backing_store(addressSpace, cache, &areaAddress, 0, size, 880 B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name, 881 0, true); 882 if (status != B_OK) { 883 cache->ReleaseRefAndUnlock(); 884 return status; 885 } 886 887 cache->Unlock(); 888 area->cache_type = CACHE_TYPE_RAM; 889 return area->id; 890 } 891 892 893 status_t 894 vm_unreserve_address_range(team_id team, void* address, addr_t size) 895 { 896 AddressSpaceWriteLocker locker(team); 897 if (!locker.IsLocked()) 898 return B_BAD_TEAM_ID; 899 900 VMAddressSpace* addressSpace = locker.AddressSpace(); 901 return addressSpace->UnreserveAddressRange((addr_t)address, size, 902 addressSpace == VMAddressSpace::Kernel() 903 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 904 } 905 906 907 status_t 908 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 909 addr_t size, uint32 flags) 910 { 911 if (size == 0) 912 return B_BAD_VALUE; 913 914 AddressSpaceWriteLocker locker(team); 915 if (!locker.IsLocked()) 916 return B_BAD_TEAM_ID; 917 918 VMAddressSpace* addressSpace = locker.AddressSpace(); 919 return addressSpace->ReserveAddressRange(_address, addressSpec, 920 size, flags, 921 addressSpace == VMAddressSpace::Kernel() 922 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 923 } 924 925 926 area_id 927 vm_create_anonymous_area(team_id team, const char* name, void** address, 928 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 929 addr_t physicalAddress, uint32 flags, bool kernel) 930 { 931 VMArea* area; 932 VMCache* cache; 933 vm_page* page = NULL; 934 bool isStack = (protection & B_STACK_AREA) != 0; 935 page_num_t guardPages; 936 bool canOvercommit = false; 937 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 938 ? 
        VM_PAGE_ALLOC_CLEAR : 0;

    TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));

    size = PAGE_ALIGN(size);

    if (size == 0)
        return B_BAD_VALUE;
    if (!arch_vm_supports_protection(protection))
        return B_NOT_SUPPORTED;

    if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
        canOvercommit = true;

#ifdef DEBUG_KERNEL_STACKS
    if ((protection & B_KERNEL_STACK_AREA) != 0)
        isStack = true;
#endif

    // check parameters
    switch (addressSpec) {
        case B_ANY_ADDRESS:
        case B_EXACT_ADDRESS:
        case B_BASE_ADDRESS:
        case B_ANY_KERNEL_ADDRESS:
        case B_ANY_KERNEL_BLOCK_ADDRESS:
            break;
        case B_PHYSICAL_BASE_ADDRESS:
            physicalAddress = (addr_t)*address;
            addressSpec = B_ANY_KERNEL_ADDRESS;
            break;

        default:
            return B_BAD_VALUE;
    }

    if (physicalAddress != 0)
        wiring = B_CONTIGUOUS;

    bool doReserveMemory = false;
    switch (wiring) {
        case B_NO_LOCK:
            break;
        case B_FULL_LOCK:
        case B_LAZY_LOCK:
        case B_CONTIGUOUS:
            doReserveMemory = true;
            break;
        case B_ALREADY_WIRED:
            break;
        case B_LOMEM:
        //case B_SLOWMEM:
            dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
            wiring = B_FULL_LOCK;
            doReserveMemory = true;
            break;
        default:
            return B_BAD_VALUE;
    }

    // For full lock or contiguous areas we're also going to map the pages and
    // thus need to reserve pages for the mapping backend upfront.
    addr_t reservedMapPages = 0;
    if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
        AddressSpaceWriteLocker locker;
        status_t status = locker.SetTo(team);
        if (status != B_OK)
            return status;

        VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
        reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
    }

    int priority;
    if (team != VMAddressSpace::KernelID())
        priority = VM_PRIORITY_USER;
    else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
        priority = VM_PRIORITY_VIP;
    else
        priority = VM_PRIORITY_SYSTEM;

    // Reserve memory before acquiring the address space lock. This reduces the
    // chances of failure, since while holding the write lock to the address
    // space (if it is the kernel address space that is), the low memory handler
    // won't be able to free anything for us.
    addr_t reservedMemory = 0;
    if (doReserveMemory) {
        bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
        if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
            return B_NO_MEMORY;
        reservedMemory = size;
        // TODO: We don't reserve the memory for the pages for the page
        // directories/tables. We actually need to do so, since we currently
        // don't reclaim them (and probably can't reclaim all of them anyway).
        // Thus there are actually fewer physical pages than there should be,
        // which can get the VM into trouble in low memory situations.
    }

    AddressSpaceWriteLocker locker;
    VMAddressSpace* addressSpace;
    status_t status;

    // For full lock areas reserve the pages before locking the address
    // space. E.g. block caches can't release their memory while we hold the
    // address space lock.
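    // A B_FULL_LOCK area needs one reserved page per B_PAGE_SIZE of its size,
    // in addition to the pages the translation map may need for its page
    // tables (reservedMapPages, computed above).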
1043 page_num_t reservedPages = reservedMapPages; 1044 if (wiring == B_FULL_LOCK) 1045 reservedPages += size / B_PAGE_SIZE; 1046 1047 vm_page_reservation reservation; 1048 if (reservedPages > 0) { 1049 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1050 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1051 priority)) { 1052 reservedPages = 0; 1053 status = B_WOULD_BLOCK; 1054 goto err0; 1055 } 1056 } else 1057 vm_page_reserve_pages(&reservation, reservedPages, priority); 1058 } 1059 1060 status = locker.SetTo(team); 1061 if (status != B_OK) 1062 goto err0; 1063 1064 addressSpace = locker.AddressSpace(); 1065 1066 if (wiring == B_CONTIGUOUS) { 1067 // we try to allocate the page run here upfront as this may easily 1068 // fail for obvious reasons 1069 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1070 physicalAddress, size / B_PAGE_SIZE, priority); 1071 if (page == NULL) { 1072 status = B_NO_MEMORY; 1073 goto err0; 1074 } 1075 } 1076 1077 // create an anonymous cache 1078 // if it's a stack, make sure that two pages are available at least 1079 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1080 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1081 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1082 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1083 wiring == B_NO_LOCK, priority); 1084 if (status != B_OK) 1085 goto err1; 1086 1087 cache->temporary = 1; 1088 cache->virtual_end = size; 1089 cache->committed_size = reservedMemory; 1090 // TODO: This should be done via a method. 1091 reservedMemory = 0; 1092 1093 switch (wiring) { 1094 case B_LAZY_LOCK: 1095 case B_FULL_LOCK: 1096 case B_CONTIGUOUS: 1097 case B_ALREADY_WIRED: 1098 cache->scan_skip = 1; 1099 break; 1100 case B_NO_LOCK: 1101 cache->scan_skip = 0; 1102 break; 1103 } 1104 1105 cache->Lock(); 1106 1107 status = map_backing_store(addressSpace, cache, address, 0, size, 1108 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 1109 flags, kernel); 1110 1111 if (status != B_OK) { 1112 cache->ReleaseRefAndUnlock(); 1113 goto err1; 1114 } 1115 1116 locker.DegradeToReadLock(); 1117 1118 switch (wiring) { 1119 case B_NO_LOCK: 1120 case B_LAZY_LOCK: 1121 // do nothing - the pages are mapped in as needed 1122 break; 1123 1124 case B_FULL_LOCK: 1125 { 1126 // Allocate and map all pages for this area 1127 1128 off_t offset = 0; 1129 for (addr_t address = area->Base(); 1130 address < area->Base() + (area->Size() - 1); 1131 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1132 #ifdef DEBUG_KERNEL_STACKS 1133 # ifdef STACK_GROWS_DOWNWARDS 1134 if (isStack && address < area->Base() 1135 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1136 # else 1137 if (isStack && address >= area->Base() + area->Size() 1138 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1139 # endif 1140 continue; 1141 #endif 1142 vm_page* page = vm_page_allocate_page(&reservation, 1143 PAGE_STATE_WIRED | pageAllocFlags); 1144 cache->InsertPage(page, offset); 1145 map_page(area, page, address, protection, &reservation); 1146 1147 DEBUG_PAGE_ACCESS_END(page); 1148 } 1149 1150 break; 1151 } 1152 1153 case B_ALREADY_WIRED: 1154 { 1155 // The pages should already be mapped. This is only really useful 1156 // during boot time. Find the appropriate vm_page objects and stick 1157 // them in the cache object. 
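            // Each existing mapping is looked up via VMTranslationMap::Query()
            // below; the corresponding vm_page is then wired and inserted into
            // the cache.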
            VMTranslationMap* map = addressSpace->TranslationMap();
            off_t offset = 0;

            if (!gKernelStartup)
                panic("ALREADY_WIRED flag used outside kernel startup\n");

            map->Lock();

            for (addr_t virtualAddress = area->Base();
                    virtualAddress < area->Base() + (area->Size() - 1);
                    virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
                addr_t physicalAddress;
                uint32 flags;
                status = map->Query(virtualAddress, &physicalAddress, &flags);
                if (status < B_OK) {
                    panic("looking up mapping failed for va 0x%lx\n",
                        virtualAddress);
                }
                page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
                if (page == NULL) {
                    panic("looking up page failed for pa 0x%lx\n",
                        physicalAddress);
                }

                DEBUG_PAGE_ACCESS_START(page);

                increment_page_wired_count(page);
                cache->InsertPage(page, offset);
                vm_page_set_state(page, PAGE_STATE_WIRED);
                page->busy = false;

                DEBUG_PAGE_ACCESS_END(page);
            }

            map->Unlock();
            break;
        }

        case B_CONTIGUOUS:
        {
            // We have already allocated our contiguous page run, so we can now
            // just map the pages in the address space
            VMTranslationMap* map = addressSpace->TranslationMap();
            addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
            addr_t virtualAddress = area->Base();
            off_t offset = 0;

            map->Lock();

            for (virtualAddress = area->Base(); virtualAddress < area->Base()
                    + (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
                    offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
                page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
                if (page == NULL)
                    panic("couldn't lookup physical page just allocated\n");

                status = map->Map(virtualAddress, physicalAddress, protection,
                    &reservation);
                if (status < B_OK)
                    panic("couldn't map physical page in page run\n");

                increment_page_wired_count(page);
                cache->InsertPage(page, offset);

                DEBUG_PAGE_ACCESS_END(page);
            }

            map->Unlock();
            break;
        }

        default:
            break;
    }

    cache->Unlock();

    if (reservedPages > 0)
        vm_page_unreserve_pages(&reservation);

    TRACE(("vm_create_anonymous_area: done\n"));

    area->cache_type = CACHE_TYPE_RAM;
    return area->id;

err1:
    if (wiring == B_CONTIGUOUS) {
        // we had reserved the area space upfront...
        addr_t pageNumber = page->physical_page_number;
        int32 i;
        for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
            page = vm_lookup_page(pageNumber);
            if (page == NULL)
                panic("couldn't lookup physical page just allocated\n");

            vm_page_set_state(page, PAGE_STATE_FREE);
        }
    }

err0:
    if (reservedPages > 0)
        vm_page_unreserve_pages(&reservation);
    if (reservedMemory > 0)
        vm_unreserve_memory(reservedMemory);

    return status;
}


area_id
vm_map_physical_memory(team_id team, const char* name, void** _address,
    uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress,
    bool alreadyWired)
{
    VMArea* area;
    VMCache* cache;
    addr_t mapOffset;

    TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
        "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
        name, _address, addressSpec, size, protection, physicalAddress));

    if (!arch_vm_supports_protection(protection))
        return B_NOT_SUPPORTED;

    AddressSpaceWriteLocker locker(team);
    if (!locker.IsLocked())
        return B_BAD_TEAM_ID;

    // if the physical address is somewhat inside a page,
    // move the actual area down to align on a page boundary
    mapOffset = physicalAddress % B_PAGE_SIZE;
    size += mapOffset;
    physicalAddress -= mapOffset;

    size = PAGE_ALIGN(size);

    // create a device cache
    status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
    if (status != B_OK)
        return status;

    // tell the page scanner to skip over this area, its pages are special
    cache->scan_skip = 1;
    cache->virtual_end = size;

    cache->Lock();

    status = map_backing_store(locker.AddressSpace(), cache, _address,
        0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
        REGION_NO_PRIVATE_MAP, &area, name, 0, true);

    if (status < B_OK)
        cache->ReleaseRefLocked();

    cache->Unlock();

    if (status == B_OK) {
        // set requested memory type -- use uncached, if not given
        uint32 memoryType = addressSpec & B_MTR_MASK;
        if (memoryType == 0)
            memoryType = B_MTR_UC;

        status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
        if (status != B_OK)
            delete_area(locker.AddressSpace(), area, false);
    }

    if (status >= B_OK && !alreadyWired) {
        // make sure our area is mapped in completely

        VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
        size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
            area->Base() + (size - 1));

        vm_page_reservation reservation;
        vm_page_reserve_pages(&reservation, reservePages,
            team == VMAddressSpace::KernelID()
                ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
        map->Lock();

        for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
            map->Map(area->Base() + offset, physicalAddress + offset,
                protection, &reservation);
        }

        map->Unlock();
        vm_page_unreserve_pages(&reservation);
    }

    if (status < B_OK)
        return status;

    // modify the pointer returned to be offset back into the new area
    // the same way the physical address passed in was offset
    *_address = (void*)((addr_t)*_address + mapOffset);

    area->cache_type = CACHE_TYPE_DEVICE;
    return area->id;
}


/*! Don't use!
    TODO: This function was introduced to map physical page vecs to
    contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
    use a device cache and does not track vm_page::wired_count!
*/
area_id
vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
    uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs,
    uint32 vecCount)
{
    TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
        "spec = %ld, _size = %p, protection = %ld, vecs = %p, "
        "vecCount = %ld)\n", team, name, _address, addressSpec, _size,
        protection, vecs, vecCount));

    if (!arch_vm_supports_protection(protection)
        || (addressSpec & B_MTR_MASK) != 0) {
        return B_NOT_SUPPORTED;
    }

    AddressSpaceWriteLocker locker(team);
    if (!locker.IsLocked())
        return B_BAD_TEAM_ID;

    if (vecCount == 0)
        return B_BAD_VALUE;

    addr_t size = 0;
    for (uint32 i = 0; i < vecCount; i++) {
        if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0
            || vecs[i].iov_len % B_PAGE_SIZE != 0) {
            return B_BAD_VALUE;
        }

        size += vecs[i].iov_len;
    }

    // create a device cache
    VMCache* cache;
    status_t result = VMCacheFactory::CreateDeviceCache(cache,
        (addr_t)vecs[0].iov_base);
    if (result != B_OK)
        return result;

    // tell the page scanner to skip over this area, its pages are special
    cache->scan_skip = 1;
    cache->virtual_end = size;

    cache->Lock();

    VMArea* area;
    result = map_backing_store(locker.AddressSpace(), cache, _address,
        0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
        REGION_NO_PRIVATE_MAP, &area, name, 0, true);

    if (result != B_OK)
        cache->ReleaseRefLocked();

    cache->Unlock();

    if (result != B_OK)
        return result;

    VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
    size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
        area->Base() + (size - 1));

    vm_page_reservation reservation;
    vm_page_reserve_pages(&reservation, reservePages,
        team == VMAddressSpace::KernelID()
            ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
    map->Lock();

    uint32 vecIndex = 0;
    size_t vecOffset = 0;
    for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
        while (vecOffset >= vecs[vecIndex].iov_len && vecIndex < vecCount) {
            vecOffset = 0;
            vecIndex++;
        }

        if (vecIndex >= vecCount)
            break;

        map->Map(area->Base() + offset,
            (addr_t)vecs[vecIndex].iov_base + vecOffset, protection,
            &reservation);

        vecOffset += B_PAGE_SIZE;
    }

    map->Unlock();
    vm_page_unreserve_pages(&reservation);

    if (_size != NULL)
        *_size = size;

    area->cache_type = CACHE_TYPE_DEVICE;
    return area->id;
}


area_id
vm_create_null_area(team_id team, const char* name, void** address,
    uint32 addressSpec, addr_t size, uint32 flags)
{
    AddressSpaceWriteLocker locker(team);
    if (!locker.IsLocked())
        return B_BAD_TEAM_ID;

    size = PAGE_ALIGN(size);

    // create a null cache
    int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
        ?
VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1475 VMCache* cache; 1476 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1477 if (status != B_OK) 1478 return status; 1479 1480 // tell the page scanner to skip over this area, no pages will be mapped 1481 // here 1482 cache->scan_skip = 1; 1483 cache->virtual_end = size; 1484 1485 cache->Lock(); 1486 1487 VMArea* area; 1488 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 1489 addressSpec, B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, 1490 &area, name, flags, true); 1491 1492 if (status < B_OK) { 1493 cache->ReleaseRefAndUnlock(); 1494 return status; 1495 } 1496 1497 cache->Unlock(); 1498 1499 area->cache_type = CACHE_TYPE_NULL; 1500 return area->id; 1501 } 1502 1503 1504 /*! Creates the vnode cache for the specified \a vnode. 1505 The vnode has to be marked busy when calling this function. 1506 */ 1507 status_t 1508 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1509 { 1510 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1511 } 1512 1513 1514 /*! \a cache must be locked. The area's address space must be read-locked. 1515 */ 1516 static void 1517 pre_map_area_pages(VMArea* area, VMCache* cache, 1518 vm_page_reservation* reservation) 1519 { 1520 addr_t baseAddress = area->Base(); 1521 addr_t cacheOffset = area->cache_offset; 1522 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1523 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1524 1525 for (VMCachePagesTree::Iterator it 1526 = cache->pages.GetIterator(firstPage, true, true); 1527 vm_page* page = it.Next();) { 1528 if (page->cache_offset >= endPage) 1529 break; 1530 1531 // skip busy and inactive pages 1532 if (page->busy || page->usage_count == 0) 1533 continue; 1534 1535 DEBUG_PAGE_ACCESS_START(page); 1536 map_page(area, page, 1537 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1538 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1539 DEBUG_PAGE_ACCESS_END(page); 1540 } 1541 } 1542 1543 1544 /*! Will map the file specified by \a fd to an area in memory. 1545 The file will be mirrored beginning at the specified \a offset. The 1546 \a offset and \a size arguments have to be page aligned. 1547 */ 1548 static area_id 1549 _vm_map_file(team_id team, const char* name, void** _address, 1550 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 1551 bool unmapAddressRange, int fd, off_t offset, bool kernel) 1552 { 1553 // TODO: for binary files, we want to make sure that they get the 1554 // copy of a file at a given time, ie. later changes should not 1555 // make it into the mapped copy -- this will need quite some changes 1556 // to be done in a nice way 1557 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 1558 fd, offset, size, mapping)); 1559 1560 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 1561 size = PAGE_ALIGN(size); 1562 1563 if (mapping == REGION_NO_PRIVATE_MAP) 1564 protection |= B_SHARED_AREA; 1565 if (addressSpec != B_EXACT_ADDRESS) 1566 unmapAddressRange = false; 1567 1568 if (fd < 0) { 1569 uint32 flags = unmapAddressRange ? 
            CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
        return vm_create_anonymous_area(team, name, _address, addressSpec, size,
            B_NO_LOCK, protection, 0, flags, kernel);
    }

    // get the open flags of the FD
    file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
    if (descriptor == NULL)
        return EBADF;
    int32 openMode = descriptor->open_mode;
    put_fd(descriptor);

    // The FD must be open for reading at any rate. For shared mapping with
    // write access, the FD must additionally be open for writing.
    if ((openMode & O_ACCMODE) == O_WRONLY
        || (mapping == REGION_NO_PRIVATE_MAP
            && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
            && (openMode & O_ACCMODE) == O_RDONLY)) {
        return EACCES;
    }

    // get the vnode for the object, this also grabs a ref to it
    struct vnode* vnode = NULL;
    status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
    if (status < B_OK)
        return status;
    CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);

    // If we're going to pre-map pages, we need to reserve the pages needed by
    // the mapping backend upfront.
    page_num_t reservedPreMapPages = 0;
    vm_page_reservation reservation;
    if ((protection & B_READ_AREA) != 0) {
        AddressSpaceWriteLocker locker;
        status = locker.SetTo(team);
        if (status != B_OK)
            return status;

        VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
        reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);

        locker.Unlock();

        vm_page_reserve_pages(&reservation, reservedPreMapPages,
            team == VMAddressSpace::KernelID()
                ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
    }

    struct PageUnreserver {
        PageUnreserver(vm_page_reservation* reservation)
            :
            fReservation(reservation)
        {
        }

        ~PageUnreserver()
        {
            if (fReservation != NULL)
                vm_page_unreserve_pages(fReservation);
        }

        vm_page_reservation* fReservation;
    } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);

    AddressSpaceWriteLocker locker(team);
    if (!locker.IsLocked())
        return B_BAD_TEAM_ID;

    // TODO: this only works for file systems that use the file cache
    VMCache* cache;
    status = vfs_get_vnode_cache(vnode, &cache, false);
    if (status < B_OK)
        return status;

    cache->Lock();

    VMArea* area;
    status = map_backing_store(locker.AddressSpace(), cache, _address,
        offset, size, addressSpec, 0, protection, mapping, &area, name,
        unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, kernel);

    if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
        // map_backing_store() cannot know we no longer need the ref
        cache->ReleaseRefLocked();
    }

    if (status == B_OK && (protection & B_READ_AREA) != 0)
        pre_map_area_pages(area, cache, &reservation);

    cache->Unlock();

    if (status == B_OK) {
        // TODO: this probably deserves a smarter solution, ie. don't always
        // prefetch stuff, and also, probably don't trigger it at this place.
1663 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 1664 // prefetches at max 10 MB starting from "offset" 1665 } 1666 1667 if (status != B_OK) 1668 return status; 1669 1670 area->cache_type = CACHE_TYPE_VNODE; 1671 return area->id; 1672 } 1673 1674 1675 area_id 1676 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 1677 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 1678 int fd, off_t offset) 1679 { 1680 if (!arch_vm_supports_protection(protection)) 1681 return B_NOT_SUPPORTED; 1682 1683 return _vm_map_file(aid, name, address, addressSpec, size, protection, 1684 mapping, unmapAddressRange, fd, offset, true); 1685 } 1686 1687 1688 VMCache* 1689 vm_area_get_locked_cache(VMArea* area) 1690 { 1691 rw_lock_read_lock(&sAreaCacheLock); 1692 1693 while (true) { 1694 VMCache* cache = area->cache; 1695 1696 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 1697 // cache has been deleted 1698 rw_lock_read_lock(&sAreaCacheLock); 1699 continue; 1700 } 1701 1702 rw_lock_read_lock(&sAreaCacheLock); 1703 1704 if (cache == area->cache) { 1705 cache->AcquireRefLocked(); 1706 rw_lock_read_unlock(&sAreaCacheLock); 1707 return cache; 1708 } 1709 1710 // the cache changed in the meantime 1711 cache->Unlock(); 1712 } 1713 } 1714 1715 1716 void 1717 vm_area_put_locked_cache(VMCache* cache) 1718 { 1719 cache->ReleaseRefAndUnlock(); 1720 } 1721 1722 1723 area_id 1724 vm_clone_area(team_id team, const char* name, void** address, 1725 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 1726 bool kernel) 1727 { 1728 VMArea* newArea = NULL; 1729 VMArea* sourceArea; 1730 1731 // Check whether the source area exists and is cloneable. If so, mark it 1732 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 1733 { 1734 AddressSpaceWriteLocker locker; 1735 status_t status = locker.SetFromArea(sourceID, sourceArea); 1736 if (status != B_OK) 1737 return status; 1738 1739 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 1740 return B_NOT_ALLOWED; 1741 1742 sourceArea->protection |= B_SHARED_AREA; 1743 protection |= B_SHARED_AREA; 1744 } 1745 1746 // Now lock both address spaces and actually do the cloning. 1747 1748 MultiAddressSpaceLocker locker; 1749 VMAddressSpace* sourceAddressSpace; 1750 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 1751 if (status != B_OK) 1752 return status; 1753 1754 VMAddressSpace* targetAddressSpace; 1755 status = locker.AddTeam(team, true, &targetAddressSpace); 1756 if (status != B_OK) 1757 return status; 1758 1759 status = locker.Lock(); 1760 if (status != B_OK) 1761 return status; 1762 1763 sourceArea = lookup_area(sourceAddressSpace, sourceID); 1764 if (sourceArea == NULL) 1765 return B_BAD_VALUE; 1766 1767 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 1768 return B_NOT_ALLOWED; 1769 1770 VMCache* cache = vm_area_get_locked_cache(sourceArea); 1771 1772 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 1773 // have been adapted. Maybe it should be part of the kernel settings, 1774 // anyway (so that old drivers can always work). 
1775 #if 0 1776 if (sourceArea->aspace == VMAddressSpace::Kernel() 1777 && addressSpace != VMAddressSpace::Kernel() 1778 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 1779 // kernel areas must not be cloned in userland, unless explicitly 1780 // declared user-cloneable upon construction 1781 status = B_NOT_ALLOWED; 1782 } else 1783 #endif 1784 if (sourceArea->cache_type == CACHE_TYPE_NULL) 1785 status = B_NOT_ALLOWED; 1786 else { 1787 status = map_backing_store(targetAddressSpace, cache, address, 1788 sourceArea->cache_offset, sourceArea->Size(), addressSpec, 1789 sourceArea->wiring, protection, mapping, &newArea, name, 0, kernel); 1790 } 1791 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 1792 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 1793 // to create a new cache, and has therefore already acquired a reference 1794 // to the source cache - but otherwise it has no idea that we need 1795 // one. 1796 cache->AcquireRefLocked(); 1797 } 1798 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 1799 // we need to map in everything at this point 1800 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 1801 // we don't have actual pages to map but a physical area 1802 VMTranslationMap* map 1803 = sourceArea->address_space->TranslationMap(); 1804 map->Lock(); 1805 1806 addr_t physicalAddress; 1807 uint32 oldProtection; 1808 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 1809 1810 map->Unlock(); 1811 1812 map = targetAddressSpace->TranslationMap(); 1813 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 1814 newArea->Base() + (newArea->Size() - 1)); 1815 1816 vm_page_reservation reservation; 1817 vm_page_reserve_pages(&reservation, reservePages, 1818 targetAddressSpace == VMAddressSpace::Kernel() 1819 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1820 map->Lock(); 1821 1822 for (addr_t offset = 0; offset < newArea->Size(); 1823 offset += B_PAGE_SIZE) { 1824 map->Map(newArea->Base() + offset, physicalAddress + offset, 1825 protection, &reservation); 1826 } 1827 1828 map->Unlock(); 1829 vm_page_unreserve_pages(&reservation); 1830 } else { 1831 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 1832 size_t reservePages = map->MaxPagesNeededToMap( 1833 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 1834 vm_page_reservation reservation; 1835 vm_page_reserve_pages(&reservation, reservePages, 1836 targetAddressSpace == VMAddressSpace::Kernel() 1837 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1838 1839 // map in all pages from source 1840 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 1841 vm_page* page = it.Next();) { 1842 if (!page->busy) { 1843 DEBUG_PAGE_ACCESS_START(page); 1844 map_page(newArea, page, 1845 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 1846 - newArea->cache_offset), 1847 protection, &reservation); 1848 DEBUG_PAGE_ACCESS_END(page); 1849 } 1850 } 1851 // TODO: B_FULL_LOCK means that all pages are locked. We are not 1852 // ensuring that! 
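            // (Pages that are busy at this point are skipped by the loop above
            // and will only be mapped lazily, when they are faulted in.)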
1853 1854 vm_page_unreserve_pages(&reservation); 1855 } 1856 } 1857 if (status == B_OK) 1858 newArea->cache_type = sourceArea->cache_type; 1859 1860 vm_area_put_locked_cache(cache); 1861 1862 if (status < B_OK) 1863 return status; 1864 1865 return newArea->id; 1866 } 1867 1868 1869 static void 1870 delete_area(VMAddressSpace* addressSpace, VMArea* area, 1871 bool deletingAddressSpace) 1872 { 1873 VMAreaHash::Remove(area); 1874 1875 // At this point the area is removed from the global hash table, but 1876 // still exists in the area list. 1877 1878 // Unmap the virtual address space the area occupied. 1879 { 1880 // We need to lock the complete cache chain. 1881 VMCache* topCache = vm_area_get_locked_cache(area); 1882 VMCacheChainLocker cacheChainLocker(topCache); 1883 cacheChainLocker.LockAllSourceCaches(); 1884 1885 // If the area's top cache is a temporary cache and the area is the only 1886 // one referencing it (besides us currently holding a second reference), 1887 // the unmapping code doesn't need to care about preserving the accessed 1888 // and dirty flags of the top cache page mappings. 1889 bool ignoreTopCachePageFlags 1890 = topCache->temporary && topCache->RefCount() == 2; 1891 1892 area->address_space->TranslationMap()->UnmapArea(area, 1893 deletingAddressSpace, ignoreTopCachePageFlags); 1894 } 1895 1896 if (!area->cache->temporary) 1897 area->cache->WriteModified(); 1898 1899 arch_vm_unset_memory_type(area); 1900 addressSpace->RemoveArea(area, 0); 1901 addressSpace->Put(); 1902 1903 area->cache->RemoveArea(area); 1904 area->cache->ReleaseRef(); 1905 1906 addressSpace->DeleteArea(area, 0); 1907 } 1908 1909 1910 status_t 1911 vm_delete_area(team_id team, area_id id, bool kernel) 1912 { 1913 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 1914 1915 AddressSpaceWriteLocker locker; 1916 VMArea* area; 1917 status_t status = locker.SetFromArea(team, id, area); 1918 if (status != B_OK) 1919 return status; 1920 1921 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 1922 return B_NOT_ALLOWED; 1923 1924 delete_area(locker.AddressSpace(), area, false); 1925 return B_OK; 1926 } 1927 1928 1929 /*! Creates a new cache on top of given cache, moves all areas from 1930 the old cache to the new one, and changes the protection of all affected 1931 areas' pages to read-only. 1932 Preconditions: 1933 - The given cache must be locked. 1934 - All of the cache's areas' address spaces must be read locked. 1935 */ 1936 static status_t 1937 vm_copy_on_write_area(VMCache* lowerCache) 1938 { 1939 VMCache* upperCache; 1940 1941 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 1942 1943 // We need to separate the cache from its areas. The cache goes one level 1944 // deeper and we create a new cache inbetween. 
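    // Rough sketch of the result (not part of the original comment):
    // before:  area(s) -> lowerCache
    // after:   area(s) -> upperCache (temporary, anonymous) -> lowerCache
    // All pages stay in lowerCache and are mapped read-only, so the first
    // write to a page faults and copies it into upperCache.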
1945 1946 // create an anonymous cache 1947 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 1948 0, true, VM_PRIORITY_USER); 1949 if (status != B_OK) 1950 return status; 1951 1952 upperCache->Lock(); 1953 1954 upperCache->temporary = 1; 1955 upperCache->scan_skip = lowerCache->scan_skip; 1956 upperCache->virtual_base = lowerCache->virtual_base; 1957 upperCache->virtual_end = lowerCache->virtual_end; 1958 1959 // transfer the lower cache areas to the upper cache 1960 rw_lock_write_lock(&sAreaCacheLock); 1961 upperCache->TransferAreas(lowerCache); 1962 rw_lock_write_unlock(&sAreaCacheLock); 1963 1964 lowerCache->AddConsumer(upperCache); 1965 1966 // We now need to remap all pages from all of the cache's areas read-only, so 1967 // that a copy will be created on next write access 1968 1969 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 1970 tempArea = tempArea->cache_next) { 1971 // The area must be readable in the same way it was previously writable 1972 uint32 protection = B_KERNEL_READ_AREA; 1973 if ((tempArea->protection & B_READ_AREA) != 0) 1974 protection |= B_READ_AREA; 1975 1976 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 1977 map->Lock(); 1978 map->ProtectArea(tempArea, protection); 1979 map->Unlock(); 1980 } 1981 1982 vm_area_put_locked_cache(upperCache); 1983 1984 return B_OK; 1985 } 1986 1987 1988 area_id 1989 vm_copy_area(team_id team, const char* name, void** _address, 1990 uint32 addressSpec, uint32 protection, area_id sourceID) 1991 { 1992 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 1993 1994 if ((protection & B_KERNEL_PROTECTION) == 0) { 1995 // set the same protection for the kernel as for userland 1996 protection |= B_KERNEL_READ_AREA; 1997 if (writableCopy) 1998 protection |= B_KERNEL_WRITE_AREA; 1999 } 2000 2001 // Do the locking: target address space, all address spaces associated with 2002 // the source cache, and the cache itself. 2003 MultiAddressSpaceLocker locker; 2004 VMAddressSpace* targetAddressSpace; 2005 VMCache* cache; 2006 VMArea* source; 2007 status_t status = locker.AddTeam(team, true, &targetAddressSpace); 2008 if (status == B_OK) { 2009 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2010 &cache); 2011 } 2012 if (status != B_OK) 2013 return status; 2014 2015 AreaCacheLocker cacheLocker(cache); // already locked 2016 2017 if (addressSpec == B_CLONE_ADDRESS) { 2018 addressSpec = B_EXACT_ADDRESS; 2019 *_address = (void*)source->Base(); 2020 } 2021 2022 bool sharedArea = (source->protection & B_SHARED_AREA) != 0; 2023 2024 // First, create a cache on top of the source area, respectively use the 2025 // existing one, if this is a shared area. 2026 2027 VMArea* target; 2028 status = map_backing_store(targetAddressSpace, cache, _address, 2029 source->cache_offset, source->Size(), addressSpec, source->wiring, 2030 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2031 &target, name, 0, true); 2032 if (status < B_OK) 2033 return status; 2034 2035 if (sharedArea) { 2036 // The new area uses the old area's cache, but map_backing_store() 2037 // hasn't acquired a ref. So we have to do that now. 2038 cache->AcquireRefLocked(); 2039 } 2040 2041 // If the source area is writable, we need to move it one layer up as well 2042 2043 if (!sharedArea) { 2044 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2045 // TODO: do something more useful if this fails! 
2046 if (vm_copy_on_write_area(cache) < B_OK) 2047 panic("vm_copy_on_write_area() failed!\n"); 2048 } 2049 } 2050 2051 // we return the ID of the newly created area 2052 return target->id; 2053 } 2054 2055 2056 static status_t 2057 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2058 bool kernel) 2059 { 2060 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2061 "%#lx)\n", team, areaID, newProtection)); 2062 2063 if (!arch_vm_supports_protection(newProtection)) 2064 return B_NOT_SUPPORTED; 2065 2066 // lock address spaces and cache 2067 MultiAddressSpaceLocker locker; 2068 VMCache* cache; 2069 VMArea* area; 2070 status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area, 2071 &cache); 2072 AreaCacheLocker cacheLocker(cache); // already locked 2073 2074 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2075 return B_NOT_ALLOWED; 2076 2077 if (area->protection == newProtection) 2078 return B_OK; 2079 2080 if (team != VMAddressSpace::KernelID() 2081 && area->address_space->ID() != team) { 2082 // unless you're the kernel, you are only allowed to set 2083 // the protection of your own areas 2084 return B_NOT_ALLOWED; 2085 } 2086 2087 bool changePageProtection = true; 2088 bool changeTopCachePagesOnly = false; 2089 2090 if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2091 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) { 2092 // writable -> !writable 2093 2094 if (cache->source != NULL && cache->temporary) { 2095 if (cache->CountWritableAreas(area) == 0) { 2096 // Since this cache now lives from the pages in its source cache, 2097 // we can change the cache's commitment to take only those pages 2098 // into account that really are in this cache. 2099 2100 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2101 team == VMAddressSpace::KernelID() 2102 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2103 2104 // TODO: we may be able to join with our source cache, if 2105 // count == 0 2106 } 2107 } 2108 2109 // If only the writability changes, we can just remap the pages of the 2110 // top cache, since the pages of lower caches are mapped read-only 2111 // anyway. That's advantageous only, if the number of pages in the cache 2112 // is significantly smaller than the number of pages in the area, 2113 // though. 2114 if (newProtection 2115 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2116 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2117 changeTopCachePagesOnly = true; 2118 } 2119 } else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0 2120 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 2121 // !writable -> writable 2122 2123 if (!list_is_empty(&cache->consumers)) { 2124 // There are consumers -- we have to insert a new cache. Fortunately 2125 // vm_copy_on_write_area() does everything that's needed. 2126 changePageProtection = false; 2127 status = vm_copy_on_write_area(cache); 2128 } else { 2129 // No consumers, so we don't need to insert a new one. 2130 if (cache->source != NULL && cache->temporary) { 2131 // the cache's commitment must contain all possible pages 2132 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2133 team == VMAddressSpace::KernelID() 2134 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2135 } 2136 2137 if (status == B_OK && cache->source != NULL) { 2138 // There's a source cache, hence we can't just change all pages' 2139 // protection or we might allow writing into pages belonging to 2140 // a lower cache. 2141 changeTopCachePagesOnly = true; 2142 } 2143 } 2144 } else { 2145 // we don't have anything special to do in all other cases 2146 } 2147 2148 if (status == B_OK) { 2149 // remap existing pages in this cache 2150 if (changePageProtection) { 2151 VMTranslationMap* map = area->address_space->TranslationMap(); 2152 map->Lock(); 2153 2154 if (changeTopCachePagesOnly) { 2155 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2156 page_num_t lastPageOffset 2157 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2158 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2159 vm_page* page = it.Next();) { 2160 if (page->cache_offset >= firstPageOffset 2161 && page->cache_offset <= lastPageOffset) { 2162 addr_t address = virtual_page_address(area, page); 2163 map->ProtectPage(area, address, newProtection); 2164 } 2165 } 2166 } else 2167 map->ProtectArea(area, newProtection); 2168 2169 map->Unlock(); 2170 } 2171 2172 area->protection = newProtection; 2173 } 2174 2175 return status; 2176 } 2177 2178 2179 status_t 2180 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr) 2181 { 2182 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2183 if (addressSpace == NULL) 2184 return B_BAD_TEAM_ID; 2185 2186 VMTranslationMap* map = addressSpace->TranslationMap(); 2187 2188 map->Lock(); 2189 uint32 dummyFlags; 2190 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2191 map->Unlock(); 2192 2193 addressSpace->Put(); 2194 return status; 2195 } 2196 2197 2198 /*! The page's cache must be locked. 2199 */ 2200 bool 2201 vm_test_map_modification(vm_page* page) 2202 { 2203 if (page->modified) 2204 return true; 2205 2206 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2207 vm_page_mapping* mapping; 2208 while ((mapping = iterator.Next()) != NULL) { 2209 VMArea* area = mapping->area; 2210 VMTranslationMap* map = area->address_space->TranslationMap(); 2211 2212 addr_t physicalAddress; 2213 uint32 flags; 2214 map->Lock(); 2215 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2216 map->Unlock(); 2217 2218 if ((flags & PAGE_MODIFIED) != 0) 2219 return true; 2220 } 2221 2222 return false; 2223 } 2224 2225 2226 /*! The page's cache must be locked. 2227 */ 2228 void 2229 vm_clear_map_flags(vm_page* page, uint32 flags) 2230 { 2231 if ((flags & PAGE_ACCESSED) != 0) 2232 page->accessed = false; 2233 if ((flags & PAGE_MODIFIED) != 0) 2234 page->modified = false; 2235 2236 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2237 vm_page_mapping* mapping; 2238 while ((mapping = iterator.Next()) != NULL) { 2239 VMArea* area = mapping->area; 2240 VMTranslationMap* map = area->address_space->TranslationMap(); 2241 2242 map->Lock(); 2243 map->ClearFlags(virtual_page_address(area, page), flags); 2244 map->Unlock(); 2245 } 2246 } 2247 2248 2249 /*! Removes all mappings from a page. 2250 After you've called this function, the page is unmapped from memory and 2251 the page's \c accessed and \c modified flags have been updated according 2252 to the state of the mappings. 2253 The page's cache must be locked. 
2254 */ 2255 void 2256 vm_remove_all_page_mappings(vm_page* page) 2257 { 2258 while (vm_page_mapping* mapping = page->mappings.Head()) { 2259 VMArea* area = mapping->area; 2260 VMTranslationMap* map = area->address_space->TranslationMap(); 2261 addr_t address = virtual_page_address(area, page); 2262 map->UnmapPage(area, address, false); 2263 } 2264 } 2265 2266 2267 int32 2268 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2269 { 2270 int32 count = 0; 2271 2272 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2273 vm_page_mapping* mapping; 2274 while ((mapping = iterator.Next()) != NULL) { 2275 VMArea* area = mapping->area; 2276 VMTranslationMap* map = area->address_space->TranslationMap(); 2277 2278 bool modified; 2279 if (map->ClearAccessedAndModified(area, 2280 virtual_page_address(area, page), false, modified)) { 2281 count++; 2282 } 2283 2284 page->modified |= modified; 2285 } 2286 2287 2288 if (page->accessed) { 2289 count++; 2290 page->accessed = false; 2291 } 2292 2293 return count; 2294 } 2295 2296 2297 /*! Removes all mappings of a page and/or clears the accessed bits of the 2298 mappings. 2299 The function iterates through the page mappings and removes them until 2300 encountering one that has been accessed. From then on it will continue to 2301 iterate, but only clear the accessed flag of the mapping. The page's 2302 \c modified bit will be updated accordingly, the \c accessed bit will be 2303 cleared. 2304 \return The number of mapping accessed bits encountered, including the 2305 \c accessed bit of the page itself. If \c 0 is returned, all mappings 2306 of the page have been removed. 2307 */ 2308 int32 2309 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 2310 { 2311 if (page->accessed) 2312 return vm_clear_page_mapping_accessed_flags(page); 2313 2314 while (vm_page_mapping* mapping = page->mappings.Head()) { 2315 VMArea* area = mapping->area; 2316 VMTranslationMap* map = area->address_space->TranslationMap(); 2317 addr_t address = virtual_page_address(area, page); 2318 bool modified = false; 2319 if (map->ClearAccessedAndModified(area, address, true, modified)) { 2320 page->accessed = true; 2321 page->modified |= modified; 2322 return vm_clear_page_mapping_accessed_flags(page); 2323 } 2324 page->modified |= modified; 2325 } 2326 2327 return 0; 2328 } 2329 2330 2331 static int 2332 display_mem(int argc, char** argv) 2333 { 2334 bool physical = false; 2335 addr_t copyAddress; 2336 int32 displayWidth; 2337 int32 itemSize; 2338 int32 num = -1; 2339 addr_t address; 2340 int i = 1, j; 2341 2342 if (argc > 1 && argv[1][0] == '-') { 2343 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2344 physical = true; 2345 i++; 2346 } else 2347 i = 99; 2348 } 2349 2350 if (argc < i + 1 || argc > i + 2) { 2351 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 2352 "\tdl - 8 bytes\n" 2353 "\tdw - 4 bytes\n" 2354 "\tds - 2 bytes\n" 2355 "\tdb - 1 byte\n" 2356 "\tstring - a whole string\n" 2357 " -p or --physical only allows memory from a single page to be " 2358 "displayed.\n"); 2359 return 0; 2360 } 2361 2362 address = parse_expression(argv[i]); 2363 2364 if (argc > i + 1) 2365 num = parse_expression(argv[i + 1]); 2366 2367 // build the format string 2368 if (strcmp(argv[0], "db") == 0) { 2369 itemSize = 1; 2370 displayWidth = 16; 2371 } else if (strcmp(argv[0], "ds") == 0) { 2372 itemSize = 2; 2373 displayWidth = 8; 2374 } else if (strcmp(argv[0], "dw") == 0) { 2375 itemSize = 4; 2376 displayWidth = 4; 2377 } else 
if (strcmp(argv[0], "dl") == 0) {
		itemSize = 8;
		displayWidth = 2;
	} else if (strcmp(argv[0], "string") == 0) {
		itemSize = 1;
		displayWidth = -1;
	} else {
		kprintf("display_mem called in an invalid way!\n");
		return 0;
	}

	if (num <= 0)
		num = displayWidth;

	void* physicalPageHandle = NULL;

	if (physical) {
		int32 offset = address & (B_PAGE_SIZE - 1);
		if (num * itemSize + offset > B_PAGE_SIZE) {
			num = (B_PAGE_SIZE - offset) / itemSize;
			kprintf("NOTE: number of bytes has been cut to page size\n");
		}

		address = ROUNDDOWN(address, B_PAGE_SIZE);

		if (vm_get_physical_page_debug(address, &copyAddress,
				&physicalPageHandle) != B_OK) {
			kprintf("getting the hardware page failed.");
			return 0;
		}

		address += offset;
		copyAddress += offset;
	} else
		copyAddress = address;

	if (!strcmp(argv[0], "string")) {
		kprintf("%p \"", (char*)copyAddress);

		// string mode
		for (i = 0; true; i++) {
			char c;
			if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
				|| c == '\0')
				break;

			if (c == '\n')
				kprintf("\\n");
			else if (c == '\t')
				kprintf("\\t");
			else {
				if (!isprint(c))
					c = '.';

				kprintf("%c", c);
			}
		}

		kprintf("\"\n");
	} else {
		// number mode
		for (i = 0; i < num; i++) {
			uint64 value;

			if ((i % displayWidth) == 0) {
				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
				if (i != 0)
					kprintf("\n");

				kprintf("[0x%lx] ", address + i * itemSize);

				for (j = 0; j < displayed; j++) {
					char c;
					if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j,
							1) != B_OK) {
						displayed = j;
						break;
					}
					if (!isprint(c))
						c = '.';

					kprintf("%c", c);
				}
				if (num > displayWidth) {
					// make sure the spacing in the last line is correct
					for (j = displayed; j < displayWidth * itemSize; j++)
						kprintf(" ");
				}
				kprintf(" ");
			}

			if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize,
					itemSize) != B_OK) {
				kprintf("read fault");
				break;
			}

			switch (itemSize) {
				case 1:
					kprintf(" %02x", *(uint8*)&value);
					break;
				case 2:
					kprintf(" %04x", *(uint16*)&value);
					break;
				case 4:
					kprintf(" %08lx", *(uint32*)&value);
					break;
				case 8:
					kprintf(" %016Lx", *(uint64*)&value);
					break;
			}
		}

		kprintf("\n");
	}

	if (physical) {
		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
	}
	return 0;
}


static void
dump_cache_tree_recursively(VMCache* cache, int level,
	VMCache* highlightCache)
{
	// print this cache
	for (int i = 0; i < level; i++)
		kprintf(" ");
	if (cache == highlightCache)
		kprintf("%p <--\n", cache);
	else
		kprintf("%p\n", cache);

	// recursively print its consumers
	VMCache* consumer = NULL;
	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
			consumer)) != NULL) {
		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
	}
}


static int
dump_cache_tree(int argc, char** argv)
{
	if (argc != 2 || !strcmp(argv[1], "--help")) {
		kprintf("usage: %s <address>\n", argv[0]);
		return 0;
	}

addr_t address = parse_expression(argv[1]); 2531 if (address == 0) 2532 return 0; 2533 2534 VMCache* cache = (VMCache*)address; 2535 VMCache* root = cache; 2536 2537 // find the root cache (the transitive source) 2538 while (root->source != NULL) 2539 root = root->source; 2540 2541 dump_cache_tree_recursively(root, 0, cache); 2542 2543 return 0; 2544 } 2545 2546 2547 static const char* 2548 cache_type_to_string(int32 type) 2549 { 2550 switch (type) { 2551 case CACHE_TYPE_RAM: 2552 return "RAM"; 2553 case CACHE_TYPE_DEVICE: 2554 return "device"; 2555 case CACHE_TYPE_VNODE: 2556 return "vnode"; 2557 case CACHE_TYPE_NULL: 2558 return "null"; 2559 2560 default: 2561 return "unknown"; 2562 } 2563 } 2564 2565 2566 #if DEBUG_CACHE_LIST 2567 2568 static void 2569 update_cache_info_recursively(VMCache* cache, cache_info& info) 2570 { 2571 info.page_count += cache->page_count; 2572 if (cache->type == CACHE_TYPE_RAM) 2573 info.committed += cache->committed_size; 2574 2575 // recurse 2576 VMCache* consumer = NULL; 2577 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2578 consumer)) != NULL) { 2579 update_cache_info_recursively(consumer, info); 2580 } 2581 } 2582 2583 2584 static int 2585 cache_info_compare_page_count(const void* _a, const void* _b) 2586 { 2587 const cache_info* a = (const cache_info*)_a; 2588 const cache_info* b = (const cache_info*)_b; 2589 if (a->page_count == b->page_count) 2590 return 0; 2591 return a->page_count < b->page_count ? 1 : -1; 2592 } 2593 2594 2595 static int 2596 cache_info_compare_committed(const void* _a, const void* _b) 2597 { 2598 const cache_info* a = (const cache_info*)_a; 2599 const cache_info* b = (const cache_info*)_b; 2600 if (a->committed == b->committed) 2601 return 0; 2602 return a->committed < b->committed ? 
1 : -1; 2603 } 2604 2605 2606 static void 2607 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 2608 { 2609 for (int i = 0; i < level; i++) 2610 kprintf(" "); 2611 2612 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 2613 cache_type_to_string(cache->type), cache->virtual_base, 2614 cache->virtual_end, cache->page_count); 2615 2616 if (level == 0) 2617 kprintf("/%lu", info.page_count); 2618 2619 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 2620 kprintf(", committed: %lld", cache->committed_size); 2621 2622 if (level == 0) 2623 kprintf("/%lu", info.committed); 2624 } 2625 2626 // areas 2627 if (cache->areas != NULL) { 2628 VMArea* area = cache->areas; 2629 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 2630 area->address_space->ID()); 2631 2632 while (area->cache_next != NULL) { 2633 area = area->cache_next; 2634 kprintf(", %ld", area->id); 2635 } 2636 } 2637 2638 kputs("\n"); 2639 2640 // recurse 2641 VMCache* consumer = NULL; 2642 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2643 consumer)) != NULL) { 2644 dump_caches_recursively(consumer, info, level + 1); 2645 } 2646 } 2647 2648 2649 static int 2650 dump_caches(int argc, char** argv) 2651 { 2652 if (sCacheInfoTable == NULL) { 2653 kprintf("No cache info table!\n"); 2654 return 0; 2655 } 2656 2657 bool sortByPageCount = true; 2658 2659 for (int32 i = 1; i < argc; i++) { 2660 if (strcmp(argv[i], "-c") == 0) { 2661 sortByPageCount = false; 2662 } else { 2663 print_debugger_command_usage(argv[0]); 2664 return 0; 2665 } 2666 } 2667 2668 uint32 totalCount = 0; 2669 uint32 rootCount = 0; 2670 off_t totalCommitted = 0; 2671 page_num_t totalPages = 0; 2672 2673 VMCache* cache = gDebugCacheList; 2674 while (cache) { 2675 totalCount++; 2676 if (cache->source == NULL) { 2677 cache_info stackInfo; 2678 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 2679 ? sCacheInfoTable[rootCount] : stackInfo; 2680 rootCount++; 2681 info.cache = cache; 2682 info.page_count = 0; 2683 info.committed = 0; 2684 update_cache_info_recursively(cache, info); 2685 totalCommitted += info.committed; 2686 totalPages += info.page_count; 2687 } 2688 2689 cache = cache->debug_next; 2690 } 2691 2692 if (rootCount <= (uint32)kCacheInfoTableCount) { 2693 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 2694 sortByPageCount 2695 ? &cache_info_compare_page_count 2696 : &cache_info_compare_committed); 2697 } 2698 2699 kprintf("total committed memory: %lld, total used pages: %lu\n", 2700 totalCommitted, totalPages); 2701 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 2702 "tree...\n\n", totalCount, rootCount, 2703 sortByPageCount ? "page count" : "committed size"); 2704 2705 if (rootCount <= (uint32)kCacheInfoTableCount) { 2706 for (uint32 i = 0; i < rootCount; i++) { 2707 cache_info& info = sCacheInfoTable[i]; 2708 dump_caches_recursively(info.cache, info, 0); 2709 } 2710 } else 2711 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 2712 2713 return 0; 2714 } 2715 2716 #endif // DEBUG_CACHE_LIST 2717 2718 2719 static int 2720 dump_cache(int argc, char** argv) 2721 { 2722 VMCache* cache; 2723 bool showPages = false; 2724 int i = 1; 2725 2726 if (argc < 2 || !strcmp(argv[1], "--help")) { 2727 kprintf("usage: %s [-ps] <address>\n" 2728 " if -p is specified, all pages are shown, if -s is used\n" 2729 " only the cache info is shown respectively.\n", argv[0]); 2730 return 0; 2731 } 2732 while (argv[i][0] == '-') { 2733 char* arg = argv[i] + 1; 2734 while (arg[0]) { 2735 if (arg[0] == 'p') 2736 showPages = true; 2737 arg++; 2738 } 2739 i++; 2740 } 2741 if (argv[i] == NULL) { 2742 kprintf("%s: invalid argument, pass address\n", argv[0]); 2743 return 0; 2744 } 2745 2746 addr_t address = parse_expression(argv[i]); 2747 if (address == 0) 2748 return 0; 2749 2750 cache = (VMCache*)address; 2751 2752 kprintf("CACHE %p:\n", cache); 2753 kprintf(" ref_count: %ld\n", cache->RefCount()); 2754 kprintf(" source: %p\n", cache->source); 2755 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 2756 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 2757 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 2758 kprintf(" temporary: %ld\n", cache->temporary); 2759 kprintf(" scan_skip: %ld\n", cache->scan_skip); 2760 kprintf(" lock: %p\n", cache->GetLock()); 2761 #if KDEBUG 2762 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 2763 #endif 2764 kprintf(" areas:\n"); 2765 2766 for (VMArea* area = cache->areas; area != NULL; area = area->cache_next) { 2767 kprintf(" area 0x%lx, %s\n", area->id, area->name); 2768 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->Base(), 2769 area->Size()); 2770 kprintf("\tprotection: 0x%lx\n", area->protection); 2771 kprintf("\towner: 0x%lx\n", area->address_space->ID()); 2772 } 2773 2774 kprintf(" consumers:\n"); 2775 VMCache* consumer = NULL; 2776 while ((consumer = (VMCache*)list_get_next_item(&cache->consumers, 2777 consumer)) != NULL) { 2778 kprintf("\t%p\n", consumer); 2779 } 2780 2781 kprintf(" pages:\n"); 2782 if (showPages) { 2783 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2784 vm_page* page = it.Next();) { 2785 if (!vm_page_is_dummy(page)) { 2786 kprintf("\t%p ppn 0x%lx offset 0x%lx state %u (%s) " 2787 "wired_count %u\n", page, page->physical_page_number, 2788 page->cache_offset, page->State(), 2789 page_state_to_string(page->State()), page->wired_count); 2790 } else { 2791 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 2792 page, page->State(), page_state_to_string(page->State())); 2793 } 2794 } 2795 } else 2796 kprintf("\t%ld in cache\n", cache->page_count); 2797 2798 set_debug_variable("_sourceCache", (addr_t)cache->source); 2799 2800 return 0; 2801 } 2802 2803 2804 static void 2805 dump_area_struct(VMArea* area, bool mappings) 2806 { 2807 kprintf("AREA: %p\n", area); 2808 kprintf("name:\t\t'%s'\n", area->name); 2809 kprintf("owner:\t\t0x%lx\n", area->address_space->ID()); 2810 kprintf("id:\t\t0x%lx\n", area->id); 2811 kprintf("base:\t\t0x%lx\n", area->Base()); 2812 kprintf("size:\t\t0x%lx\n", area->Size()); 2813 kprintf("protection:\t0x%lx\n", area->protection); 2814 kprintf("wiring:\t\t0x%x\n", area->wiring); 2815 kprintf("memory_type:\t0x%x\n", area->memory_type); 2816 kprintf("cache:\t\t%p\n", area->cache); 2817 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 2818 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 2819 kprintf("cache_next:\t%p\n", area->cache_next); 2820 
kprintf("cache_prev:\t%p\n", area->cache_prev); 2821 2822 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 2823 if (mappings) { 2824 kprintf("page mappings:\n"); 2825 while (iterator.HasNext()) { 2826 vm_page_mapping* mapping = iterator.Next(); 2827 kprintf(" %p", mapping->page); 2828 } 2829 kprintf("\n"); 2830 } else { 2831 uint32 count = 0; 2832 while (iterator.Next() != NULL) { 2833 count++; 2834 } 2835 kprintf("page mappings:\t%lu\n", count); 2836 } 2837 } 2838 2839 2840 static int 2841 dump_area(int argc, char** argv) 2842 { 2843 bool mappings = false; 2844 bool found = false; 2845 int32 index = 1; 2846 VMArea* area; 2847 addr_t num; 2848 2849 if (argc < 2 || !strcmp(argv[1], "--help")) { 2850 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 2851 "All areas matching either id/address/name are listed. You can\n" 2852 "force to check only a specific item by prefixing the specifier\n" 2853 "with the id/contains/address/name keywords.\n" 2854 "-m shows the area's mappings as well.\n"); 2855 return 0; 2856 } 2857 2858 if (!strcmp(argv[1], "-m")) { 2859 mappings = true; 2860 index++; 2861 } 2862 2863 int32 mode = 0xf; 2864 if (!strcmp(argv[index], "id")) 2865 mode = 1; 2866 else if (!strcmp(argv[index], "contains")) 2867 mode = 2; 2868 else if (!strcmp(argv[index], "name")) 2869 mode = 4; 2870 else if (!strcmp(argv[index], "address")) 2871 mode = 0; 2872 if (mode != 0xf) 2873 index++; 2874 2875 if (index >= argc) { 2876 kprintf("No area specifier given.\n"); 2877 return 0; 2878 } 2879 2880 num = parse_expression(argv[index]); 2881 2882 if (mode == 0) { 2883 dump_area_struct((struct VMArea*)num, mappings); 2884 } else { 2885 // walk through the area list, looking for the arguments as a name 2886 2887 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 2888 while ((area = it.Next()) != NULL) { 2889 if (((mode & 4) != 0 && area->name != NULL 2890 && !strcmp(argv[index], area->name)) 2891 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 2892 || (((mode & 2) != 0 && area->Base() <= num 2893 && area->Base() + area->Size() > num))))) { 2894 dump_area_struct(area, mappings); 2895 found = true; 2896 } 2897 } 2898 2899 if (!found) 2900 kprintf("could not find area %s (%ld)\n", argv[index], num); 2901 } 2902 2903 return 0; 2904 } 2905 2906 2907 static int 2908 dump_area_list(int argc, char** argv) 2909 { 2910 VMArea* area; 2911 const char* name = NULL; 2912 int32 id = 0; 2913 2914 if (argc > 1) { 2915 id = parse_expression(argv[1]); 2916 if (id == 0) 2917 name = argv[1]; 2918 } 2919 2920 kprintf("addr id base\t\tsize protect lock name\n"); 2921 2922 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 2923 while ((area = it.Next()) != NULL) { 2924 if ((id != 0 && area->address_space->ID() != id) 2925 || (name != NULL && strstr(area->name, name) == NULL)) 2926 continue; 2927 2928 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 2929 (void*)area->Base(), (void*)area->Size(), area->protection, 2930 area->wiring, area->name); 2931 } 2932 return 0; 2933 } 2934 2935 2936 static int 2937 dump_available_memory(int argc, char** argv) 2938 { 2939 kprintf("Available memory: %Ld/%lu bytes\n", 2940 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 2941 return 0; 2942 } 2943 2944 2945 status_t 2946 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 2947 { 2948 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 2949 addressSpace->ID())); 2950 2951 addressSpace->WriteLock(); 2952 2953 // remove all 
reserved areas in this address space 2954 addressSpace->UnreserveAllAddressRanges(0); 2955 2956 // delete all the areas in this address space 2957 while (VMArea* area = addressSpace->FirstArea()) 2958 delete_area(addressSpace, area, deletingAddressSpace); 2959 2960 addressSpace->WriteUnlock(); 2961 return B_OK; 2962 } 2963 2964 2965 static area_id 2966 vm_area_for(addr_t address, bool kernel) 2967 { 2968 team_id team; 2969 if (IS_USER_ADDRESS(address)) { 2970 // we try the user team address space, if any 2971 team = VMAddressSpace::CurrentID(); 2972 if (team < 0) 2973 return team; 2974 } else 2975 team = VMAddressSpace::KernelID(); 2976 2977 AddressSpaceReadLocker locker(team); 2978 if (!locker.IsLocked()) 2979 return B_BAD_TEAM_ID; 2980 2981 VMArea* area = locker.AddressSpace()->LookupArea(address); 2982 if (area != NULL) { 2983 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 2984 return B_ERROR; 2985 2986 return area->id; 2987 } 2988 2989 return B_ERROR; 2990 } 2991 2992 2993 /*! Frees physical pages that were used during the boot process. 2994 \a end is inclusive. 2995 */ 2996 static void 2997 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 2998 { 2999 // free all physical pages in the specified range 3000 3001 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3002 addr_t physicalAddress; 3003 uint32 flags; 3004 3005 if (map->Query(current, &physicalAddress, &flags) == B_OK 3006 && (flags & PAGE_PRESENT) != 0) { 3007 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3008 if (page != NULL && page->State() != PAGE_STATE_FREE 3009 && page->State() != PAGE_STATE_CLEAR 3010 && page->State() != PAGE_STATE_UNUSED) { 3011 DEBUG_PAGE_ACCESS_START(page); 3012 vm_page_set_state(page, PAGE_STATE_FREE); 3013 } 3014 } 3015 } 3016 3017 // unmap the memory 3018 map->Unmap(start, end); 3019 } 3020 3021 3022 void 3023 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3024 { 3025 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3026 addr_t end = start + (size - 1); 3027 addr_t lastEnd = start; 3028 3029 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3030 (void*)start, (void*)end)); 3031 3032 // The areas are sorted in virtual address space order, so 3033 // we just have to find the holes between them that fall 3034 // into the area we should dispose 3035 3036 map->Lock(); 3037 3038 for (VMAddressSpace::AreaIterator it 3039 = VMAddressSpace::Kernel()->GetAreaIterator(); 3040 VMArea* area = it.Next();) { 3041 addr_t areaStart = area->Base(); 3042 addr_t areaEnd = areaStart + (area->Size() - 1); 3043 3044 if (areaEnd < start) 3045 continue; 3046 3047 if (areaStart > end) { 3048 // we are done, the area is already beyond of what we have to free 3049 end = areaStart - 1; 3050 break; 3051 } 3052 3053 if (areaStart > lastEnd) { 3054 // this is something we can free 3055 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3056 (void*)areaStart)); 3057 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3058 } 3059 3060 if (areaEnd >= end) { 3061 lastEnd = areaEnd; 3062 // no +1 to prevent potential overflow 3063 break; 3064 } 3065 3066 lastEnd = areaEnd + 1; 3067 } 3068 3069 if (lastEnd < end) { 3070 // we can also get rid of some space at the end of the area 3071 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3072 (void*)end)); 3073 unmap_and_free_physical_pages(map, lastEnd, end); 3074 } 3075 3076 map->Unlock(); 3077 } 3078 3079 3080 
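// The hole-walking logic of vm_free_unused_boot_loader_range() above is
// easiest to see in isolation. The following is a minimal, illustrative
// sketch (not part of the build, hence #if 0): it assumes a sorted array of
// {start, size} pairs standing in for the kernel areas and reports every
// hole overlapping the range to dispose of. The names example_range and
// for_each_hole_example are made up for this sketch only.
#if 0
struct example_range {
	addr_t	start;
	addr_t	size;
};

static void
for_each_hole_example(const example_range* areas, int count, addr_t start,
	addr_t end)
{
	// areas[] must be sorted by start address; end is inclusive, mirroring
	// vm_free_unused_boot_loader_range().
	addr_t lastEnd = start;

	for (int i = 0; i < count; i++) {
		addr_t areaStart = areas[i].start;
		addr_t areaEnd = areaStart + (areas[i].size - 1);

		if (areaEnd < start)
			continue;

		if (areaStart > end) {
			// already beyond the range we care about
			end = areaStart - 1;
			break;
		}

		if (areaStart > lastEnd)
			dprintf("hole: %#lx - %#lx\n", lastEnd, areaStart - 1);

		if (areaEnd >= end)
			return;

		lastEnd = areaEnd + 1;
	}

	if (lastEnd < end)
		dprintf("hole: %#lx - %#lx\n", lastEnd, end);
}
#endif	// 0 (illustrative sketch)

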
static void
create_preloaded_image_areas(struct preloaded_image* image)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	int32 length;

	// use file name to create a good area name
	char* fileName = strrchr(image->name, '/');
	if (fileName == NULL)
		fileName = image->name;
	else
		fileName++;

	length = strlen(fileName);
	// make sure there is enough space for the suffix
	if (length > 25)
		length = 25;

	memcpy(name, fileName, length);
	strcpy(name + length, "_text");
	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		// this will later be remapped read-only/executable by the
		// ELF initialization code

	strcpy(name + length, "_data");
	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}


/*!	Frees all areas that were previously created to cover the kernel_args
	ranges. Any boot loader resources contained in those ranges must not be
	accessed anymore past this point.
*/
void
vm_free_kernel_args(kernel_args* args)
{
	uint32 i;

	TRACE(("vm_free_kernel_args()\n"));

	for (i = 0; i < args->num_kernel_args_ranges; i++) {
		area_id area = area_for((void*)args->kernel_args_range[i].start);
		if (area >= B_OK)
			delete_area(area);
	}
}


static void
allocate_kernel_args(kernel_args* args)
{
	TRACE(("allocate_kernel_args()\n"));

	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
		void* address = (void*)args->kernel_args_range[i].start;

		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
			args->kernel_args_range[i].size, B_ALREADY_WIRED,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}
}


static void
unreserve_boot_loader_ranges(kernel_args* args)
{
	TRACE(("unreserve_boot_loader_ranges()\n"));

	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
		vm_unreserve_address_range(VMAddressSpace::KernelID(),
			(void*)args->virtual_allocated_range[i].start,
			args->virtual_allocated_range[i].size);
	}
}


static void
reserve_boot_loader_ranges(kernel_args* args)
{
	TRACE(("reserve_boot_loader_ranges()\n"));

	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
		void* address = (void*)args->virtual_allocated_range[i].start;

		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
3173 if (!IS_KERNEL_ADDRESS(address)) { 3174 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 3175 address, args->virtual_allocated_range[i].size); 3176 continue; 3177 } 3178 3179 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3180 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3181 if (status < B_OK) 3182 panic("could not reserve boot loader ranges\n"); 3183 } 3184 } 3185 3186 3187 static addr_t 3188 allocate_early_virtual(kernel_args* args, size_t size, bool blockAlign) 3189 { 3190 size = PAGE_ALIGN(size); 3191 3192 // find a slot in the virtual allocation addr range 3193 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3194 // check to see if the space between this one and the last is big enough 3195 addr_t rangeStart = args->virtual_allocated_range[i].start; 3196 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 3197 + args->virtual_allocated_range[i - 1].size; 3198 3199 addr_t base = blockAlign 3200 ? ROUNDUP(previousRangeEnd, size) : previousRangeEnd; 3201 3202 if (base >= KERNEL_BASE && base < rangeStart 3203 && rangeStart - base >= size) { 3204 args->virtual_allocated_range[i - 1].size 3205 += base + size - previousRangeEnd; 3206 return base; 3207 } 3208 } 3209 3210 // we hadn't found one between allocation ranges. this is ok. 3211 // see if there's a gap after the last one 3212 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 3213 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 3214 + args->virtual_allocated_range[lastEntryIndex].size; 3215 addr_t base = blockAlign ? ROUNDUP(lastRangeEnd, size) : lastRangeEnd; 3216 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 3217 args->virtual_allocated_range[lastEntryIndex].size 3218 += base + size - lastRangeEnd; 3219 return base; 3220 } 3221 3222 // see if there's a gap before the first one 3223 addr_t rangeStart = args->virtual_allocated_range[0].start; 3224 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 3225 base = rangeStart - size; 3226 if (blockAlign) 3227 base = ROUNDDOWN(base, size); 3228 3229 if (base >= KERNEL_BASE) { 3230 args->virtual_allocated_range[0].start = base; 3231 args->virtual_allocated_range[0].size += rangeStart - base; 3232 return base; 3233 } 3234 } 3235 3236 return 0; 3237 } 3238 3239 3240 static bool 3241 is_page_in_physical_memory_range(kernel_args* args, addr_t address) 3242 { 3243 // TODO: horrible brute-force method of determining if the page can be 3244 // allocated 3245 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3246 if (address >= args->physical_memory_range[i].start 3247 && address < args->physical_memory_range[i].start 3248 + args->physical_memory_range[i].size) 3249 return true; 3250 } 3251 return false; 3252 } 3253 3254 3255 static addr_t 3256 allocate_early_physical_page(kernel_args* args) 3257 { 3258 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3259 addr_t nextPage; 3260 3261 nextPage = args->physical_allocated_range[i].start 3262 + args->physical_allocated_range[i].size; 3263 // see if the page after the next allocated paddr run can be allocated 3264 if (i + 1 < args->num_physical_allocated_ranges 3265 && args->physical_allocated_range[i + 1].size != 0) { 3266 // see if the next page will collide with the next allocated range 3267 if (nextPage >= args->physical_allocated_range[i+1].start) 3268 continue; 3269 } 3270 // see if the next physical page fits in the memory block 3271 if 
(is_page_in_physical_memory_range(args, nextPage)) {
			// we got one!
			args->physical_allocated_range[i].size += B_PAGE_SIZE;
			return nextPage / B_PAGE_SIZE;
		}
	}

	return 0;
		// could not allocate a block
}


/*!	This one uses the kernel_args' physical and virtual memory ranges to
	allocate some pages before the VM is completely up.
*/
addr_t
vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
	uint32 attributes, bool blockAlign)
{
	if (physicalSize > virtualSize)
		physicalSize = virtualSize;

	// find the vaddr to allocate at
	addr_t virtualBase = allocate_early_virtual(args, virtualSize, blockAlign);
	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);

	// map the pages
	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
		addr_t physicalAddress = allocate_early_physical_page(args);
		if (physicalAddress == 0)
			panic("error allocating early page!\n");

		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);

		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
			physicalAddress * B_PAGE_SIZE, attributes,
			&allocate_early_physical_page);
	}

	return virtualBase;
}


/*!	The main entry point to initialize the VM. */
status_t
vm_init(kernel_args* args)
{
	struct preloaded_image* image;
	void* address;
	status_t err = 0;
	uint32 i;

	TRACE(("vm_init: entry\n"));
	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
	err = arch_vm_init(args);

	// initialize some globals
	vm_page_init_num_pages(args);
	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;

	size_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
	while (heapSize > sAvailableMemory / 8)
		heapSize /= 2;
	if (heapSize < 1024 * 1024)
		panic("vm_init: go buy some RAM please.");

	slab_init(args);

#if !USE_SLAB_ALLOCATOR_FOR_MALLOC
	// map in the new heap and initialize it
	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
	TRACE(("heap at 0x%lx\n", heapBase));
	heap_init(heapBase, heapSize);
#endif

	// initialize the free page list and physical page mapper
	vm_page_init(args);

	// initialize the hash table that stores the pages mapped to caches
	vm_cache_init(args);

	{
		status_t error = VMAreaHash::Init();
		if (error != B_OK)
			panic("vm_init: error initializing area hash table\n");
	}

	VMAddressSpace::Init();
	reserve_boot_loader_ranges(args);

	// Do any further initialization that the architecture-dependent layers
	// may need now
	arch_vm_translation_map_init_post_area(args);
	arch_vm_init_post_area(args);
	vm_page_init_post_area(args);
	slab_init_post_area();

	// allocate areas to represent stuff that already exists

#if !USE_SLAB_ALLOCATOR_FOR_MALLOC
	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
#endif

	allocate_kernel_args(args);

	create_preloaded_image_areas(&args->kernel_image);

	// allocate areas for preloaded images
	for (image =
args->preloaded_images; image != NULL; image = image->next) 3384 create_preloaded_image_areas(image); 3385 3386 // allocate kernel stacks 3387 for (i = 0; i < args->num_cpus; i++) { 3388 char name[64]; 3389 3390 sprintf(name, "idle thread %lu kstack", i + 1); 3391 address = (void*)args->cpu_kstack[i].start; 3392 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3393 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3394 } 3395 3396 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3397 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3398 3399 // create the object cache for the page mappings 3400 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 3401 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 3402 NULL, NULL); 3403 if (gPageMappingsObjectCache == NULL) 3404 panic("failed to create page mappings object cache"); 3405 3406 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 3407 3408 #if DEBUG_CACHE_LIST 3409 create_area("cache info table", (void**)&sCacheInfoTable, 3410 B_ANY_KERNEL_ADDRESS, 3411 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 3412 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3413 #endif // DEBUG_CACHE_LIST 3414 3415 // add some debugger commands 3416 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 3417 add_debugger_command("area", &dump_area, 3418 "Dump info about a particular area"); 3419 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 3420 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 3421 #if DEBUG_CACHE_LIST 3422 add_debugger_command_etc("caches", &dump_caches, 3423 "List all VMCache trees", 3424 "[ \"-c\" ]\n" 3425 "All cache trees are listed sorted in decreasing order by number of\n" 3426 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 3427 0); 3428 #endif 3429 add_debugger_command("avail", &dump_available_memory, 3430 "Dump available memory"); 3431 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 3432 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 3433 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 3434 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 3435 add_debugger_command("string", &display_mem, "dump strings"); 3436 3437 TRACE(("vm_init: exit\n")); 3438 3439 vm_cache_init_post_heap(); 3440 3441 return err; 3442 } 3443 3444 3445 status_t 3446 vm_init_post_sem(kernel_args* args) 3447 { 3448 // This frees all unused boot loader resources and makes its space available 3449 // again 3450 arch_vm_init_end(args); 3451 unreserve_boot_loader_ranges(args); 3452 3453 // fill in all of the semaphores that were not allocated before 3454 // since we're still single threaded and only the kernel address space 3455 // exists, it isn't that hard to find all of the ones we need to create 3456 3457 arch_vm_translation_map_init_post_sem(args); 3458 VMAddressSpace::InitPostSem(); 3459 3460 slab_init_post_sem(); 3461 3462 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC 3463 heap_init_post_sem(); 3464 #endif 3465 3466 return B_OK; 3467 } 3468 3469 3470 status_t 3471 vm_init_post_thread(kernel_args* args) 3472 { 3473 vm_page_init_post_thread(args); 3474 slab_init_post_thread(); 3475 return heap_init_post_thread(); 3476 } 3477 3478 3479 status_t 3480 vm_init_post_modules(kernel_args* args) 3481 { 3482 return arch_vm_init_post_modules(args); 3483 } 3484 3485 3486 
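// For reference, early-boot allocations follow the same pattern the heap
// mapping in vm_init() uses: pick a size, call vm_allocate_early() with
// physicalSize == virtualSize for fully backed memory, and treat a return
// value of 0 as failure. The sketch below is illustrative only (hence #if 0);
// allocate_early_scratch_example and earlyScratchBuffer are made-up names,
// not existing kernel objects.
#if 0
static addr_t
allocate_early_scratch_example(kernel_args* args)
{
	// one page of wired, kernel read/write memory, no block alignment
	addr_t earlyScratchBuffer = vm_allocate_early(args, B_PAGE_SIZE,
		B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
	if (earlyScratchBuffer == 0)
		panic("out of early virtual or physical memory");

	return earlyScratchBuffer;
}
#endif	// 0 (illustrative sketch)

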
void 3487 permit_page_faults(void) 3488 { 3489 struct thread* thread = thread_get_current_thread(); 3490 if (thread != NULL) 3491 atomic_add(&thread->page_faults_allowed, 1); 3492 } 3493 3494 3495 void 3496 forbid_page_faults(void) 3497 { 3498 struct thread* thread = thread_get_current_thread(); 3499 if (thread != NULL) 3500 atomic_add(&thread->page_faults_allowed, -1); 3501 } 3502 3503 3504 status_t 3505 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3506 addr_t* newIP) 3507 { 3508 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3509 faultAddress)); 3510 3511 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 3512 3513 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 3514 VMAddressSpace* addressSpace = NULL; 3515 3516 status_t status = B_OK; 3517 *newIP = 0; 3518 atomic_add((int32*)&sPageFaults, 1); 3519 3520 if (IS_KERNEL_ADDRESS(pageAddress)) { 3521 addressSpace = VMAddressSpace::GetKernel(); 3522 } else if (IS_USER_ADDRESS(pageAddress)) { 3523 addressSpace = VMAddressSpace::GetCurrent(); 3524 if (addressSpace == NULL) { 3525 if (!isUser) { 3526 dprintf("vm_page_fault: kernel thread accessing invalid user " 3527 "memory!\n"); 3528 status = B_BAD_ADDRESS; 3529 TPF(PageFaultError(-1, 3530 VMPageFaultTracing 3531 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 3532 } else { 3533 // XXX weird state. 3534 panic("vm_page_fault: non kernel thread accessing user memory " 3535 "that doesn't exist!\n"); 3536 status = B_BAD_ADDRESS; 3537 } 3538 } 3539 } else { 3540 // the hit was probably in the 64k DMZ between kernel and user space 3541 // this keeps a user space thread from passing a buffer that crosses 3542 // into kernel space 3543 status = B_BAD_ADDRESS; 3544 TPF(PageFaultError(-1, 3545 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 3546 } 3547 3548 if (status == B_OK) 3549 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser); 3550 3551 if (status < B_OK) { 3552 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 3553 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 3554 strerror(status), address, faultAddress, isWrite, isUser, 3555 thread_get_current_thread_id()); 3556 if (!isUser) { 3557 struct thread* thread = thread_get_current_thread(); 3558 if (thread != NULL && thread->fault_handler != 0) { 3559 // this will cause the arch dependant page fault handler to 3560 // modify the IP on the interrupt frame or whatever to return 3561 // to this address 3562 *newIP = thread->fault_handler; 3563 } else { 3564 // unhandled page fault in the kernel 3565 panic("vm_page_fault: unhandled page fault in kernel space at " 3566 "0x%lx, ip 0x%lx\n", address, faultAddress); 3567 } 3568 } else { 3569 #if 1 3570 addressSpace->ReadLock(); 3571 3572 // TODO: remove me once we have proper userland debugging support 3573 // (and tools) 3574 VMArea* area = addressSpace->LookupArea(faultAddress); 3575 3576 struct thread* thread = thread_get_current_thread(); 3577 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 3578 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 3579 thread->name, thread->id, thread->team->name, thread->team->id, 3580 isWrite ? "write" : "read", address, faultAddress, 3581 area ? area->name : "???", 3582 faultAddress - (area ? area->Base() : 0x0)); 3583 3584 // We can print a stack trace of the userland thread here. 
3585 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 3586 // fault and someone is already waiting for a write lock on the same address 3587 // space. This thread will then try to acquire the lock again and will 3588 // be queued after the writer. 3589 # if 0 3590 if (area) { 3591 struct stack_frame { 3592 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 3593 struct stack_frame* previous; 3594 void* return_address; 3595 #else 3596 // ... 3597 #warning writeme 3598 #endif 3599 } frame; 3600 # ifdef __INTEL__ 3601 struct iframe* iframe = i386_get_user_iframe(); 3602 if (iframe == NULL) 3603 panic("iframe is NULL!"); 3604 3605 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 3606 sizeof(struct stack_frame)); 3607 # elif defined(__POWERPC__) 3608 struct iframe* iframe = ppc_get_user_iframe(); 3609 if (iframe == NULL) 3610 panic("iframe is NULL!"); 3611 3612 status_t status = user_memcpy(&frame, (void*)iframe->r1, 3613 sizeof(struct stack_frame)); 3614 # else 3615 # warning "vm_page_fault() stack trace won't work" 3616 status = B_ERROR; 3617 # endif 3618 3619 dprintf("stack trace:\n"); 3620 int32 maxFrames = 50; 3621 while (status == B_OK && --maxFrames >= 0 3622 && frame.return_address != NULL) { 3623 dprintf(" %p", frame.return_address); 3624 area = addressSpace->LookupArea( 3625 (addr_t)frame.return_address); 3626 if (area) { 3627 dprintf(" (%s + %#lx)", area->name, 3628 (addr_t)frame.return_address - area->Base()); 3629 } 3630 dprintf("\n"); 3631 3632 status = user_memcpy(&frame, frame.previous, 3633 sizeof(struct stack_frame)); 3634 } 3635 } 3636 # endif // 0 (stack trace) 3637 3638 addressSpace->ReadUnlock(); 3639 #endif 3640 3641 // TODO: the fault_callback is a temporary solution for vm86 3642 if (thread->fault_callback == NULL 3643 || thread->fault_callback(address, faultAddress, isWrite)) { 3644 // If the thread has a signal handler for SIGSEGV, we simply 3645 // send it the signal. Otherwise we notify the user debugger 3646 // first. 3647 struct sigaction action; 3648 if (sigaction(SIGSEGV, NULL, &action) == 0 3649 && action.sa_handler != SIG_DFL 3650 && action.sa_handler != SIG_IGN) { 3651 send_signal(thread->id, SIGSEGV); 3652 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 3653 SIGSEGV)) { 3654 send_signal(thread->id, SIGSEGV); 3655 } 3656 } 3657 } 3658 } 3659 3660 if (addressSpace != NULL) 3661 addressSpace->Put(); 3662 3663 return B_HANDLED_INTERRUPT; 3664 } 3665 3666 3667 struct PageFaultContext { 3668 AddressSpaceReadLocker addressSpaceLocker; 3669 VMCacheChainLocker cacheChainLocker; 3670 3671 VMTranslationMap* map; 3672 VMCache* topCache; 3673 off_t cacheOffset; 3674 vm_page_reservation reservation; 3675 bool isWrite; 3676 3677 // return values 3678 vm_page* page; 3679 bool restart; 3680 3681 3682 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 3683 : 3684 addressSpaceLocker(addressSpace, true), 3685 map(addressSpace->TranslationMap()), 3686 isWrite(isWrite) 3687 { 3688 } 3689 3690 ~PageFaultContext() 3691 { 3692 UnlockAll(); 3693 vm_page_unreserve_pages(&reservation); 3694 } 3695 3696 void Prepare(VMCache* topCache, off_t cacheOffset) 3697 { 3698 this->topCache = topCache; 3699 this->cacheOffset = cacheOffset; 3700 page = NULL; 3701 restart = false; 3702 3703 cacheChainLocker.SetTo(topCache); 3704 } 3705 3706 void UnlockAll(VMCache* exceptCache = NULL) 3707 { 3708 topCache = NULL; 3709 addressSpaceLocker.Unlock(); 3710 cacheChainLocker.Unlock(exceptCache); 3711 } 3712 }; 3713 3714 3715 /*! 
Gets the page that should be mapped into the area.
	Returns an error code other than \c B_OK if the page couldn't be found or
	paged in. The locking state of the address space and the caches is
	undefined in that case.
	Returns \c B_OK with \c context.restart set to \c true if the function had
	to unlock the address space and all caches and is supposed to be called
	again.
	Returns \c B_OK with \c context.restart set to \c false if the page was
	found. It is returned in \c context.page. The address space will still be
	locked, as well as all caches starting from the top cache down to at least
	the cache the page lives in.
*/
static status_t
fault_get_page(PageFaultContext& context)
{
	VMCache* cache = context.topCache;
	VMCache* lastCache = NULL;
	vm_page* page = NULL;

	while (cache != NULL) {
		// We already hold the lock of the cache at this point.

		lastCache = cache;

		for (;;) {
			page = cache->LookupPage(context.cacheOffset);
			if (page == NULL || !page->busy) {
				// Either there is no page or there is one and it is not busy.
				break;
			}

			// page must be busy -- wait for it to become unbusy
			context.UnlockAll(cache);
			cache->ReleaseRefLocked();
			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);

			// restart the whole process
			context.restart = true;
			return B_OK;
		}

		if (page != NULL)
			break;

		// The current cache does not contain the page we're looking for.

		// see if the backing store has it
		if (cache->HasPage(context.cacheOffset)) {
			// insert a fresh page and mark it busy -- we're going to read it in
			page = vm_page_allocate_page(&context.reservation,
				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
			cache->InsertPage(page, context.cacheOffset);

			// We need to unlock all caches and the address space while reading
			// the page in. Keep a reference to the cache around.
			cache->AcquireRefLocked();
			context.UnlockAll();

			// read the page in
			iovec vec;
			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;

			status_t status = cache->Read(context.cacheOffset, &vec, 1,
				B_PHYSICAL_IO_REQUEST, &bytesRead);

			cache->Lock();

			if (status < B_OK) {
				// on error remove and free the page
				dprintf("reading page from cache %p returned: %s!\n",
					cache, strerror(status));

				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
				cache->RemovePage(page);
				vm_page_set_state(page, PAGE_STATE_FREE);

				cache->ReleaseRefAndUnlock();
				return status;
			}

			// mark the page unbusy again
			cache->MarkPageUnbusy(page);

			DEBUG_PAGE_ACCESS_END(page);

			// Since we needed to unlock everything temporarily, the area
			// situation might have changed. So we need to restart the whole
			// process.
			cache->ReleaseRefAndUnlock();
			context.restart = true;
			return B_OK;
		}

		cache = context.cacheChainLocker.LockSourceCache();
	}

	if (page == NULL) {
		// There was no adequate page, determine the cache for a clean one.
		// Read-only pages go into the deepest cache; only the topmost cache
		// may have direct write access.
		cache = context.isWrite ?
context.topCache : lastCache; 3817 3818 // allocate a clean page 3819 page = vm_page_allocate_page(&context.reservation, 3820 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 3821 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 3822 page->physical_page_number)); 3823 3824 // insert the new page into our cache 3825 cache->InsertPage(page, context.cacheOffset); 3826 } else if (page->Cache() != context.topCache && context.isWrite) { 3827 // We have a page that has the data we want, but in the wrong cache 3828 // object so we need to copy it and stick it into the top cache. 3829 vm_page* sourcePage = page; 3830 3831 // TODO: If memory is low, it might be a good idea to steal the page 3832 // from our source cache -- if possible, that is. 3833 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 3834 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 3835 3836 // To not needlessly kill concurrency we unlock all caches but the top 3837 // one while copying the page. Lacking another mechanism to ensure that 3838 // the source page doesn't disappear, we mark it busy. 3839 sourcePage->busy = true; 3840 context.cacheChainLocker.UnlockKeepRefs(true); 3841 3842 // copy the page 3843 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 3844 sourcePage->physical_page_number * B_PAGE_SIZE); 3845 3846 context.cacheChainLocker.RelockCaches(true); 3847 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 3848 3849 // insert the new page into our cache 3850 context.topCache->InsertPage(page, context.cacheOffset); 3851 } else 3852 DEBUG_PAGE_ACCESS_START(page); 3853 3854 context.page = page; 3855 return B_OK; 3856 } 3857 3858 3859 static status_t 3860 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 3861 bool isWrite, bool isUser) 3862 { 3863 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 3864 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 3865 3866 PageFaultContext context(addressSpace, isWrite); 3867 3868 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 3869 status_t status = B_OK; 3870 3871 addressSpace->IncrementFaultCount(); 3872 3873 // We may need up to 2 pages plus pages needed for mapping them -- reserving 3874 // the pages upfront makes sure we don't have any cache locked, so that the 3875 // page daemon/thief can do their job without problems. 3876 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 3877 originalAddress); 3878 context.addressSpaceLocker.Unlock(); 3879 vm_page_reserve_pages(&context.reservation, reservePages, 3880 addressSpace == VMAddressSpace::Kernel() 3881 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 3882 3883 while (true) { 3884 context.addressSpaceLocker.Lock(); 3885 3886 // get the area the fault was in 3887 VMArea* area = addressSpace->LookupArea(address); 3888 if (area == NULL) { 3889 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 3890 "space\n", originalAddress); 3891 TPF(PageFaultError(-1, 3892 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 3893 status = B_BAD_ADDRESS; 3894 break; 3895 } 3896 3897 // check permissions 3898 uint32 protection = get_area_page_protection(area, address); 3899 if (isUser && (protection & B_USER_PROTECTION) == 0) { 3900 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 3901 (void*)originalAddress); 3902 TPF(PageFaultError(area->id, 3903 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 3904 status = B_PERMISSION_DENIED; 3905 break; 3906 } 3907 if (isWrite && (protection 3908 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 3909 dprintf("write access attempted on write-protected area 0x%lx at" 3910 " %p\n", area->id, (void*)originalAddress); 3911 TPF(PageFaultError(area->id, 3912 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 3913 status = B_PERMISSION_DENIED; 3914 break; 3915 } else if (!isWrite && (protection 3916 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 3917 dprintf("read access attempted on read-protected area 0x%lx at" 3918 " %p\n", area->id, (void*)originalAddress); 3919 TPF(PageFaultError(area->id, 3920 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 3921 status = B_PERMISSION_DENIED; 3922 break; 3923 } 3924 3925 // We have the area, it was a valid access, so let's try to resolve the 3926 // page fault now. 3927 // At first, the top most cache from the area is investigated. 3928 3929 context.Prepare(vm_area_get_locked_cache(area), 3930 address - area->Base() + area->cache_offset); 3931 3932 // See if this cache has a fault handler -- this will do all the work 3933 // for us. 3934 { 3935 // Note, since the page fault is resolved with interrupts enabled, 3936 // the fault handler could be called more than once for the same 3937 // reason -- the store must take this into account. 3938 status = context.topCache->Fault(addressSpace, context.cacheOffset); 3939 if (status != B_BAD_HANDLER) 3940 break; 3941 } 3942 3943 // The top most cache has no fault handler, so let's see if the cache or 3944 // its sources already have the page we're searching for (we're going 3945 // from top to bottom). 3946 status = fault_get_page(context); 3947 if (status != B_OK) { 3948 TPF(PageFaultError(area->id, status)); 3949 break; 3950 } 3951 3952 if (context.restart) 3953 continue; 3954 3955 // All went fine, all there is left to do is to map the page into the 3956 // address space. 
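        // At this point the address space is still read-locked, and so is the
        // cache chain from context.topCache down to at least the cache that
        // context.page lives in (see fault_get_page()), so the page cannot
        // vanish while we map it.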
3957 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 3958 context.page)); 3959 3960 // If the page doesn't reside in the area's cache, we need to make sure 3961 // it's mapped in read-only, so that we cannot overwrite someone else's 3962 // data (copy-on-write) 3963 uint32 newProtection = protection; 3964 if (context.page->Cache() != context.topCache && !isWrite) 3965 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 3966 3967 bool unmapPage = false; 3968 bool mapPage = true; 3969 3970 // check whether there's already a page mapped at the address 3971 context.map->Lock(); 3972 3973 addr_t physicalAddress; 3974 uint32 flags; 3975 vm_page* mappedPage = NULL; 3976 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 3977 && (flags & PAGE_PRESENT) != 0 3978 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 3979 != NULL) { 3980 // Yep there's already a page. If it's ours, we can simply adjust 3981 // its protection. Otherwise we have to unmap it. 3982 if (mappedPage == context.page) { 3983 context.map->ProtectPage(area, address, newProtection); 3984 mapPage = false; 3985 } else 3986 unmapPage = true; 3987 } 3988 3989 context.map->Unlock(); 3990 3991 if (unmapPage) { 3992 // Note: The mapped page is a page of a lower cache. We are 3993 // guaranteed to have that cached locked, our new page is a copy of 3994 // that page, and the page is not busy. The logic for that guarantee 3995 // is as follows: Since the page is mapped, it must live in the top 3996 // cache (ruled out above) or any of its lower caches, and there is 3997 // (was before the new page was inserted) no other page in any 3998 // cache between the top cache and the page's cache (otherwise that 3999 // would be mapped instead). That in turn means that our algorithm 4000 // must have found it and therefore it cannot be busy either. 4001 DEBUG_PAGE_ACCESS_START(mappedPage); 4002 unmap_page(area, address); 4003 DEBUG_PAGE_ACCESS_END(mappedPage); 4004 } 4005 4006 if (mapPage) { 4007 if (map_page(area, context.page, address, newProtection, 4008 &context.reservation) != B_OK) { 4009 // Mapping can only fail, when the page mapping object couldn't 4010 // be allocated. Save for the missing mapping everything is 4011 // fine, though. We'll simply leave and probably fault again. 4012 // To make sure we'll have more luck then, we ensure that the 4013 // minimum object reserve is available. 4014 DEBUG_PAGE_ACCESS_END(context.page); 4015 4016 context.UnlockAll(); 4017 4018 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4019 != B_OK) { 4020 // Apparently the situation is serious. Let's get ourselves 4021 // killed. 
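                    // (When we got here via the page fault handler, an
                    // unresolved userland fault is fatal for the faulting
                    // team.)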
4022 status = B_NO_MEMORY; 4023 } 4024 4025 break; 4026 } 4027 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4028 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4029 4030 DEBUG_PAGE_ACCESS_END(context.page); 4031 4032 break; 4033 } 4034 4035 return status; 4036 } 4037 4038 4039 status_t 4040 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 4041 { 4042 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4043 } 4044 4045 status_t 4046 vm_put_physical_page(addr_t vaddr, void* handle) 4047 { 4048 return sPhysicalPageMapper->PutPage(vaddr, handle); 4049 } 4050 4051 4052 status_t 4053 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 4054 { 4055 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4056 } 4057 4058 status_t 4059 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4060 { 4061 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4062 } 4063 4064 4065 status_t 4066 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 4067 { 4068 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4069 } 4070 4071 status_t 4072 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4073 { 4074 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4075 } 4076 4077 4078 void 4079 vm_get_info(system_memory_info* info) 4080 { 4081 swap_get_info(info); 4082 4083 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 4084 info->page_faults = sPageFaults; 4085 4086 MutexLocker locker(sAvailableMemoryLock); 4087 info->free_memory = sAvailableMemory; 4088 info->needed_memory = sNeededMemory; 4089 } 4090 4091 4092 uint32 4093 vm_num_page_faults(void) 4094 { 4095 return sPageFaults; 4096 } 4097 4098 4099 off_t 4100 vm_available_memory(void) 4101 { 4102 MutexLocker locker(sAvailableMemoryLock); 4103 return sAvailableMemory; 4104 } 4105 4106 4107 off_t 4108 vm_available_not_needed_memory(void) 4109 { 4110 MutexLocker locker(sAvailableMemoryLock); 4111 return sAvailableMemory - sNeededMemory; 4112 } 4113 4114 4115 size_t 4116 vm_kernel_address_space_left(void) 4117 { 4118 return VMAddressSpace::Kernel()->FreeSpace(); 4119 } 4120 4121 4122 void 4123 vm_unreserve_memory(size_t amount) 4124 { 4125 mutex_lock(&sAvailableMemoryLock); 4126 4127 sAvailableMemory += amount; 4128 4129 mutex_unlock(&sAvailableMemoryLock); 4130 } 4131 4132 4133 status_t 4134 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4135 { 4136 size_t reserve = kMemoryReserveForPriority[priority]; 4137 4138 MutexLocker locker(sAvailableMemoryLock); 4139 4140 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4141 4142 if (sAvailableMemory >= amount + reserve) { 4143 sAvailableMemory -= amount; 4144 return B_OK; 4145 } 4146 4147 if (timeout <= 0) 4148 return B_NO_MEMORY; 4149 4150 // turn timeout into an absolute timeout 4151 timeout += system_time(); 4152 4153 // loop until we've got the memory or the timeout occurs 4154 do { 4155 sNeededMemory += amount; 4156 4157 // call the low resource manager 4158 locker.Unlock(); 4159 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4160 B_ABSOLUTE_TIMEOUT, timeout); 4161 locker.Lock(); 4162 4163 sNeededMemory -= amount; 4164 4165 if (sAvailableMemory >= amount + reserve) { 4166 sAvailableMemory -= amount; 4167 return B_OK; 4168 } 4169 } while (timeout > system_time()); 4170 4171 return B_NO_MEMORY; 4172 } 4173 4174 4175 status_t 4176 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 
type)
{
    AddressSpaceReadLocker locker;
    VMArea* area;
    status_t status = locker.SetFromArea(id, area);
    if (status != B_OK)
        return status;

    return arch_vm_set_memory_type(area, physicalBase, type);
}


/*! This function enforces some protection properties:
     - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
     - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
     - if no protection is specified, it defaults to B_KERNEL_READ_AREA
       and B_KERNEL_WRITE_AREA.
*/
static void
fix_protection(uint32* protection)
{
    if ((*protection & B_KERNEL_PROTECTION) == 0) {
        if ((*protection & B_USER_PROTECTION) == 0
            || (*protection & B_WRITE_AREA) != 0)
            *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
        else
            *protection |= B_KERNEL_READ_AREA;
    }
}


static void
fill_area_info(struct VMArea* area, area_info* info, size_t size)
{
    strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
    info->area = area->id;
    info->address = (void*)area->Base();
    info->size = area->Size();
    info->protection = area->protection;
    info->lock = B_FULL_LOCK;
    info->team = area->address_space->ID();
    info->copy_count = 0;
    info->in_count = 0;
    info->out_count = 0;
        // TODO: retrieve real values here!

    VMCache* cache = vm_area_get_locked_cache(area);

    // Note, this is a simplification; the cache could be larger than this area
    info->ram_size = cache->page_count * B_PAGE_SIZE;

    vm_area_put_locked_cache(cache);
}


/*!
    Tests whether the area that contains the specified address actually exists
    and whether it needs any kind of locking.
    Used by both lock_memory() and unlock_memory().
*/
static status_t
test_lock_memory(VMAddressSpace* addressSpace, addr_t address,
    bool& needsLocking)
{
    addressSpace->ReadLock();

    VMArea* area = addressSpace->LookupArea(address);
    if (area != NULL) {
        // This determines if we need to lock the memory at all
        needsLocking = area->cache_type != CACHE_TYPE_NULL
            && area->cache_type != CACHE_TYPE_DEVICE
            && area->wiring != B_FULL_LOCK
            && area->wiring != B_CONTIGUOUS;
    }

    addressSpace->ReadUnlock();

    if (area == NULL)
        return B_BAD_ADDRESS;

    return B_OK;
}


static status_t
vm_resize_area(area_id areaID, size_t newSize, bool kernel)
{
    // is newSize a multiple of B_PAGE_SIZE?
    if (newSize & (B_PAGE_SIZE - 1))
        return B_BAD_VALUE;

    // lock all affected address spaces and the cache
    VMArea* area;
    VMCache* cache;

    MultiAddressSpaceLocker locker;
    status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
        &cache);
    if (status != B_OK)
        return status;
    AreaCacheLocker cacheLocker(cache);    // already locked

    // enforce restrictions
    if (!kernel) {
        if ((area->protection & B_KERNEL_AREA) != 0)
            return B_NOT_ALLOWED;
        // TODO: Enforce all restrictions (team, etc.)!
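        // (B_KERNEL_AREA marks an area in a userland address space that only
        // the kernel is allowed to modify or delete, e.g. a team's user
        // area.)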
4283 } 4284 4285 size_t oldSize = area->Size(); 4286 if (newSize == oldSize) 4287 return B_OK; 4288 4289 // Resize all areas of this area's cache 4290 4291 if (cache->type != CACHE_TYPE_RAM) 4292 return B_NOT_ALLOWED; 4293 4294 bool anyKernelArea = false; 4295 if (oldSize < newSize) { 4296 // We need to check if all areas of this cache can be resized 4297 for (VMArea* current = cache->areas; current != NULL; 4298 current = current->cache_next) { 4299 if (!current->address_space->CanResizeArea(current, newSize)) 4300 return B_ERROR; 4301 anyKernelArea |= current->address_space == VMAddressSpace::Kernel(); 4302 } 4303 } 4304 4305 // Okay, looks good so far, so let's do it 4306 4307 int priority = kernel && anyKernelArea 4308 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4309 uint32 allocationFlags = kernel && anyKernelArea 4310 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4311 4312 if (oldSize < newSize) { 4313 // Growing the cache can fail, so we do it first. 4314 status = cache->Resize(cache->virtual_base + newSize, priority); 4315 if (status != B_OK) 4316 return status; 4317 } 4318 4319 for (VMArea* current = cache->areas; current != NULL; 4320 current = current->cache_next) { 4321 status = current->address_space->ResizeArea(current, newSize, 4322 allocationFlags); 4323 if (status != B_OK) 4324 break; 4325 4326 // We also need to unmap all pages beyond the new size, if the area has 4327 // shrunk 4328 if (newSize < oldSize) { 4329 VMCacheChainLocker cacheChainLocker(cache); 4330 cacheChainLocker.LockAllSourceCaches(); 4331 4332 unmap_pages(current, current->Base() + newSize, 4333 oldSize - newSize); 4334 4335 cacheChainLocker.Unlock(cache); 4336 } 4337 } 4338 4339 // shrinking the cache can't fail, so we do it now 4340 if (status == B_OK && newSize < oldSize) 4341 status = cache->Resize(cache->virtual_base + newSize, priority); 4342 4343 if (status != B_OK) { 4344 // Something failed -- resize the areas back to their original size. 4345 // This can fail, too, in which case we're seriously screwed. 
4346 for (VMArea* current = cache->areas; current != NULL; 4347 current = current->cache_next) { 4348 if (current->address_space->ResizeArea(current, oldSize, 4349 allocationFlags) != B_OK) { 4350 panic("vm_resize_area(): Failed and not being able to restore " 4351 "original state."); 4352 } 4353 } 4354 4355 cache->Resize(cache->virtual_base + oldSize, priority); 4356 } 4357 4358 // TODO: we must honour the lock restrictions of this area 4359 return status; 4360 } 4361 4362 4363 status_t 4364 vm_memset_physical(addr_t address, int value, size_t length) 4365 { 4366 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4367 } 4368 4369 4370 status_t 4371 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 4372 { 4373 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4374 } 4375 4376 4377 status_t 4378 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 4379 { 4380 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4381 } 4382 4383 4384 void 4385 vm_memcpy_physical_page(addr_t to, addr_t from) 4386 { 4387 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4388 } 4389 4390 4391 // #pragma mark - kernel public API 4392 4393 4394 status_t 4395 user_memcpy(void* to, const void* from, size_t size) 4396 { 4397 // don't allow address overflows 4398 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 4399 return B_BAD_ADDRESS; 4400 4401 if (arch_cpu_user_memcpy(to, from, size, 4402 &thread_get_current_thread()->fault_handler) < B_OK) 4403 return B_BAD_ADDRESS; 4404 4405 return B_OK; 4406 } 4407 4408 4409 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 4410 the string in \a to, NULL-terminating the result. 4411 4412 \param to Pointer to the destination C-string. 4413 \param from Pointer to the source C-string. 4414 \param size Size in bytes of the string buffer pointed to by \a to. 4415 4416 \return strlen(\a from). 4417 */ 4418 ssize_t 4419 user_strlcpy(char* to, const char* from, size_t size) 4420 { 4421 if (to == NULL && size != 0) 4422 return B_BAD_VALUE; 4423 if (from == NULL) 4424 return B_BAD_ADDRESS; 4425 4426 // limit size to avoid address overflows 4427 size_t maxSize = std::min(size, 4428 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 4429 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 4430 // the source address might still overflow. 4431 4432 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 4433 &thread_get_current_thread()->fault_handler); 4434 4435 // If we hit the address overflow boundary, fail. 
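    // maxSize was clamped above so that a copy of maxSize bytes cannot wrap
    // around the top of the address space for either buffer. If the copy used
    // the entire clamped size even though the caller asked for more
    // (maxSize < size), the source string may extend past the wrap-around
    // point, so we must not report success.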
    if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
        return B_BAD_ADDRESS;

    return result;
}


status_t
user_memset(void* s, char c, size_t count)
{
    // don't allow address overflows
    if ((addr_t)s + count < (addr_t)s)
        return B_BAD_ADDRESS;

    if (arch_cpu_user_memset(s, c, count,
            &thread_get_current_thread()->fault_handler) < B_OK)
        return B_BAD_ADDRESS;

    return B_OK;
}


status_t
lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
{
    VMAddressSpace* addressSpace = NULL;
    addr_t unalignedBase = (addr_t)address;
    addr_t end = unalignedBase + numBytes;
    addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
    bool isUser = IS_USER_ADDRESS(address);
    bool needsLocking = true;

    if (isUser) {
        if (team == B_CURRENT_TEAM)
            addressSpace = VMAddressSpace::GetCurrent();
        else
            addressSpace = VMAddressSpace::Get(team);
    } else
        addressSpace = VMAddressSpace::GetKernel();
    if (addressSpace == NULL)
        return B_ERROR;

    // test if we're on an area that allows faults at all

    VMTranslationMap* map = addressSpace->TranslationMap();

    status_t status = test_lock_memory(addressSpace, base, needsLocking);
    if (status < B_OK)
        goto out;
    if (!needsLocking)
        goto out;

    for (; base < end; base += B_PAGE_SIZE) {
        addr_t physicalAddress;
        uint32 protection;

        map->Lock();
        status = map->Query(base, &physicalAddress, &protection);
        map->Unlock();

        if (status < B_OK)
            goto out;

        if ((protection & PAGE_PRESENT) != 0) {
            // if B_READ_DEVICE is set, the caller intends to write to the locked
            // memory, so if it hasn't been mapped writable, we'll try the soft
            // fault anyway
            if ((flags & B_READ_DEVICE) == 0
                || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
                // update wiring
                vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
                if (page == NULL)
                    panic("couldn't lookup physical page just allocated\n");

                increment_page_wired_count(page);
                continue;
            }
        }

        status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
            isUser);
        if (status != B_OK) {
            dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) "
                "failed: %s\n", (void*)unalignedBase, numBytes, flags,
                strerror(status));
            goto out;
        }

        // TODO: Here's a race condition. We should probably add a parameter
        // to vm_soft_fault() that would cause the page's wired count to be
        // incremented immediately.
        // TODO: After memory has been locked in an area, we need to prevent the
        // area from being deleted, resized, cut, etc. That could be done using
        // a "locked pages" count in VMArea, and maybe a condition variable, if
        // we want to allow waiting for the area to become eligible for these
        // operations again.

        map->Lock();
        status = map->Query(base, &physicalAddress, &protection);
        map->Unlock();

        if (status < B_OK)
            goto out;

        // update wiring
        vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
        if (page == NULL)
            panic("couldn't lookup physical page");

        increment_page_wired_count(page);
            // TODO: We need the cache to be locked at this point! See TODO
            // above for a possible solution.
4549 } 4550 4551 out: 4552 addressSpace->Put(); 4553 return status; 4554 } 4555 4556 4557 status_t 4558 lock_memory(void* address, size_t numBytes, uint32 flags) 4559 { 4560 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 4561 } 4562 4563 4564 status_t 4565 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 4566 { 4567 VMAddressSpace* addressSpace = NULL; 4568 addr_t unalignedBase = (addr_t)address; 4569 addr_t end = unalignedBase + numBytes; 4570 addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE); 4571 bool needsLocking = true; 4572 4573 if (IS_USER_ADDRESS(address)) { 4574 if (team == B_CURRENT_TEAM) 4575 addressSpace = VMAddressSpace::GetCurrent(); 4576 else 4577 addressSpace = VMAddressSpace::Get(team); 4578 } else 4579 addressSpace = VMAddressSpace::GetKernel(); 4580 if (addressSpace == NULL) 4581 return B_ERROR; 4582 4583 VMTranslationMap* map = addressSpace->TranslationMap(); 4584 4585 status_t status = test_lock_memory(addressSpace, base, needsLocking); 4586 if (status < B_OK) 4587 goto out; 4588 if (!needsLocking) 4589 goto out; 4590 4591 for (; base < end; base += B_PAGE_SIZE) { 4592 map->Lock(); 4593 4594 addr_t physicalAddress; 4595 uint32 protection; 4596 status = map->Query(base, &physicalAddress, &protection); 4597 // TODO: ATM there's no mechanism that guarantees that the page 4598 // we've marked wired in lock_memory_etc() is the one we find here. 4599 // If we only locked for reading, the original page might stem from 4600 // a lower cache and a page fault in the meantime might have mapped 4601 // a page from the top cache. 4602 // Moreover fork() can insert a new top cache and re-map pages 4603 // read-only at any time. This would even cause a violation of the 4604 // lock_memory() guarantee. 4605 4606 map->Unlock(); 4607 4608 if (status < B_OK) 4609 goto out; 4610 if ((protection & PAGE_PRESENT) == 0) 4611 panic("calling unlock_memory() on unmapped memory!"); 4612 4613 // update wiring 4614 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 4615 if (page == NULL) 4616 panic("couldn't lookup physical page"); 4617 4618 decrement_page_wired_count(page); 4619 // TODO: We need the cache to be locked at this point! 4620 } 4621 4622 out: 4623 addressSpace->Put(); 4624 return status; 4625 } 4626 4627 4628 status_t 4629 unlock_memory(void* address, size_t numBytes, uint32 flags) 4630 { 4631 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 4632 } 4633 4634 4635 /*! Similar to get_memory_map(), but also allows to specify the address space 4636 for the memory in question and has a saner semantics. 4637 Returns \c B_OK when the complete range could be translated or 4638 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 4639 case the actual number of entries is written to \c *_numEntries. Any other 4640 error case indicates complete failure; \c *_numEntries will be set to \c 0 4641 in this case. 
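    A typical call looks like this (illustrative sketch only, with an
    arbitrarily sized table):

        physical_entry entries[8];
        uint32 count = 8;
        status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
            entries, &count);
        // on B_OK or B_BUFFER_OVERFLOW the first "count" entries are valid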
4642 */ 4643 status_t 4644 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 4645 physical_entry* table, uint32* _numEntries) 4646 { 4647 uint32 numEntries = *_numEntries; 4648 *_numEntries = 0; 4649 4650 VMAddressSpace* addressSpace; 4651 addr_t virtualAddress = (addr_t)address; 4652 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 4653 addr_t physicalAddress; 4654 status_t status = B_OK; 4655 int32 index = -1; 4656 addr_t offset = 0; 4657 bool interrupts = are_interrupts_enabled(); 4658 4659 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 4660 address, numBytes, numEntries)); 4661 4662 if (numEntries == 0 || numBytes == 0) 4663 return B_BAD_VALUE; 4664 4665 // in which address space is the address to be found? 4666 if (IS_USER_ADDRESS(virtualAddress)) { 4667 if (team == B_CURRENT_TEAM) 4668 addressSpace = VMAddressSpace::GetCurrent(); 4669 else 4670 addressSpace = VMAddressSpace::Get(team); 4671 } else 4672 addressSpace = VMAddressSpace::GetKernel(); 4673 4674 if (addressSpace == NULL) 4675 return B_ERROR; 4676 4677 VMTranslationMap* map = addressSpace->TranslationMap(); 4678 4679 if (interrupts) 4680 map->Lock(); 4681 4682 while (offset < numBytes) { 4683 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 4684 uint32 flags; 4685 4686 if (interrupts) { 4687 status = map->Query((addr_t)address + offset, &physicalAddress, 4688 &flags); 4689 } else { 4690 status = map->QueryInterrupt((addr_t)address + offset, 4691 &physicalAddress, &flags); 4692 } 4693 if (status < B_OK) 4694 break; 4695 if ((flags & PAGE_PRESENT) == 0) { 4696 panic("get_memory_map() called on unmapped memory!"); 4697 return B_BAD_ADDRESS; 4698 } 4699 4700 if (index < 0 && pageOffset > 0) { 4701 physicalAddress += pageOffset; 4702 if (bytes > B_PAGE_SIZE - pageOffset) 4703 bytes = B_PAGE_SIZE - pageOffset; 4704 } 4705 4706 // need to switch to the next physical_entry? 4707 if (index < 0 || (addr_t)table[index].address 4708 != physicalAddress - table[index].size) { 4709 if ((uint32)++index + 1 > numEntries) { 4710 // table to small 4711 status = B_BUFFER_OVERFLOW; 4712 break; 4713 } 4714 table[index].address = (void*)physicalAddress; 4715 table[index].size = bytes; 4716 } else { 4717 // page does fit in current entry 4718 table[index].size += bytes; 4719 } 4720 4721 offset += bytes; 4722 } 4723 4724 if (interrupts) 4725 map->Unlock(); 4726 4727 if (status != B_OK) 4728 return status; 4729 4730 if ((uint32)index + 1 > numEntries) { 4731 *_numEntries = index; 4732 return B_BUFFER_OVERFLOW; 4733 } 4734 4735 *_numEntries = index + 1; 4736 return B_OK; 4737 } 4738 4739 4740 /*! According to the BeBook, this function should always succeed. 4741 This is no longer the case. 
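    Any error from get_memory_map_etc() is passed through unchanged. When more
    than one entry was requested, the returned list is terminated by an entry
    of size 0; if there is no room left for that terminator,
    \c B_BUFFER_OVERFLOW is returned.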
4742 */ 4743 long 4744 get_memory_map(const void* address, ulong numBytes, physical_entry* table, 4745 long numEntries) 4746 { 4747 uint32 entriesRead = numEntries; 4748 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 4749 table, &entriesRead); 4750 if (error != B_OK) 4751 return error; 4752 4753 // close the entry list 4754 4755 // if it's only one entry, we will silently accept the missing ending 4756 if (numEntries == 1) 4757 return B_OK; 4758 4759 if (entriesRead + 1 > (uint32)numEntries) 4760 return B_BUFFER_OVERFLOW; 4761 4762 table[entriesRead].address = NULL; 4763 table[entriesRead].size = 0; 4764 4765 return B_OK; 4766 } 4767 4768 4769 area_id 4770 area_for(void* address) 4771 { 4772 return vm_area_for((addr_t)address, true); 4773 } 4774 4775 4776 area_id 4777 find_area(const char* name) 4778 { 4779 return VMAreaHash::Find(name); 4780 } 4781 4782 4783 status_t 4784 _get_area_info(area_id id, area_info* info, size_t size) 4785 { 4786 if (size != sizeof(area_info) || info == NULL) 4787 return B_BAD_VALUE; 4788 4789 AddressSpaceReadLocker locker; 4790 VMArea* area; 4791 status_t status = locker.SetFromArea(id, area); 4792 if (status != B_OK) 4793 return status; 4794 4795 fill_area_info(area, info, size); 4796 return B_OK; 4797 } 4798 4799 4800 status_t 4801 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 4802 { 4803 addr_t nextBase = *(addr_t*)cookie; 4804 4805 // we're already through the list 4806 if (nextBase == (addr_t)-1) 4807 return B_ENTRY_NOT_FOUND; 4808 4809 if (team == B_CURRENT_TEAM) 4810 team = team_get_current_team_id(); 4811 4812 AddressSpaceReadLocker locker(team); 4813 if (!locker.IsLocked()) 4814 return B_BAD_TEAM_ID; 4815 4816 VMArea* area; 4817 for (VMAddressSpace::AreaIterator it 4818 = locker.AddressSpace()->GetAreaIterator(); 4819 (area = it.Next()) != NULL;) { 4820 if (area->Base() > nextBase) 4821 break; 4822 } 4823 4824 if (area == NULL) { 4825 nextBase = (addr_t)-1; 4826 return B_ENTRY_NOT_FOUND; 4827 } 4828 4829 fill_area_info(area, info, size); 4830 *cookie = (int32)(area->Base()); 4831 // TODO: Not 64 bit safe! 4832 4833 return B_OK; 4834 } 4835 4836 4837 status_t 4838 set_area_protection(area_id area, uint32 newProtection) 4839 { 4840 fix_protection(&newProtection); 4841 4842 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 4843 newProtection, true); 4844 } 4845 4846 4847 status_t 4848 resize_area(area_id areaID, size_t newSize) 4849 { 4850 return vm_resize_area(areaID, newSize, true); 4851 } 4852 4853 4854 /*! Transfers the specified area to a new team. The caller must be the owner 4855 of the area. 4856 */ 4857 area_id 4858 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 4859 bool kernel) 4860 { 4861 area_info info; 4862 status_t status = get_area_info(id, &info); 4863 if (status != B_OK) 4864 return status; 4865 4866 if (info.team != thread_get_current_thread()->team->id) 4867 return B_PERMISSION_DENIED; 4868 4869 area_id clonedArea = vm_clone_area(target, info.name, _address, 4870 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 4871 if (clonedArea < 0) 4872 return clonedArea; 4873 4874 status = vm_delete_area(info.team, id, kernel); 4875 if (status != B_OK) { 4876 vm_delete_area(target, clonedArea, kernel); 4877 return status; 4878 } 4879 4880 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 
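    // (The clone shares the original area's cache -- REGION_NO_PRIVATE_MAP --
    // so deleting the original above only dropped that area's reference to
    // the cache; the memory contents are preserved for the new team.)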
4881 4882 return clonedArea; 4883 } 4884 4885 4886 area_id 4887 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes, 4888 uint32 addressSpec, uint32 protection, void** _virtualAddress) 4889 { 4890 if (!arch_vm_supports_protection(protection)) 4891 return B_NOT_SUPPORTED; 4892 4893 fix_protection(&protection); 4894 4895 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 4896 _virtualAddress, addressSpec, numBytes, protection, 4897 (addr_t)physicalAddress, false); 4898 } 4899 4900 4901 area_id 4902 clone_area(const char* name, void** _address, uint32 addressSpec, 4903 uint32 protection, area_id source) 4904 { 4905 if ((protection & B_KERNEL_PROTECTION) == 0) 4906 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 4907 4908 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 4909 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 4910 } 4911 4912 4913 area_id 4914 create_area_etc(team_id team, const char* name, void** address, 4915 uint32 addressSpec, uint32 size, uint32 lock, uint32 protection, 4916 addr_t physicalAddress, uint32 flags) 4917 { 4918 fix_protection(&protection); 4919 4920 return vm_create_anonymous_area(team, (char*)name, address, addressSpec, 4921 size, lock, protection, physicalAddress, flags, true); 4922 } 4923 4924 4925 area_id 4926 create_area(const char* name, void** _address, uint32 addressSpec, size_t size, 4927 uint32 lock, uint32 protection) 4928 { 4929 fix_protection(&protection); 4930 4931 return vm_create_anonymous_area(VMAddressSpace::KernelID(), (char*)name, 4932 _address, addressSpec, size, lock, protection, 0, 0, true); 4933 } 4934 4935 4936 status_t 4937 delete_area(area_id area) 4938 { 4939 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 4940 } 4941 4942 4943 // #pragma mark - Userland syscalls 4944 4945 4946 status_t 4947 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 4948 addr_t size) 4949 { 4950 // filter out some unavailable values (for userland) 4951 switch (addressSpec) { 4952 case B_ANY_KERNEL_ADDRESS: 4953 case B_ANY_KERNEL_BLOCK_ADDRESS: 4954 return B_BAD_VALUE; 4955 } 4956 4957 addr_t address; 4958 4959 if (!IS_USER_ADDRESS(userAddress) 4960 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 4961 return B_BAD_ADDRESS; 4962 4963 status_t status = vm_reserve_address_range( 4964 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 4965 RESERVED_AVOID_BASE); 4966 if (status != B_OK) 4967 return status; 4968 4969 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 4970 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 4971 (void*)address, size); 4972 return B_BAD_ADDRESS; 4973 } 4974 4975 return B_OK; 4976 } 4977 4978 4979 status_t 4980 _user_unreserve_address_range(addr_t address, addr_t size) 4981 { 4982 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 4983 (void*)address, size); 4984 } 4985 4986 4987 area_id 4988 _user_area_for(void* address) 4989 { 4990 return vm_area_for((addr_t)address, false); 4991 } 4992 4993 4994 area_id 4995 _user_find_area(const char* userName) 4996 { 4997 char name[B_OS_NAME_LENGTH]; 4998 4999 if (!IS_USER_ADDRESS(userName) 5000 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5001 return B_BAD_ADDRESS; 5002 5003 return find_area(name); 5004 } 5005 5006 5007 status_t 5008 _user_get_area_info(area_id area, area_info* userInfo) 5009 { 5010 if (!IS_USER_ADDRESS(userInfo)) 5011 return B_BAD_ADDRESS; 5012 5013 area_info info; 5014 status_t status = 
get_area_info(area, &info); 5015 if (status < B_OK) 5016 return status; 5017 5018 // TODO: do we want to prevent userland from seeing kernel protections? 5019 //info.protection &= B_USER_PROTECTION; 5020 5021 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5022 return B_BAD_ADDRESS; 5023 5024 return status; 5025 } 5026 5027 5028 status_t 5029 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo) 5030 { 5031 int32 cookie; 5032 5033 if (!IS_USER_ADDRESS(userCookie) 5034 || !IS_USER_ADDRESS(userInfo) 5035 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 5036 return B_BAD_ADDRESS; 5037 5038 area_info info; 5039 status_t status = _get_next_area_info(team, &cookie, &info, 5040 sizeof(area_info)); 5041 if (status != B_OK) 5042 return status; 5043 5044 //info.protection &= B_USER_PROTECTION; 5045 5046 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 5047 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5048 return B_BAD_ADDRESS; 5049 5050 return status; 5051 } 5052 5053 5054 status_t 5055 _user_set_area_protection(area_id area, uint32 newProtection) 5056 { 5057 if ((newProtection & ~B_USER_PROTECTION) != 0) 5058 return B_BAD_VALUE; 5059 5060 fix_protection(&newProtection); 5061 5062 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 5063 newProtection, false); 5064 } 5065 5066 5067 status_t 5068 _user_resize_area(area_id area, size_t newSize) 5069 { 5070 // TODO: Since we restrict deleting of areas to those owned by the team, 5071 // we should also do that for resizing (check other functions, too). 5072 return vm_resize_area(area, newSize, false); 5073 } 5074 5075 5076 area_id 5077 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 5078 team_id target) 5079 { 5080 // filter out some unavailable values (for userland) 5081 switch (addressSpec) { 5082 case B_ANY_KERNEL_ADDRESS: 5083 case B_ANY_KERNEL_BLOCK_ADDRESS: 5084 return B_BAD_VALUE; 5085 } 5086 5087 void* address; 5088 if (!IS_USER_ADDRESS(userAddress) 5089 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5090 return B_BAD_ADDRESS; 5091 5092 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 5093 if (newArea < B_OK) 5094 return newArea; 5095 5096 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 5097 return B_BAD_ADDRESS; 5098 5099 return newArea; 5100 } 5101 5102 5103 area_id 5104 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 5105 uint32 protection, area_id sourceArea) 5106 { 5107 char name[B_OS_NAME_LENGTH]; 5108 void* address; 5109 5110 // filter out some unavailable values (for userland) 5111 switch (addressSpec) { 5112 case B_ANY_KERNEL_ADDRESS: 5113 case B_ANY_KERNEL_BLOCK_ADDRESS: 5114 return B_BAD_VALUE; 5115 } 5116 if ((protection & ~B_USER_PROTECTION) != 0) 5117 return B_BAD_VALUE; 5118 5119 if (!IS_USER_ADDRESS(userName) 5120 || !IS_USER_ADDRESS(userAddress) 5121 || user_strlcpy(name, userName, sizeof(name)) < B_OK 5122 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5123 return B_BAD_ADDRESS; 5124 5125 fix_protection(&protection); 5126 5127 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 5128 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 5129 false); 5130 if (clonedArea < B_OK) 5131 return clonedArea; 5132 5133 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 5134 delete_area(clonedArea); 5135 return B_BAD_ADDRESS; 5136 } 5137 5138 return clonedArea; 5139 } 5140 5141 
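// Like the other area syscalls, _user_create_area() copies its arguments in
// from userland, rejects address specs and protection bits that userland may
// not use, does the actual work, and copies the result back out -- undoing
// the operation if that last copy fails.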
5142 area_id 5143 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 5144 size_t size, uint32 lock, uint32 protection) 5145 { 5146 char name[B_OS_NAME_LENGTH]; 5147 void* address; 5148 5149 // filter out some unavailable values (for userland) 5150 switch (addressSpec) { 5151 case B_ANY_KERNEL_ADDRESS: 5152 case B_ANY_KERNEL_BLOCK_ADDRESS: 5153 return B_BAD_VALUE; 5154 } 5155 if ((protection & ~B_USER_PROTECTION) != 0) 5156 return B_BAD_VALUE; 5157 5158 if (!IS_USER_ADDRESS(userName) 5159 || !IS_USER_ADDRESS(userAddress) 5160 || user_strlcpy(name, userName, sizeof(name)) < B_OK 5161 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5162 return B_BAD_ADDRESS; 5163 5164 if (addressSpec == B_EXACT_ADDRESS 5165 && IS_KERNEL_ADDRESS(address)) 5166 return B_BAD_VALUE; 5167 5168 fix_protection(&protection); 5169 5170 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), 5171 (char*)name, &address, addressSpec, size, lock, protection, 0, 0, 5172 false); 5173 5174 if (area >= B_OK 5175 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 5176 delete_area(area); 5177 return B_BAD_ADDRESS; 5178 } 5179 5180 return area; 5181 } 5182 5183 5184 status_t 5185 _user_delete_area(area_id area) 5186 { 5187 // Unlike the BeOS implementation, you can now only delete areas 5188 // that you have created yourself from userland. 5189 // The documentation to delete_area() explicitly states that this 5190 // will be restricted in the future, and so it will. 5191 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 5192 } 5193 5194 5195 // TODO: create a BeOS style call for this! 5196 5197 area_id 5198 _user_map_file(const char* userName, void** userAddress, int addressSpec, 5199 size_t size, int protection, int mapping, bool unmapAddressRange, int fd, 5200 off_t offset) 5201 { 5202 char name[B_OS_NAME_LENGTH]; 5203 void* address; 5204 area_id area; 5205 5206 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 5207 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 5208 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5209 return B_BAD_ADDRESS; 5210 5211 if (addressSpec == B_EXACT_ADDRESS) { 5212 if ((addr_t)address + size < (addr_t)address) 5213 return B_BAD_VALUE; 5214 if (!IS_USER_ADDRESS(address) 5215 || !IS_USER_ADDRESS((addr_t)address + size)) { 5216 return B_BAD_ADDRESS; 5217 } 5218 } 5219 5220 // userland created areas can always be accessed by the kernel 5221 protection |= B_KERNEL_READ_AREA 5222 | (protection & B_WRITE_AREA ? 
B_KERNEL_WRITE_AREA : 0); 5223 5224 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 5225 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 5226 false); 5227 if (area < B_OK) 5228 return area; 5229 5230 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 5231 return B_BAD_ADDRESS; 5232 5233 return area; 5234 } 5235 5236 5237 status_t 5238 _user_unmap_memory(void* _address, size_t size) 5239 { 5240 addr_t address = (addr_t)_address; 5241 5242 // check params 5243 if (size == 0 || (addr_t)address + size < (addr_t)address) 5244 return B_BAD_VALUE; 5245 5246 if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size)) 5247 return B_BAD_ADDRESS; 5248 5249 // write lock the address space 5250 AddressSpaceWriteLocker locker; 5251 status_t status = locker.SetTo(team_get_current_team_id()); 5252 if (status != B_OK) 5253 return status; 5254 5255 // unmap 5256 return unmap_address_range(locker.AddressSpace(), address, size, false); 5257 } 5258 5259 5260 status_t 5261 _user_set_memory_protection(void* _address, size_t size, int protection) 5262 { 5263 // check address range 5264 addr_t address = (addr_t)_address; 5265 size = PAGE_ALIGN(size); 5266 5267 if ((address % B_PAGE_SIZE) != 0) 5268 return B_BAD_VALUE; 5269 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 5270 || !IS_USER_ADDRESS((addr_t)address + size)) { 5271 // weird error code required by POSIX 5272 return ENOMEM; 5273 } 5274 5275 // extend and check protection 5276 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 5277 uint32 actualProtection = protection | B_KERNEL_READ_AREA 5278 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 5279 5280 if (!arch_vm_supports_protection(actualProtection)) 5281 return B_NOT_SUPPORTED; 5282 5283 // We need to write lock the address space, since we're going to play with 5284 // the areas. 5285 AddressSpaceWriteLocker locker; 5286 status_t status = locker.SetTo(team_get_current_team_id()); 5287 if (status != B_OK) 5288 return status; 5289 5290 // First round: Check whether the whole range is covered by areas and we are 5291 // allowed to modify them. 5292 addr_t currentAddress = address; 5293 size_t sizeLeft = size; 5294 while (sizeLeft > 0) { 5295 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 5296 if (area == NULL) 5297 return B_NO_MEMORY; 5298 5299 if ((area->protection & B_KERNEL_AREA) != 0) 5300 return B_NOT_ALLOWED; 5301 5302 // TODO: For (shared) mapped files we should check whether the new 5303 // protections are compatible with the file permissions. We don't have 5304 // a way to do that yet, though. 5305 5306 addr_t offset = currentAddress - area->Base(); 5307 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 5308 5309 currentAddress += rangeSize; 5310 sizeLeft -= rangeSize; 5311 } 5312 5313 // Second round: If the protections differ from that of the area, create a 5314 // page protection array and re-map mapped pages. 
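    // The per-page protections are stored as 4-bit nibbles, two pages per
    // byte: a 1 MB area with 4 KB pages (256 pages), for instance, needs a
    // 128 byte array (see the allocation below).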
5315 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 5316 currentAddress = address; 5317 sizeLeft = size; 5318 while (sizeLeft > 0) { 5319 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 5320 if (area == NULL) 5321 return B_NO_MEMORY; 5322 5323 addr_t offset = currentAddress - area->Base(); 5324 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 5325 5326 currentAddress += rangeSize; 5327 sizeLeft -= rangeSize; 5328 5329 if (area->page_protections == NULL) { 5330 if (area->protection == actualProtection) 5331 continue; 5332 5333 // In the page protections we store only the three user protections, 5334 // so we use 4 bits per page. 5335 uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 5336 area->page_protections = (uint8*)malloc(bytes); 5337 if (area->page_protections == NULL) 5338 return B_NO_MEMORY; 5339 5340 // init the page protections for all pages to that of the area 5341 uint32 areaProtection = area->protection 5342 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5343 memset(area->page_protections, 5344 areaProtection | (areaProtection << 4), bytes); 5345 } 5346 5347 // We need to lock the complete cache chain, since we potentially unmap 5348 // pages of lower caches. 5349 VMCache* topCache = vm_area_get_locked_cache(area); 5350 VMCacheChainLocker cacheChainLocker(topCache); 5351 cacheChainLocker.LockAllSourceCaches(); 5352 5353 for (addr_t pageAddress = area->Base() + offset; 5354 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 5355 map->Lock(); 5356 5357 set_area_page_protection(area, pageAddress, protection); 5358 5359 addr_t physicalAddress; 5360 uint32 flags; 5361 5362 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 5363 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 5364 map->Unlock(); 5365 continue; 5366 } 5367 5368 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5369 if (page == NULL) { 5370 panic("area %p looking up page failed for pa 0x%lx\n", area, 5371 physicalAddress); 5372 map->Unlock(); 5373 return B_ERROR; 5374 } 5375 5376 // If the page is not in the topmost cache and write access is 5377 // requested, we have to unmap it. Otherwise we can re-map it with 5378 // the new protection. 
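            // (Unmapping it means that the next write access faults again and
            // goes through the regular copy-on-write path, which copies the
            // page into the topmost cache before mapping it writable.)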
5379 bool unmapPage = page->Cache() != topCache 5380 && (protection & B_WRITE_AREA) != 0; 5381 5382 if (!unmapPage) 5383 map->ProtectPage(area, pageAddress, actualProtection); 5384 5385 map->Unlock(); 5386 5387 if (unmapPage) { 5388 DEBUG_PAGE_ACCESS_START(page); 5389 unmap_page(area, pageAddress); 5390 DEBUG_PAGE_ACCESS_END(page); 5391 } 5392 } 5393 } 5394 5395 return B_OK; 5396 } 5397 5398 5399 status_t 5400 _user_sync_memory(void* _address, size_t size, int flags) 5401 { 5402 addr_t address = (addr_t)_address; 5403 size = PAGE_ALIGN(size); 5404 5405 // check params 5406 if ((address % B_PAGE_SIZE) != 0) 5407 return B_BAD_VALUE; 5408 if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address) 5409 || !IS_USER_ADDRESS((addr_t)address + size)) { 5410 // weird error code required by POSIX 5411 return ENOMEM; 5412 } 5413 5414 bool writeSync = (flags & MS_SYNC) != 0; 5415 bool writeAsync = (flags & MS_ASYNC) != 0; 5416 if (writeSync && writeAsync) 5417 return B_BAD_VALUE; 5418 5419 if (size == 0 || (!writeSync && !writeAsync)) 5420 return B_OK; 5421 5422 // iterate through the range and sync all concerned areas 5423 while (size > 0) { 5424 // read lock the address space 5425 AddressSpaceReadLocker locker; 5426 status_t error = locker.SetTo(team_get_current_team_id()); 5427 if (error != B_OK) 5428 return error; 5429 5430 // get the first area 5431 VMArea* area = locker.AddressSpace()->LookupArea(address); 5432 if (area == NULL) 5433 return B_NO_MEMORY; 5434 5435 uint32 offset = address - area->Base(); 5436 size_t rangeSize = min_c(area->Size() - offset, size); 5437 offset += area->cache_offset; 5438 5439 // lock the cache 5440 AreaCacheLocker cacheLocker(area); 5441 if (!cacheLocker) 5442 return B_BAD_VALUE; 5443 VMCache* cache = area->cache; 5444 5445 locker.Unlock(); 5446 5447 uint32 firstPage = offset >> PAGE_SHIFT; 5448 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 5449 5450 // write the pages 5451 if (cache->type == CACHE_TYPE_VNODE) { 5452 if (writeSync) { 5453 // synchronous 5454 error = vm_page_write_modified_page_range(cache, firstPage, 5455 endPage); 5456 if (error != B_OK) 5457 return error; 5458 } else { 5459 // asynchronous 5460 vm_page_schedule_write_page_range(cache, firstPage, endPage); 5461 // TODO: This is probably not quite what is supposed to happen. 5462 // Especially when a lot has to be written, it might take ages 5463 // until it really hits the disk. 5464 } 5465 } 5466 5467 address += rangeSize; 5468 size -= rangeSize; 5469 } 5470 5471 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 5472 // synchronize multiple mappings of the same file. In our VM they never get 5473 // out of sync, though, so we don't have to do anything. 5474 5475 return B_OK; 5476 } 5477 5478 5479 status_t 5480 _user_memory_advice(void* address, size_t size, int advice) 5481 { 5482 // TODO: Implement! 5483 return B_OK; 5484 } 5485