1 /* 2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <OS.h> 20 #include <KernelExport.h> 21 22 #include <AutoDeleter.h> 23 24 #include <arch/cpu.h> 25 #include <arch/vm.h> 26 #include <boot/elf.h> 27 #include <boot/stage2.h> 28 #include <condition_variable.h> 29 #include <console.h> 30 #include <debug.h> 31 #include <file_cache.h> 32 #include <fs/fd.h> 33 #include <heap.h> 34 #include <int.h> 35 #include <lock.h> 36 #include <low_resource_manager.h> 37 #include <smp.h> 38 #include <system_info.h> 39 #include <thread.h> 40 #include <team.h> 41 #include <tracing.h> 42 #include <util/AutoLock.h> 43 #include <util/khash.h> 44 #include <vm_address_space.h> 45 #include <vm_cache.h> 46 #include <vm_page.h> 47 #include <vm_priv.h> 48 49 #include "VMAnonymousCache.h" 50 #include "IORequest.h" 51 52 53 //#define TRACE_VM 54 //#define TRACE_FAULTS 55 #ifdef TRACE_VM 56 # define TRACE(x) dprintf x 57 #else 58 # define TRACE(x) ; 59 #endif 60 #ifdef TRACE_FAULTS 61 # define FTRACE(x) dprintf x 62 #else 63 # define FTRACE(x) ; 64 #endif 65 66 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1)) 67 #define ROUNDOWN(a, b) (((a) / (b)) * (b)) 68 69 70 class AddressSpaceReadLocker { 71 public: 72 AddressSpaceReadLocker(team_id team); 73 AddressSpaceReadLocker(vm_address_space* space, bool getNewReference); 74 AddressSpaceReadLocker(); 75 ~AddressSpaceReadLocker(); 76 77 status_t SetTo(team_id team); 78 void SetTo(vm_address_space* space, bool getNewReference); 79 status_t SetFromArea(area_id areaID, vm_area*& area); 80 81 bool IsLocked() const { return fLocked; } 82 bool Lock(); 83 void Unlock(); 84 85 void Unset(); 86 87 vm_address_space* AddressSpace() { return fSpace; } 88 89 private: 90 vm_address_space* fSpace; 91 bool fLocked; 92 }; 93 94 class AddressSpaceWriteLocker { 95 public: 96 AddressSpaceWriteLocker(team_id team); 97 AddressSpaceWriteLocker(); 98 ~AddressSpaceWriteLocker(); 99 100 status_t SetTo(team_id team); 101 status_t SetFromArea(area_id areaID, vm_area*& area); 102 status_t SetFromArea(team_id team, area_id areaID, bool allowKernel, 103 vm_area*& area); 104 status_t SetFromArea(team_id team, area_id areaID, vm_area*& area); 105 106 bool IsLocked() const { return fLocked; } 107 void Unlock(); 108 109 void DegradeToReadLock(); 110 void Unset(); 111 112 vm_address_space* AddressSpace() { return fSpace; } 113 114 private: 115 vm_address_space* fSpace; 116 bool fLocked; 117 bool fDegraded; 118 }; 119 120 class MultiAddressSpaceLocker { 121 public: 122 MultiAddressSpaceLocker(); 123 ~MultiAddressSpaceLocker(); 124 125 inline status_t AddTeam(team_id team, bool writeLock, 126 vm_address_space** _space = NULL); 127 inline status_t AddArea(area_id area, bool writeLock, 128 vm_address_space** _space = NULL); 129 130 status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne, 131 bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL); 132 133 status_t Lock(); 134 void Unlock(); 135 bool IsLocked() const { return fLocked; } 136 137 void Unset(); 138 139 private: 140 struct lock_item { 141 vm_address_space* space; 142 bool write_lock; 143 }; 144 145 bool _ResizeIfNeeded(); 146 
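	// A minimal usage sketch for the single-address-space lockers declared
	// above (illustrative only; "someAreaID" stands for any valid area ID):
	//
	//	vm_area* area;
	//	AddressSpaceReadLocker locker;
	//	if (locker.SetFromArea(someAreaID, area) == B_OK) {
	//		// the area's address space is read-locked here, and "area" is
	//		// guaranteed to still belong to it
	//	}
	//	// the destructor unlocks the space and drops its reference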
int32 _IndexOfAddressSpace(vm_address_space* space) const; 147 status_t _AddAddressSpace(vm_address_space* space, bool writeLock, 148 vm_address_space** _space); 149 150 static int _CompareItems(const void* _a, const void* _b); 151 152 lock_item* fItems; 153 int32 fCapacity; 154 int32 fCount; 155 bool fLocked; 156 }; 157 158 159 class AreaCacheLocking { 160 public: 161 inline bool Lock(vm_cache* lockable) 162 { 163 return false; 164 } 165 166 inline void Unlock(vm_cache* lockable) 167 { 168 vm_area_put_locked_cache(lockable); 169 } 170 }; 171 172 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> { 173 public: 174 inline AreaCacheLocker(vm_cache* cache = NULL) 175 : AutoLocker<vm_cache, AreaCacheLocking>(cache, true) 176 { 177 } 178 179 inline AreaCacheLocker(vm_area* area) 180 : AutoLocker<vm_cache, AreaCacheLocking>() 181 { 182 SetTo(area); 183 } 184 185 inline void SetTo(vm_area* area) 186 { 187 return AutoLocker<vm_cache, AreaCacheLocking>::SetTo( 188 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 189 } 190 }; 191 192 193 #define AREA_HASH_TABLE_SIZE 1024 194 static area_id sNextAreaID = 1; 195 static hash_table* sAreaHash; 196 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash"); 197 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings"); 198 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache"); 199 200 static off_t sAvailableMemory; 201 static off_t sNeededMemory; 202 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 203 static uint32 sPageFaults; 204 205 #if DEBUG_CACHE_LIST 206 207 struct cache_info { 208 vm_cache* cache; 209 addr_t page_count; 210 addr_t committed; 211 }; 212 213 static const int kCacheInfoTableCount = 100 * 1024; 214 static cache_info* sCacheInfoTable; 215 216 #endif // DEBUG_CACHE_LIST 217 218 219 // function declarations 220 static void delete_area(vm_address_space* addressSpace, vm_area* area); 221 static vm_address_space* get_address_space_by_area_id(area_id id); 222 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address, 223 bool isWrite, bool isUser); 224 static status_t map_backing_store(vm_address_space* addressSpace, 225 vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size, 226 uint32 addressSpec, int wiring, int protection, int mapping, 227 vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel); 228 229 230 // #pragma mark - 231 232 233 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team) 234 : 235 fSpace(NULL), 236 fLocked(false) 237 { 238 SetTo(team); 239 } 240 241 242 /*! Takes over the reference of the address space, if \a getNewReference is 243 \c false. 244 */ 245 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space, 246 bool getNewReference) 247 : 248 fSpace(NULL), 249 fLocked(false) 250 { 251 SetTo(space, getNewReference); 252 } 253 254 255 AddressSpaceReadLocker::AddressSpaceReadLocker() 256 : 257 fSpace(NULL), 258 fLocked(false) 259 { 260 } 261 262 263 AddressSpaceReadLocker::~AddressSpaceReadLocker() 264 { 265 Unset(); 266 } 267 268 269 void 270 AddressSpaceReadLocker::Unset() 271 { 272 Unlock(); 273 if (fSpace != NULL) 274 vm_put_address_space(fSpace); 275 } 276 277 278 status_t 279 AddressSpaceReadLocker::SetTo(team_id team) 280 { 281 fSpace = vm_get_address_space(team); 282 if (fSpace == NULL) 283 return B_BAD_TEAM_ID; 284 285 rw_lock_read_lock(&fSpace->lock); 286 fLocked = true; 287 return B_OK; 288 } 289 290 291 /*! 
Takes over the reference of the address space, if \a getNewReference is 292 \c false. 293 */ 294 void 295 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference) 296 { 297 fSpace = space; 298 299 if (getNewReference) 300 atomic_add(&fSpace->ref_count, 1); 301 302 rw_lock_read_lock(&fSpace->lock); 303 fLocked = true; 304 } 305 306 307 status_t 308 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area) 309 { 310 fSpace = get_address_space_by_area_id(areaID); 311 if (fSpace == NULL) 312 return B_BAD_TEAM_ID; 313 314 rw_lock_read_lock(&fSpace->lock); 315 316 rw_lock_read_lock(&sAreaHashLock); 317 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 318 rw_lock_read_unlock(&sAreaHashLock); 319 320 if (area == NULL || area->address_space != fSpace) { 321 rw_lock_read_unlock(&fSpace->lock); 322 return B_BAD_VALUE; 323 } 324 325 fLocked = true; 326 return B_OK; 327 } 328 329 330 bool 331 AddressSpaceReadLocker::Lock() 332 { 333 if (fLocked) 334 return true; 335 if (fSpace == NULL) 336 return false; 337 338 rw_lock_read_lock(&fSpace->lock); 339 fLocked = true; 340 341 return true; 342 } 343 344 345 void 346 AddressSpaceReadLocker::Unlock() 347 { 348 if (fLocked) { 349 rw_lock_read_unlock(&fSpace->lock); 350 fLocked = false; 351 } 352 } 353 354 355 // #pragma mark - 356 357 358 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team) 359 : 360 fSpace(NULL), 361 fLocked(false), 362 fDegraded(false) 363 { 364 SetTo(team); 365 } 366 367 368 AddressSpaceWriteLocker::AddressSpaceWriteLocker() 369 : 370 fSpace(NULL), 371 fLocked(false), 372 fDegraded(false) 373 { 374 } 375 376 377 AddressSpaceWriteLocker::~AddressSpaceWriteLocker() 378 { 379 Unset(); 380 } 381 382 383 void 384 AddressSpaceWriteLocker::Unset() 385 { 386 Unlock(); 387 if (fSpace != NULL) 388 vm_put_address_space(fSpace); 389 } 390 391 392 status_t 393 AddressSpaceWriteLocker::SetTo(team_id team) 394 { 395 fSpace = vm_get_address_space(team); 396 if (fSpace == NULL) 397 return B_BAD_TEAM_ID; 398 399 rw_lock_write_lock(&fSpace->lock); 400 fLocked = true; 401 return B_OK; 402 } 403 404 405 status_t 406 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area) 407 { 408 fSpace = get_address_space_by_area_id(areaID); 409 if (fSpace == NULL) 410 return B_BAD_VALUE; 411 412 rw_lock_write_lock(&fSpace->lock); 413 414 rw_lock_read_lock(&sAreaHashLock); 415 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 416 rw_lock_read_unlock(&sAreaHashLock); 417 418 if (area == NULL || area->address_space != fSpace) { 419 rw_lock_write_unlock(&fSpace->lock); 420 return B_BAD_VALUE; 421 } 422 423 fLocked = true; 424 return B_OK; 425 } 426 427 428 status_t 429 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 430 bool allowKernel, vm_area*& area) 431 { 432 rw_lock_read_lock(&sAreaHashLock); 433 434 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 435 if (area != NULL 436 && (area->address_space->id == team 437 || (allowKernel && team == vm_kernel_address_space_id()))) { 438 fSpace = area->address_space; 439 atomic_add(&fSpace->ref_count, 1); 440 } 441 442 rw_lock_read_unlock(&sAreaHashLock); 443 444 if (fSpace == NULL) 445 return B_BAD_VALUE; 446 447 // Second try to get the area -- this time with the address space 448 // write lock held 449 450 rw_lock_write_lock(&fSpace->lock); 451 452 rw_lock_read_lock(&sAreaHashLock); 453 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 454 rw_lock_read_unlock(&sAreaHashLock); 455 456 if (area == NULL) { 457 rw_lock_write_unlock(&fSpace->lock); 458 return 
B_BAD_VALUE; 459 } 460 461 fLocked = true; 462 return B_OK; 463 } 464 465 466 status_t 467 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 468 vm_area*& area) 469 { 470 return SetFromArea(team, areaID, false, area); 471 } 472 473 474 void 475 AddressSpaceWriteLocker::Unlock() 476 { 477 if (fLocked) { 478 if (fDegraded) 479 rw_lock_read_unlock(&fSpace->lock); 480 else 481 rw_lock_write_unlock(&fSpace->lock); 482 fLocked = false; 483 fDegraded = false; 484 } 485 } 486 487 488 void 489 AddressSpaceWriteLocker::DegradeToReadLock() 490 { 491 // TODO: the current R/W lock implementation just keeps the write lock here 492 rw_lock_read_lock(&fSpace->lock); 493 rw_lock_write_unlock(&fSpace->lock); 494 fDegraded = true; 495 } 496 497 498 // #pragma mark - 499 500 501 MultiAddressSpaceLocker::MultiAddressSpaceLocker() 502 : 503 fItems(NULL), 504 fCapacity(0), 505 fCount(0), 506 fLocked(false) 507 { 508 } 509 510 511 MultiAddressSpaceLocker::~MultiAddressSpaceLocker() 512 { 513 Unset(); 514 free(fItems); 515 } 516 517 518 /*static*/ int 519 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b) 520 { 521 lock_item* a = (lock_item*)_a; 522 lock_item* b = (lock_item*)_b; 523 return a->space->id - b->space->id; 524 } 525 526 527 bool 528 MultiAddressSpaceLocker::_ResizeIfNeeded() 529 { 530 if (fCount == fCapacity) { 531 lock_item* items = (lock_item*)realloc(fItems, 532 (fCapacity + 4) * sizeof(lock_item)); 533 if (items == NULL) 534 return false; 535 536 fCapacity += 4; 537 fItems = items; 538 } 539 540 return true; 541 } 542 543 544 int32 545 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const 546 { 547 for (int32 i = 0; i < fCount; i++) { 548 if (fItems[i].space == space) 549 return i; 550 } 551 552 return -1; 553 } 554 555 556 status_t 557 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space, 558 bool writeLock, vm_address_space** _space) 559 { 560 if (!space) 561 return B_BAD_VALUE; 562 563 int32 index = _IndexOfAddressSpace(space); 564 if (index < 0) { 565 if (!_ResizeIfNeeded()) { 566 vm_put_address_space(space); 567 return B_NO_MEMORY; 568 } 569 570 lock_item& item = fItems[fCount++]; 571 item.space = space; 572 item.write_lock = writeLock; 573 } else { 574 575 // one reference is enough 576 vm_put_address_space(space); 577 578 fItems[index].write_lock |= writeLock; 579 } 580 581 if (_space != NULL) 582 *_space = space; 583 584 return B_OK; 585 } 586 587 588 inline status_t 589 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock, 590 vm_address_space** _space) 591 { 592 return _AddAddressSpace(vm_get_address_space(team), writeLock, 593 _space); 594 } 595 596 597 inline status_t 598 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock, 599 vm_address_space** _space) 600 { 601 return _AddAddressSpace(get_address_space_by_area_id(area), writeLock, 602 _space); 603 } 604 605 606 void 607 MultiAddressSpaceLocker::Unset() 608 { 609 Unlock(); 610 611 for (int32 i = 0; i < fCount; i++) 612 vm_put_address_space(fItems[i].space); 613 614 fCount = 0; 615 } 616 617 618 status_t 619 MultiAddressSpaceLocker::Lock() 620 { 621 ASSERT(!fLocked); 622 623 qsort(fItems, fCount, sizeof(lock_item), &_CompareItems); 624 625 for (int32 i = 0; i < fCount; i++) { 626 status_t status; 627 if (fItems[i].write_lock) 628 status = rw_lock_write_lock(&fItems[i].space->lock); 629 else 630 status = rw_lock_read_lock(&fItems[i].space->lock); 631 632 if (status < B_OK) { 633 while (--i >= 0) { 634 if (fItems[i].write_lock) 635 
rw_lock_write_unlock(&fItems[i].space->lock); 636 else 637 rw_lock_read_unlock(&fItems[i].space->lock); 638 } 639 return status; 640 } 641 } 642 643 fLocked = true; 644 return B_OK; 645 } 646 647 648 void 649 MultiAddressSpaceLocker::Unlock() 650 { 651 if (!fLocked) 652 return; 653 654 for (int32 i = 0; i < fCount; i++) { 655 if (fItems[i].write_lock) 656 rw_lock_write_unlock(&fItems[i].space->lock); 657 else 658 rw_lock_read_unlock(&fItems[i].space->lock); 659 } 660 661 fLocked = false; 662 } 663 664 665 /*! Adds all address spaces of the areas associated with the given area's cache, 666 locks them, and locks the cache (including a reference to it). It retries 667 until the situation is stable (i.e. the neither cache nor cache's areas 668 changed) or an error occurs. 669 */ 670 status_t 671 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID, 672 bool writeLockThisOne, bool writeLockOthers, vm_area*& _area, 673 vm_cache** _cache) 674 { 675 // remember the original state 676 int originalCount = fCount; 677 lock_item* originalItems = NULL; 678 if (fCount > 0) { 679 originalItems = new(nothrow) lock_item[fCount]; 680 if (originalItems == NULL) 681 return B_NO_MEMORY; 682 memcpy(originalItems, fItems, fCount * sizeof(lock_item)); 683 } 684 ArrayDeleter<lock_item> _(originalItems); 685 686 // get the cache 687 vm_cache* cache; 688 vm_area* area; 689 status_t error; 690 { 691 AddressSpaceReadLocker locker; 692 error = locker.SetFromArea(areaID, area); 693 if (error != B_OK) 694 return error; 695 696 cache = vm_area_get_locked_cache(area); 697 } 698 699 while (true) { 700 // add all areas 701 vm_area* firstArea = cache->areas; 702 for (vm_area* current = firstArea; current; 703 current = current->cache_next) { 704 error = AddArea(current->id, 705 current == area ? writeLockThisOne : writeLockOthers); 706 if (error != B_OK) { 707 vm_area_put_locked_cache(cache); 708 return error; 709 } 710 } 711 712 // unlock the cache and attempt to lock the address spaces 713 vm_area_put_locked_cache(cache); 714 715 error = Lock(); 716 if (error != B_OK) 717 return error; 718 719 // lock the cache again and check whether anything has changed 720 721 // check whether the area is gone in the meantime 722 rw_lock_read_lock(&sAreaHashLock); 723 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 724 rw_lock_read_unlock(&sAreaHashLock); 725 726 if (area == NULL) { 727 Unlock(); 728 return B_BAD_VALUE; 729 } 730 731 // lock the cache 732 vm_cache* oldCache = cache; 733 cache = vm_area_get_locked_cache(area); 734 735 // If neither the area's cache has changed nor its area list we're 736 // done. 737 if (cache == oldCache && firstArea == cache->areas) { 738 _area = area; 739 if (_cache != NULL) 740 *_cache = cache; 741 return B_OK; 742 } 743 744 // Restore the original state and try again. 745 746 // Unlock the address spaces, but keep the cache locked for the next 747 // iteration. 748 Unlock(); 749 750 // Get an additional reference to the original address spaces. 751 for (int32 i = 0; i < originalCount; i++) 752 atomic_add(&originalItems[i].space->ref_count, 1); 753 754 // Release all references to the current address spaces. 755 for (int32 i = 0; i < fCount; i++) 756 vm_put_address_space(fItems[i].space); 757 758 // Copy over the original state. 
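	// A sketch of how a caller typically uses this method (illustrative
	// only; "id" stands for any valid area ID). Note that Lock() always
	// acquires the collected address spaces in a fixed order -- sorted by
	// their ID via _CompareItems() -- so two threads locking overlapping
	// sets of spaces cannot deadlock against each other.
	//
	//	MultiAddressSpaceLocker locker;
	//	vm_area* area;
	//	vm_cache* cache;
	//	if (locker.AddAreaCacheAndLock(id, true, true, area, &cache)
	//			== B_OK) {
	//		// every address space that maps "cache" is locked, and the
	//		// cache itself is locked and referenced
	//		vm_area_put_locked_cache(cache);
	//	}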
759 fCount = originalCount; 760 if (originalItems != NULL) 761 memcpy(fItems, originalItems, fCount * sizeof(lock_item)); 762 } 763 } 764 765 766 // #pragma mark - 767 768 769 #if VM_PAGE_FAULT_TRACING 770 771 namespace VMPageFaultTracing { 772 773 class PageFaultStart : public AbstractTraceEntry { 774 public: 775 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 776 : 777 fAddress(address), 778 fPC(pc), 779 fWrite(write), 780 fUser(user) 781 { 782 Initialized(); 783 } 784 785 virtual void AddDump(TraceOutput& out) 786 { 787 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 788 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC); 789 } 790 791 private: 792 addr_t fAddress; 793 addr_t fPC; 794 bool fWrite; 795 bool fUser; 796 }; 797 798 799 // page fault errors 800 enum { 801 PAGE_FAULT_ERROR_NO_AREA = 0, 802 PAGE_FAULT_ERROR_KERNEL_ONLY, 803 PAGE_FAULT_ERROR_READ_ONLY, 804 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 805 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 806 }; 807 808 809 class PageFaultError : public AbstractTraceEntry { 810 public: 811 PageFaultError(area_id area, status_t error) 812 : 813 fArea(area), 814 fError(error) 815 { 816 Initialized(); 817 } 818 819 virtual void AddDump(TraceOutput& out) 820 { 821 switch (fError) { 822 case PAGE_FAULT_ERROR_NO_AREA: 823 out.Print("page fault error: no area"); 824 break; 825 case PAGE_FAULT_ERROR_KERNEL_ONLY: 826 out.Print("page fault error: area: %ld, kernel only", fArea); 827 break; 828 case PAGE_FAULT_ERROR_READ_ONLY: 829 out.Print("page fault error: area: %ld, read only", fArea); 830 break; 831 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 832 out.Print("page fault error: kernel touching bad user memory"); 833 break; 834 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 835 out.Print("page fault error: no address space"); 836 break; 837 default: 838 out.Print("page fault error: area: %ld, error: %s", fArea, 839 strerror(fError)); 840 break; 841 } 842 } 843 844 private: 845 area_id fArea; 846 status_t fError; 847 }; 848 849 850 class PageFaultDone : public AbstractTraceEntry { 851 public: 852 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 853 vm_page* page) 854 : 855 fArea(area), 856 fTopCache(topCache), 857 fCache(cache), 858 fPage(page) 859 { 860 Initialized(); 861 } 862 863 virtual void AddDump(TraceOutput& out) 864 { 865 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 866 "page: %p", fArea, fTopCache, fCache, fPage); 867 } 868 869 private: 870 area_id fArea; 871 VMCache* fTopCache; 872 VMCache* fCache; 873 vm_page* fPage; 874 }; 875 876 } // namespace VMPageFaultTracing 877 878 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 879 #else 880 # define TPF(x) ; 881 #endif // VM_PAGE_FAULT_TRACING 882 883 884 // #pragma mark - 885 886 887 static int 888 area_compare(void* _area, const void* key) 889 { 890 vm_area* area = (vm_area*)_area; 891 const area_id* id = (const area_id*)key; 892 893 if (area->id == *id) 894 return 0; 895 896 return -1; 897 } 898 899 900 static uint32 901 area_hash(void* _area, const void* key, uint32 range) 902 { 903 vm_area* area = (vm_area*)_area; 904 const area_id* id = (const area_id*)key; 905 906 if (area != NULL) 907 return area->id % range; 908 909 return (uint32)*id % range; 910 } 911 912 913 static vm_address_space* 914 get_address_space_by_area_id(area_id id) 915 { 916 vm_address_space* addressSpace = NULL; 917 918 rw_lock_read_lock(&sAreaHashLock); 919 920 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 921 if (area != NULL) { 922 addressSpace = 
area->address_space; 923 atomic_add(&addressSpace->ref_count, 1); 924 } 925 926 rw_lock_read_unlock(&sAreaHashLock); 927 928 return addressSpace; 929 } 930 931 932 //! You need to have the address space locked when calling this function 933 static vm_area* 934 lookup_area(vm_address_space* addressSpace, area_id id) 935 { 936 rw_lock_read_lock(&sAreaHashLock); 937 938 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 939 if (area != NULL && area->address_space != addressSpace) 940 area = NULL; 941 942 rw_lock_read_unlock(&sAreaHashLock); 943 944 return area; 945 } 946 947 948 static vm_area* 949 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags) 950 { 951 vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area)); 952 if (reserved == NULL) 953 return NULL; 954 955 memset(reserved, 0, sizeof(vm_area)); 956 reserved->id = RESERVED_AREA_ID; 957 // this marks it as reserved space 958 reserved->protection = flags; 959 reserved->address_space = addressSpace; 960 961 return reserved; 962 } 963 964 965 static vm_area* 966 create_area_struct(vm_address_space* addressSpace, const char* name, 967 uint32 wiring, uint32 protection) 968 { 969 // restrict the area name to B_OS_NAME_LENGTH 970 size_t length = strlen(name) + 1; 971 if (length > B_OS_NAME_LENGTH) 972 length = B_OS_NAME_LENGTH; 973 974 vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area)); 975 if (area == NULL) 976 return NULL; 977 978 area->name = (char*)malloc_nogrow(length); 979 if (area->name == NULL) { 980 free(area); 981 return NULL; 982 } 983 strlcpy(area->name, name, length); 984 985 area->id = atomic_add(&sNextAreaID, 1); 986 area->base = 0; 987 area->size = 0; 988 area->protection = protection; 989 area->wiring = wiring; 990 area->memory_type = 0; 991 992 area->cache = NULL; 993 area->cache_offset = 0; 994 995 area->address_space = addressSpace; 996 area->address_space_next = NULL; 997 area->cache_next = area->cache_prev = NULL; 998 area->hash_next = NULL; 999 new (&area->mappings) vm_area_mappings; 1000 area->page_protections = NULL; 1001 1002 return area; 1003 } 1004 1005 1006 /*! Finds a reserved area that covers the region spanned by \a start and 1007 \a size, inserts the \a area into that region and makes sure that 1008 there are reserved regions for the remaining parts. 1009 */ 1010 static status_t 1011 find_reserved_area(vm_address_space* addressSpace, addr_t start, 1012 addr_t size, vm_area* area) 1013 { 1014 vm_area* last = NULL; 1015 vm_area* next; 1016 1017 next = addressSpace->areas; 1018 while (next) { 1019 if (next->base <= start && next->base + next->size >= start + size) { 1020 // this area covers the requested range 1021 if (next->id != RESERVED_AREA_ID) { 1022 // but it's not reserved space, it's a real area 1023 return B_BAD_VALUE; 1024 } 1025 1026 break; 1027 } 1028 last = next; 1029 next = next->address_space_next; 1030 } 1031 if (next == NULL) 1032 return B_ENTRY_NOT_FOUND; 1033 1034 // now we have to transfer the requested part of the reserved 1035 // range to the new area - and remove, resize or split the old 1036 // reserved area. 
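	// Illustrative layout of the three cases handled below ("R" is what
	// remains of the reserved range, "A" is the new area):
	//
	//	area at the start:  [AAAA][RRRR]   -> shrink the range from the front
	//	area at the end:    [RRRR][AAAA]   -> shrink the range from the back
	//	area in the middle: [RR][AAAA][RR] -> split the range in two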
1037 1038 if (start == next->base) { 1039 // the area starts at the beginning of the reserved range 1040 if (last) 1041 last->address_space_next = area; 1042 else 1043 addressSpace->areas = area; 1044 1045 if (size == next->size) { 1046 // the new area fully covers the reversed range 1047 area->address_space_next = next->address_space_next; 1048 vm_put_address_space(addressSpace); 1049 free(next); 1050 } else { 1051 // resize the reserved range behind the area 1052 area->address_space_next = next; 1053 next->base += size; 1054 next->size -= size; 1055 } 1056 } else if (start + size == next->base + next->size) { 1057 // the area is at the end of the reserved range 1058 area->address_space_next = next->address_space_next; 1059 next->address_space_next = area; 1060 1061 // resize the reserved range before the area 1062 next->size = start - next->base; 1063 } else { 1064 // the area splits the reserved range into two separate ones 1065 // we need a new reserved area to cover this space 1066 vm_area* reserved = create_reserved_area_struct(addressSpace, 1067 next->protection); 1068 if (reserved == NULL) 1069 return B_NO_MEMORY; 1070 1071 atomic_add(&addressSpace->ref_count, 1); 1072 reserved->address_space_next = next->address_space_next; 1073 area->address_space_next = reserved; 1074 next->address_space_next = area; 1075 1076 // resize regions 1077 reserved->size = next->base + next->size - start - size; 1078 next->size = start - next->base; 1079 reserved->base = start + size; 1080 reserved->cache_offset = next->cache_offset; 1081 } 1082 1083 area->base = start; 1084 area->size = size; 1085 addressSpace->change_count++; 1086 1087 return B_OK; 1088 } 1089 1090 1091 /*! Must be called with this address space's sem held */ 1092 static status_t 1093 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start, 1094 addr_t size, addr_t end, uint32 addressSpec, vm_area* area) 1095 { 1096 vm_area* last = NULL; 1097 vm_area* next; 1098 bool foundSpot = false; 1099 1100 TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, " 1101 "size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start, 1102 size, end, addressSpec, area)); 1103 1104 // do some sanity checking 1105 if (start < addressSpace->base || size == 0 1106 || (end - 1) > (addressSpace->base + (addressSpace->size - 1)) 1107 || start + size > end) 1108 return B_BAD_ADDRESS; 1109 1110 if (addressSpec == B_EXACT_ADDRESS) { 1111 // search for a reserved area 1112 status_t status = find_reserved_area(addressSpace, start, size, area); 1113 if (status == B_OK || status == B_BAD_VALUE) 1114 return status; 1115 1116 // There was no reserved area, and the slot doesn't seem to be used 1117 // already 1118 // TODO: this could be further optimized. 
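	// Worked example for the B_ANY_KERNEL_BLOCK_ADDRESS case handled below:
	// the alignment starts at B_PAGE_SIZE and is doubled until it reaches
	// the size, i.e. it becomes the next power of two that is >= size. With
	// 4 KB pages a 12 KB request thus gets 16 KB alignment; ROUNDUP(), which
	// only works for power-of-two alignments, then rounds the search start
	// up to that boundary.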
1119 } 1120 1121 size_t alignment = B_PAGE_SIZE; 1122 if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) { 1123 // align the memory to the next power of two of the size 1124 while (alignment < size) 1125 alignment <<= 1; 1126 } 1127 1128 start = ROUNDUP(start, alignment); 1129 1130 // walk up to the spot where we should start searching 1131 second_chance: 1132 next = addressSpace->areas; 1133 while (next) { 1134 if (next->base >= start + size) { 1135 // we have a winner 1136 break; 1137 } 1138 last = next; 1139 next = next->address_space_next; 1140 } 1141 1142 // find the right spot depending on the address specification - the area 1143 // will be inserted directly after "last" ("next" is not referenced anymore) 1144 1145 switch (addressSpec) { 1146 case B_ANY_ADDRESS: 1147 case B_ANY_KERNEL_ADDRESS: 1148 case B_ANY_KERNEL_BLOCK_ADDRESS: 1149 // find a hole big enough for a new area 1150 if (!last) { 1151 // see if we can build it at the beginning of the virtual map 1152 if (!next || (next->base >= ROUNDUP(addressSpace->base, 1153 alignment) + size)) { 1154 foundSpot = true; 1155 area->base = ROUNDUP(addressSpace->base, alignment); 1156 break; 1157 } 1158 last = next; 1159 next = next->address_space_next; 1160 } 1161 // keep walking 1162 while (next) { 1163 if (next->base >= ROUNDUP(last->base + last->size, alignment) 1164 + size) { 1165 // we found a spot (it'll be filled up below) 1166 break; 1167 } 1168 last = next; 1169 next = next->address_space_next; 1170 } 1171 1172 if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP( 1173 last->base + last->size, alignment) + (size - 1))) { 1174 // got a spot 1175 foundSpot = true; 1176 area->base = ROUNDUP(last->base + last->size, alignment); 1177 break; 1178 } else { 1179 // We didn't find a free spot - if there were any reserved areas 1180 // with the RESERVED_AVOID_BASE flag set, we can now test those 1181 // for free space 1182 // TODO: it would make sense to start with the biggest of them 1183 next = addressSpace->areas; 1184 last = NULL; 1185 for (last = NULL; next; next = next->address_space_next) { 1186 if (next->id != RESERVED_AREA_ID) { 1187 last = next; 1188 continue; 1189 } 1190 1191 // TODO: take free space after the reserved area into 1192 // account! 
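	// Example of the two outcomes below, assuming page-aligned values: an
	// exact match removes the reserved range entirely and reuses its base;
	// otherwise the new area is carved from the end of the range -- for a
	// range at 0x1000 with size 0x5000 and a request of 0x2000, the range
	// shrinks to 0x3000 and the area gets base 0x1000 + 0x3000 = 0x4000.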
1193 if (next->base == ROUNDUP(next->base, alignment) 1194 && next->size == size) { 1195 // The reserved area is entirely covered, and thus, 1196 // removed 1197 if (last) 1198 last->address_space_next = next->address_space_next; 1199 else 1200 addressSpace->areas = next->address_space_next; 1201 1202 foundSpot = true; 1203 area->base = next->base; 1204 free(next); 1205 break; 1206 } 1207 if (next->size - (ROUNDUP(next->base, alignment) 1208 - next->base) >= size) { 1209 // The new area will be placed at the end of the 1210 // reserved area, and the reserved area will be resized 1211 // to make space 1212 foundSpot = true; 1213 next->size -= size; 1214 last = next; 1215 area->base = next->base + next->size; 1216 break; 1217 } 1218 1219 last = next; 1220 } 1221 } 1222 break; 1223 1224 case B_BASE_ADDRESS: 1225 // find a hole big enough for a new area beginning with "start" 1226 if (!last) { 1227 // see if we can build it at the beginning of the specified start 1228 if (!next || (next->base >= start + size)) { 1229 foundSpot = true; 1230 area->base = start; 1231 break; 1232 } 1233 last = next; 1234 next = next->address_space_next; 1235 } 1236 // keep walking 1237 while (next) { 1238 if (next->base >= last->base + last->size + size) { 1239 // we found a spot (it'll be filled up below) 1240 break; 1241 } 1242 last = next; 1243 next = next->address_space_next; 1244 } 1245 1246 if ((addressSpace->base + (addressSpace->size - 1)) 1247 >= (last->base + last->size + (size - 1))) { 1248 // got a spot 1249 foundSpot = true; 1250 if (last->base + last->size <= start) 1251 area->base = start; 1252 else 1253 area->base = last->base + last->size; 1254 break; 1255 } 1256 // we didn't find a free spot in the requested range, so we'll 1257 // try again without any restrictions 1258 start = addressSpace->base; 1259 addressSpec = B_ANY_ADDRESS; 1260 last = NULL; 1261 goto second_chance; 1262 1263 case B_EXACT_ADDRESS: 1264 // see if we can create it exactly here 1265 if (!last) { 1266 if (!next || (next->base >= start + size)) { 1267 foundSpot = true; 1268 area->base = start; 1269 break; 1270 } 1271 } else { 1272 if (next) { 1273 if (last->base + last->size <= start 1274 && next->base >= start + size) { 1275 foundSpot = true; 1276 area->base = start; 1277 break; 1278 } 1279 } else { 1280 if ((last->base + (last->size - 1)) <= start - 1) { 1281 foundSpot = true; 1282 area->base = start; 1283 } 1284 } 1285 } 1286 break; 1287 default: 1288 return B_BAD_VALUE; 1289 } 1290 1291 if (!foundSpot) 1292 return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY; 1293 1294 area->size = size; 1295 if (last) { 1296 area->address_space_next = last->address_space_next; 1297 last->address_space_next = area; 1298 } else { 1299 area->address_space_next = addressSpace->areas; 1300 addressSpace->areas = area; 1301 } 1302 addressSpace->change_count++; 1303 return B_OK; 1304 } 1305 1306 1307 /*! This inserts the area you pass into the specified address space. 1308 It will also set the "_address" argument to its base address when 1309 the call succeeds. 1310 You need to hold the vm_address_space semaphore. 
1311 */ 1312 static status_t 1313 insert_area(vm_address_space* addressSpace, void** _address, 1314 uint32 addressSpec, addr_t size, vm_area* area) 1315 { 1316 addr_t searchBase, searchEnd; 1317 status_t status; 1318 1319 switch (addressSpec) { 1320 case B_EXACT_ADDRESS: 1321 searchBase = (addr_t)*_address; 1322 searchEnd = (addr_t)*_address + size; 1323 break; 1324 1325 case B_BASE_ADDRESS: 1326 searchBase = (addr_t)*_address; 1327 searchEnd = addressSpace->base + (addressSpace->size - 1); 1328 break; 1329 1330 case B_ANY_ADDRESS: 1331 case B_ANY_KERNEL_ADDRESS: 1332 case B_ANY_KERNEL_BLOCK_ADDRESS: 1333 searchBase = addressSpace->base; 1334 // TODO: remove this again when vm86 mode is moved into the kernel 1335 // completely (currently needs a userland address space!) 1336 if (searchBase == USER_BASE) 1337 searchBase = USER_BASE_ANY; 1338 searchEnd = addressSpace->base + (addressSpace->size - 1); 1339 break; 1340 1341 default: 1342 return B_BAD_VALUE; 1343 } 1344 1345 status = find_and_insert_area_slot(addressSpace, searchBase, size, 1346 searchEnd, addressSpec, area); 1347 if (status == B_OK) { 1348 // TODO: do we have to do anything about B_ANY_KERNEL_ADDRESS 1349 // vs. B_ANY_KERNEL_BLOCK_ADDRESS here? 1350 *_address = (void*)area->base; 1351 } 1352 1353 return status; 1354 } 1355 1356 1357 static inline void 1358 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection) 1359 { 1360 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 1361 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1362 uint8& entry = area->page_protections[pageIndex / 2]; 1363 if (pageIndex % 2 == 0) 1364 entry = (entry & 0xf0) | protection; 1365 else 1366 entry = (entry & 0x0f) | (protection << 4); 1367 } 1368 1369 1370 static inline uint32 1371 get_area_page_protection(vm_area* area, addr_t pageAddress) 1372 { 1373 if (area->page_protections == NULL) 1374 return area->protection; 1375 1376 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1377 uint32 protection = area->page_protections[pageIndex / 2]; 1378 if (pageIndex % 2 == 0) 1379 protection &= 0x0f; 1380 else 1381 protection >>= 4; 1382 1383 return protection | B_KERNEL_READ_AREA 1384 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 1385 } 1386 1387 1388 /*! Cuts a piece out of an area. If the given cut range covers the complete 1389 area, it is deleted. If it covers the beginning or the end, the area is 1390 resized accordingly. If the range covers some part in the middle of the 1391 area, it is split in two; in this case the second area is returned via 1392 \a _secondArea (the variable is left untouched in the other cases). 1393 The address space must be write locked. 1394 */ 1395 static status_t 1396 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address, 1397 addr_t lastAddress, vm_area** _secondArea, bool kernel) 1398 { 1399 // Does the cut range intersect with the area at all? 1400 addr_t areaLast = area->base + (area->size - 1); 1401 if (area->base > lastAddress || areaLast < address) 1402 return B_OK; 1403 1404 // Is the area fully covered? 1405 if (area->base >= address && areaLast <= lastAddress) { 1406 delete_area(addressSpace, area); 1407 return B_OK; 1408 } 1409 1410 AreaCacheLocker cacheLocker(area); 1411 vm_cache* cache = area->cache; 1412 1413 // Cut the end only? 
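	// Illustrative numbers for the cases below, given an area covering
	// [0x10000, 0x18000):
	//	- cut [0x14000, 0x18000): only the end is cut, the area shrinks to
	//	  [0x10000, 0x14000)
	//	- cut [0x10000, 0x14000): only the beginning is cut, base and
	//	  cache_offset both move up by 0x4000
	//	- cut [0x12000, 0x14000): the middle is cut, the area shrinks to
	//	  [0x10000, 0x12000) and a second area backed by the same cache is
	//	  created for [0x14000, 0x18000)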
1414 if (areaLast <= lastAddress) { 1415 addr_t newSize = address - area->base; 1416 1417 // unmap pages 1418 vm_unmap_pages(area, address, area->size - newSize, false); 1419 1420 // If no one else uses the area's cache, we can resize it, too. 1421 if (cache->areas == area && area->cache_next == NULL 1422 && list_is_empty(&cache->consumers)) { 1423 status_t error = cache->Resize(cache->virtual_base + newSize); 1424 if (error != B_OK) 1425 return error; 1426 } 1427 1428 area->size = newSize; 1429 1430 return B_OK; 1431 } 1432 1433 // Cut the beginning only? 1434 if (area->base >= address) { 1435 addr_t newBase = lastAddress + 1; 1436 addr_t newSize = areaLast - lastAddress; 1437 1438 // unmap pages 1439 vm_unmap_pages(area, area->base, newBase - area->base, false); 1440 1441 // TODO: If no one else uses the area's cache, we should resize it, too! 1442 1443 area->cache_offset += newBase - area->base; 1444 area->base = newBase; 1445 area->size = newSize; 1446 1447 return B_OK; 1448 } 1449 1450 // The tough part -- cut a piece out of the middle of the area. 1451 // We do that by shrinking the area to the begin section and creating a 1452 // new area for the end section. 1453 1454 addr_t firstNewSize = address - area->base; 1455 addr_t secondBase = lastAddress + 1; 1456 addr_t secondSize = areaLast - lastAddress; 1457 1458 // unmap pages 1459 vm_unmap_pages(area, address, area->size - firstNewSize, false); 1460 1461 // resize the area 1462 addr_t oldSize = area->size; 1463 area->size = firstNewSize; 1464 1465 // TODO: If no one else uses the area's cache, we might want to create a 1466 // new cache for the second area, transfer the concerned pages from the 1467 // first cache to it and resize the first cache. 1468 1469 // map the second area 1470 vm_area* secondArea; 1471 void* secondBaseAddress = (void*)secondBase; 1472 status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress, 1473 area->cache_offset + (secondBase - area->base), secondSize, 1474 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 1475 &secondArea, area->name, false, kernel); 1476 if (error != B_OK) { 1477 area->size = oldSize; 1478 return error; 1479 } 1480 1481 // We need a cache reference for the new area. 1482 cache->AcquireRefLocked(); 1483 1484 if (_secondArea != NULL) 1485 *_secondArea = secondArea; 1486 1487 return B_OK; 1488 } 1489 1490 1491 static inline void 1492 increment_page_wired_count(vm_page* page) 1493 { 1494 // TODO: needs to be atomic on all platforms! 1495 // ... but at least the check isn't. Consequently we should hold 1496 // sMappingLock, which would allows us to even avoid atomic_add() on 1497 // gMappedPagesCount. 1498 if (page->wired_count++ == 0) { 1499 if (page->mappings.IsEmpty()) 1500 atomic_add(&gMappedPagesCount, 1); 1501 } 1502 } 1503 1504 1505 static inline void 1506 decrement_page_wired_count(vm_page* page) 1507 { 1508 if (--page->wired_count == 0) { 1509 // TODO: needs to be atomic on all platforms! 1510 // See above! 1511 if (page->mappings.IsEmpty()) 1512 atomic_add(&gMappedPagesCount, -1); 1513 } 1514 } 1515 1516 1517 /*! Deletes all areas in the given address range. 1518 The address space must be write-locked. 1519 */ 1520 static status_t 1521 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size, 1522 bool kernel) 1523 { 1524 size = PAGE_ALIGN(size); 1525 addr_t lastAddress = address + (size - 1); 1526 1527 // Check, whether the caller is allowed to modify the concerned areas. 
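	// (The area list is walked twice: the first pass below only checks for
	// B_KERNEL_AREA protection, so that a permission failure is reported
	// before anything has been modified; the second pass then does the
	// actual cutting via cut_area().)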
1528 vm_area* area; 1529 if (!kernel) { 1530 area = addressSpace->areas; 1531 while (area != NULL) { 1532 vm_area* nextArea = area->address_space_next; 1533 1534 if (area->id != RESERVED_AREA_ID) { 1535 addr_t areaLast = area->base + (area->size - 1); 1536 if (area->base < lastAddress && address < areaLast) { 1537 if ((area->protection & B_KERNEL_AREA) != 0) 1538 return B_NOT_ALLOWED; 1539 } 1540 } 1541 1542 area = nextArea; 1543 } 1544 } 1545 1546 area = addressSpace->areas; 1547 while (area != NULL) { 1548 vm_area* nextArea = area->address_space_next; 1549 1550 if (area->id != RESERVED_AREA_ID) { 1551 addr_t areaLast = area->base + (area->size - 1); 1552 if (area->base < lastAddress && address < areaLast) { 1553 status_t error = cut_area(addressSpace, area, address, 1554 lastAddress, NULL, kernel); 1555 if (error != B_OK) 1556 return error; 1557 // Failing after already messing with areas is ugly, but we 1558 // can't do anything about it. 1559 } 1560 } 1561 1562 area = nextArea; 1563 } 1564 1565 return B_OK; 1566 } 1567 1568 1569 /*! You need to hold the lock of the cache and the write lock of the address 1570 space when calling this function. 1571 Note, that in case of error your cache will be temporarily unlocked. 1572 */ 1573 static status_t 1574 map_backing_store(vm_address_space* addressSpace, vm_cache* cache, 1575 void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 1576 int wiring, int protection, int mapping, vm_area** _area, 1577 const char* areaName, bool unmapAddressRange, bool kernel) 1578 { 1579 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, " 1580 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 1581 "'%s'\n", addressSpace, cache, *_virtualAddress, offset, size, 1582 addressSpec, wiring, protection, _area, areaName)); 1583 cache->AssertLocked(); 1584 1585 vm_area* area = create_area_struct(addressSpace, areaName, wiring, 1586 protection); 1587 if (area == NULL) 1588 return B_NO_MEMORY; 1589 1590 status_t status; 1591 1592 // if this is a private map, we need to create a new cache 1593 // to handle the private copies of pages as they are written to 1594 vm_cache* sourceCache = cache; 1595 if (mapping == REGION_PRIVATE_MAP) { 1596 vm_cache* newCache; 1597 1598 // create an anonymous cache 1599 status = VMCacheFactory::CreateAnonymousCache(newCache, 1600 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true); 1601 if (status != B_OK) 1602 goto err1; 1603 1604 newCache->Lock(); 1605 newCache->temporary = 1; 1606 newCache->scan_skip = cache->scan_skip; 1607 newCache->virtual_base = offset; 1608 newCache->virtual_end = offset + size; 1609 1610 cache->AddConsumer(newCache); 1611 1612 cache = newCache; 1613 } 1614 1615 status = cache->SetMinimalCommitment(size); 1616 if (status != B_OK) 1617 goto err2; 1618 1619 // check to see if this address space has entered DELETE state 1620 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 1621 // okay, someone is trying to delete this address space now, so we can't 1622 // insert the area, so back out 1623 status = B_BAD_TEAM_ID; 1624 goto err2; 1625 } 1626 1627 if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) { 1628 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 1629 size, kernel); 1630 if (status != B_OK) 1631 goto err2; 1632 } 1633 1634 status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area); 1635 if (status != B_OK) 1636 goto err2; 1637 1638 // attach the cache to the area 1639 area->cache = cache; 1640 
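	// (The bookkeeping below links the new area three ways: into the cache's
	// area list via InsertAreaLocked(), into the global sAreaHash so it can
	// be found by its ID, and to its address space, for which an extra
	// reference is taken on the area's behalf.)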
area->cache_offset = offset; 1641 1642 // point the cache back to the area 1643 cache->InsertAreaLocked(area); 1644 if (mapping == REGION_PRIVATE_MAP) 1645 cache->Unlock(); 1646 1647 // insert the area in the global area hash table 1648 rw_lock_write_lock(&sAreaHashLock); 1649 hash_insert(sAreaHash, area); 1650 rw_lock_write_unlock(&sAreaHashLock); 1651 1652 // grab a ref to the address space (the area holds this) 1653 atomic_add(&addressSpace->ref_count, 1); 1654 1655 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1656 // cache, sourceCache, areaName, area); 1657 1658 *_area = area; 1659 return B_OK; 1660 1661 err2: 1662 if (mapping == REGION_PRIVATE_MAP) { 1663 // We created this cache, so we must delete it again. Note, that we 1664 // need to temporarily unlock the source cache or we'll otherwise 1665 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1666 sourceCache->Unlock(); 1667 cache->ReleaseRefAndUnlock(); 1668 sourceCache->Lock(); 1669 } 1670 err1: 1671 free(area->name); 1672 free(area); 1673 return status; 1674 } 1675 1676 1677 status_t 1678 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1679 { 1680 AddressSpaceWriteLocker locker(team); 1681 if (!locker.IsLocked()) 1682 return B_BAD_TEAM_ID; 1683 1684 // check to see if this address space has entered DELETE state 1685 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1686 // okay, someone is trying to delete this address space now, so we can't 1687 // insert the area, so back out 1688 return B_BAD_TEAM_ID; 1689 } 1690 1691 // search area list and remove any matching reserved ranges 1692 1693 vm_area* area = locker.AddressSpace()->areas; 1694 vm_area* last = NULL; 1695 while (area) { 1696 // the area must be completely part of the reserved range 1697 if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address 1698 && area->base + area->size <= (addr_t)address + size) { 1699 // remove reserved range 1700 vm_area* reserved = area; 1701 if (last) 1702 last->address_space_next = reserved->address_space_next; 1703 else 1704 locker.AddressSpace()->areas = reserved->address_space_next; 1705 1706 area = reserved->address_space_next; 1707 vm_put_address_space(locker.AddressSpace()); 1708 free(reserved); 1709 continue; 1710 } 1711 1712 last = area; 1713 area = area->address_space_next; 1714 } 1715 1716 return B_OK; 1717 } 1718 1719 1720 status_t 1721 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1722 addr_t size, uint32 flags) 1723 { 1724 if (size == 0) 1725 return B_BAD_VALUE; 1726 1727 AddressSpaceWriteLocker locker(team); 1728 if (!locker.IsLocked()) 1729 return B_BAD_TEAM_ID; 1730 1731 // check to see if this address space has entered DELETE state 1732 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1733 // okay, someone is trying to delete this address space now, so we 1734 // can't insert the area, let's back out 1735 return B_BAD_TEAM_ID; 1736 } 1737 1738 vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags); 1739 if (area == NULL) 1740 return B_NO_MEMORY; 1741 1742 status_t status = insert_area(locker.AddressSpace(), _address, addressSpec, 1743 size, area); 1744 if (status != B_OK) { 1745 free(area); 1746 return status; 1747 } 1748 1749 // the area is now reserved! 
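	// A hypothetical usage sketch of the reservation API (team, address and
	// size are example values only): reserve a range up front so that other
	// allocations stay out of it, later create areas inside it with
	// B_EXACT_ADDRESS -- find_reserved_area() carves them out of the
	// reservation -- or simply give the range back:
	//
	//	void* base = (void*)0x60000000;
	//	if (vm_reserve_address_range(team, &base, B_EXACT_ADDRESS,
	//			16 * B_PAGE_SIZE, 0) == B_OK) {
	//		// ... create areas within [base, base + 16 * B_PAGE_SIZE) ...
	//		vm_unreserve_address_range(team, base, 16 * B_PAGE_SIZE);
	//	}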
1750 1751 area->cache_offset = area->base; 1752 // we cache the original base address here 1753 1754 atomic_add(&locker.AddressSpace()->ref_count, 1); 1755 return B_OK; 1756 } 1757 1758 1759 area_id 1760 vm_create_anonymous_area(team_id team, const char* name, void** address, 1761 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 1762 uint32 flags, bool kernel) 1763 { 1764 vm_area* area; 1765 vm_cache* cache; 1766 vm_page* page = NULL; 1767 bool isStack = (protection & B_STACK_AREA) != 0; 1768 page_num_t guardPages; 1769 bool canOvercommit = false; 1770 addr_t physicalBase = 0; 1771 1772 TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size)); 1773 1774 size = PAGE_ALIGN(size); 1775 1776 if (size == 0) 1777 return B_BAD_VALUE; 1778 if (!arch_vm_supports_protection(protection)) 1779 return B_NOT_SUPPORTED; 1780 1781 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1782 canOvercommit = true; 1783 1784 #ifdef DEBUG_KERNEL_STACKS 1785 if ((protection & B_KERNEL_STACK_AREA) != 0) 1786 isStack = true; 1787 #endif 1788 1789 // check parameters 1790 switch (addressSpec) { 1791 case B_ANY_ADDRESS: 1792 case B_EXACT_ADDRESS: 1793 case B_BASE_ADDRESS: 1794 case B_ANY_KERNEL_ADDRESS: 1795 case B_ANY_KERNEL_BLOCK_ADDRESS: 1796 break; 1797 case B_PHYSICAL_BASE_ADDRESS: 1798 physicalBase = (addr_t)*address; 1799 addressSpec = B_ANY_KERNEL_ADDRESS; 1800 break; 1801 1802 default: 1803 return B_BAD_VALUE; 1804 } 1805 1806 bool doReserveMemory = false; 1807 switch (wiring) { 1808 case B_NO_LOCK: 1809 break; 1810 case B_FULL_LOCK: 1811 case B_LAZY_LOCK: 1812 case B_CONTIGUOUS: 1813 doReserveMemory = true; 1814 break; 1815 case B_ALREADY_WIRED: 1816 break; 1817 case B_LOMEM: 1818 //case B_SLOWMEM: 1819 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 1820 wiring = B_FULL_LOCK; 1821 doReserveMemory = true; 1822 break; 1823 default: 1824 return B_BAD_VALUE; 1825 } 1826 1827 // For full lock or contiguous areas we're also going to map the pages and 1828 // thus need to reserve pages for the mapping backend upfront. 1829 addr_t reservedMapPages = 0; 1830 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1831 AddressSpaceWriteLocker locker; 1832 status_t status = locker.SetTo(team); 1833 if (status != B_OK) 1834 return status; 1835 1836 vm_translation_map* map = &locker.AddressSpace()->translation_map; 1837 reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 1838 } 1839 1840 // Reserve memory before acquiring the address space lock. This reduces the 1841 // chances of failure, since while holding the write lock to the address 1842 // space (if it is the kernel address space that is), the low memory handler 1843 // won't be able to free anything for us. 1844 addr_t reservedMemory = 0; 1845 if (doReserveMemory) { 1846 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1847 if (vm_try_reserve_memory(size, timeout) != B_OK) 1848 return B_NO_MEMORY; 1849 reservedMemory = size; 1850 // TODO: We don't reserve the memory for the pages for the page 1851 // directories/tables. We actually need to do since we currently don't 1852 // reclaim them (and probably can't reclaim all of them anyway). Thus 1853 // there are actually less physical pages than there should be, which 1854 // can get the VM into trouble in low memory situations. 1855 } 1856 1857 AddressSpaceWriteLocker locker; 1858 vm_address_space* addressSpace; 1859 status_t status; 1860 1861 // For full lock areas reserve the pages before locking the address 1862 // space. E.g. 
block caches can't release their memory while we hold the 1863 // address space lock. 1864 page_num_t reservedPages = reservedMapPages; 1865 if (wiring == B_FULL_LOCK) 1866 reservedPages += size / B_PAGE_SIZE; 1867 if (reservedPages > 0) { 1868 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1869 if (!vm_page_try_reserve_pages(reservedPages)) { 1870 reservedPages = 0; 1871 status = B_WOULD_BLOCK; 1872 goto err0; 1873 } 1874 } else 1875 vm_page_reserve_pages(reservedPages); 1876 } 1877 1878 status = locker.SetTo(team); 1879 if (status != B_OK) 1880 goto err0; 1881 1882 addressSpace = locker.AddressSpace(); 1883 1884 if (wiring == B_CONTIGUOUS) { 1885 // we try to allocate the page run here upfront as this may easily 1886 // fail for obvious reasons 1887 page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, physicalBase, 1888 size / B_PAGE_SIZE); 1889 if (page == NULL) { 1890 status = B_NO_MEMORY; 1891 goto err0; 1892 } 1893 } 1894 1895 // create an anonymous cache 1896 // if it's a stack, make sure that two pages are available at least 1897 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1898 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1899 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1900 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1901 wiring == B_NO_LOCK); 1902 if (status != B_OK) 1903 goto err1; 1904 1905 cache->temporary = 1; 1906 cache->virtual_end = size; 1907 cache->committed_size = reservedMemory; 1908 // TODO: This should be done via a method. 1909 reservedMemory = 0; 1910 1911 switch (wiring) { 1912 case B_LAZY_LOCK: 1913 case B_FULL_LOCK: 1914 case B_CONTIGUOUS: 1915 case B_ALREADY_WIRED: 1916 cache->scan_skip = 1; 1917 break; 1918 case B_NO_LOCK: 1919 cache->scan_skip = 0; 1920 break; 1921 } 1922 1923 cache->Lock(); 1924 1925 status = map_backing_store(addressSpace, cache, address, 0, size, 1926 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 1927 (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel); 1928 1929 if (status < B_OK) { 1930 cache->ReleaseRefAndUnlock(); 1931 goto err1; 1932 } 1933 1934 locker.DegradeToReadLock(); 1935 1936 switch (wiring) { 1937 case B_NO_LOCK: 1938 case B_LAZY_LOCK: 1939 // do nothing - the pages are mapped in as needed 1940 break; 1941 1942 case B_FULL_LOCK: 1943 { 1944 // Allocate and map all pages for this area 1945 1946 off_t offset = 0; 1947 for (addr_t address = area->base; 1948 address < area->base + (area->size - 1); 1949 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1950 #ifdef DEBUG_KERNEL_STACKS 1951 # ifdef STACK_GROWS_DOWNWARDS 1952 if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES 1953 * B_PAGE_SIZE) 1954 # else 1955 if (isStack && address >= area->base + area->size 1956 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1957 # endif 1958 continue; 1959 #endif 1960 vm_page* page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 1961 cache->InsertPage(page, offset); 1962 vm_map_page(area, page, address, protection); 1963 1964 // Periodically unreserve pages we've already allocated, so that 1965 // we don't unnecessarily increase the pressure on the VM. 1966 if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) { 1967 page_num_t toUnreserve = 128; 1968 vm_page_unreserve_pages(toUnreserve); 1969 reservedPages -= toUnreserve; 1970 } 1971 } 1972 1973 break; 1974 } 1975 1976 case B_ALREADY_WIRED: 1977 { 1978 // The pages should already be mapped. This is only really useful 1979 // during boot time. 
Find the appropriate vm_page objects and stick 1980 // them in the cache object. 1981 vm_translation_map* map = &addressSpace->translation_map; 1982 off_t offset = 0; 1983 1984 if (!gKernelStartup) 1985 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1986 1987 map->ops->lock(map); 1988 1989 for (addr_t virtualAddress = area->base; virtualAddress < area->base 1990 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 1991 offset += B_PAGE_SIZE) { 1992 addr_t physicalAddress; 1993 uint32 flags; 1994 status = map->ops->query(map, virtualAddress, 1995 &physicalAddress, &flags); 1996 if (status < B_OK) { 1997 panic("looking up mapping failed for va 0x%lx\n", 1998 virtualAddress); 1999 } 2000 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2001 if (page == NULL) { 2002 panic("looking up page failed for pa 0x%lx\n", 2003 physicalAddress); 2004 } 2005 2006 increment_page_wired_count(page); 2007 vm_page_set_state(page, PAGE_STATE_WIRED); 2008 cache->InsertPage(page, offset); 2009 } 2010 2011 map->ops->unlock(map); 2012 break; 2013 } 2014 2015 case B_CONTIGUOUS: 2016 { 2017 // We have already allocated our continuous pages run, so we can now 2018 // just map them in the address space 2019 vm_translation_map* map = &addressSpace->translation_map; 2020 addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE; 2021 addr_t virtualAddress = area->base; 2022 off_t offset = 0; 2023 2024 map->ops->lock(map); 2025 2026 for (virtualAddress = area->base; virtualAddress < area->base 2027 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2028 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 2029 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2030 if (page == NULL) 2031 panic("couldn't lookup physical page just allocated\n"); 2032 2033 status = map->ops->map(map, virtualAddress, physicalAddress, 2034 protection); 2035 if (status < B_OK) 2036 panic("couldn't map physical page in page run\n"); 2037 2038 increment_page_wired_count(page); 2039 vm_page_set_state(page, PAGE_STATE_WIRED); 2040 cache->InsertPage(page, offset); 2041 } 2042 2043 map->ops->unlock(map); 2044 break; 2045 } 2046 2047 default: 2048 break; 2049 } 2050 2051 cache->Unlock(); 2052 2053 if (reservedPages > 0) 2054 vm_page_unreserve_pages(reservedPages); 2055 2056 TRACE(("vm_create_anonymous_area: done\n")); 2057 2058 area->cache_type = CACHE_TYPE_RAM; 2059 return area->id; 2060 2061 err1: 2062 if (wiring == B_CONTIGUOUS) { 2063 // we had reserved the area space upfront... 
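	// (Error unwinding, for reference: the loop below returns the contiguous
	// page run to the free list page by page; err0 then releases whatever
	// page and memory reservations are still held.)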
2064 addr_t pageNumber = page->physical_page_number; 2065 int32 i; 2066 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 2067 page = vm_lookup_page(pageNumber); 2068 if (page == NULL) 2069 panic("couldn't lookup physical page just allocated\n"); 2070 2071 vm_page_set_state(page, PAGE_STATE_FREE); 2072 } 2073 } 2074 2075 err0: 2076 if (reservedPages > 0) 2077 vm_page_unreserve_pages(reservedPages); 2078 if (reservedMemory > 0) 2079 vm_unreserve_memory(reservedMemory); 2080 2081 return status; 2082 } 2083 2084 2085 area_id 2086 vm_map_physical_memory(team_id team, const char* name, void** _address, 2087 uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress) 2088 { 2089 vm_area* area; 2090 vm_cache* cache; 2091 addr_t mapOffset; 2092 2093 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 2094 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 2095 name, _address, addressSpec, size, protection, physicalAddress)); 2096 2097 if (!arch_vm_supports_protection(protection)) 2098 return B_NOT_SUPPORTED; 2099 2100 AddressSpaceWriteLocker locker(team); 2101 if (!locker.IsLocked()) 2102 return B_BAD_TEAM_ID; 2103 2104 // if the physical address is somewhat inside a page, 2105 // move the actual area down to align on a page boundary 2106 mapOffset = physicalAddress % B_PAGE_SIZE; 2107 size += mapOffset; 2108 physicalAddress -= mapOffset; 2109 2110 size = PAGE_ALIGN(size); 2111 2112 // create an device cache 2113 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 2114 if (status != B_OK) 2115 return status; 2116 2117 // tell the page scanner to skip over this area, it's pages are special 2118 cache->scan_skip = 1; 2119 cache->virtual_end = size; 2120 2121 cache->Lock(); 2122 2123 status = map_backing_store(locker.AddressSpace(), cache, _address, 2124 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2125 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2126 2127 if (status < B_OK) 2128 cache->ReleaseRefLocked(); 2129 2130 cache->Unlock(); 2131 2132 if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) { 2133 // set requested memory type 2134 status = arch_vm_set_memory_type(area, physicalAddress, 2135 addressSpec & B_MTR_MASK); 2136 if (status < B_OK) 2137 delete_area(locker.AddressSpace(), area); 2138 } 2139 2140 if (status >= B_OK) { 2141 // make sure our area is mapped in completely 2142 2143 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2144 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2145 area->base + (size - 1)); 2146 2147 vm_page_reserve_pages(reservePages); 2148 map->ops->lock(map); 2149 2150 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2151 map->ops->map(map, area->base + offset, physicalAddress + offset, 2152 protection); 2153 } 2154 2155 map->ops->unlock(map); 2156 vm_page_unreserve_pages(reservePages); 2157 } 2158 2159 if (status < B_OK) 2160 return status; 2161 2162 // modify the pointer returned to be offset back into the new area 2163 // the same way the physical address in was offset 2164 *_address = (void*)((addr_t)*_address + mapOffset); 2165 2166 area->cache_type = CACHE_TYPE_DEVICE; 2167 return area->id; 2168 } 2169 2170 2171 area_id 2172 vm_create_null_area(team_id team, const char* name, void** address, 2173 uint32 addressSpec, addr_t size) 2174 { 2175 vm_area* area; 2176 vm_cache* cache; 2177 status_t status; 2178 2179 AddressSpaceWriteLocker locker(team); 2180 if (!locker.IsLocked()) 2181 return B_BAD_TEAM_ID; 2182 2183 
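	// (The area created below is backed by a plain "null" cache: no pages
	// are ever mapped into it, it merely occupies the address range so that
	// nothing else can be placed there.)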
size = PAGE_ALIGN(size); 2184 2185 // create an null cache 2186 status = VMCacheFactory::CreateNullCache(cache); 2187 if (status != B_OK) 2188 return status; 2189 2190 // tell the page scanner to skip over this area, no pages will be mapped here 2191 cache->scan_skip = 1; 2192 cache->virtual_end = size; 2193 2194 cache->Lock(); 2195 2196 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 2197 addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name, 2198 false, true); 2199 2200 if (status < B_OK) { 2201 cache->ReleaseRefAndUnlock(); 2202 return status; 2203 } 2204 2205 cache->Unlock(); 2206 2207 area->cache_type = CACHE_TYPE_NULL; 2208 return area->id; 2209 } 2210 2211 2212 /*! Creates the vnode cache for the specified \a vnode. 2213 The vnode has to be marked busy when calling this function. 2214 */ 2215 status_t 2216 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2217 { 2218 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2219 } 2220 2221 2222 /*! \a cache must be locked. The area's address space must be read-locked. 2223 */ 2224 static void 2225 pre_map_area_pages(vm_area* area, VMCache* cache) 2226 { 2227 addr_t baseAddress = area->base; 2228 addr_t cacheOffset = area->cache_offset; 2229 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2230 page_num_t endPage = firstPage + area->size / B_PAGE_SIZE; 2231 2232 for (VMCachePagesTree::Iterator it 2233 = cache->pages.GetIterator(firstPage, true, true); 2234 vm_page* page = it.Next();) { 2235 if (page->cache_offset >= endPage) 2236 break; 2237 2238 // skip inactive pages 2239 if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0) 2240 continue; 2241 2242 vm_map_page(area, page, 2243 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2244 B_READ_AREA | B_KERNEL_READ_AREA); 2245 } 2246 } 2247 2248 2249 /*! Will map the file specified by \a fd to an area in memory. 2250 The file will be mirrored beginning at the specified \a offset. The 2251 \a offset and \a size arguments have to be page aligned. 2252 */ 2253 static area_id 2254 _vm_map_file(team_id team, const char* name, void** _address, 2255 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2256 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2257 { 2258 // TODO: for binary files, we want to make sure that they get the 2259 // copy of a file at a given time, ie. later changes should not 2260 // make it into the mapped copy -- this will need quite some changes 2261 // to be done in a nice way 2262 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 2263 fd, offset, size, mapping)); 2264 2265 offset = ROUNDOWN(offset, B_PAGE_SIZE); 2266 size = PAGE_ALIGN(size); 2267 2268 if (mapping == REGION_NO_PRIVATE_MAP) 2269 protection |= B_SHARED_AREA; 2270 if (addressSpec != B_EXACT_ADDRESS) 2271 unmapAddressRange = false; 2272 2273 if (fd < 0) { 2274 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2275 return vm_create_anonymous_area(team, name, _address, addressSpec, size, 2276 B_NO_LOCK, protection, flags, kernel); 2277 } 2278 2279 // get the open flags of the FD 2280 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2281 if (descriptor == NULL) 2282 return EBADF; 2283 int32 openMode = descriptor->open_mode; 2284 put_fd(descriptor); 2285 2286 // The FD must open for reading at any rate. For shared mapping with write 2287 // access, additionally the FD must be open for writing. 
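// In other words: a write-only descriptor can never be mapped, and a read-only
// one cannot back a shared writable mapping.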
2288 if ((openMode & O_ACCMODE) == O_WRONLY 2289 || (mapping == REGION_NO_PRIVATE_MAP 2290 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2291 && (openMode & O_ACCMODE) == O_RDONLY)) { 2292 return EACCES; 2293 } 2294 2295 // get the vnode for the object, this also grabs a ref to it 2296 struct vnode* vnode = NULL; 2297 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2298 if (status < B_OK) 2299 return status; 2300 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 2301 2302 // If we're going to pre-map pages, we need to reserve the pages needed by 2303 // the mapping backend upfront. 2304 page_num_t reservedPreMapPages = 0; 2305 if ((protection & B_READ_AREA) != 0) { 2306 AddressSpaceWriteLocker locker; 2307 status = locker.SetTo(team); 2308 if (status != B_OK) 2309 return status; 2310 2311 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2312 reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 2313 2314 locker.Unlock(); 2315 2316 vm_page_reserve_pages(reservedPreMapPages); 2317 } 2318 2319 struct PageUnreserver { 2320 PageUnreserver(page_num_t count) 2321 : fCount(count) 2322 { 2323 } 2324 2325 ~PageUnreserver() 2326 { 2327 if (fCount > 0) 2328 vm_page_unreserve_pages(fCount); 2329 } 2330 2331 page_num_t fCount; 2332 } pageUnreserver(reservedPreMapPages); 2333 2334 AddressSpaceWriteLocker locker(team); 2335 if (!locker.IsLocked()) 2336 return B_BAD_TEAM_ID; 2337 2338 // TODO: this only works for file systems that use the file cache 2339 vm_cache* cache; 2340 status = vfs_get_vnode_cache(vnode, &cache, false); 2341 if (status < B_OK) 2342 return status; 2343 2344 cache->Lock(); 2345 2346 vm_area* area; 2347 status = map_backing_store(locker.AddressSpace(), cache, _address, 2348 offset, size, addressSpec, 0, protection, mapping, &area, name, 2349 unmapAddressRange, kernel); 2350 2351 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2352 // map_backing_store() cannot know we no longer need the ref 2353 cache->ReleaseRefLocked(); 2354 } 2355 2356 if (status == B_OK && (protection & B_READ_AREA) != 0) 2357 pre_map_area_pages(area, cache); 2358 2359 cache->Unlock(); 2360 2361 if (status == B_OK) { 2362 // TODO: this probably deserves a smarter solution, ie. don't always 2363 // prefetch stuff, and also, probably don't trigger it at this place. 
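// For the time being we simply ask the file cache to read ahead, so that the
// first faults on the new mapping are likely to hit pages that are already
// present.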
2364 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2365 // prefetches at max 10 MB starting from "offset" 2366 } 2367 2368 if (status != B_OK) 2369 return status; 2370 2371 area->cache_type = CACHE_TYPE_VNODE; 2372 return area->id; 2373 } 2374 2375 2376 area_id 2377 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2378 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2379 int fd, off_t offset) 2380 { 2381 if (!arch_vm_supports_protection(protection)) 2382 return B_NOT_SUPPORTED; 2383 2384 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2385 mapping, unmapAddressRange, fd, offset, true); 2386 } 2387 2388 2389 vm_cache* 2390 vm_area_get_locked_cache(vm_area* area) 2391 { 2392 mutex_lock(&sAreaCacheLock); 2393 2394 while (true) { 2395 vm_cache* cache = area->cache; 2396 2397 if (!cache->SwitchLock(&sAreaCacheLock)) { 2398 // cache has been deleted 2399 mutex_lock(&sAreaCacheLock); 2400 continue; 2401 } 2402 2403 mutex_lock(&sAreaCacheLock); 2404 2405 if (cache == area->cache) { 2406 cache->AcquireRefLocked(); 2407 mutex_unlock(&sAreaCacheLock); 2408 return cache; 2409 } 2410 2411 // the cache changed in the meantime 2412 cache->Unlock(); 2413 } 2414 } 2415 2416 2417 void 2418 vm_area_put_locked_cache(vm_cache* cache) 2419 { 2420 cache->ReleaseRefAndUnlock(); 2421 } 2422 2423 2424 area_id 2425 vm_clone_area(team_id team, const char* name, void** address, 2426 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2427 bool kernel) 2428 { 2429 vm_area* newArea = NULL; 2430 vm_area* sourceArea; 2431 2432 // Check whether the source area exists and is cloneable. If so, mark it 2433 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2434 { 2435 AddressSpaceWriteLocker locker; 2436 status_t status = locker.SetFromArea(sourceID, sourceArea); 2437 if (status != B_OK) 2438 return status; 2439 2440 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2441 return B_NOT_ALLOWED; 2442 2443 sourceArea->protection |= B_SHARED_AREA; 2444 protection |= B_SHARED_AREA; 2445 } 2446 2447 // Now lock both address spaces and actually do the cloning. 2448 2449 MultiAddressSpaceLocker locker; 2450 vm_address_space* sourceAddressSpace; 2451 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2452 if (status != B_OK) 2453 return status; 2454 2455 vm_address_space* targetAddressSpace; 2456 status = locker.AddTeam(team, true, &targetAddressSpace); 2457 if (status != B_OK) 2458 return status; 2459 2460 status = locker.Lock(); 2461 if (status != B_OK) 2462 return status; 2463 2464 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2465 if (sourceArea == NULL) 2466 return B_BAD_VALUE; 2467 2468 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2469 return B_NOT_ALLOWED; 2470 2471 vm_cache* cache = vm_area_get_locked_cache(sourceArea); 2472 2473 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2474 // have been adapted. Maybe it should be part of the kernel settings, 2475 // anyway (so that old drivers can always work). 
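// The corresponding check below is therefore compiled out for the time being.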
2476 #if 0 2477 if (sourceArea->aspace == vm_kernel_address_space() 2478 && addressSpace != vm_kernel_address_space() 2479 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2480 // kernel areas must not be cloned in userland, unless explicitly 2481 // declared user-cloneable upon construction 2482 status = B_NOT_ALLOWED; 2483 } else 2484 #endif 2485 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2486 status = B_NOT_ALLOWED; 2487 else { 2488 status = map_backing_store(targetAddressSpace, cache, address, 2489 sourceArea->cache_offset, sourceArea->size, addressSpec, 2490 sourceArea->wiring, protection, mapping, &newArea, name, false, 2491 kernel); 2492 } 2493 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2494 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2495 // to create a new cache, and has therefore already acquired a reference 2496 // to the source cache - but otherwise it has no idea that we need 2497 // one. 2498 cache->AcquireRefLocked(); 2499 } 2500 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2501 // we need to map in everything at this point 2502 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2503 // we don't have actual pages to map but a physical area 2504 vm_translation_map* map 2505 = &sourceArea->address_space->translation_map; 2506 map->ops->lock(map); 2507 2508 addr_t physicalAddress; 2509 uint32 oldProtection; 2510 map->ops->query(map, sourceArea->base, &physicalAddress, 2511 &oldProtection); 2512 2513 map->ops->unlock(map); 2514 2515 map = &targetAddressSpace->translation_map; 2516 size_t reservePages = map->ops->map_max_pages_need(map, 2517 newArea->base, newArea->base + (newArea->size - 1)); 2518 2519 vm_page_reserve_pages(reservePages); 2520 map->ops->lock(map); 2521 2522 for (addr_t offset = 0; offset < newArea->size; 2523 offset += B_PAGE_SIZE) { 2524 map->ops->map(map, newArea->base + offset, 2525 physicalAddress + offset, protection); 2526 } 2527 2528 map->ops->unlock(map); 2529 vm_page_unreserve_pages(reservePages); 2530 } else { 2531 vm_translation_map* map = &targetAddressSpace->translation_map; 2532 size_t reservePages = map->ops->map_max_pages_need(map, 2533 newArea->base, newArea->base + (newArea->size - 1)); 2534 vm_page_reserve_pages(reservePages); 2535 2536 // map in all pages from source 2537 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2538 vm_page* page = it.Next();) { 2539 vm_map_page(newArea, page, newArea->base 2540 + ((page->cache_offset << PAGE_SHIFT) 2541 - newArea->cache_offset), protection); 2542 } 2543 2544 vm_page_unreserve_pages(reservePages); 2545 } 2546 } 2547 if (status == B_OK) 2548 newArea->cache_type = sourceArea->cache_type; 2549 2550 vm_area_put_locked_cache(cache); 2551 2552 if (status < B_OK) 2553 return status; 2554 2555 return newArea->id; 2556 } 2557 2558 2559 //! 
The address space must be write locked at this point 2560 static void 2561 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area) 2562 { 2563 vm_area* temp = addressSpace->areas; 2564 vm_area* last = NULL; 2565 2566 while (temp != NULL) { 2567 if (area == temp) { 2568 if (last != NULL) { 2569 last->address_space_next = temp->address_space_next; 2570 } else { 2571 addressSpace->areas = temp->address_space_next; 2572 } 2573 addressSpace->change_count++; 2574 break; 2575 } 2576 last = temp; 2577 temp = temp->address_space_next; 2578 } 2579 if (area == addressSpace->area_hint) 2580 addressSpace->area_hint = NULL; 2581 2582 if (temp == NULL) 2583 panic("vm_area_release_ref: area not found in aspace's area list\n"); 2584 } 2585 2586 2587 static void 2588 delete_area(vm_address_space* addressSpace, vm_area* area) 2589 { 2590 rw_lock_write_lock(&sAreaHashLock); 2591 hash_remove(sAreaHash, area); 2592 rw_lock_write_unlock(&sAreaHashLock); 2593 2594 // At this point the area is removed from the global hash table, but 2595 // still exists in the area list. 2596 2597 // Unmap the virtual address space the area occupied 2598 vm_unmap_pages(area, area->base, area->size, !area->cache->temporary); 2599 2600 if (!area->cache->temporary) 2601 area->cache->WriteModified(); 2602 2603 arch_vm_unset_memory_type(area); 2604 remove_area_from_address_space(addressSpace, area); 2605 vm_put_address_space(addressSpace); 2606 2607 area->cache->RemoveArea(area); 2608 area->cache->ReleaseRef(); 2609 2610 free(area->page_protections); 2611 free(area->name); 2612 free(area); 2613 } 2614 2615 2616 status_t 2617 vm_delete_area(team_id team, area_id id, bool kernel) 2618 { 2619 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2620 2621 AddressSpaceWriteLocker locker; 2622 vm_area* area; 2623 status_t status = locker.SetFromArea(team, id, area); 2624 if (status < B_OK) 2625 return status; 2626 2627 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2628 return B_NOT_ALLOWED; 2629 2630 delete_area(locker.AddressSpace(), area); 2631 return B_OK; 2632 } 2633 2634 2635 /*! Creates a new cache on top of given cache, moves all areas from 2636 the old cache to the new one, and changes the protection of all affected 2637 areas' pages to read-only. 2638 Preconditions: 2639 - The given cache must be locked. 2640 - All of the cache's areas' address spaces must be read locked. 2641 */ 2642 static status_t 2643 vm_copy_on_write_area(vm_cache* lowerCache) 2644 { 2645 vm_cache* upperCache; 2646 2647 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2648 2649 // We need to separate the cache from its areas. The cache goes one level 2650 // deeper and we create a new cache inbetween. 
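// The new (upper) cache starts out empty: the existing pages stay in the lower
// cache, which becomes its source, and a page is only copied upwards when it
// is written to for the first time.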
2651 2652 // create an anonymous cache 2653 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2654 0, true); 2655 if (status != B_OK) 2656 return status; 2657 2658 upperCache->Lock(); 2659 2660 upperCache->temporary = 1; 2661 upperCache->scan_skip = lowerCache->scan_skip; 2662 upperCache->virtual_base = lowerCache->virtual_base; 2663 upperCache->virtual_end = lowerCache->virtual_end; 2664 2665 // transfer the lower cache areas to the upper cache 2666 mutex_lock(&sAreaCacheLock); 2667 2668 upperCache->areas = lowerCache->areas; 2669 lowerCache->areas = NULL; 2670 2671 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2672 tempArea = tempArea->cache_next) { 2673 tempArea->cache = upperCache; 2674 upperCache->AcquireRefLocked(); 2675 lowerCache->ReleaseRefLocked(); 2676 } 2677 2678 mutex_unlock(&sAreaCacheLock); 2679 2680 lowerCache->AddConsumer(upperCache); 2681 2682 // We now need to remap all pages from all of the cache's areas read-only, so 2683 // that a copy will be created on next write access 2684 2685 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2686 tempArea = tempArea->cache_next) { 2687 // The area must be readable in the same way it was previously writable 2688 uint32 protection = B_KERNEL_READ_AREA; 2689 if ((tempArea->protection & B_READ_AREA) != 0) 2690 protection |= B_READ_AREA; 2691 2692 vm_translation_map* map = &tempArea->address_space->translation_map; 2693 map->ops->lock(map); 2694 map->ops->protect(map, tempArea->base, 2695 tempArea->base - 1 + tempArea->size, protection); 2696 map->ops->unlock(map); 2697 } 2698 2699 vm_area_put_locked_cache(upperCache); 2700 2701 return B_OK; 2702 } 2703 2704 2705 area_id 2706 vm_copy_area(team_id team, const char* name, void** _address, 2707 uint32 addressSpec, uint32 protection, area_id sourceID) 2708 { 2709 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2710 2711 if ((protection & B_KERNEL_PROTECTION) == 0) { 2712 // set the same protection for the kernel as for userland 2713 protection |= B_KERNEL_READ_AREA; 2714 if (writableCopy) 2715 protection |= B_KERNEL_WRITE_AREA; 2716 } 2717 2718 // Do the locking: target address space, all address spaces associated with 2719 // the source cache, and the cache itself. 2720 MultiAddressSpaceLocker locker; 2721 vm_address_space* targetAddressSpace; 2722 vm_cache* cache; 2723 vm_area* source; 2724 status_t status = locker.AddTeam(team, true, &targetAddressSpace); 2725 if (status == B_OK) { 2726 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2727 &cache); 2728 } 2729 if (status != B_OK) 2730 return status; 2731 2732 AreaCacheLocker cacheLocker(cache); // already locked 2733 2734 if (addressSpec == B_CLONE_ADDRESS) { 2735 addressSpec = B_EXACT_ADDRESS; 2736 *_address = (void*)source->base; 2737 } 2738 2739 bool sharedArea = (source->protection & B_SHARED_AREA) != 0; 2740 2741 // First, create a cache on top of the source area, respectively use the 2742 // existing one, if this is a shared area. 2743 2744 vm_area* target; 2745 status = map_backing_store(targetAddressSpace, cache, _address, 2746 source->cache_offset, source->size, addressSpec, source->wiring, 2747 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2748 &target, name, false, true); 2749 if (status < B_OK) 2750 return status; 2751 2752 if (sharedArea) { 2753 // The new area uses the old area's cache, but map_backing_store() 2754 // hasn't acquired a ref. So we have to do that now. 
2755 cache->AcquireRefLocked(); 2756 } 2757 2758 // If the source area is writable, we need to move it one layer up as well 2759 2760 if (!sharedArea) { 2761 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2762 // TODO: do something more useful if this fails! 2763 if (vm_copy_on_write_area(cache) < B_OK) 2764 panic("vm_copy_on_write_area() failed!\n"); 2765 } 2766 } 2767 2768 // we return the ID of the newly created area 2769 return target->id; 2770 } 2771 2772 2773 //! You need to hold the cache lock when calling this function 2774 static int32 2775 count_writable_areas(vm_cache* cache, vm_area* ignoreArea) 2776 { 2777 struct vm_area* area = cache->areas; 2778 uint32 count = 0; 2779 2780 for (; area != NULL; area = area->cache_next) { 2781 if (area != ignoreArea 2782 && (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) 2783 count++; 2784 } 2785 2786 return count; 2787 } 2788 2789 2790 static status_t 2791 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2792 bool kernel) 2793 { 2794 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2795 "%#lx)\n", team, areaID, newProtection)); 2796 2797 if (!arch_vm_supports_protection(newProtection)) 2798 return B_NOT_SUPPORTED; 2799 2800 // lock address spaces and cache 2801 MultiAddressSpaceLocker locker; 2802 vm_cache* cache; 2803 vm_area* area; 2804 status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area, 2805 &cache); 2806 AreaCacheLocker cacheLocker(cache); // already locked 2807 2808 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2809 return B_NOT_ALLOWED; 2810 2811 if (area->protection == newProtection) 2812 return B_OK; 2813 2814 if (team != vm_kernel_address_space_id() 2815 && area->address_space->id != team) { 2816 // unless you're the kernel, you are only allowed to set 2817 // the protection of your own areas 2818 return B_NOT_ALLOWED; 2819 } 2820 2821 bool changePageProtection = true; 2822 2823 if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2824 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) { 2825 // writable -> !writable 2826 2827 if (cache->source != NULL && cache->temporary) { 2828 if (count_writable_areas(cache, area) == 0) { 2829 // Since this cache now lives from the pages in its source cache, 2830 // we can change the cache's commitment to take only those pages 2831 // into account that really are in this cache. 2832 2833 status = cache->Commit(cache->page_count * B_PAGE_SIZE); 2834 2835 // TODO: we may be able to join with our source cache, if 2836 // count == 0 2837 } 2838 } 2839 } else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0 2840 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 2841 // !writable -> writable 2842 2843 if (!list_is_empty(&cache->consumers)) { 2844 // There are consumers -- we have to insert a new cache. Fortunately 2845 // vm_copy_on_write_area() does everything that's needed. 2846 changePageProtection = false; 2847 status = vm_copy_on_write_area(cache); 2848 } else { 2849 // No consumers, so we don't need to insert a new one. 
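// Since the area becomes writable, every page may eventually need a private
// copy, so the commitment has to cover the cache's complete virtual range
// again.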
2850 if (cache->source != NULL && cache->temporary) { 2851 // the cache's commitment must contain all possible pages 2852 status = cache->Commit(cache->virtual_end 2853 - cache->virtual_base); 2854 } 2855 2856 if (status == B_OK && cache->source != NULL) { 2857 // There's a source cache, hence we can't just change all pages' 2858 // protection or we might allow writing into pages belonging to 2859 // a lower cache. 2860 changePageProtection = false; 2861 2862 struct vm_translation_map* map 2863 = &area->address_space->translation_map; 2864 map->ops->lock(map); 2865 2866 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2867 vm_page* page = it.Next();) { 2868 addr_t address = area->base 2869 + (page->cache_offset << PAGE_SHIFT); 2870 map->ops->protect(map, address, address - 1 + B_PAGE_SIZE, 2871 newProtection); 2872 } 2873 2874 map->ops->unlock(map); 2875 } 2876 } 2877 } else { 2878 // we don't have anything special to do in all other cases 2879 } 2880 2881 if (status == B_OK) { 2882 // remap existing pages in this cache 2883 struct vm_translation_map* map = &area->address_space->translation_map; 2884 2885 if (changePageProtection) { 2886 map->ops->lock(map); 2887 map->ops->protect(map, area->base, area->base - 1 + area->size, 2888 newProtection); 2889 map->ops->unlock(map); 2890 } 2891 2892 area->protection = newProtection; 2893 } 2894 2895 return status; 2896 } 2897 2898 2899 status_t 2900 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr) 2901 { 2902 vm_address_space* addressSpace = vm_get_address_space(team); 2903 if (addressSpace == NULL) 2904 return B_BAD_TEAM_ID; 2905 2906 uint32 dummyFlags; 2907 status_t status = addressSpace->translation_map.ops->query( 2908 &addressSpace->translation_map, vaddr, paddr, &dummyFlags); 2909 2910 vm_put_address_space(addressSpace); 2911 return status; 2912 } 2913 2914 2915 static inline addr_t 2916 virtual_page_address(vm_area* area, vm_page* page) 2917 { 2918 return area->base 2919 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 2920 } 2921 2922 2923 bool 2924 vm_test_map_modification(vm_page* page) 2925 { 2926 MutexLocker locker(sMappingLock); 2927 2928 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2929 vm_page_mapping* mapping; 2930 while ((mapping = iterator.Next()) != NULL) { 2931 vm_area* area = mapping->area; 2932 vm_translation_map* map = &area->address_space->translation_map; 2933 2934 addr_t physicalAddress; 2935 uint32 flags; 2936 map->ops->lock(map); 2937 map->ops->query(map, virtual_page_address(area, page), 2938 &physicalAddress, &flags); 2939 map->ops->unlock(map); 2940 2941 if ((flags & PAGE_MODIFIED) != 0) 2942 return true; 2943 } 2944 2945 return false; 2946 } 2947 2948 2949 int32 2950 vm_test_map_activation(vm_page* page, bool* _modified) 2951 { 2952 int32 activation = 0; 2953 bool modified = false; 2954 2955 MutexLocker locker(sMappingLock); 2956 2957 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2958 vm_page_mapping* mapping; 2959 while ((mapping = iterator.Next()) != NULL) { 2960 vm_area* area = mapping->area; 2961 vm_translation_map* map = &area->address_space->translation_map; 2962 2963 addr_t physicalAddress; 2964 uint32 flags; 2965 map->ops->lock(map); 2966 map->ops->query(map, virtual_page_address(area, page), 2967 &physicalAddress, &flags); 2968 map->ops->unlock(map); 2969 2970 if ((flags & PAGE_ACCESSED) != 0) 2971 activation++; 2972 if ((flags & PAGE_MODIFIED) != 0) 2973 modified = true; 2974 } 2975 2976 if (_modified != NULL) 2977 
*_modified = modified; 2978 2979 return activation; 2980 } 2981 2982 2983 void 2984 vm_clear_map_flags(vm_page* page, uint32 flags) 2985 { 2986 MutexLocker locker(sMappingLock); 2987 2988 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2989 vm_page_mapping* mapping; 2990 while ((mapping = iterator.Next()) != NULL) { 2991 vm_area* area = mapping->area; 2992 vm_translation_map* map = &area->address_space->translation_map; 2993 2994 map->ops->lock(map); 2995 map->ops->clear_flags(map, virtual_page_address(area, page), flags); 2996 map->ops->unlock(map); 2997 } 2998 } 2999 3000 3001 /*! Removes all mappings from a page. 3002 After you've called this function, the page is unmapped from memory. 3003 The accumulated page flags of all mappings can be found in \a _flags. 3004 */ 3005 void 3006 vm_remove_all_page_mappings(vm_page* page, uint32* _flags) 3007 { 3008 uint32 accumulatedFlags = 0; 3009 MutexLocker locker(sMappingLock); 3010 3011 vm_page_mappings queue; 3012 queue.MoveFrom(&page->mappings); 3013 3014 vm_page_mappings::Iterator iterator = queue.GetIterator(); 3015 vm_page_mapping* mapping; 3016 while ((mapping = iterator.Next()) != NULL) { 3017 vm_area* area = mapping->area; 3018 vm_translation_map* map = &area->address_space->translation_map; 3019 addr_t physicalAddress; 3020 uint32 flags; 3021 3022 map->ops->lock(map); 3023 addr_t address = virtual_page_address(area, page); 3024 map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1)); 3025 map->ops->flush(map); 3026 map->ops->query(map, address, &physicalAddress, &flags); 3027 map->ops->unlock(map); 3028 3029 area->mappings.Remove(mapping); 3030 3031 accumulatedFlags |= flags; 3032 } 3033 3034 if (page->wired_count == 0 && !queue.IsEmpty()) 3035 atomic_add(&gMappedPagesCount, -1); 3036 3037 locker.Unlock(); 3038 3039 // free now unused mappings 3040 3041 while ((mapping = queue.RemoveHead()) != NULL) { 3042 free(mapping); 3043 } 3044 3045 if (_flags != NULL) 3046 *_flags = accumulatedFlags; 3047 } 3048 3049 3050 bool 3051 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified) 3052 { 3053 vm_translation_map* map = &area->address_space->translation_map; 3054 3055 map->ops->lock(map); 3056 3057 addr_t physicalAddress; 3058 uint32 flags; 3059 status_t status = map->ops->query(map, virtualAddress, &physicalAddress, 3060 &flags); 3061 if (status < B_OK || (flags & PAGE_PRESENT) == 0) { 3062 map->ops->unlock(map); 3063 return false; 3064 } 3065 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3066 if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) { 3067 panic("area %p looking up page failed for pa 0x%lx\n", area, 3068 physicalAddress); 3069 } 3070 3071 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) 3072 decrement_page_wired_count(page); 3073 3074 map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1); 3075 3076 if (preserveModified) { 3077 map->ops->flush(map); 3078 3079 status = map->ops->query(map, virtualAddress, &physicalAddress, &flags); 3080 if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED) 3081 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3082 } 3083 3084 map->ops->unlock(map); 3085 3086 if (area->wiring == B_NO_LOCK) { 3087 vm_page_mapping* mapping; 3088 3089 mutex_lock(&sMappingLock); 3090 map->ops->lock(map); 3091 3092 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3093 while (iterator.HasNext()) { 3094 mapping = iterator.Next(); 3095 3096 if (mapping->area == area) { 3097 
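// This is the mapping for our area -- unlink it from both the area's and the
// page's list, release the locks, and free it.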
area->mappings.Remove(mapping); 3098 page->mappings.Remove(mapping); 3099 3100 if (page->mappings.IsEmpty() && page->wired_count == 0) 3101 atomic_add(&gMappedPagesCount, -1); 3102 3103 map->ops->unlock(map); 3104 mutex_unlock(&sMappingLock); 3105 3106 free(mapping); 3107 3108 return true; 3109 } 3110 } 3111 3112 map->ops->unlock(map); 3113 mutex_unlock(&sMappingLock); 3114 3115 dprintf("vm_unmap_page: couldn't find mapping for area %p in page %p\n", 3116 area, page); 3117 } 3118 3119 return true; 3120 } 3121 3122 3123 status_t 3124 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified) 3125 { 3126 vm_translation_map* map = &area->address_space->translation_map; 3127 addr_t end = base + (size - 1); 3128 3129 map->ops->lock(map); 3130 3131 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) { 3132 // iterate through all pages and decrease their wired count 3133 for (addr_t virtualAddress = base; virtualAddress < end; 3134 virtualAddress += B_PAGE_SIZE) { 3135 addr_t physicalAddress; 3136 uint32 flags; 3137 status_t status = map->ops->query(map, virtualAddress, 3138 &physicalAddress, &flags); 3139 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3140 continue; 3141 3142 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3143 if (page == NULL) { 3144 panic("area %p looking up page failed for pa 0x%lx\n", area, 3145 physicalAddress); 3146 } 3147 3148 decrement_page_wired_count(page); 3149 } 3150 } 3151 3152 map->ops->unmap(map, base, end); 3153 if (preserveModified) { 3154 map->ops->flush(map); 3155 3156 for (addr_t virtualAddress = base; virtualAddress < end; 3157 virtualAddress += B_PAGE_SIZE) { 3158 addr_t physicalAddress; 3159 uint32 flags; 3160 status_t status = map->ops->query(map, virtualAddress, 3161 &physicalAddress, &flags); 3162 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3163 continue; 3164 3165 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3166 if (page == NULL) { 3167 panic("area %p looking up page failed for pa 0x%lx\n", area, 3168 physicalAddress); 3169 } 3170 3171 if ((flags & PAGE_MODIFIED) != 0 3172 && page->state != PAGE_STATE_MODIFIED) 3173 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3174 } 3175 } 3176 map->ops->unlock(map); 3177 3178 if (area->wiring == B_NO_LOCK) { 3179 uint32 startOffset = (area->cache_offset + base - area->base) 3180 >> PAGE_SHIFT; 3181 uint32 endOffset = startOffset + (size >> PAGE_SHIFT); 3182 vm_page_mapping* mapping; 3183 vm_area_mappings queue; 3184 3185 mutex_lock(&sMappingLock); 3186 map->ops->lock(map); 3187 3188 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3189 while (iterator.HasNext()) { 3190 mapping = iterator.Next(); 3191 3192 vm_page* page = mapping->page; 3193 if (page->cache_offset < startOffset 3194 || page->cache_offset >= endOffset) 3195 continue; 3196 3197 page->mappings.Remove(mapping); 3198 iterator.Remove(); 3199 3200 if (page->mappings.IsEmpty() && page->wired_count == 0) 3201 atomic_add(&gMappedPagesCount, -1); 3202 3203 queue.Add(mapping); 3204 } 3205 3206 map->ops->unlock(map); 3207 mutex_unlock(&sMappingLock); 3208 3209 while ((mapping = queue.RemoveHead()) != NULL) { 3210 free(mapping); 3211 } 3212 } 3213 3214 return B_OK; 3215 } 3216 3217 3218 /*! When calling this function, you need to have pages reserved! 
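	For areas wired with B_NO_LOCK a vm_page_mapping is allocated and linked
	into both the page's and the area's mapping list; for any other wiring the
	page's wired count is incremented instead.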
*/ 3219 status_t 3220 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection) 3221 { 3222 vm_translation_map* map = &area->address_space->translation_map; 3223 vm_page_mapping* mapping = NULL; 3224 3225 if (area->wiring == B_NO_LOCK) { 3226 mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping)); 3227 if (mapping == NULL) 3228 return B_NO_MEMORY; 3229 3230 mapping->page = page; 3231 mapping->area = area; 3232 } 3233 3234 map->ops->lock(map); 3235 map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE, 3236 protection); 3237 map->ops->unlock(map); 3238 3239 if (area->wiring != B_NO_LOCK) { 3240 increment_page_wired_count(page); 3241 } else { 3242 // insert mapping into lists 3243 MutexLocker locker(sMappingLock); 3244 3245 if (page->mappings.IsEmpty() && page->wired_count == 0) 3246 atomic_add(&gMappedPagesCount, 1); 3247 3248 page->mappings.Add(mapping); 3249 area->mappings.Add(mapping); 3250 } 3251 3252 if (page->usage_count < 0) 3253 page->usage_count = 1; 3254 3255 if (page->state != PAGE_STATE_MODIFIED) 3256 vm_page_set_state(page, PAGE_STATE_ACTIVE); 3257 3258 return B_OK; 3259 } 3260 3261 3262 static int 3263 display_mem(int argc, char** argv) 3264 { 3265 bool physical = false; 3266 addr_t copyAddress; 3267 int32 displayWidth; 3268 int32 itemSize; 3269 int32 num = -1; 3270 addr_t address; 3271 int i = 1, j; 3272 3273 if (argc > 1 && argv[1][0] == '-') { 3274 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3275 physical = true; 3276 i++; 3277 } else 3278 i = 99; 3279 } 3280 3281 if (argc < i + 1 || argc > i + 2) { 3282 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3283 "\tdl - 8 bytes\n" 3284 "\tdw - 4 bytes\n" 3285 "\tds - 2 bytes\n" 3286 "\tdb - 1 byte\n" 3287 "\tstring - a whole string\n" 3288 " -p or --physical only allows memory from a single page to be " 3289 "displayed.\n"); 3290 return 0; 3291 } 3292 3293 address = parse_expression(argv[i]); 3294 3295 if (argc > i + 1) 3296 num = parse_expression(argv[i + 1]); 3297 3298 // build the format string 3299 if (strcmp(argv[0], "db") == 0) { 3300 itemSize = 1; 3301 displayWidth = 16; 3302 } else if (strcmp(argv[0], "ds") == 0) { 3303 itemSize = 2; 3304 displayWidth = 8; 3305 } else if (strcmp(argv[0], "dw") == 0) { 3306 itemSize = 4; 3307 displayWidth = 4; 3308 } else if (strcmp(argv[0], "dl") == 0) { 3309 itemSize = 8; 3310 displayWidth = 2; 3311 } else if (strcmp(argv[0], "string") == 0) { 3312 itemSize = 1; 3313 displayWidth = -1; 3314 } else { 3315 kprintf("display_mem called in an invalid way!\n"); 3316 return 0; 3317 } 3318 3319 if (num <= 0) 3320 num = displayWidth; 3321 3322 void* physicalPageHandle = NULL; 3323 3324 if (physical) { 3325 int32 offset = address & (B_PAGE_SIZE - 1); 3326 if (num * itemSize + offset > B_PAGE_SIZE) { 3327 num = (B_PAGE_SIZE - offset) / itemSize; 3328 kprintf("NOTE: number of bytes has been cut to page size\n"); 3329 } 3330 3331 address = ROUNDOWN(address, B_PAGE_SIZE); 3332 3333 if (vm_get_physical_page_debug(address, ©Address, 3334 &physicalPageHandle) != B_OK) { 3335 kprintf("getting the hardware page failed."); 3336 return 0; 3337 } 3338 3339 address += offset; 3340 copyAddress += offset; 3341 } else 3342 copyAddress = address; 3343 3344 if (!strcmp(argv[0], "string")) { 3345 kprintf("%p \"", (char*)copyAddress); 3346 3347 // string mode 3348 for (i = 0; true; i++) { 3349 char c; 3350 if (user_memcpy(&c, (char*)copyAddress + i, 1) != B_OK 3351 || c == '\0') 3352 break; 3353 3354 if (c == '\n') 3355 
kprintf("\\n"); 3356 else if (c == '\t') 3357 kprintf("\\t"); 3358 else { 3359 if (!isprint(c)) 3360 c = '.'; 3361 3362 kprintf("%c", c); 3363 } 3364 } 3365 3366 kprintf("\"\n"); 3367 } else { 3368 // number mode 3369 for (i = 0; i < num; i++) { 3370 uint32 value; 3371 3372 if ((i % displayWidth) == 0) { 3373 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3374 if (i != 0) 3375 kprintf("\n"); 3376 3377 kprintf("[0x%lx] ", address + i * itemSize); 3378 3379 for (j = 0; j < displayed; j++) { 3380 char c; 3381 if (user_memcpy(&c, (char*)copyAddress + i * itemSize + j, 3382 1) != B_OK) { 3383 displayed = j; 3384 break; 3385 } 3386 if (!isprint(c)) 3387 c = '.'; 3388 3389 kprintf("%c", c); 3390 } 3391 if (num > displayWidth) { 3392 // make sure the spacing in the last line is correct 3393 for (j = displayed; j < displayWidth * itemSize; j++) 3394 kprintf(" "); 3395 } 3396 kprintf(" "); 3397 } 3398 3399 if (user_memcpy(&value, (uint8*)copyAddress + i * itemSize, 3400 itemSize) != B_OK) { 3401 kprintf("read fault"); 3402 break; 3403 } 3404 3405 switch (itemSize) { 3406 case 1: 3407 kprintf(" %02x", *(uint8*)&value); 3408 break; 3409 case 2: 3410 kprintf(" %04x", *(uint16*)&value); 3411 break; 3412 case 4: 3413 kprintf(" %08lx", *(uint32*)&value); 3414 break; 3415 case 8: 3416 kprintf(" %016Lx", *(uint64*)&value); 3417 break; 3418 } 3419 } 3420 3421 kprintf("\n"); 3422 } 3423 3424 if (physical) { 3425 copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE); 3426 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3427 } 3428 return 0; 3429 } 3430 3431 3432 static void 3433 dump_cache_tree_recursively(vm_cache* cache, int level, 3434 vm_cache* highlightCache) 3435 { 3436 // print this cache 3437 for (int i = 0; i < level; i++) 3438 kprintf(" "); 3439 if (cache == highlightCache) 3440 kprintf("%p <--\n", cache); 3441 else 3442 kprintf("%p\n", cache); 3443 3444 // recursively print its consumers 3445 vm_cache* consumer = NULL; 3446 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3447 consumer)) != NULL) { 3448 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3449 } 3450 } 3451 3452 3453 static int 3454 dump_cache_tree(int argc, char** argv) 3455 { 3456 if (argc != 2 || !strcmp(argv[1], "--help")) { 3457 kprintf("usage: %s <address>\n", argv[0]); 3458 return 0; 3459 } 3460 3461 addr_t address = parse_expression(argv[1]); 3462 if (address == 0) 3463 return 0; 3464 3465 vm_cache* cache = (vm_cache*)address; 3466 vm_cache* root = cache; 3467 3468 // find the root cache (the transitive source) 3469 while (root->source != NULL) 3470 root = root->source; 3471 3472 dump_cache_tree_recursively(root, 0, cache); 3473 3474 return 0; 3475 } 3476 3477 3478 static const char* 3479 cache_type_to_string(int32 type) 3480 { 3481 switch (type) { 3482 case CACHE_TYPE_RAM: 3483 return "RAM"; 3484 case CACHE_TYPE_DEVICE: 3485 return "device"; 3486 case CACHE_TYPE_VNODE: 3487 return "vnode"; 3488 case CACHE_TYPE_NULL: 3489 return "null"; 3490 3491 default: 3492 return "unknown"; 3493 } 3494 } 3495 3496 3497 #if DEBUG_CACHE_LIST 3498 3499 static void 3500 update_cache_info_recursively(vm_cache* cache, cache_info& info) 3501 { 3502 info.page_count += cache->page_count; 3503 if (cache->type == CACHE_TYPE_RAM) 3504 info.committed += cache->committed_size; 3505 3506 // recurse 3507 vm_cache* consumer = NULL; 3508 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3509 consumer)) != NULL) { 3510 update_cache_info_recursively(consumer, info); 3511 } 3512 } 3513 
3514 3515 static int 3516 cache_info_compare_page_count(const void* _a, const void* _b) 3517 { 3518 const cache_info* a = (const cache_info*)_a; 3519 const cache_info* b = (const cache_info*)_b; 3520 if (a->page_count == b->page_count) 3521 return 0; 3522 return a->page_count < b->page_count ? 1 : -1; 3523 } 3524 3525 3526 static int 3527 cache_info_compare_committed(const void* _a, const void* _b) 3528 { 3529 const cache_info* a = (const cache_info*)_a; 3530 const cache_info* b = (const cache_info*)_b; 3531 if (a->committed == b->committed) 3532 return 0; 3533 return a->committed < b->committed ? 1 : -1; 3534 } 3535 3536 3537 static void 3538 dump_caches_recursively(vm_cache* cache, cache_info& info, int level) 3539 { 3540 for (int i = 0; i < level; i++) 3541 kprintf(" "); 3542 3543 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3544 cache_type_to_string(cache->type), cache->virtual_base, 3545 cache->virtual_end, cache->page_count); 3546 3547 if (level == 0) 3548 kprintf("/%lu", info.page_count); 3549 3550 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3551 kprintf(", committed: %lld", cache->committed_size); 3552 3553 if (level == 0) 3554 kprintf("/%lu", info.committed); 3555 } 3556 3557 // areas 3558 if (cache->areas != NULL) { 3559 vm_area* area = cache->areas; 3560 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3561 area->address_space->id); 3562 3563 while (area->cache_next != NULL) { 3564 area = area->cache_next; 3565 kprintf(", %ld", area->id); 3566 } 3567 } 3568 3569 kputs("\n"); 3570 3571 // recurse 3572 vm_cache* consumer = NULL; 3573 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3574 consumer)) != NULL) { 3575 dump_caches_recursively(consumer, info, level + 1); 3576 } 3577 } 3578 3579 3580 static int 3581 dump_caches(int argc, char** argv) 3582 { 3583 if (sCacheInfoTable == NULL) { 3584 kprintf("No cache info table!\n"); 3585 return 0; 3586 } 3587 3588 bool sortByPageCount = true; 3589 3590 for (int32 i = 1; i < argc; i++) { 3591 if (strcmp(argv[i], "-c") == 0) { 3592 sortByPageCount = false; 3593 } else { 3594 print_debugger_command_usage(argv[0]); 3595 return 0; 3596 } 3597 } 3598 3599 uint32 totalCount = 0; 3600 uint32 rootCount = 0; 3601 off_t totalCommitted = 0; 3602 page_num_t totalPages = 0; 3603 3604 vm_cache* cache = gDebugCacheList; 3605 while (cache) { 3606 totalCount++; 3607 if (cache->source == NULL) { 3608 cache_info stackInfo; 3609 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3610 ? sCacheInfoTable[rootCount] : stackInfo; 3611 rootCount++; 3612 info.cache = cache; 3613 info.page_count = 0; 3614 info.committed = 0; 3615 update_cache_info_recursively(cache, info); 3616 totalCommitted += info.committed; 3617 totalPages += info.page_count; 3618 } 3619 3620 cache = cache->debug_next; 3621 } 3622 3623 if (rootCount <= (uint32)kCacheInfoTableCount) { 3624 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3625 sortByPageCount 3626 ? &cache_info_compare_page_count 3627 : &cache_info_compare_committed); 3628 } 3629 3630 kprintf("total committed memory: %lld, total used pages: %lu\n", 3631 totalCommitted, totalPages); 3632 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3633 "tree...\n\n", totalCount, rootCount, 3634 sortByPageCount ? 
"page count" : "committed size"); 3635 3636 if (rootCount <= (uint32)kCacheInfoTableCount) { 3637 for (uint32 i = 0; i < rootCount; i++) { 3638 cache_info& info = sCacheInfoTable[i]; 3639 dump_caches_recursively(info.cache, info, 0); 3640 } 3641 } else 3642 kprintf("Cache info table too small! Can't sort and print caches!\n"); 3643 3644 return 0; 3645 } 3646 3647 #endif // DEBUG_CACHE_LIST 3648 3649 3650 static int 3651 dump_cache(int argc, char** argv) 3652 { 3653 vm_cache* cache; 3654 bool showPages = false; 3655 int i = 1; 3656 3657 if (argc < 2 || !strcmp(argv[1], "--help")) { 3658 kprintf("usage: %s [-ps] <address>\n" 3659 " if -p is specified, all pages are shown, if -s is used\n" 3660 " only the cache info is shown respectively.\n", argv[0]); 3661 return 0; 3662 } 3663 while (argv[i][0] == '-') { 3664 char* arg = argv[i] + 1; 3665 while (arg[0]) { 3666 if (arg[0] == 'p') 3667 showPages = true; 3668 arg++; 3669 } 3670 i++; 3671 } 3672 if (argv[i] == NULL) { 3673 kprintf("%s: invalid argument, pass address\n", argv[0]); 3674 return 0; 3675 } 3676 3677 addr_t address = parse_expression(argv[i]); 3678 if (address == 0) 3679 return 0; 3680 3681 cache = (vm_cache*)address; 3682 3683 kprintf("CACHE %p:\n", cache); 3684 kprintf(" ref_count: %ld\n", cache->RefCount()); 3685 kprintf(" source: %p\n", cache->source); 3686 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 3687 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 3688 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 3689 kprintf(" temporary: %ld\n", cache->temporary); 3690 kprintf(" scan_skip: %ld\n", cache->scan_skip); 3691 kprintf(" lock: %p\n", cache->GetLock()); 3692 #if KDEBUG 3693 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 3694 #endif 3695 kprintf(" areas:\n"); 3696 3697 for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) { 3698 kprintf(" area 0x%lx, %s\n", area->id, area->name); 3699 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 3700 kprintf("\tprotection: 0x%lx\n", area->protection); 3701 kprintf("\towner: 0x%lx\n", area->address_space->id); 3702 } 3703 3704 kprintf(" consumers:\n"); 3705 vm_cache* consumer = NULL; 3706 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3707 consumer)) != NULL) { 3708 kprintf("\t%p\n", consumer); 3709 } 3710 3711 kprintf(" pages:\n"); 3712 if (showPages) { 3713 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3714 vm_page* page = it.Next();) { 3715 if (page->type == PAGE_TYPE_PHYSICAL) { 3716 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) " 3717 "wired_count %u\n", page, page->physical_page_number, 3718 page->cache_offset, page->type, page->state, 3719 page_state_to_string(page->state), page->wired_count); 3720 } else if(page->type == PAGE_TYPE_DUMMY) { 3721 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 3722 page, page->state, page_state_to_string(page->state)); 3723 } else 3724 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 3725 } 3726 } else 3727 kprintf("\t%ld in cache\n", cache->page_count); 3728 3729 return 0; 3730 } 3731 3732 3733 static void 3734 dump_area_struct(vm_area* area, bool mappings) 3735 { 3736 kprintf("AREA: %p\n", area); 3737 kprintf("name:\t\t'%s'\n", area->name); 3738 kprintf("owner:\t\t0x%lx\n", area->address_space->id); 3739 kprintf("id:\t\t0x%lx\n", area->id); 3740 kprintf("base:\t\t0x%lx\n", area->base); 3741 kprintf("size:\t\t0x%lx\n", area->size); 3742 kprintf("protection:\t0x%lx\n", area->protection); 3743 
kprintf("wiring:\t\t0x%x\n", area->wiring); 3744 kprintf("memory_type:\t0x%x\n", area->memory_type); 3745 kprintf("cache:\t\t%p\n", area->cache); 3746 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 3747 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3748 kprintf("cache_next:\t%p\n", area->cache_next); 3749 kprintf("cache_prev:\t%p\n", area->cache_prev); 3750 3751 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3752 if (mappings) { 3753 kprintf("page mappings:\n"); 3754 while (iterator.HasNext()) { 3755 vm_page_mapping* mapping = iterator.Next(); 3756 kprintf(" %p", mapping->page); 3757 } 3758 kprintf("\n"); 3759 } else { 3760 uint32 count = 0; 3761 while (iterator.Next() != NULL) { 3762 count++; 3763 } 3764 kprintf("page mappings:\t%lu\n", count); 3765 } 3766 } 3767 3768 3769 static int 3770 dump_area(int argc, char** argv) 3771 { 3772 bool mappings = false; 3773 bool found = false; 3774 int32 index = 1; 3775 vm_area* area; 3776 addr_t num; 3777 3778 if (argc < 2 || !strcmp(argv[1], "--help")) { 3779 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3780 "All areas matching either id/address/name are listed. You can\n" 3781 "force to check only a specific item by prefixing the specifier\n" 3782 "with the id/contains/address/name keywords.\n" 3783 "-m shows the area's mappings as well.\n"); 3784 return 0; 3785 } 3786 3787 if (!strcmp(argv[1], "-m")) { 3788 mappings = true; 3789 index++; 3790 } 3791 3792 int32 mode = 0xf; 3793 if (!strcmp(argv[index], "id")) 3794 mode = 1; 3795 else if (!strcmp(argv[index], "contains")) 3796 mode = 2; 3797 else if (!strcmp(argv[index], "name")) 3798 mode = 4; 3799 else if (!strcmp(argv[index], "address")) 3800 mode = 0; 3801 if (mode != 0xf) 3802 index++; 3803 3804 if (index >= argc) { 3805 kprintf("No area specifier given.\n"); 3806 return 0; 3807 } 3808 3809 num = parse_expression(argv[index]); 3810 3811 if (mode == 0) { 3812 dump_area_struct((struct vm_area*)num, mappings); 3813 } else { 3814 // walk through the area list, looking for the arguments as a name 3815 struct hash_iterator iter; 3816 3817 hash_open(sAreaHash, &iter); 3818 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 3819 if (((mode & 4) != 0 && area->name != NULL 3820 && !strcmp(argv[index], area->name)) 3821 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3822 || (((mode & 2) != 0 && area->base <= num 3823 && area->base + area->size > num))))) { 3824 dump_area_struct(area, mappings); 3825 found = true; 3826 } 3827 } 3828 3829 if (!found) 3830 kprintf("could not find area %s (%ld)\n", argv[index], num); 3831 } 3832 3833 return 0; 3834 } 3835 3836 3837 static int 3838 dump_area_list(int argc, char** argv) 3839 { 3840 vm_area* area; 3841 struct hash_iterator iter; 3842 const char* name = NULL; 3843 int32 id = 0; 3844 3845 if (argc > 1) { 3846 id = parse_expression(argv[1]); 3847 if (id == 0) 3848 name = argv[1]; 3849 } 3850 3851 kprintf("addr id base\t\tsize protect lock name\n"); 3852 3853 hash_open(sAreaHash, &iter); 3854 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 3855 if ((id != 0 && area->address_space->id != id) 3856 || (name != NULL && strstr(area->name, name) == NULL)) 3857 continue; 3858 3859 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 3860 (void*)area->base, (void*)area->size, area->protection, area->wiring, 3861 area->name); 3862 } 3863 hash_close(sAreaHash, &iter, false); 3864 return 0; 3865 } 3866 3867 3868 static int 3869 
dump_available_memory(int argc, char** argv) 3870 { 3871 kprintf("Available memory: %Ld/%lu bytes\n", 3872 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 3873 return 0; 3874 } 3875 3876 3877 status_t 3878 vm_delete_areas(struct vm_address_space* addressSpace) 3879 { 3880 vm_area* area; 3881 vm_area* next; 3882 vm_area* last = NULL; 3883 3884 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 3885 addressSpace->id)); 3886 3887 rw_lock_write_lock(&addressSpace->lock); 3888 3889 // remove all reserved areas in this address space 3890 3891 for (area = addressSpace->areas; area; area = next) { 3892 next = area->address_space_next; 3893 3894 if (area->id == RESERVED_AREA_ID) { 3895 // just remove it 3896 if (last) 3897 last->address_space_next = area->address_space_next; 3898 else 3899 addressSpace->areas = area->address_space_next; 3900 3901 vm_put_address_space(addressSpace); 3902 free(area); 3903 continue; 3904 } 3905 3906 last = area; 3907 } 3908 3909 // delete all the areas in this address space 3910 3911 for (area = addressSpace->areas; area; area = next) { 3912 next = area->address_space_next; 3913 delete_area(addressSpace, area); 3914 } 3915 3916 rw_lock_write_unlock(&addressSpace->lock); 3917 return B_OK; 3918 } 3919 3920 3921 static area_id 3922 vm_area_for(team_id team, addr_t address) 3923 { 3924 AddressSpaceReadLocker locker(team); 3925 if (!locker.IsLocked()) 3926 return B_BAD_TEAM_ID; 3927 3928 vm_area* area = vm_area_lookup(locker.AddressSpace(), address); 3929 if (area != NULL) 3930 return area->id; 3931 3932 return B_ERROR; 3933 } 3934 3935 3936 /*! Frees physical pages that were used during the boot process. 3937 */ 3938 static void 3939 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end) 3940 { 3941 // free all physical pages in the specified range 3942 3943 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3944 addr_t physicalAddress; 3945 uint32 flags; 3946 3947 if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) { 3948 vm_page* page = vm_lookup_page(current / B_PAGE_SIZE); 3949 if (page != NULL) 3950 vm_page_set_state(page, PAGE_STATE_FREE); 3951 } 3952 } 3953 3954 // unmap the memory 3955 map->ops->unmap(map, start, end - 1); 3956 } 3957 3958 3959 void 3960 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3961 { 3962 vm_translation_map* map = &vm_kernel_address_space()->translation_map; 3963 addr_t end = start + size; 3964 addr_t lastEnd = start; 3965 vm_area* area; 3966 3967 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3968 (void*)start, (void*)end)); 3969 3970 // The areas are sorted in virtual address space order, so 3971 // we just have to find the holes between them that fall 3972 // into the area we should dispose 3973 3974 map->ops->lock(map); 3975 3976 for (area = vm_kernel_address_space()->areas; area != NULL; 3977 area = area->address_space_next) { 3978 addr_t areaStart = area->base; 3979 addr_t areaEnd = areaStart + area->size; 3980 3981 if (area->id == RESERVED_AREA_ID) 3982 continue; 3983 3984 if (areaEnd >= end) { 3985 // we are done, the areas are already beyond of what we have to free 3986 lastEnd = end; 3987 break; 3988 } 3989 3990 if (areaStart > lastEnd) { 3991 // this is something we can free 3992 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3993 (void*)areaStart)); 3994 unmap_and_free_physical_pages(map, lastEnd, areaStart); 3995 } 3996 3997 lastEnd = areaEnd; 3998 } 3999 4000 if (lastEnd < end) { 4001 // we can also 
get rid of some space at the end of the area 4002 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 4003 (void*)end)); 4004 unmap_and_free_physical_pages(map, lastEnd, end); 4005 } 4006 4007 map->ops->unlock(map); 4008 } 4009 4010 4011 static void 4012 create_preloaded_image_areas(struct preloaded_image* image) 4013 { 4014 char name[B_OS_NAME_LENGTH]; 4015 void* address; 4016 int32 length; 4017 4018 // use file name to create a good area name 4019 char* fileName = strrchr(image->name, '/'); 4020 if (fileName == NULL) 4021 fileName = image->name; 4022 else 4023 fileName++; 4024 4025 length = strlen(fileName); 4026 // make sure there is enough space for the suffix 4027 if (length > 25) 4028 length = 25; 4029 4030 memcpy(name, fileName, length); 4031 strcpy(name + length, "_text"); 4032 address = (void*)ROUNDOWN(image->text_region.start, B_PAGE_SIZE); 4033 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4034 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 4035 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4036 // this will later be remapped read-only/executable by the 4037 // ELF initialization code 4038 4039 strcpy(name + length, "_data"); 4040 address = (void*)ROUNDOWN(image->data_region.start, B_PAGE_SIZE); 4041 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4042 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 4043 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4044 } 4045 4046 4047 /*! Frees all previously kernel arguments areas from the kernel_args structure. 4048 Any boot loader resources contained in that arguments must not be accessed 4049 anymore past this point. 4050 */ 4051 void 4052 vm_free_kernel_args(kernel_args* args) 4053 { 4054 uint32 i; 4055 4056 TRACE(("vm_free_kernel_args()\n")); 4057 4058 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4059 area_id area = area_for((void*)args->kernel_args_range[i].start); 4060 if (area >= B_OK) 4061 delete_area(area); 4062 } 4063 } 4064 4065 4066 static void 4067 allocate_kernel_args(kernel_args* args) 4068 { 4069 TRACE(("allocate_kernel_args()\n")); 4070 4071 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4072 void* address = (void*)args->kernel_args_range[i].start; 4073 4074 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4075 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4076 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4077 } 4078 } 4079 4080 4081 static void 4082 unreserve_boot_loader_ranges(kernel_args* args) 4083 { 4084 TRACE(("unreserve_boot_loader_ranges()\n")); 4085 4086 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4087 vm_unreserve_address_range(vm_kernel_address_space_id(), 4088 (void*)args->virtual_allocated_range[i].start, 4089 args->virtual_allocated_range[i].size); 4090 } 4091 } 4092 4093 4094 static void 4095 reserve_boot_loader_ranges(kernel_args* args) 4096 { 4097 TRACE(("reserve_boot_loader_ranges()\n")); 4098 4099 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4100 void* address = (void*)args->virtual_allocated_range[i].start; 4101 4102 // If the address is no kernel address, we just skip it. The 4103 // architecture specific code has to deal with it. 
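// (The reservation below is made in the kernel address space, so a range
// outside of it could not be handled here anyway.)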
4104 if (!IS_KERNEL_ADDRESS(address)) { 4105 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 4106 address, args->virtual_allocated_range[i].size); 4107 continue; 4108 } 4109 4110 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), 4111 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4112 if (status < B_OK) 4113 panic("could not reserve boot loader ranges\n"); 4114 } 4115 } 4116 4117 4118 static addr_t 4119 allocate_early_virtual(kernel_args* args, size_t size) 4120 { 4121 addr_t spot = 0; 4122 uint32 i; 4123 int last_valloc_entry = 0; 4124 4125 size = PAGE_ALIGN(size); 4126 // find a slot in the virtual allocation addr range 4127 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 4128 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4129 + args->virtual_allocated_range[i - 1].size; 4130 last_valloc_entry = i; 4131 // check to see if the space between this one and the last is big enough 4132 if (previousRangeEnd >= KERNEL_BASE 4133 && args->virtual_allocated_range[i].start 4134 - previousRangeEnd >= size) { 4135 spot = previousRangeEnd; 4136 args->virtual_allocated_range[i - 1].size += size; 4137 goto out; 4138 } 4139 } 4140 if (spot == 0) { 4141 // we hadn't found one between allocation ranges. this is ok. 4142 // see if there's a gap after the last one 4143 addr_t lastRangeEnd 4144 = args->virtual_allocated_range[last_valloc_entry].start 4145 + args->virtual_allocated_range[last_valloc_entry].size; 4146 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 4147 spot = lastRangeEnd; 4148 args->virtual_allocated_range[last_valloc_entry].size += size; 4149 goto out; 4150 } 4151 // see if there's a gap before the first one 4152 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 4153 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 4154 args->virtual_allocated_range[0].start -= size; 4155 spot = args->virtual_allocated_range[0].start; 4156 goto out; 4157 } 4158 } 4159 } 4160 4161 out: 4162 return spot; 4163 } 4164 4165 4166 static bool 4167 is_page_in_physical_memory_range(kernel_args* args, addr_t address) 4168 { 4169 // TODO: horrible brute-force method of determining if the page can be 4170 // allocated 4171 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4172 if (address >= args->physical_memory_range[i].start 4173 && address < args->physical_memory_range[i].start 4174 + args->physical_memory_range[i].size) 4175 return true; 4176 } 4177 return false; 4178 } 4179 4180 4181 static addr_t 4182 allocate_early_physical_page(kernel_args* args) 4183 { 4184 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4185 addr_t nextPage; 4186 4187 nextPage = args->physical_allocated_range[i].start 4188 + args->physical_allocated_range[i].size; 4189 // see if the page after the next allocated paddr run can be allocated 4190 if (i + 1 < args->num_physical_allocated_ranges 4191 && args->physical_allocated_range[i + 1].size != 0) { 4192 // see if the next page will collide with the next allocated range 4193 if (nextPage >= args->physical_allocated_range[i+1].start) 4194 continue; 4195 } 4196 // see if the next physical page fits in the memory block 4197 if (is_page_in_physical_memory_range(args, nextPage)) { 4198 // we got one! 4199 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4200 return nextPage / B_PAGE_SIZE; 4201 } 4202 } 4203 4204 return 0; 4205 // could not allocate a block 4206 } 4207 4208 4209 /*! 
This one uses the kernel_args' physical and virtual memory ranges to
	allocate some pages before the VM is completely up.
*/
addr_t
vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
	uint32 attributes)
{
	if (physicalSize > virtualSize)
		physicalSize = virtualSize;

	// find the vaddr to allocate at
	addr_t virtualBase = allocate_early_virtual(args, virtualSize);
	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);

	// map the pages
	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
		addr_t physicalAddress = allocate_early_physical_page(args);
		if (physicalAddress == 0)
			panic("error allocating early page!\n");

		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);

		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
			physicalAddress * B_PAGE_SIZE, attributes,
			&allocate_early_physical_page);
	}

	return virtualBase;
}


/*! The main entry point to initialize the VM. */
status_t
vm_init(kernel_args* args)
{
	struct preloaded_image* image;
	void* address;
	status_t err = 0;
	uint32 i;

	TRACE(("vm_init: entry\n"));
	err = arch_vm_translation_map_init(args);
	err = arch_vm_init(args);

	// initialize some globals
	sNextAreaID = 1;

	vm_page_init_num_pages(args);
	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;

	size_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
	while (heapSize > sAvailableMemory / 8)
		heapSize /= 2;
	if (heapSize < 1024 * 1024)
		panic("vm_init: go buy some RAM please.");

	// map in the new heap and initialize it
	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	TRACE(("heap at 0x%lx\n", heapBase));
	heap_init(heapBase, heapSize);

	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	slab_init(args, slabInitialBase, slabInitialSize);

	// initialize the free page list and physical page mapper
	vm_page_init(args);

	// initialize the hash table that stores the pages mapped to caches
	vm_cache_init(args);

	{
		vm_area* area;
		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
			(addr_t)&area->hash_next - (addr_t)area,
			&area_compare, &area_hash);
		if (sAreaHash == NULL)
			panic("vm_init: error creating aspace hash table\n");
	}

	vm_address_space_init();
	reserve_boot_loader_ranges(args);

	// Do any further initialization that the architecture dependent layers may
	// need now
	arch_vm_translation_map_init_post_area(args);
	arch_vm_init_post_area(args);
	vm_page_init_post_area(args);

	// allocate areas to represent stuff that already exists

	address = (void*)ROUNDOWN(heapBase, B_PAGE_SIZE);
	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);

	address = (void*)ROUNDOWN(slabInitialBase, B_PAGE_SIZE);
	create_area("initial slab space", &address, B_EXACT_ADDRESS,
		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
		| B_KERNEL_WRITE_AREA);

	allocate_kernel_args(args);

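	// The kernel image itself is handled like the preloaded add-ons below:
	// create_preloaded_image_areas() registers a "<file name>_text" and a
	// "<file name>_data" area for its already wired regions.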
create_preloaded_image_areas(&args->kernel_image); 4315 4316 // allocate areas for preloaded images 4317 for (image = args->preloaded_images; image != NULL; image = image->next) { 4318 create_preloaded_image_areas(image); 4319 } 4320 4321 // allocate kernel stacks 4322 for (i = 0; i < args->num_cpus; i++) { 4323 char name[64]; 4324 4325 sprintf(name, "idle thread %lu kstack", i + 1); 4326 address = (void*)args->cpu_kstack[i].start; 4327 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4328 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4329 } 4330 4331 #if DEBUG_CACHE_LIST 4332 create_area("cache info table", (void**)&sCacheInfoTable, 4333 B_ANY_KERNEL_ADDRESS, 4334 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4335 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4336 #endif // DEBUG_CACHE_LIST 4337 4338 // add some debugger commands 4339 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4340 add_debugger_command("area", &dump_area, 4341 "Dump info about a particular area"); 4342 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 4343 add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree"); 4344 #if DEBUG_CACHE_LIST 4345 add_debugger_command_etc("caches", &dump_caches, 4346 "List all vm_cache trees", 4347 "[ \"-c\" ]\n" 4348 "All cache trees are listed sorted in decreasing order by number of\n" 4349 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 4350 0); 4351 #endif 4352 add_debugger_command("avail", &dump_available_memory, 4353 "Dump available memory"); 4354 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4355 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4356 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4357 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4358 add_debugger_command("string", &display_mem, "dump strings"); 4359 4360 TRACE(("vm_init: exit\n")); 4361 4362 return err; 4363 } 4364 4365 4366 status_t 4367 vm_init_post_sem(kernel_args* args) 4368 { 4369 // This frees all unused boot loader resources and makes its space available 4370 // again 4371 arch_vm_init_end(args); 4372 unreserve_boot_loader_ranges(args); 4373 4374 // fill in all of the semaphores that were not allocated before 4375 // since we're still single threaded and only the kernel address space 4376 // exists, it isn't that hard to find all of the ones we need to create 4377 4378 arch_vm_translation_map_init_post_sem(args); 4379 vm_address_space_init_post_sem(); 4380 4381 slab_init_post_sem(); 4382 return heap_init_post_sem(); 4383 } 4384 4385 4386 status_t 4387 vm_init_post_thread(kernel_args* args) 4388 { 4389 vm_page_init_post_thread(args); 4390 vm_daemon_init(); 4391 slab_init_post_thread(); 4392 return heap_init_post_thread(); 4393 } 4394 4395 4396 status_t 4397 vm_init_post_modules(kernel_args* args) 4398 { 4399 return arch_vm_init_post_modules(args); 4400 } 4401 4402 4403 void 4404 permit_page_faults(void) 4405 { 4406 struct thread* thread = thread_get_current_thread(); 4407 if (thread != NULL) 4408 atomic_add(&thread->page_faults_allowed, 1); 4409 } 4410 4411 4412 void 4413 forbid_page_faults(void) 4414 { 4415 struct thread* thread = thread_get_current_thread(); 4416 if (thread != NULL) 4417 atomic_add(&thread->page_faults_allowed, -1); 4418 } 4419 4420 4421 status_t 4422 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 4423 addr_t* 
newIP) 4424 { 4425 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4426 faultAddress)); 4427 4428 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4429 4430 addr_t pageAddress = ROUNDOWN(address, B_PAGE_SIZE); 4431 vm_address_space* addressSpace = NULL; 4432 4433 status_t status = B_OK; 4434 *newIP = 0; 4435 atomic_add((int32*)&sPageFaults, 1); 4436 4437 if (IS_KERNEL_ADDRESS(pageAddress)) { 4438 addressSpace = vm_get_kernel_address_space(); 4439 } else if (IS_USER_ADDRESS(pageAddress)) { 4440 addressSpace = vm_get_current_user_address_space(); 4441 if (addressSpace == NULL) { 4442 if (!isUser) { 4443 dprintf("vm_page_fault: kernel thread accessing invalid user " 4444 "memory!\n"); 4445 status = B_BAD_ADDRESS; 4446 TPF(PageFaultError(-1, 4447 VMPageFaultTracing 4448 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4449 } else { 4450 // XXX weird state. 4451 panic("vm_page_fault: non kernel thread accessing user memory " 4452 "that doesn't exist!\n"); 4453 status = B_BAD_ADDRESS; 4454 } 4455 } 4456 } else { 4457 // the hit was probably in the 64k DMZ between kernel and user space 4458 // this keeps a user space thread from passing a buffer that crosses 4459 // into kernel space 4460 status = B_BAD_ADDRESS; 4461 TPF(PageFaultError(-1, 4462 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4463 } 4464 4465 if (status == B_OK) 4466 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser); 4467 4468 if (status < B_OK) { 4469 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4470 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4471 strerror(status), address, faultAddress, isWrite, isUser, 4472 thread_get_current_thread_id()); 4473 if (!isUser) { 4474 struct thread* thread = thread_get_current_thread(); 4475 if (thread != NULL && thread->fault_handler != 0) { 4476 // this will cause the arch dependant page fault handler to 4477 // modify the IP on the interrupt frame or whatever to return 4478 // to this address 4479 *newIP = thread->fault_handler; 4480 } else { 4481 // unhandled page fault in the kernel 4482 panic("vm_page_fault: unhandled page fault in kernel space at " 4483 "0x%lx, ip 0x%lx\n", address, faultAddress); 4484 } 4485 } else { 4486 #if 1 4487 rw_lock_read_lock(&addressSpace->lock); 4488 4489 // TODO: remove me once we have proper userland debugging support 4490 // (and tools) 4491 vm_area* area = vm_area_lookup(addressSpace, faultAddress); 4492 4493 struct thread* thread = thread_get_current_thread(); 4494 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4495 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4496 thread->name, thread->id, thread->team->name, thread->team->id, 4497 isWrite ? "write" : "read", address, faultAddress, 4498 area ? area->name : "???", 4499 faultAddress - (area ? area->base : 0x0)); 4500 4501 // We can print a stack trace of the userland thread here. 4502 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4503 // fault and someone is already waiting for a write lock on the same address 4504 // space. This thread will then try to acquire the lock again and will 4505 // be queued after the writer. 4506 #if 0 4507 if (area) { 4508 struct stack_frame { 4509 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4510 struct stack_frame* previous; 4511 void* return_address; 4512 #else 4513 // ... 
4514 #warning writeme 4515 #endif 4516 } frame; 4517 #ifdef __INTEL__ 4518 struct iframe* iframe = i386_get_user_iframe(); 4519 if (iframe == NULL) 4520 panic("iframe is NULL!"); 4521 4522 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4523 sizeof(struct stack_frame)); 4524 #elif defined(__POWERPC__) 4525 struct iframe* iframe = ppc_get_user_iframe(); 4526 if (iframe == NULL) 4527 panic("iframe is NULL!"); 4528 4529 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4530 sizeof(struct stack_frame)); 4531 #else 4532 # warning "vm_page_fault() stack trace won't work" 4533 status = B_ERROR; 4534 #endif 4535 4536 dprintf("stack trace:\n"); 4537 int32 maxFrames = 50; 4538 while (status == B_OK && --maxFrames >= 0 4539 && frame.return_address != NULL) { 4540 dprintf(" %p", frame.return_address); 4541 area = vm_area_lookup(addressSpace, 4542 (addr_t)frame.return_address); 4543 if (area) { 4544 dprintf(" (%s + %#lx)", area->name, 4545 (addr_t)frame.return_address - area->base); 4546 } 4547 dprintf("\n"); 4548 4549 status = user_memcpy(&frame, frame.previous, 4550 sizeof(struct stack_frame)); 4551 } 4552 } 4553 #endif // 0 (stack trace) 4554 4555 rw_lock_read_unlock(&addressSpace->lock); 4556 #endif 4557 4558 // TODO: the fault_callback is a temporary solution for vm86 4559 if (thread->fault_callback == NULL 4560 || thread->fault_callback(address, faultAddress, isWrite)) { 4561 // If the thread has a signal handler for SIGSEGV, we simply 4562 // send it the signal. Otherwise we notify the user debugger 4563 // first. 4564 struct sigaction action; 4565 if (sigaction(SIGSEGV, NULL, &action) == 0 4566 && action.sa_handler != SIG_DFL 4567 && action.sa_handler != SIG_IGN) { 4568 send_signal(thread->id, SIGSEGV); 4569 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4570 SIGSEGV)) { 4571 send_signal(thread->id, SIGSEGV); 4572 } 4573 } 4574 } 4575 } 4576 4577 if (addressSpace != NULL) 4578 vm_put_address_space(addressSpace); 4579 4580 return B_HANDLED_INTERRUPT; 4581 } 4582 4583 4584 class VMCacheChainLocker { 4585 public: 4586 VMCacheChainLocker() 4587 : 4588 fTopCache(NULL), 4589 fBottomCache(NULL) 4590 { 4591 } 4592 4593 void SetTo(VMCache* topCache) 4594 { 4595 fTopCache = topCache; 4596 fBottomCache = topCache; 4597 } 4598 4599 VMCache* LockSourceCache() 4600 { 4601 if (fBottomCache == NULL || fBottomCache->source == NULL) 4602 return NULL; 4603 4604 fBottomCache = fBottomCache->source; 4605 fBottomCache->Lock(); 4606 fBottomCache->AcquireRefLocked(); 4607 4608 return fBottomCache; 4609 } 4610 4611 void Unlock() 4612 { 4613 if (fTopCache == NULL) 4614 return; 4615 4616 VMCache* cache = fTopCache; 4617 while (cache != NULL) { 4618 VMCache* nextCache = cache->source; 4619 cache->ReleaseRefAndUnlock(); 4620 4621 if (cache == fBottomCache) 4622 break; 4623 4624 cache = nextCache; 4625 } 4626 4627 fTopCache = NULL; 4628 fBottomCache = NULL; 4629 } 4630 4631 private: 4632 VMCache* fTopCache; 4633 VMCache* fBottomCache; 4634 }; 4635 4636 4637 struct PageFaultContext { 4638 AddressSpaceReadLocker addressSpaceLocker; 4639 VMCacheChainLocker cacheChainLocker; 4640 4641 vm_translation_map* map; 4642 vm_cache* topCache; 4643 off_t cacheOffset; 4644 bool isWrite; 4645 4646 // return values 4647 vm_page* page; 4648 bool restart; 4649 4650 4651 PageFaultContext(vm_address_space* addressSpace, bool isWrite) 4652 : 4653 addressSpaceLocker(addressSpace, true), 4654 map(&addressSpace->translation_map), 4655 isWrite(isWrite) 4656 { 4657 } 4658 4659 ~PageFaultContext() 4660 { 4661 
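		// The context is destroyed on every exit path of vm_soft_fault(),
		// so no address space or cache lock may survive it.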
UnlockAll(); 4662 } 4663 4664 void Prepare(VMCache* topCache, off_t cacheOffset) 4665 { 4666 this->topCache = topCache; 4667 this->cacheOffset = cacheOffset; 4668 page = NULL; 4669 restart = false; 4670 4671 cacheChainLocker.SetTo(topCache); 4672 } 4673 4674 void UnlockAll() 4675 { 4676 topCache = NULL; 4677 addressSpaceLocker.Unlock(); 4678 cacheChainLocker.Unlock(); 4679 } 4680 }; 4681 4682 4683 /*! Gets the page that should be mapped into the area. 4684 Returns an error code other than \c B_OK, if the page couldn't be found or 4685 paged in. The locking state of the address space and the caches is undefined 4686 in that case. 4687 Returns \c B_OK with \c context.restart set to \c true, if the functions 4688 had to unlock the address space and all caches and is supposed to be called 4689 again. 4690 Returns \c B_OK with \c context.restart set to \c false, if the page was 4691 found. It is returned in \c context.page. The address space will still be 4692 locked as well as all caches starting from the top cache to at least the 4693 cache the page lives in. 4694 */ 4695 static inline status_t 4696 fault_get_page(PageFaultContext& context) 4697 { 4698 vm_cache* cache = context.topCache; 4699 vm_cache* lastCache = NULL; 4700 vm_page* page = NULL; 4701 4702 while (cache != NULL) { 4703 // We already hold the lock of the cache at this point. 4704 4705 lastCache = cache; 4706 4707 for (;;) { 4708 page = cache->LookupPage(context.cacheOffset); 4709 if (page == NULL || page->state != PAGE_STATE_BUSY) { 4710 // Either there is no page or there is one and it is not busy. 4711 break; 4712 } 4713 4714 // page must be busy -- wait for it to become unbusy 4715 ConditionVariableEntry entry; 4716 entry.Add(page); 4717 context.UnlockAll(); 4718 entry.Wait(); 4719 4720 // restart the whole process 4721 context.restart = true; 4722 return B_OK; 4723 } 4724 4725 if (page != NULL) 4726 break; 4727 4728 // The current cache does not contain the page we're looking for. 4729 4730 // see if the backing store has it 4731 if (cache->HasPage(context.cacheOffset)) { 4732 // insert a fresh page and mark it busy -- we're going to read it in 4733 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4734 cache->InsertPage(page, context.cacheOffset); 4735 4736 ConditionVariable busyCondition; 4737 busyCondition.Publish(page, "page"); 4738 4739 // We need to unlock all caches and the address space while reading 4740 // the page in. Keep a reference to the cache around. 4741 cache->AcquireRefLocked(); 4742 context.UnlockAll(); 4743 4744 // read the page in 4745 iovec vec; 4746 vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE); 4747 size_t bytesRead = vec.iov_len = B_PAGE_SIZE; 4748 4749 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4750 B_PHYSICAL_IO_REQUEST, &bytesRead); 4751 4752 cache->Lock(); 4753 4754 if (status < B_OK) { 4755 // on error remove and free the page 4756 dprintf("reading page from cache %p returned: %s!\n", 4757 cache, strerror(status)); 4758 4759 busyCondition.Unpublish(); 4760 cache->RemovePage(page); 4761 vm_page_set_state(page, PAGE_STATE_FREE); 4762 4763 cache->ReleaseRefAndUnlock(); 4764 return status; 4765 } 4766 4767 // mark the page unbusy again 4768 page->state = PAGE_STATE_ACTIVE; 4769 busyCondition.Unpublish(); 4770 4771 // Since we needed to unlock everything temporarily, the area 4772 // situation might have changed. So we need to restart the whole 4773 // process. 
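			// (vm_soft_fault() checks context.restart and simply starts
			// over with the address space lock re-acquired.)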
4774 cache->ReleaseRefAndUnlock(); 4775 context.restart = true; 4776 return B_OK; 4777 } 4778 4779 cache = context.cacheChainLocker.LockSourceCache(); 4780 } 4781 4782 if (page == NULL) { 4783 // There was no adequate page, determine the cache for a clean one. 4784 // Read-only pages come in the deepest cache, only the top most cache 4785 // may have direct write access. 4786 cache = context.isWrite ? context.topCache : lastCache; 4787 4788 // allocate a clean page 4789 page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 4790 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 4791 page->physical_page_number)); 4792 4793 // insert the new page into our cache 4794 cache->InsertPage(page, context.cacheOffset); 4795 4796 } else if (page->cache != context.topCache && context.isWrite) { 4797 // We have a page that has the data we want, but in the wrong cache 4798 // object so we need to copy it and stick it into the top cache. 4799 vm_page* sourcePage = page; 4800 4801 // TODO: If memory is low, it might be a good idea to steal the page 4802 // from our source cache -- if possible, that is. 4803 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4804 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4805 4806 // copy the page 4807 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4808 sourcePage->physical_page_number * B_PAGE_SIZE); 4809 4810 // insert the new page into our cache 4811 context.topCache->InsertPage(page, context.cacheOffset); 4812 } 4813 4814 context.page = page; 4815 return B_OK; 4816 } 4817 4818 4819 static status_t 4820 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress, 4821 bool isWrite, bool isUser) 4822 { 4823 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 4824 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 4825 4826 PageFaultContext context(addressSpace, isWrite); 4827 4828 addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE); 4829 status_t status = B_OK; 4830 4831 atomic_add(&addressSpace->fault_count, 1); 4832 4833 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4834 // the pages upfront makes sure we don't have any cache locked, so that the 4835 // page daemon/thief can do their job without problems. 4836 size_t reservePages = 2 + context.map->ops->map_max_pages_need(context.map, 4837 originalAddress, originalAddress); 4838 context.addressSpaceLocker.Unlock(); 4839 vm_page_reserve_pages(reservePages); 4840 4841 while (true) { 4842 context.addressSpaceLocker.Lock(); 4843 4844 // get the area the fault was in 4845 vm_area* area = vm_area_lookup(addressSpace, address); 4846 if (area == NULL) { 4847 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4848 "space\n", originalAddress); 4849 TPF(PageFaultError(-1, 4850 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4851 status = B_BAD_ADDRESS; 4852 break; 4853 } 4854 4855 // check permissions 4856 uint32 protection = get_area_page_protection(area, address); 4857 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4858 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 4859 (void*)originalAddress); 4860 TPF(PageFaultError(area->id, 4861 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4862 status = B_PERMISSION_DENIED; 4863 break; 4864 } 4865 if (isWrite && (protection 4866 & (B_WRITE_AREA | (isUser ? 
0 : B_KERNEL_WRITE_AREA))) == 0) { 4867 dprintf("write access attempted on read-only area 0x%lx at %p\n", 4868 area->id, (void*)originalAddress); 4869 TPF(PageFaultError(area->id, 4870 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_ONLY)); 4871 status = B_PERMISSION_DENIED; 4872 break; 4873 } 4874 4875 // We have the area, it was a valid access, so let's try to resolve the 4876 // page fault now. 4877 // At first, the top most cache from the area is investigated. 4878 4879 context.Prepare(vm_area_get_locked_cache(area), 4880 address - area->base + area->cache_offset); 4881 4882 // See if this cache has a fault handler -- this will do all the work 4883 // for us. 4884 { 4885 // Note, since the page fault is resolved with interrupts enabled, 4886 // the fault handler could be called more than once for the same 4887 // reason -- the store must take this into account. 4888 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4889 if (status != B_BAD_HANDLER) 4890 break; 4891 } 4892 4893 // The top most cache has no fault handler, so let's see if the cache or 4894 // its sources already have the page we're searching for (we're going 4895 // from top to bottom). 4896 status = fault_get_page(context); 4897 if (status != B_OK) { 4898 TPF(PageFaultError(area->id, status)); 4899 break; 4900 } 4901 4902 if (context.restart) 4903 continue; 4904 4905 // All went fine, all there is left to do is to map the page into the 4906 // address space. 4907 TPF(PageFaultDone(area->id, context.topCache, context.page->cache, 4908 context.page)); 4909 4910 // If the page doesn't reside in the area's cache, we need to make sure 4911 // it's mapped in read-only, so that we cannot overwrite someone else's 4912 // data (copy-on-write) 4913 uint32 newProtection = protection; 4914 if (context.page->cache != context.topCache && !isWrite) 4915 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4916 4917 bool unmapPage = false; 4918 bool mapPage = true; 4919 4920 // check whether there's already a page mapped at the address 4921 context.map->ops->lock(context.map); 4922 4923 addr_t physicalAddress; 4924 uint32 flags; 4925 vm_page* mappedPage; 4926 if (context.map->ops->query(context.map, address, &physicalAddress, 4927 &flags) == B_OK 4928 && (flags & PAGE_PRESENT) != 0 4929 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4930 != NULL) { 4931 // Yep there's already a page. If it's ours, we can simply adjust 4932 // its protection. Otherwise we have to unmap it. 4933 if (mappedPage == context.page) { 4934 context.map->ops->protect(context.map, address, 4935 address + (B_PAGE_SIZE - 1), newProtection); 4936 4937 mapPage = false; 4938 } else 4939 unmapPage = true; 4940 } 4941 4942 context.map->ops->unlock(context.map); 4943 4944 if (unmapPage) 4945 vm_unmap_page(area, address, true); 4946 4947 if (mapPage) 4948 vm_map_page(area, context.page, address, newProtection); 4949 4950 break; 4951 } 4952 4953 vm_page_unreserve_pages(reservePages); 4954 4955 return status; 4956 } 4957 4958 4959 /*! 
You must have the address space's sem held */ 4960 vm_area* 4961 vm_area_lookup(vm_address_space* addressSpace, addr_t address) 4962 { 4963 vm_area* area; 4964 4965 // check the areas list first 4966 area = addressSpace->area_hint; 4967 if (area != NULL 4968 && area->base <= address 4969 && area->base + (area->size - 1) >= address) 4970 goto found; 4971 4972 for (area = addressSpace->areas; area != NULL; 4973 area = area->address_space_next) { 4974 if (area->id == RESERVED_AREA_ID) 4975 continue; 4976 4977 if (area->base <= address && area->base + (area->size - 1) >= address) 4978 break; 4979 } 4980 4981 found: 4982 if (area) 4983 addressSpace->area_hint = area; 4984 4985 return area; 4986 } 4987 4988 4989 status_t 4990 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 4991 { 4992 return vm_kernel_address_space()->translation_map.ops->get_physical_page( 4993 paddr, _vaddr, _handle); 4994 } 4995 4996 status_t 4997 vm_put_physical_page(addr_t vaddr, void* handle) 4998 { 4999 return vm_kernel_address_space()->translation_map.ops->put_physical_page( 5000 vaddr, handle); 5001 } 5002 5003 5004 status_t 5005 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 5006 { 5007 return vm_kernel_address_space()->translation_map.ops 5008 ->get_physical_page_current_cpu(paddr, _vaddr, _handle); 5009 } 5010 5011 status_t 5012 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5013 { 5014 return vm_kernel_address_space()->translation_map.ops 5015 ->put_physical_page_current_cpu(vaddr, handle); 5016 } 5017 5018 5019 status_t 5020 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 5021 { 5022 return vm_kernel_address_space()->translation_map.ops 5023 ->get_physical_page_debug(paddr, _vaddr, _handle); 5024 } 5025 5026 status_t 5027 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5028 { 5029 return vm_kernel_address_space()->translation_map.ops 5030 ->put_physical_page_debug(vaddr, handle); 5031 } 5032 5033 5034 void 5035 vm_get_info(system_memory_info* info) 5036 { 5037 swap_get_info(info); 5038 5039 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 5040 info->page_faults = sPageFaults; 5041 5042 MutexLocker locker(sAvailableMemoryLock); 5043 info->free_memory = sAvailableMemory; 5044 info->needed_memory = sNeededMemory; 5045 } 5046 5047 5048 uint32 5049 vm_num_page_faults(void) 5050 { 5051 return sPageFaults; 5052 } 5053 5054 5055 off_t 5056 vm_available_memory(void) 5057 { 5058 MutexLocker locker(sAvailableMemoryLock); 5059 return sAvailableMemory; 5060 } 5061 5062 5063 off_t 5064 vm_available_not_needed_memory(void) 5065 { 5066 MutexLocker locker(sAvailableMemoryLock); 5067 return sAvailableMemory - sNeededMemory; 5068 } 5069 5070 5071 void 5072 vm_unreserve_memory(size_t amount) 5073 { 5074 mutex_lock(&sAvailableMemoryLock); 5075 5076 sAvailableMemory += amount; 5077 5078 mutex_unlock(&sAvailableMemoryLock); 5079 } 5080 5081 5082 status_t 5083 vm_try_reserve_memory(size_t amount, bigtime_t timeout) 5084 { 5085 MutexLocker locker(sAvailableMemoryLock); 5086 5087 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5088 5089 if (sAvailableMemory >= amount) { 5090 sAvailableMemory -= amount; 5091 return B_OK; 5092 } 5093 5094 if (timeout <= 0) 5095 return B_NO_MEMORY; 5096 5097 // turn timeout into an absolute timeout 5098 timeout += system_time(); 5099 5100 // loop until we've got the memory or the timeout occurs 5101 do { 5102 sNeededMemory += amount; 5103 5104 // call the low resource manager 5105 
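		// (sAvailableMemoryLock is dropped while we wait, so that whoever
		// returns memory via vm_unreserve_memory() can actually get in.)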
		locker.Unlock();
		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
			B_ABSOLUTE_TIMEOUT, timeout);
		locker.Lock();

		sNeededMemory -= amount;

		if (sAvailableMemory >= amount) {
			sAvailableMemory -= amount;
			return B_OK;
		}
	} while (timeout > system_time());

	return B_NO_MEMORY;
}


status_t
vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
{
	AddressSpaceReadLocker locker;
	vm_area* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	return arch_vm_set_memory_type(area, physicalBase, type);
}


/*! This function enforces some protection properties:
	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
	   and B_KERNEL_WRITE_AREA.
*/
static void
fix_protection(uint32* protection)
{
	if ((*protection & B_KERNEL_PROTECTION) == 0) {
		if ((*protection & B_USER_PROTECTION) == 0
			|| (*protection & B_WRITE_AREA) != 0)
			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
		else
			*protection |= B_KERNEL_READ_AREA;
	}
}


static void
fill_area_info(struct vm_area* area, area_info* info, size_t size)
{
	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
	info->area = area->id;
	info->address = (void*)area->base;
	info->size = area->size;
	info->protection = area->protection;
	info->lock = B_FULL_LOCK;
	info->team = area->address_space->id;
	info->copy_count = 0;
	info->in_count = 0;
	info->out_count = 0;
		// TODO: retrieve real values here!

	vm_cache* cache = vm_area_get_locked_cache(area);

	// Note, this is a simplification; the cache could be larger than this area
	info->ram_size = cache->page_count * B_PAGE_SIZE;

	vm_area_put_locked_cache(cache);
}


/*!
	Tests whether the area that contains the specified address actually
	exists, and whether it needs any kind of locking.
	Used by both lock_memory() and unlock_memory().
*/
static status_t
test_lock_memory(vm_address_space* addressSpace, addr_t address,
	bool& needsLocking)
{
	rw_lock_read_lock(&addressSpace->lock);

	vm_area* area = vm_area_lookup(addressSpace, address);
	if (area != NULL) {
		// This determines if we need to lock the memory at all
		needsLocking = area->cache_type != CACHE_TYPE_NULL
			&& area->cache_type != CACHE_TYPE_DEVICE
			&& area->wiring != B_FULL_LOCK
			&& area->wiring != B_CONTIGUOUS;
	}

	rw_lock_read_unlock(&addressSpace->lock);

	if (area == NULL)
		return B_BAD_ADDRESS;

	return B_OK;
}


static status_t
vm_resize_area(area_id areaID, size_t newSize, bool kernel)
{
	// is newSize a multiple of B_PAGE_SIZE?
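	// Example (assuming 4 KB pages): newSize == 0x10800 gives
	// 0x10800 & 0xfff == 0x800 != 0, so the request is rejected with
	// B_BAD_VALUE below.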
5211 if (newSize & (B_PAGE_SIZE - 1)) 5212 return B_BAD_VALUE; 5213 5214 // lock all affected address spaces and the cache 5215 vm_area* area; 5216 vm_cache* cache; 5217 5218 MultiAddressSpaceLocker locker; 5219 status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area, 5220 &cache); 5221 if (status != B_OK) 5222 return status; 5223 AreaCacheLocker cacheLocker(cache); // already locked 5224 5225 // enforce restrictions 5226 if (!kernel) { 5227 if ((area->protection & B_KERNEL_AREA) != 0) 5228 return B_NOT_ALLOWED; 5229 // TODO: Enforce all restrictions (team, etc.)! 5230 } 5231 5232 size_t oldSize = area->size; 5233 if (newSize == oldSize) 5234 return B_OK; 5235 5236 // Resize all areas of this area's cache 5237 5238 if (cache->type != CACHE_TYPE_RAM) 5239 return B_NOT_ALLOWED; 5240 5241 if (oldSize < newSize) { 5242 // We need to check if all areas of this cache can be resized 5243 5244 for (vm_area* current = cache->areas; current != NULL; 5245 current = current->cache_next) { 5246 vm_area* next = current->address_space_next; 5247 if (next != NULL && next->base <= (current->base + newSize)) { 5248 // If the area was created inside a reserved area, it can 5249 // also be resized in that area 5250 // TODO: if there is free space after the reserved area, it could 5251 // be used as well... 5252 if (next->id == RESERVED_AREA_ID 5253 && next->cache_offset <= current->base 5254 && next->base - 1 + next->size 5255 >= current->base - 1 + newSize) 5256 continue; 5257 5258 return B_ERROR; 5259 } 5260 } 5261 } 5262 5263 // Okay, looks good so far, so let's do it 5264 5265 if (oldSize < newSize) { 5266 // Growing the cache can fail, so we do it first. 5267 status = cache->Resize(cache->virtual_base + newSize); 5268 if (status != B_OK) 5269 return status; 5270 } 5271 5272 for (vm_area* current = cache->areas; current != NULL; 5273 current = current->cache_next) { 5274 vm_area* next = current->address_space_next; 5275 if (next != NULL && next->base <= (current->base + newSize)) { 5276 if (next->id == RESERVED_AREA_ID 5277 && next->cache_offset <= current->base 5278 && next->base - 1 + next->size >= current->base - 1 + newSize) { 5279 // resize reserved area 5280 addr_t offset = current->base + newSize - next->base; 5281 if (next->size <= offset) { 5282 current->address_space_next = next->address_space_next; 5283 free(next); 5284 } else { 5285 next->size -= offset; 5286 next->base += offset; 5287 } 5288 } else { 5289 panic("resize situation for area %p has changed although we " 5290 "should have the address space lock", current); 5291 status = B_ERROR; 5292 break; 5293 } 5294 } 5295 5296 current->size = newSize; 5297 5298 // We also need to unmap all pages beyond the new size, if the area has 5299 // shrinked 5300 if (newSize < oldSize) { 5301 vm_unmap_pages(current, current->base + newSize, oldSize - newSize, 5302 false); 5303 } 5304 } 5305 5306 // shrinking the cache can't fail, so we do it now 5307 if (status == B_OK && newSize < oldSize) 5308 status = cache->Resize(cache->virtual_base + newSize); 5309 5310 if (status < B_OK) { 5311 // This shouldn't really be possible, but hey, who knows 5312 for (vm_area* current = cache->areas; current != NULL; 5313 current = current->cache_next) { 5314 current->size = oldSize; 5315 } 5316 5317 cache->Resize(cache->virtual_base + oldSize); 5318 } 5319 5320 // TODO: we must honour the lock restrictions of this area 5321 return status; 5322 } 5323 5324 5325 status_t 5326 vm_memset_physical(addr_t address, int value, size_t length) 5327 { 5328 return 
vm_kernel_address_space()->translation_map.ops->memset_physical( 5329 address, value, length); 5330 } 5331 5332 5333 status_t 5334 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 5335 { 5336 return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical( 5337 to, from, length, user); 5338 } 5339 5340 5341 status_t 5342 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 5343 { 5344 return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical( 5345 to, _from, length, user); 5346 } 5347 5348 5349 void 5350 vm_memcpy_physical_page(addr_t to, addr_t from) 5351 { 5352 return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page( 5353 to, from); 5354 } 5355 5356 5357 // #pragma mark - kernel public API 5358 5359 5360 status_t 5361 user_memcpy(void* to, const void* from, size_t size) 5362 { 5363 if (arch_cpu_user_memcpy(to, from, size, 5364 &thread_get_current_thread()->fault_handler) < B_OK) 5365 return B_BAD_ADDRESS; 5366 5367 return B_OK; 5368 } 5369 5370 5371 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5372 the string in \a to, NULL-terminating the result. 5373 5374 \param to Pointer to the destination C-string. 5375 \param from Pointer to the source C-string. 5376 \param size Size in bytes of the string buffer pointed to by \a to. 5377 5378 \return strlen(\a from). 5379 */ 5380 ssize_t 5381 user_strlcpy(char* to, const char* from, size_t size) 5382 { 5383 return arch_cpu_user_strlcpy(to, from, size, 5384 &thread_get_current_thread()->fault_handler); 5385 } 5386 5387 5388 status_t 5389 user_memset(void* s, char c, size_t count) 5390 { 5391 if (arch_cpu_user_memset(s, c, count, 5392 &thread_get_current_thread()->fault_handler) < B_OK) 5393 return B_BAD_ADDRESS; 5394 5395 return B_OK; 5396 } 5397 5398 5399 status_t 5400 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5401 { 5402 vm_address_space* addressSpace = NULL; 5403 struct vm_translation_map* map; 5404 addr_t unalignedBase = (addr_t)address; 5405 addr_t end = unalignedBase + numBytes; 5406 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 5407 bool isUser = IS_USER_ADDRESS(address); 5408 bool needsLocking = true; 5409 5410 if (isUser) { 5411 if (team == B_CURRENT_TEAM) 5412 addressSpace = vm_get_current_user_address_space(); 5413 else 5414 addressSpace = vm_get_address_space(team); 5415 } else 5416 addressSpace = vm_get_kernel_address_space(); 5417 if (addressSpace == NULL) 5418 return B_ERROR; 5419 5420 // test if we're on an area that allows faults at all 5421 5422 map = &addressSpace->translation_map; 5423 5424 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5425 if (status < B_OK) 5426 goto out; 5427 if (!needsLocking) 5428 goto out; 5429 5430 for (; base < end; base += B_PAGE_SIZE) { 5431 addr_t physicalAddress; 5432 uint32 protection; 5433 status_t status; 5434 5435 map->ops->lock(map); 5436 status = map->ops->query(map, base, &physicalAddress, &protection); 5437 map->ops->unlock(map); 5438 5439 if (status < B_OK) 5440 goto out; 5441 5442 if ((protection & PAGE_PRESENT) != 0) { 5443 // if B_READ_DEVICE is set, the caller intents to write to the locked 5444 // memory, so if it hasn't been mapped writable, we'll try the soft 5445 // fault anyway 5446 if ((flags & B_READ_DEVICE) == 0 5447 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 5448 // update wiring 5449 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5450 if (page == 
NULL) 5451 panic("couldn't lookup physical page just allocated\n"); 5452 5453 increment_page_wired_count(page); 5454 continue; 5455 } 5456 } 5457 5458 status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0, 5459 isUser); 5460 if (status != B_OK) { 5461 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) " 5462 "failed: %s\n", (void*)unalignedBase, numBytes, flags, 5463 strerror(status)); 5464 goto out; 5465 } 5466 5467 // TODO: Here's a race condition. We should probably add a parameter 5468 // to vm_soft_fault() that would cause the page's wired count to be 5469 // incremented immediately. 5470 // TODO: After memory has been locked in an area, we need to prevent the 5471 // area from being deleted, resized, cut, etc. That could be done using 5472 // a "locked pages" count in vm_area, and maybe a condition variable, if 5473 // we want to allow waiting for the area to become eligible for these 5474 // operations again. 5475 5476 map->ops->lock(map); 5477 status = map->ops->query(map, base, &physicalAddress, &protection); 5478 map->ops->unlock(map); 5479 5480 if (status < B_OK) 5481 goto out; 5482 5483 // update wiring 5484 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5485 if (page == NULL) 5486 panic("couldn't lookup physical page"); 5487 5488 increment_page_wired_count(page); 5489 // TODO: needs to be atomic on all platforms! 5490 } 5491 5492 out: 5493 vm_put_address_space(addressSpace); 5494 return status; 5495 } 5496 5497 5498 status_t 5499 lock_memory(void* address, size_t numBytes, uint32 flags) 5500 { 5501 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5502 } 5503 5504 5505 status_t 5506 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5507 { 5508 vm_address_space* addressSpace = NULL; 5509 struct vm_translation_map* map; 5510 addr_t unalignedBase = (addr_t)address; 5511 addr_t end = unalignedBase + numBytes; 5512 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 5513 bool needsLocking = true; 5514 5515 if (IS_USER_ADDRESS(address)) { 5516 if (team == B_CURRENT_TEAM) 5517 addressSpace = vm_get_current_user_address_space(); 5518 else 5519 addressSpace = vm_get_address_space(team); 5520 } else 5521 addressSpace = vm_get_kernel_address_space(); 5522 if (addressSpace == NULL) 5523 return B_ERROR; 5524 5525 map = &addressSpace->translation_map; 5526 5527 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5528 if (status < B_OK) 5529 goto out; 5530 if (!needsLocking) 5531 goto out; 5532 5533 for (; base < end; base += B_PAGE_SIZE) { 5534 map->ops->lock(map); 5535 5536 addr_t physicalAddress; 5537 uint32 protection; 5538 status = map->ops->query(map, base, &physicalAddress, 5539 &protection); 5540 5541 map->ops->unlock(map); 5542 5543 if (status < B_OK) 5544 goto out; 5545 if ((protection & PAGE_PRESENT) == 0) 5546 panic("calling unlock_memory() on unmapped memory!"); 5547 5548 // update wiring 5549 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5550 if (page == NULL) 5551 panic("couldn't lookup physical page"); 5552 5553 decrement_page_wired_count(page); 5554 } 5555 5556 out: 5557 vm_put_address_space(addressSpace); 5558 return status; 5559 } 5560 5561 5562 status_t 5563 unlock_memory(void* address, size_t numBytes, uint32 flags) 5564 { 5565 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5566 } 5567 5568 5569 /*! 
Similar to get_memory_map(), but also allows to specify the address space 5570 for the memory in question and has a saner semantics. 5571 Returns \c B_OK when the complete range could be translated or 5572 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5573 case the actual number of entries is written to \c *_numEntries. Any other 5574 error case indicates complete failure; \c *_numEntries will be set to \c 0 5575 in this case. 5576 */ 5577 status_t 5578 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5579 physical_entry* table, uint32* _numEntries) 5580 { 5581 uint32 numEntries = *_numEntries; 5582 *_numEntries = 0; 5583 5584 vm_address_space* addressSpace; 5585 addr_t virtualAddress = (addr_t)address; 5586 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5587 addr_t physicalAddress; 5588 status_t status = B_OK; 5589 int32 index = -1; 5590 addr_t offset = 0; 5591 bool interrupts = are_interrupts_enabled(); 5592 5593 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5594 address, numBytes, numEntries)); 5595 5596 if (numEntries == 0 || numBytes == 0) 5597 return B_BAD_VALUE; 5598 5599 // in which address space is the address to be found? 5600 if (IS_USER_ADDRESS(virtualAddress)) { 5601 if (team == B_CURRENT_TEAM) 5602 addressSpace = vm_get_current_user_address_space(); 5603 else 5604 addressSpace = vm_get_address_space(team); 5605 } else 5606 addressSpace = vm_get_kernel_address_space(); 5607 5608 if (addressSpace == NULL) 5609 return B_ERROR; 5610 5611 vm_translation_map* map = &addressSpace->translation_map; 5612 5613 if (interrupts) 5614 map->ops->lock(map); 5615 5616 while (offset < numBytes) { 5617 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5618 uint32 flags; 5619 5620 if (interrupts) { 5621 status = map->ops->query(map, (addr_t)address + offset, 5622 &physicalAddress, &flags); 5623 } else { 5624 status = map->ops->query_interrupt(map, (addr_t)address + offset, 5625 &physicalAddress, &flags); 5626 } 5627 if (status < B_OK) 5628 break; 5629 if ((flags & PAGE_PRESENT) == 0) { 5630 panic("get_memory_map() called on unmapped memory!"); 5631 return B_BAD_ADDRESS; 5632 } 5633 5634 if (index < 0 && pageOffset > 0) { 5635 physicalAddress += pageOffset; 5636 if (bytes > B_PAGE_SIZE - pageOffset) 5637 bytes = B_PAGE_SIZE - pageOffset; 5638 } 5639 5640 // need to switch to the next physical_entry? 5641 if (index < 0 || (addr_t)table[index].address 5642 != physicalAddress - table[index].size) { 5643 if ((uint32)++index + 1 > numEntries) { 5644 // table to small 5645 status = B_BUFFER_OVERFLOW; 5646 break; 5647 } 5648 table[index].address = (void*)physicalAddress; 5649 table[index].size = bytes; 5650 } else { 5651 // page does fit in current entry 5652 table[index].size += bytes; 5653 } 5654 5655 offset += bytes; 5656 } 5657 5658 if (interrupts) 5659 map->ops->unlock(map); 5660 5661 if (status != B_OK) 5662 return status; 5663 5664 if ((uint32)index + 1 > numEntries) { 5665 *_numEntries = index; 5666 return B_BUFFER_OVERFLOW; 5667 } 5668 5669 *_numEntries = index + 1; 5670 return B_OK; 5671 } 5672 5673 5674 /*! According to the BeBook, this function should always succeed. 5675 This is no longer the case. 
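	In particular, the provided table may simply be too small for the given
	range, in which case \c B_BUFFER_OVERFLOW is returned just like from
	get_memory_map_etc(). A (hypothetical) call for a single page of a buffer
	could look like this:
		physical_entry table[2];
		get_memory_map(buffer, B_PAGE_SIZE, table, 2);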
5676 */ 5677 long 5678 get_memory_map(const void* address, ulong numBytes, physical_entry* table, 5679 long numEntries) 5680 { 5681 uint32 entriesRead = numEntries; 5682 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5683 table, &entriesRead); 5684 if (error != B_OK) 5685 return error; 5686 5687 // close the entry list 5688 5689 // if it's only one entry, we will silently accept the missing ending 5690 if (numEntries == 1) 5691 return B_OK; 5692 5693 if (entriesRead + 1 > (uint32)numEntries) 5694 return B_BUFFER_OVERFLOW; 5695 5696 table[entriesRead].address = NULL; 5697 table[entriesRead].size = 0; 5698 5699 return B_OK; 5700 } 5701 5702 5703 area_id 5704 area_for(void* address) 5705 { 5706 team_id space; 5707 5708 if (IS_USER_ADDRESS(address)) { 5709 // we try the user team address space, if any 5710 space = vm_current_user_address_space_id(); 5711 if (space < B_OK) 5712 return space; 5713 } else 5714 space = vm_kernel_address_space_id(); 5715 5716 return vm_area_for(space, (addr_t)address); 5717 } 5718 5719 5720 area_id 5721 find_area(const char* name) 5722 { 5723 rw_lock_read_lock(&sAreaHashLock); 5724 struct hash_iterator iterator; 5725 hash_open(sAreaHash, &iterator); 5726 5727 vm_area* area; 5728 area_id id = B_NAME_NOT_FOUND; 5729 while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) { 5730 if (area->id == RESERVED_AREA_ID) 5731 continue; 5732 5733 if (!strcmp(area->name, name)) { 5734 id = area->id; 5735 break; 5736 } 5737 } 5738 5739 hash_close(sAreaHash, &iterator, false); 5740 rw_lock_read_unlock(&sAreaHashLock); 5741 5742 return id; 5743 } 5744 5745 5746 status_t 5747 _get_area_info(area_id id, area_info* info, size_t size) 5748 { 5749 if (size != sizeof(area_info) || info == NULL) 5750 return B_BAD_VALUE; 5751 5752 AddressSpaceReadLocker locker; 5753 vm_area* area; 5754 status_t status = locker.SetFromArea(id, area); 5755 if (status != B_OK) 5756 return status; 5757 5758 fill_area_info(area, info, size); 5759 return B_OK; 5760 } 5761 5762 5763 status_t 5764 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 5765 { 5766 addr_t nextBase = *(addr_t*)cookie; 5767 5768 // we're already through the list 5769 if (nextBase == (addr_t)-1) 5770 return B_ENTRY_NOT_FOUND; 5771 5772 if (team == B_CURRENT_TEAM) 5773 team = team_get_current_team_id(); 5774 5775 AddressSpaceReadLocker locker(team); 5776 if (!locker.IsLocked()) 5777 return B_BAD_TEAM_ID; 5778 5779 vm_area* area; 5780 for (area = locker.AddressSpace()->areas; area != NULL; 5781 area = area->address_space_next) { 5782 if (area->id == RESERVED_AREA_ID) 5783 continue; 5784 5785 if (area->base > nextBase) 5786 break; 5787 } 5788 5789 if (area == NULL) { 5790 nextBase = (addr_t)-1; 5791 return B_ENTRY_NOT_FOUND; 5792 } 5793 5794 fill_area_info(area, info, size); 5795 *cookie = (int32)(area->base); 5796 5797 return B_OK; 5798 } 5799 5800 5801 status_t 5802 set_area_protection(area_id area, uint32 newProtection) 5803 { 5804 fix_protection(&newProtection); 5805 5806 return vm_set_area_protection(vm_kernel_address_space_id(), area, 5807 newProtection, true); 5808 } 5809 5810 5811 status_t 5812 resize_area(area_id areaID, size_t newSize) 5813 { 5814 return vm_resize_area(areaID, newSize, true); 5815 } 5816 5817 5818 /*! Transfers the specified area to a new team. The caller must be the owner 5819 of the area (not yet enforced but probably should be). 
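	The transfer is implemented as clone-into-target plus delete-original, so
	the caller gets back a new area ID rather than keeping the old one.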
5820 This function is currently not exported to the kernel namespace, but is 5821 only accessible using the _kern_transfer_area() syscall. 5822 */ 5823 static area_id 5824 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5825 bool kernel) 5826 { 5827 area_info info; 5828 status_t status = get_area_info(id, &info); 5829 if (status < B_OK) 5830 return status; 5831 5832 area_id clonedArea = vm_clone_area(target, info.name, _address, 5833 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5834 if (clonedArea < B_OK) 5835 return clonedArea; 5836 5837 status = vm_delete_area(info.team, id, kernel); 5838 if (status < B_OK) { 5839 vm_delete_area(target, clonedArea, kernel); 5840 return status; 5841 } 5842 5843 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 5844 5845 return clonedArea; 5846 } 5847 5848 5849 area_id 5850 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes, 5851 uint32 addressSpec, uint32 protection, void** _virtualAddress) 5852 { 5853 if (!arch_vm_supports_protection(protection)) 5854 return B_NOT_SUPPORTED; 5855 5856 fix_protection(&protection); 5857 5858 return vm_map_physical_memory(vm_kernel_address_space_id(), name, 5859 _virtualAddress, addressSpec, numBytes, protection, 5860 (addr_t)physicalAddress); 5861 } 5862 5863 5864 area_id 5865 clone_area(const char* name, void** _address, uint32 addressSpec, 5866 uint32 protection, area_id source) 5867 { 5868 if ((protection & B_KERNEL_PROTECTION) == 0) 5869 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5870 5871 return vm_clone_area(vm_kernel_address_space_id(), name, _address, 5872 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5873 } 5874 5875 5876 area_id 5877 create_area_etc(team_id team, const char* name, void** address, 5878 uint32 addressSpec, uint32 size, uint32 lock, uint32 protection, 5879 uint32 flags) 5880 { 5881 fix_protection(&protection); 5882 5883 return vm_create_anonymous_area(team, (char*)name, address, addressSpec, 5884 size, lock, protection, flags, true); 5885 } 5886 5887 5888 area_id 5889 create_area(const char* name, void** _address, uint32 addressSpec, size_t size, 5890 uint32 lock, uint32 protection) 5891 { 5892 fix_protection(&protection); 5893 5894 return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name, 5895 _address, addressSpec, size, lock, protection, 0, true); 5896 } 5897 5898 5899 status_t 5900 delete_area(area_id area) 5901 { 5902 return vm_delete_area(vm_kernel_address_space_id(), area, true); 5903 } 5904 5905 5906 // #pragma mark - Userland syscalls 5907 5908 5909 status_t 5910 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size) 5911 { 5912 // filter out some unavailable values (for userland) 5913 switch (addressSpec) { 5914 case B_ANY_KERNEL_ADDRESS: 5915 case B_ANY_KERNEL_BLOCK_ADDRESS: 5916 return B_BAD_VALUE; 5917 } 5918 5919 addr_t address; 5920 5921 if (!IS_USER_ADDRESS(userAddress) 5922 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5923 return B_BAD_ADDRESS; 5924 5925 status_t status = vm_reserve_address_range( 5926 vm_current_user_address_space_id(), (void**)&address, addressSpec, size, 5927 RESERVED_AVOID_BASE); 5928 if (status != B_OK) 5929 return status; 5930 5931 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5932 vm_unreserve_address_range(vm_current_user_address_space_id(), 5933 (void*)address, size); 5934 return B_BAD_ADDRESS; 5935 } 5936 5937 return B_OK; 5938 } 5939 5940 5941 
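// A note on the pattern used by the syscalls below: userland pointers are
// always checked with IS_USER_ADDRESS() and then copied in via user_memcpy()
// or user_strlcpy() before being used; results are copied back the same way,
// and a failed copy-out undoes the operation where possible (see
// _user_clone_area() and _user_create_area()).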
status_t 5942 _user_unreserve_address_range(addr_t address, addr_t size) 5943 { 5944 return vm_unreserve_address_range(vm_current_user_address_space_id(), 5945 (void*)address, size); 5946 } 5947 5948 5949 area_id 5950 _user_area_for(void* address) 5951 { 5952 return vm_area_for(vm_current_user_address_space_id(), (addr_t)address); 5953 } 5954 5955 5956 area_id 5957 _user_find_area(const char* userName) 5958 { 5959 char name[B_OS_NAME_LENGTH]; 5960 5961 if (!IS_USER_ADDRESS(userName) 5962 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5963 return B_BAD_ADDRESS; 5964 5965 return find_area(name); 5966 } 5967 5968 5969 status_t 5970 _user_get_area_info(area_id area, area_info* userInfo) 5971 { 5972 if (!IS_USER_ADDRESS(userInfo)) 5973 return B_BAD_ADDRESS; 5974 5975 area_info info; 5976 status_t status = get_area_info(area, &info); 5977 if (status < B_OK) 5978 return status; 5979 5980 // TODO: do we want to prevent userland from seeing kernel protections? 5981 //info.protection &= B_USER_PROTECTION; 5982 5983 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5984 return B_BAD_ADDRESS; 5985 5986 return status; 5987 } 5988 5989 5990 status_t 5991 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo) 5992 { 5993 int32 cookie; 5994 5995 if (!IS_USER_ADDRESS(userCookie) 5996 || !IS_USER_ADDRESS(userInfo) 5997 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 5998 return B_BAD_ADDRESS; 5999 6000 area_info info; 6001 status_t status = _get_next_area_info(team, &cookie, &info, 6002 sizeof(area_info)); 6003 if (status != B_OK) 6004 return status; 6005 6006 //info.protection &= B_USER_PROTECTION; 6007 6008 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 6009 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6010 return B_BAD_ADDRESS; 6011 6012 return status; 6013 } 6014 6015 6016 status_t 6017 _user_set_area_protection(area_id area, uint32 newProtection) 6018 { 6019 if ((newProtection & ~B_USER_PROTECTION) != 0) 6020 return B_BAD_VALUE; 6021 6022 fix_protection(&newProtection); 6023 6024 return vm_set_area_protection(vm_current_user_address_space_id(), area, 6025 newProtection, false); 6026 } 6027 6028 6029 status_t 6030 _user_resize_area(area_id area, size_t newSize) 6031 { 6032 // TODO: Since we restrict deleting of areas to those owned by the team, 6033 // we should also do that for resizing (check other functions, too). 
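	// (Passing "kernel == false" at least lets vm_resize_area() reject areas
	// with B_KERNEL_AREA protection.)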
6034 return vm_resize_area(area, newSize, false); 6035 } 6036 6037 6038 area_id 6039 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6040 team_id target) 6041 { 6042 // filter out some unavailable values (for userland) 6043 switch (addressSpec) { 6044 case B_ANY_KERNEL_ADDRESS: 6045 case B_ANY_KERNEL_BLOCK_ADDRESS: 6046 return B_BAD_VALUE; 6047 } 6048 6049 void* address; 6050 if (!IS_USER_ADDRESS(userAddress) 6051 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6052 return B_BAD_ADDRESS; 6053 6054 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6055 if (newArea < B_OK) 6056 return newArea; 6057 6058 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6059 return B_BAD_ADDRESS; 6060 6061 return newArea; 6062 } 6063 6064 6065 area_id 6066 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6067 uint32 protection, area_id sourceArea) 6068 { 6069 char name[B_OS_NAME_LENGTH]; 6070 void* address; 6071 6072 // filter out some unavailable values (for userland) 6073 switch (addressSpec) { 6074 case B_ANY_KERNEL_ADDRESS: 6075 case B_ANY_KERNEL_BLOCK_ADDRESS: 6076 return B_BAD_VALUE; 6077 } 6078 if ((protection & ~B_USER_PROTECTION) != 0) 6079 return B_BAD_VALUE; 6080 6081 if (!IS_USER_ADDRESS(userName) 6082 || !IS_USER_ADDRESS(userAddress) 6083 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6084 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6085 return B_BAD_ADDRESS; 6086 6087 fix_protection(&protection); 6088 6089 area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, 6090 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6091 false); 6092 if (clonedArea < B_OK) 6093 return clonedArea; 6094 6095 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6096 delete_area(clonedArea); 6097 return B_BAD_ADDRESS; 6098 } 6099 6100 return clonedArea; 6101 } 6102 6103 6104 area_id 6105 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6106 size_t size, uint32 lock, uint32 protection) 6107 { 6108 char name[B_OS_NAME_LENGTH]; 6109 void* address; 6110 6111 // filter out some unavailable values (for userland) 6112 switch (addressSpec) { 6113 case B_ANY_KERNEL_ADDRESS: 6114 case B_ANY_KERNEL_BLOCK_ADDRESS: 6115 return B_BAD_VALUE; 6116 } 6117 if ((protection & ~B_USER_PROTECTION) != 0) 6118 return B_BAD_VALUE; 6119 6120 if (!IS_USER_ADDRESS(userName) 6121 || !IS_USER_ADDRESS(userAddress) 6122 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6123 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6124 return B_BAD_ADDRESS; 6125 6126 if (addressSpec == B_EXACT_ADDRESS 6127 && IS_KERNEL_ADDRESS(address)) 6128 return B_BAD_VALUE; 6129 6130 fix_protection(&protection); 6131 6132 area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(), 6133 (char*)name, &address, addressSpec, size, lock, protection, 0, false); 6134 6135 if (area >= B_OK 6136 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6137 delete_area(area); 6138 return B_BAD_ADDRESS; 6139 } 6140 6141 return area; 6142 } 6143 6144 6145 status_t 6146 _user_delete_area(area_id area) 6147 { 6148 // Unlike the BeOS implementation, you can now only delete areas 6149 // that you have created yourself from userland. 6150 // The documentation to delete_area() explicitly states that this 6151 // will be restricted in the future, and so it will. 
	return vm_delete_area(vm_current_user_address_space_id(), area, false);
}


// TODO: create a BeOS style call for this!

area_id
_user_map_file(const char* userName, void** userAddress, int addressSpec,
	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
	off_t offset)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	area_id area;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS) {
		if ((addr_t)address + size < (addr_t)address)
			return B_BAD_VALUE;
		if (!IS_USER_ADDRESS(address)
			|| !IS_USER_ADDRESS((addr_t)address + size)) {
			return B_BAD_ADDRESS;
		}
	}

	// userland created areas can always be accessed by the kernel
	protection |= B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
		false);
	if (area < B_OK)
		return area;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return area;
}


status_t
_user_unmap_memory(void* _address, size_t size)
{
	addr_t address = (addr_t)_address;

	// check params
	if (size == 0 || (addr_t)address + size < (addr_t)address)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
		return B_BAD_ADDRESS;

	// write lock the address space
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// unmap
	return unmap_address_range(locker.AddressSpace(), address, size, false);
}


status_t
_user_set_memory_protection(void* _address, size_t size, int protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	// extend and check protection
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 actualProtection = protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	if (!arch_vm_supports_protection(actualProtection))
		return B_NOT_SUPPORTED;

	// We need to write lock the address space, since we're going to play with
	// the areas.
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// First round: Check whether the whole range is covered by areas and
	// whether we are allowed to modify them.
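	// Doing the checks in a separate pass keeps them free of side effects: if
	// any part of the range is unmapped or marked B_KERNEL_AREA, we bail out
	// before anything has been modified.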
	addr_t currentAddress = address;
	size_t sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		if ((area->protection & B_KERNEL_AREA) != 0)
			return B_NOT_ALLOWED;

		// TODO: For (shared) mapped files we should check whether the new
		// protections are compatible with the file permissions. We don't have
		// a way to do that yet, though.

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;
	}

	// Second round: If the protections differ from that of the area, create a
	// page protection array and re-map mapped pages.
	vm_translation_map* map = &locker.AddressSpace()->translation_map;
	currentAddress = address;
	sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;

		if (area->page_protections == NULL) {
			if (area->protection == actualProtection)
				continue;

			// In the page protections we store only the three user protections,
			// so we use 4 bits per page.
			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
			area->page_protections = (uint8*)malloc(bytes);
			if (area->page_protections == NULL)
				return B_NO_MEMORY;

			// init the page protections for all pages to that of the area
			uint32 areaProtection = area->protection
				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
			memset(area->page_protections,
				areaProtection | (areaProtection << 4), bytes);
		}

		for (addr_t pageAddress = area->base + offset;
				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
			map->ops->lock(map);

			set_area_page_protection(area, pageAddress, protection);

			addr_t physicalAddress;
			uint32 flags;

			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
				&flags);
			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
				map->ops->unlock(map);
				continue;
			}

			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page == NULL) {
				panic("area %p looking up page failed for pa 0x%lx\n", area,
					physicalAddress);
				map->ops->unlock(map);
				return B_ERROR;
			}

			// If the page is not in the topmost cache and write access is
			// requested, we have to unmap it. Otherwise we can re-map it with
			// the new protection.
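			// (Unmapping makes the next write access fault, which gives the
			// fault handler a chance to copy the page into the area's own
			// cache before write access is actually granted.)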
			bool unmapPage = page->cache != area->cache
				&& (protection & B_WRITE_AREA) != 0;

			if (!unmapPage) {
				map->ops->unmap(map, pageAddress,
					pageAddress + B_PAGE_SIZE - 1);
				map->ops->map(map, pageAddress, physicalAddress,
					actualProtection);
			}

			map->ops->unlock(map);

			if (unmapPage)
				vm_unmap_page(area, pageAddress, true);
		}
	}

	return B_OK;
}


status_t
_user_sync_memory(void* _address, size_t size, int flags)
{
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	// check params
	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	bool writeSync = (flags & MS_SYNC) != 0;
	bool writeAsync = (flags & MS_ASYNC) != 0;
	if (writeSync && writeAsync)
		return B_BAD_VALUE;

	if (size == 0 || (!writeSync && !writeAsync))
		return B_OK;

	// iterate through the range and sync all concerned areas
	while (size > 0) {
		// read lock the address space
		AddressSpaceReadLocker locker;
		status_t error = locker.SetTo(team_get_current_team_id());
		if (error != B_OK)
			return error;

		// get the first area
		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
		if (area == NULL)
			return B_NO_MEMORY;

		uint32 offset = address - area->base;
		size_t rangeSize = min_c(area->size - offset, size);
		offset += area->cache_offset;

		// lock the cache
		AreaCacheLocker cacheLocker(area);
		if (!cacheLocker)
			return B_BAD_VALUE;
		vm_cache* cache = area->cache;

		locker.Unlock();

		uint32 firstPage = offset >> PAGE_SHIFT;
		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);

		// write the pages
		if (cache->type == CACHE_TYPE_VNODE) {
			if (writeSync) {
				// synchronous
				error = vm_page_write_modified_page_range(cache, firstPage,
					endPage);
				if (error != B_OK)
					return error;
			} else {
				// asynchronous
				vm_page_schedule_write_page_range(cache, firstPage, endPage);
				// TODO: This is probably not quite what is supposed to happen.
				// Especially when a lot has to be written, it might take ages
				// until it really hits the disk.
			}
		}

		address += rangeSize;
		size -= rangeSize;
	}

	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
	// synchronize multiple mappings of the same file. In our VM they never get
	// out of sync, though, so we don't have to do anything.

	return B_OK;
}


status_t
_user_memory_advice(void* address, size_t size, int advice)
{
	// TODO: Implement!
	return B_OK;
}
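// Note: _user_set_memory_protection(), _user_sync_memory() and
// _user_memory_advice() are presumably the kernel backends of the POSIX
// mprotect(), msync() and madvise() functions; that is why the first two
// report the POSIX ENOMEM error code for bad ranges instead of the usual
// B_* constants.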