/*
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <vm.h>

#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#include <OS.h>
#include <KernelExport.h>

#include <AutoDeleter.h>

#include <arch/cpu.h>
#include <arch/vm.h>
#include <boot/elf.h>
#include <boot/stage2.h>
#include <condition_variable.h>
#include <console.h>
#include <debug.h>
#include <file_cache.h>
#include <fs/fd.h>
#include <heap.h>
#include <int.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <smp.h>
#include <system_info.h>
#include <thread.h>
#include <team.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/khash.h>
#include <vm_address_space.h>
#include <vm_cache.h>
#include <vm_page.h>
#include <vm_priv.h>

#include "VMAnonymousCache.h"
#include "io_requests.h"


//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif

#define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
#define ROUNDOWN(a, b) (((a) / (b)) * (b))


class AddressSpaceReadLocker {
public:
	AddressSpaceReadLocker(team_id team);
	AddressSpaceReadLocker(vm_address_space* space, bool getNewReference);
	AddressSpaceReadLocker();
	~AddressSpaceReadLocker();

	status_t SetTo(team_id team);
	void SetTo(vm_address_space* space, bool getNewReference);
	status_t SetFromArea(area_id areaID, vm_area*& area);

	bool IsLocked() const { return fLocked; }
	void Unlock();

	void Unset();

	vm_address_space* AddressSpace() { return fSpace; }

private:
	vm_address_space* fSpace;
	bool fLocked;
};

class AddressSpaceWriteLocker {
public:
	AddressSpaceWriteLocker(team_id team);
	AddressSpaceWriteLocker();
	~AddressSpaceWriteLocker();

	status_t SetTo(team_id team);
	status_t SetFromArea(area_id areaID, vm_area*& area);
	status_t SetFromArea(team_id team, area_id areaID, bool allowKernel,
		vm_area*& area);
	status_t SetFromArea(team_id team, area_id areaID, vm_area*& area);

	bool IsLocked() const { return fLocked; }
	void Unlock();

	void DegradeToReadLock();
	void Unset();

	vm_address_space* AddressSpace() { return fSpace; }

private:
	vm_address_space* fSpace;
	bool fLocked;
	bool fDegraded;
};

class MultiAddressSpaceLocker {
public:
	MultiAddressSpaceLocker();
	~MultiAddressSpaceLocker();

	inline status_t AddTeam(team_id team, bool writeLock,
		vm_address_space** _space = NULL);
	inline status_t AddArea(area_id area, bool writeLock,
		vm_address_space** _space = NULL);

	status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne,
		bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL,
		bool checkNoCacheChange = false);

	status_t Lock();
	void Unlock();
	bool IsLocked() const { return fLocked; }

	void Unset();

private:
	struct lock_item {
		vm_address_space* space;
		bool write_lock;
	};

	bool _ResizeIfNeeded();
	int32
_IndexOfAddressSpace(vm_address_space* space) const; 146 status_t _AddAddressSpace(vm_address_space* space, bool writeLock, 147 vm_address_space** _space); 148 149 static int _CompareItems(const void* _a, const void* _b); 150 151 lock_item* fItems; 152 int32 fCapacity; 153 int32 fCount; 154 bool fLocked; 155 }; 156 157 158 class AreaCacheLocking { 159 public: 160 inline bool Lock(vm_cache* lockable) 161 { 162 return false; 163 } 164 165 inline void Unlock(vm_cache* lockable) 166 { 167 vm_area_put_locked_cache(lockable); 168 } 169 }; 170 171 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> { 172 public: 173 inline AreaCacheLocker(vm_cache* cache = NULL) 174 : AutoLocker<vm_cache, AreaCacheLocking>(cache, true) 175 { 176 } 177 178 inline AreaCacheLocker(vm_area* area) 179 : AutoLocker<vm_cache, AreaCacheLocking>() 180 { 181 SetTo(area); 182 } 183 184 inline void SetTo(vm_area* area) 185 { 186 return AutoLocker<vm_cache, AreaCacheLocking>::SetTo( 187 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 188 } 189 }; 190 191 192 #define AREA_HASH_TABLE_SIZE 1024 193 static area_id sNextAreaID = 1; 194 static hash_table *sAreaHash; 195 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash"); 196 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings"); 197 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache"); 198 199 static off_t sAvailableMemory; 200 static off_t sNeededMemory; 201 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 202 static uint32 sPageFaults; 203 204 #if DEBUG_CACHE_LIST 205 206 struct cache_info { 207 vm_cache* cache; 208 addr_t page_count; 209 addr_t committed; 210 }; 211 212 static const int kCacheInfoTableCount = 100 * 1024; 213 static cache_info* sCacheInfoTable; 214 215 #endif // DEBUG_CACHE_LIST 216 217 218 // function declarations 219 static void delete_area(vm_address_space *addressSpace, vm_area *area); 220 static vm_address_space *get_address_space_by_area_id(area_id id); 221 static status_t vm_soft_fault(vm_address_space *addressSpace, addr_t address, 222 bool isWrite, bool isUser); 223 static status_t map_backing_store(vm_address_space *addressSpace, 224 vm_cache *cache, void **_virtualAddress, off_t offset, addr_t size, 225 uint32 addressSpec, int wiring, int protection, int mapping, 226 vm_area **_area, const char *areaName, bool unmapAddressRange, bool kernel); 227 228 229 // #pragma mark - 230 231 232 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team) 233 : 234 fSpace(NULL), 235 fLocked(false) 236 { 237 SetTo(team); 238 } 239 240 241 /*! Takes over the reference of the address space, if \a getNewReference is 242 \c false. 243 */ 244 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space, 245 bool getNewReference) 246 : 247 fSpace(NULL), 248 fLocked(false) 249 { 250 SetTo(space, getNewReference); 251 } 252 253 254 AddressSpaceReadLocker::AddressSpaceReadLocker() 255 : 256 fSpace(NULL), 257 fLocked(false) 258 { 259 } 260 261 262 AddressSpaceReadLocker::~AddressSpaceReadLocker() 263 { 264 Unset(); 265 } 266 267 268 void 269 AddressSpaceReadLocker::Unset() 270 { 271 Unlock(); 272 if (fSpace != NULL) 273 vm_put_address_space(fSpace); 274 } 275 276 277 status_t 278 AddressSpaceReadLocker::SetTo(team_id team) 279 { 280 fSpace = vm_get_address_space(team); 281 if (fSpace == NULL) 282 return B_BAD_TEAM_ID; 283 284 rw_lock_read_lock(&fSpace->lock); 285 fLocked = true; 286 return B_OK; 287 } 288 289 290 /*! 
Takes over the reference of the address space, if \a getNewReference is 291 \c false. 292 */ 293 void 294 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference) 295 { 296 fSpace = space; 297 298 if (getNewReference) 299 atomic_add(&fSpace->ref_count, 1); 300 301 rw_lock_read_lock(&fSpace->lock); 302 fLocked = true; 303 } 304 305 306 status_t 307 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area) 308 { 309 fSpace = get_address_space_by_area_id(areaID); 310 if (fSpace == NULL) 311 return B_BAD_TEAM_ID; 312 313 rw_lock_read_lock(&fSpace->lock); 314 315 rw_lock_read_lock(&sAreaHashLock); 316 area = (vm_area *)hash_lookup(sAreaHash, &areaID); 317 rw_lock_read_unlock(&sAreaHashLock); 318 319 if (area == NULL || area->address_space != fSpace) { 320 rw_lock_read_unlock(&fSpace->lock); 321 return B_BAD_VALUE; 322 } 323 324 fLocked = true; 325 return B_OK; 326 } 327 328 329 void 330 AddressSpaceReadLocker::Unlock() 331 { 332 if (fLocked) { 333 rw_lock_read_unlock(&fSpace->lock); 334 fLocked = false; 335 } 336 } 337 338 339 // #pragma mark - 340 341 342 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team) 343 : 344 fSpace(NULL), 345 fLocked(false), 346 fDegraded(false) 347 { 348 SetTo(team); 349 } 350 351 352 AddressSpaceWriteLocker::AddressSpaceWriteLocker() 353 : 354 fSpace(NULL), 355 fLocked(false), 356 fDegraded(false) 357 { 358 } 359 360 361 AddressSpaceWriteLocker::~AddressSpaceWriteLocker() 362 { 363 Unset(); 364 } 365 366 367 void 368 AddressSpaceWriteLocker::Unset() 369 { 370 Unlock(); 371 if (fSpace != NULL) 372 vm_put_address_space(fSpace); 373 } 374 375 376 status_t 377 AddressSpaceWriteLocker::SetTo(team_id team) 378 { 379 fSpace = vm_get_address_space(team); 380 if (fSpace == NULL) 381 return B_BAD_TEAM_ID; 382 383 rw_lock_write_lock(&fSpace->lock); 384 fLocked = true; 385 return B_OK; 386 } 387 388 389 status_t 390 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area) 391 { 392 fSpace = get_address_space_by_area_id(areaID); 393 if (fSpace == NULL) 394 return B_BAD_VALUE; 395 396 rw_lock_write_lock(&fSpace->lock); 397 398 rw_lock_read_lock(&sAreaHashLock); 399 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 400 rw_lock_read_unlock(&sAreaHashLock); 401 402 if (area == NULL || area->address_space != fSpace) { 403 rw_lock_write_unlock(&fSpace->lock); 404 return B_BAD_VALUE; 405 } 406 407 fLocked = true; 408 return B_OK; 409 } 410 411 412 status_t 413 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 414 bool allowKernel, vm_area*& area) 415 { 416 rw_lock_read_lock(&sAreaHashLock); 417 418 area = (vm_area *)hash_lookup(sAreaHash, &areaID); 419 if (area != NULL 420 && (area->address_space->id == team 421 || allowKernel && team == vm_kernel_address_space_id())) { 422 fSpace = area->address_space; 423 atomic_add(&fSpace->ref_count, 1); 424 } 425 426 rw_lock_read_unlock(&sAreaHashLock); 427 428 if (fSpace == NULL) 429 return B_BAD_VALUE; 430 431 // Second try to get the area -- this time with the address space 432 // write lock held 433 434 rw_lock_write_lock(&fSpace->lock); 435 436 rw_lock_read_lock(&sAreaHashLock); 437 area = (vm_area *)hash_lookup(sAreaHash, &areaID); 438 rw_lock_read_unlock(&sAreaHashLock); 439 440 if (area == NULL) { 441 rw_lock_write_unlock(&fSpace->lock); 442 return B_BAD_VALUE; 443 } 444 445 fLocked = true; 446 return B_OK; 447 } 448 449 450 status_t 451 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 452 vm_area*& area) 453 { 454 return SetFromArea(team, areaID, false, 
area); 455 } 456 457 458 void 459 AddressSpaceWriteLocker::Unlock() 460 { 461 if (fLocked) { 462 if (fDegraded) 463 rw_lock_read_unlock(&fSpace->lock); 464 else 465 rw_lock_write_unlock(&fSpace->lock); 466 fLocked = false; 467 fDegraded = false; 468 } 469 } 470 471 472 void 473 AddressSpaceWriteLocker::DegradeToReadLock() 474 { 475 // TODO: the current R/W lock implementation just keeps the write lock here 476 rw_lock_read_lock(&fSpace->lock); 477 rw_lock_write_unlock(&fSpace->lock); 478 fDegraded = true; 479 } 480 481 482 // #pragma mark - 483 484 485 MultiAddressSpaceLocker::MultiAddressSpaceLocker() 486 : 487 fItems(NULL), 488 fCapacity(0), 489 fCount(0), 490 fLocked(false) 491 { 492 } 493 494 495 MultiAddressSpaceLocker::~MultiAddressSpaceLocker() 496 { 497 Unset(); 498 free(fItems); 499 } 500 501 502 /*static*/ int 503 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b) 504 { 505 lock_item* a = (lock_item*)_a; 506 lock_item* b = (lock_item*)_b; 507 return a->space->id - b->space->id; 508 } 509 510 511 bool 512 MultiAddressSpaceLocker::_ResizeIfNeeded() 513 { 514 if (fCount == fCapacity) { 515 lock_item* items = (lock_item*)realloc(fItems, 516 (fCapacity + 4) * sizeof(lock_item)); 517 if (items == NULL) 518 return false; 519 520 fCapacity += 4; 521 fItems = items; 522 } 523 524 return true; 525 } 526 527 528 int32 529 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const 530 { 531 for (int32 i = 0; i < fCount; i++) { 532 if (fItems[i].space == space) 533 return i; 534 } 535 536 return -1; 537 } 538 539 540 status_t 541 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space, 542 bool writeLock, vm_address_space** _space) 543 { 544 if (!space) 545 return B_BAD_VALUE; 546 547 int32 index = _IndexOfAddressSpace(space); 548 if (index < 0) { 549 if (!_ResizeIfNeeded()) { 550 vm_put_address_space(space); 551 return B_NO_MEMORY; 552 } 553 554 lock_item& item = fItems[fCount++]; 555 item.space = space; 556 item.write_lock = writeLock; 557 } else { 558 559 // one reference is enough 560 vm_put_address_space(space); 561 562 fItems[index].write_lock |= writeLock; 563 } 564 565 if (_space != NULL) 566 *_space = space; 567 568 return B_OK; 569 } 570 571 572 inline status_t 573 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock, 574 vm_address_space** _space) 575 { 576 return _AddAddressSpace(vm_get_address_space(team), writeLock, 577 _space); 578 } 579 580 581 inline status_t 582 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock, 583 vm_address_space** _space) 584 { 585 return _AddAddressSpace(get_address_space_by_area_id(area), writeLock, 586 _space); 587 } 588 589 590 void 591 MultiAddressSpaceLocker::Unset() 592 { 593 Unlock(); 594 595 for (int32 i = 0; i < fCount; i++) 596 vm_put_address_space(fItems[i].space); 597 598 fCount = 0; 599 } 600 601 602 status_t 603 MultiAddressSpaceLocker::Lock() 604 { 605 ASSERT(!fLocked); 606 607 qsort(fItems, fCount, sizeof(lock_item), &_CompareItems); 608 609 for (int32 i = 0; i < fCount; i++) { 610 status_t status; 611 if (fItems[i].write_lock) 612 status = rw_lock_write_lock(&fItems[i].space->lock); 613 else 614 status = rw_lock_read_lock(&fItems[i].space->lock); 615 616 if (status < B_OK) { 617 while (--i >= 0) { 618 if (fItems[i].write_lock) 619 rw_lock_write_unlock(&fItems[i].space->lock); 620 else 621 rw_lock_read_unlock(&fItems[i].space->lock); 622 } 623 return status; 624 } 625 } 626 627 fLocked = true; 628 return B_OK; 629 } 630 631 632 void 633 MultiAddressSpaceLocker::Unlock() 
{
	if (!fLocked)
		return;

	for (int32 i = 0; i < fCount; i++) {
		if (fItems[i].write_lock)
			rw_lock_write_unlock(&fItems[i].space->lock);
		else
			rw_lock_read_unlock(&fItems[i].space->lock);
	}

	fLocked = false;
}


/*!	Adds all address spaces of the areas associated with the given area's
	cache, locks them, and locks the cache (including a reference to it). It
	retries until the situation is stable (i.e. neither the cache nor the
	cache's area list has changed) or an error occurs. If \c checkNoCacheChange
	is \c true, it does not return until all areas' \c no_cache_change flags
	are clear.
*/
status_t
MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
	vm_cache** _cache, bool checkNoCacheChange)
{
	// remember the original state
	int originalCount = fCount;
	lock_item* originalItems = NULL;
	if (fCount > 0) {
		originalItems = new(nothrow) lock_item[fCount];
		if (originalItems == NULL)
			return B_NO_MEMORY;
		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
	}
	ArrayDeleter<lock_item> _(originalItems);

	// get the cache
	vm_cache* cache;
	vm_area* area;
	status_t error;
	{
		AddressSpaceReadLocker locker;
		error = locker.SetFromArea(areaID, area);
		if (error != B_OK)
			return error;

		cache = vm_area_get_locked_cache(area);
	}

	while (true) {
		// add all areas
		vm_area* firstArea = cache->areas;
		for (vm_area* current = firstArea; current;
				current = current->cache_next) {
			error = AddArea(current->id,
				current == area ? writeLockThisOne : writeLockOthers);
			if (error != B_OK) {
				vm_area_put_locked_cache(cache);
				return error;
			}
		}

		// unlock the cache and attempt to lock the address spaces
		vm_area_put_locked_cache(cache);

		error = Lock();
		if (error != B_OK)
			return error;

		// lock the cache again and check whether anything has changed

		// check whether the area is gone in the meantime
		rw_lock_read_lock(&sAreaHashLock);
		area = (vm_area *)hash_lookup(sAreaHash, &areaID);
		rw_lock_read_unlock(&sAreaHashLock);

		if (area == NULL) {
			Unlock();
			return B_BAD_VALUE;
		}

		// lock the cache
		vm_cache* oldCache = cache;
		cache = vm_area_get_locked_cache(area);

		// If neither the area's cache has changed nor its area list we're
		// done...
		bool done = (cache == oldCache || firstArea == cache->areas);

		// ... unless we're supposed to check the areas' "no_cache_change" flag
		bool yield = false;
		if (done && checkNoCacheChange) {
			for (vm_area *tempArea = cache->areas; tempArea != NULL;
					tempArea = tempArea->cache_next) {
				if (tempArea->no_cache_change) {
					done = false;
					yield = true;
					break;
				}
			}
		}

		// If everything looks dandy, return the values.
		if (done) {
			_area = area;
			if (_cache != NULL)
				*_cache = cache;
			return B_OK;
		}

		// Restore the original state and try again.

		// Unlock the address spaces, but keep the cache locked for the next
		// iteration.
		Unlock();

		// Get an additional reference to the original address spaces.
		for (int32 i = 0; i < originalCount; i++)
			atomic_add(&originalItems[i].space->ref_count, 1);

		// Release all references to the current address spaces.
756 for (int32 i = 0; i < fCount; i++) 757 vm_put_address_space(fItems[i].space); 758 759 // Copy over the original state. 760 fCount = originalCount; 761 if (originalItems != NULL) 762 memcpy(fItems, originalItems, fCount * sizeof(lock_item)); 763 764 if (yield) 765 thread_yield(true); 766 } 767 } 768 769 770 // #pragma mark - 771 772 773 #if VM_PAGE_FAULT_TRACING 774 775 namespace VMPageFaultTracing { 776 777 class PageFaultStart : public AbstractTraceEntry { 778 public: 779 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 780 : 781 fAddress(address), 782 fPC(pc), 783 fWrite(write), 784 fUser(user) 785 { 786 Initialized(); 787 } 788 789 virtual void AddDump(TraceOutput& out) 790 { 791 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 792 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC); 793 } 794 795 private: 796 addr_t fAddress; 797 addr_t fPC; 798 bool fWrite; 799 bool fUser; 800 }; 801 802 803 // page fault errors 804 enum { 805 PAGE_FAULT_ERROR_NO_AREA = 0, 806 PAGE_FAULT_ERROR_KERNEL_ONLY, 807 PAGE_FAULT_ERROR_READ_ONLY, 808 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 809 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 810 }; 811 812 813 class PageFaultError : public AbstractTraceEntry { 814 public: 815 PageFaultError(area_id area, status_t error) 816 : 817 fArea(area), 818 fError(error) 819 { 820 Initialized(); 821 } 822 823 virtual void AddDump(TraceOutput& out) 824 { 825 switch (fError) { 826 case PAGE_FAULT_ERROR_NO_AREA: 827 out.Print("page fault error: no area"); 828 break; 829 case PAGE_FAULT_ERROR_KERNEL_ONLY: 830 out.Print("page fault error: area: %ld, kernel only", fArea); 831 break; 832 case PAGE_FAULT_ERROR_READ_ONLY: 833 out.Print("page fault error: area: %ld, read only", fArea); 834 break; 835 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 836 out.Print("page fault error: kernel touching bad user memory"); 837 break; 838 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 839 out.Print("page fault error: no address space"); 840 break; 841 default: 842 out.Print("page fault error: area: %ld, error: %s", fArea, 843 strerror(fError)); 844 break; 845 } 846 } 847 848 private: 849 area_id fArea; 850 status_t fError; 851 }; 852 853 854 class PageFaultDone : public AbstractTraceEntry { 855 public: 856 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 857 vm_page* page) 858 : 859 fArea(area), 860 fTopCache(topCache), 861 fCache(cache), 862 fPage(page) 863 { 864 Initialized(); 865 } 866 867 virtual void AddDump(TraceOutput& out) 868 { 869 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 870 "page: %p", fArea, fTopCache, fCache, fPage); 871 } 872 873 private: 874 area_id fArea; 875 VMCache* fTopCache; 876 VMCache* fCache; 877 vm_page* fPage; 878 }; 879 880 } // namespace VMPageFaultTracing 881 882 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 883 #else 884 # define TPF(x) ; 885 #endif // VM_PAGE_FAULT_TRACING 886 887 888 // #pragma mark - 889 890 891 static int 892 area_compare(void *_area, const void *key) 893 { 894 vm_area *area = (vm_area *)_area; 895 const area_id *id = (const area_id *)key; 896 897 if (area->id == *id) 898 return 0; 899 900 return -1; 901 } 902 903 904 static uint32 905 area_hash(void *_area, const void *key, uint32 range) 906 { 907 vm_area *area = (vm_area *)_area; 908 const area_id *id = (const area_id *)key; 909 910 if (area != NULL) 911 return area->id % range; 912 913 return (uint32)*id % range; 914 } 915 916 917 static vm_address_space * 918 get_address_space_by_area_id(area_id id) 919 { 920 vm_address_space* 
addressSpace = NULL; 921 922 rw_lock_read_lock(&sAreaHashLock); 923 924 vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id); 925 if (area != NULL) { 926 addressSpace = area->address_space; 927 atomic_add(&addressSpace->ref_count, 1); 928 } 929 930 rw_lock_read_unlock(&sAreaHashLock); 931 932 return addressSpace; 933 } 934 935 936 //! You need to have the address space locked when calling this function 937 static vm_area * 938 lookup_area(vm_address_space* addressSpace, area_id id) 939 { 940 rw_lock_read_lock(&sAreaHashLock); 941 942 vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id); 943 if (area != NULL && area->address_space != addressSpace) 944 area = NULL; 945 946 rw_lock_read_unlock(&sAreaHashLock); 947 948 return area; 949 } 950 951 952 static vm_area * 953 create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags) 954 { 955 vm_area *reserved = (vm_area *)malloc_nogrow(sizeof(vm_area)); 956 if (reserved == NULL) 957 return NULL; 958 959 memset(reserved, 0, sizeof(vm_area)); 960 reserved->id = RESERVED_AREA_ID; 961 // this marks it as reserved space 962 reserved->protection = flags; 963 reserved->address_space = addressSpace; 964 965 return reserved; 966 } 967 968 969 static vm_area * 970 create_area_struct(vm_address_space *addressSpace, const char *name, 971 uint32 wiring, uint32 protection) 972 { 973 // restrict the area name to B_OS_NAME_LENGTH 974 size_t length = strlen(name) + 1; 975 if (length > B_OS_NAME_LENGTH) 976 length = B_OS_NAME_LENGTH; 977 978 vm_area *area = (vm_area *)malloc_nogrow(sizeof(vm_area)); 979 if (area == NULL) 980 return NULL; 981 982 area->name = (char *)malloc_nogrow(length); 983 if (area->name == NULL) { 984 free(area); 985 return NULL; 986 } 987 strlcpy(area->name, name, length); 988 989 area->id = atomic_add(&sNextAreaID, 1); 990 area->base = 0; 991 area->size = 0; 992 area->protection = protection; 993 area->wiring = wiring; 994 area->memory_type = 0; 995 996 area->cache = NULL; 997 area->no_cache_change = 0; 998 area->cache_offset = 0; 999 1000 area->address_space = addressSpace; 1001 area->address_space_next = NULL; 1002 area->cache_next = area->cache_prev = NULL; 1003 area->hash_next = NULL; 1004 new (&area->mappings) vm_area_mappings; 1005 area->page_protections = NULL; 1006 1007 return area; 1008 } 1009 1010 1011 /** Finds a reserved area that covers the region spanned by \a start and 1012 * \a size, inserts the \a area into that region and makes sure that 1013 * there are reserved regions for the remaining parts. 1014 */ 1015 1016 static status_t 1017 find_reserved_area(vm_address_space *addressSpace, addr_t start, 1018 addr_t size, vm_area *area) 1019 { 1020 vm_area *next, *last = NULL; 1021 1022 next = addressSpace->areas; 1023 while (next) { 1024 if (next->base <= start && next->base + next->size >= start + size) { 1025 // this area covers the requested range 1026 if (next->id != RESERVED_AREA_ID) { 1027 // but it's not reserved space, it's a real area 1028 return B_BAD_VALUE; 1029 } 1030 1031 break; 1032 } 1033 last = next; 1034 next = next->address_space_next; 1035 } 1036 if (next == NULL) 1037 return B_ENTRY_NOT_FOUND; 1038 1039 // now we have to transfer the requested part of the reserved 1040 // range to the new area - and remove, resize or split the old 1041 // reserved area. 
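	// An illustrative sketch of the three cases handled below; the addresses
	// are purely hypothetical and only meant to show the layout:
	//
	//   reserved range:    [0x10000 ........................... 0x20000)
	//   1) start-aligned:  [0x10000 .. 0x14000)   reserved range shrinks at
	//                                             the front (or is removed if
	//                                             it is covered completely)
	//   2) end-aligned:                 [0x1c000 .. 0x20000)   shrinks at the
	//                                                          back
	//   3) in the middle:        [0x14000 .. 0x18000)   range is split in two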
1042 1043 if (start == next->base) { 1044 // the area starts at the beginning of the reserved range 1045 if (last) 1046 last->address_space_next = area; 1047 else 1048 addressSpace->areas = area; 1049 1050 if (size == next->size) { 1051 // the new area fully covers the reversed range 1052 area->address_space_next = next->address_space_next; 1053 vm_put_address_space(addressSpace); 1054 free(next); 1055 } else { 1056 // resize the reserved range behind the area 1057 area->address_space_next = next; 1058 next->base += size; 1059 next->size -= size; 1060 } 1061 } else if (start + size == next->base + next->size) { 1062 // the area is at the end of the reserved range 1063 area->address_space_next = next->address_space_next; 1064 next->address_space_next = area; 1065 1066 // resize the reserved range before the area 1067 next->size = start - next->base; 1068 } else { 1069 // the area splits the reserved range into two separate ones 1070 // we need a new reserved area to cover this space 1071 vm_area *reserved = create_reserved_area_struct(addressSpace, 1072 next->protection); 1073 if (reserved == NULL) 1074 return B_NO_MEMORY; 1075 1076 atomic_add(&addressSpace->ref_count, 1); 1077 reserved->address_space_next = next->address_space_next; 1078 area->address_space_next = reserved; 1079 next->address_space_next = area; 1080 1081 // resize regions 1082 reserved->size = next->base + next->size - start - size; 1083 next->size = start - next->base; 1084 reserved->base = start + size; 1085 reserved->cache_offset = next->cache_offset; 1086 } 1087 1088 area->base = start; 1089 area->size = size; 1090 addressSpace->change_count++; 1091 1092 return B_OK; 1093 } 1094 1095 1096 /*! Must be called with this address space's sem held */ 1097 static status_t 1098 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start, 1099 addr_t size, addr_t end, uint32 addressSpec, vm_area *area) 1100 { 1101 vm_area *last = NULL; 1102 vm_area *next; 1103 bool foundSpot = false; 1104 1105 TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, " 1106 "size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start, 1107 size, end, addressSpec, area)); 1108 1109 // do some sanity checking 1110 if (start < addressSpace->base || size == 0 1111 || (end - 1) > (addressSpace->base + (addressSpace->size - 1)) 1112 || start + size > end) 1113 return B_BAD_ADDRESS; 1114 1115 if (addressSpec == B_EXACT_ADDRESS) { 1116 // search for a reserved area 1117 status_t status = find_reserved_area(addressSpace, start, size, area); 1118 if (status == B_OK || status == B_BAD_VALUE) 1119 return status; 1120 1121 // There was no reserved area, and the slot doesn't seem to be used 1122 // already 1123 // ToDo: this could be further optimized. 
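		// (Reaching this point means no single reserved area covers the
		// requested range; the regular placement code below still gets a
		// chance to fit the area at exactly "start" via the B_EXACT_ADDRESS
		// case of the switch.)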
1124 } 1125 1126 size_t alignment = B_PAGE_SIZE; 1127 if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) { 1128 // align the memory to the next power of two of the size 1129 while (alignment < size) 1130 alignment <<= 1; 1131 } 1132 1133 start = ROUNDUP(start, alignment); 1134 1135 // walk up to the spot where we should start searching 1136 second_chance: 1137 next = addressSpace->areas; 1138 while (next) { 1139 if (next->base >= start + size) { 1140 // we have a winner 1141 break; 1142 } 1143 last = next; 1144 next = next->address_space_next; 1145 } 1146 1147 // find the right spot depending on the address specification - the area 1148 // will be inserted directly after "last" ("next" is not referenced anymore) 1149 1150 switch (addressSpec) { 1151 case B_ANY_ADDRESS: 1152 case B_ANY_KERNEL_ADDRESS: 1153 case B_ANY_KERNEL_BLOCK_ADDRESS: 1154 // find a hole big enough for a new area 1155 if (!last) { 1156 // see if we can build it at the beginning of the virtual map 1157 if (!next || (next->base >= ROUNDUP(addressSpace->base, 1158 alignment) + size)) { 1159 foundSpot = true; 1160 area->base = ROUNDUP(addressSpace->base, alignment); 1161 break; 1162 } 1163 last = next; 1164 next = next->address_space_next; 1165 } 1166 // keep walking 1167 while (next) { 1168 if (next->base >= ROUNDUP(last->base + last->size, alignment) 1169 + size) { 1170 // we found a spot (it'll be filled up below) 1171 break; 1172 } 1173 last = next; 1174 next = next->address_space_next; 1175 } 1176 1177 if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP( 1178 last->base + last->size, alignment) + (size - 1))) { 1179 // got a spot 1180 foundSpot = true; 1181 area->base = ROUNDUP(last->base + last->size, alignment); 1182 break; 1183 } else { 1184 // We didn't find a free spot - if there were any reserved areas 1185 // with the RESERVED_AVOID_BASE flag set, we can now test those 1186 // for free space 1187 // ToDo: it would make sense to start with the biggest of them 1188 next = addressSpace->areas; 1189 last = NULL; 1190 for (last = NULL; next; next = next->address_space_next, 1191 last = next) { 1192 // ToDo: take free space after the reserved area into account! 
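				// (Two sub-cases follow: a suitably aligned reserved area that
				// matches the requested size exactly is consumed and unlinked
				// from the list; otherwise the new area is carved out of the
				// end of the reserved area, which is shrunk accordingly.)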
1193 if (next->base == ROUNDUP(next->base, alignment) 1194 && next->size == size) { 1195 // The reserved area is entirely covered, and thus, 1196 // removed 1197 if (last) 1198 last->address_space_next = next->address_space_next; 1199 else 1200 addressSpace->areas = next->address_space_next; 1201 1202 foundSpot = true; 1203 area->base = next->base; 1204 free(next); 1205 break; 1206 } 1207 if (next->size - (ROUNDUP(next->base, alignment) 1208 - next->base) >= size) { 1209 // The new area will be placed at the end of the 1210 // reserved area, and the reserved area will be resized 1211 // to make space 1212 foundSpot = true; 1213 next->size -= size; 1214 last = next; 1215 area->base = next->base + next->size; 1216 break; 1217 } 1218 } 1219 } 1220 break; 1221 1222 case B_BASE_ADDRESS: 1223 // find a hole big enough for a new area beginning with "start" 1224 if (!last) { 1225 // see if we can build it at the beginning of the specified start 1226 if (!next || (next->base >= start + size)) { 1227 foundSpot = true; 1228 area->base = start; 1229 break; 1230 } 1231 last = next; 1232 next = next->address_space_next; 1233 } 1234 // keep walking 1235 while (next) { 1236 if (next->base >= last->base + last->size + size) { 1237 // we found a spot (it'll be filled up below) 1238 break; 1239 } 1240 last = next; 1241 next = next->address_space_next; 1242 } 1243 1244 if ((addressSpace->base + (addressSpace->size - 1)) 1245 >= (last->base + last->size + (size - 1))) { 1246 // got a spot 1247 foundSpot = true; 1248 if (last->base + last->size <= start) 1249 area->base = start; 1250 else 1251 area->base = last->base + last->size; 1252 break; 1253 } 1254 // we didn't find a free spot in the requested range, so we'll 1255 // try again without any restrictions 1256 start = addressSpace->base; 1257 addressSpec = B_ANY_ADDRESS; 1258 last = NULL; 1259 goto second_chance; 1260 1261 case B_EXACT_ADDRESS: 1262 // see if we can create it exactly here 1263 if (!last) { 1264 if (!next || (next->base >= start + size)) { 1265 foundSpot = true; 1266 area->base = start; 1267 break; 1268 } 1269 } else { 1270 if (next) { 1271 if (last->base + last->size <= start && next->base >= start + size) { 1272 foundSpot = true; 1273 area->base = start; 1274 break; 1275 } 1276 } else { 1277 if ((last->base + (last->size - 1)) <= start - 1) { 1278 foundSpot = true; 1279 area->base = start; 1280 } 1281 } 1282 } 1283 break; 1284 default: 1285 return B_BAD_VALUE; 1286 } 1287 1288 if (!foundSpot) 1289 return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY; 1290 1291 area->size = size; 1292 if (last) { 1293 area->address_space_next = last->address_space_next; 1294 last->address_space_next = area; 1295 } else { 1296 area->address_space_next = addressSpace->areas; 1297 addressSpace->areas = area; 1298 } 1299 addressSpace->change_count++; 1300 return B_OK; 1301 } 1302 1303 1304 /*! This inserts the area you pass into the specified address space. 1305 It will also set the "_address" argument to its base address when 1306 the call succeeds. 1307 You need to hold the vm_address_space semaphore. 
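	A minimal usage sketch (illustrative only; it assumes the caller already
	holds the write lock and has created \a area, and it leaves out all error
	handling):

		void* address = NULL;
		status_t status = insert_area(addressSpace, &address,
			B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE * 4, area);
		// on success, "address" now holds the chosen base address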
1308 */ 1309 static status_t 1310 insert_area(vm_address_space *addressSpace, void **_address, 1311 uint32 addressSpec, addr_t size, vm_area *area) 1312 { 1313 addr_t searchBase, searchEnd; 1314 status_t status; 1315 1316 switch (addressSpec) { 1317 case B_EXACT_ADDRESS: 1318 searchBase = (addr_t)*_address; 1319 searchEnd = (addr_t)*_address + size; 1320 break; 1321 1322 case B_BASE_ADDRESS: 1323 searchBase = (addr_t)*_address; 1324 searchEnd = addressSpace->base + (addressSpace->size - 1); 1325 break; 1326 1327 case B_ANY_ADDRESS: 1328 case B_ANY_KERNEL_ADDRESS: 1329 case B_ANY_KERNEL_BLOCK_ADDRESS: 1330 searchBase = addressSpace->base; 1331 // TODO: remove this again when vm86 mode is moved into the kernel 1332 // completely (currently needs a userland address space!) 1333 if (searchBase == USER_BASE) 1334 searchBase = USER_BASE_ANY; 1335 searchEnd = addressSpace->base + (addressSpace->size - 1); 1336 break; 1337 1338 default: 1339 return B_BAD_VALUE; 1340 } 1341 1342 status = find_and_insert_area_slot(addressSpace, searchBase, size, 1343 searchEnd, addressSpec, area); 1344 if (status == B_OK) { 1345 // ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS 1346 // vs. B_ANY_KERNEL_BLOCK_ADDRESS here? 1347 *_address = (void *)area->base; 1348 } 1349 1350 return status; 1351 } 1352 1353 1354 static inline void 1355 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection) 1356 { 1357 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 1358 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1359 uint8& entry = area->page_protections[pageIndex / 2]; 1360 if (pageIndex % 2 == 0) 1361 entry = entry & 0xf0 | protection; 1362 else 1363 entry = entry & 0x0f | (protection << 4); 1364 } 1365 1366 1367 static inline uint32 1368 get_area_page_protection(vm_area* area, addr_t pageAddress) 1369 { 1370 if (area->page_protections == NULL) 1371 return area->protection; 1372 1373 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1374 uint32 protection = area->page_protections[pageIndex / 2]; 1375 if (pageIndex % 2 == 0) 1376 protection &= 0x0f; 1377 else 1378 protection >>= 4; 1379 1380 return protection | B_KERNEL_READ_AREA 1381 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 1382 } 1383 1384 1385 /*! Cuts a piece out of an area. If the given cut range covers the complete 1386 area, it is deleted. If it covers the beginning or the end, the area is 1387 resized accordingly. If the range covers some part in the middle of the 1388 area, it is split in two; in this case the second area is returned via 1389 \a _secondArea (the variable is left untouched in the other cases). 1390 The address space must be write locked. 1391 */ 1392 static status_t 1393 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address, 1394 addr_t lastAddress, vm_area** _secondArea, bool kernel) 1395 { 1396 // Does the cut range intersect with the area at all? 1397 addr_t areaLast = area->base + (area->size - 1); 1398 if (area->base > lastAddress || areaLast < address) 1399 return B_OK; 1400 1401 // Is the area fully covered? 1402 if (area->base >= address && areaLast <= lastAddress) { 1403 delete_area(addressSpace, area); 1404 return B_OK; 1405 } 1406 1407 AreaCacheLocker cacheLocker(area); 1408 vm_cache* cache = area->cache; 1409 1410 // Cut the end only? 
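	// (That is, the cut range extends at least to the area's end, so only the
	// tail starting at "address" goes away: the area keeps its base and merely
	// shrinks. The remaining cases -- cutting the beginning, and punching a
	// hole in the middle, which creates a second area -- follow below.)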
1411 if (areaLast <= lastAddress) { 1412 addr_t newSize = address - area->base; 1413 1414 // unmap pages 1415 vm_unmap_pages(area, address, area->size - newSize, false); 1416 1417 // If no one else uses the area's cache, we can resize it, too. 1418 if (cache->areas == area && area->cache_next == NULL 1419 && list_is_empty(&cache->consumers)) { 1420 status_t error = cache->Resize(cache->virtual_base + newSize); 1421 if (error != B_OK) 1422 return error; 1423 } 1424 1425 area->size = newSize; 1426 1427 return B_OK; 1428 } 1429 1430 // Cut the beginning only? 1431 if (area->base >= address) { 1432 addr_t newBase = lastAddress + 1; 1433 addr_t newSize = areaLast - lastAddress; 1434 1435 // unmap pages 1436 vm_unmap_pages(area, area->base, newBase - area->base, false); 1437 1438 // TODO: If no one else uses the area's cache, we should resize it, too! 1439 1440 area->cache_offset += newBase - area->base; 1441 area->base = newBase; 1442 area->size = newSize; 1443 1444 return B_OK; 1445 } 1446 1447 // The tough part -- cut a piece out of the middle of the area. 1448 // We do that by shrinking the area to the begin section and creating a 1449 // new area for the end section. 1450 1451 addr_t firstNewSize = address - area->base; 1452 addr_t secondBase = lastAddress + 1; 1453 addr_t secondSize = areaLast - lastAddress; 1454 1455 // unmap pages 1456 vm_unmap_pages(area, address, area->size - firstNewSize, false); 1457 1458 // resize the area 1459 addr_t oldSize = area->size; 1460 area->size = firstNewSize; 1461 1462 // TODO: If no one else uses the area's cache, we might want to create a 1463 // new cache for the second area, transfer the concerned pages from the 1464 // first cache to it and resize the first cache. 1465 1466 // map the second area 1467 vm_area* secondArea; 1468 void* secondBaseAddress = (void*)secondBase; 1469 status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress, 1470 area->cache_offset + (secondBase - area->base), secondSize, 1471 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 1472 &secondArea, area->name, false, kernel); 1473 if (error != B_OK) { 1474 area->size = oldSize; 1475 return error; 1476 } 1477 1478 // We need a cache reference for the new area. 1479 cache->AcquireRefLocked(); 1480 1481 if (_secondArea != NULL) 1482 *_secondArea = secondArea; 1483 1484 return B_OK; 1485 } 1486 1487 1488 static inline void 1489 increment_page_wired_count(vm_page* page) 1490 { 1491 // TODO: needs to be atomic on all platforms! 1492 // ... but at least the check isn't. Consequently we should hold 1493 // sMappingLock, which would allows us to even avoid atomic_add() on 1494 // gMappedPagesCount. 1495 if (page->wired_count++ == 0) { 1496 if (page->mappings.IsEmpty()) 1497 atomic_add(&gMappedPagesCount, 1); 1498 } 1499 } 1500 1501 1502 static inline void 1503 decrement_page_wired_count(vm_page* page) 1504 { 1505 if (--page->wired_count == 0) { 1506 // TODO: needs to be atomic on all platforms! 1507 // See above! 1508 if (page->mappings.IsEmpty()) 1509 atomic_add(&gMappedPagesCount, -1); 1510 } 1511 } 1512 1513 1514 /*! Deletes all areas in the given address range. 1515 The address space must be write-locked. 1516 */ 1517 static status_t 1518 unmap_address_range(vm_address_space *addressSpace, addr_t address, addr_t size, 1519 bool kernel) 1520 { 1521 size = PAGE_ALIGN(size); 1522 addr_t lastAddress = address + (size - 1); 1523 1524 // Check, whether the caller is allowed to modify the concerned areas. 
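	// (The range is processed in two passes: the loop below only checks for
	// B_KERNEL_AREA protection, so the request can be refused before anything
	// has been modified; the second loop then performs the actual cuts.)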
1525 vm_area* area; 1526 if (!kernel) { 1527 area = addressSpace->areas; 1528 while (area != NULL) { 1529 vm_area* nextArea = area->address_space_next; 1530 1531 if (area->id != RESERVED_AREA_ID) { 1532 addr_t areaLast = area->base + (area->size - 1); 1533 if (area->base < lastAddress && address < areaLast) { 1534 if ((area->protection & B_KERNEL_AREA) != 0) 1535 return B_NOT_ALLOWED; 1536 } 1537 } 1538 1539 area = nextArea; 1540 } 1541 } 1542 1543 area = addressSpace->areas; 1544 while (area != NULL) { 1545 vm_area* nextArea = area->address_space_next; 1546 1547 if (area->id != RESERVED_AREA_ID) { 1548 addr_t areaLast = area->base + (area->size - 1); 1549 if (area->base < lastAddress && address < areaLast) { 1550 status_t error = cut_area(addressSpace, area, address, 1551 lastAddress, NULL, kernel); 1552 if (error != B_OK) 1553 return error; 1554 // Failing after already messing with areas is ugly, but we 1555 // can't do anything about it. 1556 } 1557 } 1558 1559 area = nextArea; 1560 } 1561 1562 return B_OK; 1563 } 1564 1565 1566 /*! You need to hold the lock of the cache and the write lock of the address 1567 space when calling this function. 1568 Note, that in case of error your cache will be temporarily unlocked. 1569 */ 1570 static status_t 1571 map_backing_store(vm_address_space *addressSpace, vm_cache *cache, 1572 void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 1573 int wiring, int protection, int mapping, vm_area **_area, 1574 const char *areaName, bool unmapAddressRange, bool kernel) 1575 { 1576 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n", 1577 addressSpace, cache, *_virtualAddress, offset, size, addressSpec, 1578 wiring, protection, _area, areaName)); 1579 cache->AssertLocked(); 1580 1581 vm_area *area = create_area_struct(addressSpace, areaName, wiring, 1582 protection); 1583 if (area == NULL) 1584 return B_NO_MEMORY; 1585 1586 status_t status; 1587 1588 // if this is a private map, we need to create a new cache 1589 // to handle the private copies of pages as they are written to 1590 vm_cache* sourceCache = cache; 1591 if (mapping == REGION_PRIVATE_MAP) { 1592 vm_cache *newCache; 1593 1594 // create an anonymous cache 1595 status = VMCacheFactory::CreateAnonymousCache(newCache, 1596 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true); 1597 if (status != B_OK) 1598 goto err1; 1599 1600 newCache->Lock(); 1601 newCache->temporary = 1; 1602 newCache->scan_skip = cache->scan_skip; 1603 newCache->virtual_base = offset; 1604 newCache->virtual_end = offset + size; 1605 1606 cache->AddConsumer(newCache); 1607 1608 cache = newCache; 1609 } 1610 1611 status = cache->SetMinimalCommitment(size); 1612 if (status != B_OK) 1613 goto err2; 1614 1615 // check to see if this address space has entered DELETE state 1616 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 1617 // okay, someone is trying to delete this address space now, so we can't 1618 // insert the area, so back out 1619 status = B_BAD_TEAM_ID; 1620 goto err2; 1621 } 1622 1623 if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) { 1624 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 1625 size, kernel); 1626 if (status != B_OK) 1627 goto err2; 1628 } 1629 1630 status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area); 1631 if (status < B_OK) 1632 goto err2; 1633 1634 // attach the cache to the area 1635 area->cache = cache; 1636 
area->cache_offset = offset; 1637 1638 // point the cache back to the area 1639 cache->InsertAreaLocked(area); 1640 if (mapping == REGION_PRIVATE_MAP) 1641 cache->Unlock(); 1642 1643 // insert the area in the global area hash table 1644 rw_lock_write_lock(&sAreaHashLock); 1645 hash_insert(sAreaHash, area); 1646 rw_lock_write_unlock(&sAreaHashLock); 1647 1648 // grab a ref to the address space (the area holds this) 1649 atomic_add(&addressSpace->ref_count, 1); 1650 1651 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1652 // cache, sourceCache, areaName, area); 1653 1654 *_area = area; 1655 return B_OK; 1656 1657 err2: 1658 if (mapping == REGION_PRIVATE_MAP) { 1659 // We created this cache, so we must delete it again. Note, that we 1660 // need to temporarily unlock the source cache or we'll otherwise 1661 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1662 sourceCache->Unlock(); 1663 cache->ReleaseRefAndUnlock(); 1664 sourceCache->Lock(); 1665 } 1666 err1: 1667 free(area->name); 1668 free(area); 1669 return status; 1670 } 1671 1672 1673 status_t 1674 vm_unreserve_address_range(team_id team, void *address, addr_t size) 1675 { 1676 AddressSpaceWriteLocker locker(team); 1677 if (!locker.IsLocked()) 1678 return B_BAD_TEAM_ID; 1679 1680 // check to see if this address space has entered DELETE state 1681 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1682 // okay, someone is trying to delete this address space now, so we can't 1683 // insert the area, so back out 1684 return B_BAD_TEAM_ID; 1685 } 1686 1687 // search area list and remove any matching reserved ranges 1688 1689 vm_area* area = locker.AddressSpace()->areas; 1690 vm_area* last = NULL; 1691 while (area) { 1692 // the area must be completely part of the reserved range 1693 if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address 1694 && area->base + area->size <= (addr_t)address + size) { 1695 // remove reserved range 1696 vm_area *reserved = area; 1697 if (last) 1698 last->address_space_next = reserved->address_space_next; 1699 else 1700 locker.AddressSpace()->areas = reserved->address_space_next; 1701 1702 area = reserved->address_space_next; 1703 vm_put_address_space(locker.AddressSpace()); 1704 free(reserved); 1705 continue; 1706 } 1707 1708 last = area; 1709 area = area->address_space_next; 1710 } 1711 1712 return B_OK; 1713 } 1714 1715 1716 status_t 1717 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec, 1718 addr_t size, uint32 flags) 1719 { 1720 if (size == 0) 1721 return B_BAD_VALUE; 1722 1723 AddressSpaceWriteLocker locker(team); 1724 if (!locker.IsLocked()) 1725 return B_BAD_TEAM_ID; 1726 1727 // check to see if this address space has entered DELETE state 1728 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1729 // okay, someone is trying to delete this address space now, so we 1730 // can't insert the area, let's back out 1731 return B_BAD_TEAM_ID; 1732 } 1733 1734 vm_area *area = create_reserved_area_struct(locker.AddressSpace(), flags); 1735 if (area == NULL) 1736 return B_NO_MEMORY; 1737 1738 status_t status = insert_area(locker.AddressSpace(), _address, addressSpec, 1739 size, area); 1740 if (status < B_OK) { 1741 free(area); 1742 return status; 1743 } 1744 1745 // the area is now reserved! 
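	// (Such a reservation is just a vm_area with id RESERVED_AREA_ID and no
	// cache attached; find_reserved_area() later hands the space out again
	// when an area is created at an exact address within the range, and
	// vm_unreserve_address_range() removes it.)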
1746 1747 area->cache_offset = area->base; 1748 // we cache the original base address here 1749 1750 atomic_add(&locker.AddressSpace()->ref_count, 1); 1751 return B_OK; 1752 } 1753 1754 1755 area_id 1756 vm_create_anonymous_area(team_id team, const char *name, void **address, 1757 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 1758 uint32 flags, bool kernel) 1759 { 1760 vm_area *area; 1761 vm_cache *cache; 1762 vm_page *page = NULL; 1763 bool isStack = (protection & B_STACK_AREA) != 0; 1764 page_num_t guardPages; 1765 bool canOvercommit = false; 1766 addr_t physicalBase = 0; 1767 1768 TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size)); 1769 1770 size = PAGE_ALIGN(size); 1771 1772 if (size == 0) 1773 return B_BAD_VALUE; 1774 if (!arch_vm_supports_protection(protection)) 1775 return B_NOT_SUPPORTED; 1776 1777 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1778 canOvercommit = true; 1779 1780 #ifdef DEBUG_KERNEL_STACKS 1781 if ((protection & B_KERNEL_STACK_AREA) != 0) 1782 isStack = true; 1783 #endif 1784 1785 /* check parameters */ 1786 switch (addressSpec) { 1787 case B_ANY_ADDRESS: 1788 case B_EXACT_ADDRESS: 1789 case B_BASE_ADDRESS: 1790 case B_ANY_KERNEL_ADDRESS: 1791 case B_ANY_KERNEL_BLOCK_ADDRESS: 1792 break; 1793 case B_PHYSICAL_BASE_ADDRESS: 1794 physicalBase = (addr_t)*address; 1795 addressSpec = B_ANY_KERNEL_ADDRESS; 1796 break; 1797 1798 default: 1799 return B_BAD_VALUE; 1800 } 1801 1802 bool doReserveMemory = false; 1803 switch (wiring) { 1804 case B_NO_LOCK: 1805 break; 1806 case B_FULL_LOCK: 1807 case B_LAZY_LOCK: 1808 case B_CONTIGUOUS: 1809 doReserveMemory = true; 1810 break; 1811 case B_ALREADY_WIRED: 1812 break; 1813 case B_LOMEM: 1814 //case B_SLOWMEM: 1815 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 1816 wiring = B_FULL_LOCK; 1817 doReserveMemory = true; 1818 break; 1819 default: 1820 return B_BAD_VALUE; 1821 } 1822 1823 // For full lock or contiguous areas we're also going to map the pages and 1824 // thus need to reserve pages for the mapping backend upfront. 1825 addr_t reservedMapPages = 0; 1826 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1827 AddressSpaceWriteLocker locker; 1828 status_t status = locker.SetTo(team); 1829 if (status != B_OK) 1830 return status; 1831 1832 vm_translation_map *map = &locker.AddressSpace()->translation_map; 1833 reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 1834 } 1835 1836 // Reserve memory before acquiring the address space lock. This reduces the 1837 // chances of failure, since while holding the write lock to the address 1838 // space (if it is the kernel address space that is), the low memory handler 1839 // won't be able to free anything for us. 1840 addr_t reservedMemory = 0; 1841 if (doReserveMemory) { 1842 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1843 if (vm_try_reserve_memory(size, timeout) != B_OK) 1844 return B_NO_MEMORY; 1845 reservedMemory = size; 1846 // TODO: We don't reserve the memory for the pages for the page 1847 // directories/tables. We actually need to do since we currently don't 1848 // reclaim them (and probably can't reclaim all of them anyway). Thus 1849 // there are actually less physical pages than there should be, which 1850 // can get the VM into trouble in low memory situations. 1851 } 1852 1853 AddressSpaceWriteLocker locker; 1854 vm_address_space *addressSpace; 1855 status_t status; 1856 1857 // For full lock areas reserve the pages before locking the address 1858 // space. E.g. 
block caches can't release their memory while we hold the 1859 // address space lock. 1860 page_num_t reservedPages = reservedMapPages; 1861 if (wiring == B_FULL_LOCK) 1862 reservedPages += size / B_PAGE_SIZE; 1863 if (reservedPages > 0) { 1864 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1865 if (!vm_page_try_reserve_pages(reservedPages)) { 1866 reservedPages = 0; 1867 status = B_WOULD_BLOCK; 1868 goto err0; 1869 } 1870 } else 1871 vm_page_reserve_pages(reservedPages); 1872 } 1873 1874 status = locker.SetTo(team); 1875 if (status != B_OK) 1876 goto err0; 1877 1878 addressSpace = locker.AddressSpace(); 1879 1880 if (wiring == B_CONTIGUOUS) { 1881 // we try to allocate the page run here upfront as this may easily 1882 // fail for obvious reasons 1883 page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, physicalBase, 1884 size / B_PAGE_SIZE); 1885 if (page == NULL) { 1886 status = B_NO_MEMORY; 1887 goto err0; 1888 } 1889 } 1890 1891 // create an anonymous cache 1892 // if it's a stack, make sure that two pages are available at least 1893 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1894 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1895 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1896 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1897 wiring == B_NO_LOCK); 1898 if (status != B_OK) 1899 goto err1; 1900 1901 cache->temporary = 1; 1902 cache->virtual_end = size; 1903 cache->committed_size = reservedMemory; 1904 // TODO: This should be done via a method. 1905 reservedMemory = 0; 1906 1907 switch (wiring) { 1908 case B_LAZY_LOCK: 1909 case B_FULL_LOCK: 1910 case B_CONTIGUOUS: 1911 case B_ALREADY_WIRED: 1912 cache->scan_skip = 1; 1913 break; 1914 case B_NO_LOCK: 1915 cache->scan_skip = 0; 1916 break; 1917 } 1918 1919 cache->Lock(); 1920 1921 status = map_backing_store(addressSpace, cache, address, 0, size, 1922 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 1923 (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel); 1924 1925 if (status < B_OK) { 1926 cache->ReleaseRefAndUnlock(); 1927 goto err1; 1928 } 1929 1930 locker.DegradeToReadLock(); 1931 1932 switch (wiring) { 1933 case B_NO_LOCK: 1934 case B_LAZY_LOCK: 1935 // do nothing - the pages are mapped in as needed 1936 break; 1937 1938 case B_FULL_LOCK: 1939 { 1940 // Allocate and map all pages for this area 1941 1942 off_t offset = 0; 1943 for (addr_t address = area->base; address < area->base + (area->size - 1); 1944 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1945 #ifdef DEBUG_KERNEL_STACKS 1946 # ifdef STACK_GROWS_DOWNWARDS 1947 if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES 1948 * B_PAGE_SIZE) 1949 # else 1950 if (isStack && address >= area->base + area->size 1951 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1952 # endif 1953 continue; 1954 #endif 1955 vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 1956 cache->InsertPage(page, offset); 1957 vm_map_page(area, page, address, protection); 1958 1959 // Periodically unreserve pages we've already allocated, so that 1960 // we don't unnecessarily increase the pressure on the VM. 1961 if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) { 1962 page_num_t toUnreserve = 128; 1963 vm_page_unreserve_pages(toUnreserve); 1964 reservedPages -= toUnreserve; 1965 } 1966 } 1967 1968 break; 1969 } 1970 1971 case B_ALREADY_WIRED: 1972 { 1973 // the pages should already be mapped. This is only really useful during 1974 // boot time. 
Find the appropriate vm_page objects and stick them in 1975 // the cache object. 1976 vm_translation_map *map = &addressSpace->translation_map; 1977 off_t offset = 0; 1978 1979 if (!gKernelStartup) 1980 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1981 1982 map->ops->lock(map); 1983 1984 for (addr_t virtualAddress = area->base; virtualAddress < area->base 1985 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 1986 offset += B_PAGE_SIZE) { 1987 addr_t physicalAddress; 1988 uint32 flags; 1989 status = map->ops->query(map, virtualAddress, 1990 &physicalAddress, &flags); 1991 if (status < B_OK) { 1992 panic("looking up mapping failed for va 0x%lx\n", 1993 virtualAddress); 1994 } 1995 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1996 if (page == NULL) { 1997 panic("looking up page failed for pa 0x%lx\n", 1998 physicalAddress); 1999 } 2000 2001 increment_page_wired_count(page); 2002 vm_page_set_state(page, PAGE_STATE_WIRED); 2003 cache->InsertPage(page, offset); 2004 } 2005 2006 map->ops->unlock(map); 2007 break; 2008 } 2009 2010 case B_CONTIGUOUS: 2011 { 2012 // We have already allocated our continuous pages run, so we can now just 2013 // map them in the address space 2014 vm_translation_map *map = &addressSpace->translation_map; 2015 addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE; 2016 addr_t virtualAddress = area->base; 2017 off_t offset = 0; 2018 2019 map->ops->lock(map); 2020 2021 for (virtualAddress = area->base; virtualAddress < area->base 2022 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2023 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 2024 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2025 if (page == NULL) 2026 panic("couldn't lookup physical page just allocated\n"); 2027 2028 status = map->ops->map(map, virtualAddress, physicalAddress, 2029 protection); 2030 if (status < B_OK) 2031 panic("couldn't map physical page in page run\n"); 2032 2033 increment_page_wired_count(page); 2034 vm_page_set_state(page, PAGE_STATE_WIRED); 2035 cache->InsertPage(page, offset); 2036 } 2037 2038 map->ops->unlock(map); 2039 break; 2040 } 2041 2042 default: 2043 break; 2044 } 2045 2046 cache->Unlock(); 2047 2048 if (reservedPages > 0) 2049 vm_page_unreserve_pages(reservedPages); 2050 2051 TRACE(("vm_create_anonymous_area: done\n")); 2052 2053 area->cache_type = CACHE_TYPE_RAM; 2054 return area->id; 2055 2056 err1: 2057 if (wiring == B_CONTIGUOUS) { 2058 // we had reserved the area space upfront... 
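		// (On this error path the contiguous page run allocated above was
		// never attached to a cache, so the pages are simply handed back to
		// the free list one by one.)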
2059 addr_t pageNumber = page->physical_page_number; 2060 int32 i; 2061 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 2062 page = vm_lookup_page(pageNumber); 2063 if (page == NULL) 2064 panic("couldn't lookup physical page just allocated\n"); 2065 2066 vm_page_set_state(page, PAGE_STATE_FREE); 2067 } 2068 } 2069 2070 err0: 2071 if (reservedPages > 0) 2072 vm_page_unreserve_pages(reservedPages); 2073 if (reservedMemory > 0) 2074 vm_unreserve_memory(reservedMemory); 2075 2076 return status; 2077 } 2078 2079 2080 area_id 2081 vm_map_physical_memory(team_id team, const char *name, void **_address, 2082 uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress) 2083 { 2084 vm_area *area; 2085 vm_cache *cache; 2086 addr_t mapOffset; 2087 2088 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 2089 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 2090 name, _address, addressSpec, size, protection, physicalAddress)); 2091 2092 if (!arch_vm_supports_protection(protection)) 2093 return B_NOT_SUPPORTED; 2094 2095 AddressSpaceWriteLocker locker(team); 2096 if (!locker.IsLocked()) 2097 return B_BAD_TEAM_ID; 2098 2099 // if the physical address is somewhat inside a page, 2100 // move the actual area down to align on a page boundary 2101 mapOffset = physicalAddress % B_PAGE_SIZE; 2102 size += mapOffset; 2103 physicalAddress -= mapOffset; 2104 2105 size = PAGE_ALIGN(size); 2106 2107 // create an device cache 2108 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 2109 if (status != B_OK) 2110 return status; 2111 2112 // tell the page scanner to skip over this area, it's pages are special 2113 cache->scan_skip = 1; 2114 cache->virtual_end = size; 2115 2116 cache->Lock(); 2117 2118 status = map_backing_store(locker.AddressSpace(), cache, _address, 2119 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2120 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2121 2122 if (status < B_OK) 2123 cache->ReleaseRefLocked(); 2124 2125 cache->Unlock(); 2126 2127 if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) { 2128 // set requested memory type 2129 status = arch_vm_set_memory_type(area, physicalAddress, 2130 addressSpec & B_MTR_MASK); 2131 if (status < B_OK) 2132 delete_area(locker.AddressSpace(), area); 2133 } 2134 2135 if (status >= B_OK) { 2136 // make sure our area is mapped in completely 2137 2138 vm_translation_map *map = &locker.AddressSpace()->translation_map; 2139 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2140 area->base + (size - 1)); 2141 2142 vm_page_reserve_pages(reservePages); 2143 map->ops->lock(map); 2144 2145 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2146 map->ops->map(map, area->base + offset, physicalAddress + offset, 2147 protection); 2148 } 2149 2150 map->ops->unlock(map); 2151 vm_page_unreserve_pages(reservePages); 2152 } 2153 2154 if (status < B_OK) 2155 return status; 2156 2157 // modify the pointer returned to be offset back into the new area 2158 // the same way the physical address in was offset 2159 *_address = (void *)((addr_t)*_address + mapOffset); 2160 2161 area->cache_type = CACHE_TYPE_DEVICE; 2162 return area->id; 2163 } 2164 2165 2166 area_id 2167 vm_create_null_area(team_id team, const char *name, void **address, 2168 uint32 addressSpec, addr_t size) 2169 { 2170 vm_area *area; 2171 vm_cache *cache; 2172 status_t status; 2173 2174 AddressSpaceWriteLocker locker(team); 2175 if (!locker.IsLocked()) 2176 return B_BAD_TEAM_ID; 2177 2178 
size = PAGE_ALIGN(size); 2179 2180 // create a null cache 2181 status = VMCacheFactory::CreateNullCache(cache); 2182 if (status != B_OK) 2183 return status; 2184 2185 // tell the page scanner to skip over this area, no pages will be mapped here 2186 cache->scan_skip = 1; 2187 cache->virtual_end = size; 2188 2189 cache->Lock(); 2190 2191 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 2192 addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name, 2193 false, true); 2194 2195 if (status < B_OK) { 2196 cache->ReleaseRefAndUnlock(); 2197 return status; 2198 } 2199 2200 cache->Unlock(); 2201 2202 area->cache_type = CACHE_TYPE_NULL; 2203 return area->id; 2204 } 2205 2206 2207 /*! Creates the vnode cache for the specified \a vnode. 2208 The vnode has to be marked busy when calling this function. 2209 */ 2210 status_t 2211 vm_create_vnode_cache(struct vnode *vnode, struct VMCache **cache) 2212 { 2213 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2214 } 2215 2216 2217 /*! \a cache must be locked. The area's address space must be read-locked. 2218 */ 2219 static void 2220 pre_map_area_pages(vm_area* area, VMCache* cache) 2221 { 2222 addr_t baseAddress = area->base; 2223 addr_t cacheOffset = area->cache_offset; 2224 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2225 page_num_t endPage = firstPage + area->size / B_PAGE_SIZE; 2226 2227 for (VMCachePagesTree::Iterator it 2228 = cache->pages.GetIterator(firstPage, true, true); 2229 vm_page *page = it.Next();) { 2230 if (page->cache_offset >= endPage) 2231 break; 2232 2233 // skip inactive pages 2234 if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0) 2235 continue; 2236 2237 vm_map_page(area, page, 2238 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2239 B_READ_AREA | B_KERNEL_READ_AREA); 2240 } 2241 } 2242 2243 2244 /*! Will map the file specified by \a fd to an area in memory. 2245 The file will be mirrored beginning at the specified \a offset. The 2246 \a offset and \a size arguments have to be page aligned. 2247 */ 2248 static area_id 2249 _vm_map_file(team_id team, const char *name, void **_address, uint32 addressSpec, 2250 size_t size, uint32 protection, uint32 mapping, int fd, off_t offset, 2251 bool kernel) 2252 { 2253 // TODO: for binary files, we want to make sure that they get a consistent 2254 // copy of the file as of mapping time, i.e. later changes should not 2255 // make it into the mapped copy -- this will need quite some changes 2256 // to be done in a nice way 2257 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 2258 fd, offset, size, mapping)); 2259 2260 offset = ROUNDOWN(offset, B_PAGE_SIZE); 2261 size = PAGE_ALIGN(size); 2262 2263 if (mapping == REGION_NO_PRIVATE_MAP) 2264 protection |= B_SHARED_AREA; 2265 2266 if (fd < 0) { 2267 uint32 flags = addressSpec == B_EXACT_ADDRESS 2268 ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2269 return vm_create_anonymous_area(team, name, _address, addressSpec, size, 2270 B_NO_LOCK, protection, flags, kernel); 2271 } 2272 2273 // get the open flags of the FD 2274 file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd); 2275 if (descriptor == NULL) 2276 return EBADF; 2277 int32 openMode = descriptor->open_mode; 2278 put_fd(descriptor); 2279 2280 // The FD must be open for reading in any case. For a shared mapping with 2281 // write access, the FD additionally must be open for writing.
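	// In other words, the check below resolves to:
	//   O_RDONLY + private mapping           -> allowed
	//   O_RDONLY + shared mapping, read-only -> allowed
	//   O_RDONLY + shared mapping, writable  -> EACCES
	//   O_WRONLY + any mapping               -> EACCES
	//   O_RDWR   + any mapping               -> allowed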
2282 if ((openMode & O_ACCMODE) == O_WRONLY 2283 || mapping == REGION_NO_PRIVATE_MAP 2284 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2285 && (openMode & O_ACCMODE) == O_RDONLY) { 2286 return EACCES; 2287 } 2288 2289 // get the vnode for the object, this also grabs a ref to it 2290 struct vnode *vnode = NULL; 2291 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2292 if (status < B_OK) 2293 return status; 2294 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 2295 2296 // If we're going to pre-map pages, we need to reserve the pages needed by 2297 // the mapping backend upfront. 2298 page_num_t reservedPreMapPages = 0; 2299 if ((protection & B_READ_AREA) != 0) { 2300 AddressSpaceWriteLocker locker; 2301 status = locker.SetTo(team); 2302 if (status != B_OK) 2303 return status; 2304 2305 vm_translation_map *map = &locker.AddressSpace()->translation_map; 2306 reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 2307 2308 locker.Unlock(); 2309 2310 vm_page_reserve_pages(reservedPreMapPages); 2311 } 2312 2313 struct PageUnreserver { 2314 PageUnreserver(page_num_t count) 2315 : fCount(count) 2316 { 2317 } 2318 2319 ~PageUnreserver() 2320 { 2321 if (fCount > 0) 2322 vm_page_unreserve_pages(fCount); 2323 } 2324 2325 page_num_t fCount; 2326 } pageUnreserver(reservedPreMapPages); 2327 2328 AddressSpaceWriteLocker locker(team); 2329 if (!locker.IsLocked()) 2330 return B_BAD_TEAM_ID; 2331 2332 // TODO: this only works for file systems that use the file cache 2333 vm_cache *cache; 2334 status = vfs_get_vnode_cache(vnode, &cache, false); 2335 if (status < B_OK) 2336 return status; 2337 2338 cache->Lock(); 2339 2340 vm_area *area; 2341 status = map_backing_store(locker.AddressSpace(), cache, _address, 2342 offset, size, addressSpec, 0, protection, mapping, &area, name, 2343 addressSpec == B_EXACT_ADDRESS, kernel); 2344 2345 if (status < B_OK || mapping == REGION_PRIVATE_MAP) { 2346 // map_backing_store() cannot know we no longer need the ref 2347 cache->ReleaseRefLocked(); 2348 } 2349 2350 if (status == B_OK && (protection & B_READ_AREA) != 0) 2351 pre_map_area_pages(area, cache); 2352 2353 cache->Unlock(); 2354 2355 if (status < B_OK) 2356 return status; 2357 2358 area->cache_type = CACHE_TYPE_VNODE; 2359 return area->id; 2360 } 2361 2362 2363 area_id 2364 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec, 2365 addr_t size, uint32 protection, uint32 mapping, int fd, off_t offset) 2366 { 2367 if (!arch_vm_supports_protection(protection)) 2368 return B_NOT_SUPPORTED; 2369 2370 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2371 mapping, fd, offset, true); 2372 } 2373 2374 2375 vm_cache * 2376 vm_area_get_locked_cache(vm_area *area) 2377 { 2378 mutex_lock(&sAreaCacheLock); 2379 2380 while (true) { 2381 vm_cache* cache = area->cache; 2382 2383 if (!cache->SwitchLock(&sAreaCacheLock)) { 2384 // cache has been deleted 2385 mutex_lock(&sAreaCacheLock); 2386 continue; 2387 } 2388 2389 mutex_lock(&sAreaCacheLock); 2390 2391 if (cache == area->cache) { 2392 cache->AcquireRefLocked(); 2393 mutex_unlock(&sAreaCacheLock); 2394 return cache; 2395 } 2396 2397 // the cache changed in the meantime 2398 cache->Unlock(); 2399 } 2400 } 2401 2402 2403 void 2404 vm_area_put_locked_cache(vm_cache *cache) 2405 { 2406 cache->ReleaseRefAndUnlock(); 2407 } 2408 2409 2410 area_id 2411 vm_clone_area(team_id team, const char *name, void **address, 2412 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 
2413 bool kernel) 2414 { 2415 vm_area *newArea = NULL; 2416 vm_area *sourceArea; 2417 2418 // Check whether the source area exists and is cloneable. If so, mark it 2419 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2420 { 2421 AddressSpaceWriteLocker locker; 2422 status_t status = locker.SetFromArea(sourceID, sourceArea); 2423 if (status != B_OK) 2424 return status; 2425 2426 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2427 return B_NOT_ALLOWED; 2428 2429 sourceArea->protection |= B_SHARED_AREA; 2430 protection |= B_SHARED_AREA; 2431 } 2432 2433 // Now lock both address spaces and actually do the cloning. 2434 2435 MultiAddressSpaceLocker locker; 2436 vm_address_space *sourceAddressSpace; 2437 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2438 if (status != B_OK) 2439 return status; 2440 2441 vm_address_space *targetAddressSpace; 2442 status = locker.AddTeam(team, true, &targetAddressSpace); 2443 if (status != B_OK) 2444 return status; 2445 2446 status = locker.Lock(); 2447 if (status != B_OK) 2448 return status; 2449 2450 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2451 if (sourceArea == NULL) 2452 return B_BAD_VALUE; 2453 2454 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2455 return B_NOT_ALLOWED; 2456 2457 vm_cache *cache = vm_area_get_locked_cache(sourceArea); 2458 2459 // ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers 2460 // have been adapted. Maybe it should be part of the kernel settings, 2461 // anyway (so that old drivers can always work). 2462 #if 0 2463 if (sourceArea->aspace == vm_kernel_address_space() && addressSpace != vm_kernel_address_space() 2464 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2465 // kernel areas must not be cloned in userland, unless explicitly 2466 // declared user-cloneable upon construction 2467 status = B_NOT_ALLOWED; 2468 } else 2469 #endif 2470 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2471 status = B_NOT_ALLOWED; 2472 else { 2473 status = map_backing_store(targetAddressSpace, cache, address, 2474 sourceArea->cache_offset, sourceArea->size, addressSpec, 2475 sourceArea->wiring, protection, mapping, &newArea, name, false, 2476 kernel); 2477 } 2478 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2479 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2480 // to create a new cache, and has therefore already acquired a reference 2481 // to the source cache - but otherwise it has no idea that we need 2482 // one. 
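		// (The reference obtained via vm_area_get_locked_cache() above is
		// released again through vm_area_put_locked_cache() at the end of
		// this function, so the reference acquired here is the one that
		// remains with the newly created area.)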
2483 cache->AcquireRefLocked(); 2484 } 2485 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2486 // we need to map in everything at this point 2487 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2488 // we don't have actual pages to map but a physical area 2489 vm_translation_map *map = &sourceArea->address_space->translation_map; 2490 map->ops->lock(map); 2491 2492 addr_t physicalAddress; 2493 uint32 oldProtection; 2494 map->ops->query(map, sourceArea->base, &physicalAddress, 2495 &oldProtection); 2496 2497 map->ops->unlock(map); 2498 2499 map = &targetAddressSpace->translation_map; 2500 size_t reservePages = map->ops->map_max_pages_need(map, 2501 newArea->base, newArea->base + (newArea->size - 1)); 2502 2503 vm_page_reserve_pages(reservePages); 2504 map->ops->lock(map); 2505 2506 for (addr_t offset = 0; offset < newArea->size; 2507 offset += B_PAGE_SIZE) { 2508 map->ops->map(map, newArea->base + offset, 2509 physicalAddress + offset, protection); 2510 } 2511 2512 map->ops->unlock(map); 2513 vm_page_unreserve_pages(reservePages); 2514 } else { 2515 vm_translation_map *map = &targetAddressSpace->translation_map; 2516 size_t reservePages = map->ops->map_max_pages_need(map, 2517 newArea->base, newArea->base + (newArea->size - 1)); 2518 vm_page_reserve_pages(reservePages); 2519 2520 // map in all pages from source 2521 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2522 vm_page* page = it.Next();) { 2523 vm_map_page(newArea, page, newArea->base 2524 + ((page->cache_offset << PAGE_SHIFT) 2525 - newArea->cache_offset), protection); 2526 } 2527 2528 vm_page_unreserve_pages(reservePages); 2529 } 2530 } 2531 if (status == B_OK) 2532 newArea->cache_type = sourceArea->cache_type; 2533 2534 vm_area_put_locked_cache(cache); 2535 2536 if (status < B_OK) 2537 return status; 2538 2539 return newArea->id; 2540 } 2541 2542 2543 //! The address space must be write locked at this point 2544 static void 2545 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area) 2546 { 2547 vm_area *temp, *last = NULL; 2548 2549 temp = addressSpace->areas; 2550 while (temp != NULL) { 2551 if (area == temp) { 2552 if (last != NULL) { 2553 last->address_space_next = temp->address_space_next; 2554 } else { 2555 addressSpace->areas = temp->address_space_next; 2556 } 2557 addressSpace->change_count++; 2558 break; 2559 } 2560 last = temp; 2561 temp = temp->address_space_next; 2562 } 2563 if (area == addressSpace->area_hint) 2564 addressSpace->area_hint = NULL; 2565 2566 if (temp == NULL) 2567 panic("vm_area_release_ref: area not found in aspace's area list\n"); 2568 } 2569 2570 2571 static void 2572 delete_area(vm_address_space *addressSpace, vm_area *area) 2573 { 2574 rw_lock_write_lock(&sAreaHashLock); 2575 hash_remove(sAreaHash, area); 2576 rw_lock_write_unlock(&sAreaHashLock); 2577 2578 // At this point the area is removed from the global hash table, but 2579 // still exists in the area list. 
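	// The order below matters: the mappings are torn down and modified pages
	// get a chance to be written back while the area still knows its cache;
	// only then is the area detached from its address space and from the
	// cache, and finally freed.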
2580 2581 // Unmap the virtual address space the area occupied 2582 vm_unmap_pages(area, area->base, area->size, !area->cache->temporary); 2583 2584 if (!area->cache->temporary) 2585 area->cache->WriteModified(); 2586 2587 arch_vm_unset_memory_type(area); 2588 remove_area_from_address_space(addressSpace, area); 2589 vm_put_address_space(addressSpace); 2590 2591 area->cache->RemoveArea(area); 2592 area->cache->ReleaseRef(); 2593 2594 free(area->page_protections); 2595 free(area->name); 2596 free(area); 2597 } 2598 2599 2600 status_t 2601 vm_delete_area(team_id team, area_id id, bool kernel) 2602 { 2603 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2604 2605 AddressSpaceWriteLocker locker; 2606 vm_area *area; 2607 status_t status = locker.SetFromArea(team, id, area); 2608 if (status < B_OK) 2609 return status; 2610 2611 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2612 return B_NOT_ALLOWED; 2613 2614 delete_area(locker.AddressSpace(), area); 2615 return B_OK; 2616 } 2617 2618 2619 /*! Creates a new cache on top of given cache, moves all areas from 2620 the old cache to the new one, and changes the protection of all affected 2621 areas' pages to read-only. 2622 Preconditions: 2623 - The given cache must be locked. 2624 - All of the cache's areas' address spaces must be read locked. 2625 - All of the cache's areas must have a clear \c no_cache_change flags. 2626 */ 2627 static status_t 2628 vm_copy_on_write_area(vm_cache* lowerCache) 2629 { 2630 vm_cache *upperCache; 2631 2632 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2633 2634 // We need to separate the cache from its areas. The cache goes one level 2635 // deeper and we create a new cache inbetween. 2636 2637 // create an anonymous cache 2638 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2639 0, true); 2640 if (status != B_OK) 2641 return status; 2642 2643 upperCache->Lock(); 2644 2645 upperCache->temporary = 1; 2646 upperCache->scan_skip = lowerCache->scan_skip; 2647 upperCache->virtual_base = lowerCache->virtual_base; 2648 upperCache->virtual_end = lowerCache->virtual_end; 2649 2650 // transfer the lower cache areas to the upper cache 2651 mutex_lock(&sAreaCacheLock); 2652 2653 upperCache->areas = lowerCache->areas; 2654 lowerCache->areas = NULL; 2655 2656 for (vm_area *tempArea = upperCache->areas; tempArea != NULL; 2657 tempArea = tempArea->cache_next) { 2658 ASSERT(!tempArea->no_cache_change); 2659 2660 tempArea->cache = upperCache; 2661 upperCache->AcquireRefLocked(); 2662 lowerCache->ReleaseRefLocked(); 2663 } 2664 2665 mutex_unlock(&sAreaCacheLock); 2666 2667 lowerCache->AddConsumer(upperCache); 2668 2669 // We now need to remap all pages from all of the cache's areas read-only, so that 2670 // a copy will be created on next write access 2671 2672 for (vm_area *tempArea = upperCache->areas; tempArea != NULL; 2673 tempArea = tempArea->cache_next) { 2674 // The area must be readable in the same way it was previously writable 2675 uint32 protection = B_KERNEL_READ_AREA; 2676 if (tempArea->protection & B_READ_AREA) 2677 protection |= B_READ_AREA; 2678 2679 vm_translation_map *map = &tempArea->address_space->translation_map; 2680 map->ops->lock(map); 2681 map->ops->protect(map, tempArea->base, tempArea->base - 1 + tempArea->size, protection); 2682 map->ops->unlock(map); 2683 } 2684 2685 vm_area_put_locked_cache(upperCache); 2686 2687 return B_OK; 2688 } 2689 2690 2691 area_id 2692 vm_copy_area(team_id team, const char *name, void **_address, 2693 uint32 
addressSpec, uint32 protection, area_id sourceID) 2694 { 2695 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2696 2697 if ((protection & B_KERNEL_PROTECTION) == 0) { 2698 // set the same protection for the kernel as for userland 2699 protection |= B_KERNEL_READ_AREA; 2700 if (writableCopy) 2701 protection |= B_KERNEL_WRITE_AREA; 2702 } 2703 2704 // Do the locking: target address space, all address spaces associated with 2705 // the source cache, and the cache itself. 2706 MultiAddressSpaceLocker locker; 2707 vm_address_space *targetAddressSpace; 2708 vm_cache *cache; 2709 vm_area* source; 2710 status_t status = locker.AddTeam(team, true, &targetAddressSpace); 2711 if (status == B_OK) { 2712 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2713 &cache, true); 2714 } 2715 if (status != B_OK) 2716 return status; 2717 2718 AreaCacheLocker cacheLocker(cache); // already locked 2719 2720 if (addressSpec == B_CLONE_ADDRESS) { 2721 addressSpec = B_EXACT_ADDRESS; 2722 *_address = (void *)source->base; 2723 } 2724 2725 bool sharedArea = (source->protection & B_SHARED_AREA) != 0; 2726 2727 // First, create a cache on top of the source area, respectively use the 2728 // existing one, if this is a shared area. 2729 2730 vm_area *target; 2731 status = map_backing_store(targetAddressSpace, cache, _address, 2732 source->cache_offset, source->size, addressSpec, source->wiring, 2733 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2734 &target, name, false, true); 2735 if (status < B_OK) 2736 return status; 2737 2738 if (sharedArea) { 2739 // The new area uses the old area's cache, but map_backing_store() 2740 // hasn't acquired a ref. So we have to do that now. 2741 cache->AcquireRefLocked(); 2742 } 2743 2744 // If the source area is writable, we need to move it one layer up as well 2745 2746 if (!sharedArea) { 2747 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2748 // TODO: do something more useful if this fails! 2749 if (vm_copy_on_write_area(cache) < B_OK) 2750 panic("vm_copy_on_write_area() failed!\n"); 2751 } 2752 } 2753 2754 // we return the ID of the newly created area 2755 return target->id; 2756 } 2757 2758 2759 //! 
You need to hold the cache lock when calling this function 2760 static int32 2761 count_writable_areas(vm_cache *cache, vm_area *ignoreArea) 2762 { 2763 struct vm_area *area = cache->areas; 2764 uint32 count = 0; 2765 2766 for (; area != NULL; area = area->cache_next) { 2767 if (area != ignoreArea 2768 && (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) 2769 count++; 2770 } 2771 2772 return count; 2773 } 2774 2775 2776 static status_t 2777 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2778 bool kernel) 2779 { 2780 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = %#lx)\n", 2781 team, areaID, newProtection)); 2782 2783 if (!arch_vm_supports_protection(newProtection)) 2784 return B_NOT_SUPPORTED; 2785 2786 // lock address spaces and cache 2787 MultiAddressSpaceLocker locker; 2788 vm_cache *cache; 2789 vm_area* area; 2790 status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area, 2791 &cache, true); 2792 AreaCacheLocker cacheLocker(cache); // already locked 2793 2794 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2795 return B_NOT_ALLOWED; 2796 2797 if (area->protection == newProtection) 2798 return B_OK; 2799 2800 if (team != vm_kernel_address_space_id() 2801 && area->address_space->id != team) { 2802 // unless you're the kernel, you are only allowed to set 2803 // the protection of your own areas 2804 return B_NOT_ALLOWED; 2805 } 2806 2807 bool changePageProtection = true; 2808 2809 if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2810 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) { 2811 // writable -> !writable 2812 2813 if (cache->source != NULL && cache->temporary) { 2814 if (count_writable_areas(cache, area) == 0) { 2815 // Since this cache now lives from the pages in its source cache, 2816 // we can change the cache's commitment to take only those pages 2817 // into account that really are in this cache. 2818 2819 status = cache->Commit(cache->page_count * B_PAGE_SIZE); 2820 2821 // ToDo: we may be able to join with our source cache, if count == 0 2822 } 2823 } 2824 } else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0 2825 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 2826 // !writable -> writable 2827 2828 if (!list_is_empty(&cache->consumers)) { 2829 // There are consumers -- we have to insert a new cache. Fortunately 2830 // vm_copy_on_write_area() does everything that's needed. 2831 changePageProtection = false; 2832 status = vm_copy_on_write_area(cache); 2833 } else { 2834 // No consumers, so we don't need to insert a new one. 2835 if (cache->source != NULL && cache->temporary) { 2836 // the cache's commitment must contain all possible pages 2837 status = cache->Commit(cache->virtual_end 2838 - cache->virtual_base); 2839 } 2840 2841 if (status == B_OK && cache->source != NULL) { 2842 // There's a source cache, hence we can't just change all pages' 2843 // protection or we might allow writing into pages belonging to 2844 // a lower cache. 
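			// Instead, only the pages actually present in this cache are
			// remapped in the loop below. Pages that still live in a lower
			// cache keep their current protection, so writing to them still
			// faults and goes through the usual copy-on-write handling first.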
2845 changePageProtection = false; 2846 2847 struct vm_translation_map *map 2848 = &area->address_space->translation_map; 2849 map->ops->lock(map); 2850 2851 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2852 vm_page* page = it.Next();) { 2853 addr_t address = area->base 2854 + (page->cache_offset << PAGE_SHIFT); 2855 map->ops->protect(map, address, address - 1 + B_PAGE_SIZE, 2856 newProtection); 2857 } 2858 2859 map->ops->unlock(map); 2860 } 2861 } 2862 } else { 2863 // we don't have anything special to do in all other cases 2864 } 2865 2866 if (status == B_OK) { 2867 // remap existing pages in this cache 2868 struct vm_translation_map *map = &area->address_space->translation_map; 2869 2870 if (changePageProtection) { 2871 map->ops->lock(map); 2872 map->ops->protect(map, area->base, area->base + area->size, 2873 newProtection); 2874 map->ops->unlock(map); 2875 } 2876 2877 area->protection = newProtection; 2878 } 2879 2880 return status; 2881 } 2882 2883 2884 status_t 2885 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t *paddr) 2886 { 2887 vm_address_space *addressSpace = vm_get_address_space(team); 2888 if (addressSpace == NULL) 2889 return B_BAD_TEAM_ID; 2890 2891 uint32 dummyFlags; 2892 status_t status = addressSpace->translation_map.ops->query( 2893 &addressSpace->translation_map, vaddr, paddr, &dummyFlags); 2894 2895 vm_put_address_space(addressSpace); 2896 return status; 2897 } 2898 2899 2900 static inline addr_t 2901 virtual_page_address(vm_area *area, vm_page *page) 2902 { 2903 return area->base 2904 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 2905 } 2906 2907 2908 bool 2909 vm_test_map_modification(vm_page *page) 2910 { 2911 MutexLocker locker(sMappingLock); 2912 2913 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2914 vm_page_mapping *mapping; 2915 while ((mapping = iterator.Next()) != NULL) { 2916 vm_area *area = mapping->area; 2917 vm_translation_map *map = &area->address_space->translation_map; 2918 2919 addr_t physicalAddress; 2920 uint32 flags; 2921 map->ops->lock(map); 2922 map->ops->query(map, virtual_page_address(area, page), 2923 &physicalAddress, &flags); 2924 map->ops->unlock(map); 2925 2926 if (flags & PAGE_MODIFIED) 2927 return true; 2928 } 2929 2930 return false; 2931 } 2932 2933 2934 int32 2935 vm_test_map_activation(vm_page *page, bool *_modified) 2936 { 2937 int32 activation = 0; 2938 bool modified = false; 2939 2940 MutexLocker locker(sMappingLock); 2941 2942 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2943 vm_page_mapping *mapping; 2944 while ((mapping = iterator.Next()) != NULL) { 2945 vm_area *area = mapping->area; 2946 vm_translation_map *map = &area->address_space->translation_map; 2947 2948 addr_t physicalAddress; 2949 uint32 flags; 2950 map->ops->lock(map); 2951 map->ops->query(map, virtual_page_address(area, page), 2952 &physicalAddress, &flags); 2953 map->ops->unlock(map); 2954 2955 if (flags & PAGE_ACCESSED) 2956 activation++; 2957 if (flags & PAGE_MODIFIED) 2958 modified = true; 2959 } 2960 2961 if (_modified != NULL) 2962 *_modified = modified; 2963 2964 return activation; 2965 } 2966 2967 2968 void 2969 vm_clear_map_flags(vm_page *page, uint32 flags) 2970 { 2971 MutexLocker locker(sMappingLock); 2972 2973 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2974 vm_page_mapping *mapping; 2975 while ((mapping = iterator.Next()) != NULL) { 2976 vm_area *area = mapping->area; 2977 vm_translation_map *map = &area->address_space->translation_map; 2978 
2979 map->ops->lock(map); 2980 map->ops->clear_flags(map, virtual_page_address(area, page), flags); 2981 map->ops->unlock(map); 2982 } 2983 } 2984 2985 2986 /*! Removes all mappings from a page. 2987 After you've called this function, the page is unmapped from memory. 2988 The accumulated page flags of all mappings can be found in \a _flags. 2989 */ 2990 void 2991 vm_remove_all_page_mappings(vm_page *page, uint32 *_flags) 2992 { 2993 uint32 accumulatedFlags = 0; 2994 MutexLocker locker(sMappingLock); 2995 2996 vm_page_mappings queue; 2997 queue.MoveFrom(&page->mappings); 2998 2999 vm_page_mappings::Iterator iterator = queue.GetIterator(); 3000 vm_page_mapping *mapping; 3001 while ((mapping = iterator.Next()) != NULL) { 3002 vm_area *area = mapping->area; 3003 vm_translation_map *map = &area->address_space->translation_map; 3004 addr_t physicalAddress; 3005 uint32 flags; 3006 3007 map->ops->lock(map); 3008 addr_t address = virtual_page_address(area, page); 3009 map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1)); 3010 map->ops->flush(map); 3011 map->ops->query(map, address, &physicalAddress, &flags); 3012 map->ops->unlock(map); 3013 3014 area->mappings.Remove(mapping); 3015 3016 accumulatedFlags |= flags; 3017 } 3018 3019 if (page->wired_count == 0 && !queue.IsEmpty()) 3020 atomic_add(&gMappedPagesCount, -1); 3021 3022 locker.Unlock(); 3023 3024 // free now unused mappings 3025 3026 while ((mapping = queue.RemoveHead()) != NULL) { 3027 free(mapping); 3028 } 3029 3030 if (_flags != NULL) 3031 *_flags = accumulatedFlags; 3032 } 3033 3034 3035 status_t 3036 vm_unmap_pages(vm_area *area, addr_t base, size_t size, bool preserveModified) 3037 { 3038 vm_translation_map *map = &area->address_space->translation_map; 3039 addr_t end = base + (size - 1); 3040 3041 map->ops->lock(map); 3042 3043 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) { 3044 // iterate through all pages and decrease their wired count 3045 for (addr_t virtualAddress = base; virtualAddress < end; 3046 virtualAddress += B_PAGE_SIZE) { 3047 addr_t physicalAddress; 3048 uint32 flags; 3049 status_t status = map->ops->query(map, virtualAddress, 3050 &physicalAddress, &flags); 3051 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3052 continue; 3053 3054 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3055 if (page == NULL) { 3056 panic("area %p looking up page failed for pa 0x%lx\n", area, 3057 physicalAddress); 3058 } 3059 3060 decrement_page_wired_count(page); 3061 } 3062 } 3063 3064 map->ops->unmap(map, base, end); 3065 if (preserveModified) { 3066 map->ops->flush(map); 3067 3068 for (addr_t virtualAddress = base; virtualAddress < end; 3069 virtualAddress += B_PAGE_SIZE) { 3070 addr_t physicalAddress; 3071 uint32 flags; 3072 status_t status = map->ops->query(map, virtualAddress, 3073 &physicalAddress, &flags); 3074 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3075 continue; 3076 3077 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3078 if (page == NULL) { 3079 panic("area %p looking up page failed for pa 0x%lx\n", area, 3080 physicalAddress); 3081 } 3082 3083 if ((flags & PAGE_MODIFIED) != 0 3084 && page->state != PAGE_STATE_MODIFIED) 3085 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3086 } 3087 } 3088 map->ops->unlock(map); 3089 3090 if (area->wiring == B_NO_LOCK) { 3091 uint32 startOffset = (area->cache_offset + base - area->base) 3092 >> PAGE_SHIFT; 3093 uint32 endOffset = startOffset + (size >> PAGE_SHIFT); 3094 vm_page_mapping *mapping; 3095 vm_area_mappings 
queue; 3096 3097 mutex_lock(&sMappingLock); 3098 map->ops->lock(map); 3099 3100 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3101 while (iterator.HasNext()) { 3102 mapping = iterator.Next(); 3103 3104 vm_page *page = mapping->page; 3105 if (page->cache_offset < startOffset 3106 || page->cache_offset >= endOffset) 3107 continue; 3108 3109 page->mappings.Remove(mapping); 3110 iterator.Remove(); 3111 3112 if (page->mappings.IsEmpty() && page->wired_count == 0) 3113 atomic_add(&gMappedPagesCount, -1); 3114 3115 queue.Add(mapping); 3116 } 3117 3118 map->ops->unlock(map); 3119 mutex_unlock(&sMappingLock); 3120 3121 while ((mapping = queue.RemoveHead()) != NULL) { 3122 free(mapping); 3123 } 3124 } 3125 3126 return B_OK; 3127 } 3128 3129 3130 /*! When calling this function, you need to have pages reserved! */ 3131 status_t 3132 vm_map_page(vm_area *area, vm_page *page, addr_t address, uint32 protection) 3133 { 3134 vm_translation_map *map = &area->address_space->translation_map; 3135 vm_page_mapping *mapping = NULL; 3136 3137 if (area->wiring == B_NO_LOCK) { 3138 mapping = (vm_page_mapping *)malloc_nogrow(sizeof(vm_page_mapping)); 3139 if (mapping == NULL) 3140 return B_NO_MEMORY; 3141 3142 mapping->page = page; 3143 mapping->area = area; 3144 } 3145 3146 map->ops->lock(map); 3147 map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE, 3148 protection); 3149 map->ops->unlock(map); 3150 3151 if (area->wiring != B_NO_LOCK) { 3152 increment_page_wired_count(page); 3153 } else { 3154 // insert mapping into lists 3155 MutexLocker locker(sMappingLock); 3156 3157 if (page->mappings.IsEmpty() && page->wired_count == 0) 3158 atomic_add(&gMappedPagesCount, 1); 3159 3160 page->mappings.Add(mapping); 3161 area->mappings.Add(mapping); 3162 } 3163 3164 if (page->usage_count < 0) 3165 page->usage_count = 1; 3166 3167 if (page->state != PAGE_STATE_MODIFIED) 3168 vm_page_set_state(page, PAGE_STATE_ACTIVE); 3169 3170 return B_OK; 3171 } 3172 3173 3174 static int 3175 display_mem(int argc, char **argv) 3176 { 3177 bool physical = false; 3178 addr_t copyAddress; 3179 int32 displayWidth; 3180 int32 itemSize; 3181 int32 num = -1; 3182 addr_t address; 3183 int i = 1, j; 3184 3185 if (argc > 1 && argv[1][0] == '-') { 3186 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3187 physical = true; 3188 i++; 3189 } else 3190 i = 99; 3191 } 3192 3193 if (argc < i + 1 || argc > i + 2) { 3194 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3195 "\tdl - 8 bytes\n" 3196 "\tdw - 4 bytes\n" 3197 "\tds - 2 bytes\n" 3198 "\tdb - 1 byte\n" 3199 "\tstring - a whole string\n" 3200 " -p or --physical only allows memory from a single page to be " 3201 "displayed.\n"); 3202 return 0; 3203 } 3204 3205 address = parse_expression(argv[i]); 3206 3207 if (argc > i + 1) 3208 num = parse_expression(argv[i + 1]); 3209 3210 // build the format string 3211 if (strcmp(argv[0], "db") == 0) { 3212 itemSize = 1; 3213 displayWidth = 16; 3214 } else if (strcmp(argv[0], "ds") == 0) { 3215 itemSize = 2; 3216 displayWidth = 8; 3217 } else if (strcmp(argv[0], "dw") == 0) { 3218 itemSize = 4; 3219 displayWidth = 4; 3220 } else if (strcmp(argv[0], "dl") == 0) { 3221 itemSize = 8; 3222 displayWidth = 2; 3223 } else if (strcmp(argv[0], "string") == 0) { 3224 itemSize = 1; 3225 displayWidth = -1; 3226 } else { 3227 kprintf("display_mem called in an invalid way!\n"); 3228 return 0; 3229 } 3230 3231 if (num <= 0) 3232 num = displayWidth; 3233 3234 void* physicalPageHandle = NULL; 3235 3236 if 
(physical) { 3237 int32 offset = address & (B_PAGE_SIZE - 1); 3238 if (num * itemSize + offset > B_PAGE_SIZE) { 3239 num = (B_PAGE_SIZE - offset) / itemSize; 3240 kprintf("NOTE: number of bytes has been cut to page size\n"); 3241 } 3242 3243 address = ROUNDOWN(address, B_PAGE_SIZE); 3244 3245 if (vm_get_physical_page_debug(address, &copyAddress, 3246 &physicalPageHandle) != B_OK) { 3247 kprintf("getting the hardware page failed."); 3248 return 0; 3249 } 3250 3251 address += offset; 3252 copyAddress += offset; 3253 } else 3254 copyAddress = address; 3255 3256 if (!strcmp(argv[0], "string")) { 3257 kprintf("%p \"", (char*)copyAddress); 3258 3259 // string mode 3260 for (i = 0; true; i++) { 3261 char c; 3262 if (user_memcpy(&c, (char*)copyAddress + i, 1) != B_OK 3263 || c == '\0') 3264 break; 3265 3266 if (c == '\n') 3267 kprintf("\\n"); 3268 else if (c == '\t') 3269 kprintf("\\t"); 3270 else { 3271 if (!isprint(c)) 3272 c = '.'; 3273 3274 kprintf("%c", c); 3275 } 3276 } 3277 3278 kprintf("\"\n"); 3279 } else { 3280 // number mode 3281 for (i = 0; i < num; i++) { 3282 uint32 value; 3283 3284 if ((i % displayWidth) == 0) { 3285 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3286 if (i != 0) 3287 kprintf("\n"); 3288 3289 kprintf("[0x%lx] ", address + i * itemSize); 3290 3291 for (j = 0; j < displayed; j++) { 3292 char c; 3293 if (user_memcpy(&c, (char*)copyAddress + i * itemSize + j, 3294 1) != B_OK) { 3295 displayed = j; 3296 break; 3297 } 3298 if (!isprint(c)) 3299 c = '.'; 3300 3301 kprintf("%c", c); 3302 } 3303 if (num > displayWidth) { 3304 // make sure the spacing in the last line is correct 3305 for (j = displayed; j < displayWidth * itemSize; j++) 3306 kprintf(" "); 3307 } 3308 kprintf(" "); 3309 } 3310 3311 if (user_memcpy(&value, (uint8*)copyAddress + i * itemSize, 3312 itemSize) != B_OK) { 3313 kprintf("read fault"); 3314 break; 3315 } 3316 3317 switch (itemSize) { 3318 case 1: 3319 kprintf(" %02x", *(uint8 *)&value); 3320 break; 3321 case 2: 3322 kprintf(" %04x", *(uint16 *)&value); 3323 break; 3324 case 4: 3325 kprintf(" %08lx", *(uint32 *)&value); 3326 break; 3327 case 8: 3328 kprintf(" %016Lx", *(uint64 *)&value); 3329 break; 3330 } 3331 } 3332 3333 kprintf("\n"); 3334 } 3335 3336 if (physical) { 3337 copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE); 3338 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3339 } 3340 return 0; 3341 } 3342 3343 3344 static void 3345 dump_cache_tree_recursively(vm_cache* cache, int level, 3346 vm_cache* highlightCache) 3347 { 3348 // print this cache 3349 for (int i = 0; i < level; i++) 3350 kprintf(" "); 3351 if (cache == highlightCache) 3352 kprintf("%p <--\n", cache); 3353 else 3354 kprintf("%p\n", cache); 3355 3356 // recursively print its consumers 3357 vm_cache* consumer = NULL; 3358 while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, 3359 consumer)) != NULL) { 3360 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3361 } 3362 } 3363 3364 3365 static int 3366 dump_cache_tree(int argc, char **argv) 3367 { 3368 if (argc != 2 || !strcmp(argv[1], "--help")) { 3369 kprintf("usage: %s <address>\n", argv[0]); 3370 return 0; 3371 } 3372 3373 addr_t address = parse_expression(argv[1]); 3374 if (address == 0) 3375 return 0; 3376 3377 vm_cache *cache = (vm_cache *)address; 3378 vm_cache *root = cache; 3379 3380 // find the root cache (the transitive source) 3381 while (root->source != NULL) 3382 root = root->source; 3383 3384 dump_cache_tree_recursively(root, 0, cache); 3385 3386 return 0; 3387 } 3388
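// The KDL commands in this file share a common shape: a static function taking
// (argc, argv), argument parsing via parse_expression(), output via kprintf(),
// and registration through add_debugger_command() in vm_init(). The sketch
// below illustrates that pattern only -- the command name is made up and the
// command is not registered anywhere.
#if 0
static int
dump_area_count(int argc, char **argv)
{
	if (argc > 1 && !strcmp(argv[1], "--help")) {
		kprintf("usage: %s\n", argv[0]);
		return 0;
	}

	// walk the global area hash and count its entries
	uint32 count = 0;
	struct hash_iterator iterator;
	hash_open(sAreaHash, &iterator);
	while (hash_next(sAreaHash, &iterator) != NULL)
		count++;
	hash_close(sAreaHash, &iterator, false);

	kprintf("%lu areas\n", count);
	return 0;
}

// registration would sit next to the other add_debugger_command() calls in
// vm_init():
//	add_debugger_command("area_count", &dump_area_count, "Count all areas");
#endif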
3389 3390 static const char * 3391 cache_type_to_string(int32 type) 3392 { 3393 switch (type) { 3394 case CACHE_TYPE_RAM: 3395 return "RAM"; 3396 case CACHE_TYPE_DEVICE: 3397 return "device"; 3398 case CACHE_TYPE_VNODE: 3399 return "vnode"; 3400 case CACHE_TYPE_NULL: 3401 return "null"; 3402 3403 default: 3404 return "unknown"; 3405 } 3406 } 3407 3408 3409 #if DEBUG_CACHE_LIST 3410 3411 static void 3412 update_cache_info_recursively(vm_cache* cache, cache_info& info) 3413 { 3414 info.page_count += cache->page_count; 3415 if (cache->type == CACHE_TYPE_RAM) 3416 info.committed += cache->committed_size; 3417 3418 // recurse 3419 vm_cache* consumer = NULL; 3420 while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, 3421 consumer)) != NULL) { 3422 update_cache_info_recursively(consumer, info); 3423 } 3424 } 3425 3426 3427 static int 3428 cache_info_compare_page_count(const void* _a, const void* _b) 3429 { 3430 const cache_info* a = (const cache_info*)_a; 3431 const cache_info* b = (const cache_info*)_b; 3432 if (a->page_count == b->page_count) 3433 return 0; 3434 return a->page_count < b->page_count ? 1 : -1; 3435 } 3436 3437 3438 static int 3439 cache_info_compare_committed(const void* _a, const void* _b) 3440 { 3441 const cache_info* a = (const cache_info*)_a; 3442 const cache_info* b = (const cache_info*)_b; 3443 if (a->committed == b->committed) 3444 return 0; 3445 return a->committed < b->committed ? 1 : -1; 3446 } 3447 3448 3449 static void 3450 dump_caches_recursively(vm_cache* cache, cache_info& info, int level) 3451 { 3452 for (int i = 0; i < level; i++) 3453 kprintf(" "); 3454 3455 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3456 cache_type_to_string(cache->type), cache->virtual_base, 3457 cache->virtual_end, cache->page_count); 3458 3459 if (level == 0) 3460 kprintf("/%lu", info.page_count); 3461 3462 if (cache->type == CACHE_TYPE_RAM || level == 0 && info.committed > 0) { 3463 kprintf(", committed: %lld", cache->committed_size); 3464 3465 if (level == 0) 3466 kprintf("/%lu", info.committed); 3467 } 3468 3469 // areas 3470 if (cache->areas != NULL) { 3471 vm_area* area = cache->areas; 3472 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3473 area->address_space->id); 3474 3475 while (area->cache_next != NULL) { 3476 area = area->cache_next; 3477 kprintf(", %ld", area->id); 3478 } 3479 } 3480 3481 kputs("\n"); 3482 3483 // recurse 3484 vm_cache* consumer = NULL; 3485 while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, 3486 consumer)) != NULL) { 3487 dump_caches_recursively(consumer, info, level + 1); 3488 } 3489 } 3490 3491 3492 static int 3493 dump_caches(int argc, char **argv) 3494 { 3495 if (sCacheInfoTable == NULL) { 3496 kprintf("No cache info table!\n"); 3497 return 0; 3498 } 3499 3500 bool sortByPageCount = true; 3501 3502 for (int32 i = 1; i < argc; i++) { 3503 if (strcmp(argv[i], "-c") == 0) { 3504 sortByPageCount = false; 3505 } else { 3506 print_debugger_command_usage(argv[0]); 3507 return 0; 3508 } 3509 } 3510 3511 uint32 totalCount = 0; 3512 uint32 rootCount = 0; 3513 off_t totalCommitted = 0; 3514 page_num_t totalPages = 0; 3515 3516 vm_cache* cache = gDebugCacheList; 3517 while (cache) { 3518 totalCount++; 3519 if (cache->source == NULL) { 3520 cache_info stackInfo; 3521 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3522 ? 
sCacheInfoTable[rootCount] : stackInfo; 3523 rootCount++; 3524 info.cache = cache; 3525 info.page_count = 0; 3526 info.committed = 0; 3527 update_cache_info_recursively(cache, info); 3528 totalCommitted += info.committed; 3529 totalPages += info.page_count; 3530 } 3531 3532 cache = cache->debug_next; 3533 } 3534 3535 if (rootCount <= (uint32)kCacheInfoTableCount) { 3536 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3537 sortByPageCount 3538 ? &cache_info_compare_page_count 3539 : &cache_info_compare_committed); 3540 } 3541 3542 kprintf("total committed memory: %lld, total used pages: %lu\n", 3543 totalCommitted, totalPages); 3544 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3545 "tree...\n\n", totalCount, rootCount, 3546 sortByPageCount ? "page count" : "committed size"); 3547 3548 if (rootCount <= (uint32)kCacheInfoTableCount) { 3549 for (uint32 i = 0; i < rootCount; i++) { 3550 cache_info& info = sCacheInfoTable[i]; 3551 dump_caches_recursively(info.cache, info, 0); 3552 } 3553 } else 3554 kprintf("Cache info table too small! Can't sort and print caches!\n"); 3555 3556 return 0; 3557 } 3558 3559 #endif // DEBUG_CACHE_LIST 3560 3561 3562 static int 3563 dump_cache(int argc, char **argv) 3564 { 3565 vm_cache *cache; 3566 bool showPages = false; 3567 int i = 1; 3568 3569 if (argc < 2 || !strcmp(argv[1], "--help")) { 3570 kprintf("usage: %s [-ps] <address>\n" 3571 " if -p is specified, all pages are shown, if -s is used\n" 3572 " only the cache info is shown respectively.\n", argv[0]); 3573 return 0; 3574 } 3575 while (argv[i][0] == '-') { 3576 char *arg = argv[i] + 1; 3577 while (arg[0]) { 3578 if (arg[0] == 'p') 3579 showPages = true; 3580 arg++; 3581 } 3582 i++; 3583 } 3584 if (argv[i] == NULL) { 3585 kprintf("%s: invalid argument, pass address\n", argv[0]); 3586 return 0; 3587 } 3588 3589 addr_t address = parse_expression(argv[i]); 3590 if (address == 0) 3591 return 0; 3592 3593 cache = (vm_cache *)address; 3594 3595 kprintf("CACHE %p:\n", cache); 3596 kprintf(" ref_count: %ld\n", cache->RefCount()); 3597 kprintf(" source: %p\n", cache->source); 3598 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 3599 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 3600 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 3601 kprintf(" temporary: %ld\n", cache->temporary); 3602 kprintf(" scan_skip: %ld\n", cache->scan_skip); 3603 kprintf(" lock: %p\n", cache->GetLock()); 3604 #if KDEBUG 3605 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 3606 #endif 3607 kprintf(" areas:\n"); 3608 3609 for (vm_area *area = cache->areas; area != NULL; area = area->cache_next) { 3610 kprintf(" area 0x%lx, %s\n", area->id, area->name); 3611 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 3612 kprintf("\tprotection: 0x%lx\n", area->protection); 3613 kprintf("\towner: 0x%lx\n", area->address_space->id); 3614 } 3615 3616 kprintf(" consumers:\n"); 3617 vm_cache *consumer = NULL; 3618 while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) { 3619 kprintf("\t%p\n", consumer); 3620 } 3621 3622 kprintf(" pages:\n"); 3623 if (showPages) { 3624 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3625 vm_page *page = it.Next();) { 3626 if (page->type == PAGE_TYPE_PHYSICAL) { 3627 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) " 3628 "wired_count %u\n", page, page->physical_page_number, 3629 page->cache_offset, page->type, page->state, 3630 page_state_to_string(page->state), page->wired_count); 
3631 } else if(page->type == PAGE_TYPE_DUMMY) { 3632 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 3633 page, page->state, page_state_to_string(page->state)); 3634 } else 3635 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 3636 } 3637 } else 3638 kprintf("\t%ld in cache\n", cache->page_count); 3639 3640 return 0; 3641 } 3642 3643 3644 static void 3645 dump_area_struct(vm_area *area, bool mappings) 3646 { 3647 kprintf("AREA: %p\n", area); 3648 kprintf("name:\t\t'%s'\n", area->name); 3649 kprintf("owner:\t\t0x%lx\n", area->address_space->id); 3650 kprintf("id:\t\t0x%lx\n", area->id); 3651 kprintf("base:\t\t0x%lx\n", area->base); 3652 kprintf("size:\t\t0x%lx\n", area->size); 3653 kprintf("protection:\t0x%lx\n", area->protection); 3654 kprintf("wiring:\t\t0x%x\n", area->wiring); 3655 kprintf("memory_type:\t0x%x\n", area->memory_type); 3656 kprintf("cache:\t\t%p\n", area->cache); 3657 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 3658 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3659 kprintf("cache_next:\t%p\n", area->cache_next); 3660 kprintf("cache_prev:\t%p\n", area->cache_prev); 3661 3662 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3663 if (mappings) { 3664 kprintf("page mappings:\n"); 3665 while (iterator.HasNext()) { 3666 vm_page_mapping *mapping = iterator.Next(); 3667 kprintf(" %p", mapping->page); 3668 } 3669 kprintf("\n"); 3670 } else { 3671 uint32 count = 0; 3672 while (iterator.Next() != NULL) { 3673 count++; 3674 } 3675 kprintf("page mappings:\t%lu\n", count); 3676 } 3677 } 3678 3679 3680 static int 3681 dump_area(int argc, char **argv) 3682 { 3683 bool mappings = false; 3684 bool found = false; 3685 int32 index = 1; 3686 vm_area *area; 3687 addr_t num; 3688 3689 if (argc < 2 || !strcmp(argv[1], "--help")) { 3690 kprintf("usage: area [-m] <id|address|name>\n"); 3691 return 0; 3692 } 3693 3694 if (!strcmp(argv[1], "-m")) { 3695 mappings = true; 3696 index++; 3697 } 3698 3699 num = parse_expression(argv[index]); 3700 3701 // walk through the area list, looking for the arguments as a name 3702 struct hash_iterator iter; 3703 3704 hash_open(sAreaHash, &iter); 3705 while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) { 3706 if ((area->name != NULL && !strcmp(argv[index], area->name)) 3707 || num != 0 3708 && ((addr_t)area->id == num 3709 || area->base <= num && area->base + area->size > num)) { 3710 dump_area_struct(area, mappings); 3711 found = true; 3712 } 3713 } 3714 3715 if (!found) 3716 kprintf("could not find area %s (%ld)\n", argv[index], num); 3717 return 0; 3718 } 3719 3720 3721 static int 3722 dump_area_list(int argc, char **argv) 3723 { 3724 vm_area *area; 3725 struct hash_iterator iter; 3726 const char *name = NULL; 3727 int32 id = 0; 3728 3729 if (argc > 1) { 3730 id = parse_expression(argv[1]); 3731 if (id == 0) 3732 name = argv[1]; 3733 } 3734 3735 kprintf("addr id base\t\tsize protect lock name\n"); 3736 3737 hash_open(sAreaHash, &iter); 3738 while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) { 3739 if (id != 0 && area->address_space->id != id 3740 || name != NULL && strstr(area->name, name) == NULL) 3741 continue; 3742 3743 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, (void *)area->base, 3744 (void *)area->size, area->protection, area->wiring, area->name); 3745 } 3746 hash_close(sAreaHash, &iter, false); 3747 return 0; 3748 } 3749 3750 3751 static int 3752 dump_available_memory(int argc, char **argv) 3753 { 3754 kprintf("Available memory: %Ld/%lu bytes\n", 3755 
sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 3756 return 0; 3757 } 3758 3759 3760 status_t 3761 vm_delete_areas(struct vm_address_space *addressSpace) 3762 { 3763 vm_area *area; 3764 vm_area *next, *last = NULL; 3765 3766 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 3767 addressSpace->id)); 3768 3769 rw_lock_write_lock(&addressSpace->lock); 3770 3771 // remove all reserved areas in this address space 3772 3773 for (area = addressSpace->areas; area; area = next) { 3774 next = area->address_space_next; 3775 3776 if (area->id == RESERVED_AREA_ID) { 3777 // just remove it 3778 if (last) 3779 last->address_space_next = area->address_space_next; 3780 else 3781 addressSpace->areas = area->address_space_next; 3782 3783 vm_put_address_space(addressSpace); 3784 free(area); 3785 continue; 3786 } 3787 3788 last = area; 3789 } 3790 3791 // delete all the areas in this address space 3792 3793 for (area = addressSpace->areas; area; area = next) { 3794 next = area->address_space_next; 3795 delete_area(addressSpace, area); 3796 } 3797 3798 rw_lock_write_unlock(&addressSpace->lock); 3799 return B_OK; 3800 } 3801 3802 3803 static area_id 3804 vm_area_for(team_id team, addr_t address) 3805 { 3806 AddressSpaceReadLocker locker(team); 3807 if (!locker.IsLocked()) 3808 return B_BAD_TEAM_ID; 3809 3810 vm_area *area = vm_area_lookup(locker.AddressSpace(), address); 3811 if (area != NULL) 3812 return area->id; 3813 3814 return B_ERROR; 3815 } 3816 3817 3818 /*! 3819 Frees physical pages that were used during the boot process. 3820 */ 3821 static void 3822 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end) 3823 { 3824 // free all physical pages in the specified range 3825 3826 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3827 addr_t physicalAddress; 3828 uint32 flags; 3829 3830 if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) { 3831 vm_page *page = vm_lookup_page(current / B_PAGE_SIZE); 3832 if (page != NULL) 3833 vm_page_set_state(page, PAGE_STATE_FREE); 3834 } 3835 } 3836 3837 // unmap the memory 3838 map->ops->unmap(map, start, end - 1); 3839 } 3840 3841 3842 void 3843 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3844 { 3845 vm_translation_map *map = &vm_kernel_address_space()->translation_map; 3846 addr_t end = start + size; 3847 addr_t lastEnd = start; 3848 vm_area *area; 3849 3850 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end)); 3851 3852 // The areas are sorted in virtual address space order, so 3853 // we just have to find the holes between them that fall 3854 // into the area we should dispose 3855 3856 map->ops->lock(map); 3857 3858 for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) { 3859 addr_t areaStart = area->base; 3860 addr_t areaEnd = areaStart + area->size; 3861 3862 if (area->id == RESERVED_AREA_ID) 3863 continue; 3864 3865 if (areaEnd >= end) { 3866 // we are done, the areas are already beyond of what we have to free 3867 lastEnd = end; 3868 break; 3869 } 3870 3871 if (areaStart > lastEnd) { 3872 // this is something we can free 3873 TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart)); 3874 unmap_and_free_physical_pages(map, lastEnd, areaStart); 3875 } 3876 3877 lastEnd = areaEnd; 3878 } 3879 3880 if (lastEnd < end) { 3881 // we can also get rid of some space at the end of the area 3882 TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end)); 
3883 unmap_and_free_physical_pages(map, lastEnd, end); 3884 } 3885 3886 map->ops->unlock(map); 3887 } 3888 3889 3890 static void 3891 create_preloaded_image_areas(struct preloaded_image *image) 3892 { 3893 char name[B_OS_NAME_LENGTH]; 3894 void *address; 3895 int32 length; 3896 3897 // use file name to create a good area name 3898 char *fileName = strrchr(image->name, '/'); 3899 if (fileName == NULL) 3900 fileName = image->name; 3901 else 3902 fileName++; 3903 3904 length = strlen(fileName); 3905 // make sure there is enough space for the suffix 3906 if (length > 25) 3907 length = 25; 3908 3909 memcpy(name, fileName, length); 3910 strcpy(name + length, "_text"); 3911 address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE); 3912 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3913 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3914 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3915 // this will later be remapped read-only/executable by the 3916 // ELF initialization code 3917 3918 strcpy(name + length, "_data"); 3919 address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE); 3920 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3921 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3922 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3923 } 3924 3925 3926 /** Frees all previously kernel arguments areas from the kernel_args structure. 3927 * Any boot loader resources contained in that arguments must not be accessed 3928 * anymore past this point. 3929 */ 3930 3931 void 3932 vm_free_kernel_args(kernel_args *args) 3933 { 3934 uint32 i; 3935 3936 TRACE(("vm_free_kernel_args()\n")); 3937 3938 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3939 area_id area = area_for((void *)args->kernel_args_range[i].start); 3940 if (area >= B_OK) 3941 delete_area(area); 3942 } 3943 } 3944 3945 3946 static void 3947 allocate_kernel_args(kernel_args *args) 3948 { 3949 uint32 i; 3950 3951 TRACE(("allocate_kernel_args()\n")); 3952 3953 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3954 void *address = (void *)args->kernel_args_range[i].start; 3955 3956 create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size, 3957 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3958 } 3959 } 3960 3961 3962 static void 3963 unreserve_boot_loader_ranges(kernel_args *args) 3964 { 3965 uint32 i; 3966 3967 TRACE(("unreserve_boot_loader_ranges()\n")); 3968 3969 for (i = 0; i < args->num_virtual_allocated_ranges; i++) { 3970 vm_unreserve_address_range(vm_kernel_address_space_id(), 3971 (void *)args->virtual_allocated_range[i].start, 3972 args->virtual_allocated_range[i].size); 3973 } 3974 } 3975 3976 3977 static void 3978 reserve_boot_loader_ranges(kernel_args *args) 3979 { 3980 uint32 i; 3981 3982 TRACE(("reserve_boot_loader_ranges()\n")); 3983 3984 for (i = 0; i < args->num_virtual_allocated_ranges; i++) { 3985 void *address = (void *)args->virtual_allocated_range[i].start; 3986 3987 // If the address is no kernel address, we just skip it. The 3988 // architecture specific code has to deal with it. 
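		// (IS_KERNEL_ADDRESS() decides this; a skipped range is left to the
		// architecture specific code and only reported via the dprintf()
		// below.)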
3989 if (!IS_KERNEL_ADDRESS(address)) { 3990 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 3991 address, args->virtual_allocated_range[i].size); 3992 continue; 3993 } 3994 3995 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address, 3996 B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 3997 if (status < B_OK) 3998 panic("could not reserve boot loader ranges\n"); 3999 } 4000 } 4001 4002 4003 static addr_t 4004 allocate_early_virtual(kernel_args *args, size_t size) 4005 { 4006 addr_t spot = 0; 4007 uint32 i; 4008 int last_valloc_entry = 0; 4009 4010 size = PAGE_ALIGN(size); 4011 // find a slot in the virtual allocation addr range 4012 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 4013 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4014 + args->virtual_allocated_range[i - 1].size; 4015 last_valloc_entry = i; 4016 // check to see if the space between this one and the last is big enough 4017 if (previousRangeEnd >= KERNEL_BASE 4018 && args->virtual_allocated_range[i].start 4019 - previousRangeEnd >= size) { 4020 spot = previousRangeEnd; 4021 args->virtual_allocated_range[i - 1].size += size; 4022 goto out; 4023 } 4024 } 4025 if (spot == 0) { 4026 // we hadn't found one between allocation ranges. this is ok. 4027 // see if there's a gap after the last one 4028 addr_t lastRangeEnd 4029 = args->virtual_allocated_range[last_valloc_entry].start 4030 + args->virtual_allocated_range[last_valloc_entry].size; 4031 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 4032 spot = lastRangeEnd; 4033 args->virtual_allocated_range[last_valloc_entry].size += size; 4034 goto out; 4035 } 4036 // see if there's a gap before the first one 4037 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 4038 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 4039 args->virtual_allocated_range[0].start -= size; 4040 spot = args->virtual_allocated_range[0].start; 4041 goto out; 4042 } 4043 } 4044 } 4045 4046 out: 4047 return spot; 4048 } 4049 4050 4051 static bool 4052 is_page_in_physical_memory_range(kernel_args *args, addr_t address) 4053 { 4054 // TODO: horrible brute-force method of determining if the page can be allocated 4055 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4056 if (address >= args->physical_memory_range[i].start 4057 && address < args->physical_memory_range[i].start 4058 + args->physical_memory_range[i].size) 4059 return true; 4060 } 4061 return false; 4062 } 4063 4064 4065 static addr_t 4066 allocate_early_physical_page(kernel_args *args) 4067 { 4068 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4069 addr_t nextPage; 4070 4071 nextPage = args->physical_allocated_range[i].start 4072 + args->physical_allocated_range[i].size; 4073 // see if the page after the next allocated paddr run can be allocated 4074 if (i + 1 < args->num_physical_allocated_ranges 4075 && args->physical_allocated_range[i + 1].size != 0) { 4076 // see if the next page will collide with the next allocated range 4077 if (nextPage >= args->physical_allocated_range[i+1].start) 4078 continue; 4079 } 4080 // see if the next physical page fits in the memory block 4081 if (is_page_in_physical_memory_range(args, nextPage)) { 4082 // we got one! 4083 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4084 return nextPage / B_PAGE_SIZE; 4085 } 4086 } 4087 4088 return 0; 4089 // could not allocate a block 4090 } 4091 4092 4093 /*! 
4094 This one uses the kernel_args' physical and virtual memory ranges to 4095 allocate some pages before the VM is completely up. 4096 */ 4097 addr_t 4098 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize, 4099 uint32 attributes) 4100 { 4101 if (physicalSize > virtualSize) 4102 physicalSize = virtualSize; 4103 4104 // find the vaddr to allocate at 4105 addr_t virtualBase = allocate_early_virtual(args, virtualSize); 4106 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 4107 4108 // map the pages 4109 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4110 addr_t physicalAddress = allocate_early_physical_page(args); 4111 if (physicalAddress == 0) 4112 panic("error allocating early page!\n"); 4113 4114 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4115 4116 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4117 physicalAddress * B_PAGE_SIZE, attributes, 4118 &allocate_early_physical_page); 4119 } 4120 4121 return virtualBase; 4122 } 4123 4124 4125 status_t 4126 vm_init(kernel_args *args) 4127 { 4128 struct preloaded_image *image; 4129 void *address; 4130 status_t err = 0; 4131 uint32 i; 4132 4133 TRACE(("vm_init: entry\n")); 4134 err = arch_vm_translation_map_init(args); 4135 err = arch_vm_init(args); 4136 4137 // initialize some globals 4138 sNextAreaID = 1; 4139 4140 vm_page_init_num_pages(args); 4141 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4142 4143 size_t heapSize = INITIAL_HEAP_SIZE; 4144 // try to accomodate low memory systems 4145 while (heapSize > sAvailableMemory / 8) 4146 heapSize /= 2; 4147 if (heapSize < 1024 * 1024) 4148 panic("vm_init: go buy some RAM please."); 4149 4150 // map in the new heap and initialize it 4151 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4152 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4153 TRACE(("heap at 0x%lx\n", heapBase)); 4154 heap_init(heapBase, heapSize); 4155 4156 size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE; 4157 addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize, 4158 slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4159 slab_init(args, slabInitialBase, slabInitialSize); 4160 4161 // initialize the free page list and physical page mapper 4162 vm_page_init(args); 4163 4164 // initialize the hash table that stores the pages mapped to caches 4165 vm_cache_init(args); 4166 4167 { 4168 vm_area *area; 4169 sAreaHash = hash_init(AREA_HASH_TABLE_SIZE, 4170 (addr_t)&area->hash_next - (addr_t)area, 4171 &area_compare, &area_hash); 4172 if (sAreaHash == NULL) 4173 panic("vm_init: error creating aspace hash table\n"); 4174 } 4175 4176 vm_address_space_init(); 4177 reserve_boot_loader_ranges(args); 4178 4179 // do any further initialization that the architecture dependant layers may need now 4180 arch_vm_translation_map_init_post_area(args); 4181 arch_vm_init_post_area(args); 4182 vm_page_init_post_area(args); 4183 4184 // allocate areas to represent stuff that already exists 4185 4186 address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE); 4187 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4188 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4189 4190 address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE); 4191 create_area("initial slab space", &address, B_EXACT_ADDRESS, 4192 slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA 4193 | B_KERNEL_WRITE_AREA); 4194 4195 allocate_kernel_args(args); 4196 4197 create_preloaded_image_areas(&args->kernel_image); 4198 4199 
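	// The preloaded boot modules get the same treatment as the kernel image
	// above: create_preloaded_image_areas() creates a "<file>_text" and a
	// "<file>_data" area for each of them.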
// allocate areas for preloaded images 4200 for (image = args->preloaded_images; image != NULL; image = image->next) { 4201 create_preloaded_image_areas(image); 4202 } 4203 4204 // allocate kernel stacks 4205 for (i = 0; i < args->num_cpus; i++) { 4206 char name[64]; 4207 4208 sprintf(name, "idle thread %lu kstack", i + 1); 4209 address = (void *)args->cpu_kstack[i].start; 4210 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4211 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4212 } 4213 4214 #if DEBUG_CACHE_LIST 4215 create_area("cache info table", (void**)&sCacheInfoTable, 4216 B_ANY_KERNEL_ADDRESS, 4217 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4218 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4219 #endif // DEBUG_CACHE_LIST 4220 4221 // add some debugger commands 4222 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4223 add_debugger_command("area", &dump_area, "Dump info about a particular area"); 4224 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 4225 add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree"); 4226 #if DEBUG_CACHE_LIST 4227 add_debugger_command_etc("caches", &dump_caches, 4228 "List all vm_cache trees", 4229 "[ \"-c\" ]\n" 4230 "All cache trees are listed sorted in decreasing order by number of\n" 4231 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 4232 0); 4233 #endif 4234 add_debugger_command("avail", &dump_available_memory, "Dump available memory"); 4235 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4236 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4237 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4238 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4239 add_debugger_command("string", &display_mem, "dump strings"); 4240 4241 TRACE(("vm_init: exit\n")); 4242 4243 return err; 4244 } 4245 4246 4247 status_t 4248 vm_init_post_sem(kernel_args *args) 4249 { 4250 // This frees all unused boot loader resources and makes its space available again 4251 arch_vm_init_end(args); 4252 unreserve_boot_loader_ranges(args); 4253 4254 // fill in all of the semaphores that were not allocated before 4255 // since we're still single threaded and only the kernel address space exists, 4256 // it isn't that hard to find all of the ones we need to create 4257 4258 arch_vm_translation_map_init_post_sem(args); 4259 vm_address_space_init_post_sem(); 4260 4261 slab_init_post_sem(); 4262 return heap_init_post_sem(); 4263 } 4264 4265 4266 status_t 4267 vm_init_post_thread(kernel_args *args) 4268 { 4269 vm_page_init_post_thread(args); 4270 vm_daemon_init(); 4271 slab_init_post_thread(); 4272 return heap_init_post_thread(); 4273 } 4274 4275 4276 status_t 4277 vm_init_post_modules(kernel_args *args) 4278 { 4279 return arch_vm_init_post_modules(args); 4280 } 4281 4282 4283 void 4284 permit_page_faults(void) 4285 { 4286 struct thread *thread = thread_get_current_thread(); 4287 if (thread != NULL) 4288 atomic_add(&thread->page_faults_allowed, 1); 4289 } 4290 4291 4292 void 4293 forbid_page_faults(void) 4294 { 4295 struct thread *thread = thread_get_current_thread(); 4296 if (thread != NULL) 4297 atomic_add(&thread->page_faults_allowed, -1); 4298 } 4299 4300 4301 status_t 4302 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 4303 addr_t *newIP) 4304 { 4305 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", 
address, 4306 faultAddress)); 4307 4308 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4309 4310 addr_t pageAddress = ROUNDOWN(address, B_PAGE_SIZE); 4311 vm_address_space *addressSpace = NULL; 4312 4313 status_t status = B_OK; 4314 *newIP = 0; 4315 atomic_add((int32*)&sPageFaults, 1); 4316 4317 if (IS_KERNEL_ADDRESS(pageAddress)) { 4318 addressSpace = vm_get_kernel_address_space(); 4319 } else if (IS_USER_ADDRESS(pageAddress)) { 4320 addressSpace = vm_get_current_user_address_space(); 4321 if (addressSpace == NULL) { 4322 if (!isUser) { 4323 dprintf("vm_page_fault: kernel thread accessing invalid user " 4324 "memory!\n"); 4325 status = B_BAD_ADDRESS; 4326 TPF(PageFaultError(-1, 4327 VMPageFaultTracing 4328 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4329 } else { 4330 // XXX weird state. 4331 panic("vm_page_fault: non kernel thread accessing user memory " 4332 "that doesn't exist!\n"); 4333 status = B_BAD_ADDRESS; 4334 } 4335 } 4336 } else { 4337 // the hit was probably in the 64k DMZ between kernel and user space 4338 // this keeps a user space thread from passing a buffer that crosses 4339 // into kernel space 4340 status = B_BAD_ADDRESS; 4341 TPF(PageFaultError(-1, 4342 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4343 } 4344 4345 if (status == B_OK) 4346 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser); 4347 4348 if (status < B_OK) { 4349 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4350 strerror(status), address, faultAddress, isWrite, isUser, 4351 thread_get_current_thread_id()); 4352 if (!isUser) { 4353 struct thread *thread = thread_get_current_thread(); 4354 if (thread != NULL && thread->fault_handler != 0) { 4355 // this will cause the arch dependant page fault handler to 4356 // modify the IP on the interrupt frame or whatever to return 4357 // to this address 4358 *newIP = thread->fault_handler; 4359 } else { 4360 // unhandled page fault in the kernel 4361 panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n", 4362 address, faultAddress); 4363 } 4364 } else { 4365 #if 1 4366 rw_lock_read_lock(&addressSpace->lock); 4367 4368 // TODO: remove me once we have proper userland debugging support 4369 // (and tools) 4370 vm_area *area = vm_area_lookup(addressSpace, faultAddress); 4371 4372 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4373 // fault and someone is already waiting for a write lock on the same address 4374 // space. This thread will then try to acquire the semaphore again and will 4375 // be queued after the writer. 4376 struct thread *thread = thread_get_current_thread(); 4377 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4378 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4379 thread->name, thread->id, thread->team->name, thread->team->id, 4380 isWrite ? "write" : "read", address, faultAddress, 4381 area ? area->name : "???", 4382 faultAddress - (area ? area->base : 0x0)); 4383 4384 // We can print a stack trace of the userland thread here. 4385 #if 1 4386 if (area) { 4387 struct stack_frame { 4388 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4389 struct stack_frame* previous; 4390 void* return_address; 4391 #else 4392 // ... 
4393 #warning writeme 4394 #endif 4395 } frame; 4396 #ifdef __INTEL__ 4397 struct iframe *iframe = i386_get_user_iframe(); 4398 if (iframe == NULL) 4399 panic("iframe is NULL!"); 4400 4401 status_t status = user_memcpy(&frame, (void *)iframe->ebp, 4402 sizeof(struct stack_frame)); 4403 #elif defined(__POWERPC__) 4404 struct iframe *iframe = ppc_get_user_iframe(); 4405 if (iframe == NULL) 4406 panic("iframe is NULL!"); 4407 4408 status_t status = user_memcpy(&frame, (void *)iframe->r1, 4409 sizeof(struct stack_frame)); 4410 #else 4411 # warning "vm_page_fault() stack trace won't work" 4412 status = B_ERROR; 4413 #endif 4414 4415 dprintf("stack trace:\n"); 4416 int32 maxFrames = 50; 4417 while (status == B_OK && --maxFrames >= 0 4418 && frame.return_address != NULL) { 4419 dprintf(" %p", frame.return_address); 4420 area = vm_area_lookup(addressSpace, 4421 (addr_t)frame.return_address); 4422 if (area) { 4423 dprintf(" (%s + %#lx)", area->name, 4424 (addr_t)frame.return_address - area->base); 4425 } 4426 dprintf("\n"); 4427 4428 status = user_memcpy(&frame, frame.previous, 4429 sizeof(struct stack_frame)); 4430 } 4431 } 4432 #endif // 0 (stack trace) 4433 4434 rw_lock_read_unlock(&addressSpace->lock); 4435 #endif 4436 4437 // TODO: the fault_callback is a temporary solution for vm86 4438 if (thread->fault_callback == NULL 4439 || thread->fault_callback(address, faultAddress, isWrite)) { 4440 // If the thread has a signal handler for SIGSEGV, we simply 4441 // send it the signal. Otherwise we notify the user debugger 4442 // first. 4443 struct sigaction action; 4444 if (sigaction(SIGSEGV, NULL, &action) == 0 4445 && action.sa_handler != SIG_DFL 4446 && action.sa_handler != SIG_IGN) { 4447 send_signal(thread->id, SIGSEGV); 4448 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4449 SIGSEGV)) { 4450 send_signal(thread->id, SIGSEGV); 4451 } 4452 } 4453 } 4454 } 4455 4456 if (addressSpace != NULL) 4457 vm_put_address_space(addressSpace); 4458 4459 return B_HANDLED_INTERRUPT; 4460 } 4461 4462 4463 static inline status_t 4464 fault_acquire_locked_source(vm_cache *cache, vm_cache **_source) 4465 { 4466 vm_cache *source = cache->source; 4467 if (source == NULL) 4468 return B_ERROR; 4469 4470 source->Lock(); 4471 source->AcquireRefLocked(); 4472 4473 *_source = source; 4474 return B_OK; 4475 } 4476 4477 4478 /*! 4479 Inserts a busy dummy page into a cache, and makes sure the cache won't go 4480 away by grabbing a reference to it. 4481 */ 4482 static inline void 4483 fault_insert_dummy_page(vm_cache *cache, vm_dummy_page &dummyPage, 4484 off_t cacheOffset) 4485 { 4486 dummyPage.state = PAGE_STATE_BUSY; 4487 cache->AcquireRefLocked(); 4488 cache->InsertPage(&dummyPage, cacheOffset); 4489 dummyPage.busy_condition.Publish(&dummyPage, "page"); 4490 } 4491 4492 4493 /*! 4494 Removes the busy dummy page from a cache, and releases its reference to 4495 the cache. 4496 */ 4497 static inline void 4498 fault_remove_dummy_page(vm_dummy_page &dummyPage, bool isLocked) 4499 { 4500 vm_cache *cache = dummyPage.cache; 4501 if (!isLocked) 4502 cache->Lock(); 4503 4504 if (dummyPage.state == PAGE_STATE_BUSY) { 4505 cache->RemovePage(&dummyPage); 4506 dummyPage.state = PAGE_STATE_INACTIVE; 4507 dummyPage.busy_condition.Unpublish(); 4508 } 4509 4510 cache->ReleaseRefLocked(); 4511 4512 if (!isLocked) 4513 cache->Unlock(); 4514 } 4515 4516 4517 /*! 4518 Finds a page at the specified \a cacheOffset in either the \a topCacheRef 4519 or in its source chain. 
Will also page in a missing page in case there is 4520 a cache that has the page. 4521 If it couldn't find a page, it will return the vm_cache that should get it, 4522 otherwise, it will return the vm_cache that contains the cache. 4523 It always grabs a reference to the vm_cache that it returns, and also locks it. 4524 */ 4525 static inline status_t 4526 fault_find_page(vm_translation_map *map, vm_cache *topCache, 4527 off_t cacheOffset, bool isWrite, vm_dummy_page &dummyPage, 4528 vm_cache **_pageCache, vm_page** _page, bool* _restart) 4529 { 4530 *_restart = false; 4531 vm_cache *cache = topCache; 4532 vm_cache *lastCache = NULL; 4533 vm_page *page = NULL; 4534 4535 cache->Lock(); 4536 cache->AcquireRefLocked(); 4537 // we release this later in the loop 4538 4539 while (cache != NULL) { 4540 if (lastCache != NULL) 4541 lastCache->ReleaseRefAndUnlock(); 4542 4543 // we hold the lock of the cache at this point 4544 4545 lastCache = cache; 4546 4547 for (;;) { 4548 page = cache->LookupPage(cacheOffset); 4549 if (page != NULL && page->state != PAGE_STATE_BUSY) { 4550 // we found the page 4551 break; 4552 } 4553 if (page == NULL || page == &dummyPage) 4554 break; 4555 4556 // page must be busy -- wait for it to become unbusy 4557 { 4558 ConditionVariableEntry entry; 4559 entry.Add(page); 4560 cache->Unlock(); 4561 entry.Wait(); 4562 cache->Lock(); 4563 } 4564 } 4565 4566 if (page != NULL && page != &dummyPage) 4567 break; 4568 4569 // The current cache does not contain the page we're looking for 4570 4571 // see if the backing store has it 4572 if (cache->HasPage(cacheOffset)) { 4573 // insert a fresh page and mark it busy -- we're going to read it in 4574 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4575 cache->InsertPage(page, cacheOffset); 4576 4577 ConditionVariable busyCondition; 4578 busyCondition.Publish(page, "page"); 4579 4580 cache->Unlock(); 4581 4582 // get a virtual address for the page 4583 iovec vec; 4584 vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE); 4585 size_t bytesRead = vec.iov_len = B_PAGE_SIZE; 4586 4587 // read it in 4588 status_t status = cache->Read(cacheOffset, &vec, 1, 4589 B_PHYSICAL_IO_REQUEST, &bytesRead); 4590 4591 cache->Lock(); 4592 4593 if (status < B_OK) { 4594 // on error remove and free the page 4595 dprintf("reading page from cache %p returned: %s!\n", 4596 cache, strerror(status)); 4597 4598 busyCondition.Unpublish(); 4599 cache->RemovePage(page); 4600 vm_page_set_state(page, PAGE_STATE_FREE); 4601 4602 cache->ReleaseRefAndUnlock(); 4603 return status; 4604 } 4605 4606 // mark the page unbusy again 4607 page->state = PAGE_STATE_ACTIVE; 4608 busyCondition.Unpublish(); 4609 break; 4610 } 4611 4612 // If we're at the top most cache, insert the dummy page here to keep 4613 // other threads from faulting on the same address and chasing us up the 4614 // cache chain 4615 if (cache == topCache && dummyPage.state != PAGE_STATE_BUSY) 4616 fault_insert_dummy_page(cache, dummyPage, cacheOffset); 4617 4618 vm_cache *nextCache; 4619 status_t status = fault_acquire_locked_source(cache, &nextCache); 4620 if (status < B_OK) 4621 nextCache = NULL; 4622 4623 // at this point, we still hold a ref to this cache (through lastCacheRef) 4624 4625 cache = nextCache; 4626 } 4627 4628 if (page == &dummyPage) 4629 page = NULL; 4630 4631 if (page == NULL) { 4632 // there was no adequate page, determine the cache for a clean one 4633 4634 ASSERT(cache == NULL); 4635 4636 // We rolled off the end of the cache chain, so we need to decide which 4637 // cache 
will get the new page we're about to create. 4638 cache = isWrite ? topCache : lastCache; 4639 // Read-only pages come in the deepest cache - only the 4640 // top most cache may have direct write access. 4641 if (cache != lastCache) { 4642 lastCache->ReleaseRefAndUnlock(); 4643 cache->Lock(); 4644 cache->AcquireRefLocked(); 4645 } 4646 4647 vm_page* newPage = cache->LookupPage(cacheOffset); 4648 if (newPage && newPage != &dummyPage) { 4649 // A new page turned up. It could be the one we're looking 4650 // for, but it could as well be a dummy page from someone 4651 // else or an otherwise busy page. We can't really handle 4652 // that here. Hence we completely restart this functions. 4653 cache->ReleaseRefAndUnlock(); 4654 *_restart = true; 4655 } 4656 } else { 4657 // we still own reference and lock to the cache 4658 } 4659 4660 *_pageCache = cache; 4661 *_page = page; 4662 return B_OK; 4663 } 4664 4665 4666 /*! 4667 Returns the page that should be mapped into the area that got the fault. 4668 It returns the owner of the page in \a sourceCache - it keeps a reference 4669 to it, and has also locked it on exit. 4670 */ 4671 static inline status_t 4672 fault_get_page(vm_translation_map *map, vm_cache *topCache, off_t cacheOffset, 4673 bool isWrite, vm_dummy_page &dummyPage, vm_cache **_sourceCache, 4674 vm_cache **_copiedSource, vm_page** _page) 4675 { 4676 vm_cache *cache; 4677 vm_page *page; 4678 bool restart; 4679 for (;;) { 4680 status_t status = fault_find_page(map, topCache, cacheOffset, isWrite, 4681 dummyPage, &cache, &page, &restart); 4682 if (status != B_OK) 4683 return status; 4684 4685 if (!restart) 4686 break; 4687 4688 // Remove the dummy page, if it has been inserted. 4689 topCache->Lock(); 4690 4691 if (dummyPage.state == PAGE_STATE_BUSY) { 4692 ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n", 4693 &dummyPage); 4694 fault_remove_dummy_page(dummyPage, true); 4695 } 4696 4697 topCache->Unlock(); 4698 } 4699 4700 if (page == NULL) { 4701 // we still haven't found a page, so we allocate a clean one 4702 4703 page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 4704 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number)); 4705 4706 // Insert the new page into our cache, and replace it with the dummy page if necessary 4707 4708 // If we inserted a dummy page into this cache (i.e. if it is the top 4709 // cache), we have to remove it now 4710 if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cache) { 4711 #if DEBUG_PAGE_CACHE_TRANSITIONS 4712 page->debug_flags = dummyPage.debug_flags | 0x8; 4713 if (dummyPage.collided_page != NULL) { 4714 dummyPage.collided_page->collided_page = page; 4715 page->collided_page = dummyPage.collided_page; 4716 } 4717 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 4718 4719 fault_remove_dummy_page(dummyPage, true); 4720 } 4721 4722 cache->InsertPage(page, cacheOffset); 4723 4724 if (dummyPage.state == PAGE_STATE_BUSY) { 4725 #if DEBUG_PAGE_CACHE_TRANSITIONS 4726 page->debug_flags = dummyPage.debug_flags | 0x10; 4727 if (dummyPage.collided_page != NULL) { 4728 dummyPage.collided_page->collided_page = page; 4729 page->collided_page = dummyPage.collided_page; 4730 } 4731 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 4732 4733 // This is not the top cache into which we inserted the dummy page, 4734 // let's remove it from there. We need to temporarily unlock our 4735 // cache to comply with the cache locking policy. 
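			// Dropping our lock first means we never hold two cache locks at
			// once here: with isLocked == false, fault_remove_dummy_page()
			// locks (and unlocks) the dummy page's cache itself.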
4736 cache->Unlock(); 4737 fault_remove_dummy_page(dummyPage, false); 4738 cache->Lock(); 4739 } 4740 } 4741 4742 // We now have the page and a cache it belongs to - we now need to make 4743 // sure that the area's cache can access it, too, and sees the correct data 4744 4745 if (page->cache != topCache && isWrite) { 4746 // Now we have a page that has the data we want, but in the wrong cache 4747 // object so we need to copy it and stick it into the top cache. 4748 // Note that this and the "if" before are mutual exclusive. If 4749 // fault_find_page() didn't find the page, it would return the top cache 4750 // for write faults. 4751 vm_page *sourcePage = page; 4752 4753 // ToDo: if memory is low, it might be a good idea to steal the page 4754 // from our source cache - if possible, that is 4755 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4756 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4757 #if 0 4758 if (cacheOffset == 0x12000) 4759 dprintf("%ld: copy page %p to page %p from cache %p to cache %p\n", find_thread(NULL), 4760 sourcePage, page, sourcePage->cache, topCacheRef->cache); 4761 #endif 4762 4763 // copy the page 4764 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4765 sourcePage->physical_page_number * B_PAGE_SIZE); 4766 4767 if (sourcePage->state != PAGE_STATE_MODIFIED) 4768 vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE); 4769 4770 cache->Unlock(); 4771 topCache->Lock(); 4772 4773 // Since the top cache has been unlocked for a while, someone else 4774 // (RemoveConsumer()) might have replaced our dummy page. 4775 vm_page* newPage = NULL; 4776 for (;;) { 4777 newPage = topCache->LookupPage(cacheOffset); 4778 if (newPage == NULL || newPage == &dummyPage) { 4779 newPage = NULL; 4780 break; 4781 } 4782 4783 if (newPage->state != PAGE_STATE_BUSY) 4784 break; 4785 4786 // The page is busy, wait till it becomes unbusy. 4787 ConditionVariableEntry entry; 4788 entry.Add(newPage); 4789 topCache->Unlock(); 4790 entry.Wait(); 4791 topCache->Lock(); 4792 } 4793 4794 if (newPage) { 4795 // Indeed someone else threw in a page. We free ours and are happy. 4796 vm_page_set_state(page, PAGE_STATE_FREE); 4797 page = newPage; 4798 } else { 4799 // Insert the new page into our cache and remove the dummy page, if 4800 // necessary. 
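			// Note that topCache stays locked across the removal below and
			// the InsertPage() that follows, so threads that were waiting on
			// the dummy page's busy condition should find the real page once
			// they manage to re-acquire the cache lock.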
4801 4802 // if we inserted a dummy page into this cache, we have to remove it now 4803 if (dummyPage.state == PAGE_STATE_BUSY) { 4804 ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n", 4805 &dummyPage); 4806 fault_remove_dummy_page(dummyPage, true); 4807 } 4808 4809 topCache->InsertPage(page, cacheOffset); 4810 } 4811 4812 *_copiedSource = cache; 4813 4814 cache = topCache; 4815 cache->AcquireRefLocked(); 4816 } 4817 4818 *_sourceCache = cache; 4819 *_page = page; 4820 return B_OK; 4821 } 4822 4823 4824 static status_t 4825 vm_soft_fault(vm_address_space *addressSpace, addr_t originalAddress, 4826 bool isWrite, bool isUser) 4827 { 4828 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 4829 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 4830 4831 AddressSpaceReadLocker locker(addressSpace, true); 4832 4833 atomic_add(&addressSpace->fault_count, 1); 4834 4835 // Get the area the fault was in 4836 4837 addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE); 4838 4839 vm_area *area = vm_area_lookup(addressSpace, address); 4840 if (area == NULL) { 4841 dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n", 4842 originalAddress); 4843 TPF(PageFaultError(-1, VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4844 return B_BAD_ADDRESS; 4845 } 4846 4847 // check permissions 4848 uint32 protection = get_area_page_protection(area, address); 4849 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4850 dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress); 4851 TPF(PageFaultError(area->id, 4852 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4853 return B_PERMISSION_DENIED; 4854 } 4855 if (isWrite && (protection 4856 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4857 dprintf("write access attempted on read-only area 0x%lx at %p\n", 4858 area->id, (void *)originalAddress); 4859 TPF(PageFaultError(area->id, 4860 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_ONLY)); 4861 return B_PERMISSION_DENIED; 4862 } 4863 4864 // We have the area, it was a valid access, so let's try to resolve the page fault now. 
4865 // At first, the top most cache from the area is investigated 4866 4867 vm_cache *topCache = vm_area_get_locked_cache(area); 4868 off_t cacheOffset = address - area->base + area->cache_offset; 4869 4870 atomic_add(&area->no_cache_change, 1); 4871 // make sure the area's cache isn't replaced during the page fault 4872 4873 // See if this cache has a fault handler - this will do all the work for us 4874 { 4875 // Note, since the page fault is resolved with interrupts enabled, the 4876 // fault handler could be called more than once for the same reason - 4877 // the store must take this into account 4878 status_t status = topCache->Fault(addressSpace, cacheOffset); 4879 if (status != B_BAD_HANDLER) { 4880 vm_area_put_locked_cache(topCache); 4881 return status; 4882 } 4883 } 4884 4885 topCache->Unlock(); 4886 4887 // The top most cache has no fault handler, so let's see if the cache or its sources 4888 // already have the page we're searching for (we're going from top to bottom) 4889 4890 vm_translation_map *map = &addressSpace->translation_map; 4891 size_t reservePages = 2 + map->ops->map_max_pages_need(map, 4892 originalAddress, originalAddress); 4893 vm_page_reserve_pages(reservePages); 4894 // we may need up to 2 pages - reserving them upfront makes sure 4895 // we don't have any cache locked, so that the page daemon/thief 4896 // can do their job without problems 4897 4898 vm_dummy_page dummyPage; 4899 dummyPage.cache = NULL; 4900 dummyPage.state = PAGE_STATE_INACTIVE; 4901 dummyPage.type = PAGE_TYPE_DUMMY; 4902 dummyPage.wired_count = 0; 4903 #if DEBUG_PAGE_CACHE_TRANSITIONS 4904 dummyPage.debug_flags = 0; 4905 dummyPage.collided_page = NULL; 4906 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 4907 4908 vm_cache *copiedPageSource = NULL; 4909 vm_cache *pageSource; 4910 vm_page *page; 4911 // TODO: We keep the address space read lock during the whole operation 4912 // which might be rather expensive depending on where the data has to 4913 // be retrieved from. 4914 status_t status = fault_get_page(map, topCache, cacheOffset, isWrite, 4915 dummyPage, &pageSource, &copiedPageSource, &page); 4916 4917 if (status == B_OK) { 4918 // All went fine, all there is left to do is to map the page into the address space 4919 TPF(PageFaultDone(area->id, topCache, page->cache, page)); 4920 4921 // In case this is a copy-on-write page, we need to unmap it from the area now 4922 if (isWrite && page->cache == topCache) 4923 vm_unmap_pages(area, address, B_PAGE_SIZE, true); 4924 4925 // TODO: there is currently no mechanism to prevent a page being mapped 4926 // more than once in case of a second page fault! 
4927 4928 // If the page doesn't reside in the area's cache, we need to make sure it's 4929 // mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write) 4930 uint32 newProtection = protection; 4931 if (page->cache != topCache && !isWrite) 4932 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4933 4934 vm_map_page(area, page, address, newProtection); 4935 4936 pageSource->ReleaseRefAndUnlock(); 4937 } else 4938 TPF(PageFaultError(area->id, status)); 4939 4940 atomic_add(&area->no_cache_change, -1); 4941 4942 if (copiedPageSource) 4943 copiedPageSource->ReleaseRef(); 4944 4945 if (dummyPage.state == PAGE_STATE_BUSY) { 4946 // We still have the dummy page in the cache - that happens if we didn't need 4947 // to allocate a new page before, but could use one in another cache 4948 fault_remove_dummy_page(dummyPage, false); 4949 } 4950 4951 topCache->ReleaseRef(); 4952 vm_page_unreserve_pages(reservePages); 4953 4954 return status; 4955 } 4956 4957 4958 /*! You must have the address space's sem held */ 4959 vm_area * 4960 vm_area_lookup(vm_address_space *addressSpace, addr_t address) 4961 { 4962 vm_area *area; 4963 4964 // check the areas list first 4965 area = addressSpace->area_hint; 4966 if (area && area->base <= address && area->base + (area->size - 1) >= address) 4967 goto found; 4968 4969 for (area = addressSpace->areas; area != NULL; area = area->address_space_next) { 4970 if (area->id == RESERVED_AREA_ID) 4971 continue; 4972 4973 if (area->base <= address && area->base + (area->size - 1) >= address) 4974 break; 4975 } 4976 4977 found: 4978 if (area) 4979 addressSpace->area_hint = area; 4980 4981 return area; 4982 } 4983 4984 4985 status_t 4986 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 4987 { 4988 return vm_kernel_address_space()->translation_map.ops->get_physical_page( 4989 paddr, _vaddr, _handle); 4990 } 4991 4992 status_t 4993 vm_put_physical_page(addr_t vaddr, void* handle) 4994 { 4995 return vm_kernel_address_space()->translation_map.ops->put_physical_page( 4996 vaddr, handle); 4997 } 4998 4999 5000 status_t 5001 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 5002 { 5003 return vm_kernel_address_space()->translation_map.ops 5004 ->get_physical_page_current_cpu(paddr, _vaddr, _handle); 5005 } 5006 5007 status_t 5008 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5009 { 5010 return vm_kernel_address_space()->translation_map.ops 5011 ->put_physical_page_current_cpu(vaddr, handle); 5012 } 5013 5014 5015 status_t 5016 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 5017 { 5018 return vm_kernel_address_space()->translation_map.ops 5019 ->get_physical_page_debug(paddr, _vaddr, _handle); 5020 } 5021 5022 status_t 5023 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5024 { 5025 return vm_kernel_address_space()->translation_map.ops 5026 ->put_physical_page_debug(vaddr, handle); 5027 } 5028 5029 5030 void 5031 vm_get_info(system_memory_info* info) 5032 { 5033 swap_get_info(info); 5034 5035 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 5036 info->page_faults = sPageFaults; 5037 5038 MutexLocker locker(sAvailableMemoryLock); 5039 info->free_memory = sAvailableMemory; 5040 info->needed_memory = sNeededMemory; 5041 } 5042 5043 5044 uint32 5045 vm_num_page_faults(void) 5046 { 5047 return sPageFaults; 5048 } 5049 5050 5051 off_t 5052 vm_available_memory(void) 5053 { 5054 MutexLocker locker(sAvailableMemoryLock); 5055 return sAvailableMemory; 5056 } 5057 5058 
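/*	Usage sketch (illustrative only): the sAvailableMemory/sNeededMemory
	accounting below only stays balanced if callers pair the reserve and
	unreserve calls, roughly like this (the names are placeholders for
	whatever the caller commits):

		if (vm_try_reserve_memory(toCommit, timeout) != B_OK)
			return B_NO_MEMORY;
		// ... later, when the commitment is dropped again:
		vm_unreserve_memory(toCommit);
*/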
5059 off_t 5060 vm_available_not_needed_memory(void) 5061 { 5062 MutexLocker locker(sAvailableMemoryLock); 5063 return sAvailableMemory - sNeededMemory; 5064 } 5065 5066 5067 void 5068 vm_unreserve_memory(size_t amount) 5069 { 5070 mutex_lock(&sAvailableMemoryLock); 5071 5072 sAvailableMemory += amount; 5073 5074 mutex_unlock(&sAvailableMemoryLock); 5075 } 5076 5077 5078 status_t 5079 vm_try_reserve_memory(size_t amount, bigtime_t timeout) 5080 { 5081 MutexLocker locker(sAvailableMemoryLock); 5082 5083 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5084 5085 if (sAvailableMemory >= amount) { 5086 sAvailableMemory -= amount; 5087 return B_OK; 5088 } 5089 5090 if (timeout <= 0) 5091 return B_NO_MEMORY; 5092 5093 // turn timeout into an absolute timeout 5094 timeout += system_time(); 5095 5096 // loop until we've got the memory or the timeout occurs 5097 do { 5098 sNeededMemory += amount; 5099 5100 // call the low resource manager 5101 locker.Unlock(); 5102 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5103 B_ABSOLUTE_TIMEOUT, timeout); 5104 locker.Lock(); 5105 5106 sNeededMemory -= amount; 5107 5108 if (sAvailableMemory >= amount) { 5109 sAvailableMemory -= amount; 5110 return B_OK; 5111 } 5112 } while (timeout > system_time()); 5113 5114 return B_NO_MEMORY; 5115 } 5116 5117 5118 status_t 5119 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type) 5120 { 5121 AddressSpaceReadLocker locker; 5122 vm_area *area; 5123 status_t status = locker.SetFromArea(id, area); 5124 if (status != B_OK) 5125 return status; 5126 5127 return arch_vm_set_memory_type(area, physicalBase, type); 5128 } 5129 5130 5131 /** This function enforces some protection properties: 5132 * - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well 5133 * - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5134 * - if no protection is specified, it defaults to B_KERNEL_READ_AREA 5135 * and B_KERNEL_WRITE_AREA. 5136 */ 5137 5138 static void 5139 fix_protection(uint32 *protection) 5140 { 5141 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5142 if ((*protection & B_USER_PROTECTION) == 0 5143 || (*protection & B_WRITE_AREA) != 0) 5144 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5145 else 5146 *protection |= B_KERNEL_READ_AREA; 5147 } 5148 } 5149 5150 5151 static void 5152 fill_area_info(struct vm_area *area, area_info *info, size_t size) 5153 { 5154 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5155 info->area = area->id; 5156 info->address = (void *)area->base; 5157 info->size = area->size; 5158 info->protection = area->protection; 5159 info->lock = B_FULL_LOCK; 5160 info->team = area->address_space->id; 5161 info->copy_count = 0; 5162 info->in_count = 0; 5163 info->out_count = 0; 5164 // ToDo: retrieve real values here! 5165 5166 vm_cache *cache = vm_area_get_locked_cache(area); 5167 5168 // Note, this is a simplification; the cache could be larger than this area 5169 info->ram_size = cache->page_count * B_PAGE_SIZE; 5170 5171 vm_area_put_locked_cache(cache); 5172 } 5173 5174 5175 /*! 5176 Tests whether or not the area that contains the specified address 5177 needs any kind of locking, and actually exists. 5178 Used by both lock_memory() and unlock_memory(). 
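	Sets \a needsLocking accordingly and returns \c B_OK if an area exists at
	\a address, or \c B_BAD_ADDRESS if it doesn't.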
5179 */ 5180 static status_t 5181 test_lock_memory(vm_address_space *addressSpace, addr_t address, 5182 bool &needsLocking) 5183 { 5184 rw_lock_read_lock(&addressSpace->lock); 5185 5186 vm_area *area = vm_area_lookup(addressSpace, address); 5187 if (area != NULL) { 5188 // This determines if we need to lock the memory at all 5189 needsLocking = area->cache_type != CACHE_TYPE_NULL 5190 && area->cache_type != CACHE_TYPE_DEVICE 5191 && area->wiring != B_FULL_LOCK 5192 && area->wiring != B_CONTIGUOUS; 5193 } 5194 5195 rw_lock_read_unlock(&addressSpace->lock); 5196 5197 if (area == NULL) 5198 return B_BAD_ADDRESS; 5199 5200 return B_OK; 5201 } 5202 5203 5204 static status_t 5205 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5206 { 5207 // is newSize a multiple of B_PAGE_SIZE? 5208 if (newSize & (B_PAGE_SIZE - 1)) 5209 return B_BAD_VALUE; 5210 5211 // lock all affected address spaces and the cache 5212 vm_area* area; 5213 vm_cache* cache; 5214 5215 MultiAddressSpaceLocker locker; 5216 status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area, 5217 &cache); 5218 if (status != B_OK) 5219 return status; 5220 AreaCacheLocker cacheLocker(cache); // already locked 5221 5222 // enforce restrictions 5223 if (!kernel) { 5224 if ((area->protection & B_KERNEL_AREA) != 0) 5225 return B_NOT_ALLOWED; 5226 // TODO: Enforce all restrictions (team, etc.)! 5227 } 5228 5229 size_t oldSize = area->size; 5230 if (newSize == oldSize) 5231 return B_OK; 5232 5233 // Resize all areas of this area's cache 5234 5235 if (cache->type != CACHE_TYPE_RAM) 5236 return B_NOT_ALLOWED; 5237 5238 if (oldSize < newSize) { 5239 // We need to check if all areas of this cache can be resized 5240 5241 for (vm_area* current = cache->areas; current != NULL; 5242 current = current->cache_next) { 5243 vm_area *next = current->address_space_next; 5244 if (next != NULL && next->base <= (current->base + newSize)) { 5245 // If the area was created inside a reserved area, it can 5246 // also be resized in that area 5247 // ToDo: if there is free space after the reserved area, it could be used as well... 5248 if (next->id == RESERVED_AREA_ID 5249 && next->cache_offset <= current->base 5250 && next->base - 1 + next->size >= current->base - 1 + newSize) 5251 continue; 5252 5253 return B_ERROR; 5254 } 5255 } 5256 } 5257 5258 // Okay, looks good so far, so let's do it 5259 5260 if (oldSize < newSize) { 5261 // Growing the cache can fail, so we do it first. 
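		// If this fails we can return right away, before any area has been
		// touched; the shrink case is deferred until after the areas have
		// been resized, since shrinking the cache cannot fail.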
5262 status = cache->Resize(cache->virtual_base + newSize); 5263 if (status != B_OK) 5264 return status; 5265 } 5266 5267 for (vm_area* current = cache->areas; current != NULL; 5268 current = current->cache_next) { 5269 vm_area *next = current->address_space_next; 5270 if (next != NULL && next->base <= (current->base + newSize)) { 5271 if (next->id == RESERVED_AREA_ID 5272 && next->cache_offset <= current->base 5273 && next->base - 1 + next->size >= current->base - 1 + newSize) { 5274 // resize reserved area 5275 addr_t offset = current->base + newSize - next->base; 5276 if (next->size <= offset) { 5277 current->address_space_next = next->address_space_next; 5278 free(next); 5279 } else { 5280 next->size -= offset; 5281 next->base += offset; 5282 } 5283 } else { 5284 panic("resize situation for area %p has changed although we " 5285 "should have the address space lock", current); 5286 status = B_ERROR; 5287 break; 5288 } 5289 } 5290 5291 current->size = newSize; 5292 5293 // we also need to unmap all pages beyond the new size, if the area has shrinked 5294 if (newSize < oldSize) { 5295 vm_unmap_pages(current, current->base + newSize, oldSize - newSize, 5296 false); 5297 } 5298 } 5299 5300 // shrinking the cache can't fail, so we do it now 5301 if (status == B_OK && newSize < oldSize) 5302 status = cache->Resize(cache->virtual_base + newSize); 5303 5304 if (status < B_OK) { 5305 // This shouldn't really be possible, but hey, who knows 5306 for (vm_area* current = cache->areas; current != NULL; 5307 current = current->cache_next) { 5308 current->size = oldSize; 5309 } 5310 5311 cache->Resize(cache->virtual_base + oldSize); 5312 } 5313 5314 // TODO: we must honour the lock restrictions of this area 5315 return status; 5316 } 5317 5318 5319 status_t 5320 vm_memset_physical(addr_t address, int value, size_t length) 5321 { 5322 return vm_kernel_address_space()->translation_map.ops->memset_physical( 5323 address, value, length); 5324 } 5325 5326 5327 status_t 5328 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 5329 { 5330 return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical( 5331 to, from, length, user); 5332 } 5333 5334 5335 status_t 5336 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 5337 { 5338 return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical( 5339 to, _from, length, user); 5340 } 5341 5342 5343 void 5344 vm_memcpy_physical_page(addr_t to, addr_t from) 5345 { 5346 return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page( 5347 to, from); 5348 } 5349 5350 5351 // #pragma mark - kernel public API 5352 5353 5354 status_t 5355 user_memcpy(void *to, const void *from, size_t size) 5356 { 5357 if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK) 5358 return B_BAD_ADDRESS; 5359 return B_OK; 5360 } 5361 5362 5363 /** \brief Copies at most (\a size - 1) characters from the string in \a from to 5364 * the string in \a to, NULL-terminating the result. 5365 * 5366 * \param to Pointer to the destination C-string. 5367 * \param from Pointer to the source C-string. 5368 * \param size Size in bytes of the string buffer pointed to by \a to. 5369 * 5370 * \return strlen(\a from). 
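 *	As with strlcpy(), a return value of \a size or more means the result was
 *	truncated. If an unhandled fault occurs while accessing one of the
 *	buffers, a negative error code is returned instead (callers in this file
 *	simply test for a result \c < \c B_OK).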
5371 */ 5372 5373 ssize_t 5374 user_strlcpy(char *to, const char *from, size_t size) 5375 { 5376 return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler); 5377 } 5378 5379 5380 status_t 5381 user_memset(void *s, char c, size_t count) 5382 { 5383 if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK) 5384 return B_BAD_ADDRESS; 5385 return B_OK; 5386 } 5387 5388 5389 status_t 5390 lock_memory_etc(team_id team, void *address, size_t numBytes, uint32 flags) 5391 { 5392 vm_address_space *addressSpace = NULL; 5393 struct vm_translation_map *map; 5394 addr_t unalignedBase = (addr_t)address; 5395 addr_t end = unalignedBase + numBytes; 5396 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 5397 bool isUser = IS_USER_ADDRESS(address); 5398 bool needsLocking = true; 5399 5400 if (isUser) { 5401 if (team == B_CURRENT_TEAM) 5402 addressSpace = vm_get_current_user_address_space(); 5403 else 5404 addressSpace = vm_get_address_space(team); 5405 } else 5406 addressSpace = vm_get_kernel_address_space(); 5407 if (addressSpace == NULL) 5408 return B_ERROR; 5409 5410 // test if we're on an area that allows faults at all 5411 5412 map = &addressSpace->translation_map; 5413 5414 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5415 if (status < B_OK) 5416 goto out; 5417 if (!needsLocking) 5418 goto out; 5419 5420 for (; base < end; base += B_PAGE_SIZE) { 5421 addr_t physicalAddress; 5422 uint32 protection; 5423 status_t status; 5424 5425 map->ops->lock(map); 5426 status = map->ops->query(map, base, &physicalAddress, &protection); 5427 map->ops->unlock(map); 5428 5429 if (status < B_OK) 5430 goto out; 5431 5432 if ((protection & PAGE_PRESENT) != 0) { 5433 // if B_READ_DEVICE is set, the caller intents to write to the locked 5434 // memory, so if it hasn't been mapped writable, we'll try the soft 5435 // fault anyway 5436 if ((flags & B_READ_DEVICE) == 0 5437 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 5438 // update wiring 5439 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5440 if (page == NULL) 5441 panic("couldn't lookup physical page just allocated\n"); 5442 5443 increment_page_wired_count(page); 5444 continue; 5445 } 5446 } 5447 5448 status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0, 5449 isUser); 5450 if (status != B_OK) { 5451 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n", 5452 (void *)unalignedBase, numBytes, flags, strerror(status)); 5453 goto out; 5454 } 5455 5456 // TODO: Here's a race condition. We should probably add a parameter 5457 // to vm_soft_fault() that would cause the page's wired count to be 5458 // incremented immediately. 5459 // TODO: After memory has been locked in an area, we need to prevent the 5460 // area from being deleted, resized, cut, etc. That could be done using 5461 // a "locked pages" count in vm_area, and maybe a condition variable, if 5462 // we want to allow waiting for the area to become eligible for these 5463 // operations again. 5464 5465 map->ops->lock(map); 5466 status = map->ops->query(map, base, &physicalAddress, &protection); 5467 map->ops->unlock(map); 5468 5469 if (status < B_OK) 5470 goto out; 5471 5472 // update wiring 5473 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5474 if (page == NULL) 5475 panic("couldn't lookup physical page"); 5476 5477 increment_page_wired_count(page); 5478 // TODO: needs to be atomic on all platforms! 
5479 } 5480 5481 out: 5482 vm_put_address_space(addressSpace); 5483 return status; 5484 } 5485 5486 5487 status_t 5488 lock_memory(void *address, size_t numBytes, uint32 flags) 5489 { 5490 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5491 } 5492 5493 5494 status_t 5495 unlock_memory_etc(team_id team, void *address, size_t numBytes, uint32 flags) 5496 { 5497 vm_address_space *addressSpace = NULL; 5498 struct vm_translation_map *map; 5499 addr_t unalignedBase = (addr_t)address; 5500 addr_t end = unalignedBase + numBytes; 5501 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 5502 bool needsLocking = true; 5503 5504 if (IS_USER_ADDRESS(address)) { 5505 if (team == B_CURRENT_TEAM) 5506 addressSpace = vm_get_current_user_address_space(); 5507 else 5508 addressSpace = vm_get_address_space(team); 5509 } else 5510 addressSpace = vm_get_kernel_address_space(); 5511 if (addressSpace == NULL) 5512 return B_ERROR; 5513 5514 map = &addressSpace->translation_map; 5515 5516 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5517 if (status < B_OK) 5518 goto out; 5519 if (!needsLocking) 5520 goto out; 5521 5522 for (; base < end; base += B_PAGE_SIZE) { 5523 map->ops->lock(map); 5524 5525 addr_t physicalAddress; 5526 uint32 protection; 5527 status = map->ops->query(map, base, &physicalAddress, 5528 &protection); 5529 5530 map->ops->unlock(map); 5531 5532 if (status < B_OK) 5533 goto out; 5534 if ((protection & PAGE_PRESENT) == 0) 5535 panic("calling unlock_memory() on unmapped memory!"); 5536 5537 // update wiring 5538 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5539 if (page == NULL) 5540 panic("couldn't lookup physical page"); 5541 5542 decrement_page_wired_count(page); 5543 } 5544 5545 out: 5546 vm_put_address_space(addressSpace); 5547 return status; 5548 } 5549 5550 5551 status_t 5552 unlock_memory(void *address, size_t numBytes, uint32 flags) 5553 { 5554 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5555 } 5556 5557 5558 /*! Similar to get_memory_map(), but also allows to specify the address space 5559 for the memory in question and has a saner semantics. 5560 Returns \c B_OK when the complete range could be translated or 5561 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5562 case the actual number of entries is written to \c *_numEntries. Any other 5563 error case indicates complete failure; \c *_numEntries will be set to \c 0 5564 in this case. 5565 */ 5566 status_t 5567 get_memory_map_etc(team_id team, const void *address, size_t numBytes, 5568 physical_entry *table, uint32* _numEntries) 5569 { 5570 uint32 numEntries = *_numEntries; 5571 *_numEntries = 0; 5572 5573 vm_address_space *addressSpace; 5574 addr_t virtualAddress = (addr_t)address; 5575 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5576 addr_t physicalAddress; 5577 status_t status = B_OK; 5578 int32 index = -1; 5579 addr_t offset = 0; 5580 bool interrupts = are_interrupts_enabled(); 5581 5582 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5583 address, numBytes, numEntries)); 5584 5585 if (numEntries == 0 || numBytes == 0) 5586 return B_BAD_VALUE; 5587 5588 // in which address space is the address to be found? 
5589 if (IS_USER_ADDRESS(virtualAddress)) { 5590 if (team == B_CURRENT_TEAM) 5591 addressSpace = vm_get_current_user_address_space(); 5592 else 5593 addressSpace = vm_get_address_space(team); 5594 } else 5595 addressSpace = vm_get_kernel_address_space(); 5596 5597 if (addressSpace == NULL) 5598 return B_ERROR; 5599 5600 vm_translation_map *map = &addressSpace->translation_map; 5601 5602 if (interrupts) 5603 map->ops->lock(map); 5604 5605 while (offset < numBytes) { 5606 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5607 uint32 flags; 5608 5609 if (interrupts) { 5610 status = map->ops->query(map, (addr_t)address + offset, 5611 &physicalAddress, &flags); 5612 } else { 5613 status = map->ops->query_interrupt(map, (addr_t)address + offset, 5614 &physicalAddress, &flags); 5615 } 5616 if (status < B_OK) 5617 break; 5618 if ((flags & PAGE_PRESENT) == 0) { 5619 panic("get_memory_map() called on unmapped memory!"); 5620 return B_BAD_ADDRESS; 5621 } 5622 5623 if (index < 0 && pageOffset > 0) { 5624 physicalAddress += pageOffset; 5625 if (bytes > B_PAGE_SIZE - pageOffset) 5626 bytes = B_PAGE_SIZE - pageOffset; 5627 } 5628 5629 // need to switch to the next physical_entry? 5630 if (index < 0 || (addr_t)table[index].address 5631 != physicalAddress - table[index].size) { 5632 if ((uint32)++index + 1 > numEntries) { 5633 // table to small 5634 status = B_BUFFER_OVERFLOW; 5635 break; 5636 } 5637 table[index].address = (void *)physicalAddress; 5638 table[index].size = bytes; 5639 } else { 5640 // page does fit in current entry 5641 table[index].size += bytes; 5642 } 5643 5644 offset += bytes; 5645 } 5646 5647 if (interrupts) 5648 map->ops->unlock(map); 5649 5650 if (status != B_OK) 5651 return status; 5652 5653 if ((uint32)index + 1 > numEntries) { 5654 *_numEntries = index; 5655 return B_BUFFER_OVERFLOW; 5656 } 5657 5658 *_numEntries = index + 1; 5659 return B_OK; 5660 } 5661 5662 5663 /*! According to the BeBook, this function should always succeed. 5664 This is no longer the case. 
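	Illustrative use (sketch only; "buffer" and "length" are placeholders):

		physical_entry table[8];
		if (get_memory_map(buffer, length, table, 8) == B_OK) {
			// table[i].address/table[i].size describe the physical runs;
			// unless all 8 entries were used, the list is terminated by an
			// entry with size == 0.
		}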
5665 */ 5666 long 5667 get_memory_map(const void *address, ulong numBytes, physical_entry *table, 5668 long numEntries) 5669 { 5670 uint32 entriesRead = numEntries; 5671 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5672 table, &entriesRead); 5673 if (error != B_OK) 5674 return error; 5675 5676 // close the entry list 5677 5678 // if it's only one entry, we will silently accept the missing ending 5679 if (numEntries == 1) 5680 return B_OK; 5681 5682 if (entriesRead + 1 > (uint32)numEntries) 5683 return B_BUFFER_OVERFLOW; 5684 5685 table[entriesRead].address = NULL; 5686 table[entriesRead].size = 0; 5687 5688 return B_OK; 5689 } 5690 5691 5692 area_id 5693 area_for(void *address) 5694 { 5695 team_id space; 5696 5697 if (IS_USER_ADDRESS(address)) { 5698 // we try the user team address space, if any 5699 space = vm_current_user_address_space_id(); 5700 if (space < B_OK) 5701 return space; 5702 } else 5703 space = vm_kernel_address_space_id(); 5704 5705 return vm_area_for(space, (addr_t)address); 5706 } 5707 5708 5709 area_id 5710 find_area(const char *name) 5711 { 5712 rw_lock_read_lock(&sAreaHashLock); 5713 struct hash_iterator iterator; 5714 hash_open(sAreaHash, &iterator); 5715 5716 vm_area *area; 5717 area_id id = B_NAME_NOT_FOUND; 5718 while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) { 5719 if (area->id == RESERVED_AREA_ID) 5720 continue; 5721 5722 if (!strcmp(area->name, name)) { 5723 id = area->id; 5724 break; 5725 } 5726 } 5727 5728 hash_close(sAreaHash, &iterator, false); 5729 rw_lock_read_unlock(&sAreaHashLock); 5730 5731 return id; 5732 } 5733 5734 5735 status_t 5736 _get_area_info(area_id id, area_info *info, size_t size) 5737 { 5738 if (size != sizeof(area_info) || info == NULL) 5739 return B_BAD_VALUE; 5740 5741 AddressSpaceReadLocker locker; 5742 vm_area *area; 5743 status_t status = locker.SetFromArea(id, area); 5744 if (status != B_OK) 5745 return status; 5746 5747 fill_area_info(area, info, size); 5748 return B_OK; 5749 } 5750 5751 5752 status_t 5753 _get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size) 5754 { 5755 addr_t nextBase = *(addr_t *)cookie; 5756 5757 // we're already through the list 5758 if (nextBase == (addr_t)-1) 5759 return B_ENTRY_NOT_FOUND; 5760 5761 if (team == B_CURRENT_TEAM) 5762 team = team_get_current_team_id(); 5763 5764 AddressSpaceReadLocker locker(team); 5765 if (!locker.IsLocked()) 5766 return B_BAD_TEAM_ID; 5767 5768 vm_area *area; 5769 for (area = locker.AddressSpace()->areas; area != NULL; 5770 area = area->address_space_next) { 5771 if (area->id == RESERVED_AREA_ID) 5772 continue; 5773 5774 if (area->base > nextBase) 5775 break; 5776 } 5777 5778 if (area == NULL) { 5779 nextBase = (addr_t)-1; 5780 return B_ENTRY_NOT_FOUND; 5781 } 5782 5783 fill_area_info(area, info, size); 5784 *cookie = (int32)(area->base); 5785 5786 return B_OK; 5787 } 5788 5789 5790 status_t 5791 set_area_protection(area_id area, uint32 newProtection) 5792 { 5793 fix_protection(&newProtection); 5794 5795 return vm_set_area_protection(vm_kernel_address_space_id(), area, 5796 newProtection, true); 5797 } 5798 5799 5800 status_t 5801 resize_area(area_id areaID, size_t newSize) 5802 { 5803 return vm_resize_area(areaID, newSize, true); 5804 } 5805 5806 5807 /** Transfers the specified area to a new team. The caller must be the owner 5808 * of the area (not yet enforced but probably should be). 
5809 * This function is currently not exported to the kernel namespace, but is 5810 * only accessible using the _kern_transfer_area() syscall. 5811 */ 5812 5813 static area_id 5814 transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target, 5815 bool kernel) 5816 { 5817 area_info info; 5818 status_t status = get_area_info(id, &info); 5819 if (status < B_OK) 5820 return status; 5821 5822 area_id clonedArea = vm_clone_area(target, info.name, _address, 5823 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5824 if (clonedArea < B_OK) 5825 return clonedArea; 5826 5827 status = vm_delete_area(info.team, id, kernel); 5828 if (status < B_OK) { 5829 vm_delete_area(target, clonedArea, kernel); 5830 return status; 5831 } 5832 5833 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 5834 5835 return clonedArea; 5836 } 5837 5838 5839 area_id 5840 map_physical_memory(const char *name, void *physicalAddress, size_t numBytes, 5841 uint32 addressSpec, uint32 protection, void **_virtualAddress) 5842 { 5843 if (!arch_vm_supports_protection(protection)) 5844 return B_NOT_SUPPORTED; 5845 5846 fix_protection(&protection); 5847 5848 return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress, 5849 addressSpec, numBytes, protection, (addr_t)physicalAddress); 5850 } 5851 5852 5853 area_id 5854 clone_area(const char *name, void **_address, uint32 addressSpec, 5855 uint32 protection, area_id source) 5856 { 5857 if ((protection & B_KERNEL_PROTECTION) == 0) 5858 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5859 5860 return vm_clone_area(vm_kernel_address_space_id(), name, _address, 5861 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5862 } 5863 5864 5865 area_id 5866 create_area_etc(team_id team, const char *name, void **address, 5867 uint32 addressSpec, uint32 size, uint32 lock, uint32 protection, 5868 uint32 flags) 5869 { 5870 fix_protection(&protection); 5871 5872 return vm_create_anonymous_area(team, (char *)name, address, addressSpec, 5873 size, lock, protection, flags, true); 5874 } 5875 5876 5877 area_id 5878 create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock, 5879 uint32 protection) 5880 { 5881 fix_protection(&protection); 5882 5883 return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address, 5884 addressSpec, size, lock, protection, 0, true); 5885 } 5886 5887 5888 status_t 5889 delete_area(area_id area) 5890 { 5891 return vm_delete_area(vm_kernel_address_space_id(), area, true); 5892 } 5893 5894 5895 // #pragma mark - Userland syscalls 5896 5897 5898 status_t 5899 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size) 5900 { 5901 // filter out some unavailable values (for userland) 5902 switch (addressSpec) { 5903 case B_ANY_KERNEL_ADDRESS: 5904 case B_ANY_KERNEL_BLOCK_ADDRESS: 5905 return B_BAD_VALUE; 5906 } 5907 5908 addr_t address; 5909 5910 if (!IS_USER_ADDRESS(userAddress) 5911 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5912 return B_BAD_ADDRESS; 5913 5914 status_t status = vm_reserve_address_range(vm_current_user_address_space_id(), 5915 (void **)&address, addressSpec, size, RESERVED_AVOID_BASE); 5916 if (status < B_OK) 5917 return status; 5918 5919 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 5920 vm_unreserve_address_range(vm_current_user_address_space_id(), 5921 (void *)address, size); 5922 return B_BAD_ADDRESS; 5923 } 5924 5925 return B_OK; 5926 } 5927 5928 
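/*	The _user_*() syscalls in this section follow a common pattern: every
	userland pointer is checked with IS_USER_ADDRESS(), arguments are copied
	in with user_memcpy()/user_strlcpy(), the actual work is delegated to the
	vm_*() layer (passing kernel == false where the call distinguishes
	callers), and results are copied back out with user_memcpy() -- any failed
	copy yields B_BAD_ADDRESS.
*/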
5929 area_id 5930 _user_area_for(void *address) 5931 { 5932 return vm_area_for(vm_current_user_address_space_id(), (addr_t)address); 5933 } 5934 5935 5936 area_id 5937 _user_find_area(const char *userName) 5938 { 5939 char name[B_OS_NAME_LENGTH]; 5940 5941 if (!IS_USER_ADDRESS(userName) 5942 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5943 return B_BAD_ADDRESS; 5944 5945 return find_area(name); 5946 } 5947 5948 5949 status_t 5950 _user_get_area_info(area_id area, area_info *userInfo) 5951 { 5952 if (!IS_USER_ADDRESS(userInfo)) 5953 return B_BAD_ADDRESS; 5954 5955 area_info info; 5956 status_t status = get_area_info(area, &info); 5957 if (status < B_OK) 5958 return status; 5959 5960 // TODO: do we want to prevent userland from seeing kernel protections? 5961 //info.protection &= B_USER_PROTECTION; 5962 5963 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5964 return B_BAD_ADDRESS; 5965 5966 return status; 5967 } 5968 5969 5970 status_t 5971 _user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo) 5972 { 5973 int32 cookie; 5974 5975 if (!IS_USER_ADDRESS(userCookie) 5976 || !IS_USER_ADDRESS(userInfo) 5977 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 5978 return B_BAD_ADDRESS; 5979 5980 area_info info; 5981 status_t status = _get_next_area_info(team, &cookie, &info, sizeof(area_info)); 5982 if (status != B_OK) 5983 return status; 5984 5985 //info.protection &= B_USER_PROTECTION; 5986 5987 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 5988 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5989 return B_BAD_ADDRESS; 5990 5991 return status; 5992 } 5993 5994 5995 status_t 5996 _user_set_area_protection(area_id area, uint32 newProtection) 5997 { 5998 if ((newProtection & ~B_USER_PROTECTION) != 0) 5999 return B_BAD_VALUE; 6000 6001 fix_protection(&newProtection); 6002 6003 return vm_set_area_protection(vm_current_user_address_space_id(), area, 6004 newProtection, false); 6005 } 6006 6007 6008 status_t 6009 _user_resize_area(area_id area, size_t newSize) 6010 { 6011 // ToDo: Since we restrict deleting of areas to those owned by the team, 6012 // we should also do that for resizing (check other functions, too). 
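	// Passing kernel == false makes vm_resize_area() apply the userland
	// restrictions, e.g. it refuses to resize areas whose protection includes
	// B_KERNEL_AREA.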
6013 return vm_resize_area(area, newSize, false); 6014 } 6015 6016 6017 area_id 6018 _user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target) 6019 { 6020 // filter out some unavailable values (for userland) 6021 switch (addressSpec) { 6022 case B_ANY_KERNEL_ADDRESS: 6023 case B_ANY_KERNEL_BLOCK_ADDRESS: 6024 return B_BAD_VALUE; 6025 } 6026 6027 void *address; 6028 if (!IS_USER_ADDRESS(userAddress) 6029 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6030 return B_BAD_ADDRESS; 6031 6032 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6033 if (newArea < B_OK) 6034 return newArea; 6035 6036 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6037 return B_BAD_ADDRESS; 6038 6039 return newArea; 6040 } 6041 6042 6043 area_id 6044 _user_clone_area(const char *userName, void **userAddress, uint32 addressSpec, 6045 uint32 protection, area_id sourceArea) 6046 { 6047 char name[B_OS_NAME_LENGTH]; 6048 void *address; 6049 6050 // filter out some unavailable values (for userland) 6051 switch (addressSpec) { 6052 case B_ANY_KERNEL_ADDRESS: 6053 case B_ANY_KERNEL_BLOCK_ADDRESS: 6054 return B_BAD_VALUE; 6055 } 6056 if ((protection & ~B_USER_PROTECTION) != 0) 6057 return B_BAD_VALUE; 6058 6059 if (!IS_USER_ADDRESS(userName) 6060 || !IS_USER_ADDRESS(userAddress) 6061 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6062 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6063 return B_BAD_ADDRESS; 6064 6065 fix_protection(&protection); 6066 6067 area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address, 6068 addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, false); 6069 if (clonedArea < B_OK) 6070 return clonedArea; 6071 6072 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6073 delete_area(clonedArea); 6074 return B_BAD_ADDRESS; 6075 } 6076 6077 return clonedArea; 6078 } 6079 6080 6081 area_id 6082 _user_create_area(const char *userName, void **userAddress, uint32 addressSpec, 6083 size_t size, uint32 lock, uint32 protection) 6084 { 6085 char name[B_OS_NAME_LENGTH]; 6086 void *address; 6087 6088 // filter out some unavailable values (for userland) 6089 switch (addressSpec) { 6090 case B_ANY_KERNEL_ADDRESS: 6091 case B_ANY_KERNEL_BLOCK_ADDRESS: 6092 return B_BAD_VALUE; 6093 } 6094 if ((protection & ~B_USER_PROTECTION) != 0) 6095 return B_BAD_VALUE; 6096 6097 if (!IS_USER_ADDRESS(userName) 6098 || !IS_USER_ADDRESS(userAddress) 6099 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6100 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6101 return B_BAD_ADDRESS; 6102 6103 if (addressSpec == B_EXACT_ADDRESS 6104 && IS_KERNEL_ADDRESS(address)) 6105 return B_BAD_VALUE; 6106 6107 fix_protection(&protection); 6108 6109 area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(), 6110 (char *)name, &address, addressSpec, size, lock, protection, 0, false); 6111 6112 if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6113 delete_area(area); 6114 return B_BAD_ADDRESS; 6115 } 6116 6117 return area; 6118 } 6119 6120 6121 status_t 6122 _user_delete_area(area_id area) 6123 { 6124 // Unlike the BeOS implementation, you can now only delete areas 6125 // that you have created yourself from userland. 6126 // The documentation to delete_area() explicetly states that this 6127 // will be restricted in the future, and so it will. 
	return vm_delete_area(vm_current_user_address_space_id(), area, false);
}


// ToDo: create a BeOS style call for this!

area_id
_user_map_file(const char *userName, void **userAddress, int addressSpec,
	size_t size, int protection, int mapping, int fd, off_t offset)
{
	char name[B_OS_NAME_LENGTH];
	void *address;
	area_id area;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS) {
		if ((addr_t)address + size < (addr_t)address)
			return B_BAD_VALUE;
		if (!IS_USER_ADDRESS(address)
			|| !IS_USER_ADDRESS((addr_t)address + size)) {
			return B_BAD_ADDRESS;
		}
	}

	// userland created areas can always be accessed by the kernel
	protection |= B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
		addressSpec, size, protection, mapping, fd, offset, false);
	if (area < B_OK)
		return area;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return area;
}


status_t
_user_unmap_memory(void *_address, size_t size)
{
	addr_t address = (addr_t)_address;

	// check params
	if (size == 0 || (addr_t)address + size < (addr_t)address)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
		return B_BAD_ADDRESS;

	// write lock the address space
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// unmap
	return unmap_address_range(locker.AddressSpace(), address, size, false);
}


status_t
_user_set_memory_protection(void* _address, size_t size, int protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	// extend and check protection
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 actualProtection = protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	if (!arch_vm_supports_protection(actualProtection))
		return B_NOT_SUPPORTED;

	// We need to write lock the address space, since we're going to play with
	// the areas.
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// First round: Check whether the whole range is covered by areas and we
	// are allowed to modify them.
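	// Validating the complete range up front means we either change the
	// protection of the whole range or fail without having touched anything;
	// the second round below then performs the actual modifications.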
	addr_t currentAddress = address;
	size_t sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		if ((area->protection & B_KERNEL_AREA) != 0)
			return B_NOT_ALLOWED;

		// TODO: For (shared) mapped files we should check whether the new
		// protections are compatible with the file permissions. We don't have
		// a way to do that yet, though.

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;
	}

	// Second round: If the protections differ from that of the area, create a
	// page protection array and re-map mapped pages.
	vm_translation_map* map = &locker.AddressSpace()->translation_map;
	currentAddress = address;
	sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;

		if (area->page_protections == NULL) {
			if (area->protection == actualProtection)
				continue;

			// In the page protections we store only the three user
			// protections, so we use 4 bits per page.
			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
			area->page_protections = (uint8*)malloc(bytes);
			if (area->page_protections == NULL)
				return B_NO_MEMORY;

			// init the page protections for all pages to that of the area
			uint32 areaProtection = area->protection
				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
			memset(area->page_protections,
				areaProtection | (areaProtection << 4), bytes);
		}

		for (addr_t pageAddress = area->base + offset;
				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
			map->ops->lock(map);

			set_area_page_protection(area, pageAddress, protection);

			addr_t physicalAddress;
			uint32 flags;

			status_t error = map->ops->query(map, pageAddress,
				&physicalAddress, &flags);
			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
				map->ops->unlock(map);
				continue;
			}

			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page == NULL) {
				panic("area %p looking up page failed for pa 0x%lx\n", area,
					physicalAddress);
				map->ops->unlock(map);
				return B_ERROR;
			}

			// If the page is not in the topmost cache and write access is
			// requested, we have to unmap it. Otherwise we can re-map it with
			// the new protection.
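			// (Unmapping forces a page fault on the next write access, which
			// lets the fault handler copy the page into the topmost cache
			// first; keeping the mapping would let userland write straight to
			// a page that is shared through a lower cache.)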
			bool unmapPage = page->cache != area->cache
				&& (protection & B_WRITE_AREA) != 0;

			if (!unmapPage) {
				map->ops->unmap(map, pageAddress,
					pageAddress + B_PAGE_SIZE - 1);
				map->ops->map(map, pageAddress, physicalAddress,
					actualProtection);
			}

			map->ops->unlock(map);

			if (unmapPage)
				vm_unmap_pages(area, pageAddress, B_PAGE_SIZE, true);
		}
	}

	return B_OK;
}


status_t
_user_sync_memory(void *_address, size_t size, int flags)
{
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	// check params
	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	bool writeSync = (flags & MS_SYNC) != 0;
	bool writeAsync = (flags & MS_ASYNC) != 0;
	if (writeSync && writeAsync)
		return B_BAD_VALUE;

	if (size == 0 || (!writeSync && !writeAsync))
		return B_OK;

	// iterate through the range and sync all concerned areas
	while (size > 0) {
		// read lock the address space
		AddressSpaceReadLocker locker;
		status_t error = locker.SetTo(team_get_current_team_id());
		if (error != B_OK)
			return error;

		// get the first area
		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
		if (area == NULL)
			return B_NO_MEMORY;

		uint32 offset = address - area->base;
		size_t rangeSize = min_c(area->size - offset, size);
		offset += area->cache_offset;

		// lock the cache
		AreaCacheLocker cacheLocker(area);
		if (!cacheLocker)
			return B_BAD_VALUE;
		vm_cache* cache = area->cache;

		locker.Unlock();

		uint32 firstPage = offset >> PAGE_SHIFT;
		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);

		// write the pages
		if (cache->type == CACHE_TYPE_VNODE) {
			if (writeSync) {
				// synchronous
				error = vm_page_write_modified_page_range(cache, firstPage,
					endPage);
				if (error != B_OK)
					return error;
			} else {
				// asynchronous
				vm_page_schedule_write_page_range(cache, firstPage, endPage);
				// TODO: This is probably not quite what is supposed to
				// happen. Especially when a lot has to be written, it might
				// take ages until it really hits the disk.
			}
		}

		address += rangeSize;
		size -= rangeSize;
	}

	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
	// synchronize multiple mappings of the same file. In our VM they never
	// get out of sync, though, so we don't have to do anything.

	return B_OK;
}


status_t
_user_memory_advice(void* address, size_t size, int advice)
{
	// TODO: Implement!
	return B_OK;
}
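
// The per-page protection array set up in _user_set_memory_protection() above
// packs two pages into each byte (4 bits per page). The nibble layout is
// defined by set_area_page_protection() (not shown here); the helper below is
// only an illustrative sketch of how such an array could be decoded, assuming
// even page indices occupy the low nibble, and is not part of the build.
#if 0
static inline uint32
example_get_area_page_protection(vm_area* area, addr_t pageAddress)
{
	// fall back to the area-wide protection if no per-page array exists
	if (area->page_protections == NULL)
		return area->protection;

	uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
	uint32 protection = area->page_protections[pageIndex / 2];
	if (pageIndex % 2 == 0)
		protection &= 0x0f;		// even page index: low nibble
	else
		protection >>= 4;		// odd page index: high nibble

	// mirror the kernel-access extension applied when the protection was set
	return protection | B_KERNEL_READ_AREA
		| ((protection & B_WRITE_AREA) != 0 ? B_KERNEL_WRITE_AREA : 0);
}
#endif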