1 /* 2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <arch/cpu.h> 27 #include <arch/vm.h> 28 #include <boot/elf.h> 29 #include <boot/stage2.h> 30 #include <condition_variable.h> 31 #include <console.h> 32 #include <debug.h> 33 #include <file_cache.h> 34 #include <fs/fd.h> 35 #include <heap.h> 36 #include <kernel.h> 37 #include <int.h> 38 #include <lock.h> 39 #include <low_resource_manager.h> 40 #include <smp.h> 41 #include <system_info.h> 42 #include <thread.h> 43 #include <team.h> 44 #include <tracing.h> 45 #include <util/AutoLock.h> 46 #include <util/khash.h> 47 #include <vm_address_space.h> 48 #include <vm_cache.h> 49 #include <vm_page.h> 50 #include <vm_priv.h> 51 52 #include "VMAnonymousCache.h" 53 #include "IORequest.h" 54 55 56 //#define TRACE_VM 57 //#define TRACE_FAULTS 58 #ifdef TRACE_VM 59 # define TRACE(x) dprintf x 60 #else 61 # define TRACE(x) ; 62 #endif 63 #ifdef TRACE_FAULTS 64 # define FTRACE(x) dprintf x 65 #else 66 # define FTRACE(x) ; 67 #endif 68 69 70 class AddressSpaceReadLocker { 71 public: 72 AddressSpaceReadLocker(team_id team); 73 AddressSpaceReadLocker(vm_address_space* space, bool getNewReference); 74 AddressSpaceReadLocker(); 75 ~AddressSpaceReadLocker(); 76 77 status_t SetTo(team_id team); 78 void SetTo(vm_address_space* space, bool getNewReference); 79 status_t SetFromArea(area_id areaID, vm_area*& area); 80 81 bool IsLocked() const { return fLocked; } 82 bool Lock(); 83 void Unlock(); 84 85 void Unset(); 86 87 vm_address_space* AddressSpace() { return fSpace; } 88 89 private: 90 vm_address_space* fSpace; 91 bool fLocked; 92 }; 93 94 class AddressSpaceWriteLocker { 95 public: 96 AddressSpaceWriteLocker(team_id team); 97 AddressSpaceWriteLocker(); 98 ~AddressSpaceWriteLocker(); 99 100 status_t SetTo(team_id team); 101 status_t SetFromArea(area_id areaID, vm_area*& area); 102 status_t SetFromArea(team_id team, area_id areaID, bool allowKernel, 103 vm_area*& area); 104 status_t SetFromArea(team_id team, area_id areaID, vm_area*& area); 105 106 bool IsLocked() const { return fLocked; } 107 void Unlock(); 108 109 void DegradeToReadLock(); 110 void Unset(); 111 112 vm_address_space* AddressSpace() { return fSpace; } 113 114 private: 115 vm_address_space* fSpace; 116 bool fLocked; 117 bool fDegraded; 118 }; 119 120 class MultiAddressSpaceLocker { 121 public: 122 MultiAddressSpaceLocker(); 123 ~MultiAddressSpaceLocker(); 124 125 inline status_t AddTeam(team_id team, bool writeLock, 126 vm_address_space** _space = NULL); 127 inline status_t AddArea(area_id area, bool writeLock, 128 vm_address_space** _space = NULL); 129 130 status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne, 131 bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL); 132 133 status_t Lock(); 134 void Unlock(); 135 bool IsLocked() const { return fLocked; } 136 137 void Unset(); 138 139 private: 140 struct lock_item { 141 vm_address_space* space; 142 bool write_lock; 143 }; 144 145 bool _ResizeIfNeeded(); 146 int32 _IndexOfAddressSpace(vm_address_space* space) 
const; 147 status_t _AddAddressSpace(vm_address_space* space, bool writeLock, 148 vm_address_space** _space); 149 150 static int _CompareItems(const void* _a, const void* _b); 151 152 lock_item* fItems; 153 int32 fCapacity; 154 int32 fCount; 155 bool fLocked; 156 }; 157 158 159 class AreaCacheLocking { 160 public: 161 inline bool Lock(vm_cache* lockable) 162 { 163 return false; 164 } 165 166 inline void Unlock(vm_cache* lockable) 167 { 168 vm_area_put_locked_cache(lockable); 169 } 170 }; 171 172 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> { 173 public: 174 inline AreaCacheLocker(vm_cache* cache = NULL) 175 : AutoLocker<vm_cache, AreaCacheLocking>(cache, true) 176 { 177 } 178 179 inline AreaCacheLocker(vm_area* area) 180 : AutoLocker<vm_cache, AreaCacheLocking>() 181 { 182 SetTo(area); 183 } 184 185 inline void SetTo(vm_area* area) 186 { 187 return AutoLocker<vm_cache, AreaCacheLocking>::SetTo( 188 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 189 } 190 }; 191 192 193 #define AREA_HASH_TABLE_SIZE 1024 194 static area_id sNextAreaID = 1; 195 static hash_table* sAreaHash; 196 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash"); 197 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings"); 198 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache"); 199 200 static off_t sAvailableMemory; 201 static off_t sNeededMemory; 202 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 203 static uint32 sPageFaults; 204 205 #if DEBUG_CACHE_LIST 206 207 struct cache_info { 208 vm_cache* cache; 209 addr_t page_count; 210 addr_t committed; 211 }; 212 213 static const int kCacheInfoTableCount = 100 * 1024; 214 static cache_info* sCacheInfoTable; 215 216 #endif // DEBUG_CACHE_LIST 217 218 219 // function declarations 220 static void delete_area(vm_address_space* addressSpace, vm_area* area); 221 static vm_address_space* get_address_space_by_area_id(area_id id); 222 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address, 223 bool isWrite, bool isUser); 224 static status_t map_backing_store(vm_address_space* addressSpace, 225 vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size, 226 uint32 addressSpec, int wiring, int protection, int mapping, 227 vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel); 228 229 230 // #pragma mark - 231 232 233 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team) 234 : 235 fSpace(NULL), 236 fLocked(false) 237 { 238 SetTo(team); 239 } 240 241 242 /*! Takes over the reference of the address space, if \a getNewReference is 243 \c false. 244 */ 245 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space, 246 bool getNewReference) 247 : 248 fSpace(NULL), 249 fLocked(false) 250 { 251 SetTo(space, getNewReference); 252 } 253 254 255 AddressSpaceReadLocker::AddressSpaceReadLocker() 256 : 257 fSpace(NULL), 258 fLocked(false) 259 { 260 } 261 262 263 AddressSpaceReadLocker::~AddressSpaceReadLocker() 264 { 265 Unset(); 266 } 267 268 269 void 270 AddressSpaceReadLocker::Unset() 271 { 272 Unlock(); 273 if (fSpace != NULL) 274 vm_put_address_space(fSpace); 275 } 276 277 278 status_t 279 AddressSpaceReadLocker::SetTo(team_id team) 280 { 281 fSpace = vm_get_address_space(team); 282 if (fSpace == NULL) 283 return B_BAD_TEAM_ID; 284 285 rw_lock_read_lock(&fSpace->lock); 286 fLocked = true; 287 return B_OK; 288 } 289 290 291 /*! Takes over the reference of the address space, if \a getNewReference is 292 \c false. 
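	An illustrative use (sketch) of the reference-consuming variant:
		vm_address_space* space = vm_get_address_space(team);
		AddressSpaceReadLocker locker;
		locker.SetTo(space, false);
			// the locker now owns both the reference and the read lock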
293 */ 294 void 295 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference) 296 { 297 fSpace = space; 298 299 if (getNewReference) 300 atomic_add(&fSpace->ref_count, 1); 301 302 rw_lock_read_lock(&fSpace->lock); 303 fLocked = true; 304 } 305 306 307 status_t 308 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area) 309 { 310 fSpace = get_address_space_by_area_id(areaID); 311 if (fSpace == NULL) 312 return B_BAD_TEAM_ID; 313 314 rw_lock_read_lock(&fSpace->lock); 315 316 rw_lock_read_lock(&sAreaHashLock); 317 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 318 rw_lock_read_unlock(&sAreaHashLock); 319 320 if (area == NULL || area->address_space != fSpace) { 321 rw_lock_read_unlock(&fSpace->lock); 322 return B_BAD_VALUE; 323 } 324 325 fLocked = true; 326 return B_OK; 327 } 328 329 330 bool 331 AddressSpaceReadLocker::Lock() 332 { 333 if (fLocked) 334 return true; 335 if (fSpace == NULL) 336 return false; 337 338 rw_lock_read_lock(&fSpace->lock); 339 fLocked = true; 340 341 return true; 342 } 343 344 345 void 346 AddressSpaceReadLocker::Unlock() 347 { 348 if (fLocked) { 349 rw_lock_read_unlock(&fSpace->lock); 350 fLocked = false; 351 } 352 } 353 354 355 // #pragma mark - 356 357 358 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team) 359 : 360 fSpace(NULL), 361 fLocked(false), 362 fDegraded(false) 363 { 364 SetTo(team); 365 } 366 367 368 AddressSpaceWriteLocker::AddressSpaceWriteLocker() 369 : 370 fSpace(NULL), 371 fLocked(false), 372 fDegraded(false) 373 { 374 } 375 376 377 AddressSpaceWriteLocker::~AddressSpaceWriteLocker() 378 { 379 Unset(); 380 } 381 382 383 void 384 AddressSpaceWriteLocker::Unset() 385 { 386 Unlock(); 387 if (fSpace != NULL) 388 vm_put_address_space(fSpace); 389 } 390 391 392 status_t 393 AddressSpaceWriteLocker::SetTo(team_id team) 394 { 395 fSpace = vm_get_address_space(team); 396 if (fSpace == NULL) 397 return B_BAD_TEAM_ID; 398 399 rw_lock_write_lock(&fSpace->lock); 400 fLocked = true; 401 return B_OK; 402 } 403 404 405 status_t 406 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area) 407 { 408 fSpace = get_address_space_by_area_id(areaID); 409 if (fSpace == NULL) 410 return B_BAD_VALUE; 411 412 rw_lock_write_lock(&fSpace->lock); 413 414 rw_lock_read_lock(&sAreaHashLock); 415 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 416 rw_lock_read_unlock(&sAreaHashLock); 417 418 if (area == NULL || area->address_space != fSpace) { 419 rw_lock_write_unlock(&fSpace->lock); 420 return B_BAD_VALUE; 421 } 422 423 fLocked = true; 424 return B_OK; 425 } 426 427 428 status_t 429 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 430 bool allowKernel, vm_area*& area) 431 { 432 rw_lock_read_lock(&sAreaHashLock); 433 434 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 435 if (area != NULL 436 && (area->address_space->id == team 437 || (allowKernel && team == vm_kernel_address_space_id()))) { 438 fSpace = area->address_space; 439 atomic_add(&fSpace->ref_count, 1); 440 } 441 442 rw_lock_read_unlock(&sAreaHashLock); 443 444 if (fSpace == NULL) 445 return B_BAD_VALUE; 446 447 // Second try to get the area -- this time with the address space 448 // write lock held 449 450 rw_lock_write_lock(&fSpace->lock); 451 452 rw_lock_read_lock(&sAreaHashLock); 453 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 454 rw_lock_read_unlock(&sAreaHashLock); 455 456 if (area == NULL) { 457 rw_lock_write_unlock(&fSpace->lock); 458 return B_BAD_VALUE; 459 } 460 461 fLocked = true; 462 return B_OK; 463 } 464 465 466 status_t 467 
AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 468 vm_area*& area) 469 { 470 return SetFromArea(team, areaID, false, area); 471 } 472 473 474 void 475 AddressSpaceWriteLocker::Unlock() 476 { 477 if (fLocked) { 478 if (fDegraded) 479 rw_lock_read_unlock(&fSpace->lock); 480 else 481 rw_lock_write_unlock(&fSpace->lock); 482 fLocked = false; 483 fDegraded = false; 484 } 485 } 486 487 488 void 489 AddressSpaceWriteLocker::DegradeToReadLock() 490 { 491 // TODO: the current R/W lock implementation just keeps the write lock here 492 rw_lock_read_lock(&fSpace->lock); 493 rw_lock_write_unlock(&fSpace->lock); 494 fDegraded = true; 495 } 496 497 498 // #pragma mark - 499 500 501 MultiAddressSpaceLocker::MultiAddressSpaceLocker() 502 : 503 fItems(NULL), 504 fCapacity(0), 505 fCount(0), 506 fLocked(false) 507 { 508 } 509 510 511 MultiAddressSpaceLocker::~MultiAddressSpaceLocker() 512 { 513 Unset(); 514 free(fItems); 515 } 516 517 518 /*static*/ int 519 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b) 520 { 521 lock_item* a = (lock_item*)_a; 522 lock_item* b = (lock_item*)_b; 523 return a->space->id - b->space->id; 524 } 525 526 527 bool 528 MultiAddressSpaceLocker::_ResizeIfNeeded() 529 { 530 if (fCount == fCapacity) { 531 lock_item* items = (lock_item*)realloc(fItems, 532 (fCapacity + 4) * sizeof(lock_item)); 533 if (items == NULL) 534 return false; 535 536 fCapacity += 4; 537 fItems = items; 538 } 539 540 return true; 541 } 542 543 544 int32 545 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const 546 { 547 for (int32 i = 0; i < fCount; i++) { 548 if (fItems[i].space == space) 549 return i; 550 } 551 552 return -1; 553 } 554 555 556 status_t 557 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space, 558 bool writeLock, vm_address_space** _space) 559 { 560 if (!space) 561 return B_BAD_VALUE; 562 563 int32 index = _IndexOfAddressSpace(space); 564 if (index < 0) { 565 if (!_ResizeIfNeeded()) { 566 vm_put_address_space(space); 567 return B_NO_MEMORY; 568 } 569 570 lock_item& item = fItems[fCount++]; 571 item.space = space; 572 item.write_lock = writeLock; 573 } else { 574 575 // one reference is enough 576 vm_put_address_space(space); 577 578 fItems[index].write_lock |= writeLock; 579 } 580 581 if (_space != NULL) 582 *_space = space; 583 584 return B_OK; 585 } 586 587 588 inline status_t 589 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock, 590 vm_address_space** _space) 591 { 592 return _AddAddressSpace(vm_get_address_space(team), writeLock, 593 _space); 594 } 595 596 597 inline status_t 598 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock, 599 vm_address_space** _space) 600 { 601 return _AddAddressSpace(get_address_space_by_area_id(area), writeLock, 602 _space); 603 } 604 605 606 void 607 MultiAddressSpaceLocker::Unset() 608 { 609 Unlock(); 610 611 for (int32 i = 0; i < fCount; i++) 612 vm_put_address_space(fItems[i].space); 613 614 fCount = 0; 615 } 616 617 618 status_t 619 MultiAddressSpaceLocker::Lock() 620 { 621 ASSERT(!fLocked); 622 623 qsort(fItems, fCount, sizeof(lock_item), &_CompareItems); 624 625 for (int32 i = 0; i < fCount; i++) { 626 status_t status; 627 if (fItems[i].write_lock) 628 status = rw_lock_write_lock(&fItems[i].space->lock); 629 else 630 status = rw_lock_read_lock(&fItems[i].space->lock); 631 632 if (status < B_OK) { 633 while (--i >= 0) { 634 if (fItems[i].write_lock) 635 rw_lock_write_unlock(&fItems[i].space->lock); 636 else 637 rw_lock_read_unlock(&fItems[i].space->lock); 
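				// (Unwinding like this makes Lock() all-or-nothing: on
				// failure every lock acquired so far is released again before
				// the error is returned.)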
638 } 639 return status; 640 } 641 } 642 643 fLocked = true; 644 return B_OK; 645 } 646 647 648 void 649 MultiAddressSpaceLocker::Unlock() 650 { 651 if (!fLocked) 652 return; 653 654 for (int32 i = 0; i < fCount; i++) { 655 if (fItems[i].write_lock) 656 rw_lock_write_unlock(&fItems[i].space->lock); 657 else 658 rw_lock_read_unlock(&fItems[i].space->lock); 659 } 660 661 fLocked = false; 662 } 663 664 665 /*! Adds all address spaces of the areas associated with the given area's cache, 666 locks them, and locks the cache (including a reference to it). It retries 667 until the situation is stable (i.e. the neither cache nor cache's areas 668 changed) or an error occurs. 669 */ 670 status_t 671 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID, 672 bool writeLockThisOne, bool writeLockOthers, vm_area*& _area, 673 vm_cache** _cache) 674 { 675 // remember the original state 676 int originalCount = fCount; 677 lock_item* originalItems = NULL; 678 if (fCount > 0) { 679 originalItems = new(nothrow) lock_item[fCount]; 680 if (originalItems == NULL) 681 return B_NO_MEMORY; 682 memcpy(originalItems, fItems, fCount * sizeof(lock_item)); 683 } 684 ArrayDeleter<lock_item> _(originalItems); 685 686 // get the cache 687 vm_cache* cache; 688 vm_area* area; 689 status_t error; 690 { 691 AddressSpaceReadLocker locker; 692 error = locker.SetFromArea(areaID, area); 693 if (error != B_OK) 694 return error; 695 696 cache = vm_area_get_locked_cache(area); 697 } 698 699 while (true) { 700 // add all areas 701 vm_area* firstArea = cache->areas; 702 for (vm_area* current = firstArea; current; 703 current = current->cache_next) { 704 error = AddArea(current->id, 705 current == area ? writeLockThisOne : writeLockOthers); 706 if (error != B_OK) { 707 vm_area_put_locked_cache(cache); 708 return error; 709 } 710 } 711 712 // unlock the cache and attempt to lock the address spaces 713 vm_area_put_locked_cache(cache); 714 715 error = Lock(); 716 if (error != B_OK) 717 return error; 718 719 // lock the cache again and check whether anything has changed 720 721 // check whether the area is gone in the meantime 722 rw_lock_read_lock(&sAreaHashLock); 723 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 724 rw_lock_read_unlock(&sAreaHashLock); 725 726 if (area == NULL) { 727 Unlock(); 728 return B_BAD_VALUE; 729 } 730 731 // lock the cache 732 vm_cache* oldCache = cache; 733 cache = vm_area_get_locked_cache(area); 734 735 // If neither the area's cache has changed nor its area list we're 736 // done. 737 if (cache == oldCache && firstArea == cache->areas) { 738 _area = area; 739 if (_cache != NULL) 740 *_cache = cache; 741 return B_OK; 742 } 743 744 // Restore the original state and try again. 745 746 // Unlock the address spaces, but keep the cache locked for the next 747 // iteration. 748 Unlock(); 749 750 // Get an additional reference to the original address spaces. 751 for (int32 i = 0; i < originalCount; i++) 752 atomic_add(&originalItems[i].space->ref_count, 1); 753 754 // Release all references to the current address spaces. 755 for (int32 i = 0; i < fCount; i++) 756 vm_put_address_space(fItems[i].space); 757 758 // Copy over the original state. 
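		// (The cache re-locked above is intentionally kept locked, so the
		// next iteration starts from a consistent view of cache->areas.)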
759 fCount = originalCount; 760 if (originalItems != NULL) 761 memcpy(fItems, originalItems, fCount * sizeof(lock_item)); 762 } 763 } 764 765 766 // #pragma mark - 767 768 769 #if VM_PAGE_FAULT_TRACING 770 771 namespace VMPageFaultTracing { 772 773 class PageFaultStart : public AbstractTraceEntry { 774 public: 775 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 776 : 777 fAddress(address), 778 fPC(pc), 779 fWrite(write), 780 fUser(user) 781 { 782 Initialized(); 783 } 784 785 virtual void AddDump(TraceOutput& out) 786 { 787 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 788 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC); 789 } 790 791 private: 792 addr_t fAddress; 793 addr_t fPC; 794 bool fWrite; 795 bool fUser; 796 }; 797 798 799 // page fault errors 800 enum { 801 PAGE_FAULT_ERROR_NO_AREA = 0, 802 PAGE_FAULT_ERROR_KERNEL_ONLY, 803 PAGE_FAULT_ERROR_WRITE_PROTECTED, 804 PAGE_FAULT_ERROR_READ_PROTECTED, 805 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 806 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 807 }; 808 809 810 class PageFaultError : public AbstractTraceEntry { 811 public: 812 PageFaultError(area_id area, status_t error) 813 : 814 fArea(area), 815 fError(error) 816 { 817 Initialized(); 818 } 819 820 virtual void AddDump(TraceOutput& out) 821 { 822 switch (fError) { 823 case PAGE_FAULT_ERROR_NO_AREA: 824 out.Print("page fault error: no area"); 825 break; 826 case PAGE_FAULT_ERROR_KERNEL_ONLY: 827 out.Print("page fault error: area: %ld, kernel only", fArea); 828 break; 829 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 830 out.Print("page fault error: area: %ld, write protected", 831 fArea); 832 break; 833 case PAGE_FAULT_ERROR_READ_PROTECTED: 834 out.Print("page fault error: area: %ld, read protected", fArea); 835 break; 836 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 837 out.Print("page fault error: kernel touching bad user memory"); 838 break; 839 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 840 out.Print("page fault error: no address space"); 841 break; 842 default: 843 out.Print("page fault error: area: %ld, error: %s", fArea, 844 strerror(fError)); 845 break; 846 } 847 } 848 849 private: 850 area_id fArea; 851 status_t fError; 852 }; 853 854 855 class PageFaultDone : public AbstractTraceEntry { 856 public: 857 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 858 vm_page* page) 859 : 860 fArea(area), 861 fTopCache(topCache), 862 fCache(cache), 863 fPage(page) 864 { 865 Initialized(); 866 } 867 868 virtual void AddDump(TraceOutput& out) 869 { 870 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 871 "page: %p", fArea, fTopCache, fCache, fPage); 872 } 873 874 private: 875 area_id fArea; 876 VMCache* fTopCache; 877 VMCache* fCache; 878 vm_page* fPage; 879 }; 880 881 } // namespace VMPageFaultTracing 882 883 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 884 #else 885 # define TPF(x) ; 886 #endif // VM_PAGE_FAULT_TRACING 887 888 889 // #pragma mark - 890 891 892 static int 893 area_compare(void* _area, const void* key) 894 { 895 vm_area* area = (vm_area*)_area; 896 const area_id* id = (const area_id*)key; 897 898 if (area->id == *id) 899 return 0; 900 901 return -1; 902 } 903 904 905 static uint32 906 area_hash(void* _area, const void* key, uint32 range) 907 { 908 vm_area* area = (vm_area*)_area; 909 const area_id* id = (const area_id*)key; 910 911 if (area != NULL) 912 return area->id % range; 913 914 return (uint32)*id % range; 915 } 916 917 918 static vm_address_space* 919 get_address_space_by_area_id(area_id id) 920 { 921 
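	// Returns the address space the area belongs to with an extra reference,
	// or NULL if the area doesn't exist. An illustrative caller (sketch)
	// pairs it with vm_put_address_space():
	//
	//	vm_address_space* space = get_address_space_by_area_id(id);
	//	if (space == NULL)
	//		return B_BAD_VALUE;
	//	// ... use space ...
	//	vm_put_address_space(space);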
vm_address_space* addressSpace = NULL; 922 923 rw_lock_read_lock(&sAreaHashLock); 924 925 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 926 if (area != NULL) { 927 addressSpace = area->address_space; 928 atomic_add(&addressSpace->ref_count, 1); 929 } 930 931 rw_lock_read_unlock(&sAreaHashLock); 932 933 return addressSpace; 934 } 935 936 937 //! You need to have the address space locked when calling this function 938 static vm_area* 939 lookup_area(vm_address_space* addressSpace, area_id id) 940 { 941 rw_lock_read_lock(&sAreaHashLock); 942 943 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 944 if (area != NULL && area->address_space != addressSpace) 945 area = NULL; 946 947 rw_lock_read_unlock(&sAreaHashLock); 948 949 return area; 950 } 951 952 953 static vm_area* 954 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags) 955 { 956 vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area)); 957 if (reserved == NULL) 958 return NULL; 959 960 memset(reserved, 0, sizeof(vm_area)); 961 reserved->id = RESERVED_AREA_ID; 962 // this marks it as reserved space 963 reserved->protection = flags; 964 reserved->address_space = addressSpace; 965 966 return reserved; 967 } 968 969 970 static vm_area* 971 create_area_struct(vm_address_space* addressSpace, const char* name, 972 uint32 wiring, uint32 protection) 973 { 974 // restrict the area name to B_OS_NAME_LENGTH 975 size_t length = strlen(name) + 1; 976 if (length > B_OS_NAME_LENGTH) 977 length = B_OS_NAME_LENGTH; 978 979 vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area)); 980 if (area == NULL) 981 return NULL; 982 983 area->name = (char*)malloc_nogrow(length); 984 if (area->name == NULL) { 985 free(area); 986 return NULL; 987 } 988 strlcpy(area->name, name, length); 989 990 area->id = atomic_add(&sNextAreaID, 1); 991 area->base = 0; 992 area->size = 0; 993 area->protection = protection; 994 area->wiring = wiring; 995 area->memory_type = 0; 996 997 area->cache = NULL; 998 area->cache_offset = 0; 999 1000 area->address_space = addressSpace; 1001 area->address_space_next = NULL; 1002 area->cache_next = area->cache_prev = NULL; 1003 area->hash_next = NULL; 1004 new (&area->mappings) vm_area_mappings; 1005 area->page_protections = NULL; 1006 1007 return area; 1008 } 1009 1010 1011 /*! Finds a reserved area that covers the region spanned by \a start and 1012 \a size, inserts the \a area into that region and makes sure that 1013 there are reserved regions for the remaining parts. 1014 */ 1015 static status_t 1016 find_reserved_area(vm_address_space* addressSpace, addr_t start, 1017 addr_t size, vm_area* area) 1018 { 1019 vm_area* last = NULL; 1020 vm_area* next; 1021 1022 next = addressSpace->areas; 1023 while (next != NULL) { 1024 if (next->base <= start 1025 && next->base + (next->size - 1) >= start + (size - 1)) { 1026 // This area covers the requested range 1027 if (next->id != RESERVED_AREA_ID) { 1028 // but it's not reserved space, it's a real area 1029 return B_BAD_VALUE; 1030 } 1031 1032 break; 1033 } 1034 1035 last = next; 1036 next = next->address_space_next; 1037 } 1038 1039 if (next == NULL) 1040 return B_ENTRY_NOT_FOUND; 1041 1042 // Now we have to transfer the requested part of the reserved 1043 // range to the new area - and remove, resize or split the old 1044 // reserved area. 
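	// Illustration of the three cases handled below (R = reserved space that
	// remains, A = the new area):
	//
	//	area at the start of the range:  |AAAA|RRRRRRRR|  (or A covers R fully)
	//	area at the end of the range:    |RRRRRRRR|AAAA|
	//	area in the middle:              |RRR|AAAA|RRRR|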
1045 1046 if (start == next->base) { 1047 // the area starts at the beginning of the reserved range 1048 if (last) 1049 last->address_space_next = area; 1050 else 1051 addressSpace->areas = area; 1052 1053 if (size == next->size) { 1054 // the new area fully covers the reversed range 1055 area->address_space_next = next->address_space_next; 1056 vm_put_address_space(addressSpace); 1057 free(next); 1058 } else { 1059 // resize the reserved range behind the area 1060 area->address_space_next = next; 1061 next->base += size; 1062 next->size -= size; 1063 } 1064 } else if (start + size == next->base + next->size) { 1065 // the area is at the end of the reserved range 1066 area->address_space_next = next->address_space_next; 1067 next->address_space_next = area; 1068 1069 // resize the reserved range before the area 1070 next->size = start - next->base; 1071 } else { 1072 // the area splits the reserved range into two separate ones 1073 // we need a new reserved area to cover this space 1074 vm_area* reserved = create_reserved_area_struct(addressSpace, 1075 next->protection); 1076 if (reserved == NULL) 1077 return B_NO_MEMORY; 1078 1079 atomic_add(&addressSpace->ref_count, 1); 1080 reserved->address_space_next = next->address_space_next; 1081 area->address_space_next = reserved; 1082 next->address_space_next = area; 1083 1084 // resize regions 1085 reserved->size = next->base + next->size - start - size; 1086 next->size = start - next->base; 1087 reserved->base = start + size; 1088 reserved->cache_offset = next->cache_offset; 1089 } 1090 1091 area->base = start; 1092 area->size = size; 1093 addressSpace->change_count++; 1094 1095 return B_OK; 1096 } 1097 1098 1099 /*! Verifies that an area with the given aligned base and size fits into 1100 the spot defined by base and limit and does check for overflows. 1101 */ 1102 static inline bool 1103 is_valid_spot(addr_t base, addr_t alignedBase, addr_t size, addr_t limit) 1104 { 1105 return (alignedBase >= base && alignedBase + (size - 1) > alignedBase 1106 && alignedBase + (size - 1) <= limit); 1107 } 1108 1109 1110 /*! Must be called with this address space's sem held */ 1111 static status_t 1112 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start, 1113 addr_t size, addr_t end, uint32 addressSpec, vm_area* area) 1114 { 1115 vm_area* last = NULL; 1116 vm_area* next; 1117 bool foundSpot = false; 1118 1119 TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, " 1120 "size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start, 1121 size, end, addressSpec, area)); 1122 1123 // do some sanity checking 1124 if (start < addressSpace->base || size == 0 1125 || end > addressSpace->base + (addressSpace->size - 1) 1126 || start + (size - 1) > end) 1127 return B_BAD_ADDRESS; 1128 1129 if (addressSpec == B_EXACT_ADDRESS && area->id != RESERVED_AREA_ID) { 1130 // search for a reserved area 1131 status_t status = find_reserved_area(addressSpace, start, size, area); 1132 if (status == B_OK || status == B_BAD_VALUE) 1133 return status; 1134 1135 // There was no reserved area, and the slot doesn't seem to be used 1136 // already 1137 // TODO: this could be further optimized. 
1138 } 1139 1140 size_t alignment = B_PAGE_SIZE; 1141 if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) { 1142 // align the memory to the next power of two of the size 1143 while (alignment < size) 1144 alignment <<= 1; 1145 } 1146 1147 start = ROUNDUP(start, alignment); 1148 1149 // walk up to the spot where we should start searching 1150 second_chance: 1151 next = addressSpace->areas; 1152 while (next != NULL) { 1153 if (next->base > start + (size - 1)) { 1154 // we have a winner 1155 break; 1156 } 1157 1158 last = next; 1159 next = next->address_space_next; 1160 } 1161 1162 // find the right spot depending on the address specification - the area 1163 // will be inserted directly after "last" ("next" is not referenced anymore) 1164 1165 switch (addressSpec) { 1166 case B_ANY_ADDRESS: 1167 case B_ANY_KERNEL_ADDRESS: 1168 case B_ANY_KERNEL_BLOCK_ADDRESS: 1169 { 1170 // find a hole big enough for a new area 1171 if (last == NULL) { 1172 // see if we can build it at the beginning of the virtual map 1173 addr_t alignedBase = ROUNDUP(addressSpace->base, alignment); 1174 if (is_valid_spot(addressSpace->base, alignedBase, size, 1175 next == NULL ? end : next->base)) { 1176 foundSpot = true; 1177 area->base = alignedBase; 1178 break; 1179 } 1180 1181 last = next; 1182 next = next->address_space_next; 1183 } 1184 1185 // keep walking 1186 while (next != NULL) { 1187 addr_t alignedBase = ROUNDUP(last->base + last->size, alignment); 1188 if (is_valid_spot(last->base + (last->size - 1), alignedBase, 1189 size, next->base)) { 1190 foundSpot = true; 1191 area->base = alignedBase; 1192 break; 1193 } 1194 1195 last = next; 1196 next = next->address_space_next; 1197 } 1198 1199 if (foundSpot) 1200 break; 1201 1202 addr_t alignedBase = ROUNDUP(last->base + last->size, alignment); 1203 if (is_valid_spot(last->base + (last->size - 1), alignedBase, 1204 size, end)) { 1205 // got a spot 1206 foundSpot = true; 1207 area->base = alignedBase; 1208 break; 1209 } else if (area->id != RESERVED_AREA_ID) { 1210 // We didn't find a free spot - if there are any reserved areas, 1211 // we can now test those for free space 1212 // TODO: it would make sense to start with the biggest of them 1213 next = addressSpace->areas; 1214 for (last = NULL; next != NULL; 1215 next = next->address_space_next) { 1216 if (next->id != RESERVED_AREA_ID) { 1217 last = next; 1218 continue; 1219 } 1220 1221 // TODO: take free space after the reserved area into 1222 // account! 
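				// Three ways of reusing this reserved area are tried below:
				// it is removed completely if the new area covers it exactly,
				// cut at the front (unless RESERVED_AVOID_BASE is set), or
				// cut at the back.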
1223 addr_t alignedBase = ROUNDUP(next->base, alignment); 1224 if (next->base == alignedBase && next->size == size) { 1225 // The reserved area is entirely covered, and thus, 1226 // removed 1227 if (last) 1228 last->address_space_next = next->address_space_next; 1229 else 1230 addressSpace->areas = next->address_space_next; 1231 1232 foundSpot = true; 1233 area->base = alignedBase; 1234 free(next); 1235 break; 1236 } 1237 1238 if ((next->protection & RESERVED_AVOID_BASE) == 0 1239 && alignedBase == next->base && next->size >= size) { 1240 // The new area will be placed at the beginning of the 1241 // reserved area and the reserved area will be offset 1242 // and resized 1243 foundSpot = true; 1244 next->base += size; 1245 next->size -= size; 1246 area->base = alignedBase; 1247 break; 1248 } 1249 1250 if (is_valid_spot(next->base, alignedBase, size, 1251 next->base + (next->size - 1))) { 1252 // The new area will be placed at the end of the 1253 // reserved area, and the reserved area will be resized 1254 // to make space 1255 alignedBase = ROUNDDOWN(next->base + next->size - size, 1256 alignment); 1257 1258 foundSpot = true; 1259 next->size = alignedBase - next->base; 1260 area->base = alignedBase; 1261 last = next; 1262 break; 1263 } 1264 1265 last = next; 1266 } 1267 } 1268 break; 1269 } 1270 1271 case B_BASE_ADDRESS: 1272 { 1273 // find a hole big enough for a new area beginning with "start" 1274 if (last == NULL) { 1275 // see if we can build it at the beginning of the specified start 1276 if (next == NULL || next->base > start + (size - 1)) { 1277 foundSpot = true; 1278 area->base = start; 1279 break; 1280 } 1281 1282 last = next; 1283 next = next->address_space_next; 1284 } 1285 1286 // keep walking 1287 while (next != NULL) { 1288 if (next->base - (last->base + last->size) >= size) { 1289 // we found a spot (it'll be filled up below) 1290 break; 1291 } 1292 1293 last = next; 1294 next = next->address_space_next; 1295 } 1296 1297 addr_t lastEnd = last->base + (last->size - 1); 1298 if (next != NULL || end - lastEnd >= size) { 1299 // got a spot 1300 foundSpot = true; 1301 if (lastEnd < start) 1302 area->base = start; 1303 else 1304 area->base = lastEnd + 1; 1305 break; 1306 } 1307 1308 // we didn't find a free spot in the requested range, so we'll 1309 // try again without any restrictions 1310 start = addressSpace->base; 1311 addressSpec = B_ANY_ADDRESS; 1312 last = NULL; 1313 goto second_chance; 1314 } 1315 1316 case B_EXACT_ADDRESS: 1317 // see if we can create it exactly here 1318 if ((last == NULL || last->base + (last->size - 1) < start) 1319 && (next == NULL || next->base > start + (size - 1))) { 1320 foundSpot = true; 1321 area->base = start; 1322 break; 1323 } 1324 break; 1325 default: 1326 return B_BAD_VALUE; 1327 } 1328 1329 if (!foundSpot) 1330 return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY; 1331 1332 area->size = size; 1333 if (last) { 1334 area->address_space_next = last->address_space_next; 1335 last->address_space_next = area; 1336 } else { 1337 area->address_space_next = addressSpace->areas; 1338 addressSpace->areas = area; 1339 } 1340 1341 addressSpace->change_count++; 1342 return B_OK; 1343 } 1344 1345 1346 /*! This inserts the area you pass into the specified address space. 1347 It will also set the "_address" argument to its base address when 1348 the call succeeds. 1349 You need to hold the vm_address_space semaphore. 
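	An illustrative call (sketch), assuming \a area was already created with
	create_area_struct():
		void* address = NULL;
		status_t status = insert_area(addressSpace, &address,
			B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE, area);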
1350 */ 1351 static status_t 1352 insert_area(vm_address_space* addressSpace, void** _address, 1353 uint32 addressSpec, addr_t size, vm_area* area) 1354 { 1355 addr_t searchBase, searchEnd; 1356 status_t status; 1357 1358 switch (addressSpec) { 1359 case B_EXACT_ADDRESS: 1360 searchBase = (addr_t)*_address; 1361 searchEnd = (addr_t)*_address + (size - 1); 1362 break; 1363 1364 case B_BASE_ADDRESS: 1365 searchBase = (addr_t)*_address; 1366 searchEnd = addressSpace->base + (addressSpace->size - 1); 1367 break; 1368 1369 case B_ANY_ADDRESS: 1370 case B_ANY_KERNEL_ADDRESS: 1371 case B_ANY_KERNEL_BLOCK_ADDRESS: 1372 searchBase = addressSpace->base; 1373 // TODO: remove this again when vm86 mode is moved into the kernel 1374 // completely (currently needs a userland address space!) 1375 if (searchBase == USER_BASE) 1376 searchBase = USER_BASE_ANY; 1377 searchEnd = addressSpace->base + (addressSpace->size - 1); 1378 break; 1379 1380 default: 1381 return B_BAD_VALUE; 1382 } 1383 1384 status = find_and_insert_area_slot(addressSpace, searchBase, size, 1385 searchEnd, addressSpec, area); 1386 if (status == B_OK) 1387 *_address = (void*)area->base; 1388 1389 return status; 1390 } 1391 1392 1393 static inline void 1394 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection) 1395 { 1396 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 1397 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1398 uint8& entry = area->page_protections[pageIndex / 2]; 1399 if (pageIndex % 2 == 0) 1400 entry = (entry & 0xf0) | protection; 1401 else 1402 entry = (entry & 0x0f) | (protection << 4); 1403 } 1404 1405 1406 static inline uint32 1407 get_area_page_protection(vm_area* area, addr_t pageAddress) 1408 { 1409 if (area->page_protections == NULL) 1410 return area->protection; 1411 1412 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1413 uint32 protection = area->page_protections[pageIndex / 2]; 1414 if (pageIndex % 2 == 0) 1415 protection &= 0x0f; 1416 else 1417 protection >>= 4; 1418 1419 return protection | B_KERNEL_READ_AREA 1420 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 1421 } 1422 1423 1424 /*! Cuts a piece out of an area. If the given cut range covers the complete 1425 area, it is deleted. If it covers the beginning or the end, the area is 1426 resized accordingly. If the range covers some part in the middle of the 1427 area, it is split in two; in this case the second area is returned via 1428 \a _secondArea (the variable is left untouched in the other cases). 1429 The address space must be write locked. 1430 */ 1431 static status_t 1432 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address, 1433 addr_t lastAddress, vm_area** _secondArea, bool kernel) 1434 { 1435 // Does the cut range intersect with the area at all? 1436 addr_t areaLast = area->base + (area->size - 1); 1437 if (area->base > lastAddress || areaLast < address) 1438 return B_OK; 1439 1440 // Is the area fully covered? 1441 if (area->base >= address && areaLast <= lastAddress) { 1442 delete_area(addressSpace, area); 1443 return B_OK; 1444 } 1445 1446 AreaCacheLocker cacheLocker(area); 1447 vm_cache* cache = area->cache; 1448 1449 // Cut the end only? 1450 if (areaLast <= lastAddress) { 1451 addr_t newSize = address - area->base; 1452 1453 // unmap pages 1454 vm_unmap_pages(area, address, area->size - newSize, false); 1455 1456 // If no one else uses the area's cache, we can resize it, too. 
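		// ("No one else" means: this is the only area attached to the cache
		// and the cache has no consumers, i.e. no other cache uses it as its
		// source.)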
1457 if (cache->areas == area && area->cache_next == NULL 1458 && list_is_empty(&cache->consumers)) { 1459 status_t error = cache->Resize(cache->virtual_base + newSize); 1460 if (error != B_OK) 1461 return error; 1462 } 1463 1464 area->size = newSize; 1465 1466 return B_OK; 1467 } 1468 1469 // Cut the beginning only? 1470 if (area->base >= address) { 1471 addr_t newBase = lastAddress + 1; 1472 addr_t newSize = areaLast - lastAddress; 1473 1474 // unmap pages 1475 vm_unmap_pages(area, area->base, newBase - area->base, false); 1476 1477 // TODO: If no one else uses the area's cache, we should resize it, too! 1478 1479 area->cache_offset += newBase - area->base; 1480 area->base = newBase; 1481 area->size = newSize; 1482 1483 return B_OK; 1484 } 1485 1486 // The tough part -- cut a piece out of the middle of the area. 1487 // We do that by shrinking the area to the begin section and creating a 1488 // new area for the end section. 1489 1490 addr_t firstNewSize = address - area->base; 1491 addr_t secondBase = lastAddress + 1; 1492 addr_t secondSize = areaLast - lastAddress; 1493 1494 // unmap pages 1495 vm_unmap_pages(area, address, area->size - firstNewSize, false); 1496 1497 // resize the area 1498 addr_t oldSize = area->size; 1499 area->size = firstNewSize; 1500 1501 // TODO: If no one else uses the area's cache, we might want to create a 1502 // new cache for the second area, transfer the concerned pages from the 1503 // first cache to it and resize the first cache. 1504 1505 // map the second area 1506 vm_area* secondArea; 1507 void* secondBaseAddress = (void*)secondBase; 1508 status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress, 1509 area->cache_offset + (secondBase - area->base), secondSize, 1510 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 1511 &secondArea, area->name, false, kernel); 1512 if (error != B_OK) { 1513 area->size = oldSize; 1514 return error; 1515 } 1516 1517 // We need a cache reference for the new area. 1518 cache->AcquireRefLocked(); 1519 1520 if (_secondArea != NULL) 1521 *_secondArea = secondArea; 1522 1523 return B_OK; 1524 } 1525 1526 1527 static inline void 1528 increment_page_wired_count(vm_page* page) 1529 { 1530 // TODO: needs to be atomic on all platforms! 1531 // ... but at least the check isn't. Consequently we should hold 1532 // sMappingLock, which would allows us to even avoid atomic_add() on 1533 // gMappedPagesCount. 1534 if (page->wired_count++ == 0) { 1535 if (page->mappings.IsEmpty()) 1536 atomic_add(&gMappedPagesCount, 1); 1537 } 1538 } 1539 1540 1541 static inline void 1542 decrement_page_wired_count(vm_page* page) 1543 { 1544 if (--page->wired_count == 0) { 1545 // TODO: needs to be atomic on all platforms! 1546 // See above! 1547 if (page->mappings.IsEmpty()) 1548 atomic_add(&gMappedPagesCount, -1); 1549 } 1550 } 1551 1552 1553 /*! Deletes all areas in the given address range. 1554 The address space must be write-locked. 1555 */ 1556 static status_t 1557 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size, 1558 bool kernel) 1559 { 1560 size = PAGE_ALIGN(size); 1561 addr_t lastAddress = address + (size - 1); 1562 1563 // Check, whether the caller is allowed to modify the concerned areas. 
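	// This is done in a separate pass before any cutting happens: if one of
	// the areas is protected (B_KERNEL_AREA), we want to fail before having
	// modified anything.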
1564 vm_area* area; 1565 if (!kernel) { 1566 area = addressSpace->areas; 1567 while (area != NULL) { 1568 vm_area* nextArea = area->address_space_next; 1569 1570 if (area->id != RESERVED_AREA_ID) { 1571 addr_t areaLast = area->base + (area->size - 1); 1572 if (area->base < lastAddress && address < areaLast) { 1573 if ((area->protection & B_KERNEL_AREA) != 0) 1574 return B_NOT_ALLOWED; 1575 } 1576 } 1577 1578 area = nextArea; 1579 } 1580 } 1581 1582 area = addressSpace->areas; 1583 while (area != NULL) { 1584 vm_area* nextArea = area->address_space_next; 1585 1586 if (area->id != RESERVED_AREA_ID) { 1587 addr_t areaLast = area->base + (area->size - 1); 1588 if (area->base < lastAddress && address < areaLast) { 1589 status_t error = cut_area(addressSpace, area, address, 1590 lastAddress, NULL, kernel); 1591 if (error != B_OK) 1592 return error; 1593 // Failing after already messing with areas is ugly, but we 1594 // can't do anything about it. 1595 } 1596 } 1597 1598 area = nextArea; 1599 } 1600 1601 return B_OK; 1602 } 1603 1604 1605 /*! You need to hold the lock of the cache and the write lock of the address 1606 space when calling this function. 1607 Note, that in case of error your cache will be temporarily unlocked. 1608 */ 1609 static status_t 1610 map_backing_store(vm_address_space* addressSpace, vm_cache* cache, 1611 void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 1612 int wiring, int protection, int mapping, vm_area** _area, 1613 const char* areaName, bool unmapAddressRange, bool kernel) 1614 { 1615 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, " 1616 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 1617 "'%s'\n", addressSpace, cache, *_virtualAddress, offset, size, 1618 addressSpec, wiring, protection, _area, areaName)); 1619 cache->AssertLocked(); 1620 1621 vm_area* area = create_area_struct(addressSpace, areaName, wiring, 1622 protection); 1623 if (area == NULL) 1624 return B_NO_MEMORY; 1625 1626 status_t status; 1627 1628 // if this is a private map, we need to create a new cache 1629 // to handle the private copies of pages as they are written to 1630 vm_cache* sourceCache = cache; 1631 if (mapping == REGION_PRIVATE_MAP) { 1632 vm_cache* newCache; 1633 1634 // create an anonymous cache 1635 status = VMCacheFactory::CreateAnonymousCache(newCache, 1636 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true); 1637 if (status != B_OK) 1638 goto err1; 1639 1640 newCache->Lock(); 1641 newCache->temporary = 1; 1642 newCache->scan_skip = cache->scan_skip; 1643 newCache->virtual_base = offset; 1644 newCache->virtual_end = offset + size; 1645 1646 cache->AddConsumer(newCache); 1647 1648 cache = newCache; 1649 } 1650 1651 status = cache->SetMinimalCommitment(size); 1652 if (status != B_OK) 1653 goto err2; 1654 1655 // check to see if this address space has entered DELETE state 1656 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 1657 // okay, someone is trying to delete this address space now, so we can't 1658 // insert the area, so back out 1659 status = B_BAD_TEAM_ID; 1660 goto err2; 1661 } 1662 1663 if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) { 1664 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 1665 size, kernel); 1666 if (status != B_OK) 1667 goto err2; 1668 } 1669 1670 status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area); 1671 if (status != B_OK) 1672 goto err2; 1673 1674 // attach the cache to the area 1675 area->cache = cache; 1676 
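	// (cache_offset = where within the cache the area's first page lives)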
area->cache_offset = offset; 1677 1678 // point the cache back to the area 1679 cache->InsertAreaLocked(area); 1680 if (mapping == REGION_PRIVATE_MAP) 1681 cache->Unlock(); 1682 1683 // insert the area in the global area hash table 1684 rw_lock_write_lock(&sAreaHashLock); 1685 hash_insert(sAreaHash, area); 1686 rw_lock_write_unlock(&sAreaHashLock); 1687 1688 // grab a ref to the address space (the area holds this) 1689 atomic_add(&addressSpace->ref_count, 1); 1690 1691 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1692 // cache, sourceCache, areaName, area); 1693 1694 *_area = area; 1695 return B_OK; 1696 1697 err2: 1698 if (mapping == REGION_PRIVATE_MAP) { 1699 // We created this cache, so we must delete it again. Note, that we 1700 // need to temporarily unlock the source cache or we'll otherwise 1701 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1702 sourceCache->Unlock(); 1703 cache->ReleaseRefAndUnlock(); 1704 sourceCache->Lock(); 1705 } 1706 err1: 1707 free(area->name); 1708 free(area); 1709 return status; 1710 } 1711 1712 1713 status_t 1714 vm_block_address_range(const char* name, void* address, addr_t size) 1715 { 1716 if (!arch_vm_supports_protection(0)) 1717 return B_NOT_SUPPORTED; 1718 1719 AddressSpaceWriteLocker locker; 1720 status_t status = locker.SetTo(vm_kernel_address_space_id()); 1721 if (status != B_OK) 1722 return status; 1723 1724 vm_address_space* addressSpace = locker.AddressSpace(); 1725 1726 // create an anonymous cache 1727 vm_cache* cache; 1728 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false); 1729 if (status != B_OK) 1730 return status; 1731 1732 cache->temporary = 1; 1733 cache->virtual_end = size; 1734 cache->scan_skip = 1; 1735 cache->Lock(); 1736 1737 vm_area* area; 1738 void* areaAddress = address; 1739 status = map_backing_store(addressSpace, cache, &areaAddress, 0, size, 1740 B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name, 1741 false, true); 1742 if (status != B_OK) { 1743 cache->ReleaseRefAndUnlock(); 1744 return status; 1745 } 1746 1747 cache->Unlock(); 1748 area->cache_type = CACHE_TYPE_RAM; 1749 return area->id; 1750 } 1751 1752 1753 status_t 1754 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1755 { 1756 AddressSpaceWriteLocker locker(team); 1757 if (!locker.IsLocked()) 1758 return B_BAD_TEAM_ID; 1759 1760 // check to see if this address space has entered DELETE state 1761 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1762 // okay, someone is trying to delete this address space now, so we can't 1763 // insert the area, so back out 1764 return B_BAD_TEAM_ID; 1765 } 1766 1767 // search area list and remove any matching reserved ranges 1768 1769 vm_area* area = locker.AddressSpace()->areas; 1770 vm_area* last = NULL; 1771 while (area) { 1772 // the area must be completely part of the reserved range 1773 if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address 1774 && area->base + area->size <= (addr_t)address + size) { 1775 // remove reserved range 1776 vm_area* reserved = area; 1777 if (last) 1778 last->address_space_next = reserved->address_space_next; 1779 else 1780 locker.AddressSpace()->areas = reserved->address_space_next; 1781 1782 area = reserved->address_space_next; 1783 vm_put_address_space(locker.AddressSpace()); 1784 free(reserved); 1785 continue; 1786 } 1787 1788 last = area; 1789 area = area->address_space_next; 1790 } 1791 1792 return B_OK; 1793 } 1794 1795 1796 status_t 1797 
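// Illustrative usage of the reservation functions (sketch): a range is
// reserved up front with vm_reserve_address_range() below and later filled
// with real areas created at B_EXACT_ADDRESS within it, e.g.:
//
//	void* base;
//	vm_reserve_address_range(team, &base, B_ANY_ADDRESS, totalSize, 0);
//	// ... create areas with B_EXACT_ADDRESS inside [base, base + totalSize) ...
//	vm_unreserve_address_range(team, base, totalSize);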
vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1798 addr_t size, uint32 flags) 1799 { 1800 if (size == 0) 1801 return B_BAD_VALUE; 1802 1803 AddressSpaceWriteLocker locker(team); 1804 if (!locker.IsLocked()) 1805 return B_BAD_TEAM_ID; 1806 1807 // check to see if this address space has entered DELETE state 1808 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1809 // okay, someone is trying to delete this address space now, so we 1810 // can't insert the area, let's back out 1811 return B_BAD_TEAM_ID; 1812 } 1813 1814 vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags); 1815 if (area == NULL) 1816 return B_NO_MEMORY; 1817 1818 status_t status = insert_area(locker.AddressSpace(), _address, addressSpec, 1819 size, area); 1820 if (status != B_OK) { 1821 free(area); 1822 return status; 1823 } 1824 1825 // the area is now reserved! 1826 1827 area->cache_offset = area->base; 1828 // we cache the original base address here 1829 1830 atomic_add(&locker.AddressSpace()->ref_count, 1); 1831 return B_OK; 1832 } 1833 1834 1835 area_id 1836 vm_create_anonymous_area(team_id team, const char* name, void** address, 1837 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 1838 addr_t physicalAddress, uint32 flags, bool kernel) 1839 { 1840 vm_area* area; 1841 vm_cache* cache; 1842 vm_page* page = NULL; 1843 bool isStack = (protection & B_STACK_AREA) != 0; 1844 page_num_t guardPages; 1845 bool canOvercommit = false; 1846 uint32 newPageState = (flags & CREATE_AREA_DONT_CLEAR) != 0 1847 ? PAGE_STATE_FREE : PAGE_STATE_CLEAR; 1848 1849 TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size)); 1850 1851 size = PAGE_ALIGN(size); 1852 1853 if (size == 0) 1854 return B_BAD_VALUE; 1855 if (!arch_vm_supports_protection(protection)) 1856 return B_NOT_SUPPORTED; 1857 1858 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1859 canOvercommit = true; 1860 1861 #ifdef DEBUG_KERNEL_STACKS 1862 if ((protection & B_KERNEL_STACK_AREA) != 0) 1863 isStack = true; 1864 #endif 1865 1866 // check parameters 1867 switch (addressSpec) { 1868 case B_ANY_ADDRESS: 1869 case B_EXACT_ADDRESS: 1870 case B_BASE_ADDRESS: 1871 case B_ANY_KERNEL_ADDRESS: 1872 case B_ANY_KERNEL_BLOCK_ADDRESS: 1873 break; 1874 case B_PHYSICAL_BASE_ADDRESS: 1875 physicalAddress = (addr_t)*address; 1876 addressSpec = B_ANY_KERNEL_ADDRESS; 1877 break; 1878 1879 default: 1880 return B_BAD_VALUE; 1881 } 1882 1883 if (physicalAddress != 0) 1884 wiring = B_CONTIGUOUS; 1885 1886 bool doReserveMemory = false; 1887 switch (wiring) { 1888 case B_NO_LOCK: 1889 break; 1890 case B_FULL_LOCK: 1891 case B_LAZY_LOCK: 1892 case B_CONTIGUOUS: 1893 doReserveMemory = true; 1894 break; 1895 case B_ALREADY_WIRED: 1896 break; 1897 case B_LOMEM: 1898 //case B_SLOWMEM: 1899 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 1900 wiring = B_FULL_LOCK; 1901 doReserveMemory = true; 1902 break; 1903 default: 1904 return B_BAD_VALUE; 1905 } 1906 1907 // For full lock or contiguous areas we're also going to map the pages and 1908 // thus need to reserve pages for the mapping backend upfront. 
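	// (Aside, purely illustrative: a typical kernel-side caller of this
	// function looks roughly like
	//
	//	void* address;
	//	area_id id = vm_create_anonymous_area(vm_kernel_address_space_id(),
	//		"some buffer", &address, B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE,
	//		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, true);
	//
	// i.e. with a page aligned size and one of the wiring modes handled here.)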
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		vm_translation_map* map = &locker.AddressSpace()->translation_map;
		reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1);
	}

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory handler
	// won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to, since we currently don't
		// reclaim them (and probably can't reclaim all of them anyway). Thus
		// there are actually fewer physical pages than there should be, which
		// can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	vm_address_space* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
	page_num_t reservedPages = reservedMapPages;
	if (wiring == B_FULL_LOCK)
		reservedPages += size / B_PAGE_SIZE;
	if (reservedPages > 0) {
		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
			if (!vm_page_try_reserve_pages(reservedPages)) {
				reservedPages = 0;
				status = B_WOULD_BLOCK;
				goto err0;
			}
		} else
			vm_page_reserve_pages(reservedPages);
	}

	status = locker.SetTo(team);
	if (status != B_OK)
		goto err0;

	addressSpace = locker.AddressSpace();

	if (wiring == B_CONTIGUOUS) {
		// we try to allocate the page run here upfront as this may easily
		// fail for obvious reasons
		page = vm_page_allocate_page_run(newPageState, physicalAddress,
			size / B_PAGE_SIZE);
		if (page == NULL) {
			status = B_NO_MEMORY;
			goto err0;
		}
	}

	// create an anonymous cache
	// if it's a stack, make sure that at least two pages are available
	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
		wiring == B_NO_LOCK);
	if (status != B_OK)
		goto err1;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->committed_size = reservedMemory;
		// TODO: This should be done via a method.
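	// From this point on the reservation is accounted for in the cache's
	// committed size; resetting reservedMemory below makes sure the error
	// paths don't unreserve it a second time.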
1989 reservedMemory = 0; 1990 1991 switch (wiring) { 1992 case B_LAZY_LOCK: 1993 case B_FULL_LOCK: 1994 case B_CONTIGUOUS: 1995 case B_ALREADY_WIRED: 1996 cache->scan_skip = 1; 1997 break; 1998 case B_NO_LOCK: 1999 cache->scan_skip = 0; 2000 break; 2001 } 2002 2003 cache->Lock(); 2004 2005 status = map_backing_store(addressSpace, cache, address, 0, size, 2006 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 2007 (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel); 2008 2009 if (status < B_OK) { 2010 cache->ReleaseRefAndUnlock(); 2011 goto err1; 2012 } 2013 2014 locker.DegradeToReadLock(); 2015 2016 switch (wiring) { 2017 case B_NO_LOCK: 2018 case B_LAZY_LOCK: 2019 // do nothing - the pages are mapped in as needed 2020 break; 2021 2022 case B_FULL_LOCK: 2023 { 2024 // Allocate and map all pages for this area 2025 2026 off_t offset = 0; 2027 for (addr_t address = area->base; 2028 address < area->base + (area->size - 1); 2029 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 2030 #ifdef DEBUG_KERNEL_STACKS 2031 # ifdef STACK_GROWS_DOWNWARDS 2032 if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES 2033 * B_PAGE_SIZE) 2034 # else 2035 if (isStack && address >= area->base + area->size 2036 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 2037 # endif 2038 continue; 2039 #endif 2040 vm_page* page = vm_page_allocate_page(newPageState, true); 2041 cache->InsertPage(page, offset); 2042 vm_map_page(area, page, address, protection); 2043 2044 // Periodically unreserve pages we've already allocated, so that 2045 // we don't unnecessarily increase the pressure on the VM. 2046 if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) { 2047 page_num_t toUnreserve = 128; 2048 vm_page_unreserve_pages(toUnreserve); 2049 reservedPages -= toUnreserve; 2050 } 2051 } 2052 2053 break; 2054 } 2055 2056 case B_ALREADY_WIRED: 2057 { 2058 // The pages should already be mapped. This is only really useful 2059 // during boot time. Find the appropriate vm_page objects and stick 2060 // them in the cache object. 
2061 vm_translation_map* map = &addressSpace->translation_map; 2062 off_t offset = 0; 2063 2064 if (!gKernelStartup) 2065 panic("ALREADY_WIRED flag used outside kernel startup\n"); 2066 2067 map->ops->lock(map); 2068 2069 for (addr_t virtualAddress = area->base; virtualAddress < area->base 2070 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2071 offset += B_PAGE_SIZE) { 2072 addr_t physicalAddress; 2073 uint32 flags; 2074 status = map->ops->query(map, virtualAddress, 2075 &physicalAddress, &flags); 2076 if (status < B_OK) { 2077 panic("looking up mapping failed for va 0x%lx\n", 2078 virtualAddress); 2079 } 2080 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2081 if (page == NULL) { 2082 panic("looking up page failed for pa 0x%lx\n", 2083 physicalAddress); 2084 } 2085 2086 increment_page_wired_count(page); 2087 vm_page_set_state(page, PAGE_STATE_WIRED); 2088 cache->InsertPage(page, offset); 2089 } 2090 2091 map->ops->unlock(map); 2092 break; 2093 } 2094 2095 case B_CONTIGUOUS: 2096 { 2097 // We have already allocated our continuous pages run, so we can now 2098 // just map them in the address space 2099 vm_translation_map* map = &addressSpace->translation_map; 2100 addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE; 2101 addr_t virtualAddress = area->base; 2102 off_t offset = 0; 2103 2104 map->ops->lock(map); 2105 2106 for (virtualAddress = area->base; virtualAddress < area->base 2107 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2108 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 2109 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2110 if (page == NULL) 2111 panic("couldn't lookup physical page just allocated\n"); 2112 2113 status = map->ops->map(map, virtualAddress, physicalAddress, 2114 protection); 2115 if (status < B_OK) 2116 panic("couldn't map physical page in page run\n"); 2117 2118 increment_page_wired_count(page); 2119 vm_page_set_state(page, PAGE_STATE_WIRED); 2120 cache->InsertPage(page, offset); 2121 } 2122 2123 map->ops->unlock(map); 2124 break; 2125 } 2126 2127 default: 2128 break; 2129 } 2130 2131 cache->Unlock(); 2132 2133 if (reservedPages > 0) 2134 vm_page_unreserve_pages(reservedPages); 2135 2136 TRACE(("vm_create_anonymous_area: done\n")); 2137 2138 area->cache_type = CACHE_TYPE_RAM; 2139 return area->id; 2140 2141 err1: 2142 if (wiring == B_CONTIGUOUS) { 2143 // we had reserved the area space upfront... 
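		// ... so walk the page run allocated earlier and hand every page in
		// it back to the free list.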
2144 addr_t pageNumber = page->physical_page_number; 2145 int32 i; 2146 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 2147 page = vm_lookup_page(pageNumber); 2148 if (page == NULL) 2149 panic("couldn't lookup physical page just allocated\n"); 2150 2151 vm_page_set_state(page, PAGE_STATE_FREE); 2152 } 2153 } 2154 2155 err0: 2156 if (reservedPages > 0) 2157 vm_page_unreserve_pages(reservedPages); 2158 if (reservedMemory > 0) 2159 vm_unreserve_memory(reservedMemory); 2160 2161 return status; 2162 } 2163 2164 2165 area_id 2166 vm_map_physical_memory(team_id team, const char* name, void** _address, 2167 uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress) 2168 { 2169 vm_area* area; 2170 vm_cache* cache; 2171 addr_t mapOffset; 2172 2173 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 2174 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 2175 name, _address, addressSpec, size, protection, physicalAddress)); 2176 2177 if (!arch_vm_supports_protection(protection)) 2178 return B_NOT_SUPPORTED; 2179 2180 AddressSpaceWriteLocker locker(team); 2181 if (!locker.IsLocked()) 2182 return B_BAD_TEAM_ID; 2183 2184 // if the physical address is somewhat inside a page, 2185 // move the actual area down to align on a page boundary 2186 mapOffset = physicalAddress % B_PAGE_SIZE; 2187 size += mapOffset; 2188 physicalAddress -= mapOffset; 2189 2190 size = PAGE_ALIGN(size); 2191 2192 // create a device cache 2193 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 2194 if (status != B_OK) 2195 return status; 2196 2197 // tell the page scanner to skip over this area, it's pages are special 2198 cache->scan_skip = 1; 2199 cache->virtual_end = size; 2200 2201 cache->Lock(); 2202 2203 status = map_backing_store(locker.AddressSpace(), cache, _address, 2204 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2205 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2206 2207 if (status < B_OK) 2208 cache->ReleaseRefLocked(); 2209 2210 cache->Unlock(); 2211 2212 if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) { 2213 // set requested memory type 2214 status = arch_vm_set_memory_type(area, physicalAddress, 2215 addressSpec & B_MTR_MASK); 2216 if (status < B_OK) 2217 delete_area(locker.AddressSpace(), area); 2218 } 2219 2220 if (status >= B_OK) { 2221 // make sure our area is mapped in completely 2222 2223 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2224 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2225 area->base + (size - 1)); 2226 2227 vm_page_reserve_pages(reservePages); 2228 map->ops->lock(map); 2229 2230 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2231 map->ops->map(map, area->base + offset, physicalAddress + offset, 2232 protection); 2233 } 2234 2235 map->ops->unlock(map); 2236 vm_page_unreserve_pages(reservePages); 2237 } 2238 2239 if (status < B_OK) 2240 return status; 2241 2242 // modify the pointer returned to be offset back into the new area 2243 // the same way the physical address in was offset 2244 *_address = (void*)((addr_t)*_address + mapOffset); 2245 2246 area->cache_type = CACHE_TYPE_DEVICE; 2247 return area->id; 2248 } 2249 2250 2251 area_id 2252 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 2253 uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs, 2254 uint32 vecCount) 2255 { 2256 TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, " 2257 "spec = %ld, size = %lu, 
protection = %ld, phys = %#lx)\n", team, 2258 name, _address, addressSpec, size, protection, physicalAddress)); 2259 2260 if (!arch_vm_supports_protection(protection) 2261 || (addressSpec & B_MTR_MASK) != 0) { 2262 return B_NOT_SUPPORTED; 2263 } 2264 2265 AddressSpaceWriteLocker locker(team); 2266 if (!locker.IsLocked()) 2267 return B_BAD_TEAM_ID; 2268 2269 if (vecCount == 0) 2270 return B_BAD_VALUE; 2271 2272 addr_t size = 0; 2273 for (uint32 i = 0; i < vecCount; i++) { 2274 if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0 2275 || vecs[i].iov_len % B_PAGE_SIZE != 0) { 2276 return B_BAD_VALUE; 2277 } 2278 2279 size += vecs[i].iov_len; 2280 } 2281 2282 // create a device cache 2283 vm_cache* cache; 2284 status_t result = VMCacheFactory::CreateDeviceCache(cache, 2285 (addr_t)vecs[0].iov_base); 2286 if (result != B_OK) 2287 return result; 2288 2289 // tell the page scanner to skip over this area, it's pages are special 2290 cache->scan_skip = 1; 2291 cache->virtual_end = size; 2292 2293 cache->Lock(); 2294 2295 vm_area* area; 2296 result = map_backing_store(locker.AddressSpace(), cache, _address, 2297 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2298 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2299 2300 if (result != B_OK) 2301 cache->ReleaseRefLocked(); 2302 2303 cache->Unlock(); 2304 2305 if (result != B_OK) 2306 return result; 2307 2308 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2309 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2310 area->base + (size - 1)); 2311 2312 vm_page_reserve_pages(reservePages); 2313 map->ops->lock(map); 2314 2315 uint32 vecIndex = 0; 2316 size_t vecOffset = 0; 2317 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2318 while (vecOffset >= vecs[vecIndex].iov_len && vecIndex < vecCount) { 2319 vecOffset = 0; 2320 vecIndex++; 2321 } 2322 2323 if (vecIndex >= vecCount) 2324 break; 2325 2326 map->ops->map(map, area->base + offset, 2327 (addr_t)vecs[vecIndex].iov_base + vecOffset, protection); 2328 2329 vecOffset += B_PAGE_SIZE; 2330 } 2331 2332 map->ops->unlock(map); 2333 vm_page_unreserve_pages(reservePages); 2334 2335 if (_size != NULL) 2336 *_size = size; 2337 2338 area->cache_type = CACHE_TYPE_DEVICE; 2339 return area->id; 2340 } 2341 2342 2343 area_id 2344 vm_create_null_area(team_id team, const char* name, void** address, 2345 uint32 addressSpec, addr_t size) 2346 { 2347 vm_area* area; 2348 vm_cache* cache; 2349 status_t status; 2350 2351 AddressSpaceWriteLocker locker(team); 2352 if (!locker.IsLocked()) 2353 return B_BAD_TEAM_ID; 2354 2355 size = PAGE_ALIGN(size); 2356 2357 // create an null cache 2358 status = VMCacheFactory::CreateNullCache(cache); 2359 if (status != B_OK) 2360 return status; 2361 2362 // tell the page scanner to skip over this area, no pages will be mapped here 2363 cache->scan_skip = 1; 2364 cache->virtual_end = size; 2365 2366 cache->Lock(); 2367 2368 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 2369 addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name, 2370 false, true); 2371 2372 if (status < B_OK) { 2373 cache->ReleaseRefAndUnlock(); 2374 return status; 2375 } 2376 2377 cache->Unlock(); 2378 2379 area->cache_type = CACHE_TYPE_NULL; 2380 return area->id; 2381 } 2382 2383 2384 /*! Creates the vnode cache for the specified \a vnode. 2385 The vnode has to be marked busy when calling this function. 
*/ 2387 status_t 2388 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2389 { 2390 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2391 } 2392 2393 2394 /*! \a cache must be locked. The area's address space must be read-locked. 2395 */ 2396 static void 2397 pre_map_area_pages(vm_area* area, VMCache* cache) 2398 { 2399 addr_t baseAddress = area->base; 2400 addr_t cacheOffset = area->cache_offset; 2401 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2402 page_num_t endPage = firstPage + area->size / B_PAGE_SIZE; 2403 2404 for (VMCachePagesTree::Iterator it 2405 = cache->pages.GetIterator(firstPage, true, true); 2406 vm_page* page = it.Next();) { 2407 if (page->cache_offset >= endPage) 2408 break; 2409 2410 // skip busy and inactive pages 2411 if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0) 2412 continue; 2413 2414 vm_map_page(area, page, 2415 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2416 B_READ_AREA | B_KERNEL_READ_AREA); 2417 } 2418 } 2419 2420 2421 /*! Will map the file specified by \a fd to an area in memory. 2422 The file will be mirrored beginning at the specified \a offset. The 2423 \a offset and \a size arguments have to be page-aligned. 2424 */ 2425 static area_id 2426 _vm_map_file(team_id team, const char* name, void** _address, 2427 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2428 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2429 { 2430 // TODO: for binary files, we want to make sure that they get the 2431 // copy of a file at a given time, i.e. later changes should not 2432 // make it into the mapped copy -- this will need quite some changes 2433 // to be done in a nice way 2434 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 2435 fd, offset, size, mapping)); 2436 2437 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2438 size = PAGE_ALIGN(size); 2439 2440 if (mapping == REGION_NO_PRIVATE_MAP) 2441 protection |= B_SHARED_AREA; 2442 if (addressSpec != B_EXACT_ADDRESS) 2443 unmapAddressRange = false; 2444 2445 if (fd < 0) { 2446 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2447 return vm_create_anonymous_area(team, name, _address, addressSpec, size, 2448 B_NO_LOCK, protection, 0, flags, kernel); 2449 } 2450 2451 // get the open flags of the FD 2452 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2453 if (descriptor == NULL) 2454 return EBADF; 2455 int32 openMode = descriptor->open_mode; 2456 put_fd(descriptor); 2457 2458 // The FD must be open for reading in any case. For a shared mapping with write 2459 // access, the FD must additionally be open for writing. 2460 if ((openMode & O_ACCMODE) == O_WRONLY 2461 || (mapping == REGION_NO_PRIVATE_MAP 2462 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2463 && (openMode & O_ACCMODE) == O_RDONLY)) { 2464 return EACCES; 2465 } 2466 2467 // get the vnode for the object; this also grabs a ref to it 2468 struct vnode* vnode = NULL; 2469 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2470 if (status < B_OK) 2471 return status; 2472 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 2473 2474 // If we're going to pre-map pages, we need to reserve the pages needed by 2475 // the mapping backend upfront.
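// The reservation is handed to the small RAII helper (PageUnreserver) defined below, which returns the pages on every exit path from this function.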
2476 page_num_t reservedPreMapPages = 0; 2477 if ((protection & B_READ_AREA) != 0) { 2478 AddressSpaceWriteLocker locker; 2479 status = locker.SetTo(team); 2480 if (status != B_OK) 2481 return status; 2482 2483 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2484 reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 2485 2486 locker.Unlock(); 2487 2488 vm_page_reserve_pages(reservedPreMapPages); 2489 } 2490 2491 struct PageUnreserver { 2492 PageUnreserver(page_num_t count) 2493 : fCount(count) 2494 { 2495 } 2496 2497 ~PageUnreserver() 2498 { 2499 if (fCount > 0) 2500 vm_page_unreserve_pages(fCount); 2501 } 2502 2503 page_num_t fCount; 2504 } pageUnreserver(reservedPreMapPages); 2505 2506 AddressSpaceWriteLocker locker(team); 2507 if (!locker.IsLocked()) 2508 return B_BAD_TEAM_ID; 2509 2510 // TODO: this only works for file systems that use the file cache 2511 vm_cache* cache; 2512 status = vfs_get_vnode_cache(vnode, &cache, false); 2513 if (status < B_OK) 2514 return status; 2515 2516 cache->Lock(); 2517 2518 vm_area* area; 2519 status = map_backing_store(locker.AddressSpace(), cache, _address, 2520 offset, size, addressSpec, 0, protection, mapping, &area, name, 2521 unmapAddressRange, kernel); 2522 2523 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2524 // map_backing_store() cannot know we no longer need the ref 2525 cache->ReleaseRefLocked(); 2526 } 2527 2528 if (status == B_OK && (protection & B_READ_AREA) != 0) 2529 pre_map_area_pages(area, cache); 2530 2531 cache->Unlock(); 2532 2533 if (status == B_OK) { 2534 // TODO: this probably deserves a smarter solution, ie. don't always 2535 // prefetch stuff, and also, probably don't trigger it at this place. 2536 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2537 // prefetches at max 10 MB starting from "offset" 2538 } 2539 2540 if (status != B_OK) 2541 return status; 2542 2543 area->cache_type = CACHE_TYPE_VNODE; 2544 return area->id; 2545 } 2546 2547 2548 area_id 2549 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2550 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2551 int fd, off_t offset) 2552 { 2553 if (!arch_vm_supports_protection(protection)) 2554 return B_NOT_SUPPORTED; 2555 2556 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2557 mapping, unmapAddressRange, fd, offset, true); 2558 } 2559 2560 2561 vm_cache* 2562 vm_area_get_locked_cache(vm_area* area) 2563 { 2564 mutex_lock(&sAreaCacheLock); 2565 2566 while (true) { 2567 vm_cache* cache = area->cache; 2568 2569 if (!cache->SwitchLock(&sAreaCacheLock)) { 2570 // cache has been deleted 2571 mutex_lock(&sAreaCacheLock); 2572 continue; 2573 } 2574 2575 mutex_lock(&sAreaCacheLock); 2576 2577 if (cache == area->cache) { 2578 cache->AcquireRefLocked(); 2579 mutex_unlock(&sAreaCacheLock); 2580 return cache; 2581 } 2582 2583 // the cache changed in the meantime 2584 cache->Unlock(); 2585 } 2586 } 2587 2588 2589 void 2590 vm_area_put_locked_cache(vm_cache* cache) 2591 { 2592 cache->ReleaseRefAndUnlock(); 2593 } 2594 2595 2596 area_id 2597 vm_clone_area(team_id team, const char* name, void** address, 2598 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2599 bool kernel) 2600 { 2601 vm_area* newArea = NULL; 2602 vm_area* sourceArea; 2603 2604 // Check whether the source area exists and is cloneable. If so, mark it 2605 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 
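// This is done in its own scope so that the source address space is only write-locked long enough to set the flag; afterwards both address spaces are locked together via the MultiAddressSpaceLocker below.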
2606 { 2607 AddressSpaceWriteLocker locker; 2608 status_t status = locker.SetFromArea(sourceID, sourceArea); 2609 if (status != B_OK) 2610 return status; 2611 2612 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2613 return B_NOT_ALLOWED; 2614 2615 sourceArea->protection |= B_SHARED_AREA; 2616 protection |= B_SHARED_AREA; 2617 } 2618 2619 // Now lock both address spaces and actually do the cloning. 2620 2621 MultiAddressSpaceLocker locker; 2622 vm_address_space* sourceAddressSpace; 2623 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2624 if (status != B_OK) 2625 return status; 2626 2627 vm_address_space* targetAddressSpace; 2628 status = locker.AddTeam(team, true, &targetAddressSpace); 2629 if (status != B_OK) 2630 return status; 2631 2632 status = locker.Lock(); 2633 if (status != B_OK) 2634 return status; 2635 2636 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2637 if (sourceArea == NULL) 2638 return B_BAD_VALUE; 2639 2640 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2641 return B_NOT_ALLOWED; 2642 2643 vm_cache* cache = vm_area_get_locked_cache(sourceArea); 2644 2645 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2646 // have been adapted. Maybe it should be part of the kernel settings, 2647 // anyway (so that old drivers can always work). 2648 #if 0 2649 if (sourceArea->aspace == vm_kernel_address_space() 2650 && addressSpace != vm_kernel_address_space() 2651 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2652 // kernel areas must not be cloned in userland, unless explicitly 2653 // declared user-cloneable upon construction 2654 status = B_NOT_ALLOWED; 2655 } else 2656 #endif 2657 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2658 status = B_NOT_ALLOWED; 2659 else { 2660 status = map_backing_store(targetAddressSpace, cache, address, 2661 sourceArea->cache_offset, sourceArea->size, addressSpec, 2662 sourceArea->wiring, protection, mapping, &newArea, name, false, 2663 kernel); 2664 } 2665 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2666 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2667 // to create a new cache, and has therefore already acquired a reference 2668 // to the source cache - but otherwise it has no idea that we need 2669 // one. 
2670 cache->AcquireRefLocked(); 2671 } 2672 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2673 // we need to map in everything at this point 2674 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2675 // we don't have actual pages to map but a physical area 2676 vm_translation_map* map 2677 = &sourceArea->address_space->translation_map; 2678 map->ops->lock(map); 2679 2680 addr_t physicalAddress; 2681 uint32 oldProtection; 2682 map->ops->query(map, sourceArea->base, &physicalAddress, 2683 &oldProtection); 2684 2685 map->ops->unlock(map); 2686 2687 map = &targetAddressSpace->translation_map; 2688 size_t reservePages = map->ops->map_max_pages_need(map, 2689 newArea->base, newArea->base + (newArea->size - 1)); 2690 2691 vm_page_reserve_pages(reservePages); 2692 map->ops->lock(map); 2693 2694 for (addr_t offset = 0; offset < newArea->size; 2695 offset += B_PAGE_SIZE) { 2696 map->ops->map(map, newArea->base + offset, 2697 physicalAddress + offset, protection); 2698 } 2699 2700 map->ops->unlock(map); 2701 vm_page_unreserve_pages(reservePages); 2702 } else { 2703 vm_translation_map* map = &targetAddressSpace->translation_map; 2704 size_t reservePages = map->ops->map_max_pages_need(map, 2705 newArea->base, newArea->base + (newArea->size - 1)); 2706 vm_page_reserve_pages(reservePages); 2707 2708 // map in all pages from source 2709 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2710 vm_page* page = it.Next();) { 2711 vm_map_page(newArea, page, newArea->base 2712 + ((page->cache_offset << PAGE_SHIFT) 2713 - newArea->cache_offset), protection); 2714 } 2715 2716 vm_page_unreserve_pages(reservePages); 2717 } 2718 } 2719 if (status == B_OK) 2720 newArea->cache_type = sourceArea->cache_type; 2721 2722 vm_area_put_locked_cache(cache); 2723 2724 if (status < B_OK) 2725 return status; 2726 2727 return newArea->id; 2728 } 2729 2730 2731 //! The address space must be write locked at this point 2732 static void 2733 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area) 2734 { 2735 vm_area* temp = addressSpace->areas; 2736 vm_area* last = NULL; 2737 2738 while (temp != NULL) { 2739 if (area == temp) { 2740 if (last != NULL) { 2741 last->address_space_next = temp->address_space_next; 2742 } else { 2743 addressSpace->areas = temp->address_space_next; 2744 } 2745 addressSpace->change_count++; 2746 break; 2747 } 2748 last = temp; 2749 temp = temp->address_space_next; 2750 } 2751 if (area == addressSpace->area_hint) 2752 addressSpace->area_hint = NULL; 2753 2754 if (temp == NULL) 2755 panic("vm_area_release_ref: area not found in aspace's area list\n"); 2756 } 2757 2758 2759 static void 2760 delete_area(vm_address_space* addressSpace, vm_area* area) 2761 { 2762 rw_lock_write_lock(&sAreaHashLock); 2763 hash_remove(sAreaHash, area); 2764 rw_lock_write_unlock(&sAreaHashLock); 2765 2766 // At this point the area is removed from the global hash table, but 2767 // still exists in the area list. 
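// The remaining tear-down happens in order: unmap the area's pages, write back modified pages of non-temporary caches, detach the area from its address space and its cache, and finally free the area structure itself.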
2768 2769 // Unmap the virtual address space the area occupied 2770 vm_unmap_pages(area, area->base, area->size, !area->cache->temporary); 2771 2772 if (!area->cache->temporary) 2773 area->cache->WriteModified(); 2774 2775 arch_vm_unset_memory_type(area); 2776 remove_area_from_address_space(addressSpace, area); 2777 vm_put_address_space(addressSpace); 2778 2779 area->cache->RemoveArea(area); 2780 area->cache->ReleaseRef(); 2781 2782 free(area->page_protections); 2783 free(area->name); 2784 free(area); 2785 } 2786 2787 2788 status_t 2789 vm_delete_area(team_id team, area_id id, bool kernel) 2790 { 2791 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2792 2793 AddressSpaceWriteLocker locker; 2794 vm_area* area; 2795 status_t status = locker.SetFromArea(team, id, area); 2796 if (status != B_OK) 2797 return status; 2798 2799 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2800 return B_NOT_ALLOWED; 2801 2802 delete_area(locker.AddressSpace(), area); 2803 return B_OK; 2804 } 2805 2806 2807 /*! Creates a new cache on top of given cache, moves all areas from 2808 the old cache to the new one, and changes the protection of all affected 2809 areas' pages to read-only. 2810 Preconditions: 2811 - The given cache must be locked. 2812 - All of the cache's areas' address spaces must be read locked. 2813 */ 2814 static status_t 2815 vm_copy_on_write_area(vm_cache* lowerCache) 2816 { 2817 vm_cache* upperCache; 2818 2819 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2820 2821 // We need to separate the cache from its areas. The cache goes one level 2822 // deeper and we create a new cache inbetween. 2823 2824 // create an anonymous cache 2825 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2826 0, true); 2827 if (status != B_OK) 2828 return status; 2829 2830 upperCache->Lock(); 2831 2832 upperCache->temporary = 1; 2833 upperCache->scan_skip = lowerCache->scan_skip; 2834 upperCache->virtual_base = lowerCache->virtual_base; 2835 upperCache->virtual_end = lowerCache->virtual_end; 2836 2837 // transfer the lower cache areas to the upper cache 2838 mutex_lock(&sAreaCacheLock); 2839 2840 upperCache->areas = lowerCache->areas; 2841 lowerCache->areas = NULL; 2842 2843 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2844 tempArea = tempArea->cache_next) { 2845 tempArea->cache = upperCache; 2846 upperCache->AcquireRefLocked(); 2847 lowerCache->ReleaseRefLocked(); 2848 } 2849 2850 mutex_unlock(&sAreaCacheLock); 2851 2852 lowerCache->AddConsumer(upperCache); 2853 2854 // We now need to remap all pages from all of the cache's areas read-only, so 2855 // that a copy will be created on next write access 2856 2857 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2858 tempArea = tempArea->cache_next) { 2859 // The area must be readable in the same way it was previously writable 2860 uint32 protection = B_KERNEL_READ_AREA; 2861 if ((tempArea->protection & B_READ_AREA) != 0) 2862 protection |= B_READ_AREA; 2863 2864 vm_translation_map* map = &tempArea->address_space->translation_map; 2865 map->ops->lock(map); 2866 map->ops->protect(map, tempArea->base, 2867 tempArea->base - 1 + tempArea->size, protection); 2868 map->ops->unlock(map); 2869 } 2870 2871 vm_area_put_locked_cache(upperCache); 2872 2873 return B_OK; 2874 } 2875 2876 2877 area_id 2878 vm_copy_area(team_id team, const char* name, void** _address, 2879 uint32 addressSpec, uint32 protection, area_id sourceID) 2880 { 2881 bool writableCopy = (protection & 
(B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2882 2883 if ((protection & B_KERNEL_PROTECTION) == 0) { 2884 // set the same protection for the kernel as for userland 2885 protection |= B_KERNEL_READ_AREA; 2886 if (writableCopy) 2887 protection |= B_KERNEL_WRITE_AREA; 2888 } 2889 2890 // Do the locking: target address space, all address spaces associated with 2891 // the source cache, and the cache itself. 2892 MultiAddressSpaceLocker locker; 2893 vm_address_space* targetAddressSpace; 2894 vm_cache* cache; 2895 vm_area* source; 2896 status_t status = locker.AddTeam(team, true, &targetAddressSpace); 2897 if (status == B_OK) { 2898 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2899 &cache); 2900 } 2901 if (status != B_OK) 2902 return status; 2903 2904 AreaCacheLocker cacheLocker(cache); // already locked 2905 2906 if (addressSpec == B_CLONE_ADDRESS) { 2907 addressSpec = B_EXACT_ADDRESS; 2908 *_address = (void*)source->base; 2909 } 2910 2911 bool sharedArea = (source->protection & B_SHARED_AREA) != 0; 2912 2913 // First, create a cache on top of the source area, or use the existing 2914 // one if this is a shared area. 2915 2916 vm_area* target; 2917 status = map_backing_store(targetAddressSpace, cache, _address, 2918 source->cache_offset, source->size, addressSpec, source->wiring, 2919 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2920 &target, name, false, true); 2921 if (status < B_OK) 2922 return status; 2923 2924 if (sharedArea) { 2925 // The new area uses the old area's cache, but map_backing_store() 2926 // hasn't acquired a ref. So we have to do that now. 2927 cache->AcquireRefLocked(); 2928 } 2929 2930 // If the source area is writable, we need to move it one layer up as well 2931 2932 if (!sharedArea) { 2933 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2934 // TODO: do something more useful if this fails! 2935 if (vm_copy_on_write_area(cache) < B_OK) 2936 panic("vm_copy_on_write_area() failed!\n"); 2937 } 2938 } 2939 2940 // we return the ID of the newly created area 2941 return target->id; 2942 } 2943 2944 2945 //!
You need to hold the cache lock when calling this function 2946 static int32 2947 count_writable_areas(vm_cache* cache, vm_area* ignoreArea) 2948 { 2949 struct vm_area* area = cache->areas; 2950 uint32 count = 0; 2951 2952 for (; area != NULL; area = area->cache_next) { 2953 if (area != ignoreArea 2954 && (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) 2955 count++; 2956 } 2957 2958 return count; 2959 } 2960 2961 2962 static status_t 2963 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2964 bool kernel) 2965 { 2966 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2967 "%#lx)\n", team, areaID, newProtection)); 2968 2969 if (!arch_vm_supports_protection(newProtection)) 2970 return B_NOT_SUPPORTED; 2971 2972 // lock address spaces and cache 2973 MultiAddressSpaceLocker locker; 2974 vm_cache* cache; 2975 vm_area* area; 2976 status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area, 2977 &cache); 2978 AreaCacheLocker cacheLocker(cache); // already locked 2979 2980 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2981 return B_NOT_ALLOWED; 2982 2983 if (area->protection == newProtection) 2984 return B_OK; 2985 2986 if (team != vm_kernel_address_space_id() 2987 && area->address_space->id != team) { 2988 // unless you're the kernel, you are only allowed to set 2989 // the protection of your own areas 2990 return B_NOT_ALLOWED; 2991 } 2992 2993 bool changePageProtection = true; 2994 2995 if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2996 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) { 2997 // writable -> !writable 2998 2999 if (cache->source != NULL && cache->temporary) { 3000 if (count_writable_areas(cache, area) == 0) { 3001 // Since this cache now lives from the pages in its source cache, 3002 // we can change the cache's commitment to take only those pages 3003 // into account that really are in this cache. 3004 3005 status = cache->Commit(cache->page_count * B_PAGE_SIZE); 3006 3007 // TODO: we may be able to join with our source cache, if 3008 // count == 0 3009 } 3010 } 3011 } else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0 3012 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 3013 // !writable -> writable 3014 3015 if (!list_is_empty(&cache->consumers)) { 3016 // There are consumers -- we have to insert a new cache. Fortunately 3017 // vm_copy_on_write_area() does everything that's needed. 3018 changePageProtection = false; 3019 status = vm_copy_on_write_area(cache); 3020 } else { 3021 // No consumers, so we don't need to insert a new one. 3022 if (cache->source != NULL && cache->temporary) { 3023 // the cache's commitment must contain all possible pages 3024 status = cache->Commit(cache->virtual_end 3025 - cache->virtual_base); 3026 } 3027 3028 if (status == B_OK && cache->source != NULL) { 3029 // There's a source cache, hence we can't just change all pages' 3030 // protection or we might allow writing into pages belonging to 3031 // a lower cache. 
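// Therefore only the pages that are actually present in this cache are remapped with the new protection; pages still provided by lower caches keep their current non-writable mapping, so a write access faults and the page is copied into this cache first.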
3032 changePageProtection = false; 3033 3034 struct vm_translation_map* map 3035 = &area->address_space->translation_map; 3036 map->ops->lock(map); 3037 3038 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3039 vm_page* page = it.Next();) { 3040 addr_t address = area->base 3041 + (page->cache_offset << PAGE_SHIFT); 3042 map->ops->protect(map, address, address - 1 + B_PAGE_SIZE, 3043 newProtection); 3044 } 3045 3046 map->ops->unlock(map); 3047 } 3048 } 3049 } else { 3050 // we don't have anything special to do in all other cases 3051 } 3052 3053 if (status == B_OK) { 3054 // remap existing pages in this cache 3055 struct vm_translation_map* map = &area->address_space->translation_map; 3056 3057 if (changePageProtection) { 3058 map->ops->lock(map); 3059 map->ops->protect(map, area->base, area->base - 1 + area->size, 3060 newProtection); 3061 map->ops->unlock(map); 3062 } 3063 3064 area->protection = newProtection; 3065 } 3066 3067 return status; 3068 } 3069 3070 3071 status_t 3072 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr) 3073 { 3074 vm_address_space* addressSpace = vm_get_address_space(team); 3075 if (addressSpace == NULL) 3076 return B_BAD_TEAM_ID; 3077 3078 uint32 dummyFlags; 3079 status_t status = addressSpace->translation_map.ops->query( 3080 &addressSpace->translation_map, vaddr, paddr, &dummyFlags); 3081 3082 vm_put_address_space(addressSpace); 3083 return status; 3084 } 3085 3086 3087 static inline addr_t 3088 virtual_page_address(vm_area* area, vm_page* page) 3089 { 3090 return area->base 3091 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 3092 } 3093 3094 3095 bool 3096 vm_test_map_modification(vm_page* page) 3097 { 3098 MutexLocker locker(sMappingLock); 3099 3100 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3101 vm_page_mapping* mapping; 3102 while ((mapping = iterator.Next()) != NULL) { 3103 vm_area* area = mapping->area; 3104 vm_translation_map* map = &area->address_space->translation_map; 3105 3106 addr_t physicalAddress; 3107 uint32 flags; 3108 map->ops->lock(map); 3109 map->ops->query(map, virtual_page_address(area, page), 3110 &physicalAddress, &flags); 3111 map->ops->unlock(map); 3112 3113 if ((flags & PAGE_MODIFIED) != 0) 3114 return true; 3115 } 3116 3117 return false; 3118 } 3119 3120 3121 int32 3122 vm_test_map_activation(vm_page* page, bool* _modified) 3123 { 3124 int32 activation = 0; 3125 bool modified = false; 3126 3127 MutexLocker locker(sMappingLock); 3128 3129 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3130 vm_page_mapping* mapping; 3131 while ((mapping = iterator.Next()) != NULL) { 3132 vm_area* area = mapping->area; 3133 vm_translation_map* map = &area->address_space->translation_map; 3134 3135 addr_t physicalAddress; 3136 uint32 flags; 3137 map->ops->lock(map); 3138 map->ops->query(map, virtual_page_address(area, page), 3139 &physicalAddress, &flags); 3140 map->ops->unlock(map); 3141 3142 if ((flags & PAGE_ACCESSED) != 0) 3143 activation++; 3144 if ((flags & PAGE_MODIFIED) != 0) 3145 modified = true; 3146 } 3147 3148 if (_modified != NULL) 3149 *_modified = modified; 3150 3151 return activation; 3152 } 3153 3154 3155 void 3156 vm_clear_map_flags(vm_page* page, uint32 flags) 3157 { 3158 MutexLocker locker(sMappingLock); 3159 3160 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3161 vm_page_mapping* mapping; 3162 while ((mapping = iterator.Next()) != NULL) { 3163 vm_area* area = mapping->area; 3164 vm_translation_map* map = 
&area->address_space->translation_map; 3165 3166 map->ops->lock(map); 3167 map->ops->clear_flags(map, virtual_page_address(area, page), flags); 3168 map->ops->unlock(map); 3169 } 3170 } 3171 3172 3173 /*! Removes all mappings from a page. 3174 After you've called this function, the page is unmapped from memory. 3175 The accumulated page flags of all mappings can be found in \a _flags. 3176 */ 3177 void 3178 vm_remove_all_page_mappings(vm_page* page, uint32* _flags) 3179 { 3180 uint32 accumulatedFlags = 0; 3181 MutexLocker locker(sMappingLock); 3182 3183 vm_page_mappings queue; 3184 queue.MoveFrom(&page->mappings); 3185 3186 vm_page_mappings::Iterator iterator = queue.GetIterator(); 3187 vm_page_mapping* mapping; 3188 while ((mapping = iterator.Next()) != NULL) { 3189 vm_area* area = mapping->area; 3190 vm_translation_map* map = &area->address_space->translation_map; 3191 addr_t physicalAddress; 3192 uint32 flags; 3193 3194 map->ops->lock(map); 3195 addr_t address = virtual_page_address(area, page); 3196 map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1)); 3197 map->ops->flush(map); 3198 map->ops->query(map, address, &physicalAddress, &flags); 3199 map->ops->unlock(map); 3200 3201 area->mappings.Remove(mapping); 3202 3203 accumulatedFlags |= flags; 3204 } 3205 3206 if (page->wired_count == 0 && !queue.IsEmpty()) 3207 atomic_add(&gMappedPagesCount, -1); 3208 3209 locker.Unlock(); 3210 3211 // free now unused mappings 3212 3213 while ((mapping = queue.RemoveHead()) != NULL) { 3214 free(mapping); 3215 } 3216 3217 if (_flags != NULL) 3218 *_flags = accumulatedFlags; 3219 } 3220 3221 3222 bool 3223 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified) 3224 { 3225 vm_translation_map* map = &area->address_space->translation_map; 3226 3227 map->ops->lock(map); 3228 3229 addr_t physicalAddress; 3230 uint32 flags; 3231 status_t status = map->ops->query(map, virtualAddress, &physicalAddress, 3232 &flags); 3233 if (status < B_OK || (flags & PAGE_PRESENT) == 0) { 3234 map->ops->unlock(map); 3235 return false; 3236 } 3237 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3238 if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) { 3239 panic("area %p looking up page failed for pa 0x%lx\n", area, 3240 physicalAddress); 3241 } 3242 3243 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) 3244 decrement_page_wired_count(page); 3245 3246 map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1); 3247 3248 if (preserveModified) { 3249 map->ops->flush(map); 3250 3251 status = map->ops->query(map, virtualAddress, &physicalAddress, &flags); 3252 if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED) 3253 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3254 } 3255 3256 map->ops->unlock(map); 3257 3258 if (area->wiring == B_NO_LOCK) { 3259 vm_page_mapping* mapping; 3260 3261 mutex_lock(&sMappingLock); 3262 map->ops->lock(map); 3263 3264 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3265 while (iterator.HasNext()) { 3266 mapping = iterator.Next(); 3267 3268 if (mapping->area == area) { 3269 area->mappings.Remove(mapping); 3270 page->mappings.Remove(mapping); 3271 3272 if (page->mappings.IsEmpty() && page->wired_count == 0) 3273 atomic_add(&gMappedPagesCount, -1); 3274 3275 map->ops->unlock(map); 3276 mutex_unlock(&sMappingLock); 3277 3278 free(mapping); 3279 3280 return true; 3281 } 3282 } 3283 3284 map->ops->unlock(map); 3285 mutex_unlock(&sMappingLock); 3286 3287 dprintf("vm_unmap_page: couldn't 
find mapping for area %p in page %p\n", 3288 area, page); 3289 } 3290 3291 return true; 3292 } 3293 3294 3295 status_t 3296 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified) 3297 { 3298 vm_translation_map* map = &area->address_space->translation_map; 3299 addr_t end = base + (size - 1); 3300 3301 map->ops->lock(map); 3302 3303 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) { 3304 // iterate through all pages and decrease their wired count 3305 for (addr_t virtualAddress = base; virtualAddress < end; 3306 virtualAddress += B_PAGE_SIZE) { 3307 addr_t physicalAddress; 3308 uint32 flags; 3309 status_t status = map->ops->query(map, virtualAddress, 3310 &physicalAddress, &flags); 3311 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3312 continue; 3313 3314 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3315 if (page == NULL) { 3316 panic("area %p looking up page failed for pa 0x%lx\n", area, 3317 physicalAddress); 3318 } 3319 3320 decrement_page_wired_count(page); 3321 } 3322 } 3323 3324 map->ops->unmap(map, base, end); 3325 if (preserveModified) { 3326 map->ops->flush(map); 3327 3328 for (addr_t virtualAddress = base; virtualAddress < end; 3329 virtualAddress += B_PAGE_SIZE) { 3330 addr_t physicalAddress; 3331 uint32 flags; 3332 status_t status = map->ops->query(map, virtualAddress, 3333 &physicalAddress, &flags); 3334 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3335 continue; 3336 3337 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3338 if (page == NULL) { 3339 panic("area %p looking up page failed for pa 0x%lx\n", area, 3340 physicalAddress); 3341 } 3342 3343 if ((flags & PAGE_MODIFIED) != 0 3344 && page->state != PAGE_STATE_MODIFIED) 3345 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3346 } 3347 } 3348 map->ops->unlock(map); 3349 3350 if (area->wiring == B_NO_LOCK) { 3351 uint32 startOffset = (area->cache_offset + base - area->base) 3352 >> PAGE_SHIFT; 3353 uint32 endOffset = startOffset + (size >> PAGE_SHIFT); 3354 vm_page_mapping* mapping; 3355 vm_area_mappings queue; 3356 3357 mutex_lock(&sMappingLock); 3358 map->ops->lock(map); 3359 3360 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3361 while (iterator.HasNext()) { 3362 mapping = iterator.Next(); 3363 3364 vm_page* page = mapping->page; 3365 if (page->cache_offset < startOffset 3366 || page->cache_offset >= endOffset) 3367 continue; 3368 3369 page->mappings.Remove(mapping); 3370 iterator.Remove(); 3371 3372 if (page->mappings.IsEmpty() && page->wired_count == 0) 3373 atomic_add(&gMappedPagesCount, -1); 3374 3375 queue.Add(mapping); 3376 } 3377 3378 map->ops->unlock(map); 3379 mutex_unlock(&sMappingLock); 3380 3381 while ((mapping = queue.RemoveHead()) != NULL) { 3382 free(mapping); 3383 } 3384 } 3385 3386 return B_OK; 3387 } 3388 3389 3390 /*! When calling this function, you need to have pages reserved! 
*/ 3391 status_t 3392 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection) 3393 { 3394 vm_translation_map* map = &area->address_space->translation_map; 3395 vm_page_mapping* mapping = NULL; 3396 3397 if (area->wiring == B_NO_LOCK) { 3398 mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping)); 3399 if (mapping == NULL) 3400 return B_NO_MEMORY; 3401 3402 mapping->page = page; 3403 mapping->area = area; 3404 } 3405 3406 map->ops->lock(map); 3407 map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE, 3408 protection); 3409 map->ops->unlock(map); 3410 3411 if (area->wiring != B_NO_LOCK) { 3412 increment_page_wired_count(page); 3413 } else { 3414 // insert mapping into lists 3415 MutexLocker locker(sMappingLock); 3416 3417 if (page->mappings.IsEmpty() && page->wired_count == 0) 3418 atomic_add(&gMappedPagesCount, 1); 3419 3420 page->mappings.Add(mapping); 3421 area->mappings.Add(mapping); 3422 } 3423 3424 if (page->usage_count < 0) 3425 page->usage_count = 1; 3426 3427 if (page->state != PAGE_STATE_MODIFIED) 3428 vm_page_set_state(page, PAGE_STATE_ACTIVE); 3429 3430 return B_OK; 3431 } 3432 3433 3434 static int 3435 display_mem(int argc, char** argv) 3436 { 3437 bool physical = false; 3438 addr_t copyAddress; 3439 int32 displayWidth; 3440 int32 itemSize; 3441 int32 num = -1; 3442 addr_t address; 3443 int i = 1, j; 3444 3445 if (argc > 1 && argv[1][0] == '-') { 3446 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3447 physical = true; 3448 i++; 3449 } else 3450 i = 99; 3451 } 3452 3453 if (argc < i + 1 || argc > i + 2) { 3454 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3455 "\tdl - 8 bytes\n" 3456 "\tdw - 4 bytes\n" 3457 "\tds - 2 bytes\n" 3458 "\tdb - 1 byte\n" 3459 "\tstring - a whole string\n" 3460 " -p or --physical only allows memory from a single page to be " 3461 "displayed.\n"); 3462 return 0; 3463 } 3464 3465 address = parse_expression(argv[i]); 3466 3467 if (argc > i + 1) 3468 num = parse_expression(argv[i + 1]); 3469 3470 // build the format string 3471 if (strcmp(argv[0], "db") == 0) { 3472 itemSize = 1; 3473 displayWidth = 16; 3474 } else if (strcmp(argv[0], "ds") == 0) { 3475 itemSize = 2; 3476 displayWidth = 8; 3477 } else if (strcmp(argv[0], "dw") == 0) { 3478 itemSize = 4; 3479 displayWidth = 4; 3480 } else if (strcmp(argv[0], "dl") == 0) { 3481 itemSize = 8; 3482 displayWidth = 2; 3483 } else if (strcmp(argv[0], "string") == 0) { 3484 itemSize = 1; 3485 displayWidth = -1; 3486 } else { 3487 kprintf("display_mem called in an invalid way!\n"); 3488 return 0; 3489 } 3490 3491 if (num <= 0) 3492 num = displayWidth; 3493 3494 void* physicalPageHandle = NULL; 3495 3496 if (physical) { 3497 int32 offset = address & (B_PAGE_SIZE - 1); 3498 if (num * itemSize + offset > B_PAGE_SIZE) { 3499 num = (B_PAGE_SIZE - offset) / itemSize; 3500 kprintf("NOTE: number of bytes has been cut to page size\n"); 3501 } 3502 3503 address = ROUNDDOWN(address, B_PAGE_SIZE); 3504 3505 if (vm_get_physical_page_debug(address, ©Address, 3506 &physicalPageHandle) != B_OK) { 3507 kprintf("getting the hardware page failed."); 3508 return 0; 3509 } 3510 3511 address += offset; 3512 copyAddress += offset; 3513 } else 3514 copyAddress = address; 3515 3516 if (!strcmp(argv[0], "string")) { 3517 kprintf("%p \"", (char*)copyAddress); 3518 3519 // string mode 3520 for (i = 0; true; i++) { 3521 char c; 3522 if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK 3523 || c == '\0') 3524 break; 3525 3526 if (c == '\n') 3527 
kprintf("\\n"); 3528 else if (c == '\t') 3529 kprintf("\\t"); 3530 else { 3531 if (!isprint(c)) 3532 c = '.'; 3533 3534 kprintf("%c", c); 3535 } 3536 } 3537 3538 kprintf("\"\n"); 3539 } else { 3540 // number mode 3541 for (i = 0; i < num; i++) { 3542 uint32 value; 3543 3544 if ((i % displayWidth) == 0) { 3545 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3546 if (i != 0) 3547 kprintf("\n"); 3548 3549 kprintf("[0x%lx] ", address + i * itemSize); 3550 3551 for (j = 0; j < displayed; j++) { 3552 char c; 3553 if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j, 3554 1) != B_OK) { 3555 displayed = j; 3556 break; 3557 } 3558 if (!isprint(c)) 3559 c = '.'; 3560 3561 kprintf("%c", c); 3562 } 3563 if (num > displayWidth) { 3564 // make sure the spacing in the last line is correct 3565 for (j = displayed; j < displayWidth * itemSize; j++) 3566 kprintf(" "); 3567 } 3568 kprintf(" "); 3569 } 3570 3571 if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize, 3572 itemSize) != B_OK) { 3573 kprintf("read fault"); 3574 break; 3575 } 3576 3577 switch (itemSize) { 3578 case 1: 3579 kprintf(" %02x", *(uint8*)&value); 3580 break; 3581 case 2: 3582 kprintf(" %04x", *(uint16*)&value); 3583 break; 3584 case 4: 3585 kprintf(" %08lx", *(uint32*)&value); 3586 break; 3587 case 8: 3588 kprintf(" %016Lx", *(uint64*)&value); 3589 break; 3590 } 3591 } 3592 3593 kprintf("\n"); 3594 } 3595 3596 if (physical) { 3597 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3598 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3599 } 3600 return 0; 3601 } 3602 3603 3604 static void 3605 dump_cache_tree_recursively(vm_cache* cache, int level, 3606 vm_cache* highlightCache) 3607 { 3608 // print this cache 3609 for (int i = 0; i < level; i++) 3610 kprintf(" "); 3611 if (cache == highlightCache) 3612 kprintf("%p <--\n", cache); 3613 else 3614 kprintf("%p\n", cache); 3615 3616 // recursively print its consumers 3617 vm_cache* consumer = NULL; 3618 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3619 consumer)) != NULL) { 3620 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3621 } 3622 } 3623 3624 3625 static int 3626 dump_cache_tree(int argc, char** argv) 3627 { 3628 if (argc != 2 || !strcmp(argv[1], "--help")) { 3629 kprintf("usage: %s <address>\n", argv[0]); 3630 return 0; 3631 } 3632 3633 addr_t address = parse_expression(argv[1]); 3634 if (address == 0) 3635 return 0; 3636 3637 vm_cache* cache = (vm_cache*)address; 3638 vm_cache* root = cache; 3639 3640 // find the root cache (the transitive source) 3641 while (root->source != NULL) 3642 root = root->source; 3643 3644 dump_cache_tree_recursively(root, 0, cache); 3645 3646 return 0; 3647 } 3648 3649 3650 static const char* 3651 cache_type_to_string(int32 type) 3652 { 3653 switch (type) { 3654 case CACHE_TYPE_RAM: 3655 return "RAM"; 3656 case CACHE_TYPE_DEVICE: 3657 return "device"; 3658 case CACHE_TYPE_VNODE: 3659 return "vnode"; 3660 case CACHE_TYPE_NULL: 3661 return "null"; 3662 3663 default: 3664 return "unknown"; 3665 } 3666 } 3667 3668 3669 #if DEBUG_CACHE_LIST 3670 3671 static void 3672 update_cache_info_recursively(vm_cache* cache, cache_info& info) 3673 { 3674 info.page_count += cache->page_count; 3675 if (cache->type == CACHE_TYPE_RAM) 3676 info.committed += cache->committed_size; 3677 3678 // recurse 3679 vm_cache* consumer = NULL; 3680 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3681 consumer)) != NULL) { 3682 update_cache_info_recursively(consumer, info); 3683 } 3684 } 3685 
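// qsort() comparison functions for the cache info table; both sort in descending order, so the caches with the largest page count or commitment come first.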
3686 3687 static int 3688 cache_info_compare_page_count(const void* _a, const void* _b) 3689 { 3690 const cache_info* a = (const cache_info*)_a; 3691 const cache_info* b = (const cache_info*)_b; 3692 if (a->page_count == b->page_count) 3693 return 0; 3694 return a->page_count < b->page_count ? 1 : -1; 3695 } 3696 3697 3698 static int 3699 cache_info_compare_committed(const void* _a, const void* _b) 3700 { 3701 const cache_info* a = (const cache_info*)_a; 3702 const cache_info* b = (const cache_info*)_b; 3703 if (a->committed == b->committed) 3704 return 0; 3705 return a->committed < b->committed ? 1 : -1; 3706 } 3707 3708 3709 static void 3710 dump_caches_recursively(vm_cache* cache, cache_info& info, int level) 3711 { 3712 for (int i = 0; i < level; i++) 3713 kprintf(" "); 3714 3715 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3716 cache_type_to_string(cache->type), cache->virtual_base, 3717 cache->virtual_end, cache->page_count); 3718 3719 if (level == 0) 3720 kprintf("/%lu", info.page_count); 3721 3722 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3723 kprintf(", committed: %lld", cache->committed_size); 3724 3725 if (level == 0) 3726 kprintf("/%lu", info.committed); 3727 } 3728 3729 // areas 3730 if (cache->areas != NULL) { 3731 vm_area* area = cache->areas; 3732 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3733 area->address_space->id); 3734 3735 while (area->cache_next != NULL) { 3736 area = area->cache_next; 3737 kprintf(", %ld", area->id); 3738 } 3739 } 3740 3741 kputs("\n"); 3742 3743 // recurse 3744 vm_cache* consumer = NULL; 3745 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3746 consumer)) != NULL) { 3747 dump_caches_recursively(consumer, info, level + 1); 3748 } 3749 } 3750 3751 3752 static int 3753 dump_caches(int argc, char** argv) 3754 { 3755 if (sCacheInfoTable == NULL) { 3756 kprintf("No cache info table!\n"); 3757 return 0; 3758 } 3759 3760 bool sortByPageCount = true; 3761 3762 for (int32 i = 1; i < argc; i++) { 3763 if (strcmp(argv[i], "-c") == 0) { 3764 sortByPageCount = false; 3765 } else { 3766 print_debugger_command_usage(argv[0]); 3767 return 0; 3768 } 3769 } 3770 3771 uint32 totalCount = 0; 3772 uint32 rootCount = 0; 3773 off_t totalCommitted = 0; 3774 page_num_t totalPages = 0; 3775 3776 vm_cache* cache = gDebugCacheList; 3777 while (cache) { 3778 totalCount++; 3779 if (cache->source == NULL) { 3780 cache_info stackInfo; 3781 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3782 ? sCacheInfoTable[rootCount] : stackInfo; 3783 rootCount++; 3784 info.cache = cache; 3785 info.page_count = 0; 3786 info.committed = 0; 3787 update_cache_info_recursively(cache, info); 3788 totalCommitted += info.committed; 3789 totalPages += info.page_count; 3790 } 3791 3792 cache = cache->debug_next; 3793 } 3794 3795 if (rootCount <= (uint32)kCacheInfoTableCount) { 3796 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3797 sortByPageCount 3798 ? &cache_info_compare_page_count 3799 : &cache_info_compare_committed); 3800 } 3801 3802 kprintf("total committed memory: %lld, total used pages: %lu\n", 3803 totalCommitted, totalPages); 3804 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3805 "tree...\n\n", totalCount, rootCount, 3806 sortByPageCount ? 
"page count" : "committed size"); 3807 3808 if (rootCount <= (uint32)kCacheInfoTableCount) { 3809 for (uint32 i = 0; i < rootCount; i++) { 3810 cache_info& info = sCacheInfoTable[i]; 3811 dump_caches_recursively(info.cache, info, 0); 3812 } 3813 } else 3814 kprintf("Cache info table too small! Can't sort and print caches!\n"); 3815 3816 return 0; 3817 } 3818 3819 #endif // DEBUG_CACHE_LIST 3820 3821 3822 static int 3823 dump_cache(int argc, char** argv) 3824 { 3825 vm_cache* cache; 3826 bool showPages = false; 3827 int i = 1; 3828 3829 if (argc < 2 || !strcmp(argv[1], "--help")) { 3830 kprintf("usage: %s [-ps] <address>\n" 3831 " if -p is specified, all pages are shown, if -s is used\n" 3832 " only the cache info is shown respectively.\n", argv[0]); 3833 return 0; 3834 } 3835 while (argv[i][0] == '-') { 3836 char* arg = argv[i] + 1; 3837 while (arg[0]) { 3838 if (arg[0] == 'p') 3839 showPages = true; 3840 arg++; 3841 } 3842 i++; 3843 } 3844 if (argv[i] == NULL) { 3845 kprintf("%s: invalid argument, pass address\n", argv[0]); 3846 return 0; 3847 } 3848 3849 addr_t address = parse_expression(argv[i]); 3850 if (address == 0) 3851 return 0; 3852 3853 cache = (vm_cache*)address; 3854 3855 kprintf("CACHE %p:\n", cache); 3856 kprintf(" ref_count: %ld\n", cache->RefCount()); 3857 kprintf(" source: %p\n", cache->source); 3858 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 3859 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 3860 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 3861 kprintf(" temporary: %ld\n", cache->temporary); 3862 kprintf(" scan_skip: %ld\n", cache->scan_skip); 3863 kprintf(" lock: %p\n", cache->GetLock()); 3864 #if KDEBUG 3865 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 3866 #endif 3867 kprintf(" areas:\n"); 3868 3869 for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) { 3870 kprintf(" area 0x%lx, %s\n", area->id, area->name); 3871 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 3872 kprintf("\tprotection: 0x%lx\n", area->protection); 3873 kprintf("\towner: 0x%lx\n", area->address_space->id); 3874 } 3875 3876 kprintf(" consumers:\n"); 3877 vm_cache* consumer = NULL; 3878 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3879 consumer)) != NULL) { 3880 kprintf("\t%p\n", consumer); 3881 } 3882 3883 kprintf(" pages:\n"); 3884 if (showPages) { 3885 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3886 vm_page* page = it.Next();) { 3887 if (page->type == PAGE_TYPE_PHYSICAL) { 3888 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) " 3889 "wired_count %u\n", page, page->physical_page_number, 3890 page->cache_offset, page->type, page->state, 3891 page_state_to_string(page->state), page->wired_count); 3892 } else if(page->type == PAGE_TYPE_DUMMY) { 3893 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 3894 page, page->state, page_state_to_string(page->state)); 3895 } else 3896 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 3897 } 3898 } else 3899 kprintf("\t%ld in cache\n", cache->page_count); 3900 3901 return 0; 3902 } 3903 3904 3905 static void 3906 dump_area_struct(vm_area* area, bool mappings) 3907 { 3908 kprintf("AREA: %p\n", area); 3909 kprintf("name:\t\t'%s'\n", area->name); 3910 kprintf("owner:\t\t0x%lx\n", area->address_space->id); 3911 kprintf("id:\t\t0x%lx\n", area->id); 3912 kprintf("base:\t\t0x%lx\n", area->base); 3913 kprintf("size:\t\t0x%lx\n", area->size); 3914 kprintf("protection:\t0x%lx\n", area->protection); 3915 
kprintf("wiring:\t\t0x%x\n", area->wiring); 3916 kprintf("memory_type:\t0x%x\n", area->memory_type); 3917 kprintf("cache:\t\t%p\n", area->cache); 3918 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 3919 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3920 kprintf("cache_next:\t%p\n", area->cache_next); 3921 kprintf("cache_prev:\t%p\n", area->cache_prev); 3922 3923 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3924 if (mappings) { 3925 kprintf("page mappings:\n"); 3926 while (iterator.HasNext()) { 3927 vm_page_mapping* mapping = iterator.Next(); 3928 kprintf(" %p", mapping->page); 3929 } 3930 kprintf("\n"); 3931 } else { 3932 uint32 count = 0; 3933 while (iterator.Next() != NULL) { 3934 count++; 3935 } 3936 kprintf("page mappings:\t%lu\n", count); 3937 } 3938 } 3939 3940 3941 static int 3942 dump_area(int argc, char** argv) 3943 { 3944 bool mappings = false; 3945 bool found = false; 3946 int32 index = 1; 3947 vm_area* area; 3948 addr_t num; 3949 3950 if (argc < 2 || !strcmp(argv[1], "--help")) { 3951 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3952 "All areas matching either id/address/name are listed. You can\n" 3953 "force to check only a specific item by prefixing the specifier\n" 3954 "with the id/contains/address/name keywords.\n" 3955 "-m shows the area's mappings as well.\n"); 3956 return 0; 3957 } 3958 3959 if (!strcmp(argv[1], "-m")) { 3960 mappings = true; 3961 index++; 3962 } 3963 3964 int32 mode = 0xf; 3965 if (!strcmp(argv[index], "id")) 3966 mode = 1; 3967 else if (!strcmp(argv[index], "contains")) 3968 mode = 2; 3969 else if (!strcmp(argv[index], "name")) 3970 mode = 4; 3971 else if (!strcmp(argv[index], "address")) 3972 mode = 0; 3973 if (mode != 0xf) 3974 index++; 3975 3976 if (index >= argc) { 3977 kprintf("No area specifier given.\n"); 3978 return 0; 3979 } 3980 3981 num = parse_expression(argv[index]); 3982 3983 if (mode == 0) { 3984 dump_area_struct((struct vm_area*)num, mappings); 3985 } else { 3986 // walk through the area list, looking for the arguments as a name 3987 struct hash_iterator iter; 3988 3989 hash_open(sAreaHash, &iter); 3990 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 3991 if (((mode & 4) != 0 && area->name != NULL 3992 && !strcmp(argv[index], area->name)) 3993 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3994 || (((mode & 2) != 0 && area->base <= num 3995 && area->base + area->size > num))))) { 3996 dump_area_struct(area, mappings); 3997 found = true; 3998 } 3999 } 4000 4001 if (!found) 4002 kprintf("could not find area %s (%ld)\n", argv[index], num); 4003 } 4004 4005 return 0; 4006 } 4007 4008 4009 static int 4010 dump_area_list(int argc, char** argv) 4011 { 4012 vm_area* area; 4013 struct hash_iterator iter; 4014 const char* name = NULL; 4015 int32 id = 0; 4016 4017 if (argc > 1) { 4018 id = parse_expression(argv[1]); 4019 if (id == 0) 4020 name = argv[1]; 4021 } 4022 4023 kprintf("addr id base\t\tsize protect lock name\n"); 4024 4025 hash_open(sAreaHash, &iter); 4026 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 4027 if ((id != 0 && area->address_space->id != id) 4028 || (name != NULL && strstr(area->name, name) == NULL)) 4029 continue; 4030 4031 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 4032 (void*)area->base, (void*)area->size, area->protection, area->wiring, 4033 area->name); 4034 } 4035 hash_close(sAreaHash, &iter, false); 4036 return 0; 4037 } 4038 4039 4040 static int 4041 
dump_available_memory(int argc, char** argv) 4042 { 4043 kprintf("Available memory: %Ld/%lu bytes\n", 4044 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 4045 return 0; 4046 } 4047 4048 4049 status_t 4050 vm_delete_areas(struct vm_address_space* addressSpace) 4051 { 4052 vm_area* area; 4053 vm_area* next; 4054 vm_area* last = NULL; 4055 4056 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 4057 addressSpace->id)); 4058 4059 rw_lock_write_lock(&addressSpace->lock); 4060 4061 // remove all reserved areas in this address space 4062 4063 for (area = addressSpace->areas; area; area = next) { 4064 next = area->address_space_next; 4065 4066 if (area->id == RESERVED_AREA_ID) { 4067 // just remove it 4068 if (last) 4069 last->address_space_next = area->address_space_next; 4070 else 4071 addressSpace->areas = area->address_space_next; 4072 4073 vm_put_address_space(addressSpace); 4074 free(area); 4075 continue; 4076 } 4077 4078 last = area; 4079 } 4080 4081 // delete all the areas in this address space 4082 4083 for (area = addressSpace->areas; area; area = next) { 4084 next = area->address_space_next; 4085 delete_area(addressSpace, area); 4086 } 4087 4088 rw_lock_write_unlock(&addressSpace->lock); 4089 return B_OK; 4090 } 4091 4092 4093 static area_id 4094 vm_area_for(addr_t address, bool kernel) 4095 { 4096 team_id team; 4097 if (IS_USER_ADDRESS(address)) { 4098 // we try the user team address space, if any 4099 team = vm_current_user_address_space_id(); 4100 if (team < 0) 4101 return team; 4102 } else 4103 team = vm_kernel_address_space_id(); 4104 4105 AddressSpaceReadLocker locker(team); 4106 if (!locker.IsLocked()) 4107 return B_BAD_TEAM_ID; 4108 4109 vm_area* area = vm_area_lookup(locker.AddressSpace(), address); 4110 if (area != NULL) { 4111 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 4112 return B_ERROR; 4113 4114 return area->id; 4115 } 4116 4117 return B_ERROR; 4118 } 4119 4120 4121 /*! Frees physical pages that were used during the boot process. 
*/
static void
unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end)
{
	// free all physical pages in the specified range

	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
		addr_t physicalAddress;
		uint32 flags;

		if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) {
			vm_page* page = vm_lookup_page(current / B_PAGE_SIZE);
			if (page != NULL)
				vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

	// unmap the memory
	map->ops->unmap(map, start, end - 1);
}


void
vm_free_unused_boot_loader_range(addr_t start, addr_t size)
{
	vm_translation_map* map = &vm_kernel_address_space()->translation_map;
	addr_t end = start + size;
	addr_t lastEnd = start;
	vm_area* area;

	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
		(void*)start, (void*)end));

	// The areas are sorted in virtual address space order, so
	// we just have to find the holes between them that fall
	// into the range we should dispose of

	map->ops->lock(map);

	for (area = vm_kernel_address_space()->areas; area != NULL;
			area = area->address_space_next) {
		addr_t areaStart = area->base;
		addr_t areaEnd = areaStart + area->size;

		if (area->id == RESERVED_AREA_ID)
			continue;

		if (areaEnd >= end) {
			// we are done; the areas are already beyond what we have to free
			lastEnd = end;
			break;
		}

		if (areaStart > lastEnd) {
			// this is something we can free
			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
				(void*)areaStart));
			unmap_and_free_physical_pages(map, lastEnd, areaStart);
		}

		lastEnd = areaEnd;
	}

	if (lastEnd < end) {
		// we can also get rid of some space at the end of the range
		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
			(void*)end));
		unmap_and_free_physical_pages(map, lastEnd, end);
	}

	map->ops->unlock(map);
}


static void
create_preloaded_image_areas(struct preloaded_image* image)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	int32 length;

	// use the file name to create a good area name
	char* fileName = strrchr(image->name, '/');
	if (fileName == NULL)
		fileName = image->name;
	else
		fileName++;

	length = strlen(fileName);
	// make sure there is enough space for the suffix
	if (length > 25)
		length = 25;

	memcpy(name, fileName, length);
	strcpy(name + length, "_text");
	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		// this will later be remapped read-only/executable by the
		// ELF initialization code

	strcpy(name + length, "_data");
	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}


/*!	Frees all areas previously created for the kernel argument ranges listed
	in the kernel_args structure.
	Any boot loader resources contained in those arguments must not be
	accessed anymore past this point.
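	Ranges for which no area exists (anymore) are silently skipped.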
4235 */ 4236 void 4237 vm_free_kernel_args(kernel_args* args) 4238 { 4239 uint32 i; 4240 4241 TRACE(("vm_free_kernel_args()\n")); 4242 4243 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4244 area_id area = area_for((void*)args->kernel_args_range[i].start); 4245 if (area >= B_OK) 4246 delete_area(area); 4247 } 4248 } 4249 4250 4251 static void 4252 allocate_kernel_args(kernel_args* args) 4253 { 4254 TRACE(("allocate_kernel_args()\n")); 4255 4256 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4257 void* address = (void*)args->kernel_args_range[i].start; 4258 4259 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4260 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4261 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4262 } 4263 } 4264 4265 4266 static void 4267 unreserve_boot_loader_ranges(kernel_args* args) 4268 { 4269 TRACE(("unreserve_boot_loader_ranges()\n")); 4270 4271 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4272 vm_unreserve_address_range(vm_kernel_address_space_id(), 4273 (void*)args->virtual_allocated_range[i].start, 4274 args->virtual_allocated_range[i].size); 4275 } 4276 } 4277 4278 4279 static void 4280 reserve_boot_loader_ranges(kernel_args* args) 4281 { 4282 TRACE(("reserve_boot_loader_ranges()\n")); 4283 4284 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4285 void* address = (void*)args->virtual_allocated_range[i].start; 4286 4287 // If the address is no kernel address, we just skip it. The 4288 // architecture specific code has to deal with it. 4289 if (!IS_KERNEL_ADDRESS(address)) { 4290 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 4291 address, args->virtual_allocated_range[i].size); 4292 continue; 4293 } 4294 4295 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), 4296 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4297 if (status < B_OK) 4298 panic("could not reserve boot loader ranges\n"); 4299 } 4300 } 4301 4302 4303 static addr_t 4304 allocate_early_virtual(kernel_args* args, size_t size) 4305 { 4306 addr_t spot = 0; 4307 uint32 i; 4308 int last_valloc_entry = 0; 4309 4310 size = PAGE_ALIGN(size); 4311 // find a slot in the virtual allocation addr range 4312 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 4313 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4314 + args->virtual_allocated_range[i - 1].size; 4315 last_valloc_entry = i; 4316 // check to see if the space between this one and the last is big enough 4317 if (previousRangeEnd >= KERNEL_BASE 4318 && args->virtual_allocated_range[i].start 4319 - previousRangeEnd >= size) { 4320 spot = previousRangeEnd; 4321 args->virtual_allocated_range[i - 1].size += size; 4322 goto out; 4323 } 4324 } 4325 if (spot == 0) { 4326 // we hadn't found one between allocation ranges. this is ok. 
4327 // see if there's a gap after the last one 4328 addr_t lastRangeEnd 4329 = args->virtual_allocated_range[last_valloc_entry].start 4330 + args->virtual_allocated_range[last_valloc_entry].size; 4331 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 4332 spot = lastRangeEnd; 4333 args->virtual_allocated_range[last_valloc_entry].size += size; 4334 goto out; 4335 } 4336 // see if there's a gap before the first one 4337 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 4338 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 4339 args->virtual_allocated_range[0].start -= size; 4340 spot = args->virtual_allocated_range[0].start; 4341 goto out; 4342 } 4343 } 4344 } 4345 4346 out: 4347 return spot; 4348 } 4349 4350 4351 static bool 4352 is_page_in_physical_memory_range(kernel_args* args, addr_t address) 4353 { 4354 // TODO: horrible brute-force method of determining if the page can be 4355 // allocated 4356 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4357 if (address >= args->physical_memory_range[i].start 4358 && address < args->physical_memory_range[i].start 4359 + args->physical_memory_range[i].size) 4360 return true; 4361 } 4362 return false; 4363 } 4364 4365 4366 static addr_t 4367 allocate_early_physical_page(kernel_args* args) 4368 { 4369 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4370 addr_t nextPage; 4371 4372 nextPage = args->physical_allocated_range[i].start 4373 + args->physical_allocated_range[i].size; 4374 // see if the page after the next allocated paddr run can be allocated 4375 if (i + 1 < args->num_physical_allocated_ranges 4376 && args->physical_allocated_range[i + 1].size != 0) { 4377 // see if the next page will collide with the next allocated range 4378 if (nextPage >= args->physical_allocated_range[i+1].start) 4379 continue; 4380 } 4381 // see if the next physical page fits in the memory block 4382 if (is_page_in_physical_memory_range(args, nextPage)) { 4383 // we got one! 4384 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4385 return nextPage / B_PAGE_SIZE; 4386 } 4387 } 4388 4389 return 0; 4390 // could not allocate a block 4391 } 4392 4393 4394 /*! This one uses the kernel_args' physical and virtual memory ranges to 4395 allocate some pages before the VM is completely up. 4396 */ 4397 addr_t 4398 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4399 uint32 attributes) 4400 { 4401 if (physicalSize > virtualSize) 4402 physicalSize = virtualSize; 4403 4404 // find the vaddr to allocate at 4405 addr_t virtualBase = allocate_early_virtual(args, virtualSize); 4406 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 4407 4408 // map the pages 4409 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4410 addr_t physicalAddress = allocate_early_physical_page(args); 4411 if (physicalAddress == 0) 4412 panic("error allocating early page!\n"); 4413 4414 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4415 4416 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4417 physicalAddress * B_PAGE_SIZE, attributes, 4418 &allocate_early_physical_page); 4419 } 4420 4421 return virtualBase; 4422 } 4423 4424 4425 /*! The main entrance point to initialize the VM. 
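	Called with the \a args handed over by the boot loader; among other things
	it sets up the translation map and architecture specific VM state, the
	kernel heap and the initial slab space, the page and cache subsystems, the
	area hash table, and the VM related KDL debugger commands.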
*/
status_t
vm_init(kernel_args* args)
{
	struct preloaded_image* image;
	void* address;
	status_t err = 0;
	uint32 i;

	TRACE(("vm_init: entry\n"));
	err = arch_vm_translation_map_init(args);
	err = arch_vm_init(args);

	// initialize some globals
	sNextAreaID = 1;

	vm_page_init_num_pages(args);
	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;

	size_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
	while (heapSize > sAvailableMemory / 8)
		heapSize /= 2;
	if (heapSize < 1024 * 1024)
		panic("vm_init: go buy some RAM please.");

	// map in the new heap and initialize it
	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	TRACE(("heap at 0x%lx\n", heapBase));
	heap_init(heapBase, heapSize);

	size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE;
	addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize,
		slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	slab_init(args, slabInitialBase, slabInitialSize);

	// initialize the free page list and physical page mapper
	vm_page_init(args);

	// initialize the hash table that stores the pages mapped to caches
	vm_cache_init(args);

	{
		vm_area* area;
		sAreaHash = hash_init(AREA_HASH_TABLE_SIZE,
			(addr_t)&area->hash_next - (addr_t)area,
			&area_compare, &area_hash);
		if (sAreaHash == NULL)
			panic("vm_init: error creating aspace hash table\n");
	}

	vm_address_space_init();
	reserve_boot_loader_ranges(args);

	// Do any further initialization that the architecture dependent layers may
	// need now
	arch_vm_translation_map_init_post_area(args);
	arch_vm_init_post_area(args);
	vm_page_init_post_area(args);

	// allocate areas to represent stuff that already exists

	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);

	address = (void*)ROUNDDOWN(slabInitialBase, B_PAGE_SIZE);
	create_area("initial slab space", &address, B_EXACT_ADDRESS,
		slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA
		| B_KERNEL_WRITE_AREA);

	allocate_kernel_args(args);

	create_preloaded_image_areas(&args->kernel_image);

	// allocate areas for preloaded images
	for (image = args->preloaded_images; image != NULL; image = image->next) {
		create_preloaded_image_areas(image);
	}

	// allocate kernel stacks
	for (i = 0; i < args->num_cpus; i++) {
		char name[64];

		sprintf(name, "idle thread %lu kstack", i + 1);
		address = (void*)args->cpu_kstack[i].start;
		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}

	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);

#if DEBUG_CACHE_LIST
	create_area("cache info table", (void**)&sCacheInfoTable,
		B_ANY_KERNEL_ADDRESS,
		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
#endif	// DEBUG_CACHE_LIST

	// add some debugger commands
	add_debugger_command("areas", &dump_area_list,
"Dump a list of all areas"); 4528 add_debugger_command("area", &dump_area, 4529 "Dump info about a particular area"); 4530 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 4531 add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree"); 4532 #if DEBUG_CACHE_LIST 4533 add_debugger_command_etc("caches", &dump_caches, 4534 "List all vm_cache trees", 4535 "[ \"-c\" ]\n" 4536 "All cache trees are listed sorted in decreasing order by number of\n" 4537 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 4538 0); 4539 #endif 4540 add_debugger_command("avail", &dump_available_memory, 4541 "Dump available memory"); 4542 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4543 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4544 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4545 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4546 add_debugger_command("string", &display_mem, "dump strings"); 4547 4548 TRACE(("vm_init: exit\n")); 4549 4550 return err; 4551 } 4552 4553 4554 status_t 4555 vm_init_post_sem(kernel_args* args) 4556 { 4557 // This frees all unused boot loader resources and makes its space available 4558 // again 4559 arch_vm_init_end(args); 4560 unreserve_boot_loader_ranges(args); 4561 4562 // fill in all of the semaphores that were not allocated before 4563 // since we're still single threaded and only the kernel address space 4564 // exists, it isn't that hard to find all of the ones we need to create 4565 4566 arch_vm_translation_map_init_post_sem(args); 4567 vm_address_space_init_post_sem(); 4568 4569 slab_init_post_sem(); 4570 return heap_init_post_sem(); 4571 } 4572 4573 4574 status_t 4575 vm_init_post_thread(kernel_args* args) 4576 { 4577 vm_page_init_post_thread(args); 4578 vm_daemon_init(); 4579 slab_init_post_thread(); 4580 return heap_init_post_thread(); 4581 } 4582 4583 4584 status_t 4585 vm_init_post_modules(kernel_args* args) 4586 { 4587 return arch_vm_init_post_modules(args); 4588 } 4589 4590 4591 void 4592 permit_page_faults(void) 4593 { 4594 struct thread* thread = thread_get_current_thread(); 4595 if (thread != NULL) 4596 atomic_add(&thread->page_faults_allowed, 1); 4597 } 4598 4599 4600 void 4601 forbid_page_faults(void) 4602 { 4603 struct thread* thread = thread_get_current_thread(); 4604 if (thread != NULL) 4605 atomic_add(&thread->page_faults_allowed, -1); 4606 } 4607 4608 4609 status_t 4610 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 4611 addr_t* newIP) 4612 { 4613 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4614 faultAddress)); 4615 4616 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4617 4618 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4619 vm_address_space* addressSpace = NULL; 4620 4621 status_t status = B_OK; 4622 *newIP = 0; 4623 atomic_add((int32*)&sPageFaults, 1); 4624 4625 if (IS_KERNEL_ADDRESS(pageAddress)) { 4626 addressSpace = vm_get_kernel_address_space(); 4627 } else if (IS_USER_ADDRESS(pageAddress)) { 4628 addressSpace = vm_get_current_user_address_space(); 4629 if (addressSpace == NULL) { 4630 if (!isUser) { 4631 dprintf("vm_page_fault: kernel thread accessing invalid user " 4632 "memory!\n"); 4633 status = B_BAD_ADDRESS; 4634 TPF(PageFaultError(-1, 4635 VMPageFaultTracing 4636 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4637 } else { 4638 // XXX weird state. 
4639 panic("vm_page_fault: non kernel thread accessing user memory " 4640 "that doesn't exist!\n"); 4641 status = B_BAD_ADDRESS; 4642 } 4643 } 4644 } else { 4645 // the hit was probably in the 64k DMZ between kernel and user space 4646 // this keeps a user space thread from passing a buffer that crosses 4647 // into kernel space 4648 status = B_BAD_ADDRESS; 4649 TPF(PageFaultError(-1, 4650 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4651 } 4652 4653 if (status == B_OK) 4654 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser); 4655 4656 if (status < B_OK) { 4657 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4658 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4659 strerror(status), address, faultAddress, isWrite, isUser, 4660 thread_get_current_thread_id()); 4661 if (!isUser) { 4662 struct thread* thread = thread_get_current_thread(); 4663 if (thread != NULL && thread->fault_handler != 0) { 4664 // this will cause the arch dependant page fault handler to 4665 // modify the IP on the interrupt frame or whatever to return 4666 // to this address 4667 *newIP = thread->fault_handler; 4668 } else { 4669 // unhandled page fault in the kernel 4670 panic("vm_page_fault: unhandled page fault in kernel space at " 4671 "0x%lx, ip 0x%lx\n", address, faultAddress); 4672 } 4673 } else { 4674 #if 1 4675 rw_lock_read_lock(&addressSpace->lock); 4676 4677 // TODO: remove me once we have proper userland debugging support 4678 // (and tools) 4679 vm_area* area = vm_area_lookup(addressSpace, faultAddress); 4680 4681 struct thread* thread = thread_get_current_thread(); 4682 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4683 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4684 thread->name, thread->id, thread->team->name, thread->team->id, 4685 isWrite ? "write" : "read", address, faultAddress, 4686 area ? area->name : "???", 4687 faultAddress - (area ? area->base : 0x0)); 4688 4689 // We can print a stack trace of the userland thread here. 4690 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4691 // fault and someone is already waiting for a write lock on the same address 4692 // space. This thread will then try to acquire the lock again and will 4693 // be queued after the writer. 4694 # if 0 4695 if (area) { 4696 struct stack_frame { 4697 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4698 struct stack_frame* previous; 4699 void* return_address; 4700 #else 4701 // ... 
4702 #warning writeme 4703 #endif 4704 } frame; 4705 # ifdef __INTEL__ 4706 struct iframe* iframe = i386_get_user_iframe(); 4707 if (iframe == NULL) 4708 panic("iframe is NULL!"); 4709 4710 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4711 sizeof(struct stack_frame)); 4712 # elif defined(__POWERPC__) 4713 struct iframe* iframe = ppc_get_user_iframe(); 4714 if (iframe == NULL) 4715 panic("iframe is NULL!"); 4716 4717 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4718 sizeof(struct stack_frame)); 4719 # else 4720 # warning "vm_page_fault() stack trace won't work" 4721 status = B_ERROR; 4722 # endif 4723 4724 dprintf("stack trace:\n"); 4725 int32 maxFrames = 50; 4726 while (status == B_OK && --maxFrames >= 0 4727 && frame.return_address != NULL) { 4728 dprintf(" %p", frame.return_address); 4729 area = vm_area_lookup(addressSpace, 4730 (addr_t)frame.return_address); 4731 if (area) { 4732 dprintf(" (%s + %#lx)", area->name, 4733 (addr_t)frame.return_address - area->base); 4734 } 4735 dprintf("\n"); 4736 4737 status = user_memcpy(&frame, frame.previous, 4738 sizeof(struct stack_frame)); 4739 } 4740 } 4741 # endif // 0 (stack trace) 4742 4743 rw_lock_read_unlock(&addressSpace->lock); 4744 #endif 4745 4746 // TODO: the fault_callback is a temporary solution for vm86 4747 if (thread->fault_callback == NULL 4748 || thread->fault_callback(address, faultAddress, isWrite)) { 4749 // If the thread has a signal handler for SIGSEGV, we simply 4750 // send it the signal. Otherwise we notify the user debugger 4751 // first. 4752 struct sigaction action; 4753 if (sigaction(SIGSEGV, NULL, &action) == 0 4754 && action.sa_handler != SIG_DFL 4755 && action.sa_handler != SIG_IGN) { 4756 send_signal(thread->id, SIGSEGV); 4757 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4758 SIGSEGV)) { 4759 send_signal(thread->id, SIGSEGV); 4760 } 4761 } 4762 } 4763 } 4764 4765 if (addressSpace != NULL) 4766 vm_put_address_space(addressSpace); 4767 4768 return B_HANDLED_INTERRUPT; 4769 } 4770 4771 4772 class VMCacheChainLocker { 4773 public: 4774 VMCacheChainLocker() 4775 : 4776 fTopCache(NULL), 4777 fBottomCache(NULL) 4778 { 4779 } 4780 4781 void SetTo(VMCache* topCache) 4782 { 4783 fTopCache = topCache; 4784 fBottomCache = topCache; 4785 } 4786 4787 VMCache* LockSourceCache() 4788 { 4789 if (fBottomCache == NULL || fBottomCache->source == NULL) 4790 return NULL; 4791 4792 fBottomCache = fBottomCache->source; 4793 fBottomCache->Lock(); 4794 fBottomCache->AcquireRefLocked(); 4795 4796 return fBottomCache; 4797 } 4798 4799 void Unlock() 4800 { 4801 if (fTopCache == NULL) 4802 return; 4803 4804 VMCache* cache = fTopCache; 4805 while (cache != NULL) { 4806 VMCache* nextCache = cache->source; 4807 cache->ReleaseRefAndUnlock(); 4808 4809 if (cache == fBottomCache) 4810 break; 4811 4812 cache = nextCache; 4813 } 4814 4815 fTopCache = NULL; 4816 fBottomCache = NULL; 4817 } 4818 4819 private: 4820 VMCache* fTopCache; 4821 VMCache* fBottomCache; 4822 }; 4823 4824 4825 struct PageFaultContext { 4826 AddressSpaceReadLocker addressSpaceLocker; 4827 VMCacheChainLocker cacheChainLocker; 4828 4829 vm_translation_map* map; 4830 vm_cache* topCache; 4831 off_t cacheOffset; 4832 bool isWrite; 4833 4834 // return values 4835 vm_page* page; 4836 bool restart; 4837 4838 4839 PageFaultContext(vm_address_space* addressSpace, bool isWrite) 4840 : 4841 addressSpaceLocker(addressSpace, true), 4842 map(&addressSpace->translation_map), 4843 isWrite(isWrite) 4844 { 4845 } 4846 4847 ~PageFaultContext() 4848 { 4849 
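		// make sure no address space or cache lock is leaked when the fault
		// path bails out early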
UnlockAll(); 4850 } 4851 4852 void Prepare(VMCache* topCache, off_t cacheOffset) 4853 { 4854 this->topCache = topCache; 4855 this->cacheOffset = cacheOffset; 4856 page = NULL; 4857 restart = false; 4858 4859 cacheChainLocker.SetTo(topCache); 4860 } 4861 4862 void UnlockAll() 4863 { 4864 topCache = NULL; 4865 addressSpaceLocker.Unlock(); 4866 cacheChainLocker.Unlock(); 4867 } 4868 }; 4869 4870 4871 /*! Gets the page that should be mapped into the area. 4872 Returns an error code other than \c B_OK, if the page couldn't be found or 4873 paged in. The locking state of the address space and the caches is undefined 4874 in that case. 4875 Returns \c B_OK with \c context.restart set to \c true, if the functions 4876 had to unlock the address space and all caches and is supposed to be called 4877 again. 4878 Returns \c B_OK with \c context.restart set to \c false, if the page was 4879 found. It is returned in \c context.page. The address space will still be 4880 locked as well as all caches starting from the top cache to at least the 4881 cache the page lives in. 4882 */ 4883 static inline status_t 4884 fault_get_page(PageFaultContext& context) 4885 { 4886 vm_cache* cache = context.topCache; 4887 vm_cache* lastCache = NULL; 4888 vm_page* page = NULL; 4889 4890 while (cache != NULL) { 4891 // We already hold the lock of the cache at this point. 4892 4893 lastCache = cache; 4894 4895 for (;;) { 4896 page = cache->LookupPage(context.cacheOffset); 4897 if (page == NULL || page->state != PAGE_STATE_BUSY) { 4898 // Either there is no page or there is one and it is not busy. 4899 break; 4900 } 4901 4902 // page must be busy -- wait for it to become unbusy 4903 ConditionVariableEntry entry; 4904 entry.Add(page); 4905 context.UnlockAll(); 4906 entry.Wait(); 4907 4908 // restart the whole process 4909 context.restart = true; 4910 return B_OK; 4911 } 4912 4913 if (page != NULL) 4914 break; 4915 4916 // The current cache does not contain the page we're looking for. 4917 4918 // see if the backing store has it 4919 if (cache->HasPage(context.cacheOffset)) { 4920 // insert a fresh page and mark it busy -- we're going to read it in 4921 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4922 cache->InsertPage(page, context.cacheOffset); 4923 4924 ConditionVariable busyCondition; 4925 busyCondition.Publish(page, "page"); 4926 4927 // We need to unlock all caches and the address space while reading 4928 // the page in. Keep a reference to the cache around. 4929 cache->AcquireRefLocked(); 4930 context.UnlockAll(); 4931 4932 // read the page in 4933 iovec vec; 4934 vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE); 4935 size_t bytesRead = vec.iov_len = B_PAGE_SIZE; 4936 4937 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4938 B_PHYSICAL_IO_REQUEST, &bytesRead); 4939 4940 cache->Lock(); 4941 4942 if (status < B_OK) { 4943 // on error remove and free the page 4944 dprintf("reading page from cache %p returned: %s!\n", 4945 cache, strerror(status)); 4946 4947 busyCondition.Unpublish(); 4948 cache->RemovePage(page); 4949 vm_page_set_state(page, PAGE_STATE_FREE); 4950 4951 cache->ReleaseRefAndUnlock(); 4952 return status; 4953 } 4954 4955 // mark the page unbusy again 4956 page->state = PAGE_STATE_ACTIVE; 4957 busyCondition.Unpublish(); 4958 4959 // Since we needed to unlock everything temporarily, the area 4960 // situation might have changed. So we need to restart the whole 4961 // process. 
4962 cache->ReleaseRefAndUnlock(); 4963 context.restart = true; 4964 return B_OK; 4965 } 4966 4967 cache = context.cacheChainLocker.LockSourceCache(); 4968 } 4969 4970 if (page == NULL) { 4971 // There was no adequate page, determine the cache for a clean one. 4972 // Read-only pages come in the deepest cache, only the top most cache 4973 // may have direct write access. 4974 cache = context.isWrite ? context.topCache : lastCache; 4975 4976 // allocate a clean page 4977 page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 4978 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 4979 page->physical_page_number)); 4980 4981 // insert the new page into our cache 4982 cache->InsertPage(page, context.cacheOffset); 4983 4984 } else if (page->cache != context.topCache && context.isWrite) { 4985 // We have a page that has the data we want, but in the wrong cache 4986 // object so we need to copy it and stick it into the top cache. 4987 vm_page* sourcePage = page; 4988 4989 // TODO: If memory is low, it might be a good idea to steal the page 4990 // from our source cache -- if possible, that is. 4991 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4992 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4993 4994 // copy the page 4995 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4996 sourcePage->physical_page_number * B_PAGE_SIZE); 4997 4998 // insert the new page into our cache 4999 context.topCache->InsertPage(page, context.cacheOffset); 5000 } 5001 5002 context.page = page; 5003 return B_OK; 5004 } 5005 5006 5007 static status_t 5008 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress, 5009 bool isWrite, bool isUser) 5010 { 5011 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 5012 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 5013 5014 PageFaultContext context(addressSpace, isWrite); 5015 5016 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 5017 status_t status = B_OK; 5018 5019 atomic_add(&addressSpace->fault_count, 1); 5020 5021 // We may need up to 2 pages plus pages needed for mapping them -- reserving 5022 // the pages upfront makes sure we don't have any cache locked, so that the 5023 // page daemon/thief can do their job without problems. 5024 size_t reservePages = 2 + context.map->ops->map_max_pages_need(context.map, 5025 originalAddress, originalAddress); 5026 context.addressSpaceLocker.Unlock(); 5027 vm_page_reserve_pages(reservePages); 5028 5029 while (true) { 5030 context.addressSpaceLocker.Lock(); 5031 5032 // get the area the fault was in 5033 vm_area* area = vm_area_lookup(addressSpace, address); 5034 if (area == NULL) { 5035 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 5036 "space\n", originalAddress); 5037 TPF(PageFaultError(-1, 5038 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 5039 status = B_BAD_ADDRESS; 5040 break; 5041 } 5042 5043 // check permissions 5044 uint32 protection = get_area_page_protection(area, address); 5045 if (isUser && (protection & B_USER_PROTECTION) == 0) { 5046 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 5047 (void*)originalAddress); 5048 TPF(PageFaultError(area->id, 5049 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 5050 status = B_PERMISSION_DENIED; 5051 break; 5052 } 5053 if (isWrite && (protection 5054 & (B_WRITE_AREA | (isUser ? 
0 : B_KERNEL_WRITE_AREA))) == 0) { 5055 dprintf("write access attempted on write-protected area 0x%lx at" 5056 " %p\n", area->id, (void*)originalAddress); 5057 TPF(PageFaultError(area->id, 5058 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 5059 status = B_PERMISSION_DENIED; 5060 break; 5061 } else if (!isWrite && (protection 5062 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 5063 dprintf("read access attempted on read-protected area 0x%lx at" 5064 " %p\n", area->id, (void*)originalAddress); 5065 TPF(PageFaultError(area->id, 5066 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 5067 status = B_PERMISSION_DENIED; 5068 break; 5069 } 5070 5071 // We have the area, it was a valid access, so let's try to resolve the 5072 // page fault now. 5073 // At first, the top most cache from the area is investigated. 5074 5075 context.Prepare(vm_area_get_locked_cache(area), 5076 address - area->base + area->cache_offset); 5077 5078 // See if this cache has a fault handler -- this will do all the work 5079 // for us. 5080 { 5081 // Note, since the page fault is resolved with interrupts enabled, 5082 // the fault handler could be called more than once for the same 5083 // reason -- the store must take this into account. 5084 status = context.topCache->Fault(addressSpace, context.cacheOffset); 5085 if (status != B_BAD_HANDLER) 5086 break; 5087 } 5088 5089 // The top most cache has no fault handler, so let's see if the cache or 5090 // its sources already have the page we're searching for (we're going 5091 // from top to bottom). 5092 status = fault_get_page(context); 5093 if (status != B_OK) { 5094 TPF(PageFaultError(area->id, status)); 5095 break; 5096 } 5097 5098 if (context.restart) 5099 continue; 5100 5101 // All went fine, all there is left to do is to map the page into the 5102 // address space. 5103 TPF(PageFaultDone(area->id, context.topCache, context.page->cache, 5104 context.page)); 5105 5106 // If the page doesn't reside in the area's cache, we need to make sure 5107 // it's mapped in read-only, so that we cannot overwrite someone else's 5108 // data (copy-on-write) 5109 uint32 newProtection = protection; 5110 if (context.page->cache != context.topCache && !isWrite) 5111 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 5112 5113 bool unmapPage = false; 5114 bool mapPage = true; 5115 5116 // check whether there's already a page mapped at the address 5117 context.map->ops->lock(context.map); 5118 5119 addr_t physicalAddress; 5120 uint32 flags; 5121 vm_page* mappedPage; 5122 if (context.map->ops->query(context.map, address, &physicalAddress, 5123 &flags) == B_OK 5124 && (flags & PAGE_PRESENT) != 0 5125 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5126 != NULL) { 5127 // Yep there's already a page. If it's ours, we can simply adjust 5128 // its protection. Otherwise we have to unmap it. 5129 if (mappedPage == context.page) { 5130 context.map->ops->protect(context.map, address, 5131 address + (B_PAGE_SIZE - 1), newProtection); 5132 5133 mapPage = false; 5134 } else 5135 unmapPage = true; 5136 } 5137 5138 context.map->ops->unlock(context.map); 5139 5140 if (unmapPage) 5141 vm_unmap_page(area, address, true); 5142 5143 if (mapPage) 5144 vm_map_page(area, context.page, address, newProtection); 5145 5146 break; 5147 } 5148 5149 vm_page_unreserve_pages(reservePages); 5150 5151 return status; 5152 } 5153 5154 5155 /*! 
You must have the address space's sem held */ 5156 vm_area* 5157 vm_area_lookup(vm_address_space* addressSpace, addr_t address) 5158 { 5159 vm_area* area; 5160 5161 // check the areas list first 5162 area = addressSpace->area_hint; 5163 if (area != NULL 5164 && area->base <= address 5165 && area->base + (area->size - 1) >= address) 5166 goto found; 5167 5168 for (area = addressSpace->areas; area != NULL; 5169 area = area->address_space_next) { 5170 if (area->id == RESERVED_AREA_ID) 5171 continue; 5172 5173 if (area->base <= address && area->base + (area->size - 1) >= address) 5174 break; 5175 } 5176 5177 found: 5178 if (area) 5179 addressSpace->area_hint = area; 5180 5181 return area; 5182 } 5183 5184 5185 status_t 5186 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 5187 { 5188 return vm_kernel_address_space()->translation_map.ops->get_physical_page( 5189 paddr, _vaddr, _handle); 5190 } 5191 5192 status_t 5193 vm_put_physical_page(addr_t vaddr, void* handle) 5194 { 5195 return vm_kernel_address_space()->translation_map.ops->put_physical_page( 5196 vaddr, handle); 5197 } 5198 5199 5200 status_t 5201 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 5202 { 5203 return vm_kernel_address_space()->translation_map.ops 5204 ->get_physical_page_current_cpu(paddr, _vaddr, _handle); 5205 } 5206 5207 status_t 5208 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5209 { 5210 return vm_kernel_address_space()->translation_map.ops 5211 ->put_physical_page_current_cpu(vaddr, handle); 5212 } 5213 5214 5215 status_t 5216 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 5217 { 5218 return vm_kernel_address_space()->translation_map.ops 5219 ->get_physical_page_debug(paddr, _vaddr, _handle); 5220 } 5221 5222 status_t 5223 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5224 { 5225 return vm_kernel_address_space()->translation_map.ops 5226 ->put_physical_page_debug(vaddr, handle); 5227 } 5228 5229 5230 void 5231 vm_get_info(system_memory_info* info) 5232 { 5233 swap_get_info(info); 5234 5235 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 5236 info->page_faults = sPageFaults; 5237 5238 MutexLocker locker(sAvailableMemoryLock); 5239 info->free_memory = sAvailableMemory; 5240 info->needed_memory = sNeededMemory; 5241 } 5242 5243 5244 uint32 5245 vm_num_page_faults(void) 5246 { 5247 return sPageFaults; 5248 } 5249 5250 5251 off_t 5252 vm_available_memory(void) 5253 { 5254 MutexLocker locker(sAvailableMemoryLock); 5255 return sAvailableMemory; 5256 } 5257 5258 5259 off_t 5260 vm_available_not_needed_memory(void) 5261 { 5262 MutexLocker locker(sAvailableMemoryLock); 5263 return sAvailableMemory - sNeededMemory; 5264 } 5265 5266 5267 void 5268 vm_unreserve_memory(size_t amount) 5269 { 5270 mutex_lock(&sAvailableMemoryLock); 5271 5272 sAvailableMemory += amount; 5273 5274 mutex_unlock(&sAvailableMemoryLock); 5275 } 5276 5277 5278 status_t 5279 vm_try_reserve_memory(size_t amount, bigtime_t timeout) 5280 { 5281 MutexLocker locker(sAvailableMemoryLock); 5282 5283 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5284 5285 if (sAvailableMemory >= amount) { 5286 sAvailableMemory -= amount; 5287 return B_OK; 5288 } 5289 5290 if (timeout <= 0) 5291 return B_NO_MEMORY; 5292 5293 // turn timeout into an absolute timeout 5294 timeout += system_time(); 5295 5296 // loop until we've got the memory or the timeout occurs 5297 do { 5298 sNeededMemory += amount; 5299 5300 // call the low resource manager 5301 
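		// Drop the available-memory lock while waiting, so that other threads
		// can reserve or free memory in the meantime.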
		locker.Unlock();
		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
			B_ABSOLUTE_TIMEOUT, timeout);
		locker.Lock();

		sNeededMemory -= amount;

		if (sAvailableMemory >= amount) {
			sAvailableMemory -= amount;
			return B_OK;
		}
	} while (timeout > system_time());

	return B_NO_MEMORY;
}


status_t
vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
{
	AddressSpaceReadLocker locker;
	vm_area* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	return arch_vm_set_memory_type(area, physicalBase, type);
}


/*!	This function enforces some protection properties:
	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
	   and B_KERNEL_WRITE_AREA.
*/
static void
fix_protection(uint32* protection)
{
	if ((*protection & B_KERNEL_PROTECTION) == 0) {
		if ((*protection & B_USER_PROTECTION) == 0
			|| (*protection & B_WRITE_AREA) != 0)
			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
		else
			*protection |= B_KERNEL_READ_AREA;
	}
}


static void
fill_area_info(struct vm_area* area, area_info* info, size_t size)
{
	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
	info->area = area->id;
	info->address = (void*)area->base;
	info->size = area->size;
	info->protection = area->protection;
	info->lock = B_FULL_LOCK;
	info->team = area->address_space->id;
	info->copy_count = 0;
	info->in_count = 0;
	info->out_count = 0;
		// TODO: retrieve real values here!

	vm_cache* cache = vm_area_get_locked_cache(area);

	// Note: this is a simplification; the cache could be larger than this area
	info->ram_size = cache->page_count * B_PAGE_SIZE;

	vm_area_put_locked_cache(cache);
}


/*!	Tests whether the area that contains the specified address needs any
	kind of locking, and whether it actually exists.
	Used by both lock_memory() and unlock_memory().
*/
static status_t
test_lock_memory(vm_address_space* addressSpace, addr_t address,
	bool& needsLocking)
{
	rw_lock_read_lock(&addressSpace->lock);

	vm_area* area = vm_area_lookup(addressSpace, address);
	if (area != NULL) {
		// This determines if we need to lock the memory at all
		needsLocking = area->cache_type != CACHE_TYPE_NULL
			&& area->cache_type != CACHE_TYPE_DEVICE
			&& area->wiring != B_FULL_LOCK
			&& area->wiring != B_CONTIGUOUS;
	}

	rw_lock_read_unlock(&addressSpace->lock);

	if (area == NULL)
		return B_BAD_ADDRESS;

	return B_OK;
}


static status_t
vm_resize_area(area_id areaID, size_t newSize, bool kernel)
{
	// is newSize a multiple of B_PAGE_SIZE?
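	// (B_PAGE_SIZE is a power of two, so the bitmask below is non-zero exactly
	// when newSize is not page aligned.)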
5407 if (newSize & (B_PAGE_SIZE - 1)) 5408 return B_BAD_VALUE; 5409 5410 // lock all affected address spaces and the cache 5411 vm_area* area; 5412 vm_cache* cache; 5413 5414 MultiAddressSpaceLocker locker; 5415 status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area, 5416 &cache); 5417 if (status != B_OK) 5418 return status; 5419 AreaCacheLocker cacheLocker(cache); // already locked 5420 5421 // enforce restrictions 5422 if (!kernel) { 5423 if ((area->protection & B_KERNEL_AREA) != 0) 5424 return B_NOT_ALLOWED; 5425 // TODO: Enforce all restrictions (team, etc.)! 5426 } 5427 5428 size_t oldSize = area->size; 5429 if (newSize == oldSize) 5430 return B_OK; 5431 5432 // Resize all areas of this area's cache 5433 5434 if (cache->type != CACHE_TYPE_RAM) 5435 return B_NOT_ALLOWED; 5436 5437 if (oldSize < newSize) { 5438 // We need to check if all areas of this cache can be resized 5439 5440 for (vm_area* current = cache->areas; current != NULL; 5441 current = current->cache_next) { 5442 vm_area* next = current->address_space_next; 5443 if (next != NULL && next->base <= (current->base + newSize)) { 5444 // If the area was created inside a reserved area, it can 5445 // also be resized in that area 5446 // TODO: if there is free space after the reserved area, it could 5447 // be used as well... 5448 if (next->id == RESERVED_AREA_ID 5449 && next->cache_offset <= current->base 5450 && next->base - 1 + next->size 5451 >= current->base - 1 + newSize) 5452 continue; 5453 5454 return B_ERROR; 5455 } 5456 } 5457 } 5458 5459 // Okay, looks good so far, so let's do it 5460 5461 if (oldSize < newSize) { 5462 // Growing the cache can fail, so we do it first. 5463 status = cache->Resize(cache->virtual_base + newSize); 5464 if (status != B_OK) 5465 return status; 5466 } 5467 5468 for (vm_area* current = cache->areas; current != NULL; 5469 current = current->cache_next) { 5470 vm_area* next = current->address_space_next; 5471 if (next != NULL && next->base <= (current->base + newSize)) { 5472 if (next->id == RESERVED_AREA_ID 5473 && next->cache_offset <= current->base 5474 && next->base - 1 + next->size >= current->base - 1 + newSize) { 5475 // resize reserved area 5476 addr_t offset = current->base + newSize - next->base; 5477 if (next->size <= offset) { 5478 current->address_space_next = next->address_space_next; 5479 free(next); 5480 } else { 5481 next->size -= offset; 5482 next->base += offset; 5483 } 5484 } else { 5485 panic("resize situation for area %p has changed although we " 5486 "should have the address space lock", current); 5487 status = B_ERROR; 5488 break; 5489 } 5490 } 5491 5492 current->size = newSize; 5493 5494 // We also need to unmap all pages beyond the new size, if the area has 5495 // shrinked 5496 if (newSize < oldSize) { 5497 vm_unmap_pages(current, current->base + newSize, oldSize - newSize, 5498 false); 5499 } 5500 } 5501 5502 // shrinking the cache can't fail, so we do it now 5503 if (status == B_OK && newSize < oldSize) 5504 status = cache->Resize(cache->virtual_base + newSize); 5505 5506 if (status < B_OK) { 5507 // This shouldn't really be possible, but hey, who knows 5508 for (vm_area* current = cache->areas; current != NULL; 5509 current = current->cache_next) { 5510 current->size = oldSize; 5511 } 5512 5513 cache->Resize(cache->virtual_base + oldSize); 5514 } 5515 5516 // TODO: we must honour the lock restrictions of this area 5517 return status; 5518 } 5519 5520 5521 status_t 5522 vm_memset_physical(addr_t address, int value, size_t length) 5523 { 5524 return 
vm_kernel_address_space()->translation_map.ops->memset_physical( 5525 address, value, length); 5526 } 5527 5528 5529 status_t 5530 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 5531 { 5532 return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical( 5533 to, from, length, user); 5534 } 5535 5536 5537 status_t 5538 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 5539 { 5540 return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical( 5541 to, _from, length, user); 5542 } 5543 5544 5545 void 5546 vm_memcpy_physical_page(addr_t to, addr_t from) 5547 { 5548 return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page( 5549 to, from); 5550 } 5551 5552 5553 // #pragma mark - kernel public API 5554 5555 5556 status_t 5557 user_memcpy(void* to, const void* from, size_t size) 5558 { 5559 // don't allow address overflows 5560 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5561 return B_BAD_ADDRESS; 5562 5563 if (arch_cpu_user_memcpy(to, from, size, 5564 &thread_get_current_thread()->fault_handler) < B_OK) 5565 return B_BAD_ADDRESS; 5566 5567 return B_OK; 5568 } 5569 5570 5571 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5572 the string in \a to, NULL-terminating the result. 5573 5574 \param to Pointer to the destination C-string. 5575 \param from Pointer to the source C-string. 5576 \param size Size in bytes of the string buffer pointed to by \a to. 5577 5578 \return strlen(\a from). 5579 */ 5580 ssize_t 5581 user_strlcpy(char* to, const char* from, size_t size) 5582 { 5583 if (size == 0) 5584 return 0; 5585 if (from == NULL || to == NULL) 5586 return B_BAD_ADDRESS; 5587 5588 // limit size to avoid address overflows 5589 size_t maxSize = std::min(size, 5590 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 5591 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 5592 // the source address might still overflow. 5593 5594 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 5595 &thread_get_current_thread()->fault_handler); 5596 5597 // If we hit the address overflow boundary, fail. 
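	// That is the case when the copy was limited by our reduced maxSize rather
	// than by the caller-supplied size.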
5598 if (result >= 0 && (size_t)result >= maxSize && maxSize < size) 5599 return B_BAD_ADDRESS; 5600 5601 return result; 5602 } 5603 5604 5605 status_t 5606 user_memset(void* s, char c, size_t count) 5607 { 5608 // don't allow address overflows 5609 if ((addr_t)s + count < (addr_t)s) 5610 return B_BAD_ADDRESS; 5611 5612 if (arch_cpu_user_memset(s, c, count, 5613 &thread_get_current_thread()->fault_handler) < B_OK) 5614 return B_BAD_ADDRESS; 5615 5616 return B_OK; 5617 } 5618 5619 5620 status_t 5621 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5622 { 5623 vm_address_space* addressSpace = NULL; 5624 struct vm_translation_map* map; 5625 addr_t unalignedBase = (addr_t)address; 5626 addr_t end = unalignedBase + numBytes; 5627 addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE); 5628 bool isUser = IS_USER_ADDRESS(address); 5629 bool needsLocking = true; 5630 5631 if (isUser) { 5632 if (team == B_CURRENT_TEAM) 5633 addressSpace = vm_get_current_user_address_space(); 5634 else 5635 addressSpace = vm_get_address_space(team); 5636 } else 5637 addressSpace = vm_get_kernel_address_space(); 5638 if (addressSpace == NULL) 5639 return B_ERROR; 5640 5641 // test if we're on an area that allows faults at all 5642 5643 map = &addressSpace->translation_map; 5644 5645 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5646 if (status < B_OK) 5647 goto out; 5648 if (!needsLocking) 5649 goto out; 5650 5651 for (; base < end; base += B_PAGE_SIZE) { 5652 addr_t physicalAddress; 5653 uint32 protection; 5654 status_t status; 5655 5656 map->ops->lock(map); 5657 status = map->ops->query(map, base, &physicalAddress, &protection); 5658 map->ops->unlock(map); 5659 5660 if (status < B_OK) 5661 goto out; 5662 5663 if ((protection & PAGE_PRESENT) != 0) { 5664 // if B_READ_DEVICE is set, the caller intents to write to the locked 5665 // memory, so if it hasn't been mapped writable, we'll try the soft 5666 // fault anyway 5667 if ((flags & B_READ_DEVICE) == 0 5668 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 5669 // update wiring 5670 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5671 if (page == NULL) 5672 panic("couldn't lookup physical page just allocated\n"); 5673 5674 increment_page_wired_count(page); 5675 continue; 5676 } 5677 } 5678 5679 status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0, 5680 isUser); 5681 if (status != B_OK) { 5682 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) " 5683 "failed: %s\n", (void*)unalignedBase, numBytes, flags, 5684 strerror(status)); 5685 goto out; 5686 } 5687 5688 // TODO: Here's a race condition. We should probably add a parameter 5689 // to vm_soft_fault() that would cause the page's wired count to be 5690 // incremented immediately. 5691 // TODO: After memory has been locked in an area, we need to prevent the 5692 // area from being deleted, resized, cut, etc. That could be done using 5693 // a "locked pages" count in vm_area, and maybe a condition variable, if 5694 // we want to allow waiting for the area to become eligible for these 5695 // operations again. 
5696 5697 map->ops->lock(map); 5698 status = map->ops->query(map, base, &physicalAddress, &protection); 5699 map->ops->unlock(map); 5700 5701 if (status < B_OK) 5702 goto out; 5703 5704 // update wiring 5705 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5706 if (page == NULL) 5707 panic("couldn't lookup physical page"); 5708 5709 increment_page_wired_count(page); 5710 // TODO: needs to be atomic on all platforms! 5711 } 5712 5713 out: 5714 vm_put_address_space(addressSpace); 5715 return status; 5716 } 5717 5718 5719 status_t 5720 lock_memory(void* address, size_t numBytes, uint32 flags) 5721 { 5722 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5723 } 5724 5725 5726 status_t 5727 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5728 { 5729 vm_address_space* addressSpace = NULL; 5730 struct vm_translation_map* map; 5731 addr_t unalignedBase = (addr_t)address; 5732 addr_t end = unalignedBase + numBytes; 5733 addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE); 5734 bool needsLocking = true; 5735 5736 if (IS_USER_ADDRESS(address)) { 5737 if (team == B_CURRENT_TEAM) 5738 addressSpace = vm_get_current_user_address_space(); 5739 else 5740 addressSpace = vm_get_address_space(team); 5741 } else 5742 addressSpace = vm_get_kernel_address_space(); 5743 if (addressSpace == NULL) 5744 return B_ERROR; 5745 5746 map = &addressSpace->translation_map; 5747 5748 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5749 if (status < B_OK) 5750 goto out; 5751 if (!needsLocking) 5752 goto out; 5753 5754 for (; base < end; base += B_PAGE_SIZE) { 5755 map->ops->lock(map); 5756 5757 addr_t physicalAddress; 5758 uint32 protection; 5759 status = map->ops->query(map, base, &physicalAddress, 5760 &protection); 5761 5762 map->ops->unlock(map); 5763 5764 if (status < B_OK) 5765 goto out; 5766 if ((protection & PAGE_PRESENT) == 0) 5767 panic("calling unlock_memory() on unmapped memory!"); 5768 5769 // update wiring 5770 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5771 if (page == NULL) 5772 panic("couldn't lookup physical page"); 5773 5774 decrement_page_wired_count(page); 5775 } 5776 5777 out: 5778 vm_put_address_space(addressSpace); 5779 return status; 5780 } 5781 5782 5783 status_t 5784 unlock_memory(void* address, size_t numBytes, uint32 flags) 5785 { 5786 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5787 } 5788 5789 5790 /*! Similar to get_memory_map(), but also allows to specify the address space 5791 for the memory in question and has a saner semantics. 5792 Returns \c B_OK when the complete range could be translated or 5793 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5794 case the actual number of entries is written to \c *_numEntries. Any other 5795 error case indicates complete failure; \c *_numEntries will be set to \c 0 5796 in this case. 
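	A minimal usage sketch (illustrative only; \c buffer and \c bufferSize are
	placeholders, not part of this API):
\code
	physical_entry table[8];
	uint32 count = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
		table, &count);
	if (error == B_OK) {
		// count now holds the number of entries filled in; B_BUFFER_OVERFLOW
		// would indicate that table was too small for the whole range.
	}
\endcode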
5797 */ 5798 status_t 5799 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5800 physical_entry* table, uint32* _numEntries) 5801 { 5802 uint32 numEntries = *_numEntries; 5803 *_numEntries = 0; 5804 5805 vm_address_space* addressSpace; 5806 addr_t virtualAddress = (addr_t)address; 5807 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5808 addr_t physicalAddress; 5809 status_t status = B_OK; 5810 int32 index = -1; 5811 addr_t offset = 0; 5812 bool interrupts = are_interrupts_enabled(); 5813 5814 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5815 address, numBytes, numEntries)); 5816 5817 if (numEntries == 0 || numBytes == 0) 5818 return B_BAD_VALUE; 5819 5820 // in which address space is the address to be found? 5821 if (IS_USER_ADDRESS(virtualAddress)) { 5822 if (team == B_CURRENT_TEAM) 5823 addressSpace = vm_get_current_user_address_space(); 5824 else 5825 addressSpace = vm_get_address_space(team); 5826 } else 5827 addressSpace = vm_get_kernel_address_space(); 5828 5829 if (addressSpace == NULL) 5830 return B_ERROR; 5831 5832 vm_translation_map* map = &addressSpace->translation_map; 5833 5834 if (interrupts) 5835 map->ops->lock(map); 5836 5837 while (offset < numBytes) { 5838 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5839 uint32 flags; 5840 5841 if (interrupts) { 5842 status = map->ops->query(map, (addr_t)address + offset, 5843 &physicalAddress, &flags); 5844 } else { 5845 status = map->ops->query_interrupt(map, (addr_t)address + offset, 5846 &physicalAddress, &flags); 5847 } 5848 if (status < B_OK) 5849 break; 5850 if ((flags & PAGE_PRESENT) == 0) { 5851 panic("get_memory_map() called on unmapped memory!"); 5852 return B_BAD_ADDRESS; 5853 } 5854 5855 if (index < 0 && pageOffset > 0) { 5856 physicalAddress += pageOffset; 5857 if (bytes > B_PAGE_SIZE - pageOffset) 5858 bytes = B_PAGE_SIZE - pageOffset; 5859 } 5860 5861 // need to switch to the next physical_entry? 5862 if (index < 0 || (addr_t)table[index].address 5863 != physicalAddress - table[index].size) { 5864 if ((uint32)++index + 1 > numEntries) { 5865 // table to small 5866 status = B_BUFFER_OVERFLOW; 5867 break; 5868 } 5869 table[index].address = (void*)physicalAddress; 5870 table[index].size = bytes; 5871 } else { 5872 // page does fit in current entry 5873 table[index].size += bytes; 5874 } 5875 5876 offset += bytes; 5877 } 5878 5879 if (interrupts) 5880 map->ops->unlock(map); 5881 5882 if (status != B_OK) 5883 return status; 5884 5885 if ((uint32)index + 1 > numEntries) { 5886 *_numEntries = index; 5887 return B_BUFFER_OVERFLOW; 5888 } 5889 5890 *_numEntries = index + 1; 5891 return B_OK; 5892 } 5893 5894 5895 /*! According to the BeBook, this function should always succeed. 5896 This is no longer the case. 
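	When the provided table has room for it, a terminating entry with a
	\c NULL address and zero size is appended, so callers can scan the result
	without keeping a count. A minimal sketch (illustrative only; \c buffer and
	\c bufferSize are placeholders):
\code
	physical_entry entries[4];
	if (get_memory_map(buffer, bufferSize, entries, 4) == B_OK) {
		for (int32 i = 0; i < 4 && entries[i].size != 0; i++)
			dprintf("%p: %lu bytes\n", entries[i].address, entries[i].size);
	}
\endcode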
5897 */ 5898 long 5899 get_memory_map(const void* address, ulong numBytes, physical_entry* table, 5900 long numEntries) 5901 { 5902 uint32 entriesRead = numEntries; 5903 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5904 table, &entriesRead); 5905 if (error != B_OK) 5906 return error; 5907 5908 // close the entry list 5909 5910 // if it's only one entry, we will silently accept the missing ending 5911 if (numEntries == 1) 5912 return B_OK; 5913 5914 if (entriesRead + 1 > (uint32)numEntries) 5915 return B_BUFFER_OVERFLOW; 5916 5917 table[entriesRead].address = NULL; 5918 table[entriesRead].size = 0; 5919 5920 return B_OK; 5921 } 5922 5923 5924 area_id 5925 area_for(void* address) 5926 { 5927 return vm_area_for((addr_t)address, true); 5928 } 5929 5930 5931 area_id 5932 find_area(const char* name) 5933 { 5934 rw_lock_read_lock(&sAreaHashLock); 5935 struct hash_iterator iterator; 5936 hash_open(sAreaHash, &iterator); 5937 5938 vm_area* area; 5939 area_id id = B_NAME_NOT_FOUND; 5940 while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) { 5941 if (area->id == RESERVED_AREA_ID) 5942 continue; 5943 5944 if (!strcmp(area->name, name)) { 5945 id = area->id; 5946 break; 5947 } 5948 } 5949 5950 hash_close(sAreaHash, &iterator, false); 5951 rw_lock_read_unlock(&sAreaHashLock); 5952 5953 return id; 5954 } 5955 5956 5957 status_t 5958 _get_area_info(area_id id, area_info* info, size_t size) 5959 { 5960 if (size != sizeof(area_info) || info == NULL) 5961 return B_BAD_VALUE; 5962 5963 AddressSpaceReadLocker locker; 5964 vm_area* area; 5965 status_t status = locker.SetFromArea(id, area); 5966 if (status != B_OK) 5967 return status; 5968 5969 fill_area_info(area, info, size); 5970 return B_OK; 5971 } 5972 5973 5974 status_t 5975 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 5976 { 5977 addr_t nextBase = *(addr_t*)cookie; 5978 5979 // we're already through the list 5980 if (nextBase == (addr_t)-1) 5981 return B_ENTRY_NOT_FOUND; 5982 5983 if (team == B_CURRENT_TEAM) 5984 team = team_get_current_team_id(); 5985 5986 AddressSpaceReadLocker locker(team); 5987 if (!locker.IsLocked()) 5988 return B_BAD_TEAM_ID; 5989 5990 vm_area* area; 5991 for (area = locker.AddressSpace()->areas; area != NULL; 5992 area = area->address_space_next) { 5993 if (area->id == RESERVED_AREA_ID) 5994 continue; 5995 5996 if (area->base > nextBase) 5997 break; 5998 } 5999 6000 if (area == NULL) { 6001 nextBase = (addr_t)-1; 6002 return B_ENTRY_NOT_FOUND; 6003 } 6004 6005 fill_area_info(area, info, size); 6006 *cookie = (int32)(area->base); 6007 6008 return B_OK; 6009 } 6010 6011 6012 status_t 6013 set_area_protection(area_id area, uint32 newProtection) 6014 { 6015 fix_protection(&newProtection); 6016 6017 return vm_set_area_protection(vm_kernel_address_space_id(), area, 6018 newProtection, true); 6019 } 6020 6021 6022 status_t 6023 resize_area(area_id areaID, size_t newSize) 6024 { 6025 return vm_resize_area(areaID, newSize, true); 6026 } 6027 6028 6029 /*! Transfers the specified area to a new team. The caller must be the owner 6030 of the area (not yet enforced but probably should be). 
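	The transfer is implemented by cloning the area into the target team and
	deleting the original, so the transferred area gets a new ID; callers must
	continue to use the ID returned by this function.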


/*!	Transfers the specified area to a new team. The caller must be the owner
	of the area (not yet enforced but probably should be).
*/
area_id
transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
	bool kernel)
{
	area_info info;
	status_t status = get_area_info(id, &info);
	if (status != B_OK)
		return status;

	area_id clonedArea = vm_clone_area(target, info.name, _address,
		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
	if (clonedArea < 0)
		return clonedArea;

	status = vm_delete_area(info.team, id, kernel);
	if (status != B_OK) {
		vm_delete_area(target, clonedArea, kernel);
		return status;
	}

	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.

	return clonedArea;
}


area_id
map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
	uint32 addressSpec, uint32 protection, void** _virtualAddress)
{
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	fix_protection(&protection);

	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
		_virtualAddress, addressSpec, numBytes, protection,
		(addr_t)physicalAddress);
}


area_id
clone_area(const char* name, void** _address, uint32 addressSpec,
	uint32 protection, area_id source)
{
	if ((protection & B_KERNEL_PROTECTION) == 0)
		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;

	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
}


area_id
create_area_etc(team_id team, const char* name, void** address,
	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
	addr_t physicalAddress, uint32 flags)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
		size, lock, protection, physicalAddress, flags, true);
}


area_id
create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
	uint32 lock, uint32 protection)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name,
		_address, addressSpec, size, lock, protection, 0, 0, true);
}
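

/*	Usage sketch (hypothetical name and size): a typical in-kernel caller
	allocating a locked, physically contiguous buffer with create_area() above
	and releasing it again when done:

		void* buffer;
		area_id area = create_area("dma buffer", &buffer, B_ANY_KERNEL_ADDRESS,
			4 * B_PAGE_SIZE, B_CONTIGUOUS,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		if (area < B_OK)
			return area;
		// ... use the buffer ...
		delete_area(area);
*/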


status_t
delete_area(area_id area)
{
	return vm_delete_area(vm_kernel_address_space_id(), area, true);
}


// #pragma mark - Userland syscalls


status_t
_user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
	addr_t size)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	addr_t address;

	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
		return B_BAD_ADDRESS;

	status_t status = vm_reserve_address_range(
		vm_current_user_address_space_id(), (void**)&address, addressSpec, size,
		RESERVED_AVOID_BASE);
	if (status != B_OK)
		return status;

	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
		vm_unreserve_address_range(vm_current_user_address_space_id(),
			(void*)address, size);
		return B_BAD_ADDRESS;
	}

	return B_OK;
}


status_t
_user_unreserve_address_range(addr_t address, addr_t size)
{
	return vm_unreserve_address_range(vm_current_user_address_space_id(),
		(void*)address, size);
}
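

/*	Sketch of the intended use of the two syscalls above: userland (for
	instance the runtime loader) reserves a range so that later B_ANY_ADDRESS
	allocations keep out of it, and gives it back once it is no longer needed.
	The "_kern_*" wrapper names and the base address below are assumptions,
	not taken from this file:

		addr_t base = 0x60000000;	// hypothetical base address
		if (_kern_reserve_address_range(&base, B_BASE_ADDRESS,
				64 * B_PAGE_SIZE) == B_OK) {
			// ... place mappings inside the reserved range ...
			_kern_unreserve_address_range(base, 64 * B_PAGE_SIZE);
		}
*/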


area_id
_user_area_for(void* address)
{
	return vm_area_for((addr_t)address, false);
}


area_id
_user_find_area(const char* userName)
{
	char name[B_OS_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return find_area(name);
}


status_t
_user_get_area_info(area_id area, area_info* userInfo)
{
	if (!IS_USER_ADDRESS(userInfo))
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = get_area_info(area, &info);
	if (status < B_OK)
		return status;

	// TODO: do we want to prevent userland from seeing kernel protections?
	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
{
	int32 cookie;

	if (!IS_USER_ADDRESS(userCookie)
		|| !IS_USER_ADDRESS(userInfo)
		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = _get_next_area_info(team, &cookie, &info,
		sizeof(area_info));
	if (status != B_OK)
		return status;

	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_set_area_protection(area_id area, uint32 newProtection)
{
	if ((newProtection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	fix_protection(&newProtection);

	return vm_set_area_protection(vm_current_user_address_space_id(), area,
		newProtection, false);
}


status_t
_user_resize_area(area_id area, size_t newSize)
{
	// TODO: Since we restrict deleting of areas to those owned by the team,
	// we should also do that for resizing (check other functions, too).
	return vm_resize_area(area, newSize, false);
}


area_id
_user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
	team_id target)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	void* address;
	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
	if (newArea < B_OK)
		return newArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return newArea;
}


area_id
_user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
	uint32 protection, area_id sourceArea)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	fix_protection(&protection);

	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
		false);
	if (clonedArea < B_OK)
		return clonedArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(clonedArea);
		return B_BAD_ADDRESS;
	}

	return clonedArea;
}
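

/*	Usage sketch (hypothetical names): the classic pattern served by
	_user_clone_area() above is mapping somebody else's area, e.g. a frame
	buffer or a shared buffer, into the calling team. Assuming the public
	clone_area() wrapper forwards here, the userland side would look roughly
	like:

		void* address;
		area_id clone = clone_area("buffer clone", &address, B_ANY_ADDRESS,
			B_READ_AREA | B_WRITE_AREA, sourceArea);
		if (clone < B_OK)
			;	// the source area is gone or the protection was refused
*/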


area_id
_user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
	size_t size, uint32 lock, uint32 protection)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS
		&& IS_KERNEL_ADDRESS(address))
		return B_BAD_VALUE;

	fix_protection(&protection);

	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
		(char*)name, &address, addressSpec, size, lock, protection, 0, 0,
		false);

	if (area >= B_OK
		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(area);
		return B_BAD_ADDRESS;
	}

	return area;
}


status_t
_user_delete_area(area_id area)
{
	// Unlike the BeOS implementation, you can now only delete areas
	// that you have created yourself from userland.
	// The documentation to delete_area() explicitly states that this
	// will be restricted in the future, and so it will.
	return vm_delete_area(vm_current_user_address_space_id(), area, false);
}


// TODO: create a BeOS style call for this!

area_id
_user_map_file(const char* userName, void** userAddress, int addressSpec,
	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
	off_t offset)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	area_id area;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS) {
		if ((addr_t)address + size < (addr_t)address)
			return B_BAD_VALUE;
		if (!IS_USER_ADDRESS(address)
			|| !IS_USER_ADDRESS((addr_t)address + size)) {
			return B_BAD_ADDRESS;
		}
	}

	// userland created areas can always be accessed by the kernel
	protection |= B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
		false);
	if (area < B_OK)
		return area;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return area;
}
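

/*	Sketch: POSIX mmap() in userland is assumed to funnel into
	_user_map_file() above through its syscall stub, translating the POSIX
	arguments roughly as follows (the "_kern_map_file" name and the exact
	flag mapping are assumptions):

		// mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0)
		void* address = NULL;
		area_id area = _kern_map_file("mmap area", &address, B_ANY_ADDRESS,
			length, B_READ_AREA | B_WRITE_AREA, REGION_PRIVATE_MAP,
			false, fd, 0);

	MAP_SHARED would presumably use REGION_NO_PRIVATE_MAP instead, and
	MAP_FIXED B_EXACT_ADDRESS together with unmapAddressRange = true.
*/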


status_t
_user_unmap_memory(void* _address, size_t size)
{
	addr_t address = (addr_t)_address;

	// check params
	if (size == 0 || (addr_t)address + size < (addr_t)address)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
		return B_BAD_ADDRESS;

	// write lock the address space
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// unmap
	return unmap_address_range(locker.AddressSpace(), address, size, false);
}


status_t
_user_set_memory_protection(void* _address, size_t size, int protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	// extend and check protection
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 actualProtection = protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	if (!arch_vm_supports_protection(actualProtection))
		return B_NOT_SUPPORTED;

	// We need to write lock the address space, since we're going to play with
	// the areas.
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// First round: Check whether the whole range is covered by areas and we
	// are allowed to modify them.
	addr_t currentAddress = address;
	size_t sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		if ((area->protection & B_KERNEL_AREA) != 0)
			return B_NOT_ALLOWED;

		// TODO: For (shared) mapped files we should check whether the new
		// protections are compatible with the file permissions. We don't have
		// a way to do that yet, though.

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;
	}

	// Second round: If the protections differ from those of the area, create
	// a page protection array and re-map mapped pages.
	vm_translation_map* map = &locker.AddressSpace()->translation_map;
	currentAddress = address;
	sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;

		if (area->page_protections == NULL) {
			if (area->protection == actualProtection)
				continue;

			// In the page protections we store only the three user
			// protections, so we use 4 bits per page.
			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
			area->page_protections = (uint8*)malloc(bytes);
			if (area->page_protections == NULL)
				return B_NO_MEMORY;

			// init the page protections for all pages to that of the area
			uint32 areaProtection = area->protection
				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
			memset(area->page_protections,
				areaProtection | (areaProtection << 4), bytes);
		}
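
		// Layout sketch: as implied by the memset above, each byte of
		// page_protections covers two pages, presumably with the even page
		// in the low nibble and the odd page in the high nibble. Reading a
		// page's bits back would then look along the lines of
		//
		//		uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE;
		//		uint8 entry = area->page_protections[pageIndex / 2];
		//		uint32 bits = (pageIndex % 2 == 0) ? entry & 0x0f : entry >> 4;
		//
		// with set_area_page_protection() updating the matching nibble.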

		for (addr_t pageAddress = area->base + offset;
				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
			map->ops->lock(map);

			set_area_page_protection(area, pageAddress, protection);

			addr_t physicalAddress;
			uint32 flags;

			status_t error = map->ops->query(map, pageAddress, &physicalAddress,
				&flags);
			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
				map->ops->unlock(map);
				continue;
			}

			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page == NULL) {
				panic("area %p looking up page failed for pa 0x%lx\n", area,
					physicalAddress);
				map->ops->unlock(map);
				return B_ERROR;
			}

			// If the page is not in the topmost cache and write access is
			// requested, we have to unmap it. Otherwise we can re-map it with
			// the new protection.
			bool unmapPage = page->cache != area->cache
				&& (protection & B_WRITE_AREA) != 0;

			if (!unmapPage) {
				map->ops->unmap(map, pageAddress,
					pageAddress + B_PAGE_SIZE - 1);
				map->ops->map(map, pageAddress, physicalAddress,
					actualProtection);
			}

			map->ops->unlock(map);

			if (unmapPage)
				vm_unmap_page(area, pageAddress, true);
		}
	}

	return B_OK;
}


status_t
_user_sync_memory(void* _address, size_t size, int flags)
{
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	// check params
	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	bool writeSync = (flags & MS_SYNC) != 0;
	bool writeAsync = (flags & MS_ASYNC) != 0;
	if (writeSync && writeAsync)
		return B_BAD_VALUE;

	if (size == 0 || (!writeSync && !writeAsync))
		return B_OK;

	// iterate through the range and sync all concerned areas
	while (size > 0) {
		// read lock the address space
		AddressSpaceReadLocker locker;
		status_t error = locker.SetTo(team_get_current_team_id());
		if (error != B_OK)
			return error;

		// get the first area
		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
		if (area == NULL)
			return B_NO_MEMORY;

		uint32 offset = address - area->base;
		size_t rangeSize = min_c(area->size - offset, size);
		offset += area->cache_offset;

		// lock the cache
		AreaCacheLocker cacheLocker(area);
		if (!cacheLocker)
			return B_BAD_VALUE;
		vm_cache* cache = area->cache;

		locker.Unlock();

		uint32 firstPage = offset >> PAGE_SHIFT;
		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);

		// write the pages
		if (cache->type == CACHE_TYPE_VNODE) {
			if (writeSync) {
				// synchronous
				error = vm_page_write_modified_page_range(cache, firstPage,
					endPage);
				if (error != B_OK)
					return error;
			} else {
				// asynchronous
				vm_page_schedule_write_page_range(cache, firstPage, endPage);
				// TODO: This is probably not quite what is supposed to happen.
				// Especially when a lot has to be written, it might take ages
				// until it really hits the disk.
			}
		}

		address += rangeSize;
		size -= rangeSize;
	}

	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
	// synchronize multiple mappings of the same file. In our VM they never get
	// out of sync, though, so we don't have to do anything.

	return B_OK;
}


status_t
_user_memory_advice(void* address, size_t size, int advice)
{
	// TODO: Implement!
	return B_OK;
}
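

/*	Usage sketch: _user_sync_memory() above backs what a POSIX msync() wrapper
	needs. Assuming libroot forwards the call through the corresponding
	syscall stub, flushing a page-aligned, file-backed mapping synchronously
	from userland would look roughly like ("mappedAddress" and "mappedSize"
	are hypothetical):

		if (msync(mappedAddress, mappedSize, MS_SYNC) != 0)
			fprintf(stderr, "msync failed: %s\n", strerror(errno));
*/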