1 /* 2 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleter.h> 25 26 #include <arch/cpu.h> 27 #include <arch/vm.h> 28 #include <boot/elf.h> 29 #include <boot/stage2.h> 30 #include <condition_variable.h> 31 #include <console.h> 32 #include <debug.h> 33 #include <file_cache.h> 34 #include <fs/fd.h> 35 #include <heap.h> 36 #include <kernel.h> 37 #include <int.h> 38 #include <lock.h> 39 #include <low_resource_manager.h> 40 #include <smp.h> 41 #include <system_info.h> 42 #include <thread.h> 43 #include <team.h> 44 #include <tracing.h> 45 #include <util/AutoLock.h> 46 #include <util/khash.h> 47 #include <vm_address_space.h> 48 #include <vm_cache.h> 49 #include <vm_page.h> 50 #include <vm_priv.h> 51 52 #include "VMAnonymousCache.h" 53 #include "IORequest.h" 54 55 56 //#define TRACE_VM 57 //#define TRACE_FAULTS 58 #ifdef TRACE_VM 59 # define TRACE(x) dprintf x 60 #else 61 # define TRACE(x) ; 62 #endif 63 #ifdef TRACE_FAULTS 64 # define FTRACE(x) dprintf x 65 #else 66 # define FTRACE(x) ; 67 #endif 68 69 70 class AddressSpaceReadLocker { 71 public: 72 AddressSpaceReadLocker(team_id team); 73 AddressSpaceReadLocker(vm_address_space* space, bool getNewReference); 74 AddressSpaceReadLocker(); 75 ~AddressSpaceReadLocker(); 76 77 status_t SetTo(team_id team); 78 void SetTo(vm_address_space* space, bool getNewReference); 79 status_t SetFromArea(area_id areaID, vm_area*& area); 80 81 bool IsLocked() const { return fLocked; } 82 bool Lock(); 83 void Unlock(); 84 85 void Unset(); 86 87 vm_address_space* AddressSpace() { return fSpace; } 88 89 private: 90 vm_address_space* fSpace; 91 bool fLocked; 92 }; 93 94 class AddressSpaceWriteLocker { 95 public: 96 AddressSpaceWriteLocker(team_id team); 97 AddressSpaceWriteLocker(); 98 ~AddressSpaceWriteLocker(); 99 100 status_t SetTo(team_id team); 101 status_t SetFromArea(area_id areaID, vm_area*& area); 102 status_t SetFromArea(team_id team, area_id areaID, bool allowKernel, 103 vm_area*& area); 104 status_t SetFromArea(team_id team, area_id areaID, vm_area*& area); 105 106 bool IsLocked() const { return fLocked; } 107 void Unlock(); 108 109 void DegradeToReadLock(); 110 void Unset(); 111 112 vm_address_space* AddressSpace() { return fSpace; } 113 114 private: 115 vm_address_space* fSpace; 116 bool fLocked; 117 bool fDegraded; 118 }; 119 120 class MultiAddressSpaceLocker { 121 public: 122 MultiAddressSpaceLocker(); 123 ~MultiAddressSpaceLocker(); 124 125 inline status_t AddTeam(team_id team, bool writeLock, 126 vm_address_space** _space = NULL); 127 inline status_t AddArea(area_id area, bool writeLock, 128 vm_address_space** _space = NULL); 129 130 status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne, 131 bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL); 132 133 status_t Lock(); 134 void Unlock(); 135 bool IsLocked() const { return fLocked; } 136 137 void Unset(); 138 139 private: 140 struct lock_item { 141 vm_address_space* space; 142 bool write_lock; 143 }; 144 145 bool _ResizeIfNeeded(); 146 int32 _IndexOfAddressSpace(vm_address_space* space) 
const; 147 status_t _AddAddressSpace(vm_address_space* space, bool writeLock, 148 vm_address_space** _space); 149 150 static int _CompareItems(const void* _a, const void* _b); 151 152 lock_item* fItems; 153 int32 fCapacity; 154 int32 fCount; 155 bool fLocked; 156 }; 157 158 159 class AreaCacheLocking { 160 public: 161 inline bool Lock(vm_cache* lockable) 162 { 163 return false; 164 } 165 166 inline void Unlock(vm_cache* lockable) 167 { 168 vm_area_put_locked_cache(lockable); 169 } 170 }; 171 172 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> { 173 public: 174 inline AreaCacheLocker(vm_cache* cache = NULL) 175 : AutoLocker<vm_cache, AreaCacheLocking>(cache, true) 176 { 177 } 178 179 inline AreaCacheLocker(vm_area* area) 180 : AutoLocker<vm_cache, AreaCacheLocking>() 181 { 182 SetTo(area); 183 } 184 185 inline void SetTo(vm_area* area) 186 { 187 return AutoLocker<vm_cache, AreaCacheLocking>::SetTo( 188 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 189 } 190 }; 191 192 193 #define AREA_HASH_TABLE_SIZE 1024 194 static area_id sNextAreaID = 1; 195 static hash_table* sAreaHash; 196 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash"); 197 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings"); 198 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache"); 199 200 static off_t sAvailableMemory; 201 static off_t sNeededMemory; 202 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 203 static uint32 sPageFaults; 204 205 #if DEBUG_CACHE_LIST 206 207 struct cache_info { 208 vm_cache* cache; 209 addr_t page_count; 210 addr_t committed; 211 }; 212 213 static const int kCacheInfoTableCount = 100 * 1024; 214 static cache_info* sCacheInfoTable; 215 216 #endif // DEBUG_CACHE_LIST 217 218 219 // function declarations 220 static void delete_area(vm_address_space* addressSpace, vm_area* area); 221 static vm_address_space* get_address_space_by_area_id(area_id id); 222 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address, 223 bool isWrite, bool isUser); 224 static status_t map_backing_store(vm_address_space* addressSpace, 225 vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size, 226 uint32 addressSpec, int wiring, int protection, int mapping, 227 vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel); 228 229 230 static size_t sKernelAddressSpaceLeft = KERNEL_SIZE; 231 232 233 // #pragma mark - 234 235 236 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team) 237 : 238 fSpace(NULL), 239 fLocked(false) 240 { 241 SetTo(team); 242 } 243 244 245 /*! Takes over the reference of the address space, if \a getNewReference is 246 \c false. 247 */ 248 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space, 249 bool getNewReference) 250 : 251 fSpace(NULL), 252 fLocked(false) 253 { 254 SetTo(space, getNewReference); 255 } 256 257 258 AddressSpaceReadLocker::AddressSpaceReadLocker() 259 : 260 fSpace(NULL), 261 fLocked(false) 262 { 263 } 264 265 266 AddressSpaceReadLocker::~AddressSpaceReadLocker() 267 { 268 Unset(); 269 } 270 271 272 void 273 AddressSpaceReadLocker::Unset() 274 { 275 Unlock(); 276 if (fSpace != NULL) 277 vm_put_address_space(fSpace); 278 } 279 280 281 status_t 282 AddressSpaceReadLocker::SetTo(team_id team) 283 { 284 fSpace = vm_get_address_space(team); 285 if (fSpace == NULL) 286 return B_BAD_TEAM_ID; 287 288 rw_lock_read_lock(&fSpace->lock); 289 fLocked = true; 290 return B_OK; 291 } 292 293 294 /*! 
Takes over the reference of the address space, if \a getNewReference is 295 \c false. 296 */ 297 void 298 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference) 299 { 300 fSpace = space; 301 302 if (getNewReference) 303 atomic_add(&fSpace->ref_count, 1); 304 305 rw_lock_read_lock(&fSpace->lock); 306 fLocked = true; 307 } 308 309 310 status_t 311 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area) 312 { 313 fSpace = get_address_space_by_area_id(areaID); 314 if (fSpace == NULL) 315 return B_BAD_TEAM_ID; 316 317 rw_lock_read_lock(&fSpace->lock); 318 319 rw_lock_read_lock(&sAreaHashLock); 320 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 321 rw_lock_read_unlock(&sAreaHashLock); 322 323 if (area == NULL || area->address_space != fSpace) { 324 rw_lock_read_unlock(&fSpace->lock); 325 return B_BAD_VALUE; 326 } 327 328 fLocked = true; 329 return B_OK; 330 } 331 332 333 bool 334 AddressSpaceReadLocker::Lock() 335 { 336 if (fLocked) 337 return true; 338 if (fSpace == NULL) 339 return false; 340 341 rw_lock_read_lock(&fSpace->lock); 342 fLocked = true; 343 344 return true; 345 } 346 347 348 void 349 AddressSpaceReadLocker::Unlock() 350 { 351 if (fLocked) { 352 rw_lock_read_unlock(&fSpace->lock); 353 fLocked = false; 354 } 355 } 356 357 358 // #pragma mark - 359 360 361 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team) 362 : 363 fSpace(NULL), 364 fLocked(false), 365 fDegraded(false) 366 { 367 SetTo(team); 368 } 369 370 371 AddressSpaceWriteLocker::AddressSpaceWriteLocker() 372 : 373 fSpace(NULL), 374 fLocked(false), 375 fDegraded(false) 376 { 377 } 378 379 380 AddressSpaceWriteLocker::~AddressSpaceWriteLocker() 381 { 382 Unset(); 383 } 384 385 386 void 387 AddressSpaceWriteLocker::Unset() 388 { 389 Unlock(); 390 if (fSpace != NULL) 391 vm_put_address_space(fSpace); 392 } 393 394 395 status_t 396 AddressSpaceWriteLocker::SetTo(team_id team) 397 { 398 fSpace = vm_get_address_space(team); 399 if (fSpace == NULL) 400 return B_BAD_TEAM_ID; 401 402 rw_lock_write_lock(&fSpace->lock); 403 fLocked = true; 404 return B_OK; 405 } 406 407 408 status_t 409 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area) 410 { 411 fSpace = get_address_space_by_area_id(areaID); 412 if (fSpace == NULL) 413 return B_BAD_VALUE; 414 415 rw_lock_write_lock(&fSpace->lock); 416 417 rw_lock_read_lock(&sAreaHashLock); 418 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 419 rw_lock_read_unlock(&sAreaHashLock); 420 421 if (area == NULL || area->address_space != fSpace) { 422 rw_lock_write_unlock(&fSpace->lock); 423 return B_BAD_VALUE; 424 } 425 426 fLocked = true; 427 return B_OK; 428 } 429 430 431 status_t 432 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 433 bool allowKernel, vm_area*& area) 434 { 435 rw_lock_read_lock(&sAreaHashLock); 436 437 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 438 if (area != NULL 439 && (area->address_space->id == team 440 || (allowKernel && team == vm_kernel_address_space_id()))) { 441 fSpace = area->address_space; 442 atomic_add(&fSpace->ref_count, 1); 443 } 444 445 rw_lock_read_unlock(&sAreaHashLock); 446 447 if (fSpace == NULL) 448 return B_BAD_VALUE; 449 450 // Second try to get the area -- this time with the address space 451 // write lock held 452 453 rw_lock_write_lock(&fSpace->lock); 454 455 rw_lock_read_lock(&sAreaHashLock); 456 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 457 rw_lock_read_unlock(&sAreaHashLock); 458 459 if (area == NULL) { 460 rw_lock_write_unlock(&fSpace->lock); 461 return 
B_BAD_VALUE; 462 } 463 464 fLocked = true; 465 return B_OK; 466 } 467 468 469 status_t 470 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 471 vm_area*& area) 472 { 473 return SetFromArea(team, areaID, false, area); 474 } 475 476 477 void 478 AddressSpaceWriteLocker::Unlock() 479 { 480 if (fLocked) { 481 if (fDegraded) 482 rw_lock_read_unlock(&fSpace->lock); 483 else 484 rw_lock_write_unlock(&fSpace->lock); 485 fLocked = false; 486 fDegraded = false; 487 } 488 } 489 490 491 void 492 AddressSpaceWriteLocker::DegradeToReadLock() 493 { 494 // TODO: the current R/W lock implementation just keeps the write lock here 495 rw_lock_read_lock(&fSpace->lock); 496 rw_lock_write_unlock(&fSpace->lock); 497 fDegraded = true; 498 } 499 500 501 // #pragma mark - 502 503 504 MultiAddressSpaceLocker::MultiAddressSpaceLocker() 505 : 506 fItems(NULL), 507 fCapacity(0), 508 fCount(0), 509 fLocked(false) 510 { 511 } 512 513 514 MultiAddressSpaceLocker::~MultiAddressSpaceLocker() 515 { 516 Unset(); 517 free(fItems); 518 } 519 520 521 /*static*/ int 522 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b) 523 { 524 lock_item* a = (lock_item*)_a; 525 lock_item* b = (lock_item*)_b; 526 return a->space->id - b->space->id; 527 } 528 529 530 bool 531 MultiAddressSpaceLocker::_ResizeIfNeeded() 532 { 533 if (fCount == fCapacity) { 534 lock_item* items = (lock_item*)realloc(fItems, 535 (fCapacity + 4) * sizeof(lock_item)); 536 if (items == NULL) 537 return false; 538 539 fCapacity += 4; 540 fItems = items; 541 } 542 543 return true; 544 } 545 546 547 int32 548 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const 549 { 550 for (int32 i = 0; i < fCount; i++) { 551 if (fItems[i].space == space) 552 return i; 553 } 554 555 return -1; 556 } 557 558 559 status_t 560 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space, 561 bool writeLock, vm_address_space** _space) 562 { 563 if (!space) 564 return B_BAD_VALUE; 565 566 int32 index = _IndexOfAddressSpace(space); 567 if (index < 0) { 568 if (!_ResizeIfNeeded()) { 569 vm_put_address_space(space); 570 return B_NO_MEMORY; 571 } 572 573 lock_item& item = fItems[fCount++]; 574 item.space = space; 575 item.write_lock = writeLock; 576 } else { 577 578 // one reference is enough 579 vm_put_address_space(space); 580 581 fItems[index].write_lock |= writeLock; 582 } 583 584 if (_space != NULL) 585 *_space = space; 586 587 return B_OK; 588 } 589 590 591 inline status_t 592 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock, 593 vm_address_space** _space) 594 { 595 return _AddAddressSpace(vm_get_address_space(team), writeLock, 596 _space); 597 } 598 599 600 inline status_t 601 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock, 602 vm_address_space** _space) 603 { 604 return _AddAddressSpace(get_address_space_by_area_id(area), writeLock, 605 _space); 606 } 607 608 609 void 610 MultiAddressSpaceLocker::Unset() 611 { 612 Unlock(); 613 614 for (int32 i = 0; i < fCount; i++) 615 vm_put_address_space(fItems[i].space); 616 617 fCount = 0; 618 } 619 620 621 status_t 622 MultiAddressSpaceLocker::Lock() 623 { 624 ASSERT(!fLocked); 625 626 qsort(fItems, fCount, sizeof(lock_item), &_CompareItems); 627 628 for (int32 i = 0; i < fCount; i++) { 629 status_t status; 630 if (fItems[i].write_lock) 631 status = rw_lock_write_lock(&fItems[i].space->lock); 632 else 633 status = rw_lock_read_lock(&fItems[i].space->lock); 634 635 if (status < B_OK) { 636 while (--i >= 0) { 637 if (fItems[i].write_lock) 638 
rw_lock_write_unlock(&fItems[i].space->lock); 639 else 640 rw_lock_read_unlock(&fItems[i].space->lock); 641 } 642 return status; 643 } 644 } 645 646 fLocked = true; 647 return B_OK; 648 } 649 650 651 void 652 MultiAddressSpaceLocker::Unlock() 653 { 654 if (!fLocked) 655 return; 656 657 for (int32 i = 0; i < fCount; i++) { 658 if (fItems[i].write_lock) 659 rw_lock_write_unlock(&fItems[i].space->lock); 660 else 661 rw_lock_read_unlock(&fItems[i].space->lock); 662 } 663 664 fLocked = false; 665 } 666 667 668 /*! Adds all address spaces of the areas associated with the given area's cache, 669 locks them, and locks the cache (including a reference to it). It retries 670 until the situation is stable (i.e. the neither cache nor cache's areas 671 changed) or an error occurs. 672 */ 673 status_t 674 MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID, 675 bool writeLockThisOne, bool writeLockOthers, vm_area*& _area, 676 vm_cache** _cache) 677 { 678 // remember the original state 679 int originalCount = fCount; 680 lock_item* originalItems = NULL; 681 if (fCount > 0) { 682 originalItems = new(nothrow) lock_item[fCount]; 683 if (originalItems == NULL) 684 return B_NO_MEMORY; 685 memcpy(originalItems, fItems, fCount * sizeof(lock_item)); 686 } 687 ArrayDeleter<lock_item> _(originalItems); 688 689 // get the cache 690 vm_cache* cache; 691 vm_area* area; 692 status_t error; 693 { 694 AddressSpaceReadLocker locker; 695 error = locker.SetFromArea(areaID, area); 696 if (error != B_OK) 697 return error; 698 699 cache = vm_area_get_locked_cache(area); 700 } 701 702 while (true) { 703 // add all areas 704 vm_area* firstArea = cache->areas; 705 for (vm_area* current = firstArea; current; 706 current = current->cache_next) { 707 error = AddArea(current->id, 708 current == area ? writeLockThisOne : writeLockOthers); 709 if (error != B_OK) { 710 vm_area_put_locked_cache(cache); 711 return error; 712 } 713 } 714 715 // unlock the cache and attempt to lock the address spaces 716 vm_area_put_locked_cache(cache); 717 718 error = Lock(); 719 if (error != B_OK) 720 return error; 721 722 // lock the cache again and check whether anything has changed 723 724 // check whether the area is gone in the meantime 725 rw_lock_read_lock(&sAreaHashLock); 726 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 727 rw_lock_read_unlock(&sAreaHashLock); 728 729 if (area == NULL) { 730 Unlock(); 731 return B_BAD_VALUE; 732 } 733 734 // lock the cache 735 vm_cache* oldCache = cache; 736 cache = vm_area_get_locked_cache(area); 737 738 // If neither the area's cache has changed nor its area list we're 739 // done. 740 if (cache == oldCache && firstArea == cache->areas) { 741 _area = area; 742 if (_cache != NULL) 743 *_cache = cache; 744 return B_OK; 745 } 746 747 // Restore the original state and try again. 748 749 // Unlock the address spaces, but keep the cache locked for the next 750 // iteration. 751 Unlock(); 752 753 // Get an additional reference to the original address spaces. 754 for (int32 i = 0; i < originalCount; i++) 755 atomic_add(&originalItems[i].space->ref_count, 1); 756 757 // Release all references to the current address spaces. 758 for (int32 i = 0; i < fCount; i++) 759 vm_put_address_space(fItems[i].space); 760 761 // Copy over the original state. 
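		// This way the next iteration of the retry loop starts out with only
		// the address spaces the caller had added before calling us; the ones
		// derived from the cache's (possibly changed) area list are re-added
		// from scratch.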
762 fCount = originalCount; 763 if (originalItems != NULL) 764 memcpy(fItems, originalItems, fCount * sizeof(lock_item)); 765 } 766 } 767 768 769 // #pragma mark - 770 771 772 #if VM_PAGE_FAULT_TRACING 773 774 namespace VMPageFaultTracing { 775 776 class PageFaultStart : public AbstractTraceEntry { 777 public: 778 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 779 : 780 fAddress(address), 781 fPC(pc), 782 fWrite(write), 783 fUser(user) 784 { 785 Initialized(); 786 } 787 788 virtual void AddDump(TraceOutput& out) 789 { 790 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 791 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC); 792 } 793 794 private: 795 addr_t fAddress; 796 addr_t fPC; 797 bool fWrite; 798 bool fUser; 799 }; 800 801 802 // page fault errors 803 enum { 804 PAGE_FAULT_ERROR_NO_AREA = 0, 805 PAGE_FAULT_ERROR_KERNEL_ONLY, 806 PAGE_FAULT_ERROR_WRITE_PROTECTED, 807 PAGE_FAULT_ERROR_READ_PROTECTED, 808 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 809 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 810 }; 811 812 813 class PageFaultError : public AbstractTraceEntry { 814 public: 815 PageFaultError(area_id area, status_t error) 816 : 817 fArea(area), 818 fError(error) 819 { 820 Initialized(); 821 } 822 823 virtual void AddDump(TraceOutput& out) 824 { 825 switch (fError) { 826 case PAGE_FAULT_ERROR_NO_AREA: 827 out.Print("page fault error: no area"); 828 break; 829 case PAGE_FAULT_ERROR_KERNEL_ONLY: 830 out.Print("page fault error: area: %ld, kernel only", fArea); 831 break; 832 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 833 out.Print("page fault error: area: %ld, write protected", 834 fArea); 835 break; 836 case PAGE_FAULT_ERROR_READ_PROTECTED: 837 out.Print("page fault error: area: %ld, read protected", fArea); 838 break; 839 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 840 out.Print("page fault error: kernel touching bad user memory"); 841 break; 842 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 843 out.Print("page fault error: no address space"); 844 break; 845 default: 846 out.Print("page fault error: area: %ld, error: %s", fArea, 847 strerror(fError)); 848 break; 849 } 850 } 851 852 private: 853 area_id fArea; 854 status_t fError; 855 }; 856 857 858 class PageFaultDone : public AbstractTraceEntry { 859 public: 860 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 861 vm_page* page) 862 : 863 fArea(area), 864 fTopCache(topCache), 865 fCache(cache), 866 fPage(page) 867 { 868 Initialized(); 869 } 870 871 virtual void AddDump(TraceOutput& out) 872 { 873 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 874 "page: %p", fArea, fTopCache, fCache, fPage); 875 } 876 877 private: 878 area_id fArea; 879 VMCache* fTopCache; 880 VMCache* fCache; 881 vm_page* fPage; 882 }; 883 884 } // namespace VMPageFaultTracing 885 886 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 887 #else 888 # define TPF(x) ; 889 #endif // VM_PAGE_FAULT_TRACING 890 891 892 // #pragma mark - 893 894 895 static int 896 area_compare(void* _area, const void* key) 897 { 898 vm_area* area = (vm_area*)_area; 899 const area_id* id = (const area_id*)key; 900 901 if (area->id == *id) 902 return 0; 903 904 return -1; 905 } 906 907 908 static uint32 909 area_hash(void* _area, const void* key, uint32 range) 910 { 911 vm_area* area = (vm_area*)_area; 912 const area_id* id = (const area_id*)key; 913 914 if (area != NULL) 915 return area->id % range; 916 917 return (uint32)*id % range; 918 } 919 920 921 static vm_address_space* 922 get_address_space_by_area_id(area_id id) 923 { 924 
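	// The caller owns the reference returned here and has to drop it again
	// with vm_put_address_space() when done, for instance (sketch):
	//
	//	vm_address_space* space = get_address_space_by_area_id(id);
	//	if (space != NULL) {
	//		...
	//		vm_put_address_space(space);
	//	}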
vm_address_space* addressSpace = NULL; 925 926 rw_lock_read_lock(&sAreaHashLock); 927 928 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 929 if (area != NULL) { 930 addressSpace = area->address_space; 931 atomic_add(&addressSpace->ref_count, 1); 932 } 933 934 rw_lock_read_unlock(&sAreaHashLock); 935 936 return addressSpace; 937 } 938 939 940 //! You need to have the address space locked when calling this function 941 static vm_area* 942 lookup_area(vm_address_space* addressSpace, area_id id) 943 { 944 rw_lock_read_lock(&sAreaHashLock); 945 946 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 947 if (area != NULL && area->address_space != addressSpace) 948 area = NULL; 949 950 rw_lock_read_unlock(&sAreaHashLock); 951 952 return area; 953 } 954 955 956 static vm_area* 957 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags) 958 { 959 vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area)); 960 if (reserved == NULL) 961 return NULL; 962 963 memset(reserved, 0, sizeof(vm_area)); 964 reserved->id = RESERVED_AREA_ID; 965 // this marks it as reserved space 966 reserved->protection = flags; 967 reserved->address_space = addressSpace; 968 969 return reserved; 970 } 971 972 973 static vm_area* 974 create_area_struct(vm_address_space* addressSpace, const char* name, 975 uint32 wiring, uint32 protection) 976 { 977 // restrict the area name to B_OS_NAME_LENGTH 978 size_t length = strlen(name) + 1; 979 if (length > B_OS_NAME_LENGTH) 980 length = B_OS_NAME_LENGTH; 981 982 vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area)); 983 if (area == NULL) 984 return NULL; 985 986 area->name = (char*)malloc_nogrow(length); 987 if (area->name == NULL) { 988 free(area); 989 return NULL; 990 } 991 strlcpy(area->name, name, length); 992 993 area->id = atomic_add(&sNextAreaID, 1); 994 area->base = 0; 995 area->size = 0; 996 area->protection = protection; 997 area->wiring = wiring; 998 area->memory_type = 0; 999 1000 area->cache = NULL; 1001 area->cache_offset = 0; 1002 1003 area->address_space = addressSpace; 1004 area->address_space_next = NULL; 1005 area->cache_next = area->cache_prev = NULL; 1006 area->hash_next = NULL; 1007 new (&area->mappings) vm_area_mappings; 1008 area->page_protections = NULL; 1009 1010 return area; 1011 } 1012 1013 1014 /*! Finds a reserved area that covers the region spanned by \a start and 1015 \a size, inserts the \a area into that region and makes sure that 1016 there are reserved regions for the remaining parts. 1017 */ 1018 static status_t 1019 find_reserved_area(vm_address_space* addressSpace, addr_t start, 1020 addr_t size, vm_area* area) 1021 { 1022 vm_area* last = NULL; 1023 vm_area* next; 1024 1025 next = addressSpace->areas; 1026 while (next != NULL) { 1027 if (next->base <= start 1028 && next->base + (next->size - 1) >= start + (size - 1)) { 1029 // This area covers the requested range 1030 if (next->id != RESERVED_AREA_ID) { 1031 // but it's not reserved space, it's a real area 1032 return B_BAD_VALUE; 1033 } 1034 1035 break; 1036 } 1037 1038 last = next; 1039 next = next->address_space_next; 1040 } 1041 1042 if (next == NULL) 1043 return B_ENTRY_NOT_FOUND; 1044 1045 // Now we have to transfer the requested part of the reserved 1046 // range to the new area - and remove, resize or split the old 1047 // reserved area. 
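	// Three cases are handled below: the new area starts right at the base of
	// the reserved range, it ends exactly at its end, or it lies somewhere in
	// the middle, in which case the reserved range is split in two.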
	if (start == next->base) {
		// the area starts at the beginning of the reserved range
		if (last)
			last->address_space_next = area;
		else
			addressSpace->areas = area;

		if (size == next->size) {
			// the new area fully covers the reserved range
			area->address_space_next = next->address_space_next;
			vm_put_address_space(addressSpace);
			free(next);
		} else {
			// resize the reserved range behind the area
			area->address_space_next = next;
			next->base += size;
			next->size -= size;
		}
	} else if (start + size == next->base + next->size) {
		// the area is at the end of the reserved range
		area->address_space_next = next->address_space_next;
		next->address_space_next = area;

		// resize the reserved range before the area
		next->size = start - next->base;
	} else {
		// the area splits the reserved range into two separate ones
		// we need a new reserved area to cover this space
		vm_area* reserved = create_reserved_area_struct(addressSpace,
			next->protection);
		if (reserved == NULL)
			return B_NO_MEMORY;

		atomic_add(&addressSpace->ref_count, 1);
		reserved->address_space_next = next->address_space_next;
		area->address_space_next = reserved;
		next->address_space_next = area;

		// resize regions
		reserved->size = next->base + next->size - start - size;
		next->size = start - next->base;
		reserved->base = start + size;
		reserved->cache_offset = next->cache_offset;
	}

	area->base = start;
	area->size = size;
	addressSpace->change_count++;

	return B_OK;
}


/*!	Verifies that an area with the given aligned base and size fits into
	the spot defined by base and limit and checks for overflows.
*/
static inline bool
is_valid_spot(addr_t base, addr_t alignedBase, addr_t size, addr_t limit)
{
	return (alignedBase >= base && alignedBase + (size - 1) > alignedBase
		&& alignedBase + (size - 1) <= limit);
}


/*!	Must be called with this address space's write lock held */
static status_t
find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start,
	addr_t size, addr_t end, uint32 addressSpec, vm_area* area)
{
	vm_area* last = NULL;
	vm_area* next;
	bool foundSpot = false;

	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
		size, end, addressSpec, area));

	// do some sanity checking
	if (start < addressSpace->base || size == 0
		|| end > addressSpace->base + (addressSpace->size - 1)
		|| start + (size - 1) > end)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS && area->id != RESERVED_AREA_ID) {
		// search for a reserved area
		status_t status = find_reserved_area(addressSpace, start, size, area);
		if (status == B_OK || status == B_BAD_VALUE)
			return status;

		// There was no reserved area, and the slot doesn't seem to be used
		// already
		// TODO: this could be further optimized.
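		// (find_reserved_area() only detects areas that cover the complete
		// range; a partially overlapping area is caught by the
		// B_EXACT_ADDRESS case of the switch below, which re-checks the
		// neighbors of the requested spot.)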
1141 } 1142 1143 size_t alignment = B_PAGE_SIZE; 1144 if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) { 1145 // align the memory to the next power of two of the size 1146 while (alignment < size) 1147 alignment <<= 1; 1148 } 1149 1150 start = ROUNDUP(start, alignment); 1151 1152 // walk up to the spot where we should start searching 1153 second_chance: 1154 next = addressSpace->areas; 1155 while (next != NULL) { 1156 if (next->base > start + (size - 1)) { 1157 // we have a winner 1158 break; 1159 } 1160 1161 last = next; 1162 next = next->address_space_next; 1163 } 1164 1165 // find the right spot depending on the address specification - the area 1166 // will be inserted directly after "last" ("next" is not referenced anymore) 1167 1168 switch (addressSpec) { 1169 case B_ANY_ADDRESS: 1170 case B_ANY_KERNEL_ADDRESS: 1171 case B_ANY_KERNEL_BLOCK_ADDRESS: 1172 { 1173 // find a hole big enough for a new area 1174 if (last == NULL) { 1175 // see if we can build it at the beginning of the virtual map 1176 addr_t alignedBase = ROUNDUP(addressSpace->base, alignment); 1177 if (is_valid_spot(addressSpace->base, alignedBase, size, 1178 next == NULL ? end : next->base)) { 1179 foundSpot = true; 1180 area->base = alignedBase; 1181 break; 1182 } 1183 1184 last = next; 1185 next = next->address_space_next; 1186 } 1187 1188 // keep walking 1189 while (next != NULL) { 1190 addr_t alignedBase = ROUNDUP(last->base + last->size, alignment); 1191 if (is_valid_spot(last->base + (last->size - 1), alignedBase, 1192 size, next->base)) { 1193 foundSpot = true; 1194 area->base = alignedBase; 1195 break; 1196 } 1197 1198 last = next; 1199 next = next->address_space_next; 1200 } 1201 1202 if (foundSpot) 1203 break; 1204 1205 addr_t alignedBase = ROUNDUP(last->base + last->size, alignment); 1206 if (is_valid_spot(last->base + (last->size - 1), alignedBase, 1207 size, end)) { 1208 // got a spot 1209 foundSpot = true; 1210 area->base = alignedBase; 1211 break; 1212 } else if (area->id != RESERVED_AREA_ID) { 1213 // We didn't find a free spot - if there are any reserved areas, 1214 // we can now test those for free space 1215 // TODO: it would make sense to start with the biggest of them 1216 next = addressSpace->areas; 1217 for (last = NULL; next != NULL; 1218 next = next->address_space_next) { 1219 if (next->id != RESERVED_AREA_ID) { 1220 last = next; 1221 continue; 1222 } 1223 1224 // TODO: take free space after the reserved area into 1225 // account! 
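				// Three placements are tried below: the reserved range is
				// consumed completely, the new area is carved from its
				// beginning, or it is carved from its end (resizing the
				// reserved range accordingly).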
1226 addr_t alignedBase = ROUNDUP(next->base, alignment); 1227 if (next->base == alignedBase && next->size == size) { 1228 // The reserved area is entirely covered, and thus, 1229 // removed 1230 if (last) 1231 last->address_space_next = next->address_space_next; 1232 else 1233 addressSpace->areas = next->address_space_next; 1234 1235 foundSpot = true; 1236 area->base = alignedBase; 1237 free(next); 1238 break; 1239 } 1240 1241 if ((next->protection & RESERVED_AVOID_BASE) == 0 1242 && alignedBase == next->base && next->size >= size) { 1243 // The new area will be placed at the beginning of the 1244 // reserved area and the reserved area will be offset 1245 // and resized 1246 foundSpot = true; 1247 next->base += size; 1248 next->size -= size; 1249 area->base = alignedBase; 1250 break; 1251 } 1252 1253 if (is_valid_spot(next->base, alignedBase, size, 1254 next->base + (next->size - 1))) { 1255 // The new area will be placed at the end of the 1256 // reserved area, and the reserved area will be resized 1257 // to make space 1258 alignedBase = ROUNDDOWN(next->base + next->size - size, 1259 alignment); 1260 1261 foundSpot = true; 1262 next->size = alignedBase - next->base; 1263 area->base = alignedBase; 1264 last = next; 1265 break; 1266 } 1267 1268 last = next; 1269 } 1270 } 1271 break; 1272 } 1273 1274 case B_BASE_ADDRESS: 1275 { 1276 // find a hole big enough for a new area beginning with "start" 1277 if (last == NULL) { 1278 // see if we can build it at the beginning of the specified start 1279 if (next == NULL || next->base > start + (size - 1)) { 1280 foundSpot = true; 1281 area->base = start; 1282 break; 1283 } 1284 1285 last = next; 1286 next = next->address_space_next; 1287 } 1288 1289 // keep walking 1290 while (next != NULL) { 1291 if (next->base - (last->base + last->size) >= size) { 1292 // we found a spot (it'll be filled up below) 1293 break; 1294 } 1295 1296 last = next; 1297 next = next->address_space_next; 1298 } 1299 1300 addr_t lastEnd = last->base + (last->size - 1); 1301 if (next != NULL || end - lastEnd >= size) { 1302 // got a spot 1303 foundSpot = true; 1304 if (lastEnd < start) 1305 area->base = start; 1306 else 1307 area->base = lastEnd + 1; 1308 break; 1309 } 1310 1311 // we didn't find a free spot in the requested range, so we'll 1312 // try again without any restrictions 1313 start = addressSpace->base; 1314 addressSpec = B_ANY_ADDRESS; 1315 last = NULL; 1316 goto second_chance; 1317 } 1318 1319 case B_EXACT_ADDRESS: 1320 // see if we can create it exactly here 1321 if ((last == NULL || last->base + (last->size - 1) < start) 1322 && (next == NULL || next->base > start + (size - 1))) { 1323 foundSpot = true; 1324 area->base = start; 1325 break; 1326 } 1327 break; 1328 default: 1329 return B_BAD_VALUE; 1330 } 1331 1332 if (!foundSpot) 1333 return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY; 1334 1335 area->size = size; 1336 if (last) { 1337 area->address_space_next = last->address_space_next; 1338 last->address_space_next = area; 1339 } else { 1340 area->address_space_next = addressSpace->areas; 1341 addressSpace->areas = area; 1342 } 1343 1344 addressSpace->change_count++; 1345 return B_OK; 1346 } 1347 1348 1349 /*! This inserts the area you pass into the specified address space. 1350 It will also set the "_address" argument to its base address when 1351 the call succeeds. 1352 You need to hold the vm_address_space write lock. 
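	On success the area has also been linked into the address space's area
	list; inserting it into the global area hash and attaching a cache are
	left to the caller (see map_backing_store()).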
1353 */ 1354 static status_t 1355 insert_area(vm_address_space* addressSpace, void** _address, 1356 uint32 addressSpec, addr_t size, vm_area* area) 1357 { 1358 addr_t searchBase, searchEnd; 1359 status_t status; 1360 1361 switch (addressSpec) { 1362 case B_EXACT_ADDRESS: 1363 searchBase = (addr_t)*_address; 1364 searchEnd = (addr_t)*_address + (size - 1); 1365 break; 1366 1367 case B_BASE_ADDRESS: 1368 searchBase = (addr_t)*_address; 1369 searchEnd = addressSpace->base + (addressSpace->size - 1); 1370 break; 1371 1372 case B_ANY_ADDRESS: 1373 case B_ANY_KERNEL_ADDRESS: 1374 case B_ANY_KERNEL_BLOCK_ADDRESS: 1375 searchBase = addressSpace->base; 1376 // TODO: remove this again when vm86 mode is moved into the kernel 1377 // completely (currently needs a userland address space!) 1378 if (searchBase == USER_BASE) 1379 searchBase = USER_BASE_ANY; 1380 searchEnd = addressSpace->base + (addressSpace->size - 1); 1381 break; 1382 1383 default: 1384 return B_BAD_VALUE; 1385 } 1386 1387 status = find_and_insert_area_slot(addressSpace, searchBase, size, 1388 searchEnd, addressSpec, area); 1389 if (status == B_OK) { 1390 *_address = (void*)area->base; 1391 1392 if (addressSpace == vm_kernel_address_space()) 1393 sKernelAddressSpaceLeft -= area->size; 1394 } 1395 1396 return status; 1397 } 1398 1399 1400 static inline void 1401 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection) 1402 { 1403 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 1404 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1405 uint8& entry = area->page_protections[pageIndex / 2]; 1406 if (pageIndex % 2 == 0) 1407 entry = (entry & 0xf0) | protection; 1408 else 1409 entry = (entry & 0x0f) | (protection << 4); 1410 } 1411 1412 1413 static inline uint32 1414 get_area_page_protection(vm_area* area, addr_t pageAddress) 1415 { 1416 if (area->page_protections == NULL) 1417 return area->protection; 1418 1419 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1420 uint32 protection = area->page_protections[pageIndex / 2]; 1421 if (pageIndex % 2 == 0) 1422 protection &= 0x0f; 1423 else 1424 protection >>= 4; 1425 1426 return protection | B_KERNEL_READ_AREA 1427 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 1428 } 1429 1430 1431 /*! Cuts a piece out of an area. If the given cut range covers the complete 1432 area, it is deleted. If it covers the beginning or the end, the area is 1433 resized accordingly. If the range covers some part in the middle of the 1434 area, it is split in two; in this case the second area is returned via 1435 \a _secondArea (the variable is left untouched in the other cases). 1436 The address space must be write locked. 1437 */ 1438 static status_t 1439 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address, 1440 addr_t lastAddress, vm_area** _secondArea, bool kernel) 1441 { 1442 // Does the cut range intersect with the area at all? 1443 addr_t areaLast = area->base + (area->size - 1); 1444 if (area->base > lastAddress || areaLast < address) 1445 return B_OK; 1446 1447 // Is the area fully covered? 1448 if (area->base >= address && areaLast <= lastAddress) { 1449 delete_area(addressSpace, area); 1450 return B_OK; 1451 } 1452 1453 AreaCacheLocker cacheLocker(area); 1454 vm_cache* cache = area->cache; 1455 1456 // Cut the end only? 
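	// (The cache is shrunk along with the area, but only if this area is the
	// cache's sole user, i.e. no other areas are attached to it and it has no
	// consumers.)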
1457 if (areaLast <= lastAddress) { 1458 addr_t newSize = address - area->base; 1459 1460 // unmap pages 1461 vm_unmap_pages(area, address, area->size - newSize, false); 1462 1463 // If no one else uses the area's cache, we can resize it, too. 1464 if (cache->areas == area && area->cache_next == NULL 1465 && list_is_empty(&cache->consumers)) { 1466 status_t error = cache->Resize(cache->virtual_base + newSize); 1467 if (error != B_OK) 1468 return error; 1469 } 1470 1471 area->size = newSize; 1472 1473 return B_OK; 1474 } 1475 1476 // Cut the beginning only? 1477 if (area->base >= address) { 1478 addr_t newBase = lastAddress + 1; 1479 addr_t newSize = areaLast - lastAddress; 1480 1481 // unmap pages 1482 vm_unmap_pages(area, area->base, newBase - area->base, false); 1483 1484 // TODO: If no one else uses the area's cache, we should resize it, too! 1485 1486 area->cache_offset += newBase - area->base; 1487 area->base = newBase; 1488 area->size = newSize; 1489 1490 return B_OK; 1491 } 1492 1493 // The tough part -- cut a piece out of the middle of the area. 1494 // We do that by shrinking the area to the begin section and creating a 1495 // new area for the end section. 1496 1497 addr_t firstNewSize = address - area->base; 1498 addr_t secondBase = lastAddress + 1; 1499 addr_t secondSize = areaLast - lastAddress; 1500 1501 // unmap pages 1502 vm_unmap_pages(area, address, area->size - firstNewSize, false); 1503 1504 // resize the area 1505 addr_t oldSize = area->size; 1506 area->size = firstNewSize; 1507 1508 // TODO: If no one else uses the area's cache, we might want to create a 1509 // new cache for the second area, transfer the concerned pages from the 1510 // first cache to it and resize the first cache. 1511 1512 // map the second area 1513 vm_area* secondArea; 1514 void* secondBaseAddress = (void*)secondBase; 1515 status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress, 1516 area->cache_offset + (secondBase - area->base), secondSize, 1517 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 1518 &secondArea, area->name, false, kernel); 1519 if (error != B_OK) { 1520 area->size = oldSize; 1521 return error; 1522 } 1523 1524 // We need a cache reference for the new area. 1525 cache->AcquireRefLocked(); 1526 1527 if (_secondArea != NULL) 1528 *_secondArea = secondArea; 1529 1530 return B_OK; 1531 } 1532 1533 1534 static inline void 1535 increment_page_wired_count(vm_page* page) 1536 { 1537 // TODO: needs to be atomic on all platforms! 1538 // ... but at least the check isn't. Consequently we should hold 1539 // sMappingLock, which would allows us to even avoid atomic_add() on 1540 // gMappedPagesCount. 1541 if (page->wired_count++ == 0) { 1542 if (page->mappings.IsEmpty()) 1543 atomic_add(&gMappedPagesCount, 1); 1544 } 1545 } 1546 1547 1548 static inline void 1549 decrement_page_wired_count(vm_page* page) 1550 { 1551 if (--page->wired_count == 0) { 1552 // TODO: needs to be atomic on all platforms! 1553 // See above! 1554 if (page->mappings.IsEmpty()) 1555 atomic_add(&gMappedPagesCount, -1); 1556 } 1557 } 1558 1559 1560 /*! Deletes all areas in the given address range. 1561 The address space must be write-locked. 1562 */ 1563 static status_t 1564 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size, 1565 bool kernel) 1566 { 1567 size = PAGE_ALIGN(size); 1568 addr_t lastAddress = address + (size - 1); 1569 1570 // Check, whether the caller is allowed to modify the concerned areas. 
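	// This is done in a separate pass, so that either all affected areas can
	// be modified or none of them is touched: userland callers must not unmap
	// areas that are protected with B_KERNEL_AREA.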
1571 vm_area* area; 1572 if (!kernel) { 1573 area = addressSpace->areas; 1574 while (area != NULL) { 1575 vm_area* nextArea = area->address_space_next; 1576 1577 if (area->id != RESERVED_AREA_ID) { 1578 addr_t areaLast = area->base + (area->size - 1); 1579 if (area->base < lastAddress && address < areaLast) { 1580 if ((area->protection & B_KERNEL_AREA) != 0) 1581 return B_NOT_ALLOWED; 1582 } 1583 } 1584 1585 area = nextArea; 1586 } 1587 } 1588 1589 area = addressSpace->areas; 1590 while (area != NULL) { 1591 vm_area* nextArea = area->address_space_next; 1592 1593 if (area->id != RESERVED_AREA_ID) { 1594 addr_t areaLast = area->base + (area->size - 1); 1595 if (area->base < lastAddress && address < areaLast) { 1596 status_t error = cut_area(addressSpace, area, address, 1597 lastAddress, NULL, kernel); 1598 if (error != B_OK) 1599 return error; 1600 // Failing after already messing with areas is ugly, but we 1601 // can't do anything about it. 1602 } 1603 } 1604 1605 area = nextArea; 1606 } 1607 1608 return B_OK; 1609 } 1610 1611 1612 /*! You need to hold the lock of the cache and the write lock of the address 1613 space when calling this function. 1614 Note, that in case of error your cache will be temporarily unlocked. 1615 */ 1616 static status_t 1617 map_backing_store(vm_address_space* addressSpace, vm_cache* cache, 1618 void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 1619 int wiring, int protection, int mapping, vm_area** _area, 1620 const char* areaName, bool unmapAddressRange, bool kernel) 1621 { 1622 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, " 1623 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 1624 "'%s'\n", addressSpace, cache, *_virtualAddress, offset, size, 1625 addressSpec, wiring, protection, _area, areaName)); 1626 cache->AssertLocked(); 1627 1628 vm_area* area = create_area_struct(addressSpace, areaName, wiring, 1629 protection); 1630 if (area == NULL) 1631 return B_NO_MEMORY; 1632 1633 status_t status; 1634 1635 // if this is a private map, we need to create a new cache 1636 // to handle the private copies of pages as they are written to 1637 vm_cache* sourceCache = cache; 1638 if (mapping == REGION_PRIVATE_MAP) { 1639 vm_cache* newCache; 1640 1641 // create an anonymous cache 1642 status = VMCacheFactory::CreateAnonymousCache(newCache, 1643 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true); 1644 if (status != B_OK) 1645 goto err1; 1646 1647 newCache->Lock(); 1648 newCache->temporary = 1; 1649 newCache->scan_skip = cache->scan_skip; 1650 newCache->virtual_base = offset; 1651 newCache->virtual_end = offset + size; 1652 1653 cache->AddConsumer(newCache); 1654 1655 cache = newCache; 1656 } 1657 1658 status = cache->SetMinimalCommitment(size); 1659 if (status != B_OK) 1660 goto err2; 1661 1662 // check to see if this address space has entered DELETE state 1663 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 1664 // okay, someone is trying to delete this address space now, so we can't 1665 // insert the area, so back out 1666 status = B_BAD_TEAM_ID; 1667 goto err2; 1668 } 1669 1670 if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) { 1671 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 1672 size, kernel); 1673 if (status != B_OK) 1674 goto err2; 1675 } 1676 1677 status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area); 1678 if (status != B_OK) { 1679 // TODO: wait and try again once this is working in the backend 1680 #if 0 1681 
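		// (Disabled: this would notify the low resource manager about the
		// kernel address space shortage; see the TODO above about retrying.)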
if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) { 1682 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, 1683 0, 0); 1684 } 1685 #endif 1686 goto err2; 1687 } 1688 1689 // attach the cache to the area 1690 area->cache = cache; 1691 area->cache_offset = offset; 1692 1693 // point the cache back to the area 1694 cache->InsertAreaLocked(area); 1695 if (mapping == REGION_PRIVATE_MAP) 1696 cache->Unlock(); 1697 1698 // insert the area in the global area hash table 1699 rw_lock_write_lock(&sAreaHashLock); 1700 hash_insert(sAreaHash, area); 1701 rw_lock_write_unlock(&sAreaHashLock); 1702 1703 // grab a ref to the address space (the area holds this) 1704 atomic_add(&addressSpace->ref_count, 1); 1705 1706 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1707 // cache, sourceCache, areaName, area); 1708 1709 *_area = area; 1710 return B_OK; 1711 1712 err2: 1713 if (mapping == REGION_PRIVATE_MAP) { 1714 // We created this cache, so we must delete it again. Note, that we 1715 // need to temporarily unlock the source cache or we'll otherwise 1716 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1717 sourceCache->Unlock(); 1718 cache->ReleaseRefAndUnlock(); 1719 sourceCache->Lock(); 1720 } 1721 err1: 1722 free(area->name); 1723 free(area); 1724 return status; 1725 } 1726 1727 1728 status_t 1729 vm_block_address_range(const char* name, void* address, addr_t size) 1730 { 1731 if (!arch_vm_supports_protection(0)) 1732 return B_NOT_SUPPORTED; 1733 1734 AddressSpaceWriteLocker locker; 1735 status_t status = locker.SetTo(vm_kernel_address_space_id()); 1736 if (status != B_OK) 1737 return status; 1738 1739 vm_address_space* addressSpace = locker.AddressSpace(); 1740 1741 // create an anonymous cache 1742 vm_cache* cache; 1743 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false); 1744 if (status != B_OK) 1745 return status; 1746 1747 cache->temporary = 1; 1748 cache->virtual_end = size; 1749 cache->scan_skip = 1; 1750 cache->Lock(); 1751 1752 vm_area* area; 1753 void* areaAddress = address; 1754 status = map_backing_store(addressSpace, cache, &areaAddress, 0, size, 1755 B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name, 1756 false, true); 1757 if (status != B_OK) { 1758 cache->ReleaseRefAndUnlock(); 1759 return status; 1760 } 1761 1762 cache->Unlock(); 1763 area->cache_type = CACHE_TYPE_RAM; 1764 return area->id; 1765 } 1766 1767 1768 status_t 1769 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1770 { 1771 AddressSpaceWriteLocker locker(team); 1772 if (!locker.IsLocked()) 1773 return B_BAD_TEAM_ID; 1774 1775 // check to see if this address space has entered DELETE state 1776 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1777 // okay, someone is trying to delete this address space now, so we can't 1778 // insert the area, so back out 1779 return B_BAD_TEAM_ID; 1780 } 1781 1782 // search area list and remove any matching reserved ranges 1783 1784 vm_area* area = locker.AddressSpace()->areas; 1785 vm_area* last = NULL; 1786 while (area) { 1787 // the area must be completely part of the reserved range 1788 if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address 1789 && area->base + area->size <= (addr_t)address + size) { 1790 // remove reserved range 1791 vm_area* reserved = area; 1792 if (last) 1793 last->address_space_next = reserved->address_space_next; 1794 else 1795 locker.AddressSpace()->areas = reserved->address_space_next; 1796 1797 area = 
reserved->address_space_next; 1798 vm_put_address_space(locker.AddressSpace()); 1799 free(reserved); 1800 continue; 1801 } 1802 1803 last = area; 1804 area = area->address_space_next; 1805 } 1806 1807 return B_OK; 1808 } 1809 1810 1811 status_t 1812 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1813 addr_t size, uint32 flags) 1814 { 1815 if (size == 0) 1816 return B_BAD_VALUE; 1817 1818 AddressSpaceWriteLocker locker(team); 1819 if (!locker.IsLocked()) 1820 return B_BAD_TEAM_ID; 1821 1822 // check to see if this address space has entered DELETE state 1823 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1824 // okay, someone is trying to delete this address space now, so we 1825 // can't insert the area, let's back out 1826 return B_BAD_TEAM_ID; 1827 } 1828 1829 vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags); 1830 if (area == NULL) 1831 return B_NO_MEMORY; 1832 1833 status_t status = insert_area(locker.AddressSpace(), _address, addressSpec, 1834 size, area); 1835 if (status != B_OK) { 1836 free(area); 1837 return status; 1838 } 1839 1840 // the area is now reserved! 1841 1842 area->cache_offset = area->base; 1843 // we cache the original base address here 1844 1845 atomic_add(&locker.AddressSpace()->ref_count, 1); 1846 return B_OK; 1847 } 1848 1849 1850 area_id 1851 vm_create_anonymous_area(team_id team, const char* name, void** address, 1852 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 1853 addr_t physicalAddress, uint32 flags, bool kernel) 1854 { 1855 vm_area* area; 1856 vm_cache* cache; 1857 vm_page* page = NULL; 1858 bool isStack = (protection & B_STACK_AREA) != 0; 1859 page_num_t guardPages; 1860 bool canOvercommit = false; 1861 uint32 newPageState = (flags & CREATE_AREA_DONT_CLEAR) != 0 1862 ? PAGE_STATE_FREE : PAGE_STATE_CLEAR; 1863 1864 TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size)); 1865 1866 size = PAGE_ALIGN(size); 1867 1868 if (size == 0) 1869 return B_BAD_VALUE; 1870 if (!arch_vm_supports_protection(protection)) 1871 return B_NOT_SUPPORTED; 1872 1873 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1874 canOvercommit = true; 1875 1876 #ifdef DEBUG_KERNEL_STACKS 1877 if ((protection & B_KERNEL_STACK_AREA) != 0) 1878 isStack = true; 1879 #endif 1880 1881 // check parameters 1882 switch (addressSpec) { 1883 case B_ANY_ADDRESS: 1884 case B_EXACT_ADDRESS: 1885 case B_BASE_ADDRESS: 1886 case B_ANY_KERNEL_ADDRESS: 1887 case B_ANY_KERNEL_BLOCK_ADDRESS: 1888 break; 1889 case B_PHYSICAL_BASE_ADDRESS: 1890 physicalAddress = (addr_t)*address; 1891 addressSpec = B_ANY_KERNEL_ADDRESS; 1892 break; 1893 1894 default: 1895 return B_BAD_VALUE; 1896 } 1897 1898 if (physicalAddress != 0) 1899 wiring = B_CONTIGUOUS; 1900 1901 bool doReserveMemory = false; 1902 switch (wiring) { 1903 case B_NO_LOCK: 1904 break; 1905 case B_FULL_LOCK: 1906 case B_LAZY_LOCK: 1907 case B_CONTIGUOUS: 1908 doReserveMemory = true; 1909 break; 1910 case B_ALREADY_WIRED: 1911 break; 1912 case B_LOMEM: 1913 //case B_SLOWMEM: 1914 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 1915 wiring = B_FULL_LOCK; 1916 doReserveMemory = true; 1917 break; 1918 default: 1919 return B_BAD_VALUE; 1920 } 1921 1922 // For full lock or contiguous areas we're also going to map the pages and 1923 // thus need to reserve pages for the mapping backend upfront. 
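	// map_max_pages_need() reports how many extra pages the translation map
	// backend may need (for page tables and the like) when mapping a range of
	// this size; they are reserved below together with the area's own pages,
	// so the actual mapping doesn't have to allocate them while the address
	// space lock is held.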
1924 addr_t reservedMapPages = 0; 1925 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1926 AddressSpaceWriteLocker locker; 1927 status_t status = locker.SetTo(team); 1928 if (status != B_OK) 1929 return status; 1930 1931 vm_translation_map* map = &locker.AddressSpace()->translation_map; 1932 reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 1933 } 1934 1935 // Reserve memory before acquiring the address space lock. This reduces the 1936 // chances of failure, since while holding the write lock to the address 1937 // space (if it is the kernel address space that is), the low memory handler 1938 // won't be able to free anything for us. 1939 addr_t reservedMemory = 0; 1940 if (doReserveMemory) { 1941 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1942 if (vm_try_reserve_memory(size, timeout) != B_OK) 1943 return B_NO_MEMORY; 1944 reservedMemory = size; 1945 // TODO: We don't reserve the memory for the pages for the page 1946 // directories/tables. We actually need to do since we currently don't 1947 // reclaim them (and probably can't reclaim all of them anyway). Thus 1948 // there are actually less physical pages than there should be, which 1949 // can get the VM into trouble in low memory situations. 1950 } 1951 1952 AddressSpaceWriteLocker locker; 1953 vm_address_space* addressSpace; 1954 status_t status; 1955 1956 // For full lock areas reserve the pages before locking the address 1957 // space. E.g. block caches can't release their memory while we hold the 1958 // address space lock. 1959 page_num_t reservedPages = reservedMapPages; 1960 if (wiring == B_FULL_LOCK) 1961 reservedPages += size / B_PAGE_SIZE; 1962 if (reservedPages > 0) { 1963 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1964 if (!vm_page_try_reserve_pages(reservedPages)) { 1965 reservedPages = 0; 1966 status = B_WOULD_BLOCK; 1967 goto err0; 1968 } 1969 } else 1970 vm_page_reserve_pages(reservedPages); 1971 } 1972 1973 status = locker.SetTo(team); 1974 if (status != B_OK) 1975 goto err0; 1976 1977 addressSpace = locker.AddressSpace(); 1978 1979 if (wiring == B_CONTIGUOUS) { 1980 // we try to allocate the page run here upfront as this may easily 1981 // fail for obvious reasons 1982 page = vm_page_allocate_page_run(newPageState, physicalAddress, 1983 size / B_PAGE_SIZE); 1984 if (page == NULL) { 1985 status = B_NO_MEMORY; 1986 goto err0; 1987 } 1988 } 1989 1990 // create an anonymous cache 1991 // if it's a stack, make sure that two pages are available at least 1992 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1993 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1994 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1995 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1996 wiring == B_NO_LOCK); 1997 if (status != B_OK) 1998 goto err1; 1999 2000 cache->temporary = 1; 2001 cache->virtual_end = size; 2002 cache->committed_size = reservedMemory; 2003 // TODO: This should be done via a method. 
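	// The cache has taken over the memory reservation (committed_size above);
	// clear the local variable so that the error path doesn't unreserve the
	// same memory a second time.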
2004 reservedMemory = 0; 2005 2006 switch (wiring) { 2007 case B_LAZY_LOCK: 2008 case B_FULL_LOCK: 2009 case B_CONTIGUOUS: 2010 case B_ALREADY_WIRED: 2011 cache->scan_skip = 1; 2012 break; 2013 case B_NO_LOCK: 2014 cache->scan_skip = 0; 2015 break; 2016 } 2017 2018 cache->Lock(); 2019 2020 status = map_backing_store(addressSpace, cache, address, 0, size, 2021 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 2022 (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel); 2023 2024 if (status != B_OK) { 2025 cache->ReleaseRefAndUnlock(); 2026 goto err1; 2027 } 2028 2029 locker.DegradeToReadLock(); 2030 2031 switch (wiring) { 2032 case B_NO_LOCK: 2033 case B_LAZY_LOCK: 2034 // do nothing - the pages are mapped in as needed 2035 break; 2036 2037 case B_FULL_LOCK: 2038 { 2039 // Allocate and map all pages for this area 2040 2041 off_t offset = 0; 2042 for (addr_t address = area->base; 2043 address < area->base + (area->size - 1); 2044 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 2045 #ifdef DEBUG_KERNEL_STACKS 2046 # ifdef STACK_GROWS_DOWNWARDS 2047 if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES 2048 * B_PAGE_SIZE) 2049 # else 2050 if (isStack && address >= area->base + area->size 2051 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 2052 # endif 2053 continue; 2054 #endif 2055 vm_page* page = vm_page_allocate_page(newPageState, true); 2056 cache->InsertPage(page, offset); 2057 vm_map_page(area, page, address, protection); 2058 2059 // Periodically unreserve pages we've already allocated, so that 2060 // we don't unnecessarily increase the pressure on the VM. 2061 if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) { 2062 page_num_t toUnreserve = 128; 2063 vm_page_unreserve_pages(toUnreserve); 2064 reservedPages -= toUnreserve; 2065 } 2066 } 2067 2068 break; 2069 } 2070 2071 case B_ALREADY_WIRED: 2072 { 2073 // The pages should already be mapped. This is only really useful 2074 // during boot time. Find the appropriate vm_page objects and stick 2075 // them in the cache object. 
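			// The pages are additionally marked wired below, which keeps the
			// page daemon away from them; their translations already exist
			// and are only looked up here.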
2076 vm_translation_map* map = &addressSpace->translation_map; 2077 off_t offset = 0; 2078 2079 if (!gKernelStartup) 2080 panic("ALREADY_WIRED flag used outside kernel startup\n"); 2081 2082 map->ops->lock(map); 2083 2084 for (addr_t virtualAddress = area->base; virtualAddress < area->base 2085 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2086 offset += B_PAGE_SIZE) { 2087 addr_t physicalAddress; 2088 uint32 flags; 2089 status = map->ops->query(map, virtualAddress, 2090 &physicalAddress, &flags); 2091 if (status < B_OK) { 2092 panic("looking up mapping failed for va 0x%lx\n", 2093 virtualAddress); 2094 } 2095 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2096 if (page == NULL) { 2097 panic("looking up page failed for pa 0x%lx\n", 2098 physicalAddress); 2099 } 2100 2101 increment_page_wired_count(page); 2102 vm_page_set_state(page, PAGE_STATE_WIRED); 2103 cache->InsertPage(page, offset); 2104 } 2105 2106 map->ops->unlock(map); 2107 break; 2108 } 2109 2110 case B_CONTIGUOUS: 2111 { 2112 // We have already allocated our continuous pages run, so we can now 2113 // just map them in the address space 2114 vm_translation_map* map = &addressSpace->translation_map; 2115 addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE; 2116 addr_t virtualAddress = area->base; 2117 off_t offset = 0; 2118 2119 map->ops->lock(map); 2120 2121 for (virtualAddress = area->base; virtualAddress < area->base 2122 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2123 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 2124 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2125 if (page == NULL) 2126 panic("couldn't lookup physical page just allocated\n"); 2127 2128 status = map->ops->map(map, virtualAddress, physicalAddress, 2129 protection); 2130 if (status < B_OK) 2131 panic("couldn't map physical page in page run\n"); 2132 2133 increment_page_wired_count(page); 2134 vm_page_set_state(page, PAGE_STATE_WIRED); 2135 cache->InsertPage(page, offset); 2136 } 2137 2138 map->ops->unlock(map); 2139 break; 2140 } 2141 2142 default: 2143 break; 2144 } 2145 2146 cache->Unlock(); 2147 2148 if (reservedPages > 0) 2149 vm_page_unreserve_pages(reservedPages); 2150 2151 TRACE(("vm_create_anonymous_area: done\n")); 2152 2153 area->cache_type = CACHE_TYPE_RAM; 2154 return area->id; 2155 2156 err1: 2157 if (wiring == B_CONTIGUOUS) { 2158 // we had reserved the area space upfront... 
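		// The contiguous page run allocated above has to be freed again,
		// page by page.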
2159 addr_t pageNumber = page->physical_page_number; 2160 int32 i; 2161 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 2162 page = vm_lookup_page(pageNumber); 2163 if (page == NULL) 2164 panic("couldn't lookup physical page just allocated\n"); 2165 2166 vm_page_set_state(page, PAGE_STATE_FREE); 2167 } 2168 } 2169 2170 err0: 2171 if (reservedPages > 0) 2172 vm_page_unreserve_pages(reservedPages); 2173 if (reservedMemory > 0) 2174 vm_unreserve_memory(reservedMemory); 2175 2176 return status; 2177 } 2178 2179 2180 area_id 2181 vm_map_physical_memory(team_id team, const char* name, void** _address, 2182 uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress) 2183 { 2184 vm_area* area; 2185 vm_cache* cache; 2186 addr_t mapOffset; 2187 2188 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 2189 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 2190 name, _address, addressSpec, size, protection, physicalAddress)); 2191 2192 if (!arch_vm_supports_protection(protection)) 2193 return B_NOT_SUPPORTED; 2194 2195 AddressSpaceWriteLocker locker(team); 2196 if (!locker.IsLocked()) 2197 return B_BAD_TEAM_ID; 2198 2199 // if the physical address is somewhat inside a page, 2200 // move the actual area down to align on a page boundary 2201 mapOffset = physicalAddress % B_PAGE_SIZE; 2202 size += mapOffset; 2203 physicalAddress -= mapOffset; 2204 2205 size = PAGE_ALIGN(size); 2206 2207 // create a device cache 2208 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 2209 if (status != B_OK) 2210 return status; 2211 2212 // tell the page scanner to skip over this area, it's pages are special 2213 cache->scan_skip = 1; 2214 cache->virtual_end = size; 2215 2216 cache->Lock(); 2217 2218 status = map_backing_store(locker.AddressSpace(), cache, _address, 2219 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2220 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2221 2222 if (status < B_OK) 2223 cache->ReleaseRefLocked(); 2224 2225 cache->Unlock(); 2226 2227 if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) { 2228 // set requested memory type 2229 status = arch_vm_set_memory_type(area, physicalAddress, 2230 addressSpec & B_MTR_MASK); 2231 if (status < B_OK) 2232 delete_area(locker.AddressSpace(), area); 2233 } 2234 2235 if (status >= B_OK) { 2236 // make sure our area is mapped in completely 2237 2238 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2239 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2240 area->base + (size - 1)); 2241 2242 vm_page_reserve_pages(reservePages); 2243 map->ops->lock(map); 2244 2245 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2246 map->ops->map(map, area->base + offset, physicalAddress + offset, 2247 protection); 2248 } 2249 2250 map->ops->unlock(map); 2251 vm_page_unreserve_pages(reservePages); 2252 } 2253 2254 if (status < B_OK) 2255 return status; 2256 2257 // modify the pointer returned to be offset back into the new area 2258 // the same way the physical address in was offset 2259 *_address = (void*)((addr_t)*_address + mapOffset); 2260 2261 area->cache_type = CACHE_TYPE_DEVICE; 2262 return area->id; 2263 } 2264 2265 2266 area_id 2267 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 2268 uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs, 2269 uint32 vecCount) 2270 { 2271 TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, " 2272 "spec = %ld, size = %lu, 
protection = %ld, phys = %#lx)\n", team, 2273 name, _address, addressSpec, size, protection, physicalAddress)); 2274 2275 if (!arch_vm_supports_protection(protection) 2276 || (addressSpec & B_MTR_MASK) != 0) { 2277 return B_NOT_SUPPORTED; 2278 } 2279 2280 AddressSpaceWriteLocker locker(team); 2281 if (!locker.IsLocked()) 2282 return B_BAD_TEAM_ID; 2283 2284 if (vecCount == 0) 2285 return B_BAD_VALUE; 2286 2287 addr_t size = 0; 2288 for (uint32 i = 0; i < vecCount; i++) { 2289 if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0 2290 || vecs[i].iov_len % B_PAGE_SIZE != 0) { 2291 return B_BAD_VALUE; 2292 } 2293 2294 size += vecs[i].iov_len; 2295 } 2296 2297 // create a device cache 2298 vm_cache* cache; 2299 status_t result = VMCacheFactory::CreateDeviceCache(cache, 2300 (addr_t)vecs[0].iov_base); 2301 if (result != B_OK) 2302 return result; 2303 2304 // tell the page scanner to skip over this area, it's pages are special 2305 cache->scan_skip = 1; 2306 cache->virtual_end = size; 2307 2308 cache->Lock(); 2309 2310 vm_area* area; 2311 result = map_backing_store(locker.AddressSpace(), cache, _address, 2312 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2313 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2314 2315 if (result != B_OK) 2316 cache->ReleaseRefLocked(); 2317 2318 cache->Unlock(); 2319 2320 if (result != B_OK) 2321 return result; 2322 2323 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2324 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2325 area->base + (size - 1)); 2326 2327 vm_page_reserve_pages(reservePages); 2328 map->ops->lock(map); 2329 2330 uint32 vecIndex = 0; 2331 size_t vecOffset = 0; 2332 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2333 while (vecOffset >= vecs[vecIndex].iov_len && vecIndex < vecCount) { 2334 vecOffset = 0; 2335 vecIndex++; 2336 } 2337 2338 if (vecIndex >= vecCount) 2339 break; 2340 2341 map->ops->map(map, area->base + offset, 2342 (addr_t)vecs[vecIndex].iov_base + vecOffset, protection); 2343 2344 vecOffset += B_PAGE_SIZE; 2345 } 2346 2347 map->ops->unlock(map); 2348 vm_page_unreserve_pages(reservePages); 2349 2350 if (_size != NULL) 2351 *_size = size; 2352 2353 area->cache_type = CACHE_TYPE_DEVICE; 2354 return area->id; 2355 } 2356 2357 2358 area_id 2359 vm_create_null_area(team_id team, const char* name, void** address, 2360 uint32 addressSpec, addr_t size) 2361 { 2362 vm_area* area; 2363 vm_cache* cache; 2364 status_t status; 2365 2366 AddressSpaceWriteLocker locker(team); 2367 if (!locker.IsLocked()) 2368 return B_BAD_TEAM_ID; 2369 2370 size = PAGE_ALIGN(size); 2371 2372 // create an null cache 2373 status = VMCacheFactory::CreateNullCache(cache); 2374 if (status != B_OK) 2375 return status; 2376 2377 // tell the page scanner to skip over this area, no pages will be mapped here 2378 cache->scan_skip = 1; 2379 cache->virtual_end = size; 2380 2381 cache->Lock(); 2382 2383 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 2384 addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name, 2385 false, true); 2386 2387 if (status < B_OK) { 2388 cache->ReleaseRefAndUnlock(); 2389 return status; 2390 } 2391 2392 cache->Unlock(); 2393 2394 area->cache_type = CACHE_TYPE_NULL; 2395 return area->id; 2396 } 2397 2398 2399 /*! Creates the vnode cache for the specified \a vnode. 2400 The vnode has to be marked busy when calling this function. 
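	A minimal usage sketch (error handling omitted; \a vnode is assumed to be
	referenced and marked busy by the caller):
		struct VMCache* cache;
		status_t error = vm_create_vnode_cache(vnode, &cache);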
*/
status_t
vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
{
	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
}


/*!	\a cache must be locked. The area's address space must be read-locked.
*/
static void
pre_map_area_pages(vm_area* area, VMCache* cache)
{
	addr_t baseAddress = area->base;
	addr_t cacheOffset = area->cache_offset;
	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
	page_num_t endPage = firstPage + area->size / B_PAGE_SIZE;

	for (VMCachePagesTree::Iterator it
			= cache->pages.GetIterator(firstPage, true, true);
			vm_page* page = it.Next();) {
		if (page->cache_offset >= endPage)
			break;

		// skip busy and inactive pages
		if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0)
			continue;

		vm_map_page(area, page,
			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
			B_READ_AREA | B_KERNEL_READ_AREA);
	}
}


/*!	Will map the file specified by \a fd to an area in memory.
	The file will be mirrored beginning at the specified \a offset. The
	\a offset and \a size arguments have to be page aligned.
*/
static area_id
_vm_map_file(team_id team, const char* name, void** _address,
	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
	bool unmapAddressRange, int fd, off_t offset, bool kernel)
{
	// TODO: for binary files, we want to make sure that they get a consistent
	// copy of the file at a given point in time, i.e. later changes should
	// not make it into the mapped copy -- this will need quite some changes
	// to be done in a nice way
	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
		fd, offset, size, mapping));

	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = PAGE_ALIGN(size);

	if (mapping == REGION_NO_PRIVATE_MAP)
		protection |= B_SHARED_AREA;
	if (addressSpec != B_EXACT_ADDRESS)
		unmapAddressRange = false;

	if (fd < 0) {
		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
			B_NO_LOCK, protection, 0, flags, kernel);
	}

	// get the open flags of the FD
	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return EBADF;
	int32 openMode = descriptor->open_mode;
	put_fd(descriptor);

	// The FD must be open for reading at any rate. For a shared mapping with
	// write access, the FD must additionally be open for writing.
	if ((openMode & O_ACCMODE) == O_WRONLY
		|| (mapping == REGION_NO_PRIVATE_MAP
			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
			&& (openMode & O_ACCMODE) == O_RDONLY)) {
		return EACCES;
	}

	// get the vnode for the object; this also grabs a reference to it
	struct vnode* vnode = NULL;
	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
	if (status < B_OK)
		return status;
	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);

	// If we're going to pre-map pages, we need to reserve the pages needed by
	// the mapping backend upfront.
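	// Note that the address space is only write-locked briefly below to
	// compute map_max_pages_need(); the reservation itself happens after that
	// lock has been dropped again.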
2491 page_num_t reservedPreMapPages = 0; 2492 if ((protection & B_READ_AREA) != 0) { 2493 AddressSpaceWriteLocker locker; 2494 status = locker.SetTo(team); 2495 if (status != B_OK) 2496 return status; 2497 2498 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2499 reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 2500 2501 locker.Unlock(); 2502 2503 vm_page_reserve_pages(reservedPreMapPages); 2504 } 2505 2506 struct PageUnreserver { 2507 PageUnreserver(page_num_t count) 2508 : fCount(count) 2509 { 2510 } 2511 2512 ~PageUnreserver() 2513 { 2514 if (fCount > 0) 2515 vm_page_unreserve_pages(fCount); 2516 } 2517 2518 page_num_t fCount; 2519 } pageUnreserver(reservedPreMapPages); 2520 2521 AddressSpaceWriteLocker locker(team); 2522 if (!locker.IsLocked()) 2523 return B_BAD_TEAM_ID; 2524 2525 // TODO: this only works for file systems that use the file cache 2526 vm_cache* cache; 2527 status = vfs_get_vnode_cache(vnode, &cache, false); 2528 if (status < B_OK) 2529 return status; 2530 2531 cache->Lock(); 2532 2533 vm_area* area; 2534 status = map_backing_store(locker.AddressSpace(), cache, _address, 2535 offset, size, addressSpec, 0, protection, mapping, &area, name, 2536 unmapAddressRange, kernel); 2537 2538 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2539 // map_backing_store() cannot know we no longer need the ref 2540 cache->ReleaseRefLocked(); 2541 } 2542 2543 if (status == B_OK && (protection & B_READ_AREA) != 0) 2544 pre_map_area_pages(area, cache); 2545 2546 cache->Unlock(); 2547 2548 if (status == B_OK) { 2549 // TODO: this probably deserves a smarter solution, ie. don't always 2550 // prefetch stuff, and also, probably don't trigger it at this place. 2551 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2552 // prefetches at max 10 MB starting from "offset" 2553 } 2554 2555 if (status != B_OK) 2556 return status; 2557 2558 area->cache_type = CACHE_TYPE_VNODE; 2559 return area->id; 2560 } 2561 2562 2563 area_id 2564 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2565 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2566 int fd, off_t offset) 2567 { 2568 if (!arch_vm_supports_protection(protection)) 2569 return B_NOT_SUPPORTED; 2570 2571 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2572 mapping, unmapAddressRange, fd, offset, true); 2573 } 2574 2575 2576 vm_cache* 2577 vm_area_get_locked_cache(vm_area* area) 2578 { 2579 mutex_lock(&sAreaCacheLock); 2580 2581 while (true) { 2582 vm_cache* cache = area->cache; 2583 2584 if (!cache->SwitchLock(&sAreaCacheLock)) { 2585 // cache has been deleted 2586 mutex_lock(&sAreaCacheLock); 2587 continue; 2588 } 2589 2590 mutex_lock(&sAreaCacheLock); 2591 2592 if (cache == area->cache) { 2593 cache->AcquireRefLocked(); 2594 mutex_unlock(&sAreaCacheLock); 2595 return cache; 2596 } 2597 2598 // the cache changed in the meantime 2599 cache->Unlock(); 2600 } 2601 } 2602 2603 2604 void 2605 vm_area_put_locked_cache(vm_cache* cache) 2606 { 2607 cache->ReleaseRefAndUnlock(); 2608 } 2609 2610 2611 area_id 2612 vm_clone_area(team_id team, const char* name, void** address, 2613 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2614 bool kernel) 2615 { 2616 vm_area* newArea = NULL; 2617 vm_area* sourceArea; 2618 2619 // Check whether the source area exists and is cloneable. If so, mark it 2620 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 
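	// The scope below only holds the source area's address space write lock
	// long enough to set the flag; the actual cloning is done with both
	// address spaces locked further down.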
2621 { 2622 AddressSpaceWriteLocker locker; 2623 status_t status = locker.SetFromArea(sourceID, sourceArea); 2624 if (status != B_OK) 2625 return status; 2626 2627 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2628 return B_NOT_ALLOWED; 2629 2630 sourceArea->protection |= B_SHARED_AREA; 2631 protection |= B_SHARED_AREA; 2632 } 2633 2634 // Now lock both address spaces and actually do the cloning. 2635 2636 MultiAddressSpaceLocker locker; 2637 vm_address_space* sourceAddressSpace; 2638 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2639 if (status != B_OK) 2640 return status; 2641 2642 vm_address_space* targetAddressSpace; 2643 status = locker.AddTeam(team, true, &targetAddressSpace); 2644 if (status != B_OK) 2645 return status; 2646 2647 status = locker.Lock(); 2648 if (status != B_OK) 2649 return status; 2650 2651 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2652 if (sourceArea == NULL) 2653 return B_BAD_VALUE; 2654 2655 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2656 return B_NOT_ALLOWED; 2657 2658 vm_cache* cache = vm_area_get_locked_cache(sourceArea); 2659 2660 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2661 // have been adapted. Maybe it should be part of the kernel settings, 2662 // anyway (so that old drivers can always work). 2663 #if 0 2664 if (sourceArea->aspace == vm_kernel_address_space() 2665 && addressSpace != vm_kernel_address_space() 2666 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2667 // kernel areas must not be cloned in userland, unless explicitly 2668 // declared user-cloneable upon construction 2669 status = B_NOT_ALLOWED; 2670 } else 2671 #endif 2672 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2673 status = B_NOT_ALLOWED; 2674 else { 2675 status = map_backing_store(targetAddressSpace, cache, address, 2676 sourceArea->cache_offset, sourceArea->size, addressSpec, 2677 sourceArea->wiring, protection, mapping, &newArea, name, false, 2678 kernel); 2679 } 2680 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2681 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2682 // to create a new cache, and has therefore already acquired a reference 2683 // to the source cache - but otherwise it has no idea that we need 2684 // one. 
2685 cache->AcquireRefLocked(); 2686 } 2687 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2688 // we need to map in everything at this point 2689 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2690 // we don't have actual pages to map but a physical area 2691 vm_translation_map* map 2692 = &sourceArea->address_space->translation_map; 2693 map->ops->lock(map); 2694 2695 addr_t physicalAddress; 2696 uint32 oldProtection; 2697 map->ops->query(map, sourceArea->base, &physicalAddress, 2698 &oldProtection); 2699 2700 map->ops->unlock(map); 2701 2702 map = &targetAddressSpace->translation_map; 2703 size_t reservePages = map->ops->map_max_pages_need(map, 2704 newArea->base, newArea->base + (newArea->size - 1)); 2705 2706 vm_page_reserve_pages(reservePages); 2707 map->ops->lock(map); 2708 2709 for (addr_t offset = 0; offset < newArea->size; 2710 offset += B_PAGE_SIZE) { 2711 map->ops->map(map, newArea->base + offset, 2712 physicalAddress + offset, protection); 2713 } 2714 2715 map->ops->unlock(map); 2716 vm_page_unreserve_pages(reservePages); 2717 } else { 2718 vm_translation_map* map = &targetAddressSpace->translation_map; 2719 size_t reservePages = map->ops->map_max_pages_need(map, 2720 newArea->base, newArea->base + (newArea->size - 1)); 2721 vm_page_reserve_pages(reservePages); 2722 2723 // map in all pages from source 2724 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2725 vm_page* page = it.Next();) { 2726 vm_map_page(newArea, page, newArea->base 2727 + ((page->cache_offset << PAGE_SHIFT) 2728 - newArea->cache_offset), protection); 2729 } 2730 2731 vm_page_unreserve_pages(reservePages); 2732 } 2733 } 2734 if (status == B_OK) 2735 newArea->cache_type = sourceArea->cache_type; 2736 2737 vm_area_put_locked_cache(cache); 2738 2739 if (status < B_OK) 2740 return status; 2741 2742 return newArea->id; 2743 } 2744 2745 2746 //! The address space must be write locked at this point 2747 static void 2748 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area) 2749 { 2750 vm_area* temp = addressSpace->areas; 2751 vm_area* last = NULL; 2752 2753 while (temp != NULL) { 2754 if (area == temp) { 2755 if (last != NULL) { 2756 last->address_space_next = temp->address_space_next; 2757 } else { 2758 addressSpace->areas = temp->address_space_next; 2759 } 2760 addressSpace->change_count++; 2761 break; 2762 } 2763 last = temp; 2764 temp = temp->address_space_next; 2765 } 2766 if (area == addressSpace->area_hint) 2767 addressSpace->area_hint = NULL; 2768 2769 if (addressSpace == vm_kernel_address_space()) 2770 sKernelAddressSpaceLeft -= area->size; 2771 2772 if (temp == NULL) 2773 panic("vm_area_release_ref: area not found in aspace's area list\n"); 2774 } 2775 2776 2777 static void 2778 delete_area(vm_address_space* addressSpace, vm_area* area) 2779 { 2780 rw_lock_write_lock(&sAreaHashLock); 2781 hash_remove(sAreaHash, area); 2782 rw_lock_write_unlock(&sAreaHashLock); 2783 2784 // At this point the area is removed from the global hash table, but 2785 // still exists in the area list. 
2786 2787 // Unmap the virtual address space the area occupied 2788 vm_unmap_pages(area, area->base, area->size, !area->cache->temporary); 2789 2790 if (!area->cache->temporary) 2791 area->cache->WriteModified(); 2792 2793 arch_vm_unset_memory_type(area); 2794 remove_area_from_address_space(addressSpace, area); 2795 vm_put_address_space(addressSpace); 2796 2797 area->cache->RemoveArea(area); 2798 area->cache->ReleaseRef(); 2799 2800 free(area->page_protections); 2801 free(area->name); 2802 free(area); 2803 } 2804 2805 2806 status_t 2807 vm_delete_area(team_id team, area_id id, bool kernel) 2808 { 2809 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2810 2811 AddressSpaceWriteLocker locker; 2812 vm_area* area; 2813 status_t status = locker.SetFromArea(team, id, area); 2814 if (status != B_OK) 2815 return status; 2816 2817 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2818 return B_NOT_ALLOWED; 2819 2820 delete_area(locker.AddressSpace(), area); 2821 return B_OK; 2822 } 2823 2824 2825 /*! Creates a new cache on top of given cache, moves all areas from 2826 the old cache to the new one, and changes the protection of all affected 2827 areas' pages to read-only. 2828 Preconditions: 2829 - The given cache must be locked. 2830 - All of the cache's areas' address spaces must be read locked. 2831 */ 2832 static status_t 2833 vm_copy_on_write_area(vm_cache* lowerCache) 2834 { 2835 vm_cache* upperCache; 2836 2837 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2838 2839 // We need to separate the cache from its areas. The cache goes one level 2840 // deeper and we create a new cache inbetween. 2841 2842 // create an anonymous cache 2843 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2844 0, true); 2845 if (status != B_OK) 2846 return status; 2847 2848 upperCache->Lock(); 2849 2850 upperCache->temporary = 1; 2851 upperCache->scan_skip = lowerCache->scan_skip; 2852 upperCache->virtual_base = lowerCache->virtual_base; 2853 upperCache->virtual_end = lowerCache->virtual_end; 2854 2855 // transfer the lower cache areas to the upper cache 2856 mutex_lock(&sAreaCacheLock); 2857 2858 upperCache->areas = lowerCache->areas; 2859 lowerCache->areas = NULL; 2860 2861 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2862 tempArea = tempArea->cache_next) { 2863 tempArea->cache = upperCache; 2864 upperCache->AcquireRefLocked(); 2865 lowerCache->ReleaseRefLocked(); 2866 } 2867 2868 mutex_unlock(&sAreaCacheLock); 2869 2870 lowerCache->AddConsumer(upperCache); 2871 2872 // We now need to remap all pages from all of the cache's areas read-only, so 2873 // that a copy will be created on next write access 2874 2875 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2876 tempArea = tempArea->cache_next) { 2877 // The area must be readable in the same way it was previously writable 2878 uint32 protection = B_KERNEL_READ_AREA; 2879 if ((tempArea->protection & B_READ_AREA) != 0) 2880 protection |= B_READ_AREA; 2881 2882 vm_translation_map* map = &tempArea->address_space->translation_map; 2883 map->ops->lock(map); 2884 map->ops->protect(map, tempArea->base, 2885 tempArea->base - 1 + tempArea->size, protection); 2886 map->ops->unlock(map); 2887 } 2888 2889 vm_area_put_locked_cache(upperCache); 2890 2891 return B_OK; 2892 } 2893 2894 2895 area_id 2896 vm_copy_area(team_id team, const char* name, void** _address, 2897 uint32 addressSpec, uint32 protection, area_id sourceID) 2898 { 2899 bool writableCopy = (protection & 
(B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2900 2901 if ((protection & B_KERNEL_PROTECTION) == 0) { 2902 // set the same protection for the kernel as for userland 2903 protection |= B_KERNEL_READ_AREA; 2904 if (writableCopy) 2905 protection |= B_KERNEL_WRITE_AREA; 2906 } 2907 2908 // Do the locking: target address space, all address spaces associated with 2909 // the source cache, and the cache itself. 2910 MultiAddressSpaceLocker locker; 2911 vm_address_space* targetAddressSpace; 2912 vm_cache* cache; 2913 vm_area* source; 2914 status_t status = locker.AddTeam(team, true, &targetAddressSpace); 2915 if (status == B_OK) { 2916 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2917 &cache); 2918 } 2919 if (status != B_OK) 2920 return status; 2921 2922 AreaCacheLocker cacheLocker(cache); // already locked 2923 2924 if (addressSpec == B_CLONE_ADDRESS) { 2925 addressSpec = B_EXACT_ADDRESS; 2926 *_address = (void*)source->base; 2927 } 2928 2929 bool sharedArea = (source->protection & B_SHARED_AREA) != 0; 2930 2931 // First, create a cache on top of the source area, respectively use the 2932 // existing one, if this is a shared area. 2933 2934 vm_area* target; 2935 status = map_backing_store(targetAddressSpace, cache, _address, 2936 source->cache_offset, source->size, addressSpec, source->wiring, 2937 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2938 &target, name, false, true); 2939 if (status < B_OK) 2940 return status; 2941 2942 if (sharedArea) { 2943 // The new area uses the old area's cache, but map_backing_store() 2944 // hasn't acquired a ref. So we have to do that now. 2945 cache->AcquireRefLocked(); 2946 } 2947 2948 // If the source area is writable, we need to move it one layer up as well 2949 2950 if (!sharedArea) { 2951 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2952 // TODO: do something more useful if this fails! 2953 if (vm_copy_on_write_area(cache) < B_OK) 2954 panic("vm_copy_on_write_area() failed!\n"); 2955 } 2956 } 2957 2958 // we return the ID of the newly created area 2959 return target->id; 2960 } 2961 2962 2963 //! 
//	You need to hold the cache lock when calling this function.
static int32
count_writable_areas(vm_cache* cache, vm_area* ignoreArea)
{
	struct vm_area* area = cache->areas;
	uint32 count = 0;

	for (; area != NULL; area = area->cache_next) {
		if (area != ignoreArea
			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
			count++;
	}

	return count;
}


static status_t
vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
	bool kernel)
{
	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
		"%#lx)\n", team, areaID, newProtection));

	if (!arch_vm_supports_protection(newProtection))
		return B_NOT_SUPPORTED;

	// lock address spaces and cache
	MultiAddressSpaceLocker locker;
	vm_cache* cache;
	vm_area* area;
	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
		&cache);
	if (status != B_OK)
		return status;

	AreaCacheLocker cacheLocker(cache);	// already locked

	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
		return B_NOT_ALLOWED;

	if (area->protection == newProtection)
		return B_OK;

	if (team != vm_kernel_address_space_id()
		&& area->address_space->id != team) {
		// unless you're the kernel, you are only allowed to set
		// the protection of your own areas
		return B_NOT_ALLOWED;
	}

	bool changePageProtection = true;

	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
		// writable -> !writable

		if (cache->source != NULL && cache->temporary) {
			if (count_writable_areas(cache, area) == 0) {
				// Since this cache is now backed by the pages of its source
				// cache, we can restrict the cache's commitment to the pages
				// that are actually in this cache.

				status = cache->Commit(cache->page_count * B_PAGE_SIZE);

				// TODO: we may be able to join with our source cache, if
				// count == 0
			}
		}
	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
		// !writable -> writable

		if (!list_is_empty(&cache->consumers)) {
			// There are consumers -- we have to insert a new cache. Fortunately
			// vm_copy_on_write_area() does everything that's needed.
			changePageProtection = false;
			status = vm_copy_on_write_area(cache);
		} else {
			// No consumers, so we don't need to insert a new one.
			if (cache->source != NULL && cache->temporary) {
				// the cache's commitment must contain all possible pages
				status = cache->Commit(cache->virtual_end
					- cache->virtual_base);
			}

			if (status == B_OK && cache->source != NULL) {
				// There's a source cache, hence we can't just change all pages'
				// protection or we might allow writing into pages belonging to
				// a lower cache.
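				// Instead, only the pages currently in this cache are remapped
				// below; pages still living in a lower cache keep their old
				// protection and are copied on the next write fault.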
3050 changePageProtection = false; 3051 3052 struct vm_translation_map* map 3053 = &area->address_space->translation_map; 3054 map->ops->lock(map); 3055 3056 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3057 vm_page* page = it.Next();) { 3058 addr_t address = area->base 3059 + (page->cache_offset << PAGE_SHIFT); 3060 map->ops->protect(map, address, address - 1 + B_PAGE_SIZE, 3061 newProtection); 3062 } 3063 3064 map->ops->unlock(map); 3065 } 3066 } 3067 } else { 3068 // we don't have anything special to do in all other cases 3069 } 3070 3071 if (status == B_OK) { 3072 // remap existing pages in this cache 3073 struct vm_translation_map* map = &area->address_space->translation_map; 3074 3075 if (changePageProtection) { 3076 map->ops->lock(map); 3077 map->ops->protect(map, area->base, area->base - 1 + area->size, 3078 newProtection); 3079 map->ops->unlock(map); 3080 } 3081 3082 area->protection = newProtection; 3083 } 3084 3085 return status; 3086 } 3087 3088 3089 status_t 3090 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr) 3091 { 3092 vm_address_space* addressSpace = vm_get_address_space(team); 3093 if (addressSpace == NULL) 3094 return B_BAD_TEAM_ID; 3095 3096 uint32 dummyFlags; 3097 status_t status = addressSpace->translation_map.ops->query( 3098 &addressSpace->translation_map, vaddr, paddr, &dummyFlags); 3099 3100 vm_put_address_space(addressSpace); 3101 return status; 3102 } 3103 3104 3105 static inline addr_t 3106 virtual_page_address(vm_area* area, vm_page* page) 3107 { 3108 return area->base 3109 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 3110 } 3111 3112 3113 bool 3114 vm_test_map_modification(vm_page* page) 3115 { 3116 MutexLocker locker(sMappingLock); 3117 3118 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3119 vm_page_mapping* mapping; 3120 while ((mapping = iterator.Next()) != NULL) { 3121 vm_area* area = mapping->area; 3122 vm_translation_map* map = &area->address_space->translation_map; 3123 3124 addr_t physicalAddress; 3125 uint32 flags; 3126 map->ops->lock(map); 3127 map->ops->query(map, virtual_page_address(area, page), 3128 &physicalAddress, &flags); 3129 map->ops->unlock(map); 3130 3131 if ((flags & PAGE_MODIFIED) != 0) 3132 return true; 3133 } 3134 3135 return false; 3136 } 3137 3138 3139 int32 3140 vm_test_map_activation(vm_page* page, bool* _modified) 3141 { 3142 int32 activation = 0; 3143 bool modified = false; 3144 3145 MutexLocker locker(sMappingLock); 3146 3147 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3148 vm_page_mapping* mapping; 3149 while ((mapping = iterator.Next()) != NULL) { 3150 vm_area* area = mapping->area; 3151 vm_translation_map* map = &area->address_space->translation_map; 3152 3153 addr_t physicalAddress; 3154 uint32 flags; 3155 map->ops->lock(map); 3156 map->ops->query(map, virtual_page_address(area, page), 3157 &physicalAddress, &flags); 3158 map->ops->unlock(map); 3159 3160 if ((flags & PAGE_ACCESSED) != 0) 3161 activation++; 3162 if ((flags & PAGE_MODIFIED) != 0) 3163 modified = true; 3164 } 3165 3166 if (_modified != NULL) 3167 *_modified = modified; 3168 3169 return activation; 3170 } 3171 3172 3173 void 3174 vm_clear_map_flags(vm_page* page, uint32 flags) 3175 { 3176 MutexLocker locker(sMappingLock); 3177 3178 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3179 vm_page_mapping* mapping; 3180 while ((mapping = iterator.Next()) != NULL) { 3181 vm_area* area = mapping->area; 3182 vm_translation_map* map = 
&area->address_space->translation_map; 3183 3184 map->ops->lock(map); 3185 map->ops->clear_flags(map, virtual_page_address(area, page), flags); 3186 map->ops->unlock(map); 3187 } 3188 } 3189 3190 3191 /*! Removes all mappings from a page. 3192 After you've called this function, the page is unmapped from memory. 3193 The accumulated page flags of all mappings can be found in \a _flags. 3194 */ 3195 void 3196 vm_remove_all_page_mappings(vm_page* page, uint32* _flags) 3197 { 3198 uint32 accumulatedFlags = 0; 3199 MutexLocker locker(sMappingLock); 3200 3201 vm_page_mappings queue; 3202 queue.MoveFrom(&page->mappings); 3203 3204 vm_page_mappings::Iterator iterator = queue.GetIterator(); 3205 vm_page_mapping* mapping; 3206 while ((mapping = iterator.Next()) != NULL) { 3207 vm_area* area = mapping->area; 3208 vm_translation_map* map = &area->address_space->translation_map; 3209 addr_t physicalAddress; 3210 uint32 flags; 3211 3212 map->ops->lock(map); 3213 addr_t address = virtual_page_address(area, page); 3214 map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1)); 3215 map->ops->flush(map); 3216 map->ops->query(map, address, &physicalAddress, &flags); 3217 map->ops->unlock(map); 3218 3219 area->mappings.Remove(mapping); 3220 3221 accumulatedFlags |= flags; 3222 } 3223 3224 if (page->wired_count == 0 && !queue.IsEmpty()) 3225 atomic_add(&gMappedPagesCount, -1); 3226 3227 locker.Unlock(); 3228 3229 // free now unused mappings 3230 3231 while ((mapping = queue.RemoveHead()) != NULL) { 3232 free(mapping); 3233 } 3234 3235 if (_flags != NULL) 3236 *_flags = accumulatedFlags; 3237 } 3238 3239 3240 bool 3241 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified) 3242 { 3243 vm_translation_map* map = &area->address_space->translation_map; 3244 3245 map->ops->lock(map); 3246 3247 addr_t physicalAddress; 3248 uint32 flags; 3249 status_t status = map->ops->query(map, virtualAddress, &physicalAddress, 3250 &flags); 3251 if (status < B_OK || (flags & PAGE_PRESENT) == 0) { 3252 map->ops->unlock(map); 3253 return false; 3254 } 3255 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3256 if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) { 3257 panic("area %p looking up page failed for pa 0x%lx\n", area, 3258 physicalAddress); 3259 } 3260 3261 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) 3262 decrement_page_wired_count(page); 3263 3264 map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1); 3265 3266 if (preserveModified) { 3267 map->ops->flush(map); 3268 3269 status = map->ops->query(map, virtualAddress, &physicalAddress, &flags); 3270 if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED) 3271 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3272 } 3273 3274 map->ops->unlock(map); 3275 3276 if (area->wiring == B_NO_LOCK) { 3277 vm_page_mapping* mapping; 3278 3279 mutex_lock(&sMappingLock); 3280 map->ops->lock(map); 3281 3282 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3283 while (iterator.HasNext()) { 3284 mapping = iterator.Next(); 3285 3286 if (mapping->area == area) { 3287 area->mappings.Remove(mapping); 3288 page->mappings.Remove(mapping); 3289 3290 if (page->mappings.IsEmpty() && page->wired_count == 0) 3291 atomic_add(&gMappedPagesCount, -1); 3292 3293 map->ops->unlock(map); 3294 mutex_unlock(&sMappingLock); 3295 3296 free(mapping); 3297 3298 return true; 3299 } 3300 } 3301 3302 map->ops->unlock(map); 3303 mutex_unlock(&sMappingLock); 3304 3305 dprintf("vm_unmap_page: couldn't 
find mapping for area %p in page %p\n", 3306 area, page); 3307 } 3308 3309 return true; 3310 } 3311 3312 3313 status_t 3314 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified) 3315 { 3316 vm_translation_map* map = &area->address_space->translation_map; 3317 addr_t end = base + (size - 1); 3318 3319 map->ops->lock(map); 3320 3321 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) { 3322 // iterate through all pages and decrease their wired count 3323 for (addr_t virtualAddress = base; virtualAddress < end; 3324 virtualAddress += B_PAGE_SIZE) { 3325 addr_t physicalAddress; 3326 uint32 flags; 3327 status_t status = map->ops->query(map, virtualAddress, 3328 &physicalAddress, &flags); 3329 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3330 continue; 3331 3332 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3333 if (page == NULL) { 3334 panic("area %p looking up page failed for pa 0x%lx\n", area, 3335 physicalAddress); 3336 } 3337 3338 decrement_page_wired_count(page); 3339 } 3340 } 3341 3342 map->ops->unmap(map, base, end); 3343 if (preserveModified) { 3344 map->ops->flush(map); 3345 3346 for (addr_t virtualAddress = base; virtualAddress < end; 3347 virtualAddress += B_PAGE_SIZE) { 3348 addr_t physicalAddress; 3349 uint32 flags; 3350 status_t status = map->ops->query(map, virtualAddress, 3351 &physicalAddress, &flags); 3352 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3353 continue; 3354 3355 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3356 if (page == NULL) { 3357 panic("area %p looking up page failed for pa 0x%lx\n", area, 3358 physicalAddress); 3359 } 3360 3361 if ((flags & PAGE_MODIFIED) != 0 3362 && page->state != PAGE_STATE_MODIFIED) 3363 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3364 } 3365 } 3366 map->ops->unlock(map); 3367 3368 if (area->wiring == B_NO_LOCK) { 3369 uint32 startOffset = (area->cache_offset + base - area->base) 3370 >> PAGE_SHIFT; 3371 uint32 endOffset = startOffset + (size >> PAGE_SHIFT); 3372 vm_page_mapping* mapping; 3373 vm_area_mappings queue; 3374 3375 mutex_lock(&sMappingLock); 3376 map->ops->lock(map); 3377 3378 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3379 while (iterator.HasNext()) { 3380 mapping = iterator.Next(); 3381 3382 vm_page* page = mapping->page; 3383 if (page->cache_offset < startOffset 3384 || page->cache_offset >= endOffset) 3385 continue; 3386 3387 page->mappings.Remove(mapping); 3388 iterator.Remove(); 3389 3390 if (page->mappings.IsEmpty() && page->wired_count == 0) 3391 atomic_add(&gMappedPagesCount, -1); 3392 3393 queue.Add(mapping); 3394 } 3395 3396 map->ops->unlock(map); 3397 mutex_unlock(&sMappingLock); 3398 3399 while ((mapping = queue.RemoveHead()) != NULL) { 3400 free(mapping); 3401 } 3402 } 3403 3404 return B_OK; 3405 } 3406 3407 3408 /*! When calling this function, you need to have pages reserved! 
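	For B_NO_LOCK areas a vm_page_mapping is allocated and linked to both the
	page and the area; for all other wiring types the page's wired count is
	incremented instead.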
*/ 3409 status_t 3410 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection) 3411 { 3412 vm_translation_map* map = &area->address_space->translation_map; 3413 vm_page_mapping* mapping = NULL; 3414 3415 if (area->wiring == B_NO_LOCK) { 3416 mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping)); 3417 if (mapping == NULL) 3418 return B_NO_MEMORY; 3419 3420 mapping->page = page; 3421 mapping->area = area; 3422 } 3423 3424 map->ops->lock(map); 3425 map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE, 3426 protection); 3427 map->ops->unlock(map); 3428 3429 if (area->wiring != B_NO_LOCK) { 3430 increment_page_wired_count(page); 3431 } else { 3432 // insert mapping into lists 3433 MutexLocker locker(sMappingLock); 3434 3435 if (page->mappings.IsEmpty() && page->wired_count == 0) 3436 atomic_add(&gMappedPagesCount, 1); 3437 3438 page->mappings.Add(mapping); 3439 area->mappings.Add(mapping); 3440 } 3441 3442 if (page->usage_count < 0) 3443 page->usage_count = 1; 3444 3445 if (page->state != PAGE_STATE_MODIFIED) 3446 vm_page_set_state(page, PAGE_STATE_ACTIVE); 3447 3448 return B_OK; 3449 } 3450 3451 3452 static int 3453 display_mem(int argc, char** argv) 3454 { 3455 bool physical = false; 3456 addr_t copyAddress; 3457 int32 displayWidth; 3458 int32 itemSize; 3459 int32 num = -1; 3460 addr_t address; 3461 int i = 1, j; 3462 3463 if (argc > 1 && argv[1][0] == '-') { 3464 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3465 physical = true; 3466 i++; 3467 } else 3468 i = 99; 3469 } 3470 3471 if (argc < i + 1 || argc > i + 2) { 3472 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3473 "\tdl - 8 bytes\n" 3474 "\tdw - 4 bytes\n" 3475 "\tds - 2 bytes\n" 3476 "\tdb - 1 byte\n" 3477 "\tstring - a whole string\n" 3478 " -p or --physical only allows memory from a single page to be " 3479 "displayed.\n"); 3480 return 0; 3481 } 3482 3483 address = parse_expression(argv[i]); 3484 3485 if (argc > i + 1) 3486 num = parse_expression(argv[i + 1]); 3487 3488 // build the format string 3489 if (strcmp(argv[0], "db") == 0) { 3490 itemSize = 1; 3491 displayWidth = 16; 3492 } else if (strcmp(argv[0], "ds") == 0) { 3493 itemSize = 2; 3494 displayWidth = 8; 3495 } else if (strcmp(argv[0], "dw") == 0) { 3496 itemSize = 4; 3497 displayWidth = 4; 3498 } else if (strcmp(argv[0], "dl") == 0) { 3499 itemSize = 8; 3500 displayWidth = 2; 3501 } else if (strcmp(argv[0], "string") == 0) { 3502 itemSize = 1; 3503 displayWidth = -1; 3504 } else { 3505 kprintf("display_mem called in an invalid way!\n"); 3506 return 0; 3507 } 3508 3509 if (num <= 0) 3510 num = displayWidth; 3511 3512 void* physicalPageHandle = NULL; 3513 3514 if (physical) { 3515 int32 offset = address & (B_PAGE_SIZE - 1); 3516 if (num * itemSize + offset > B_PAGE_SIZE) { 3517 num = (B_PAGE_SIZE - offset) / itemSize; 3518 kprintf("NOTE: number of bytes has been cut to page size\n"); 3519 } 3520 3521 address = ROUNDDOWN(address, B_PAGE_SIZE); 3522 3523 if (vm_get_physical_page_debug(address, ©Address, 3524 &physicalPageHandle) != B_OK) { 3525 kprintf("getting the hardware page failed."); 3526 return 0; 3527 } 3528 3529 address += offset; 3530 copyAddress += offset; 3531 } else 3532 copyAddress = address; 3533 3534 if (!strcmp(argv[0], "string")) { 3535 kprintf("%p \"", (char*)copyAddress); 3536 3537 // string mode 3538 for (i = 0; true; i++) { 3539 char c; 3540 if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK 3541 || c == '\0') 3542 break; 3543 3544 if (c == '\n') 3545 
kprintf("\\n"); 3546 else if (c == '\t') 3547 kprintf("\\t"); 3548 else { 3549 if (!isprint(c)) 3550 c = '.'; 3551 3552 kprintf("%c", c); 3553 } 3554 } 3555 3556 kprintf("\"\n"); 3557 } else { 3558 // number mode 3559 for (i = 0; i < num; i++) { 3560 uint32 value; 3561 3562 if ((i % displayWidth) == 0) { 3563 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3564 if (i != 0) 3565 kprintf("\n"); 3566 3567 kprintf("[0x%lx] ", address + i * itemSize); 3568 3569 for (j = 0; j < displayed; j++) { 3570 char c; 3571 if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j, 3572 1) != B_OK) { 3573 displayed = j; 3574 break; 3575 } 3576 if (!isprint(c)) 3577 c = '.'; 3578 3579 kprintf("%c", c); 3580 } 3581 if (num > displayWidth) { 3582 // make sure the spacing in the last line is correct 3583 for (j = displayed; j < displayWidth * itemSize; j++) 3584 kprintf(" "); 3585 } 3586 kprintf(" "); 3587 } 3588 3589 if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize, 3590 itemSize) != B_OK) { 3591 kprintf("read fault"); 3592 break; 3593 } 3594 3595 switch (itemSize) { 3596 case 1: 3597 kprintf(" %02x", *(uint8*)&value); 3598 break; 3599 case 2: 3600 kprintf(" %04x", *(uint16*)&value); 3601 break; 3602 case 4: 3603 kprintf(" %08lx", *(uint32*)&value); 3604 break; 3605 case 8: 3606 kprintf(" %016Lx", *(uint64*)&value); 3607 break; 3608 } 3609 } 3610 3611 kprintf("\n"); 3612 } 3613 3614 if (physical) { 3615 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3616 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3617 } 3618 return 0; 3619 } 3620 3621 3622 static void 3623 dump_cache_tree_recursively(vm_cache* cache, int level, 3624 vm_cache* highlightCache) 3625 { 3626 // print this cache 3627 for (int i = 0; i < level; i++) 3628 kprintf(" "); 3629 if (cache == highlightCache) 3630 kprintf("%p <--\n", cache); 3631 else 3632 kprintf("%p\n", cache); 3633 3634 // recursively print its consumers 3635 vm_cache* consumer = NULL; 3636 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3637 consumer)) != NULL) { 3638 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3639 } 3640 } 3641 3642 3643 static int 3644 dump_cache_tree(int argc, char** argv) 3645 { 3646 if (argc != 2 || !strcmp(argv[1], "--help")) { 3647 kprintf("usage: %s <address>\n", argv[0]); 3648 return 0; 3649 } 3650 3651 addr_t address = parse_expression(argv[1]); 3652 if (address == 0) 3653 return 0; 3654 3655 vm_cache* cache = (vm_cache*)address; 3656 vm_cache* root = cache; 3657 3658 // find the root cache (the transitive source) 3659 while (root->source != NULL) 3660 root = root->source; 3661 3662 dump_cache_tree_recursively(root, 0, cache); 3663 3664 return 0; 3665 } 3666 3667 3668 static const char* 3669 cache_type_to_string(int32 type) 3670 { 3671 switch (type) { 3672 case CACHE_TYPE_RAM: 3673 return "RAM"; 3674 case CACHE_TYPE_DEVICE: 3675 return "device"; 3676 case CACHE_TYPE_VNODE: 3677 return "vnode"; 3678 case CACHE_TYPE_NULL: 3679 return "null"; 3680 3681 default: 3682 return "unknown"; 3683 } 3684 } 3685 3686 3687 #if DEBUG_CACHE_LIST 3688 3689 static void 3690 update_cache_info_recursively(vm_cache* cache, cache_info& info) 3691 { 3692 info.page_count += cache->page_count; 3693 if (cache->type == CACHE_TYPE_RAM) 3694 info.committed += cache->committed_size; 3695 3696 // recurse 3697 vm_cache* consumer = NULL; 3698 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3699 consumer)) != NULL) { 3700 update_cache_info_recursively(consumer, info); 3701 } 3702 } 3703 
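// qsort() comparators used by dump_caches(): they sort the cache_info entries
// by page count or by committed size, in descending order.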
3704 3705 static int 3706 cache_info_compare_page_count(const void* _a, const void* _b) 3707 { 3708 const cache_info* a = (const cache_info*)_a; 3709 const cache_info* b = (const cache_info*)_b; 3710 if (a->page_count == b->page_count) 3711 return 0; 3712 return a->page_count < b->page_count ? 1 : -1; 3713 } 3714 3715 3716 static int 3717 cache_info_compare_committed(const void* _a, const void* _b) 3718 { 3719 const cache_info* a = (const cache_info*)_a; 3720 const cache_info* b = (const cache_info*)_b; 3721 if (a->committed == b->committed) 3722 return 0; 3723 return a->committed < b->committed ? 1 : -1; 3724 } 3725 3726 3727 static void 3728 dump_caches_recursively(vm_cache* cache, cache_info& info, int level) 3729 { 3730 for (int i = 0; i < level; i++) 3731 kprintf(" "); 3732 3733 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3734 cache_type_to_string(cache->type), cache->virtual_base, 3735 cache->virtual_end, cache->page_count); 3736 3737 if (level == 0) 3738 kprintf("/%lu", info.page_count); 3739 3740 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3741 kprintf(", committed: %lld", cache->committed_size); 3742 3743 if (level == 0) 3744 kprintf("/%lu", info.committed); 3745 } 3746 3747 // areas 3748 if (cache->areas != NULL) { 3749 vm_area* area = cache->areas; 3750 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3751 area->address_space->id); 3752 3753 while (area->cache_next != NULL) { 3754 area = area->cache_next; 3755 kprintf(", %ld", area->id); 3756 } 3757 } 3758 3759 kputs("\n"); 3760 3761 // recurse 3762 vm_cache* consumer = NULL; 3763 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3764 consumer)) != NULL) { 3765 dump_caches_recursively(consumer, info, level + 1); 3766 } 3767 } 3768 3769 3770 static int 3771 dump_caches(int argc, char** argv) 3772 { 3773 if (sCacheInfoTable == NULL) { 3774 kprintf("No cache info table!\n"); 3775 return 0; 3776 } 3777 3778 bool sortByPageCount = true; 3779 3780 for (int32 i = 1; i < argc; i++) { 3781 if (strcmp(argv[i], "-c") == 0) { 3782 sortByPageCount = false; 3783 } else { 3784 print_debugger_command_usage(argv[0]); 3785 return 0; 3786 } 3787 } 3788 3789 uint32 totalCount = 0; 3790 uint32 rootCount = 0; 3791 off_t totalCommitted = 0; 3792 page_num_t totalPages = 0; 3793 3794 vm_cache* cache = gDebugCacheList; 3795 while (cache) { 3796 totalCount++; 3797 if (cache->source == NULL) { 3798 cache_info stackInfo; 3799 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3800 ? sCacheInfoTable[rootCount] : stackInfo; 3801 rootCount++; 3802 info.cache = cache; 3803 info.page_count = 0; 3804 info.committed = 0; 3805 update_cache_info_recursively(cache, info); 3806 totalCommitted += info.committed; 3807 totalPages += info.page_count; 3808 } 3809 3810 cache = cache->debug_next; 3811 } 3812 3813 if (rootCount <= (uint32)kCacheInfoTableCount) { 3814 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3815 sortByPageCount 3816 ? &cache_info_compare_page_count 3817 : &cache_info_compare_committed); 3818 } 3819 3820 kprintf("total committed memory: %lld, total used pages: %lu\n", 3821 totalCommitted, totalPages); 3822 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3823 "tree...\n\n", totalCount, rootCount, 3824 sortByPageCount ? 
"page count" : "committed size"); 3825 3826 if (rootCount <= (uint32)kCacheInfoTableCount) { 3827 for (uint32 i = 0; i < rootCount; i++) { 3828 cache_info& info = sCacheInfoTable[i]; 3829 dump_caches_recursively(info.cache, info, 0); 3830 } 3831 } else 3832 kprintf("Cache info table too small! Can't sort and print caches!\n"); 3833 3834 return 0; 3835 } 3836 3837 #endif // DEBUG_CACHE_LIST 3838 3839 3840 static int 3841 dump_cache(int argc, char** argv) 3842 { 3843 vm_cache* cache; 3844 bool showPages = false; 3845 int i = 1; 3846 3847 if (argc < 2 || !strcmp(argv[1], "--help")) { 3848 kprintf("usage: %s [-ps] <address>\n" 3849 " if -p is specified, all pages are shown, if -s is used\n" 3850 " only the cache info is shown respectively.\n", argv[0]); 3851 return 0; 3852 } 3853 while (argv[i][0] == '-') { 3854 char* arg = argv[i] + 1; 3855 while (arg[0]) { 3856 if (arg[0] == 'p') 3857 showPages = true; 3858 arg++; 3859 } 3860 i++; 3861 } 3862 if (argv[i] == NULL) { 3863 kprintf("%s: invalid argument, pass address\n", argv[0]); 3864 return 0; 3865 } 3866 3867 addr_t address = parse_expression(argv[i]); 3868 if (address == 0) 3869 return 0; 3870 3871 cache = (vm_cache*)address; 3872 3873 kprintf("CACHE %p:\n", cache); 3874 kprintf(" ref_count: %ld\n", cache->RefCount()); 3875 kprintf(" source: %p\n", cache->source); 3876 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 3877 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 3878 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 3879 kprintf(" temporary: %ld\n", cache->temporary); 3880 kprintf(" scan_skip: %ld\n", cache->scan_skip); 3881 kprintf(" lock: %p\n", cache->GetLock()); 3882 #if KDEBUG 3883 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 3884 #endif 3885 kprintf(" areas:\n"); 3886 3887 for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) { 3888 kprintf(" area 0x%lx, %s\n", area->id, area->name); 3889 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 3890 kprintf("\tprotection: 0x%lx\n", area->protection); 3891 kprintf("\towner: 0x%lx\n", area->address_space->id); 3892 } 3893 3894 kprintf(" consumers:\n"); 3895 vm_cache* consumer = NULL; 3896 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3897 consumer)) != NULL) { 3898 kprintf("\t%p\n", consumer); 3899 } 3900 3901 kprintf(" pages:\n"); 3902 if (showPages) { 3903 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3904 vm_page* page = it.Next();) { 3905 if (page->type == PAGE_TYPE_PHYSICAL) { 3906 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) " 3907 "wired_count %u\n", page, page->physical_page_number, 3908 page->cache_offset, page->type, page->state, 3909 page_state_to_string(page->state), page->wired_count); 3910 } else if(page->type == PAGE_TYPE_DUMMY) { 3911 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 3912 page, page->state, page_state_to_string(page->state)); 3913 } else 3914 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 3915 } 3916 } else 3917 kprintf("\t%ld in cache\n", cache->page_count); 3918 3919 return 0; 3920 } 3921 3922 3923 static void 3924 dump_area_struct(vm_area* area, bool mappings) 3925 { 3926 kprintf("AREA: %p\n", area); 3927 kprintf("name:\t\t'%s'\n", area->name); 3928 kprintf("owner:\t\t0x%lx\n", area->address_space->id); 3929 kprintf("id:\t\t0x%lx\n", area->id); 3930 kprintf("base:\t\t0x%lx\n", area->base); 3931 kprintf("size:\t\t0x%lx\n", area->size); 3932 kprintf("protection:\t0x%lx\n", area->protection); 3933 
kprintf("wiring:\t\t0x%x\n", area->wiring); 3934 kprintf("memory_type:\t0x%x\n", area->memory_type); 3935 kprintf("cache:\t\t%p\n", area->cache); 3936 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 3937 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3938 kprintf("cache_next:\t%p\n", area->cache_next); 3939 kprintf("cache_prev:\t%p\n", area->cache_prev); 3940 3941 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3942 if (mappings) { 3943 kprintf("page mappings:\n"); 3944 while (iterator.HasNext()) { 3945 vm_page_mapping* mapping = iterator.Next(); 3946 kprintf(" %p", mapping->page); 3947 } 3948 kprintf("\n"); 3949 } else { 3950 uint32 count = 0; 3951 while (iterator.Next() != NULL) { 3952 count++; 3953 } 3954 kprintf("page mappings:\t%lu\n", count); 3955 } 3956 } 3957 3958 3959 static int 3960 dump_area(int argc, char** argv) 3961 { 3962 bool mappings = false; 3963 bool found = false; 3964 int32 index = 1; 3965 vm_area* area; 3966 addr_t num; 3967 3968 if (argc < 2 || !strcmp(argv[1], "--help")) { 3969 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3970 "All areas matching either id/address/name are listed. You can\n" 3971 "force to check only a specific item by prefixing the specifier\n" 3972 "with the id/contains/address/name keywords.\n" 3973 "-m shows the area's mappings as well.\n"); 3974 return 0; 3975 } 3976 3977 if (!strcmp(argv[1], "-m")) { 3978 mappings = true; 3979 index++; 3980 } 3981 3982 int32 mode = 0xf; 3983 if (!strcmp(argv[index], "id")) 3984 mode = 1; 3985 else if (!strcmp(argv[index], "contains")) 3986 mode = 2; 3987 else if (!strcmp(argv[index], "name")) 3988 mode = 4; 3989 else if (!strcmp(argv[index], "address")) 3990 mode = 0; 3991 if (mode != 0xf) 3992 index++; 3993 3994 if (index >= argc) { 3995 kprintf("No area specifier given.\n"); 3996 return 0; 3997 } 3998 3999 num = parse_expression(argv[index]); 4000 4001 if (mode == 0) { 4002 dump_area_struct((struct vm_area*)num, mappings); 4003 } else { 4004 // walk through the area list, looking for the arguments as a name 4005 struct hash_iterator iter; 4006 4007 hash_open(sAreaHash, &iter); 4008 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 4009 if (((mode & 4) != 0 && area->name != NULL 4010 && !strcmp(argv[index], area->name)) 4011 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 4012 || (((mode & 2) != 0 && area->base <= num 4013 && area->base + area->size > num))))) { 4014 dump_area_struct(area, mappings); 4015 found = true; 4016 } 4017 } 4018 4019 if (!found) 4020 kprintf("could not find area %s (%ld)\n", argv[index], num); 4021 } 4022 4023 return 0; 4024 } 4025 4026 4027 static int 4028 dump_area_list(int argc, char** argv) 4029 { 4030 vm_area* area; 4031 struct hash_iterator iter; 4032 const char* name = NULL; 4033 int32 id = 0; 4034 4035 if (argc > 1) { 4036 id = parse_expression(argv[1]); 4037 if (id == 0) 4038 name = argv[1]; 4039 } 4040 4041 kprintf("addr id base\t\tsize protect lock name\n"); 4042 4043 hash_open(sAreaHash, &iter); 4044 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 4045 if ((id != 0 && area->address_space->id != id) 4046 || (name != NULL && strstr(area->name, name) == NULL)) 4047 continue; 4048 4049 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 4050 (void*)area->base, (void*)area->size, area->protection, area->wiring, 4051 area->name); 4052 } 4053 hash_close(sAreaHash, &iter, false); 4054 return 0; 4055 } 4056 4057 4058 static int 4059 
dump_available_memory(int argc, char** argv) 4060 { 4061 kprintf("Available memory: %Ld/%lu bytes\n", 4062 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 4063 return 0; 4064 } 4065 4066 4067 status_t 4068 vm_delete_areas(struct vm_address_space* addressSpace) 4069 { 4070 vm_area* area; 4071 vm_area* next; 4072 vm_area* last = NULL; 4073 4074 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 4075 addressSpace->id)); 4076 4077 rw_lock_write_lock(&addressSpace->lock); 4078 4079 // remove all reserved areas in this address space 4080 4081 for (area = addressSpace->areas; area; area = next) { 4082 next = area->address_space_next; 4083 4084 if (area->id == RESERVED_AREA_ID) { 4085 // just remove it 4086 if (last) 4087 last->address_space_next = area->address_space_next; 4088 else 4089 addressSpace->areas = area->address_space_next; 4090 4091 vm_put_address_space(addressSpace); 4092 free(area); 4093 continue; 4094 } 4095 4096 last = area; 4097 } 4098 4099 // delete all the areas in this address space 4100 4101 for (area = addressSpace->areas; area; area = next) { 4102 next = area->address_space_next; 4103 delete_area(addressSpace, area); 4104 } 4105 4106 rw_lock_write_unlock(&addressSpace->lock); 4107 return B_OK; 4108 } 4109 4110 4111 static area_id 4112 vm_area_for(addr_t address, bool kernel) 4113 { 4114 team_id team; 4115 if (IS_USER_ADDRESS(address)) { 4116 // we try the user team address space, if any 4117 team = vm_current_user_address_space_id(); 4118 if (team < 0) 4119 return team; 4120 } else 4121 team = vm_kernel_address_space_id(); 4122 4123 AddressSpaceReadLocker locker(team); 4124 if (!locker.IsLocked()) 4125 return B_BAD_TEAM_ID; 4126 4127 vm_area* area = vm_area_lookup(locker.AddressSpace(), address); 4128 if (area != NULL) { 4129 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 4130 return B_ERROR; 4131 4132 return area->id; 4133 } 4134 4135 return B_ERROR; 4136 } 4137 4138 4139 /*! Frees physical pages that were used during the boot process. 
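	The pages are looked up by the physical address returned from the
	translation map query and marked free before the virtual range is
	unmapped.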
4140 */ 4141 static void 4142 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end) 4143 { 4144 // free all physical pages in the specified range 4145 4146 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 4147 addr_t physicalAddress; 4148 uint32 flags; 4149 4150 if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) { 4151 vm_page* page = vm_lookup_page(current / B_PAGE_SIZE); 4152 if (page != NULL) 4153 vm_page_set_state(page, PAGE_STATE_FREE); 4154 } 4155 } 4156 4157 // unmap the memory 4158 map->ops->unmap(map, start, end - 1); 4159 } 4160 4161 4162 void 4163 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 4164 { 4165 vm_translation_map* map = &vm_kernel_address_space()->translation_map; 4166 addr_t end = start + size; 4167 addr_t lastEnd = start; 4168 vm_area* area; 4169 4170 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 4171 (void*)start, (void*)end)); 4172 4173 // The areas are sorted in virtual address space order, so 4174 // we just have to find the holes between them that fall 4175 // into the area we should dispose 4176 4177 map->ops->lock(map); 4178 4179 for (area = vm_kernel_address_space()->areas; area != NULL; 4180 area = area->address_space_next) { 4181 addr_t areaStart = area->base; 4182 addr_t areaEnd = areaStart + area->size; 4183 4184 if (area->id == RESERVED_AREA_ID) 4185 continue; 4186 4187 if (areaEnd >= end) { 4188 // we are done, the areas are already beyond of what we have to free 4189 lastEnd = end; 4190 break; 4191 } 4192 4193 if (areaStart > lastEnd) { 4194 // this is something we can free 4195 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 4196 (void*)areaStart)); 4197 unmap_and_free_physical_pages(map, lastEnd, areaStart); 4198 } 4199 4200 lastEnd = areaEnd; 4201 } 4202 4203 if (lastEnd < end) { 4204 // we can also get rid of some space at the end of the area 4205 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 4206 (void*)end)); 4207 unmap_and_free_physical_pages(map, lastEnd, end); 4208 } 4209 4210 map->ops->unlock(map); 4211 } 4212 4213 4214 static void 4215 create_preloaded_image_areas(struct preloaded_image* image) 4216 { 4217 char name[B_OS_NAME_LENGTH]; 4218 void* address; 4219 int32 length; 4220 4221 // use file name to create a good area name 4222 char* fileName = strrchr(image->name, '/'); 4223 if (fileName == NULL) 4224 fileName = image->name; 4225 else 4226 fileName++; 4227 4228 length = strlen(fileName); 4229 // make sure there is enough space for the suffix 4230 if (length > 25) 4231 length = 25; 4232 4233 memcpy(name, fileName, length); 4234 strcpy(name + length, "_text"); 4235 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 4236 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4237 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 4238 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4239 // this will later be remapped read-only/executable by the 4240 // ELF initialization code 4241 4242 strcpy(name + length, "_data"); 4243 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 4244 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4245 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 4246 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4247 } 4248 4249 4250 /*! Frees all previously kernel arguments areas from the kernel_args structure. 4251 Any boot loader resources contained in that arguments must not be accessed 4252 anymore past this point. 
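	(Each kernel args range is looked up via area_for() and removed with
	delete_area().)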
4253 */ 4254 void 4255 vm_free_kernel_args(kernel_args* args) 4256 { 4257 uint32 i; 4258 4259 TRACE(("vm_free_kernel_args()\n")); 4260 4261 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4262 area_id area = area_for((void*)args->kernel_args_range[i].start); 4263 if (area >= B_OK) 4264 delete_area(area); 4265 } 4266 } 4267 4268 4269 static void 4270 allocate_kernel_args(kernel_args* args) 4271 { 4272 TRACE(("allocate_kernel_args()\n")); 4273 4274 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4275 void* address = (void*)args->kernel_args_range[i].start; 4276 4277 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4278 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4279 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4280 } 4281 } 4282 4283 4284 static void 4285 unreserve_boot_loader_ranges(kernel_args* args) 4286 { 4287 TRACE(("unreserve_boot_loader_ranges()\n")); 4288 4289 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4290 vm_unreserve_address_range(vm_kernel_address_space_id(), 4291 (void*)args->virtual_allocated_range[i].start, 4292 args->virtual_allocated_range[i].size); 4293 } 4294 } 4295 4296 4297 static void 4298 reserve_boot_loader_ranges(kernel_args* args) 4299 { 4300 TRACE(("reserve_boot_loader_ranges()\n")); 4301 4302 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4303 void* address = (void*)args->virtual_allocated_range[i].start; 4304 4305 // If the address is no kernel address, we just skip it. The 4306 // architecture specific code has to deal with it. 4307 if (!IS_KERNEL_ADDRESS(address)) { 4308 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 4309 address, args->virtual_allocated_range[i].size); 4310 continue; 4311 } 4312 4313 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), 4314 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4315 if (status < B_OK) 4316 panic("could not reserve boot loader ranges\n"); 4317 } 4318 } 4319 4320 4321 static addr_t 4322 allocate_early_virtual(kernel_args* args, size_t size) 4323 { 4324 addr_t spot = 0; 4325 uint32 i; 4326 int last_valloc_entry = 0; 4327 4328 size = PAGE_ALIGN(size); 4329 // find a slot in the virtual allocation addr range 4330 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 4331 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4332 + args->virtual_allocated_range[i - 1].size; 4333 last_valloc_entry = i; 4334 // check to see if the space between this one and the last is big enough 4335 if (previousRangeEnd >= KERNEL_BASE 4336 && args->virtual_allocated_range[i].start 4337 - previousRangeEnd >= size) { 4338 spot = previousRangeEnd; 4339 args->virtual_allocated_range[i - 1].size += size; 4340 goto out; 4341 } 4342 } 4343 if (spot == 0) { 4344 // we hadn't found one between allocation ranges. this is ok. 
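		// fall back to the space after the last allocated range or, failing
		// that, to the space between KERNEL_BASE and the first range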
4345 // see if there's a gap after the last one 4346 addr_t lastRangeEnd 4347 = args->virtual_allocated_range[last_valloc_entry].start 4348 + args->virtual_allocated_range[last_valloc_entry].size; 4349 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 4350 spot = lastRangeEnd; 4351 args->virtual_allocated_range[last_valloc_entry].size += size; 4352 goto out; 4353 } 4354 // see if there's a gap before the first one 4355 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 4356 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 4357 args->virtual_allocated_range[0].start -= size; 4358 spot = args->virtual_allocated_range[0].start; 4359 goto out; 4360 } 4361 } 4362 } 4363 4364 out: 4365 return spot; 4366 } 4367 4368 4369 static bool 4370 is_page_in_physical_memory_range(kernel_args* args, addr_t address) 4371 { 4372 // TODO: horrible brute-force method of determining if the page can be 4373 // allocated 4374 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4375 if (address >= args->physical_memory_range[i].start 4376 && address < args->physical_memory_range[i].start 4377 + args->physical_memory_range[i].size) 4378 return true; 4379 } 4380 return false; 4381 } 4382 4383 4384 static addr_t 4385 allocate_early_physical_page(kernel_args* args) 4386 { 4387 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4388 addr_t nextPage; 4389 4390 nextPage = args->physical_allocated_range[i].start 4391 + args->physical_allocated_range[i].size; 4392 // see if the page after the next allocated paddr run can be allocated 4393 if (i + 1 < args->num_physical_allocated_ranges 4394 && args->physical_allocated_range[i + 1].size != 0) { 4395 // see if the next page will collide with the next allocated range 4396 if (nextPage >= args->physical_allocated_range[i+1].start) 4397 continue; 4398 } 4399 // see if the next physical page fits in the memory block 4400 if (is_page_in_physical_memory_range(args, nextPage)) { 4401 // we got one! 4402 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4403 return nextPage / B_PAGE_SIZE; 4404 } 4405 } 4406 4407 return 0; 4408 // could not allocate a block 4409 } 4410 4411 4412 /*! This one uses the kernel_args' physical and virtual memory ranges to 4413 allocate some pages before the VM is completely up. 4414 */ 4415 addr_t 4416 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4417 uint32 attributes) 4418 { 4419 if (physicalSize > virtualSize) 4420 physicalSize = virtualSize; 4421 4422 // find the vaddr to allocate at 4423 addr_t virtualBase = allocate_early_virtual(args, virtualSize); 4424 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 4425 4426 // map the pages 4427 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4428 addr_t physicalAddress = allocate_early_physical_page(args); 4429 if (physicalAddress == 0) 4430 panic("error allocating early page!\n"); 4431 4432 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4433 4434 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4435 physicalAddress * B_PAGE_SIZE, attributes, 4436 &allocate_early_physical_page); 4437 } 4438 4439 return virtualBase; 4440 } 4441 4442 4443 /*! The main entrance point to initialize the VM. 
*/ 4444 status_t 4445 vm_init(kernel_args* args) 4446 { 4447 struct preloaded_image* image; 4448 void* address; 4449 status_t err = 0; 4450 uint32 i; 4451 4452 TRACE(("vm_init: entry\n")); 4453 err = arch_vm_translation_map_init(args); 4454 err = arch_vm_init(args); 4455 4456 // initialize some globals 4457 sNextAreaID = 1; 4458 4459 vm_page_init_num_pages(args); 4460 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4461 4462 size_t heapSize = INITIAL_HEAP_SIZE; 4463 // try to accomodate low memory systems 4464 while (heapSize > sAvailableMemory / 8) 4465 heapSize /= 2; 4466 if (heapSize < 1024 * 1024) 4467 panic("vm_init: go buy some RAM please."); 4468 4469 // map in the new heap and initialize it 4470 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4471 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4472 TRACE(("heap at 0x%lx\n", heapBase)); 4473 heap_init(heapBase, heapSize); 4474 4475 size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE; 4476 addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize, 4477 slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4478 slab_init(args, slabInitialBase, slabInitialSize); 4479 4480 // initialize the free page list and physical page mapper 4481 vm_page_init(args); 4482 4483 // initialize the hash table that stores the pages mapped to caches 4484 vm_cache_init(args); 4485 4486 { 4487 vm_area* area; 4488 sAreaHash = hash_init(AREA_HASH_TABLE_SIZE, 4489 (addr_t)&area->hash_next - (addr_t)area, 4490 &area_compare, &area_hash); 4491 if (sAreaHash == NULL) 4492 panic("vm_init: error creating aspace hash table\n"); 4493 } 4494 4495 vm_address_space_init(); 4496 reserve_boot_loader_ranges(args); 4497 4498 // Do any further initialization that the architecture dependant layers may 4499 // need now 4500 arch_vm_translation_map_init_post_area(args); 4501 arch_vm_init_post_area(args); 4502 vm_page_init_post_area(args); 4503 4504 // allocate areas to represent stuff that already exists 4505 4506 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4507 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4508 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4509 4510 address = (void*)ROUNDDOWN(slabInitialBase, B_PAGE_SIZE); 4511 create_area("initial slab space", &address, B_EXACT_ADDRESS, 4512 slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA 4513 | B_KERNEL_WRITE_AREA); 4514 4515 allocate_kernel_args(args); 4516 4517 create_preloaded_image_areas(&args->kernel_image); 4518 4519 // allocate areas for preloaded images 4520 for (image = args->preloaded_images; image != NULL; image = image->next) { 4521 create_preloaded_image_areas(image); 4522 } 4523 4524 // allocate kernel stacks 4525 for (i = 0; i < args->num_cpus; i++) { 4526 char name[64]; 4527 4528 sprintf(name, "idle thread %lu kstack", i + 1); 4529 address = (void*)args->cpu_kstack[i].start; 4530 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4531 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4532 } 4533 4534 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4535 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4536 4537 #if DEBUG_CACHE_LIST 4538 create_area("cache info table", (void**)&sCacheInfoTable, 4539 B_ANY_KERNEL_ADDRESS, 4540 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4541 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4542 #endif // DEBUG_CACHE_LIST 4543 4544 // add some debugger commands 4545 add_debugger_command("areas", &dump_area_list, 
"Dump a list of all areas"); 4546 add_debugger_command("area", &dump_area, 4547 "Dump info about a particular area"); 4548 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 4549 add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree"); 4550 #if DEBUG_CACHE_LIST 4551 add_debugger_command_etc("caches", &dump_caches, 4552 "List all vm_cache trees", 4553 "[ \"-c\" ]\n" 4554 "All cache trees are listed sorted in decreasing order by number of\n" 4555 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 4556 0); 4557 #endif 4558 add_debugger_command("avail", &dump_available_memory, 4559 "Dump available memory"); 4560 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4561 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4562 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4563 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4564 add_debugger_command("string", &display_mem, "dump strings"); 4565 4566 TRACE(("vm_init: exit\n")); 4567 4568 return err; 4569 } 4570 4571 4572 status_t 4573 vm_init_post_sem(kernel_args* args) 4574 { 4575 // This frees all unused boot loader resources and makes its space available 4576 // again 4577 arch_vm_init_end(args); 4578 unreserve_boot_loader_ranges(args); 4579 4580 // fill in all of the semaphores that were not allocated before 4581 // since we're still single threaded and only the kernel address space 4582 // exists, it isn't that hard to find all of the ones we need to create 4583 4584 arch_vm_translation_map_init_post_sem(args); 4585 vm_address_space_init_post_sem(); 4586 4587 slab_init_post_sem(); 4588 return heap_init_post_sem(); 4589 } 4590 4591 4592 status_t 4593 vm_init_post_thread(kernel_args* args) 4594 { 4595 vm_page_init_post_thread(args); 4596 vm_daemon_init(); 4597 slab_init_post_thread(); 4598 return heap_init_post_thread(); 4599 } 4600 4601 4602 status_t 4603 vm_init_post_modules(kernel_args* args) 4604 { 4605 return arch_vm_init_post_modules(args); 4606 } 4607 4608 4609 void 4610 permit_page_faults(void) 4611 { 4612 struct thread* thread = thread_get_current_thread(); 4613 if (thread != NULL) 4614 atomic_add(&thread->page_faults_allowed, 1); 4615 } 4616 4617 4618 void 4619 forbid_page_faults(void) 4620 { 4621 struct thread* thread = thread_get_current_thread(); 4622 if (thread != NULL) 4623 atomic_add(&thread->page_faults_allowed, -1); 4624 } 4625 4626 4627 status_t 4628 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 4629 addr_t* newIP) 4630 { 4631 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4632 faultAddress)); 4633 4634 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4635 4636 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4637 vm_address_space* addressSpace = NULL; 4638 4639 status_t status = B_OK; 4640 *newIP = 0; 4641 atomic_add((int32*)&sPageFaults, 1); 4642 4643 if (IS_KERNEL_ADDRESS(pageAddress)) { 4644 addressSpace = vm_get_kernel_address_space(); 4645 } else if (IS_USER_ADDRESS(pageAddress)) { 4646 addressSpace = vm_get_current_user_address_space(); 4647 if (addressSpace == NULL) { 4648 if (!isUser) { 4649 dprintf("vm_page_fault: kernel thread accessing invalid user " 4650 "memory!\n"); 4651 status = B_BAD_ADDRESS; 4652 TPF(PageFaultError(-1, 4653 VMPageFaultTracing 4654 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4655 } else { 4656 // XXX weird state. 
4657 panic("vm_page_fault: non kernel thread accessing user memory " 4658 "that doesn't exist!\n"); 4659 status = B_BAD_ADDRESS; 4660 } 4661 } 4662 } else { 4663 // the hit was probably in the 64k DMZ between kernel and user space 4664 // this keeps a user space thread from passing a buffer that crosses 4665 // into kernel space 4666 status = B_BAD_ADDRESS; 4667 TPF(PageFaultError(-1, 4668 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4669 } 4670 4671 if (status == B_OK) 4672 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser); 4673 4674 if (status < B_OK) { 4675 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4676 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4677 strerror(status), address, faultAddress, isWrite, isUser, 4678 thread_get_current_thread_id()); 4679 if (!isUser) { 4680 struct thread* thread = thread_get_current_thread(); 4681 if (thread != NULL && thread->fault_handler != 0) { 4682 // this will cause the arch dependant page fault handler to 4683 // modify the IP on the interrupt frame or whatever to return 4684 // to this address 4685 *newIP = thread->fault_handler; 4686 } else { 4687 // unhandled page fault in the kernel 4688 panic("vm_page_fault: unhandled page fault in kernel space at " 4689 "0x%lx, ip 0x%lx\n", address, faultAddress); 4690 } 4691 } else { 4692 #if 1 4693 rw_lock_read_lock(&addressSpace->lock); 4694 4695 // TODO: remove me once we have proper userland debugging support 4696 // (and tools) 4697 vm_area* area = vm_area_lookup(addressSpace, faultAddress); 4698 4699 struct thread* thread = thread_get_current_thread(); 4700 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4701 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4702 thread->name, thread->id, thread->team->name, thread->team->id, 4703 isWrite ? "write" : "read", address, faultAddress, 4704 area ? area->name : "???", 4705 faultAddress - (area ? area->base : 0x0)); 4706 4707 // We can print a stack trace of the userland thread here. 4708 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4709 // fault and someone is already waiting for a write lock on the same address 4710 // space. This thread will then try to acquire the lock again and will 4711 // be queued after the writer. 4712 # if 0 4713 if (area) { 4714 struct stack_frame { 4715 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4716 struct stack_frame* previous; 4717 void* return_address; 4718 #else 4719 // ... 
4720 #warning writeme 4721 #endif 4722 } frame; 4723 # ifdef __INTEL__ 4724 struct iframe* iframe = i386_get_user_iframe(); 4725 if (iframe == NULL) 4726 panic("iframe is NULL!"); 4727 4728 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4729 sizeof(struct stack_frame)); 4730 # elif defined(__POWERPC__) 4731 struct iframe* iframe = ppc_get_user_iframe(); 4732 if (iframe == NULL) 4733 panic("iframe is NULL!"); 4734 4735 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4736 sizeof(struct stack_frame)); 4737 # else 4738 # warning "vm_page_fault() stack trace won't work" 4739 status = B_ERROR; 4740 # endif 4741 4742 dprintf("stack trace:\n"); 4743 int32 maxFrames = 50; 4744 while (status == B_OK && --maxFrames >= 0 4745 && frame.return_address != NULL) { 4746 dprintf(" %p", frame.return_address); 4747 area = vm_area_lookup(addressSpace, 4748 (addr_t)frame.return_address); 4749 if (area) { 4750 dprintf(" (%s + %#lx)", area->name, 4751 (addr_t)frame.return_address - area->base); 4752 } 4753 dprintf("\n"); 4754 4755 status = user_memcpy(&frame, frame.previous, 4756 sizeof(struct stack_frame)); 4757 } 4758 } 4759 # endif // 0 (stack trace) 4760 4761 rw_lock_read_unlock(&addressSpace->lock); 4762 #endif 4763 4764 // TODO: the fault_callback is a temporary solution for vm86 4765 if (thread->fault_callback == NULL 4766 || thread->fault_callback(address, faultAddress, isWrite)) { 4767 // If the thread has a signal handler for SIGSEGV, we simply 4768 // send it the signal. Otherwise we notify the user debugger 4769 // first. 4770 struct sigaction action; 4771 if (sigaction(SIGSEGV, NULL, &action) == 0 4772 && action.sa_handler != SIG_DFL 4773 && action.sa_handler != SIG_IGN) { 4774 send_signal(thread->id, SIGSEGV); 4775 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4776 SIGSEGV)) { 4777 send_signal(thread->id, SIGSEGV); 4778 } 4779 } 4780 } 4781 } 4782 4783 if (addressSpace != NULL) 4784 vm_put_address_space(addressSpace); 4785 4786 return B_HANDLED_INTERRUPT; 4787 } 4788 4789 4790 class VMCacheChainLocker { 4791 public: 4792 VMCacheChainLocker() 4793 : 4794 fTopCache(NULL), 4795 fBottomCache(NULL) 4796 { 4797 } 4798 4799 void SetTo(VMCache* topCache) 4800 { 4801 fTopCache = topCache; 4802 fBottomCache = topCache; 4803 } 4804 4805 VMCache* LockSourceCache() 4806 { 4807 if (fBottomCache == NULL || fBottomCache->source == NULL) 4808 return NULL; 4809 4810 fBottomCache = fBottomCache->source; 4811 fBottomCache->Lock(); 4812 fBottomCache->AcquireRefLocked(); 4813 4814 return fBottomCache; 4815 } 4816 4817 void Unlock() 4818 { 4819 if (fTopCache == NULL) 4820 return; 4821 4822 VMCache* cache = fTopCache; 4823 while (cache != NULL) { 4824 VMCache* nextCache = cache->source; 4825 cache->ReleaseRefAndUnlock(); 4826 4827 if (cache == fBottomCache) 4828 break; 4829 4830 cache = nextCache; 4831 } 4832 4833 fTopCache = NULL; 4834 fBottomCache = NULL; 4835 } 4836 4837 private: 4838 VMCache* fTopCache; 4839 VMCache* fBottomCache; 4840 }; 4841 4842 4843 struct PageFaultContext { 4844 AddressSpaceReadLocker addressSpaceLocker; 4845 VMCacheChainLocker cacheChainLocker; 4846 4847 vm_translation_map* map; 4848 vm_cache* topCache; 4849 off_t cacheOffset; 4850 bool isWrite; 4851 4852 // return values 4853 vm_page* page; 4854 bool restart; 4855 4856 4857 PageFaultContext(vm_address_space* addressSpace, bool isWrite) 4858 : 4859 addressSpaceLocker(addressSpace, true), 4860 map(&addressSpace->translation_map), 4861 isWrite(isWrite) 4862 { 4863 } 4864 4865 ~PageFaultContext() 4866 { 4867 
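		// make sure we don't leave any locks behind when the context goes away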
UnlockAll(); 4868 } 4869 4870 void Prepare(VMCache* topCache, off_t cacheOffset) 4871 { 4872 this->topCache = topCache; 4873 this->cacheOffset = cacheOffset; 4874 page = NULL; 4875 restart = false; 4876 4877 cacheChainLocker.SetTo(topCache); 4878 } 4879 4880 void UnlockAll() 4881 { 4882 topCache = NULL; 4883 addressSpaceLocker.Unlock(); 4884 cacheChainLocker.Unlock(); 4885 } 4886 }; 4887 4888 4889 /*! Gets the page that should be mapped into the area. 4890 Returns an error code other than \c B_OK, if the page couldn't be found or 4891 paged in. The locking state of the address space and the caches is undefined 4892 in that case. 4893 Returns \c B_OK with \c context.restart set to \c true, if the functions 4894 had to unlock the address space and all caches and is supposed to be called 4895 again. 4896 Returns \c B_OK with \c context.restart set to \c false, if the page was 4897 found. It is returned in \c context.page. The address space will still be 4898 locked as well as all caches starting from the top cache to at least the 4899 cache the page lives in. 4900 */ 4901 static inline status_t 4902 fault_get_page(PageFaultContext& context) 4903 { 4904 vm_cache* cache = context.topCache; 4905 vm_cache* lastCache = NULL; 4906 vm_page* page = NULL; 4907 4908 while (cache != NULL) { 4909 // We already hold the lock of the cache at this point. 4910 4911 lastCache = cache; 4912 4913 for (;;) { 4914 page = cache->LookupPage(context.cacheOffset); 4915 if (page == NULL || page->state != PAGE_STATE_BUSY) { 4916 // Either there is no page or there is one and it is not busy. 4917 break; 4918 } 4919 4920 // page must be busy -- wait for it to become unbusy 4921 ConditionVariableEntry entry; 4922 entry.Add(page); 4923 context.UnlockAll(); 4924 entry.Wait(); 4925 4926 // restart the whole process 4927 context.restart = true; 4928 return B_OK; 4929 } 4930 4931 if (page != NULL) 4932 break; 4933 4934 // The current cache does not contain the page we're looking for. 4935 4936 // see if the backing store has it 4937 if (cache->HasPage(context.cacheOffset)) { 4938 // insert a fresh page and mark it busy -- we're going to read it in 4939 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4940 cache->InsertPage(page, context.cacheOffset); 4941 4942 ConditionVariable busyCondition; 4943 busyCondition.Publish(page, "page"); 4944 4945 // We need to unlock all caches and the address space while reading 4946 // the page in. Keep a reference to the cache around. 4947 cache->AcquireRefLocked(); 4948 context.UnlockAll(); 4949 4950 // read the page in 4951 iovec vec; 4952 vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE); 4953 size_t bytesRead = vec.iov_len = B_PAGE_SIZE; 4954 4955 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4956 B_PHYSICAL_IO_REQUEST, &bytesRead); 4957 4958 cache->Lock(); 4959 4960 if (status < B_OK) { 4961 // on error remove and free the page 4962 dprintf("reading page from cache %p returned: %s!\n", 4963 cache, strerror(status)); 4964 4965 busyCondition.Unpublish(); 4966 cache->RemovePage(page); 4967 vm_page_set_state(page, PAGE_STATE_FREE); 4968 4969 cache->ReleaseRefAndUnlock(); 4970 return status; 4971 } 4972 4973 // mark the page unbusy again 4974 page->state = PAGE_STATE_ACTIVE; 4975 busyCondition.Unpublish(); 4976 4977 // Since we needed to unlock everything temporarily, the area 4978 // situation might have changed. So we need to restart the whole 4979 // process. 
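			// give up the reference and lock we acquired for the read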
4980 cache->ReleaseRefAndUnlock(); 4981 context.restart = true; 4982 return B_OK; 4983 } 4984 4985 cache = context.cacheChainLocker.LockSourceCache(); 4986 } 4987 4988 if (page == NULL) { 4989 // There was no adequate page, determine the cache for a clean one. 4990 // Read-only pages come in the deepest cache, only the top most cache 4991 // may have direct write access. 4992 cache = context.isWrite ? context.topCache : lastCache; 4993 4994 // allocate a clean page 4995 page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 4996 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 4997 page->physical_page_number)); 4998 4999 // insert the new page into our cache 5000 cache->InsertPage(page, context.cacheOffset); 5001 5002 } else if (page->cache != context.topCache && context.isWrite) { 5003 // We have a page that has the data we want, but in the wrong cache 5004 // object so we need to copy it and stick it into the top cache. 5005 vm_page* sourcePage = page; 5006 5007 // TODO: If memory is low, it might be a good idea to steal the page 5008 // from our source cache -- if possible, that is. 5009 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 5010 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 5011 5012 // copy the page 5013 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 5014 sourcePage->physical_page_number * B_PAGE_SIZE); 5015 5016 // insert the new page into our cache 5017 context.topCache->InsertPage(page, context.cacheOffset); 5018 } 5019 5020 context.page = page; 5021 return B_OK; 5022 } 5023 5024 5025 static status_t 5026 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress, 5027 bool isWrite, bool isUser) 5028 { 5029 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 5030 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 5031 5032 PageFaultContext context(addressSpace, isWrite); 5033 5034 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 5035 status_t status = B_OK; 5036 5037 atomic_add(&addressSpace->fault_count, 1); 5038 5039 // We may need up to 2 pages plus pages needed for mapping them -- reserving 5040 // the pages upfront makes sure we don't have any cache locked, so that the 5041 // page daemon/thief can do their job without problems. 5042 size_t reservePages = 2 + context.map->ops->map_max_pages_need(context.map, 5043 originalAddress, originalAddress); 5044 context.addressSpaceLocker.Unlock(); 5045 vm_page_reserve_pages(reservePages); 5046 5047 while (true) { 5048 context.addressSpaceLocker.Lock(); 5049 5050 // get the area the fault was in 5051 vm_area* area = vm_area_lookup(addressSpace, address); 5052 if (area == NULL) { 5053 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 5054 "space\n", originalAddress); 5055 TPF(PageFaultError(-1, 5056 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 5057 status = B_BAD_ADDRESS; 5058 break; 5059 } 5060 5061 // check permissions 5062 uint32 protection = get_area_page_protection(area, address); 5063 if (isUser && (protection & B_USER_PROTECTION) == 0) { 5064 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 5065 (void*)originalAddress); 5066 TPF(PageFaultError(area->id, 5067 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 5068 status = B_PERMISSION_DENIED; 5069 break; 5070 } 5071 if (isWrite && (protection 5072 & (B_WRITE_AREA | (isUser ? 
0 : B_KERNEL_WRITE_AREA))) == 0) { 5073 dprintf("write access attempted on write-protected area 0x%lx at" 5074 " %p\n", area->id, (void*)originalAddress); 5075 TPF(PageFaultError(area->id, 5076 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 5077 status = B_PERMISSION_DENIED; 5078 break; 5079 } else if (!isWrite && (protection 5080 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 5081 dprintf("read access attempted on read-protected area 0x%lx at" 5082 " %p\n", area->id, (void*)originalAddress); 5083 TPF(PageFaultError(area->id, 5084 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 5085 status = B_PERMISSION_DENIED; 5086 break; 5087 } 5088 5089 // We have the area, it was a valid access, so let's try to resolve the 5090 // page fault now. 5091 // At first, the top most cache from the area is investigated. 5092 5093 context.Prepare(vm_area_get_locked_cache(area), 5094 address - area->base + area->cache_offset); 5095 5096 // See if this cache has a fault handler -- this will do all the work 5097 // for us. 5098 { 5099 // Note, since the page fault is resolved with interrupts enabled, 5100 // the fault handler could be called more than once for the same 5101 // reason -- the store must take this into account. 5102 status = context.topCache->Fault(addressSpace, context.cacheOffset); 5103 if (status != B_BAD_HANDLER) 5104 break; 5105 } 5106 5107 // The top most cache has no fault handler, so let's see if the cache or 5108 // its sources already have the page we're searching for (we're going 5109 // from top to bottom). 5110 status = fault_get_page(context); 5111 if (status != B_OK) { 5112 TPF(PageFaultError(area->id, status)); 5113 break; 5114 } 5115 5116 if (context.restart) 5117 continue; 5118 5119 // All went fine, all there is left to do is to map the page into the 5120 // address space. 5121 TPF(PageFaultDone(area->id, context.topCache, context.page->cache, 5122 context.page)); 5123 5124 // If the page doesn't reside in the area's cache, we need to make sure 5125 // it's mapped in read-only, so that we cannot overwrite someone else's 5126 // data (copy-on-write) 5127 uint32 newProtection = protection; 5128 if (context.page->cache != context.topCache && !isWrite) 5129 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 5130 5131 bool unmapPage = false; 5132 bool mapPage = true; 5133 5134 // check whether there's already a page mapped at the address 5135 context.map->ops->lock(context.map); 5136 5137 addr_t physicalAddress; 5138 uint32 flags; 5139 vm_page* mappedPage; 5140 if (context.map->ops->query(context.map, address, &physicalAddress, 5141 &flags) == B_OK 5142 && (flags & PAGE_PRESENT) != 0 5143 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5144 != NULL) { 5145 // Yep there's already a page. If it's ours, we can simply adjust 5146 // its protection. Otherwise we have to unmap it. 5147 if (mappedPage == context.page) { 5148 context.map->ops->protect(context.map, address, 5149 address + (B_PAGE_SIZE - 1), newProtection); 5150 5151 mapPage = false; 5152 } else 5153 unmapPage = true; 5154 } 5155 5156 context.map->ops->unlock(context.map); 5157 5158 if (unmapPage) 5159 vm_unmap_page(area, address, true); 5160 5161 if (mapPage) 5162 vm_map_page(area, context.page, address, newProtection); 5163 5164 break; 5165 } 5166 5167 vm_page_unreserve_pages(reservePages); 5168 5169 return status; 5170 } 5171 5172 5173 /*! 
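	Looks up the area containing \a address in \a addressSpace. The address
	space's area hint is checked first and updated when a match is found.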
You must have the address space's sem held */ 5174 vm_area* 5175 vm_area_lookup(vm_address_space* addressSpace, addr_t address) 5176 { 5177 vm_area* area; 5178 5179 // check the areas list first 5180 area = addressSpace->area_hint; 5181 if (area != NULL 5182 && area->base <= address 5183 && area->base + (area->size - 1) >= address) 5184 goto found; 5185 5186 for (area = addressSpace->areas; area != NULL; 5187 area = area->address_space_next) { 5188 if (area->id == RESERVED_AREA_ID) 5189 continue; 5190 5191 if (area->base <= address && area->base + (area->size - 1) >= address) 5192 break; 5193 } 5194 5195 found: 5196 if (area) 5197 addressSpace->area_hint = area; 5198 5199 return area; 5200 } 5201 5202 5203 status_t 5204 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 5205 { 5206 return vm_kernel_address_space()->translation_map.ops->get_physical_page( 5207 paddr, _vaddr, _handle); 5208 } 5209 5210 status_t 5211 vm_put_physical_page(addr_t vaddr, void* handle) 5212 { 5213 return vm_kernel_address_space()->translation_map.ops->put_physical_page( 5214 vaddr, handle); 5215 } 5216 5217 5218 status_t 5219 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 5220 { 5221 return vm_kernel_address_space()->translation_map.ops 5222 ->get_physical_page_current_cpu(paddr, _vaddr, _handle); 5223 } 5224 5225 status_t 5226 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5227 { 5228 return vm_kernel_address_space()->translation_map.ops 5229 ->put_physical_page_current_cpu(vaddr, handle); 5230 } 5231 5232 5233 status_t 5234 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 5235 { 5236 return vm_kernel_address_space()->translation_map.ops 5237 ->get_physical_page_debug(paddr, _vaddr, _handle); 5238 } 5239 5240 status_t 5241 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5242 { 5243 return vm_kernel_address_space()->translation_map.ops 5244 ->put_physical_page_debug(vaddr, handle); 5245 } 5246 5247 5248 void 5249 vm_get_info(system_memory_info* info) 5250 { 5251 swap_get_info(info); 5252 5253 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 5254 info->page_faults = sPageFaults; 5255 5256 MutexLocker locker(sAvailableMemoryLock); 5257 info->free_memory = sAvailableMemory; 5258 info->needed_memory = sNeededMemory; 5259 } 5260 5261 5262 uint32 5263 vm_num_page_faults(void) 5264 { 5265 return sPageFaults; 5266 } 5267 5268 5269 off_t 5270 vm_available_memory(void) 5271 { 5272 MutexLocker locker(sAvailableMemoryLock); 5273 return sAvailableMemory; 5274 } 5275 5276 5277 off_t 5278 vm_available_not_needed_memory(void) 5279 { 5280 MutexLocker locker(sAvailableMemoryLock); 5281 return sAvailableMemory - sNeededMemory; 5282 } 5283 5284 5285 size_t 5286 vm_kernel_address_space_left(void) 5287 { 5288 return sKernelAddressSpaceLeft; 5289 } 5290 5291 5292 void 5293 vm_unreserve_memory(size_t amount) 5294 { 5295 mutex_lock(&sAvailableMemoryLock); 5296 5297 sAvailableMemory += amount; 5298 5299 mutex_unlock(&sAvailableMemoryLock); 5300 } 5301 5302 5303 status_t 5304 vm_try_reserve_memory(size_t amount, bigtime_t timeout) 5305 { 5306 MutexLocker locker(sAvailableMemoryLock); 5307 5308 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5309 5310 if (sAvailableMemory >= amount) { 5311 sAvailableMemory -= amount; 5312 return B_OK; 5313 } 5314 5315 if (timeout <= 0) 5316 return B_NO_MEMORY; 5317 5318 // turn timeout into an absolute timeout 5319 timeout += system_time(); 5320 5321 // loop until we've got the 
memory or the timeout occurs 5322 do { 5323 sNeededMemory += amount; 5324 5325 // call the low resource manager 5326 locker.Unlock(); 5327 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5328 B_ABSOLUTE_TIMEOUT, timeout); 5329 locker.Lock(); 5330 5331 sNeededMemory -= amount; 5332 5333 if (sAvailableMemory >= amount) { 5334 sAvailableMemory -= amount; 5335 return B_OK; 5336 } 5337 } while (timeout > system_time()); 5338 5339 return B_NO_MEMORY; 5340 } 5341 5342 5343 status_t 5344 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type) 5345 { 5346 AddressSpaceReadLocker locker; 5347 vm_area* area; 5348 status_t status = locker.SetFromArea(id, area); 5349 if (status != B_OK) 5350 return status; 5351 5352 return arch_vm_set_memory_type(area, physicalBase, type); 5353 } 5354 5355 5356 /*! This function enforces some protection properties: 5357 - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well 5358 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5359 - if no protection is specified, it defaults to B_KERNEL_READ_AREA 5360 and B_KERNEL_WRITE_AREA. 5361 */ 5362 static void 5363 fix_protection(uint32* protection) 5364 { 5365 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5366 if ((*protection & B_USER_PROTECTION) == 0 5367 || (*protection & B_WRITE_AREA) != 0) 5368 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5369 else 5370 *protection |= B_KERNEL_READ_AREA; 5371 } 5372 } 5373 5374 5375 static void 5376 fill_area_info(struct vm_area* area, area_info* info, size_t size) 5377 { 5378 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5379 info->area = area->id; 5380 info->address = (void*)area->base; 5381 info->size = area->size; 5382 info->protection = area->protection; 5383 info->lock = B_FULL_LOCK; 5384 info->team = area->address_space->id; 5385 info->copy_count = 0; 5386 info->in_count = 0; 5387 info->out_count = 0; 5388 // TODO: retrieve real values here! 5389 5390 vm_cache* cache = vm_area_get_locked_cache(area); 5391 5392 // Note, this is a simplification; the cache could be larger than this area 5393 info->ram_size = cache->page_count * B_PAGE_SIZE; 5394 5395 vm_area_put_locked_cache(cache); 5396 } 5397 5398 5399 /*! 5400 Tests whether or not the area that contains the specified address 5401 needs any kind of locking, and actually exists. 5402 Used by both lock_memory() and unlock_memory(). 5403 */ 5404 static status_t 5405 test_lock_memory(vm_address_space* addressSpace, addr_t address, 5406 bool& needsLocking) 5407 { 5408 rw_lock_read_lock(&addressSpace->lock); 5409 5410 vm_area* area = vm_area_lookup(addressSpace, address); 5411 if (area != NULL) { 5412 // This determines if we need to lock the memory at all 5413 needsLocking = area->cache_type != CACHE_TYPE_NULL 5414 && area->cache_type != CACHE_TYPE_DEVICE 5415 && area->wiring != B_FULL_LOCK 5416 && area->wiring != B_CONTIGUOUS; 5417 } 5418 5419 rw_lock_read_unlock(&addressSpace->lock); 5420 5421 if (area == NULL) 5422 return B_BAD_ADDRESS; 5423 5424 return B_OK; 5425 } 5426 5427 5428 static status_t 5429 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5430 { 5431 // is newSize a multiple of B_PAGE_SIZE? 
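	// (B_PAGE_SIZE is a power of two, so the low bits must be zero)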
5432 if (newSize & (B_PAGE_SIZE - 1)) 5433 return B_BAD_VALUE; 5434 5435 // lock all affected address spaces and the cache 5436 vm_area* area; 5437 vm_cache* cache; 5438 5439 MultiAddressSpaceLocker locker; 5440 status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area, 5441 &cache); 5442 if (status != B_OK) 5443 return status; 5444 AreaCacheLocker cacheLocker(cache); // already locked 5445 5446 // enforce restrictions 5447 if (!kernel) { 5448 if ((area->protection & B_KERNEL_AREA) != 0) 5449 return B_NOT_ALLOWED; 5450 // TODO: Enforce all restrictions (team, etc.)! 5451 } 5452 5453 size_t oldSize = area->size; 5454 if (newSize == oldSize) 5455 return B_OK; 5456 5457 // Resize all areas of this area's cache 5458 5459 if (cache->type != CACHE_TYPE_RAM) 5460 return B_NOT_ALLOWED; 5461 5462 if (oldSize < newSize) { 5463 // We need to check if all areas of this cache can be resized 5464 5465 for (vm_area* current = cache->areas; current != NULL; 5466 current = current->cache_next) { 5467 vm_area* next = current->address_space_next; 5468 if (next != NULL && next->base <= (current->base + newSize)) { 5469 // If the area was created inside a reserved area, it can 5470 // also be resized in that area 5471 // TODO: if there is free space after the reserved area, it could 5472 // be used as well... 5473 if (next->id == RESERVED_AREA_ID 5474 && next->cache_offset <= current->base 5475 && next->base - 1 + next->size 5476 >= current->base - 1 + newSize) 5477 continue; 5478 5479 return B_ERROR; 5480 } 5481 } 5482 } 5483 5484 // Okay, looks good so far, so let's do it 5485 5486 if (oldSize < newSize) { 5487 // Growing the cache can fail, so we do it first. 5488 status = cache->Resize(cache->virtual_base + newSize); 5489 if (status != B_OK) 5490 return status; 5491 } 5492 5493 for (vm_area* current = cache->areas; current != NULL; 5494 current = current->cache_next) { 5495 vm_area* next = current->address_space_next; 5496 if (next != NULL && next->base <= (current->base + newSize)) { 5497 if (next->id == RESERVED_AREA_ID 5498 && next->cache_offset <= current->base 5499 && next->base - 1 + next->size >= current->base - 1 + newSize) { 5500 // resize reserved area 5501 addr_t offset = current->base + newSize - next->base; 5502 if (next->size <= offset) { 5503 current->address_space_next = next->address_space_next; 5504 free(next); 5505 } else { 5506 next->size -= offset; 5507 next->base += offset; 5508 } 5509 } else { 5510 panic("resize situation for area %p has changed although we " 5511 "should have the address space lock", current); 5512 status = B_ERROR; 5513 break; 5514 } 5515 } 5516 5517 current->size = newSize; 5518 5519 // We also need to unmap all pages beyond the new size, if the area has 5520 // shrinked 5521 if (newSize < oldSize) { 5522 vm_unmap_pages(current, current->base + newSize, oldSize - newSize, 5523 false); 5524 } 5525 } 5526 5527 // shrinking the cache can't fail, so we do it now 5528 if (status == B_OK && newSize < oldSize) 5529 status = cache->Resize(cache->virtual_base + newSize); 5530 5531 if (status < B_OK) { 5532 // This shouldn't really be possible, but hey, who knows 5533 for (vm_area* current = cache->areas; current != NULL; 5534 current = current->cache_next) { 5535 current->size = oldSize; 5536 } 5537 5538 cache->Resize(cache->virtual_base + oldSize); 5539 } 5540 5541 // TODO: we must honour the lock restrictions of this area 5542 return status; 5543 } 5544 5545 5546 status_t 5547 vm_memset_physical(addr_t address, int value, size_t length) 5548 { 5549 return 
vm_kernel_address_space()->translation_map.ops->memset_physical( 5550 address, value, length); 5551 } 5552 5553 5554 status_t 5555 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 5556 { 5557 return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical( 5558 to, from, length, user); 5559 } 5560 5561 5562 status_t 5563 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 5564 { 5565 return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical( 5566 to, _from, length, user); 5567 } 5568 5569 5570 void 5571 vm_memcpy_physical_page(addr_t to, addr_t from) 5572 { 5573 return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page( 5574 to, from); 5575 } 5576 5577 5578 // #pragma mark - kernel public API 5579 5580 5581 status_t 5582 user_memcpy(void* to, const void* from, size_t size) 5583 { 5584 // don't allow address overflows 5585 if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to) 5586 return B_BAD_ADDRESS; 5587 5588 if (arch_cpu_user_memcpy(to, from, size, 5589 &thread_get_current_thread()->fault_handler) < B_OK) 5590 return B_BAD_ADDRESS; 5591 5592 return B_OK; 5593 } 5594 5595 5596 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5597 the string in \a to, NULL-terminating the result. 5598 5599 \param to Pointer to the destination C-string. 5600 \param from Pointer to the source C-string. 5601 \param size Size in bytes of the string buffer pointed to by \a to. 5602 5603 \return strlen(\a from). 5604 */ 5605 ssize_t 5606 user_strlcpy(char* to, const char* from, size_t size) 5607 { 5608 if (to == NULL && size != 0) 5609 return B_BAD_VALUE; 5610 if (from == NULL) 5611 return B_BAD_ADDRESS; 5612 5613 // limit size to avoid address overflows 5614 size_t maxSize = std::min(size, 5615 ~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1); 5616 // NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from, 5617 // the source address might still overflow. 5618 5619 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize, 5620 &thread_get_current_thread()->fault_handler); 5621 5622 // If we hit the address overflow boundary, fail. 
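	// (i.e. the copy was cut short by our reduced maxSize rather than by the
	// caller's size)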
5623 if (result >= 0 && (size_t)result >= maxSize && maxSize < size) 5624 return B_BAD_ADDRESS; 5625 5626 return result; 5627 } 5628 5629 5630 status_t 5631 user_memset(void* s, char c, size_t count) 5632 { 5633 // don't allow address overflows 5634 if ((addr_t)s + count < (addr_t)s) 5635 return B_BAD_ADDRESS; 5636 5637 if (arch_cpu_user_memset(s, c, count, 5638 &thread_get_current_thread()->fault_handler) < B_OK) 5639 return B_BAD_ADDRESS; 5640 5641 return B_OK; 5642 } 5643 5644 5645 status_t 5646 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5647 { 5648 vm_address_space* addressSpace = NULL; 5649 struct vm_translation_map* map; 5650 addr_t unalignedBase = (addr_t)address; 5651 addr_t end = unalignedBase + numBytes; 5652 addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE); 5653 bool isUser = IS_USER_ADDRESS(address); 5654 bool needsLocking = true; 5655 5656 if (isUser) { 5657 if (team == B_CURRENT_TEAM) 5658 addressSpace = vm_get_current_user_address_space(); 5659 else 5660 addressSpace = vm_get_address_space(team); 5661 } else 5662 addressSpace = vm_get_kernel_address_space(); 5663 if (addressSpace == NULL) 5664 return B_ERROR; 5665 5666 // test if we're on an area that allows faults at all 5667 5668 map = &addressSpace->translation_map; 5669 5670 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5671 if (status < B_OK) 5672 goto out; 5673 if (!needsLocking) 5674 goto out; 5675 5676 for (; base < end; base += B_PAGE_SIZE) { 5677 addr_t physicalAddress; 5678 uint32 protection; 5679 status_t status; 5680 5681 map->ops->lock(map); 5682 status = map->ops->query(map, base, &physicalAddress, &protection); 5683 map->ops->unlock(map); 5684 5685 if (status < B_OK) 5686 goto out; 5687 5688 if ((protection & PAGE_PRESENT) != 0) { 5689 // if B_READ_DEVICE is set, the caller intents to write to the locked 5690 // memory, so if it hasn't been mapped writable, we'll try the soft 5691 // fault anyway 5692 if ((flags & B_READ_DEVICE) == 0 5693 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 5694 // update wiring 5695 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5696 if (page == NULL) 5697 panic("couldn't lookup physical page just allocated\n"); 5698 5699 increment_page_wired_count(page); 5700 continue; 5701 } 5702 } 5703 5704 status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0, 5705 isUser); 5706 if (status != B_OK) { 5707 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) " 5708 "failed: %s\n", (void*)unalignedBase, numBytes, flags, 5709 strerror(status)); 5710 goto out; 5711 } 5712 5713 // TODO: Here's a race condition. We should probably add a parameter 5714 // to vm_soft_fault() that would cause the page's wired count to be 5715 // incremented immediately. 5716 // TODO: After memory has been locked in an area, we need to prevent the 5717 // area from being deleted, resized, cut, etc. That could be done using 5718 // a "locked pages" count in vm_area, and maybe a condition variable, if 5719 // we want to allow waiting for the area to become eligible for these 5720 // operations again. 
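		// Re-query the mapping established by the soft fault so that we can
		// wire the page it mapped in.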
5721 5722 map->ops->lock(map); 5723 status = map->ops->query(map, base, &physicalAddress, &protection); 5724 map->ops->unlock(map); 5725 5726 if (status < B_OK) 5727 goto out; 5728 5729 // update wiring 5730 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5731 if (page == NULL) 5732 panic("couldn't lookup physical page"); 5733 5734 increment_page_wired_count(page); 5735 // TODO: needs to be atomic on all platforms! 5736 } 5737 5738 out: 5739 vm_put_address_space(addressSpace); 5740 return status; 5741 } 5742 5743 5744 status_t 5745 lock_memory(void* address, size_t numBytes, uint32 flags) 5746 { 5747 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5748 } 5749 5750 5751 status_t 5752 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5753 { 5754 vm_address_space* addressSpace = NULL; 5755 struct vm_translation_map* map; 5756 addr_t unalignedBase = (addr_t)address; 5757 addr_t end = unalignedBase + numBytes; 5758 addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE); 5759 bool needsLocking = true; 5760 5761 if (IS_USER_ADDRESS(address)) { 5762 if (team == B_CURRENT_TEAM) 5763 addressSpace = vm_get_current_user_address_space(); 5764 else 5765 addressSpace = vm_get_address_space(team); 5766 } else 5767 addressSpace = vm_get_kernel_address_space(); 5768 if (addressSpace == NULL) 5769 return B_ERROR; 5770 5771 map = &addressSpace->translation_map; 5772 5773 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5774 if (status < B_OK) 5775 goto out; 5776 if (!needsLocking) 5777 goto out; 5778 5779 for (; base < end; base += B_PAGE_SIZE) { 5780 map->ops->lock(map); 5781 5782 addr_t physicalAddress; 5783 uint32 protection; 5784 status = map->ops->query(map, base, &physicalAddress, 5785 &protection); 5786 5787 map->ops->unlock(map); 5788 5789 if (status < B_OK) 5790 goto out; 5791 if ((protection & PAGE_PRESENT) == 0) 5792 panic("calling unlock_memory() on unmapped memory!"); 5793 5794 // update wiring 5795 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5796 if (page == NULL) 5797 panic("couldn't lookup physical page"); 5798 5799 decrement_page_wired_count(page); 5800 } 5801 5802 out: 5803 vm_put_address_space(addressSpace); 5804 return status; 5805 } 5806 5807 5808 status_t 5809 unlock_memory(void* address, size_t numBytes, uint32 flags) 5810 { 5811 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5812 } 5813 5814 5815 /*! Similar to get_memory_map(), but also allows to specify the address space 5816 for the memory in question and has a saner semantics. 5817 Returns \c B_OK when the complete range could be translated or 5818 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5819 case the actual number of entries is written to \c *_numEntries. Any other 5820 error case indicates complete failure; \c *_numEntries will be set to \c 0 5821 in this case. 
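	A minimal usage sketch (\c buffer and \c bufferSize are placeholders):
		physical_entry table[8];
		uint32 numEntries = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer,
			bufferSize, table, &numEntries);
		// on B_OK the first numEntries elements of table are valid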
5822 */ 5823 status_t 5824 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5825 physical_entry* table, uint32* _numEntries) 5826 { 5827 uint32 numEntries = *_numEntries; 5828 *_numEntries = 0; 5829 5830 vm_address_space* addressSpace; 5831 addr_t virtualAddress = (addr_t)address; 5832 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5833 addr_t physicalAddress; 5834 status_t status = B_OK; 5835 int32 index = -1; 5836 addr_t offset = 0; 5837 bool interrupts = are_interrupts_enabled(); 5838 5839 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5840 address, numBytes, numEntries)); 5841 5842 if (numEntries == 0 || numBytes == 0) 5843 return B_BAD_VALUE; 5844 5845 // in which address space is the address to be found? 5846 if (IS_USER_ADDRESS(virtualAddress)) { 5847 if (team == B_CURRENT_TEAM) 5848 addressSpace = vm_get_current_user_address_space(); 5849 else 5850 addressSpace = vm_get_address_space(team); 5851 } else 5852 addressSpace = vm_get_kernel_address_space(); 5853 5854 if (addressSpace == NULL) 5855 return B_ERROR; 5856 5857 vm_translation_map* map = &addressSpace->translation_map; 5858 5859 if (interrupts) 5860 map->ops->lock(map); 5861 5862 while (offset < numBytes) { 5863 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5864 uint32 flags; 5865 5866 if (interrupts) { 5867 status = map->ops->query(map, (addr_t)address + offset, 5868 &physicalAddress, &flags); 5869 } else { 5870 status = map->ops->query_interrupt(map, (addr_t)address + offset, 5871 &physicalAddress, &flags); 5872 } 5873 if (status < B_OK) 5874 break; 5875 if ((flags & PAGE_PRESENT) == 0) { 5876 panic("get_memory_map() called on unmapped memory!"); 5877 return B_BAD_ADDRESS; 5878 } 5879 5880 if (index < 0 && pageOffset > 0) { 5881 physicalAddress += pageOffset; 5882 if (bytes > B_PAGE_SIZE - pageOffset) 5883 bytes = B_PAGE_SIZE - pageOffset; 5884 } 5885 5886 // need to switch to the next physical_entry? 5887 if (index < 0 || (addr_t)table[index].address 5888 != physicalAddress - table[index].size) { 5889 if ((uint32)++index + 1 > numEntries) { 5890 // table to small 5891 status = B_BUFFER_OVERFLOW; 5892 break; 5893 } 5894 table[index].address = (void*)physicalAddress; 5895 table[index].size = bytes; 5896 } else { 5897 // page does fit in current entry 5898 table[index].size += bytes; 5899 } 5900 5901 offset += bytes; 5902 } 5903 5904 if (interrupts) 5905 map->ops->unlock(map); 5906 5907 if (status != B_OK) 5908 return status; 5909 5910 if ((uint32)index + 1 > numEntries) { 5911 *_numEntries = index; 5912 return B_BUFFER_OVERFLOW; 5913 } 5914 5915 *_numEntries = index + 1; 5916 return B_OK; 5917 } 5918 5919 5920 /*! According to the BeBook, this function should always succeed. 5921 This is no longer the case. 
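	A minimal usage sketch (\c buffer and \c bufferSize are placeholders):
		physical_entry table[4];
		if (get_memory_map(buffer, bufferSize, table, 4) == B_OK) {
			// when more than one entry is requested, the returned list is
			// terminated by an entry with size 0
		}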
5922 */ 5923 long 5924 get_memory_map(const void* address, ulong numBytes, physical_entry* table, 5925 long numEntries) 5926 { 5927 uint32 entriesRead = numEntries; 5928 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5929 table, &entriesRead); 5930 if (error != B_OK) 5931 return error; 5932 5933 // close the entry list 5934 5935 // if it's only one entry, we will silently accept the missing ending 5936 if (numEntries == 1) 5937 return B_OK; 5938 5939 if (entriesRead + 1 > (uint32)numEntries) 5940 return B_BUFFER_OVERFLOW; 5941 5942 table[entriesRead].address = NULL; 5943 table[entriesRead].size = 0; 5944 5945 return B_OK; 5946 } 5947 5948 5949 area_id 5950 area_for(void* address) 5951 { 5952 return vm_area_for((addr_t)address, true); 5953 } 5954 5955 5956 area_id 5957 find_area(const char* name) 5958 { 5959 rw_lock_read_lock(&sAreaHashLock); 5960 struct hash_iterator iterator; 5961 hash_open(sAreaHash, &iterator); 5962 5963 vm_area* area; 5964 area_id id = B_NAME_NOT_FOUND; 5965 while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) { 5966 if (area->id == RESERVED_AREA_ID) 5967 continue; 5968 5969 if (!strcmp(area->name, name)) { 5970 id = area->id; 5971 break; 5972 } 5973 } 5974 5975 hash_close(sAreaHash, &iterator, false); 5976 rw_lock_read_unlock(&sAreaHashLock); 5977 5978 return id; 5979 } 5980 5981 5982 status_t 5983 _get_area_info(area_id id, area_info* info, size_t size) 5984 { 5985 if (size != sizeof(area_info) || info == NULL) 5986 return B_BAD_VALUE; 5987 5988 AddressSpaceReadLocker locker; 5989 vm_area* area; 5990 status_t status = locker.SetFromArea(id, area); 5991 if (status != B_OK) 5992 return status; 5993 5994 fill_area_info(area, info, size); 5995 return B_OK; 5996 } 5997 5998 5999 status_t 6000 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 6001 { 6002 addr_t nextBase = *(addr_t*)cookie; 6003 6004 // we're already through the list 6005 if (nextBase == (addr_t)-1) 6006 return B_ENTRY_NOT_FOUND; 6007 6008 if (team == B_CURRENT_TEAM) 6009 team = team_get_current_team_id(); 6010 6011 AddressSpaceReadLocker locker(team); 6012 if (!locker.IsLocked()) 6013 return B_BAD_TEAM_ID; 6014 6015 vm_area* area; 6016 for (area = locker.AddressSpace()->areas; area != NULL; 6017 area = area->address_space_next) { 6018 if (area->id == RESERVED_AREA_ID) 6019 continue; 6020 6021 if (area->base > nextBase) 6022 break; 6023 } 6024 6025 if (area == NULL) { 6026 nextBase = (addr_t)-1; 6027 return B_ENTRY_NOT_FOUND; 6028 } 6029 6030 fill_area_info(area, info, size); 6031 *cookie = (int32)(area->base); 6032 6033 return B_OK; 6034 } 6035 6036 6037 status_t 6038 set_area_protection(area_id area, uint32 newProtection) 6039 { 6040 fix_protection(&newProtection); 6041 6042 return vm_set_area_protection(vm_kernel_address_space_id(), area, 6043 newProtection, true); 6044 } 6045 6046 6047 status_t 6048 resize_area(area_id areaID, size_t newSize) 6049 { 6050 return vm_resize_area(areaID, newSize, true); 6051 } 6052 6053 6054 /*! Transfers the specified area to a new team. The caller must be the owner 6055 of the area (not yet enforced but probably should be). 
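	A minimal usage sketch (\c sourceArea and \c targetTeam are placeholders):
		void* address;
		area_id newArea = transfer_area(sourceArea, &address, B_ANY_ADDRESS,
			targetTeam, true);
		if (newArea < 0) {
			// the transfer failed; the area still belongs to the original team
		}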
*/
area_id
transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
	bool kernel)
{
	area_info info;
	status_t status = get_area_info(id, &info);
	if (status != B_OK)
		return status;

	area_id clonedArea = vm_clone_area(target, info.name, _address,
		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
	if (clonedArea < 0)
		return clonedArea;

	status = vm_delete_area(info.team, id, kernel);
	if (status != B_OK) {
		vm_delete_area(target, clonedArea, kernel);
		return status;
	}

	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.

	return clonedArea;
}


area_id
map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
	uint32 addressSpec, uint32 protection, void** _virtualAddress)
{
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	fix_protection(&protection);

	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
		_virtualAddress, addressSpec, numBytes, protection,
		(addr_t)physicalAddress);
}


area_id
clone_area(const char* name, void** _address, uint32 addressSpec,
	uint32 protection, area_id source)
{
	if ((protection & B_KERNEL_PROTECTION) == 0)
		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;

	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
}


area_id
create_area_etc(team_id team, const char* name, void** address,
	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
	addr_t physicalAddress, uint32 flags)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
		size, lock, protection, physicalAddress, flags, true);
}


area_id
create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
	uint32 lock, uint32 protection)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name,
		_address, addressSpec, size, lock, protection, 0, 0, true);
}


status_t
delete_area(area_id area)
{
	return vm_delete_area(vm_kernel_address_space_id(), area, true);
}


// #pragma mark - Userland syscalls


status_t
_user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
	addr_t size)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	addr_t address;

	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
		return B_BAD_ADDRESS;

	status_t status = vm_reserve_address_range(
		vm_current_user_address_space_id(), (void**)&address, addressSpec,
		size, RESERVED_AVOID_BASE);
	if (status != B_OK)
		return status;

	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
		vm_unreserve_address_range(vm_current_user_address_space_id(),
			(void*)address, size);
		return B_BAD_ADDRESS;
	}

	return B_OK;
}


status_t
_user_unreserve_address_range(addr_t address, addr_t size)
{
	return
		vm_unreserve_address_range(vm_current_user_address_space_id(),
			(void*)address, size);
}


area_id
_user_area_for(void* address)
{
	return vm_area_for((addr_t)address, false);
}


area_id
_user_find_area(const char* userName)
{
	char name[B_OS_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return find_area(name);
}


status_t
_user_get_area_info(area_id area, area_info* userInfo)
{
	if (!IS_USER_ADDRESS(userInfo))
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = get_area_info(area, &info);
	if (status < B_OK)
		return status;

	// TODO: do we want to prevent userland from seeing kernel protections?
	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
{
	int32 cookie;

	if (!IS_USER_ADDRESS(userCookie)
		|| !IS_USER_ADDRESS(userInfo)
		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = _get_next_area_info(team, &cookie, &info,
		sizeof(area_info));
	if (status != B_OK)
		return status;

	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_set_area_protection(area_id area, uint32 newProtection)
{
	if ((newProtection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	fix_protection(&newProtection);

	return vm_set_area_protection(vm_current_user_address_space_id(), area,
		newProtection, false);
}


status_t
_user_resize_area(area_id area, size_t newSize)
{
	// TODO: Since we restrict deleting of areas to those owned by the team,
	// we should also do that for resizing (check other functions, too).
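	// A conceivable shape of such a check, left here as a sketch only (it
	// assumes the area has been looked up as a vm_area* under the address
	// space lock, and that vm_address_space exposes its owning team as "id"):
	//
	//	if (vmArea->address_space->id != team_get_current_team_id())
	//		return B_NOT_ALLOWED;
	//
	// For now the request is simply forwarded; the trailing "false" marks it
	// as coming from userland.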
	return vm_resize_area(area, newSize, false);
}


area_id
_user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
	team_id target)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	void* address;
	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
	if (newArea < B_OK)
		return newArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return newArea;
}


area_id
_user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
	uint32 protection, area_id sourceArea)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	fix_protection(&protection);

	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
		false);
	if (clonedArea < B_OK)
		return clonedArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(clonedArea);
		return B_BAD_ADDRESS;
	}

	return clonedArea;
}


area_id
_user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
	size_t size, uint32 lock, uint32 protection)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS
		&& IS_KERNEL_ADDRESS(address))
		return B_BAD_VALUE;

	fix_protection(&protection);

	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
		(char*)name, &address, addressSpec, size, lock, protection, 0, 0,
		false);

	if (area >= B_OK
		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(area);
		return B_BAD_ADDRESS;
	}

	return area;
}


status_t
_user_delete_area(area_id area)
{
	// Unlike the BeOS implementation, you can now only delete areas
	// that you have created yourself from userland.
	// The documentation to delete_area() explicitly states that this
	// will be restricted in the future, and so it will.
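	// The area ID is resolved against the calling team's address space, and
	// the trailing "false" marks the request as coming from userland.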
	return vm_delete_area(vm_current_user_address_space_id(), area, false);
}


// TODO: create a BeOS style call for this!

area_id
_user_map_file(const char* userName, void** userAddress, int addressSpec,
	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
	off_t offset)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	area_id area;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS) {
		if ((addr_t)address + size < (addr_t)address)
			return B_BAD_VALUE;
		if (!IS_USER_ADDRESS(address)
			|| !IS_USER_ADDRESS((addr_t)address + size)) {
			return B_BAD_ADDRESS;
		}
	}

	// userland created areas can always be accessed by the kernel
	protection |= B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
		false);
	if (area < B_OK)
		return area;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return area;
}


status_t
_user_unmap_memory(void* _address, size_t size)
{
	addr_t address = (addr_t)_address;

	// check params
	if (size == 0 || (addr_t)address + size < (addr_t)address)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
		return B_BAD_ADDRESS;

	// write lock the address space
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// unmap
	return unmap_address_range(locker.AddressSpace(), address, size, false);
}


status_t
_user_set_memory_protection(void* _address, size_t size, int protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	// extend and check protection
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 actualProtection = protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	if (!arch_vm_supports_protection(actualProtection))
		return B_NOT_SUPPORTED;

	// We need to write lock the address space, since we're going to play with
	// the areas.
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// First round: Check whether the whole range is covered by areas and we
	// are allowed to modify them.
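	// This pass only validates; nothing has been modified yet, so we can
	// still bail out without leaving the range in a half-changed state.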
	addr_t currentAddress = address;
	size_t sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		if ((area->protection & B_KERNEL_AREA) != 0)
			return B_NOT_ALLOWED;

		// TODO: For (shared) mapped files we should check whether the new
		// protections are compatible with the file permissions. We don't have
		// a way to do that yet, though.

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;
	}

	// Second round: If the protections differ from those of the area, create
	// a page protection array and re-map mapped pages.
	vm_translation_map* map = &locker.AddressSpace()->translation_map;
	currentAddress = address;
	sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;

		if (area->page_protections == NULL) {
			if (area->protection == actualProtection)
				continue;

			// In the page protections we store only the three user
			// protections, so we use 4 bits per page.
			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
			area->page_protections = (uint8*)malloc(bytes);
			if (area->page_protections == NULL)
				return B_NO_MEMORY;

			// init the page protections for all pages to that of the area
			uint32 areaProtection = area->protection
				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
			memset(area->page_protections,
				areaProtection | (areaProtection << 4), bytes);
		}

		for (addr_t pageAddress = area->base + offset;
				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
			map->ops->lock(map);

			set_area_page_protection(area, pageAddress, protection);

			addr_t physicalAddress;
			uint32 flags;

			status_t error = map->ops->query(map, pageAddress,
				&physicalAddress, &flags);
			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
				map->ops->unlock(map);
				continue;
			}

			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page == NULL) {
				panic("area %p looking up page failed for pa 0x%lx\n", area,
					physicalAddress);
				map->ops->unlock(map);
				return B_ERROR;
			}

			// If the page is not in the topmost cache and write access is
			// requested, we have to unmap it. Otherwise we can re-map it with
			// the new protection.
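			// Unmapping means the next write access will fault, and the
			// fault handler can then copy the page into the area's top cache
			// before it ever becomes writable.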
			bool unmapPage = page->cache != area->cache
				&& (protection & B_WRITE_AREA) != 0;

			if (!unmapPage) {
				map->ops->unmap(map, pageAddress,
					pageAddress + B_PAGE_SIZE - 1);
				map->ops->map(map, pageAddress, physicalAddress,
					actualProtection);
			}

			map->ops->unlock(map);

			if (unmapPage)
				vm_unmap_page(area, pageAddress, true);
		}
	}

	return B_OK;
}


status_t
_user_sync_memory(void* _address, size_t size, int flags)
{
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	// check params
	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	bool writeSync = (flags & MS_SYNC) != 0;
	bool writeAsync = (flags & MS_ASYNC) != 0;
	if (writeSync && writeAsync)
		return B_BAD_VALUE;

	if (size == 0 || (!writeSync && !writeAsync))
		return B_OK;

	// iterate through the range and sync all concerned areas
	while (size > 0) {
		// read lock the address space
		AddressSpaceReadLocker locker;
		status_t error = locker.SetTo(team_get_current_team_id());
		if (error != B_OK)
			return error;

		// get the first area
		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
		if (area == NULL)
			return B_NO_MEMORY;

		uint32 offset = address - area->base;
		size_t rangeSize = min_c(area->size - offset, size);
		offset += area->cache_offset;

		// lock the cache
		AreaCacheLocker cacheLocker(area);
		if (!cacheLocker)
			return B_BAD_VALUE;
		vm_cache* cache = area->cache;

		locker.Unlock();

		uint32 firstPage = offset >> PAGE_SHIFT;
		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);

		// write the pages
		if (cache->type == CACHE_TYPE_VNODE) {
			if (writeSync) {
				// synchronous
				error = vm_page_write_modified_page_range(cache, firstPage,
					endPage);
				if (error != B_OK)
					return error;
			} else {
				// asynchronous
				vm_page_schedule_write_page_range(cache, firstPage, endPage);
				// TODO: This is probably not quite what is supposed to
				// happen. Especially when a lot has to be written, it might
				// take ages until it really hits the disk.
			}
		}

		address += rangeSize;
		size -= rangeSize;
	}

	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
	// synchronize multiple mappings of the same file. In our VM they never
	// get out of sync, though, so we don't have to do anything.

	return B_OK;
}


status_t
_user_memory_advice(void* address, size_t size, int advice)
{
	// TODO: Implement!
	return B_OK;
}