1 /* 2 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 * 5 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 6 * Distributed under the terms of the NewOS License. 7 */ 8 9 10 #include <vm.h> 11 12 #include <ctype.h> 13 #include <stdlib.h> 14 #include <stdio.h> 15 #include <string.h> 16 #include <sys/mman.h> 17 18 #include <OS.h> 19 #include <KernelExport.h> 20 21 #include <AutoDeleter.h> 22 23 #include <arch/cpu.h> 24 #include <arch/vm.h> 25 #include <boot/elf.h> 26 #include <boot/stage2.h> 27 #include <condition_variable.h> 28 #include <console.h> 29 #include <debug.h> 30 #include <file_cache.h> 31 #include <fs/fd.h> 32 #include <heap.h> 33 #include <int.h> 34 #include <lock.h> 35 #include <low_resource_manager.h> 36 #include <smp.h> 37 #include <system_info.h> 38 #include <thread.h> 39 #include <team.h> 40 #include <tracing.h> 41 #include <util/AutoLock.h> 42 #include <util/khash.h> 43 #include <vm_address_space.h> 44 #include <vm_cache.h> 45 #include <vm_page.h> 46 #include <vm_priv.h> 47 48 #include "VMAnonymousCache.h" 49 #include "IORequest.h" 50 51 52 //#define TRACE_VM 53 //#define TRACE_FAULTS 54 #ifdef TRACE_VM 55 # define TRACE(x) dprintf x 56 #else 57 # define TRACE(x) ; 58 #endif 59 #ifdef TRACE_FAULTS 60 # define FTRACE(x) dprintf x 61 #else 62 # define FTRACE(x) ; 63 #endif 64 65 #define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1)) 66 #define ROUNDOWN(a, b) (((a) / (b)) * (b)) 67 68 69 class AddressSpaceReadLocker { 70 public: 71 AddressSpaceReadLocker(team_id team); 72 AddressSpaceReadLocker(vm_address_space* space, bool getNewReference); 73 AddressSpaceReadLocker(); 74 ~AddressSpaceReadLocker(); 75 76 status_t SetTo(team_id team); 77 void SetTo(vm_address_space* space, bool getNewReference); 78 status_t SetFromArea(area_id areaID, vm_area*& area); 79 80 bool IsLocked() const { return fLocked; } 81 void Unlock(); 82 83 void Unset(); 84 85 vm_address_space* AddressSpace() { return fSpace; } 86 87 private: 88 vm_address_space* fSpace; 89 bool fLocked; 90 }; 91 92 class AddressSpaceWriteLocker { 93 public: 94 AddressSpaceWriteLocker(team_id team); 95 AddressSpaceWriteLocker(); 96 ~AddressSpaceWriteLocker(); 97 98 status_t SetTo(team_id team); 99 status_t SetFromArea(area_id areaID, vm_area*& area); 100 status_t SetFromArea(team_id team, area_id areaID, bool allowKernel, 101 vm_area*& area); 102 status_t SetFromArea(team_id team, area_id areaID, vm_area*& area); 103 104 bool IsLocked() const { return fLocked; } 105 void Unlock(); 106 107 void DegradeToReadLock(); 108 void Unset(); 109 110 vm_address_space* AddressSpace() { return fSpace; } 111 112 private: 113 vm_address_space* fSpace; 114 bool fLocked; 115 bool fDegraded; 116 }; 117 118 class MultiAddressSpaceLocker { 119 public: 120 MultiAddressSpaceLocker(); 121 ~MultiAddressSpaceLocker(); 122 123 inline status_t AddTeam(team_id team, bool writeLock, 124 vm_address_space** _space = NULL); 125 inline status_t AddArea(area_id area, bool writeLock, 126 vm_address_space** _space = NULL); 127 128 status_t AddAreaCacheAndLock(area_id areaID, bool writeLockThisOne, 129 bool writeLockOthers, vm_area*& _area, vm_cache** _cache = NULL, 130 bool checkNoCacheChange = false); 131 132 status_t Lock(); 133 void Unlock(); 134 bool IsLocked() const { return fLocked; } 135 136 void Unset(); 137 138 private: 139 struct lock_item { 140 vm_address_space* space; 141 bool write_lock; 142 }; 143 144 bool _ResizeIfNeeded(); 145 int32 
_IndexOfAddressSpace(vm_address_space* space) const; 146 status_t _AddAddressSpace(vm_address_space* space, bool writeLock, 147 vm_address_space** _space); 148 149 static int _CompareItems(const void* _a, const void* _b); 150 151 lock_item* fItems; 152 int32 fCapacity; 153 int32 fCount; 154 bool fLocked; 155 }; 156 157 158 class AreaCacheLocking { 159 public: 160 inline bool Lock(vm_cache* lockable) 161 { 162 return false; 163 } 164 165 inline void Unlock(vm_cache* lockable) 166 { 167 vm_area_put_locked_cache(lockable); 168 } 169 }; 170 171 class AreaCacheLocker : public AutoLocker<vm_cache, AreaCacheLocking> { 172 public: 173 inline AreaCacheLocker(vm_cache* cache = NULL) 174 : AutoLocker<vm_cache, AreaCacheLocking>(cache, true) 175 { 176 } 177 178 inline AreaCacheLocker(vm_area* area) 179 : AutoLocker<vm_cache, AreaCacheLocking>() 180 { 181 SetTo(area); 182 } 183 184 inline void SetTo(vm_area* area) 185 { 186 return AutoLocker<vm_cache, AreaCacheLocking>::SetTo( 187 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true); 188 } 189 }; 190 191 192 #define AREA_HASH_TABLE_SIZE 1024 193 static area_id sNextAreaID = 1; 194 static hash_table* sAreaHash; 195 static rw_lock sAreaHashLock = RW_LOCK_INITIALIZER("area hash"); 196 static mutex sMappingLock = MUTEX_INITIALIZER("page mappings"); 197 static mutex sAreaCacheLock = MUTEX_INITIALIZER("area->cache"); 198 199 static off_t sAvailableMemory; 200 static off_t sNeededMemory; 201 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 202 static uint32 sPageFaults; 203 204 #if DEBUG_CACHE_LIST 205 206 struct cache_info { 207 vm_cache* cache; 208 addr_t page_count; 209 addr_t committed; 210 }; 211 212 static const int kCacheInfoTableCount = 100 * 1024; 213 static cache_info* sCacheInfoTable; 214 215 #endif // DEBUG_CACHE_LIST 216 217 218 // function declarations 219 static void delete_area(vm_address_space* addressSpace, vm_area* area); 220 static vm_address_space* get_address_space_by_area_id(area_id id); 221 static status_t vm_soft_fault(vm_address_space* addressSpace, addr_t address, 222 bool isWrite, bool isUser); 223 static status_t map_backing_store(vm_address_space* addressSpace, 224 vm_cache* cache, void** _virtualAddress, off_t offset, addr_t size, 225 uint32 addressSpec, int wiring, int protection, int mapping, 226 vm_area** _area, const char* areaName, bool unmapAddressRange, bool kernel); 227 228 229 // #pragma mark - 230 231 232 AddressSpaceReadLocker::AddressSpaceReadLocker(team_id team) 233 : 234 fSpace(NULL), 235 fLocked(false) 236 { 237 SetTo(team); 238 } 239 240 241 /*! Takes over the reference of the address space, if \a getNewReference is 242 \c false. 243 */ 244 AddressSpaceReadLocker::AddressSpaceReadLocker(vm_address_space* space, 245 bool getNewReference) 246 : 247 fSpace(NULL), 248 fLocked(false) 249 { 250 SetTo(space, getNewReference); 251 } 252 253 254 AddressSpaceReadLocker::AddressSpaceReadLocker() 255 : 256 fSpace(NULL), 257 fLocked(false) 258 { 259 } 260 261 262 AddressSpaceReadLocker::~AddressSpaceReadLocker() 263 { 264 Unset(); 265 } 266 267 268 void 269 AddressSpaceReadLocker::Unset() 270 { 271 Unlock(); 272 if (fSpace != NULL) 273 vm_put_address_space(fSpace); 274 } 275 276 277 status_t 278 AddressSpaceReadLocker::SetTo(team_id team) 279 { 280 fSpace = vm_get_address_space(team); 281 if (fSpace == NULL) 282 return B_BAD_TEAM_ID; 283 284 rw_lock_read_lock(&fSpace->lock); 285 fLocked = true; 286 return B_OK; 287 } 288 289 290 /*! 
Takes over the reference of the address space, if \a getNewReference is 291 \c false. 292 */ 293 void 294 AddressSpaceReadLocker::SetTo(vm_address_space* space, bool getNewReference) 295 { 296 fSpace = space; 297 298 if (getNewReference) 299 atomic_add(&fSpace->ref_count, 1); 300 301 rw_lock_read_lock(&fSpace->lock); 302 fLocked = true; 303 } 304 305 306 status_t 307 AddressSpaceReadLocker::SetFromArea(area_id areaID, vm_area*& area) 308 { 309 fSpace = get_address_space_by_area_id(areaID); 310 if (fSpace == NULL) 311 return B_BAD_TEAM_ID; 312 313 rw_lock_read_lock(&fSpace->lock); 314 315 rw_lock_read_lock(&sAreaHashLock); 316 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 317 rw_lock_read_unlock(&sAreaHashLock); 318 319 if (area == NULL || area->address_space != fSpace) { 320 rw_lock_read_unlock(&fSpace->lock); 321 return B_BAD_VALUE; 322 } 323 324 fLocked = true; 325 return B_OK; 326 } 327 328 329 void 330 AddressSpaceReadLocker::Unlock() 331 { 332 if (fLocked) { 333 rw_lock_read_unlock(&fSpace->lock); 334 fLocked = false; 335 } 336 } 337 338 339 // #pragma mark - 340 341 342 AddressSpaceWriteLocker::AddressSpaceWriteLocker(team_id team) 343 : 344 fSpace(NULL), 345 fLocked(false), 346 fDegraded(false) 347 { 348 SetTo(team); 349 } 350 351 352 AddressSpaceWriteLocker::AddressSpaceWriteLocker() 353 : 354 fSpace(NULL), 355 fLocked(false), 356 fDegraded(false) 357 { 358 } 359 360 361 AddressSpaceWriteLocker::~AddressSpaceWriteLocker() 362 { 363 Unset(); 364 } 365 366 367 void 368 AddressSpaceWriteLocker::Unset() 369 { 370 Unlock(); 371 if (fSpace != NULL) 372 vm_put_address_space(fSpace); 373 } 374 375 376 status_t 377 AddressSpaceWriteLocker::SetTo(team_id team) 378 { 379 fSpace = vm_get_address_space(team); 380 if (fSpace == NULL) 381 return B_BAD_TEAM_ID; 382 383 rw_lock_write_lock(&fSpace->lock); 384 fLocked = true; 385 return B_OK; 386 } 387 388 389 status_t 390 AddressSpaceWriteLocker::SetFromArea(area_id areaID, vm_area*& area) 391 { 392 fSpace = get_address_space_by_area_id(areaID); 393 if (fSpace == NULL) 394 return B_BAD_VALUE; 395 396 rw_lock_write_lock(&fSpace->lock); 397 398 rw_lock_read_lock(&sAreaHashLock); 399 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 400 rw_lock_read_unlock(&sAreaHashLock); 401 402 if (area == NULL || area->address_space != fSpace) { 403 rw_lock_write_unlock(&fSpace->lock); 404 return B_BAD_VALUE; 405 } 406 407 fLocked = true; 408 return B_OK; 409 } 410 411 412 status_t 413 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 414 bool allowKernel, vm_area*& area) 415 { 416 rw_lock_read_lock(&sAreaHashLock); 417 418 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 419 if (area != NULL 420 && (area->address_space->id == team 421 || (allowKernel && team == vm_kernel_address_space_id()))) { 422 fSpace = area->address_space; 423 atomic_add(&fSpace->ref_count, 1); 424 } 425 426 rw_lock_read_unlock(&sAreaHashLock); 427 428 if (fSpace == NULL) 429 return B_BAD_VALUE; 430 431 // Second try to get the area -- this time with the address space 432 // write lock held 433 434 rw_lock_write_lock(&fSpace->lock); 435 436 rw_lock_read_lock(&sAreaHashLock); 437 area = (vm_area*)hash_lookup(sAreaHash, &areaID); 438 rw_lock_read_unlock(&sAreaHashLock); 439 440 if (area == NULL) { 441 rw_lock_write_unlock(&fSpace->lock); 442 return B_BAD_VALUE; 443 } 444 445 fLocked = true; 446 return B_OK; 447 } 448 449 450 status_t 451 AddressSpaceWriteLocker::SetFromArea(team_id team, area_id areaID, 452 vm_area*& area) 453 { 454 return SetFromArea(team, areaID, false, 
area); 455 } 456 457 458 void 459 AddressSpaceWriteLocker::Unlock() 460 { 461 if (fLocked) { 462 if (fDegraded) 463 rw_lock_read_unlock(&fSpace->lock); 464 else 465 rw_lock_write_unlock(&fSpace->lock); 466 fLocked = false; 467 fDegraded = false; 468 } 469 } 470 471 472 void 473 AddressSpaceWriteLocker::DegradeToReadLock() 474 { 475 // TODO: the current R/W lock implementation just keeps the write lock here 476 rw_lock_read_lock(&fSpace->lock); 477 rw_lock_write_unlock(&fSpace->lock); 478 fDegraded = true; 479 } 480 481 482 // #pragma mark - 483 484 485 MultiAddressSpaceLocker::MultiAddressSpaceLocker() 486 : 487 fItems(NULL), 488 fCapacity(0), 489 fCount(0), 490 fLocked(false) 491 { 492 } 493 494 495 MultiAddressSpaceLocker::~MultiAddressSpaceLocker() 496 { 497 Unset(); 498 free(fItems); 499 } 500 501 502 /*static*/ int 503 MultiAddressSpaceLocker::_CompareItems(const void* _a, const void* _b) 504 { 505 lock_item* a = (lock_item*)_a; 506 lock_item* b = (lock_item*)_b; 507 return a->space->id - b->space->id; 508 } 509 510 511 bool 512 MultiAddressSpaceLocker::_ResizeIfNeeded() 513 { 514 if (fCount == fCapacity) { 515 lock_item* items = (lock_item*)realloc(fItems, 516 (fCapacity + 4) * sizeof(lock_item)); 517 if (items == NULL) 518 return false; 519 520 fCapacity += 4; 521 fItems = items; 522 } 523 524 return true; 525 } 526 527 528 int32 529 MultiAddressSpaceLocker::_IndexOfAddressSpace(vm_address_space* space) const 530 { 531 for (int32 i = 0; i < fCount; i++) { 532 if (fItems[i].space == space) 533 return i; 534 } 535 536 return -1; 537 } 538 539 540 status_t 541 MultiAddressSpaceLocker::_AddAddressSpace(vm_address_space* space, 542 bool writeLock, vm_address_space** _space) 543 { 544 if (!space) 545 return B_BAD_VALUE; 546 547 int32 index = _IndexOfAddressSpace(space); 548 if (index < 0) { 549 if (!_ResizeIfNeeded()) { 550 vm_put_address_space(space); 551 return B_NO_MEMORY; 552 } 553 554 lock_item& item = fItems[fCount++]; 555 item.space = space; 556 item.write_lock = writeLock; 557 } else { 558 559 // one reference is enough 560 vm_put_address_space(space); 561 562 fItems[index].write_lock |= writeLock; 563 } 564 565 if (_space != NULL) 566 *_space = space; 567 568 return B_OK; 569 } 570 571 572 inline status_t 573 MultiAddressSpaceLocker::AddTeam(team_id team, bool writeLock, 574 vm_address_space** _space) 575 { 576 return _AddAddressSpace(vm_get_address_space(team), writeLock, 577 _space); 578 } 579 580 581 inline status_t 582 MultiAddressSpaceLocker::AddArea(area_id area, bool writeLock, 583 vm_address_space** _space) 584 { 585 return _AddAddressSpace(get_address_space_by_area_id(area), writeLock, 586 _space); 587 } 588 589 590 void 591 MultiAddressSpaceLocker::Unset() 592 { 593 Unlock(); 594 595 for (int32 i = 0; i < fCount; i++) 596 vm_put_address_space(fItems[i].space); 597 598 fCount = 0; 599 } 600 601 602 status_t 603 MultiAddressSpaceLocker::Lock() 604 { 605 ASSERT(!fLocked); 606 607 qsort(fItems, fCount, sizeof(lock_item), &_CompareItems); 608 609 for (int32 i = 0; i < fCount; i++) { 610 status_t status; 611 if (fItems[i].write_lock) 612 status = rw_lock_write_lock(&fItems[i].space->lock); 613 else 614 status = rw_lock_read_lock(&fItems[i].space->lock); 615 616 if (status < B_OK) { 617 while (--i >= 0) { 618 if (fItems[i].write_lock) 619 rw_lock_write_unlock(&fItems[i].space->lock); 620 else 621 rw_lock_read_unlock(&fItems[i].space->lock); 622 } 623 return status; 624 } 625 } 626 627 fLocked = true; 628 return B_OK; 629 } 630 631 632 void 633 MultiAddressSpaceLocker::Unlock() 
{
	if (!fLocked)
		return;

	for (int32 i = 0; i < fCount; i++) {
		if (fItems[i].write_lock)
			rw_lock_write_unlock(&fItems[i].space->lock);
		else
			rw_lock_read_unlock(&fItems[i].space->lock);
	}

	fLocked = false;
}


/*!	Adds all address spaces of the areas associated with the given area's
	cache, locks them, and locks the cache (including a reference to it). It
	retries until the situation is stable (i.e. neither the cache nor the
	cache's area list has changed) or an error occurs. If \c checkNoCacheChange
	is \c true it does not return until all areas' \c no_cache_change flags
	are clear.
*/
status_t
MultiAddressSpaceLocker::AddAreaCacheAndLock(area_id areaID,
	bool writeLockThisOne, bool writeLockOthers, vm_area*& _area,
	vm_cache** _cache, bool checkNoCacheChange)
{
	// remember the original state
	int originalCount = fCount;
	lock_item* originalItems = NULL;
	if (fCount > 0) {
		originalItems = new(nothrow) lock_item[fCount];
		if (originalItems == NULL)
			return B_NO_MEMORY;
		memcpy(originalItems, fItems, fCount * sizeof(lock_item));
	}
	ArrayDeleter<lock_item> _(originalItems);

	// get the cache
	vm_cache* cache;
	vm_area* area;
	status_t error;
	{
		AddressSpaceReadLocker locker;
		error = locker.SetFromArea(areaID, area);
		if (error != B_OK)
			return error;

		cache = vm_area_get_locked_cache(area);
	}

	while (true) {
		// add all areas
		vm_area* firstArea = cache->areas;
		for (vm_area* current = firstArea; current;
				current = current->cache_next) {
			error = AddArea(current->id,
				current == area ? writeLockThisOne : writeLockOthers);
			if (error != B_OK) {
				vm_area_put_locked_cache(cache);
				return error;
			}
		}

		// unlock the cache and attempt to lock the address spaces
		vm_area_put_locked_cache(cache);

		error = Lock();
		if (error != B_OK)
			return error;

		// lock the cache again and check whether anything has changed

		// check whether the area is gone in the meantime
		rw_lock_read_lock(&sAreaHashLock);
		area = (vm_area*)hash_lookup(sAreaHash, &areaID);
		rw_lock_read_unlock(&sAreaHashLock);

		if (area == NULL) {
			Unlock();
			return B_BAD_VALUE;
		}

		// lock the cache
		vm_cache* oldCache = cache;
		cache = vm_area_get_locked_cache(area);

		// If neither the area's cache has changed nor its area list we're
		// done...
		bool done = (cache == oldCache || firstArea == cache->areas);

		// ... unless we're supposed to check the areas' "no_cache_change" flag
		bool yield = false;
		if (done && checkNoCacheChange) {
			for (vm_area* tempArea = cache->areas; tempArea != NULL;
					tempArea = tempArea->cache_next) {
				if (tempArea->no_cache_change) {
					done = false;
					yield = true;
					break;
				}
			}
		}

		// If everything looks dandy, return the values.
		if (done) {
			_area = area;
			if (_cache != NULL)
				*_cache = cache;
			return B_OK;
		}

		// Restore the original state and try again.

		// Unlock the address spaces, but keep the cache locked for the next
		// iteration.
		Unlock();

		// Get an additional reference to the original address spaces.
		for (int32 i = 0; i < originalCount; i++)
			atomic_add(&originalItems[i].space->ref_count, 1);

		// Release all references to the current address spaces.
756 for (int32 i = 0; i < fCount; i++) 757 vm_put_address_space(fItems[i].space); 758 759 // Copy over the original state. 760 fCount = originalCount; 761 if (originalItems != NULL) 762 memcpy(fItems, originalItems, fCount * sizeof(lock_item)); 763 764 if (yield) 765 thread_yield(true); 766 } 767 } 768 769 770 // #pragma mark - 771 772 773 #if VM_PAGE_FAULT_TRACING 774 775 namespace VMPageFaultTracing { 776 777 class PageFaultStart : public AbstractTraceEntry { 778 public: 779 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 780 : 781 fAddress(address), 782 fPC(pc), 783 fWrite(write), 784 fUser(user) 785 { 786 Initialized(); 787 } 788 789 virtual void AddDump(TraceOutput& out) 790 { 791 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 792 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC); 793 } 794 795 private: 796 addr_t fAddress; 797 addr_t fPC; 798 bool fWrite; 799 bool fUser; 800 }; 801 802 803 // page fault errors 804 enum { 805 PAGE_FAULT_ERROR_NO_AREA = 0, 806 PAGE_FAULT_ERROR_KERNEL_ONLY, 807 PAGE_FAULT_ERROR_READ_ONLY, 808 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 809 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 810 }; 811 812 813 class PageFaultError : public AbstractTraceEntry { 814 public: 815 PageFaultError(area_id area, status_t error) 816 : 817 fArea(area), 818 fError(error) 819 { 820 Initialized(); 821 } 822 823 virtual void AddDump(TraceOutput& out) 824 { 825 switch (fError) { 826 case PAGE_FAULT_ERROR_NO_AREA: 827 out.Print("page fault error: no area"); 828 break; 829 case PAGE_FAULT_ERROR_KERNEL_ONLY: 830 out.Print("page fault error: area: %ld, kernel only", fArea); 831 break; 832 case PAGE_FAULT_ERROR_READ_ONLY: 833 out.Print("page fault error: area: %ld, read only", fArea); 834 break; 835 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 836 out.Print("page fault error: kernel touching bad user memory"); 837 break; 838 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 839 out.Print("page fault error: no address space"); 840 break; 841 default: 842 out.Print("page fault error: area: %ld, error: %s", fArea, 843 strerror(fError)); 844 break; 845 } 846 } 847 848 private: 849 area_id fArea; 850 status_t fError; 851 }; 852 853 854 class PageFaultDone : public AbstractTraceEntry { 855 public: 856 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 857 vm_page* page) 858 : 859 fArea(area), 860 fTopCache(topCache), 861 fCache(cache), 862 fPage(page) 863 { 864 Initialized(); 865 } 866 867 virtual void AddDump(TraceOutput& out) 868 { 869 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 870 "page: %p", fArea, fTopCache, fCache, fPage); 871 } 872 873 private: 874 area_id fArea; 875 VMCache* fTopCache; 876 VMCache* fCache; 877 vm_page* fPage; 878 }; 879 880 } // namespace VMPageFaultTracing 881 882 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 883 #else 884 # define TPF(x) ; 885 #endif // VM_PAGE_FAULT_TRACING 886 887 888 // #pragma mark - 889 890 891 static int 892 area_compare(void* _area, const void* key) 893 { 894 vm_area* area = (vm_area*)_area; 895 const area_id* id = (const area_id*)key; 896 897 if (area->id == *id) 898 return 0; 899 900 return -1; 901 } 902 903 904 static uint32 905 area_hash(void* _area, const void* key, uint32 range) 906 { 907 vm_area* area = (vm_area*)_area; 908 const area_id* id = (const area_id*)key; 909 910 if (area != NULL) 911 return area->id % range; 912 913 return (uint32)*id % range; 914 } 915 916 917 static vm_address_space* 918 get_address_space_by_area_id(area_id id) 919 { 920 vm_address_space* 
addressSpace = NULL; 921 922 rw_lock_read_lock(&sAreaHashLock); 923 924 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 925 if (area != NULL) { 926 addressSpace = area->address_space; 927 atomic_add(&addressSpace->ref_count, 1); 928 } 929 930 rw_lock_read_unlock(&sAreaHashLock); 931 932 return addressSpace; 933 } 934 935 936 //! You need to have the address space locked when calling this function 937 static vm_area* 938 lookup_area(vm_address_space* addressSpace, area_id id) 939 { 940 rw_lock_read_lock(&sAreaHashLock); 941 942 vm_area* area = (vm_area*)hash_lookup(sAreaHash, &id); 943 if (area != NULL && area->address_space != addressSpace) 944 area = NULL; 945 946 rw_lock_read_unlock(&sAreaHashLock); 947 948 return area; 949 } 950 951 952 static vm_area* 953 create_reserved_area_struct(vm_address_space* addressSpace, uint32 flags) 954 { 955 vm_area* reserved = (vm_area*)malloc_nogrow(sizeof(vm_area)); 956 if (reserved == NULL) 957 return NULL; 958 959 memset(reserved, 0, sizeof(vm_area)); 960 reserved->id = RESERVED_AREA_ID; 961 // this marks it as reserved space 962 reserved->protection = flags; 963 reserved->address_space = addressSpace; 964 965 return reserved; 966 } 967 968 969 static vm_area* 970 create_area_struct(vm_address_space* addressSpace, const char* name, 971 uint32 wiring, uint32 protection) 972 { 973 // restrict the area name to B_OS_NAME_LENGTH 974 size_t length = strlen(name) + 1; 975 if (length > B_OS_NAME_LENGTH) 976 length = B_OS_NAME_LENGTH; 977 978 vm_area* area = (vm_area*)malloc_nogrow(sizeof(vm_area)); 979 if (area == NULL) 980 return NULL; 981 982 area->name = (char*)malloc_nogrow(length); 983 if (area->name == NULL) { 984 free(area); 985 return NULL; 986 } 987 strlcpy(area->name, name, length); 988 989 area->id = atomic_add(&sNextAreaID, 1); 990 area->base = 0; 991 area->size = 0; 992 area->protection = protection; 993 area->wiring = wiring; 994 area->memory_type = 0; 995 996 area->cache = NULL; 997 area->no_cache_change = 0; 998 area->cache_offset = 0; 999 1000 area->address_space = addressSpace; 1001 area->address_space_next = NULL; 1002 area->cache_next = area->cache_prev = NULL; 1003 area->hash_next = NULL; 1004 new (&area->mappings) vm_area_mappings; 1005 area->page_protections = NULL; 1006 1007 return area; 1008 } 1009 1010 1011 /*! Finds a reserved area that covers the region spanned by \a start and 1012 \a size, inserts the \a area into that region and makes sure that 1013 there are reserved regions for the remaining parts. 1014 */ 1015 static status_t 1016 find_reserved_area(vm_address_space* addressSpace, addr_t start, 1017 addr_t size, vm_area* area) 1018 { 1019 vm_area* last = NULL; 1020 vm_area* next; 1021 1022 next = addressSpace->areas; 1023 while (next) { 1024 if (next->base <= start && next->base + next->size >= start + size) { 1025 // this area covers the requested range 1026 if (next->id != RESERVED_AREA_ID) { 1027 // but it's not reserved space, it's a real area 1028 return B_BAD_VALUE; 1029 } 1030 1031 break; 1032 } 1033 last = next; 1034 next = next->address_space_next; 1035 } 1036 if (next == NULL) 1037 return B_ENTRY_NOT_FOUND; 1038 1039 // now we have to transfer the requested part of the reserved 1040 // range to the new area - and remove, resize or split the old 1041 // reserved area. 
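	// [Added illustration, not part of the original code; the numbers are
	// hypothetical.] Example for the most involved case below, the split:
	// suppose the reserved range covers [0x10000, 0x18000) (next->base ==
	// 0x10000, next->size == 0x8000) and the new area wants [0x12000, 0x15000)
	// (start == 0x12000, size == 0x3000). The code then shrinks the old
	// reserved range to [0x10000, 0x12000) (next->size = start - next->base),
	// inserts the area at [0x12000, 0x15000), and creates a second reserved
	// range covering the remainder [0x15000, 0x18000) (reserved->base ==
	// start + size, reserved->size == next->base + next->size - start - size).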
1042 1043 if (start == next->base) { 1044 // the area starts at the beginning of the reserved range 1045 if (last) 1046 last->address_space_next = area; 1047 else 1048 addressSpace->areas = area; 1049 1050 if (size == next->size) { 1051 // the new area fully covers the reversed range 1052 area->address_space_next = next->address_space_next; 1053 vm_put_address_space(addressSpace); 1054 free(next); 1055 } else { 1056 // resize the reserved range behind the area 1057 area->address_space_next = next; 1058 next->base += size; 1059 next->size -= size; 1060 } 1061 } else if (start + size == next->base + next->size) { 1062 // the area is at the end of the reserved range 1063 area->address_space_next = next->address_space_next; 1064 next->address_space_next = area; 1065 1066 // resize the reserved range before the area 1067 next->size = start - next->base; 1068 } else { 1069 // the area splits the reserved range into two separate ones 1070 // we need a new reserved area to cover this space 1071 vm_area* reserved = create_reserved_area_struct(addressSpace, 1072 next->protection); 1073 if (reserved == NULL) 1074 return B_NO_MEMORY; 1075 1076 atomic_add(&addressSpace->ref_count, 1); 1077 reserved->address_space_next = next->address_space_next; 1078 area->address_space_next = reserved; 1079 next->address_space_next = area; 1080 1081 // resize regions 1082 reserved->size = next->base + next->size - start - size; 1083 next->size = start - next->base; 1084 reserved->base = start + size; 1085 reserved->cache_offset = next->cache_offset; 1086 } 1087 1088 area->base = start; 1089 area->size = size; 1090 addressSpace->change_count++; 1091 1092 return B_OK; 1093 } 1094 1095 1096 /*! Must be called with this address space's sem held */ 1097 static status_t 1098 find_and_insert_area_slot(vm_address_space* addressSpace, addr_t start, 1099 addr_t size, addr_t end, uint32 addressSpec, vm_area* area) 1100 { 1101 vm_area* last = NULL; 1102 vm_area* next; 1103 bool foundSpot = false; 1104 1105 TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, " 1106 "size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start, 1107 size, end, addressSpec, area)); 1108 1109 // do some sanity checking 1110 if (start < addressSpace->base || size == 0 1111 || (end - 1) > (addressSpace->base + (addressSpace->size - 1)) 1112 || start + size > end) 1113 return B_BAD_ADDRESS; 1114 1115 if (addressSpec == B_EXACT_ADDRESS) { 1116 // search for a reserved area 1117 status_t status = find_reserved_area(addressSpace, start, size, area); 1118 if (status == B_OK || status == B_BAD_VALUE) 1119 return status; 1120 1121 // There was no reserved area, and the slot doesn't seem to be used 1122 // already 1123 // TODO: this could be further optimized. 
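		// [Added note.] The alignment logic that follows relies on the
		// ROUNDUP() macro from the top of this file, which masks with
		// ~(b - 1) and therefore only works for power-of-two alignments.
		// That always holds here, since "alignment" starts at B_PAGE_SIZE and
		// is only ever shifted left. For example, with a 4 KB page size
		// ROUNDUP(0x1234, 0x1000) == 0x2000 and
		// ROUNDOWN(0x1234, 0x1000) == 0x1000; for a 24 KB request the
		// B_ANY_KERNEL_BLOCK_ADDRESS loop picks an alignment of 0x8000, the
		// next power of two that is not smaller than the size.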
1124 } 1125 1126 size_t alignment = B_PAGE_SIZE; 1127 if (addressSpec == B_ANY_KERNEL_BLOCK_ADDRESS) { 1128 // align the memory to the next power of two of the size 1129 while (alignment < size) 1130 alignment <<= 1; 1131 } 1132 1133 start = ROUNDUP(start, alignment); 1134 1135 // walk up to the spot where we should start searching 1136 second_chance: 1137 next = addressSpace->areas; 1138 while (next) { 1139 if (next->base >= start + size) { 1140 // we have a winner 1141 break; 1142 } 1143 last = next; 1144 next = next->address_space_next; 1145 } 1146 1147 // find the right spot depending on the address specification - the area 1148 // will be inserted directly after "last" ("next" is not referenced anymore) 1149 1150 switch (addressSpec) { 1151 case B_ANY_ADDRESS: 1152 case B_ANY_KERNEL_ADDRESS: 1153 case B_ANY_KERNEL_BLOCK_ADDRESS: 1154 // find a hole big enough for a new area 1155 if (!last) { 1156 // see if we can build it at the beginning of the virtual map 1157 if (!next || (next->base >= ROUNDUP(addressSpace->base, 1158 alignment) + size)) { 1159 foundSpot = true; 1160 area->base = ROUNDUP(addressSpace->base, alignment); 1161 break; 1162 } 1163 last = next; 1164 next = next->address_space_next; 1165 } 1166 // keep walking 1167 while (next) { 1168 if (next->base >= ROUNDUP(last->base + last->size, alignment) 1169 + size) { 1170 // we found a spot (it'll be filled up below) 1171 break; 1172 } 1173 last = next; 1174 next = next->address_space_next; 1175 } 1176 1177 if ((addressSpace->base + (addressSpace->size - 1)) >= (ROUNDUP( 1178 last->base + last->size, alignment) + (size - 1))) { 1179 // got a spot 1180 foundSpot = true; 1181 area->base = ROUNDUP(last->base + last->size, alignment); 1182 break; 1183 } else { 1184 // We didn't find a free spot - if there were any reserved areas 1185 // with the RESERVED_AVOID_BASE flag set, we can now test those 1186 // for free space 1187 // TODO: it would make sense to start with the biggest of them 1188 next = addressSpace->areas; 1189 last = NULL; 1190 for (last = NULL; next; next = next->address_space_next) { 1191 if (next->id != RESERVED_AREA_ID) { 1192 last = next; 1193 continue; 1194 } 1195 1196 // TODO: take free space after the reserved area into 1197 // account! 
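				// [Added illustration, hypothetical numbers.] How a reserved
				// range is reused below: suppose a reserved range covers
				// [0x40000, 0x50000) and we need size == 0x4000 with
				// alignment == 0x4000. The range is bigger than the request,
				// so the second case applies: the reserved range is shrunk to
				// [0x40000, 0x4c000) and the new area takes its former tail,
				// [0x4c000, 0x50000). Only when aligned base and size match
				// exactly is the reserved range consumed entirely and removed
				// from the list (first case).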
1198 if (next->base == ROUNDUP(next->base, alignment) 1199 && next->size == size) { 1200 // The reserved area is entirely covered, and thus, 1201 // removed 1202 if (last) 1203 last->address_space_next = next->address_space_next; 1204 else 1205 addressSpace->areas = next->address_space_next; 1206 1207 foundSpot = true; 1208 area->base = next->base; 1209 free(next); 1210 break; 1211 } 1212 if (next->size - (ROUNDUP(next->base, alignment) 1213 - next->base) >= size) { 1214 // The new area will be placed at the end of the 1215 // reserved area, and the reserved area will be resized 1216 // to make space 1217 foundSpot = true; 1218 next->size -= size; 1219 last = next; 1220 area->base = next->base + next->size; 1221 break; 1222 } 1223 1224 last = next; 1225 } 1226 } 1227 break; 1228 1229 case B_BASE_ADDRESS: 1230 // find a hole big enough for a new area beginning with "start" 1231 if (!last) { 1232 // see if we can build it at the beginning of the specified start 1233 if (!next || (next->base >= start + size)) { 1234 foundSpot = true; 1235 area->base = start; 1236 break; 1237 } 1238 last = next; 1239 next = next->address_space_next; 1240 } 1241 // keep walking 1242 while (next) { 1243 if (next->base >= last->base + last->size + size) { 1244 // we found a spot (it'll be filled up below) 1245 break; 1246 } 1247 last = next; 1248 next = next->address_space_next; 1249 } 1250 1251 if ((addressSpace->base + (addressSpace->size - 1)) 1252 >= (last->base + last->size + (size - 1))) { 1253 // got a spot 1254 foundSpot = true; 1255 if (last->base + last->size <= start) 1256 area->base = start; 1257 else 1258 area->base = last->base + last->size; 1259 break; 1260 } 1261 // we didn't find a free spot in the requested range, so we'll 1262 // try again without any restrictions 1263 start = addressSpace->base; 1264 addressSpec = B_ANY_ADDRESS; 1265 last = NULL; 1266 goto second_chance; 1267 1268 case B_EXACT_ADDRESS: 1269 // see if we can create it exactly here 1270 if (!last) { 1271 if (!next || (next->base >= start + size)) { 1272 foundSpot = true; 1273 area->base = start; 1274 break; 1275 } 1276 } else { 1277 if (next) { 1278 if (last->base + last->size <= start 1279 && next->base >= start + size) { 1280 foundSpot = true; 1281 area->base = start; 1282 break; 1283 } 1284 } else { 1285 if ((last->base + (last->size - 1)) <= start - 1) { 1286 foundSpot = true; 1287 area->base = start; 1288 } 1289 } 1290 } 1291 break; 1292 default: 1293 return B_BAD_VALUE; 1294 } 1295 1296 if (!foundSpot) 1297 return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY; 1298 1299 area->size = size; 1300 if (last) { 1301 area->address_space_next = last->address_space_next; 1302 last->address_space_next = area; 1303 } else { 1304 area->address_space_next = addressSpace->areas; 1305 addressSpace->areas = area; 1306 } 1307 addressSpace->change_count++; 1308 return B_OK; 1309 } 1310 1311 1312 /*! This inserts the area you pass into the specified address space. 1313 It will also set the "_address" argument to its base address when 1314 the call succeeds. 1315 You need to hold the vm_address_space semaphore. 
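	A usage sketch (added for illustration only; error handling is omitted and
	the values are hypothetical):
	\code
	vm_area* area = create_area_struct(addressSpace, "example area",
		B_NO_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	void* address = NULL;
	status_t status = insert_area(addressSpace, &address,
		B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, area);
	// on success, "address" now holds the chosen base address (area->base)
	\endcode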
1316 */ 1317 static status_t 1318 insert_area(vm_address_space* addressSpace, void** _address, 1319 uint32 addressSpec, addr_t size, vm_area* area) 1320 { 1321 addr_t searchBase, searchEnd; 1322 status_t status; 1323 1324 switch (addressSpec) { 1325 case B_EXACT_ADDRESS: 1326 searchBase = (addr_t)*_address; 1327 searchEnd = (addr_t)*_address + size; 1328 break; 1329 1330 case B_BASE_ADDRESS: 1331 searchBase = (addr_t)*_address; 1332 searchEnd = addressSpace->base + (addressSpace->size - 1); 1333 break; 1334 1335 case B_ANY_ADDRESS: 1336 case B_ANY_KERNEL_ADDRESS: 1337 case B_ANY_KERNEL_BLOCK_ADDRESS: 1338 searchBase = addressSpace->base; 1339 // TODO: remove this again when vm86 mode is moved into the kernel 1340 // completely (currently needs a userland address space!) 1341 if (searchBase == USER_BASE) 1342 searchBase = USER_BASE_ANY; 1343 searchEnd = addressSpace->base + (addressSpace->size - 1); 1344 break; 1345 1346 default: 1347 return B_BAD_VALUE; 1348 } 1349 1350 status = find_and_insert_area_slot(addressSpace, searchBase, size, 1351 searchEnd, addressSpec, area); 1352 if (status == B_OK) { 1353 // TODO: do we have to do anything about B_ANY_KERNEL_ADDRESS 1354 // vs. B_ANY_KERNEL_BLOCK_ADDRESS here? 1355 *_address = (void*)area->base; 1356 } 1357 1358 return status; 1359 } 1360 1361 1362 static inline void 1363 set_area_page_protection(vm_area* area, addr_t pageAddress, uint32 protection) 1364 { 1365 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 1366 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1367 uint8& entry = area->page_protections[pageIndex / 2]; 1368 if (pageIndex % 2 == 0) 1369 entry = (entry & 0xf0) | protection; 1370 else 1371 entry = (entry & 0x0f) | (protection << 4); 1372 } 1373 1374 1375 static inline uint32 1376 get_area_page_protection(vm_area* area, addr_t pageAddress) 1377 { 1378 if (area->page_protections == NULL) 1379 return area->protection; 1380 1381 uint32 pageIndex = (pageAddress - area->base) / B_PAGE_SIZE; 1382 uint32 protection = area->page_protections[pageIndex / 2]; 1383 if (pageIndex % 2 == 0) 1384 protection &= 0x0f; 1385 else 1386 protection >>= 4; 1387 1388 return protection | B_KERNEL_READ_AREA 1389 | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 1390 } 1391 1392 1393 /*! Cuts a piece out of an area. If the given cut range covers the complete 1394 area, it is deleted. If it covers the beginning or the end, the area is 1395 resized accordingly. If the range covers some part in the middle of the 1396 area, it is split in two; in this case the second area is returned via 1397 \a _secondArea (the variable is left untouched in the other cases). 1398 The address space must be write locked. 1399 */ 1400 static status_t 1401 cut_area(vm_address_space* addressSpace, vm_area* area, addr_t address, 1402 addr_t lastAddress, vm_area** _secondArea, bool kernel) 1403 { 1404 // Does the cut range intersect with the area at all? 1405 addr_t areaLast = area->base + (area->size - 1); 1406 if (area->base > lastAddress || areaLast < address) 1407 return B_OK; 1408 1409 // Is the area fully covered? 1410 if (area->base >= address && areaLast <= lastAddress) { 1411 delete_area(addressSpace, area); 1412 return B_OK; 1413 } 1414 1415 AreaCacheLocker cacheLocker(area); 1416 vm_cache* cache = area->cache; 1417 1418 // Cut the end only? 
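	// [Added illustration, hypothetical numbers.] For an area covering
	// [0x100000, 0x110000):
	//   - cutting [0x108000, 0x110000) hits the "cut the end" case below and
	//     shrinks the area to size 0x8000;
	//   - cutting [0x100000, 0x104000) hits the "cut the beginning" case and
	//     moves base to 0x104000 (cache_offset advances by the same amount);
	//   - cutting [0x104000, 0x108000) splits the area: the original keeps
	//     [0x100000, 0x104000) and a second area for [0x108000, 0x110000) is
	//     created via map_backing_store(), sharing the same cache.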
1419 if (areaLast <= lastAddress) { 1420 addr_t newSize = address - area->base; 1421 1422 // unmap pages 1423 vm_unmap_pages(area, address, area->size - newSize, false); 1424 1425 // If no one else uses the area's cache, we can resize it, too. 1426 if (cache->areas == area && area->cache_next == NULL 1427 && list_is_empty(&cache->consumers)) { 1428 status_t error = cache->Resize(cache->virtual_base + newSize); 1429 if (error != B_OK) 1430 return error; 1431 } 1432 1433 area->size = newSize; 1434 1435 return B_OK; 1436 } 1437 1438 // Cut the beginning only? 1439 if (area->base >= address) { 1440 addr_t newBase = lastAddress + 1; 1441 addr_t newSize = areaLast - lastAddress; 1442 1443 // unmap pages 1444 vm_unmap_pages(area, area->base, newBase - area->base, false); 1445 1446 // TODO: If no one else uses the area's cache, we should resize it, too! 1447 1448 area->cache_offset += newBase - area->base; 1449 area->base = newBase; 1450 area->size = newSize; 1451 1452 return B_OK; 1453 } 1454 1455 // The tough part -- cut a piece out of the middle of the area. 1456 // We do that by shrinking the area to the begin section and creating a 1457 // new area for the end section. 1458 1459 addr_t firstNewSize = address - area->base; 1460 addr_t secondBase = lastAddress + 1; 1461 addr_t secondSize = areaLast - lastAddress; 1462 1463 // unmap pages 1464 vm_unmap_pages(area, address, area->size - firstNewSize, false); 1465 1466 // resize the area 1467 addr_t oldSize = area->size; 1468 area->size = firstNewSize; 1469 1470 // TODO: If no one else uses the area's cache, we might want to create a 1471 // new cache for the second area, transfer the concerned pages from the 1472 // first cache to it and resize the first cache. 1473 1474 // map the second area 1475 vm_area* secondArea; 1476 void* secondBaseAddress = (void*)secondBase; 1477 status_t error = map_backing_store(addressSpace, cache, &secondBaseAddress, 1478 area->cache_offset + (secondBase - area->base), secondSize, 1479 B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 1480 &secondArea, area->name, false, kernel); 1481 if (error != B_OK) { 1482 area->size = oldSize; 1483 return error; 1484 } 1485 1486 // We need a cache reference for the new area. 1487 cache->AcquireRefLocked(); 1488 1489 if (_secondArea != NULL) 1490 *_secondArea = secondArea; 1491 1492 return B_OK; 1493 } 1494 1495 1496 static inline void 1497 increment_page_wired_count(vm_page* page) 1498 { 1499 // TODO: needs to be atomic on all platforms! 1500 // ... but at least the check isn't. Consequently we should hold 1501 // sMappingLock, which would allows us to even avoid atomic_add() on 1502 // gMappedPagesCount. 1503 if (page->wired_count++ == 0) { 1504 if (page->mappings.IsEmpty()) 1505 atomic_add(&gMappedPagesCount, 1); 1506 } 1507 } 1508 1509 1510 static inline void 1511 decrement_page_wired_count(vm_page* page) 1512 { 1513 if (--page->wired_count == 0) { 1514 // TODO: needs to be atomic on all platforms! 1515 // See above! 1516 if (page->mappings.IsEmpty()) 1517 atomic_add(&gMappedPagesCount, -1); 1518 } 1519 } 1520 1521 1522 /*! Deletes all areas in the given address range. 1523 The address space must be write-locked. 1524 */ 1525 static status_t 1526 unmap_address_range(vm_address_space* addressSpace, addr_t address, addr_t size, 1527 bool kernel) 1528 { 1529 size = PAGE_ALIGN(size); 1530 addr_t lastAddress = address + (size - 1); 1531 1532 // Check, whether the caller is allowed to modify the concerned areas. 
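	// [Added commentary.] unmap_address_range() walks the area list twice:
	// the first pass below (userland callers only) merely verifies that no
	// area intersecting the range is protected with B_KERNEL_AREA, so the
	// request can be refused before anything has been modified; only the
	// second pass actually calls cut_area(). The intersection test
	// "area->base < lastAddress && address < areaLast" behaves like the usual
	// interval overlap check; since the areas and the range are page aligned,
	// the strict comparisons cannot miss an overlap.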
1533 vm_area* area; 1534 if (!kernel) { 1535 area = addressSpace->areas; 1536 while (area != NULL) { 1537 vm_area* nextArea = area->address_space_next; 1538 1539 if (area->id != RESERVED_AREA_ID) { 1540 addr_t areaLast = area->base + (area->size - 1); 1541 if (area->base < lastAddress && address < areaLast) { 1542 if ((area->protection & B_KERNEL_AREA) != 0) 1543 return B_NOT_ALLOWED; 1544 } 1545 } 1546 1547 area = nextArea; 1548 } 1549 } 1550 1551 area = addressSpace->areas; 1552 while (area != NULL) { 1553 vm_area* nextArea = area->address_space_next; 1554 1555 if (area->id != RESERVED_AREA_ID) { 1556 addr_t areaLast = area->base + (area->size - 1); 1557 if (area->base < lastAddress && address < areaLast) { 1558 status_t error = cut_area(addressSpace, area, address, 1559 lastAddress, NULL, kernel); 1560 if (error != B_OK) 1561 return error; 1562 // Failing after already messing with areas is ugly, but we 1563 // can't do anything about it. 1564 } 1565 } 1566 1567 area = nextArea; 1568 } 1569 1570 return B_OK; 1571 } 1572 1573 1574 /*! You need to hold the lock of the cache and the write lock of the address 1575 space when calling this function. 1576 Note, that in case of error your cache will be temporarily unlocked. 1577 */ 1578 static status_t 1579 map_backing_store(vm_address_space* addressSpace, vm_cache* cache, 1580 void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 1581 int wiring, int protection, int mapping, vm_area** _area, 1582 const char* areaName, bool unmapAddressRange, bool kernel) 1583 { 1584 TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, " 1585 "size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName " 1586 "'%s'\n", addressSpace, cache, *_virtualAddress, offset, size, 1587 addressSpec, wiring, protection, _area, areaName)); 1588 cache->AssertLocked(); 1589 1590 vm_area* area = create_area_struct(addressSpace, areaName, wiring, 1591 protection); 1592 if (area == NULL) 1593 return B_NO_MEMORY; 1594 1595 status_t status; 1596 1597 // if this is a private map, we need to create a new cache 1598 // to handle the private copies of pages as they are written to 1599 vm_cache* sourceCache = cache; 1600 if (mapping == REGION_PRIVATE_MAP) { 1601 vm_cache* newCache; 1602 1603 // create an anonymous cache 1604 status = VMCacheFactory::CreateAnonymousCache(newCache, 1605 (protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true); 1606 if (status != B_OK) 1607 goto err1; 1608 1609 newCache->Lock(); 1610 newCache->temporary = 1; 1611 newCache->scan_skip = cache->scan_skip; 1612 newCache->virtual_base = offset; 1613 newCache->virtual_end = offset + size; 1614 1615 cache->AddConsumer(newCache); 1616 1617 cache = newCache; 1618 } 1619 1620 status = cache->SetMinimalCommitment(size); 1621 if (status != B_OK) 1622 goto err2; 1623 1624 // check to see if this address space has entered DELETE state 1625 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 1626 // okay, someone is trying to delete this address space now, so we can't 1627 // insert the area, so back out 1628 status = B_BAD_TEAM_ID; 1629 goto err2; 1630 } 1631 1632 if (addressSpec == B_EXACT_ADDRESS && unmapAddressRange) { 1633 status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress, 1634 size, kernel); 1635 if (status != B_OK) 1636 goto err2; 1637 } 1638 1639 status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area); 1640 if (status < B_OK) 1641 goto err2; 1642 1643 // attach the cache to the area 1644 area->cache = cache; 1645 
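	// [Added commentary.] From here on the new area is wired into the
	// remaining bookkeeping: the cache records it via InsertAreaLocked(), the
	// area becomes visible in the global sAreaHash under the hash write lock,
	// and it takes a reference to its address space.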
area->cache_offset = offset; 1646 1647 // point the cache back to the area 1648 cache->InsertAreaLocked(area); 1649 if (mapping == REGION_PRIVATE_MAP) 1650 cache->Unlock(); 1651 1652 // insert the area in the global area hash table 1653 rw_lock_write_lock(&sAreaHashLock); 1654 hash_insert(sAreaHash, area); 1655 rw_lock_write_unlock(&sAreaHashLock); 1656 1657 // grab a ref to the address space (the area holds this) 1658 atomic_add(&addressSpace->ref_count, 1); 1659 1660 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1661 // cache, sourceCache, areaName, area); 1662 1663 *_area = area; 1664 return B_OK; 1665 1666 err2: 1667 if (mapping == REGION_PRIVATE_MAP) { 1668 // We created this cache, so we must delete it again. Note, that we 1669 // need to temporarily unlock the source cache or we'll otherwise 1670 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1671 sourceCache->Unlock(); 1672 cache->ReleaseRefAndUnlock(); 1673 sourceCache->Lock(); 1674 } 1675 err1: 1676 free(area->name); 1677 free(area); 1678 return status; 1679 } 1680 1681 1682 status_t 1683 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1684 { 1685 AddressSpaceWriteLocker locker(team); 1686 if (!locker.IsLocked()) 1687 return B_BAD_TEAM_ID; 1688 1689 // check to see if this address space has entered DELETE state 1690 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1691 // okay, someone is trying to delete this address space now, so we can't 1692 // insert the area, so back out 1693 return B_BAD_TEAM_ID; 1694 } 1695 1696 // search area list and remove any matching reserved ranges 1697 1698 vm_area* area = locker.AddressSpace()->areas; 1699 vm_area* last = NULL; 1700 while (area) { 1701 // the area must be completely part of the reserved range 1702 if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address 1703 && area->base + area->size <= (addr_t)address + size) { 1704 // remove reserved range 1705 vm_area* reserved = area; 1706 if (last) 1707 last->address_space_next = reserved->address_space_next; 1708 else 1709 locker.AddressSpace()->areas = reserved->address_space_next; 1710 1711 area = reserved->address_space_next; 1712 vm_put_address_space(locker.AddressSpace()); 1713 free(reserved); 1714 continue; 1715 } 1716 1717 last = area; 1718 area = area->address_space_next; 1719 } 1720 1721 return B_OK; 1722 } 1723 1724 1725 status_t 1726 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1727 addr_t size, uint32 flags) 1728 { 1729 if (size == 0) 1730 return B_BAD_VALUE; 1731 1732 AddressSpaceWriteLocker locker(team); 1733 if (!locker.IsLocked()) 1734 return B_BAD_TEAM_ID; 1735 1736 // check to see if this address space has entered DELETE state 1737 if (locker.AddressSpace()->state == VM_ASPACE_STATE_DELETION) { 1738 // okay, someone is trying to delete this address space now, so we 1739 // can't insert the area, let's back out 1740 return B_BAD_TEAM_ID; 1741 } 1742 1743 vm_area* area = create_reserved_area_struct(locker.AddressSpace(), flags); 1744 if (area == NULL) 1745 return B_NO_MEMORY; 1746 1747 status_t status = insert_area(locker.AddressSpace(), _address, addressSpec, 1748 size, area); 1749 if (status < B_OK) { 1750 free(area); 1751 return status; 1752 } 1753 1754 // the area is now reserved! 
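	// [Added usage sketch; kernel-internal API, hypothetical values, error
	// handling omitted.] A caller can pin down address space now and create
	// areas inside it later:
	//
	//	void* base = (void*)0x60000000;
	//	status_t status = vm_reserve_address_range(team, &base, B_BASE_ADDRESS,
	//		64 * B_PAGE_SIZE, RESERVED_AVOID_BASE);
	//	// ... areas created with B_EXACT_ADDRESS inside the range are carved
	//	// out of the reservation by find_reserved_area() ...
	//	vm_unreserve_address_range(team, base, 64 * B_PAGE_SIZE);
	//
	// The reservation holds a reference to the address space and is released
	// either explicitly, as above, or when the address space is torn down.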
1755 1756 area->cache_offset = area->base; 1757 // we cache the original base address here 1758 1759 atomic_add(&locker.AddressSpace()->ref_count, 1); 1760 return B_OK; 1761 } 1762 1763 1764 area_id 1765 vm_create_anonymous_area(team_id team, const char* name, void** address, 1766 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection, 1767 uint32 flags, bool kernel) 1768 { 1769 vm_area* area; 1770 vm_cache* cache; 1771 vm_page* page = NULL; 1772 bool isStack = (protection & B_STACK_AREA) != 0; 1773 page_num_t guardPages; 1774 bool canOvercommit = false; 1775 addr_t physicalBase = 0; 1776 1777 TRACE(("create_anonymous_area [%d] %s: size 0x%lx\n", team, name, size)); 1778 1779 size = PAGE_ALIGN(size); 1780 1781 if (size == 0) 1782 return B_BAD_VALUE; 1783 if (!arch_vm_supports_protection(protection)) 1784 return B_NOT_SUPPORTED; 1785 1786 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1787 canOvercommit = true; 1788 1789 #ifdef DEBUG_KERNEL_STACKS 1790 if ((protection & B_KERNEL_STACK_AREA) != 0) 1791 isStack = true; 1792 #endif 1793 1794 // check parameters 1795 switch (addressSpec) { 1796 case B_ANY_ADDRESS: 1797 case B_EXACT_ADDRESS: 1798 case B_BASE_ADDRESS: 1799 case B_ANY_KERNEL_ADDRESS: 1800 case B_ANY_KERNEL_BLOCK_ADDRESS: 1801 break; 1802 case B_PHYSICAL_BASE_ADDRESS: 1803 physicalBase = (addr_t)*address; 1804 addressSpec = B_ANY_KERNEL_ADDRESS; 1805 break; 1806 1807 default: 1808 return B_BAD_VALUE; 1809 } 1810 1811 bool doReserveMemory = false; 1812 switch (wiring) { 1813 case B_NO_LOCK: 1814 break; 1815 case B_FULL_LOCK: 1816 case B_LAZY_LOCK: 1817 case B_CONTIGUOUS: 1818 doReserveMemory = true; 1819 break; 1820 case B_ALREADY_WIRED: 1821 break; 1822 case B_LOMEM: 1823 //case B_SLOWMEM: 1824 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 1825 wiring = B_FULL_LOCK; 1826 doReserveMemory = true; 1827 break; 1828 default: 1829 return B_BAD_VALUE; 1830 } 1831 1832 // For full lock or contiguous areas we're also going to map the pages and 1833 // thus need to reserve pages for the mapping backend upfront. 1834 addr_t reservedMapPages = 0; 1835 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1836 AddressSpaceWriteLocker locker; 1837 status_t status = locker.SetTo(team); 1838 if (status != B_OK) 1839 return status; 1840 1841 vm_translation_map* map = &locker.AddressSpace()->translation_map; 1842 reservedMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 1843 } 1844 1845 // Reserve memory before acquiring the address space lock. This reduces the 1846 // chances of failure, since while holding the write lock to the address 1847 // space (if it is the kernel address space that is), the low memory handler 1848 // won't be able to free anything for us. 1849 addr_t reservedMemory = 0; 1850 if (doReserveMemory) { 1851 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1852 if (vm_try_reserve_memory(size, timeout) != B_OK) 1853 return B_NO_MEMORY; 1854 reservedMemory = size; 1855 // TODO: We don't reserve the memory for the pages for the page 1856 // directories/tables. We actually need to do since we currently don't 1857 // reclaim them (and probably can't reclaim all of them anyway). Thus 1858 // there are actually less physical pages than there should be, which 1859 // can get the VM into trouble in low memory situations. 1860 } 1861 1862 AddressSpaceWriteLocker locker; 1863 vm_address_space* addressSpace; 1864 status_t status; 1865 1866 // For full lock areas reserve the pages before locking the address 1867 // space. E.g. 
block caches can't release their memory while we hold the 1868 // address space lock. 1869 page_num_t reservedPages = reservedMapPages; 1870 if (wiring == B_FULL_LOCK) 1871 reservedPages += size / B_PAGE_SIZE; 1872 if (reservedPages > 0) { 1873 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1874 if (!vm_page_try_reserve_pages(reservedPages)) { 1875 reservedPages = 0; 1876 status = B_WOULD_BLOCK; 1877 goto err0; 1878 } 1879 } else 1880 vm_page_reserve_pages(reservedPages); 1881 } 1882 1883 status = locker.SetTo(team); 1884 if (status != B_OK) 1885 goto err0; 1886 1887 addressSpace = locker.AddressSpace(); 1888 1889 if (wiring == B_CONTIGUOUS) { 1890 // we try to allocate the page run here upfront as this may easily 1891 // fail for obvious reasons 1892 page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, physicalBase, 1893 size / B_PAGE_SIZE); 1894 if (page == NULL) { 1895 status = B_NO_MEMORY; 1896 goto err0; 1897 } 1898 } 1899 1900 // create an anonymous cache 1901 // if it's a stack, make sure that two pages are available at least 1902 guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0 1903 ? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0; 1904 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1905 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1906 wiring == B_NO_LOCK); 1907 if (status != B_OK) 1908 goto err1; 1909 1910 cache->temporary = 1; 1911 cache->virtual_end = size; 1912 cache->committed_size = reservedMemory; 1913 // TODO: This should be done via a method. 1914 reservedMemory = 0; 1915 1916 switch (wiring) { 1917 case B_LAZY_LOCK: 1918 case B_FULL_LOCK: 1919 case B_CONTIGUOUS: 1920 case B_ALREADY_WIRED: 1921 cache->scan_skip = 1; 1922 break; 1923 case B_NO_LOCK: 1924 cache->scan_skip = 0; 1925 break; 1926 } 1927 1928 cache->Lock(); 1929 1930 status = map_backing_store(addressSpace, cache, address, 0, size, 1931 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name, 1932 (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0, kernel); 1933 1934 if (status < B_OK) { 1935 cache->ReleaseRefAndUnlock(); 1936 goto err1; 1937 } 1938 1939 locker.DegradeToReadLock(); 1940 1941 switch (wiring) { 1942 case B_NO_LOCK: 1943 case B_LAZY_LOCK: 1944 // do nothing - the pages are mapped in as needed 1945 break; 1946 1947 case B_FULL_LOCK: 1948 { 1949 // Allocate and map all pages for this area 1950 1951 off_t offset = 0; 1952 for (addr_t address = area->base; 1953 address < area->base + (area->size - 1); 1954 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1955 #ifdef DEBUG_KERNEL_STACKS 1956 # ifdef STACK_GROWS_DOWNWARDS 1957 if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES 1958 * B_PAGE_SIZE) 1959 # else 1960 if (isStack && address >= area->base + area->size 1961 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1962 # endif 1963 continue; 1964 #endif 1965 vm_page* page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 1966 cache->InsertPage(page, offset); 1967 vm_map_page(area, page, address, protection); 1968 1969 // Periodically unreserve pages we've already allocated, so that 1970 // we don't unnecessarily increase the pressure on the VM. 1971 if (offset > 0 && offset % (128 * B_PAGE_SIZE) == 0) { 1972 page_num_t toUnreserve = 128; 1973 vm_page_unreserve_pages(toUnreserve); 1974 reservedPages -= toUnreserve; 1975 } 1976 } 1977 1978 break; 1979 } 1980 1981 case B_ALREADY_WIRED: 1982 { 1983 // The pages should already be mapped. This is only really useful 1984 // during boot time. 
Find the appropriate vm_page objects and stick 1985 // them in the cache object. 1986 vm_translation_map* map = &addressSpace->translation_map; 1987 off_t offset = 0; 1988 1989 if (!gKernelStartup) 1990 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1991 1992 map->ops->lock(map); 1993 1994 for (addr_t virtualAddress = area->base; virtualAddress < area->base 1995 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 1996 offset += B_PAGE_SIZE) { 1997 addr_t physicalAddress; 1998 uint32 flags; 1999 status = map->ops->query(map, virtualAddress, 2000 &physicalAddress, &flags); 2001 if (status < B_OK) { 2002 panic("looking up mapping failed for va 0x%lx\n", 2003 virtualAddress); 2004 } 2005 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2006 if (page == NULL) { 2007 panic("looking up page failed for pa 0x%lx\n", 2008 physicalAddress); 2009 } 2010 2011 increment_page_wired_count(page); 2012 vm_page_set_state(page, PAGE_STATE_WIRED); 2013 cache->InsertPage(page, offset); 2014 } 2015 2016 map->ops->unlock(map); 2017 break; 2018 } 2019 2020 case B_CONTIGUOUS: 2021 { 2022 // We have already allocated our continuous pages run, so we can now 2023 // just map them in the address space 2024 vm_translation_map* map = &addressSpace->translation_map; 2025 addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE; 2026 addr_t virtualAddress = area->base; 2027 off_t offset = 0; 2028 2029 map->ops->lock(map); 2030 2031 for (virtualAddress = area->base; virtualAddress < area->base 2032 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 2033 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 2034 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 2035 if (page == NULL) 2036 panic("couldn't lookup physical page just allocated\n"); 2037 2038 status = map->ops->map(map, virtualAddress, physicalAddress, 2039 protection); 2040 if (status < B_OK) 2041 panic("couldn't map physical page in page run\n"); 2042 2043 increment_page_wired_count(page); 2044 vm_page_set_state(page, PAGE_STATE_WIRED); 2045 cache->InsertPage(page, offset); 2046 } 2047 2048 map->ops->unlock(map); 2049 break; 2050 } 2051 2052 default: 2053 break; 2054 } 2055 2056 cache->Unlock(); 2057 2058 if (reservedPages > 0) 2059 vm_page_unreserve_pages(reservedPages); 2060 2061 TRACE(("vm_create_anonymous_area: done\n")); 2062 2063 area->cache_type = CACHE_TYPE_RAM; 2064 return area->id; 2065 2066 err1: 2067 if (wiring == B_CONTIGUOUS) { 2068 // we had reserved the area space upfront... 
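	// [Added commentary.] For B_CONTIGUOUS areas the page run was allocated
	// with vm_page_allocate_page_run() before map_backing_store() was
	// attempted, so on failure every page of the run has to be returned
	// individually: the loop below walks the run's physical page numbers,
	// looks each page up again and hands it back via PAGE_STATE_FREE.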
2069 addr_t pageNumber = page->physical_page_number; 2070 int32 i; 2071 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 2072 page = vm_lookup_page(pageNumber); 2073 if (page == NULL) 2074 panic("couldn't lookup physical page just allocated\n"); 2075 2076 vm_page_set_state(page, PAGE_STATE_FREE); 2077 } 2078 } 2079 2080 err0: 2081 if (reservedPages > 0) 2082 vm_page_unreserve_pages(reservedPages); 2083 if (reservedMemory > 0) 2084 vm_unreserve_memory(reservedMemory); 2085 2086 return status; 2087 } 2088 2089 2090 area_id 2091 vm_map_physical_memory(team_id team, const char* name, void** _address, 2092 uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress) 2093 { 2094 vm_area* area; 2095 vm_cache* cache; 2096 addr_t mapOffset; 2097 2098 TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, " 2099 "spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team, 2100 name, _address, addressSpec, size, protection, physicalAddress)); 2101 2102 if (!arch_vm_supports_protection(protection)) 2103 return B_NOT_SUPPORTED; 2104 2105 AddressSpaceWriteLocker locker(team); 2106 if (!locker.IsLocked()) 2107 return B_BAD_TEAM_ID; 2108 2109 // if the physical address is somewhat inside a page, 2110 // move the actual area down to align on a page boundary 2111 mapOffset = physicalAddress % B_PAGE_SIZE; 2112 size += mapOffset; 2113 physicalAddress -= mapOffset; 2114 2115 size = PAGE_ALIGN(size); 2116 2117 // create an device cache 2118 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 2119 if (status != B_OK) 2120 return status; 2121 2122 // tell the page scanner to skip over this area, it's pages are special 2123 cache->scan_skip = 1; 2124 cache->virtual_end = size; 2125 2126 cache->Lock(); 2127 2128 status = map_backing_store(locker.AddressSpace(), cache, _address, 2129 0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection, 2130 REGION_NO_PRIVATE_MAP, &area, name, false, true); 2131 2132 if (status < B_OK) 2133 cache->ReleaseRefLocked(); 2134 2135 cache->Unlock(); 2136 2137 if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) { 2138 // set requested memory type 2139 status = arch_vm_set_memory_type(area, physicalAddress, 2140 addressSpec & B_MTR_MASK); 2141 if (status < B_OK) 2142 delete_area(locker.AddressSpace(), area); 2143 } 2144 2145 if (status >= B_OK) { 2146 // make sure our area is mapped in completely 2147 2148 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2149 size_t reservePages = map->ops->map_max_pages_need(map, area->base, 2150 area->base + (size - 1)); 2151 2152 vm_page_reserve_pages(reservePages); 2153 map->ops->lock(map); 2154 2155 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2156 map->ops->map(map, area->base + offset, physicalAddress + offset, 2157 protection); 2158 } 2159 2160 map->ops->unlock(map); 2161 vm_page_unreserve_pages(reservePages); 2162 } 2163 2164 if (status < B_OK) 2165 return status; 2166 2167 // modify the pointer returned to be offset back into the new area 2168 // the same way the physical address in was offset 2169 *_address = (void*)((addr_t)*_address + mapOffset); 2170 2171 area->cache_type = CACHE_TYPE_DEVICE; 2172 return area->id; 2173 } 2174 2175 2176 area_id 2177 vm_create_null_area(team_id team, const char* name, void** address, 2178 uint32 addressSpec, addr_t size) 2179 { 2180 vm_area* area; 2181 vm_cache* cache; 2182 status_t status; 2183 2184 AddressSpaceWriteLocker locker(team); 2185 if (!locker.IsLocked()) 2186 return B_BAD_TEAM_ID; 2187 2188 
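	// [Added commentary.] A "null area" only occupies address space: it is
	// backed by a null cache, so no pages are ever committed or mapped for it,
	// the page scanner is told to skip it (scan_skip below), and it is created
	// with B_KERNEL_READ_AREA protection only. PAGE_ALIGN() rounds the
	// requested size up to a whole number of pages, so e.g. a one-byte request
	// still occupies a full page of address space.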
size = PAGE_ALIGN(size); 2189 2190 // create an null cache 2191 status = VMCacheFactory::CreateNullCache(cache); 2192 if (status != B_OK) 2193 return status; 2194 2195 // tell the page scanner to skip over this area, no pages will be mapped here 2196 cache->scan_skip = 1; 2197 cache->virtual_end = size; 2198 2199 cache->Lock(); 2200 2201 status = map_backing_store(locker.AddressSpace(), cache, address, 0, size, 2202 addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name, 2203 false, true); 2204 2205 if (status < B_OK) { 2206 cache->ReleaseRefAndUnlock(); 2207 return status; 2208 } 2209 2210 cache->Unlock(); 2211 2212 area->cache_type = CACHE_TYPE_NULL; 2213 return area->id; 2214 } 2215 2216 2217 /*! Creates the vnode cache for the specified \a vnode. 2218 The vnode has to be marked busy when calling this function. 2219 */ 2220 status_t 2221 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2222 { 2223 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2224 } 2225 2226 2227 /*! \a cache must be locked. The area's address space must be read-locked. 2228 */ 2229 static void 2230 pre_map_area_pages(vm_area* area, VMCache* cache) 2231 { 2232 addr_t baseAddress = area->base; 2233 addr_t cacheOffset = area->cache_offset; 2234 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2235 page_num_t endPage = firstPage + area->size / B_PAGE_SIZE; 2236 2237 for (VMCachePagesTree::Iterator it 2238 = cache->pages.GetIterator(firstPage, true, true); 2239 vm_page* page = it.Next();) { 2240 if (page->cache_offset >= endPage) 2241 break; 2242 2243 // skip inactive pages 2244 if (page->state == PAGE_STATE_BUSY || page->usage_count <= 0) 2245 continue; 2246 2247 vm_map_page(area, page, 2248 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2249 B_READ_AREA | B_KERNEL_READ_AREA); 2250 } 2251 } 2252 2253 2254 /*! Will map the file specified by \a fd to an area in memory. 2255 The file will be mirrored beginning at the specified \a offset. The 2256 \a offset and \a size arguments have to be page aligned. 2257 */ 2258 static area_id 2259 _vm_map_file(team_id team, const char* name, void** _address, uint32 addressSpec, 2260 size_t size, uint32 protection, uint32 mapping, int fd, off_t offset, 2261 bool kernel) 2262 { 2263 // TODO: for binary files, we want to make sure that they get the 2264 // copy of a file at a given time, ie. later changes should not 2265 // make it into the mapped copy -- this will need quite some changes 2266 // to be done in a nice way 2267 TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n", 2268 fd, offset, size, mapping)); 2269 2270 offset = ROUNDOWN(offset, B_PAGE_SIZE); 2271 size = PAGE_ALIGN(size); 2272 2273 if (mapping == REGION_NO_PRIVATE_MAP) 2274 protection |= B_SHARED_AREA; 2275 2276 if (fd < 0) { 2277 uint32 flags = addressSpec == B_EXACT_ADDRESS 2278 ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2279 return vm_create_anonymous_area(team, name, _address, addressSpec, size, 2280 B_NO_LOCK, protection, flags, kernel); 2281 } 2282 2283 // get the open flags of the FD 2284 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2285 if (descriptor == NULL) 2286 return EBADF; 2287 int32 openMode = descriptor->open_mode; 2288 put_fd(descriptor); 2289 2290 // The FD must open for reading at any rate. For shared mapping with write 2291 // access, additionally the FD must be open for writing. 
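	// Summary of the check below:
	//   FD open mode    private mapping    shared, read-only    shared, writable
	//   O_RDONLY        ok                 ok                   EACCES
	//   O_WRONLY        EACCES             EACCES               EACCES
	//   O_RDWR          ok                 ok                   ok
	// (O_ACCMODE masks out everything but the access mode bits of open_mode.)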
2292 if ((openMode & O_ACCMODE) == O_WRONLY 2293 || (mapping == REGION_NO_PRIVATE_MAP 2294 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2295 && (openMode & O_ACCMODE) == O_RDONLY)) { 2296 return EACCES; 2297 } 2298 2299 // get the vnode for the object, this also grabs a ref to it 2300 struct vnode* vnode = NULL; 2301 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2302 if (status < B_OK) 2303 return status; 2304 CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode); 2305 2306 // If we're going to pre-map pages, we need to reserve the pages needed by 2307 // the mapping backend upfront. 2308 page_num_t reservedPreMapPages = 0; 2309 if ((protection & B_READ_AREA) != 0) { 2310 AddressSpaceWriteLocker locker; 2311 status = locker.SetTo(team); 2312 if (status != B_OK) 2313 return status; 2314 2315 vm_translation_map* map = &locker.AddressSpace()->translation_map; 2316 reservedPreMapPages = map->ops->map_max_pages_need(map, 0, size - 1); 2317 2318 locker.Unlock(); 2319 2320 vm_page_reserve_pages(reservedPreMapPages); 2321 } 2322 2323 struct PageUnreserver { 2324 PageUnreserver(page_num_t count) 2325 : fCount(count) 2326 { 2327 } 2328 2329 ~PageUnreserver() 2330 { 2331 if (fCount > 0) 2332 vm_page_unreserve_pages(fCount); 2333 } 2334 2335 page_num_t fCount; 2336 } pageUnreserver(reservedPreMapPages); 2337 2338 AddressSpaceWriteLocker locker(team); 2339 if (!locker.IsLocked()) 2340 return B_BAD_TEAM_ID; 2341 2342 // TODO: this only works for file systems that use the file cache 2343 vm_cache* cache; 2344 status = vfs_get_vnode_cache(vnode, &cache, false); 2345 if (status < B_OK) 2346 return status; 2347 2348 cache->Lock(); 2349 2350 vm_area* area; 2351 status = map_backing_store(locker.AddressSpace(), cache, _address, 2352 offset, size, addressSpec, 0, protection, mapping, &area, name, 2353 addressSpec == B_EXACT_ADDRESS, kernel); 2354 2355 if (status < B_OK || mapping == REGION_PRIVATE_MAP) { 2356 // map_backing_store() cannot know we no longer need the ref 2357 cache->ReleaseRefLocked(); 2358 } 2359 2360 if (status == B_OK && (protection & B_READ_AREA) != 0) 2361 pre_map_area_pages(area, cache); 2362 2363 cache->Unlock(); 2364 2365 if (status < B_OK) 2366 return status; 2367 2368 area->cache_type = CACHE_TYPE_VNODE; 2369 return area->id; 2370 } 2371 2372 2373 area_id 2374 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2375 addr_t size, uint32 protection, uint32 mapping, int fd, off_t offset) 2376 { 2377 if (!arch_vm_supports_protection(protection)) 2378 return B_NOT_SUPPORTED; 2379 2380 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2381 mapping, fd, offset, true); 2382 } 2383 2384 2385 vm_cache* 2386 vm_area_get_locked_cache(vm_area* area) 2387 { 2388 mutex_lock(&sAreaCacheLock); 2389 2390 while (true) { 2391 vm_cache* cache = area->cache; 2392 2393 if (!cache->SwitchLock(&sAreaCacheLock)) { 2394 // cache has been deleted 2395 mutex_lock(&sAreaCacheLock); 2396 continue; 2397 } 2398 2399 mutex_lock(&sAreaCacheLock); 2400 2401 if (cache == area->cache) { 2402 cache->AcquireRefLocked(); 2403 mutex_unlock(&sAreaCacheLock); 2404 return cache; 2405 } 2406 2407 // the cache changed in the meantime 2408 cache->Unlock(); 2409 } 2410 } 2411 2412 2413 void 2414 vm_area_put_locked_cache(vm_cache* cache) 2415 { 2416 cache->ReleaseRefAndUnlock(); 2417 } 2418 2419 2420 area_id 2421 vm_clone_area(team_id team, const char* name, void** address, 2422 uint32 addressSpec, uint32 protection, uint32 mapping, area_id 
sourceID, 2423 bool kernel) 2424 { 2425 vm_area* newArea = NULL; 2426 vm_area* sourceArea; 2427 2428 // Check whether the source area exists and is cloneable. If so, mark it 2429 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2430 { 2431 AddressSpaceWriteLocker locker; 2432 status_t status = locker.SetFromArea(sourceID, sourceArea); 2433 if (status != B_OK) 2434 return status; 2435 2436 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2437 return B_NOT_ALLOWED; 2438 2439 sourceArea->protection |= B_SHARED_AREA; 2440 protection |= B_SHARED_AREA; 2441 } 2442 2443 // Now lock both address spaces and actually do the cloning. 2444 2445 MultiAddressSpaceLocker locker; 2446 vm_address_space* sourceAddressSpace; 2447 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2448 if (status != B_OK) 2449 return status; 2450 2451 vm_address_space* targetAddressSpace; 2452 status = locker.AddTeam(team, true, &targetAddressSpace); 2453 if (status != B_OK) 2454 return status; 2455 2456 status = locker.Lock(); 2457 if (status != B_OK) 2458 return status; 2459 2460 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2461 if (sourceArea == NULL) 2462 return B_BAD_VALUE; 2463 2464 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2465 return B_NOT_ALLOWED; 2466 2467 vm_cache* cache = vm_area_get_locked_cache(sourceArea); 2468 2469 // TODO: for now, B_USER_CLONEABLE is disabled, until all drivers 2470 // have been adapted. Maybe it should be part of the kernel settings, 2471 // anyway (so that old drivers can always work). 2472 #if 0 2473 if (sourceArea->aspace == vm_kernel_address_space() 2474 && addressSpace != vm_kernel_address_space() 2475 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 2476 // kernel areas must not be cloned in userland, unless explicitly 2477 // declared user-cloneable upon construction 2478 status = B_NOT_ALLOWED; 2479 } else 2480 #endif 2481 if (sourceArea->cache_type == CACHE_TYPE_NULL) 2482 status = B_NOT_ALLOWED; 2483 else { 2484 status = map_backing_store(targetAddressSpace, cache, address, 2485 sourceArea->cache_offset, sourceArea->size, addressSpec, 2486 sourceArea->wiring, protection, mapping, &newArea, name, false, 2487 kernel); 2488 } 2489 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2490 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2491 // to create a new cache, and has therefore already acquired a reference 2492 // to the source cache - but otherwise it has no idea that we need 2493 // one. 
2494 cache->AcquireRefLocked(); 2495 } 2496 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2497 // we need to map in everything at this point 2498 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2499 // we don't have actual pages to map but a physical area 2500 vm_translation_map* map 2501 = &sourceArea->address_space->translation_map; 2502 map->ops->lock(map); 2503 2504 addr_t physicalAddress; 2505 uint32 oldProtection; 2506 map->ops->query(map, sourceArea->base, &physicalAddress, 2507 &oldProtection); 2508 2509 map->ops->unlock(map); 2510 2511 map = &targetAddressSpace->translation_map; 2512 size_t reservePages = map->ops->map_max_pages_need(map, 2513 newArea->base, newArea->base + (newArea->size - 1)); 2514 2515 vm_page_reserve_pages(reservePages); 2516 map->ops->lock(map); 2517 2518 for (addr_t offset = 0; offset < newArea->size; 2519 offset += B_PAGE_SIZE) { 2520 map->ops->map(map, newArea->base + offset, 2521 physicalAddress + offset, protection); 2522 } 2523 2524 map->ops->unlock(map); 2525 vm_page_unreserve_pages(reservePages); 2526 } else { 2527 vm_translation_map* map = &targetAddressSpace->translation_map; 2528 size_t reservePages = map->ops->map_max_pages_need(map, 2529 newArea->base, newArea->base + (newArea->size - 1)); 2530 vm_page_reserve_pages(reservePages); 2531 2532 // map in all pages from source 2533 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2534 vm_page* page = it.Next();) { 2535 vm_map_page(newArea, page, newArea->base 2536 + ((page->cache_offset << PAGE_SHIFT) 2537 - newArea->cache_offset), protection); 2538 } 2539 2540 vm_page_unreserve_pages(reservePages); 2541 } 2542 } 2543 if (status == B_OK) 2544 newArea->cache_type = sourceArea->cache_type; 2545 2546 vm_area_put_locked_cache(cache); 2547 2548 if (status < B_OK) 2549 return status; 2550 2551 return newArea->id; 2552 } 2553 2554 2555 //! The address space must be write locked at this point 2556 static void 2557 remove_area_from_address_space(vm_address_space* addressSpace, vm_area* area) 2558 { 2559 vm_area* temp = addressSpace->areas; 2560 vm_area* last = NULL; 2561 2562 while (temp != NULL) { 2563 if (area == temp) { 2564 if (last != NULL) { 2565 last->address_space_next = temp->address_space_next; 2566 } else { 2567 addressSpace->areas = temp->address_space_next; 2568 } 2569 addressSpace->change_count++; 2570 break; 2571 } 2572 last = temp; 2573 temp = temp->address_space_next; 2574 } 2575 if (area == addressSpace->area_hint) 2576 addressSpace->area_hint = NULL; 2577 2578 if (temp == NULL) 2579 panic("vm_area_release_ref: area not found in aspace's area list\n"); 2580 } 2581 2582 2583 static void 2584 delete_area(vm_address_space* addressSpace, vm_area* area) 2585 { 2586 rw_lock_write_lock(&sAreaHashLock); 2587 hash_remove(sAreaHash, area); 2588 rw_lock_write_unlock(&sAreaHashLock); 2589 2590 // At this point the area is removed from the global hash table, but 2591 // still exists in the area list. 
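	// The remaining links -- the address space's area list and the area's
	// cache -- are severed below, once the mappings have been removed and
	// any modified pages have been written back.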
2592 2593 // Unmap the virtual address space the area occupied 2594 vm_unmap_pages(area, area->base, area->size, !area->cache->temporary); 2595 2596 if (!area->cache->temporary) 2597 area->cache->WriteModified(); 2598 2599 arch_vm_unset_memory_type(area); 2600 remove_area_from_address_space(addressSpace, area); 2601 vm_put_address_space(addressSpace); 2602 2603 area->cache->RemoveArea(area); 2604 area->cache->ReleaseRef(); 2605 2606 free(area->page_protections); 2607 free(area->name); 2608 free(area); 2609 } 2610 2611 2612 status_t 2613 vm_delete_area(team_id team, area_id id, bool kernel) 2614 { 2615 TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id)); 2616 2617 AddressSpaceWriteLocker locker; 2618 vm_area* area; 2619 status_t status = locker.SetFromArea(team, id, area); 2620 if (status < B_OK) 2621 return status; 2622 2623 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2624 return B_NOT_ALLOWED; 2625 2626 delete_area(locker.AddressSpace(), area); 2627 return B_OK; 2628 } 2629 2630 2631 /*! Creates a new cache on top of given cache, moves all areas from 2632 the old cache to the new one, and changes the protection of all affected 2633 areas' pages to read-only. 2634 Preconditions: 2635 - The given cache must be locked. 2636 - All of the cache's areas' address spaces must be read locked. 2637 - All of the cache's areas must have a clear \c no_cache_change flags. 2638 */ 2639 static status_t 2640 vm_copy_on_write_area(vm_cache* lowerCache) 2641 { 2642 vm_cache* upperCache; 2643 2644 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2645 2646 // We need to separate the cache from its areas. The cache goes one level 2647 // deeper and we create a new cache inbetween. 2648 2649 // create an anonymous cache 2650 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2651 0, true); 2652 if (status != B_OK) 2653 return status; 2654 2655 upperCache->Lock(); 2656 2657 upperCache->temporary = 1; 2658 upperCache->scan_skip = lowerCache->scan_skip; 2659 upperCache->virtual_base = lowerCache->virtual_base; 2660 upperCache->virtual_end = lowerCache->virtual_end; 2661 2662 // transfer the lower cache areas to the upper cache 2663 mutex_lock(&sAreaCacheLock); 2664 2665 upperCache->areas = lowerCache->areas; 2666 lowerCache->areas = NULL; 2667 2668 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2669 tempArea = tempArea->cache_next) { 2670 ASSERT(!tempArea->no_cache_change); 2671 2672 tempArea->cache = upperCache; 2673 upperCache->AcquireRefLocked(); 2674 lowerCache->ReleaseRefLocked(); 2675 } 2676 2677 mutex_unlock(&sAreaCacheLock); 2678 2679 lowerCache->AddConsumer(upperCache); 2680 2681 // We now need to remap all pages from all of the cache's areas read-only, so 2682 // that a copy will be created on next write access 2683 2684 for (vm_area* tempArea = upperCache->areas; tempArea != NULL; 2685 tempArea = tempArea->cache_next) { 2686 // The area must be readable in the same way it was previously writable 2687 uint32 protection = B_KERNEL_READ_AREA; 2688 if ((tempArea->protection & B_READ_AREA) != 0) 2689 protection |= B_READ_AREA; 2690 2691 vm_translation_map* map = &tempArea->address_space->translation_map; 2692 map->ops->lock(map); 2693 map->ops->protect(map, tempArea->base, 2694 tempArea->base - 1 + tempArea->size, protection); 2695 map->ops->unlock(map); 2696 } 2697 2698 vm_area_put_locked_cache(upperCache); 2699 2700 return B_OK; 2701 } 2702 2703 2704 area_id 2705 vm_copy_area(team_id team, const char* name, void** _address, 2706 
uint32 addressSpec, uint32 protection, area_id sourceID) 2707 { 2708 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2709 2710 if ((protection & B_KERNEL_PROTECTION) == 0) { 2711 // set the same protection for the kernel as for userland 2712 protection |= B_KERNEL_READ_AREA; 2713 if (writableCopy) 2714 protection |= B_KERNEL_WRITE_AREA; 2715 } 2716 2717 // Do the locking: target address space, all address spaces associated with 2718 // the source cache, and the cache itself. 2719 MultiAddressSpaceLocker locker; 2720 vm_address_space* targetAddressSpace; 2721 vm_cache* cache; 2722 vm_area* source; 2723 status_t status = locker.AddTeam(team, true, &targetAddressSpace); 2724 if (status == B_OK) { 2725 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2726 &cache, true); 2727 } 2728 if (status != B_OK) 2729 return status; 2730 2731 AreaCacheLocker cacheLocker(cache); // already locked 2732 2733 if (addressSpec == B_CLONE_ADDRESS) { 2734 addressSpec = B_EXACT_ADDRESS; 2735 *_address = (void*)source->base; 2736 } 2737 2738 bool sharedArea = (source->protection & B_SHARED_AREA) != 0; 2739 2740 // First, create a cache on top of the source area, respectively use the 2741 // existing one, if this is a shared area. 2742 2743 vm_area* target; 2744 status = map_backing_store(targetAddressSpace, cache, _address, 2745 source->cache_offset, source->size, addressSpec, source->wiring, 2746 protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2747 &target, name, false, true); 2748 if (status < B_OK) 2749 return status; 2750 2751 if (sharedArea) { 2752 // The new area uses the old area's cache, but map_backing_store() 2753 // hasn't acquired a ref. So we have to do that now. 2754 cache->AcquireRefLocked(); 2755 } 2756 2757 // If the source area is writable, we need to move it one layer up as well 2758 2759 if (!sharedArea) { 2760 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 2761 // TODO: do something more useful if this fails! 2762 if (vm_copy_on_write_area(cache) < B_OK) 2763 panic("vm_copy_on_write_area() failed!\n"); 2764 } 2765 } 2766 2767 // we return the ID of the newly created area 2768 return target->id; 2769 } 2770 2771 2772 //! 
You need to hold the cache lock when calling this function 2773 static int32 2774 count_writable_areas(vm_cache* cache, vm_area* ignoreArea) 2775 { 2776 struct vm_area* area = cache->areas; 2777 uint32 count = 0; 2778 2779 for (; area != NULL; area = area->cache_next) { 2780 if (area != ignoreArea 2781 && (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) 2782 count++; 2783 } 2784 2785 return count; 2786 } 2787 2788 2789 static status_t 2790 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2791 bool kernel) 2792 { 2793 TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = " 2794 "%#lx)\n", team, areaID, newProtection)); 2795 2796 if (!arch_vm_supports_protection(newProtection)) 2797 return B_NOT_SUPPORTED; 2798 2799 // lock address spaces and cache 2800 MultiAddressSpaceLocker locker; 2801 vm_cache* cache; 2802 vm_area* area; 2803 status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area, 2804 &cache, true); 2805 AreaCacheLocker cacheLocker(cache); // already locked 2806 2807 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2808 return B_NOT_ALLOWED; 2809 2810 if (area->protection == newProtection) 2811 return B_OK; 2812 2813 if (team != vm_kernel_address_space_id() 2814 && area->address_space->id != team) { 2815 // unless you're the kernel, you are only allowed to set 2816 // the protection of your own areas 2817 return B_NOT_ALLOWED; 2818 } 2819 2820 bool changePageProtection = true; 2821 2822 if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2823 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) { 2824 // writable -> !writable 2825 2826 if (cache->source != NULL && cache->temporary) { 2827 if (count_writable_areas(cache, area) == 0) { 2828 // Since this cache now lives from the pages in its source cache, 2829 // we can change the cache's commitment to take only those pages 2830 // into account that really are in this cache. 2831 2832 status = cache->Commit(cache->page_count * B_PAGE_SIZE); 2833 2834 // TODO: we may be able to join with our source cache, if 2835 // count == 0 2836 } 2837 } 2838 } else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0 2839 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 2840 // !writable -> writable 2841 2842 if (!list_is_empty(&cache->consumers)) { 2843 // There are consumers -- we have to insert a new cache. Fortunately 2844 // vm_copy_on_write_area() does everything that's needed. 2845 changePageProtection = false; 2846 status = vm_copy_on_write_area(cache); 2847 } else { 2848 // No consumers, so we don't need to insert a new one. 2849 if (cache->source != NULL && cache->temporary) { 2850 // the cache's commitment must contain all possible pages 2851 status = cache->Commit(cache->virtual_end 2852 - cache->virtual_base); 2853 } 2854 2855 if (status == B_OK && cache->source != NULL) { 2856 // There's a source cache, hence we can't just change all pages' 2857 // protection or we might allow writing into pages belonging to 2858 // a lower cache. 
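				// Only pages that have already been copied into this cache
				// get the new protection in the loop below. Pages still
				// backed by the source cache keep their read-only mappings,
				// so a write access faults first and the page is copied up
				// into this cache before it becomes writable.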
2859 changePageProtection = false; 2860 2861 struct vm_translation_map* map 2862 = &area->address_space->translation_map; 2863 map->ops->lock(map); 2864 2865 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2866 vm_page* page = it.Next();) { 2867 addr_t address = area->base 2868 + (page->cache_offset << PAGE_SHIFT); 2869 map->ops->protect(map, address, address - 1 + B_PAGE_SIZE, 2870 newProtection); 2871 } 2872 2873 map->ops->unlock(map); 2874 } 2875 } 2876 } else { 2877 // we don't have anything special to do in all other cases 2878 } 2879 2880 if (status == B_OK) { 2881 // remap existing pages in this cache 2882 struct vm_translation_map* map = &area->address_space->translation_map; 2883 2884 if (changePageProtection) { 2885 map->ops->lock(map); 2886 map->ops->protect(map, area->base, area->base + area->size, 2887 newProtection); 2888 map->ops->unlock(map); 2889 } 2890 2891 area->protection = newProtection; 2892 } 2893 2894 return status; 2895 } 2896 2897 2898 status_t 2899 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr) 2900 { 2901 vm_address_space* addressSpace = vm_get_address_space(team); 2902 if (addressSpace == NULL) 2903 return B_BAD_TEAM_ID; 2904 2905 uint32 dummyFlags; 2906 status_t status = addressSpace->translation_map.ops->query( 2907 &addressSpace->translation_map, vaddr, paddr, &dummyFlags); 2908 2909 vm_put_address_space(addressSpace); 2910 return status; 2911 } 2912 2913 2914 static inline addr_t 2915 virtual_page_address(vm_area* area, vm_page* page) 2916 { 2917 return area->base 2918 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 2919 } 2920 2921 2922 bool 2923 vm_test_map_modification(vm_page* page) 2924 { 2925 MutexLocker locker(sMappingLock); 2926 2927 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2928 vm_page_mapping* mapping; 2929 while ((mapping = iterator.Next()) != NULL) { 2930 vm_area* area = mapping->area; 2931 vm_translation_map* map = &area->address_space->translation_map; 2932 2933 addr_t physicalAddress; 2934 uint32 flags; 2935 map->ops->lock(map); 2936 map->ops->query(map, virtual_page_address(area, page), 2937 &physicalAddress, &flags); 2938 map->ops->unlock(map); 2939 2940 if ((flags & PAGE_MODIFIED) != 0) 2941 return true; 2942 } 2943 2944 return false; 2945 } 2946 2947 2948 int32 2949 vm_test_map_activation(vm_page* page, bool* _modified) 2950 { 2951 int32 activation = 0; 2952 bool modified = false; 2953 2954 MutexLocker locker(sMappingLock); 2955 2956 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2957 vm_page_mapping* mapping; 2958 while ((mapping = iterator.Next()) != NULL) { 2959 vm_area* area = mapping->area; 2960 vm_translation_map* map = &area->address_space->translation_map; 2961 2962 addr_t physicalAddress; 2963 uint32 flags; 2964 map->ops->lock(map); 2965 map->ops->query(map, virtual_page_address(area, page), 2966 &physicalAddress, &flags); 2967 map->ops->unlock(map); 2968 2969 if ((flags & PAGE_ACCESSED) != 0) 2970 activation++; 2971 if ((flags & PAGE_MODIFIED) != 0) 2972 modified = true; 2973 } 2974 2975 if (_modified != NULL) 2976 *_modified = modified; 2977 2978 return activation; 2979 } 2980 2981 2982 void 2983 vm_clear_map_flags(vm_page* page, uint32 flags) 2984 { 2985 MutexLocker locker(sMappingLock); 2986 2987 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2988 vm_page_mapping* mapping; 2989 while ((mapping = iterator.Next()) != NULL) { 2990 vm_area* area = mapping->area; 2991 vm_translation_map* map = 
&area->address_space->translation_map; 2992 2993 map->ops->lock(map); 2994 map->ops->clear_flags(map, virtual_page_address(area, page), flags); 2995 map->ops->unlock(map); 2996 } 2997 } 2998 2999 3000 /*! Removes all mappings from a page. 3001 After you've called this function, the page is unmapped from memory. 3002 The accumulated page flags of all mappings can be found in \a _flags. 3003 */ 3004 void 3005 vm_remove_all_page_mappings(vm_page* page, uint32* _flags) 3006 { 3007 uint32 accumulatedFlags = 0; 3008 MutexLocker locker(sMappingLock); 3009 3010 vm_page_mappings queue; 3011 queue.MoveFrom(&page->mappings); 3012 3013 vm_page_mappings::Iterator iterator = queue.GetIterator(); 3014 vm_page_mapping* mapping; 3015 while ((mapping = iterator.Next()) != NULL) { 3016 vm_area* area = mapping->area; 3017 vm_translation_map* map = &area->address_space->translation_map; 3018 addr_t physicalAddress; 3019 uint32 flags; 3020 3021 map->ops->lock(map); 3022 addr_t address = virtual_page_address(area, page); 3023 map->ops->unmap(map, address, address + (B_PAGE_SIZE - 1)); 3024 map->ops->flush(map); 3025 map->ops->query(map, address, &physicalAddress, &flags); 3026 map->ops->unlock(map); 3027 3028 area->mappings.Remove(mapping); 3029 3030 accumulatedFlags |= flags; 3031 } 3032 3033 if (page->wired_count == 0 && !queue.IsEmpty()) 3034 atomic_add(&gMappedPagesCount, -1); 3035 3036 locker.Unlock(); 3037 3038 // free now unused mappings 3039 3040 while ((mapping = queue.RemoveHead()) != NULL) { 3041 free(mapping); 3042 } 3043 3044 if (_flags != NULL) 3045 *_flags = accumulatedFlags; 3046 } 3047 3048 3049 bool 3050 vm_unmap_page(vm_area* area, addr_t virtualAddress, bool preserveModified) 3051 { 3052 vm_translation_map* map = &area->address_space->translation_map; 3053 3054 map->ops->lock(map); 3055 3056 addr_t physicalAddress; 3057 uint32 flags; 3058 status_t status = map->ops->query(map, virtualAddress, &physicalAddress, 3059 &flags); 3060 if (status < B_OK || (flags & PAGE_PRESENT) == 0) { 3061 map->ops->unlock(map); 3062 return false; 3063 } 3064 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3065 if (page == NULL && area->cache_type != CACHE_TYPE_DEVICE) { 3066 panic("area %p looking up page failed for pa 0x%lx\n", area, 3067 physicalAddress); 3068 } 3069 3070 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) 3071 decrement_page_wired_count(page); 3072 3073 map->ops->unmap(map, virtualAddress, virtualAddress + B_PAGE_SIZE - 1); 3074 3075 if (preserveModified) { 3076 map->ops->flush(map); 3077 3078 status = map->ops->query(map, virtualAddress, &physicalAddress, &flags); 3079 if ((flags & PAGE_MODIFIED) != 0 && page->state != PAGE_STATE_MODIFIED) 3080 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3081 } 3082 3083 map->ops->unlock(map); 3084 3085 if (area->wiring == B_NO_LOCK) { 3086 vm_page_mapping* mapping; 3087 3088 mutex_lock(&sMappingLock); 3089 map->ops->lock(map); 3090 3091 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3092 while (iterator.HasNext()) { 3093 mapping = iterator.Next(); 3094 3095 if (mapping->area == area) { 3096 area->mappings.Remove(mapping); 3097 page->mappings.Remove(mapping); 3098 3099 if (page->mappings.IsEmpty() && page->wired_count == 0) 3100 atomic_add(&gMappedPagesCount, -1); 3101 3102 map->ops->unlock(map); 3103 mutex_unlock(&sMappingLock); 3104 3105 free(mapping); 3106 3107 return true; 3108 } 3109 } 3110 3111 map->ops->unlock(map); 3112 mutex_unlock(&sMappingLock); 3113 3114 dprintf("vm_unmap_page: couldn't 
find mapping for area %p in page %p\n", 3115 area, page); 3116 } 3117 3118 return true; 3119 } 3120 3121 3122 status_t 3123 vm_unmap_pages(vm_area* area, addr_t base, size_t size, bool preserveModified) 3124 { 3125 vm_translation_map* map = &area->address_space->translation_map; 3126 addr_t end = base + (size - 1); 3127 3128 map->ops->lock(map); 3129 3130 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) { 3131 // iterate through all pages and decrease their wired count 3132 for (addr_t virtualAddress = base; virtualAddress < end; 3133 virtualAddress += B_PAGE_SIZE) { 3134 addr_t physicalAddress; 3135 uint32 flags; 3136 status_t status = map->ops->query(map, virtualAddress, 3137 &physicalAddress, &flags); 3138 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3139 continue; 3140 3141 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3142 if (page == NULL) { 3143 panic("area %p looking up page failed for pa 0x%lx\n", area, 3144 physicalAddress); 3145 } 3146 3147 decrement_page_wired_count(page); 3148 } 3149 } 3150 3151 map->ops->unmap(map, base, end); 3152 if (preserveModified) { 3153 map->ops->flush(map); 3154 3155 for (addr_t virtualAddress = base; virtualAddress < end; 3156 virtualAddress += B_PAGE_SIZE) { 3157 addr_t physicalAddress; 3158 uint32 flags; 3159 status_t status = map->ops->query(map, virtualAddress, 3160 &physicalAddress, &flags); 3161 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 3162 continue; 3163 3164 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3165 if (page == NULL) { 3166 panic("area %p looking up page failed for pa 0x%lx\n", area, 3167 physicalAddress); 3168 } 3169 3170 if ((flags & PAGE_MODIFIED) != 0 3171 && page->state != PAGE_STATE_MODIFIED) 3172 vm_page_set_state(page, PAGE_STATE_MODIFIED); 3173 } 3174 } 3175 map->ops->unlock(map); 3176 3177 if (area->wiring == B_NO_LOCK) { 3178 uint32 startOffset = (area->cache_offset + base - area->base) 3179 >> PAGE_SHIFT; 3180 uint32 endOffset = startOffset + (size >> PAGE_SHIFT); 3181 vm_page_mapping* mapping; 3182 vm_area_mappings queue; 3183 3184 mutex_lock(&sMappingLock); 3185 map->ops->lock(map); 3186 3187 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3188 while (iterator.HasNext()) { 3189 mapping = iterator.Next(); 3190 3191 vm_page* page = mapping->page; 3192 if (page->cache_offset < startOffset 3193 || page->cache_offset >= endOffset) 3194 continue; 3195 3196 page->mappings.Remove(mapping); 3197 iterator.Remove(); 3198 3199 if (page->mappings.IsEmpty() && page->wired_count == 0) 3200 atomic_add(&gMappedPagesCount, -1); 3201 3202 queue.Add(mapping); 3203 } 3204 3205 map->ops->unlock(map); 3206 mutex_unlock(&sMappingLock); 3207 3208 while ((mapping = queue.RemoveHead()) != NULL) { 3209 free(mapping); 3210 } 3211 } 3212 3213 return B_OK; 3214 } 3215 3216 3217 /*! When calling this function, you need to have pages reserved! 
*/ 3218 status_t 3219 vm_map_page(vm_area* area, vm_page* page, addr_t address, uint32 protection) 3220 { 3221 vm_translation_map* map = &area->address_space->translation_map; 3222 vm_page_mapping* mapping = NULL; 3223 3224 if (area->wiring == B_NO_LOCK) { 3225 mapping = (vm_page_mapping*)malloc_nogrow(sizeof(vm_page_mapping)); 3226 if (mapping == NULL) 3227 return B_NO_MEMORY; 3228 3229 mapping->page = page; 3230 mapping->area = area; 3231 } 3232 3233 map->ops->lock(map); 3234 map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE, 3235 protection); 3236 map->ops->unlock(map); 3237 3238 if (area->wiring != B_NO_LOCK) { 3239 increment_page_wired_count(page); 3240 } else { 3241 // insert mapping into lists 3242 MutexLocker locker(sMappingLock); 3243 3244 if (page->mappings.IsEmpty() && page->wired_count == 0) 3245 atomic_add(&gMappedPagesCount, 1); 3246 3247 page->mappings.Add(mapping); 3248 area->mappings.Add(mapping); 3249 } 3250 3251 if (page->usage_count < 0) 3252 page->usage_count = 1; 3253 3254 if (page->state != PAGE_STATE_MODIFIED) 3255 vm_page_set_state(page, PAGE_STATE_ACTIVE); 3256 3257 return B_OK; 3258 } 3259 3260 3261 static int 3262 display_mem(int argc, char** argv) 3263 { 3264 bool physical = false; 3265 addr_t copyAddress; 3266 int32 displayWidth; 3267 int32 itemSize; 3268 int32 num = -1; 3269 addr_t address; 3270 int i = 1, j; 3271 3272 if (argc > 1 && argv[1][0] == '-') { 3273 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3274 physical = true; 3275 i++; 3276 } else 3277 i = 99; 3278 } 3279 3280 if (argc < i + 1 || argc > i + 2) { 3281 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3282 "\tdl - 8 bytes\n" 3283 "\tdw - 4 bytes\n" 3284 "\tds - 2 bytes\n" 3285 "\tdb - 1 byte\n" 3286 "\tstring - a whole string\n" 3287 " -p or --physical only allows memory from a single page to be " 3288 "displayed.\n"); 3289 return 0; 3290 } 3291 3292 address = parse_expression(argv[i]); 3293 3294 if (argc > i + 1) 3295 num = parse_expression(argv[i + 1]); 3296 3297 // build the format string 3298 if (strcmp(argv[0], "db") == 0) { 3299 itemSize = 1; 3300 displayWidth = 16; 3301 } else if (strcmp(argv[0], "ds") == 0) { 3302 itemSize = 2; 3303 displayWidth = 8; 3304 } else if (strcmp(argv[0], "dw") == 0) { 3305 itemSize = 4; 3306 displayWidth = 4; 3307 } else if (strcmp(argv[0], "dl") == 0) { 3308 itemSize = 8; 3309 displayWidth = 2; 3310 } else if (strcmp(argv[0], "string") == 0) { 3311 itemSize = 1; 3312 displayWidth = -1; 3313 } else { 3314 kprintf("display_mem called in an invalid way!\n"); 3315 return 0; 3316 } 3317 3318 if (num <= 0) 3319 num = displayWidth; 3320 3321 void* physicalPageHandle = NULL; 3322 3323 if (physical) { 3324 int32 offset = address & (B_PAGE_SIZE - 1); 3325 if (num * itemSize + offset > B_PAGE_SIZE) { 3326 num = (B_PAGE_SIZE - offset) / itemSize; 3327 kprintf("NOTE: number of bytes has been cut to page size\n"); 3328 } 3329 3330 address = ROUNDOWN(address, B_PAGE_SIZE); 3331 3332 if (vm_get_physical_page_debug(address, ©Address, 3333 &physicalPageHandle) != B_OK) { 3334 kprintf("getting the hardware page failed."); 3335 return 0; 3336 } 3337 3338 address += offset; 3339 copyAddress += offset; 3340 } else 3341 copyAddress = address; 3342 3343 if (!strcmp(argv[0], "string")) { 3344 kprintf("%p \"", (char*)copyAddress); 3345 3346 // string mode 3347 for (i = 0; true; i++) { 3348 char c; 3349 if (user_memcpy(&c, (char*)copyAddress + i, 1) != B_OK 3350 || c == '\0') 3351 break; 3352 3353 if (c == '\n') 3354 
kprintf("\\n"); 3355 else if (c == '\t') 3356 kprintf("\\t"); 3357 else { 3358 if (!isprint(c)) 3359 c = '.'; 3360 3361 kprintf("%c", c); 3362 } 3363 } 3364 3365 kprintf("\"\n"); 3366 } else { 3367 // number mode 3368 for (i = 0; i < num; i++) { 3369 uint32 value; 3370 3371 if ((i % displayWidth) == 0) { 3372 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3373 if (i != 0) 3374 kprintf("\n"); 3375 3376 kprintf("[0x%lx] ", address + i * itemSize); 3377 3378 for (j = 0; j < displayed; j++) { 3379 char c; 3380 if (user_memcpy(&c, (char*)copyAddress + i * itemSize + j, 3381 1) != B_OK) { 3382 displayed = j; 3383 break; 3384 } 3385 if (!isprint(c)) 3386 c = '.'; 3387 3388 kprintf("%c", c); 3389 } 3390 if (num > displayWidth) { 3391 // make sure the spacing in the last line is correct 3392 for (j = displayed; j < displayWidth * itemSize; j++) 3393 kprintf(" "); 3394 } 3395 kprintf(" "); 3396 } 3397 3398 if (user_memcpy(&value, (uint8*)copyAddress + i * itemSize, 3399 itemSize) != B_OK) { 3400 kprintf("read fault"); 3401 break; 3402 } 3403 3404 switch (itemSize) { 3405 case 1: 3406 kprintf(" %02x", *(uint8*)&value); 3407 break; 3408 case 2: 3409 kprintf(" %04x", *(uint16*)&value); 3410 break; 3411 case 4: 3412 kprintf(" %08lx", *(uint32*)&value); 3413 break; 3414 case 8: 3415 kprintf(" %016Lx", *(uint64*)&value); 3416 break; 3417 } 3418 } 3419 3420 kprintf("\n"); 3421 } 3422 3423 if (physical) { 3424 copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE); 3425 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3426 } 3427 return 0; 3428 } 3429 3430 3431 static void 3432 dump_cache_tree_recursively(vm_cache* cache, int level, 3433 vm_cache* highlightCache) 3434 { 3435 // print this cache 3436 for (int i = 0; i < level; i++) 3437 kprintf(" "); 3438 if (cache == highlightCache) 3439 kprintf("%p <--\n", cache); 3440 else 3441 kprintf("%p\n", cache); 3442 3443 // recursively print its consumers 3444 vm_cache* consumer = NULL; 3445 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3446 consumer)) != NULL) { 3447 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3448 } 3449 } 3450 3451 3452 static int 3453 dump_cache_tree(int argc, char** argv) 3454 { 3455 if (argc != 2 || !strcmp(argv[1], "--help")) { 3456 kprintf("usage: %s <address>\n", argv[0]); 3457 return 0; 3458 } 3459 3460 addr_t address = parse_expression(argv[1]); 3461 if (address == 0) 3462 return 0; 3463 3464 vm_cache* cache = (vm_cache*)address; 3465 vm_cache* root = cache; 3466 3467 // find the root cache (the transitive source) 3468 while (root->source != NULL) 3469 root = root->source; 3470 3471 dump_cache_tree_recursively(root, 0, cache); 3472 3473 return 0; 3474 } 3475 3476 3477 static const char* 3478 cache_type_to_string(int32 type) 3479 { 3480 switch (type) { 3481 case CACHE_TYPE_RAM: 3482 return "RAM"; 3483 case CACHE_TYPE_DEVICE: 3484 return "device"; 3485 case CACHE_TYPE_VNODE: 3486 return "vnode"; 3487 case CACHE_TYPE_NULL: 3488 return "null"; 3489 3490 default: 3491 return "unknown"; 3492 } 3493 } 3494 3495 3496 #if DEBUG_CACHE_LIST 3497 3498 static void 3499 update_cache_info_recursively(vm_cache* cache, cache_info& info) 3500 { 3501 info.page_count += cache->page_count; 3502 if (cache->type == CACHE_TYPE_RAM) 3503 info.committed += cache->committed_size; 3504 3505 // recurse 3506 vm_cache* consumer = NULL; 3507 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3508 consumer)) != NULL) { 3509 update_cache_info_recursively(consumer, info); 3510 } 3511 } 3512 
3513 3514 static int 3515 cache_info_compare_page_count(const void* _a, const void* _b) 3516 { 3517 const cache_info* a = (const cache_info*)_a; 3518 const cache_info* b = (const cache_info*)_b; 3519 if (a->page_count == b->page_count) 3520 return 0; 3521 return a->page_count < b->page_count ? 1 : -1; 3522 } 3523 3524 3525 static int 3526 cache_info_compare_committed(const void* _a, const void* _b) 3527 { 3528 const cache_info* a = (const cache_info*)_a; 3529 const cache_info* b = (const cache_info*)_b; 3530 if (a->committed == b->committed) 3531 return 0; 3532 return a->committed < b->committed ? 1 : -1; 3533 } 3534 3535 3536 static void 3537 dump_caches_recursively(vm_cache* cache, cache_info& info, int level) 3538 { 3539 for (int i = 0; i < level; i++) 3540 kprintf(" "); 3541 3542 kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache, 3543 cache_type_to_string(cache->type), cache->virtual_base, 3544 cache->virtual_end, cache->page_count); 3545 3546 if (level == 0) 3547 kprintf("/%lu", info.page_count); 3548 3549 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3550 kprintf(", committed: %lld", cache->committed_size); 3551 3552 if (level == 0) 3553 kprintf("/%lu", info.committed); 3554 } 3555 3556 // areas 3557 if (cache->areas != NULL) { 3558 vm_area* area = cache->areas; 3559 kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name, 3560 area->address_space->id); 3561 3562 while (area->cache_next != NULL) { 3563 area = area->cache_next; 3564 kprintf(", %ld", area->id); 3565 } 3566 } 3567 3568 kputs("\n"); 3569 3570 // recurse 3571 vm_cache* consumer = NULL; 3572 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3573 consumer)) != NULL) { 3574 dump_caches_recursively(consumer, info, level + 1); 3575 } 3576 } 3577 3578 3579 static int 3580 dump_caches(int argc, char** argv) 3581 { 3582 if (sCacheInfoTable == NULL) { 3583 kprintf("No cache info table!\n"); 3584 return 0; 3585 } 3586 3587 bool sortByPageCount = true; 3588 3589 for (int32 i = 1; i < argc; i++) { 3590 if (strcmp(argv[i], "-c") == 0) { 3591 sortByPageCount = false; 3592 } else { 3593 print_debugger_command_usage(argv[0]); 3594 return 0; 3595 } 3596 } 3597 3598 uint32 totalCount = 0; 3599 uint32 rootCount = 0; 3600 off_t totalCommitted = 0; 3601 page_num_t totalPages = 0; 3602 3603 vm_cache* cache = gDebugCacheList; 3604 while (cache) { 3605 totalCount++; 3606 if (cache->source == NULL) { 3607 cache_info stackInfo; 3608 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3609 ? sCacheInfoTable[rootCount] : stackInfo; 3610 rootCount++; 3611 info.cache = cache; 3612 info.page_count = 0; 3613 info.committed = 0; 3614 update_cache_info_recursively(cache, info); 3615 totalCommitted += info.committed; 3616 totalPages += info.page_count; 3617 } 3618 3619 cache = cache->debug_next; 3620 } 3621 3622 if (rootCount <= (uint32)kCacheInfoTableCount) { 3623 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3624 sortByPageCount 3625 ? &cache_info_compare_page_count 3626 : &cache_info_compare_committed); 3627 } 3628 3629 kprintf("total committed memory: %lld, total used pages: %lu\n", 3630 totalCommitted, totalPages); 3631 kprintf("%lu caches (%lu root caches), sorted by %s per cache " 3632 "tree...\n\n", totalCount, rootCount, 3633 sortByPageCount ? 
"page count" : "committed size"); 3634 3635 if (rootCount <= (uint32)kCacheInfoTableCount) { 3636 for (uint32 i = 0; i < rootCount; i++) { 3637 cache_info& info = sCacheInfoTable[i]; 3638 dump_caches_recursively(info.cache, info, 0); 3639 } 3640 } else 3641 kprintf("Cache info table too small! Can't sort and print caches!\n"); 3642 3643 return 0; 3644 } 3645 3646 #endif // DEBUG_CACHE_LIST 3647 3648 3649 static int 3650 dump_cache(int argc, char** argv) 3651 { 3652 vm_cache* cache; 3653 bool showPages = false; 3654 int i = 1; 3655 3656 if (argc < 2 || !strcmp(argv[1], "--help")) { 3657 kprintf("usage: %s [-ps] <address>\n" 3658 " if -p is specified, all pages are shown, if -s is used\n" 3659 " only the cache info is shown respectively.\n", argv[0]); 3660 return 0; 3661 } 3662 while (argv[i][0] == '-') { 3663 char* arg = argv[i] + 1; 3664 while (arg[0]) { 3665 if (arg[0] == 'p') 3666 showPages = true; 3667 arg++; 3668 } 3669 i++; 3670 } 3671 if (argv[i] == NULL) { 3672 kprintf("%s: invalid argument, pass address\n", argv[0]); 3673 return 0; 3674 } 3675 3676 addr_t address = parse_expression(argv[i]); 3677 if (address == 0) 3678 return 0; 3679 3680 cache = (vm_cache*)address; 3681 3682 kprintf("CACHE %p:\n", cache); 3683 kprintf(" ref_count: %ld\n", cache->RefCount()); 3684 kprintf(" source: %p\n", cache->source); 3685 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 3686 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 3687 kprintf(" virtual_end: 0x%Lx\n", cache->virtual_end); 3688 kprintf(" temporary: %ld\n", cache->temporary); 3689 kprintf(" scan_skip: %ld\n", cache->scan_skip); 3690 kprintf(" lock: %p\n", cache->GetLock()); 3691 #if KDEBUG 3692 kprintf(" lock.holder: %ld\n", cache->GetLock()->holder); 3693 #endif 3694 kprintf(" areas:\n"); 3695 3696 for (vm_area* area = cache->areas; area != NULL; area = area->cache_next) { 3697 kprintf(" area 0x%lx, %s\n", area->id, area->name); 3698 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 3699 kprintf("\tprotection: 0x%lx\n", area->protection); 3700 kprintf("\towner: 0x%lx\n", area->address_space->id); 3701 } 3702 3703 kprintf(" consumers:\n"); 3704 vm_cache* consumer = NULL; 3705 while ((consumer = (vm_cache*)list_get_next_item(&cache->consumers, 3706 consumer)) != NULL) { 3707 kprintf("\t%p\n", consumer); 3708 } 3709 3710 kprintf(" pages:\n"); 3711 if (showPages) { 3712 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3713 vm_page* page = it.Next();) { 3714 if (page->type == PAGE_TYPE_PHYSICAL) { 3715 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) " 3716 "wired_count %u\n", page, page->physical_page_number, 3717 page->cache_offset, page->type, page->state, 3718 page_state_to_string(page->state), page->wired_count); 3719 } else if(page->type == PAGE_TYPE_DUMMY) { 3720 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 3721 page, page->state, page_state_to_string(page->state)); 3722 } else 3723 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 3724 } 3725 } else 3726 kprintf("\t%ld in cache\n", cache->page_count); 3727 3728 return 0; 3729 } 3730 3731 3732 static void 3733 dump_area_struct(vm_area* area, bool mappings) 3734 { 3735 kprintf("AREA: %p\n", area); 3736 kprintf("name:\t\t'%s'\n", area->name); 3737 kprintf("owner:\t\t0x%lx\n", area->address_space->id); 3738 kprintf("id:\t\t0x%lx\n", area->id); 3739 kprintf("base:\t\t0x%lx\n", area->base); 3740 kprintf("size:\t\t0x%lx\n", area->size); 3741 kprintf("protection:\t0x%lx\n", area->protection); 3742 
kprintf("wiring:\t\t0x%x\n", area->wiring); 3743 kprintf("memory_type:\t0x%x\n", area->memory_type); 3744 kprintf("cache:\t\t%p\n", area->cache); 3745 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 3746 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 3747 kprintf("cache_next:\t%p\n", area->cache_next); 3748 kprintf("cache_prev:\t%p\n", area->cache_prev); 3749 3750 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 3751 if (mappings) { 3752 kprintf("page mappings:\n"); 3753 while (iterator.HasNext()) { 3754 vm_page_mapping* mapping = iterator.Next(); 3755 kprintf(" %p", mapping->page); 3756 } 3757 kprintf("\n"); 3758 } else { 3759 uint32 count = 0; 3760 while (iterator.Next() != NULL) { 3761 count++; 3762 } 3763 kprintf("page mappings:\t%lu\n", count); 3764 } 3765 } 3766 3767 3768 static int 3769 dump_area(int argc, char** argv) 3770 { 3771 bool mappings = false; 3772 bool found = false; 3773 int32 index = 1; 3774 vm_area* area; 3775 addr_t num; 3776 3777 if (argc < 2 || !strcmp(argv[1], "--help")) { 3778 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3779 "All areas matching either id/address/name are listed. You can\n" 3780 "force to check only a specific item by prefixing the specifier\n" 3781 "with the id/contains/address/name keywords.\n" 3782 "-m shows the area's mappings as well.\n"); 3783 return 0; 3784 } 3785 3786 if (!strcmp(argv[1], "-m")) { 3787 mappings = true; 3788 index++; 3789 } 3790 3791 int32 mode = 0xf; 3792 if (!strcmp(argv[index], "id")) 3793 mode = 1; 3794 else if (!strcmp(argv[index], "contains")) 3795 mode = 2; 3796 else if (!strcmp(argv[index], "name")) 3797 mode = 4; 3798 else if (!strcmp(argv[index], "address")) 3799 mode = 0; 3800 if (mode != 0xf) 3801 index++; 3802 3803 if (index >= argc) { 3804 kprintf("No area specifier given.\n"); 3805 return 0; 3806 } 3807 3808 num = parse_expression(argv[index]); 3809 3810 if (mode == 0) { 3811 dump_area_struct((struct vm_area*)num, mappings); 3812 } else { 3813 // walk through the area list, looking for the arguments as a name 3814 struct hash_iterator iter; 3815 3816 hash_open(sAreaHash, &iter); 3817 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 3818 if (((mode & 4) != 0 && area->name != NULL 3819 && !strcmp(argv[index], area->name)) 3820 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3821 || (((mode & 2) != 0 && area->base <= num 3822 && area->base + area->size > num))))) { 3823 dump_area_struct(area, mappings); 3824 found = true; 3825 } 3826 } 3827 3828 if (!found) 3829 kprintf("could not find area %s (%ld)\n", argv[index], num); 3830 } 3831 3832 return 0; 3833 } 3834 3835 3836 static int 3837 dump_area_list(int argc, char** argv) 3838 { 3839 vm_area* area; 3840 struct hash_iterator iter; 3841 const char* name = NULL; 3842 int32 id = 0; 3843 3844 if (argc > 1) { 3845 id = parse_expression(argv[1]); 3846 if (id == 0) 3847 name = argv[1]; 3848 } 3849 3850 kprintf("addr id base\t\tsize protect lock name\n"); 3851 3852 hash_open(sAreaHash, &iter); 3853 while ((area = (vm_area*)hash_next(sAreaHash, &iter)) != NULL) { 3854 if ((id != 0 && area->address_space->id != id) 3855 || (name != NULL && strstr(area->name, name) == NULL)) 3856 continue; 3857 3858 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, 3859 (void*)area->base, (void*)area->size, area->protection, area->wiring, 3860 area->name); 3861 } 3862 hash_close(sAreaHash, &iter, false); 3863 return 0; 3864 } 3865 3866 3867 static int 3868 
dump_available_memory(int argc, char** argv) 3869 { 3870 kprintf("Available memory: %Ld/%lu bytes\n", 3871 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 3872 return 0; 3873 } 3874 3875 3876 status_t 3877 vm_delete_areas(struct vm_address_space* addressSpace) 3878 { 3879 vm_area* area; 3880 vm_area* next; 3881 vm_area* last = NULL; 3882 3883 TRACE(("vm_delete_areas: called on address space 0x%lx\n", 3884 addressSpace->id)); 3885 3886 rw_lock_write_lock(&addressSpace->lock); 3887 3888 // remove all reserved areas in this address space 3889 3890 for (area = addressSpace->areas; area; area = next) { 3891 next = area->address_space_next; 3892 3893 if (area->id == RESERVED_AREA_ID) { 3894 // just remove it 3895 if (last) 3896 last->address_space_next = area->address_space_next; 3897 else 3898 addressSpace->areas = area->address_space_next; 3899 3900 vm_put_address_space(addressSpace); 3901 free(area); 3902 continue; 3903 } 3904 3905 last = area; 3906 } 3907 3908 // delete all the areas in this address space 3909 3910 for (area = addressSpace->areas; area; area = next) { 3911 next = area->address_space_next; 3912 delete_area(addressSpace, area); 3913 } 3914 3915 rw_lock_write_unlock(&addressSpace->lock); 3916 return B_OK; 3917 } 3918 3919 3920 static area_id 3921 vm_area_for(team_id team, addr_t address) 3922 { 3923 AddressSpaceReadLocker locker(team); 3924 if (!locker.IsLocked()) 3925 return B_BAD_TEAM_ID; 3926 3927 vm_area* area = vm_area_lookup(locker.AddressSpace(), address); 3928 if (area != NULL) 3929 return area->id; 3930 3931 return B_ERROR; 3932 } 3933 3934 3935 /*! Frees physical pages that were used during the boot process. 3936 */ 3937 static void 3938 unmap_and_free_physical_pages(vm_translation_map* map, addr_t start, addr_t end) 3939 { 3940 // free all physical pages in the specified range 3941 3942 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3943 addr_t physicalAddress; 3944 uint32 flags; 3945 3946 if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) { 3947 vm_page* page = vm_lookup_page(current / B_PAGE_SIZE); 3948 if (page != NULL) 3949 vm_page_set_state(page, PAGE_STATE_FREE); 3950 } 3951 } 3952 3953 // unmap the memory 3954 map->ops->unmap(map, start, end - 1); 3955 } 3956 3957 3958 void 3959 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3960 { 3961 vm_translation_map* map = &vm_kernel_address_space()->translation_map; 3962 addr_t end = start + size; 3963 addr_t lastEnd = start; 3964 vm_area* area; 3965 3966 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3967 (void*)start, (void*)end)); 3968 3969 // The areas are sorted in virtual address space order, so 3970 // we just have to find the holes between them that fall 3971 // into the area we should dispose 3972 3973 map->ops->lock(map); 3974 3975 for (area = vm_kernel_address_space()->areas; area != NULL; 3976 area = area->address_space_next) { 3977 addr_t areaStart = area->base; 3978 addr_t areaEnd = areaStart + area->size; 3979 3980 if (area->id == RESERVED_AREA_ID) 3981 continue; 3982 3983 if (areaEnd >= end) { 3984 // we are done, the areas are already beyond of what we have to free 3985 lastEnd = end; 3986 break; 3987 } 3988 3989 if (areaStart > lastEnd) { 3990 // this is something we can free 3991 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3992 (void*)areaStart)); 3993 unmap_and_free_physical_pages(map, lastEnd, areaStart); 3994 } 3995 3996 lastEnd = areaEnd; 3997 } 3998 3999 if (lastEnd < end) { 4000 // we can also 
get rid of some space at the end of the area 4001 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 4002 (void*)end)); 4003 unmap_and_free_physical_pages(map, lastEnd, end); 4004 } 4005 4006 map->ops->unlock(map); 4007 } 4008 4009 4010 static void 4011 create_preloaded_image_areas(struct preloaded_image* image) 4012 { 4013 char name[B_OS_NAME_LENGTH]; 4014 void* address; 4015 int32 length; 4016 4017 // use file name to create a good area name 4018 char* fileName = strrchr(image->name, '/'); 4019 if (fileName == NULL) 4020 fileName = image->name; 4021 else 4022 fileName++; 4023 4024 length = strlen(fileName); 4025 // make sure there is enough space for the suffix 4026 if (length > 25) 4027 length = 25; 4028 4029 memcpy(name, fileName, length); 4030 strcpy(name + length, "_text"); 4031 address = (void*)ROUNDOWN(image->text_region.start, B_PAGE_SIZE); 4032 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4033 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 4034 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4035 // this will later be remapped read-only/executable by the 4036 // ELF initialization code 4037 4038 strcpy(name + length, "_data"); 4039 address = (void*)ROUNDOWN(image->data_region.start, B_PAGE_SIZE); 4040 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4041 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 4042 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4043 } 4044 4045 4046 /*! Frees all previously kernel arguments areas from the kernel_args structure. 4047 Any boot loader resources contained in that arguments must not be accessed 4048 anymore past this point. 4049 */ 4050 void 4051 vm_free_kernel_args(kernel_args* args) 4052 { 4053 uint32 i; 4054 4055 TRACE(("vm_free_kernel_args()\n")); 4056 4057 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4058 area_id area = area_for((void*)args->kernel_args_range[i].start); 4059 if (area >= B_OK) 4060 delete_area(area); 4061 } 4062 } 4063 4064 4065 static void 4066 allocate_kernel_args(kernel_args* args) 4067 { 4068 TRACE(("allocate_kernel_args()\n")); 4069 4070 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4071 void* address = (void*)args->kernel_args_range[i].start; 4072 4073 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4074 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4075 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4076 } 4077 } 4078 4079 4080 static void 4081 unreserve_boot_loader_ranges(kernel_args* args) 4082 { 4083 TRACE(("unreserve_boot_loader_ranges()\n")); 4084 4085 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4086 vm_unreserve_address_range(vm_kernel_address_space_id(), 4087 (void*)args->virtual_allocated_range[i].start, 4088 args->virtual_allocated_range[i].size); 4089 } 4090 } 4091 4092 4093 static void 4094 reserve_boot_loader_ranges(kernel_args* args) 4095 { 4096 TRACE(("reserve_boot_loader_ranges()\n")); 4097 4098 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4099 void* address = (void*)args->virtual_allocated_range[i].start; 4100 4101 // If the address is no kernel address, we just skip it. The 4102 // architecture specific code has to deal with it. 
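		// (IS_KERNEL_ADDRESS() tests whether the address lies within the
		// kernel part of the address space.)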
4103 if (!IS_KERNEL_ADDRESS(address)) { 4104 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 4105 address, args->virtual_allocated_range[i].size); 4106 continue; 4107 } 4108 4109 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), 4110 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4111 if (status < B_OK) 4112 panic("could not reserve boot loader ranges\n"); 4113 } 4114 } 4115 4116 4117 static addr_t 4118 allocate_early_virtual(kernel_args* args, size_t size) 4119 { 4120 addr_t spot = 0; 4121 uint32 i; 4122 int last_valloc_entry = 0; 4123 4124 size = PAGE_ALIGN(size); 4125 // find a slot in the virtual allocation addr range 4126 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 4127 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4128 + args->virtual_allocated_range[i - 1].size; 4129 last_valloc_entry = i; 4130 // check to see if the space between this one and the last is big enough 4131 if (previousRangeEnd >= KERNEL_BASE 4132 && args->virtual_allocated_range[i].start 4133 - previousRangeEnd >= size) { 4134 spot = previousRangeEnd; 4135 args->virtual_allocated_range[i - 1].size += size; 4136 goto out; 4137 } 4138 } 4139 if (spot == 0) { 4140 // we hadn't found one between allocation ranges. this is ok. 4141 // see if there's a gap after the last one 4142 addr_t lastRangeEnd 4143 = args->virtual_allocated_range[last_valloc_entry].start 4144 + args->virtual_allocated_range[last_valloc_entry].size; 4145 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 4146 spot = lastRangeEnd; 4147 args->virtual_allocated_range[last_valloc_entry].size += size; 4148 goto out; 4149 } 4150 // see if there's a gap before the first one 4151 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 4152 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 4153 args->virtual_allocated_range[0].start -= size; 4154 spot = args->virtual_allocated_range[0].start; 4155 goto out; 4156 } 4157 } 4158 } 4159 4160 out: 4161 return spot; 4162 } 4163 4164 4165 static bool 4166 is_page_in_physical_memory_range(kernel_args* args, addr_t address) 4167 { 4168 // TODO: horrible brute-force method of determining if the page can be 4169 // allocated 4170 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4171 if (address >= args->physical_memory_range[i].start 4172 && address < args->physical_memory_range[i].start 4173 + args->physical_memory_range[i].size) 4174 return true; 4175 } 4176 return false; 4177 } 4178 4179 4180 static addr_t 4181 allocate_early_physical_page(kernel_args* args) 4182 { 4183 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4184 addr_t nextPage; 4185 4186 nextPage = args->physical_allocated_range[i].start 4187 + args->physical_allocated_range[i].size; 4188 // see if the page after the next allocated paddr run can be allocated 4189 if (i + 1 < args->num_physical_allocated_ranges 4190 && args->physical_allocated_range[i + 1].size != 0) { 4191 // see if the next page will collide with the next allocated range 4192 if (nextPage >= args->physical_allocated_range[i+1].start) 4193 continue; 4194 } 4195 // see if the next physical page fits in the memory block 4196 if (is_page_in_physical_memory_range(args, nextPage)) { 4197 // we got one! 4198 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4199 return nextPage / B_PAGE_SIZE; 4200 } 4201 } 4202 4203 return 0; 4204 // could not allocate a block 4205 } 4206 4207 4208 /*! 
This one uses the kernel_args' physical and virtual memory ranges to 4209 allocate some pages before the VM is completely up. 4210 */ 4211 addr_t 4212 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4213 uint32 attributes) 4214 { 4215 if (physicalSize > virtualSize) 4216 physicalSize = virtualSize; 4217 4218 // find the vaddr to allocate at 4219 addr_t virtualBase = allocate_early_virtual(args, virtualSize); 4220 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 4221 4222 // map the pages 4223 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4224 addr_t physicalAddress = allocate_early_physical_page(args); 4225 if (physicalAddress == 0) 4226 panic("error allocating early page!\n"); 4227 4228 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4229 4230 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4231 physicalAddress * B_PAGE_SIZE, attributes, 4232 &allocate_early_physical_page); 4233 } 4234 4235 return virtualBase; 4236 } 4237 4238 4239 /*! The main entrance point to initialize the VM. */ 4240 status_t 4241 vm_init(kernel_args* args) 4242 { 4243 struct preloaded_image* image; 4244 void* address; 4245 status_t err = 0; 4246 uint32 i; 4247 4248 TRACE(("vm_init: entry\n")); 4249 err = arch_vm_translation_map_init(args); 4250 err = arch_vm_init(args); 4251 4252 // initialize some globals 4253 sNextAreaID = 1; 4254 4255 vm_page_init_num_pages(args); 4256 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4257 4258 size_t heapSize = INITIAL_HEAP_SIZE; 4259 // try to accomodate low memory systems 4260 while (heapSize > sAvailableMemory / 8) 4261 heapSize /= 2; 4262 if (heapSize < 1024 * 1024) 4263 panic("vm_init: go buy some RAM please."); 4264 4265 // map in the new heap and initialize it 4266 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4267 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4268 TRACE(("heap at 0x%lx\n", heapBase)); 4269 heap_init(heapBase, heapSize); 4270 4271 size_t slabInitialSize = args->num_cpus * 2 * B_PAGE_SIZE; 4272 addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize, 4273 slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4274 slab_init(args, slabInitialBase, slabInitialSize); 4275 4276 // initialize the free page list and physical page mapper 4277 vm_page_init(args); 4278 4279 // initialize the hash table that stores the pages mapped to caches 4280 vm_cache_init(args); 4281 4282 { 4283 vm_area* area; 4284 sAreaHash = hash_init(AREA_HASH_TABLE_SIZE, 4285 (addr_t)&area->hash_next - (addr_t)area, 4286 &area_compare, &area_hash); 4287 if (sAreaHash == NULL) 4288 panic("vm_init: error creating aspace hash table\n"); 4289 } 4290 4291 vm_address_space_init(); 4292 reserve_boot_loader_ranges(args); 4293 4294 // Do any further initialization that the architecture dependant layers may 4295 // need now 4296 arch_vm_translation_map_init_post_area(args); 4297 arch_vm_init_post_area(args); 4298 vm_page_init_post_area(args); 4299 4300 // allocate areas to represent stuff that already exists 4301 4302 address = (void*)ROUNDOWN(heapBase, B_PAGE_SIZE); 4303 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4304 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4305 4306 address = (void*)ROUNDOWN(slabInitialBase, B_PAGE_SIZE); 4307 create_area("initial slab space", &address, B_EXACT_ADDRESS, 4308 slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA 4309 | B_KERNEL_WRITE_AREA); 4310 4311 allocate_kernel_args(args); 4312 4313 
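	// (For context: create_preloaded_image_areas(), used next, derives the
	// area names from the image's file name plus a "_text"/"_data" suffix,
	// truncating overly long names; e.g. a hypothetical image "kernel_x86"
	// would end up with wired areas "kernel_x86_text" and "kernel_x86_data".)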
create_preloaded_image_areas(&args->kernel_image); 4314 4315 // allocate areas for preloaded images 4316 for (image = args->preloaded_images; image != NULL; image = image->next) { 4317 create_preloaded_image_areas(image); 4318 } 4319 4320 // allocate kernel stacks 4321 for (i = 0; i < args->num_cpus; i++) { 4322 char name[64]; 4323 4324 sprintf(name, "idle thread %lu kstack", i + 1); 4325 address = (void*)args->cpu_kstack[i].start; 4326 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4327 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4328 } 4329 4330 #if DEBUG_CACHE_LIST 4331 create_area("cache info table", (void**)&sCacheInfoTable, 4332 B_ANY_KERNEL_ADDRESS, 4333 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4334 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4335 #endif // DEBUG_CACHE_LIST 4336 4337 // add some debugger commands 4338 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4339 add_debugger_command("area", &dump_area, 4340 "Dump info about a particular area"); 4341 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 4342 add_debugger_command("cache_tree", &dump_cache_tree, "Dump vm_cache tree"); 4343 #if DEBUG_CACHE_LIST 4344 add_debugger_command_etc("caches", &dump_caches, 4345 "List all vm_cache trees", 4346 "[ \"-c\" ]\n" 4347 "All cache trees are listed sorted in decreasing order by number of\n" 4348 "used pages or, if \"-c\" is specified, by size of committed memory.\n", 4349 0); 4350 #endif 4351 add_debugger_command("avail", &dump_available_memory, 4352 "Dump available memory"); 4353 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4354 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4355 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4356 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4357 add_debugger_command("string", &display_mem, "dump strings"); 4358 4359 TRACE(("vm_init: exit\n")); 4360 4361 return err; 4362 } 4363 4364 4365 status_t 4366 vm_init_post_sem(kernel_args* args) 4367 { 4368 // This frees all unused boot loader resources and makes its space available 4369 // again 4370 arch_vm_init_end(args); 4371 unreserve_boot_loader_ranges(args); 4372 4373 // fill in all of the semaphores that were not allocated before 4374 // since we're still single threaded and only the kernel address space 4375 // exists, it isn't that hard to find all of the ones we need to create 4376 4377 arch_vm_translation_map_init_post_sem(args); 4378 vm_address_space_init_post_sem(); 4379 4380 slab_init_post_sem(); 4381 return heap_init_post_sem(); 4382 } 4383 4384 4385 status_t 4386 vm_init_post_thread(kernel_args* args) 4387 { 4388 vm_page_init_post_thread(args); 4389 vm_daemon_init(); 4390 slab_init_post_thread(); 4391 return heap_init_post_thread(); 4392 } 4393 4394 4395 status_t 4396 vm_init_post_modules(kernel_args* args) 4397 { 4398 return arch_vm_init_post_modules(args); 4399 } 4400 4401 4402 void 4403 permit_page_faults(void) 4404 { 4405 struct thread* thread = thread_get_current_thread(); 4406 if (thread != NULL) 4407 atomic_add(&thread->page_faults_allowed, 1); 4408 } 4409 4410 4411 void 4412 forbid_page_faults(void) 4413 { 4414 struct thread* thread = thread_get_current_thread(); 4415 if (thread != NULL) 4416 atomic_add(&thread->page_faults_allowed, -1); 4417 } 4418 4419 4420 status_t 4421 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 4422 addr_t* 
newIP) 4423 { 4424 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4425 faultAddress)); 4426 4427 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4428 4429 addr_t pageAddress = ROUNDOWN(address, B_PAGE_SIZE); 4430 vm_address_space* addressSpace = NULL; 4431 4432 status_t status = B_OK; 4433 *newIP = 0; 4434 atomic_add((int32*)&sPageFaults, 1); 4435 4436 if (IS_KERNEL_ADDRESS(pageAddress)) { 4437 addressSpace = vm_get_kernel_address_space(); 4438 } else if (IS_USER_ADDRESS(pageAddress)) { 4439 addressSpace = vm_get_current_user_address_space(); 4440 if (addressSpace == NULL) { 4441 if (!isUser) { 4442 dprintf("vm_page_fault: kernel thread accessing invalid user " 4443 "memory!\n"); 4444 status = B_BAD_ADDRESS; 4445 TPF(PageFaultError(-1, 4446 VMPageFaultTracing 4447 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4448 } else { 4449 // XXX weird state. 4450 panic("vm_page_fault: non kernel thread accessing user memory " 4451 "that doesn't exist!\n"); 4452 status = B_BAD_ADDRESS; 4453 } 4454 } 4455 } else { 4456 // the hit was probably in the 64k DMZ between kernel and user space 4457 // this keeps a user space thread from passing a buffer that crosses 4458 // into kernel space 4459 status = B_BAD_ADDRESS; 4460 TPF(PageFaultError(-1, 4461 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4462 } 4463 4464 if (status == B_OK) 4465 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser); 4466 4467 if (status < B_OK) { 4468 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4469 "0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 4470 strerror(status), address, faultAddress, isWrite, isUser, 4471 thread_get_current_thread_id()); 4472 if (!isUser) { 4473 struct thread* thread = thread_get_current_thread(); 4474 if (thread != NULL && thread->fault_handler != 0) { 4475 // this will cause the arch dependant page fault handler to 4476 // modify the IP on the interrupt frame or whatever to return 4477 // to this address 4478 *newIP = thread->fault_handler; 4479 } else { 4480 // unhandled page fault in the kernel 4481 panic("vm_page_fault: unhandled page fault in kernel space at " 4482 "0x%lx, ip 0x%lx\n", address, faultAddress); 4483 } 4484 } else { 4485 #if 1 4486 rw_lock_read_lock(&addressSpace->lock); 4487 4488 // TODO: remove me once we have proper userland debugging support 4489 // (and tools) 4490 vm_area* area = vm_area_lookup(addressSpace, faultAddress); 4491 4492 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page 4493 // fault and someone is already waiting for a write lock on the same address 4494 // space. This thread will then try to acquire the semaphore again and will 4495 // be queued after the writer. 4496 struct thread* thread = thread_get_current_thread(); 4497 dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) " 4498 "tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n", 4499 thread->name, thread->id, thread->team->name, thread->team->id, 4500 isWrite ? "write" : "read", address, faultAddress, 4501 area ? area->name : "???", 4502 faultAddress - (area ? area->base : 0x0)); 4503 4504 // We can print a stack trace of the userland thread here. 4505 #if 1 4506 if (area) { 4507 struct stack_frame { 4508 #if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__) 4509 struct stack_frame* previous; 4510 void* return_address; 4511 #else 4512 // ... 
4513 #warning writeme 4514 #endif 4515 } frame; 4516 #ifdef __INTEL__ 4517 struct iframe* iframe = i386_get_user_iframe(); 4518 if (iframe == NULL) 4519 panic("iframe is NULL!"); 4520 4521 status_t status = user_memcpy(&frame, (void*)iframe->ebp, 4522 sizeof(struct stack_frame)); 4523 #elif defined(__POWERPC__) 4524 struct iframe* iframe = ppc_get_user_iframe(); 4525 if (iframe == NULL) 4526 panic("iframe is NULL!"); 4527 4528 status_t status = user_memcpy(&frame, (void*)iframe->r1, 4529 sizeof(struct stack_frame)); 4530 #else 4531 # warning "vm_page_fault() stack trace won't work" 4532 status = B_ERROR; 4533 #endif 4534 4535 dprintf("stack trace:\n"); 4536 int32 maxFrames = 50; 4537 while (status == B_OK && --maxFrames >= 0 4538 && frame.return_address != NULL) { 4539 dprintf(" %p", frame.return_address); 4540 area = vm_area_lookup(addressSpace, 4541 (addr_t)frame.return_address); 4542 if (area) { 4543 dprintf(" (%s + %#lx)", area->name, 4544 (addr_t)frame.return_address - area->base); 4545 } 4546 dprintf("\n"); 4547 4548 status = user_memcpy(&frame, frame.previous, 4549 sizeof(struct stack_frame)); 4550 } 4551 } 4552 #endif // 0 (stack trace) 4553 4554 rw_lock_read_unlock(&addressSpace->lock); 4555 #endif 4556 4557 // TODO: the fault_callback is a temporary solution for vm86 4558 if (thread->fault_callback == NULL 4559 || thread->fault_callback(address, faultAddress, isWrite)) { 4560 // If the thread has a signal handler for SIGSEGV, we simply 4561 // send it the signal. Otherwise we notify the user debugger 4562 // first. 4563 struct sigaction action; 4564 if (sigaction(SIGSEGV, NULL, &action) == 0 4565 && action.sa_handler != SIG_DFL 4566 && action.sa_handler != SIG_IGN) { 4567 send_signal(thread->id, SIGSEGV); 4568 } else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4569 SIGSEGV)) { 4570 send_signal(thread->id, SIGSEGV); 4571 } 4572 } 4573 } 4574 } 4575 4576 if (addressSpace != NULL) 4577 vm_put_address_space(addressSpace); 4578 4579 return B_HANDLED_INTERRUPT; 4580 } 4581 4582 4583 static inline status_t 4584 fault_acquire_locked_source(vm_cache* cache, vm_cache** _source) 4585 { 4586 vm_cache* source = cache->source; 4587 if (source == NULL) 4588 return B_ERROR; 4589 4590 source->Lock(); 4591 source->AcquireRefLocked(); 4592 4593 *_source = source; 4594 return B_OK; 4595 } 4596 4597 4598 /*! Inserts a busy dummy page into a cache, and makes sure the cache won't go 4599 away by grabbing a reference to it. 4600 */ 4601 static inline void 4602 fault_insert_dummy_page(vm_cache* cache, vm_dummy_page& dummyPage, 4603 off_t cacheOffset) 4604 { 4605 dummyPage.state = PAGE_STATE_BUSY; 4606 cache->AcquireRefLocked(); 4607 cache->InsertPage(&dummyPage, cacheOffset); 4608 dummyPage.busy_condition.Publish(&dummyPage, "page"); 4609 } 4610 4611 4612 /*! Removes the busy dummy page from a cache, and releases its reference to 4613 the cache. 4614 */ 4615 static inline void 4616 fault_remove_dummy_page(vm_dummy_page& dummyPage, bool isLocked) 4617 { 4618 vm_cache* cache = dummyPage.cache; 4619 if (!isLocked) 4620 cache->Lock(); 4621 4622 if (dummyPage.state == PAGE_STATE_BUSY) { 4623 cache->RemovePage(&dummyPage); 4624 dummyPage.state = PAGE_STATE_INACTIVE; 4625 dummyPage.busy_condition.Unpublish(); 4626 } 4627 4628 cache->ReleaseRefLocked(); 4629 4630 if (!isLocked) 4631 cache->Unlock(); 4632 } 4633 4634 4635 /*! Finds a page at the specified \a cacheOffset in either the \a topCache 4636 or in its source chain. 
Will also page in a missing page in case there is 4637 a cache, whose backing store has the page. 4638 If it couldn't find a page, it will return the vm_cache that should get it, 4639 otherwise, it will return the vm_cache that contains the page. 4640 It always grabs a reference to the vm_cache that it returns, and also locks 4641 it. 4642 */ 4643 static inline status_t 4644 fault_find_page(vm_translation_map* map, vm_cache* topCache, 4645 off_t cacheOffset, bool isWrite, vm_dummy_page& dummyPage, 4646 vm_cache** _pageCache, vm_page** _page, bool* _restart) 4647 { 4648 *_restart = false; 4649 vm_cache* cache = topCache; 4650 vm_cache* lastCache = NULL; 4651 vm_page* page = NULL; 4652 4653 cache->Lock(); 4654 cache->AcquireRefLocked(); 4655 // we release this later in the loop 4656 4657 while (cache != NULL) { 4658 if (lastCache != NULL) 4659 lastCache->ReleaseRefAndUnlock(); 4660 4661 // we hold the lock of the cache at this point 4662 4663 lastCache = cache; 4664 4665 for (;;) { 4666 page = cache->LookupPage(cacheOffset); 4667 if (page != NULL && page->state != PAGE_STATE_BUSY) { 4668 // we found the page 4669 break; 4670 } 4671 if (page == NULL || page == &dummyPage) 4672 break; 4673 4674 // page must be busy -- wait for it to become unbusy 4675 { 4676 ConditionVariableEntry entry; 4677 entry.Add(page); 4678 cache->Unlock(); 4679 entry.Wait(); 4680 cache->Lock(); 4681 } 4682 } 4683 4684 if (page != NULL && page != &dummyPage) 4685 break; 4686 4687 // The current cache does not contain the page we're looking for 4688 4689 // see if the backing store has it 4690 if (cache->HasPage(cacheOffset)) { 4691 // insert a fresh page and mark it busy -- we're going to read it in 4692 page = vm_page_allocate_page(PAGE_STATE_FREE, true); 4693 cache->InsertPage(page, cacheOffset); 4694 4695 ConditionVariable busyCondition; 4696 busyCondition.Publish(page, "page"); 4697 4698 cache->Unlock(); 4699 4700 // get a virtual address for the page 4701 iovec vec; 4702 vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE); 4703 size_t bytesRead = vec.iov_len = B_PAGE_SIZE; 4704 4705 // read it in 4706 status_t status = cache->Read(cacheOffset, &vec, 1, 4707 B_PHYSICAL_IO_REQUEST, &bytesRead); 4708 4709 cache->Lock(); 4710 4711 if (status < B_OK) { 4712 // on error remove and free the page 4713 dprintf("reading page from cache %p returned: %s!\n", 4714 cache, strerror(status)); 4715 4716 busyCondition.Unpublish(); 4717 cache->RemovePage(page); 4718 vm_page_set_state(page, PAGE_STATE_FREE); 4719 4720 cache->ReleaseRefAndUnlock(); 4721 return status; 4722 } 4723 4724 // mark the page unbusy again 4725 page->state = PAGE_STATE_ACTIVE; 4726 busyCondition.Unpublish(); 4727 break; 4728 } 4729 4730 // If we're at the top most cache, insert the dummy page here to keep 4731 // other threads from faulting on the same address and chasing us up the 4732 // cache chain 4733 if (cache == topCache && dummyPage.state != PAGE_STATE_BUSY) 4734 fault_insert_dummy_page(cache, dummyPage, cacheOffset); 4735 4736 vm_cache* nextCache; 4737 status_t status = fault_acquire_locked_source(cache, &nextCache); 4738 if (status < B_OK) 4739 nextCache = NULL; 4740 4741 // at this point, we still hold a ref to this cache 4742 // (through lastCacheRef) 4743 4744 cache = nextCache; 4745 } 4746 4747 if (page == &dummyPage) 4748 page = NULL; 4749 4750 if (page == NULL) { 4751 // there was no adequate page, determine the cache for a clean one 4752 4753 ASSERT(cache == NULL); 4754 4755 // We rolled off the end of the cache chain, so we need 
to decide which 4756 // cache will get the new page we're about to create. 4757 cache = isWrite ? topCache : lastCache; 4758 // Read-only pages come in the deepest cache - only the 4759 // top most cache may have direct write access. 4760 if (cache != lastCache) { 4761 lastCache->ReleaseRefAndUnlock(); 4762 cache->Lock(); 4763 cache->AcquireRefLocked(); 4764 } 4765 4766 vm_page* newPage = cache->LookupPage(cacheOffset); 4767 if (newPage && newPage != &dummyPage) { 4768 // A new page turned up. It could be the one we're looking 4769 // for, but it could as well be a dummy page from someone 4770 // else or an otherwise busy page. We can't really handle 4771 // that here. Hence we completely restart this functions. 4772 cache->ReleaseRefAndUnlock(); 4773 *_restart = true; 4774 } 4775 } else { 4776 // we still own reference and lock to the cache 4777 } 4778 4779 *_pageCache = cache; 4780 *_page = page; 4781 return B_OK; 4782 } 4783 4784 4785 /*! Returns the page that should be mapped into the area that got the fault. 4786 It returns the owner of the page in \a sourceCache - it keeps a reference 4787 to it, and has also locked it on exit. 4788 */ 4789 static inline status_t 4790 fault_get_page(vm_translation_map* map, vm_cache* topCache, off_t cacheOffset, 4791 bool isWrite, vm_dummy_page& dummyPage, vm_cache** _sourceCache, 4792 vm_cache** _copiedSource, vm_page** _page) 4793 { 4794 vm_cache* cache; 4795 vm_page* page; 4796 bool restart; 4797 for (;;) { 4798 status_t status = fault_find_page(map, topCache, cacheOffset, isWrite, 4799 dummyPage, &cache, &page, &restart); 4800 if (status != B_OK) 4801 return status; 4802 4803 if (!restart) 4804 break; 4805 4806 // Remove the dummy page, if it has been inserted. 4807 topCache->Lock(); 4808 4809 if (dummyPage.state == PAGE_STATE_BUSY) { 4810 ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n", 4811 &dummyPage); 4812 fault_remove_dummy_page(dummyPage, true); 4813 } 4814 4815 topCache->Unlock(); 4816 } 4817 4818 if (page == NULL) { 4819 // we still haven't found a page, so we allocate a clean one 4820 4821 page = vm_page_allocate_page(PAGE_STATE_CLEAR, true); 4822 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", 4823 page->physical_page_number)); 4824 4825 // Insert the new page into our cache, and replace it with the dummy page 4826 // if necessary 4827 4828 // If we inserted a dummy page into this cache (i.e. if it is the top 4829 // cache), we have to remove it now 4830 if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cache) { 4831 #if DEBUG_PAGE_CACHE_TRANSITIONS 4832 page->debug_flags = dummyPage.debug_flags | 0x8; 4833 if (dummyPage.collided_page != NULL) { 4834 dummyPage.collided_page->collided_page = page; 4835 page->collided_page = dummyPage.collided_page; 4836 } 4837 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 4838 4839 fault_remove_dummy_page(dummyPage, true); 4840 } 4841 4842 cache->InsertPage(page, cacheOffset); 4843 4844 if (dummyPage.state == PAGE_STATE_BUSY) { 4845 #if DEBUG_PAGE_CACHE_TRANSITIONS 4846 page->debug_flags = dummyPage.debug_flags | 0x10; 4847 if (dummyPage.collided_page != NULL) { 4848 dummyPage.collided_page->collided_page = page; 4849 page->collided_page = dummyPage.collided_page; 4850 } 4851 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 4852 4853 // This is not the top cache into which we inserted the dummy page, 4854 // let's remove it from there. We need to temporarily unlock our 4855 // cache to comply with the cache locking policy. 
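			// (Presumably the point is to avoid holding two cache locks at
			// once: fault_remove_dummy_page(..., false) locks the dummy
			// page's cache itself, so our own cache lock is dropped around
			// the call.)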
			cache->Unlock();
			fault_remove_dummy_page(dummyPage, false);
			cache->Lock();
		}
	}

	// We now have the page and a cache it belongs to - we now need to make
	// sure that the area's cache can access it, too, and sees the correct data

	if (page->cache != topCache && isWrite) {
		// Now we have a page that has the data we want, but in the wrong
		// cache object, so we need to copy it and stick it into the top cache.
		// Note that this and the "if" before are mutually exclusive. If
		// fault_find_page() didn't find the page, it would return the top cache
		// for write faults.
		vm_page* sourcePage = page;

		// TODO: if memory is low, it might be a good idea to steal the page
		// from our source cache - if possible, that is
		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
		page = vm_page_allocate_page(PAGE_STATE_FREE, true);

		// copy the page
		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
			sourcePage->physical_page_number * B_PAGE_SIZE);

		if (sourcePage->state != PAGE_STATE_MODIFIED)
			vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE);

		cache->Unlock();
		topCache->Lock();

		// Since the top cache has been unlocked for a while, someone else
		// (RemoveConsumer()) might have replaced our dummy page.
		vm_page* newPage = NULL;
		for (;;) {
			newPage = topCache->LookupPage(cacheOffset);
			if (newPage == NULL || newPage == &dummyPage) {
				newPage = NULL;
				break;
			}

			if (newPage->state != PAGE_STATE_BUSY)
				break;

			// The page is busy, wait till it becomes unbusy.
			ConditionVariableEntry entry;
			entry.Add(newPage);
			topCache->Unlock();
			entry.Wait();
			topCache->Lock();
		}

		if (newPage) {
			// Indeed someone else threw in a page. We free ours and are happy.
			vm_page_set_state(page, PAGE_STATE_FREE);
			page = newPage;
		} else {
			// Insert the new page into our cache and remove the dummy page, if
			// necessary.
4916 4917 // if we inserted a dummy page into this cache, we have to remove it 4918 // now 4919 if (dummyPage.state == PAGE_STATE_BUSY) { 4920 ASSERT_PRINT(dummyPage.cache == topCache, "dummy page: %p\n", 4921 &dummyPage); 4922 fault_remove_dummy_page(dummyPage, true); 4923 } 4924 4925 topCache->InsertPage(page, cacheOffset); 4926 } 4927 4928 *_copiedSource = cache; 4929 4930 cache = topCache; 4931 cache->AcquireRefLocked(); 4932 } 4933 4934 *_sourceCache = cache; 4935 *_page = page; 4936 return B_OK; 4937 } 4938 4939 4940 static status_t 4941 vm_soft_fault(vm_address_space* addressSpace, addr_t originalAddress, 4942 bool isWrite, bool isUser) 4943 { 4944 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 4945 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 4946 4947 AddressSpaceReadLocker locker(addressSpace, true); 4948 4949 atomic_add(&addressSpace->fault_count, 1); 4950 4951 // Get the area the fault was in 4952 4953 addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE); 4954 4955 vm_area* area = vm_area_lookup(addressSpace, address); 4956 if (area == NULL) { 4957 dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n", 4958 originalAddress); 4959 TPF(PageFaultError(-1, VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4960 return B_BAD_ADDRESS; 4961 } 4962 4963 // check permissions 4964 uint32 protection = get_area_page_protection(area, address); 4965 if (isUser && (protection & B_USER_PROTECTION) == 0) { 4966 dprintf("user access on kernel area 0x%lx at %p\n", area->id, 4967 (void*)originalAddress); 4968 TPF(PageFaultError(area->id, 4969 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4970 return B_PERMISSION_DENIED; 4971 } 4972 if (isWrite && (protection 4973 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4974 dprintf("write access attempted on read-only area 0x%lx at %p\n", 4975 area->id, (void*)originalAddress); 4976 TPF(PageFaultError(area->id, 4977 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_ONLY)); 4978 return B_PERMISSION_DENIED; 4979 } 4980 4981 // We have the area, it was a valid access, so let's try to resolve the page 4982 // fault now. 
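	// (Overview of the steps below, for orientation: the top cache's backing
	// store first gets a chance to resolve the fault itself via Fault();
	// otherwise fault_get_page() walks the cache chain to find, page in, or
	// copy the page, which is then mapped into the area with vm_map_page().)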
4983 // At first, the top most cache from the area is investigated 4984 4985 vm_cache* topCache = vm_area_get_locked_cache(area); 4986 off_t cacheOffset = address - area->base + area->cache_offset; 4987 4988 atomic_add(&area->no_cache_change, 1); 4989 // make sure the area's cache isn't replaced during the page fault 4990 4991 // See if this cache has a fault handler - this will do all the work for us 4992 { 4993 // Note, since the page fault is resolved with interrupts enabled, the 4994 // fault handler could be called more than once for the same reason - 4995 // the store must take this into account 4996 status_t status = topCache->Fault(addressSpace, cacheOffset); 4997 if (status != B_BAD_HANDLER) { 4998 vm_area_put_locked_cache(topCache); 4999 return status; 5000 } 5001 } 5002 5003 topCache->Unlock(); 5004 5005 // The top most cache has no fault handler, so let's see if the cache or its 5006 // sources already have the page we're searching for (we're going from top to 5007 // bottom) 5008 5009 vm_translation_map* map = &addressSpace->translation_map; 5010 size_t reservePages = 2 + map->ops->map_max_pages_need(map, 5011 originalAddress, originalAddress); 5012 vm_page_reserve_pages(reservePages); 5013 // we may need up to 2 pages - reserving them upfront makes sure 5014 // we don't have any cache locked, so that the page daemon/thief 5015 // can do their job without problems 5016 5017 vm_dummy_page dummyPage; 5018 dummyPage.cache = NULL; 5019 dummyPage.state = PAGE_STATE_INACTIVE; 5020 dummyPage.type = PAGE_TYPE_DUMMY; 5021 dummyPage.wired_count = 0; 5022 #if DEBUG_PAGE_CACHE_TRANSITIONS 5023 dummyPage.debug_flags = 0; 5024 dummyPage.collided_page = NULL; 5025 #endif // DEBUG_PAGE_CACHE_TRANSITIONS 5026 5027 vm_cache* copiedPageSource = NULL; 5028 vm_cache* pageSource; 5029 vm_page* page; 5030 // TODO: We keep the address space read lock during the whole operation 5031 // which might be rather expensive depending on where the data has to 5032 // be retrieved from. 5033 status_t status = fault_get_page(map, topCache, cacheOffset, isWrite, 5034 dummyPage, &pageSource, &copiedPageSource, &page); 5035 5036 if (status == B_OK) { 5037 // All went fine, all there is left to do is to map the page into the 5038 // address space 5039 TPF(PageFaultDone(area->id, topCache, page->cache, page)); 5040 5041 // In case this is a copy-on-write page, we need to unmap it from the 5042 // area now 5043 if (isWrite && page->cache == topCache) 5044 vm_unmap_page(area, address, true); 5045 5046 // TODO: there is currently no mechanism to prevent a page being mapped 5047 // more than once in case of a second page fault! 
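		// (Copy-on-write example of the interplay here: a read fault on a
		// page still living in a source cache maps the page read-only below;
		// a later write fault then goes through fault_get_page(), which
		// copies the page into the top cache before it is mapped writable.)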
5048 5049 // If the page doesn't reside in the area's cache, we need to make sure 5050 // it's mapped in read-only, so that we cannot overwrite someone else's 5051 // data (copy-on-write) 5052 uint32 newProtection = protection; 5053 if (page->cache != topCache && !isWrite) 5054 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 5055 5056 vm_map_page(area, page, address, newProtection); 5057 5058 pageSource->ReleaseRefAndUnlock(); 5059 } else 5060 TPF(PageFaultError(area->id, status)); 5061 5062 atomic_add(&area->no_cache_change, -1); 5063 5064 if (copiedPageSource) 5065 copiedPageSource->ReleaseRef(); 5066 5067 if (dummyPage.state == PAGE_STATE_BUSY) { 5068 // We still have the dummy page in the cache - that happens if we didn't 5069 // need to allocate a new page before, but could use one in another cache 5070 fault_remove_dummy_page(dummyPage, false); 5071 } 5072 5073 topCache->ReleaseRef(); 5074 vm_page_unreserve_pages(reservePages); 5075 5076 return status; 5077 } 5078 5079 5080 /*! You must have the address space's sem held */ 5081 vm_area* 5082 vm_area_lookup(vm_address_space* addressSpace, addr_t address) 5083 { 5084 vm_area* area; 5085 5086 // check the areas list first 5087 area = addressSpace->area_hint; 5088 if (area != NULL 5089 && area->base <= address 5090 && area->base + (area->size - 1) >= address) 5091 goto found; 5092 5093 for (area = addressSpace->areas; area != NULL; 5094 area = area->address_space_next) { 5095 if (area->id == RESERVED_AREA_ID) 5096 continue; 5097 5098 if (area->base <= address && area->base + (area->size - 1) >= address) 5099 break; 5100 } 5101 5102 found: 5103 if (area) 5104 addressSpace->area_hint = area; 5105 5106 return area; 5107 } 5108 5109 5110 status_t 5111 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle) 5112 { 5113 return vm_kernel_address_space()->translation_map.ops->get_physical_page( 5114 paddr, _vaddr, _handle); 5115 } 5116 5117 status_t 5118 vm_put_physical_page(addr_t vaddr, void* handle) 5119 { 5120 return vm_kernel_address_space()->translation_map.ops->put_physical_page( 5121 vaddr, handle); 5122 } 5123 5124 5125 status_t 5126 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle) 5127 { 5128 return vm_kernel_address_space()->translation_map.ops 5129 ->get_physical_page_current_cpu(paddr, _vaddr, _handle); 5130 } 5131 5132 status_t 5133 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5134 { 5135 return vm_kernel_address_space()->translation_map.ops 5136 ->put_physical_page_current_cpu(vaddr, handle); 5137 } 5138 5139 5140 status_t 5141 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle) 5142 { 5143 return vm_kernel_address_space()->translation_map.ops 5144 ->get_physical_page_debug(paddr, _vaddr, _handle); 5145 } 5146 5147 status_t 5148 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5149 { 5150 return vm_kernel_address_space()->translation_map.ops 5151 ->put_physical_page_debug(vaddr, handle); 5152 } 5153 5154 5155 void 5156 vm_get_info(system_memory_info* info) 5157 { 5158 swap_get_info(info); 5159 5160 info->max_memory = vm_page_num_pages() * B_PAGE_SIZE; 5161 info->page_faults = sPageFaults; 5162 5163 MutexLocker locker(sAvailableMemoryLock); 5164 info->free_memory = sAvailableMemory; 5165 info->needed_memory = sNeededMemory; 5166 } 5167 5168 5169 uint32 5170 vm_num_page_faults(void) 5171 { 5172 return sPageFaults; 5173 } 5174 5175 5176 off_t 5177 vm_available_memory(void) 5178 { 5179 MutexLocker locker(sAvailableMemoryLock); 5180 return 
sAvailableMemory;
}


off_t
vm_available_not_needed_memory(void)
{
	MutexLocker locker(sAvailableMemoryLock);
	return sAvailableMemory - sNeededMemory;
}


void
vm_unreserve_memory(size_t amount)
{
	mutex_lock(&sAvailableMemoryLock);

	sAvailableMemory += amount;

	mutex_unlock(&sAvailableMemoryLock);
}


status_t
vm_try_reserve_memory(size_t amount, bigtime_t timeout)
{
	MutexLocker locker(sAvailableMemoryLock);

	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);

	if (sAvailableMemory >= amount) {
		sAvailableMemory -= amount;
		return B_OK;
	}

	if (timeout <= 0)
		return B_NO_MEMORY;

	// turn timeout into an absolute timeout
	timeout += system_time();

	// loop until we've got the memory or the timeout occurs
	do {
		sNeededMemory += amount;

		// call the low resource manager
		locker.Unlock();
		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
			B_ABSOLUTE_TIMEOUT, timeout);
		locker.Lock();

		sNeededMemory -= amount;

		if (sAvailableMemory >= amount) {
			sAvailableMemory -= amount;
			return B_OK;
		}
	} while (timeout > system_time());

	return B_NO_MEMORY;
}


status_t
vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
{
	AddressSpaceReadLocker locker;
	vm_area* area;
	status_t status = locker.SetFromArea(id, area);
	if (status != B_OK)
		return status;

	return arch_vm_set_memory_type(area, physicalBase, type);
}


/*!	This function enforces some protection properties:
	- if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
	- if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
	- if no protection is specified, it defaults to B_KERNEL_READ_AREA
	  and B_KERNEL_WRITE_AREA.
*/
static void
fix_protection(uint32* protection)
{
	if ((*protection & B_KERNEL_PROTECTION) == 0) {
		if ((*protection & B_USER_PROTECTION) == 0
			|| (*protection & B_WRITE_AREA) != 0)
			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
		else
			*protection |= B_KERNEL_READ_AREA;
	}
}


static void
fill_area_info(struct vm_area* area, area_info* info, size_t size)
{
	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
	info->area = area->id;
	info->address = (void*)area->base;
	info->size = area->size;
	info->protection = area->protection;
	info->lock = B_FULL_LOCK;
	info->team = area->address_space->id;
	info->copy_count = 0;
	info->in_count = 0;
	info->out_count = 0;
		// TODO: retrieve real values here!

	vm_cache* cache = vm_area_get_locked_cache(area);

	// Note, this is a simplification; the cache could be larger than this area
	info->ram_size = cache->page_count * B_PAGE_SIZE;

	vm_area_put_locked_cache(cache);
}


/*!
	Tests whether the area that contains the specified address actually
	exists, and whether it needs any kind of locking.
	Used by both lock_memory() and unlock_memory().
5303 */ 5304 static status_t 5305 test_lock_memory(vm_address_space* addressSpace, addr_t address, 5306 bool& needsLocking) 5307 { 5308 rw_lock_read_lock(&addressSpace->lock); 5309 5310 vm_area* area = vm_area_lookup(addressSpace, address); 5311 if (area != NULL) { 5312 // This determines if we need to lock the memory at all 5313 needsLocking = area->cache_type != CACHE_TYPE_NULL 5314 && area->cache_type != CACHE_TYPE_DEVICE 5315 && area->wiring != B_FULL_LOCK 5316 && area->wiring != B_CONTIGUOUS; 5317 } 5318 5319 rw_lock_read_unlock(&addressSpace->lock); 5320 5321 if (area == NULL) 5322 return B_BAD_ADDRESS; 5323 5324 return B_OK; 5325 } 5326 5327 5328 static status_t 5329 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5330 { 5331 // is newSize a multiple of B_PAGE_SIZE? 5332 if (newSize & (B_PAGE_SIZE - 1)) 5333 return B_BAD_VALUE; 5334 5335 // lock all affected address spaces and the cache 5336 vm_area* area; 5337 vm_cache* cache; 5338 5339 MultiAddressSpaceLocker locker; 5340 status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area, 5341 &cache); 5342 if (status != B_OK) 5343 return status; 5344 AreaCacheLocker cacheLocker(cache); // already locked 5345 5346 // enforce restrictions 5347 if (!kernel) { 5348 if ((area->protection & B_KERNEL_AREA) != 0) 5349 return B_NOT_ALLOWED; 5350 // TODO: Enforce all restrictions (team, etc.)! 5351 } 5352 5353 size_t oldSize = area->size; 5354 if (newSize == oldSize) 5355 return B_OK; 5356 5357 // Resize all areas of this area's cache 5358 5359 if (cache->type != CACHE_TYPE_RAM) 5360 return B_NOT_ALLOWED; 5361 5362 if (oldSize < newSize) { 5363 // We need to check if all areas of this cache can be resized 5364 5365 for (vm_area* current = cache->areas; current != NULL; 5366 current = current->cache_next) { 5367 vm_area* next = current->address_space_next; 5368 if (next != NULL && next->base <= (current->base + newSize)) { 5369 // If the area was created inside a reserved area, it can 5370 // also be resized in that area 5371 // TODO: if there is free space after the reserved area, it could 5372 // be used as well... 5373 if (next->id == RESERVED_AREA_ID 5374 && next->cache_offset <= current->base 5375 && next->base - 1 + next->size 5376 >= current->base - 1 + newSize) 5377 continue; 5378 5379 return B_ERROR; 5380 } 5381 } 5382 } 5383 5384 // Okay, looks good so far, so let's do it 5385 5386 if (oldSize < newSize) { 5387 // Growing the cache can fail, so we do it first. 
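		// (Note: Resize() apparently takes the new virtual end offset of the
		// cache, i.e. virtual_base + newSize, rather than a size delta; the
		// shrink path further below calls it with the same convention.)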
5388 status = cache->Resize(cache->virtual_base + newSize); 5389 if (status != B_OK) 5390 return status; 5391 } 5392 5393 for (vm_area* current = cache->areas; current != NULL; 5394 current = current->cache_next) { 5395 vm_area* next = current->address_space_next; 5396 if (next != NULL && next->base <= (current->base + newSize)) { 5397 if (next->id == RESERVED_AREA_ID 5398 && next->cache_offset <= current->base 5399 && next->base - 1 + next->size >= current->base - 1 + newSize) { 5400 // resize reserved area 5401 addr_t offset = current->base + newSize - next->base; 5402 if (next->size <= offset) { 5403 current->address_space_next = next->address_space_next; 5404 free(next); 5405 } else { 5406 next->size -= offset; 5407 next->base += offset; 5408 } 5409 } else { 5410 panic("resize situation for area %p has changed although we " 5411 "should have the address space lock", current); 5412 status = B_ERROR; 5413 break; 5414 } 5415 } 5416 5417 current->size = newSize; 5418 5419 // We also need to unmap all pages beyond the new size, if the area has 5420 // shrinked 5421 if (newSize < oldSize) { 5422 vm_unmap_pages(current, current->base + newSize, oldSize - newSize, 5423 false); 5424 } 5425 } 5426 5427 // shrinking the cache can't fail, so we do it now 5428 if (status == B_OK && newSize < oldSize) 5429 status = cache->Resize(cache->virtual_base + newSize); 5430 5431 if (status < B_OK) { 5432 // This shouldn't really be possible, but hey, who knows 5433 for (vm_area* current = cache->areas; current != NULL; 5434 current = current->cache_next) { 5435 current->size = oldSize; 5436 } 5437 5438 cache->Resize(cache->virtual_base + oldSize); 5439 } 5440 5441 // TODO: we must honour the lock restrictions of this area 5442 return status; 5443 } 5444 5445 5446 status_t 5447 vm_memset_physical(addr_t address, int value, size_t length) 5448 { 5449 return vm_kernel_address_space()->translation_map.ops->memset_physical( 5450 address, value, length); 5451 } 5452 5453 5454 status_t 5455 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user) 5456 { 5457 return vm_kernel_address_space()->translation_map.ops->memcpy_from_physical( 5458 to, from, length, user); 5459 } 5460 5461 5462 status_t 5463 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user) 5464 { 5465 return vm_kernel_address_space()->translation_map.ops->memcpy_to_physical( 5466 to, _from, length, user); 5467 } 5468 5469 5470 void 5471 vm_memcpy_physical_page(addr_t to, addr_t from) 5472 { 5473 return vm_kernel_address_space()->translation_map.ops->memcpy_physical_page( 5474 to, from); 5475 } 5476 5477 5478 // #pragma mark - kernel public API 5479 5480 5481 status_t 5482 user_memcpy(void* to, const void* from, size_t size) 5483 { 5484 if (arch_cpu_user_memcpy(to, from, size, 5485 &thread_get_current_thread()->fault_handler) < B_OK) 5486 return B_BAD_ADDRESS; 5487 5488 return B_OK; 5489 } 5490 5491 5492 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5493 the string in \a to, NULL-terminating the result. 5494 5495 \param to Pointer to the destination C-string. 5496 \param from Pointer to the source C-string. 5497 \param size Size in bytes of the string buffer pointed to by \a to. 5498 5499 \return strlen(\a from). 
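	A usage sketch (mirroring how the syscalls in this file call it):
	\code
	char name[B_OS_NAME_LENGTH];
	if (user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;
	\endcode
	A return value >= \a size indicates that the source string was truncated.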
5500 */ 5501 ssize_t 5502 user_strlcpy(char* to, const char* from, size_t size) 5503 { 5504 return arch_cpu_user_strlcpy(to, from, size, 5505 &thread_get_current_thread()->fault_handler); 5506 } 5507 5508 5509 status_t 5510 user_memset(void* s, char c, size_t count) 5511 { 5512 if (arch_cpu_user_memset(s, c, count, 5513 &thread_get_current_thread()->fault_handler) < B_OK) 5514 return B_BAD_ADDRESS; 5515 5516 return B_OK; 5517 } 5518 5519 5520 status_t 5521 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5522 { 5523 vm_address_space* addressSpace = NULL; 5524 struct vm_translation_map* map; 5525 addr_t unalignedBase = (addr_t)address; 5526 addr_t end = unalignedBase + numBytes; 5527 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 5528 bool isUser = IS_USER_ADDRESS(address); 5529 bool needsLocking = true; 5530 5531 if (isUser) { 5532 if (team == B_CURRENT_TEAM) 5533 addressSpace = vm_get_current_user_address_space(); 5534 else 5535 addressSpace = vm_get_address_space(team); 5536 } else 5537 addressSpace = vm_get_kernel_address_space(); 5538 if (addressSpace == NULL) 5539 return B_ERROR; 5540 5541 // test if we're on an area that allows faults at all 5542 5543 map = &addressSpace->translation_map; 5544 5545 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5546 if (status < B_OK) 5547 goto out; 5548 if (!needsLocking) 5549 goto out; 5550 5551 for (; base < end; base += B_PAGE_SIZE) { 5552 addr_t physicalAddress; 5553 uint32 protection; 5554 status_t status; 5555 5556 map->ops->lock(map); 5557 status = map->ops->query(map, base, &physicalAddress, &protection); 5558 map->ops->unlock(map); 5559 5560 if (status < B_OK) 5561 goto out; 5562 5563 if ((protection & PAGE_PRESENT) != 0) { 5564 // if B_READ_DEVICE is set, the caller intents to write to the locked 5565 // memory, so if it hasn't been mapped writable, we'll try the soft 5566 // fault anyway 5567 if ((flags & B_READ_DEVICE) == 0 5568 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 5569 // update wiring 5570 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5571 if (page == NULL) 5572 panic("couldn't lookup physical page just allocated\n"); 5573 5574 increment_page_wired_count(page); 5575 continue; 5576 } 5577 } 5578 5579 status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0, 5580 isUser); 5581 if (status != B_OK) { 5582 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) " 5583 "failed: %s\n", (void*)unalignedBase, numBytes, flags, 5584 strerror(status)); 5585 goto out; 5586 } 5587 5588 // TODO: Here's a race condition. We should probably add a parameter 5589 // to vm_soft_fault() that would cause the page's wired count to be 5590 // incremented immediately. 5591 // TODO: After memory has been locked in an area, we need to prevent the 5592 // area from being deleted, resized, cut, etc. That could be done using 5593 // a "locked pages" count in vm_area, and maybe a condition variable, if 5594 // we want to allow waiting for the area to become eligible for these 5595 // operations again. 5596 5597 map->ops->lock(map); 5598 status = map->ops->query(map, base, &physicalAddress, &protection); 5599 map->ops->unlock(map); 5600 5601 if (status < B_OK) 5602 goto out; 5603 5604 // update wiring 5605 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5606 if (page == NULL) 5607 panic("couldn't lookup physical page"); 5608 5609 increment_page_wired_count(page); 5610 // TODO: needs to be atomic on all platforms! 
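		// (Sketch of the expected pairing, for context: a driver that locks
		// a buffer, e.g.
		//   lock_memory_etc(team, buffer, length, B_READ_DEVICE);
		// is expected to balance it later with
		//   unlock_memory_etc(team, buffer, length, B_READ_DEVICE);
		// which decrements the wired count again -- see below.)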
5611 } 5612 5613 out: 5614 vm_put_address_space(addressSpace); 5615 return status; 5616 } 5617 5618 5619 status_t 5620 lock_memory(void* address, size_t numBytes, uint32 flags) 5621 { 5622 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5623 } 5624 5625 5626 status_t 5627 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5628 { 5629 vm_address_space* addressSpace = NULL; 5630 struct vm_translation_map* map; 5631 addr_t unalignedBase = (addr_t)address; 5632 addr_t end = unalignedBase + numBytes; 5633 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 5634 bool needsLocking = true; 5635 5636 if (IS_USER_ADDRESS(address)) { 5637 if (team == B_CURRENT_TEAM) 5638 addressSpace = vm_get_current_user_address_space(); 5639 else 5640 addressSpace = vm_get_address_space(team); 5641 } else 5642 addressSpace = vm_get_kernel_address_space(); 5643 if (addressSpace == NULL) 5644 return B_ERROR; 5645 5646 map = &addressSpace->translation_map; 5647 5648 status_t status = test_lock_memory(addressSpace, base, needsLocking); 5649 if (status < B_OK) 5650 goto out; 5651 if (!needsLocking) 5652 goto out; 5653 5654 for (; base < end; base += B_PAGE_SIZE) { 5655 map->ops->lock(map); 5656 5657 addr_t physicalAddress; 5658 uint32 protection; 5659 status = map->ops->query(map, base, &physicalAddress, 5660 &protection); 5661 5662 map->ops->unlock(map); 5663 5664 if (status < B_OK) 5665 goto out; 5666 if ((protection & PAGE_PRESENT) == 0) 5667 panic("calling unlock_memory() on unmapped memory!"); 5668 5669 // update wiring 5670 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 5671 if (page == NULL) 5672 panic("couldn't lookup physical page"); 5673 5674 decrement_page_wired_count(page); 5675 } 5676 5677 out: 5678 vm_put_address_space(addressSpace); 5679 return status; 5680 } 5681 5682 5683 status_t 5684 unlock_memory(void* address, size_t numBytes, uint32 flags) 5685 { 5686 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5687 } 5688 5689 5690 /*! Similar to get_memory_map(), but also allows to specify the address space 5691 for the memory in question and has a saner semantics. 5692 Returns \c B_OK when the complete range could be translated or 5693 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5694 case the actual number of entries is written to \c *_numEntries. Any other 5695 error case indicates complete failure; \c *_numEntries will be set to \c 0 5696 in this case. 5697 */ 5698 status_t 5699 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5700 physical_entry* table, uint32* _numEntries) 5701 { 5702 uint32 numEntries = *_numEntries; 5703 *_numEntries = 0; 5704 5705 vm_address_space* addressSpace; 5706 addr_t virtualAddress = (addr_t)address; 5707 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5708 addr_t physicalAddress; 5709 status_t status = B_OK; 5710 int32 index = -1; 5711 addr_t offset = 0; 5712 bool interrupts = are_interrupts_enabled(); 5713 5714 TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team, 5715 address, numBytes, numEntries)); 5716 5717 if (numEntries == 0 || numBytes == 0) 5718 return B_BAD_VALUE; 5719 5720 // in which address space is the address to be found? 
	if (IS_USER_ADDRESS(virtualAddress)) {
		if (team == B_CURRENT_TEAM)
			addressSpace = vm_get_current_user_address_space();
		else
			addressSpace = vm_get_address_space(team);
	} else
		addressSpace = vm_get_kernel_address_space();

	if (addressSpace == NULL)
		return B_ERROR;

	vm_translation_map* map = &addressSpace->translation_map;

	if (interrupts)
		map->ops->lock(map);

	while (offset < numBytes) {
		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
		uint32 flags;

		if (interrupts) {
			status = map->ops->query(map, (addr_t)address + offset,
				&physicalAddress, &flags);
		} else {
			status = map->ops->query_interrupt(map, (addr_t)address + offset,
				&physicalAddress, &flags);
		}
		if (status < B_OK)
			break;
		if ((flags & PAGE_PRESENT) == 0) {
			panic("get_memory_map() called on unmapped memory!");
			return B_BAD_ADDRESS;
		}

		if (index < 0 && pageOffset > 0) {
			physicalAddress += pageOffset;
			if (bytes > B_PAGE_SIZE - pageOffset)
				bytes = B_PAGE_SIZE - pageOffset;
		}

		// do we need to switch to the next physical_entry?
		if (index < 0 || (addr_t)table[index].address
				!= physicalAddress - table[index].size) {
			if ((uint32)++index + 1 > numEntries) {
				// the table is too small
				status = B_BUFFER_OVERFLOW;
				break;
			}
			table[index].address = (void*)physicalAddress;
			table[index].size = bytes;
		} else {
			// the page still fits into the current entry
			table[index].size += bytes;
		}

		offset += bytes;
	}

	if (interrupts)
		map->ops->unlock(map);

	if (status != B_OK)
		return status;

	if ((uint32)index + 1 > numEntries) {
		*_numEntries = index;
		return B_BUFFER_OVERFLOW;
	}

	*_numEntries = index + 1;
	return B_OK;
}


/*!	According to the BeBook, this function should always succeed.
	This is no longer the case.
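	Illustrative call (hypothetical buffer, not taken from this file):
	\code
	physical_entry table[4];
	get_memory_map(buffer, 2 * B_PAGE_SIZE, table, 4);
	\endcode
	When there is room in the table, the returned list is terminated by an
	entry with a size of 0.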
5797 */ 5798 long 5799 get_memory_map(const void* address, ulong numBytes, physical_entry* table, 5800 long numEntries) 5801 { 5802 uint32 entriesRead = numEntries; 5803 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5804 table, &entriesRead); 5805 if (error != B_OK) 5806 return error; 5807 5808 // close the entry list 5809 5810 // if it's only one entry, we will silently accept the missing ending 5811 if (numEntries == 1) 5812 return B_OK; 5813 5814 if (entriesRead + 1 > (uint32)numEntries) 5815 return B_BUFFER_OVERFLOW; 5816 5817 table[entriesRead].address = NULL; 5818 table[entriesRead].size = 0; 5819 5820 return B_OK; 5821 } 5822 5823 5824 area_id 5825 area_for(void* address) 5826 { 5827 team_id space; 5828 5829 if (IS_USER_ADDRESS(address)) { 5830 // we try the user team address space, if any 5831 space = vm_current_user_address_space_id(); 5832 if (space < B_OK) 5833 return space; 5834 } else 5835 space = vm_kernel_address_space_id(); 5836 5837 return vm_area_for(space, (addr_t)address); 5838 } 5839 5840 5841 area_id 5842 find_area(const char* name) 5843 { 5844 rw_lock_read_lock(&sAreaHashLock); 5845 struct hash_iterator iterator; 5846 hash_open(sAreaHash, &iterator); 5847 5848 vm_area* area; 5849 area_id id = B_NAME_NOT_FOUND; 5850 while ((area = (vm_area*)hash_next(sAreaHash, &iterator)) != NULL) { 5851 if (area->id == RESERVED_AREA_ID) 5852 continue; 5853 5854 if (!strcmp(area->name, name)) { 5855 id = area->id; 5856 break; 5857 } 5858 } 5859 5860 hash_close(sAreaHash, &iterator, false); 5861 rw_lock_read_unlock(&sAreaHashLock); 5862 5863 return id; 5864 } 5865 5866 5867 status_t 5868 _get_area_info(area_id id, area_info* info, size_t size) 5869 { 5870 if (size != sizeof(area_info) || info == NULL) 5871 return B_BAD_VALUE; 5872 5873 AddressSpaceReadLocker locker; 5874 vm_area* area; 5875 status_t status = locker.SetFromArea(id, area); 5876 if (status != B_OK) 5877 return status; 5878 5879 fill_area_info(area, info, size); 5880 return B_OK; 5881 } 5882 5883 5884 status_t 5885 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size) 5886 { 5887 addr_t nextBase = *(addr_t*)cookie; 5888 5889 // we're already through the list 5890 if (nextBase == (addr_t)-1) 5891 return B_ENTRY_NOT_FOUND; 5892 5893 if (team == B_CURRENT_TEAM) 5894 team = team_get_current_team_id(); 5895 5896 AddressSpaceReadLocker locker(team); 5897 if (!locker.IsLocked()) 5898 return B_BAD_TEAM_ID; 5899 5900 vm_area* area; 5901 for (area = locker.AddressSpace()->areas; area != NULL; 5902 area = area->address_space_next) { 5903 if (area->id == RESERVED_AREA_ID) 5904 continue; 5905 5906 if (area->base > nextBase) 5907 break; 5908 } 5909 5910 if (area == NULL) { 5911 nextBase = (addr_t)-1; 5912 return B_ENTRY_NOT_FOUND; 5913 } 5914 5915 fill_area_info(area, info, size); 5916 *cookie = (int32)(area->base); 5917 5918 return B_OK; 5919 } 5920 5921 5922 status_t 5923 set_area_protection(area_id area, uint32 newProtection) 5924 { 5925 fix_protection(&newProtection); 5926 5927 return vm_set_area_protection(vm_kernel_address_space_id(), area, 5928 newProtection, true); 5929 } 5930 5931 5932 status_t 5933 resize_area(area_id areaID, size_t newSize) 5934 { 5935 return vm_resize_area(areaID, newSize, true); 5936 } 5937 5938 5939 /*! Transfers the specified area to a new team. The caller must be the owner 5940 of the area (not yet enforced but probably should be). 
5941 This function is currently not exported to the kernel namespace, but is 5942 only accessible using the _kern_transfer_area() syscall. 5943 */ 5944 static area_id 5945 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5946 bool kernel) 5947 { 5948 area_info info; 5949 status_t status = get_area_info(id, &info); 5950 if (status < B_OK) 5951 return status; 5952 5953 area_id clonedArea = vm_clone_area(target, info.name, _address, 5954 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5955 if (clonedArea < B_OK) 5956 return clonedArea; 5957 5958 status = vm_delete_area(info.team, id, kernel); 5959 if (status < B_OK) { 5960 vm_delete_area(target, clonedArea, kernel); 5961 return status; 5962 } 5963 5964 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 5965 5966 return clonedArea; 5967 } 5968 5969 5970 area_id 5971 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes, 5972 uint32 addressSpec, uint32 protection, void** _virtualAddress) 5973 { 5974 if (!arch_vm_supports_protection(protection)) 5975 return B_NOT_SUPPORTED; 5976 5977 fix_protection(&protection); 5978 5979 return vm_map_physical_memory(vm_kernel_address_space_id(), name, 5980 _virtualAddress, addressSpec, numBytes, protection, 5981 (addr_t)physicalAddress); 5982 } 5983 5984 5985 area_id 5986 clone_area(const char* name, void** _address, uint32 addressSpec, 5987 uint32 protection, area_id source) 5988 { 5989 if ((protection & B_KERNEL_PROTECTION) == 0) 5990 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5991 5992 return vm_clone_area(vm_kernel_address_space_id(), name, _address, 5993 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5994 } 5995 5996 5997 area_id 5998 create_area_etc(team_id team, const char* name, void** address, 5999 uint32 addressSpec, uint32 size, uint32 lock, uint32 protection, 6000 uint32 flags) 6001 { 6002 fix_protection(&protection); 6003 6004 return vm_create_anonymous_area(team, (char*)name, address, addressSpec, 6005 size, lock, protection, flags, true); 6006 } 6007 6008 6009 area_id 6010 create_area(const char* name, void** _address, uint32 addressSpec, size_t size, 6011 uint32 lock, uint32 protection) 6012 { 6013 fix_protection(&protection); 6014 6015 return vm_create_anonymous_area(vm_kernel_address_space_id(), (char*)name, 6016 _address, addressSpec, size, lock, protection, 0, true); 6017 } 6018 6019 6020 status_t 6021 delete_area(area_id area) 6022 { 6023 return vm_delete_area(vm_kernel_address_space_id(), area, true); 6024 } 6025 6026 6027 // #pragma mark - Userland syscalls 6028 6029 6030 status_t 6031 _user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, 6032 addr_t size) 6033 { 6034 // filter out some unavailable values (for userland) 6035 switch (addressSpec) { 6036 case B_ANY_KERNEL_ADDRESS: 6037 case B_ANY_KERNEL_BLOCK_ADDRESS: 6038 return B_BAD_VALUE; 6039 } 6040 6041 addr_t address; 6042 6043 if (!IS_USER_ADDRESS(userAddress) 6044 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6045 return B_BAD_ADDRESS; 6046 6047 status_t status = vm_reserve_address_range( 6048 vm_current_user_address_space_id(), (void**)&address, addressSpec, size, 6049 RESERVED_AVOID_BASE); 6050 if (status < B_OK) 6051 return status; 6052 6053 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6054 vm_unreserve_address_range(vm_current_user_address_space_id(), 6055 (void*)address, size); 6056 return B_BAD_ADDRESS; 6057 } 6058 6059 return B_OK; 6060 } 6061 6062 
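// For context: the _user_*() syscalls below share a common pattern for
// touching userland memory. Every user pointer is checked with
// IS_USER_ADDRESS() and then only accessed via user_memcpy() or
// user_strlcpy(), so that a bad pointer yields B_BAD_ADDRESS instead of an
// unhandled kernel fault. Schematically (hypothetical syscall and info type,
// sketch only):
//
//	status_t
//	_user_example_get_info(example_info* userInfo)
//	{
//		if (!IS_USER_ADDRESS(userInfo))
//			return B_BAD_ADDRESS;
//
//		example_info info;
//		// ... fill in "info" from kernel structures ...
//
//		if (user_memcpy(userInfo, &info, sizeof(info)) < B_OK)
//			return B_BAD_ADDRESS;
//
//		return B_OK;
//	}
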
6063 area_id 6064 _user_area_for(void* address) 6065 { 6066 return vm_area_for(vm_current_user_address_space_id(), (addr_t)address); 6067 } 6068 6069 6070 area_id 6071 _user_find_area(const char* userName) 6072 { 6073 char name[B_OS_NAME_LENGTH]; 6074 6075 if (!IS_USER_ADDRESS(userName) 6076 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6077 return B_BAD_ADDRESS; 6078 6079 return find_area(name); 6080 } 6081 6082 6083 status_t 6084 _user_get_area_info(area_id area, area_info* userInfo) 6085 { 6086 if (!IS_USER_ADDRESS(userInfo)) 6087 return B_BAD_ADDRESS; 6088 6089 area_info info; 6090 status_t status = get_area_info(area, &info); 6091 if (status < B_OK) 6092 return status; 6093 6094 // TODO: do we want to prevent userland from seeing kernel protections? 6095 //info.protection &= B_USER_PROTECTION; 6096 6097 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6098 return B_BAD_ADDRESS; 6099 6100 return status; 6101 } 6102 6103 6104 status_t 6105 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo) 6106 { 6107 int32 cookie; 6108 6109 if (!IS_USER_ADDRESS(userCookie) 6110 || !IS_USER_ADDRESS(userInfo) 6111 || user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK) 6112 return B_BAD_ADDRESS; 6113 6114 area_info info; 6115 status_t status = _get_next_area_info(team, &cookie, &info, 6116 sizeof(area_info)); 6117 if (status != B_OK) 6118 return status; 6119 6120 //info.protection &= B_USER_PROTECTION; 6121 6122 if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK 6123 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6124 return B_BAD_ADDRESS; 6125 6126 return status; 6127 } 6128 6129 6130 status_t 6131 _user_set_area_protection(area_id area, uint32 newProtection) 6132 { 6133 if ((newProtection & ~B_USER_PROTECTION) != 0) 6134 return B_BAD_VALUE; 6135 6136 fix_protection(&newProtection); 6137 6138 return vm_set_area_protection(vm_current_user_address_space_id(), area, 6139 newProtection, false); 6140 } 6141 6142 6143 status_t 6144 _user_resize_area(area_id area, size_t newSize) 6145 { 6146 // TODO: Since we restrict deleting of areas to those owned by the team, 6147 // we should also do that for resizing (check other functions, too). 
	return vm_resize_area(area, newSize, false);
}


area_id
_user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
	team_id target)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	void* address;
	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
	if (newArea < B_OK)
		return newArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return newArea;
}


area_id
_user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
	uint32 protection, area_id sourceArea)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	fix_protection(&protection);

	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name,
		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
		false);
	if (clonedArea < B_OK)
		return clonedArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(clonedArea);
		return B_BAD_ADDRESS;
	}

	return clonedArea;
}


area_id
_user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
	size_t size, uint32 lock, uint32 protection)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS
		&& IS_KERNEL_ADDRESS(address))
		return B_BAD_VALUE;

	fix_protection(&protection);

	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
		(char*)name, &address, addressSpec, size, lock, protection, 0, false);

	if (area >= B_OK
		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(area);
		return B_BAD_ADDRESS;
	}

	return area;
}


status_t
_user_delete_area(area_id area)
{
	// Unlike the BeOS implementation, you can now only delete areas
	// that you have created yourself from userland.
	// The documentation to delete_area() explicitly states that this
	// will be restricted in the future, and so it will.
	return vm_delete_area(vm_current_user_address_space_id(), area, false);
}


// TODO: create a BeOS style call for this!

area_id
_user_map_file(const char* userName, void** userAddress, int addressSpec,
	size_t size, int protection, int mapping, int fd, off_t offset)
{
	char name[B_OS_NAME_LENGTH];
	void* address;
	area_id area;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS) {
		if ((addr_t)address + size < (addr_t)address)
			return B_BAD_VALUE;
		if (!IS_USER_ADDRESS(address)
			|| !IS_USER_ADDRESS((addr_t)address + size)) {
			return B_BAD_ADDRESS;
		}
	}

	// userland created areas can always be accessed by the kernel
	protection |= B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
		addressSpec, size, protection, mapping, fd, offset, false);
	if (area < B_OK)
		return area;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return area;
}


status_t
_user_unmap_memory(void* _address, size_t size)
{
	addr_t address = (addr_t)_address;

	// check params
	if (size == 0 || (addr_t)address + size < (addr_t)address)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
		return B_BAD_ADDRESS;

	// write lock the address space
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// unmap
	return unmap_address_range(locker.AddressSpace(), address, size, false);
}


status_t
_user_set_memory_protection(void* _address, size_t size, int protection)
{
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	// extend and check protection
	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
	uint32 actualProtection = protection | B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	if (!arch_vm_supports_protection(actualProtection))
		return B_NOT_SUPPORTED;

	// We need to write lock the address space, since we're going to play with
	// the areas.
	AddressSpaceWriteLocker locker;
	status_t status = locker.SetTo(team_get_current_team_id());
	if (status != B_OK)
		return status;

	// First round: Check whether the whole range is covered by areas and
	// whether we are allowed to modify them.
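	// (Running the checks as a separate pass means we only start modifying
	// page protections below once the complete range has been validated. The
	// address space remains write locked in between, so the area layout
	// cannot change under us.)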
	addr_t currentAddress = address;
	size_t sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		if ((area->protection & B_KERNEL_AREA) != 0)
			return B_NOT_ALLOWED;

		// TODO: For (shared) mapped files we should check whether the new
		// protections are compatible with the file permissions. We don't have
		// a way to do that yet, though.

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;
	}

	// Second round: If the protections differ from those of the area, create a
	// page protection array and re-map mapped pages.
	vm_translation_map* map = &locker.AddressSpace()->translation_map;
	currentAddress = address;
	sizeLeft = size;
	while (sizeLeft > 0) {
		vm_area* area = vm_area_lookup(locker.AddressSpace(), currentAddress);
		if (area == NULL)
			return B_NO_MEMORY;

		addr_t offset = currentAddress - area->base;
		size_t rangeSize = min_c(area->size - offset, sizeLeft);

		currentAddress += rangeSize;
		sizeLeft -= rangeSize;

		if (area->page_protections == NULL) {
			if (area->protection == actualProtection)
				continue;

			// In the page protections we store only the three user
			// protections, so we use 4 bits per page.
			uint32 bytes = (area->size / B_PAGE_SIZE + 1) / 2;
			area->page_protections = (uint8*)malloc(bytes);
			if (area->page_protections == NULL)
				return B_NO_MEMORY;

			// init the page protections for all pages to that of the area
			uint32 areaProtection = area->protection
				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
			memset(area->page_protections,
				areaProtection | (areaProtection << 4), bytes);
		}

		for (addr_t pageAddress = area->base + offset;
			pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
			map->ops->lock(map);

			set_area_page_protection(area, pageAddress, protection);

			addr_t physicalAddress;
			uint32 flags;

			status_t error = map->ops->query(map, pageAddress,
				&physicalAddress, &flags);
			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
				map->ops->unlock(map);
				continue;
			}

			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page == NULL) {
				panic("area %p looking up page failed for pa 0x%lx\n", area,
					physicalAddress);
				map->ops->unlock(map);
				return B_ERROR;
			}

			// If the page is not in the topmost cache and write access is
			// requested, we have to unmap it. Otherwise we can re-map it with
			// the new protection.
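			// (Unmapping rather than re-mapping writable forces the next
			// write to take a fault, so that copy-on-write can pull the page
			// into the area's topmost cache instead of modifying the lower,
			// shared cache's page in place.)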
			bool unmapPage = page->cache != area->cache
				&& (protection & B_WRITE_AREA) != 0;

			if (!unmapPage) {
				map->ops->unmap(map, pageAddress,
					pageAddress + B_PAGE_SIZE - 1);
				map->ops->map(map, pageAddress, physicalAddress,
					actualProtection);
			}

			map->ops->unlock(map);

			if (unmapPage)
				vm_unmap_page(area, pageAddress, true);
		}
	}

	return B_OK;
}


status_t
_user_sync_memory(void* _address, size_t size, int flags)
{
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	// check params
	if ((address % B_PAGE_SIZE) != 0)
		return B_BAD_VALUE;
	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
		|| !IS_USER_ADDRESS((addr_t)address + size)) {
		// weird error code required by POSIX
		return ENOMEM;
	}

	bool writeSync = (flags & MS_SYNC) != 0;
	bool writeAsync = (flags & MS_ASYNC) != 0;
	if (writeSync && writeAsync)
		return B_BAD_VALUE;

	if (size == 0 || (!writeSync && !writeAsync))
		return B_OK;

	// iterate through the range and sync all concerned areas
	while (size > 0) {
		// read lock the address space
		AddressSpaceReadLocker locker;
		status_t error = locker.SetTo(team_get_current_team_id());
		if (error != B_OK)
			return error;

		// get the first area
		vm_area* area = vm_area_lookup(locker.AddressSpace(), address);
		if (area == NULL)
			return B_NO_MEMORY;

		uint32 offset = address - area->base;
		size_t rangeSize = min_c(area->size - offset, size);
		offset += area->cache_offset;

		// lock the cache
		AreaCacheLocker cacheLocker(area);
		if (!cacheLocker)
			return B_BAD_VALUE;
		vm_cache* cache = area->cache;

		locker.Unlock();

		uint32 firstPage = offset >> PAGE_SHIFT;
		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);

		// write the pages
		if (cache->type == CACHE_TYPE_VNODE) {
			if (writeSync) {
				// synchronous
				error = vm_page_write_modified_page_range(cache, firstPage,
					endPage);
				if (error != B_OK)
					return error;
			} else {
				// asynchronous
				vm_page_schedule_write_page_range(cache, firstPage, endPage);
				// TODO: This is probably not quite what is supposed to happen.
				// Especially when a lot has to be written, it might take ages
				// until it really hits the disk.
			}
		}

		address += rangeSize;
		size -= rangeSize;
	}

	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
	// synchronize multiple mappings of the same file. In our VM they never get
	// out of sync, though, so we don't have to do anything.

	return B_OK;
}


status_t
_user_memory_advice(void* address, size_t size, int advice)
{
	// TODO: Implement!
	return B_OK;
}
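

/*	POSIX view of the two syscalls above -- an illustrative sketch only, the
	libroot glue itself is not part of this file: msync() presumably reaches
	_user_sync_memory() via the matching _kern_*() syscall, and a
	madvise()-style call (to the extent the POSIX layer exposes one) would
	end up in _user_memory_advice(), which is still a no-op. MS_INVALIDATE
	needs no extra work here (see the NOTE in _user_sync_memory()).

		// userland sketch, assuming <sys/mman.h> and an open file "fd"
		void* mapping = mmap(NULL, 16 * B_PAGE_SIZE, PROT_READ | PROT_WRITE,
			MAP_SHARED, fd, 0);
		// ... modify the mapping ...
		msync(mapping, 16 * B_PAGE_SIZE, MS_SYNC);
			// blocks until the modified pages of the vnode cache have been
			// written back
		msync(mapping, 16 * B_PAGE_SIZE, MS_ASYNC);
			// only schedules the write-back
*/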