/*
 * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include "vm_store_anonymous_noswap.h"
#include "vm_store_device.h"
#include "vm_store_null.h"

#include <OS.h>
#include <KernelExport.h>

#include <vm.h>
#include <vm_address_space.h>
#include <vm_priv.h>
#include <vm_page.h>
#include <vm_cache.h>
#include <vm_low_memory.h>
#include <file_cache.h>
#include <memheap.h>
#include <debug.h>
#include <console.h>
#include <int.h>
#include <smp.h>
#include <lock.h>
#include <thread.h>
#include <team.h>

#include <boot/stage2.h>
#include <boot/elf.h>

#include <arch/cpu.h>
#include <arch/vm.h>

#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>

//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif

#define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
#define ROUNDOWN(a, b) (((a) / (b)) * (b))


#define REGION_HASH_TABLE_SIZE 1024
static area_id sNextAreaID;
static hash_table *sAreaHash;
static sem_id sAreaHashLock;
static spinlock sMappingLock;

static off_t sAvailableMemory;
static benaphore sAvailableMemoryLock;

// function declarations
static status_t vm_soft_fault(addr_t address, bool is_write, bool is_user);
static bool vm_put_area(vm_area *area);


static int
area_compare(void *_area, const void *key)
{
	vm_area *area = (vm_area *)_area;
	const area_id *id = (const area_id *)key;

	if (area->id == *id)
		return 0;

	return -1;
}


static uint32
area_hash(void *_area, const void *key, uint32 range)
{
	vm_area *area = (vm_area *)_area;
	const area_id *id = (const area_id *)key;

	if (area != NULL)
		return area->id % range;

	return (uint32)*id % range;
}


static vm_area *
vm_get_area(area_id id)
{
	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);

	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
	if (area != NULL)
		atomic_add(&area->ref_count, 1);

	release_sem_etc(sAreaHashLock, READ_COUNT, 0);

	return area;
}


static vm_area *
create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags)
{
	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
	if (reserved == NULL)
		return NULL;

	memset(reserved, 0, sizeof(vm_area));
	reserved->id = RESERVED_AREA_ID;
		// this marks it as reserved space
	reserved->protection = flags;
	reserved->address_space = addressSpace;

	return reserved;
}


static vm_area *
create_area_struct(vm_address_space *addressSpace, const char *name,
	uint32 wiring, uint32 protection)
{
	// restrict the area name to B_OS_NAME_LENGTH
	size_t length = strlen(name) + 1;
	if (length > B_OS_NAME_LENGTH)
		length = B_OS_NAME_LENGTH;

	vm_area *area = (vm_area *)malloc(sizeof(vm_area));
	if (area == NULL)
		return NULL;

	area->name = (char *)malloc(length);
	if (area->name == NULL) {
		free(area);
		return NULL;
	}
	strlcpy(area->name, name, length);

	area->id = atomic_add(&sNextAreaID, 1);
	area->base = 0;
	area->size = 0;
	area->protection = protection;
	area->wiring = wiring;
	area->memory_type = 0;
	area->ref_count = 1;

	area->cache_ref = NULL;
	area->cache_offset = 0;

	area->address_space = addressSpace;
	area->address_space_next = NULL;
	area->cache_next = area->cache_prev = NULL;
	area->hash_next = NULL;
	new (&area->mappings) vm_area_mappings;

	return area;
}


/** Finds a reserved area that covers the region spanned by \a start and
 *	\a size, inserts the \a area into that region and makes sure that
 *	there are reserved regions for the remaining parts.
 */

static status_t
find_reserved_area(vm_address_space *addressSpace, addr_t start,
	addr_t size, vm_area *area)
{
	vm_area *next, *last = NULL;

	next = addressSpace->areas;
	while (next) {
		if (next->base <= start && next->base + next->size >= start + size) {
			// this area covers the requested range
			if (next->id != RESERVED_AREA_ID) {
				// but it's not reserved space, it's a real area
				return B_BAD_VALUE;
			}

			break;
		}
		last = next;
		next = next->address_space_next;
	}
	if (next == NULL)
		return B_ENTRY_NOT_FOUND;

	// now we have to transfer the requested part of the reserved
	// range to the new area - and remove, resize or split the old
	// reserved area.

	if (start == next->base) {
		// the area starts at the beginning of the reserved range
		if (last)
			last->address_space_next = area;
		else
			addressSpace->areas = area;

		if (size == next->size) {
			// the new area fully covers the reserved range
			area->address_space_next = next->address_space_next;
			free(next);
		} else {
			// resize the reserved range behind the area
			area->address_space_next = next;
			next->base += size;
			next->size -= size;
		}
	} else if (start + size == next->base + next->size) {
		// the area is at the end of the reserved range
		area->address_space_next = next->address_space_next;
		next->address_space_next = area;

		// resize the reserved range before the area
		next->size = start - next->base;
	} else {
		// the area splits the reserved range into two separate ones
		// we need a new reserved area to cover this space
		vm_area *reserved = create_reserved_area_struct(addressSpace,
			next->protection);
		if (reserved == NULL)
			return B_NO_MEMORY;

		reserved->address_space_next = next->address_space_next;
		area->address_space_next = reserved;
		next->address_space_next = area;

		// resize regions
		reserved->size = next->base + next->size - start - size;
		next->size = start - next->base;
		reserved->base = start + size;
		reserved->cache_offset = next->cache_offset;
	}

	area->base = start;
	area->size = size;
	addressSpace->change_count++;

	return B_OK;
}


/*!
Must be called with this address space's sem held */ 258 static status_t 259 find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start, 260 addr_t size, addr_t end, uint32 addressSpec, vm_area *area) 261 { 262 vm_area *last = NULL; 263 vm_area *next; 264 bool foundSpot = false; 265 266 TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, " 267 "size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start, 268 size, end, addressSpec, area)); 269 270 // do some sanity checking 271 if (start < addressSpace->base || size == 0 272 || (end - 1) > (addressSpace->base + (addressSpace->size - 1)) 273 || start + size > end) 274 return B_BAD_ADDRESS; 275 276 if (addressSpec == B_EXACT_ADDRESS) { 277 // search for a reserved area 278 status_t status = find_reserved_area(addressSpace, start, size, area); 279 if (status == B_OK || status == B_BAD_VALUE) 280 return status; 281 282 // there was no reserved area, and the slot doesn't seem to be used already 283 // ToDo: this could be further optimized. 284 } 285 286 // walk up to the spot where we should start searching 287 second_chance: 288 next = addressSpace->areas; 289 while (next) { 290 if (next->base >= start + size) { 291 // we have a winner 292 break; 293 } 294 last = next; 295 next = next->address_space_next; 296 } 297 298 // find the right spot depending on the address specification - the area 299 // will be inserted directly after "last" ("next" is not referenced anymore) 300 301 switch (addressSpec) { 302 case B_ANY_ADDRESS: 303 case B_ANY_KERNEL_ADDRESS: 304 case B_ANY_KERNEL_BLOCK_ADDRESS: 305 // find a hole big enough for a new area 306 if (!last) { 307 // see if we can build it at the beginning of the virtual map 308 if (!next || (next->base >= addressSpace->base + size)) { 309 foundSpot = true; 310 area->base = addressSpace->base; 311 break; 312 } 313 last = next; 314 next = next->address_space_next; 315 } 316 // keep walking 317 while (next) { 318 if (next->base >= last->base + last->size + size) { 319 // we found a spot (it'll be filled up below) 320 break; 321 } 322 last = next; 323 next = next->address_space_next; 324 } 325 326 if ((addressSpace->base + (addressSpace->size - 1)) 327 >= (last->base + last->size + (size - 1))) { 328 // got a spot 329 foundSpot = true; 330 area->base = last->base + last->size; 331 break; 332 } else { 333 // we didn't find a free spot - if there were any reserved areas with 334 // the RESERVED_AVOID_BASE flag set, we can now test those for free 335 // space 336 // ToDo: it would make sense to start with the biggest of them 337 next = addressSpace->areas; 338 last = NULL; 339 for (last = NULL; next; next = next->address_space_next, last = next) { 340 // ToDo: take free space after the reserved area into account! 
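				// Two cases are checked below: if the reserved range matches the
				// requested size exactly, it is removed and reused as a whole;
				// if it is larger, the new area is carved out of its end and the
				// reserved range is shrunk accordingly.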
341 if (next->size == size) { 342 // the reserved area is entirely covered, and thus, removed 343 if (last) 344 last->address_space_next = next->address_space_next; 345 else 346 addressSpace->areas = next->address_space_next; 347 348 foundSpot = true; 349 area->base = next->base; 350 free(next); 351 break; 352 } 353 if (next->size >= size) { 354 // the new area will be placed at the end of the reserved 355 // area, and the reserved area will be resized to make space 356 foundSpot = true; 357 next->size -= size; 358 last = next; 359 area->base = next->base + next->size; 360 break; 361 } 362 } 363 } 364 break; 365 366 case B_BASE_ADDRESS: 367 // find a hole big enough for a new area beginning with "start" 368 if (!last) { 369 // see if we can build it at the beginning of the specified start 370 if (!next || (next->base >= start + size)) { 371 foundSpot = true; 372 area->base = start; 373 break; 374 } 375 last = next; 376 next = next->address_space_next; 377 } 378 // keep walking 379 while (next) { 380 if (next->base >= last->base + last->size + size) { 381 // we found a spot (it'll be filled up below) 382 break; 383 } 384 last = next; 385 next = next->address_space_next; 386 } 387 388 if ((addressSpace->base + (addressSpace->size - 1)) 389 >= (last->base + last->size + (size - 1))) { 390 // got a spot 391 foundSpot = true; 392 if (last->base + last->size <= start) 393 area->base = start; 394 else 395 area->base = last->base + last->size; 396 break; 397 } 398 // we didn't find a free spot in the requested range, so we'll 399 // try again without any restrictions 400 start = addressSpace->base; 401 addressSpec = B_ANY_ADDRESS; 402 last = NULL; 403 goto second_chance; 404 405 case B_EXACT_ADDRESS: 406 // see if we can create it exactly here 407 if (!last) { 408 if (!next || (next->base >= start + size)) { 409 foundSpot = true; 410 area->base = start; 411 break; 412 } 413 } else { 414 if (next) { 415 if (last->base + last->size <= start && next->base >= start + size) { 416 foundSpot = true; 417 area->base = start; 418 break; 419 } 420 } else { 421 if ((last->base + (last->size - 1)) <= start - 1) { 422 foundSpot = true; 423 area->base = start; 424 } 425 } 426 } 427 break; 428 default: 429 return B_BAD_VALUE; 430 } 431 432 if (!foundSpot) 433 return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY; 434 435 area->size = size; 436 if (last) { 437 area->address_space_next = last->address_space_next; 438 last->address_space_next = area; 439 } else { 440 area->address_space_next = addressSpace->areas; 441 addressSpace->areas = area; 442 } 443 addressSpace->change_count++; 444 return B_OK; 445 } 446 447 448 /** This inserts the area you pass into the specified address space. 449 * It will also set the "_address" argument to its base address when 450 * the call succeeds. 451 * You need to hold the vm_address_space semaphore. 
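 *	Callers acquire that semaphore with WRITE_COUNT before calling (see
 *	map_backing_store() and vm_reserve_address_range()).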
452 */ 453 454 static status_t 455 insert_area(vm_address_space *addressSpace, void **_address, 456 uint32 addressSpec, addr_t size, vm_area *area) 457 { 458 addr_t searchBase, searchEnd; 459 status_t status; 460 461 switch (addressSpec) { 462 case B_EXACT_ADDRESS: 463 searchBase = (addr_t)*_address; 464 searchEnd = (addr_t)*_address + size; 465 break; 466 467 case B_BASE_ADDRESS: 468 searchBase = (addr_t)*_address; 469 searchEnd = addressSpace->base + (addressSpace->size - 1); 470 break; 471 472 case B_ANY_ADDRESS: 473 case B_ANY_KERNEL_ADDRESS: 474 case B_ANY_KERNEL_BLOCK_ADDRESS: 475 searchBase = addressSpace->base; 476 searchEnd = addressSpace->base + (addressSpace->size - 1); 477 break; 478 479 default: 480 return B_BAD_VALUE; 481 } 482 483 status = find_and_insert_area_slot(addressSpace, searchBase, size, 484 searchEnd, addressSpec, area); 485 if (status == B_OK) { 486 // ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS 487 // vs. B_ANY_KERNEL_BLOCK_ADDRESS here? 488 *_address = (void *)area->base; 489 } 490 491 return status; 492 } 493 494 495 static status_t 496 map_backing_store(vm_address_space *addressSpace, vm_cache_ref *cacheRef, 497 void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec, 498 int wiring, int protection, int mapping, vm_area **_area, const char *areaName) 499 { 500 TRACE(("map_backing_store: aspace %p, cacheref %p, *vaddr %p, offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, _area %p, area_name '%s'\n", 501 addressSpace, cacheRef, *_virtualAddress, offset, size, addressSpec, 502 wiring, protection, _area, areaName)); 503 504 vm_area *area = create_area_struct(addressSpace, areaName, wiring, protection); 505 if (area == NULL) 506 return B_NO_MEMORY; 507 508 mutex_lock(&cacheRef->lock); 509 510 vm_cache *cache = cacheRef->cache; 511 vm_store *store = cache->store; 512 bool unlock = true; 513 status_t status; 514 515 // if this is a private map, we need to create a new cache & store object 516 // pair to handle the private copies of pages as they are written to 517 if (mapping == REGION_PRIVATE_MAP) { 518 vm_cache_ref *newCacheRef; 519 vm_cache *newCache; 520 vm_store *newStore; 521 522 // create an anonymous store object 523 newStore = vm_store_create_anonymous_noswap((protection & B_STACK_AREA) != 0, 524 0, USER_STACK_GUARD_PAGES); 525 if (newStore == NULL) { 526 status = B_NO_MEMORY; 527 goto err1; 528 } 529 newCache = vm_cache_create(newStore); 530 if (newCache == NULL) { 531 status = B_NO_MEMORY; 532 newStore->ops->destroy(newStore); 533 goto err1; 534 } 535 status = vm_cache_ref_create(newCache, false); 536 if (status < B_OK) { 537 newStore->ops->destroy(newStore); 538 free(newCache); 539 goto err1; 540 } 541 542 newCacheRef = newCache->ref; 543 newCache->type = CACHE_TYPE_RAM; 544 newCache->temporary = 1; 545 newCache->scan_skip = cache->scan_skip; 546 547 vm_cache_add_consumer_locked(cacheRef, newCache); 548 549 mutex_unlock(&cacheRef->lock); 550 mutex_lock(&newCacheRef->lock); 551 552 cache = newCache; 553 cacheRef = newCache->ref; 554 store = newStore; 555 cache->virtual_base = offset; 556 cache->virtual_size = offset + size; 557 } 558 559 status = vm_cache_set_minimal_commitment_locked(cacheRef, offset + size); 560 if (status != B_OK) 561 goto err2; 562 563 acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0); 564 565 // check to see if this address space has entered DELETE state 566 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 567 // okay, someone is trying to delete this address space now, so we 
can't 568 // insert the area, so back out 569 status = B_BAD_TEAM_ID; 570 goto err3; 571 } 572 573 status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area); 574 if (status < B_OK) 575 goto err3; 576 577 // attach the cache to the area 578 area->cache_ref = cacheRef; 579 area->cache_offset = offset; 580 581 // point the cache back to the area 582 vm_cache_insert_area_locked(cacheRef, area); 583 mutex_unlock(&cacheRef->lock); 584 585 // insert the area in the global area hash table 586 acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0 ,0); 587 hash_insert(sAreaHash, area); 588 release_sem_etc(sAreaHashLock, WRITE_COUNT, 0); 589 590 // grab a ref to the address space (the area holds this) 591 atomic_add(&addressSpace->ref_count, 1); 592 593 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 594 595 *_area = area; 596 return B_OK; 597 598 err3: 599 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 600 err2: 601 if (mapping == REGION_PRIVATE_MAP) { 602 // we created this cache, so we must delete it again 603 mutex_unlock(&cacheRef->lock); 604 vm_cache_release_ref(cacheRef); 605 unlock = false; 606 } 607 err1: 608 if (unlock) 609 mutex_unlock(&cacheRef->lock); 610 free(area->name); 611 free(area); 612 return status; 613 } 614 615 616 status_t 617 vm_unreserve_address_range(team_id team, void *address, addr_t size) 618 { 619 vm_address_space *addressSpace; 620 vm_area *area, *last = NULL; 621 status_t status = B_OK; 622 623 addressSpace = vm_get_address_space_by_id(team); 624 if (addressSpace == NULL) 625 return B_BAD_TEAM_ID; 626 627 acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0); 628 629 // check to see if this address space has entered DELETE state 630 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 631 // okay, someone is trying to delete this address space now, so we can't 632 // insert the area, so back out 633 status = B_BAD_TEAM_ID; 634 goto out; 635 } 636 637 // search area list and remove any matching reserved ranges 638 639 area = addressSpace->areas; 640 while (area) { 641 // the area must be completely part of the reserved range 642 if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address 643 && area->base + area->size <= (addr_t)address + size) { 644 // remove reserved range 645 vm_area *reserved = area; 646 if (last) 647 last->address_space_next = reserved->address_space_next; 648 else 649 addressSpace->areas = reserved->address_space_next; 650 651 area = reserved->address_space_next; 652 free(reserved); 653 continue; 654 } 655 656 last = area; 657 area = area->address_space_next; 658 } 659 660 out: 661 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 662 vm_put_address_space(addressSpace); 663 return status; 664 } 665 666 667 status_t 668 vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec, 669 addr_t size, uint32 flags) 670 { 671 vm_address_space *addressSpace; 672 vm_area *area; 673 status_t status = B_OK; 674 675 if (size == 0) 676 return B_BAD_VALUE; 677 678 addressSpace = vm_get_address_space_by_id(team); 679 if (addressSpace == NULL) 680 return B_BAD_TEAM_ID; 681 682 area = create_reserved_area_struct(addressSpace, flags); 683 if (area == NULL) { 684 status = B_NO_MEMORY; 685 goto err1; 686 } 687 688 acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0); 689 690 // check to see if this address space has entered DELETE state 691 if (addressSpace->state == VM_ASPACE_STATE_DELETION) { 692 // okay, someone is trying to delete this address space now, so we can't 693 // insert the area, let's back out 694 status = 
B_BAD_TEAM_ID; 695 goto err2; 696 } 697 698 status = insert_area(addressSpace, _address, addressSpec, size, area); 699 if (status < B_OK) 700 goto err2; 701 702 // the area is now reserved! 703 704 area->cache_offset = area->base; 705 // we cache the original base address here 706 707 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 708 return B_OK; 709 710 err2: 711 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 712 free(area); 713 err1: 714 vm_put_address_space(addressSpace); 715 return status; 716 } 717 718 719 area_id 720 vm_create_anonymous_area(team_id aid, const char *name, void **address, 721 uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection) 722 { 723 vm_cache_ref *cacheRef; 724 vm_area *area; 725 vm_cache *cache; 726 vm_store *store; 727 vm_page *page = NULL; 728 bool isStack = (protection & B_STACK_AREA) != 0; 729 bool canOvercommit = false; 730 status_t status; 731 732 TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size)); 733 734 if (size == 0) 735 return B_BAD_VALUE; 736 if (!arch_vm_supports_protection(protection)) 737 return B_NOT_SUPPORTED; 738 739 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 740 canOvercommit = true; 741 742 #ifdef DEBUG_KERNEL_STACKS 743 if ((protection & B_KERNEL_STACK_AREA) != 0) 744 isStack = true; 745 #endif 746 747 /* check parameters */ 748 switch (addressSpec) { 749 case B_ANY_ADDRESS: 750 case B_EXACT_ADDRESS: 751 case B_BASE_ADDRESS: 752 case B_ANY_KERNEL_ADDRESS: 753 break; 754 755 default: 756 return B_BAD_VALUE; 757 } 758 759 switch (wiring) { 760 case B_NO_LOCK: 761 case B_FULL_LOCK: 762 case B_LAZY_LOCK: 763 case B_CONTIGUOUS: 764 case B_ALREADY_WIRED: 765 break; 766 case B_LOMEM: 767 //case B_SLOWMEM: 768 dprintf("B_LOMEM/SLOWMEM is not yet supported!\n"); 769 wiring = B_FULL_LOCK; 770 break; 771 default: 772 return B_BAD_VALUE; 773 } 774 775 vm_address_space *addressSpace = vm_get_address_space_by_id(aid); 776 if (addressSpace == NULL) 777 return B_BAD_TEAM_ID; 778 779 size = PAGE_ALIGN(size); 780 781 if (wiring == B_CONTIGUOUS) { 782 // we try to allocate the page run here upfront as this may easily 783 // fail for obvious reasons 784 page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE); 785 if (page == NULL) { 786 vm_put_address_space(addressSpace); 787 return B_NO_MEMORY; 788 } 789 } 790 791 // create an anonymous store object 792 // if it's a stack, make sure that two pages are available at least 793 store = vm_store_create_anonymous_noswap(canOvercommit, isStack ? 2 : 0, 794 isStack ? ((protection & B_USER_PROTECTION) != 0 ? 
795 USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0); 796 if (store == NULL) { 797 status = B_NO_MEMORY; 798 goto err1; 799 } 800 cache = vm_cache_create(store); 801 if (cache == NULL) { 802 status = B_NO_MEMORY; 803 goto err2; 804 } 805 status = vm_cache_ref_create(cache, false); 806 if (status < B_OK) 807 goto err3; 808 809 cache->temporary = 1; 810 cache->type = CACHE_TYPE_RAM; 811 cache->virtual_size = size; 812 813 switch (wiring) { 814 case B_LAZY_LOCK: 815 case B_FULL_LOCK: 816 case B_CONTIGUOUS: 817 case B_ALREADY_WIRED: 818 cache->scan_skip = 1; 819 break; 820 case B_NO_LOCK: 821 cache->scan_skip = 0; 822 break; 823 } 824 825 cacheRef = cache->ref; 826 827 status = map_backing_store(addressSpace, cacheRef, address, 0, size, 828 addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name); 829 if (status < B_OK) { 830 vm_cache_release_ref(cacheRef); 831 goto err1; 832 } 833 834 switch (wiring) { 835 case B_NO_LOCK: 836 case B_LAZY_LOCK: 837 // do nothing - the pages are mapped in as needed 838 break; 839 840 case B_FULL_LOCK: 841 { 842 // Allocate and map all pages for this area 843 mutex_lock(&cacheRef->lock); 844 845 off_t offset = 0; 846 for (addr_t address = area->base; address < area->base + (area->size - 1); 847 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 848 #ifdef DEBUG_KERNEL_STACKS 849 # ifdef STACK_GROWS_DOWNWARDS 850 if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES 851 * B_PAGE_SIZE) 852 # else 853 if (isStack && address >= area->base + area->size 854 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 855 # endif 856 continue; 857 #endif 858 vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR); 859 if (page == NULL) { 860 // this shouldn't really happen, as we reserve the memory upfront 861 panic("couldn't fulfill B_FULL lock!"); 862 } 863 864 vm_cache_insert_page(cacheRef, page, offset); 865 vm_map_page(area, page, address, protection); 866 } 867 868 mutex_unlock(&cacheRef->lock); 869 break; 870 } 871 872 case B_ALREADY_WIRED: 873 { 874 // the pages should already be mapped. This is only really useful during 875 // boot time. Find the appropriate vm_page objects and stick them in 876 // the cache object. 877 vm_translation_map *map = &addressSpace->translation_map; 878 off_t offset = 0; 879 880 if (!kernel_startup) 881 panic("ALREADY_WIRED flag used outside kernel startup\n"); 882 883 mutex_lock(&cacheRef->lock); 884 map->ops->lock(map); 885 886 for (addr_t virtualAddress = area->base; virtualAddress < area->base 887 + (area->size - 1); virtualAddress += B_PAGE_SIZE, 888 offset += B_PAGE_SIZE) { 889 addr_t physicalAddress; 890 uint32 flags; 891 status = map->ops->query(map, virtualAddress, 892 &physicalAddress, &flags); 893 if (status < B_OK) { 894 panic("looking up mapping failed for va 0x%lx\n", 895 virtualAddress); 896 } 897 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 898 if (page == NULL) { 899 panic("looking up page failed for pa 0x%lx\n", 900 physicalAddress); 901 } 902 903 page->wired_count++; 904 // TODO: needs to be atomic on all platforms! 
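				// mark the page as wired and make it known to the cache at
				// this offset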
				vm_page_set_state(page, PAGE_STATE_WIRED);
				vm_cache_insert_page(cacheRef, page, offset);
			}

			map->ops->unlock(map);
			mutex_unlock(&cacheRef->lock);
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can now just
			// map the pages in the address space
			vm_translation_map *map = &addressSpace->translation_map;
			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress;
			off_t offset = 0;

			mutex_lock(&cacheRef->lock);
			map->ops->lock(map);

			for (virtualAddress = area->base; virtualAddress < area->base
					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->ops->map(map, virtualAddress, physicalAddress,
					protection);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				page->wired_count++;
					// TODO: needs to be atomic on all platforms!
				vm_page_set_state(page, PAGE_STATE_WIRED);
				vm_cache_insert_page(cacheRef, page, offset);
			}

			map->ops->unlock(map);
			mutex_unlock(&cacheRef->lock);
			break;
		}

		default:
			break;
	}
	vm_put_address_space(addressSpace);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err3:
	free(cache);
err2:
	store->ops->destroy(store);
err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
		addr_t pageNumber = page->physical_page_number;
		int32 i;
		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
			page = vm_lookup_page(pageNumber);
			if (page == NULL)
				panic("couldn't lookup physical page just allocated\n");

			vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

	vm_put_address_space(addressSpace);
	return status;
}


area_id
vm_map_physical_memory(team_id aspaceID, const char *name, void **_address,
	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
{
	vm_cache_ref *cacheRef;
	vm_area *area;
	vm_cache *cache;
	vm_store *store;
	addr_t mapOffset;
	status_t status;

	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, spec = %ld,"
		" size = %lu, protection = %ld, phys = %p)\n",
		aspaceID, name, _address, addressSpec, size, protection,
		(void *)physicalAddress));

	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	vm_address_space *addressSpace = vm_get_address_space_by_id(aspaceID);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	// if the physical address is somewhat inside a page,
	// move the actual area down to align on a page boundary
	mapOffset = physicalAddress % B_PAGE_SIZE;
	size += mapOffset;
	physicalAddress -= mapOffset;

	size = PAGE_ALIGN(size);

	// create a device store object

	store = vm_store_create_device(physicalAddress);
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}
	cache = vm_cache_create(store);
	if (cache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}
	status = vm_cache_ref_create(cache, false);
	if (status < B_OK)
		goto err3;

	// tell the page scanner to skip over this area, its pages are special
	cache->scan_skip = 1;
	cache->type = CACHE_TYPE_DEVICE;
	cache->virtual_size = size;

	cacheRef = cache->ref;

	status = map_backing_store(addressSpace, cacheRef, _address, 0, size,
		addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
		REGION_NO_PRIVATE_MAP, &area, name);
	if (status < B_OK)
		vm_cache_release_ref(cacheRef);

	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
		// set requested memory type
		status = arch_vm_set_memory_type(area, physicalAddress,
			addressSpec & B_MTR_MASK);
		if (status < B_OK)
			vm_put_area(area);
	}

	if (status >= B_OK) {
		// make sure our area is mapped in completely

		vm_translation_map *map = &addressSpace->translation_map;
		map->ops->lock(map);

		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
			map->ops->map(map, area->base + offset, physicalAddress + offset,
				protection);
		}

		map->ops->unlock(map);
	}

	vm_put_address_space(addressSpace);
	if (status < B_OK)
		return status;

	// modify the pointer returned to be offset back into the new area
	// the same way the physical address was offset
	*_address = (void *)((addr_t)*_address + mapOffset);

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;

err3:
	free(cache);
err2:
	store->ops->destroy(store);
err1:
	vm_put_address_space(addressSpace);
	return status;
}


area_id
vm_create_null_area(team_id team, const char *name, void **address,
	uint32 addressSpec, addr_t size)
{
	vm_area *area;
	vm_cache *cache;
	vm_cache_ref *cacheRef;
	vm_store *store;
	status_t status;

	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	size = PAGE_ALIGN(size);

	// create a null store object

	store = vm_store_create_null();
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}
	cache = vm_cache_create(store);
	if (cache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}
	status = vm_cache_ref_create(cache, false);
	if (status < B_OK)
		goto err3;

	// tell the page scanner to skip over this area, no pages will be mapped here
	cache->scan_skip = 1;
	cache->type = CACHE_TYPE_NULL;
	cache->virtual_size = size;

	cacheRef = cache->ref;

	status = map_backing_store(addressSpace, cacheRef, address, 0, size, addressSpec, 0,
		B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);

	vm_put_address_space(addressSpace);

	if (status < B_OK) {
		vm_cache_release_ref(cacheRef);
		return status;
	}

	area->cache_type = CACHE_TYPE_NULL;
	return area->id;

err3:
	free(cache);
err2:
	store->ops->destroy(store);
err1:
	vm_put_address_space(addressSpace);
	return status;
}


/** Creates the vnode cache for the specified \a vnode.
 *	The vnode has to be marked busy when calling this function.
 *	If successful, it will also acquire an extra reference to
 *	the vnode (as the vnode store itself can't do this
 *	automatically).
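 *	On success, \a _cacheRef is set to the reference object of the new cache.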
1152 */ 1153 1154 status_t 1155 vm_create_vnode_cache(void *vnode, struct vm_cache_ref **_cacheRef) 1156 { 1157 status_t status; 1158 1159 // create a vnode store object 1160 vm_store *store = vm_create_vnode_store(vnode); 1161 if (store == NULL) 1162 return B_NO_MEMORY; 1163 1164 vm_cache *cache = vm_cache_create(store); 1165 if (cache == NULL) { 1166 status = B_NO_MEMORY; 1167 goto err1; 1168 } 1169 status = vm_cache_ref_create(cache, false); 1170 if (status < B_OK) 1171 goto err2; 1172 1173 cache->type = CACHE_TYPE_VNODE; 1174 1175 *_cacheRef = cache->ref; 1176 vfs_acquire_vnode(vnode); 1177 return B_OK; 1178 1179 err2: 1180 free(cache); 1181 err1: 1182 store->ops->destroy(store); 1183 return status; 1184 } 1185 1186 1187 /** Will map the file at the path specified by \a name to an area in memory. 1188 * The file will be mirrored beginning at the specified \a offset. The \a offset 1189 * and \a size arguments have to be page aligned. 1190 */ 1191 1192 static area_id 1193 _vm_map_file(team_id team, const char *name, void **_address, uint32 addressSpec, 1194 size_t size, uint32 protection, uint32 mapping, const char *path, 1195 off_t offset, bool kernel) 1196 { 1197 vm_cache_ref *cacheRef; 1198 vm_area *area; 1199 void *vnode; 1200 status_t status; 1201 1202 // ToDo: maybe attach to an FD, not a path (or both, like VFS calls) 1203 // ToDo: check file access permissions (would be already done if the above were true) 1204 // ToDo: for binary files, we want to make sure that they get the 1205 // copy of a file at a given time, ie. later changes should not 1206 // make it into the mapped copy -- this will need quite some changes 1207 // to be done in a nice way 1208 1209 vm_address_space *addressSpace = vm_get_address_space_by_id(team); 1210 if (addressSpace == NULL) 1211 return B_BAD_TEAM_ID; 1212 1213 TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n", 1214 path, offset, size, mapping)); 1215 1216 offset = ROUNDOWN(offset, B_PAGE_SIZE); 1217 size = PAGE_ALIGN(size); 1218 1219 // get the vnode for the object, this also grabs a ref to it 1220 status = vfs_get_vnode_from_path(path, kernel, &vnode); 1221 if (status < B_OK) 1222 goto err1; 1223 1224 // ToDo: this only works for file systems that use the file cache 1225 status = vfs_get_vnode_cache(vnode, &cacheRef, false); 1226 1227 vfs_put_vnode(vnode); 1228 // we don't need this vnode anymore - if the above call was 1229 // successful, the store already has a ref to it 1230 1231 if (status < B_OK) 1232 goto err1; 1233 1234 status = map_backing_store(addressSpace, cacheRef, _address, 1235 offset, size, addressSpec, 0, protection, mapping, &area, name); 1236 if (status < B_OK || mapping == REGION_PRIVATE_MAP) { 1237 // map_backing_store() cannot know we no longer need the ref 1238 vm_cache_release_ref(cacheRef); 1239 } 1240 if (status < B_OK) 1241 goto err1; 1242 1243 vm_put_address_space(addressSpace); 1244 area->cache_type = CACHE_TYPE_VNODE; 1245 return area->id; 1246 1247 err1: 1248 vm_put_address_space(addressSpace); 1249 return status; 1250 } 1251 1252 1253 area_id 1254 vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec, 1255 addr_t size, uint32 protection, uint32 mapping, const char *path, off_t offset) 1256 { 1257 if (!arch_vm_supports_protection(protection)) 1258 return B_NOT_SUPPORTED; 1259 1260 return _vm_map_file(aid, name, address, addressSpec, size, protection, 1261 mapping, path, offset, true); 1262 } 1263 1264 1265 // ToDo: create a BeOS style call for this! 
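/** The userland counterpart of vm_map_file(): it copies the name, path, and
 *	address arguments from userland and makes sure the kernel can always
 *	access the resulting area (B_KERNEL_READ_AREA, plus B_KERNEL_WRITE_AREA
 *	for writable mappings) before calling _vm_map_file().
 */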
1266 1267 area_id 1268 _user_vm_map_file(const char *userName, void **userAddress, int addressSpec, 1269 addr_t size, int protection, int mapping, const char *userPath, off_t offset) 1270 { 1271 char name[B_OS_NAME_LENGTH]; 1272 char path[B_PATH_NAME_LENGTH]; 1273 void *address; 1274 area_id area; 1275 1276 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 1277 || !IS_USER_ADDRESS(userPath) 1278 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 1279 || user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK 1280 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 1281 return B_BAD_ADDRESS; 1282 1283 // userland created areas can always be accessed by the kernel 1284 protection |= B_KERNEL_READ_AREA | (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0); 1285 1286 area = _vm_map_file(vm_current_user_address_space_id(), name, &address, 1287 addressSpec, size, protection, mapping, path, offset, false); 1288 if (area < B_OK) 1289 return area; 1290 1291 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 1292 return B_BAD_ADDRESS; 1293 1294 return area; 1295 } 1296 1297 1298 area_id 1299 vm_clone_area(team_id team, const char *name, void **address, uint32 addressSpec, 1300 uint32 protection, uint32 mapping, area_id sourceID) 1301 { 1302 vm_area *newArea = NULL; 1303 vm_area *sourceArea; 1304 status_t status; 1305 1306 vm_address_space *addressSpace = vm_get_address_space_by_id(team); 1307 if (addressSpace == NULL) 1308 return B_BAD_TEAM_ID; 1309 1310 sourceArea = vm_get_area(sourceID); 1311 if (sourceArea == NULL) { 1312 vm_put_address_space(addressSpace); 1313 return B_BAD_VALUE; 1314 } 1315 1316 vm_cache_acquire_ref(sourceArea->cache_ref); 1317 1318 // ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers 1319 // have been adapted. Maybe it should be part of the kernel settings, 1320 // anyway (so that old drivers can always work). 1321 #if 0 1322 if (sourceArea->aspace == vm_kernel_address_space() && addressSpace != vm_kernel_address_space() 1323 && !(sourceArea->protection & B_USER_CLONEABLE_AREA)) { 1324 // kernel areas must not be cloned in userland, unless explicitly 1325 // declared user-cloneable upon construction 1326 status = B_NOT_ALLOWED; 1327 } else 1328 #endif 1329 if (sourceArea->cache_type == CACHE_TYPE_NULL) 1330 status = B_NOT_ALLOWED; 1331 else { 1332 status = map_backing_store(addressSpace, sourceArea->cache_ref, 1333 address, sourceArea->cache_offset, sourceArea->size, addressSpec, 1334 sourceArea->wiring, protection, mapping, &newArea, name); 1335 } 1336 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 1337 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 1338 // to create a new ref, and has therefore already acquired a reference 1339 // to the source cache - but otherwise it has no idea that we need 1340 // one. 
1341 vm_cache_acquire_ref(sourceArea->cache_ref); 1342 } 1343 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 1344 // we need to map in everything at this point 1345 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 1346 // we don't have actual pages to map but a physical area 1347 vm_translation_map *map = &sourceArea->address_space->translation_map; 1348 map->ops->lock(map); 1349 1350 addr_t physicalAddress; 1351 uint32 oldProtection; 1352 map->ops->query(map, sourceArea->base, &physicalAddress, 1353 &oldProtection); 1354 1355 map->ops->unlock(map); 1356 1357 map = &addressSpace->translation_map; 1358 map->ops->lock(map); 1359 1360 for (addr_t offset = 0; offset < newArea->size; 1361 offset += B_PAGE_SIZE) { 1362 map->ops->map(map, newArea->base + offset, 1363 physicalAddress + offset, protection); 1364 } 1365 1366 map->ops->unlock(map); 1367 } else { 1368 // map in all pages from source 1369 mutex_lock(&sourceArea->cache_ref->lock); 1370 1371 for (vm_page *page = sourceArea->cache_ref->cache->page_list; 1372 page != NULL; page = page->cache_next) { 1373 vm_map_page(newArea, page, newArea->base 1374 + ((page->cache_offset << PAGE_SHIFT) - newArea->cache_offset), 1375 protection); 1376 } 1377 1378 mutex_unlock(&sourceArea->cache_ref->lock); 1379 } 1380 } 1381 if (status == B_OK) 1382 newArea->cache_type = sourceArea->cache_type; 1383 1384 vm_cache_release_ref(sourceArea->cache_ref); 1385 1386 vm_put_area(sourceArea); 1387 vm_put_address_space(addressSpace); 1388 1389 if (status < B_OK) 1390 return status; 1391 1392 return newArea->id; 1393 } 1394 1395 1396 static status_t 1397 _vm_delete_area(vm_address_space *addressSpace, area_id id) 1398 { 1399 status_t status = B_OK; 1400 vm_area *area; 1401 1402 TRACE(("vm_delete_area: aspace id 0x%lx, area id 0x%lx\n", addressSpace->id, id)); 1403 1404 area = vm_get_area(id); 1405 if (area == NULL) 1406 return B_BAD_VALUE; 1407 1408 if (area->address_space == addressSpace) { 1409 vm_put_area(area); 1410 // next put below will actually delete it 1411 } else 1412 status = B_NOT_ALLOWED; 1413 1414 vm_put_area(area); 1415 return status; 1416 } 1417 1418 1419 status_t 1420 vm_delete_area(team_id team, area_id id) 1421 { 1422 vm_address_space *addressSpace; 1423 status_t err; 1424 1425 addressSpace = vm_get_address_space_by_id(team); 1426 if (addressSpace == NULL) 1427 return B_BAD_TEAM_ID; 1428 1429 err = _vm_delete_area(addressSpace, id); 1430 vm_put_address_space(addressSpace); 1431 return err; 1432 } 1433 1434 1435 static void 1436 remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area, bool locked) 1437 { 1438 vm_area *temp, *last = NULL; 1439 1440 if (!locked) 1441 acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0); 1442 1443 temp = addressSpace->areas; 1444 while (temp != NULL) { 1445 if (area == temp) { 1446 if (last != NULL) { 1447 last->address_space_next = temp->address_space_next; 1448 } else { 1449 addressSpace->areas = temp->address_space_next; 1450 } 1451 addressSpace->change_count++; 1452 break; 1453 } 1454 last = temp; 1455 temp = temp->address_space_next; 1456 } 1457 if (area == addressSpace->area_hint) 1458 addressSpace->area_hint = NULL; 1459 1460 if (!locked) 1461 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 1462 1463 if (temp == NULL) 1464 panic("vm_area_release_ref: area not found in aspace's area list\n"); 1465 } 1466 1467 1468 static bool 1469 _vm_put_area(vm_area *area, bool aspaceLocked) 1470 { 1471 vm_address_space *addressSpace; 1472 bool removeit = false; 1473 1474 
	TRACE(("_vm_put_area(area = %p, aspaceLocked = %s)\n",
		area, aspaceLocked ? "yes" : "no"));

	// we should never get here, but if we do, we can handle it
	if (area->id == RESERVED_AREA_ID)
		return false;

	addressSpace = area->address_space;

	// grab a write lock on the address space around the removal of the area
	// from the global hash table to avoid a race with vm_soft_fault()
	if (!aspaceLocked)
		acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);

	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
	if (atomic_add(&area->ref_count, -1) == 1) {
		hash_remove(sAreaHash, area);
		removeit = true;
	}
	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);

	if (!aspaceLocked)
		release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);

	if (!removeit)
		return false;

	// At this point the area is removed from the global hash table, but it still
	// exists in the area list. Its ref_count is zero, and it is guaranteed not
	// to be incremented anymore (by a direct hash lookup, or vm_area_lookup()).

	// unmap the virtual address space the area occupied. any page faults at this
	// point should fail in vm_area_lookup().
	vm_unmap_pages(area, area->base, area->size);

	// ToDo: do that only for vnode stores
	vm_cache_write_modified(area->cache_ref, false);

	arch_vm_unset_memory_type(area);
	remove_area_from_address_space(addressSpace, area, aspaceLocked);

	vm_cache_remove_area(area->cache_ref, area);
	vm_cache_release_ref(area->cache_ref);

	// now we can give up the area's reference to the address space
	vm_put_address_space(addressSpace);

	free(area->name);
	free(area);
	return true;
}


static bool
vm_put_area(vm_area *area)
{
	return _vm_put_area(area, false);
}


static status_t
vm_copy_on_write_area(vm_area *area)
{
	vm_store *store;
	vm_cache *upperCache, *lowerCache;
	vm_cache_ref *upperCacheRef, *lowerCacheRef;
	vm_translation_map *map;
	vm_page *page;
	uint32 protection;
	status_t status;

	TRACE(("vm_copy_on_write_area(area = %p)\n", area));

	// We need to separate the vm_cache from its vm_cache_ref: the area
	// and its cache_ref go into a new layer on top of the old one.
	// So the old cache gets a new cache_ref and the area a new cache.
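	// Roughly, the layout changes like this (sketch):
	//
	//   before:  area --> cache_ref --> cache (holds the pages)
	//   after:   area --> cache_ref --> new empty RAM cache
	//                                      | consumer of
	//                                      v
	//                   new cache_ref --> old cache (still holds the pages)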
	upperCacheRef = area->cache_ref;

	// we will exchange the cache_ref's cache, so we better hold its lock
	mutex_lock(&upperCacheRef->lock);

	lowerCache = upperCacheRef->cache;

	// create an anonymous store object
	store = vm_store_create_anonymous_noswap(false, 0, 0);
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}

	upperCache = vm_cache_create(store);
	if (upperCache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}

	// we need to hold the cache_ref lock when we want to switch its cache
	status = vm_cache_ref_create(lowerCache, true);
	if (status < B_OK)
		goto err3;

	lowerCacheRef = lowerCache->ref;

	// The area must be readable in the same way it was previously writable
	protection = B_KERNEL_READ_AREA;
	if (area->protection & B_READ_AREA)
		protection |= B_READ_AREA;

	upperCache->type = CACHE_TYPE_RAM;
	upperCache->temporary = 1;
	upperCache->scan_skip = lowerCache->scan_skip;
	upperCache->virtual_base = lowerCache->virtual_base;
	upperCache->virtual_size = lowerCache->virtual_size;

	upperCache->ref = upperCacheRef;
	upperCacheRef->cache = upperCache;

	// we need to manually alter the ref_count (divide it between the two)
	// the lower cache_ref has only known refs, so compute them
	{
		int32 count = 0;
		vm_cache *consumer = NULL;
		while ((consumer = (vm_cache *)list_get_next_item(
				&lowerCache->consumers, consumer)) != NULL) {
			count++;
		}

		atomic_add(&lowerCacheRef->ref_count, count);
		atomic_add(&upperCacheRef->ref_count, -count);
	}

	vm_cache_add_consumer_locked(lowerCacheRef, upperCache);

	// We now need to remap all pages from the area read-only, so that
	// a copy will be created on next write access

	map = &area->address_space->translation_map;
	map->ops->lock(map);
	map->ops->unmap(map, area->base, area->base - 1 + area->size);
	map->ops->flush(map);

	// TODO: does anything guarantee that we remap the same pages here?
	//	Shouldn't we better introduce a "change mapping"?
1618 1619 for (page = lowerCache->page_list; page; page = page->cache_next) { 1620 map->ops->map(map, area->base + (page->cache_offset << PAGE_SHIFT) 1621 - area->cache_offset, page->physical_page_number << PAGE_SHIFT, 1622 protection); 1623 } 1624 1625 map->ops->unlock(map); 1626 1627 mutex_unlock(&lowerCacheRef->lock); 1628 mutex_unlock(&upperCacheRef->lock); 1629 1630 vm_cache_release_ref(lowerCacheRef); 1631 1632 return B_OK; 1633 1634 err3: 1635 free(upperCache); 1636 err2: 1637 store->ops->destroy(store); 1638 err1: 1639 mutex_unlock(&upperCacheRef->lock); 1640 return status; 1641 } 1642 1643 1644 area_id 1645 vm_copy_area(team_id addressSpaceID, const char *name, void **_address, uint32 addressSpec, 1646 uint32 protection, area_id sourceID) 1647 { 1648 vm_address_space *addressSpace; 1649 vm_cache_ref *cacheRef; 1650 vm_area *target, *source; 1651 status_t status; 1652 bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 1653 1654 if ((protection & B_KERNEL_PROTECTION) == 0) { 1655 // set the same protection for the kernel as for userland 1656 protection |= B_KERNEL_READ_AREA; 1657 if (writableCopy) 1658 protection |= B_KERNEL_WRITE_AREA; 1659 } 1660 1661 if ((source = vm_get_area(sourceID)) == NULL) 1662 return B_BAD_VALUE; 1663 1664 addressSpace = vm_get_address_space_by_id(addressSpaceID); 1665 cacheRef = source->cache_ref; 1666 1667 if (addressSpec == B_CLONE_ADDRESS) { 1668 addressSpec = B_EXACT_ADDRESS; 1669 *_address = (void *)source->base; 1670 } 1671 1672 // First, create a cache on top of the source area 1673 1674 if (!writableCopy) { 1675 // map_backing_store() cannot know it has to acquire a ref to 1676 // the store for REGION_NO_PRIVATE_MAP 1677 vm_cache_acquire_ref(cacheRef); 1678 } 1679 1680 status = map_backing_store(addressSpace, cacheRef, _address, 1681 source->cache_offset, source->size, addressSpec, source->wiring, protection, 1682 writableCopy ? REGION_PRIVATE_MAP : REGION_NO_PRIVATE_MAP, 1683 &target, name); 1684 if (status < B_OK) { 1685 if (!writableCopy) 1686 vm_cache_release_ref(cacheRef); 1687 goto err; 1688 } 1689 1690 // If the source area is writable, we need to move it one layer up as well 1691 1692 if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) { 1693 // ToDo: do something more useful if this fails! 
1694 if (vm_copy_on_write_area(source) < B_OK) 1695 panic("vm_copy_on_write_area() failed!\n"); 1696 } 1697 1698 // we want to return the ID of the newly created area 1699 status = target->id; 1700 1701 err: 1702 vm_put_address_space(addressSpace); 1703 vm_put_area(source); 1704 1705 return status; 1706 } 1707 1708 1709 static int32 1710 count_writable_areas(vm_cache_ref *ref, vm_area *ignoreArea) 1711 { 1712 struct vm_area *area = ref->areas; 1713 uint32 count = 0; 1714 1715 for (; area != NULL; area = area->cache_next) { 1716 if (area != ignoreArea 1717 && (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) 1718 count++; 1719 } 1720 1721 return count; 1722 } 1723 1724 1725 static status_t 1726 vm_set_area_protection(team_id aspaceID, area_id areaID, uint32 newProtection) 1727 { 1728 vm_cache_ref *cacheRef; 1729 vm_cache *cache; 1730 vm_area *area; 1731 status_t status = B_OK; 1732 1733 TRACE(("vm_set_area_protection(aspace = %#lx, area = %#lx, protection = %#lx)\n", 1734 aspaceID, areaID, newProtection)); 1735 1736 if (!arch_vm_supports_protection(newProtection)) 1737 return B_NOT_SUPPORTED; 1738 1739 area = vm_get_area(areaID); 1740 if (area == NULL) 1741 return B_BAD_VALUE; 1742 1743 if (aspaceID != vm_kernel_address_space_id() && area->address_space->id != aspaceID) { 1744 // unless you're the kernel, you are only allowed to set 1745 // the protection of your own areas 1746 vm_put_area(area); 1747 return B_NOT_ALLOWED; 1748 } 1749 1750 cacheRef = area->cache_ref; 1751 mutex_lock(&cacheRef->lock); 1752 1753 cache = cacheRef->cache; 1754 1755 if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 1756 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) { 1757 // change from read/write to read-only 1758 1759 if (cache->source != NULL && cache->temporary) { 1760 if (count_writable_areas(cacheRef, area) == 0) { 1761 // Since this cache now lives from the pages in its source cache, 1762 // we can change the cache's commitment to take only those pages 1763 // into account that really are in this cache. 1764 1765 // count existing pages in this cache 1766 struct vm_page *page = cache->page_list; 1767 uint32 count = 0; 1768 1769 for (; page != NULL; page = page->cache_next) { 1770 count++; 1771 } 1772 1773 status = cache->store->ops->commit(cache->store, 1774 cache->virtual_base + count * B_PAGE_SIZE); 1775 1776 // ToDo: we may be able to join with our source cache, if count == 0 1777 } 1778 } 1779 } else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0 1780 && (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 1781 // change from read-only to read/write 1782 1783 // ToDo: if this is a shared cache, insert new cache (we only know about other 1784 // areas in this cache yet, though, not about child areas) 1785 // -> use this call with care, it might currently have unwanted consequences 1786 // because of this. It should always be safe though, if there are no other 1787 // (child) areas referencing this area's cache (you just might not know). 
1788 if (count_writable_areas(cacheRef, area) == 0 1789 && (cacheRef->areas != area || area->cache_next)) { 1790 // ToDo: child areas are not tested for yet 1791 dprintf("set_area_protection(): warning, would need to insert a new cache_ref (not yet implemented)!\n"); 1792 status = B_NOT_ALLOWED; 1793 } else 1794 dprintf("set_area_protection() may not work correctly yet in this direction!\n"); 1795 1796 if (status == B_OK && cache->source != NULL && cache->temporary) { 1797 // the cache's commitment must contain all possible pages 1798 status = cache->store->ops->commit(cache->store, cache->virtual_size); 1799 } 1800 } else { 1801 // we don't have anything special to do in all other cases 1802 } 1803 1804 if (status == B_OK && area->protection != newProtection) { 1805 // remap existing pages in this cache 1806 struct vm_translation_map *map = &area->address_space->translation_map; 1807 1808 map->ops->lock(map); 1809 map->ops->protect(map, area->base, area->base + area->size, newProtection); 1810 map->ops->unlock(map); 1811 1812 area->protection = newProtection; 1813 } 1814 1815 mutex_unlock(&cacheRef->lock); 1816 vm_put_area(area); 1817 1818 return status; 1819 } 1820 1821 1822 status_t 1823 vm_get_page_mapping(team_id aid, addr_t vaddr, addr_t *paddr) 1824 { 1825 vm_address_space *addressSpace; 1826 uint32 null_flags; 1827 status_t err; 1828 1829 addressSpace = vm_get_address_space_by_id(aid); 1830 if (addressSpace == NULL) 1831 return B_BAD_TEAM_ID; 1832 1833 err = addressSpace->translation_map.ops->query(&addressSpace->translation_map, 1834 vaddr, paddr, &null_flags); 1835 1836 vm_put_address_space(addressSpace); 1837 return err; 1838 } 1839 1840 1841 int32 1842 vm_test_map_activation(vm_page *page) 1843 { 1844 int32 activation = 0; 1845 1846 // TODO: this can't work... (we need to lock the map, so this has to be a mutex) 1847 cpu_status state = disable_interrupts(); 1848 acquire_spinlock(&sMappingLock); 1849 1850 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 1851 vm_page_mapping *mapping; 1852 while ((mapping = iterator.Next()) != NULL) { 1853 vm_area *area = mapping->area; 1854 vm_translation_map *map = &area->address_space->translation_map; 1855 1856 addr_t physicalAddress; 1857 uint32 flags; 1858 // map->ops->lock(map); 1859 addr_t address = area->base + (page->cache_offset << PAGE_SHIFT); 1860 map->ops->query_interrupt(map, address, &physicalAddress, &flags); 1861 // map->ops->unlock(map); 1862 1863 if (flags & PAGE_ACCESSED) 1864 activation++; 1865 } 1866 1867 release_spinlock(&sMappingLock); 1868 restore_interrupts(state); 1869 1870 return activation; 1871 } 1872 1873 1874 void 1875 vm_clear_map_activation(vm_page *page) 1876 { 1877 // TODO: this can't work... (we need to lock the map, so this has to be a mutex) 1878 cpu_status state = disable_interrupts(); 1879 acquire_spinlock(&sMappingLock); 1880 1881 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 1882 vm_page_mapping *mapping; 1883 while ((mapping = iterator.Next()) != NULL) { 1884 vm_area *area = mapping->area; 1885 vm_translation_map *map = &area->address_space->translation_map; 1886 1887 // map->ops->lock(map); 1888 addr_t address = area->base + (page->cache_offset << PAGE_SHIFT); 1889 map->ops->clear_flags(map, address, PAGE_ACCESSED); 1890 // map->ops->unlock(map); 1891 } 1892 1893 release_spinlock(&sMappingLock); 1894 restore_interrupts(state); 1895 } 1896 1897 1898 void 1899 vm_remove_all_page_mappings(vm_page *page) 1900 { 1901 // TODO: this can't work... 
(we need to lock the map, so this has to be a mutex) 1902 cpu_status state = disable_interrupts(); 1903 acquire_spinlock(&sMappingLock); 1904 1905 vm_page_mappings queue; 1906 queue.MoveFrom(&page->mappings); 1907 1908 vm_page_mappings::Iterator iterator = queue.GetIterator(); 1909 vm_page_mapping *mapping; 1910 while ((mapping = iterator.Next()) != NULL) { 1911 vm_area *area = mapping->area; 1912 vm_translation_map *map = &area->address_space->translation_map; 1913 1914 // map->ops->lock(map); 1915 addr_t base = area->base + (page->cache_offset << PAGE_SHIFT); 1916 map->ops->unmap(map, base, base + (B_PAGE_SIZE - 1)); 1917 // map->ops->unlock(map); 1918 1919 area->mappings.Remove(mapping); 1920 } 1921 1922 release_spinlock(&sMappingLock); 1923 restore_interrupts(state); 1924 1925 // free now unused mappings 1926 1927 while ((mapping = queue.RemoveHead()) != NULL) { 1928 free(mapping); 1929 } 1930 } 1931 1932 1933 status_t 1934 vm_unmap_pages(vm_area *area, addr_t base, size_t size) 1935 { 1936 vm_translation_map *map = &area->address_space->translation_map; 1937 addr_t end = base + (size - 1); 1938 1939 map->ops->lock(map); 1940 1941 if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) { 1942 // iterate through all pages and decrease their wired count 1943 for (addr_t virtualAddress = base; virtualAddress < end; 1944 virtualAddress += B_PAGE_SIZE) { 1945 addr_t physicalAddress; 1946 uint32 flags; 1947 status_t status = map->ops->query(map, virtualAddress, 1948 &physicalAddress, &flags); 1949 if (status < B_OK || (flags & PAGE_PRESENT) == 0) 1950 continue; 1951 1952 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1953 if (page == NULL) { 1954 panic("area %p looking up page failed for pa 0x%lx\n", area, 1955 physicalAddress); 1956 } 1957 1958 page->wired_count--; 1959 // TODO: needs to be atomic on all platforms! 
1960 } 1961 } 1962 1963 map->ops->unmap(map, base, end); 1964 1965 if (area->wiring == B_NO_LOCK) { 1966 uint32 startOffset = (area->cache_offset + base - area->base) 1967 >> PAGE_SHIFT; 1968 uint32 endOffset = startOffset + (size >> PAGE_SHIFT); 1969 vm_page_mapping *mapping; 1970 vm_area_mappings queue; 1971 1972 cpu_status state = disable_interrupts(); 1973 acquire_spinlock(&sMappingLock); 1974 1975 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 1976 while (iterator.HasNext()) { 1977 mapping = iterator.Next(); 1978 1979 vm_page *page = mapping->page; 1980 if (page->cache_offset < startOffset || page->cache_offset >= endOffset) 1981 continue; 1982 1983 mapping->page->mappings.Remove(mapping); 1984 iterator.Remove(); 1985 1986 queue.Add(mapping); 1987 } 1988 1989 release_spinlock(&sMappingLock); 1990 restore_interrupts(state); 1991 1992 while ((mapping = queue.RemoveHead()) != NULL) { 1993 free(mapping); 1994 } 1995 } 1996 1997 map->ops->unlock(map); 1998 return B_OK; 1999 } 2000 2001 2002 status_t 2003 vm_map_page(vm_area *area, vm_page *page, addr_t address, uint32 protection) 2004 { 2005 vm_translation_map *map = &area->address_space->translation_map; 2006 vm_page_mapping *mapping = NULL; 2007 2008 if (area->wiring == B_NO_LOCK) { 2009 mapping = (vm_page_mapping *)malloc(sizeof(vm_page_mapping)); 2010 if (mapping == NULL) 2011 return B_NO_MEMORY; 2012 2013 mapping->page = page; 2014 mapping->area = area; 2015 } 2016 2017 map->ops->lock(map); 2018 map->ops->map(map, address, page->physical_page_number * B_PAGE_SIZE, 2019 protection); 2020 2021 if (area->wiring != B_NO_LOCK) { 2022 page->wired_count++; 2023 // TODO: needs to be atomic on all platforms! 2024 } else { 2025 // insert mapping into lists 2026 cpu_status state = disable_interrupts(); 2027 acquire_spinlock(&sMappingLock); 2028 2029 page->mappings.Add(mapping); 2030 area->mappings.Add(mapping); 2031 2032 release_spinlock(&sMappingLock); 2033 restore_interrupts(state); 2034 } 2035 2036 map->ops->unlock(map); 2037 2038 vm_page_set_state(page, PAGE_STATE_ACTIVE); 2039 return B_OK; 2040 } 2041 2042 2043 static int 2044 display_mem(int argc, char **argv) 2045 { 2046 bool physical = false; 2047 addr_t copyAddress; 2048 int32 displayWidth; 2049 int32 itemSize; 2050 int32 num = -1; 2051 addr_t address; 2052 int i = 1, j; 2053 2054 if (argc > 1 && argv[1][0] == '-') { 2055 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2056 physical = true; 2057 i++; 2058 } else 2059 i = 99; 2060 } 2061 2062 if (argc < i + 1 || argc > i + 2) { 2063 kprintf("usage: dl/dw/ds/db [-p|--physical] <address> [num]\n" 2064 "\tdl - 8 bytes\n" 2065 "\tdw - 4 bytes\n" 2066 "\tds - 2 bytes\n" 2067 "\tdb - 1 byte\n" 2068 " -p or --physical only allows memory from a single page to be displayed.\n"); 2069 return 0; 2070 } 2071 2072 address = strtoul(argv[i], NULL, 0); 2073 2074 if (argc > i + 1) 2075 num = atoi(argv[i + 1]); 2076 2077 // build the format string 2078 if (strcmp(argv[0], "db") == 0) { 2079 itemSize = 1; 2080 displayWidth = 16; 2081 } else if (strcmp(argv[0], "ds") == 0) { 2082 itemSize = 2; 2083 displayWidth = 8; 2084 } else if (strcmp(argv[0], "dw") == 0) { 2085 itemSize = 4; 2086 displayWidth = 4; 2087 } else if (strcmp(argv[0], "dl") == 0) { 2088 itemSize = 8; 2089 displayWidth = 2; 2090 } else { 2091 kprintf("display_mem called in an invalid way!\n"); 2092 return 0; 2093 } 2094 2095 if (num <= 0) 2096 num = displayWidth; 2097 2098 if (physical) { 2099 int32 offset = address & (B_PAGE_SIZE - 1); 2100 if (num * 
itemSize + offset > B_PAGE_SIZE) { 2101 num = (B_PAGE_SIZE - offset) / itemSize; 2102 kprintf("NOTE: number of bytes has been cut to page size\n"); 2103 } 2104 2105 address = ROUNDOWN(address, B_PAGE_SIZE); 2106 2107 kernel_startup = true; 2108 // vm_get_physical_page() needs to lock... 2109 2110 if (vm_get_physical_page(address, &copyAddress, PHYSICAL_PAGE_NO_WAIT) != B_OK) { 2111 kprintf("getting the hardware page failed."); 2112 kernel_startup = false; 2113 return 0; 2114 } 2115 2116 kernel_startup = false; 2117 address += offset; 2118 copyAddress += offset; 2119 } else 2120 copyAddress = address; 2121 2122 for (i = 0; i < num; i++) { 2123 uint32 value; 2124 2125 if ((i % displayWidth) == 0) { 2126 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 2127 if (i != 0) 2128 kprintf("\n"); 2129 2130 kprintf("[0x%lx] ", address + i * itemSize); 2131 2132 for (j = 0; j < displayed; j++) { 2133 char c; 2134 if (user_memcpy(&c, (char *)copyAddress + i * itemSize + j, 1) != B_OK) { 2135 displayed = j; 2136 break; 2137 } 2138 if (!isprint(c)) 2139 c = '.'; 2140 2141 kprintf("%c", c); 2142 } 2143 if (num > displayWidth) { 2144 // make sure the spacing in the last line is correct 2145 for (j = displayed; j < displayWidth * itemSize; j++) 2146 kprintf(" "); 2147 } 2148 kprintf(" "); 2149 } 2150 2151 if (user_memcpy(&value, (uint8 *)copyAddress + i * itemSize, itemSize) != B_OK) { 2152 kprintf("read fault"); 2153 break; 2154 } 2155 2156 switch (itemSize) { 2157 case 1: 2158 kprintf(" %02x", *(uint8 *)&value); 2159 break; 2160 case 2: 2161 kprintf(" %04x", *(uint16 *)&value); 2162 break; 2163 case 4: 2164 kprintf(" %08lx", *(uint32 *)&value); 2165 break; 2166 case 8: 2167 kprintf(" %016Lx", *(uint64 *)&value); 2168 break; 2169 } 2170 } 2171 2172 kprintf("\n"); 2173 2174 if (physical) { 2175 copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE); 2176 kernel_startup = true; 2177 vm_put_physical_page(copyAddress); 2178 kernel_startup = false; 2179 } 2180 return 0; 2181 } 2182 2183 2184 static const char * 2185 page_state_to_string(int state) 2186 { 2187 switch(state) { 2188 case PAGE_STATE_ACTIVE: 2189 return "active"; 2190 case PAGE_STATE_INACTIVE: 2191 return "inactive"; 2192 case PAGE_STATE_BUSY: 2193 return "busy"; 2194 case PAGE_STATE_MODIFIED: 2195 return "modified"; 2196 case PAGE_STATE_FREE: 2197 return "free"; 2198 case PAGE_STATE_CLEAR: 2199 return "clear"; 2200 case PAGE_STATE_WIRED: 2201 return "wired"; 2202 case PAGE_STATE_UNUSED: 2203 return "unused"; 2204 default: 2205 return "unknown"; 2206 } 2207 } 2208 2209 2210 static int 2211 dump_cache_chain(int argc, char **argv) 2212 { 2213 if (argc < 2 || strlen(argv[1]) < 2 2214 || argv[1][0] != '0' 2215 || argv[1][1] != 'x') { 2216 kprintf("%s: invalid argument, pass address\n", argv[0]); 2217 return 0; 2218 } 2219 2220 addr_t address = strtoul(argv[1], NULL, 0); 2221 if (address == NULL) 2222 return 0; 2223 2224 vm_cache *cache = (vm_cache *)address; 2225 while (cache != NULL) { 2226 dprintf("%p (ref %p)\n", cache, cache->ref); 2227 cache = cache->source; 2228 } 2229 2230 return 0; 2231 } 2232 2233 2234 static const char * 2235 cache_type_to_string(int32 type) 2236 { 2237 switch (type) { 2238 case CACHE_TYPE_RAM: 2239 return "RAM"; 2240 case CACHE_TYPE_DEVICE: 2241 return "device"; 2242 case CACHE_TYPE_VNODE: 2243 return "vnode"; 2244 case CACHE_TYPE_NULL: 2245 return "null"; 2246 2247 default: 2248 return "unknown"; 2249 } 2250 } 2251 2252 2253 static int 2254 dump_cache(int argc, char **argv) 2255 { 2256 vm_cache *cache; 2257 vm_cache_ref
*cacheRef; 2258 bool showPages = false; 2259 bool showCache = true; 2260 bool showCacheRef = true; 2261 int i = 1; 2262 2263 if (argc < 2) { 2264 kprintf("usage: %s [-ps] <address>\n" 2265 " if -p is specified, all pages are shown, if -s is used\n" 2266 " only the cache/cache_ref info is shown respectively.\n", argv[0]); 2267 return 0; 2268 } 2269 while (argv[i][0] == '-') { 2270 char *arg = argv[i] + 1; 2271 while (arg[0]) { 2272 if (arg[0] == 'p') 2273 showPages = true; 2274 else if (arg[0] == 's') { 2275 if (!strcmp(argv[0], "cache")) 2276 showCacheRef = false; 2277 else 2278 showCache = false; 2279 } 2280 arg++; 2281 } 2282 i++; 2283 } 2284 if (argv[i] == NULL || strlen(argv[i]) < 2 2285 || argv[i][0] != '0' 2286 || argv[i][1] != 'x') { 2287 kprintf("%s: invalid argument, pass address\n", argv[0]); 2288 return 0; 2289 } 2290 2291 addr_t address = strtoul(argv[i], NULL, 0); 2292 if (address == NULL) 2293 return 0; 2294 2295 if (!strcmp(argv[0], "cache")) { 2296 cache = (vm_cache *)address; 2297 cacheRef = cache->ref; 2298 } else { 2299 cacheRef = (vm_cache_ref *)address; 2300 cache = cacheRef->cache; 2301 } 2302 2303 if (showCacheRef) { 2304 kprintf("CACHE_REF %p:\n", cacheRef); 2305 if (!showCache) 2306 kprintf(" cache: %p\n", cacheRef->cache); 2307 kprintf(" ref_count: %ld\n", cacheRef->ref_count); 2308 kprintf(" lock.holder: %ld\n", cacheRef->lock.holder); 2309 kprintf(" lock.sem: 0x%lx\n", cacheRef->lock.sem); 2310 kprintf(" areas:\n"); 2311 2312 for (vm_area *area = cacheRef->areas; area != NULL; area = area->cache_next) { 2313 kprintf(" area 0x%lx, %s\n", area->id, area->name); 2314 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 2315 kprintf("\tprotection: 0x%lx\n", area->protection); 2316 kprintf("\towner: 0x%lx\n", area->address_space->id); 2317 } 2318 } 2319 2320 if (showCache) { 2321 kprintf("CACHE %p:\n", cache); 2322 if (!showCacheRef) 2323 kprintf(" cache_ref: %p\n", cache->ref); 2324 kprintf(" source: %p\n", cache->source); 2325 kprintf(" store: %p\n", cache->store); 2326 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 2327 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 2328 kprintf(" virtual_size: 0x%Lx\n", cache->virtual_size); 2329 kprintf(" temporary: %ld\n", cache->temporary); 2330 kprintf(" scan_skip: %ld\n", cache->scan_skip); 2331 2332 kprintf(" consumers:\n"); 2333 vm_cache *consumer = NULL; 2334 while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) { 2335 kprintf("\t%p\n", consumer); 2336 } 2337 2338 kprintf(" pages:\n"); 2339 int32 count = 0; 2340 for (vm_page *page = cache->page_list; page != NULL; page = page->cache_next) { 2341 count++; 2342 if (!showPages) 2343 continue; 2344 2345 if (page->type == PAGE_TYPE_PHYSICAL) { 2346 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) wired_count %u\n", 2347 page, page->physical_page_number, page->cache_offset, page->type, page->state, 2348 page_state_to_string(page->state), page->wired_count); 2349 } else if(page->type == PAGE_TYPE_DUMMY) { 2350 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 2351 page, page->state, page_state_to_string(page->state)); 2352 } else 2353 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 2354 } 2355 2356 if (!showPages) 2357 kprintf("\t%ld in cache\n", count); 2358 } 2359 2360 return 0; 2361 } 2362 2363 2364 static void 2365 dump_area_struct(vm_area *area, bool mappings) 2366 { 2367 kprintf("AREA: %p\n", area); 2368 kprintf("name:\t\t'%s'\n", area->name); 2369 kprintf("owner:\t\t0x%lx\n", 
area->address_space->id); 2370 kprintf("id:\t\t0x%lx\n", area->id); 2371 kprintf("base:\t\t0x%lx\n", area->base); 2372 kprintf("size:\t\t0x%lx\n", area->size); 2373 kprintf("protection:\t0x%lx\n", area->protection); 2374 kprintf("wiring:\t\t0x%x\n", area->wiring); 2375 kprintf("memory_type:\t0x%x\n", area->memory_type); 2376 kprintf("ref_count:\t%ld\n", area->ref_count); 2377 kprintf("cache_ref:\t%p\n", area->cache_ref); 2378 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 2379 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 2380 kprintf("cache_next:\t%p\n", area->cache_next); 2381 kprintf("cache_prev:\t%p\n", area->cache_prev); 2382 2383 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 2384 if (mappings) { 2385 kprintf("page mappings:\n"); 2386 while (iterator.HasNext()) { 2387 vm_page_mapping *mapping = iterator.Next(); 2388 kprintf(" %p", mapping->page); 2389 } 2390 kprintf("\n"); 2391 } else { 2392 uint32 count = 0; 2393 while (iterator.Next() != NULL) { 2394 count++; 2395 } 2396 kprintf("page mappings:\t%lu\n", count); 2397 } 2398 } 2399 2400 2401 static int 2402 dump_area(int argc, char **argv) 2403 { 2404 bool mappings = false; 2405 bool found = false; 2406 int32 index = 1; 2407 vm_area *area; 2408 addr_t num; 2409 2410 if (argc < 2) { 2411 kprintf("usage: area [-m] <id|address|name>\n"); 2412 return 0; 2413 } 2414 2415 if (!strcmp(argv[1], "-m")) { 2416 mappings = true; 2417 index++; 2418 } 2419 2420 num = strtoul(argv[index], NULL, 0); 2421 2422 // walk through the area list, looking for the arguments as a name 2423 struct hash_iterator iter; 2424 2425 hash_open(sAreaHash, &iter); 2426 while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) { 2427 if ((area->name != NULL && !strcmp(argv[index], area->name)) 2428 || num != 0 2429 && ((addr_t)area->id == num 2430 || area->base <= num && area->base + area->size > num)) { 2431 dump_area_struct(area, mappings); 2432 found = true; 2433 } 2434 } 2435 2436 if (!found) 2437 kprintf("could not find area %s (%ld)\n", argv[index], num); 2438 return 0; 2439 } 2440 2441 2442 static int 2443 dump_area_list(int argc, char **argv) 2444 { 2445 vm_area *area; 2446 struct hash_iterator iter; 2447 const char *name = NULL; 2448 int32 id = 0; 2449 2450 if (argc > 1) { 2451 id = strtoul(argv[1], NULL, 0); 2452 if (id == 0) 2453 name = argv[1]; 2454 } 2455 2456 kprintf("addr id base\t\tsize protect lock name\n"); 2457 2458 hash_open(sAreaHash, &iter); 2459 while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) { 2460 if (id != 0 && area->address_space->id != id 2461 || name != NULL && strstr(area->name, name) == NULL) 2462 continue; 2463 2464 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, (void *)area->base, 2465 (void *)area->size, area->protection, area->wiring, area->name); 2466 } 2467 hash_close(sAreaHash, &iter, false); 2468 return 0; 2469 } 2470 2471 2472 static int 2473 dump_available_memory(int argc, char **argv) 2474 { 2475 kprintf("Available memory: %Ld/%lu bytes\n", 2476 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 2477 return 0; 2478 } 2479 2480 2481 status_t 2482 vm_delete_areas(struct vm_address_space *addressSpace) 2483 { 2484 vm_area *area; 2485 vm_area *next, *last = NULL; 2486 2487 TRACE(("vm_delete_areas: called on address space 0x%lx\n", addressSpace->id)); 2488 2489 acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0); 2490 2491 // remove all reserved areas in this address space 2492 2493 for (area = addressSpace->areas; area; area = next) { 2494 next 
= area->address_space_next; 2495 2496 if (area->id == RESERVED_AREA_ID) { 2497 // just remove it 2498 if (last) 2499 last->address_space_next = area->address_space_next; 2500 else 2501 addressSpace->areas = area->address_space_next; 2502 2503 vm_put_address_space(addressSpace); 2504 free(area); 2505 continue; 2506 } 2507 2508 last = area; 2509 } 2510 2511 // delete all the areas in this address space 2512 2513 for (area = addressSpace->areas; area; area = next) { 2514 next = area->address_space_next; 2515 2516 // decrement the ref on this area, may actually push the ref < 0, if there 2517 // is a concurrent delete_area() on that specific area, but that's ok here 2518 if (!_vm_put_area(area, true)) 2519 dprintf("vm_delete_areas() did not delete area %p\n", area); 2520 } 2521 2522 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 2523 2524 return B_OK; 2525 } 2526 2527 2528 static area_id 2529 vm_area_for(team_id team, addr_t address) 2530 { 2531 vm_address_space *addressSpace; 2532 area_id id = B_ERROR; 2533 2534 addressSpace = vm_get_address_space_by_id(team); 2535 if (addressSpace == NULL) 2536 return B_BAD_TEAM_ID; 2537 2538 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 2539 2540 vm_area *area = vm_area_lookup(addressSpace, address); 2541 if (area != NULL) 2542 id = area->id; 2543 2544 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 2545 vm_put_address_space(addressSpace); 2546 2547 return id; 2548 } 2549 2550 2551 /*! 2552 Frees physical pages that were used during the boot process. 2553 */ 2554 static void 2555 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end) 2556 { 2557 // free all physical pages in the specified range 2558 2559 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 2560 addr_t physicalAddress; 2561 uint32 flags; 2562 2563 if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) { 2564 vm_page *page = vm_lookup_page(current / B_PAGE_SIZE); 2565 if (page != NULL) 2566 vm_page_set_state(page, PAGE_STATE_FREE); 2567 } 2568 } 2569 2570 // unmap the memory 2571 map->ops->unmap(map, start, end - 1); 2572 } 2573 2574 2575 void 2576 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 2577 { 2578 vm_translation_map *map = &vm_kernel_address_space()->translation_map; 2579 addr_t end = start + size; 2580 addr_t lastEnd = start; 2581 vm_area *area; 2582 2583 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end)); 2584 2585 // The areas are sorted in virtual address space order, so 2586 // we just have to find the holes between them that fall 2587 // into the area we should dispose 2588 2589 map->ops->lock(map); 2590 2591 for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) { 2592 addr_t areaStart = area->base; 2593 addr_t areaEnd = areaStart + area->size; 2594 2595 if (area->id == RESERVED_AREA_ID) 2596 continue; 2597 2598 if (areaEnd >= end) { 2599 // we are done, the areas are already beyond of what we have to free 2600 lastEnd = end; 2601 break; 2602 } 2603 2604 if (areaStart > lastEnd) { 2605 // this is something we can free 2606 TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart)); 2607 unmap_and_free_physical_pages(map, lastEnd, areaStart); 2608 } 2609 2610 lastEnd = areaEnd; 2611 } 2612 2613 if (lastEnd < end) { 2614 // we can also get rid of some space at the end of the area 2615 TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end)); 2616 
unmap_and_free_physical_pages(map, lastEnd, end); 2617 } 2618 2619 map->ops->unlock(map); 2620 } 2621 2622 2623 static void 2624 create_preloaded_image_areas(struct preloaded_image *image) 2625 { 2626 char name[B_OS_NAME_LENGTH]; 2627 void *address; 2628 int32 length; 2629 2630 // use file name to create a good area name 2631 char *fileName = strrchr(image->name, '/'); 2632 if (fileName == NULL) 2633 fileName = image->name; 2634 else 2635 fileName++; 2636 2637 length = strlen(fileName); 2638 // make sure there is enough space for the suffix 2639 if (length > 25) 2640 length = 25; 2641 2642 memcpy(name, fileName, length); 2643 strcpy(name + length, "_text"); 2644 address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE); 2645 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 2646 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 2647 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2648 // this will later be remapped read-only/executable by the 2649 // ELF initialization code 2650 2651 strcpy(name + length, "_data"); 2652 address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE); 2653 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 2654 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 2655 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2656 } 2657 2658 2659 /** Frees all previously kernel arguments areas from the kernel_args structure. 2660 * Any boot loader resources contained in that arguments must not be accessed 2661 * anymore past this point. 2662 */ 2663 2664 void 2665 vm_free_kernel_args(kernel_args *args) 2666 { 2667 uint32 i; 2668 2669 TRACE(("vm_free_kernel_args()\n")); 2670 2671 for (i = 0; i < args->num_kernel_args_ranges; i++) { 2672 area_id area = area_for((void *)args->kernel_args_range[i].start); 2673 if (area >= B_OK) 2674 delete_area(area); 2675 } 2676 } 2677 2678 2679 static void 2680 allocate_kernel_args(kernel_args *args) 2681 { 2682 uint32 i; 2683 2684 TRACE(("allocate_kernel_args()\n")); 2685 2686 for (i = 0; i < args->num_kernel_args_ranges; i++) { 2687 void *address = (void *)args->kernel_args_range[i].start; 2688 2689 create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size, 2690 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2691 } 2692 } 2693 2694 2695 static void 2696 unreserve_boot_loader_ranges(kernel_args *args) 2697 { 2698 uint32 i; 2699 2700 TRACE(("unreserve_boot_loader_ranges()\n")); 2701 2702 for (i = 0; i < args->num_virtual_allocated_ranges; i++) { 2703 vm_unreserve_address_range(vm_kernel_address_space_id(), 2704 (void *)args->virtual_allocated_range[i].start, 2705 args->virtual_allocated_range[i].size); 2706 } 2707 } 2708 2709 2710 static void 2711 reserve_boot_loader_ranges(kernel_args *args) 2712 { 2713 uint32 i; 2714 2715 TRACE(("reserve_boot_loader_ranges()\n")); 2716 2717 for (i = 0; i < args->num_virtual_allocated_ranges; i++) { 2718 void *address = (void *)args->virtual_allocated_range[i].start; 2719 2720 // If the address is no kernel address, we just skip it. The 2721 // architecture specific code has to deal with it. 
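		// Ranges within the kernel address space are handed to
		// vm_reserve_address_range() below, which presumably marks them with a
		// RESERVED_AREA_ID placeholder area so that later area creation cannot
		// claim these addresses; they are released again in
		// unreserve_boot_loader_ranges().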
2722 if (!IS_KERNEL_ADDRESS(address)) { 2723 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 2724 address, args->virtual_allocated_range[i].size); 2725 continue; 2726 } 2727 2728 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address, 2729 B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 2730 if (status < B_OK) 2731 panic("could not reserve boot loader ranges\n"); 2732 } 2733 } 2734 2735 2736 static addr_t 2737 allocate_early_virtual(kernel_args *args, size_t size) 2738 { 2739 addr_t spot = 0; 2740 uint32 i; 2741 int last_valloc_entry = 0; 2742 2743 size = PAGE_ALIGN(size); 2744 // find a slot in the virtual allocation addr range 2745 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 2746 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 2747 + args->virtual_allocated_range[i - 1].size; 2748 last_valloc_entry = i; 2749 // check to see if the space between this one and the last is big enough 2750 if (previousRangeEnd >= KERNEL_BASE 2751 && args->virtual_allocated_range[i].start 2752 - previousRangeEnd >= size) { 2753 spot = previousRangeEnd; 2754 args->virtual_allocated_range[i - 1].size += size; 2755 goto out; 2756 } 2757 } 2758 if (spot == 0) { 2759 // we hadn't found one between allocation ranges. this is ok. 2760 // see if there's a gap after the last one 2761 addr_t lastRangeEnd 2762 = args->virtual_allocated_range[last_valloc_entry].start 2763 + args->virtual_allocated_range[last_valloc_entry].size; 2764 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 2765 spot = lastRangeEnd; 2766 args->virtual_allocated_range[last_valloc_entry].size += size; 2767 goto out; 2768 } 2769 // see if there's a gap before the first one 2770 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 2771 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 2772 args->virtual_allocated_range[0].start -= size; 2773 spot = args->virtual_allocated_range[0].start; 2774 goto out; 2775 } 2776 } 2777 } 2778 2779 out: 2780 return spot; 2781 } 2782 2783 2784 static bool 2785 is_page_in_physical_memory_range(kernel_args *args, addr_t address) 2786 { 2787 // TODO: horrible brute-force method of determining if the page can be allocated 2788 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 2789 if (address >= args->physical_memory_range[i].start 2790 && address < args->physical_memory_range[i].start 2791 + args->physical_memory_range[i].size) 2792 return true; 2793 } 2794 return false; 2795 } 2796 2797 2798 static addr_t 2799 allocate_early_physical_page(kernel_args *args) 2800 { 2801 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 2802 addr_t nextPage; 2803 2804 nextPage = args->physical_allocated_range[i].start 2805 + args->physical_allocated_range[i].size; 2806 // see if the page after the next allocated paddr run can be allocated 2807 if (i + 1 < args->num_physical_allocated_ranges 2808 && args->physical_allocated_range[i + 1].size != 0) { 2809 // see if the next page will collide with the next allocated range 2810 if (nextPage >= args->physical_allocated_range[i+1].start) 2811 continue; 2812 } 2813 // see if the next physical page fits in the memory block 2814 if (is_page_in_physical_memory_range(args, nextPage)) { 2815 // we got one! 2816 args->physical_allocated_range[i].size += B_PAGE_SIZE; 2817 return nextPage / B_PAGE_SIZE; 2818 } 2819 } 2820 2821 return 0; 2822 // could not allocate a block 2823 } 2824 2825 2826 /*! 
2827 This one uses the kernel_args' physical and virtual memory ranges to 2828 allocate some pages before the VM is completely up. 2829 */ 2830 addr_t 2831 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize, 2832 uint32 attributes) 2833 { 2834 if (physicalSize > virtualSize) 2835 physicalSize = virtualSize; 2836 2837 // find the vaddr to allocate at 2838 addr_t virtualBase = allocate_early_virtual(args, virtualSize); 2839 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 2840 2841 // map the pages 2842 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 2843 addr_t physicalAddress = allocate_early_physical_page(args); 2844 if (physicalAddress == 0) 2845 panic("error allocating early page!\n"); 2846 2847 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 2848 2849 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 2850 physicalAddress * B_PAGE_SIZE, attributes, 2851 &allocate_early_physical_page); 2852 } 2853 2854 return virtualBase; 2855 } 2856 2857 2858 status_t 2859 vm_init(kernel_args *args) 2860 { 2861 struct preloaded_image *image; 2862 void *address; 2863 status_t err = 0; 2864 uint32 i; 2865 2866 TRACE(("vm_init: entry\n")); 2867 err = arch_vm_translation_map_init(args); 2868 err = arch_vm_init(args); 2869 2870 // initialize some globals 2871 sNextAreaID = 1; 2872 sAreaHashLock = -1; 2873 sAvailableMemoryLock.sem = -1; 2874 2875 vm_page_init_num_pages(args); 2876 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 2877 2878 // reduce the heap size if we have not so much RAM 2879 size_t heapSize = HEAP_SIZE; 2880 if (sAvailableMemory < 100 * 1024 * 1024) 2881 heapSize /= 4; 2882 else if (sAvailableMemory < 200 * 1024 * 1024) 2883 heapSize /= 2; 2884 2885 // map in the new heap and initialize it 2886 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 2887 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2888 TRACE(("heap at 0x%lx\n", heapBase)); 2889 heap_init(heapBase, heapSize); 2890 2891 size_t slabInitialSize = 2 * B_PAGE_SIZE; 2892 addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize, 2893 slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2894 slab_init(args, slabInitialBase, slabInitialSize); 2895 2896 // initialize the free page list and physical page mapper 2897 vm_page_init(args); 2898 2899 // initialize the hash table that stores the pages mapped to caches 2900 vm_cache_init(args); 2901 2902 { 2903 vm_area *area; 2904 sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area, 2905 &area_compare, &area_hash); 2906 if (sAreaHash == NULL) 2907 panic("vm_init: error creating aspace hash table\n"); 2908 } 2909 2910 vm_address_space_init(); 2911 reserve_boot_loader_ranges(args); 2912 2913 // do any further initialization that the architecture dependant layers may need now 2914 arch_vm_translation_map_init_post_area(args); 2915 arch_vm_init_post_area(args); 2916 vm_page_init_post_area(args); 2917 2918 // allocate areas to represent stuff that already exists 2919 2920 address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE); 2921 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 2922 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2923 2924 address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE); 2925 create_area("initial slab space", &address, B_EXACT_ADDRESS, 2926 slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA 2927 | B_KERNEL_WRITE_AREA); 2928 2929 allocate_kernel_args(args); 2930 2931 args->kernel_image.name = 
"kernel"; 2932 // the lazy boot loader currently doesn't set the kernel's name... 2933 create_preloaded_image_areas(&args->kernel_image); 2934 2935 // allocate areas for preloaded images 2936 for (image = args->preloaded_images; image != NULL; image = image->next) { 2937 create_preloaded_image_areas(image); 2938 } 2939 2940 // allocate kernel stacks 2941 for (i = 0; i < args->num_cpus; i++) { 2942 char name[64]; 2943 2944 sprintf(name, "idle thread %lu kstack", i + 1); 2945 address = (void *)args->cpu_kstack[i].start; 2946 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 2947 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2948 } 2949 2950 // add some debugger commands 2951 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 2952 add_debugger_command("area", &dump_area, "Dump info about a particular area"); 2953 add_debugger_command("cache_ref", &dump_cache, "Dump vm_cache"); 2954 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 2955 add_debugger_command("cache_chain", &dump_cache_chain, "Dump vm_cache chain"); 2956 add_debugger_command("avail", &dump_available_memory, "Dump available memory"); 2957 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 2958 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 2959 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 2960 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 2961 2962 TRACE(("vm_init: exit\n")); 2963 2964 return err; 2965 } 2966 2967 2968 status_t 2969 vm_init_post_sem(kernel_args *args) 2970 { 2971 vm_area *area; 2972 2973 // This frees all unused boot loader resources and makes its space available again 2974 arch_vm_init_end(args); 2975 unreserve_boot_loader_ranges(args); 2976 2977 // fill in all of the semaphores that were not allocated before 2978 // since we're still single threaded and only the kernel address space exists, 2979 // it isn't that hard to find all of the ones we need to create 2980 2981 benaphore_init(&sAvailableMemoryLock, "available memory lock"); 2982 arch_vm_translation_map_init_post_sem(args); 2983 vm_address_space_init_post_sem(); 2984 2985 for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) { 2986 if (area->id == RESERVED_AREA_ID) 2987 continue; 2988 2989 if (area->cache_ref->lock.sem < 0) 2990 mutex_init(&area->cache_ref->lock, "cache_ref_mutex"); 2991 } 2992 2993 sAreaHashLock = create_sem(WRITE_COUNT, "area hash"); 2994 2995 slab_init_post_sem(); 2996 2997 return heap_init_post_sem(args); 2998 } 2999 3000 3001 status_t 3002 vm_init_post_thread(kernel_args *args) 3003 { 3004 vm_page_init_post_thread(args); 3005 vm_daemon_init(); 3006 vm_low_memory_init(); 3007 3008 return heap_init_post_thread(args); 3009 } 3010 3011 3012 status_t 3013 vm_init_post_modules(kernel_args *args) 3014 { 3015 return arch_vm_init_post_modules(args); 3016 } 3017 3018 3019 void 3020 permit_page_faults(void) 3021 { 3022 struct thread *thread = thread_get_current_thread(); 3023 if (thread != NULL) 3024 atomic_add(&thread->page_faults_allowed, 1); 3025 } 3026 3027 3028 void 3029 forbid_page_faults(void) 3030 { 3031 struct thread *thread = thread_get_current_thread(); 3032 if (thread != NULL) 3033 atomic_add(&thread->page_faults_allowed, -1); 3034 } 3035 3036 3037 status_t 3038 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3039 addr_t *newIP) 3040 { 3041 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", 
address, faultAddress)); 3042 3043 *newIP = 0; 3044 3045 status_t status = vm_soft_fault(address, isWrite, isUser); 3046 if (status < B_OK) { 3047 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 3048 strerror(status), address, faultAddress, isWrite, isUser, 3049 thread_get_current_thread_id()); 3050 if (!isUser) { 3051 struct thread *thread = thread_get_current_thread(); 3052 if (thread != NULL && thread->fault_handler != 0) { 3053 // this will cause the arch dependant page fault handler to 3054 // modify the IP on the interrupt frame or whatever to return 3055 // to this address 3056 *newIP = thread->fault_handler; 3057 } else { 3058 // unhandled page fault in the kernel 3059 panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n", 3060 address, faultAddress); 3061 } 3062 } else { 3063 #if 1 3064 // ToDo: remove me once we have proper userland debugging support (and tools) 3065 vm_address_space *addressSpace = vm_get_current_user_address_space(); 3066 vm_area *area; 3067 3068 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3069 area = vm_area_lookup(addressSpace, faultAddress); 3070 3071 dprintf("vm_page_fault: sending team \"%s\" 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n", 3072 thread_get_current_thread()->team->name, 3073 thread_get_current_thread()->team->id, faultAddress, 3074 area ? area->name : "???", faultAddress - (area ? area->base : 0x0)); 3075 3076 // We can print a stack trace of the userland thread here. 3077 #if 1 3078 if (area) { 3079 struct stack_frame { 3080 #if defined(__INTEL__) || defined(__POWERPC__) 3081 struct stack_frame* previous; 3082 void* return_address; 3083 #else 3084 // ... 3085 #endif 3086 } frame; 3087 #ifdef __INTEL__ 3088 struct iframe *iframe = i386_get_user_iframe(); 3089 if (iframe == NULL) 3090 panic("iframe is NULL!"); 3091 3092 status_t status = user_memcpy(&frame, (void *)iframe->ebp, 3093 sizeof(struct stack_frame)); 3094 #elif defined(__POWERPC__) 3095 struct iframe *iframe = ppc_get_user_iframe(); 3096 if (iframe == NULL) 3097 panic("iframe is NULL!"); 3098 3099 status_t status = user_memcpy(&frame, (void *)iframe->r1, 3100 sizeof(struct stack_frame)); 3101 #else 3102 # warn "vm_page_fault() stack trace won't work" 3103 status = B_ERROR; 3104 #endif 3105 3106 dprintf("stack trace:\n"); 3107 while (status == B_OK) { 3108 dprintf(" %p", frame.return_address); 3109 area = vm_area_lookup(addressSpace, 3110 (addr_t)frame.return_address); 3111 if (area) { 3112 dprintf(" (%s + %#lx)", area->name, 3113 (addr_t)frame.return_address - area->base); 3114 } 3115 dprintf("\n"); 3116 3117 status = user_memcpy(&frame, frame.previous, 3118 sizeof(struct stack_frame)); 3119 } 3120 } 3121 #endif // 0 (stack trace) 3122 3123 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3124 vm_put_address_space(addressSpace); 3125 #endif 3126 if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV)) 3127 send_signal(team_get_current_team_id(), SIGSEGV); 3128 } 3129 } 3130 3131 return B_HANDLED_INTERRUPT; 3132 } 3133 3134 3135 static inline status_t 3136 fault_acquire_locked_source(vm_cache *cache, vm_cache_ref **_sourceRef) 3137 { 3138 retry: 3139 vm_cache *source = cache->source; 3140 if (source == NULL) 3141 return B_ERROR; 3142 if (source->busy) 3143 return B_BUSY; 3144 3145 vm_cache_ref *sourceRef = source->ref; 3146 vm_cache_acquire_ref(sourceRef); 3147 3148 mutex_lock(&sourceRef->lock); 3149 3150 if (sourceRef->cache != cache->source || sourceRef->cache->busy) 
{ 3151 mutex_unlock(&sourceRef->lock); 3152 vm_cache_release_ref(sourceRef); 3153 goto retry; 3154 } 3155 3156 *_sourceRef = sourceRef; 3157 return B_OK; 3158 } 3159 3160 3161 /*! 3162 Inserts a busy dummy page into a cache, and makes sure the cache won't go 3163 away by grabbing a reference to it. 3164 */ 3165 static inline void 3166 fault_insert_dummy_page(vm_cache_ref *cacheRef, vm_page &dummyPage, off_t cacheOffset) 3167 { 3168 dummyPage.state = PAGE_STATE_BUSY; 3169 vm_cache_acquire_ref(cacheRef); 3170 vm_cache_insert_page(cacheRef, &dummyPage, cacheOffset); 3171 } 3172 3173 3174 /*! 3175 Removes the busy dummy page from a cache, and releases its reference to 3176 the cache. 3177 */ 3178 static inline void 3179 fault_remove_dummy_page(vm_page &dummyPage, bool isLocked) 3180 { 3181 vm_cache_ref *cacheRef = dummyPage.cache->ref; 3182 if (!isLocked) 3183 mutex_lock(&cacheRef->lock); 3184 3185 if (dummyPage.state == PAGE_STATE_BUSY) { 3186 vm_cache_remove_page(cacheRef, &dummyPage); 3187 dummyPage.state = PAGE_STATE_INACTIVE; 3188 } 3189 3190 if (!isLocked) 3191 mutex_unlock(&cacheRef->lock); 3192 3193 vm_cache_release_ref(cacheRef); 3194 } 3195 3196 3197 /*! 3198 Finds a page at the specified \a cacheOffset in either the \a topCacheRef 3199 or in its source chain. Will also page in a missing page in case there is 3200 a cache that has the page. 3201 If it couldn't find a page, it will return the vm_cache that should get it, 3202 otherwise, it will return the vm_cache that contains the cache. 3203 It always grabs a reference to the vm_cache that it returns, and also locks it. 3204 */ 3205 static inline vm_page * 3206 fault_find_page(vm_translation_map *map, vm_cache_ref *topCacheRef, 3207 off_t cacheOffset, bool isWrite, vm_page &dummyPage, vm_cache_ref **_pageRef) 3208 { 3209 vm_cache_ref *cacheRef = topCacheRef; 3210 vm_cache_ref *lastCacheRef = NULL; 3211 vm_page *page = NULL; 3212 3213 vm_cache_acquire_ref(cacheRef); 3214 mutex_lock(&cacheRef->lock); 3215 // we release this later in the loop 3216 3217 while (cacheRef != NULL) { 3218 if (lastCacheRef != NULL) 3219 vm_cache_release_ref(lastCacheRef); 3220 3221 // we hold the lock of the cacheRef at this point 3222 3223 lastCacheRef = cacheRef; 3224 3225 for (;;) { 3226 page = vm_cache_lookup_page(cacheRef, cacheOffset); 3227 if (page != NULL && page->state != PAGE_STATE_BUSY) { 3228 vm_page_set_state(page, PAGE_STATE_BUSY); 3229 break; 3230 } 3231 if (page == NULL || page == &dummyPage) 3232 break; 3233 3234 // page must be busy 3235 // ToDo: don't wait forever! 
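			// The page is busy, i.e. another thread is currently paging it in
			// or otherwise working on it: drop the cache lock, sleep briefly,
			// and retry the lookup (see the ToDo above about bounding this wait).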
3236 mutex_unlock(&cacheRef->lock); 3237 snooze(20000); 3238 mutex_lock(&cacheRef->lock); 3239 } 3240 3241 if (page != NULL && page != &dummyPage) 3242 break; 3243 3244 // The current cache does not contain the page we're looking for 3245 3246 // If we're at the top most cache, insert the dummy page here to keep other threads 3247 // from faulting on the same address and chasing us up the cache chain 3248 if (cacheRef == topCacheRef && dummyPage.state != PAGE_STATE_BUSY) 3249 fault_insert_dummy_page(cacheRef, dummyPage, cacheOffset); 3250 3251 // see if the vm_store has it 3252 vm_store *store = cacheRef->cache->store; 3253 if (store->ops->has_page != NULL && store->ops->has_page(store, cacheOffset)) { 3254 size_t bytesRead; 3255 iovec vec; 3256 3257 page = vm_page_allocate_page(PAGE_STATE_FREE); 3258 3259 // we mark that page busy reading, so that the file cache can 3260 // ignore us in case it works on the very same page 3261 // (this is actually only needed if this is the topRefCache, but we 3262 // do it anyway for simplicity's sake) 3263 dummyPage.queue_next = page; 3264 dummyPage.busy_reading = true; 3265 3266 mutex_unlock(&cacheRef->lock); 3267 3268 map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE, 3269 (addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT); 3270 vec.iov_len = bytesRead = B_PAGE_SIZE; 3271 3272 status_t status = store->ops->read(store, cacheOffset, &vec, 1, 3273 &bytesRead, false); 3274 if (status < B_OK) { 3275 // TODO: real error handling! 3276 panic("reading from store %p (cacheRef %p) returned: %s!\n", 3277 store, cacheRef, strerror(status)); 3278 } 3279 map->ops->put_physical_page((addr_t)vec.iov_base); 3280 3281 mutex_lock(&cacheRef->lock); 3282 3283 if (cacheRef == topCacheRef) 3284 fault_remove_dummy_page(dummyPage, true); 3285 3286 // We insert the queue_next here, because someone else could have 3287 // replaced our page 3288 vm_cache_insert_page(cacheRef, dummyPage.queue_next, cacheOffset); 3289 3290 if (dummyPage.queue_next != page) { 3291 // Indeed, the page got replaced by someone else - we can safely 3292 // throw our page away now 3293 vm_page_set_state(page, PAGE_STATE_FREE); 3294 page = dummyPage.queue_next; 3295 } 3296 break; 3297 } 3298 3299 vm_cache_ref *nextCacheRef; 3300 status_t status = fault_acquire_locked_source(cacheRef->cache, &nextCacheRef); 3301 if (status == B_BUSY) { 3302 // the source cache is currently in the process of being merged 3303 // with his only consumer (cacheRef); since its pages are moved 3304 // upwards, too, we try this cache again 3305 mutex_unlock(&cacheRef->lock); 3306 mutex_lock(&cacheRef->lock); 3307 lastCacheRef = NULL; 3308 continue; 3309 } else if (status < B_OK) 3310 nextCacheRef = NULL; 3311 3312 mutex_unlock(&cacheRef->lock); 3313 // at this point, we still hold a ref to this cache (through lastCacheRef) 3314 3315 cacheRef = nextCacheRef; 3316 } 3317 3318 if (page == NULL) { 3319 // there was no adequate page, determine the cache for a clean one 3320 if (cacheRef == NULL) { 3321 // We rolled off the end of the cache chain, so we need to decide which 3322 // cache will get the new page we're about to create. 3323 cacheRef = isWrite ? topCacheRef : lastCacheRef; 3324 // Read-only pages come in the deepest cache - only the 3325 // top most cache may have direct write access. 
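			// In other words, a read fault puts the new (cleared) page at the
			// bottom of the chain so that every consumer can share it, while a
			// write fault puts it directly into the area's own top cache, the
			// only cache that may be written to.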
3326 vm_cache_acquire_ref(cacheRef); 3327 mutex_lock(&cacheRef->lock); 3328 } 3329 3330 // release the reference of the last vm_cache_ref we still have from the loop above 3331 if (lastCacheRef != NULL) 3332 vm_cache_release_ref(lastCacheRef); 3333 } else { 3334 // we still own a reference to the cacheRef 3335 } 3336 3337 *_pageRef = cacheRef; 3338 return page; 3339 } 3340 3341 3342 /*! 3343 Returns the page that should be mapped into the area that got the fault. 3344 It returns the owner of the page in \a sourceRef - it keeps a reference 3345 to it, and has also locked it on exit. 3346 */ 3347 static inline vm_page * 3348 fault_get_page(vm_translation_map *map, vm_cache_ref *topCacheRef, 3349 off_t cacheOffset, bool isWrite, vm_page &dummyPage, vm_cache_ref **_sourceRef, 3350 vm_cache_ref **_copiedSourceRef) 3351 { 3352 vm_cache_ref *cacheRef; 3353 vm_page *page = fault_find_page(map, topCacheRef, cacheOffset, isWrite, 3354 dummyPage, &cacheRef); 3355 if (page == NULL) { 3356 // we still haven't found a page, so we allocate a clean one 3357 3358 page = vm_page_allocate_page(PAGE_STATE_CLEAR); 3359 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number)); 3360 3361 // Insert the new page into our cache, and replace it with the dummy page if necessary 3362 3363 // if we inserted a dummy page into this cache, we have to remove it now 3364 if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cacheRef->cache) 3365 fault_remove_dummy_page(dummyPage, true); 3366 3367 vm_cache_insert_page(cacheRef, page, cacheOffset); 3368 3369 if (dummyPage.state == PAGE_STATE_BUSY) { 3370 // we had inserted the dummy cache in another cache, so let's remove it from there 3371 fault_remove_dummy_page(dummyPage, false); 3372 } 3373 } 3374 3375 // We now have the page and a cache it belongs to - we now need to make 3376 // sure that the area's cache can access it, too, and sees the correct data 3377 3378 if (page->cache != topCacheRef->cache && isWrite) { 3379 // now we have a page that has the data we want, but in the wrong cache object 3380 // so we need to copy it and stick it into the top cache 3381 vm_page *sourcePage = page; 3382 void *source, *dest; 3383 3384 // ToDo: if memory is low, it might be a good idea to steal the page 3385 // from our source cache - if possible, that is 3386 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 3387 page = vm_page_allocate_page(PAGE_STATE_FREE); 3388 #if 0 3389 if (cacheOffset == 0x12000) 3390 dprintf("%ld: copy page %p to page %p from cache %p to cache %p\n", find_thread(NULL), 3391 sourcePage, page, sourcePage->cache, topCacheRef->cache); 3392 #endif 3393 3394 // try to get a mapping for the src and dest page so we can copy it 3395 for (;;) { 3396 map->ops->get_physical_page(sourcePage->physical_page_number * B_PAGE_SIZE, 3397 (addr_t *)&source, PHYSICAL_PAGE_CAN_WAIT); 3398 3399 if (map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE, 3400 (addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT) == B_OK) 3401 break; 3402 3403 // it couldn't map the second one, so sleep and retry 3404 // keeps an extremely rare deadlock from occuring 3405 map->ops->put_physical_page((addr_t)source); 3406 snooze(5000); 3407 } 3408 3409 memcpy(dest, source, B_PAGE_SIZE); 3410 map->ops->put_physical_page((addr_t)source); 3411 map->ops->put_physical_page((addr_t)dest); 3412 3413 vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE); 3414 3415 mutex_unlock(&cacheRef->lock); 3416 mutex_lock(&topCacheRef->lock); 3417 3418 // Insert the new 
page into our cache, and replace it with the dummy page if necessary 3419 3420 // if we inserted a dummy page into this cache, we have to remove it now 3421 if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == topCacheRef->cache) 3422 fault_remove_dummy_page(dummyPage, true); 3423 3424 vm_cache_insert_page(topCacheRef, page, cacheOffset); 3425 3426 if (dummyPage.state == PAGE_STATE_BUSY) { 3427 // we had inserted the dummy cache in another cache, so let's remove it from there 3428 fault_remove_dummy_page(dummyPage, false); 3429 } 3430 3431 *_copiedSourceRef = cacheRef; 3432 3433 cacheRef = topCacheRef; 3434 vm_cache_acquire_ref(cacheRef); 3435 } 3436 3437 *_sourceRef = cacheRef; 3438 return page; 3439 } 3440 3441 3442 static status_t 3443 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser) 3444 { 3445 vm_address_space *addressSpace; 3446 3447 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 3448 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 3449 3450 addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE); 3451 3452 if (IS_KERNEL_ADDRESS(address)) { 3453 addressSpace = vm_get_kernel_address_space(); 3454 } else if (IS_USER_ADDRESS(address)) { 3455 addressSpace = vm_get_current_user_address_space(); 3456 if (addressSpace == NULL) { 3457 if (!isUser) { 3458 dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n"); 3459 return B_BAD_ADDRESS; 3460 } else { 3461 // XXX weird state. 3462 panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n"); 3463 } 3464 } 3465 } else { 3466 // the hit was probably in the 64k DMZ between kernel and user space 3467 // this keeps a user space thread from passing a buffer that crosses 3468 // into kernel space 3469 return B_BAD_ADDRESS; 3470 } 3471 3472 atomic_add(&addressSpace->fault_count, 1); 3473 3474 // Get the area the fault was in 3475 3476 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3477 3478 vm_area *area = vm_area_lookup(addressSpace, address); 3479 if (area == NULL) { 3480 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3481 vm_put_address_space(addressSpace); 3482 dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n", 3483 originalAddress); 3484 return B_BAD_ADDRESS; 3485 } 3486 3487 // check permissions 3488 if (isUser && (area->protection & B_USER_PROTECTION) == 0) { 3489 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3490 vm_put_address_space(addressSpace); 3491 dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress); 3492 return B_PERMISSION_DENIED; 3493 } 3494 if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 3495 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3496 vm_put_address_space(addressSpace); 3497 dprintf("write access attempted on read-only area 0x%lx at %p\n", 3498 area->id, (void *)originalAddress); 3499 return B_PERMISSION_DENIED; 3500 } 3501 3502 // We have the area, it was a valid access, so let's try to resolve the page fault now. 
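	// The offset into the cache is computed below as
	//	cacheOffset = address - area->base + area->cache_offset;
	// for example (illustrative numbers), an area based at 0x80001000 with a
	// cache_offset of 0 turns a fault at 0x80003abc (already rounded down to
	// 0x80003000 above) into a cacheOffset of 0x2000.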
3503 // At first, the top most cache from the area is investigated 3504 3505 vm_cache_ref *topCacheRef = area->cache_ref; 3506 off_t cacheOffset = address - area->base + area->cache_offset; 3507 int32 changeCount = addressSpace->change_count; 3508 3509 vm_cache_acquire_ref(topCacheRef); 3510 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3511 3512 mutex_lock(&topCacheRef->lock); 3513 3514 // See if this cache has a fault handler - this will do all the work for us 3515 { 3516 vm_store *store = topCacheRef->cache->store; 3517 if (store->ops->fault != NULL) { 3518 // Note, since the page fault is resolved with interrupts enabled, the 3519 // fault handler could be called more than once for the same reason - 3520 // the store must take this into account 3521 status_t status = store->ops->fault(store, addressSpace, cacheOffset); 3522 if (status != B_BAD_HANDLER) { 3523 mutex_unlock(&topCacheRef->lock); 3524 vm_cache_release_ref(topCacheRef); 3525 vm_put_address_space(addressSpace); 3526 return status; 3527 } 3528 } 3529 } 3530 3531 mutex_unlock(&topCacheRef->lock); 3532 3533 // The top most cache has no fault handler, so let's see if the cache or its sources 3534 // already have the page we're searching for (we're going from top to bottom) 3535 3536 vm_translation_map *map = &addressSpace->translation_map; 3537 vm_page dummyPage; 3538 dummyPage.cache = NULL; 3539 dummyPage.state = PAGE_STATE_INACTIVE; 3540 dummyPage.type = PAGE_TYPE_DUMMY; 3541 dummyPage.busy_writing = isWrite; 3542 dummyPage.wired_count = 0; 3543 3544 vm_cache_ref *copiedPageSourceRef = NULL; 3545 vm_cache_ref *pageSourceRef; 3546 vm_page *page = fault_get_page(map, topCacheRef, cacheOffset, isWrite, 3547 dummyPage, &pageSourceRef, &copiedPageSourceRef); 3548 3549 status_t status = B_OK; 3550 3551 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3552 if (changeCount != addressSpace->change_count) { 3553 // something may have changed, see if the address is still valid 3554 area = vm_area_lookup(addressSpace, address); 3555 if (area == NULL 3556 || area->cache_ref != topCacheRef 3557 || (address - area->base + area->cache_offset) != cacheOffset) { 3558 dprintf("vm_soft_fault: address space layout changed effecting ongoing soft fault\n"); 3559 status = B_BAD_ADDRESS; 3560 } 3561 } 3562 3563 if (status == B_OK) { 3564 // All went fine, all there is left to do is to map the page into the address space 3565 3566 // In case this is a copy-on-write page, we need to unmap it from the area now 3567 if (isWrite && page->cache == topCacheRef->cache) 3568 vm_unmap_pages(area, address, B_PAGE_SIZE); 3569 3570 // TODO: there is currently no mechanism to prevent a page being mapped 3571 // more than once in case of a second page fault! 
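		// A write fault that was satisfied from a source cache has already had
		// the page copied into the top cache by fault_get_page(); a read fault
		// leaves the page in the source cache, and the read-only mapping set up
		// below makes the next write access fault again and take that copy path
		// (copy-on-write).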
3572 3573 // If the page doesn't reside in the area's cache, we need to make sure it's 3574 // mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write) 3575 uint32 newProtection = area->protection; 3576 if (page->cache != topCacheRef->cache && !isWrite) 3577 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 3578 3579 vm_map_page(area, page, address, newProtection); 3580 } 3581 3582 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3583 3584 mutex_unlock(&pageSourceRef->lock); 3585 vm_cache_release_ref(pageSourceRef); 3586 if (copiedPageSourceRef) 3587 vm_cache_release_ref(copiedPageSourceRef); 3588 3589 if (dummyPage.state == PAGE_STATE_BUSY) { 3590 // We still have the dummy page in the cache - that happens if we didn't need 3591 // to allocate a new page before, but could use one in another cache 3592 fault_remove_dummy_page(dummyPage, false); 3593 } 3594 3595 vm_cache_release_ref(topCacheRef); 3596 vm_put_address_space(addressSpace); 3597 3598 return status; 3599 } 3600 3601 3602 /*! You must have the address space's sem held */ 3603 vm_area * 3604 vm_area_lookup(vm_address_space *addressSpace, addr_t address) 3605 { 3606 vm_area *area; 3607 3608 // check the areas list first 3609 area = addressSpace->area_hint; 3610 if (area && area->base <= address && area->base + (area->size - 1) >= address) 3611 goto found; 3612 3613 for (area = addressSpace->areas; area != NULL; area = area->address_space_next) { 3614 if (area->id == RESERVED_AREA_ID) 3615 continue; 3616 3617 if (area->base <= address && area->base + (area->size - 1) >= address) 3618 break; 3619 } 3620 3621 found: 3622 // if the ref count is zero, the area is in the middle of being 3623 // destroyed in _vm_put_area. pretend it doesn't exist. 3624 if (area && area->ref_count == 0) 3625 return NULL; 3626 3627 if (area) 3628 addressSpace->area_hint = area; 3629 3630 return area; 3631 } 3632 3633 3634 status_t 3635 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, uint32 flags) 3636 { 3637 return (*vm_kernel_address_space()->translation_map.ops->get_physical_page)(paddr, _vaddr, flags); 3638 } 3639 3640 3641 status_t 3642 vm_put_physical_page(addr_t vaddr) 3643 { 3644 return (*vm_kernel_address_space()->translation_map.ops->put_physical_page)(vaddr); 3645 } 3646 3647 3648 void 3649 vm_unreserve_memory(size_t amount) 3650 { 3651 benaphore_lock(&sAvailableMemoryLock); 3652 3653 sAvailableMemory += amount; 3654 3655 benaphore_unlock(&sAvailableMemoryLock); 3656 } 3657 3658 3659 status_t 3660 vm_try_reserve_memory(size_t amount) 3661 { 3662 status_t status; 3663 benaphore_lock(&sAvailableMemoryLock); 3664 3665 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 3666 3667 if (sAvailableMemory > amount) { 3668 sAvailableMemory -= amount; 3669 status = B_OK; 3670 } else 3671 status = B_NO_MEMORY; 3672 3673 benaphore_unlock(&sAvailableMemoryLock); 3674 return status; 3675 } 3676 3677 3678 status_t 3679 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type) 3680 { 3681 vm_area *area = vm_get_area(id); 3682 if (area == NULL) 3683 return B_BAD_VALUE; 3684 3685 status_t status = arch_vm_set_memory_type(area, physicalBase, type); 3686 3687 vm_put_area(area); 3688 return status; 3689 } 3690 3691 3692 /** This function enforces some protection properties: 3693 * - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well 3694 * - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 3695 * - if no protection is specified, it defaults to B_KERNEL_READ_AREA 3696 * 
and B_KERNEL_WRITE_AREA. 3697 */ 3698 3699 static void 3700 fix_protection(uint32 *protection) 3701 { 3702 if ((*protection & B_KERNEL_PROTECTION) == 0) { 3703 if ((*protection & B_USER_PROTECTION) == 0 3704 || (*protection & B_WRITE_AREA) != 0) 3705 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 3706 else 3707 *protection |= B_KERNEL_READ_AREA; 3708 } 3709 } 3710 3711 3712 static void 3713 fill_area_info(struct vm_area *area, area_info *info, size_t size) 3714 { 3715 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 3716 info->area = area->id; 3717 info->address = (void *)area->base; 3718 info->size = area->size; 3719 info->protection = area->protection; 3720 info->lock = B_FULL_LOCK; 3721 info->team = area->address_space->id; 3722 info->copy_count = 0; 3723 info->in_count = 0; 3724 info->out_count = 0; 3725 // ToDo: retrieve real values here! 3726 3727 mutex_lock(&area->cache_ref->lock); 3728 3729 // Note, this is a simplification; the cache could be larger than this area 3730 info->ram_size = area->cache_ref->cache->page_count * B_PAGE_SIZE; 3731 3732 mutex_unlock(&area->cache_ref->lock); 3733 } 3734 3735 3736 /*! 3737 Tests wether or not the area that contains the specified address 3738 needs any kind of locking, and actually exists. 3739 Used by both lock_memory() and unlock_memory(). 3740 */ 3741 status_t 3742 test_lock_memory(vm_address_space *addressSpace, addr_t address, 3743 bool &needsLocking) 3744 { 3745 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3746 3747 vm_area *area = vm_area_lookup(addressSpace, address); 3748 if (area != NULL) { 3749 // This determines if we need to lock the memory at all 3750 needsLocking = area->cache_type != CACHE_TYPE_NULL 3751 && area->cache_type != CACHE_TYPE_DEVICE 3752 && area->wiring != B_FULL_LOCK 3753 && area->wiring != B_CONTIGUOUS; 3754 } 3755 3756 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3757 3758 if (area == NULL) 3759 return B_BAD_ADDRESS; 3760 3761 return B_OK; 3762 } 3763 3764 3765 // #pragma mark - 3766 3767 3768 status_t 3769 user_memcpy(void *to, const void *from, size_t size) 3770 { 3771 if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK) 3772 return B_BAD_ADDRESS; 3773 return B_OK; 3774 } 3775 3776 3777 /** \brief Copies at most (\a size - 1) characters from the string in \a from to 3778 * the string in \a to, NULL-terminating the result. 3779 * 3780 * \param to Pointer to the destination C-string. 3781 * \param from Pointer to the source C-string. 3782 * \param size Size in bytes of the string buffer pointed to by \a to. 3783 * 3784 * \return strlen(\a from). 
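 *         As with strlcpy(), a result that is \a size or larger therefore
 *         indicates that the source string did not fit and was truncated.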
3785 */ 3786 3787 ssize_t 3788 user_strlcpy(char *to, const char *from, size_t size) 3789 { 3790 return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler); 3791 } 3792 3793 3794 status_t 3795 user_memset(void *s, char c, size_t count) 3796 { 3797 if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK) 3798 return B_BAD_ADDRESS; 3799 return B_OK; 3800 } 3801 3802 // #pragma mark - kernel public API 3803 3804 3805 long 3806 lock_memory(void *address, ulong numBytes, ulong flags) 3807 { 3808 vm_address_space *addressSpace = NULL; 3809 struct vm_translation_map *map; 3810 addr_t unalignedBase = (addr_t)address; 3811 addr_t end = unalignedBase + numBytes; 3812 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 3813 bool isUser = IS_USER_ADDRESS(address); 3814 bool needsLocking = true; 3815 3816 if (isUser) 3817 addressSpace = vm_get_current_user_address_space(); 3818 else 3819 addressSpace = vm_get_kernel_address_space(); 3820 if (addressSpace == NULL) 3821 return B_ERROR; 3822 3823 // test if we're on an area that allows faults at all 3824 3825 map = &addressSpace->translation_map; 3826 3827 status_t status = test_lock_memory(addressSpace, base, needsLocking); 3828 if (status < B_OK) 3829 goto out; 3830 if (!needsLocking) 3831 goto out; 3832 3833 for (; base < end; base += B_PAGE_SIZE) { 3834 addr_t physicalAddress; 3835 uint32 protection; 3836 status_t status; 3837 3838 map->ops->lock(map); 3839 status = map->ops->query(map, base, &physicalAddress, &protection); 3840 map->ops->unlock(map); 3841 3842 if (status < B_OK) 3843 goto out; 3844 3845 if ((protection & PAGE_PRESENT) != 0) { 3846 // if B_READ_DEVICE is set, the caller intents to write to the locked 3847 // memory, so if it hasn't been mapped writable, we'll try the soft 3848 // fault anyway 3849 if ((flags & B_READ_DEVICE) == 0 3850 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 3851 // update wiring 3852 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3853 if (page == NULL) 3854 panic("couldn't lookup physical page just allocated\n"); 3855 3856 page->wired_count++; 3857 // TODO: needs to be atomic on all platforms! 3858 continue; 3859 } 3860 } 3861 3862 status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser); 3863 if (status != B_OK) { 3864 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n", 3865 (void *)unalignedBase, numBytes, flags, strerror(status)); 3866 goto out; 3867 } 3868 3869 map->ops->lock(map); 3870 status = map->ops->query(map, base, &physicalAddress, &protection); 3871 map->ops->unlock(map); 3872 3873 if (status < B_OK) 3874 goto out; 3875 3876 // update wiring 3877 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3878 if (page == NULL) 3879 panic("couldn't lookup physical page"); 3880 3881 page->wired_count++; 3882 // TODO: needs to be atomic on all platforms! 
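		// Each page wired here must later be released with unlock_memory(),
		// which walks the same range and decrements wired_count again; pages
		// that were not yet mapped have been faulted in via vm_soft_fault()
		// above before being wired.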


long
unlock_memory(void *address, ulong numBytes, ulong flags)
{
	vm_address_space *addressSpace = NULL;
	struct vm_translation_map *map;
	addr_t unalignedBase = (addr_t)address;
	addr_t end = unalignedBase + numBytes;
	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
	bool needsLocking = true;

	if (IS_USER_ADDRESS(address))
		addressSpace = vm_get_current_user_address_space();
	else
		addressSpace = vm_get_kernel_address_space();
	if (addressSpace == NULL)
		return B_ERROR;

	map = &addressSpace->translation_map;

	status_t status = test_lock_memory(addressSpace, base, needsLocking);
	if (status < B_OK)
		goto out;
	if (!needsLocking)
		goto out;

	for (; base < end; base += B_PAGE_SIZE) {
		map->ops->lock(map);

		addr_t physicalAddress;
		uint32 protection;
		status = map->ops->query(map, base, &physicalAddress,
			&protection);

		map->ops->unlock(map);

		if (status < B_OK)
			goto out;
		if ((protection & PAGE_PRESENT) == 0)
			panic("calling unlock_memory() on unmapped memory!");

		// update wiring
		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
		if (page == NULL)
			panic("couldn't lookup physical page");

		page->wired_count--;
			// TODO: needs to be atomic on all platforms!
	}

out:
	vm_put_address_space(addressSpace);
	return status;
}


/** According to the BeBook, this function should always succeed.
 *	This is no longer the case.
 */

long
get_memory_map(const void *address, ulong numBytes, physical_entry *table,
	long numEntries)
{
	vm_address_space *addressSpace;
	addr_t virtualAddress = (addr_t)address;
	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
	addr_t physicalAddress;
	status_t status = B_OK;
	int32 index = -1;
	addr_t offset = 0;
	bool interrupts = are_interrupts_enabled();

	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes,
		numEntries));

	if (numEntries == 0 || numBytes == 0)
		return B_BAD_VALUE;

	// in which address space is the address to be found?
	if (IS_USER_ADDRESS(virtualAddress))
		addressSpace = vm_get_current_user_address_space();
	else
		addressSpace = vm_get_kernel_address_space();

	if (addressSpace == NULL)
		return B_ERROR;

	vm_translation_map *map = &addressSpace->translation_map;

	if (interrupts)
		map->ops->lock(map);

	while (offset < numBytes) {
		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
		uint32 flags;

		if (interrupts) {
			status = map->ops->query(map, (addr_t)address + offset,
				&physicalAddress, &flags);
		} else {
			status = map->ops->query_interrupt(map, (addr_t)address + offset,
				&physicalAddress, &flags);
		}
		if (status < B_OK)
			break;
		if ((flags & PAGE_PRESENT) == 0) {
			panic("get_memory_map() called on unmapped memory!");
			return B_BAD_ADDRESS;
		}

		if (index < 0 && pageOffset > 0) {
			physicalAddress += pageOffset;
			if (bytes > B_PAGE_SIZE - pageOffset)
				bytes = B_PAGE_SIZE - pageOffset;
		}

		// need to switch to the next physical_entry?
		if (index < 0 || (addr_t)table[index].address
				!= physicalAddress - table[index].size) {
			if (++index + 1 > numEntries) {
				// table too small
				status = B_BUFFER_OVERFLOW;
				break;
			}
			table[index].address = (void *)physicalAddress;
			table[index].size = bytes;
		} else {
			// page does fit in current entry
			table[index].size += bytes;
		}

		offset += bytes;
	}

	if (interrupts)
		map->ops->unlock(map);

	// close the entry list

	if (status == B_OK) {
		// if it's only one entry, we will silently accept the missing ending
		if (numEntries == 1)
			return B_OK;

		if (++index + 1 > numEntries)
			return B_BUFFER_OVERFLOW;

		table[index].address = NULL;
		table[index].size = 0;
	}

	return status;
}
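

/*!
	Usage sketch (illustrative only, not part of the original sources):
	translating a locked buffer into a scatter/gather list. Each entry
	describes one physically contiguous run; unless the table has only one
	slot, the list is terminated by an entry with a size of 0 (see above):

	\code
	physical_entry table[8];
	status_t status = get_memory_map(buffer, length, table, 8);
	if (status == B_OK) {
		for (int32 i = 0; i < 8 && table[i].size > 0; i++) {
			// table[i].address is the physical start of the run,
			// table[i].size its length in bytes
		}
	}
	\endcode

	The caller is expected to have locked the range beforehand (for example
	via lock_memory() above), since unmapped pages cause a panic here.
*/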


area_id
area_for(void *address)
{
	return vm_area_for(vm_kernel_address_space_id(), (addr_t)address);
}


area_id
find_area(const char *name)
{
	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
	struct hash_iterator iterator;
	hash_open(sAreaHash, &iterator);

	vm_area *area;
	area_id id = B_NAME_NOT_FOUND;
	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
		if (area->id == RESERVED_AREA_ID)
			continue;

		if (!strcmp(area->name, name)) {
			id = area->id;
			break;
		}
	}

	hash_close(sAreaHash, &iterator, false);
	release_sem_etc(sAreaHashLock, READ_COUNT, 0);

	return id;
}


status_t
_get_area_info(area_id id, area_info *info, size_t size)
{
	if (size != sizeof(area_info) || info == NULL)
		return B_BAD_VALUE;

	vm_area *area = vm_get_area(id);
	if (area == NULL)
		return B_BAD_VALUE;

	fill_area_info(area, info, size);
	vm_put_area(area);

	return B_OK;
}


status_t
_get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
{
	addr_t nextBase = *(addr_t *)cookie;

	// we're already through the list
	if (nextBase == (addr_t)-1)
		return B_ENTRY_NOT_FOUND;

	if (team == B_CURRENT_TEAM)
		team = team_get_current_team_id();

	vm_address_space *addressSpace;
	if (!team_is_valid(team)
		|| team_get_address_space(team, &addressSpace) != B_OK)
		return B_BAD_VALUE;

	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);

	vm_area *area;
	for (area = addressSpace->areas; area; area = area->address_space_next) {
		if (area->id == RESERVED_AREA_ID)
			continue;

		if (area->base > nextBase)
			break;
	}

	// make sure this area won't go away
	if (area != NULL)
		area = vm_get_area(area->id);

	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
	vm_put_address_space(addressSpace);

	if (area == NULL) {
		nextBase = (addr_t)-1;
		return B_ENTRY_NOT_FOUND;
	}

	fill_area_info(area, info, size);
	*cookie = (int32)(area->base);

	vm_put_area(area);

	return B_OK;
}


status_t
set_area_protection(area_id area, uint32 newProtection)
{
	fix_protection(&newProtection);

	return vm_set_area_protection(vm_kernel_address_space_id(), area,
		newProtection);
}
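

/*!
	Usage sketch (illustrative only, not part of the original sources):
	walking all areas of the current team. The get_next_area_info() macro
	from OS.h is assumed to expand to the _get_next_area_info() call above,
	with the cookie starting out as 0:

	\code
	int32 cookie = 0;
	area_info info;
	while (get_next_area_info(B_CURRENT_TEAM, &cookie, &info) == B_OK) {
		dprintf("area %ld (%s): address %p, size %lu bytes\n",
			(long)info.area, info.name, info.address,
			(unsigned long)info.size);
	}
	\endcode
*/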


status_t
resize_area(area_id areaID, size_t newSize)
{
	vm_area *current;

	// is newSize a multiple of B_PAGE_SIZE?
	if (newSize & (B_PAGE_SIZE - 1))
		return B_BAD_VALUE;

	vm_area *area = vm_get_area(areaID);
	if (area == NULL)
		return B_BAD_VALUE;

	vm_cache_ref *cacheRef = area->cache_ref;
	mutex_lock(&cacheRef->lock);

	// Resize all areas of this area's cache

	size_t oldSize = area->size;
	status_t status = B_OK;

	// ToDo: we should only allow resizing of anonymous memory areas!
	if (!cacheRef->cache->temporary) {
		status = B_NOT_ALLOWED;
		goto out;
	}

	// ToDo: we must lock all address spaces here!
	if (oldSize < newSize) {
		// We need to check if all areas of this cache can be resized

		for (current = cacheRef->areas; current; current = current->cache_next) {
			if (current->address_space_next
				&& current->address_space_next->base <= (current->base
					+ newSize)) {
				// if the area was created inside a reserved area, it can also
				// be resized in that area
				// ToDo: if there is free space after the reserved area, it
				// could be used as well...
				vm_area *next = current->address_space_next;
				if (next->id == RESERVED_AREA_ID
					&& next->cache_offset <= current->base
					&& next->base - 1 + next->size >= current->base - 1 + newSize)
					continue;

				status = B_ERROR;
				goto out;
			}
		}
	}

	// Okay, looks good so far, so let's do it

	for (current = cacheRef->areas; current; current = current->cache_next) {
		if (current->address_space_next
			&& current->address_space_next->base <= (current->base + newSize)) {
			vm_area *next = current->address_space_next;
			if (next->id == RESERVED_AREA_ID
				&& next->cache_offset <= current->base
				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
				// resize reserved area
				addr_t offset = current->base + newSize - next->base;
				if (next->size <= offset) {
					current->address_space_next = next->address_space_next;
					free(next);
				} else {
					next->size -= offset;
					next->base += offset;
				}
			} else {
				status = B_ERROR;
				break;
			}
		}

		current->size = newSize;

		// we also need to unmap all pages beyond the new size, if the area
		// has shrunk
		if (newSize < oldSize)
			vm_unmap_pages(current, current->base + newSize, oldSize - newSize);
	}

	if (status == B_OK)
		status = vm_cache_resize(cacheRef, newSize);

	if (status < B_OK) {
		// This shouldn't really be possible, but hey, who knows
		for (current = cacheRef->areas; current; current = current->cache_next)
			current->size = oldSize;
	}

out:
	mutex_unlock(&cacheRef->lock);
	vm_put_area(area);

	// ToDo: we must honour the lock restrictions of this area
	return status;
}
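

/*!
	Usage sketch (illustrative only, not part of the original sources):
	growing an existing area. The new size has to be a multiple of
	B_PAGE_SIZE, and only areas backed by an anonymous (temporary) cache
	may currently be resized:

	\code
	status_t status = resize_area(id, 16 * B_PAGE_SIZE);
	if (status < B_OK) {
		// B_NOT_ALLOWED: the area is not backed by anonymous memory;
		// B_ERROR: a neighbouring area is in the way, so the area
		// cannot grow in place
	}
	\endcode
*/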


/** Transfers the specified area to a new team. The caller must be the owner
 *	of the area (not yet enforced but probably should be).
 *	This function is currently not exported to the kernel namespace, but is
 *	only accessible using the _kern_transfer_area() syscall.
 */

static status_t
transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
{
	vm_address_space *sourceAddressSpace;
	vm_address_space *targetAddressSpace;
	void *reservedAddress = NULL;
	vm_area *reserved;
	vm_area *area = vm_get_area(id);
	if (area == NULL)
		return B_BAD_VALUE;

	// ToDo: check if the current team owns the area
	status_t status = team_get_address_space(target, &targetAddressSpace);
	if (status != B_OK)
		goto err1;

	// We will first remove the area, and then reserve its former
	// address range so that we can later reclaim it if the
	// transfer failed.

	sourceAddressSpace = area->address_space;
	reserved = create_reserved_area_struct(sourceAddressSpace, 0);
	if (reserved == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}

	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);

	// unmap the area in the source address space
	vm_unmap_pages(area, area->base, area->size);

	// TODO: there might be additional page faults at this point!

	reservedAddress = (void *)area->base;
	remove_area_from_address_space(sourceAddressSpace, area, true);
	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
		area->size, reserved);
		// famous last words: this cannot fail :)

	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);

	if (status != B_OK)
		goto err3;

	// insert the area into the target address space

	acquire_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0, 0);
	// check to see if this address space has entered DELETE state
	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area and have to back out
		status = B_BAD_TEAM_ID;
		goto err4;
	}

	status = insert_area(targetAddressSpace, _address, addressSpec, area->size,
		area);
	if (status < B_OK)
		goto err4;

	// The area was successfully transferred to the new team when we got here
	area->address_space = targetAddressSpace;

	// TODO: take area lock/wiring into account!

	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);

	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress,
		area->size);
	vm_put_address_space(sourceAddressSpace);
		// we keep the reference of the target address space for the
		// area, so we only have to put the one from the source
	vm_put_area(area);

	return B_OK;

err4:
	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
err3:
	// insert the area again into the source address space
	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
	// check to see if this address space has entered DELETE state
	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
			area->size, area) != B_OK) {
		// We can't insert the area anymore - we have to delete it manually
		vm_cache_remove_area(area->cache_ref, area);
		vm_cache_release_ref(area->cache_ref);
		free(area->name);
		free(area);
		area = NULL;
	}
	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
err2:
	vm_put_address_space(targetAddressSpace);
err1:
	if (area != NULL)
		vm_put_area(area);
	return status;
}


area_id
map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
	uint32 addressSpec, uint32 protection, void **_virtualAddress)
{
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	fix_protection(&protection);

	return vm_map_physical_memory(vm_kernel_address_space_id(), name,
		_virtualAddress, addressSpec, numBytes, protection,
		(addr_t)physicalAddress);
}


area_id
clone_area(const char *name, void **_address, uint32 addressSpec,
	uint32 protection, area_id source)
{
	if ((protection & B_KERNEL_PROTECTION) == 0)
		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;

	return vm_clone_area(vm_kernel_address_space_id(), name, _address,
		addressSpec, protection, REGION_NO_PRIVATE_MAP, source);
}


area_id
create_area_etc(struct team *team, const char *name, void **address,
	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(team->id, (char *)name, address,
		addressSpec, size, lock, protection);
}


area_id
create_area(const char *name, void **_address, uint32 addressSpec, size_t size,
	uint32 lock, uint32 protection)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name,
		_address, addressSpec, size, lock, protection);
}


status_t
delete_area_etc(struct team *team, area_id area)
{
	return vm_delete_area(team->id, area);
}


status_t
delete_area(area_id area)
{
	return vm_delete_area(vm_kernel_address_space_id(), area);
}
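

/*!
	Usage sketch (illustrative only, not part of the original sources): a
	typical kernel allocation of a few wired pages. If only user protection
	bits (or none at all) were passed, fix_protection() above would add the
	matching kernel bits automatically:

	\code
	void *address;
	area_id id = create_area("some buffer", &address, B_ANY_KERNEL_ADDRESS,
		4 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (id < B_OK)
		return id;

	// ... use the memory at address ...

	delete_area(id);
	\endcode
*/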


// #pragma mark - Userland syscalls


status_t
_user_reserve_heap_address_range(addr_t *userAddress, uint32 addressSpec,
	addr_t size)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	addr_t address;

	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	status_t status = vm_reserve_address_range(
		vm_current_user_address_space_id(), (void **)&address, addressSpec,
		size, RESERVED_AVOID_BASE);
	if (status < B_OK)
		return status;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		vm_unreserve_address_range(vm_current_user_address_space_id(),
			(void *)address, size);
		return B_BAD_ADDRESS;
	}

	return B_OK;
}


area_id
_user_area_for(void *address)
{
	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
}


area_id
_user_find_area(const char *userName)
{
	char name[B_OS_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return find_area(name);
}


status_t
_user_get_area_info(area_id area, area_info *userInfo)
{
	if (!IS_USER_ADDRESS(userInfo))
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = get_area_info(area, &info);
	if (status < B_OK)
		return status;

	// TODO: do we want to prevent userland from seeing kernel protections?
	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
{
	int32 cookie;

	if (!IS_USER_ADDRESS(userCookie)
		|| !IS_USER_ADDRESS(userInfo)
		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = _get_next_area_info(team, &cookie, &info,
		sizeof(area_info));
	if (status != B_OK)
		return status;

	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_set_area_protection(area_id area, uint32 newProtection)
{
	if ((newProtection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	fix_protection(&newProtection);

	return vm_set_area_protection(vm_current_user_address_space_id(), area,
		newProtection);
}


status_t
_user_resize_area(area_id area, size_t newSize)
{
	// ToDo: Since we restrict deleting of areas to those owned by the team,
	// we should also do that for resizing (check other functions, too).
	return resize_area(area, newSize);
}


status_t
_user_transfer_area(area_id area, void **userAddress, uint32 addressSpec,
	team_id target)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	void *address;
	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	status_t status = transfer_area(area, &address, addressSpec, target);
	if (status < B_OK)
		return status;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


area_id
_user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
	uint32 protection, area_id sourceArea)
{
	char name[B_OS_NAME_LENGTH];
	void *address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	fix_protection(&protection);

	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(),
		name, &address, addressSpec, protection, REGION_NO_PRIVATE_MAP,
		sourceArea);
	if (clonedArea < B_OK)
		return clonedArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(clonedArea);
		return B_BAD_ADDRESS;
	}

	return clonedArea;
}


area_id
_user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
	size_t size, uint32 lock, uint32 protection)
{
	char name[B_OS_NAME_LENGTH];
	void *address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS
		&& IS_KERNEL_ADDRESS(address))
		return B_BAD_VALUE;

	fix_protection(&protection);

	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
		(char *)name, &address, addressSpec, size, lock, protection);

	if (area >= B_OK
		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(area);
		return B_BAD_ADDRESS;
	}

	return area;
}


status_t
_user_delete_area(area_id area)
{
	// Unlike the BeOS implementation, you can now only delete areas
	// that you have created yourself from userland.
	// The documentation for delete_area() explicitly states that this
	// will be restricted in the future, and so it will.
	return vm_delete_area(vm_current_user_address_space_id(), area);
}