/*
 * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include "vm_store_anonymous_noswap.h"
#include "vm_store_device.h"
#include "vm_store_null.h"

#include <OS.h>
#include <KernelExport.h>

#include <vm.h>
#include <vm_address_space.h>
#include <vm_priv.h>
#include <vm_page.h>
#include <vm_cache.h>
#include <vm_low_memory.h>
#include <file_cache.h>
#include <memheap.h>
#include <debug.h>
#include <console.h>
#include <int.h>
#include <smp.h>
#include <lock.h>
#include <thread.h>
#include <team.h>

#include <boot/stage2.h>
#include <boot/elf.h>

#include <arch/cpu.h>
#include <arch/vm.h>

#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>

//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif

#define ROUNDUP(a, b) (((a) + ((b)-1)) & ~((b)-1))
#define ROUNDOWN(a, b) (((a) / (b)) * (b))


#define REGION_HASH_TABLE_SIZE 1024
static area_id sNextAreaID;
static hash_table *sAreaHash;
static sem_id sAreaHashLock;
static spinlock sMappingLock;

static off_t sAvailableMemory;
static benaphore sAvailableMemoryLock;

// function declarations
static status_t vm_soft_fault(addr_t address, bool is_write, bool is_user);
static bool vm_put_area(vm_area *area);


static int
area_compare(void *_area, const void *key)
{
	vm_area *area = (vm_area *)_area;
	const area_id *id = (const area_id *)key;

	if (area->id == *id)
		return 0;

	return -1;
}


static uint32
area_hash(void *_area, const void *key, uint32 range)
{
	vm_area *area = (vm_area *)_area;
	const area_id *id = (const area_id *)key;

	if (area != NULL)
		return area->id % range;

	return (uint32)*id % range;
}


static vm_area *
vm_get_area(area_id id)
{
	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);

	vm_area *area = (vm_area *)hash_lookup(sAreaHash, &id);
	if (area != NULL)
		atomic_add(&area->ref_count, 1);

	release_sem_etc(sAreaHashLock, READ_COUNT, 0);

	return area;
}


static vm_area *
create_reserved_area_struct(vm_address_space *addressSpace, uint32 flags)
{
	vm_area *reserved = (vm_area *)malloc(sizeof(vm_area));
	if (reserved == NULL)
		return NULL;

	memset(reserved, 0, sizeof(vm_area));
	reserved->id = RESERVED_AREA_ID;
		// this marks it as reserved space
	reserved->protection = flags;
	reserved->address_space = addressSpace;

	return reserved;
}


static vm_area *
create_area_struct(vm_address_space *addressSpace, const char *name,
	uint32 wiring, uint32 protection)
{
	// restrict the area name to B_OS_NAME_LENGTH
	size_t length = strlen(name) + 1;
	if (length > B_OS_NAME_LENGTH)
		length = B_OS_NAME_LENGTH;

	vm_area *area = (vm_area *)malloc(sizeof(vm_area));
	if (area == NULL)
		return NULL;

	area->name = (char *)malloc(length);
	if (area->name == NULL) {
		free(area);
		return NULL;
	}
	strlcpy(area->name, name, length);

	area->id = atomic_add(&sNextAreaID, 1);
	area->base = 0;
	area->size = 0;
	area->protection = protection;
	area->wiring = wiring;
	area->memory_type = 0;
	area->ref_count = 1;

	area->cache_ref = NULL;
	area->cache_offset = 0;

	area->address_space = addressSpace;
	area->address_space_next = NULL;
	area->cache_next = area->cache_prev = NULL;
	area->hash_next = NULL;
	new (&area->mappings) vm_area_mappings;

	return area;
}


/**	Finds a reserved area that covers the region spanned by \a start and
 *	\a size, inserts the \a area into that region and makes sure that
 *	there are reserved regions for the remaining parts.
 */

static status_t
find_reserved_area(vm_address_space *addressSpace, addr_t start,
	addr_t size, vm_area *area)
{
	vm_area *next, *last = NULL;

	next = addressSpace->areas;
	while (next) {
		if (next->base <= start && next->base + next->size >= start + size) {
			// this area covers the requested range
			if (next->id != RESERVED_AREA_ID) {
				// but it's not reserved space, it's a real area
				return B_BAD_VALUE;
			}

			break;
		}
		last = next;
		next = next->address_space_next;
	}
	if (next == NULL)
		return B_ENTRY_NOT_FOUND;

	// now we have to transfer the requested part of the reserved
	// range to the new area - and remove, resize or split the old
	// reserved area.

	if (start == next->base) {
		// the area starts at the beginning of the reserved range
		if (last)
			last->address_space_next = area;
		else
			addressSpace->areas = area;

		if (size == next->size) {
			// the new area fully covers the reserved range
			area->address_space_next = next->address_space_next;
			free(next);
		} else {
			// resize the reserved range behind the area
			area->address_space_next = next;
			next->base += size;
			next->size -= size;
		}
	} else if (start + size == next->base + next->size) {
		// the area is at the end of the reserved range
		area->address_space_next = next->address_space_next;
		next->address_space_next = area;

		// resize the reserved range before the area
		next->size = start - next->base;
	} else {
		// the area splits the reserved range into two separate ones
		// we need a new reserved area to cover this space
		vm_area *reserved = create_reserved_area_struct(addressSpace,
			next->protection);
		if (reserved == NULL)
			return B_NO_MEMORY;

		reserved->address_space_next = next->address_space_next;
		area->address_space_next = reserved;
		next->address_space_next = area;

		// resize regions
		reserved->size = next->base + next->size - start - size;
		next->size = start - next->base;
		reserved->base = start + size;
		reserved->cache_offset = next->cache_offset;
	}

	area->base = start;
	area->size = size;
	addressSpace->change_count++;

	return B_OK;
}

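
// Illustrative sketch (added for clarity, not part of the original source):
// the reserve-then-carve pattern that find_reserved_area() supports. A caller
// first reserves a larger range and later creates an area at an exact address
// inside it; the leftover parts of the reservation stay reserved. The function
// name and all sizes/addresses below are made-up examples; the VM calls used
// are the ones defined later in this file.
#if 0
static void
example_reserve_and_carve(void)
{
	void *base = NULL;

	// reserve 16 pages anywhere in the kernel address space
	if (vm_reserve_address_range(vm_kernel_address_space_id(), &base,
			B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, 0) != B_OK)
		return;

	// carve an area out of the middle of the reservation;
	// find_reserved_area() splits the reservation into the parts
	// before and after the new area
	void *address = (void *)((addr_t)base + 4 * B_PAGE_SIZE);
	area_id area = vm_create_anonymous_area(vm_kernel_address_space_id(),
		"example area", &address, B_EXACT_ADDRESS, 4 * B_PAGE_SIZE,
		B_NO_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (area < B_OK) {
		// give the whole reservation back if the area could not be created
		vm_unreserve_address_range(vm_kernel_address_space_id(), base,
			16 * B_PAGE_SIZE);
	}
}
#endif
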
/*!	Must be called with this address space's sem held */
static status_t
find_and_insert_area_slot(vm_address_space *addressSpace, addr_t start,
	addr_t size, addr_t end, uint32 addressSpec, vm_area *area)
{
	vm_area *last = NULL;
	vm_area *next;
	bool foundSpot = false;

	TRACE(("find_and_insert_area_slot: address space %p, start 0x%lx, "
		"size %ld, end 0x%lx, addressSpec %ld, area %p\n", addressSpace, start,
		size, end, addressSpec, area));

	// do some sanity checking
	if (start < addressSpace->base || size == 0
		|| (end - 1) > (addressSpace->base + (addressSpace->size - 1))
		|| start + size > end)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS) {
		// search for a reserved area
		status_t status = find_reserved_area(addressSpace, start, size, area);
		if (status == B_OK || status == B_BAD_VALUE)
			return status;

		// there was no reserved area, and the slot doesn't seem to be used
		// already
		// ToDo: this could be further optimized.
	}

	// walk up to the spot where we should start searching
second_chance:
	next = addressSpace->areas;
	while (next) {
		if (next->base >= start + size) {
			// we have a winner
			break;
		}
		last = next;
		next = next->address_space_next;
	}

	// find the right spot depending on the address specification - the area
	// will be inserted directly after "last" ("next" is not referenced anymore)

	switch (addressSpec) {
		case B_ANY_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			// find a hole big enough for a new area
			if (!last) {
				// see if we can build it at the beginning of the virtual map
				if (!next || (next->base >= addressSpace->base + size)) {
					foundSpot = true;
					area->base = addressSpace->base;
					break;
				}
				last = next;
				next = next->address_space_next;
			}
			// keep walking
			while (next) {
				if (next->base >= last->base + last->size + size) {
					// we found a spot (it'll be filled up below)
					break;
				}
				last = next;
				next = next->address_space_next;
			}

			if ((addressSpace->base + (addressSpace->size - 1))
					>= (last->base + last->size + (size - 1))) {
				// got a spot
				foundSpot = true;
				area->base = last->base + last->size;
				break;
			} else {
				// we didn't find a free spot - if there were any reserved
				// areas with the RESERVED_AVOID_BASE flag set, we can now
				// test those for free space
				// ToDo: it would make sense to start with the biggest of them
				next = addressSpace->areas;
				last = NULL;
				for (last = NULL; next;
						last = next, next = next->address_space_next) {
					// ToDo: take free space after the reserved area into account!
					if (next->size == size) {
						// the reserved area is entirely covered, and thus,
						// removed
						if (last)
							last->address_space_next = next->address_space_next;
						else
							addressSpace->areas = next->address_space_next;

						foundSpot = true;
						area->base = next->base;
						free(next);
						break;
					}
					if (next->size >= size) {
						// the new area will be placed at the end of the
						// reserved area, and the reserved area will be
						// resized to make space
						foundSpot = true;
						next->size -= size;
						last = next;
						area->base = next->base + next->size;
						break;
					}
				}
			}
			break;

		case B_BASE_ADDRESS:
			// find a hole big enough for a new area beginning with "start"
			if (!last) {
				// see if we can build it at the beginning of the specified start
				if (!next || (next->base >= start + size)) {
					foundSpot = true;
					area->base = start;
					break;
				}
				last = next;
				next = next->address_space_next;
			}
			// keep walking
			while (next) {
				if (next->base >= last->base + last->size + size) {
					// we found a spot (it'll be filled up below)
					break;
				}
				last = next;
				next = next->address_space_next;
			}

			if ((addressSpace->base + (addressSpace->size - 1))
					>= (last->base + last->size + (size - 1))) {
				// got a spot
				foundSpot = true;
				if (last->base + last->size <= start)
					area->base = start;
				else
					area->base = last->base + last->size;
				break;
			}
			// we didn't find a free spot in the requested range, so we'll
			// try again without any restrictions
			start = addressSpace->base;
			addressSpec = B_ANY_ADDRESS;
			last = NULL;
			goto second_chance;

		case B_EXACT_ADDRESS:
			// see if we can create it exactly here
			if (!last) {
				if (!next || (next->base >= start + size)) {
					foundSpot = true;
					area->base = start;
					break;
				}
			} else {
				if (next) {
					if (last->base + last->size <= start
						&& next->base >= start + size) {
						foundSpot = true;
						area->base = start;
						break;
					}
				} else {
					if ((last->base + (last->size - 1)) <= start - 1) {
						foundSpot = true;
						area->base = start;
					}
				}
			}
			break;
		default:
			return B_BAD_VALUE;
	}

	if (!foundSpot)
		return addressSpec == B_EXACT_ADDRESS ? B_BAD_VALUE : B_NO_MEMORY;

	area->size = size;
	if (last) {
		area->address_space_next = last->address_space_next;
		last->address_space_next = area;
	} else {
		area->address_space_next = addressSpace->areas;
		addressSpace->areas = area;
	}
	addressSpace->change_count++;
	return B_OK;
}

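
// Illustrative sketch (added for clarity, not part of the original source):
// how the address specifications handled above behave from a caller's point
// of view. The function name, the address 0x80000000 and the sizes are
// arbitrary example values.
#if 0
static void
example_address_specs(void)
{
	void *address = (void *)0x80000000;
	team_id team = vm_kernel_address_space_id();

	// B_EXACT_ADDRESS: only succeeds if the range at *address is free
	// (or covered by a reserved range), otherwise the creation fails
	area_id exact = vm_create_anonymous_area(team, "exact example", &address,
		B_EXACT_ADDRESS, 4 * B_PAGE_SIZE, B_NO_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);

	// B_BASE_ADDRESS: the search starts at *address, but falls back to
	// B_ANY_ADDRESS (see the "second_chance" path above) if no hole is
	// found above that address
	area_id base = vm_create_anonymous_area(team, "base example", &address,
		B_BASE_ADDRESS, 4 * B_PAGE_SIZE, B_NO_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);

	(void)exact;
	(void)base;
}
#endif
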
/**	This inserts the area you pass into the specified address space.
 *	It will also set the "_address" argument to its base address when
 *	the call succeeds.
 *	You need to hold the vm_address_space semaphore.
 */

static status_t
insert_area(vm_address_space *addressSpace, void **_address,
	uint32 addressSpec, addr_t size, vm_area *area)
{
	addr_t searchBase, searchEnd;
	status_t status;

	switch (addressSpec) {
		case B_EXACT_ADDRESS:
			searchBase = (addr_t)*_address;
			searchEnd = (addr_t)*_address + size;
			break;

		case B_BASE_ADDRESS:
			searchBase = (addr_t)*_address;
			searchEnd = addressSpace->base + (addressSpace->size - 1);
			break;

		case B_ANY_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			searchBase = addressSpace->base;
			searchEnd = addressSpace->base + (addressSpace->size - 1);
			break;

		default:
			return B_BAD_VALUE;
	}

	status = find_and_insert_area_slot(addressSpace, searchBase, size,
		searchEnd, addressSpec, area);
	if (status == B_OK) {
		// ToDo: do we have to do anything about B_ANY_KERNEL_ADDRESS
		// vs. B_ANY_KERNEL_BLOCK_ADDRESS here?
		*_address = (void *)area->base;
	}

	return status;
}


static status_t
map_backing_store(vm_address_space *addressSpace, vm_cache_ref *cacheRef,
	void **_virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
	int wiring, int protection, int mapping, vm_area **_area,
	const char *areaName)
{
	TRACE(("map_backing_store: aspace %p, cacheref %p, *vaddr %p, "
		"offset 0x%Lx, size %lu, addressSpec %ld, wiring %d, protection %d, "
		"_area %p, area_name '%s'\n", addressSpace, cacheRef, *_virtualAddress,
		offset, size, addressSpec, wiring, protection, _area, areaName));

	vm_area *area = create_area_struct(addressSpace, areaName, wiring,
		protection);
	if (area == NULL)
		return B_NO_MEMORY;

	mutex_lock(&cacheRef->lock);

	vm_cache *cache = cacheRef->cache;
	vm_store *store = cache->store;
	bool unlock = true;
	status_t status;

	// if this is a private map, we need to create a new cache & store object
	// pair to handle the private copies of pages as they are written to
	if (mapping == REGION_PRIVATE_MAP) {
		vm_cache_ref *newCacheRef;
		vm_cache *newCache;
		vm_store *newStore;

		// create an anonymous store object
		newStore = vm_store_create_anonymous_noswap(
			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES);
		if (newStore == NULL) {
			status = B_NO_MEMORY;
			goto err1;
		}
		newCache = vm_cache_create(newStore);
		if (newCache == NULL) {
			status = B_NO_MEMORY;
			newStore->ops->destroy(newStore);
			goto err1;
		}
		status = vm_cache_ref_create(newCache);
		if (status < B_OK) {
			newStore->ops->destroy(newStore);
			free(newCache);
			goto err1;
		}

		newCacheRef = newCache->ref;
		newCache->type = CACHE_TYPE_RAM;
		newCache->temporary = 1;
		newCache->scan_skip = cache->scan_skip;

		vm_cache_add_consumer_locked(cacheRef, newCache);

		mutex_unlock(&cacheRef->lock);
		mutex_lock(&newCacheRef->lock);

		cache = newCache;
		cacheRef = newCache->ref;
		store = newStore;
		cache->virtual_base = offset;
		cache->virtual_size = offset + size;
	}

	status = vm_cache_set_minimal_commitment_locked(cacheRef, offset + size);
	if (status != B_OK)
		goto err2;

	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);

	// check to see if this address space has entered DELETE state
	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
		// okay, someone is trying to delete this address space now, so we can't
		// insert the area, so back out
		status = B_BAD_TEAM_ID;
		goto err3;
	}

	status = insert_area(addressSpace, _virtualAddress, addressSpec, size, area);
	if (status < B_OK)
		goto err3;

	// attach the cache to the area
	area->cache_ref = cacheRef;
	area->cache_offset = offset;

	// point the cache back to the area
	vm_cache_insert_area_locked(cacheRef, area);
	mutex_unlock(&cacheRef->lock);

	// insert the area in the global area hash table
	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
	hash_insert(sAreaHash, area);
	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);

	// grab a ref to the address space (the area holds this)
	atomic_add(&addressSpace->ref_count, 1);

	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);

	*_area = area;
	return B_OK;

err3:
	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
err2:
	if (mapping == REGION_PRIVATE_MAP) {
		// we created this cache, so we must delete it again
		mutex_unlock(&cacheRef->lock);
		vm_cache_release_ref(cacheRef);
		unlock = false;
	}
err1:
	if (unlock)
		mutex_unlock(&cacheRef->lock);
	free(area->name);
	free(area);
	return status;
}


status_t
vm_unreserve_address_range(team_id team, void *address, addr_t size)
{
	vm_address_space *addressSpace;
	vm_area *area, *last = NULL;
	status_t status = B_OK;

	addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);

	// check to see if this address space has entered DELETE state
	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
		// okay, someone is trying to delete this address space now, so we can't
		// insert the area, so back out
		status = B_BAD_TEAM_ID;
		goto out;
	}

	// search area list and remove any matching reserved ranges

	area = addressSpace->areas;
	while (area) {
		// the area must be completely part of the reserved range
		if (area->id == RESERVED_AREA_ID && area->base >= (addr_t)address
			&& area->base + area->size <= (addr_t)address + size) {
			// remove reserved range
			vm_area *reserved = area;
			if (last)
				last->address_space_next = reserved->address_space_next;
			else
				addressSpace->areas = reserved->address_space_next;

			area = reserved->address_space_next;
			free(reserved);
			continue;
		}

		last = area;
		area = area->address_space_next;
	}

out:
	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
	vm_put_address_space(addressSpace);
	return status;
}

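
// Illustrative sketch (added for clarity, not part of the original source):
// the store -> cache -> cache_ref construction and error-handling pattern
// that map_backing_store() above and the area creation functions below all
// follow. The function name is hypothetical; the calls and cleanup order
// mirror the ones used in this file.
#if 0
static status_t
example_create_anonymous_cache(vm_cache_ref **_cacheRef)
{
	// 1. create the backing store
	vm_store *store = vm_store_create_anonymous_noswap(false, 0, 0);
	if (store == NULL)
		return B_NO_MEMORY;

	// 2. create the cache on top of the store
	vm_cache *cache = vm_cache_create(store);
	if (cache == NULL) {
		store->ops->destroy(store);
		return B_NO_MEMORY;
	}

	// 3. create the cache_ref that areas will point to
	status_t status = vm_cache_ref_create(cache);
	if (status < B_OK) {
		store->ops->destroy(store);
		free(cache);
		return status;
	}

	cache->type = CACHE_TYPE_RAM;
	cache->temporary = 1;

	*_cacheRef = cache->ref;
	return B_OK;
}
#endif
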
status_t
vm_reserve_address_range(team_id team, void **_address, uint32 addressSpec,
	addr_t size, uint32 flags)
{
	vm_address_space *addressSpace;
	vm_area *area;
	status_t status = B_OK;

	if (size == 0)
		return B_BAD_VALUE;

	addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	area = create_reserved_area_struct(addressSpace, flags);
	if (area == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}

	acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);

	// check to see if this address space has entered DELETE state
	if (addressSpace->state == VM_ASPACE_STATE_DELETION) {
		// okay, someone is trying to delete this address space now, so we can't
		// insert the area, let's back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	status = insert_area(addressSpace, _address, addressSpec, size, area);
	if (status < B_OK)
		goto err2;

	// the area is now reserved!

	area->cache_offset = area->base;
		// we cache the original base address here

	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
	return B_OK;

err2:
	release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);
	free(area);
err1:
	vm_put_address_space(addressSpace);
	return status;
}


area_id
vm_create_anonymous_area(team_id aid, const char *name, void **address,
	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection)
{
	vm_cache_ref *cacheRef;
	vm_area *area;
	vm_cache *cache;
	vm_store *store;
	vm_page *page = NULL;
	bool isStack = (protection & B_STACK_AREA) != 0;
	bool canOvercommit = false;
	status_t status;

	TRACE(("create_anonymous_area %s: size 0x%lx\n", name, size));

	if (size == 0)
		return B_BAD_VALUE;
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
		canOvercommit = true;

#ifdef DEBUG_KERNEL_STACKS
	if ((protection & B_KERNEL_STACK_AREA) != 0)
		isStack = true;
#endif

	/* check parameters */
	switch (addressSpec) {
		case B_ANY_ADDRESS:
		case B_EXACT_ADDRESS:
		case B_BASE_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
			break;

		default:
			return B_BAD_VALUE;
	}

	switch (wiring) {
		case B_NO_LOCK:
		case B_FULL_LOCK:
		case B_LAZY_LOCK:
		case B_CONTIGUOUS:
		case B_ALREADY_WIRED:
			break;
		case B_LOMEM:
		//case B_SLOWMEM:
			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
			wiring = B_FULL_LOCK;
			break;
		default:
			return B_BAD_VALUE;
	}

	vm_address_space *addressSpace = vm_get_address_space_by_id(aid);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	size = PAGE_ALIGN(size);

	if (wiring == B_CONTIGUOUS) {
		// we try to allocate the page run here upfront as this may easily
		// fail for obvious reasons
		page = vm_page_allocate_page_run(PAGE_STATE_CLEAR, size / B_PAGE_SIZE);
		if (page == NULL) {
			vm_put_address_space(addressSpace);
			return B_NO_MEMORY;
		}
	}

	// create an anonymous store object
	// if it's a stack, make sure that two pages are available at least
	store = vm_store_create_anonymous_noswap(canOvercommit, isStack ? 2 : 0,
		isStack ? ((protection & B_USER_PROTECTION) != 0 ?
			USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0);
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}
	cache = vm_cache_create(store);
	if (cache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}
	status = vm_cache_ref_create(cache);
	if (status < B_OK)
		goto err3;

	cache->temporary = 1;
	cache->type = CACHE_TYPE_RAM;
	cache->virtual_size = size;

	switch (wiring) {
		case B_LAZY_LOCK:
		case B_FULL_LOCK:
		case B_CONTIGUOUS:
		case B_ALREADY_WIRED:
			cache->scan_skip = 1;
			break;
		case B_NO_LOCK:
			cache->scan_skip = 0;
			break;
	}

	cacheRef = cache->ref;

	status = map_backing_store(addressSpace, cacheRef, address, 0, size,
		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name);
	if (status < B_OK) {
		vm_cache_release_ref(cacheRef);
		goto err1;
	}

	switch (wiring) {
		case B_NO_LOCK:
		case B_LAZY_LOCK:
			// do nothing - the pages are mapped in as needed
			break;

		case B_FULL_LOCK:
		{
			// Allocate and map all pages for this area
			mutex_lock(&cacheRef->lock);

			off_t offset = 0;
			for (addr_t address = area->base;
					address < area->base + (area->size - 1);
					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
				if (isStack && address < area->base + KERNEL_STACK_GUARD_PAGES
						* B_PAGE_SIZE)
#	else
				if (isStack && address >= area->base + area->size
						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	endif
					continue;
#endif
				vm_page *page = vm_page_allocate_page(PAGE_STATE_CLEAR);
				if (page == NULL) {
					// this shouldn't really happen, as we reserve the memory
					// upfront
					panic("couldn't fulfill B_FULL lock!");
				}

				vm_cache_insert_page(cacheRef, page, offset);
				vm_map_page(area, page, address, protection);
			}

			mutex_unlock(&cacheRef->lock);
			break;
		}

		case B_ALREADY_WIRED:
		{
			// the pages should already be mapped. This is only really useful
			// during boot time. Find the appropriate vm_page objects and
			// stick them in the cache object.
			vm_translation_map *map = &addressSpace->translation_map;
			off_t offset = 0;

			if (!kernel_startup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			mutex_lock(&cacheRef->lock);
			map->ops->lock(map);

			for (addr_t virtualAddress = area->base; virtualAddress < area->base
					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE) {
				addr_t physicalAddress;
				uint32 flags;
				status = map->ops->query(map, virtualAddress,
					&physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa 0x%lx\n",
						physicalAddress);
				}

				page->wired_count++;
					// TODO: needs to be atomic on all platforms!
				vm_page_set_state(page, PAGE_STATE_WIRED);
				vm_cache_insert_page(cacheRef, page, offset);
			}

			map->ops->unlock(map);
			mutex_unlock(&cacheRef->lock);
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our contiguous page run, so we can
			// now just map the pages in the address space
			vm_translation_map *map = &addressSpace->translation_map;
			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress;
			off_t offset = 0;

			mutex_lock(&cacheRef->lock);
			map->ops->lock(map);

			for (virtualAddress = area->base; virtualAddress < area->base
					+ (area->size - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->ops->map(map, virtualAddress, physicalAddress,
					protection);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				page->wired_count++;
					// TODO: needs to be atomic on all platforms!
				vm_page_set_state(page, PAGE_STATE_WIRED);
				vm_cache_insert_page(cacheRef, page, offset);
			}

			map->ops->unlock(map);
			mutex_unlock(&cacheRef->lock);
			break;
		}

		default:
			break;
	}
	vm_put_address_space(addressSpace);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err3:
	free(cache);
err2:
	store->ops->destroy(store);
err1:
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
		addr_t pageNumber = page->physical_page_number;
		int32 i;
		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
			page = vm_lookup_page(pageNumber);
			if (page == NULL)
				panic("couldn't lookup physical page just allocated\n");

			vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

	vm_put_address_space(addressSpace);
	return status;
}

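
// Illustrative sketch (added for clarity, not part of the original source):
// creating a fully locked kernel buffer with vm_create_anonymous_area() above
// and tearing it down again with vm_delete_area(). The function name, area
// name and size are arbitrary examples.
#if 0
static status_t
example_locked_buffer(void)
{
	void *address = NULL;
	area_id area = vm_create_anonymous_area(vm_kernel_address_space_id(),
		"example buffer", &address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (area < B_OK)
		return area;

	// ... use the buffer at "address"; with B_FULL_LOCK all pages were
	// allocated and mapped up front, so touching them won't page fault ...

	return vm_delete_area(vm_kernel_address_space_id(), area);
}
#endif
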
area_id
vm_map_physical_memory(team_id aspaceID, const char *name, void **_address,
	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress)
{
	vm_cache_ref *cacheRef;
	vm_area *area;
	vm_cache *cache;
	vm_store *store;
	addr_t mapOffset;
	status_t status;

	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
		"spec = %ld, size = %lu, protection = %ld, phys = %p)\n",
		aspaceID, name, _address, addressSpec, size, protection,
		(void *)physicalAddress));

	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	vm_address_space *addressSpace = vm_get_address_space_by_id(aspaceID);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	// if the physical address is somewhat inside a page,
	// move the actual area down to align on a page boundary
	mapOffset = physicalAddress % B_PAGE_SIZE;
	size += mapOffset;
	physicalAddress -= mapOffset;

	size = PAGE_ALIGN(size);

	// create a device store object

	store = vm_store_create_device(physicalAddress);
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}
	cache = vm_cache_create(store);
	if (cache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}
	status = vm_cache_ref_create(cache);
	if (status < B_OK)
		goto err3;

	// tell the page scanner to skip over this area, its pages are special
	cache->scan_skip = 1;
	cache->type = CACHE_TYPE_DEVICE;
	cache->virtual_size = size;

	cacheRef = cache->ref;

	status = map_backing_store(addressSpace, cacheRef, _address, 0, size,
		addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
		REGION_NO_PRIVATE_MAP, &area, name);
	if (status < B_OK)
		vm_cache_release_ref(cacheRef);

	if (status >= B_OK && (addressSpec & B_MTR_MASK) != 0) {
		// set requested memory type
		status = arch_vm_set_memory_type(area, physicalAddress,
			addressSpec & B_MTR_MASK);
		if (status < B_OK)
			vm_put_area(area);
	}

	if (status >= B_OK) {
		// make sure our area is mapped in completely

		vm_translation_map *map = &addressSpace->translation_map;
		map->ops->lock(map);

		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
			map->ops->map(map, area->base + offset, physicalAddress + offset,
				protection);
		}

		map->ops->unlock(map);
	}

	vm_put_address_space(addressSpace);
	if (status < B_OK)
		return status;

	// modify the pointer returned to be offset back into the new area
	// the same way the physical address in was offset
	*_address = (void *)((addr_t)*_address + mapOffset);

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;

err3:
	free(cache);
err2:
	store->ops->destroy(store);
err1:
	vm_put_address_space(addressSpace);
	return status;
}


area_id
vm_create_null_area(team_id team, const char *name, void **address,
	uint32 addressSpec, addr_t size)
{
	vm_area *area;
	vm_cache *cache;
	vm_cache_ref *cacheRef;
	vm_store *store;
	status_t status;

	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	size = PAGE_ALIGN(size);

	// create a null store object

	store = vm_store_create_null();
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}
	cache = vm_cache_create(store);
	if (cache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}
	status = vm_cache_ref_create(cache);
	if (status < B_OK)
		goto err3;

	// tell the page scanner to skip over this area, no pages will be mapped here
	cache->scan_skip = 1;
	cache->type = CACHE_TYPE_NULL;
	cache->virtual_size = size;

	cacheRef = cache->ref;

	status = map_backing_store(addressSpace, cacheRef, address, 0, size,
		addressSpec, 0, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, &area, name);

	vm_put_address_space(addressSpace);

	if (status < B_OK) {
		vm_cache_release_ref(cacheRef);
		return status;
	}

	area->cache_type = CACHE_TYPE_NULL;
	return area->id;

err3:
	free(cache);
err2:
	store->ops->destroy(store);
err1:
	vm_put_address_space(addressSpace);
	return status;
}

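
// Illustrative sketch (added for clarity, not part of the original source):
// how a driver-level caller might map a physical register window with
// vm_map_physical_memory() above. The function name, area name, and the
// physical address 0xfee00000 are made-up example values; the returned
// pointer is adjusted by the function if the physical address is not
// page aligned.
#if 0
static area_id
example_map_registers(void **_registers)
{
	return vm_map_physical_memory(vm_kernel_address_space_id(),
		"example regs", _registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, (addr_t)0xfee00000);
}
#endif
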
/**	Creates the vnode cache for the specified \a vnode.
 *	The vnode has to be marked busy when calling this function.
 *	If successful, it will also acquire an extra reference to
 *	the vnode (as the vnode store itself can't do this
 *	automatically).
 */

status_t
vm_create_vnode_cache(void *vnode, struct vm_cache_ref **_cacheRef)
{
	status_t status;

	// create a vnode store object
	vm_store *store = vm_create_vnode_store(vnode);
	if (store == NULL)
		return B_NO_MEMORY;

	vm_cache *cache = vm_cache_create(store);
	if (cache == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}
	status = vm_cache_ref_create(cache);
	if (status < B_OK)
		goto err2;

	cache->type = CACHE_TYPE_VNODE;

	*_cacheRef = cache->ref;
	vfs_acquire_vnode(vnode);
	return B_OK;

err2:
	free(cache);
err1:
	store->ops->destroy(store);
	return status;
}


/**	Will map the file at the path specified by \a name to an area in memory.
 *	The file will be mirrored beginning at the specified \a offset. The
 *	\a offset and \a size arguments have to be page aligned.
 */

static area_id
_vm_map_file(team_id team, const char *name, void **_address,
	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
	const char *path, off_t offset, bool kernel)
{
	vm_cache_ref *cacheRef;
	vm_area *area;
	void *vnode;
	status_t status;

	// ToDo: maybe attach to an FD, not a path (or both, like VFS calls)
	// ToDo: check file access permissions (would be already done if the
	//	above were true)
	// ToDo: for binary files, we want to make sure that they get the
	//	copy of a file at a given time, ie. later changes should not
	//	make it into the mapped copy -- this will need quite some changes
	//	to be done in a nice way

	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	TRACE(("_vm_map_file(\"%s\", offset = %Ld, size = %lu, mapping %ld)\n",
		path, offset, size, mapping));

	offset = ROUNDOWN(offset, B_PAGE_SIZE);
	size = PAGE_ALIGN(size);

	// get the vnode for the object, this also grabs a ref to it
	status = vfs_get_vnode_from_path(path, kernel, &vnode);
	if (status < B_OK)
		goto err1;

	// ToDo: this only works for file systems that use the file cache
	status = vfs_get_vnode_cache(vnode, &cacheRef, false);

	vfs_put_vnode(vnode);
		// we don't need this vnode anymore - if the above call was
		// successful, the store already has a ref to it

	if (status < B_OK)
		goto err1;

	status = map_backing_store(addressSpace, cacheRef, _address,
		offset, size, addressSpec, 0, protection, mapping, &area, name);
	if (status < B_OK || mapping == REGION_PRIVATE_MAP) {
		// map_backing_store() cannot know we no longer need the ref
		vm_cache_release_ref(cacheRef);
	}
	if (status < B_OK)
		goto err1;

	vm_put_address_space(addressSpace);
	area->cache_type = CACHE_TYPE_VNODE;
	return area->id;

err1:
	vm_put_address_space(addressSpace);
	return status;
}


area_id
vm_map_file(team_id aid, const char *name, void **address, uint32 addressSpec,
	addr_t size, uint32 protection, uint32 mapping, const char *path,
	off_t offset)
{
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	return _vm_map_file(aid, name, address, addressSpec, size, protection,
		mapping, path, offset, true);
}

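
// Illustrative sketch (added for clarity, not part of the original source):
// mapping a file read-only into the kernel address space with vm_map_file()
// above. The function name, area name, size and the "/path/to/some/file"
// string are placeholders; REGION_NO_PRIVATE_MAP shares the vnode cache
// instead of creating a private copy-on-write layer.
#if 0
static area_id
example_map_file(void **_address)
{
	return vm_map_file(vm_kernel_address_space_id(), "example mapping",
		_address, B_ANY_KERNEL_ADDRESS, 64 * B_PAGE_SIZE, B_KERNEL_READ_AREA,
		REGION_NO_PRIVATE_MAP, "/path/to/some/file", 0);
}
#endif
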
// ToDo: create a BeOS style call for this!

area_id
_user_vm_map_file(const char *userName, void **userAddress, int addressSpec,
	addr_t size, int protection, int mapping, const char *userPath,
	off_t offset)
{
	char name[B_OS_NAME_LENGTH];
	char path[B_PATH_NAME_LENGTH];
	void *address;
	area_id area;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| !IS_USER_ADDRESS(userPath)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	// userland created areas can always be accessed by the kernel
	protection |= B_KERNEL_READ_AREA
		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);

	area = _vm_map_file(vm_current_user_address_space_id(), name, &address,
		addressSpec, size, protection, mapping, path, offset, false);
	if (area < B_OK)
		return area;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return area;
}


area_id
vm_clone_area(team_id team, const char *name, void **address,
	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID)
{
	vm_area *newArea = NULL;
	vm_area *sourceArea;
	status_t status;

	vm_address_space *addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	sourceArea = vm_get_area(sourceID);
	if (sourceArea == NULL) {
		vm_put_address_space(addressSpace);
		return B_BAD_VALUE;
	}

	vm_cache_acquire_ref(sourceArea->cache_ref);

	// ToDo: for now, B_USER_CLONEABLE is disabled, until all drivers
	//	have been adapted. Maybe it should be part of the kernel settings,
	//	anyway (so that old drivers can always work).
#if 0
	if (sourceArea->aspace == vm_kernel_address_space()
		&& addressSpace != vm_kernel_address_space()
		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
		// kernel areas must not be cloned in userland, unless explicitly
		// declared user-cloneable upon construction
		status = B_NOT_ALLOWED;
	} else
#endif
	if (sourceArea->cache_type == CACHE_TYPE_NULL)
		status = B_NOT_ALLOWED;
	else {
		status = map_backing_store(addressSpace, sourceArea->cache_ref,
			address, sourceArea->cache_offset, sourceArea->size, addressSpec,
			sourceArea->wiring, protection, mapping, &newArea, name);
	}
	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
		// to create a new ref, and has therefore already acquired a reference
		// to the source cache - but otherwise it has no idea that we need
		// one.
		vm_cache_acquire_ref(sourceArea->cache_ref);
	}
	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
		// we need to map in everything at this point
		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
			// we don't have actual pages to map but a physical area
			vm_translation_map *map
				= &sourceArea->address_space->translation_map;
			map->ops->lock(map);

			addr_t physicalAddress;
			uint32 oldProtection;
			map->ops->query(map, sourceArea->base, &physicalAddress,
				&oldProtection);

			map->ops->unlock(map);

			map = &addressSpace->translation_map;
			map->ops->lock(map);

			for (addr_t offset = 0; offset < newArea->size;
					offset += B_PAGE_SIZE) {
				map->ops->map(map, newArea->base + offset,
					physicalAddress + offset, protection);
			}

			map->ops->unlock(map);
		} else {
			// map in all pages from source
			mutex_lock(&sourceArea->cache_ref->lock);

			for (vm_page *page = sourceArea->cache_ref->cache->page_list;
					page != NULL; page = page->cache_next) {
				vm_map_page(newArea, page, newArea->base
					+ ((page->cache_offset << PAGE_SHIFT)
						- newArea->cache_offset), protection);
			}

			mutex_unlock(&sourceArea->cache_ref->lock);
		}
	}
	if (status == B_OK)
		newArea->cache_type = sourceArea->cache_type;

	vm_cache_release_ref(sourceArea->cache_ref);

	vm_put_area(sourceArea);
	vm_put_address_space(addressSpace);

	if (status < B_OK)
		return status;

	return newArea->id;
}


static status_t
_vm_delete_area(vm_address_space *addressSpace, area_id id)
{
	status_t status = B_OK;
	vm_area *area;

	TRACE(("vm_delete_area: aspace id 0x%lx, area id 0x%lx\n",
		addressSpace->id, id));

	area = vm_get_area(id);
	if (area == NULL)
		return B_BAD_VALUE;

	if (area->address_space == addressSpace) {
		vm_put_area(area);
			// next put below will actually delete it
	} else
		status = B_NOT_ALLOWED;

	vm_put_area(area);
	return status;
}


status_t
vm_delete_area(team_id team, area_id id)
{
	vm_address_space *addressSpace;
	status_t err;

	addressSpace = vm_get_address_space_by_id(team);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	err = _vm_delete_area(addressSpace, id);
	vm_put_address_space(addressSpace);
	return err;
}


static void
remove_area_from_address_space(vm_address_space *addressSpace, vm_area *area,
	bool locked)
{
	vm_area *temp, *last = NULL;

	if (!locked)
		acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);

	temp = addressSpace->areas;
	while (temp != NULL) {
		if (area == temp) {
			if (last != NULL) {
				last->address_space_next = temp->address_space_next;
			} else {
				addressSpace->areas = temp->address_space_next;
			}
			addressSpace->change_count++;
			break;
		}
		last = temp;
		temp = temp->address_space_next;
	}
	if (area == addressSpace->area_hint)
		addressSpace->area_hint = NULL;

	if (!locked)
		release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);

	if (temp == NULL)
		panic("vm_area_release_ref: area not found in aspace's area list\n");
}

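
// Illustrative sketch (added for clarity, not part of the original source):
// using vm_clone_area() above to get a second, kernel-side mapping of an
// already existing area (for instance one created by a userland team). The
// function name is hypothetical; the source area ID is assumed to come from
// elsewhere, and REGION_NO_PRIVATE_MAP makes both areas share the same cache.
#if 0
static area_id
example_clone_into_kernel(area_id sourceArea)
{
	void *address = NULL;
	return vm_clone_area(vm_kernel_address_space_id(), "example clone",
		&address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceArea);
}
#endif
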
static bool
_vm_put_area(vm_area *area, bool aspaceLocked)
{
	vm_address_space *addressSpace;
	bool removeit = false;

	TRACE(("_vm_put_area(area = %p, aspaceLocked = %s)\n",
		area, aspaceLocked ? "yes" : "no"));

	// we should never get here, but if we do, we can handle it
	if (area->id == RESERVED_AREA_ID)
		return false;

	addressSpace = area->address_space;

	// grab a write lock on the address space around the removal of the area
	// from the global hash table to avoid a race with vm_soft_fault()
	if (!aspaceLocked)
		acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0);

	acquire_sem_etc(sAreaHashLock, WRITE_COUNT, 0, 0);
	if (atomic_add(&area->ref_count, -1) == 1) {
		hash_remove(sAreaHash, area);
		removeit = true;
	}
	release_sem_etc(sAreaHashLock, WRITE_COUNT, 0);

	if (!aspaceLocked)
		release_sem_etc(addressSpace->sem, WRITE_COUNT, 0);

	if (!removeit)
		return false;

	// At this point the area is removed from the global hash table, but still
	// exists in the area list. Its ref_count is zero, and it is guaranteed not
	// to be incremented anymore (by a direct hash lookup, or vm_area_lookup()).

	// unmap the virtual address space the area occupied. any page faults at
	// this point should fail in vm_area_lookup().
	vm_unmap_pages(area, area->base, area->size);

	// ToDo: do that only for vnode stores
	vm_cache_write_modified(area->cache_ref, false);

	arch_vm_unset_memory_type(area);
	remove_area_from_address_space(addressSpace, area, aspaceLocked);

	vm_cache_remove_area(area->cache_ref, area);
	vm_cache_release_ref(area->cache_ref);

	// now we can give up the area's reference to the address space
	vm_put_address_space(addressSpace);

	free(area->name);
	free(area);
	return true;
}


static bool
vm_put_area(vm_area *area)
{
	return _vm_put_area(area, false);
}


static status_t
vm_copy_on_write_area(vm_area *area)
{
	vm_store *store;
	vm_cache *upperCache, *lowerCache;
	vm_cache_ref *upperCacheRef, *lowerCacheRef;
	vm_translation_map *map;
	vm_page *page;
	uint32 protection;
	status_t status;

	TRACE(("vm_copy_on_write_area(area = %p)\n", area));

	// We need to separate the vm_cache from its vm_cache_ref: the area
	// and its cache_ref go into a new layer on top of the old one.
	// So the old cache gets a new cache_ref and the area a new cache.

	upperCacheRef = area->cache_ref;

	// we will exchange the cache_ref's cache, so we better hold its lock
	mutex_lock(&upperCacheRef->lock);

	lowerCache = upperCacheRef->cache;

	// create an anonymous store object
	store = vm_store_create_anonymous_noswap(false, 0, 0);
	if (store == NULL) {
		status = B_NO_MEMORY;
		goto err1;
	}

	upperCache = vm_cache_create(store);
	if (upperCache == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}

	status = vm_cache_ref_create(lowerCache);
	if (status < B_OK)
		goto err3;

	lowerCacheRef = lowerCache->ref;

	// The area must be readable in the same way it was previously writable
	protection = B_KERNEL_READ_AREA;
	if (area->protection & B_READ_AREA)
		protection |= B_READ_AREA;

	// we need to hold the cache_ref lock when we want to switch its cache
	mutex_lock(&lowerCacheRef->lock);

	upperCache->type = CACHE_TYPE_RAM;
	upperCache->temporary = 1;
	upperCache->scan_skip = lowerCache->scan_skip;
	upperCache->virtual_base = lowerCache->virtual_base;
	upperCache->virtual_size = lowerCache->virtual_size;

	upperCache->ref = upperCacheRef;
	upperCacheRef->cache = upperCache;

	// we need to manually alter the ref_count (divide it between the two)
	// the lower cache_ref has only known refs, so compute them
	{
		int32 count = 0;
		vm_cache *consumer = NULL;
		while ((consumer = (vm_cache *)list_get_next_item(
				&lowerCache->consumers, consumer)) != NULL) {
			count++;
		}
		lowerCacheRef->ref_count = count;
		atomic_add(&upperCacheRef->ref_count, -count);
	}

	vm_cache_add_consumer_locked(lowerCacheRef, upperCache);

	// We now need to remap all pages from the area read-only, so that
	// a copy will be created on next write access

	map = &area->address_space->translation_map;
	map->ops->lock(map);
	map->ops->unmap(map, area->base, area->base - 1 + area->size);
	map->ops->flush(map);

	// TODO: does anything guarantee that we remap the same pages here?
	// Shouldn't we better introduce a "change mapping"?

	for (page = lowerCache->page_list; page; page = page->cache_next) {
		map->ops->map(map, area->base + (page->cache_offset << PAGE_SHIFT)
			- area->cache_offset, page->physical_page_number << PAGE_SHIFT,
			protection);
	}

	map->ops->unlock(map);

	mutex_unlock(&lowerCacheRef->lock);
	mutex_unlock(&upperCacheRef->lock);

	return B_OK;

err3:
	free(upperCache);
err2:
	store->ops->destroy(store);
err1:
	mutex_unlock(&upperCacheRef->lock);
	return status;
}


area_id
vm_copy_area(team_id addressSpaceID, const char *name, void **_address,
	uint32 addressSpec, uint32 protection, area_id sourceID)
{
	vm_address_space *addressSpace;
	vm_cache_ref *cacheRef;
	vm_area *target, *source;
	status_t status;
	bool writableCopy
		= (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;

	if ((protection & B_KERNEL_PROTECTION) == 0) {
		// set the same protection for the kernel as for userland
		protection |= B_KERNEL_READ_AREA;
		if (writableCopy)
			protection |= B_KERNEL_WRITE_AREA;
	}

	if ((source = vm_get_area(sourceID)) == NULL)
		return B_BAD_VALUE;

	addressSpace = vm_get_address_space_by_id(addressSpaceID);
	cacheRef = source->cache_ref;

	if (addressSpec == B_CLONE_ADDRESS) {
		addressSpec = B_EXACT_ADDRESS;
		*_address = (void *)source->base;
	}

	// First, create a cache on top of the source area

	if (!writableCopy) {
		// map_backing_store() cannot know it has to acquire a ref to
		// the store for REGION_NO_PRIVATE_MAP
		vm_cache_acquire_ref(cacheRef);
	}

	status = map_backing_store(addressSpace, cacheRef, _address,
		source->cache_offset, source->size, addressSpec, source->wiring,
		protection, writableCopy ? REGION_PRIVATE_MAP : REGION_NO_PRIVATE_MAP,
		&target, name);
	if (status < B_OK) {
		if (!writableCopy)
			vm_cache_release_ref(cacheRef);
		goto err;
	}

	// If the source area is writable, we need to move it one layer up as well

	if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
		// ToDo: do something more useful if this fails!
		if (vm_copy_on_write_area(source) < B_OK)
			panic("vm_copy_on_write_area() failed!\n");
	}

	// we want to return the ID of the newly created area
	status = target->id;

err:
	vm_put_address_space(addressSpace);
	vm_put_area(source);

	return status;
}


static int32
count_writable_areas(vm_cache_ref *ref, vm_area *ignoreArea)
{
	struct vm_area *area = ref->areas;
	uint32 count = 0;

	for (; area != NULL; area = area->cache_next) {
		if (area != ignoreArea
			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0)
			count++;
	}

	return count;
}


static status_t
vm_set_area_protection(team_id aspaceID, area_id areaID, uint32 newProtection)
{
	vm_cache_ref *cacheRef;
	vm_cache *cache;
	vm_area *area;
	status_t status = B_OK;

	TRACE(("vm_set_area_protection(aspace = %#lx, area = %#lx, "
		"protection = %#lx)\n", aspaceID, areaID, newProtection));

	if (!arch_vm_supports_protection(newProtection))
		return B_NOT_SUPPORTED;

	area = vm_get_area(areaID);
	if (area == NULL)
		return B_BAD_VALUE;

	if (aspaceID != vm_kernel_address_space_id()
		&& area->address_space->id != aspaceID) {
		// unless you're the kernel, you are only allowed to set
		// the protection of your own areas
		vm_put_area(area);
		return B_NOT_ALLOWED;
	}

	cacheRef = area->cache_ref;
	mutex_lock(&cacheRef->lock);

	cache = cacheRef->cache;

	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
		// change from read/write to read-only

		if (cache->source != NULL && cache->temporary) {
			if (count_writable_areas(cacheRef, area) == 0) {
				// Since this cache now lives from the pages in its source
				// cache, we can change the cache's commitment to take only
				// those pages into account that really are in this cache.

				// count existing pages in this cache
				struct vm_page *page = cache->page_list;
				uint32 count = 0;

				for (; page != NULL; page = page->cache_next) {
					count++;
				}

				status = cache->store->ops->commit(cache->store,
					cache->virtual_base + count * B_PAGE_SIZE);

				// ToDo: we may be able to join with our source cache,
				// if count == 0
			}
		}
	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
		// change from read-only to read/write

		// ToDo: if this is a shared cache, insert new cache (we only know
		//	about other areas in this cache yet, though, not about child
		//	areas) -> use this call with care, it might currently have
		//	unwanted consequences because of this. It should always be safe
		//	though, if there are no other (child) areas referencing this
		//	area's cache (you just might not know).
		if (count_writable_areas(cacheRef, area) == 0
			&& (cacheRef->areas != area || area->cache_next)) {
			// ToDo: child areas are not tested for yet
			dprintf("set_area_protection(): warning, would need to insert "
				"a new cache_ref (not yet implemented)!\n");
			status = B_NOT_ALLOWED;
		} else
			dprintf("set_area_protection() may not work correctly yet in "
				"this direction!\n");

		if (status == B_OK && cache->source != NULL && cache->temporary) {
			// the cache's commitment must contain all possible pages
			status = cache->store->ops->commit(cache->store,
				cache->virtual_size);
		}
	} else {
		// we don't have anything special to do in all other cases
	}

	if (status == B_OK && area->protection != newProtection) {
		// remap existing pages in this cache
		struct vm_translation_map *map = &area->address_space->translation_map;

		map->ops->lock(map);
		map->ops->protect(map, area->base, area->base + area->size,
			newProtection);
		map->ops->unlock(map);

		area->protection = newProtection;
	}

	mutex_unlock(&cacheRef->lock);
	vm_put_area(area);

	return status;
}


status_t
vm_get_page_mapping(team_id aid, addr_t vaddr, addr_t *paddr)
{
	vm_address_space *addressSpace;
	uint32 null_flags;
	status_t err;

	addressSpace = vm_get_address_space_by_id(aid);
	if (addressSpace == NULL)
		return B_BAD_TEAM_ID;

	err = addressSpace->translation_map.ops->query(
		&addressSpace->translation_map, vaddr, paddr, &null_flags);

	vm_put_address_space(addressSpace);
	return err;
}


int32
vm_test_map_activation(vm_page *page)
{
	int32 activation = 0;

	// TODO: this can't work... (we need to lock the map, so this has to be
	// a mutex)
	cpu_status state = disable_interrupts();
	acquire_spinlock(&sMappingLock);

	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
	vm_page_mapping *mapping;
	while ((mapping = iterator.Next()) != NULL) {
		vm_area *area = mapping->area;
		vm_translation_map *map = &area->address_space->translation_map;

		addr_t physicalAddress;
		uint32 flags;
//		map->ops->lock(map);
		addr_t address = area->base + (page->cache_offset << PAGE_SHIFT);
		map->ops->query_interrupt(map, address, &physicalAddress, &flags);
//		map->ops->unlock(map);

		if (flags & PAGE_ACCESSED)
			activation++;
	}

	release_spinlock(&sMappingLock);
	restore_interrupts(state);

	return activation;
}


void
vm_clear_map_activation(vm_page *page)
{
	// TODO: this can't work... (we need to lock the map, so this has to be
	// a mutex)
	cpu_status state = disable_interrupts();
	acquire_spinlock(&sMappingLock);

	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
	vm_page_mapping *mapping;
	while ((mapping = iterator.Next()) != NULL) {
		vm_area *area = mapping->area;
		vm_translation_map *map = &area->address_space->translation_map;

//		map->ops->lock(map);
		addr_t address = area->base + (page->cache_offset << PAGE_SHIFT);
		map->ops->clear_flags(map, address, PAGE_ACCESSED);
//		map->ops->unlock(map);
	}

	release_spinlock(&sMappingLock);
	restore_interrupts(state);
}

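
// Illustrative sketch (added for clarity, not part of the original source):
// translating a virtual address to its physical counterpart with
// vm_get_page_mapping() above. The function name and the parameters are
// placeholders supplied by the hypothetical caller.
#if 0
static void
example_lookup_physical(team_id team, addr_t virtualAddress)
{
	addr_t physicalAddress;
	if (vm_get_page_mapping(team, virtualAddress, &physicalAddress) == B_OK) {
		dprintf("va 0x%lx maps to pa 0x%lx\n", virtualAddress,
			physicalAddress);
	}
}
#endif
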
void
vm_remove_all_page_mappings(vm_page *page)
{
	// TODO: this can't work... (we need to lock the map, so this has to be
	// a mutex)
	cpu_status state = disable_interrupts();
	acquire_spinlock(&sMappingLock);

	vm_page_mappings queue;
	queue.MoveFrom(&page->mappings);

	vm_page_mappings::Iterator iterator = queue.GetIterator();
	vm_page_mapping *mapping;
	while ((mapping = iterator.Next()) != NULL) {
		vm_area *area = mapping->area;
		vm_translation_map *map = &area->address_space->translation_map;

//		map->ops->lock(map);
		addr_t base = area->base + (page->cache_offset << PAGE_SHIFT);
		map->ops->unmap(map, base, base + (B_PAGE_SIZE - 1));
//		map->ops->unlock(map);

		area->mappings.Remove(mapping);
	}

	release_spinlock(&sMappingLock);
	restore_interrupts(state);

	// free now unused mappings

	while ((mapping = queue.RemoveHead()) != NULL) {
		free(mapping);
	}
}


status_t
vm_unmap_pages(vm_area *area, addr_t base, size_t size)
{
	vm_translation_map *map = &area->address_space->translation_map;
	addr_t end = base + (size - 1);

	map->ops->lock(map);

	if (area->wiring != B_NO_LOCK && area->cache_type != CACHE_TYPE_DEVICE) {
		// iterate through all pages and decrease their wired count
		for (addr_t virtualAddress = base; virtualAddress < end;
				virtualAddress += B_PAGE_SIZE) {
			addr_t physicalAddress;
			uint32 flags;
			status_t status = map->ops->query(map, virtualAddress,
				&physicalAddress, &flags);
			if (status < B_OK || (flags & PAGE_PRESENT) == 0)
				continue;

			vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page == NULL) {
				panic("area %p looking up page failed for pa 0x%lx\n", area,
					physicalAddress);
			}

			page->wired_count--;
				// TODO: needs to be atomic on all platforms!
		}
	}

	map->ops->unmap(map, base, end);

	if (area->wiring == B_NO_LOCK) {
		vm_area_mappings queue;
		uint32 count = 0;

		cpu_status state = disable_interrupts();
		acquire_spinlock(&sMappingLock);

		vm_page_mapping *mapping;
		while ((mapping = area->mappings.RemoveHead()) != NULL) {
			mapping->page->mappings.Remove(mapping);
			queue.Add(mapping);

			// temporary unlock to handle interrupts and let others play as well
			if ((++count % 256) == 0) {
				release_spinlock(&sMappingLock);
				restore_interrupts(state);

				state = disable_interrupts();
				acquire_spinlock(&sMappingLock);
			}
		}

		release_spinlock(&sMappingLock);
		restore_interrupts(state);

		while ((mapping = queue.RemoveHead()) != NULL) {
			free(mapping);
		}
	}

	map->ops->unlock(map);
	return B_OK;
}

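
// Illustrative sketch (added for clarity, not part of the original source):
// the pairing of vm_unmap_pages() above with vm_map_page() below. A page
// belonging to an area's cache is mapped at a page-aligned address inside
// the area and later unmapped again; the function name and the choice of
// area->protection are example assumptions.
#if 0
static void
example_map_and_unmap(vm_area *area, vm_page *page, addr_t address)
{
	if (vm_map_page(area, page, address, area->protection) != B_OK)
		return;

	// ... the page contents are now accessible at "address" ...

	vm_unmap_pages(area, address, B_PAGE_SIZE);
}
#endif
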
2021 } else { 2022 // insert mapping into lists 2023 cpu_status state = disable_interrupts(); 2024 acquire_spinlock(&sMappingLock); 2025 2026 page->mappings.Add(mapping); 2027 area->mappings.Add(mapping); 2028 2029 release_spinlock(&sMappingLock); 2030 restore_interrupts(state); 2031 } 2032 2033 map->ops->unlock(map); 2034 2035 vm_page_set_state(page, PAGE_STATE_ACTIVE); 2036 return B_OK; 2037 } 2038 2039 2040 static int 2041 display_mem(int argc, char **argv) 2042 { 2043 bool physical = false; 2044 addr_t copyAddress; 2045 int32 displayWidth; 2046 int32 itemSize; 2047 int32 num = -1; 2048 addr_t address; 2049 int i = 1, j; 2050 2051 if (argc > 1 && argv[1][0] == '-') { 2052 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 2053 physical = true; 2054 i++; 2055 } else 2056 i = 99; 2057 } 2058 2059 if (argc < i + 1 || argc > i + 2) { 2060 kprintf("usage: dl/dw/ds/db [-p|--physical] <address> [num]\n" 2061 "\tdl - 8 bytes\n" 2062 "\tdw - 4 bytes\n" 2063 "\tds - 2 bytes\n" 2064 "\tdb - 1 byte\n" 2065 " -p or --physical only allows memory from a single page to be displayed.\n"); 2066 return 0; 2067 } 2068 2069 address = strtoul(argv[i], NULL, 0); 2070 2071 if (argc > i + 1) 2072 num = atoi(argv[i + 1]); 2073 2074 // build the format string 2075 if (strcmp(argv[0], "db") == 0) { 2076 itemSize = 1; 2077 displayWidth = 16; 2078 } else if (strcmp(argv[0], "ds") == 0) { 2079 itemSize = 2; 2080 displayWidth = 8; 2081 } else if (strcmp(argv[0], "dw") == 0) { 2082 itemSize = 4; 2083 displayWidth = 4; 2084 } else if (strcmp(argv[0], "dl") == 0) { 2085 itemSize = 8; 2086 displayWidth = 2; 2087 } else { 2088 kprintf("display_mem called in an invalid way!\n"); 2089 return 0; 2090 } 2091 2092 if (num <= 0) 2093 num = displayWidth; 2094 2095 if (physical) { 2096 int32 offset = address & (B_PAGE_SIZE - 1); 2097 if (num * itemSize + offset > B_PAGE_SIZE) { 2098 num = (B_PAGE_SIZE - offset) / itemSize; 2099 kprintf("NOTE: number of bytes has been cut to page size\n"); 2100 } 2101 2102 address = ROUNDOWN(address, B_PAGE_SIZE); 2103 2104 kernel_startup = true; 2105 // vm_get_physical_page() needs to lock... 
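	// The code below pairs vm_get_physical_page()/vm_put_physical_page(),
	// both defined further down in this file, to get a temporary virtual
	// mapping of a physical page. Outside the kernel debugger the same
	// pattern looks roughly like this sketch (the helper name is made up,
	// and PHYSICAL_PAGE_CAN_WAIT is assumed to be acceptable for the caller):
#if 0
	static status_t
	peek_physical_word(addr_t physicalAddress, uint32 *_value)
	{
		addr_t virtualAddress;
		int32 offset = physicalAddress & (B_PAGE_SIZE - 1);

		// map the page that contains the requested address
		status_t status = vm_get_physical_page(physicalAddress - offset,
			&virtualAddress, PHYSICAL_PAGE_CAN_WAIT);
		if (status != B_OK)
			return status;

		*_value = *(uint32 *)(virtualAddress + offset);

		// always balance the mapping again
		return vm_put_physical_page(virtualAddress);
	}
#endif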
2106 2107 if (vm_get_physical_page(address, &copyAddress, PHYSICAL_PAGE_NO_WAIT) != B_OK) { 2108 kprintf("getting the hardware page failed."); 2109 kernel_startup = false; 2110 return 0; 2111 } 2112 2113 kernel_startup = false; 2114 address += offset; 2115 copyAddress += offset; 2116 } else 2117 copyAddress = address; 2118 2119 for (i = 0; i < num; i++) { 2120 uint32 value; 2121 2122 if ((i % displayWidth) == 0) { 2123 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 2124 if (i != 0) 2125 kprintf("\n"); 2126 2127 kprintf("[0x%lx] ", address + i * itemSize); 2128 2129 for (j = 0; j < displayed; j++) { 2130 char c; 2131 if (user_memcpy(&c, (char *)copyAddress + i * itemSize + j, 1) != B_OK) { 2132 displayed = j; 2133 break; 2134 } 2135 if (!isprint(c)) 2136 c = '.'; 2137 2138 kprintf("%c", c); 2139 } 2140 if (num > displayWidth) { 2141 // make sure the spacing in the last line is correct 2142 for (j = displayed; j < displayWidth * itemSize; j++) 2143 kprintf(" "); 2144 } 2145 kprintf(" "); 2146 } 2147 2148 if (user_memcpy(&value, (uint8 *)copyAddress + i * itemSize, itemSize) != B_OK) { 2149 kprintf("read fault"); 2150 break; 2151 } 2152 2153 switch (itemSize) { 2154 case 1: 2155 kprintf(" %02x", *(uint8 *)&value); 2156 break; 2157 case 2: 2158 kprintf(" %04x", *(uint16 *)&value); 2159 break; 2160 case 4: 2161 kprintf(" %08lx", *(uint32 *)&value); 2162 break; 2163 case 8: 2164 kprintf(" %016Lx", *(uint64 *)&value); 2165 break; 2166 } 2167 } 2168 2169 kprintf("\n"); 2170 2171 if (physical) { 2172 copyAddress = ROUNDOWN(copyAddress, B_PAGE_SIZE); 2173 kernel_startup = true; 2174 vm_put_physical_page(copyAddress); 2175 kernel_startup = false; 2176 } 2177 return 0; 2178 } 2179 2180 2181 static const char * 2182 page_state_to_string(int state) 2183 { 2184 switch(state) { 2185 case PAGE_STATE_ACTIVE: 2186 return "active"; 2187 case PAGE_STATE_INACTIVE: 2188 return "inactive"; 2189 case PAGE_STATE_BUSY: 2190 return "busy"; 2191 case PAGE_STATE_MODIFIED: 2192 return "modified"; 2193 case PAGE_STATE_FREE: 2194 return "free"; 2195 case PAGE_STATE_CLEAR: 2196 return "clear"; 2197 case PAGE_STATE_WIRED: 2198 return "wired"; 2199 case PAGE_STATE_UNUSED: 2200 return "unused"; 2201 default: 2202 return "unknown"; 2203 } 2204 } 2205 2206 2207 static int 2208 dump_cache_chain(int argc, char **argv) 2209 { 2210 if (argc < 2 || strlen(argv[1]) < 2 2211 || argv[1][0] != '0' 2212 || argv[1][1] != 'x') { 2213 kprintf("%s: invalid argument, pass address\n", argv[0]); 2214 return 0; 2215 } 2216 2217 addr_t address = strtoul(argv[1], NULL, 0); 2218 if (address == NULL) 2219 return 0; 2220 2221 vm_cache *cache = (vm_cache *)address; 2222 while (cache != NULL) { 2223 dprintf("%p (ref %p)\n", cache, cache->ref); 2224 cache = cache->source; 2225 } 2226 2227 return 0; 2228 } 2229 2230 2231 static const char * 2232 cache_type_to_string(int32 type) 2233 { 2234 switch (type) { 2235 case CACHE_TYPE_RAM: 2236 return "RAM"; 2237 case CACHE_TYPE_DEVICE: 2238 return "device"; 2239 case CACHE_TYPE_VNODE: 2240 return "vnode"; 2241 case CACHE_TYPE_NULL: 2242 return "null"; 2243 2244 default: 2245 return "unknown"; 2246 } 2247 } 2248 2249 2250 static int 2251 dump_cache(int argc, char **argv) 2252 { 2253 vm_cache *cache; 2254 vm_cache_ref *cacheRef; 2255 bool showPages = false; 2256 bool showCache = true; 2257 bool showCacheRef = true; 2258 int i = 1; 2259 2260 if (argc < 2) { 2261 kprintf("usage: %s [-ps] <address>\n" 2262 " if -p is specified, all pages are shown, if -s is used\n" 2263 " only the cache/cache_ref info is 
shown respectively.\n", argv[0]); 2264 return 0; 2265 } 2266 while (argv[i][0] == '-') { 2267 char *arg = argv[i] + 1; 2268 while (arg[0]) { 2269 if (arg[0] == 'p') 2270 showPages = true; 2271 else if (arg[0] == 's') { 2272 if (!strcmp(argv[0], "cache")) 2273 showCacheRef = false; 2274 else 2275 showCache = false; 2276 } 2277 arg++; 2278 } 2279 i++; 2280 } 2281 if (argv[i] == NULL || strlen(argv[i]) < 2 2282 || argv[i][0] != '0' 2283 || argv[i][1] != 'x') { 2284 kprintf("%s: invalid argument, pass address\n", argv[0]); 2285 return 0; 2286 } 2287 2288 addr_t address = strtoul(argv[i], NULL, 0); 2289 if (address == NULL) 2290 return 0; 2291 2292 if (!strcmp(argv[0], "cache")) { 2293 cache = (vm_cache *)address; 2294 cacheRef = cache->ref; 2295 } else { 2296 cacheRef = (vm_cache_ref *)address; 2297 cache = cacheRef->cache; 2298 } 2299 2300 if (showCacheRef) { 2301 kprintf("CACHE_REF %p:\n", cacheRef); 2302 if (!showCache) 2303 kprintf(" cache: %p\n", cacheRef->cache); 2304 kprintf(" ref_count: %ld\n", cacheRef->ref_count); 2305 kprintf(" lock.holder: %ld\n", cacheRef->lock.holder); 2306 kprintf(" lock.sem: 0x%lx\n", cacheRef->lock.sem); 2307 kprintf(" areas:\n"); 2308 2309 for (vm_area *area = cacheRef->areas; area != NULL; area = area->cache_next) { 2310 kprintf(" area 0x%lx, %s\n", area->id, area->name); 2311 kprintf("\tbase_addr: 0x%lx, size: 0x%lx\n", area->base, area->size); 2312 kprintf("\tprotection: 0x%lx\n", area->protection); 2313 kprintf("\towner: 0x%lx\n", area->address_space->id); 2314 } 2315 } 2316 2317 if (showCache) { 2318 kprintf("CACHE %p:\n", cache); 2319 if (!showCacheRef) 2320 kprintf(" cache_ref: %p\n", cache->ref); 2321 kprintf(" source: %p\n", cache->source); 2322 kprintf(" store: %p\n", cache->store); 2323 kprintf(" type: %s\n", cache_type_to_string(cache->type)); 2324 kprintf(" virtual_base: 0x%Lx\n", cache->virtual_base); 2325 kprintf(" virtual_size: 0x%Lx\n", cache->virtual_size); 2326 kprintf(" temporary: %ld\n", cache->temporary); 2327 kprintf(" scan_skip: %ld\n", cache->scan_skip); 2328 2329 kprintf(" consumers:\n"); 2330 vm_cache *consumer = NULL; 2331 while ((consumer = (vm_cache *)list_get_next_item(&cache->consumers, consumer)) != NULL) { 2332 kprintf("\t%p\n", consumer); 2333 } 2334 2335 kprintf(" pages:\n"); 2336 int32 count = 0; 2337 for (vm_page *page = cache->page_list; page != NULL; page = page->cache_next) { 2338 count++; 2339 if (!showPages) 2340 continue; 2341 2342 if (page->type == PAGE_TYPE_PHYSICAL) { 2343 kprintf("\t%p ppn 0x%lx offset 0x%lx type %u state %u (%s) wired_count %u\n", 2344 page, page->physical_page_number, page->cache_offset, page->type, page->state, 2345 page_state_to_string(page->state), page->wired_count); 2346 } else if(page->type == PAGE_TYPE_DUMMY) { 2347 kprintf("\t%p DUMMY PAGE state %u (%s)\n", 2348 page, page->state, page_state_to_string(page->state)); 2349 } else 2350 kprintf("\t%p UNKNOWN PAGE type %u\n", page, page->type); 2351 } 2352 2353 if (!showPages) 2354 kprintf("\t%ld in cache\n", count); 2355 } 2356 2357 return 0; 2358 } 2359 2360 2361 static void 2362 dump_area_struct(vm_area *area, bool mappings) 2363 { 2364 kprintf("AREA: %p\n", area); 2365 kprintf("name:\t\t'%s'\n", area->name); 2366 kprintf("owner:\t\t0x%lx\n", area->address_space->id); 2367 kprintf("id:\t\t0x%lx\n", area->id); 2368 kprintf("base:\t\t0x%lx\n", area->base); 2369 kprintf("size:\t\t0x%lx\n", area->size); 2370 kprintf("protection:\t0x%lx\n", area->protection); 2371 kprintf("wiring:\t\t0x%x\n", area->wiring); 2372 
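	// The debugger commands in this part of the file share one shape: an
	// int (*)(int, char **) handler that parses its arguments with strtoul(),
	// prints via kprintf(), and is registered later with
	// add_debugger_command() in vm_init(). A minimal sketch of such a
	// handler (the command itself is made up for illustration):
#if 0
	static int
	dump_cache_page_count(int argc, char **argv)
	{
		if (argc < 2) {
			kprintf("usage: %s <cache address>\n", argv[0]);
			return 0;
		}

		vm_cache *cache = (vm_cache *)strtoul(argv[1], NULL, 0);
		int32 count = 0;

		// same page list walk as dump_cache() above uses
		for (vm_page *page = cache->page_list; page != NULL;
				page = page->cache_next)
			count++;

		kprintf("%ld pages in cache %p\n", count, cache);
		return 0;
	}
#endif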
kprintf("memory_type:\t0x%x\n", area->memory_type); 2373 kprintf("ref_count:\t%ld\n", area->ref_count); 2374 kprintf("cache_ref:\t%p\n", area->cache_ref); 2375 kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type)); 2376 kprintf("cache_offset:\t0x%Lx\n", area->cache_offset); 2377 kprintf("cache_next:\t%p\n", area->cache_next); 2378 kprintf("cache_prev:\t%p\n", area->cache_prev); 2379 2380 vm_area_mappings::Iterator iterator = area->mappings.GetIterator(); 2381 if (mappings) { 2382 kprintf("page mappings:\n"); 2383 while (iterator.HasNext()) { 2384 vm_page_mapping *mapping = iterator.Next(); 2385 kprintf(" %p", mapping->page); 2386 } 2387 kprintf("\n"); 2388 } else { 2389 uint32 count = 0; 2390 while (iterator.Next() != NULL) { 2391 count++; 2392 } 2393 kprintf("page mappings:\t%lu\n", count); 2394 } 2395 } 2396 2397 2398 static int 2399 dump_area(int argc, char **argv) 2400 { 2401 bool mappings = false; 2402 bool found = false; 2403 int32 index = 1; 2404 vm_area *area; 2405 addr_t num; 2406 2407 if (argc < 2) { 2408 kprintf("usage: area [-m] <id|address|name>\n"); 2409 return 0; 2410 } 2411 2412 if (!strcmp(argv[1], "-m")) { 2413 mappings = true; 2414 index++; 2415 } 2416 2417 num = strtoul(argv[index], NULL, 0); 2418 2419 // walk through the area list, looking for the arguments as a name 2420 struct hash_iterator iter; 2421 2422 hash_open(sAreaHash, &iter); 2423 while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) { 2424 if ((area->name != NULL && !strcmp(argv[index], area->name)) 2425 || num != 0 2426 && ((addr_t)area->id == num 2427 || area->base <= num && area->base + area->size > num)) { 2428 dump_area_struct(area, mappings); 2429 found = true; 2430 } 2431 } 2432 2433 if (!found) 2434 kprintf("could not find area %s (%ld)\n", argv[index], num); 2435 return 0; 2436 } 2437 2438 2439 static int 2440 dump_area_list(int argc, char **argv) 2441 { 2442 vm_area *area; 2443 struct hash_iterator iter; 2444 const char *name = NULL; 2445 int32 id = 0; 2446 2447 if (argc > 1) { 2448 id = strtoul(argv[1], NULL, 0); 2449 if (id == 0) 2450 name = argv[1]; 2451 } 2452 2453 kprintf("addr id base\t\tsize protect lock name\n"); 2454 2455 hash_open(sAreaHash, &iter); 2456 while ((area = (vm_area *)hash_next(sAreaHash, &iter)) != NULL) { 2457 if (id != 0 && area->address_space->id != id 2458 || name != NULL && strstr(area->name, name) == NULL) 2459 continue; 2460 2461 kprintf("%p %5lx %p\t%p %4lx\t%4d %s\n", area, area->id, (void *)area->base, 2462 (void *)area->size, area->protection, area->wiring, area->name); 2463 } 2464 hash_close(sAreaHash, &iter, false); 2465 return 0; 2466 } 2467 2468 2469 static int 2470 dump_available_memory(int argc, char **argv) 2471 { 2472 kprintf("Available memory: %Ld/%lu bytes\n", 2473 sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE); 2474 return 0; 2475 } 2476 2477 2478 status_t 2479 vm_delete_areas(struct vm_address_space *addressSpace) 2480 { 2481 vm_area *area; 2482 vm_area *next, *last = NULL; 2483 2484 TRACE(("vm_delete_areas: called on address space 0x%lx\n", addressSpace->id)); 2485 2486 acquire_sem_etc(addressSpace->sem, WRITE_COUNT, 0, 0); 2487 2488 // remove all reserved areas in this address space 2489 2490 for (area = addressSpace->areas; area; area = next) { 2491 next = area->address_space_next; 2492 2493 if (area->id == RESERVED_AREA_ID) { 2494 // just remove it 2495 if (last) 2496 last->address_space_next = area->address_space_next; 2497 else 2498 addressSpace->areas = area->address_space_next; 2499 2500 
vm_put_address_space(addressSpace); 2501 free(area); 2502 continue; 2503 } 2504 2505 last = area; 2506 } 2507 2508 // delete all the areas in this address space 2509 2510 for (area = addressSpace->areas; area; area = next) { 2511 next = area->address_space_next; 2512 2513 // decrement the ref on this area, may actually push the ref < 0, if there 2514 // is a concurrent delete_area() on that specific area, but that's ok here 2515 if (!_vm_put_area(area, true)) 2516 dprintf("vm_delete_areas() did not delete area %p\n", area); 2517 } 2518 2519 release_sem_etc(addressSpace->sem, WRITE_COUNT, 0); 2520 2521 return B_OK; 2522 } 2523 2524 2525 static area_id 2526 vm_area_for(team_id team, addr_t address) 2527 { 2528 vm_address_space *addressSpace; 2529 area_id id = B_ERROR; 2530 2531 addressSpace = vm_get_address_space_by_id(team); 2532 if (addressSpace == NULL) 2533 return B_BAD_TEAM_ID; 2534 2535 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 2536 2537 vm_area *area = vm_area_lookup(addressSpace, address); 2538 if (area != NULL) 2539 id = area->id; 2540 2541 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 2542 vm_put_address_space(addressSpace); 2543 2544 return id; 2545 } 2546 2547 2548 /*! 2549 Frees physical pages that were used during the boot process. 2550 */ 2551 static void 2552 unmap_and_free_physical_pages(vm_translation_map *map, addr_t start, addr_t end) 2553 { 2554 // free all physical pages in the specified range 2555 2556 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 2557 addr_t physicalAddress; 2558 uint32 flags; 2559 2560 if (map->ops->query(map, current, &physicalAddress, &flags) == B_OK) { 2561 vm_page *page = vm_lookup_page(current / B_PAGE_SIZE); 2562 if (page != NULL) 2563 vm_page_set_state(page, PAGE_STATE_FREE); 2564 } 2565 } 2566 2567 // unmap the memory 2568 map->ops->unmap(map, start, end - 1); 2569 } 2570 2571 2572 void 2573 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 2574 { 2575 vm_translation_map *map = &vm_kernel_address_space()->translation_map; 2576 addr_t end = start + size; 2577 addr_t lastEnd = start; 2578 vm_area *area; 2579 2580 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", (void *)start, (void *)end)); 2581 2582 // The areas are sorted in virtual address space order, so 2583 // we just have to find the holes between them that fall 2584 // into the area we should dispose 2585 2586 map->ops->lock(map); 2587 2588 for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) { 2589 addr_t areaStart = area->base; 2590 addr_t areaEnd = areaStart + area->size; 2591 2592 if (area->id == RESERVED_AREA_ID) 2593 continue; 2594 2595 if (areaEnd >= end) { 2596 // we are done, the areas are already beyond of what we have to free 2597 lastEnd = end; 2598 break; 2599 } 2600 2601 if (areaStart > lastEnd) { 2602 // this is something we can free 2603 TRACE(("free boot range: get rid of %p - %p\n", (void *)lastEnd, (void *)areaStart)); 2604 unmap_and_free_physical_pages(map, lastEnd, areaStart); 2605 } 2606 2607 lastEnd = areaEnd; 2608 } 2609 2610 if (lastEnd < end) { 2611 // we can also get rid of some space at the end of the area 2612 TRACE(("free boot range: also remove %p - %p\n", (void *)lastEnd, (void *)end)); 2613 unmap_and_free_physical_pages(map, lastEnd, end); 2614 } 2615 2616 map->ops->unlock(map); 2617 } 2618 2619 2620 static void 2621 create_preloaded_image_areas(struct preloaded_image *image) 2622 { 2623 char name[B_OS_NAME_LENGTH]; 2624 void *address; 2625 int32 
length; 2626 2627 // use file name to create a good area name 2628 char *fileName = strrchr(image->name, '/'); 2629 if (fileName == NULL) 2630 fileName = image->name; 2631 else 2632 fileName++; 2633 2634 length = strlen(fileName); 2635 // make sure there is enough space for the suffix 2636 if (length > 25) 2637 length = 25; 2638 2639 memcpy(name, fileName, length); 2640 strcpy(name + length, "_text"); 2641 address = (void *)ROUNDOWN(image->text_region.start, B_PAGE_SIZE); 2642 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 2643 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 2644 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2645 // this will later be remapped read-only/executable by the 2646 // ELF initialization code 2647 2648 strcpy(name + length, "_data"); 2649 address = (void *)ROUNDOWN(image->data_region.start, B_PAGE_SIZE); 2650 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 2651 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 2652 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2653 } 2654 2655 2656 /** Frees all previously kernel arguments areas from the kernel_args structure. 2657 * Any boot loader resources contained in that arguments must not be accessed 2658 * anymore past this point. 2659 */ 2660 2661 void 2662 vm_free_kernel_args(kernel_args *args) 2663 { 2664 uint32 i; 2665 2666 TRACE(("vm_free_kernel_args()\n")); 2667 2668 for (i = 0; i < args->num_kernel_args_ranges; i++) { 2669 area_id area = area_for((void *)args->kernel_args_range[i].start); 2670 if (area >= B_OK) 2671 delete_area(area); 2672 } 2673 } 2674 2675 2676 static void 2677 allocate_kernel_args(kernel_args *args) 2678 { 2679 uint32 i; 2680 2681 TRACE(("allocate_kernel_args()\n")); 2682 2683 for (i = 0; i < args->num_kernel_args_ranges; i++) { 2684 void *address = (void *)args->kernel_args_range[i].start; 2685 2686 create_area("_kernel args_", &address, B_EXACT_ADDRESS, args->kernel_args_range[i].size, 2687 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2688 } 2689 } 2690 2691 2692 static void 2693 unreserve_boot_loader_ranges(kernel_args *args) 2694 { 2695 uint32 i; 2696 2697 TRACE(("unreserve_boot_loader_ranges()\n")); 2698 2699 for (i = 0; i < args->num_virtual_allocated_ranges; i++) { 2700 vm_unreserve_address_range(vm_kernel_address_space_id(), 2701 (void *)args->virtual_allocated_range[i].start, 2702 args->virtual_allocated_range[i].size); 2703 } 2704 } 2705 2706 2707 static void 2708 reserve_boot_loader_ranges(kernel_args *args) 2709 { 2710 uint32 i; 2711 2712 TRACE(("reserve_boot_loader_ranges()\n")); 2713 2714 for (i = 0; i < args->num_virtual_allocated_ranges; i++) { 2715 void *address = (void *)args->virtual_allocated_range[i].start; 2716 2717 // If the address is no kernel address, we just skip it. The 2718 // architecture specific code has to deal with it. 
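	// vm_reserve_address_range()/vm_unreserve_address_range() (used below for
	// the boot loader ranges, and in unreserve_boot_loader_ranges() above)
	// set aside a stretch of address space ahead of time so that it is not
	// handed out to ordinary allocations. A hedged usage sketch with made-up
	// values:
#if 0
	static status_t
	reserve_example_range(void)
	{
		void *address = (void *)0x80400000;
			// hypothetical kernel address, for illustration only

		status_t status = vm_reserve_address_range(
			vm_kernel_address_space_id(), &address, B_EXACT_ADDRESS,
			16 * B_PAGE_SIZE, 0);
		if (status < B_OK)
			return status;

		// ... create areas inside the reserved range, or give it back:
		return vm_unreserve_address_range(vm_kernel_address_space_id(),
			address, 16 * B_PAGE_SIZE);
	}
#endif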
2719 if (!IS_KERNEL_ADDRESS(address)) { 2720 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n", 2721 address, args->virtual_allocated_range[i].size); 2722 continue; 2723 } 2724 2725 status_t status = vm_reserve_address_range(vm_kernel_address_space_id(), &address, 2726 B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 2727 if (status < B_OK) 2728 panic("could not reserve boot loader ranges\n"); 2729 } 2730 } 2731 2732 2733 static addr_t 2734 allocate_early_virtual(kernel_args *args, size_t size) 2735 { 2736 addr_t spot = 0; 2737 uint32 i; 2738 int last_valloc_entry = 0; 2739 2740 size = PAGE_ALIGN(size); 2741 // find a slot in the virtual allocation addr range 2742 for (i = 1; i < args->num_virtual_allocated_ranges; i++) { 2743 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 2744 + args->virtual_allocated_range[i - 1].size; 2745 last_valloc_entry = i; 2746 // check to see if the space between this one and the last is big enough 2747 if (previousRangeEnd >= KERNEL_BASE 2748 && args->virtual_allocated_range[i].start 2749 - previousRangeEnd >= size) { 2750 spot = previousRangeEnd; 2751 args->virtual_allocated_range[i - 1].size += size; 2752 goto out; 2753 } 2754 } 2755 if (spot == 0) { 2756 // we hadn't found one between allocation ranges. this is ok. 2757 // see if there's a gap after the last one 2758 addr_t lastRangeEnd 2759 = args->virtual_allocated_range[last_valloc_entry].start 2760 + args->virtual_allocated_range[last_valloc_entry].size; 2761 if (KERNEL_BASE + (KERNEL_SIZE - 1) - lastRangeEnd >= size) { 2762 spot = lastRangeEnd; 2763 args->virtual_allocated_range[last_valloc_entry].size += size; 2764 goto out; 2765 } 2766 // see if there's a gap before the first one 2767 if (args->virtual_allocated_range[0].start > KERNEL_BASE) { 2768 if (args->virtual_allocated_range[0].start - KERNEL_BASE >= size) { 2769 args->virtual_allocated_range[0].start -= size; 2770 spot = args->virtual_allocated_range[0].start; 2771 goto out; 2772 } 2773 } 2774 } 2775 2776 out: 2777 return spot; 2778 } 2779 2780 2781 static bool 2782 is_page_in_physical_memory_range(kernel_args *args, addr_t address) 2783 { 2784 // TODO: horrible brute-force method of determining if the page can be allocated 2785 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 2786 if (address >= args->physical_memory_range[i].start 2787 && address < args->physical_memory_range[i].start 2788 + args->physical_memory_range[i].size) 2789 return true; 2790 } 2791 return false; 2792 } 2793 2794 2795 static addr_t 2796 allocate_early_physical_page(kernel_args *args) 2797 { 2798 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 2799 addr_t nextPage; 2800 2801 nextPage = args->physical_allocated_range[i].start 2802 + args->physical_allocated_range[i].size; 2803 // see if the page after the next allocated paddr run can be allocated 2804 if (i + 1 < args->num_physical_allocated_ranges 2805 && args->physical_allocated_range[i + 1].size != 0) { 2806 // see if the next page will collide with the next allocated range 2807 if (nextPage >= args->physical_allocated_range[i+1].start) 2808 continue; 2809 } 2810 // see if the next physical page fits in the memory block 2811 if (is_page_in_physical_memory_range(args, nextPage)) { 2812 // we got one! 2813 args->physical_allocated_range[i].size += B_PAGE_SIZE; 2814 return nextPage / B_PAGE_SIZE; 2815 } 2816 } 2817 2818 return 0; 2819 // could not allocate a block 2820 } 2821 2822 2823 /*! 
2824 This one uses the kernel_args' physical and virtual memory ranges to 2825 allocate some pages before the VM is completely up. 2826 */ 2827 addr_t 2828 vm_allocate_early(kernel_args *args, size_t virtualSize, size_t physicalSize, 2829 uint32 attributes) 2830 { 2831 if (physicalSize > virtualSize) 2832 physicalSize = virtualSize; 2833 2834 // find the vaddr to allocate at 2835 addr_t virtualBase = allocate_early_virtual(args, virtualSize); 2836 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress); 2837 2838 // map the pages 2839 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 2840 addr_t physicalAddress = allocate_early_physical_page(args); 2841 if (physicalAddress == 0) 2842 panic("error allocating early page!\n"); 2843 2844 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 2845 2846 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 2847 physicalAddress * B_PAGE_SIZE, attributes, 2848 &allocate_early_physical_page); 2849 } 2850 2851 return virtualBase; 2852 } 2853 2854 2855 status_t 2856 vm_init(kernel_args *args) 2857 { 2858 struct preloaded_image *image; 2859 void *address; 2860 status_t err = 0; 2861 uint32 i; 2862 2863 TRACE(("vm_init: entry\n")); 2864 err = arch_vm_translation_map_init(args); 2865 err = arch_vm_init(args); 2866 2867 // initialize some globals 2868 sNextAreaID = 1; 2869 sAreaHashLock = -1; 2870 sAvailableMemoryLock.sem = -1; 2871 2872 vm_page_init_num_pages(args); 2873 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 2874 2875 // reduce the heap size if we have not so much RAM 2876 size_t heapSize = HEAP_SIZE; 2877 if (sAvailableMemory < 100 * 1024 * 1024) 2878 heapSize /= 4; 2879 else if (sAvailableMemory < 200 * 1024 * 1024) 2880 heapSize /= 2; 2881 2882 // map in the new heap and initialize it 2883 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 2884 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2885 TRACE(("heap at 0x%lx\n", heapBase)); 2886 heap_init(heapBase, heapSize); 2887 2888 size_t slabInitialSize = 2 * B_PAGE_SIZE; 2889 addr_t slabInitialBase = vm_allocate_early(args, slabInitialSize, 2890 slabInitialSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2891 slab_init(args, slabInitialBase, slabInitialSize); 2892 2893 // initialize the free page list and physical page mapper 2894 vm_page_init(args); 2895 2896 // initialize the hash table that stores the pages mapped to caches 2897 vm_cache_init(args); 2898 2899 { 2900 vm_area *area; 2901 sAreaHash = hash_init(REGION_HASH_TABLE_SIZE, (addr_t)&area->hash_next - (addr_t)area, 2902 &area_compare, &area_hash); 2903 if (sAreaHash == NULL) 2904 panic("vm_init: error creating aspace hash table\n"); 2905 } 2906 2907 vm_address_space_init(); 2908 reserve_boot_loader_ranges(args); 2909 2910 // do any further initialization that the architecture dependant layers may need now 2911 arch_vm_translation_map_init_post_area(args); 2912 arch_vm_init_post_area(args); 2913 vm_page_init_post_area(args); 2914 2915 // allocate areas to represent stuff that already exists 2916 2917 address = (void *)ROUNDOWN(heapBase, B_PAGE_SIZE); 2918 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 2919 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2920 2921 address = (void *)ROUNDOWN(slabInitialBase, B_PAGE_SIZE); 2922 create_area("initial slab space", &address, B_EXACT_ADDRESS, 2923 slabInitialSize, B_ALREADY_WIRED, B_KERNEL_READ_AREA 2924 | B_KERNEL_WRITE_AREA); 2925 2926 allocate_kernel_args(args); 2927 2928 args->kernel_image.name = 
"kernel"; 2929 // the lazy boot loader currently doesn't set the kernel's name... 2930 create_preloaded_image_areas(&args->kernel_image); 2931 2932 // allocate areas for preloaded images 2933 for (image = args->preloaded_images; image != NULL; image = image->next) { 2934 create_preloaded_image_areas(image); 2935 } 2936 2937 // allocate kernel stacks 2938 for (i = 0; i < args->num_cpus; i++) { 2939 char name[64]; 2940 2941 sprintf(name, "idle thread %lu kstack", i + 1); 2942 address = (void *)args->cpu_kstack[i].start; 2943 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 2944 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 2945 } 2946 2947 // add some debugger commands 2948 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 2949 add_debugger_command("area", &dump_area, "Dump info about a particular area"); 2950 add_debugger_command("cache_ref", &dump_cache, "Dump vm_cache"); 2951 add_debugger_command("cache", &dump_cache, "Dump vm_cache"); 2952 add_debugger_command("cache_chain", &dump_cache_chain, "Dump vm_cache chain"); 2953 add_debugger_command("avail", &dump_available_memory, "Dump available memory"); 2954 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 2955 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 2956 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 2957 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 2958 2959 TRACE(("vm_init: exit\n")); 2960 2961 return err; 2962 } 2963 2964 2965 status_t 2966 vm_init_post_sem(kernel_args *args) 2967 { 2968 vm_area *area; 2969 2970 // This frees all unused boot loader resources and makes its space available again 2971 arch_vm_init_end(args); 2972 unreserve_boot_loader_ranges(args); 2973 2974 // fill in all of the semaphores that were not allocated before 2975 // since we're still single threaded and only the kernel address space exists, 2976 // it isn't that hard to find all of the ones we need to create 2977 2978 benaphore_init(&sAvailableMemoryLock, "available memory lock"); 2979 arch_vm_translation_map_init_post_sem(args); 2980 vm_address_space_init_post_sem(); 2981 2982 for (area = vm_kernel_address_space()->areas; area; area = area->address_space_next) { 2983 if (area->id == RESERVED_AREA_ID) 2984 continue; 2985 2986 if (area->cache_ref->lock.sem < 0) 2987 mutex_init(&area->cache_ref->lock, "cache_ref_mutex"); 2988 } 2989 2990 sAreaHashLock = create_sem(WRITE_COUNT, "area hash"); 2991 2992 slab_init_post_sem(); 2993 2994 return heap_init_post_sem(args); 2995 } 2996 2997 2998 status_t 2999 vm_init_post_thread(kernel_args *args) 3000 { 3001 vm_page_init_post_thread(args); 3002 vm_daemon_init(); 3003 vm_low_memory_init(); 3004 3005 return heap_init_post_thread(args); 3006 } 3007 3008 3009 status_t 3010 vm_init_post_modules(kernel_args *args) 3011 { 3012 return arch_vm_init_post_modules(args); 3013 } 3014 3015 3016 void 3017 permit_page_faults(void) 3018 { 3019 struct thread *thread = thread_get_current_thread(); 3020 if (thread != NULL) 3021 atomic_add(&thread->page_faults_allowed, 1); 3022 } 3023 3024 3025 void 3026 forbid_page_faults(void) 3027 { 3028 struct thread *thread = thread_get_current_thread(); 3029 if (thread != NULL) 3030 atomic_add(&thread->page_faults_allowed, -1); 3031 } 3032 3033 3034 status_t 3035 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser, 3036 addr_t *newIP) 3037 { 3038 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", 
address, faultAddress)); 3039 3040 *newIP = 0; 3041 3042 status_t status = vm_soft_fault(address, isWrite, isUser); 3043 if (status < B_OK) { 3044 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at 0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n", 3045 strerror(status), address, faultAddress, isWrite, isUser, 3046 thread_get_current_thread_id()); 3047 if (!isUser) { 3048 struct thread *thread = thread_get_current_thread(); 3049 if (thread != NULL && thread->fault_handler != 0) { 3050 // this will cause the arch dependant page fault handler to 3051 // modify the IP on the interrupt frame or whatever to return 3052 // to this address 3053 *newIP = thread->fault_handler; 3054 } else { 3055 // unhandled page fault in the kernel 3056 panic("vm_page_fault: unhandled page fault in kernel space at 0x%lx, ip 0x%lx\n", 3057 address, faultAddress); 3058 } 3059 } else { 3060 #if 1 3061 // ToDo: remove me once we have proper userland debugging support (and tools) 3062 vm_address_space *addressSpace = vm_get_current_user_address_space(); 3063 vm_area *area; 3064 3065 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3066 area = vm_area_lookup(addressSpace, faultAddress); 3067 3068 dprintf("vm_page_fault: sending team \"%s\" 0x%lx SIGSEGV, ip %#lx (\"%s\" +%#lx)\n", 3069 thread_get_current_thread()->team->name, 3070 thread_get_current_thread()->team->id, faultAddress, 3071 area ? area->name : "???", faultAddress - (area ? area->base : 0x0)); 3072 3073 // We can print a stack trace of the userland thread here. 3074 #if 1 3075 if (area) { 3076 struct stack_frame { 3077 #if defined(__INTEL__) || defined(__POWERPC__) 3078 struct stack_frame* previous; 3079 void* return_address; 3080 #else 3081 // ... 3082 #endif 3083 } frame; 3084 #ifdef __INTEL__ 3085 struct iframe *iframe = i386_get_user_iframe(); 3086 if (iframe == NULL) 3087 panic("iframe is NULL!"); 3088 3089 status_t status = user_memcpy(&frame, (void *)iframe->ebp, 3090 sizeof(struct stack_frame)); 3091 #elif defined(__POWERPC__) 3092 struct iframe *iframe = ppc_get_user_iframe(); 3093 if (iframe == NULL) 3094 panic("iframe is NULL!"); 3095 3096 status_t status = user_memcpy(&frame, (void *)iframe->r1, 3097 sizeof(struct stack_frame)); 3098 #else 3099 # warn "vm_page_fault() stack trace won't work" 3100 status = B_ERROR; 3101 #endif 3102 3103 dprintf("stack trace:\n"); 3104 while (status == B_OK) { 3105 dprintf(" %p", frame.return_address); 3106 area = vm_area_lookup(addressSpace, 3107 (addr_t)frame.return_address); 3108 if (area) { 3109 dprintf(" (%s + %#lx)", area->name, 3110 (addr_t)frame.return_address - area->base); 3111 } 3112 dprintf("\n"); 3113 3114 status = user_memcpy(&frame, frame.previous, 3115 sizeof(struct stack_frame)); 3116 } 3117 } 3118 #endif // 0 (stack trace) 3119 3120 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3121 vm_put_address_space(addressSpace); 3122 #endif 3123 if (user_debug_exception_occurred(B_SEGMENT_VIOLATION, SIGSEGV)) 3124 send_signal(team_get_current_team_id(), SIGSEGV); 3125 } 3126 } 3127 3128 return B_HANDLED_INTERRUPT; 3129 } 3130 3131 3132 static inline status_t 3133 fault_acquire_locked_source(vm_cache *cache, vm_cache_ref **_sourceRef) 3134 { 3135 retry: 3136 vm_cache *source = cache->source; 3137 if (source == NULL) 3138 return B_ERROR; 3139 if (source->busy) 3140 return B_BUSY; 3141 3142 vm_cache_ref *sourceRef = source->ref; 3143 vm_cache_acquire_ref(sourceRef); 3144 3145 mutex_lock(&sourceRef->lock); 3146 3147 if (sourceRef->cache != cache->source || sourceRef->cache->busy) 
{ 3148 mutex_unlock(&sourceRef->lock); 3149 vm_cache_release_ref(sourceRef); 3150 goto retry; 3151 } 3152 3153 *_sourceRef = sourceRef; 3154 return B_OK; 3155 } 3156 3157 3158 /*! 3159 Inserts a busy dummy page into a cache, and makes sure the cache won't go 3160 away by grabbing a reference to it. 3161 */ 3162 static inline void 3163 fault_insert_dummy_page(vm_cache_ref *cacheRef, vm_page &dummyPage, off_t cacheOffset) 3164 { 3165 dummyPage.state = PAGE_STATE_BUSY; 3166 vm_cache_acquire_ref(cacheRef); 3167 vm_cache_insert_page(cacheRef, &dummyPage, cacheOffset); 3168 } 3169 3170 3171 /*! 3172 Removes the busy dummy page from a cache, and releases its reference to 3173 the cache. 3174 */ 3175 static inline void 3176 fault_remove_dummy_page(vm_page &dummyPage, bool isLocked) 3177 { 3178 vm_cache_ref *cacheRef = dummyPage.cache->ref; 3179 if (!isLocked) 3180 mutex_lock(&cacheRef->lock); 3181 3182 vm_cache_remove_page(cacheRef, &dummyPage); 3183 3184 if (!isLocked) 3185 mutex_unlock(&cacheRef->lock); 3186 3187 vm_cache_release_ref(cacheRef); 3188 3189 dummyPage.state = PAGE_STATE_INACTIVE; 3190 } 3191 3192 3193 /*! 3194 Finds a page at the specified \a cacheOffset in either the \a topCacheRef 3195 or in its source chain. Will also page in a missing page in case there is 3196 a cache that has the page. 3197 If it couldn't find a page, it will return the vm_cache that should get it, 3198 otherwise, it will return the vm_cache that contains the cache. 3199 It always grabs a reference to the vm_cache that it returns, and also locks it. 3200 */ 3201 static inline vm_page * 3202 fault_find_page(vm_translation_map *map, vm_cache_ref *topCacheRef, 3203 off_t cacheOffset, bool isWrite, vm_page &dummyPage, vm_cache_ref **_pageRef) 3204 { 3205 vm_cache_ref *cacheRef = topCacheRef; 3206 vm_cache_ref *lastCacheRef = NULL; 3207 vm_page *page = NULL; 3208 3209 vm_cache_acquire_ref(cacheRef); 3210 mutex_lock(&cacheRef->lock); 3211 // we release this later in the loop 3212 3213 while (cacheRef != NULL) { 3214 if (lastCacheRef != NULL) 3215 vm_cache_release_ref(lastCacheRef); 3216 3217 // we hold the lock of the cacheRef at this point 3218 3219 lastCacheRef = cacheRef; 3220 3221 for (;;) { 3222 page = vm_cache_lookup_page(cacheRef, cacheOffset); 3223 if (page != NULL && page->state != PAGE_STATE_BUSY) { 3224 vm_page_set_state(page, PAGE_STATE_BUSY); 3225 break; 3226 } 3227 if (page == NULL || page == &dummyPage) 3228 break; 3229 3230 // page must be busy 3231 // ToDo: don't wait forever! 
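	// The ToDo above notes that this loop may wait forever on a busy page.
	// A minimal sketch of a bounded variant - the retry limit and the error
	// handling are illustrative assumptions, not part of this code:
#if 0
	int32 retries = 500;
	while (page != NULL && page != &dummyPage
		&& page->state == PAGE_STATE_BUSY) {
		if (--retries == 0) {
			// give up instead of spinning forever; a real fix would also
			// have to clean up the dummy page and drop the references held
			page = NULL;
			break;
		}

		mutex_unlock(&cacheRef->lock);
		snooze(20000);
		mutex_lock(&cacheRef->lock);
		page = vm_cache_lookup_page(cacheRef, cacheOffset);
	}
#endif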
3232 mutex_unlock(&cacheRef->lock); 3233 snooze(20000); 3234 mutex_lock(&cacheRef->lock); 3235 } 3236 3237 if (page != NULL && page != &dummyPage) 3238 break; 3239 3240 // The current cache does not contain the page we're looking for 3241 3242 // If we're at the top most cache, insert the dummy page here to keep other threads 3243 // from faulting on the same address and chasing us up the cache chain 3244 if (cacheRef == topCacheRef && dummyPage.state != PAGE_STATE_BUSY) 3245 fault_insert_dummy_page(cacheRef, dummyPage, cacheOffset); 3246 3247 // see if the vm_store has it 3248 vm_store *store = cacheRef->cache->store; 3249 if (store->ops->has_page != NULL && store->ops->has_page(store, cacheOffset)) { 3250 size_t bytesRead; 3251 iovec vec; 3252 3253 vec.iov_len = bytesRead = B_PAGE_SIZE; 3254 3255 mutex_unlock(&cacheRef->lock); 3256 3257 page = vm_page_allocate_page(PAGE_STATE_FREE); 3258 3259 dummyPage.queue_next = page; 3260 dummyPage.busy_reading = true; 3261 // we mark that page busy reading, so that the file cache can ignore 3262 // us in case it works on the very same page 3263 3264 map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE, (addr_t *)&vec.iov_base, PHYSICAL_PAGE_CAN_WAIT); 3265 status_t status = store->ops->read(store, cacheOffset, &vec, 1, &bytesRead, false); 3266 if (status < B_OK) { 3267 // TODO: real error handling! 3268 panic("reading from store %p (cacheRef %p) returned: %s!\n", store, cacheRef, strerror(status)); 3269 } 3270 map->ops->put_physical_page((addr_t)vec.iov_base); 3271 3272 mutex_lock(&cacheRef->lock); 3273 3274 if (cacheRef == topCacheRef) 3275 fault_remove_dummy_page(dummyPage, true); 3276 3277 // We insert the queue_next here, because someone else could have 3278 // replaced our page 3279 vm_cache_insert_page(cacheRef, dummyPage.queue_next, cacheOffset); 3280 3281 if (dummyPage.queue_next != page) { 3282 // Indeed, the page got replaced by someone else - we can safely 3283 // throw our page away now 3284 vm_page_set_state(page, PAGE_STATE_FREE); 3285 page = dummyPage.queue_next; 3286 } 3287 break; 3288 } 3289 3290 vm_cache_ref *nextCacheRef; 3291 status_t status = fault_acquire_locked_source(cacheRef->cache, &nextCacheRef); 3292 if (status == B_BUSY) { 3293 // the source cache is currently in the process of being merged 3294 // with his only consumer (cacheRef); since its pages are moved 3295 // upwards, too, we try this cache again 3296 mutex_unlock(&cacheRef->lock); 3297 mutex_lock(&cacheRef->lock); 3298 lastCacheRef = NULL; 3299 continue; 3300 } else if (status < B_OK) 3301 nextCacheRef = NULL; 3302 3303 mutex_unlock(&cacheRef->lock); 3304 // at this point, we still hold a ref to this cache (through lastCacheRef) 3305 3306 cacheRef = nextCacheRef; 3307 } 3308 3309 if (page == NULL) { 3310 // there was no adequate page, determine the cache for a clean one 3311 if (cacheRef == NULL) { 3312 // We rolled off the end of the cache chain, so we need to decide which 3313 // cache will get the new page we're about to create. 3314 cacheRef = isWrite ? topCacheRef : lastCacheRef; 3315 // Read-only pages come in the deepest cache - only the 3316 // top most cache may have direct write access. 
3317 vm_cache_acquire_ref(cacheRef); 3318 mutex_lock(&cacheRef->lock); 3319 } 3320 3321 // release the reference of the last vm_cache_ref we still have from the loop above 3322 if (lastCacheRef != NULL) 3323 vm_cache_release_ref(lastCacheRef); 3324 } else { 3325 // we still own a reference to the cacheRef 3326 } 3327 3328 *_pageRef = cacheRef; 3329 return page; 3330 } 3331 3332 3333 /*! 3334 Returns the page that should be mapped into the area that got the fault. 3335 It returns the owner of the page in \a sourceRef - it keeps a reference 3336 to it, and has also locked it on exit. 3337 */ 3338 static inline vm_page * 3339 fault_get_page(vm_translation_map *map, vm_cache_ref *topCacheRef, 3340 off_t cacheOffset, bool isWrite, vm_page &dummyPage, vm_cache_ref **_sourceRef) 3341 { 3342 vm_cache_ref *cacheRef; 3343 vm_page *page = fault_find_page(map, topCacheRef, cacheOffset, isWrite, 3344 dummyPage, &cacheRef); 3345 if (page == NULL) { 3346 // we still haven't found a page, so we allocate a clean one 3347 3348 page = vm_page_allocate_page(PAGE_STATE_CLEAR); 3349 FTRACE(("vm_soft_fault: just allocated page 0x%lx\n", page->physical_page_number)); 3350 3351 // Insert the new page into our cache, and replace it with the dummy page if necessary 3352 3353 // if we inserted a dummy page into this cache, we have to remove it now 3354 if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == cacheRef->cache) 3355 fault_remove_dummy_page(dummyPage, true); 3356 3357 vm_cache_insert_page(cacheRef, page, cacheOffset); 3358 3359 if (dummyPage.state == PAGE_STATE_BUSY) { 3360 // we had inserted the dummy cache in another cache, so let's remove it from there 3361 fault_remove_dummy_page(dummyPage, false); 3362 } 3363 } 3364 3365 // We now have the page and a cache it belongs to - we now need to make 3366 // sure that the area's cache can access it, too, and sees the correct data 3367 3368 if (page->cache != topCacheRef->cache && isWrite) { 3369 // now we have a page that has the data we want, but in the wrong cache object 3370 // so we need to copy it and stick it into the top cache 3371 vm_page *sourcePage = page; 3372 void *source, *dest; 3373 3374 // ToDo: if memory is low, it might be a good idea to steal the page 3375 // from our source cache - if possible, that is 3376 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 3377 page = vm_page_allocate_page(PAGE_STATE_FREE); 3378 3379 // try to get a mapping for the src and dest page so we can copy it 3380 for (;;) { 3381 map->ops->get_physical_page(sourcePage->physical_page_number * B_PAGE_SIZE, 3382 (addr_t *)&source, PHYSICAL_PAGE_CAN_WAIT); 3383 3384 if (map->ops->get_physical_page(page->physical_page_number * B_PAGE_SIZE, 3385 (addr_t *)&dest, PHYSICAL_PAGE_NO_WAIT) == B_OK) 3386 break; 3387 3388 // it couldn't map the second one, so sleep and retry 3389 // keeps an extremely rare deadlock from occuring 3390 map->ops->put_physical_page((addr_t)source); 3391 snooze(5000); 3392 } 3393 3394 memcpy(dest, source, B_PAGE_SIZE); 3395 map->ops->put_physical_page((addr_t)source); 3396 map->ops->put_physical_page((addr_t)dest); 3397 3398 vm_page_set_state(sourcePage, PAGE_STATE_ACTIVE); 3399 3400 mutex_unlock(&cacheRef->lock); 3401 mutex_lock(&topCacheRef->lock); 3402 3403 // Insert the new page into our cache, and replace it with the dummy page if necessary 3404 3405 // if we inserted a dummy page into this cache, we have to remove it now 3406 if (dummyPage.state == PAGE_STATE_BUSY && dummyPage.cache == topCacheRef->cache) 3407 
fault_remove_dummy_page(dummyPage, true); 3408 3409 vm_cache_insert_page(topCacheRef, page, cacheOffset); 3410 3411 if (dummyPage.state == PAGE_STATE_BUSY) { 3412 // we had inserted the dummy cache in another cache, so let's remove it from there 3413 fault_remove_dummy_page(dummyPage, false); 3414 } 3415 3416 vm_cache_release_ref(cacheRef); 3417 3418 cacheRef = topCacheRef; 3419 vm_cache_acquire_ref(cacheRef); 3420 } 3421 3422 *_sourceRef = cacheRef; 3423 return page; 3424 } 3425 3426 3427 static status_t 3428 vm_soft_fault(addr_t originalAddress, bool isWrite, bool isUser) 3429 { 3430 vm_address_space *addressSpace; 3431 3432 FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n", 3433 thread_get_current_thread_id(), originalAddress, isWrite, isUser)); 3434 3435 addr_t address = ROUNDOWN(originalAddress, B_PAGE_SIZE); 3436 3437 if (IS_KERNEL_ADDRESS(address)) { 3438 addressSpace = vm_get_kernel_address_space(); 3439 } else if (IS_USER_ADDRESS(address)) { 3440 addressSpace = vm_get_current_user_address_space(); 3441 if (addressSpace == NULL) { 3442 if (!isUser) { 3443 dprintf("vm_soft_fault: kernel thread accessing invalid user memory!\n"); 3444 return B_BAD_ADDRESS; 3445 } else { 3446 // XXX weird state. 3447 panic("vm_soft_fault: non kernel thread accessing user memory that doesn't exist!\n"); 3448 } 3449 } 3450 } else { 3451 // the hit was probably in the 64k DMZ between kernel and user space 3452 // this keeps a user space thread from passing a buffer that crosses 3453 // into kernel space 3454 return B_BAD_ADDRESS; 3455 } 3456 3457 atomic_add(&addressSpace->fault_count, 1); 3458 3459 // Get the area the fault was in 3460 3461 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3462 3463 vm_area *area = vm_area_lookup(addressSpace, address); 3464 if (area == NULL) { 3465 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3466 vm_put_address_space(addressSpace); 3467 dprintf("vm_soft_fault: va 0x%lx not covered by area in address space\n", 3468 originalAddress); 3469 return B_BAD_ADDRESS; 3470 } 3471 3472 // check permissions 3473 if (isUser && (area->protection & B_USER_PROTECTION) == 0) { 3474 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3475 vm_put_address_space(addressSpace); 3476 dprintf("user access on kernel area 0x%lx at %p\n", area->id, (void *)originalAddress); 3477 return B_PERMISSION_DENIED; 3478 } 3479 if (isWrite && (area->protection & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 3480 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3481 vm_put_address_space(addressSpace); 3482 dprintf("write access attempted on read-only area 0x%lx at %p\n", 3483 area->id, (void *)originalAddress); 3484 return B_PERMISSION_DENIED; 3485 } 3486 3487 // We have the area, it was a valid access, so let's try to resolve the page fault now. 
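	// The two permission tests above amount to: user faults need a B_USER_*
	// protection bit on the area, and write faults need a write bit that
	// matches the faulting context. As an illustrative restatement only (the
	// helper name is made up):
#if 0
	static bool
	fault_access_is_allowed(vm_area *area, bool isWrite, bool isUser)
	{
		if (isUser && (area->protection & B_USER_PROTECTION) == 0)
			return false;

		if (isWrite && (area->protection
				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0)
			return false;

		return true;
	}
#endif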
3488 // At first, the top most cache from the area is investigated 3489 3490 vm_cache_ref *topCacheRef = area->cache_ref; 3491 off_t cacheOffset = address - area->base + area->cache_offset; 3492 int32 changeCount = addressSpace->change_count; 3493 3494 vm_cache_acquire_ref(topCacheRef); 3495 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3496 3497 mutex_lock(&topCacheRef->lock); 3498 3499 // See if this cache has a fault handler - this will do all the work for us 3500 { 3501 vm_store *store = topCacheRef->cache->store; 3502 if (store->ops->fault != NULL) { 3503 // Note, since the page fault is resolved with interrupts enabled, the 3504 // fault handler could be called more than once for the same reason - 3505 // the store must take this into account 3506 status_t status = store->ops->fault(store, addressSpace, cacheOffset); 3507 if (status != B_BAD_HANDLER) { 3508 mutex_unlock(&topCacheRef->lock); 3509 vm_cache_release_ref(topCacheRef); 3510 vm_put_address_space(addressSpace); 3511 return status; 3512 } 3513 } 3514 } 3515 3516 mutex_unlock(&topCacheRef->lock); 3517 3518 // The top most cache has no fault handler, so let's see if the cache or its sources 3519 // already have the page we're searching for (we're going from top to bottom) 3520 3521 vm_translation_map *map = &addressSpace->translation_map; 3522 vm_page dummyPage; 3523 dummyPage.cache = NULL; 3524 dummyPage.state = PAGE_STATE_INACTIVE; 3525 dummyPage.type = PAGE_TYPE_DUMMY; 3526 dummyPage.wired_count = 0; 3527 3528 vm_cache_ref *pageSourceRef; 3529 vm_page *page = fault_get_page(map, topCacheRef, cacheOffset, isWrite, 3530 dummyPage, &pageSourceRef); 3531 3532 status_t status = B_OK; 3533 3534 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3535 if (changeCount != addressSpace->change_count) { 3536 // something may have changed, see if the address is still valid 3537 area = vm_area_lookup(addressSpace, address); 3538 if (area == NULL 3539 || area->cache_ref != topCacheRef 3540 || (address - area->base + area->cache_offset) != cacheOffset) { 3541 dprintf("vm_soft_fault: address space layout changed effecting ongoing soft fault\n"); 3542 status = B_BAD_ADDRESS; 3543 } 3544 } 3545 3546 if (status == B_OK) { 3547 // All went fine, all there is left to do is to map the page into the address space 3548 3549 // In case this is a copy-on-write page, we need to unmap it from the area now 3550 if (isWrite && page->cache == topCacheRef->cache) 3551 vm_unmap_pages(area, address, B_PAGE_SIZE); 3552 3553 // TODO: there is currently no mechanism to prevent a page being mapped 3554 // more than once in case of a second page fault! 
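	// One way the TODO above could be approached (a sketch only; whether a
	// simple skip is enough also depends on the wiring and mapping-list
	// bookkeeping, which is why this is still a TODO):
#if 0
	bool alreadyMapped = false;
	addr_t mappedPhysicalAddress;
	uint32 mappedFlags;

	map->ops->lock(map);
	if (map->ops->query(map, address, &mappedPhysicalAddress, &mappedFlags)
			== B_OK && (mappedFlags & PAGE_PRESENT) != 0)
		alreadyMapped = true;
	map->ops->unlock(map);

	// if alreadyMapped is true, the vm_map_page() call below could be
	// skipped to avoid mapping the page a second time
#endif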
3555 3556 // If the page doesn't reside in the area's cache, we need to make sure it's 3557 // mapped in read-only, so that we cannot overwrite someone else's data (copy-on-write) 3558 uint32 newProtection = area->protection; 3559 if (page->cache != topCacheRef->cache && !isWrite) 3560 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 3561 3562 vm_map_page(area, page, address, newProtection); 3563 } 3564 3565 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3566 3567 mutex_unlock(&pageSourceRef->lock); 3568 vm_cache_release_ref(pageSourceRef); 3569 3570 if (dummyPage.state == PAGE_STATE_BUSY) { 3571 // We still have the dummy page in the cache - that happens if we didn't need 3572 // to allocate a new page before, but could use one in another cache 3573 fault_remove_dummy_page(dummyPage, false); 3574 } 3575 3576 vm_cache_release_ref(topCacheRef); 3577 vm_put_address_space(addressSpace); 3578 3579 return status; 3580 } 3581 3582 3583 /*! You must have the address space's sem held */ 3584 vm_area * 3585 vm_area_lookup(vm_address_space *addressSpace, addr_t address) 3586 { 3587 vm_area *area; 3588 3589 // check the areas list first 3590 area = addressSpace->area_hint; 3591 if (area && area->base <= address && area->base + (area->size - 1) >= address) 3592 goto found; 3593 3594 for (area = addressSpace->areas; area != NULL; area = area->address_space_next) { 3595 if (area->id == RESERVED_AREA_ID) 3596 continue; 3597 3598 if (area->base <= address && area->base + (area->size - 1) >= address) 3599 break; 3600 } 3601 3602 found: 3603 // if the ref count is zero, the area is in the middle of being 3604 // destroyed in _vm_put_area. pretend it doesn't exist. 3605 if (area && area->ref_count == 0) 3606 return NULL; 3607 3608 if (area) 3609 addressSpace->area_hint = area; 3610 3611 return area; 3612 } 3613 3614 3615 status_t 3616 vm_get_physical_page(addr_t paddr, addr_t *_vaddr, uint32 flags) 3617 { 3618 return (*vm_kernel_address_space()->translation_map.ops->get_physical_page)(paddr, _vaddr, flags); 3619 } 3620 3621 3622 status_t 3623 vm_put_physical_page(addr_t vaddr) 3624 { 3625 return (*vm_kernel_address_space()->translation_map.ops->put_physical_page)(vaddr); 3626 } 3627 3628 3629 void 3630 vm_unreserve_memory(size_t amount) 3631 { 3632 benaphore_lock(&sAvailableMemoryLock); 3633 3634 sAvailableMemory += amount; 3635 3636 benaphore_unlock(&sAvailableMemoryLock); 3637 } 3638 3639 3640 status_t 3641 vm_try_reserve_memory(size_t amount) 3642 { 3643 status_t status; 3644 benaphore_lock(&sAvailableMemoryLock); 3645 3646 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 3647 3648 if (sAvailableMemory > amount) { 3649 sAvailableMemory -= amount; 3650 status = B_OK; 3651 } else 3652 status = B_NO_MEMORY; 3653 3654 benaphore_unlock(&sAvailableMemoryLock); 3655 return status; 3656 } 3657 3658 3659 status_t 3660 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type) 3661 { 3662 vm_area *area = vm_get_area(id); 3663 if (area == NULL) 3664 return B_BAD_VALUE; 3665 3666 status_t status = arch_vm_set_memory_type(area, physicalBase, type); 3667 3668 vm_put_area(area); 3669 return status; 3670 } 3671 3672 3673 /** This function enforces some protection properties: 3674 * - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well 3675 * - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 3676 * - if no protection is specified, it defaults to B_KERNEL_READ_AREA 3677 * and B_KERNEL_WRITE_AREA. 
3678 */ 3679 3680 static void 3681 fix_protection(uint32 *protection) 3682 { 3683 if ((*protection & B_KERNEL_PROTECTION) == 0) { 3684 if ((*protection & B_USER_PROTECTION) == 0 3685 || (*protection & B_WRITE_AREA) != 0) 3686 *protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 3687 else 3688 *protection |= B_KERNEL_READ_AREA; 3689 } 3690 } 3691 3692 3693 static void 3694 fill_area_info(struct vm_area *area, area_info *info, size_t size) 3695 { 3696 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 3697 info->area = area->id; 3698 info->address = (void *)area->base; 3699 info->size = area->size; 3700 info->protection = area->protection; 3701 info->lock = B_FULL_LOCK; 3702 info->team = area->address_space->id; 3703 info->copy_count = 0; 3704 info->in_count = 0; 3705 info->out_count = 0; 3706 // ToDo: retrieve real values here! 3707 3708 mutex_lock(&area->cache_ref->lock); 3709 3710 // Note, this is a simplification; the cache could be larger than this area 3711 info->ram_size = area->cache_ref->cache->page_count * B_PAGE_SIZE; 3712 3713 mutex_unlock(&area->cache_ref->lock); 3714 } 3715 3716 3717 /*! 3718 Tests wether or not the area that contains the specified address 3719 needs any kind of locking, and actually exists. 3720 Used by both lock_memory() and unlock_memory(). 3721 */ 3722 status_t 3723 test_lock_memory(vm_address_space *addressSpace, addr_t address, 3724 bool &needsLocking) 3725 { 3726 acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0); 3727 3728 vm_area *area = vm_area_lookup(addressSpace, address); 3729 if (area != NULL) { 3730 // This determines if we need to lock the memory at all 3731 needsLocking = area->cache_type != CACHE_TYPE_NULL 3732 && area->cache_type != CACHE_TYPE_DEVICE 3733 && area->wiring != B_FULL_LOCK 3734 && area->wiring != B_CONTIGUOUS; 3735 } 3736 3737 release_sem_etc(addressSpace->sem, READ_COUNT, 0); 3738 3739 if (area == NULL) 3740 return B_BAD_ADDRESS; 3741 3742 return B_OK; 3743 } 3744 3745 3746 // #pragma mark - 3747 3748 3749 status_t 3750 user_memcpy(void *to, const void *from, size_t size) 3751 { 3752 if (arch_cpu_user_memcpy(to, from, size, &thread_get_current_thread()->fault_handler) < B_OK) 3753 return B_BAD_ADDRESS; 3754 return B_OK; 3755 } 3756 3757 3758 /** \brief Copies at most (\a size - 1) characters from the string in \a from to 3759 * the string in \a to, NULL-terminating the result. 3760 * 3761 * \param to Pointer to the destination C-string. 3762 * \param from Pointer to the source C-string. 3763 * \param size Size in bytes of the string buffer pointed to by \a to. 3764 * 3765 * \return strlen(\a from). 
3766 */ 3767 3768 ssize_t 3769 user_strlcpy(char *to, const char *from, size_t size) 3770 { 3771 return arch_cpu_user_strlcpy(to, from, size, &thread_get_current_thread()->fault_handler); 3772 } 3773 3774 3775 status_t 3776 user_memset(void *s, char c, size_t count) 3777 { 3778 if (arch_cpu_user_memset(s, c, count, &thread_get_current_thread()->fault_handler) < B_OK) 3779 return B_BAD_ADDRESS; 3780 return B_OK; 3781 } 3782 3783 // #pragma mark - kernel public API 3784 3785 3786 long 3787 lock_memory(void *address, ulong numBytes, ulong flags) 3788 { 3789 vm_address_space *addressSpace = NULL; 3790 struct vm_translation_map *map; 3791 addr_t unalignedBase = (addr_t)address; 3792 addr_t end = unalignedBase + numBytes; 3793 addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE); 3794 bool isUser = IS_USER_ADDRESS(address); 3795 bool needsLocking = true; 3796 3797 if (isUser) 3798 addressSpace = vm_get_current_user_address_space(); 3799 else 3800 addressSpace = vm_get_kernel_address_space(); 3801 if (addressSpace == NULL) 3802 return B_ERROR; 3803 3804 // test if we're on an area that allows faults at all 3805 3806 map = &addressSpace->translation_map; 3807 3808 status_t status = test_lock_memory(addressSpace, base, needsLocking); 3809 if (status < B_OK) 3810 goto out; 3811 if (!needsLocking) 3812 goto out; 3813 3814 for (; base < end; base += B_PAGE_SIZE) { 3815 addr_t physicalAddress; 3816 uint32 protection; 3817 status_t status; 3818 3819 map->ops->lock(map); 3820 status = map->ops->query(map, base, &physicalAddress, &protection); 3821 map->ops->unlock(map); 3822 3823 if (status < B_OK) 3824 goto out; 3825 3826 if ((protection & PAGE_PRESENT) != 0) { 3827 // if B_READ_DEVICE is set, the caller intents to write to the locked 3828 // memory, so if it hasn't been mapped writable, we'll try the soft 3829 // fault anyway 3830 if ((flags & B_READ_DEVICE) == 0 3831 || (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) { 3832 // update wiring 3833 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3834 if (page == NULL) 3835 panic("couldn't lookup physical page just allocated\n"); 3836 3837 page->wired_count++; 3838 // TODO: needs to be atomic on all platforms! 3839 continue; 3840 } 3841 } 3842 3843 status = vm_soft_fault(base, (flags & B_READ_DEVICE) != 0, isUser); 3844 if (status != B_OK) { 3845 dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) failed: %s\n", 3846 (void *)unalignedBase, numBytes, flags, strerror(status)); 3847 goto out; 3848 } 3849 3850 map->ops->lock(map); 3851 status = map->ops->query(map, base, &physicalAddress, &protection); 3852 map->ops->unlock(map); 3853 3854 if (status < B_OK) 3855 goto out; 3856 3857 // update wiring 3858 vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3859 if (page == NULL) 3860 panic("couldn't lookup physical page"); 3861 3862 page->wired_count++; 3863 // TODO: needs to be atomic on all platforms! 
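	// lock_memory() is normally paired with unlock_memory() around direct
	// device access to a buffer; a usage sketch (buffer, length and the
	// helper name are made up):
#if 0
	static status_t
	with_locked_buffer(void *buffer, size_t length)
	{
		// B_READ_DEVICE (tested above) means the device will write into the
		// buffer, so writable mappings are faulted in as needed
		status_t status = lock_memory(buffer, length, B_READ_DEVICE);
		if (status != B_OK)
			return status;

		// ... run the transfer into the buffer ...

		return unlock_memory(buffer, length, B_READ_DEVICE);
	}
#endif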
	}

out:
	vm_put_address_space(addressSpace);
	return status;
}


long
unlock_memory(void *address, ulong numBytes, ulong flags)
{
	vm_address_space *addressSpace = NULL;
	struct vm_translation_map *map;
	addr_t unalignedBase = (addr_t)address;
	addr_t end = unalignedBase + numBytes;
	addr_t base = ROUNDOWN(unalignedBase, B_PAGE_SIZE);
	bool needsLocking = true;

	if (IS_USER_ADDRESS(address))
		addressSpace = vm_get_current_user_address_space();
	else
		addressSpace = vm_get_kernel_address_space();
	if (addressSpace == NULL)
		return B_ERROR;

	map = &addressSpace->translation_map;

	status_t status = test_lock_memory(addressSpace, base, needsLocking);
	if (status < B_OK)
		goto out;
	if (!needsLocking)
		goto out;

	for (; base < end; base += B_PAGE_SIZE) {
		map->ops->lock(map);

		addr_t physicalAddress;
		uint32 protection;
		status = map->ops->query(map, base, &physicalAddress,
			&protection);

		map->ops->unlock(map);

		if (status < B_OK)
			goto out;
		if ((protection & PAGE_PRESENT) == 0)
			panic("calling unlock_memory() on unmapped memory!");

		// update wiring
		vm_page *page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
		if (page == NULL)
			panic("couldn't lookup physical page");

		page->wired_count--;
			// TODO: needs to be atomic on all platforms!
	}

out:
	vm_put_address_space(addressSpace);
	return status;
}


/** According to the BeBook, this function should always succeed.
 *	This is no longer the case.
 */

long
get_memory_map(const void *address, ulong numBytes, physical_entry *table, long numEntries)
{
	vm_address_space *addressSpace;
	addr_t virtualAddress = (addr_t)address;
	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
	addr_t physicalAddress;
	status_t status = B_OK;
	int32 index = -1;
	addr_t offset = 0;
	bool interrupts = are_interrupts_enabled();

	TRACE(("get_memory_map(%p, %lu bytes, %ld entries)\n", address, numBytes, numEntries));

	if (numEntries == 0 || numBytes == 0)
		return B_BAD_VALUE;

	// in which address space is the address to be found?
	if (IS_USER_ADDRESS(virtualAddress))
		addressSpace = vm_get_current_user_address_space();
	else
		addressSpace = vm_get_kernel_address_space();

	if (addressSpace == NULL)
		return B_ERROR;

	vm_translation_map *map = &addressSpace->translation_map;

	if (interrupts)
		map->ops->lock(map);

	while (offset < numBytes) {
		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
		uint32 flags;

		if (interrupts) {
			status = map->ops->query(map, (addr_t)address + offset,
				&physicalAddress, &flags);
		} else {
			status = map->ops->query_interrupt(map, (addr_t)address + offset,
				&physicalAddress, &flags);
		}
		if (status < B_OK)
			break;
		if ((flags & PAGE_PRESENT) == 0) {
			panic("get_memory_map() called on unmapped memory!");
			return B_BAD_ADDRESS;
		}

		if (index < 0 && pageOffset > 0) {
			physicalAddress += pageOffset;
			if (bytes > B_PAGE_SIZE - pageOffset)
				bytes = B_PAGE_SIZE - pageOffset;
		}

		// need to switch to the next physical_entry?
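		// Coalescing rule used below: if this page starts physically right
		// where the previous entry ends (table[index].address
		// + table[index].size == physicalAddress), it is merged into that
		// entry; otherwise a new entry is started. With 4 KB pages, two
		// physically contiguous pages therefore yield one 8 KB entry.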
		if (index < 0 || (addr_t)table[index].address
				!= physicalAddress - table[index].size) {
			if (++index + 1 > numEntries) {
				// table too small
				status = B_BUFFER_OVERFLOW;
				break;
			}
			table[index].address = (void *)physicalAddress;
			table[index].size = bytes;
		} else {
			// the page fits into the current entry
			table[index].size += bytes;
		}

		offset += bytes;
	}

	if (interrupts)
		map->ops->unlock(map);

	// close the entry list

	if (status == B_OK) {
		// if it's only one entry, we will silently accept the missing ending
		if (numEntries == 1)
			return B_OK;

		if (++index + 1 > numEntries)
			return B_BUFFER_OVERFLOW;

		table[index].address = NULL;
		table[index].size = 0;
	}

	return status;
}


area_id
area_for(void *address)
{
	return vm_area_for(vm_kernel_address_space_id(), (addr_t)address);
}


area_id
find_area(const char *name)
{
	acquire_sem_etc(sAreaHashLock, READ_COUNT, 0, 0);
	struct hash_iterator iterator;
	hash_open(sAreaHash, &iterator);

	vm_area *area;
	area_id id = B_NAME_NOT_FOUND;
	while ((area = (vm_area *)hash_next(sAreaHash, &iterator)) != NULL) {
		if (area->id == RESERVED_AREA_ID)
			continue;

		if (!strcmp(area->name, name)) {
			id = area->id;
			break;
		}
	}

	hash_close(sAreaHash, &iterator, false);
	release_sem_etc(sAreaHashLock, READ_COUNT, 0);

	return id;
}


status_t
_get_area_info(area_id id, area_info *info, size_t size)
{
	if (size != sizeof(area_info) || info == NULL)
		return B_BAD_VALUE;

	vm_area *area = vm_get_area(id);
	if (area == NULL)
		return B_BAD_VALUE;

	fill_area_info(area, info, size);
	vm_put_area(area);

	return B_OK;
}


status_t
_get_next_area_info(team_id team, int32 *cookie, area_info *info, size_t size)
{
	addr_t nextBase = *(addr_t *)cookie;

	// we're already through the list
	if (nextBase == (addr_t)-1)
		return B_ENTRY_NOT_FOUND;

	if (team == B_CURRENT_TEAM)
		team = team_get_current_team_id();

	vm_address_space *addressSpace;
	if (!team_is_valid(team)
		|| team_get_address_space(team, &addressSpace) != B_OK)
		return B_BAD_VALUE;

	acquire_sem_etc(addressSpace->sem, READ_COUNT, 0, 0);

	vm_area *area;
	for (area = addressSpace->areas; area; area = area->address_space_next) {
		if (area->id == RESERVED_AREA_ID)
			continue;

		if (area->base > nextBase)
			break;
	}

	// make sure this area won't go away
	if (area != NULL)
		area = vm_get_area(area->id);

	release_sem_etc(addressSpace->sem, READ_COUNT, 0);
	vm_put_address_space(addressSpace);

	if (area == NULL) {
		nextBase = (addr_t)-1;
		return B_ENTRY_NOT_FOUND;
	}

	fill_area_info(area, info, size);
	*cookie = (int32)(area->base);

	vm_put_area(area);

	return B_OK;
}


status_t
set_area_protection(area_id area, uint32 newProtection)
{
	fix_protection(&newProtection);

	return vm_set_area_protection(vm_kernel_address_space_id(), area, newProtection);
}


status_t
resize_area(area_id areaID, size_t newSize)
{
	vm_area *current;

	// is newSize a multiple of B_PAGE_SIZE?
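	// (Callers are expected to round up themselves, e.g. with
	// ROUNDUP(newSize, B_PAGE_SIZE); an unaligned size such as 5000 bytes
	// on a 4096 byte page system is rejected below rather than rounded.)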
	if (newSize & (B_PAGE_SIZE - 1))
		return B_BAD_VALUE;

	vm_area *area = vm_get_area(areaID);
	if (area == NULL)
		return B_BAD_VALUE;

	vm_cache_ref *cacheRef = area->cache_ref;
	mutex_lock(&cacheRef->lock);

	// Resize all areas of this area's cache

	size_t oldSize = area->size;
	status_t status = B_OK;

	// ToDo: we should only allow resizing of anonymous memory areas!
	if (!cacheRef->cache->temporary) {
		status = B_NOT_ALLOWED;
		goto out;
	}

	// ToDo: we must lock all address spaces here!
	if (oldSize < newSize) {
		// We need to check if all areas of this cache can be resized

		for (current = cacheRef->areas; current; current = current->cache_next) {
			if (current->address_space_next
				&& current->address_space_next->base <= (current->base
					+ newSize)) {
				// if the area was created inside a reserved area, it can also be
				// resized in that area
				// ToDo: if there is free space after the reserved area, it could be used as well...
				vm_area *next = current->address_space_next;
				if (next->id == RESERVED_AREA_ID
					&& next->cache_offset <= current->base
					&& next->base - 1 + next->size >= current->base - 1 + newSize)
					continue;

				status = B_ERROR;
				goto out;
			}
		}
	}

	// Okay, looks good so far, so let's do it

	for (current = cacheRef->areas; current; current = current->cache_next) {
		if (current->address_space_next
			&& current->address_space_next->base <= (current->base + newSize)) {
			vm_area *next = current->address_space_next;
			if (next->id == RESERVED_AREA_ID
				&& next->cache_offset <= current->base
				&& next->base - 1 + next->size >= current->base - 1 + newSize) {
				// resize reserved area
				addr_t offset = current->base + newSize - next->base;
				if (next->size <= offset) {
					current->address_space_next = next->address_space_next;
					free(next);
				} else {
					next->size -= offset;
					next->base += offset;
				}
			} else {
				status = B_ERROR;
				break;
			}
		}

		current->size = newSize;

		// we also need to unmap all pages beyond the new size, if the area has shrunk
		if (newSize < oldSize)
			vm_unmap_pages(current, current->base + newSize, oldSize - newSize);
	}

	if (status == B_OK)
		status = vm_cache_resize(cacheRef, newSize);

	if (status < B_OK) {
		// This shouldn't really be possible, but hey, who knows
		for (current = cacheRef->areas; current; current = current->cache_next)
			current->size = oldSize;
	}

out:
	mutex_unlock(&cacheRef->lock);
	vm_put_area(area);

	// ToDo: we must honour the lock restrictions of this area
	return status;
}


/** Transfers the specified area to a new team. The caller must be the owner
 *	of the area (not yet enforced but probably should be).
 *	This function is currently not exported to the kernel namespace, but is
 *	only accessible using the _kern_transfer_area() syscall.
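 *
 *	On failure the area is re-inserted into the source team's address space
 *	if possible; its former address range is kept reserved during the
 *	transfer for exactly that purpose (see the error paths below).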
 */

static status_t
transfer_area(area_id id, void **_address, uint32 addressSpec, team_id target)
{
	vm_address_space *sourceAddressSpace;
	vm_address_space *targetAddressSpace;
	void *reservedAddress = NULL;
	vm_area *reserved;
	vm_area *area = vm_get_area(id);
	if (area == NULL)
		return B_BAD_VALUE;

	// ToDo: check if the current team owns the area
	status_t status = team_get_address_space(target, &targetAddressSpace);
	if (status != B_OK)
		goto err1;

	// We will first remove the area, and then reserve its former
	// address range so that we can later reclaim it if the
	// transfer failed.

	sourceAddressSpace = area->address_space;
	reserved = create_reserved_area_struct(sourceAddressSpace, 0);
	if (reserved == NULL) {
		status = B_NO_MEMORY;
		goto err2;
	}

	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);

	// unmap the area in the source address space
	vm_unmap_pages(area, area->base, area->size);

	// TODO: there might be additional page faults at this point!

	reservedAddress = (void *)area->base;
	remove_area_from_address_space(sourceAddressSpace, area, true);
	status = insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS,
		area->size, reserved);
		// famous last words: this cannot fail :)

	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);

	if (status != B_OK)
		goto err3;

	// insert the area into the target address space

	acquire_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0, 0);
	// check to see if this address space has entered DELETE state
	if (targetAddressSpace->state == VM_ASPACE_STATE_DELETION) {
		// okay, someone is trying to delete this address space now, so we can't
		// insert the area, so back out
		status = B_BAD_TEAM_ID;
		goto err4;
	}

	status = insert_area(targetAddressSpace, _address, addressSpec, area->size, area);
	if (status < B_OK)
		goto err4;

	// If we got here, the area was successfully transferred to the new team
	area->address_space = targetAddressSpace;

	// TODO: take area lock/wiring into account!
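	// (The pages were unmapped from the source address space above and stay
	// in the area's cache, so in the target team they should simply be
	// faulted in again on first access.)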

	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);

	vm_unreserve_address_range(sourceAddressSpace->id, reservedAddress, area->size);
	vm_put_address_space(sourceAddressSpace);
		// we keep the reference of the target address space for the
		// area, so we only have to put the one from the source
	vm_put_area(area);

	return B_OK;

err4:
	release_sem_etc(targetAddressSpace->sem, WRITE_COUNT, 0);
err3:
	// insert the area again into the source address space
	acquire_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0, 0);
	// check to see if this address space has entered DELETE state
	if (sourceAddressSpace->state == VM_ASPACE_STATE_DELETION
		|| insert_area(sourceAddressSpace, &reservedAddress, B_EXACT_ADDRESS, area->size, area) != B_OK) {
		// We can't insert the area anymore - we have to delete it manually
		vm_cache_remove_area(area->cache_ref, area);
		vm_cache_release_ref(area->cache_ref);
		free(area->name);
		free(area);
		area = NULL;
	}
	release_sem_etc(sourceAddressSpace->sem, WRITE_COUNT, 0);
err2:
	vm_put_address_space(targetAddressSpace);
err1:
	if (area != NULL)
		vm_put_area(area);
	return status;
}


area_id
map_physical_memory(const char *name, void *physicalAddress, size_t numBytes,
	uint32 addressSpec, uint32 protection, void **_virtualAddress)
{
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	fix_protection(&protection);

	return vm_map_physical_memory(vm_kernel_address_space_id(), name, _virtualAddress,
		addressSpec, numBytes, protection, (addr_t)physicalAddress);
}


area_id
clone_area(const char *name, void **_address, uint32 addressSpec, uint32 protection,
	area_id source)
{
	if ((protection & B_KERNEL_PROTECTION) == 0)
		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;

	return vm_clone_area(vm_kernel_address_space_id(), name, _address, addressSpec,
		protection, REGION_NO_PRIVATE_MAP, source);
}


area_id
create_area_etc(struct team *team, const char *name, void **address, uint32 addressSpec,
	uint32 size, uint32 lock, uint32 protection)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(team->id, (char *)name, address,
		addressSpec, size, lock, protection);
}


area_id
create_area(const char *name, void **_address, uint32 addressSpec, size_t size, uint32 lock,
	uint32 protection)
{
	fix_protection(&protection);

	return vm_create_anonymous_area(vm_kernel_address_space_id(), (char *)name, _address,
		addressSpec, size, lock, protection);
}


status_t
delete_area_etc(struct team *team, area_id area)
{
	return vm_delete_area(team->id, area);
}


status_t
delete_area(area_id area)
{
	return vm_delete_area(vm_kernel_address_space_id(), area);
}


// #pragma mark - Userland syscalls


status_t
_user_reserve_heap_address_range(addr_t* userAddress, uint32 addressSpec, addr_t size)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	addr_t address;

	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress,
			sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	status_t status = vm_reserve_address_range(vm_current_user_address_space_id(),
		(void **)&address, addressSpec, size, RESERVED_AVOID_BASE);
	if (status < B_OK)
		return status;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		vm_unreserve_address_range(vm_current_user_address_space_id(),
			(void *)address, size);
		return B_BAD_ADDRESS;
	}

	return B_OK;
}


area_id
_user_area_for(void *address)
{
	return vm_area_for(vm_current_user_address_space_id(), (addr_t)address);
}


area_id
_user_find_area(const char *userName)
{
	char name[B_OS_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return find_area(name);
}


status_t
_user_get_area_info(area_id area, area_info *userInfo)
{
	if (!IS_USER_ADDRESS(userInfo))
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = get_area_info(area, &info);
	if (status < B_OK)
		return status;

	// TODO: do we want to prevent userland from seeing kernel protections?
	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_get_next_area_info(team_id team, int32 *userCookie, area_info *userInfo)
{
	int32 cookie;

	if (!IS_USER_ADDRESS(userCookie)
		|| !IS_USER_ADDRESS(userInfo)
		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
		return B_BAD_ADDRESS;

	area_info info;
	status_t status = _get_next_area_info(team, &cookie, &info, sizeof(area_info));
	if (status != B_OK)
		return status;

	//info.protection &= B_USER_PROTECTION;

	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


status_t
_user_set_area_protection(area_id area, uint32 newProtection)
{
	if ((newProtection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	fix_protection(&newProtection);

	return vm_set_area_protection(vm_current_user_address_space_id(), area,
		newProtection);
}


status_t
_user_resize_area(area_id area, size_t newSize)
{
	// ToDo: Since we restrict deleting of areas to those owned by the team,
	// we should also do that for resizing (check other functions, too).
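	// A possible ownership check (sketch only, mirroring the ToDo above;
	// not part of the current implementation):
	//	vm_area *areaStruct = vm_get_area(area);
	//	if (areaStruct == NULL)
	//		return B_BAD_VALUE;
	//	bool owner = areaStruct->address_space->id
	//		== vm_current_user_address_space_id();
	//	vm_put_area(areaStruct);
	//	if (!owner)
	//		return B_NOT_ALLOWED;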
	return resize_area(area, newSize);
}


status_t
_user_transfer_area(area_id area, void **userAddress, uint32 addressSpec, team_id target)
{
	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}

	void *address;
	if (!IS_USER_ADDRESS(userAddress)
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	status_t status = transfer_area(area, &address, addressSpec, target);
	if (status < B_OK)
		return status;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


area_id
_user_clone_area(const char *userName, void **userAddress, uint32 addressSpec,
	uint32 protection, area_id sourceArea)
{
	char name[B_OS_NAME_LENGTH];
	void *address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	fix_protection(&protection);

	area_id clonedArea = vm_clone_area(vm_current_user_address_space_id(), name, &address,
		addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea);
	if (clonedArea < B_OK)
		return clonedArea;

	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(clonedArea);
		return B_BAD_ADDRESS;
	}

	return clonedArea;
}


area_id
_user_create_area(const char *userName, void **userAddress, uint32 addressSpec,
	size_t size, uint32 lock, uint32 protection)
{
	char name[B_OS_NAME_LENGTH];
	void *address;

	// filter out some unavailable values (for userland)
	switch (addressSpec) {
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
			return B_BAD_VALUE;
	}
	if ((protection & ~B_USER_PROTECTION) != 0)
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
		return B_BAD_ADDRESS;

	if (addressSpec == B_EXACT_ADDRESS
		&& IS_KERNEL_ADDRESS(address))
		return B_BAD_VALUE;

	fix_protection(&protection);

	area_id area = vm_create_anonymous_area(vm_current_user_address_space_id(),
		(char *)name, &address, addressSpec, size, lock, protection);

	if (area >= B_OK && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
		delete_area(area);
		return B_BAD_ADDRESS;
	}

	return area;
}


status_t
_user_delete_area(area_id area)
{
	// Unlike the BeOS implementation, you can now only delete areas
	// that you have created yourself from userland.
	// The documentation for delete_area() explicitly states that this
	// will be restricted in the future, and so it will.
	return vm_delete_area(vm_current_user_address_space_id(), area);
}
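

/*
 * Usage sketch (illustrative only, not part of the kernel itself): how a
 * driver might pin a buffer and obtain its physical runs for DMA using the
 * public API above. "buffer" and "length" are hypothetical driver-supplied
 * values.
 *
 *	physical_entry table[8];
 *
 *	if (lock_memory(buffer, length, B_READ_DEVICE) == B_OK) {
 *		if (get_memory_map(buffer, length, table, 8) == B_OK) {
 *			// program the DMA engine from table[]; with more than one
 *			// entry requested, a successful list ends with an entry whose
 *			// size is 0 (see get_memory_map() above)
 *		}
 *		unlock_memory(buffer, length, B_READ_DEVICE);
 *	}
 */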