/*
 * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2008, Jérôme Duval.
 * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <stdlib.h>
#include <string.h>

#include <KernelExport.h>

#include <smp.h>
#include <util/AutoLock.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>

#include <arch/vm.h>
#include <arch/int.h>
#include <arch/cpu.h>

#include <arch/x86/bios.h>

#include "x86_paging.h"


//#define TRACE_ARCH_VM
#ifdef TRACE_ARCH_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif

#define TRACE_MTRR_ARCH_VM
#ifdef TRACE_MTRR_ARCH_VM
#	define TRACE_MTRR(x...) dprintf(x)
#else
#	define TRACE_MTRR(x...)
#endif


static const uint32 kMaxMemoryTypeRanges = 32;
static const uint32 kMaxMemoryTypeRegisters = 32;
static const uint64 kMinMemoryTypeRangeSize = 1 << 12;


struct memory_type_range_analysis_info {
	uint64	size;
	uint32	rangesNeeded;
	uint32	subtractiveRangesNeeded;
	uint64	bestSubtractiveRange;
};

struct memory_type_range_analysis {
	uint64								base;
	uint64								size;
	uint32								type;
	uint32								rangesNeeded;
	uint64								endRange;
	memory_type_range_analysis_info		left;
	memory_type_range_analysis_info		right;
};

struct memory_type_range {
	uint64		base;
	uint64		size;
	uint32		type;
	area_id		area;
};


void *gDmaAddress;

static memory_type_range sMemoryTypeRanges[kMaxMemoryTypeRanges];
static uint32 sMemoryTypeRangeCount;

static memory_type_range_analysis sMemoryTypeRangeAnalysis[
	kMaxMemoryTypeRanges];

static x86_mtrr_info sMemoryTypeRegisters[kMaxMemoryTypeRegisters];
static uint32 sMemoryTypeRegisterCount;
static uint32 sMemoryTypeRegistersUsed;

static mutex sMemoryTypeLock = MUTEX_INITIALIZER("memory type ranges");


static void
set_mtrrs()
{
	x86_set_mtrrs(sMemoryTypeRegisters, sMemoryTypeRegistersUsed);

#ifdef TRACE_MTRR_ARCH_VM
	TRACE_MTRR("set MTRRs to:\n");
	for (uint32 i = 0; i < sMemoryTypeRegistersUsed; i++) {
		const x86_mtrr_info& info = sMemoryTypeRegisters[i];
		TRACE_MTRR("  mtrr: %2lu: base: %#9llx, size: %#9llx, type: %u\n",
			i, info.base, info.size, info.type);
	}
#endif
}


static void
add_used_mtrr(uint64 base, uint64 size, uint32 type)
{
	ASSERT(sMemoryTypeRegistersUsed < sMemoryTypeRegisterCount);

	x86_mtrr_info& info = sMemoryTypeRegisters[sMemoryTypeRegistersUsed++];
	info.base = base;
	info.size = size;
	info.type = type;
}


static void
analyze_range(memory_type_range_analysis& analysis, uint64 previousEnd,
	uint64 nextBase)
{
	uint64 base = analysis.base;
	uint64 size = analysis.size;

	memory_type_range_analysis_info& left = analysis.left;
	memory_type_range_analysis_info& right = analysis.right;

	uint32 leftSubtractiveRangesNeeded = 2;
	int32 leftBestSubtractiveRangeDifference = 0;
	uint32 leftBestSubtractivePositiveRangesNeeded = 0;
	uint32 leftBestSubtractiveRangesNeeded = 0;

	uint32 rightSubtractiveRangesNeeded = 2;
	int32 rightBestSubtractiveRangeDifference = 0;
	uint32 rightBestSubtractivePositiveRangesNeeded = 0;
	uint32 rightBestSubtractiveRangesNeeded = 0;

	uint64 range = kMinMemoryTypeRangeSize;

	while (size > 0) {
		if ((base & range) != 0) {
			left.rangesNeeded++;

			bool replaceBestSubtractive = false;
			int32 rangeDifference = (int32)left.rangesNeeded
				- (int32)leftSubtractiveRangesNeeded;
			if (left.bestSubtractiveRange == 0
				|| leftBestSubtractiveRangeDifference < rangeDifference) {
				// check for intersection with the previous range
				replaceBestSubtractive
					= previousEnd == 0 || base - range >= previousEnd;
			}

			if (replaceBestSubtractive) {
				leftBestSubtractiveRangeDifference = rangeDifference;
				leftBestSubtractiveRangesNeeded
					= leftSubtractiveRangesNeeded;
				left.bestSubtractiveRange = range;
				leftBestSubtractivePositiveRangesNeeded = 0;
			} else
				leftBestSubtractivePositiveRangesNeeded++;

			left.size += range;
			base += range;
			size -= range;
		} else if (left.bestSubtractiveRange > 0)
			leftSubtractiveRangesNeeded++;

		if ((size & range) != 0) {
			right.rangesNeeded++;

			bool replaceBestSubtractive = false;
			int32 rangeDifference = (int32)right.rangesNeeded
				- (int32)rightSubtractiveRangesNeeded;
			if (right.bestSubtractiveRange == 0
				|| rightBestSubtractiveRangeDifference < rangeDifference) {
				// check for intersection with the next range
				replaceBestSubtractive
					= nextBase == 0 || base + size + range <= nextBase;
			}

			if (replaceBestSubtractive) {
				rightBestSubtractiveRangeDifference = rangeDifference;
				rightBestSubtractiveRangesNeeded
					= rightSubtractiveRangesNeeded;
				right.bestSubtractiveRange = range;
				rightBestSubtractivePositiveRangesNeeded = 0;
			} else
				rightBestSubtractivePositiveRangesNeeded++;

			right.size += range;
			size -= range;
		} else if (right.bestSubtractiveRange > 0)
			rightSubtractiveRangesNeeded++;

		range <<= 1;
	}

	analysis.endRange = range;

	// If a subtractive setup doesn't have any advantages, don't use it.
	// Also compute analysis.rangesNeeded.
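	// (Illustrative numbers, not taken from real hardware: a 0x7000 byte
	// block at base 0x8000 needs three positive MTRRs -- 0x4000 + 0x2000
	// + 0x1000 -- but only two with a subtractive setup: one 0x8000 range
	// of the desired type covering [0x8000, 0x10000) plus one 0x1000
	// uncached range at 0xf000 carved out of its end, since UC takes
	// precedence when MTRRs overlap.)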
	if (leftBestSubtractiveRangesNeeded
			+ leftBestSubtractivePositiveRangesNeeded >= left.rangesNeeded) {
		left.bestSubtractiveRange = 0;
		left.subtractiveRangesNeeded = 0;
		analysis.rangesNeeded = left.rangesNeeded;
	} else {
		left.subtractiveRangesNeeded = leftBestSubtractiveRangesNeeded
			+ leftBestSubtractivePositiveRangesNeeded;
		analysis.rangesNeeded = left.subtractiveRangesNeeded;
	}

	if (rightBestSubtractiveRangesNeeded
			+ rightBestSubtractivePositiveRangesNeeded >= right.rangesNeeded) {
		right.bestSubtractiveRange = 0;
		right.subtractiveRangesNeeded = 0;
		analysis.rangesNeeded += right.rangesNeeded;
	} else {
		right.subtractiveRangesNeeded = rightBestSubtractiveRangesNeeded
			+ rightBestSubtractivePositiveRangesNeeded;
		analysis.rangesNeeded += right.subtractiveRangesNeeded;
	}
}


static void
compute_mtrrs(const memory_type_range_analysis& analysis)
{
	const memory_type_range_analysis_info& left = analysis.left;
	const memory_type_range_analysis_info& right = analysis.right;

	// generate a setup for the left side
	if (left.rangesNeeded > 0) {
		uint64 base = analysis.base;
		uint64 size = left.size;
		uint64 range = analysis.endRange;
		uint64 rangeEnd = base + size;
		bool subtractive = false;
		while (size > 0) {
			if (range == left.bestSubtractiveRange) {
				base = rangeEnd - 2 * range;
				add_used_mtrr(base, range, analysis.type);
				subtractive = true;
				break;
			}

			if ((size & range) != 0) {
				rangeEnd -= range;
				add_used_mtrr(rangeEnd, range, analysis.type);
				size -= range;
			}

			range >>= 1;
		}

		if (subtractive) {
			uint64 shortestRange = range;
			while (size > 0) {
				if ((size & range) != 0) {
					shortestRange = range;
					size -= range;
				} else {
					add_used_mtrr(base, range, IA32_MTR_UNCACHED);
					base += range;
				}

				range >>= 1;
			}

			add_used_mtrr(base, shortestRange, IA32_MTR_UNCACHED);
		}
	}

	// generate a setup for the right side
	if (right.rangesNeeded > 0) {
		uint64 base = analysis.base + left.size;
		uint64 size = right.size;
		uint64 range = analysis.endRange;
		bool subtractive = false;
		while (size > 0) {
			if (range == right.bestSubtractiveRange) {
				add_used_mtrr(base, range * 2, analysis.type);
				subtractive = true;
				break;
			}

			if ((size & range) != 0) {
				add_used_mtrr(base, range, analysis.type);
				base += range;
				size -= range;
			}

			range >>= 1;
		}

		if (subtractive) {
			uint64 rangeEnd = base + range * 2;
			uint64 shortestRange = range;
			while (size > 0) {
				if ((size & range) != 0) {
					shortestRange = range;
					size -= range;
				} else {
					rangeEnd -= range;
					add_used_mtrr(rangeEnd, range, IA32_MTR_UNCACHED);
				}

				range >>= 1;
			}

			rangeEnd -= shortestRange;
			add_used_mtrr(rangeEnd, shortestRange, IA32_MTR_UNCACHED);
		}
	}
}


static status_t
update_mtrrs()
{
	// Transfer the range array to the analysis array, dropping all uncachable
	// ranges (that's the default anyway) and joining adjacent ranges with the
	// same type.
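	// (For example, two write-back ranges covering [0x0, 0x9f000) and
	// [0x9f000, 0xa0000) would be folded into a single [0x0, 0xa0000) entry.
	// The figures are only illustrative -- the real ranges come from the boot
	// loader's physical memory map and from driver areas.)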
	memory_type_range_analysis* ranges = sMemoryTypeRangeAnalysis;
	uint32 rangeCount = 0;
	{
		uint32 previousRangeType = IA32_MTR_UNCACHED;
		uint64 previousRangeEnd = 0;
		for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) {
			if (sMemoryTypeRanges[i].type != IA32_MTR_UNCACHED) {
				uint64 rangeEnd = sMemoryTypeRanges[i].base
					+ sMemoryTypeRanges[i].size;
				if (previousRangeType == sMemoryTypeRanges[i].type
					&& previousRangeEnd >= sMemoryTypeRanges[i].base) {
					// the range overlaps/continues the previous range -- just
					// enlarge that one
					if (rangeEnd > previousRangeEnd)
						previousRangeEnd = rangeEnd;
					ranges[rangeCount - 1].size = previousRangeEnd
						- ranges[rangeCount - 1].base;
				} else {
					// add the new range
					memset(&ranges[rangeCount], 0, sizeof(ranges[rangeCount]));
					ranges[rangeCount].base = sMemoryTypeRanges[i].base;
					ranges[rangeCount].size = sMemoryTypeRanges[i].size;
					ranges[rangeCount].type = sMemoryTypeRanges[i].type;
					previousRangeEnd = rangeEnd;
					previousRangeType = sMemoryTypeRanges[i].type;
					rangeCount++;
				}
			}
		}
	}

	// analyze the ranges
	uint32 registersNeeded = 0;
	uint64 previousEnd = 0;
	for (uint32 i = 0; i < rangeCount; i++) {
		memory_type_range_analysis& range = ranges[i];
		uint64 nextBase = i + 1 < rangeCount ? ranges[i + 1].base : 0;
		analyze_range(range, previousEnd, nextBase);
		registersNeeded += range.rangesNeeded;
		previousEnd = range.base + range.size;
	}

	// fail when we need more registers than we have
	if (registersNeeded > sMemoryTypeRegisterCount)
		return B_BUSY;

	sMemoryTypeRegistersUsed = 0;

	for (uint32 i = 0; i < rangeCount; i++) {
		memory_type_range_analysis& range = ranges[i];
		compute_mtrrs(range);
	}

	set_mtrrs();

	return B_OK;
}


static void
remove_memory_type_range_locked(uint32 index)
{
	sMemoryTypeRangeCount--;
	if (index < sMemoryTypeRangeCount) {
		memmove(sMemoryTypeRanges + index, sMemoryTypeRanges + index + 1,
			(sMemoryTypeRangeCount - index) * sizeof(memory_type_range));
	}
}


static status_t
add_memory_type_range(area_id areaID, uint64 base, uint64 size, uint32 type)
{
	// translate the type
	if (type == 0)
		return B_OK;

	switch (type) {
		case B_MTR_UC:
			type = IA32_MTR_UNCACHED;
			break;
		case B_MTR_WC:
			type = IA32_MTR_WRITE_COMBINING;
			break;
		case B_MTR_WT:
			type = IA32_MTR_WRITE_THROUGH;
			break;
		case B_MTR_WP:
			type = IA32_MTR_WRITE_PROTECTED;
			break;
		case B_MTR_WB:
			type = IA32_MTR_WRITE_BACK;
			break;
		default:
			return B_BAD_VALUE;
	}

	TRACE_MTRR("add_memory_type_range(%ld, %#llx, %#llx, %lu)\n", areaID, base,
		size, type);

	// base and size must at least be aligned to the minimum range size
	if (((base | size) & (kMinMemoryTypeRangeSize - 1)) != 0) {
		dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory base or "
			"size not minimally aligned!\n", areaID, base, size, type);
		return B_BAD_VALUE;
	}

	MutexLocker locker(sMemoryTypeLock);

	if (sMemoryTypeRangeCount == kMaxMemoryTypeRanges) {
		dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Out of "
			"memory ranges!\n", areaID, base, size, type);
		return B_BUSY;
	}

	// iterate through the existing ranges and check for clashes
	bool foundInsertionIndex = false;
	uint32 index = 0;
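	// Overlaps are tolerated only when both ranges request the same type;
	// e.g. (hypothetical scenario) a write-combined frame buffer area that
	// overlapped a write-back RAM range would be rejected below, since the
	// MTRRs could not honor both types at once.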
	for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) {
		const memory_type_range& range = sMemoryTypeRanges[i];
		if (range.base > base) {
			if (range.base - base < size && range.type != type) {
				dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory "
					"range intersects with existing one (%#llx, %#llx, %lu).\n",
					areaID, base, size, type, range.base, range.size,
					range.type);
				return B_BAD_VALUE;
			}

			// found the insertion index
			if (!foundInsertionIndex) {
				index = i;
				foundInsertionIndex = true;
			}
			break;
		} else if (base - range.base < range.size && range.type != type) {
			dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory "
				"range intersects with existing one (%#llx, %#llx, %lu).\n",
				areaID, base, size, type, range.base, range.size, range.type);
			return B_BAD_VALUE;
		}
	}

	if (!foundInsertionIndex)
		index = sMemoryTypeRangeCount;

	// make room for the new range
	if (index < sMemoryTypeRangeCount) {
		memmove(sMemoryTypeRanges + index + 1, sMemoryTypeRanges + index,
			(sMemoryTypeRangeCount - index) * sizeof(memory_type_range));
	}
	sMemoryTypeRangeCount++;

	memory_type_range& rangeInfo = sMemoryTypeRanges[index];
	rangeInfo.base = base;
	rangeInfo.size = size;
	rangeInfo.type = type;
	rangeInfo.area = areaID;

	uint64 range = kMinMemoryTypeRangeSize;
	status_t error;
	do {
		error = update_mtrrs();
		if (error == B_OK) {
			if (rangeInfo.size != size) {
				dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): "
					"update_mtrrs() succeeded only with simplified range: "
					"base: %#llx, size: %#llx\n", areaID, base, size, type,
					rangeInfo.base, rangeInfo.size);
			}
			return B_OK;
		}

		// update_mtrrs() failed -- try to simplify (i.e. shrink) the range
		while (rangeInfo.size != 0) {
			if ((rangeInfo.base & range) != 0) {
				rangeInfo.base += range;
				rangeInfo.size -= range;
				// don't shift the range yet -- we might still have an
				// unaligned size
				break;
			}
			if ((rangeInfo.size & range) != 0) {
				rangeInfo.size -= range;
				range <<= 1;
				break;
			}

			range <<= 1;
		}
	} while (rangeInfo.size > 0);

	dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): update_mtrrs() "
		"failed.\n", areaID, base, size, type);
	remove_memory_type_range_locked(index);
	return error;
}


static void
remove_memory_type_range(area_id areaID)
{
	MutexLocker locker(sMemoryTypeLock);

	for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) {
		if (sMemoryTypeRanges[i].area == areaID) {
			TRACE_MTRR("remove_memory_type_range(%ld, %#llx, %#llx)\n",
				areaID, sMemoryTypeRanges[i].base, sMemoryTypeRanges[i].size);
			remove_memory_type_range_locked(i);
			update_mtrrs();
				// TODO: It's actually possible that this call fails, since
				// compute_mtrrs() joins ranges and removing one might cause a
				// previously joined big simple range to be split into several
				// ranges (or just make it more complicated).
			return;
		}
	}
}


// #pragma mark -


status_t
arch_vm_init(kernel_args *args)
{
	TRACE(("arch_vm_init: entry\n"));
	return 0;
}


/*!	Marks the DMA region as in use, and maps it into the kernel space */
status_t
arch_vm_init_post_area(kernel_args *args)
{
	area_id id;

	TRACE(("arch_vm_init_post_area: entry\n"));

	// account for DMA area and mark the pages unusable
	vm_mark_page_range_inuse(0x0, 0xa0000 / B_PAGE_SIZE);

	// map 0 - 0xa0000 directly
	id = map_physical_memory("dma_region", (void *)0x0, 0xa0000,
		B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		&gDmaAddress);
	if (id < 0) {
		panic("arch_vm_init_post_area: unable to map dma region\n");
		return B_NO_MEMORY;
	}

	return bios_init();
}


/*!	Gets rid of all yet unmapped (and therefore now unused) page tables */
status_t
arch_vm_init_end(kernel_args *args)
{
	TRACE(("arch_vm_init_end: entry\n"));

	// throw away anything in the kernel_args.pgtable[] that's not yet mapped
	vm_free_unused_boot_loader_range(KERNEL_BASE,
		0x400000 * args->arch_args.num_pgtables);

	return B_OK;
}


status_t
arch_vm_init_post_modules(kernel_args *args)
{
	// the x86 CPU modules are now accessible

	sMemoryTypeRegisterCount = x86_count_mtrrs();
	if (sMemoryTypeRegisterCount == 0)
		return B_OK;

	// not very likely, but play safe here
	if (sMemoryTypeRegisterCount > kMaxMemoryTypeRegisters)
		sMemoryTypeRegisterCount = kMaxMemoryTypeRegisters;

	// set the physical memory ranges to write-back mode
	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
		add_memory_type_range(-1, args->physical_memory_range[i].start,
			args->physical_memory_range[i].size, B_MTR_WB);
	}

	return B_OK;
}


void
arch_vm_aspace_swap(struct VMAddressSpace *from, struct VMAddressSpace *to)
{
	// This function is only invoked when a userland thread is in the process
	// of dying. It switches to the kernel team and does whatever cleanup is
	// necessary (in case it is the team's main thread, it will delete the
	// team).
	// It is however not necessary to change the page directory. Userland
	// teams' page directories include all kernel mappings as well.
	// Furthermore our arch specific translation map data objects are
	// ref-counted, so they won't go away as long as they are still used on
	// any CPU.
}


bool
arch_vm_supports_protection(uint32 protection)
{
	// x86 always has the same read/write properties for userland and the
	// kernel.
	// That's why we do not support user-read/kernel-write access. While the
	// other way around is not supported either, we don't care in this case
	// and give the kernel full access.
	if ((protection & (B_READ_AREA | B_WRITE_AREA)) == B_READ_AREA
		&& (protection & B_KERNEL_WRITE_AREA) != 0)
		return false;

	return true;
}


void
arch_vm_unset_memory_type(struct VMArea *area)
{
	if (area->memory_type == 0)
		return;

	remove_memory_type_range(area->id);
}


status_t
arch_vm_set_memory_type(struct VMArea *area, addr_t physicalBase,
	uint32 type)
{
	area->memory_type = type >> MEMORY_TYPE_SHIFT;
	return add_memory_type_range(area->id, physicalBase, area->Size(), type);
}