1 /* 2 * Copyright 2019-2022 Haiku, Inc. All rights reserved. 3 * Released under the terms of the MIT License. 4 */ 5 6 #include <boot/platform.h> 7 #include <boot/stage2.h> 8 9 #include "mmu.h" 10 #include "efi_platform.h" 11 12 #include "aarch64.h" 13 #include "arch_mmu.h" 14 15 // #define TRACE_MMU 16 #ifdef TRACE_MMU 17 # define TRACE(x) dprintf x 18 #else 19 # define TRACE(x) ; 20 #endif 21 22 23 ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = { 24 {L0_SHIFT, L0_ADDR_MASK, false, true, false }, 25 {L1_SHIFT, Ln_ADDR_MASK, true, true, false }, 26 {L2_SHIFT, Ln_ADDR_MASK, true, true, false }, 27 {L3_SHIFT, Ln_ADDR_MASK, false, false, true } 28 }; 29 30 31 ARMv8TranslationRegime CurrentRegime(translation4Kb48bits); 32 /* ARM port */ 33 static uint64_t* sPageDirectory = NULL; 34 // static uint64_t* sFirstPageTable = NULL; 35 static uint64_t* sNextPageTable = NULL; 36 // static uint64_t* sLastPageTable = NULL; 37 38 39 const char* 40 granule_type_str(int tg) 41 { 42 switch (tg) { 43 case TG_4KB: 44 return "4KB"; 45 case TG_16KB: 46 return "16KB"; 47 case TG_64KB: 48 return "64KB"; 49 default: 50 return "Invalid Granule"; 51 } 52 } 53 54 55 void 56 arch_mmu_dump_table(uint64* table, uint8 currentLevel) 57 { 58 ARMv8TranslationTableDescriptor ttd(table); 59 60 if (currentLevel >= CurrentRegime.MaxLevels()) { 61 // This should not happen 62 panic("Too many levels ..."); 63 return; 64 } 65 66 uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity()); 67 for (uint i = 0 ; i < EntriesPerLevel; i++) { 68 if (!ttd.IsInvalid()) { 69 TRACE(("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value())); 70 if (ttd.IsTable() && currentLevel < 3) { 71 TRACE(("Table! Next Level:\n")); 72 arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1); 73 } 74 if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) { 75 TRACE(("Block/Page")); 76 77 if (i & 1) { // 2 entries per row 78 TRACE(("\n")); 79 } else { 80 TRACE(("\t")); 81 } 82 } 83 } 84 ttd.Next(); 85 } 86 } 87 88 89 void 90 arch_mmu_dump_present_tables() 91 { 92 #ifdef TRACE_MMU 93 if (arch_mmu_enabled()) { 94 uint64 address = arch_mmu_base_register(); 95 TRACE(("Under TTBR0: %lx\n", address)); 96 97 arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0); 98 99 /* We are willing to transition, but still in EL2, present MMU configuration 100 * for user is present in EL2 by TTBR0_EL2. Kernel side is not active, but 101 * allocated under sPageDirectory, defined under TTBR1_EL1. 102 */ 103 if (address != 0ul) { 104 TRACE(("Under allocated TTBR1_EL1:\n")); 105 arch_mmu_dump_table(sPageDirectory, 0); 106 } 107 } 108 #endif 109 } 110 111 112 void arch_mmu_setup_EL1(uint64 tcr) { 113 114 // Enable TTBR1 115 tcr &= ~TCR_EPD1_DISABLE; 116 117 // Set space for kernel space 118 tcr &= ~T1SZ_MASK; // Clear 119 // TODO: Compiler dependency? 120 tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE)); 121 122 WRITE_SPECIALREG(TCR_EL1, tcr); 123 } 124 125 126 uint64 127 map_region(addr_t virt_addr, addr_t phys_addr, size_t size, 128 uint32_t level, uint64_t flags, uint64* descriptor) 129 { 130 ARMv8TranslationTableDescriptor ttd(descriptor); 131 132 if (level >= CurrentRegime.MaxLevels()) { 133 panic("Too many levels at mapping\n"); 134 } 135 136 uint64 currentLevelSize = CurrentRegime.EntrySize(level); 137 138 ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level)); 139 140 uint64 remainingSizeInTable = CurrentRegime.TableSize(level) 141 - currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level); 142 143 TRACE(("Level %x, Processing desc %lx indexing %lx\n", 144 level, reinterpret_cast<uint64>(descriptor), ttd.Location())); 145 146 if (ttd.IsInvalid()) { 147 // If the physical has the same alignment we could make a block here 148 // instead of using a complete next level table 149 if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) { 150 // Set it as block or page 151 if (CurrentRegime.BlocksAllowed(level)) { 152 ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags); 153 } else { 154 // Most likely in Level 3... 155 ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags); 156 } 157 158 // Expand! 159 int64 expandedSize = (size > remainingSizeInTable)?remainingSizeInTable:size; 160 161 do { 162 phys_addr += currentLevelSize; 163 expandedSize -= currentLevelSize; 164 if (expandedSize > 0) { 165 ttd.Next(); 166 if (CurrentRegime.BlocksAllowed(level)) { 167 ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags); 168 } else { 169 // Most likely in Level 3... 170 ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags); 171 } 172 } 173 } while (expandedSize > 0); 174 175 return (size > remainingSizeInTable)?(size - remainingSizeInTable):0; 176 177 } else { 178 // Set it to next level 179 uint64 offset = 0; 180 uint64 remainingSize = size; 181 do { 182 uint64* page = NULL; 183 if (ttd.IsInvalid()) { 184 // our region is too small would need to create a level below 185 page = CurrentRegime.AllocatePage(); 186 ttd.SetToTable(page, flags); 187 } else if (ttd.IsTable()) { 188 // Next table is allocated, follow it 189 page = ttd.Dereference(); 190 } else { 191 panic("Required contiguous descriptor in use by Block/Page for %lx\n", ttd.Location()); 192 } 193 194 uint64 unprocessedSize = map_region(virt_addr + offset, 195 phys_addr + offset, remainingSize, level + 1, flags, page); 196 197 offset = remainingSize - unprocessedSize; 198 199 remainingSize = unprocessedSize; 200 201 ttd.Next(); 202 203 } while (remainingSize > 0); 204 205 return 0; 206 } 207 208 } else { 209 210 if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level)) 211 || (ttd.IsPage() && CurrentRegime.PagesAllowed(level)) 212 ) { 213 // TODO: Review, overlap? expand? 214 panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location()); 215 return 0; 216 } else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) { 217 // Next Level 218 map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference()); 219 return 0; 220 } else { 221 panic("All descriptor types processed for %lx\n", ttd.Location()); 222 return 0; 223 } 224 } 225 } 226 227 228 static void 229 map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags) 230 { 231 TRACE(("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n", 232 (uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags)); 233 234 // TODO: Review why we get ranges with 0 size ... 235 if (size == 0) { 236 TRACE(("Requesing 0 size map\n")); 237 return; 238 } 239 240 // TODO: Review this case 241 if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) { 242 TRACE(("Trying to map the TTBR itself?!\n")); 243 return; 244 } 245 246 if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) { 247 TRACE(("Range already covered in current MMU\n")); 248 return; 249 } 250 251 uint64 address; 252 253 if (arch_mmu_is_kernel_address(virt_addr)) { 254 // Use TTBR1 255 address = READ_SPECIALREG(TTBR1_EL1); 256 } else { 257 // ok, but USE instead TTBR0 258 address = READ_SPECIALREG(TTBR0_EL1); 259 } 260 261 map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address)); 262 263 // for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 264 // map_page(virt_addr + offset, phys_addr + offset, flags); 265 // } 266 267 ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK); 268 } 269 270 271 static void 272 build_physical_memory_list(size_t memory_map_size, 273 efi_memory_descriptor* memory_map, size_t descriptor_size, 274 uint32_t descriptor_version) 275 { 276 addr_t addr = (addr_t)memory_map; 277 278 gKernelArgs.num_physical_memory_ranges = 0; 279 280 // First scan: Add all usable ranges 281 for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) { 282 efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size); 283 switch (entry->Type) { 284 case EfiLoaderCode: 285 case EfiLoaderData: 286 entry->VirtualStart = entry->PhysicalStart; 287 break; 288 case EfiBootServicesCode: 289 case EfiBootServicesData: 290 case EfiConventionalMemory: { 291 // Usable memory. 292 uint64_t base = entry->PhysicalStart; 293 uint64_t size = entry->NumberOfPages * B_PAGE_SIZE; 294 insert_physical_memory_range(base, size); 295 break; 296 } 297 case EfiACPIReclaimMemory: 298 // ACPI reclaim -- physical memory we could actually use later 299 break; 300 case EfiRuntimeServicesCode: 301 case EfiRuntimeServicesData: 302 entry->VirtualStart = entry->PhysicalStart; 303 break; 304 case EfiMemoryMappedIO: 305 entry->VirtualStart = entry->PhysicalStart; 306 break; 307 } 308 } 309 310 uint64_t initialPhysicalMemory = total_physical_memory(); 311 312 // Second scan: Remove everything reserved that may overlap 313 for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) { 314 efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size); 315 switch (entry->Type) { 316 case EfiLoaderCode: 317 case EfiLoaderData: 318 case EfiBootServicesCode: 319 case EfiBootServicesData: 320 case EfiConventionalMemory: 321 break; 322 default: 323 uint64_t base = entry->PhysicalStart; 324 uint64_t size = entry->NumberOfPages * B_PAGE_SIZE; 325 remove_physical_memory_range(base, size); 326 } 327 } 328 329 gKernelArgs.ignored_physical_memory 330 += initialPhysicalMemory - total_physical_memory(); 331 332 sort_address_ranges(gKernelArgs.physical_memory_range, 333 gKernelArgs.num_physical_memory_ranges); 334 } 335 336 337 static void 338 build_physical_allocated_list(size_t memory_map_size, 339 efi_memory_descriptor* memory_map, size_t descriptor_size, 340 uint32_t descriptor_version) 341 { 342 addr_t addr = (addr_t)memory_map; 343 for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) { 344 efi_memory_descriptor* entry = (efi_memory_descriptor*)(addr + i * descriptor_size); 345 switch (entry->Type) { 346 case EfiLoaderData: { 347 uint64_t base = entry->PhysicalStart; 348 uint64_t size = entry->NumberOfPages * B_PAGE_SIZE; 349 insert_physical_allocated_range(base, size); 350 break; 351 } 352 default: 353 ; 354 } 355 } 356 357 sort_address_ranges(gKernelArgs.physical_allocated_range, 358 gKernelArgs.num_physical_allocated_ranges); 359 } 360 361 362 void 363 arch_mmu_init() 364 { 365 // Stub 366 } 367 368 369 void 370 arch_mmu_post_efi_setup(size_t memory_map_size, 371 efi_memory_descriptor* memory_map, size_t descriptor_size, 372 uint32_t descriptor_version) 373 { 374 build_physical_allocated_list(memory_map_size, memory_map, 375 descriptor_size, descriptor_version); 376 377 // Switch EFI to virtual mode, using the kernel pmap. 378 kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size, 379 descriptor_version, memory_map); 380 381 TRACE(("phys memory ranges:\n")); 382 for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) { 383 uint32_t start = (uint32_t)gKernelArgs.physical_memory_range[i].start; 384 uint32_t size = (uint32_t)gKernelArgs.physical_memory_range[i].size; 385 TRACE((" 0x%08x-0x%08x, length 0x%08x\n", 386 start, start + size, size)); 387 } 388 389 TRACE(("allocated phys memory ranges:\n")); 390 for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) { 391 uint32_t start = (uint32_t)gKernelArgs.physical_allocated_range[i].start; 392 uint32_t size = (uint32_t)gKernelArgs.physical_allocated_range[i].size; 393 TRACE((" 0x%08x-0x%08x, length 0x%08x\n", 394 start, start + size, size)); 395 } 396 397 TRACE(("allocated virt memory ranges:\n")); 398 for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) { 399 uint32_t start = (uint32_t)gKernelArgs.virtual_allocated_range[i].start; 400 uint32_t size = (uint32_t)gKernelArgs.virtual_allocated_range[i].size; 401 TRACE((" 0x%08x-0x%08x, length 0x%08x\n", 402 start, start + size, size)); 403 } 404 405 } 406 407 408 void 409 arch_mmu_allocate_kernel_page_tables(void) 410 { 411 uint64* page = NULL; 412 uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1); 413 414 // Trust possible previous allocations of TTBR1 415 // only if we come from a preset EL1 context 416 if (ttbr1 != 0ll) { 417 if (arch_exception_level() == 1) { 418 page = reinterpret_cast<uint64*>(ttbr1); 419 TRACE(("Resusing TTBR1_EL1 present : %" B_PRIx64 "\n", ttbr1)); 420 } else if (arch_exception_level() == 2) { 421 TRACE(("Ignoring EL1 TTBR1(%" B_PRIx64") tables\n", ttbr1)); 422 } 423 } 424 425 // NOTE: On devices supporting multiple translation base registers, TTBR0 must 426 // be used solely. 427 if (page == NULL) { 428 page = CurrentRegime.AllocatePage(); 429 if (page != NULL) { 430 WRITE_SPECIALREG(TTBR1_EL1, page); 431 } else { 432 panic("Not enough memory for kernel initial page\n"); 433 } 434 } 435 436 sPageDirectory = page; 437 } 438 439 440 uint32_t 441 arch_mmu_generate_post_efi_page_tables(size_t memory_map_size, 442 efi_memory_descriptor* memory_map, size_t descriptor_size, 443 uint32_t descriptor_version) 444 { 445 addr_t memory_map_addr = (addr_t)memory_map; 446 447 MemoryAttributeIndirection currentMair; 448 449 // arch_mmu_allocate_page_tables(); 450 arch_mmu_allocate_kernel_page_tables(); 451 452 build_physical_memory_list(memory_map_size, memory_map, 453 descriptor_size, descriptor_version); 454 455 TRACE(("Mapping Code & Data\n")); 456 457 for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) { 458 efi_memory_descriptor* entry = (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size); 459 switch (entry->Type) { 460 case EfiLoaderCode: 461 case EfiLoaderData: 462 map_range(entry->VirtualStart, entry->PhysicalStart, 463 entry->NumberOfPages * B_PAGE_SIZE, 464 ARMv8TranslationTableDescriptor::DefaultCodeAttribute 465 | currentMair.MaskOf(MAIR_NORMAL_WB)); 466 break; 467 default: 468 ; 469 } 470 } 471 472 TRACE(("Mapping EFI_MEMORY_RUNTIME\n")); 473 for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) { 474 efi_memory_descriptor* entry = (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size); 475 if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0) 476 map_range(entry->VirtualStart, entry->PhysicalStart, 477 entry->NumberOfPages * B_PAGE_SIZE, 478 ARMv8TranslationTableDescriptor::DefaultCodeAttribute | currentMair.MaskOf(MAIR_NORMAL_WB)); 479 } 480 481 TRACE(("Mapping \"next\" regions\n")); 482 void* cookie = NULL; 483 addr_t vaddr; 484 phys_addr_t paddr; 485 size_t size; 486 while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) { 487 map_range(vaddr, paddr, size, 488 ARMv8TranslationTableDescriptor::DefaultCodeAttribute 489 | currentMair.MaskOf(MAIR_NORMAL_WB)); 490 } 491 492 /* TODO: Not an UART here... inspect dtb? 493 // identity mapping for the debug uart 494 map_range(0x09000000, 0x09000000, B_PAGE_SIZE, 495 ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute 496 | currentMair.MaskOf(MAIR_DEVICE_nGnRnE)); 497 */ 498 499 // TODO: We actually can only map physical RAM, mapping everything 500 // could cause unwanted MMIO or bus errors on real hardware. 501 map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1, 502 ARMv8TranslationTableDescriptor::DefaultCodeAttribute 503 | currentMair.MaskOf(MAIR_NORMAL_WB)); 504 505 sort_address_ranges(gKernelArgs.virtual_allocated_range, 506 gKernelArgs.num_virtual_allocated_ranges); 507 508 addr_t vir_pgdir; 509 platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir); 510 511 gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory; 512 gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir; 513 gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory; 514 515 TRACE(("gKernelArgs.arch_args.phys_pgdir = 0x%08x\n", 516 (uint32_t)gKernelArgs.arch_args.phys_pgdir)); 517 TRACE(("gKernelArgs.arch_args.vir_pgdir = 0x%08x\n", 518 (uint32_t)gKernelArgs.arch_args.vir_pgdir)); 519 TRACE(("gKernelArgs.arch_args.next_pagetable = 0x%08x\n", 520 (uint32_t)gKernelArgs.arch_args.next_pagetable)); 521 522 return (uint64_t)sPageDirectory; 523 } 524