/*
 * Copyright 2019-2023 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


static constexpr bool kTraceMemoryMap = false;
static constexpr bool kTracePageDirectory = false;


// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true, false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true, false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}


void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* We are about to transition but are still in EL2: the MMU configuration
	 * currently in use is the one installed in TTBR0_EL2. The kernel side is
	 * not active yet; it has been allocated under sPageDirectory and will be
	 * installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}


void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	// Set granule sizes to 4KB
	tcr &= ~TCR_TG0_MASK;
	tcr |= TCR_TG0_4K;
	tcr &= ~TCR_TG1_MASK;
	tcr |= TCR_TG1_4K;

	// Set the maximum PA size to the maximum supported by the hardware.
	uint64_t pa_size = READ_SPECIALREG(ID_AA64MMFR0_EL1) & ID_AA64MMFR0_PA_RANGE_MASK;

	// A PA size of 4 petabytes requires 64KB paging granules, which
	// we don't support, so clamp the maximum to 256 terabytes.
	if (pa_size == ID_AA64MMFR0_PA_RANGE_4P)
		pa_size = ID_AA64MMFR0_PA_RANGE_256T;
	tcr &= ~IPS_MASK;
	tcr |= pa_size << TCR_IPS_SHIFT;

	// Flush the cache so that we don't receive unexpected writebacks later.
	_arch_cache_clean_poc();

	WRITE_SPECIALREG(TCR_EL1, tcr);

	// Invalidate all TLB entries. Also ensures that all memory traffic has
	// resolved, and flushes the instruction pipeline.
	_arch_mmu_invalidate_tlb_all(arch_exception_level());
}
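
/* The TCR_T1SZ(__builtin_popcountl(KERNEL_BASE)) expression above deserves a
 * short illustration. T1SZ is defined as 64 minus the number of virtual
 * address bits covered by the TTBR1 region. Because KERNEL_BASE is a run of
 * set bits in the most significant positions followed by zeros, counting its
 * set bits yields exactly that "64 - VA bits" value. The sketch below is only
 * an illustration and assumes a hypothetical KERNEL_BASE of
 * 0xffffff0000000000 (24 leading ones); the real value comes from the kernel
 * headers.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main()
{
	const uint64_t kIllustrativeKernelBase = 0xffffff0000000000ull; // assumed value
	int t1sz = __builtin_popcountll(kIllustrativeKernelBase);	// 24 set bits
	int vaBits = 64 - t1sz;	// 40-bit kernel virtual address space

	// Prints "T1SZ = 24, kernel VA bits = 40"
	printf("T1SZ = %d, kernel VA bits = %d\n", t1sz, vaBits);
	return 0;
}
#endif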


uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment we can make a block
		// here instead of using a complete next-level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand!
			int64 expandedSize = (size > remainingSizeInTable) ? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;
		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// Our region is too small: we need to create a level below.
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n",
						ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();
			} while (remainingSize > 0);

			return 0;
		}
	} else {
		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}
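
/* map_region() above decides, per level, between installing a block/page
 * descriptor or descending into a next-level table. For the 4KB granule,
 * 48-bit regime used here, each level indexes 9 bits of the virtual address.
 * The sketch below is illustrative only: it assumes the conventional shifts
 * of 39/30/21/12 bits rather than using the real L0_SHIFT..L3_SHIFT macros,
 * and shows how a virtual address decomposes into the four table indices and
 * what size a single entry covers at each level.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main()
{
	// Assumed values matching a 4KB granule, 48-bit translation regime.
	const int kShift[4] = { 39, 30, 21, 12 };
	const uint64_t kIndexMask = 0x1ff;	// 9 bits per level, 512 entries per table

	uint64_t virtAddr = 0x0000004321fed000ull;	// arbitrary example address

	for (int level = 0; level < 4; level++) {
		uint64_t index = (virtAddr >> kShift[level]) & kIndexMask;
		uint64_t entrySize = 1ull << kShift[level];	// 512GB, 1GB, 2MB, 4KB

		// A block (levels 1-2) or page (level 3) maps 'entrySize' bytes at
		// once; otherwise map_region() allocates a next-level table and recurses.
		printf("L%d index %3llu, entry covers 0x%llx bytes\n",
			level, (unsigned long long)index, (unsigned long long)entrySize);
	}
	return 0;
}
#endif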


static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting a 0 size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Not a kernel address, use TTBR0 instead
		if (arch_exception_level() == 1)
			address = READ_SPECIALREG(TTBR0_EL1);
		else
			address = READ_SPECIALREG(TTBR0_EL2);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

	// for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
	// 	map_page(virt_addr + offset, phys_addr + offset, flags);
	// }

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


void
arch_mmu_init()
{
	// Stub
}
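
/* map_range() above picks the translation base register from the virtual
 * address: kernel addresses are served by TTBR1, everything else by TTBR0 of
 * the exception level we are currently running at. On AArch64 the split is
 * canonical: VAs whose upper bits are all ones belong to the TTBR1 region and
 * VAs whose upper bits are all zeros to the TTBR0 region. The helper below is
 * only a sketch of that rule for a 48-bit address space; the bootloader's
 * actual test is whatever arch_mmu_is_kernel_address() implements.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

// Hypothetical helper: true when the address falls in the upper (TTBR1) half
// of a 48-bit canonical AArch64 address space.
static bool
is_upper_half_address(uint64_t virtAddr)
{
	return (virtAddr >> 48) == 0xffff;
}
#endif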


void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	if (kTraceMemoryMap) {
		dprintf("phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			uint64 start = gKernelArgs.physical_memory_range[i].start;
			uint64 size = gKernelArgs.physical_memory_range[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
			uint64 start = gKernelArgs.physical_allocated_range[i].start;
			uint64 size = gKernelArgs.physical_allocated_range[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated virt memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
			uint64 start = gKernelArgs.virtual_allocated_range[i].start;
			uint64 size = gKernelArgs.virtual_allocated_range[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("virt memory ranges to keep:\n");
		for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
			uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
			uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}
	}
}


void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust a possible previous allocation of TTBR1
	// only if we come from a preset EL1 context.
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing present TTBR1_EL1: %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1 (%" B_PRIx64 ") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0 must
	// be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for the initial kernel page table\n");
		}
	}

	sPageDirectory = page;
}
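
/* arch_mmu_allocate_kernel_page_tables() above reuses an existing TTBR1_EL1
 * root table only when the loader is already running at EL1; when entered at
 * EL2, any leftover EL1 state is ignored and a fresh root table is allocated.
 * The sketch below restates just that decision as a standalone function, with
 * the exception level and the current TTBR1 value passed in as plain
 * parameters instead of being read from the system registers.
 */
#if 0
#include <stdint.h>

// Illustrative only: returns the root table to keep using, or NULL when the
// caller should allocate (and install) a brand new one.
static uint64_t*
pick_kernel_root_table(int exceptionLevel, uint64_t ttbr1)
{
	if (ttbr1 != 0 && exceptionLevel == 1)
		return (uint64_t*)ttbr1;	// trust the pre-set EL1 tables
	return NULL;	// EL2 entry (or empty TTBR1): start fresh
}
#endif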


uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
					| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We actually can only map physical RAM, mapping everything
	// could cause unwanted MMIO or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map the UART because we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
				| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

	if (kTracePageDirectory)
		arch_mmu_dump_present_tables();

	return (uint64_t)sPageDirectory;
}
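
/* The next_pagetable value handed to the kernel above is not an address but a
 * byte offset: the distance from the root table (sPageDirectory) to the next
 * free page-table page (sNextPageTable), which is presumably maintained by
 * the page allocator outside of this file. The kernel can then continue
 * allocating page tables right after the ones the bootloader already filled.
 * A minimal sketch of that arithmetic, with made-up addresses:
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main()
{
	uint64_t physPgdir = 0x80000000ull;	// assumed root table location
	uint64_t nextPageTable = 0x80005000ull;	// assumed next free table page

	// Five 4KB tables already consumed -> offset 0x5000
	uint64_t offset = nextPageTable - physPgdir;
	printf("next_pagetable offset = 0x%llx\n", (unsigned long long)offset);
	return 0;
}
#endif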