/*
 * Copyright 2019-2023 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


static constexpr bool kTraceMemoryMap = false;
static constexpr bool kTracePageDirectory = false;


// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

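// Layout of the boot-time translation regime used below: a 4KB granule and
// 48-bit virtual addresses over four lookup levels. L0 may only hold table
// descriptors, L1/L2 may hold blocks or tables, and L3 holds pages (field
// order assumed to be shift, mask, blocks, tables, pages).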
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true, false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true, false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}

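// Recursively dumps the translation table rooted at 'table': every valid
// entry of the current level is printed and table descriptors are followed
// one level down. Output only appears when TRACE_MMU is defined.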
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0 ; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}


void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* While we are still in EL2 and about to transition, the active MMU
	 * configuration for userland is the one referenced by TTBR0_EL2. The
	 * kernel side is not active yet, but it has already been allocated under
	 * sPageDirectory and will be installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}


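// Prepares TCR_EL1 for the kernel translation regime: enables TTBR1, programs
// the size of the kernel address space, selects 4KB granules for both halves
// and sets the intermediate physical address size to the maximum the hardware
// reports.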
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
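	// The number of leading one bits in KERNEL_BASE equals 64 minus the width
	// of the kernel virtual address space, so (assuming KERNEL_BASE only has
	// its high-order bits set) the popcount yields T1SZ directly.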
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	// Set granule sizes to 4KB
	tcr &= ~TCR_TG0_MASK;
	tcr |= TCR_TG0_4K;
	tcr &= ~TCR_TG1_MASK;
	tcr |= TCR_TG1_4K;

	// Set the physical address size to the maximum supported by the hardware.
	uint64_t pa_size = READ_SPECIALREG(ID_AA64MMFR0_EL1) & ID_AA64MMFR0_PA_RANGE_MASK;

	// A PA size of 4 petabytes requires 64KB paging granules, which
	// we don't support, so clamp the maximum to 256 terabytes.
	if (pa_size == ID_AA64MMFR0_PA_RANGE_4P)
		pa_size = ID_AA64MMFR0_PA_RANGE_256T;
	tcr &= ~IPS_MASK;
	tcr |= pa_size << TCR_IPS_SHIFT;

	// Flush the cache so that we don't receive unexpected writebacks later.
	_arch_cache_clean_poc();

	WRITE_SPECIALREG(TCR_EL1, tcr);

	// Invalidate all TLB entries. Also ensures that all memory traffic has
	// resolved, and flushes the instruction pipeline.
	_arch_mmu_invalidate_tlb_all(arch_exception_level());
}


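// Maps [virt_addr, virt_addr + size) to phys_addr within the table pointed to
// by 'descriptor', recursing into deeper levels where a block mapping is not
// possible. Returns the number of bytes that did not fit into this table so
// the caller can continue with the next descriptor.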
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

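	// Bytes covered from the descriptor that maps virt_addr up to the end of
	// this table.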
	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment, we can make a block
		// here instead of using a complete next-level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand!
			int64 expandedSize = (size > remainingSizeInTable) ? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;

		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
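			// Walk the sibling descriptors of this table, handing the
			// remainder of the range down to the next level until everything
			// has been mapped.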
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// The region is too small for a block at this level,
					// so a next-level table is needed.
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n",
						ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();

			} while (remainingSize > 0);

			return 0;
		}

	} else {

		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))
		) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


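// Maps a physical range into the translation tables relevant for virt_addr
// (TTBR1 for kernel addresses, TTBR0 otherwise) and records the result as an
// allocated virtual range.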
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting 0 size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Userland address: use TTBR0 instead
		if (arch_exception_level() == 1)
			address = READ_SPECIALREG(TTBR0_EL1);
		else
			address = READ_SPECIALREG(TTBR0_EL2);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

	// for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
	// 	map_page(virt_addr + offset, phys_addr + offset, flags);
	// }

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


void
arch_mmu_init()
{
	// Stub
}


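// Runs once the final EFI memory map is available: rebuilds the list of
// allocated physical ranges from it and switches the EFI runtime services
// over to the kernel's virtual mapping.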
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	if (kTraceMemoryMap) {
		dprintf("phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			uint64 start = gKernelArgs.physical_memory_range[i].start;
			uint64 size = gKernelArgs.physical_memory_range[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
			uint64 start = gKernelArgs.physical_allocated_range[i].start;
			uint64 size = gKernelArgs.physical_allocated_range[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated virt memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
			uint64 start = gKernelArgs.virtual_allocated_range[i].start;
			uint64 size = gKernelArgs.virtual_allocated_range[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("virt memory ranges to keep:\n");
		for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
			uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
			uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
			dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}
	}
}


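// Makes sure a root translation table for the kernel half of the address
// space exists: an existing TTBR1_EL1 table is reused when entered from EL1,
// otherwise a fresh page is allocated and installed in TTBR1_EL1.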
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust possible previous allocations of TTBR1
	// only if we come from a preset EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing TTBR1_EL1 present : %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1(%" B_PRIx64 ") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0
	// must be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}


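// Builds the kernel page tables from the EFI memory map: maps the EFI runtime
// regions, the regions reported by mmu_next_region(), the physical-memory
// window at KERNEL_PMAP_BASE and, when configured, the boot UART. Returns the
// physical address of the kernel page directory.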
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
					| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We actually can only map physical RAM, mapping everything
	// could cause unwanted MMIO or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map uart because we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
				| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

	if (kTracePageDirectory)
		arch_mmu_dump_present_tables();

	return (uint64_t)sPageDirectory;
}