/*
 * Copyright 2019-2023 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


static constexpr bool kTraceMemoryMap = false;
static constexpr bool kTracePageDirectory = false;


// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

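// Descriptor layout for the 4KB-granule, 48-bit translation regime used below.
// Each row gives the shift and address mask for one lookup level, plus three
// flags which (per their use through BlocksAllowed()/TablesAllowed()/
// PagesAllowed()) state whether that level may hold block, table or page
// descriptors: level 0 entries (512GB each) may only point to tables, levels 1
// and 2 (1GB / 2MB each) allow blocks or tables, and level 3 holds 4KB pages.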
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


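// Debug helper: recursively walks one translation table and prints every valid
// descriptor via TRACE(). Table descriptors are followed into the next level;
// block and page descriptors are printed two per row.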
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0 ; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}


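// Debug helper: dumps both the translation tables currently in use (as
// reported by arch_mmu_base_register()) and the kernel-side tables we have
// been building under sPageDirectory for TTBR1_EL1.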
void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* We intend to transition to EL1, but while we are still in EL2 the active
	 * MMU configuration is the one referenced by TTBR0_EL2. The kernel side is
	 * not active yet; it has only been allocated under sPageDirectory and will
	 * be installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}


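// Prepare TCR_EL1 for the switch to EL1: enable walks through TTBR1, size the
// kernel (TTBR1) address space from KERNEL_BASE, and clamp the intermediate
// physical address size to what the hardware reports in ID_AA64MMFR0_EL1.
// T1SZ is 64 minus the kernel's virtual address width; the popcount of
// KERNEL_BASE yields exactly that value, assuming KERNEL_BASE is a contiguous
// run of high one-bits. Finally the data cache is cleaned to the point of
// coherency and the TLBs are invalidated, so neither stale translations nor
// unexpected writebacks survive the switch.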
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	// Set the maximum PA size to the maximum supported by the hardware.
	uint64_t pa_size = READ_SPECIALREG(ID_AA64MMFR0_EL1) & ID_AA64MMFR0_PA_RANGE_MASK;

	// A PA size of 4 petabytes requires 64KB paging granules, which
	// we don't support, so clamp the maximum to 256 terabytes.
	if (pa_size == ID_AA64MMFR0_PA_RANGE_4P)
		pa_size = ID_AA64MMFR0_PA_RANGE_256T;
	tcr &= ~IPS_MASK;
	tcr |= pa_size << TCR_IPS_SHIFT;

	// Flush the cache so that we don't receive unexpected writebacks later.
	_arch_cache_clean_poc();

	WRITE_SPECIALREG(TCR_EL1, tcr);

	// Invalidate all TLB entries. This also ensures that all memory traffic
	// has resolved, and flushes the instruction pipeline.
	_arch_mmu_invalidate_tlb_all(arch_exception_level());
}


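// Recursively maps [virt_addr, virt_addr + size) to phys_addr starting at the
// given table level, creating block, page or next-level table descriptors as
// needed. Returns the number of bytes that did not fit into the current table
// (so the caller can continue with the next descriptor), or 0 when the whole
// region has been mapped.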
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment, we can map a block
		// (or page) here instead of allocating a complete next-level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Fill as many consecutive descriptors at this level as fit into
			// this table.
			int64 expandedSize = (size > remainingSizeInTable) ? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;

		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// No table here yet; allocate one for the next level down
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page"
						" for %lx\n", ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				// offset tracks how much of the region has been mapped so far
				offset = size - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();

			} while (remainingSize > 0);

			return 0;
		}

	} else {

		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))
		) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


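// Maps a virtual range into the translation tables selected by the target
// address: kernel addresses go into the tables referenced by TTBR1_EL1,
// everything else into TTBR0 of the current exception level. The range is then
// recorded in the list of allocated virtual ranges.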
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting 0 size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Otherwise use TTBR0 of the current exception level
		if (arch_exception_level() == 1)
			address = READ_SPECIALREG(TTBR0_EL1);
		else
			address = READ_SPECIALREG(TTBR0_EL2);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


void
arch_mmu_init()
{
	// Stub
}


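// Called once EFI boot services have been exited: records the physical ranges
// the loader has allocated and switches the firmware's runtime services to
// virtual addressing via SetVirtualAddressMap(), so they remain callable from
// the kernel's address space.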
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	if (kTraceMemoryMap) {
		dprintf("phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			uint64 start = gKernelArgs.physical_memory_range[i].start;
			uint64 size = gKernelArgs.physical_memory_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
			uint64 start = gKernelArgs.physical_allocated_range[i].start;
			uint64 size = gKernelArgs.physical_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated virt memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
			uint64 start = gKernelArgs.virtual_allocated_range[i].start;
			uint64 size = gKernelArgs.virtual_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("virt memory ranges to keep:\n");
		for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
			uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
			uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}
	}
}


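// Sets up sPageDirectory, the root table for the kernel side of the address
// space: if we are already running at EL1 with a valid TTBR1_EL1, that table
// is reused; otherwise a fresh root table is allocated and installed in
// TTBR1_EL1.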
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust a previously allocated TTBR1 only if we come from an already
	// configured EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing TTBR1_EL1 present : %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1(%" B_PRIx64") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0 must
	// be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}


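// Builds the kernel-side translation tables before the loader hands control to
// the kernel: EFI runtime regions are mapped at their reported virtual
// addresses, the regions already set up by the loader are carried over, the
// physical-map window at KERNEL_PMAP_BASE is created, and the boot UART is
// remapped. The address of the root table (sPageDirectory) is returned.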
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

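	// Regions marked EFI_MEMORY_RUNTIME have to stay reachable after
	// SetVirtualAddressMap(), so map each of them at the virtual address
	// recorded in its descriptor.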
	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
					| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

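	// Carry over the memory regions the bootloader has allocated so far, so
	// they remain accessible from the kernel's address space.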
	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We should only map actual physical RAM here; mapping everything
	// could cause unwanted MMIO accesses or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map the UART because we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
			| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

	if (kTracePageDirectory)
		arch_mmu_dump_present_tables();

	return (uint64_t)sPageDirectory;
}
457