// Source: /haiku/src/system/boot/platform/efi/arch/arm64/arch_mmu.cpp
// (revision df59dfec3b5a60258b73a8f437533746ee689020)
/*
 * Copyright 2019-2023 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


static constexpr bool kTraceMemoryMap = false;
static constexpr bool kTracePageDirectory = false;


// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

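// Translation regime layout for a 4KB granule and 48-bit virtual addresses.
// Architecturally, level 0 entries each cover 512GB and may only point to
// further tables, levels 1 and 2 may hold either tables or block mappings
// (1GB and 2MB respectively), and level 3 holds 4KB page descriptors; the
// flags below appear to encode which descriptor kinds each level allows.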
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


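// Recursively dump one translation table for tracing: every valid descriptor
// is printed, and table descriptors are followed down to level 3.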
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0 ; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}


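// Dump both the translation tables currently active under TTBR0 and the
// kernel tables built by the loader under sPageDirectory.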
void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* We are about to transition but are still in EL2, so the MMU
	 * configuration currently presented for "user" addresses is the one in
	 * TTBR0_EL2. The kernel side is not active yet: it is only allocated
	 * under sPageDirectory, which is what gets installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}


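// Prepare TCR_EL1 for the kernel's translation regime: enable TTBR1, size the
// kernel address space, select 4KB granules, program the supported physical
// address range, then clean the caches and invalidate the TLBs.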
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
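	// T1SZ is 64 minus the number of virtual address bits served by TTBR1.
	// Assuming KERNEL_BASE consists of a contiguous run of leading one bits
	// (the canonical upper-range prefix), its popcount equals that prefix
	// length; e.g. a base of 0xffff000000000000 would yield T1SZ = 16 and
	// therefore a 48-bit kernel address space.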
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	// Set granule sizes to 4KB
	tcr &= ~TCR_TG0_MASK;
	tcr |= TCR_TG0_4K;
	tcr &= ~TCR_TG1_MASK;
	tcr |= TCR_TG1_4K;

	// Set the maximum PA size to the maximum supported by the hardware.
	uint64_t pa_size = READ_SPECIALREG(ID_AA64MMFR0_EL1) & ID_AA64MMFR0_PA_RANGE_MASK;

	// A PA size of 4 petabytes requires 64KB paging granules, which
	// we don't support, so clamp the maximum to 256 terabytes.
	if (pa_size == ID_AA64MMFR0_PA_RANGE_4P)
		pa_size = ID_AA64MMFR0_PA_RANGE_256T;
	tcr &= ~IPS_MASK;
	tcr |= pa_size << TCR_IPS_SHIFT;

	// Flush the cache so that we don't receive unexpected writebacks later.
	_arch_cache_clean_poc();

	WRITE_SPECIALREG(TCR_EL1, tcr);

	// Invalidate all TLB entries. Also ensures that all memory traffic has
	// resolved, and flushes the instruction pipeline.
	_arch_mmu_invalidate_tlb_all(arch_exception_level());
}


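// Map [phys_addr, phys_addr + size) at virt_addr into the translation table
// 'descriptor', starting at the given translation level. Uses block or page
// descriptors whenever size and alignment allow, and otherwise recurses into
// (possibly freshly allocated) next-level tables. Returns the number of bytes
// left unmapped because they fall beyond this table's reach (0 otherwise).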
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment, we can make a block
		// here instead of using a complete next-level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand: fill as many consecutive descriptors at this level as
			// the requested size needs, capped to what still fits in this table.
			int64 expandedSize = (size > remainingSizeInTable)
				? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;

		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// The region is too small for a block at this level, so we
					// need to create a table one level below.
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n", ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();

			} while (remainingSize > 0);

			return 0;
		}

	} else {

		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))
		) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


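// Map a range of physical memory at virt_addr into the currently relevant
// translation tables (TTBR1 for kernel addresses, TTBR0 otherwise) and record
// it in the list of allocated virtual ranges.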
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting a 0-size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Otherwise use TTBR0 of the current exception level
		if (arch_exception_level() == 1)
			address = READ_SPECIALREG(TTBR0_EL1);
		else
			address = READ_SPECIALREG(TTBR0_EL2);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


void
arch_mmu_init()
{
	// Stub
}


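// Record the final physical allocations and switch the EFI runtime services
// over to the kernel's virtual address map; optionally trace the resulting
// memory ranges.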
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	if (kTraceMemoryMap) {
		dprintf("phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			uint64 start = gKernelArgs.physical_memory_range[i].start;
			uint64 size = gKernelArgs.physical_memory_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
			uint64 start = gKernelArgs.physical_allocated_range[i].start;
			uint64 size = gKernelArgs.physical_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated virt memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
			uint64 start = gKernelArgs.virtual_allocated_range[i].start;
			uint64 size = gKernelArgs.virtual_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("virt memory ranges to keep:\n");
		for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
			uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
			uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}
	}
}


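// Ensure sPageDirectory points at a root table for the kernel (TTBR1_EL1)
// side: reuse an existing TTBR1_EL1 table when we were entered at EL1,
// otherwise allocate a fresh root table and install it.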
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust possible previous allocations of TTBR1
	// only if we come from a preset EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing TTBR1_EL1 present : %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1(%" B_PRIx64") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0 must
	// be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}


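// Build the kernel's initial page tables from the EFI memory map: map the
// EFI runtime regions, the regions reported by mmu_next_region, the physical
// map area and the boot UART (if any), then publish the page directory
// locations through gKernelArgs. Returns the address of the page directory.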
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute | currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We should only map physical RAM here; mapping everything
	// could cause unwanted MMIO accesses or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map uart because we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute |
			currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

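		// Hand the kernel the virtual address of the UART registers rather
		// than the physical one it was discovered at.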
		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

	if (kTracePageDirectory)
		arch_mmu_dump_present_tables();

	return (uint64_t)sPageDirectory;
}
463