xref: /haiku/src/system/boot/platform/efi/arch/arm64/arch_mmu.cpp (revision 6a2d53e7237764eab0c7b6d121772f26d636fb60)
/*
 * Copyright 2019-2023 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "efi_platform.h"
#include "generic_mmu.h"
#include "mmu.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


static constexpr bool kTraceMemoryMap = false;
static constexpr bool kTracePageDirectory = false;


// Ignore memory above 512GB
#define PHYSICAL_MEMORY_LOW		0x00000000
#define PHYSICAL_MEMORY_HIGH	0x8000000000ull

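// Layout of a 4KB-granule, 48-bit virtual address translation regime.
// Reading the initializer below, the boolean flags presumably encode whether
// blocks, tables and pages are allowed at each level: level 0 only points to
// tables, levels 1 and 2 may hold blocks or tables, and level 3 only holds
// pages, matching the VMSAv8-64 rules for a 4KB granule.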
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false},
	{L1_SHIFT, Ln_ADDR_MASK, true, true, false},
	{L2_SHIFT, Ln_ADDR_MASK, true, true, false},
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true}
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);

// Page table bookkeeping carried over from the ARM port
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


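// Translates a TG (translation granule) field value into a human-readable
// string for trace output.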
const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


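// Recursively walks one translation table and TRACEs every valid descriptor,
// following table descriptors down to level 3. Only produces output when
// TRACE_MMU is defined.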
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 entriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0; i < entriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value());
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE("Table! Next Level:\n");
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE("Block/Page");

				if (i & 1) { // 2 entries per row
					TRACE("\n");
				} else {
					TRACE("\t");
				}
			}
		}
		ttd.Next();
	}
}


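// Dumps both the translation table currently active for the lower address
// range (TTBR0 of the current exception level) and the kernel table the
// loader has been building for TTBR1_EL1 in sPageDirectory.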
void
arch_mmu_dump_present_tables()
{
	uint64 address = arch_mmu_base_register();
	dprintf("Under TTBR0: %lx\n", address);

	arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

	/* While we are still running in EL2, the active MMU configuration for the
	 * lower address range is the one held by TTBR0_EL2. The kernel side is not
	 * active yet, but has already been allocated under sPageDirectory and will
	 * be installed in TTBR1_EL1.
	 */
	dprintf("Under allocated TTBR1_EL1:\n");
	arch_mmu_dump_table(sPageDirectory, 0);
}


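// Adjusts the EL1 translation control value before the kernel is entered:
// table walks through TTBR1_EL1 are enabled and T1SZ is derived from
// KERNEL_BASE. The popcount trick below assumes KERNEL_BASE is a contiguous
// run of high one bits, so its number of set bits equals the desired T1SZ.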
void
arch_mmu_setup_EL1(uint64 tcr)
{
	// Enable translation table walks through TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	// Flush the cache so that we don't receive unexpected writebacks later.
	_arch_cache_clean_poc();

	WRITE_SPECIALREG(TCR_EL1, tcr);

	// Invalidate all TLB entries. Also ensures that all memory traffic has
	// resolved, and flushes the instruction pipeline.
	_arch_mmu_invalidate_tlb_all(arch_exception_level());
}


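// Maps size bytes at virt_addr to phys_addr inside the table referenced by
// descriptor, which belongs to the given level. Uses block or page entries
// whenever alignment and size permit, and allocates lower-level tables
// otherwise. Returns the number of bytes that could not be mapped within
// this table and remain for the caller to handle.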
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location());

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment, we can map a block
		// (or page) here instead of filling a complete next-level table.
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Fill as many consecutive entries of this table as the size allows.
			int64 expandedSize = (size > remainingSizeInTable) ? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;

		} else {
			// Delegate the mapping to the next level.
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// The region is too small for a block at this level;
					// allocate a table for the level below.
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n",
						ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset += remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();

			} while (remainingSize > 0);

			return 0;
		}

	} else {

		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))
		) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


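// Convenience wrapper around map_region(): picks TTBR1_EL1 or TTBR0_EL1
// depending on whether virt_addr is a kernel address, maps the whole range
// starting at level 0, and records it as virtually allocated.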
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags);

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE("Requesting 0 size map\n");
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE("Trying to map the TTBR itself?!\n");
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE("Range already covered in current MMU\n");
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Otherwise use TTBR0
		address = READ_SPECIALREG(TTBR0_EL1);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


void
arch_mmu_init()
{
	// Stub
}


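// Runs after the boot services have been exited: records which physical
// ranges ended up allocated, switches the EFI runtime services to their
// virtual mappings and optionally dumps the collected memory ranges.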
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);

	if (kTraceMemoryMap) {
		dprintf("phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
			uint64 start = gKernelArgs.physical_memory_range[i].start;
			uint64 size = gKernelArgs.physical_memory_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated phys memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
			uint64 start = gKernelArgs.physical_allocated_range[i].start;
			uint64 size = gKernelArgs.physical_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("allocated virt memory ranges:\n");
		for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
			uint64 start = gKernelArgs.virtual_allocated_range[i].start;
			uint64 size = gKernelArgs.virtual_allocated_range[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}

		dprintf("virt memory ranges to keep:\n");
		for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
			uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
			uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
			dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
				start, start + size, size);
		}
	}
}


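// Makes sure sPageDirectory points to a root table for the kernel address
// space: an existing TTBR1_EL1 table is reused when we are already running
// at EL1, otherwise a fresh page is allocated and installed in TTBR1_EL1.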
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = NULL;
	uint64 ttbr1 = READ_SPECIALREG(TTBR1_EL1);

	// Trust a previous allocation of TTBR1
	// only if we come from a preset EL1 context
	if (ttbr1 != 0ll) {
		if (arch_exception_level() == 1) {
			page = reinterpret_cast<uint64*>(ttbr1);
			TRACE("Reusing present TTBR1_EL1: %" B_PRIx64 "\n", ttbr1);
		} else if (arch_exception_level() == 2) {
			TRACE("Ignoring EL1 TTBR1(%" B_PRIx64 ") tables\n", ttbr1);
		}
	}

	// NOTE: On devices supporting multiple translation base registers, TTBR0 must
	// be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	}

	sPageDirectory = page;
}


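// Builds the kernel-side page tables from the EFI memory map: EFI runtime
// regions, the regions queued by mmu_next_region(), the physical map area
// and the boot UART are all mapped, and the resulting table addresses are
// stored in gKernelArgs for the kernel to take over.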
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

// 	arch_mmu_allocate_page_tables();
	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version,
		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);

	TRACE("Mapping EFI_MEMORY_RUNTIME\n");
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
					| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE("Mapping \"next\" regions\n");
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	// TODO: We actually can only map physical RAM, mapping everything
	// could cause unwanted MMIO or bus errors on real hardware.
	map_range(KERNEL_PMAP_BASE, 0, KERNEL_PMAP_SIZE - 1,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));

	if (gKernelArgs.arch_args.uart.kind[0] != 0) {
		// Map the UART because we want to use it during early boot.
		uint64 regs_start = gKernelArgs.arch_args.uart.regs.start;
		uint64 regs_size = ROUNDUP(gKernelArgs.arch_args.uart.regs.size, B_PAGE_SIZE);
		uint64 base = get_next_virtual_address(regs_size);

		map_range(base, regs_start, regs_size,
			ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
			| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));

		gKernelArgs.arch_args.uart.regs.start = base;
	}

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint64)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable);

	if (kTracePageDirectory)
		arch_mmu_dump_present_tables();

	return (uint64_t)sPageDirectory;
}