xref: /haiku/src/system/boot/platform/efi/arch/arm64/arch_mmu.cpp (revision 52f7c9389475e19fc21487b38064b4390eeb6fea)
/*
 * Copyright 2019-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */

#include <boot/platform.h>
#include <boot/stage2.h>

#include "mmu.h"
#include "efi_platform.h"

#include "aarch64.h"
#include "arch_mmu.h"

// #define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


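// Translation regime used by the loader: 4KB granule, 48-bit virtual
// addresses, four lookup levels (L0-L3).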
ARMv8TranslationRegime::TranslationDescriptor translation4Kb48bits = {
	{L0_SHIFT, L0_ADDR_MASK, false, true, false },
	{L1_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L2_SHIFT, Ln_ADDR_MASK, true, true,  false },
	{L3_SHIFT, Ln_ADDR_MASK, false, false, true }
};


ARMv8TranslationRegime CurrentRegime(translation4Kb48bits);
/* ARM port */
static uint64_t* sPageDirectory = NULL;
// static uint64_t* sFirstPageTable = NULL;
static uint64_t* sNextPageTable = NULL;
// static uint64_t* sLastPageTable = NULL;


const char*
granule_type_str(int tg)
{
	switch (tg) {
		case TG_4KB:
			return "4KB";
		case TG_16KB:
			return "16KB";
		case TG_64KB:
			return "64KB";
		default:
			return "Invalid Granule";
	}
}


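// Dumps a translation table for debugging: prints every valid descriptor via
// TRACE and recurses into next-level tables.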
void
arch_mmu_dump_table(uint64* table, uint8 currentLevel)
{
	ARMv8TranslationTableDescriptor ttd(table);

	if (currentLevel >= CurrentRegime.MaxLevels()) {
		// This should not happen
		panic("Too many levels ...");
		return;
	}

	uint64 EntriesPerLevel = arch_mmu_entries_per_granularity(CurrentRegime.Granularity());
	for (uint i = 0; i < EntriesPerLevel; i++) {
		if (!ttd.IsInvalid()) {
			TRACE(("Level %d, @%0lx: TTD %016lx\t", currentLevel, ttd.Location(), ttd.Value()));
			if (ttd.IsTable() && currentLevel < 3) {
				TRACE(("Table! Next Level:\n"));
				arch_mmu_dump_table(ttd.Dereference(), currentLevel + 1);
			}
			if (ttd.IsBlock() || (ttd.IsPage() && currentLevel == 3)) {
				TRACE(("Block/Page"));

				if (i & 1) { // 2 entries per row
					TRACE(("\n"));
				} else {
					TRACE(("\t"));
				}
			}
		}
		ttd.Next();
	}
}


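// When TRACE_MMU is enabled, dumps the tables reachable from the active
// translation base register as well as the kernel tables staged under
// sPageDirectory.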
void
arch_mmu_dump_present_tables()
{
#ifdef TRACE_MMU
	if (arch_mmu_enabled()) {
		uint64 address = arch_mmu_base_register();
		TRACE(("Under TTBR0: %lx\n", address));

		arch_mmu_dump_table(reinterpret_cast<uint64*>(address), 0);

		/* We are about to transition but are still in EL2: the MMU
		 * configuration currently used for user space comes from TTBR0_EL2.
		 * The kernel side is not active yet; its tables are allocated under
		 * sPageDirectory and referenced by TTBR1_EL1.
		 */
		if (address != 0ul) {
			TRACE(("Under allocated TTBR1_EL1:\n"));
			arch_mmu_dump_table(sPageDirectory, 0);
		}
	}
#endif
}


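// Prepares EL1 translation control before dropping from EL2: TCR_EL1 is
// derived from TCR_EL2, TTBR1 walks are enabled and T1SZ is programmed for
// the kernel address space.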
void
arch_mmu_setup_EL1()
{
	// Inherit TCR from EL2
	uint64 tcr = READ_SPECIALREG(TCR_EL2);

	// Enable TTBR1
	tcr &= ~TCR_EPD1_DISABLE;

	// Set the size of the kernel address space
	tcr &= ~T1SZ_MASK; // Clear
	// TODO: Compiler dependency?
	tcr |= TCR_T1SZ(__builtin_popcountl(KERNEL_BASE));

	WRITE_SPECIALREG(TCR_EL1, tcr);
}


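// Recursively maps [virt_addr, virt_addr + size) to phys_addr starting at the
// given table level. Creates block/page descriptors whenever size and
// alignment allow it, otherwise allocates (or follows) next-level tables.
// Returns the number of bytes that did not fit in the current table, so the
// caller can continue with the next one.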
uint64
map_region(addr_t virt_addr, addr_t phys_addr, size_t size,
	uint32_t level, uint64_t flags, uint64* descriptor)
{
	ARMv8TranslationTableDescriptor ttd(descriptor);

	if (level >= CurrentRegime.MaxLevels()) {
		panic("Too many levels at mapping\n");
	}

	uint64 currentLevelSize = CurrentRegime.EntrySize(level);

	ttd.JumpTo(CurrentRegime.DescriptorIndex(virt_addr, level));

	uint64 remainingSizeInTable = CurrentRegime.TableSize(level)
		- currentLevelSize * CurrentRegime.DescriptorIndex(virt_addr, level);

	TRACE(("Level %x, Processing desc %lx indexing %lx\n",
		level, reinterpret_cast<uint64>(descriptor), ttd.Location()));

	if (ttd.IsInvalid()) {
		// If the physical address has the same alignment we can make a block
		// here instead of using a complete next-level table
		if (size >= currentLevelSize && CurrentRegime.Aligned(phys_addr, level)) {
			// Set it as block or page
			if (CurrentRegime.BlocksAllowed(level)) {
				ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
			} else {
				// Most likely in Level 3...
				ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
			}

			// Expand!
			int64 expandedSize = (size > remainingSizeInTable)
				? remainingSizeInTable : size;

			do {
				phys_addr += currentLevelSize;
				expandedSize -= currentLevelSize;
				if (expandedSize > 0) {
					ttd.Next();
					if (CurrentRegime.BlocksAllowed(level)) {
						ttd.SetAsBlock(reinterpret_cast<uint64*>(phys_addr), flags);
					} else {
						// Most likely in Level 3...
						ttd.SetAsPage(reinterpret_cast<uint64*>(phys_addr), flags);
					}
				}
			} while (expandedSize > 0);

			return (size > remainingSizeInTable) ? (size - remainingSizeInTable) : 0;

		} else {
			// Set it to next level
			uint64 offset = 0;
			uint64 remainingSize = size;
			do {
				uint64* page = NULL;
				if (ttd.IsInvalid()) {
					// Our region is too small, we need to create a next-level table
					page = CurrentRegime.AllocatePage();
					ttd.SetToTable(page, flags);
				} else if (ttd.IsTable()) {
					// Next table is allocated, follow it
					page = ttd.Dereference();
				} else {
					panic("Required contiguous descriptor in use by Block/Page for %lx\n",
						ttd.Location());
				}

				uint64 unprocessedSize = map_region(virt_addr + offset,
					phys_addr + offset, remainingSize, level + 1, flags, page);

				offset = remainingSize - unprocessedSize;

				remainingSize = unprocessedSize;

				ttd.Next();

			} while (remainingSize > 0);

			return 0;
		}

	} else {

		if ((ttd.IsBlock() && CurrentRegime.BlocksAllowed(level))
			|| (ttd.IsPage() && CurrentRegime.PagesAllowed(level))
		) {
			// TODO: Review, overlap? expand?
			panic("Re-setting a Block/Page descriptor for %lx\n", ttd.Location());
			return 0;
		} else if (ttd.IsTable() && CurrentRegime.TablesAllowed(level)) {
			// Next Level
			map_region(virt_addr, phys_addr, size, level + 1, flags, ttd.Dereference());
			return 0;
		} else {
			panic("All descriptor types processed for %lx\n", ttd.Location());
			return 0;
		}
	}
}


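// Maps a physical range at virt_addr into the tables referenced by TTBR1_EL1
// (kernel addresses) or TTBR0_EL1 (everything else), then records the range
// as allocated virtual address space.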
static void
map_range(addr_t virt_addr, phys_addr_t phys_addr, size_t size, uint64_t flags)
{
	TRACE(("map 0x%0lx --> 0x%0lx, len=0x%0lx, flags=0x%0lx\n",
		(uint64_t)virt_addr, (uint64_t)phys_addr, (uint64_t)size, flags));

	// TODO: Review why we get ranges with 0 size ...
	if (size == 0) {
		TRACE(("Requesting 0 size map\n"));
		return;
	}

	// TODO: Review this case
	if (phys_addr == READ_SPECIALREG(TTBR1_EL1)) {
		TRACE(("Trying to map the TTBR itself?!\n"));
		return;
	}

	if (arch_mmu_read_access(virt_addr) && arch_mmu_read_access(virt_addr + size)) {
		TRACE(("Range already covered in current MMU\n"));
		return;
	}

	uint64 address;

	if (arch_mmu_is_kernel_address(virt_addr)) {
		// Use TTBR1
		address = READ_SPECIALREG(TTBR1_EL1);
	} else {
		// Otherwise use TTBR0
		address = READ_SPECIALREG(TTBR0_EL1);
	}

	map_region(virt_addr, phys_addr, size, 0, flags, reinterpret_cast<uint64*>(address));

// 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
// 		map_page(virt_addr + offset, phys_addr + offset, flags);
// 	}

	ASSERT_ALWAYS(insert_virtual_allocated_range(virt_addr, size) >= B_OK);
}


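// Builds gKernelArgs.physical_memory_range from the EFI memory map: a first
// pass inserts the usable ranges, a second pass removes everything reserved
// that may overlap them.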
static void
build_physical_memory_list(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t addr = (addr_t)memory_map;

	gKernelArgs.num_physical_memory_ranges = 0;

	// First scan: Add all usable ranges
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory: {
			// Usable memory.
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			insert_physical_memory_range(base, size);
			break;
		}
		case EfiACPIReclaimMemory:
			// ACPI reclaim -- physical memory we could actually use later
			break;
		case EfiRuntimeServicesCode:
		case EfiRuntimeServicesData:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		case EfiMemoryMappedIO:
			entry->VirtualStart = entry->PhysicalStart;
			break;
		}
	}

	uint64_t initialPhysicalMemory = total_physical_memory();

	// Second scan: Remove everything reserved that may overlap
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
		case EfiBootServicesCode:
		case EfiBootServicesData:
		case EfiConventionalMemory:
			break;
		default:
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			remove_physical_memory_range(base, size);
		}
	}

	gKernelArgs.ignored_physical_memory
		+= initialPhysicalMemory - total_physical_memory();

	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
}


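// Records the loader's own data regions (EfiLoaderData) as already allocated
// physical memory.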
static void
build_physical_allocated_list(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t addr = (addr_t)memory_map;
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderData: {
			uint64_t base = entry->PhysicalStart;
			uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
			insert_physical_allocated_range(base, size);
			break;
		}
		default:
			;
		}
	}

	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
}


void
arch_mmu_init()
{
	// Stub
}


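// Called once boot services have been exited: records the ranges EFI left
// allocated and switches the EFI runtime services over to the kernel's
// virtual address map.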
void
arch_mmu_post_efi_setup(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	build_physical_allocated_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	// Switch EFI to virtual mode, using the kernel pmap.
	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
		descriptor_version, memory_map);
#ifdef DUMP_RANGES_AFTER_EXIT_SERVICES
	TRACE(("phys memory ranges:\n"));
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint32_t start = (uint32_t)gKernelArgs.physical_memory_range[i].start;
		uint32_t size = (uint32_t)gKernelArgs.physical_memory_range[i].size;
		TRACE(("    0x%08x-0x%08x, length 0x%08x\n",
			start, start + size, size));
	}

	TRACE(("allocated phys memory ranges:\n"));
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint32_t start = (uint32_t)gKernelArgs.physical_allocated_range[i].start;
		uint32_t size = (uint32_t)gKernelArgs.physical_allocated_range[i].size;
		TRACE(("    0x%08x-0x%08x, length 0x%08x\n",
			start, start + size, size));
	}

	TRACE(("allocated virt memory ranges:\n"));
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint32_t start = (uint32_t)gKernelArgs.virtual_allocated_range[i].start;
		uint32_t size = (uint32_t)gKernelArgs.virtual_allocated_range[i].size;
		TRACE(("    0x%08x-0x%08x, length 0x%08x\n",
			start, start + size, size));
	}
#endif
}


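// Makes sure TTBR1_EL1 points at a root table for the kernel address space,
// allocating one if the register is currently zero, and remembers it in
// sPageDirectory.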
void
arch_mmu_allocate_kernel_page_tables(void)
{
	uint64* page = reinterpret_cast<uint64*>(READ_SPECIALREG(TTBR1_EL1));

	// NOTE: On devices supporting multiple translation base registers, TTBR0 must
	// be used solely.
	if (page == NULL) {
		page = CurrentRegime.AllocatePage();
		if (page != NULL) {
			WRITE_SPECIALREG(TTBR1_EL1, page);
		} else {
			panic("Not enough memory for kernel initial page\n");
		}
	} else {
		TRACE(("TTBR1_EL1 present ..."));
	}

	sPageDirectory = page;
}


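// Builds the kernel translation tables from the EFI memory map: maps the
// loader's code/data, every region flagged EFI_MEMORY_RUNTIME and the
// bootloader-tracked regions, then publishes the page directory through
// gKernelArgs.arch_args.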
uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memory_map_size,
	efi_memory_descriptor* memory_map, size_t descriptor_size,
	uint32_t descriptor_version)
{
	addr_t memory_map_addr = (addr_t)memory_map;

	MemoryAttributeIndirection currentMair;

// 	arch_mmu_allocate_page_tables();
	arch_mmu_allocate_kernel_page_tables();

	build_physical_memory_list(memory_map_size, memory_map,
		descriptor_size, descriptor_version);

	TRACE(("Mapping Code & Data\n"));

	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		switch (entry->Type) {
		case EfiLoaderCode:
		case EfiLoaderData:
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
			break;
		default:
			;
		}
	}

	TRACE(("Mapping EFI_MEMORY_RUNTIME\n"));
	for (size_t i = 0; i < memory_map_size / descriptor_size; ++i) {
		efi_memory_descriptor* entry
			= (efi_memory_descriptor*)(memory_map_addr + i * descriptor_size);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				ARMv8TranslationTableDescriptor::DefaultCodeAttribute
				| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

	TRACE(("Mapping \"next\" regions\n"));
	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			ARMv8TranslationTableDescriptor::DefaultCodeAttribute
			| currentMair.MaskOf(MAIR_NORMAL_WB));
	}

/*  TODO: Not an UART here... inspect dtb?
	// identity mapping for the debug uart
	map_range(0x09000000, 0x09000000, B_PAGE_SIZE,
		ARMv8TranslationTableDescriptor::DefaultPeripheralAttribute
		| currentMair.MaskOf(MAIR_DEVICE_nGnRnE));
*/

/*  TODO: Whole physical map already covered ...
	// identity mapping for page table area
	uint64_t page_table_area = (uint64_t)sFirstPageTable;
	map_range(page_table_area, page_table_area, PAGE_TABLE_AREA_SIZE,
		ARMv8TranslationTableDescriptor::DefaultCodeAttribute
		| currentMair.MaskOf(MAIR_NORMAL_WB));
*/

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	addr_t vir_pgdir;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &vir_pgdir);

	gKernelArgs.arch_args.phys_pgdir = (uint64)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32)vir_pgdir;
	gKernelArgs.arch_args.next_pagetable = (uint64)(sNextPageTable) - (uint64)sPageDirectory;

	TRACE(("gKernelArgs.arch_args.phys_pgdir     = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.phys_pgdir));
	TRACE(("gKernelArgs.arch_args.vir_pgdir      = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.vir_pgdir));
	TRACE(("gKernelArgs.arch_args.next_pagetable = 0x%08x\n",
		(uint32_t)gKernelArgs.arch_args.next_pagetable));

	return (uint64_t)sPageDirectory;
}
519