xref: /haiku/src/system/boot/platform/efi/arch/riscv64/arch_mmu.cpp (revision 3634f142352af2428aed187781fc9d75075e9140)
1 /*
2  * Copyright 2019-2022 Haiku, Inc. All rights reserved.
3  * Released under the terms of the MIT License.
4  */
5 
6 
7 #include <algorithm>
8 
9 #include <kernel.h>
10 #include <arch_kernel.h>
11 #include <boot/platform.h>
12 #include <boot/stage2.h>
13 #include <efi/types.h>
14 #include <efi/boot-services.h>
15 #include <string.h>
16 
17 #include "efi_platform.h"
18 #include "generic_mmu.h"
19 #include "mmu.h"
20 
21 
22 //#define TRACE_MMU
23 #ifdef TRACE_MMU
24 #	define TRACE(x...) dprintf(x)
25 #else
26 #	define TRACE(x...) ;
27 #endif
28 
29 
30 //#define TRACE_MEMORY_MAP
31 
32 // Ignore memory above 512GB
33 #define PHYSICAL_MEMORY_LOW		0x00000000
34 #define PHYSICAL_MEMORY_HIGH	0x8000000000ull
35 
36 #define RESERVED_MEMORY_BASE	0x80000000
37 
38 phys_addr_t sPageTable = 0;
39 
40 
/*!	Converts a physical address into a pointer usable by the boot loader.
	The numeric value is reused unchanged (no offset is applied).
*/
static inline void*
VirtFromPhys(uint64_t physAdr)
{
	void* pointer = (void*)physAdr;
	return pointer;
}
46 
47 
48 #ifdef TRACE_MEMORY_MAP
/*!	Sign-extends a 39-bit virtual address: when bit 38 is set, bits 39..63
	are set as well; otherwise the address is returned unchanged.
*/
static uint64_t
SignExtendVirtAdr(uint64_t virtAdr)
{
	const uint64_t signBit = (uint64_t)1 << 38;
	const uint64_t upperBits = 0xFFFFFF8000000000;

	return (virtAdr & signBit) != 0 ? (virtAdr | upperBits) : virtAdr;
}
56 
57 
58 static void
59 WritePteFlags(uint32 flags)
60 {
61 	bool first = true;
62 	dprintf("{");
63 	for (uint32 i = 0; i < 32; i++) {
64 		if ((1 << i) & flags) {
65 			if (first) first = false; else dprintf(", ");
66 			switch (i) {
67 			case 0:  dprintf("valid"); break;
68 			case 1:  dprintf("read"); break;
69 			case 2:  dprintf("write"); break;
70 			case 3:  dprintf("exec"); break;
71 			case 4:  dprintf("user"); break;
72 			case 5:  dprintf("global"); break;
73 			case 6:  dprintf("accessed"); break;
74 			case 7:  dprintf("dirty"); break;
75 			default: dprintf("%" B_PRIu32, i);
76 			}
77 		}
78 	}
79 	dprintf("}");
80 }
81 
82 
83 static void
84 DumpPageWrite(uint64_t virtAdr, uint64_t physAdr, size_t size, uint64 flags, uint64& firstVirt,
85 	uint64& firstPhys, uint64& firstFlags, uint64& len)
86 {
87 	if (virtAdr == firstVirt + len && physAdr == firstPhys + len && flags == firstFlags) {
88 		len += size;
89 	} else {
90 		if (len != 0) {
91 			dprintf("  0x%08" B_PRIxADDR " - 0x%08" B_PRIxADDR,
92 				firstVirt, firstVirt + (len - 1));
93 			dprintf(": 0x%08" B_PRIxADDR " - 0x%08" B_PRIxADDR ", %#" B_PRIxADDR ", ",
94 				firstPhys, firstPhys + (len - 1), len);
95 			WritePteFlags(firstFlags); dprintf("\n");
96 		}
97 		firstVirt = virtAdr;
98 		firstPhys = physAdr;
99 		firstFlags = flags;
100 		len = size;
101 	}
102 }
103 
104 
105 static void
106 DumpPageTableInt(Pte* pte, uint64_t virtAdr, uint32_t level, uint64& firstVirt, uint64& firstPhys,
107 	uint64& firstFlags, uint64& len)
108 {
109 	for (uint32 i = 0; i < pteCount; i++) {
110 		if (pte[i].isValid) {
111 			if (!pte[i].isRead && !pte[i].isWrite && !pte[i].isExec) {
112 				if (level == 0)
113 					panic("internal page table on level 0");
114 
115 				DumpPageTableInt((Pte*)VirtFromPhys(B_PAGE_SIZE*pte[i].ppn),
116 					virtAdr + ((uint64_t)i << (pageBits + pteIdxBits*level)),
117 					level - 1, firstVirt, firstPhys, firstFlags, len);
118 			} else {
119 				DumpPageWrite(
120 					SignExtendVirtAdr(virtAdr + ((uint64_t)i << (pageBits + pteIdxBits*level))),
121 					pte[i].ppn * B_PAGE_SIZE,
122 					1 << (pageBits + pteIdxBits*level),
123 					pte[i].val & 0xff,
124 					firstVirt, firstPhys, firstFlags, len);
125 			}
126 		}
127 	}
128 }
129 
130 
131 static int
132 DumpPageTable(uint64 satp)
133 {
134 	SatpReg satpReg{.val = satp};
135 	Pte* root = (Pte*)VirtFromPhys(satpReg.ppn * B_PAGE_SIZE);
136 
137 	dprintf("PageTable:\n");
138 	uint64 firstVirt = 0;
139 	uint64 firstPhys = 0;
140 	uint64 firstFlags = 0;
141 	uint64 len = 0;
142 	DumpPageTableInt(root, 0, 2, firstVirt, firstPhys, firstFlags, len);
143 	DumpPageWrite(0, 0, 0, 0, firstVirt, firstPhys, firstFlags, len);
144 
145 	return 0;
146 }
147 #endif /* TRACE_MEMORY_MAP */
148 
149 
150 static Pte*
151 LookupPte(addr_t virtAdr, bool alloc)
152 {
153 	Pte *pte = (Pte*)VirtFromPhys(sPageTable);
154 	for (int level = 2; level > 0; level --) {
155 		pte += VirtAdrPte(virtAdr, level);
156 		if (!pte->isValid) {
157 			if (!alloc)
158 				return NULL;
159 			uint64 ppn = mmu_allocate_page() / B_PAGE_SIZE;
160 			if (ppn == 0)
161 				return NULL;
162 			memset((Pte*)VirtFromPhys(B_PAGE_SIZE * ppn), 0, B_PAGE_SIZE);
163 			Pte newPte {
164 				.isValid = true,
165 				.isGlobal = IS_KERNEL_ADDRESS(virtAdr),
166 				.ppn = ppn
167 			};
168 			pte->val = newPte.val;
169 		}
170 		pte = (Pte*)VirtFromPhys(B_PAGE_SIZE * pte->ppn);
171 	}
172 	pte += VirtAdrPte(virtAdr, 0);
173 	return pte;
174 }
175 
176 
177 static void
178 Map(addr_t virtAdr, phys_addr_t physAdr, uint64 flags)
179 {
180 	// TRACE("Map(%#" B_PRIxADDR ", %#" B_PRIxADDR ")\n", virtAdr, physAdr);
181 	Pte* pte = LookupPte(virtAdr, true);
182 	if (pte == NULL) panic("can't allocate page table");
183 
184 	Pte newPte {
185 		.isValid = true,
186 		.isGlobal = IS_KERNEL_ADDRESS(virtAdr),
187 		.isAccessed = true,
188 		.isDirty = true,
189 	};
190 	newPte.val |= flags;
191 
192 	pte->val = newPte.val;
193 }
194 
195 
196 static void
197 MapRange(addr_t virtAdr, phys_addr_t physAdr, size_t size, uint64 flags)
198 {
199 	TRACE("MapRange(%#" B_PRIxADDR " - %#" B_PRIxADDR ", %#" B_PRIxADDR " - %#" B_PRIxADDR ", %#"
200 		B_PRIxADDR ")\n", virtAdr, virtAdr + (size - 1), physAdr, physAdr + (size - 1), size);
201 	for (size_t i = 0; i < size; i += B_PAGE_SIZE)
202 		Map(virtAdr + i, physAdr + i, flags);
203 
204 	ASSERT_ALWAYS(insert_virtual_allocated_range(virtAdr, size) >= B_OK);
205 }
206 
207 
208 static void
209 insert_virtual_range_to_keep(uint64 start, uint64 size)
210 {
211 	status_t status = insert_address_range(
212 		gKernelArgs.arch_args.virtual_ranges_to_keep,
213 		&gKernelArgs.arch_args.num_virtual_ranges_to_keep,
214 		MAX_VIRTUAL_RANGES_TO_KEEP, start, size);
215 
216 	if (status == B_ENTRY_NOT_FOUND)
217 		panic("too many virtual ranges to keep");
218 	else if (status != B_OK)
219 		panic("failed to add virtual range to keep");
220 }
221 
222 
223 static void
224 MapAddrRange(addr_range& range, uint64 flags)
225 {
226 	if (range.size == 0) {
227 		range.start = 0;
228 		return;
229 	}
230 
231 	phys_addr_t physAdr = range.start;
232 	range.start = get_next_virtual_address(range.size);
233 
234 	MapRange(range.start, physAdr, range.size, flags);
235 	insert_virtual_range_to_keep(range.start, range.size);
236 }
237 
238 
239 static void
240 PreallocKernelRange()
241 {
242 	Pte* root = (Pte*)VirtFromPhys(sPageTable);
243 	for (uint64 i = VirtAdrPte(KERNEL_BASE, 2); i <= VirtAdrPte(KERNEL_TOP, 2);
244 		i++) {
245 		Pte* pte = &root[i];
246 		uint64 ppn = mmu_allocate_page() / B_PAGE_SIZE;
247 		if (ppn == 0) panic("can't alloc early physical page");
248 		memset(VirtFromPhys(B_PAGE_SIZE * ppn), 0, B_PAGE_SIZE);
249 		Pte newPte {
250 			.isValid = true,
251 			.isGlobal = true,
252 			.ppn = ppn
253 		};
254 		pte->val = newPte.val;
255 	}
256 }
257 
258 
259 static uint64
260 GetSatp()
261 {
262 	return SatpReg{
263 		.ppn = sPageTable / B_PAGE_SIZE,
264 		.asid = 0,
265 		.mode = satpModeSv39
266 	}.val;
267 }
268 
269 
270 static void
271 GetPhysMemRange(addr_range& range)
272 {
273 	phys_addr_t beg = (phys_addr_t)(-1), end = 0;
274 	if (gKernelArgs.num_physical_memory_ranges <= 0)
275 		beg = 0;
276 	else {
277 		for (size_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
278 			beg = std::min(beg, gKernelArgs.physical_memory_range[i].start);
279 			end = std::max(end, gKernelArgs.physical_memory_range[i].start + gKernelArgs.physical_memory_range[i].size);
280 		}
281 	}
282 	range.start = beg;
283 	range.size = end - beg;
284 }
285 
286 
287 //#pragma mark -
288 
289 
/*!	Architecture hook for early MMU initialization.
	Intentionally empty in this file.
*/
void
arch_mmu_init()
{
	// Nothing to do.
}
294 
295 
296 void
297 arch_mmu_post_efi_setup(size_t memory_map_size,
298 	efi_memory_descriptor *memory_map, size_t descriptor_size,
299 	uint32_t descriptor_version)
300 {
301 	build_physical_allocated_list(memory_map_size, memory_map,
302 		descriptor_size, descriptor_version);
303 
304 	// Switch EFI to virtual mode, using the kernel pmap.
305 	kRuntimeServices->SetVirtualAddressMap(memory_map_size, descriptor_size,
306 		descriptor_version, memory_map);
307 
308 #ifdef TRACE_MEMORY_MAP
309 	dprintf("phys memory ranges:\n");
310 	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
311 		uint64 start = gKernelArgs.physical_memory_range[i].start;
312 		uint64 size = gKernelArgs.physical_memory_range[i].size;
313 		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
314 			start, start + size, size);
315 	}
316 
317 	dprintf("allocated phys memory ranges:\n");
318 	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
319 		uint64 start = gKernelArgs.physical_allocated_range[i].start;
320 		uint64 size = gKernelArgs.physical_allocated_range[i].size;
321 		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
322 			start, start + size, size);
323 	}
324 
325 	dprintf("allocated virt memory ranges:\n");
326 	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
327 		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
328 		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
329 		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
330 			start, start + size, size);
331 	}
332 
333 	dprintf("virt memory ranges to keep:\n");
334 	for (uint32_t i = 0; i < gKernelArgs.arch_args.num_virtual_ranges_to_keep; i++) {
335 		uint64 start = gKernelArgs.arch_args.virtual_ranges_to_keep[i].start;
336 		uint64 size = gKernelArgs.arch_args.virtual_ranges_to_keep[i].size;
337 		dprintf("    0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
338 			start, start + size, size);
339 	}
340 #endif
341 }
342 
343 
344 static void
345 fix_memory_map_for_m_mode(size_t memoryMapSize, efi_memory_descriptor* memoryMap,
346 	size_t descriptorSize, uint32_t descriptorVersion)
347 {
348 	addr_t addr = (addr_t)memoryMap;
349 
350 	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
351 		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
352 		if (entry->PhysicalStart == RESERVED_MEMORY_BASE) {
353 			entry->Type = EfiReservedMemoryType;
354 		}
355 	}
356 }
357 
358 
359 uint64
360 arch_mmu_generate_post_efi_page_tables(size_t memoryMapSize, efi_memory_descriptor* memoryMap,
361 	size_t descriptorSize, uint32_t descriptorVersion)
362 {
363 	sPageTable = mmu_allocate_page();
364 	memset(VirtFromPhys(sPageTable), 0, B_PAGE_SIZE);
365 	TRACE("sPageTable: %#" B_PRIxADDR "\n", sPageTable);
366 
367 	PreallocKernelRange();
368 
369 	gKernelArgs.num_virtual_allocated_ranges = 0;
370 	gKernelArgs.arch_args.num_virtual_ranges_to_keep = 0;
371 
372 	fix_memory_map_for_m_mode(memoryMapSize, memoryMap, descriptorSize, descriptorVersion);
373 
374 	build_physical_memory_list(memoryMapSize, memoryMap, descriptorSize, descriptorVersion,
375 		PHYSICAL_MEMORY_LOW, PHYSICAL_MEMORY_HIGH);
376 
377 	addr_range physMemRange;
378 	GetPhysMemRange(physMemRange);
379 	TRACE("physMemRange: %#" B_PRIxADDR ", %#" B_PRIxSIZE "\n",
380 		physMemRange.start, physMemRange.size);
381 
382 	// Physical memory mapping
383 	gKernelArgs.arch_args.physMap.start = KERNEL_TOP + 1 - physMemRange.size;
384 	gKernelArgs.arch_args.physMap.size = physMemRange.size;
385 	MapRange(gKernelArgs.arch_args.physMap.start, physMemRange.start, physMemRange.size,
386 		Pte {.isRead = true, .isWrite = true}.val);
387 
388 	// Boot loader
389 	TRACE("Boot loader:\n");
390 	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
391 		efi_memory_descriptor* entry = &memoryMap[i];
392 		switch (entry->Type) {
393 		case EfiLoaderCode:
394 		case EfiLoaderData:
395 			MapRange(entry->VirtualStart, entry->PhysicalStart, entry->NumberOfPages * B_PAGE_SIZE,
396 				Pte {.isRead = true, .isWrite = true, .isExec = true}.val);
397 			break;
398 		default:
399 			;
400 		}
401 	}
402 	TRACE("Boot loader stack\n");
403 	addr_t sp = Sp();
404 	TRACE("  SP: %#" B_PRIxADDR "\n", sp);
405 
406 	// EFI runtime services
407 	TRACE("EFI runtime services:\n");
408 	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
409 		efi_memory_descriptor* entry = &memoryMap[i];
410 		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
411 			MapRange(entry->VirtualStart, entry->PhysicalStart, entry->NumberOfPages * B_PAGE_SIZE,
412 				Pte {.isRead = true, .isWrite = true, .isExec = true}.val);
413 	}
414 
415 	// Memory regions
416 	TRACE("Regions:\n");
417 	void* cookie = NULL;
418 	addr_t virtAdr;
419 	phys_addr_t physAdr;
420 	size_t size;
421 	while (mmu_next_region(&cookie, &virtAdr, &physAdr, &size)) {
422 		MapRange(virtAdr, physAdr, size, Pte {.isRead = true, .isWrite = true, .isExec = true}.val);
423 	}
424 
425 	// Devices
426 	TRACE("Devices:\n");
427 	MapAddrRange(gKernelArgs.arch_args.clint, Pte {.isRead = true, .isWrite = true}.val);
428 	MapAddrRange(gKernelArgs.arch_args.htif, Pte {.isRead = true, .isWrite = true}.val);
429 	MapAddrRange(gKernelArgs.arch_args.plic, Pte {.isRead = true, .isWrite = true}.val);
430 
431 	if (strcmp(gKernelArgs.arch_args.uart.kind, "") != 0) {
432 		MapRange(gKernelArgs.arch_args.uart.regs.start,
433 			gKernelArgs.arch_args.uart.regs.start,
434 			gKernelArgs.arch_args.uart.regs.size,
435 			Pte {.isRead = true, .isWrite = true}.val);
436 		MapAddrRange(gKernelArgs.arch_args.uart.regs,
437 			Pte {.isRead = true, .isWrite = true}.val);
438 	}
439 
440 	sort_address_ranges(gKernelArgs.virtual_allocated_range,
441 		gKernelArgs.num_virtual_allocated_ranges);
442 
443 	#ifdef TRACE_MEMORY_MAP
444 	DumpPageTable(GetSatp());
445 	#endif
446 
447 	return GetSatp();
448 }
449