// xref: /haiku/src/system/boot/platform/bios_ia32/long.cpp (revision 7b3e89c0944ae1efa9a8fc66c7303874b7a344b2)
/*
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */


#include "long.h"

#include <algorithm>

#include <KernelExport.h>

// Include the x86_64 version of descriptors.h
#define __x86_64__
#include <arch/x86/descriptors.h>
#undef __x86_64__

#include <arch_system_info.h>
#include <boot/platform.h>
#include <boot/heap.h>
#include <boot/stage2.h>
#include <boot/stdio.h>
#include <kernel.h>
#include <safemode.h>

#include "debug.h"
#include "mmu.h"
#include "smp.h"


static const uint64 kTableMappingFlags = 0x7;
static const uint64 kLargePageMappingFlags = 0x183;
static const uint64 kPageMappingFlags = 0x103;
	// Global, R/W, Present
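	// For reference, these values follow the x86 page table entry bit layout:
	// bit 0 = Present, bit 1 = Writable, bit 2 = User, bit 7 = Large page
	// (PS), bit 8 = Global. So 0x7 marks an intermediate table as
	// present/writable/user-accessible, 0x183 maps a global 2MB page, and
	// 0x103 maps a global 4KB page.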

extern "C" void long_enter_kernel(int currentCPU, uint64 stackTop);

extern uint64 gLongGDT;
extern uint32 gLongPhysicalPMLTop;
extern bool gLongLA57;
extern uint64 gLongKernelEntry;


/*!	Convert an address in the loader's 32-bit address space to the
	corresponding address in the 64-bit kernel address space. */
static inline uint64
fix_address(uint64 address)
{
	if (address >= KERNEL_LOAD_BASE)
		return address + KERNEL_FIXUP_FOR_LONG_MODE;
	else
		return address;
}
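// Everything the loader placed at or above KERNEL_LOAD_BASE (the kernel image
// and kernel allocations) gets shifted up by KERNEL_FIXUP_FOR_LONG_MODE so it
// ends up at the corresponding location in the 64-bit kernel address space
// (KERNEL_LOAD_BASE_64_BIT and up); loader-local addresses below that are
// returned unchanged.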


template<typename Type>
inline void
fix_address(FixedWidthPointer<Type>& p)
{
	if (p != NULL)
		p.SetTo(fix_address(p.Get()));
}


static void
long_gdt_init()
{
	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	clear_segment_descriptor(&gBootGDT[0]);

	// Set up code/data segments (TSS segments set up later in the kernel).
	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_USER);
	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_USER);
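	// In 64-bit mode the CPU ignores the base and limit of code and data
	// descriptors, so flat kernel/user code and data segments are all the
	// boot GDT needs here.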

	// Used by long_enter_kernel().
	gLongGDT = fix_address((addr_t)gBootGDT);
	dprintf("GDT at 0x%llx\n", gLongGDT);
}


static void
long_mmu_init()
{
	uint64* pmlTop;
	// Allocate the top-level page table (the PML4, or the PML5 when LA57
	// is enabled below).
	pmlTop = (uint64*)mmu_allocate_page(&gKernelArgs.arch_args.phys_pgdir);
	memset(pmlTop, 0, B_PAGE_SIZE);
	gKernelArgs.arch_args.vir_pgdir = fix_address((uint64)(addr_t)pmlTop);

	// Store the virtual memory usage information.
	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE_64_BIT;
	gKernelArgs.virtual_allocated_range[0].size = mmu_get_virtual_usage();
	gKernelArgs.num_virtual_allocated_ranges = 1;
	gKernelArgs.arch_args.virtual_end = ROUNDUP(KERNEL_LOAD_BASE_64_BIT
		+ gKernelArgs.virtual_allocated_range[0].size, 0x200000);
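	// (0x200000 = 2MB, the amount of address space covered by one page
	// table, so the kernel's initial virtual end lands on a page table
	// boundary.)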

	// Find the highest physical memory address. We map all physical memory
	// into the kernel address space, so we want to make sure we map everything
	// we have available.
	uint64 maxAddress = 0;
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		maxAddress = std::max(maxAddress,
			gKernelArgs.physical_memory_range[i].start
				+ gKernelArgs.physical_memory_range[i].size);
	}

	// Map at least 4GB: things other than usable RAM (e.g. memory-mapped
	// devices) may live in the first 4GB of the physical address space.
	maxAddress = std::max(maxAddress, (uint64)0x100000000ll);
	maxAddress = ROUNDUP(maxAddress, 0x40000000);

	// Currently only use 1 PDPT (512GB). This will need to change if someone
	// wants to use Haiku on a box with more than 512GB of RAM, but that's
	// probably not going to happen any time soon.
	if (maxAddress / 0x40000000 > 512)
		panic("Can't currently support more than 512GB of RAM!");
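	// (A PDPT has 512 entries mapping 1GB each; maxAddress was rounded up
	// to a 1GB boundary above, so the division counts whole gigabytes.)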

	uint64* pml4 = pmlTop;
	addr_t physicalAddress;
	cpuid_info info;
	if (get_current_cpuid(&info, 7, 0) == B_OK
		&& (info.regs.ecx & IA32_FEATURE_LA57) != 0) {

		if (get_safemode_boolean(B_SAFEMODE_256_TB_MEMORY_LIMIT, false)) {
			// LA57 has been disabled!
			dprintf("la57 disabled per safemode setting\n");
		} else {
			dprintf("la57 enabled\n");
			gLongLA57 = true;
			pml4 = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pml4, 0, B_PAGE_SIZE);
			pmlTop[511] = physicalAddress | kTableMappingFlags;
			pmlTop[0] = physicalAddress | kTableMappingFlags;
		}
	}
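	// When LA57 is active, pmlTop above is the PML5 and the freshly allocated
	// pml4 is chained into it twice: at entry 511 for the kernel half of the
	// address space and at entry 0 so the temporary identity mapping set up
	// below still works. Without LA57, pmlTop itself is the PML4. Either way,
	// the code that follows only ever fills in the PML4.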

	uint64* pdpt;
	uint64* pageDir;
	uint64* pageTable;

	// Create page tables for the physical map area. Also map this PDPT
	// temporarily at the bottom of the address space so that we are identity
	// mapped.

	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[510] = physicalAddress | kTableMappingFlags;
	pml4[0] = physicalAddress | kTableMappingFlags;
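	// PML4 entry 510 is where the kernel's physical map area lives
	// (0xFFFFFF0000000000 in the 4-level layout); entry 0 provides the
	// temporary identity mapping needed while paging and long mode are
	// being switched on.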

	for (uint64 i = 0; i < maxAddress; i += 0x40000000) {
		pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
		memset(pageDir, 0, B_PAGE_SIZE);
		pdpt[i / 0x40000000] = physicalAddress | kTableMappingFlags;

		for (uint64 j = 0; j < 0x40000000; j += 0x200000) {
			pageDir[j / 0x200000] = (i + j) | kLargePageMappingFlags;
		}

		mmu_free(pageDir, B_PAGE_SIZE);
	}

	mmu_free(pdpt, B_PAGE_SIZE);

	// Allocate tables for the kernel mappings.
	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[511] = physicalAddress | kTableMappingFlags;

	pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pageDir, 0, B_PAGE_SIZE);
	pdpt[510] = physicalAddress | kTableMappingFlags;
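	// PML4 entry 511 plus PDPT entry 510 put this page directory at
	// 0xFFFFFFFF80000000, i.e. KERNEL_LOAD_BASE_64_BIT: the kernel is mapped
	// into the top 2GB of the 64-bit address space.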

	// We can now allocate page tables and duplicate the mappings across from
	// the 32-bit address space to them.
	pageTable = NULL;
	for (uint32 i = 0; i < gKernelArgs.virtual_allocated_range[0].size
			/ B_PAGE_SIZE; i++) {
		if ((i % 512) == 0) {
			if (pageTable)
				mmu_free(pageTable, B_PAGE_SIZE);

			pageTable = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pageTable, 0, B_PAGE_SIZE);
			pageDir[i / 512] = physicalAddress | kTableMappingFlags;
		}

		// Get the physical address to map.
		if (!mmu_get_virtual_mapping(KERNEL_LOAD_BASE + (i * B_PAGE_SIZE),
				&physicalAddress))
			continue;

		pageTable[i % 512] = physicalAddress | kPageMappingFlags;
	}

	if (pageTable)
		mmu_free(pageTable, B_PAGE_SIZE);
	mmu_free(pageDir, B_PAGE_SIZE);
	mmu_free(pdpt, B_PAGE_SIZE);
	if (pml4 != pmlTop)
		mmu_free(pml4, B_PAGE_SIZE);

	// Sort the address ranges.
	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	dprintf("phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_memory_range[i].start,
			gKernelArgs.physical_memory_range[i].size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_allocated_range[i].start,
			gKernelArgs.physical_allocated_range[i].size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.virtual_allocated_range[i].start,
			gKernelArgs.virtual_allocated_range[i].size);
	}

	gLongPhysicalPMLTop = gKernelArgs.arch_args.phys_pgdir;
}


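/*!	Fix up all pointer members of a preloaded ELF image so the kernel can
	dereference them at their 64-bit addresses. */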
static void
convert_preloaded_image(preloaded_elf64_image* image)
{
	fix_address(image->next);
	fix_address(image->name);
	fix_address(image->debug_string_table);
	fix_address(image->syms);
	fix_address(image->rel);
	fix_address(image->rela);
	fix_address(image->pltrel);
	fix_address(image->debug_symbols);
}


/*!	Convert all addresses in kernel_args to 64-bit addresses. */
static void
convert_kernel_args()
{
	fix_address(gKernelArgs.boot_volume);
	fix_address(gKernelArgs.vesa_modes);
	fix_address(gKernelArgs.edid_info);
	fix_address(gKernelArgs.debug_output);
	fix_address(gKernelArgs.previous_debug_output);
	fix_address(gKernelArgs.boot_splash);
	fix_address(gKernelArgs.ucode_data);
	fix_address(gKernelArgs.arch_args.apic);
	fix_address(gKernelArgs.arch_args.hpet);

	convert_preloaded_image(static_cast<preloaded_elf64_image*>(
		gKernelArgs.kernel_image.Pointer()));
	fix_address(gKernelArgs.kernel_image);

	// Iterate over the preloaded images. Must save the next address before
	// converting, as the next pointer will be converted.
	preloaded_image* image = gKernelArgs.preloaded_images;
	fix_address(gKernelArgs.preloaded_images);
	while (image != NULL) {
		preloaded_image* next = image->next;
		convert_preloaded_image(static_cast<preloaded_elf64_image*>(image));
		image = next;
	}

	// Set correct kernel args range addresses.
	dprintf("kernel args ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_kernel_args_ranges; i++) {
		gKernelArgs.kernel_args_range[i].start = fix_address(
			gKernelArgs.kernel_args_range[i].start);
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.kernel_args_range[i].start,
			gKernelArgs.kernel_args_range[i].size);
	}

	// Fix driver settings files.
	driver_settings_file* file = gKernelArgs.driver_settings;
	fix_address(gKernelArgs.driver_settings);
	while (file != NULL) {
		driver_settings_file* next = file->next;
		fix_address(file->next);
		fix_address(file->buffer);
		file = next;
	}
}


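/*!	Enable SSE: set the FXSAVE/FXRSTOR and unmasked SIMD FP exception bits
	in CR4, and clear the FPU emulation and monitoring bits in CR0. */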
static void
enable_sse()
{
	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
}


static void
long_smp_start_kernel(void)
{
	uint32 cpu = smp_get_current_cpu();

	// Important: make sure supervisor threads fault on writes to read-only
	// pages.
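	// (The CR0 value below sets PE | NE | WP | PG: protected mode and paging
	// stay enabled, native x87 error reporting is used, and WP makes write
	// protection apply in ring 0 too.)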
	asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
	asm("cld");
	asm("fninit");
	enable_sse();

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[cpu].start
		= fix_address(gKernelArgs.cpu_kstack[cpu].start);

	long_enter_kernel(cpu, gKernelArgs.cpu_kstack[cpu].start
		+ gKernelArgs.cpu_kstack[cpu].size);

	panic("Shouldn't get here");
}


void
long_start_kernel()
{
	// Check whether long mode is supported.
	cpuid_info info;
	get_current_cpuid(&info, 0x80000001, 0);
	if ((info.regs.edx & (1 << 29)) == 0)
		panic("64-bit kernel requires a 64-bit CPU");
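	// (Bit 29 of EDX from CPUID leaf 0x80000001 is the Long Mode flag.)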

	enable_sse();

	preloaded_elf64_image *image = static_cast<preloaded_elf64_image *>(
		gKernelArgs.kernel_image.Pointer());

	smp_init_other_cpus();

	long_gdt_init();
	debug_cleanup();
	long_mmu_init();
	convert_kernel_args();

	// Save the kernel entry point address.
	gLongKernelEntry = image->elf_header.e_entry;
	dprintf("kernel entry at %#llx\n", gLongKernelEntry);

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[0].start
		= fix_address(gKernelArgs.cpu_kstack[0].start);

	// We're about to enter the kernel -- disable console output.
	stdout = NULL;

	smp_boot_other_cpus(long_smp_start_kernel);

	// Enter the kernel!
	long_enter_kernel(0, gKernelArgs.cpu_kstack[0].start
		+ gKernelArgs.cpu_kstack[0].size);

	panic("Shouldn't get here");
}
378