/*
 * Copyright 2012, Alex Smith, alex@alex-smith.me.uk.
 * Distributed under the terms of the MIT License.
 */


#include "long.h"

#include <algorithm>

#include <KernelExport.h>

// Include the x86_64 version of descriptors.h
#define __x86_64__
#include <arch/x86/descriptors.h>
#undef __x86_64__

#include <arch_system_info.h>
#include <boot/platform.h>
#include <boot/heap.h>
#include <boot/stage2.h>
#include <boot/stdio.h>
#include <kernel.h>
#include <safemode.h>

#include "debug.h"
#include "mmu.h"
#include "smp.h"


static const uint64 kTableMappingFlags = 0x7;
	// Present, R/W, User
static const uint64 kLargePageMappingFlags = 0x183;
	// Global, large page (PS), R/W, Present
static const uint64 kPageMappingFlags = 0x103;
	// Global, R/W, Present

extern "C" void long_enter_kernel(int currentCPU, uint64 stackTop);

extern uint64 gLongGDT;
extern uint32 gLongPhysicalPMLTop;
extern bool gLongLA57;
extern uint64 gLongKernelEntry;


/*! Convert a 32-bit address to a 64-bit address. */
static inline uint64
fix_address(uint64 address)
{
	if (address >= KERNEL_LOAD_BASE)
		return address + KERNEL_FIXUP_FOR_LONG_MODE;
	else
		return address;
}


template<typename Type>
inline void
fix_address(FixedWidthPointer<Type>& p)
{
	if (p != NULL)
		p.SetTo(fix_address(p.Get()));
}


static void
long_gdt_init()
{
	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	clear_segment_descriptor(&gBootGDT[0]);

	// Set up code/data segments (TSS segments set up later in the kernel).
	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_KERNEL);
	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], DT_CODE_EXECUTE_ONLY,
		DPL_USER);
	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], DT_DATA_WRITEABLE,
		DPL_USER);

	// Used by long_enter_kernel().
	gLongGDT = fix_address((addr_t)gBootGDT);
	dprintf("GDT at 0x%llx\n", gLongGDT);
}


static void
long_mmu_init()
{
	uint64* pmlTop;
	// Allocate the top level PMLTop.
	pmlTop = (uint64*)mmu_allocate_page((addr_t*)&gKernelArgs.arch_args.phys_pgdir);
	memset(pmlTop, 0, B_PAGE_SIZE);
	gKernelArgs.arch_args.vir_pgdir = fix_address((uint64)(addr_t)pmlTop);

	// Store the virtual memory usage information.
	gKernelArgs.virtual_allocated_range[0].start = KERNEL_LOAD_BASE_64_BIT;
	gKernelArgs.virtual_allocated_range[0].size = mmu_get_virtual_usage();
	gKernelArgs.num_virtual_allocated_ranges = 1;
	gKernelArgs.arch_args.virtual_end = ROUNDUP(KERNEL_LOAD_BASE_64_BIT
		+ gKernelArgs.virtual_allocated_range[0].size, 0x200000);

	// Find the highest physical memory address. We map all physical memory
	// into the kernel address space, so we want to make sure we map everything
	// we have available.
	uint64 maxAddress = 0;
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		maxAddress = std::max(maxAddress,
			gKernelArgs.physical_memory_range[i].start
				+ gKernelArgs.physical_memory_range[i].size);
	}

	// Want to map at least 4GB, there may be stuff other than usable RAM that
	// could be in the first 4GB of physical address space.
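	// (Each PDPT entry covers 1GB -- 512 large pages of 2MB -- which is why
	// the maximum address is rounded up to a 1GB boundary below.)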
	maxAddress = std::max(maxAddress, (uint64)0x100000000ll);
	maxAddress = ROUNDUP(maxAddress, 0x40000000);

	// Currently only use 1 PDPT (512GB). This will need to change if someone
	// wants to use Haiku on a box with more than 512GB of RAM but that's
	// probably not going to happen any time soon.
	if (maxAddress / 0x40000000 > 512)
		panic("Can't currently support more than 512GB of RAM!");

	uint64* pml4 = pmlTop;
	addr_t physicalAddress;
	cpuid_info info;
	if (get_current_cpuid(&info, 7, 0) == B_OK
		&& (info.regs.ecx & IA32_FEATURE_LA57) != 0) {

		if (get_safemode_boolean(B_SAFEMODE_256_TB_MEMORY_LIMIT, false)) {
			// LA57 has been disabled!
			dprintf("la57 disabled per safemode setting\n");
		} else {
			dprintf("la57 enabled\n");
			gLongLA57 = true;
			pml4 = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pml4, 0, B_PAGE_SIZE);
			pmlTop[511] = physicalAddress | kTableMappingFlags;
			pmlTop[0] = physicalAddress | kTableMappingFlags;
		}
	}

	uint64* pdpt;
	uint64* pageDir;
	uint64* pageTable;

	// Create page tables for the physical map area. Also map this PDPT
	// temporarily at the bottom of the address space so that we are identity
	// mapped.

	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[510] = physicalAddress | kTableMappingFlags;
	pml4[0] = physicalAddress | kTableMappingFlags;

	for (uint64 i = 0; i < maxAddress; i += 0x40000000) {
		pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
		memset(pageDir, 0, B_PAGE_SIZE);
		pdpt[i / 0x40000000] = physicalAddress | kTableMappingFlags;

		for (uint64 j = 0; j < 0x40000000; j += 0x200000) {
			pageDir[j / 0x200000] = (i + j) | kLargePageMappingFlags;
		}

		mmu_free(pageDir, B_PAGE_SIZE);
	}

	mmu_free(pdpt, B_PAGE_SIZE);

	// Allocate tables for the kernel mappings.
	pdpt = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pdpt, 0, B_PAGE_SIZE);
	pml4[511] = physicalAddress | kTableMappingFlags;

	pageDir = (uint64*)mmu_allocate_page(&physicalAddress);
	memset(pageDir, 0, B_PAGE_SIZE);
	pdpt[510] = physicalAddress | kTableMappingFlags;

	// We can now allocate page tables and duplicate the mappings across from
	// the 32-bit address space to them.
	pageTable = NULL;
	for (uint32 i = 0; i < gKernelArgs.virtual_allocated_range[0].size
			/ B_PAGE_SIZE; i++) {
		if ((i % 512) == 0) {
			if (pageTable)
				mmu_free(pageTable, B_PAGE_SIZE);

			pageTable = (uint64*)mmu_allocate_page(&physicalAddress);
			memset(pageTable, 0, B_PAGE_SIZE);
			pageDir[i / 512] = physicalAddress | kTableMappingFlags;
		}

		// Get the physical address to map.
		if (!mmu_get_virtual_mapping(KERNEL_LOAD_BASE + (i * B_PAGE_SIZE),
				&physicalAddress))
			continue;

		pageTable[i % 512] = physicalAddress | kPageMappingFlags;
	}

	if (pageTable)
		mmu_free(pageTable, B_PAGE_SIZE);
	mmu_free(pageDir, B_PAGE_SIZE);
	mmu_free(pdpt, B_PAGE_SIZE);
	if (pml4 != pmlTop)
		mmu_free(pml4, B_PAGE_SIZE);

	// Sort the address ranges.
	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	dprintf("phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_memory_range[i].start,
			gKernelArgs.physical_memory_range[i].size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.physical_allocated_range[i].start,
			gKernelArgs.physical_allocated_range[i].size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.virtual_allocated_range[i].start,
			gKernelArgs.virtual_allocated_range[i].size);
	}

	gLongPhysicalPMLTop = gKernelArgs.arch_args.phys_pgdir;
}


static void
convert_preloaded_image(preloaded_elf64_image* image)
{
	fix_address(image->next);
	fix_address(image->name);
	fix_address(image->debug_string_table);
	fix_address(image->syms);
	fix_address(image->rel);
	fix_address(image->rela);
	fix_address(image->pltrel);
	fix_address(image->debug_symbols);
}


/*! Convert all addresses in kernel_args to 64-bit addresses. */
static void
convert_kernel_args()
{
	fix_address(gKernelArgs.boot_volume);
	fix_address(gKernelArgs.vesa_modes);
	fix_address(gKernelArgs.edid_info);
	fix_address(gKernelArgs.debug_output);
	fix_address(gKernelArgs.previous_debug_output);
	fix_address(gKernelArgs.boot_splash);
	fix_address(gKernelArgs.ucode_data);
	fix_address(gKernelArgs.arch_args.apic);
	fix_address(gKernelArgs.arch_args.hpet);

	convert_preloaded_image(static_cast<preloaded_elf64_image*>(
		gKernelArgs.kernel_image.Pointer()));
	fix_address(gKernelArgs.kernel_image);

	// Iterate over the preloaded images. Must save the next address before
	// converting, as the next pointer will be converted.
	preloaded_image* image = gKernelArgs.preloaded_images;
	fix_address(gKernelArgs.preloaded_images);
	while (image != NULL) {
		preloaded_image* next = image->next;
		convert_preloaded_image(static_cast<preloaded_elf64_image*>(image));
		image = next;
	}

	// Set correct kernel args range addresses.
	dprintf("kernel args ranges:\n");
	for (uint32 i = 0; i < gKernelArgs.num_kernel_args_ranges; i++) {
		gKernelArgs.kernel_args_range[i].start = fix_address(
			gKernelArgs.kernel_args_range[i].start);
		dprintf("    base %#018" B_PRIx64 ", length %#018" B_PRIx64 "\n",
			gKernelArgs.kernel_args_range[i].start,
			gKernelArgs.kernel_args_range[i].size);
	}

	// Fix driver settings files.
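	// As with the preloaded image list above, save each next pointer before
	// fix_address() converts it.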
	driver_settings_file* file = gKernelArgs.driver_settings;
	fix_address(gKernelArgs.driver_settings);
	while (file != NULL) {
		driver_settings_file* next = file->next;
		fix_address(file->next);
		fix_address(file->buffer);
		file = next;
	}
}


static void
enable_sse()
{
	x86_write_cr4(x86_read_cr4() | CR4_OS_FXSR | CR4_OS_XMM_EXCEPTION);
	x86_write_cr0(x86_read_cr0() & ~(CR0_FPU_EMULATION | CR0_MONITOR_FPU));
}


static void
long_smp_start_kernel(void)
{
	uint32 cpu = smp_get_current_cpu();

	// Important. Make sure supervisor threads can fault on read only pages...
	asm("movl %%eax, %%cr0" : : "a" ((1 << 31) | (1 << 16) | (1 << 5) | 1));
	asm("cld");
	asm("fninit");
	enable_sse();

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[cpu].start
		= fix_address(gKernelArgs.cpu_kstack[cpu].start);

	long_enter_kernel(cpu, gKernelArgs.cpu_kstack[cpu].start
		+ gKernelArgs.cpu_kstack[cpu].size);

	panic("Shouldn't get here");
}


void
long_start_kernel()
{
	// Check whether long mode is supported.
	cpuid_info info;
	get_current_cpuid(&info, 0x80000001, 0);
	if ((info.regs.edx & (1 << 29)) == 0)
		panic("64-bit kernel requires a 64-bit CPU");

	enable_sse();

	preloaded_elf64_image *image = static_cast<preloaded_elf64_image *>(
		gKernelArgs.kernel_image.Pointer());

	smp_init_other_cpus();

	long_gdt_init();
	debug_cleanup();
	long_mmu_init();
	convert_kernel_args();

	// Save the kernel entry point address.
	gLongKernelEntry = image->elf_header.e_entry;
	dprintf("kernel entry at %#llx\n", gLongKernelEntry);

	// Fix our kernel stack address.
	gKernelArgs.cpu_kstack[0].start
		= fix_address(gKernelArgs.cpu_kstack[0].start);

	// We're about to enter the kernel -- disable console output.
	stdout = NULL;

	smp_boot_other_cpus(long_smp_start_kernel);

	// Enter the kernel!
	long_enter_kernel(0, gKernelArgs.cpu_kstack[0].start
		+ gKernelArgs.cpu_kstack[0].size);

	panic("Shouldn't get here");
}