/*
 * Copyright 2021-2022 Haiku, Inc. All rights reserved.
 * Released under the terms of the MIT License.
 */


#include <algorithm>

#include <kernel.h>
#include <arch_kernel.h>
#include <arch/cpu.h>
#include <arch/x86/descriptors.h>
#include <boot/platform.h>
#include <boot/stage2.h>
#include <efi/types.h>
#include <efi/boot-services.h>

#include "efi_platform.h"
#include "mmu.h"


//#define TRACE_MMU
#ifdef TRACE_MMU
#	define TRACE(x...) dprintf(x)
#else
#	define TRACE(x...) ;
#endif


//#define TRACE_MEMORY_MAP
//#define TRACE_PAGE_DIRECTORY

#define VADDR_TO_PDENT(va) (((va) / B_PAGE_SIZE) / 1024)
#define VADDR_TO_PTENT(va) (((va) / B_PAGE_SIZE) % 1024)
#define X86_PDE_ADDRESS_MASK	0xfffff000
#define X86_PTE_ADDRESS_MASK	0xfffff000

#define ALIGN_PAGEDIR B_PAGE_SIZE


struct gdt_idt_descr {
	uint16_t limit;
	uint32_t base;
} _PACKED;


gdt_idt_descr gBootGDTDescriptor;
segment_descriptor *gBootGDT = NULL;


static const uint32_t kDefaultPageTableFlags = 0x07;	// present, user, R/W


static uint32_t *sPageDirectory = NULL;


#ifdef TRACE_PAGE_DIRECTORY
static void
dump_page_dir(void)
{
	dprintf("=== Page Directory ===\n");
	for (uint32_t i = 0; i < 1024; i++) {
		uint32_t directoryEntry = sPageDirectory[i];
		if (directoryEntry != 0) {
			dprintf("virt 0x%08x --> page table 0x%08x type 0x%08x\n",
				i << 22, directoryEntry & X86_PDE_ADDRESS_MASK,
				directoryEntry & (~X86_PDE_ADDRESS_MASK));
			uint32_t *pageTable = (uint32_t *)(directoryEntry & X86_PDE_ADDRESS_MASK);
			for (uint32_t j = 0; j < 1024; j++) {
				uint32_t tableEntry = pageTable[j];
				if (tableEntry != 0) {
					dprintf("virt 0x%08x --> page 0x%08x type+flags 0x%08x\n",
						(i << 22) | (j << 12),
						tableEntry & X86_PTE_ADDRESS_MASK,
						tableEntry & (~X86_PTE_ADDRESS_MASK));
				}
			}
		}
	}
}
#endif


static uint32_t *
get_next_page_table(void)
{
	uint32_t *pageTable = (uint32_t *)mmu_allocate_page();
	memset(pageTable, 0, B_PAGE_SIZE);
	return pageTable;
}


static void
arch_mmu_init_gdt(void)
{
	if (platform_allocate_region((void **)&gBootGDT,
			BOOT_GDT_SEGMENT_COUNT * sizeof(segment_descriptor), 0, false) != B_OK) {
		panic("Failed to allocate GDT.\n");
	}

	STATIC_ASSERT(BOOT_GDT_SEGMENT_COUNT > KERNEL_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > KERNEL_DATA_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_CODE_SEGMENT
		&& BOOT_GDT_SEGMENT_COUNT > USER_DATA_SEGMENT);

	// set up a new gdt

	// put standard segment descriptors in GDT
	clear_segment_descriptor(&gBootGDT[0]);

	// seg 0x08 - kernel 4GB code
	set_segment_descriptor(&gBootGDT[KERNEL_CODE_SEGMENT], 0, 0xffffffff,
		DT_CODE_READABLE, DPL_KERNEL);

	// seg 0x10 - kernel 4GB data
	set_segment_descriptor(&gBootGDT[KERNEL_DATA_SEGMENT], 0, 0xffffffff,
		DT_DATA_WRITEABLE, DPL_KERNEL);

	// seg 0x1b - ring 3 user 4GB code
	set_segment_descriptor(&gBootGDT[USER_CODE_SEGMENT], 0, 0xffffffff,
		DT_CODE_READABLE, DPL_USER);

	// seg 0x23 - ring 3 user 4GB data
	set_segment_descriptor(&gBootGDT[USER_DATA_SEGMENT], 0, 0xffffffff,
		DT_DATA_WRITEABLE, DPL_USER);

	addr_t virtualGDT;
	platform_bootloader_address_to_kernel_address(gBootGDT, &virtualGDT);

	gBootGDTDescriptor.limit = BOOT_GDT_SEGMENT_COUNT * sizeof(segment_descriptor);
	gBootGDTDescriptor.base = (uint32_t)virtualGDT;

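	// The descriptor's base is the GDT's kernel-side virtual address rather
	// than its load-time physical one; presumably the GDT is (re)loaded only
	// once the kernel page tables are active, so that is the mapping which
	// must remain valid.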
TRACE("gdt phys 0x%08x virt 0x%08" B_PRIxADDR " desc 0x%08x\n", 134 (uint32_t)gBootGDT, virtualGDT, 135 (uint32_t)&gBootGDTDescriptor); 136 TRACE("gdt limit=%d base=0x%08x\n", 137 gBootGDTDescriptor.limit, gBootGDTDescriptor.base); 138 } 139 140 141 static void 142 map_page(addr_t virtAddr, phys_addr_t physAddr, uint32_t flags) 143 { 144 physAddr &= ~(B_PAGE_SIZE - 1); 145 146 uint32_t *pageTable = NULL; 147 uint32_t pageDirectoryIndex = VADDR_TO_PDENT(virtAddr); 148 uint32_t pageDirectoryEntry = sPageDirectory[pageDirectoryIndex]; 149 150 if (pageDirectoryEntry == 0) { 151 //TRACE("get next page table for address 0x%08" B_PRIxADDR "\n", 152 // virtAddr); 153 pageTable = get_next_page_table(); 154 sPageDirectory[pageDirectoryIndex] = (uint32_t)pageTable | kDefaultPageTableFlags; 155 } else { 156 pageTable = (uint32_t *)(pageDirectoryEntry & X86_PDE_ADDRESS_MASK); 157 } 158 159 uint32_t pageTableIndex = VADDR_TO_PTENT(virtAddr); 160 pageTable[pageTableIndex] = physAddr | flags; 161 } 162 163 164 static void 165 map_range(addr_t virtAddr, phys_addr_t physAddr, size_t size, uint32_t flags) 166 { 167 //TRACE("map 0x%08" B_PRIxADDR " --> 0x%08" B_PRIxPHYSADDR 168 // ", len=0x%08" B_PRIxSIZE ", flags=0x%08" PRIx32 "\n", 169 // virtAddr, physAddr, size, flags); 170 171 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 172 map_page(virtAddr + offset, physAddr + offset, flags); 173 } 174 175 if (virtAddr >= KERNEL_LOAD_BASE) 176 ASSERT_ALWAYS(insert_virtual_allocated_range(virtAddr, size) >= B_OK); 177 } 178 179 180 static void 181 build_physical_memory_list(size_t memoryMapSize, 182 efi_memory_descriptor *memoryMap, size_t descriptorSize, 183 uint32_t descriptorVersion) 184 { 185 addr_t addr = (addr_t)memoryMap; 186 187 gKernelArgs.num_physical_memory_ranges = 0; 188 189 // First scan: Add all usable ranges 190 for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) { 191 efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize); 192 switch (entry->Type) { 193 case EfiLoaderCode: 194 case EfiLoaderData: 195 case EfiBootServicesCode: 196 case EfiBootServicesData: 197 case EfiConventionalMemory: { 198 // Usable memory. 199 // Ignore memory below 1MB and above 512GB. 
				uint64_t base = entry->PhysicalStart;
				uint64_t end = entry->PhysicalStart + entry->NumberOfPages * B_PAGE_SIZE;
				uint64_t originalSize = end - base;
				if (base < 0x100000)
					base = 0x100000;
				if (end > (512ull * 1024 * 1024 * 1024))
					end = 512ull * 1024 * 1024 * 1024;

				gKernelArgs.ignored_physical_memory
					+= originalSize - (max_c(end, base) - base);

				if (base >= end)
					break;
				uint64_t size = end - base;

				insert_physical_memory_range(base, size);
				break;
			}
			default:
				break;
		}
	}

	uint64_t initialPhysicalMemory = total_physical_memory();

	// Second scan: Remove everything reserved that may overlap
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
		switch (entry->Type) {
			case EfiLoaderCode:
			case EfiLoaderData:
			case EfiBootServicesCode:
			case EfiBootServicesData:
			case EfiConventionalMemory:
				break;
			default:
				uint64_t base = entry->PhysicalStart;
				uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
				remove_physical_memory_range(base, size);
		}
	}

	gKernelArgs.ignored_physical_memory
		+= initialPhysicalMemory - total_physical_memory();

	sort_address_ranges(gKernelArgs.physical_memory_range,
		gKernelArgs.num_physical_memory_ranges);
}


static void
build_physical_allocated_list(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	addr_t addr = (addr_t)memoryMap;
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry = (efi_memory_descriptor *)(addr + i * descriptorSize);
		switch (entry->Type) {
			case EfiLoaderData: {
				uint64_t base = entry->PhysicalStart;
				uint64_t size = entry->NumberOfPages * B_PAGE_SIZE;
				insert_physical_allocated_range(base, size);
				break;
			}
			default:
				;
		}
	}

	sort_address_ranges(gKernelArgs.physical_allocated_range,
		gKernelArgs.num_physical_allocated_ranges);
}


void
arch_mmu_post_efi_setup(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	build_physical_allocated_list(memoryMapSize, memoryMap,
		descriptorSize, descriptorVersion);

	// Switch EFI to virtual mode, using the kernel pmap.
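	// SetVirtualAddressMap() makes the firmware rewrite its internal runtime
	// service pointers to the VirtualStart addresses recorded in the map; the
	// UEFI spec allows it to be called only once, after ExitBootServices().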
	kRuntimeServices->SetVirtualAddressMap(memoryMapSize, descriptorSize,
		descriptorVersion, memoryMap);

#ifdef TRACE_MEMORY_MAP
	dprintf("phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_memory_ranges; i++) {
		uint64 start = gKernelArgs.physical_memory_range[i].start;
		uint64 size = gKernelArgs.physical_memory_range[i].size;
		dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated phys memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_physical_allocated_ranges; i++) {
		uint64 start = gKernelArgs.physical_allocated_range[i].start;
		uint64 size = gKernelArgs.physical_allocated_range[i].size;
		dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}

	dprintf("allocated virt memory ranges:\n");
	for (uint32_t i = 0; i < gKernelArgs.num_virtual_allocated_ranges; i++) {
		uint64 start = gKernelArgs.virtual_allocated_range[i].start;
		uint64 size = gKernelArgs.virtual_allocated_range[i].size;
		dprintf(" 0x%08" B_PRIx64 "-0x%08" B_PRIx64 ", length 0x%08" B_PRIx64 "\n",
			start, start + size, size);
	}
#endif
}


static void
arch_mmu_allocate_page_directory(void)
{
	if (platform_allocate_region((void **)&sPageDirectory,
			B_PAGE_SIZE + ALIGN_PAGEDIR, 0, false) != B_OK)
		panic("Failed to allocate page directory.");
	sPageDirectory = (uint32_t *)ROUNDUP((uint32_t)sPageDirectory, ALIGN_PAGEDIR);
	memset(sPageDirectory, 0, B_PAGE_SIZE);

	TRACE("sPageDirectory = 0x%08x\n", (uint32_t)sPageDirectory);
}


uint32_t
arch_mmu_generate_post_efi_page_tables(size_t memoryMapSize,
	efi_memory_descriptor *memoryMap, size_t descriptorSize,
	uint32_t descriptorVersion)
{
	build_physical_memory_list(memoryMapSize, memoryMap,
		descriptorSize, descriptorVersion);

	//TODO: find out how to map EFI runtime services
	//they are not mapped for now because the kernel doesn't use them anyway
#if 0
	addr_t memoryMapAddr = (addr_t)memoryMap;
	for (size_t i = 0; i < memoryMapSize / descriptorSize; ++i) {
		efi_memory_descriptor* entry =
			(efi_memory_descriptor *)(memoryMapAddr + i * descriptorSize);
		if ((entry->Attribute & EFI_MEMORY_RUNTIME) != 0)
			map_range(entry->VirtualStart, entry->PhysicalStart,
				entry->NumberOfPages * B_PAGE_SIZE,
				kDefaultPageFlags);
	}
#endif

	void* cookie = NULL;
	addr_t vaddr;
	phys_addr_t paddr;
	size_t size;
	while (mmu_next_region(&cookie, &vaddr, &paddr, &size)) {
		map_range(vaddr, paddr, size,
			kDefaultPageFlags);
	}

	// identity mapping for first 1MB
	map_range((addr_t)0, (phys_addr_t)0, 1024*1024, kDefaultPageFlags);

	sort_address_ranges(gKernelArgs.virtual_allocated_range,
		gKernelArgs.num_virtual_allocated_ranges);

	// Map the page directory into kernel space at 0xffc00000-0xffffffff
	// this enables a mmu trick where the 4 MB region that this pgdir entry
	// represents now maps the 4MB of potential pagetables that the pgdir
	// points to. Thrown away later in VM bringup, but useful for now.
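	// That is: with this self-referencing entry installed, the page table
	// entry for a virtual address V becomes addressable at
	// page_hole + (V >> 12) * 4, and the page directory itself reappears
	// at 0xfffff000.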
	sPageDirectory[1023] = (uint32_t)sPageDirectory | kDefaultPageFlags;

	addr_t virtPageDirectory;
	platform_bootloader_address_to_kernel_address((void*)sPageDirectory, &virtPageDirectory);

	gKernelArgs.arch_args.phys_pgdir = (uint32_t)sPageDirectory;
	gKernelArgs.arch_args.vir_pgdir = (uint32_t)virtPageDirectory;
	gKernelArgs.arch_args.page_hole = 0xffc00000;
	gKernelArgs.arch_args.virtual_end
		= gKernelArgs.virtual_allocated_range[gKernelArgs.num_virtual_allocated_ranges-1].start
		+ gKernelArgs.virtual_allocated_range[gKernelArgs.num_virtual_allocated_ranges-1].size;

	TRACE("gKernelArgs.arch_args.phys_pgdir = 0x%08" B_PRIx32 "\n",
		gKernelArgs.arch_args.phys_pgdir);
	TRACE("gKernelArgs.arch_args.vir_pgdir = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.vir_pgdir);
	TRACE("gKernelArgs.arch_args.page_hole = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.page_hole);
	TRACE("gKernelArgs.arch_args.virtual_end = 0x%08" B_PRIx64 "\n",
		gKernelArgs.arch_args.virtual_end);

#ifdef TRACE_PAGE_DIRECTORY
	dump_page_dir();
#endif

	return (uint32_t)sPageDirectory;
}


void
arch_mmu_init(void)
{
	arch_mmu_allocate_page_directory();
	arch_mmu_init_gdt();
}