xref: /haiku/src/system/kernel/arch/x86/arch_vm.cpp (revision 3c6e2dd68577c34d93e17f19711f6245bf6d0915)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2008, Jérôme Duval.
4  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
5  * Distributed under the terms of the MIT License.
6  *
7  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
8  * Distributed under the terms of the NewOS License.
9  */
10 
11 
12 #include <stdlib.h>
13 #include <string.h>
14 
15 #include <KernelExport.h>
16 
17 #include <smp.h>
18 #include <util/AutoLock.h>
19 #include <vm/vm.h>
20 #include <vm/vm_page.h>
21 #include <vm/vm_priv.h>
22 #include <vm/VMAddressSpace.h>
23 #include <vm/VMArea.h>
24 
25 #include <arch/vm.h>
26 #include <arch/int.h>
27 #include <arch/cpu.h>
28 
29 #include <arch/x86/bios.h>
30 
31 #include "x86_paging.h"
32 
33 
34 //#define TRACE_ARCH_VM
35 #ifdef TRACE_ARCH_VM
36 #	define TRACE(x) dprintf x
37 #else
38 #	define TRACE(x) ;
39 #endif
40 
41 #define TRACE_MTRR_ARCH_VM
42 #ifdef TRACE_MTRR_ARCH_VM
43 #	define TRACE_MTRR(x...) dprintf(x)
44 #else
45 #	define TRACE_MTRR(x...)
46 #endif
47 
48 
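// Limits for the MTRR-based memory type support below: we track at most
// kMaxMemoryTypeRanges requested ranges and use at most
// kMaxMemoryTypeRegisters variable range MTRRs. kMinMemoryTypeRangeSize
// (4 KiB) is the smallest block a variable range MTRR can describe and thus
// the required alignment for base and size of a range.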
49 static const uint32 kMaxMemoryTypeRanges	= 32;
50 static const uint32 kMaxMemoryTypeRegisters	= 32;
51 static const uint64 kMinMemoryTypeRangeSize	= 1 << 12;
52 
53 
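// Scratch data for translating one requested memory type range into MTRRs.
// The range is split at a power-of-two boundary into a "left" and a "right"
// part. For each part the info structure below records how much of the range
// it covers, how many MTRRs a purely additive decomposition into power-of-two
// blocks would need, and the best "subtractive" alternative, i.e. covering a
// larger power-of-two block with the desired type and carving the excess back
// out with uncached MTRRs.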
54 struct memory_type_range_analysis_info {
55 	uint64	size;
56 	uint32	rangesNeeded;
57 	uint32	subtractiveRangesNeeded;
58 	uint64	bestSubtractiveRange;
59 };
60 
61 struct memory_type_range_analysis {
62 	uint64							base;
63 	uint64							size;
64 	uint32							type;
65 	uint32							rangesNeeded;
66 	uint64							endRange;
67 	memory_type_range_analysis_info	left;
68 	memory_type_range_analysis_info	right;
69 };
70 
71 struct memory_type_range {
72 	uint64						base;
73 	uint64						size;
74 	uint32						type;
75 	area_id						area;
76 };
77 
78 
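// Kernel virtual address at which the low memory area (0x0 - 0xa0000) is
// mapped; set up in arch_vm_init_post_area().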
79 void *gDmaAddress;
80 
81 static memory_type_range sMemoryTypeRanges[kMaxMemoryTypeRanges];
82 static uint32 sMemoryTypeRangeCount;
83 
84 static memory_type_range_analysis sMemoryTypeRangeAnalysis[
85 	kMaxMemoryTypeRanges];
86 
87 static x86_mtrr_info sMemoryTypeRegisters[kMaxMemoryTypeRegisters];
88 static uint32 sMemoryTypeRegisterCount;
89 static uint32 sMemoryTypeRegistersUsed;
90 
91 static mutex sMemoryTypeLock = MUTEX_INITIALIZER("memory type ranges");
92 
93 
94 static void
95 set_mtrrs()
96 {
97 	x86_set_mtrrs(sMemoryTypeRegisters, sMemoryTypeRegistersUsed);
98 
99 #ifdef TRACE_MTRR_ARCH_VM
100 	TRACE_MTRR("set MTRRs to:\n");
101 	for (uint32 i = 0; i < sMemoryTypeRegistersUsed; i++) {
102 		const x86_mtrr_info& info = sMemoryTypeRegisters[i];
103 		TRACE_MTRR("  mtrr: %2lu: base: %#9llx, size: %#9llx, type: %u\n",
104 			i, info.base, info.size, info.type);
105 	}
106 #endif
107 }
108 
109 
110 static void
111 add_used_mtrr(uint64 base, uint64 size, uint32 type)
112 {
113 	ASSERT(sMemoryTypeRegistersUsed < sMemoryTypeRegisterCount);
114 
115 	x86_mtrr_info& info = sMemoryTypeRegisters[sMemoryTypeRegistersUsed++];
116 	info.base = base;
117 	info.size = size;
118 	info.type = type;
119 }
120 
121 
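// Determines how many MTRRs are needed to cover the given range. Starting
// with the smallest block size and doubling it each iteration, the unaligned
// head ("left") and tail ("right") of the range are decomposed into
// power-of-two blocks. For either side a subtractive setup is considered as
// well, as long as the enclosing block would not spill into the neighboring
// range (previousEnd/nextBase). For instance, a write-combining range from
// 0x1000 to 0x10000 needs four additive MTRRs (4 KB + 8 KB + 16 KB + 32 KB),
// but only two with the subtractive approach: one 64 KB write-combining block
// at 0x0 plus one 4 KB uncached block at 0x0 (provided no other range lies
// directly below it).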
122 static void
123 analyze_range(memory_type_range_analysis& analysis, uint64 previousEnd,
124 	uint64 nextBase)
125 {
126 	uint64 base = analysis.base;
127 	uint64 size = analysis.size;
128 
129 	memory_type_range_analysis_info& left = analysis.left;
130 	memory_type_range_analysis_info& right = analysis.right;
131 
132 	uint32 leftSubtractiveRangesNeeded = 2;
133 	int32 leftBestSubtractiveRangeDifference = 0;
134 	uint32 leftBestSubtractivePositiveRangesNeeded = 0;
135 	uint32 leftBestSubtractiveRangesNeeded = 0;
136 
137 	uint32 rightSubtractiveRangesNeeded = 2;
138 	int32 rightBestSubtractiveRangeDifference = 0;
139 	uint32 rightBestSubtractivePositiveRangesNeeded = 0;
140 	uint32 rightBestSubtractiveRangesNeeded = 0;
141 
142 	uint64 range = kMinMemoryTypeRangeSize;
143 
144 	while (size > 0) {
145 		if ((base & range) != 0) {
146 			left.rangesNeeded++;
147 
148 			bool replaceBestSubtractive = false;
149 			int32 rangeDifference = (int32)left.rangesNeeded
150 				- (int32)leftSubtractiveRangesNeeded;
151 			if (left.bestSubtractiveRange == 0
152 				|| leftBestSubtractiveRangeDifference < rangeDifference) {
153 				// check for intersection with previous range
154 				replaceBestSubtractive
155 					= previousEnd == 0 || base - range >= previousEnd;
156 			}
157 
158 			if (replaceBestSubtractive) {
159 				leftBestSubtractiveRangeDifference = rangeDifference;
160 				leftBestSubtractiveRangesNeeded
161 					= leftSubtractiveRangesNeeded;
162 				left.bestSubtractiveRange = range;
163 				leftBestSubtractivePositiveRangesNeeded = 0;
164 			} else
165 				leftBestSubtractivePositiveRangesNeeded++;
166 
167 			left.size += range;
168 			base += range;
169 			size -= range;
170 		} else if (left.bestSubtractiveRange > 0)
171 			leftSubtractiveRangesNeeded++;
172 
173 		if ((size & range) != 0) {
174 			right.rangesNeeded++;
175 
176 			bool replaceBestSubtractive = false;
177 			int32 rangeDifference = (int32)right.rangesNeeded
178 				- (int32)rightSubtractiveRangesNeeded;
179 			if (right.bestSubtractiveRange == 0
180 				|| rightBestSubtractiveRangeDifference < rangeDifference) {
181 				// check for intersection with previous range
182 				replaceBestSubtractive
183 					= nextBase == 0 || base + size + range <= nextBase;
184 			}
185 
186 			if (replaceBestSubtractive) {
187 				rightBestSubtractiveRangeDifference = rangeDifference;
188 				rightBestSubtractiveRangesNeeded
189 					= rightSubtractiveRangesNeeded;
190 				right.bestSubtractiveRange = range;
191 				rightBestSubtractivePositiveRangesNeeded = 0;
192 			} else
193 				rightBestSubtractivePositiveRangesNeeded++;
194 
195 			right.size += range;
196 			size -= range;
197 		} else if (right.bestSubtractiveRange > 0)
198 			rightSubtractiveRangesNeeded++;
199 
200 		range <<= 1;
201 	}
202 
203 	analysis.endRange = range;
204 
205 	// If a subtractive setup doesn't have any advantages, don't use it.
206 	// Also compute analysis.rangesNeeded.
207 	if (leftBestSubtractiveRangesNeeded
208 			+ leftBestSubtractivePositiveRangesNeeded >= left.rangesNeeded) {
209 		left.bestSubtractiveRange = 0;
210 		left.subtractiveRangesNeeded = 0;
211 		analysis.rangesNeeded = left.rangesNeeded;
212 	} else {
213 		left.subtractiveRangesNeeded = leftBestSubtractiveRangesNeeded
214 			+ leftBestSubtractivePositiveRangesNeeded;
215 		analysis.rangesNeeded = left.subtractiveRangesNeeded;
216 	}
217 
218 	if (rightBestSubtractiveRangesNeeded
219 			+ rightBestSubtractivePositiveRangesNeeded >= right.rangesNeeded) {
220 		right.bestSubtractiveRange = 0;
221 		right.subtractiveRangesNeeded = 0;
222 		analysis.rangesNeeded += right.rangesNeeded;
223 	} else {
224 		right.subtractiveRangesNeeded = rightBestSubtractiveRangesNeeded
225 			+ rightBestSubtractivePositiveRangesNeeded;
226 		analysis.rangesNeeded += right.subtractiveRangesNeeded;
227 	}
228 }
229 
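
// Generates the actual MTRR setup for an analyzed range. The left part is
// built top-down from the split boundary, the right part bottom-up. Once the
// best subtractive block size is reached, a block of twice that size is
// mapped with the range's memory type and the uncovered remainder is carved
// back out with uncached MTRRs.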
230 static void
231 compute_mtrrs(const memory_type_range_analysis& analysis)
232 {
233 	const memory_type_range_analysis_info& left = analysis.left;
234 	const memory_type_range_analysis_info& right = analysis.right;
235 
236 	// generate a setup for the left side
237 	if (left.rangesNeeded > 0) {
238 		uint64 base = analysis.base;
239 		uint64 size = left.size;
240 		uint64 range = analysis.endRange;
241 		uint64 rangeEnd = base + size;
242 		bool subtractive = false;
243 		while (size > 0) {
244 			if (range == left.bestSubtractiveRange) {
245 				base = rangeEnd - 2 * range;
246 				add_used_mtrr(base, range * 2, analysis.type);
247 				subtractive = true;
248 				break;
249 			}
250 
251 			if ((size & range) != 0) {
252 				rangeEnd -= range;
253 				add_used_mtrr(rangeEnd, range, analysis.type);
254 				size -= range;
255 			}
256 
257 			range >>= 1;
258 		}
259 
260 		if (subtractive) {
261 			uint64 shortestRange = range;
262 			while (size > 0) {
263 				if ((size & range) != 0) {
264 					shortestRange = range;
265 					size -= range;
266 				} else {
267 					add_used_mtrr(base, range, IA32_MTR_UNCACHED);
268 					base += range;
269 				}
270 
271 				range >>= 1;
272 			}
273 
274 			add_used_mtrr(base, shortestRange, IA32_MTR_UNCACHED);
275 		}
276 	}
277 
278 	// generate a setup for the right side
279 	if (right.rangesNeeded > 0) {
280 		uint64 base = analysis.base + left.size;
281 		uint64 size = right.size;
282 		uint64 range = analysis.endRange;
283 		bool subtractive = false;
284 		while (size > 0) {
285 			if (range == right.bestSubtractiveRange) {
286 				add_used_mtrr(base, range * 2, analysis.type);
287 				subtractive = true;
288 				break;
289 			}
290 
291 			if ((size & range) != 0) {
292 				add_used_mtrr(base, range, analysis.type);
293 				base += range;
294 				size -= range;
295 			}
296 
297 			range >>= 1;
298 		}
299 
300 		if (subtractive) {
301 			uint64 rangeEnd = base + range * 2;
302 			uint64 shortestRange = range;
303 			while (size > 0) {
304 				if ((size & range) != 0) {
305 					shortestRange = range;
306 					size -= range;
307 				} else {
308 					rangeEnd -= range;
309 					add_used_mtrr(rangeEnd, range, IA32_MTR_UNCACHED);
310 				}
311 
312 				range >>= 1;
313 			}
314 
315 			rangeEnd -= shortestRange;
316 			add_used_mtrr(rangeEnd, shortestRange, IA32_MTR_UNCACHED);
317 		}
318 	}
319 }
320 
321 
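// Recomputes the complete MTRR setup from sMemoryTypeRanges. Adjacent or
// overlapping ranges of the same type are joined first -- for instance, two
// adjoining write-back ranges at 0x0-0x9f000 and 0x9f000-0x100000 become a
// single 0x0-0x100000 range -- and uncached ranges are dropped, since
// uncached is the default anyway. Returns B_BUSY if the resulting setup would
// need more variable range MTRRs than the CPU provides.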
322 static status_t
323 update_mtrrs()
324 {
325 	// Transfer the range array to the analysis array, dropping all uncachable
326 	// ranges (that's the default anyway) and joining adjacent ranges with the
327 	// same type.
328 	memory_type_range_analysis* ranges = sMemoryTypeRangeAnalysis;
329 	uint32 rangeCount = 0;
330 	{
331 		uint32 previousRangeType = IA32_MTR_UNCACHED;
332 		uint64 previousRangeEnd = 0;
333 		for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) {
334 			if (sMemoryTypeRanges[i].type != IA32_MTR_UNCACHED) {
335 				uint64 rangeEnd = sMemoryTypeRanges[i].base
336 					+ sMemoryTypeRanges[i].size;
337 				if (previousRangeType == sMemoryTypeRanges[i].type
338 					&& previousRangeEnd >= sMemoryTypeRanges[i].base) {
339 					// the range overlaps/continues the previous range -- just
340 					// enlarge that one
341 					if (rangeEnd > previousRangeEnd)
342 						previousRangeEnd = rangeEnd;
343 					ranges[rangeCount - 1].size  = previousRangeEnd
344 						- ranges[rangeCount - 1].base;
345 				} else {
346 					// add the new range
347 					memset(&ranges[rangeCount], 0, sizeof(ranges[rangeCount]));
348 					ranges[rangeCount].base = sMemoryTypeRanges[i].base;
349 					ranges[rangeCount].size = sMemoryTypeRanges[i].size;
350 					ranges[rangeCount].type = sMemoryTypeRanges[i].type;
351 					previousRangeEnd = rangeEnd;
352 					previousRangeType = sMemoryTypeRanges[i].type;
353 					rangeCount++;
354 				}
355 			}
356 		}
357 	}
358 
359 	// analyze the ranges
360 	uint32 registersNeeded = 0;
361 	uint64 previousEnd = 0;
362 	for (uint32 i = 0; i < rangeCount; i++) {
363 		memory_type_range_analysis& range = ranges[i];
364 		uint64 nextBase = i + 1 < rangeCount ? ranges[i + 1].base : 0;
365 		analyze_range(range, previousEnd, nextBase);
366 		registersNeeded += range.rangesNeeded;
367 		previousEnd = range.base + range.size;
368 	}
369 
370 	// fail when we need more registers than we have
371 	if (registersNeeded > sMemoryTypeRegisterCount)
372 		return B_BUSY;
373 
374 	sMemoryTypeRegistersUsed = 0;
375 
376 	for (uint32 i = 0; i < rangeCount; i++) {
377 		memory_type_range_analysis& range = ranges[i];
378 		compute_mtrrs(range);
379 	}
380 
381 	set_mtrrs();
382 
383 	return B_OK;
384 }
385 
386 
387 static void
388 remove_memory_type_range_locked(uint32 index)
389 {
390 	sMemoryTypeRangeCount--;
391 	if (index < sMemoryTypeRangeCount) {
392 		memmove(sMemoryTypeRanges + index, sMemoryTypeRanges + index + 1,
393 			(sMemoryTypeRangeCount - index) * sizeof(memory_type_range));
394 	}
395 }
396 
397 
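// Registers a memory type for the given physical range on behalf of an area.
// The B_MTR_* constant is translated to its IA32 MTRR type, the range is
// inserted into the sorted range list (after checking for clashes with
// differently typed ranges), and the MTRRs are reprogrammed. If that fails
// because too many MTRRs would be needed, the range is progressively shrunk
// to an aligned subrange that still fits before giving up.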
398 static status_t
399 add_memory_type_range(area_id areaID, uint64 base, uint64 size, uint32 type)
400 {
401 	// translate the type
402 	if (type == 0)
403 		return B_OK;
404 
405 	switch (type) {
406 		case B_MTR_UC:
407 			type = IA32_MTR_UNCACHED;
408 			break;
409 		case B_MTR_WC:
410 			type = IA32_MTR_WRITE_COMBINING;
411 			break;
412 		case B_MTR_WT:
413 			type = IA32_MTR_WRITE_THROUGH;
414 			break;
415 		case B_MTR_WP:
416 			type = IA32_MTR_WRITE_PROTECTED;
417 			break;
418 		case B_MTR_WB:
419 			type = IA32_MTR_WRITE_BACK;
420 			break;
421 		default:
422 			return B_BAD_VALUE;
423 	}
424 
425 	TRACE_MTRR("add_memory_type_range(%ld, %#llx, %#llx, %lu)\n", areaID, base,
426 		size, type);
427 
428 	// base and size must at least be aligned to the minimum range size
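	// (e.g. base 0x12345000 with size 0x2000 is acceptable, while a base or
	// size with bits below 0x1000 set, such as 0x12345800, is rejected)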
429 	if (((base | size) & (kMinMemoryTypeRangeSize - 1)) != 0) {
430 		dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory base or "
431 			"size not minimally aligned!\n", areaID, base, size, type);
432 		return B_BAD_VALUE;
433 	}
434 
435 	MutexLocker locker(sMemoryTypeLock);
436 
437 	if (sMemoryTypeRangeCount == kMaxMemoryTypeRanges) {
438 		dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Out of "
439 			"memory ranges!\n", areaID, base, size, type);
440 		return B_BUSY;
441 	}
442 
443 	// iterate through the existing ranges and check for clashes
444 	bool foundInsertionIndex = false;
445 	uint32 index = 0;
446 	for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) {
447 		const memory_type_range& range = sMemoryTypeRanges[i];
448 		if (range.base > base) {
449 			if (range.base - base < size && range.type != type) {
450 				dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory "
451 					"range intersects with existing one (%#llx, %#llx, %lu).\n",
452 					areaID, base, size, type, range.base, range.size,
453 					range.type);
454 				return B_BAD_VALUE;
455 			}
456 
457 			// found the insertion index
458 			if (!foundInsertionIndex) {
459 				index = i;
460 				foundInsertionIndex = true;
461 			}
462 			break;
463 		} else if (base - range.base < range.size && range.type != type) {
464 			dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): Memory "
465 				"range intersects with existing one (%#llx, %#llx, %lu).\n",
466 				areaID, base, size, type, range.base, range.size, range.type);
467 			return B_BAD_VALUE;
468 		}
469 	}
470 
471 	if (!foundInsertionIndex)
472 		index = sMemoryTypeRangeCount;
473 
474 	// make room for the new range
475 	if (index < sMemoryTypeRangeCount) {
476 		memmove(sMemoryTypeRanges + index + 1, sMemoryTypeRanges + index,
477 			(sMemoryTypeRangeCount - index) * sizeof(memory_type_range));
478 	}
479 	sMemoryTypeRangeCount++;
480 
481 	memory_type_range& rangeInfo = sMemoryTypeRanges[index];
482 	rangeInfo.base = base;
483 	rangeInfo.size = size;
484 	rangeInfo.type = type;
485 	rangeInfo.area = areaID;
486 
487 	uint64 range = kMinMemoryTypeRangeSize;
488 	status_t error;
489 	do {
490 		error = update_mtrrs();
491 		if (error == B_OK) {
492 			if (rangeInfo.size != size) {
493 				dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): "
494 					"update_mtrrs() succeeded only with simplified range: "
495 					"base: %#llx, size: %#llx\n", areaID, base, size, type,
496 					rangeInfo.base, rangeInfo.size);
497 			}
498 			return B_OK;
499 		}
500 
501 		// update_mtrrs() failed -- try to simplify (i.e. shrink) the range
502 		while (rangeInfo.size != 0) {
503 			if ((rangeInfo.base & range) != 0) {
504 				rangeInfo.base += range;
505 				rangeInfo.size -= range;
506 				// don't shift the range yet -- we might still have an
507 				// unaligned size
508 				break;
509 			}
510 			if ((rangeInfo.size & range) != 0) {
511 				rangeInfo.size -= range;
512 				range <<= 1;
513 				break;
514 			}
515 
516 			range <<= 1;
517 		}
518 	} while (rangeInfo.size > 0);
519 
520 	dprintf("add_memory_type_range(%ld, %#llx, %#llx, %lu): update_mtrrs() "
521 		"failed.\n", areaID, base, size, type);
522 	remove_memory_type_range_locked(index);
523 	return error;
524 }
525 
526 
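// Drops the memory type range registered for the given area (if any) and
// reprograms the MTRRs without it.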
527 static void
528 remove_memory_type_range(area_id areaID)
529 {
530 	MutexLocker locker(sMemoryTypeLock);
531 
532 	for (uint32 i = 0; i < sMemoryTypeRangeCount; i++) {
533 		if (sMemoryTypeRanges[i].area == areaID) {
534 			TRACE_MTRR("remove_memory_type_range(%ld, %#llx, %#llx)\n",
535 				areaID, sMemoryTypeRanges[i].base, sMemoryTypeRanges[i].size);
536 			remove_memory_type_range_locked(i);
537 			update_mtrrs();
538 				// TODO: It's actually possible that this call fails, since
539 				// compute_mtrrs() joins ranges and removing one might cause a
540 				// previously joined big simple range to be split into several
541 				// ranges (or just make it more complicated).
542 			return;
543 		}
544 	}
545 }
546 
547 
548 //	#pragma mark -
549 
550 
551 status_t
552 arch_vm_init(kernel_args *args)
553 {
554 	TRACE(("arch_vm_init: entry\n"));
555 	return B_OK;
556 }
557 
558 
559 /*!	Marks the DMA region as in-use and maps it into the kernel address space. */
560 status_t
561 arch_vm_init_post_area(kernel_args *args)
562 {
563 	area_id id;
564 
565 	TRACE(("arch_vm_init_post_area: entry\n"));
566 
567 	// account for the DMA region and mark its pages in-use
568 	vm_mark_page_range_inuse(0x0, 0xa0000 / B_PAGE_SIZE);
569 
570 	// map 0 - 0xa0000 directly
571 	id = map_physical_memory("dma_region", (void *)0x0, 0xa0000,
572 		B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
573 		&gDmaAddress);
574 	if (id < 0) {
575 		panic("arch_vm_init_post_area: unable to map dma region\n");
576 		return B_NO_MEMORY;
577 	}
578 
579 	return bios_init();
580 }
581 
582 
583 /*!	Gets rid of all as yet unmapped (and therefore now unused) page tables. */
584 status_t
585 arch_vm_init_end(kernel_args *args)
586 {
587 	TRACE(("arch_vm_init_end: entry\n"));
588 
589 	// throw away anything in the kernel_args.pgtable[] that's not yet mapped
590 	vm_free_unused_boot_loader_range(KERNEL_BASE,
591 		0x400000 * args->arch_args.num_pgtables);
592 
593 	return B_OK;
594 }
595 
596 
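/*!	Called once the CPU modules are available: queries the number of variable
	range MTRRs and requests write-back caching for all physical memory ranges
	known to the boot loader.
*/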
597 status_t
598 arch_vm_init_post_modules(kernel_args *args)
599 {
600 	// the x86 CPU modules are now accessible
601 
602 	sMemoryTypeRegisterCount = x86_count_mtrrs();
603 	if (sMemoryTypeRegisterCount == 0)
604 		return B_OK;
605 
606 	// not very likely, but play safe here
607 	if (sMemoryTypeRegisterCount > kMaxMemoryTypeRegisters)
608 		sMemoryTypeRegisterCount = kMaxMemoryTypeRegisters;
609 
610 	// set the physical memory ranges to write-back mode
611 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
612 		add_memory_type_range(-1, args->physical_memory_range[i].start,
613 			args->physical_memory_range[i].size, B_MTR_WB);
614 	}
615 
616 	return B_OK;
617 }
618 
619 
620 void
621 arch_vm_aspace_swap(struct VMAddressSpace *from, struct VMAddressSpace *to)
622 {
623 	// This function is only invoked when a userland thread is in the process
624 	// of dying. It switches to the kernel team and does whatever cleanup is
625 	// necessary (in case it is the team's main thread, it will delete the
626 	// team).
627 	// It is, however, not necessary to change the page directory. Userland
628 	// teams' page directories include all kernel mappings as well. Furthermore,
629 	// our arch-specific translation map data objects are reference-counted, so
630 	// they won't go away as long as they are still used on any CPU.
631 }
632 
633 
634 bool
635 arch_vm_supports_protection(uint32 protection)
636 {
637 	// x86 always has the same read/write properties for userland and the
638 	// kernel.
639 	// That's why we do not support user-read/kernel-write access. While the
640 	// other way around is not supported either, we don't care in this case
641 	// and give the kernel full access.
642 	if ((protection & (B_READ_AREA | B_WRITE_AREA)) == B_READ_AREA
643 		&& (protection & B_KERNEL_WRITE_AREA) != 0)
644 		return false;
645 
646 	return true;
647 }
648 
649 
650 void
651 arch_vm_unset_memory_type(struct VMArea *area)
652 {
653 	if (area->memory_type == 0)
654 		return;
655 
656 	remove_memory_type_range(area->id);
657 }
658 
659 
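/*!	Sets the memory type for the area's physical range and updates the MTRRs
	accordingly. \a type is one of the B_MTR_* constants, or 0 for the default
	type, in which case nothing needs to be done.
*/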
660 status_t
661 arch_vm_set_memory_type(struct VMArea *area, addr_t physicalBase,
662 	uint32 type)
663 {
664 	area->memory_type = type >> MEMORY_TYPE_SHIFT;
665 	return add_memory_type_range(area->id, physicalBase, area->Size(), type);
666 }
667