xref: /haiku/src/system/kernel/arch/x86/arch_int.cpp (revision 3be9edf8da228afd9fec0390f408c964766122aa)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <cpu.h>
12 #include <int.h>
13 #include <kscheduler.h>
14 #include <ksyscalls.h>
15 #include <smp.h>
16 #include <team.h>
17 #include <thread.h>
18 #include <vm.h>
19 #include <vm_priv.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/int.h>
23 #include <arch/smp.h>
24 #include <arch/user_debugger.h>
25 #include <arch/vm.h>
26 
27 #include <arch/x86/arch_apic.h>
28 #include <arch/x86/descriptors.h>
29 #include <arch/x86/vm86.h>
30 
31 #include "interrupts.h"
32 
33 #include <ACPI.h>
34 #include <safemode.h>
35 #include <string.h>
36 #include <stdio.h>
37 
38 
39 //#define TRACE_ARCH_INT
40 #ifdef TRACE_ARCH_INT
41 #	define TRACE(x) dprintf x
42 #else
43 #	define TRACE(x) ;
44 #endif
45 
46 
47 // Definitions for the PIC 8259 controller
48 // (this is not a complete list, only what we're actually using)
49 
50 #define PIC_MASTER_CONTROL		0x20
51 #define PIC_MASTER_MASK			0x21
52 #define PIC_SLAVE_CONTROL		0xa0
53 #define PIC_SLAVE_MASK			0xa1
54 #define PIC_MASTER_INIT1		PIC_MASTER_CONTROL
55 #define PIC_MASTER_INIT2		PIC_MASTER_MASK
56 #define PIC_MASTER_INIT3		PIC_MASTER_MASK
57 #define PIC_MASTER_INIT4		PIC_MASTER_MASK
58 #define PIC_SLAVE_INIT1			PIC_SLAVE_CONTROL
59 #define PIC_SLAVE_INIT2			PIC_SLAVE_MASK
60 #define PIC_SLAVE_INIT3			PIC_SLAVE_MASK
61 #define PIC_SLAVE_INIT4			PIC_SLAVE_MASK
62 
63 // the edge/level trigger control registers
64 #define PIC_MASTER_TRIGGER_MODE	0x4d0
65 #define PIC_SLAVE_TRIGGER_MODE	0x4d1
66 
67 #define PIC_INIT1				0x10
68 #define PIC_INIT1_SEND_INIT4	0x01
69 #define PIC_INIT3_IR2_IS_SLAVE	0x04
70 #define PIC_INIT3_SLAVE_ID2		0x02
71 #define PIC_INIT4_x86_MODE		0x01
72 
73 #define PIC_CONTROL3			0x08
74 #define PIC_CONTROL3_READ_ISR	0x03
75 #define PIC_CONTROL3_READ_IRR	0x02
76 
77 #define PIC_NON_SPECIFIC_EOI	0x20
78 
79 #define PIC_SLAVE_INT_BASE		8
80 #define PIC_NUM_INTS			0x0f
81 
82 
83 // Definitions for a 82093AA IO APIC controller
84 #define IO_APIC_IDENTIFICATION				0x00
85 #define IO_APIC_VERSION						0x01
86 #define IO_APIC_ARBITRATION					0x02
87 #define IO_APIC_REDIRECTION_TABLE			0x10 // entry = base + 2 * index
88 
89 // Fields for the version register
90 #define IO_APIC_VERSION_SHIFT				0
91 #define IO_APIC_VERSION_MASK				0xff
92 #define IO_APIC_MAX_REDIRECTION_ENTRY_SHIFT	16
93 #define IO_APIC_MAX_REDIRECTION_ENTRY_MASK	0xff
94 
95 // Fields of each redirection table entry
96 #define IO_APIC_DESTINATION_FIELD_SHIFT		56
97 #define IO_APIC_DESTINATION_FIELD_MASK		0x0f
98 #define IO_APIC_INTERRUPT_MASK_SHIFT		16
99 #define IO_APIC_INTERRUPT_MASKED			1
100 #define IO_APIC_INTERRUPT_UNMASKED			0
101 #define IO_APIC_TRIGGER_MODE_SHIFT			15
102 #define IO_APIC_TRIGGER_MODE_EDGE			0
103 #define IO_APIC_TRIGGER_MODE_LEVEL			1
104 #define IO_APIC_REMOTE_IRR_SHIFT			14
105 #define IO_APIC_PIN_POLARITY_SHIFT			13
106 #define IO_APIC_PIN_POLARITY_HIGH_ACTIVE	0
107 #define IO_APIC_PIN_POLARITY_LOW_ACTIVE		1
108 #define IO_APIC_DELIVERY_STATUS_SHIFT		12
109 #define IO_APIC_DELIVERY_STATUS_IDLE		0
110 #define IO_APIC_DELIVERY_STATUS_PENDING		1
111 #define IO_APIC_DESTINATION_MODE_SHIFT		11
112 #define IO_APIC_DESTINATION_MODE_PHYSICAL	0
113 #define IO_APIC_DESTINATION_MODE_LOGICAL	1
114 #define IO_APIC_DELIVERY_MODE_SHIFT			8
115 #define IO_APIC_DELIVERY_MODE_MASK			0x07
116 #define IO_APIC_DELIVERY_MODE_FIXED			0
117 #define IO_APIC_DELIVERY_MODE_LOWEST_PRIO	1
118 #define IO_APIC_DELIVERY_MODE_SMI			2
119 #define IO_APIC_DELIVERY_MODE_NMI			4
120 #define IO_APIC_DELIVERY_MODE_INIT			5
121 #define IO_APIC_DELIVERY_MODE_EXT_INT		7
122 #define IO_APIC_INTERRUPT_VECTOR_SHIFT		0
123 #define IO_APIC_INTERRUPT_VECTOR_MASK		0xff
124 
// Memory-mapped register window of the 82093AA I/O APIC: a register is
// accessed by writing its index to io_register_select and then reading or
// writing io_window_register.
typedef struct ioapic_s {
	volatile uint32	io_register_select;
	uint32			reserved[3];
	volatile uint32	io_window_register;
} ioapic;

// mapped I/O APIC registers; NULL until ioapic_init() maps them
static ioapic *sIOAPIC = NULL;
// index of the highest redirection table entry; updated from the version
// register in ioapic_init()
static uint32 sIOAPICMaxRedirectionEntry = 23;
// mapped local APIC registers (used for EOI when routing via the I/O APIC)
static void *sLocalAPIC = NULL;

// maps "normalized" IRQ numbers to I/O APIC pins (1:1 by default)
static uint32 sIRQToIOAPICPin[256];

// true once the I/O APIC has been selected for interrupt routing
bool gUsingIOAPIC = false;

// Function table abstracting the active interrupt controller
// (either the cascaded 8259 PICs or the I/O APIC).
typedef struct interrupt_controller_s {
	const char *name;
	void	(*enable_io_interrupt)(int32 num);
	void	(*disable_io_interrupt)(int32 num);
	void	(*configure_io_interrupt)(int32 num, uint32 config);
	bool	(*is_spurious_interrupt)(int32 num);
	void	(*end_of_interrupt)(int32 num);
} interrupt_controller;

// the controller currently in use; set by pic_init()/ioapic_init()
static const interrupt_controller *sCurrentPIC = NULL;
149 
// Human-readable names for the Intel-defined exception vectors 0-19,
// used by exception_name().
static const char *kInterruptNames[] = {
	/*  0 */ "Divide Error Exception",
	/*  1 */ "Debug Exception",
	/*  2 */ "NMI Interrupt",
	/*  3 */ "Breakpoint Exception",
	/*  4 */ "Overflow Exception",
	/*  5 */ "BOUND Range Exceeded Exception",
	/*  6 */ "Invalid Opcode Exception",
	/*  7 */ "Device Not Available Exception",
	/*  8 */ "Double Fault Exception",
	/*  9 */ "Coprocessor Segment Overrun",
	/* 10 */ "Invalid TSS Exception",
	/* 11 */ "Segment Not Present",
	/* 12 */ "Stack Fault Exception",
	/* 13 */ "General Protection Exception",
	/* 14 */ "Page-Fault Exception",
	/* 15 */ "-",
	/* 16 */ "x87 FPU Floating-Point Error",
	/* 17 */ "Alignment Check Exception",
	/* 18 */ "Machine-Check Exception",
	/* 19 */ "SIMD Floating-Point Exception",
};
// Derive the count from the table itself (was hard-coded 20) so the two
// cannot get out of sync when entries are added.
static const int kInterruptNameCount
	= (int)(sizeof(kInterruptNames) / sizeof(kInterruptNames[0]));
173 
#define MAX_ARGS 16

// One 8-byte IDT descriptor, stored as two 32-bit words; see set_gate()
// for the bit layout.
typedef struct {
	uint32 a, b;
} desc_table;
// per-CPU IDT; slot 0 is initialized from the kernel args in arch_int_init()
static desc_table* sIDTs[B_MAX_CPU_COUNT];

static uint32 sLevelTriggeredInterrupts = 0;
	// binary mask: 1 level, 0 edge

// table with functions handling respective interrupts
typedef void interrupt_handler_function(struct iframe* frame);
#define INTERRUPT_HANDLER_TABLE_SIZE 256
interrupt_handler_function* gInterruptHandlerTable[
	INTERRUPT_HANDLER_TABLE_SIZE];
189 
190 
191 /*!	Initializes a descriptor in an IDT.
192 */
193 static void
194 set_gate(desc_table *gate_addr, addr_t addr, int type, int dpl)
195 {
196 	unsigned int gate1; // first byte of gate desc
197 	unsigned int gate2; // second byte of gate desc
198 
199 	gate1 = (KERNEL_CODE_SEG << 16) | (0x0000ffff & addr);
200 	gate2 = (0xffff0000 & addr) | 0x8000 | (dpl << 13) | (type << 8);
201 
202 	gate_addr->a = gate1;
203 	gate_addr->b = gate2;
204 }
205 
206 
/*!	Initializes the descriptor for interrupt vector \a n in the IDT of the
	specified CPU to an interrupt-gate descriptor with the given procedure
	address.
	For CPUs other than the boot CPU it must not be called before
	arch_int_init_post_vm().
*/
static void
set_interrupt_gate(int32 cpu, int n, void (*addr)())
{
	// type 14: 32 bit interrupt gate, kernel privilege only
	set_gate(&sIDTs[cpu][n], (addr_t)addr, 14, DPL_KERNEL);
}
218 
219 
220 /*!	Initializes the descriptor for interrupt vector \a n in the IDT of the
221 	specified CPU to an trap-gate descriptor with the given procedure address.
222 	For CPUs other than the boot CPU it must not be called before
223 	arch_int_init_post_vm().
224 */
225 static void
226 set_trap_gate(int32 cpu, int n, void (*addr)())
227 {
228 	set_gate(&sIDTs[cpu][n], (unsigned int)addr, 15, DPL_USER);
229 }
230 
231 
232 /*!	Initializes the descriptor for interrupt vector \a n in the IDT of CPU
233 	\a cpu to a task-gate descripter referring to the TSS segment identified
234 	by TSS segment selector \a segment.
235 	For CPUs other than the boot CPU it must not be called before
236 	arch_int_init_post_vm() (arch_cpu_init_post_vm() is fine).
237 */
238 void
239 x86_set_task_gate(int32 cpu, int32 n, int32 segment)
240 {
241 	sIDTs[cpu][n].a = (segment << 16);
242 	sIDTs[cpu][n].b = 0x8000 | (0 << 13) | (0x5 << 8); // present, dpl 0, type 5
243 }
244 
245 
/*!	Returns the virtual IDT address for CPU \a cpu. */
void*
x86_get_idt(int32 cpu)
{
	return sIDTs[cpu];
}
252 
253 
254 // #pragma mark - PIC
255 
256 
257 /*!	Tests if the interrupt in-service register of the responsible
258 	PIC is set for interrupts 7 and 15, and if that's not the case,
259 	it must assume it's a spurious interrupt.
260 */
261 static bool
262 pic_is_spurious_interrupt(int32 num)
263 {
264 	int32 isr;
265 
266 	if (num != 7)
267 		return false;
268 
269 	// Note, detecting spurious interrupts on line 15 obviously doesn't
270 	// work correctly - and since those are extremely rare, anyway, we
271 	// just ignore them
272 
273 	out8(PIC_CONTROL3 | PIC_CONTROL3_READ_ISR, PIC_MASTER_CONTROL);
274 	isr = in8(PIC_MASTER_CONTROL);
275 	out8(PIC_CONTROL3 | PIC_CONTROL3_READ_IRR, PIC_MASTER_CONTROL);
276 
277 	return (isr & 0x80) == 0;
278 }
279 
280 
/*!	Sends a non-specified EOI (end of interrupt) notice to the PIC in
	question (or both of them).
	This clears the PIC interrupt in-service bit.
*/
static void
pic_end_of_interrupt(int32 num)
{
	if (num < 0 || num > PIC_NUM_INTS)
		return;

	// PIC 8259 controlled interrupt
	// lines 8-15 are serviced by the slave, which needs its own EOI
	if (num >= PIC_SLAVE_INT_BASE)
		out8(PIC_NON_SPECIFIC_EOI, PIC_SLAVE_CONTROL);

	// we always need to acknowledge the master PIC
	// (the slave cascades through the master's line 2)
	out8(PIC_NON_SPECIFIC_EOI, PIC_MASTER_CONTROL);
}
298 
299 
300 static void
301 pic_enable_io_interrupt(int32 num)
302 {
303 	// interrupt is specified "normalized"
304 	if (num < 0 || num > PIC_NUM_INTS)
305 		return;
306 
307 	// enable PIC 8259 controlled interrupt
308 
309 	TRACE(("pic_enable_io_interrupt: irq %ld\n", num));
310 
311 	if (num < PIC_SLAVE_INT_BASE)
312 		out8(in8(PIC_MASTER_MASK) & ~(1 << num), PIC_MASTER_MASK);
313 	else
314 		out8(in8(PIC_SLAVE_MASK) & ~(1 << (num - PIC_SLAVE_INT_BASE)), PIC_SLAVE_MASK);
315 }
316 
317 
318 static void
319 pic_disable_io_interrupt(int32 num)
320 {
321 	// interrupt is specified "normalized"
322 	// never disable slave pic line IRQ 2
323 	if (num < 0 || num > PIC_NUM_INTS || num == 2)
324 		return;
325 
326 	// disable PIC 8259 controlled interrupt
327 
328 	TRACE(("pic_disable_io_interrupt: irq %ld\n", num));
329 
330 	if (num < PIC_SLAVE_INT_BASE)
331 		out8(in8(PIC_MASTER_MASK) | (1 << num), PIC_MASTER_MASK);
332 	else
333 		out8(in8(PIC_SLAVE_MASK) | (1 << (num - PIC_SLAVE_INT_BASE)), PIC_SLAVE_MASK);
334 }
335 
336 
337 static void
338 pic_configure_io_interrupt(int32 num, uint32 config)
339 {
340 	uint8 value;
341 	int32 localBit;
342 	if (num < 0 || num > PIC_NUM_INTS || num == 2)
343 		return;
344 
345 	TRACE(("pic_configure_io_interrupt: irq %ld; config 0x%08lx\n", num, config));
346 
347 	if (num < PIC_SLAVE_INT_BASE) {
348 		value = in8(PIC_MASTER_TRIGGER_MODE);
349 		localBit = num;
350 	} else {
351 		value = in8(PIC_SLAVE_TRIGGER_MODE);
352 		localBit = num - PIC_SLAVE_INT_BASE;
353 	}
354 
355 	if (config & B_LEVEL_TRIGGERED)
356 		value |= 1 << localBit;
357 	else
358 		value &= ~(1 << localBit);
359 
360 	if (num < PIC_SLAVE_INT_BASE)
361 		out8(value, PIC_MASTER_TRIGGER_MODE);
362 	else
363 		out8(value, PIC_SLAVE_TRIGGER_MODE);
364 
365 	sLevelTriggeredInterrupts = in8(PIC_MASTER_TRIGGER_MODE)
366 		| (in8(PIC_SLAVE_TRIGGER_MODE) << 8);
367 }
368 
369 
/*!	Initializes the two cascaded 8259 PICs (standard ICW1-ICW4 sequence),
	remaps their vectors to start at ARCH_INTERRUPT_BASE, masks everything
	except the cascade line, snapshots the level/edge trigger modes and
	makes the PIC pair the active interrupt controller.
*/
static void
pic_init(void)
{
	static interrupt_controller picController = {
		"8259 PIC",
		&pic_enable_io_interrupt,
		&pic_disable_io_interrupt,
		&pic_configure_io_interrupt,
		&pic_is_spurious_interrupt,
		&pic_end_of_interrupt
	};

	// Start initialization sequence for the master and slave PICs
	out8(PIC_INIT1 | PIC_INIT1_SEND_INIT4, PIC_MASTER_INIT1);
	out8(PIC_INIT1 | PIC_INIT1_SEND_INIT4, PIC_SLAVE_INIT1);

	// Set start of interrupts to 0x20 for master, 0x28 for slave
	out8(ARCH_INTERRUPT_BASE, PIC_MASTER_INIT2);
	out8(ARCH_INTERRUPT_BASE + PIC_SLAVE_INT_BASE, PIC_SLAVE_INIT2);

	// Specify cascading through interrupt 2
	out8(PIC_INIT3_IR2_IS_SLAVE, PIC_MASTER_INIT3);
	out8(PIC_INIT3_SLAVE_ID2, PIC_SLAVE_INIT3);

	// Set both to operate in 8086 mode
	out8(PIC_INIT4_x86_MODE, PIC_MASTER_INIT4);
	out8(PIC_INIT4_x86_MODE, PIC_SLAVE_INIT4);

	out8(0xfb, PIC_MASTER_MASK);	// Mask off all interrupts (except slave pic line IRQ 2).
	out8(0xff, PIC_SLAVE_MASK); 	// Mask off interrupts on the slave.

	// determine which interrupts are level or edge triggered

#if 0
	// should set everything possible to level triggered
	out8(0xf8, PIC_MASTER_TRIGGER_MODE);
	out8(0xde, PIC_SLAVE_TRIGGER_MODE);
#endif

	sLevelTriggeredInterrupts = in8(PIC_MASTER_TRIGGER_MODE)
		| (in8(PIC_SLAVE_TRIGGER_MODE) << 8);

	TRACE(("PIC level trigger mode: 0x%08lx\n", sLevelTriggeredInterrupts));

	// make the pic controller the current one
	sCurrentPIC = &picController;
	gUsingIOAPIC = false;
}
418 
419 
420 // #pragma mark - I/O APIC
421 
422 
/*!	Reads the 32 bit I/O APIC register with the given index: the index is
	written to the select register, the value then appears in the window
	register. */
static inline uint32
ioapic_read_32(uint8 registerSelect)
{
	sIOAPIC->io_register_select = registerSelect;
	return sIOAPIC->io_window_register;
}
429 
430 
/*!	Writes the 32 bit I/O APIC register with the given index via the
	select/window register pair. */
static inline void
ioapic_write_32(uint8 registerSelect, uint32 value)
{
	sIOAPIC->io_register_select = registerSelect;
	sIOAPIC->io_window_register = value;
}
437 
438 
/*!	Reads a 64 bit I/O APIC value as two consecutive 32 bit register
	accesses, high word (index + 1) first. Note the two reads are not
	atomic with respect to the hardware. */
static inline uint64
ioapic_read_64(uint8 registerSelect)
{
	uint64 result;
	sIOAPIC->io_register_select = registerSelect + 1;
	result = sIOAPIC->io_window_register;
	result <<= 32;
	sIOAPIC->io_register_select = registerSelect;
	result |= sIOAPIC->io_window_register;
	return result;
}
450 
451 
/*!	Writes a 64 bit I/O APIC value as two consecutive 32 bit register
	accesses, low word first. Not atomic with respect to the hardware. */
static inline void
ioapic_write_64(uint8 registerSelect, uint64 value)
{
	sIOAPIC->io_register_select = registerSelect;
	sIOAPIC->io_window_register = (uint32)value;
	sIOAPIC->io_register_select = registerSelect + 1;
	sIOAPIC->io_window_register = (uint32)(value >> 32);
}
460 
461 
/*!	Returns whether the given (normalized) vector is the local APIC's
	spurious interrupt vector. */
static bool
ioapic_is_spurious_interrupt(int32 num)
{
	// the spurious interrupt vector is initialized to the max value in smp
	return num == 0xff - ARCH_INTERRUPT_BASE;
}
468 
469 
/*!	Acknowledges the interrupt by writing the local APIC's EOI register;
	the vector number is irrelevant (non-specific EOI). */
static void
ioapic_end_of_interrupt(int32 num)
{
	*(volatile uint32 *)((char *)sLocalAPIC + APIC_EOI) = 0;
}
475 
476 
477 static void
478 ioapic_enable_io_interrupt(int32 num)
479 {
480 	uint64 entry;
481 	int32 pin = sIRQToIOAPICPin[num];
482 	if (pin < 0 || pin > (int32)sIOAPICMaxRedirectionEntry)
483 		return;
484 
485 	TRACE(("ioapic_enable_io_interrupt: IRQ %ld -> pin %ld\n", num, pin));
486 
487 	entry = ioapic_read_64(IO_APIC_REDIRECTION_TABLE + pin * 2);
488 	entry &= ~(1 << IO_APIC_INTERRUPT_MASK_SHIFT);
489 	entry |= IO_APIC_INTERRUPT_UNMASKED << IO_APIC_INTERRUPT_MASK_SHIFT;
490 	ioapic_write_64(IO_APIC_REDIRECTION_TABLE + pin * 2, entry);
491 }
492 
493 
494 static void
495 ioapic_disable_io_interrupt(int32 num)
496 {
497 	uint64 entry;
498 	int32 pin = sIRQToIOAPICPin[num];
499 	if (pin < 0 || pin > (int32)sIOAPICMaxRedirectionEntry)
500 		return;
501 
502 	TRACE(("ioapic_disable_io_interrupt: IRQ %ld -> pin %ld\n", num, pin));
503 
504 	entry = ioapic_read_64(IO_APIC_REDIRECTION_TABLE + pin * 2);
505 	entry &= ~(1 << IO_APIC_INTERRUPT_MASK_SHIFT);
506 	entry |= IO_APIC_INTERRUPT_MASKED << IO_APIC_INTERRUPT_MASK_SHIFT;
507 	ioapic_write_64(IO_APIC_REDIRECTION_TABLE + pin * 2, entry);
508 }
509 
510 
511 static void
512 ioapic_configure_io_interrupt(int32 num, uint32 config)
513 {
514 	uint64 entry;
515 	int32 pin = sIRQToIOAPICPin[num];
516 	if (pin < 0 || pin > (int32)sIOAPICMaxRedirectionEntry)
517 		return;
518 
519 	TRACE(("ioapic_configure_io_interrupt: IRQ %ld -> pin %ld; config 0x%08lx\n",
520 		num, pin, config));
521 
522 	entry = ioapic_read_64(IO_APIC_REDIRECTION_TABLE + pin * 2);
523 	entry &= ~((1 << IO_APIC_TRIGGER_MODE_SHIFT)
524 		| (1 << IO_APIC_PIN_POLARITY_SHIFT)
525 		| (IO_APIC_INTERRUPT_VECTOR_MASK << IO_APIC_INTERRUPT_VECTOR_SHIFT));
526 
527 	if (config & B_LEVEL_TRIGGERED) {
528 		entry |= (IO_APIC_TRIGGER_MODE_LEVEL << IO_APIC_TRIGGER_MODE_SHIFT);
529 		sLevelTriggeredInterrupts |= (1 << num);
530 	} else {
531 		entry |= (IO_APIC_TRIGGER_MODE_EDGE << IO_APIC_TRIGGER_MODE_SHIFT);
532 		sLevelTriggeredInterrupts &= ~(1 << num);
533 	}
534 
535 	if (config & B_LOW_ACTIVE_POLARITY)
536 		entry |= (IO_APIC_PIN_POLARITY_LOW_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT);
537 	else
538 		entry |= (IO_APIC_PIN_POLARITY_HIGH_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT);
539 
540 	entry |= (num + ARCH_INTERRUPT_BASE) << IO_APIC_INTERRUPT_VECTOR_SHIFT;
541 	ioapic_write_64(IO_APIC_REDIRECTION_TABLE + pin * 2, entry);
542 }
543 
544 
545 static void
546 ioapic_init(kernel_args *args)
547 {
548 	static const interrupt_controller ioapicController = {
549 		"82093AA IOAPIC",
550 		&ioapic_enable_io_interrupt,
551 		&ioapic_disable_io_interrupt,
552 		&ioapic_configure_io_interrupt,
553 		&ioapic_is_spurious_interrupt,
554 		&ioapic_end_of_interrupt
555 	};
556 
557 	if (args->arch_args.apic == NULL) {
558 		dprintf("no local apic available\n");
559 		return;
560 	}
561 
562 	bool disableAPIC = get_safemode_boolean(B_SAFEMODE_DISABLE_APIC, false);
563 	if (disableAPIC) {
564 		args->arch_args.apic = NULL;
565 		return;
566 	}
567 
568 	// always map the local apic as it can be used for timers even if we
569 	// don't end up using the io apic
570 	sLocalAPIC = args->arch_args.apic;
571 	if (map_physical_memory("local apic", (void *)args->arch_args.apic_phys,
572 		B_PAGE_SIZE, B_EXACT_ADDRESS, B_KERNEL_READ_AREA
573 		| B_KERNEL_WRITE_AREA, &sLocalAPIC) < B_OK) {
574 		panic("mapping the local apic failed");
575 		return;
576 	}
577 
578 	if (args->arch_args.ioapic == NULL) {
579 		dprintf("no ioapic available, not using ioapics for interrupt routing\n");
580 		return;
581 	}
582 
583 	if (!get_safemode_boolean(B_SAFEMODE_DISABLE_IOAPIC, false)) {
584 		dprintf("ioapic explicitly disabled, not using ioapics for interrupt "
585 			"routing\n");
586 		return;
587 	}
588 
589 	// TODO: remove when the PCI IRQ routing through ACPI is available below
590 	return;
591 
592 	acpi_module_info *acpi;
593 	if (get_module(B_ACPI_MODULE_NAME, (module_info **)&acpi) != B_OK) {
594 		dprintf("acpi module not available, not configuring ioapic\n");
595 		return;
596 	}
597 
598 	// map in the ioapic
599 	sIOAPIC = (ioapic *)args->arch_args.ioapic;
600 	if (map_physical_memory("ioapic", (void *)args->arch_args.ioapic_phys,
601 			B_PAGE_SIZE, B_EXACT_ADDRESS,
602 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, (void **)&sIOAPIC) < 0) {
603 		panic("mapping the ioapic failed");
604 		return;
605 	}
606 
607 	uint32 version = ioapic_read_32(IO_APIC_VERSION);
608 	if (version == 0xffffffff) {
609 		dprintf("ioapic seems inaccessible, not using it\n");
610 		return;
611 	}
612 
613 	sLevelTriggeredInterrupts = 0;
614 	sIOAPICMaxRedirectionEntry
615 		= ((version >> IO_APIC_MAX_REDIRECTION_ENTRY_SHIFT)
616 			& IO_APIC_MAX_REDIRECTION_ENTRY_MASK);
617 
618 	// use the boot CPU as the target for all interrupts
619 	uint64 targetAPIC = args->arch_args.cpu_apic_id[0];
620 
621 	// program the interrupt vectors of the ioapic
622 	for (uint32 i = 0; i <= sIOAPICMaxRedirectionEntry; i++) {
623 		// initialize everything to deliver to the boot CPU in physical mode
624 		// and masked until explicitly enabled through enable_io_interrupt()
625 		uint64 entry = (targetAPIC << IO_APIC_DESTINATION_FIELD_SHIFT)
626 			| (IO_APIC_INTERRUPT_MASKED << IO_APIC_INTERRUPT_MASK_SHIFT)
627 			| (IO_APIC_DESTINATION_MODE_PHYSICAL << IO_APIC_DESTINATION_MODE_SHIFT)
628 			| ((i + ARCH_INTERRUPT_BASE) << IO_APIC_INTERRUPT_VECTOR_SHIFT);
629 
630 		if (i == 0) {
631 			// make redirection entry 0 into an external interrupt
632 			entry |= (IO_APIC_TRIGGER_MODE_EDGE << IO_APIC_TRIGGER_MODE_SHIFT)
633 				| (IO_APIC_PIN_POLARITY_HIGH_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT)
634 				| (IO_APIC_DELIVERY_MODE_EXT_INT << IO_APIC_DELIVERY_MODE_SHIFT);
635 		} else if (i < 16) {
636 			// make 1-15 ISA interrupts
637 			entry |= (IO_APIC_TRIGGER_MODE_EDGE << IO_APIC_TRIGGER_MODE_SHIFT)
638 				| (IO_APIC_PIN_POLARITY_HIGH_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT)
639 				| (IO_APIC_DELIVERY_MODE_FIXED << IO_APIC_DELIVERY_MODE_SHIFT);
640 		} else {
641 			// and the rest are PCI interrupts
642 			entry |= (IO_APIC_TRIGGER_MODE_LEVEL << IO_APIC_TRIGGER_MODE_SHIFT)
643 				| (IO_APIC_PIN_POLARITY_LOW_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT)
644 				| (IO_APIC_DELIVERY_MODE_FIXED << IO_APIC_DELIVERY_MODE_SHIFT);
645 			sLevelTriggeredInterrupts |= (1 << i);
646 		}
647 
648 		ioapic_write_64(IO_APIC_REDIRECTION_TABLE + 2 * i, entry);
649 	}
650 
651 	// setup default 1:1 mapping
652 	for (uint32 i = 0; i < 256; i++)
653 		sIRQToIOAPICPin[i] = i;
654 
655 	// TODO: here ACPI needs to be used to properly set up the PCI IRQ
656 	// routing.
657 
658 	// prefer the ioapic over the normal pic
659 	put_module(B_ACPI_MODULE_NAME);
660 	dprintf("using ioapic for interrupt routing\n");
661 	sCurrentPIC = &ioapicController;
662 	gUsingIOAPIC = true;
663 }
664 
665 
666 // #pragma mark -
667 
668 
void
arch_int_enable_io_interrupt(int irq)
{
	// delegate to the active interrupt controller (PIC or I/O APIC)
	sCurrentPIC->enable_io_interrupt(irq);
}
674 
675 
void
arch_int_disable_io_interrupt(int irq)
{
	// delegate to the active interrupt controller (PIC or I/O APIC)
	sCurrentPIC->disable_io_interrupt(irq);
}
681 
682 
void
arch_int_configure_io_interrupt(int irq, uint32 config)
{
	// delegate to the active interrupt controller (PIC or I/O APIC)
	sCurrentPIC->configure_io_interrupt(irq, config);
}
688 
689 
690 #undef arch_int_enable_interrupts
691 #undef arch_int_disable_interrupts
692 #undef arch_int_restore_interrupts
693 #undef arch_int_are_interrupts_enabled
694 
695 
void
arch_int_enable_interrupts(void)
{
	// out-of-line wrapper (the macro version was #undef'd above) for
	// callers needing a real function
	arch_int_enable_interrupts_inline();
}
701 
702 
int
arch_int_disable_interrupts(void)
{
	// out-of-line wrapper; returns the previous interrupt state for
	// arch_int_restore_interrupts()
	return arch_int_disable_interrupts_inline();
}
708 
709 
void
arch_int_restore_interrupts(int oldState)
{
	// out-of-line wrapper; restores the state returned by
	// arch_int_disable_interrupts()
	arch_int_restore_interrupts_inline(oldState);
}
715 
716 
bool
arch_int_are_interrupts_enabled(void)
{
	// out-of-line wrapper for the inline implementation
	return arch_int_are_interrupts_enabled_inline();
}
722 
723 
724 static const char *
725 exception_name(int number, char *buffer, int32 bufferSize)
726 {
727 	if (number >= 0 && number < kInterruptNameCount)
728 		return kInterruptNames[number];
729 
730 	snprintf(buffer, bufferSize, "exception %d", number);
731 	return buffer;
732 }
733 
734 
/*!	Panic handler for exceptions we have no mapping for. */
static void
invalid_exception(struct iframe* frame)
{
	struct thread* thread = thread_get_current_thread();
	char name[32];
	// thread may be NULL if we fault before threading is initialized
	panic("unhandled trap 0x%lx (%s) at ip 0x%lx, thread %ld!\n",
		frame->vector, exception_name(frame->vector, name, sizeof(name)),
		frame->eip, thread ? thread->id : -1);
}
744 
745 
/*!	Panic handler for exceptions that are always fatal (e.g. machine
	check), regardless of the mode we were running in. */
static void
fatal_exception(struct iframe *frame)
{
	char name[32];
	panic("Fatal exception \"%s\" occurred! Error code: 0x%lx\n",
		exception_name(frame->vector, name, sizeof(name)), frame->error_code);
}
753 
754 
755 static void
756 unexpected_exception(struct iframe* frame)
757 {
758 	debug_exception_type type;
759 	int signal;
760 
761 	if (IFRAME_IS_VM86(frame)) {
762 		x86_vm86_return((struct vm86_iframe *)frame, (frame->vector == 13) ?
763 			B_OK : B_ERROR);
764 		// won't get here
765 	}
766 
767 	switch (frame->vector) {
768 		case 0:		// Divide Error Exception (#DE)
769 			type = B_DIVIDE_ERROR;
770 			signal = SIGFPE;
771 			break;
772 
773 		case 4:		// Overflow Exception (#OF)
774 			type = B_OVERFLOW_EXCEPTION;
775 			signal = SIGTRAP;
776 			break;
777 
778 		case 5:		// BOUND Range Exceeded Exception (#BR)
779 			type = B_BOUNDS_CHECK_EXCEPTION;
780 			signal = SIGTRAP;
781 			break;
782 
783 		case 6:		// Invalid Opcode Exception (#UD)
784 			type = B_INVALID_OPCODE_EXCEPTION;
785 			signal = SIGILL;
786 			break;
787 
788 		case 13: 	// General Protection Exception (#GP)
789 			type = B_GENERAL_PROTECTION_FAULT;
790 			signal = SIGILL;
791 			break;
792 
793 		case 16: 	// x87 FPU Floating-Point Error (#MF)
794 			type = B_FLOATING_POINT_EXCEPTION;
795 			signal = SIGFPE;
796 			break;
797 
798 		case 17: 	// Alignment Check Exception (#AC)
799 			type = B_ALIGNMENT_EXCEPTION;
800 			signal = SIGTRAP;
801 			break;
802 
803 		case 19: 	// SIMD Floating-Point Exception (#XF)
804 			type = B_FLOATING_POINT_EXCEPTION;
805 			signal = SIGFPE;
806 			break;
807 
808 		default:
809 			invalid_exception(frame);
810 			return;
811 	}
812 
813 	if (IFRAME_IS_USER(frame)) {
814 		struct sigaction action;
815 		struct thread* thread = thread_get_current_thread();
816 
817 		enable_interrupts();
818 
819 		// If the thread has a signal handler for the signal, we simply send it
820 		// the signal. Otherwise we notify the user debugger first.
821 		if (sigaction(signal, NULL, &action) == 0
822 			&& action.sa_handler != SIG_DFL
823 			&& action.sa_handler != SIG_IGN) {
824 			send_signal(thread->id, signal);
825 		} else if (user_debug_exception_occurred(type, signal))
826 			send_signal(team_get_current_team_id(), signal);
827 	} else {
828 		char name[32];
829 		panic("Unexpected exception \"%s\" occurred in kernel mode! "
830 			"Error code: 0x%lx\n",
831 			exception_name(frame->vector, name, sizeof(name)),
832 			frame->error_code);
833 	}
834 }
835 
836 
/*!	Handler for the double fault exception (vector 8). */
void
x86_double_fault_exception(struct iframe* frame)
{
	int cpu = x86_double_fault_get_cpu();

	// The double fault iframe contains no useful information (as
	// per Intel's architecture spec). Thus we simply save the
	// information from the (unhandlable) exception which caused the
	// double in our iframe. This will result even in useful stack
	// traces. Only problem is that we trust that at least the
	// TSS is still accessible.
	struct tss *tss = &gCPU[cpu].arch.tss;

	// copy the register state at the time of the original fault from the TSS
	frame->cs = tss->cs;
	frame->es = tss->es;
	frame->ds = tss->ds;
	frame->fs = tss->fs;
	frame->gs = tss->gs;
	frame->eip = tss->eip;
	frame->ebp = tss->ebp;
	frame->esp = tss->esp;
	frame->eax = tss->eax;
	frame->ebx = tss->ebx;
	frame->ecx = tss->ecx;
	frame->edx = tss->edx;
	frame->esi = tss->esi;
	frame->edi = tss->edi;
	frame->flags = tss->eflags;

	// Use a special handler for page faults which avoids the triple fault
	// pitfalls.
	set_interrupt_gate(cpu, 14, &trap14_double_fault);

	debug_double_fault(cpu);
}
872 
873 
/*!	Page fault handler installed while the double fault debugger runs
	(see x86_double_fault_exception()). Redirects to the CPU's fault
	handler if one is set; otherwise prints diagnostics and loops forever,
	since re-entering the kernel debugger is not an option here. */
void
x86_page_fault_exception_double_fault(struct iframe* frame)
{
	uint32 cr2;
	asm("movl %%cr2, %0" : "=r" (cr2));
		// cr2 holds the faulting address

	// Only if this CPU has a fault handler, we're allowed to be here.
	cpu_ent& cpu = gCPU[x86_double_fault_get_cpu()];
	addr_t faultHandler = cpu.fault_handler;
	if (faultHandler != 0) {
		debug_set_page_fault_info(cr2, frame->eip,
			(frame->error_code & 0x2) != 0 ? DEBUG_PAGE_FAULT_WRITE : 0);
			// error code bit 1 set means it was a write access
		frame->eip = faultHandler;
		frame->ebp = cpu.fault_handler_stack_pointer;
		return;
	}

	// No fault handler. This is bad. Since we originally came from a double
	// fault, we don't try to reenter the kernel debugger. Instead we just
	// print the info we've got and enter an infinite loop.
	kprintf("Page fault in double fault debugger without fault handler! "
		"Touching address %p from eip %p. Entering infinite loop...\n",
		(void*)cr2, (void*)frame->eip);

	while (true);
}
900 
901 
/*!	Handler for page faults (vector 14). Distinguishes faults while the
	kernel debugger is running, faults with interrupts disabled (only legal
	when a fault handler is installed), and places where page faults are
	forbidden, before finally forwarding legitimate faults to the VM. */
static void
page_fault_exception(struct iframe* frame)
{
	struct thread *thread = thread_get_current_thread();
	uint32 cr2;
	addr_t newip;

	// cr2 holds the address whose access faulted
	asm("movl %%cr2, %0" : "=r" (cr2));

	if (debug_debugger_running()) {
		// If this CPU or this thread has a fault handler, we're allowed to be
		// here.
		if (thread != NULL) {
			cpu_ent* cpu = &gCPU[smp_get_current_cpu()];
			if (cpu->fault_handler != 0) {
				debug_set_page_fault_info(cr2, frame->eip,
					(frame->error_code & 0x2) != 0
						? DEBUG_PAGE_FAULT_WRITE : 0);
				frame->eip = cpu->fault_handler;
				frame->ebp = cpu->fault_handler_stack_pointer;
				return;
			}

			if (thread->fault_handler != 0) {
				kprintf("ERROR: thread::fault_handler used in kernel "
					"debugger!\n");
				debug_set_page_fault_info(cr2, frame->eip,
					(frame->error_code & 0x2) != 0
						? DEBUG_PAGE_FAULT_WRITE : 0);
				frame->eip = thread->fault_handler;
				return;
			}
		}

		// otherwise, not really
		panic("page fault in debugger without fault handler! Touching "
			"address %p from eip %p\n", (void *)cr2, (void *)frame->eip);
		return;
	} else if ((frame->flags & 0x200) == 0) {
		// interrupts disabled (IF flag, bit 9 of eflags, is clear)

		// If a page fault handler is installed, we're allowed to be here.
		// TODO: Now we are generally allowing user_memcpy() with interrupts
		// disabled, which in most cases is a bug. We should add some thread
		// flag allowing to explicitly indicate that this handling is desired.
		if (thread && thread->fault_handler != 0) {
			if (frame->eip != thread->fault_handler) {
				frame->eip = thread->fault_handler;
				return;
			}

			// The fault happened at the fault handler address. This is a
			// certain infinite loop.
			panic("page fault, interrupts disabled, fault handler loop. "
				"Touching address %p from eip %p\n", (void*)cr2,
				(void*)frame->eip);
		}

		// If we are not running the kernel startup the page fault was not
		// allowed to happen and we must panic.
		panic("page fault, but interrupts were disabled. Touching address "
			"%p from eip %p\n", (void *)cr2, (void *)frame->eip);
		return;
	} else if (thread != NULL && thread->page_faults_allowed < 1) {
		panic("page fault not allowed at this place. Touching address "
			"%p from eip %p\n", (void *)cr2, (void *)frame->eip);
		return;
	}

	enable_interrupts();

	// let the VM resolve the fault (e.g. by mapping in the page)
	vm_page_fault(cr2, frame->eip,
		(frame->error_code & 0x2) != 0,	// write access
		(frame->error_code & 0x4) != 0,	// userland
		&newip);
	if (newip != 0) {
		// the page fault handler wants us to modify the iframe to set the
		// IP the cpu will return to to be this ip
		frame->eip = newip;
	}
}
983 
984 
/*!	Dispatcher for hardware interrupts. Filters spurious interrupts, sends
	the end-of-interrupt at the right time (before the handler for edge
	triggered, after it for level triggered interrupts, so a still-asserted
	level line doesn't immediately re-trigger), and honors reschedule
	requests and post-interrupt callbacks. */
static void
hardware_interrupt(struct iframe* frame)
{
	// normalize the vector to the IRQ number
	int32 vector = frame->vector - ARCH_INTERRUPT_BASE;
	bool levelTriggered = false;
	int ret;
	struct thread* thread = thread_get_current_thread();

	if (sCurrentPIC->is_spurious_interrupt(vector)) {
		TRACE(("got spurious interrupt at vector %ld\n", vector));
		return;
	}

	// sLevelTriggeredInterrupts only covers the first 32 vectors
	if (vector < 32)
		levelTriggered = (sLevelTriggeredInterrupts & (1 << vector)) != 0;

	if (!levelTriggered)
		sCurrentPIC->end_of_interrupt(vector);

	ret = int_io_interrupt_handler(vector, levelTriggered);

	if (levelTriggered)
		sCurrentPIC->end_of_interrupt(vector);

	if (ret == B_INVOKE_SCHEDULER || thread->cpu->invoke_scheduler) {
		cpu_status state = disable_interrupts();
		GRAB_THREAD_LOCK();

		scheduler_reschedule();

		RELEASE_THREAD_LOCK();
		restore_interrupts(state);
	} else if (thread->post_interrupt_callback != NULL) {
		// run the deferred callback exactly once, clearing it first
		void (*callback)(void*) = thread->post_interrupt_callback;
		void* data = thread->post_interrupt_data;

		thread->post_interrupt_callback = NULL;
		thread->post_interrupt_data = NULL;

		callback(data);
	}
}
1027 
1028 
status_t
arch_int_init(struct kernel_args *args)
{
	// Early interrupt initialization for the boot CPU: installs the IDT
	// gates for CPU exceptions, hardware interrupts and syscalls, sets up
	// the legacy PIC, and fills the kernel's interrupt handler table.
	int i;
	interrupt_handler_function** table;

	// set the global sIDT variable
	sIDTs[0] = (desc_table *)args->arch_args.vir_idt;

	// setup the standard programmable interrupt controller
	pic_init();

	// Install gates for the CPU exception vectors (0-19). Interrupt gates
	// clear IF on entry; trap gates (vector 3, and 98/99 below) leave
	// interrupts enabled while the handler runs.
	set_interrupt_gate(0, 0,  &trap0);
	set_interrupt_gate(0, 1,  &trap1);
	set_interrupt_gate(0, 2,  &trap2);
	set_trap_gate(0, 3,  &trap3);
	set_interrupt_gate(0, 4,  &trap4);
	set_interrupt_gate(0, 5,  &trap5);
	set_interrupt_gate(0, 6,  &trap6);
	set_interrupt_gate(0, 7,  &trap7);
	// trap8 (double fault) is set in arch_cpu.c
	set_interrupt_gate(0, 9,  &trap9);
	set_interrupt_gate(0, 10,  &trap10);
	set_interrupt_gate(0, 11,  &trap11);
	set_interrupt_gate(0, 12,  &trap12);
	set_interrupt_gate(0, 13,  &trap13);
	set_interrupt_gate(0, 14,  &trap14);
//	set_interrupt_gate(0, 15,  &trap15);
	set_interrupt_gate(0, 16,  &trap16);
	set_interrupt_gate(0, 17,  &trap17);
	set_interrupt_gate(0, 18,  &trap18);
	set_interrupt_gate(0, 19,  &trap19);

	// Hardware interrupt vectors, remapped to start at ARCH_INTERRUPT_BASE
	// (32) so they don't collide with the exception vectors above.
	set_interrupt_gate(0, 32,  &trap32);
	set_interrupt_gate(0, 33,  &trap33);
	set_interrupt_gate(0, 34,  &trap34);
	set_interrupt_gate(0, 35,  &trap35);
	set_interrupt_gate(0, 36,  &trap36);
	set_interrupt_gate(0, 37,  &trap37);
	set_interrupt_gate(0, 38,  &trap38);
	set_interrupt_gate(0, 39,  &trap39);
	set_interrupt_gate(0, 40,  &trap40);
	set_interrupt_gate(0, 41,  &trap41);
	set_interrupt_gate(0, 42,  &trap42);
	set_interrupt_gate(0, 43,  &trap43);
	set_interrupt_gate(0, 44,  &trap44);
	set_interrupt_gate(0, 45,  &trap45);
	set_interrupt_gate(0, 46,  &trap46);
	set_interrupt_gate(0, 47,  &trap47);
	set_interrupt_gate(0, 48,  &trap48);
	set_interrupt_gate(0, 49,  &trap49);
	set_interrupt_gate(0, 50,  &trap50);
	set_interrupt_gate(0, 51,  &trap51);
	set_interrupt_gate(0, 52,  &trap52);
	set_interrupt_gate(0, 53,  &trap53);
	set_interrupt_gate(0, 54,  &trap54);
	set_interrupt_gate(0, 55,  &trap55);

	// trap gates: reachable from userland (syscall entry points)
	set_trap_gate(0, 98, &trap98);	// for performance testing only
	set_trap_gate(0, 99, &trap99);

	// high vectors, used for inter-processor interrupts (see arch/smp)
	set_interrupt_gate(0, 251, &trap251);
	set_interrupt_gate(0, 252, &trap252);
	set_interrupt_gate(0, 253, &trap253);
	set_interrupt_gate(0, 254, &trap254);
	set_interrupt_gate(0, 255, &trap255);

	// init interrupt handler table
	table = gInterruptHandlerTable;

	// defaults: exceptions are invalid unless overridden below, everything
	// at or above ARCH_INTERRUPT_BASE is a hardware interrupt
	for (i = 0; i < ARCH_INTERRUPT_BASE; i++)
		table[i] = invalid_exception;
	for (i = ARCH_INTERRUPT_BASE; i < INTERRUPT_HANDLER_TABLE_SIZE; i++)
		table[i] = hardware_interrupt;

	// specific exception handlers; vector 15 is reserved by the CPU and
	// intentionally keeps the invalid_exception default
	table[0] = unexpected_exception;	// Divide Error Exception (#DE)
	table[1] = x86_handle_debug_exception; // Debug Exception (#DB)
	table[2] = fatal_exception;			// NMI Interrupt
	table[3] = x86_handle_breakpoint_exception; // Breakpoint Exception (#BP)
	table[4] = unexpected_exception;	// Overflow Exception (#OF)
	table[5] = unexpected_exception;	// BOUND Range Exceeded Exception (#BR)
	table[6] = unexpected_exception;	// Invalid Opcode Exception (#UD)
	table[7] = fatal_exception;			// Device Not Available Exception (#NM)
	table[8] = x86_double_fault_exception; // Double Fault Exception (#DF)
	table[9] = fatal_exception;			// Coprocessor Segment Overrun
	table[10] = fatal_exception;		// Invalid TSS Exception (#TS)
	table[11] = fatal_exception;		// Segment Not Present (#NP)
	table[12] = fatal_exception;		// Stack Fault Exception (#SS)
	table[13] = unexpected_exception;	// General Protection Exception (#GP)
	table[14] = page_fault_exception;	// Page-Fault Exception (#PF)
	table[16] = unexpected_exception;	// x87 FPU Floating-Point Error (#MF)
	table[17] = unexpected_exception;	// Alignment Check Exception (#AC)
	table[18] = fatal_exception;		// Machine-Check Exception (#MC)
	table[19] = unexpected_exception;	// SIMD Floating-Point Exception (#XF)

	return B_OK;
}
1127 
1128 
1129 status_t
1130 arch_int_init_post_vm(struct kernel_args *args)
1131 {
1132 	ioapic_init(args);
1133 
1134 	// create IDT area for the boot CPU
1135 	area_id area = create_area("idt", (void**)&sIDTs[0], B_EXACT_ADDRESS,
1136 		B_PAGE_SIZE, B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1137 	if (area < 0)
1138 		return area;
1139 
1140 	// create IDTs for the off-boot CPU
1141 	size_t idtSize = 256 * 8;
1142 		// 256 8 bytes-sized descriptors
1143 	int32 cpuCount = smp_get_num_cpus();
1144 	if (cpuCount > 0) {
1145 		size_t areaSize = ROUNDUP(cpuCount * idtSize, B_PAGE_SIZE);
1146 		desc_table* idt;
1147 		area = create_area("idt", (void**)&idt, B_ANY_KERNEL_ADDRESS,
1148 			areaSize, B_CONTIGUOUS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1149 		if (area < 0)
1150 			return area;
1151 
1152 		for (int32 i = 1; i < cpuCount; i++) {
1153 			sIDTs[i] = idt;
1154 			memcpy(idt, sIDTs[0], idtSize);
1155 			idt += 256;
1156 			// The CPU's IDTR will be set in arch_cpu_init_percpu().
1157 		}
1158 	}
1159 
1160 	return area >= B_OK ? B_OK : area;
1161 }
1162 
1163 
status_t
arch_int_init_post_device_manager(struct kernel_args *args)
{
	// Nothing left to do on x86 at this stage; all interrupt controller
	// setup happened in arch_int_init() and arch_int_init_post_vm().
	return B_OK;
}
1169