xref: /haiku/src/system/kernel/arch/x86/arch_int.cpp (revision b4e5e4982360e684c5a13d227b9a958dbe725554)
1 /*
2  * Copyright 2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <cpu.h>
12 #include <int.h>
13 #include <kscheduler.h>
14 #include <ksyscalls.h>
15 #include <smp.h>
16 #include <team.h>
17 #include <thread.h>
18 #include <vm/vm.h>
19 #include <vm/vm_priv.h>
20 
21 #include <arch/cpu.h>
22 #include <arch/int.h>
23 #include <arch/smp.h>
24 #include <arch/user_debugger.h>
25 #include <arch/vm.h>
26 
27 #include <arch/x86/arch_apic.h>
28 #include <arch/x86/descriptors.h>
29 #include <arch/x86/vm86.h>
30 
31 #include "interrupts.h"
32 
33 #include <ACPI.h>
34 #include <safemode.h>
35 #include <string.h>
36 #include <stdio.h>
37 
38 
39 //#define TRACE_ARCH_INT
40 #ifdef TRACE_ARCH_INT
41 #	define TRACE(x) dprintf x
42 #else
43 #	define TRACE(x) ;
44 #endif
45 
46 
47 // Definitions for the PIC 8259 controller
48 // (this is not a complete list, only what we're actually using)
49 
50 #define PIC_MASTER_CONTROL		0x20
51 #define PIC_MASTER_MASK			0x21
52 #define PIC_SLAVE_CONTROL		0xa0
53 #define PIC_SLAVE_MASK			0xa1
54 #define PIC_MASTER_INIT1		PIC_MASTER_CONTROL
55 #define PIC_MASTER_INIT2		PIC_MASTER_MASK
56 #define PIC_MASTER_INIT3		PIC_MASTER_MASK
57 #define PIC_MASTER_INIT4		PIC_MASTER_MASK
58 #define PIC_SLAVE_INIT1			PIC_SLAVE_CONTROL
59 #define PIC_SLAVE_INIT2			PIC_SLAVE_MASK
60 #define PIC_SLAVE_INIT3			PIC_SLAVE_MASK
61 #define PIC_SLAVE_INIT4			PIC_SLAVE_MASK
62 
63 // the edge/level trigger control registers
64 #define PIC_MASTER_TRIGGER_MODE	0x4d0
65 #define PIC_SLAVE_TRIGGER_MODE	0x4d1
66 
67 #define PIC_INIT1				0x10
68 #define PIC_INIT1_SEND_INIT4	0x01
69 #define PIC_INIT3_IR2_IS_SLAVE	0x04
70 #define PIC_INIT3_SLAVE_ID2		0x02
71 #define PIC_INIT4_x86_MODE		0x01
72 
73 #define PIC_CONTROL3			0x08
74 #define PIC_CONTROL3_READ_ISR	0x03
75 #define PIC_CONTROL3_READ_IRR	0x02
76 
77 #define PIC_NON_SPECIFIC_EOI	0x20
78 
79 #define PIC_SLAVE_INT_BASE		8
80 #define PIC_NUM_INTS			0x0f
81 
82 
83 // Definitions for a 82093AA IO APIC controller
84 #define IO_APIC_IDENTIFICATION				0x00
85 #define IO_APIC_VERSION						0x01
86 #define IO_APIC_ARBITRATION					0x02
87 #define IO_APIC_REDIRECTION_TABLE			0x10 // entry = base + 2 * index
88 
89 // Fields for the version register
90 #define IO_APIC_VERSION_SHIFT				0
91 #define IO_APIC_VERSION_MASK				0xff
92 #define IO_APIC_MAX_REDIRECTION_ENTRY_SHIFT	16
93 #define IO_APIC_MAX_REDIRECTION_ENTRY_MASK	0xff
94 
95 // Fields of each redirection table entry
96 #define IO_APIC_DESTINATION_FIELD_SHIFT		56
97 #define IO_APIC_DESTINATION_FIELD_MASK		0x0f
98 #define IO_APIC_INTERRUPT_MASK_SHIFT		16
99 #define IO_APIC_INTERRUPT_MASKED			1
100 #define IO_APIC_INTERRUPT_UNMASKED			0
101 #define IO_APIC_TRIGGER_MODE_SHIFT			15
102 #define IO_APIC_TRIGGER_MODE_EDGE			0
103 #define IO_APIC_TRIGGER_MODE_LEVEL			1
104 #define IO_APIC_REMOTE_IRR_SHIFT			14
105 #define IO_APIC_PIN_POLARITY_SHIFT			13
106 #define IO_APIC_PIN_POLARITY_HIGH_ACTIVE	0
107 #define IO_APIC_PIN_POLARITY_LOW_ACTIVE		1
108 #define IO_APIC_DELIVERY_STATUS_SHIFT		12
109 #define IO_APIC_DELIVERY_STATUS_IDLE		0
110 #define IO_APIC_DELIVERY_STATUS_PENDING		1
111 #define IO_APIC_DESTINATION_MODE_SHIFT		11
112 #define IO_APIC_DESTINATION_MODE_PHYSICAL	0
113 #define IO_APIC_DESTINATION_MODE_LOGICAL	1
114 #define IO_APIC_DELIVERY_MODE_SHIFT			8
115 #define IO_APIC_DELIVERY_MODE_MASK			0x07
116 #define IO_APIC_DELIVERY_MODE_FIXED			0
117 #define IO_APIC_DELIVERY_MODE_LOWEST_PRIO	1
118 #define IO_APIC_DELIVERY_MODE_SMI			2
119 #define IO_APIC_DELIVERY_MODE_NMI			4
120 #define IO_APIC_DELIVERY_MODE_INIT			5
121 #define IO_APIC_DELIVERY_MODE_EXT_INT		7
122 #define IO_APIC_INTERRUPT_VECTOR_SHIFT		0
123 #define IO_APIC_INTERRUPT_VECTOR_MASK		0xff
124 
// Register layout of the memory-mapped 82093AA I/O APIC. Registers are
// accessed indirectly: write the register index to io_register_select,
// then read/write the value through io_window_register (at offset 0x10).
typedef struct ioapic_s {
	volatile uint32	io_register_select;
	uint32			reserved[3];
	volatile uint32	io_window_register;
} ioapic;

static ioapic *sIOAPIC = NULL;
	// virtual mapping of the I/O APIC registers; set up in ioapic_init()
static uint32 sIOAPICMaxRedirectionEntry = 23;
	// highest valid redirection table index (re-read from the version
	// register in ioapic_init())
static void *sLocalAPIC = NULL;
	// virtual mapping of the local APIC; used for the EOI register

static uint32 sIRQToIOAPICPin[256];
	// maps "normalized" IRQ numbers to I/O APIC pins (1:1 by default)

bool gUsingIOAPIC = false;
	// true once the I/O APIC has been made the active controller
138 
// Operations table abstracting the active interrupt controller (legacy
// 8259 PIC or I/O APIC). Interrupt numbers passed to these hooks are
// "normalized", i.e. relative to ARCH_INTERRUPT_BASE.
typedef struct interrupt_controller_s {
	const char *name;
	void	(*enable_io_interrupt)(int32 num);
	void	(*disable_io_interrupt)(int32 num);
	void	(*configure_io_interrupt)(int32 num, uint32 config);
	bool	(*is_spurious_interrupt)(int32 num);
	void	(*end_of_interrupt)(int32 num);
} interrupt_controller;

static const interrupt_controller *sCurrentPIC = NULL;
	// controller currently in use; set by pic_init() or ioapic_init()
149 
// Human-readable names for the architecture-defined exception vectors 0-19,
// used by exception_name() when formatting panic messages.
static const char *kInterruptNames[] = {
	/*  0 */ "Divide Error Exception",
	/*  1 */ "Debug Exception",
	/*  2 */ "NMI Interrupt",
	/*  3 */ "Breakpoint Exception",
	/*  4 */ "Overflow Exception",
	/*  5 */ "BOUND Range Exceeded Exception",
	/*  6 */ "Invalid Opcode Exception",
	/*  7 */ "Device Not Available Exception",
	/*  8 */ "Double Fault Exception",
	/*  9 */ "Coprocessor Segment Overrun",
	/* 10 */ "Invalid TSS Exception",
	/* 11 */ "Segment Not Present",
	/* 12 */ "Stack Fault Exception",
	/* 13 */ "General Protection Exception",
	/* 14 */ "Page-Fault Exception",
	/* 15 */ "-",
	/* 16 */ "x87 FPU Floating-Point Error",
	/* 17 */ "Alignment Check Exception",
	/* 18 */ "Machine-Check Exception",
	/* 19 */ "SIMD Floating-Point Exception",
};
// Derive the count from the table itself so the two cannot drift apart
// when entries are added (the literal 20 previously had to be kept in
// sync by hand).
static const int kInterruptNameCount
	= sizeof(kInterruptNames) / sizeof(kInterruptNames[0]);
173 
#define MAX_ARGS 16
	// NOTE(review): appears unused in this chunk — presumably the syscall
	// argument limit; confirm against the syscall dispatch code

// Raw 8-byte descriptor as stored in an IDT; 'a' holds the low 32 bits,
// 'b' the high 32 bits of the descriptor.
typedef struct {
	uint32 a, b;
} desc_table;
static desc_table* sIDTs[B_MAX_CPU_COUNT];
	// per-CPU IDTs; entry 0 is taken from kernel_args in arch_int_init()

static uint32 sLevelTriggeredInterrupts = 0;
	// binary mask: 1 level, 0 edge

// table with functions handling respective interrupts
typedef void interrupt_handler_function(struct iframe* frame);
#define INTERRUPT_HANDLER_TABLE_SIZE 256
interrupt_handler_function* gInterruptHandlerTable[
	INTERRUPT_HANDLER_TABLE_SIZE];
189 
190 
191 /*!	Initializes a descriptor in an IDT.
192 */
193 static void
194 set_gate(desc_table *gate_addr, addr_t addr, int type, int dpl)
195 {
196 	unsigned int gate1; // first byte of gate desc
197 	unsigned int gate2; // second byte of gate desc
198 
199 	gate1 = (KERNEL_CODE_SEG << 16) | (0x0000ffff & addr);
200 	gate2 = (0xffff0000 & addr) | 0x8000 | (dpl << 13) | (type << 8);
201 
202 	gate_addr->a = gate1;
203 	gate_addr->b = gate2;
204 }
205 
206 
207 /*!	Initializes the descriptor for interrupt vector \a n in the IDT of the
208 	specified CPU to an interrupt-gate descriptor with the given procedure
209 	address.
210 	For CPUs other than the boot CPU it must not be called before
211 	arch_int_init_post_vm().
212 */
213 static void
214 set_interrupt_gate(int32 cpu, int n, void (*addr)())
215 {
216 	set_gate(&sIDTs[cpu][n], (addr_t)addr, 14, DPL_KERNEL);
217 }
218 
219 
220 /*!	Initializes the descriptor for interrupt vector \a n in the IDT of the
221 	specified CPU to an trap-gate descriptor with the given procedure address.
222 	For CPUs other than the boot CPU it must not be called before
223 	arch_int_init_post_vm().
224 */
225 static void
226 set_trap_gate(int32 cpu, int n, void (*addr)())
227 {
228 	set_gate(&sIDTs[cpu][n], (unsigned int)addr, 15, DPL_USER);
229 }
230 
231 
232 /*!	Initializes the descriptor for interrupt vector \a n in the IDT of CPU
233 	\a cpu to a task-gate descripter referring to the TSS segment identified
234 	by TSS segment selector \a segment.
235 	For CPUs other than the boot CPU it must not be called before
236 	arch_int_init_post_vm() (arch_cpu_init_post_vm() is fine).
237 */
238 void
239 x86_set_task_gate(int32 cpu, int32 n, int32 segment)
240 {
241 	sIDTs[cpu][n].a = (segment << 16);
242 	sIDTs[cpu][n].b = 0x8000 | (0 << 13) | (0x5 << 8); // present, dpl 0, type 5
243 }
244 
245 
246 /*!	Returns the virtual IDT address for CPU \a cpu. */
247 void*
248 x86_get_idt(int32 cpu)
249 {
250 	return sIDTs[cpu];
251 }
252 
253 
254 // #pragma mark - PIC
255 
256 
257 /*!	Tests if the interrupt in-service register of the responsible
258 	PIC is set for interrupts 7 and 15, and if that's not the case,
259 	it must assume it's a spurious interrupt.
260 */
261 static bool
262 pic_is_spurious_interrupt(int32 num)
263 {
264 	int32 isr;
265 
266 	if (num != 7)
267 		return false;
268 
269 	// Note, detecting spurious interrupts on line 15 obviously doesn't
270 	// work correctly - and since those are extremely rare, anyway, we
271 	// just ignore them
272 
273 	out8(PIC_CONTROL3 | PIC_CONTROL3_READ_ISR, PIC_MASTER_CONTROL);
274 	isr = in8(PIC_MASTER_CONTROL);
275 	out8(PIC_CONTROL3 | PIC_CONTROL3_READ_IRR, PIC_MASTER_CONTROL);
276 
277 	return (isr & 0x80) == 0;
278 }
279 
280 
281 /*!	Sends a non-specified EOI (end of interrupt) notice to the PIC in
282 	question (or both of them).
283 	This clears the PIC interrupt in-service bit.
284 */
285 static void
286 pic_end_of_interrupt(int32 num)
287 {
288 	if (num < 0 || num > PIC_NUM_INTS)
289 		return;
290 
291 	// PIC 8259 controlled interrupt
292 	if (num >= PIC_SLAVE_INT_BASE)
293 		out8(PIC_NON_SPECIFIC_EOI, PIC_SLAVE_CONTROL);
294 
295 	// we always need to acknowledge the master PIC
296 	out8(PIC_NON_SPECIFIC_EOI, PIC_MASTER_CONTROL);
297 }
298 
299 
300 static void
301 pic_enable_io_interrupt(int32 num)
302 {
303 	// interrupt is specified "normalized"
304 	if (num < 0 || num > PIC_NUM_INTS)
305 		return;
306 
307 	// enable PIC 8259 controlled interrupt
308 
309 	TRACE(("pic_enable_io_interrupt: irq %ld\n", num));
310 
311 	if (num < PIC_SLAVE_INT_BASE)
312 		out8(in8(PIC_MASTER_MASK) & ~(1 << num), PIC_MASTER_MASK);
313 	else
314 		out8(in8(PIC_SLAVE_MASK) & ~(1 << (num - PIC_SLAVE_INT_BASE)), PIC_SLAVE_MASK);
315 }
316 
317 
318 static void
319 pic_disable_io_interrupt(int32 num)
320 {
321 	// interrupt is specified "normalized"
322 	// never disable slave pic line IRQ 2
323 	if (num < 0 || num > PIC_NUM_INTS || num == 2)
324 		return;
325 
326 	// disable PIC 8259 controlled interrupt
327 
328 	TRACE(("pic_disable_io_interrupt: irq %ld\n", num));
329 
330 	if (num < PIC_SLAVE_INT_BASE)
331 		out8(in8(PIC_MASTER_MASK) | (1 << num), PIC_MASTER_MASK);
332 	else
333 		out8(in8(PIC_SLAVE_MASK) | (1 << (num - PIC_SLAVE_INT_BASE)), PIC_SLAVE_MASK);
334 }
335 
336 
337 static void
338 pic_configure_io_interrupt(int32 num, uint32 config)
339 {
340 	uint8 value;
341 	int32 localBit;
342 	if (num < 0 || num > PIC_NUM_INTS || num == 2)
343 		return;
344 
345 	TRACE(("pic_configure_io_interrupt: irq %ld; config 0x%08lx\n", num, config));
346 
347 	if (num < PIC_SLAVE_INT_BASE) {
348 		value = in8(PIC_MASTER_TRIGGER_MODE);
349 		localBit = num;
350 	} else {
351 		value = in8(PIC_SLAVE_TRIGGER_MODE);
352 		localBit = num - PIC_SLAVE_INT_BASE;
353 	}
354 
355 	if (config & B_LEVEL_TRIGGERED)
356 		value |= 1 << localBit;
357 	else
358 		value &= ~(1 << localBit);
359 
360 	if (num < PIC_SLAVE_INT_BASE)
361 		out8(value, PIC_MASTER_TRIGGER_MODE);
362 	else
363 		out8(value, PIC_SLAVE_TRIGGER_MODE);
364 
365 	sLevelTriggeredInterrupts = in8(PIC_MASTER_TRIGGER_MODE)
366 		| (in8(PIC_SLAVE_TRIGGER_MODE) << 8);
367 }
368 
369 
/*!	Programs the 8259 master/slave PIC pair via the standard four-step
	initialization sequence, remaps their vectors above the CPU exception
	range, masks all lines, and makes the PIC the active controller.
	The out8() sequence below is order-sensitive (ICW1..ICW4 per chip).
*/
static void
pic_init(void)
{
	static interrupt_controller picController = {
		"8259 PIC",
		&pic_enable_io_interrupt,
		&pic_disable_io_interrupt,
		&pic_configure_io_interrupt,
		&pic_is_spurious_interrupt,
		&pic_end_of_interrupt
	};

	// Start initialization sequence for the master and slave PICs
	out8(PIC_INIT1 | PIC_INIT1_SEND_INIT4, PIC_MASTER_INIT1);
	out8(PIC_INIT1 | PIC_INIT1_SEND_INIT4, PIC_SLAVE_INIT1);

	// Set start of interrupts to 0x20 for master, 0x28 for slave
	out8(ARCH_INTERRUPT_BASE, PIC_MASTER_INIT2);
	out8(ARCH_INTERRUPT_BASE + PIC_SLAVE_INT_BASE, PIC_SLAVE_INIT2);

	// Specify cascading through interrupt 2
	out8(PIC_INIT3_IR2_IS_SLAVE, PIC_MASTER_INIT3);
	out8(PIC_INIT3_SLAVE_ID2, PIC_SLAVE_INIT3);

	// Set both to operate in 8086 mode
	out8(PIC_INIT4_x86_MODE, PIC_MASTER_INIT4);
	out8(PIC_INIT4_x86_MODE, PIC_SLAVE_INIT4);

	out8(0xfb, PIC_MASTER_MASK);	// Mask off all interrupts (except slave pic line IRQ 2).
	out8(0xff, PIC_SLAVE_MASK); 	// Mask off interrupts on the slave.

	// determine which interrupts are level or edge triggered

#if 0
	// should set everything possible to level triggered
	out8(0xf8, PIC_MASTER_TRIGGER_MODE);
	out8(0xde, PIC_SLAVE_TRIGGER_MODE);
#endif

	// cache the ELCR contents: bit set = level triggered
	sLevelTriggeredInterrupts = in8(PIC_MASTER_TRIGGER_MODE)
		| (in8(PIC_SLAVE_TRIGGER_MODE) << 8);

	TRACE(("PIC level trigger mode: 0x%08lx\n", sLevelTriggeredInterrupts));

	// make the pic controller the current one
	sCurrentPIC = &picController;
	gUsingIOAPIC = false;
}
418 
419 
420 // #pragma mark - I/O APIC
421 
422 
/*!	Reads a 32-bit I/O APIC register through the indirect select/window
	register pair. The two volatile accesses must stay in this order.
*/
static inline uint32
ioapic_read_32(uint8 registerSelect)
{
	sIOAPIC->io_register_select = registerSelect;
	return sIOAPIC->io_window_register;
}
429 
430 
/*!	Writes a 32-bit I/O APIC register through the indirect select/window
	register pair. The two volatile accesses must stay in this order.
*/
static inline void
ioapic_write_32(uint8 registerSelect, uint32 value)
{
	sIOAPIC->io_register_select = registerSelect;
	sIOAPIC->io_window_register = value;
}
437 
438 
/*!	Reads a 64-bit I/O APIC register (e.g. a redirection table entry) as
	two 32-bit accesses: high dword first, then low.
	NOTE(review): the read is not atomic — assumes the entry cannot change
	between the two accesses (or callers tolerate tearing); confirm.
*/
static inline uint64
ioapic_read_64(uint8 registerSelect)
{
	uint64 result;
	sIOAPIC->io_register_select = registerSelect + 1;
	result = sIOAPIC->io_window_register;
	result <<= 32;
	sIOAPIC->io_register_select = registerSelect;
	result |= sIOAPIC->io_window_register;
	return result;
}
450 
451 
/*!	Writes a 64-bit I/O APIC register as two 32-bit accesses: low dword
	first, then high.
	NOTE(review): the write is not atomic across the two accesses; confirm
	callers only update entries that are masked or otherwise quiescent.
*/
static inline void
ioapic_write_64(uint8 registerSelect, uint64 value)
{
	sIOAPIC->io_register_select = registerSelect;
	sIOAPIC->io_window_register = (uint32)value;
	sIOAPIC->io_register_select = registerSelect + 1;
	sIOAPIC->io_window_register = (uint32)(value >> 32);
}
460 
461 
462 static bool
463 ioapic_is_spurious_interrupt(int32 num)
464 {
465 	// the spurious interrupt vector is initialized to the max value in smp
466 	return num == 0xff - ARCH_INTERRUPT_BASE;
467 }
468 
469 
/*!	Acknowledges the interrupt by writing 0 to the local APIC's EOI
	register; \a num is unused since the local APIC EOI is not
	vector-specific.
*/
static void
ioapic_end_of_interrupt(int32 num)
{
	*(volatile uint32 *)((char *)sLocalAPIC + APIC_EOI) = 0;
}
475 
476 
477 static void
478 ioapic_enable_io_interrupt(int32 num)
479 {
480 	uint64 entry;
481 	int32 pin = sIRQToIOAPICPin[num];
482 	if (pin < 0 || pin > (int32)sIOAPICMaxRedirectionEntry)
483 		return;
484 
485 	TRACE(("ioapic_enable_io_interrupt: IRQ %ld -> pin %ld\n", num, pin));
486 
487 	entry = ioapic_read_64(IO_APIC_REDIRECTION_TABLE + pin * 2);
488 	entry &= ~(1 << IO_APIC_INTERRUPT_MASK_SHIFT);
489 	entry |= IO_APIC_INTERRUPT_UNMASKED << IO_APIC_INTERRUPT_MASK_SHIFT;
490 	ioapic_write_64(IO_APIC_REDIRECTION_TABLE + pin * 2, entry);
491 }
492 
493 
494 static void
495 ioapic_disable_io_interrupt(int32 num)
496 {
497 	uint64 entry;
498 	int32 pin = sIRQToIOAPICPin[num];
499 	if (pin < 0 || pin > (int32)sIOAPICMaxRedirectionEntry)
500 		return;
501 
502 	TRACE(("ioapic_disable_io_interrupt: IRQ %ld -> pin %ld\n", num, pin));
503 
504 	entry = ioapic_read_64(IO_APIC_REDIRECTION_TABLE + pin * 2);
505 	entry &= ~(1 << IO_APIC_INTERRUPT_MASK_SHIFT);
506 	entry |= IO_APIC_INTERRUPT_MASKED << IO_APIC_INTERRUPT_MASK_SHIFT;
507 	ioapic_write_64(IO_APIC_REDIRECTION_TABLE + pin * 2, entry);
508 }
509 
510 
511 static void
512 ioapic_configure_io_interrupt(int32 num, uint32 config)
513 {
514 	uint64 entry;
515 	int32 pin = sIRQToIOAPICPin[num];
516 	if (pin < 0 || pin > (int32)sIOAPICMaxRedirectionEntry)
517 		return;
518 
519 	TRACE(("ioapic_configure_io_interrupt: IRQ %ld -> pin %ld; config 0x%08lx\n",
520 		num, pin, config));
521 
522 	entry = ioapic_read_64(IO_APIC_REDIRECTION_TABLE + pin * 2);
523 	entry &= ~((1 << IO_APIC_TRIGGER_MODE_SHIFT)
524 		| (1 << IO_APIC_PIN_POLARITY_SHIFT)
525 		| (IO_APIC_INTERRUPT_VECTOR_MASK << IO_APIC_INTERRUPT_VECTOR_SHIFT));
526 
527 	if (config & B_LEVEL_TRIGGERED) {
528 		entry |= (IO_APIC_TRIGGER_MODE_LEVEL << IO_APIC_TRIGGER_MODE_SHIFT);
529 		sLevelTriggeredInterrupts |= (1 << num);
530 	} else {
531 		entry |= (IO_APIC_TRIGGER_MODE_EDGE << IO_APIC_TRIGGER_MODE_SHIFT);
532 		sLevelTriggeredInterrupts &= ~(1 << num);
533 	}
534 
535 	if (config & B_LOW_ACTIVE_POLARITY)
536 		entry |= (IO_APIC_PIN_POLARITY_LOW_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT);
537 	else
538 		entry |= (IO_APIC_PIN_POLARITY_HIGH_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT);
539 
540 	entry |= (num + ARCH_INTERRUPT_BASE) << IO_APIC_INTERRUPT_VECTOR_SHIFT;
541 	ioapic_write_64(IO_APIC_REDIRECTION_TABLE + pin * 2, entry);
542 }
543 
544 
545 static void
546 ioapic_init(kernel_args *args)
547 {
548 	static const interrupt_controller ioapicController = {
549 		"82093AA IOAPIC",
550 		&ioapic_enable_io_interrupt,
551 		&ioapic_disable_io_interrupt,
552 		&ioapic_configure_io_interrupt,
553 		&ioapic_is_spurious_interrupt,
554 		&ioapic_end_of_interrupt
555 	};
556 
557 	if (args->arch_args.apic == NULL) {
558 		dprintf("no local apic available\n");
559 		return;
560 	}
561 
562 	bool disableAPIC = get_safemode_boolean(B_SAFEMODE_DISABLE_APIC, false);
563 	if (disableAPIC) {
564 		args->arch_args.apic = NULL;
565 		return;
566 	}
567 
568 	// always map the local apic as it can be used for timers even if we
569 	// don't end up using the io apic
570 	sLocalAPIC = args->arch_args.apic;
571 	if (vm_map_physical_memory(B_SYSTEM_TEAM, "local apic", &sLocalAPIC,
572 			B_EXACT_ADDRESS, B_PAGE_SIZE,
573 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
574 			args->arch_args.apic_phys, true) < 0) {
575 		panic("mapping the local apic failed");
576 		return;
577 	}
578 
579 	if (args->arch_args.ioapic == NULL) {
580 		dprintf("no ioapic available, not using ioapics for interrupt routing\n");
581 		return;
582 	}
583 
584 	if (!get_safemode_boolean(B_SAFEMODE_DISABLE_IOAPIC, false)) {
585 		dprintf("ioapic explicitly disabled, not using ioapics for interrupt "
586 			"routing\n");
587 		return;
588 	}
589 
590 	// TODO: remove when the PCI IRQ routing through ACPI is available below
591 	return;
592 
593 	acpi_module_info *acpi;
594 	if (get_module(B_ACPI_MODULE_NAME, (module_info **)&acpi) != B_OK) {
595 		dprintf("acpi module not available, not configuring ioapic\n");
596 		return;
597 	}
598 
599 	// map in the ioapic
600 	sIOAPIC = (ioapic *)args->arch_args.ioapic;
601 	if (vm_map_physical_memory(B_SYSTEM_TEAM, "ioapic", (void**)&sIOAPIC,
602 			B_EXACT_ADDRESS, B_PAGE_SIZE,
603 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
604 			args->arch_args.ioapic_phys, true) < 0) {
605 		panic("mapping the ioapic failed");
606 		return;
607 	}
608 
609 	uint32 version = ioapic_read_32(IO_APIC_VERSION);
610 	if (version == 0xffffffff) {
611 		dprintf("ioapic seems inaccessible, not using it\n");
612 		return;
613 	}
614 
615 	sLevelTriggeredInterrupts = 0;
616 	sIOAPICMaxRedirectionEntry
617 		= ((version >> IO_APIC_MAX_REDIRECTION_ENTRY_SHIFT)
618 			& IO_APIC_MAX_REDIRECTION_ENTRY_MASK);
619 
620 	// use the boot CPU as the target for all interrupts
621 	uint64 targetAPIC = args->arch_args.cpu_apic_id[0];
622 
623 	// program the interrupt vectors of the ioapic
624 	for (uint32 i = 0; i <= sIOAPICMaxRedirectionEntry; i++) {
625 		// initialize everything to deliver to the boot CPU in physical mode
626 		// and masked until explicitly enabled through enable_io_interrupt()
627 		uint64 entry = (targetAPIC << IO_APIC_DESTINATION_FIELD_SHIFT)
628 			| (IO_APIC_INTERRUPT_MASKED << IO_APIC_INTERRUPT_MASK_SHIFT)
629 			| (IO_APIC_DESTINATION_MODE_PHYSICAL << IO_APIC_DESTINATION_MODE_SHIFT)
630 			| ((i + ARCH_INTERRUPT_BASE) << IO_APIC_INTERRUPT_VECTOR_SHIFT);
631 
632 		if (i == 0) {
633 			// make redirection entry 0 into an external interrupt
634 			entry |= (IO_APIC_TRIGGER_MODE_EDGE << IO_APIC_TRIGGER_MODE_SHIFT)
635 				| (IO_APIC_PIN_POLARITY_HIGH_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT)
636 				| (IO_APIC_DELIVERY_MODE_EXT_INT << IO_APIC_DELIVERY_MODE_SHIFT);
637 		} else if (i < 16) {
638 			// make 1-15 ISA interrupts
639 			entry |= (IO_APIC_TRIGGER_MODE_EDGE << IO_APIC_TRIGGER_MODE_SHIFT)
640 				| (IO_APIC_PIN_POLARITY_HIGH_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT)
641 				| (IO_APIC_DELIVERY_MODE_FIXED << IO_APIC_DELIVERY_MODE_SHIFT);
642 		} else {
643 			// and the rest are PCI interrupts
644 			entry |= (IO_APIC_TRIGGER_MODE_LEVEL << IO_APIC_TRIGGER_MODE_SHIFT)
645 				| (IO_APIC_PIN_POLARITY_LOW_ACTIVE << IO_APIC_PIN_POLARITY_SHIFT)
646 				| (IO_APIC_DELIVERY_MODE_FIXED << IO_APIC_DELIVERY_MODE_SHIFT);
647 			sLevelTriggeredInterrupts |= (1 << i);
648 		}
649 
650 		ioapic_write_64(IO_APIC_REDIRECTION_TABLE + 2 * i, entry);
651 	}
652 
653 	// setup default 1:1 mapping
654 	for (uint32 i = 0; i < 256; i++)
655 		sIRQToIOAPICPin[i] = i;
656 
657 	// TODO: here ACPI needs to be used to properly set up the PCI IRQ
658 	// routing.
659 
660 	// prefer the ioapic over the normal pic
661 	put_module(B_ACPI_MODULE_NAME);
662 	dprintf("using ioapic for interrupt routing\n");
663 	sCurrentPIC = &ioapicController;
664 	gUsingIOAPIC = true;
665 }
666 
667 
668 // #pragma mark -
669 
670 
671 void
672 arch_int_enable_io_interrupt(int irq)
673 {
674 	sCurrentPIC->enable_io_interrupt(irq);
675 }
676 
677 
678 void
679 arch_int_disable_io_interrupt(int irq)
680 {
681 	sCurrentPIC->disable_io_interrupt(irq);
682 }
683 
684 
685 void
686 arch_int_configure_io_interrupt(int irq, uint32 config)
687 {
688 	sCurrentPIC->configure_io_interrupt(irq, config);
689 }
690 
691 
692 #undef arch_int_enable_interrupts
693 #undef arch_int_disable_interrupts
694 #undef arch_int_restore_interrupts
695 #undef arch_int_are_interrupts_enabled
696 
697 
/*!	Out-of-line version; the macro alias is #undef-ed above so that a
	real, linkable symbol exists.
*/
void
arch_int_enable_interrupts(void)
{
	arch_int_enable_interrupts_inline();
}
703 
704 
/*!	Out-of-line version; returns the previous interrupt state, suitable
	for a later arch_int_restore_interrupts().
*/
int
arch_int_disable_interrupts(void)
{
	return arch_int_disable_interrupts_inline();
}
710 
711 
/*!	Out-of-line version; restores the interrupt state previously returned
	by arch_int_disable_interrupts().
*/
void
arch_int_restore_interrupts(int oldState)
{
	arch_int_restore_interrupts_inline(oldState);
}
717 
718 
/*!	Out-of-line version; returns whether interrupts are currently enabled
	on this CPU.
*/
bool
arch_int_are_interrupts_enabled(void)
{
	return arch_int_are_interrupts_enabled_inline();
}
724 
725 
726 static const char *
727 exception_name(int number, char *buffer, int32 bufferSize)
728 {
729 	if (number >= 0 && number < kInterruptNameCount)
730 		return kInterruptNames[number];
731 
732 	snprintf(buffer, bufferSize, "exception %d", number);
733 	return buffer;
734 }
735 
736 
737 static void
738 invalid_exception(struct iframe* frame)
739 {
740 	struct thread* thread = thread_get_current_thread();
741 	char name[32];
742 	panic("unhandled trap 0x%lx (%s) at ip 0x%lx, thread %ld!\n",
743 		frame->vector, exception_name(frame->vector, name, sizeof(name)),
744 		frame->eip, thread ? thread->id : -1);
745 }
746 
747 
748 static void
749 fatal_exception(struct iframe *frame)
750 {
751 	char name[32];
752 	panic("Fatal exception \"%s\" occurred! Error code: 0x%lx\n",
753 		exception_name(frame->vector, name, sizeof(name)), frame->error_code);
754 }
755 
756 
/*!	Handles exceptions that may legitimately be caused by running code.
	VM86-mode faults are bounced back to the VM86 caller. Otherwise the
	vector is mapped to a (debug_exception_type, signal) pair: user-mode
	faults result in signal delivery (notifying the user debugger first
	when no handler is installed), kernel-mode faults panic.
*/
static void
unexpected_exception(struct iframe* frame)
{
	debug_exception_type type;
	int signal;

	if (IFRAME_IS_VM86(frame)) {
		x86_vm86_return((struct vm86_iframe *)frame, (frame->vector == 13) ?
			B_OK : B_ERROR);
		// won't get here
	}

	// translate the exception vector into the generic exception type and
	// the POSIX signal to deliver
	switch (frame->vector) {
		case 0:		// Divide Error Exception (#DE)
			type = B_DIVIDE_ERROR;
			signal = SIGFPE;
			break;

		case 4:		// Overflow Exception (#OF)
			type = B_OVERFLOW_EXCEPTION;
			signal = SIGTRAP;
			break;

		case 5:		// BOUND Range Exceeded Exception (#BR)
			type = B_BOUNDS_CHECK_EXCEPTION;
			signal = SIGTRAP;
			break;

		case 6:		// Invalid Opcode Exception (#UD)
			type = B_INVALID_OPCODE_EXCEPTION;
			signal = SIGILL;
			break;

		case 13: 	// General Protection Exception (#GP)
			type = B_GENERAL_PROTECTION_FAULT;
			signal = SIGILL;
			break;

		case 16: 	// x87 FPU Floating-Point Error (#MF)
			type = B_FLOATING_POINT_EXCEPTION;
			signal = SIGFPE;
			break;

		case 17: 	// Alignment Check Exception (#AC)
			type = B_ALIGNMENT_EXCEPTION;
			signal = SIGTRAP;
			break;

		case 19: 	// SIMD Floating-Point Exception (#XF)
			type = B_FLOATING_POINT_EXCEPTION;
			signal = SIGFPE;
			break;

		default:
			invalid_exception(frame);
			return;
	}

	if (IFRAME_IS_USER(frame)) {
		struct sigaction action;
		struct thread* thread = thread_get_current_thread();

		enable_interrupts();

		// If the thread has a signal handler for the signal, we simply send it
		// the signal. Otherwise we notify the user debugger first.
		if (sigaction(signal, NULL, &action) == 0
			&& action.sa_handler != SIG_DFL
			&& action.sa_handler != SIG_IGN) {
			send_signal(thread->id, signal);
		} else if (user_debug_exception_occurred(type, signal))
			send_signal(team_get_current_team_id(), signal);
	} else {
		char name[32];
		panic("Unexpected exception \"%s\" occurred in kernel mode! "
			"Error code: 0x%lx\n",
			exception_name(frame->vector, name, sizeof(name)),
			frame->error_code);
	}
}
837 
838 
/*!	Entered via the double fault task gate: reconstructs the faulting
	context from the per-CPU TSS into \a frame (so stack traces work),
	installs a safe page fault handler for the debugger session, and
	enters the kernel debugger.
*/
void
x86_double_fault_exception(struct iframe* frame)
{
	int cpu = x86_double_fault_get_cpu();

	// The double fault iframe contains no useful information (as
	// per Intel's architecture spec). Thus we simply save the
	// information from the (unhandlable) exception which caused the
	// double in our iframe. This will result even in useful stack
	// traces. Only problem is that we trust that at least the
	// TSS is still accessible.
	struct tss *tss = &gCPU[cpu].arch.tss;

	frame->cs = tss->cs;
	frame->es = tss->es;
	frame->ds = tss->ds;
	frame->fs = tss->fs;
	frame->gs = tss->gs;
	frame->eip = tss->eip;
	frame->ebp = tss->ebp;
	frame->esp = tss->esp;
	frame->eax = tss->eax;
	frame->ebx = tss->ebx;
	frame->ecx = tss->ecx;
	frame->edx = tss->edx;
	frame->esi = tss->esi;
	frame->edi = tss->edi;
	frame->flags = tss->eflags;

	// Use a special handler for page faults which avoids the triple fault
	// pitfalls.
	set_interrupt_gate(cpu, 14, &trap14_double_fault);

	debug_double_fault(cpu);
}
874 
875 
/*!	Page fault handler installed while the double fault debugger runs.
	If the CPU has a fault handler registered, execution is redirected
	there; otherwise the fault info is printed and the CPU spins forever
	(re-entering the debugger could triple fault).
*/
void
x86_page_fault_exception_double_fault(struct iframe* frame)
{
	uint32 cr2;
	asm("movl %%cr2, %0" : "=r" (cr2));
		// CR2 holds the faulting address

	// Only if this CPU has a fault handler, we're allowed to be here.
	cpu_ent& cpu = gCPU[x86_double_fault_get_cpu()];
	addr_t faultHandler = cpu.fault_handler;
	if (faultHandler != 0) {
		debug_set_page_fault_info(cr2, frame->eip,
			(frame->error_code & 0x2) != 0 ? DEBUG_PAGE_FAULT_WRITE : 0);
			// error code bit 1 set means it was a write access
		frame->eip = faultHandler;
		frame->ebp = cpu.fault_handler_stack_pointer;
		return;
	}

	// No fault handler. This is bad. Since we originally came from a double
	// fault, we don't try to reenter the kernel debugger. Instead we just
	// print the info we've got and enter an infinite loop.
	kprintf("Page fault in double fault debugger without fault handler! "
		"Touching address %p from eip %p. Entering infinite loop...\n",
		(void*)cr2, (void*)frame->eip);

	while (true);
}
902 
903 
/*!	The generic page fault handler. Faults while the kernel debugger runs
	or with interrupts disabled are only tolerated when a fault handler is
	registered (execution is redirected there); otherwise this panics.
	Legitimate faults are forwarded to vm_page_fault().
*/
static void
page_fault_exception(struct iframe* frame)
{
	struct thread *thread = thread_get_current_thread();
	uint32 cr2;
	addr_t newip;

	asm("movl %%cr2, %0" : "=r" (cr2));
		// CR2 holds the address whose access faulted

	if (debug_debugger_running()) {
		// If this CPU or this thread has a fault handler, we're allowed to be
		// here.
		if (thread != NULL) {
			cpu_ent* cpu = &gCPU[smp_get_current_cpu()];
			if (cpu->fault_handler != 0) {
				debug_set_page_fault_info(cr2, frame->eip,
					(frame->error_code & 0x2) != 0
						? DEBUG_PAGE_FAULT_WRITE : 0);
				frame->eip = cpu->fault_handler;
				frame->ebp = cpu->fault_handler_stack_pointer;
				return;
			}

			if (thread->fault_handler != 0) {
				kprintf("ERROR: thread::fault_handler used in kernel "
					"debugger!\n");
				debug_set_page_fault_info(cr2, frame->eip,
					(frame->error_code & 0x2) != 0
						? DEBUG_PAGE_FAULT_WRITE : 0);
				frame->eip = thread->fault_handler;
				return;
			}
		}

		// otherwise, not really
		panic("page fault in debugger without fault handler! Touching "
			"address %p from eip %p\n", (void *)cr2, (void *)frame->eip);
		return;
	} else if ((frame->flags & 0x200) == 0) {
		// interrupts disabled (EFLAGS.IF clear)

		// If a page fault handler is installed, we're allowed to be here.
		// TODO: Now we are generally allowing user_memcpy() with interrupts
		// disabled, which in most cases is a bug. We should add some thread
		// flag allowing to explicitly indicate that this handling is desired.
		if (thread && thread->fault_handler != 0) {
			if (frame->eip != thread->fault_handler) {
				frame->eip = thread->fault_handler;
				return;
			}

			// The fault happened at the fault handler address. This is a
			// certain infinite loop.
			panic("page fault, interrupts disabled, fault handler loop. "
				"Touching address %p from eip %p\n", (void*)cr2,
				(void*)frame->eip);
		}

		// If we are not running the kernel startup the page fault was not
		// allowed to happen and we must panic.
		panic("page fault, but interrupts were disabled. Touching address "
			"%p from eip %p\n", (void *)cr2, (void *)frame->eip);
		return;
	} else if (thread != NULL && thread->page_faults_allowed < 1) {
		panic("page fault not allowed at this place. Touching address "
			"%p from eip %p\n", (void *)cr2, (void *)frame->eip);
		return;
	}

	enable_interrupts();

	// NOTE(review): newip is assumed to always be set by vm_page_fault()
	// before it returns — confirm against its implementation
	vm_page_fault(cr2, frame->eip,
		(frame->error_code & 0x2) != 0,	// write access
		(frame->error_code & 0x4) != 0,	// userland
		&newip);
	if (newip != 0) {
		// the page fault handler wants us to modify the iframe to set the
		// IP the cpu will return to to be this ip
		frame->eip = newip;
	}
}
985 
986 
/*!	Dispatches a hardware interrupt to the generic interrupt code.
	Edge-triggered interrupts are acknowledged (EOI) before the handlers
	run; level-triggered ones only afterwards, so the line cannot fire
	again while still asserted. Afterwards a reschedule or the thread's
	post-interrupt callback is performed if requested.
*/
static void
hardware_interrupt(struct iframe* frame)
{
	// normalize the vector relative to the hardware interrupt base
	int32 vector = frame->vector - ARCH_INTERRUPT_BASE;
	bool levelTriggered = false;
	struct thread* thread = thread_get_current_thread();

	if (sCurrentPIC->is_spurious_interrupt(vector)) {
		TRACE(("got spurious interrupt at vector %ld\n", vector));
		return;
	}

	// trigger mode is only tracked for the first 32 vectors
	// (sLevelTriggeredInterrupts is a 32-bit mask)
	if (vector < 32)
		levelTriggered = (sLevelTriggeredInterrupts & (1 << vector)) != 0;

	if (!levelTriggered)
		sCurrentPIC->end_of_interrupt(vector);

	int_io_interrupt_handler(vector, levelTriggered);

	if (levelTriggered)
		sCurrentPIC->end_of_interrupt(vector);

	cpu_status state = disable_interrupts();
	if (thread->cpu->invoke_scheduler) {
		GRAB_THREAD_LOCK();
		scheduler_reschedule();
		RELEASE_THREAD_LOCK();
		restore_interrupts(state);
	} else if (thread->post_interrupt_callback != NULL) {
		restore_interrupts(state);
		void (*callback)(void*) = thread->post_interrupt_callback;
		void* data = thread->post_interrupt_data;

		// clear before invoking so the callback may install a new one
		thread->post_interrupt_callback = NULL;
		thread->post_interrupt_data = NULL;

		callback(data);
	}
}
1027 
1028 
status_t
arch_int_init(struct kernel_args *args)
{
	// First-stage interrupt initialization: installs the IDT gates for CPU
	// exceptions, hardware interrupt vectors, and software interrupts, sets
	// up the legacy PIC, and fills the kernel's interrupt handler table.
	// Returns B_OK (cannot fail at this stage).
	int i;
	interrupt_handler_function** table;

	// set the global sIDT variable
	sIDTs[0] = (desc_table *)args->arch_args.vir_idt;

	// setup the standard programmable interrupt controller
	pic_init();

	// CPU exception vectors 0-19. Most use interrupt gates (interrupts
	// disabled on entry); vector 3 (breakpoint) uses a trap gate so that
	// interrupts stay enabled while the debugger runs.
	set_interrupt_gate(0, 0,  &trap0);
	set_interrupt_gate(0, 1,  &trap1);
	set_interrupt_gate(0, 2,  &trap2);
	set_trap_gate(0, 3,  &trap3);
	set_interrupt_gate(0, 4,  &trap4);
	set_interrupt_gate(0, 5,  &trap5);
	set_interrupt_gate(0, 6,  &trap6);
	set_interrupt_gate(0, 7,  &trap7);
	// trap8 (double fault) is set in arch_cpu.c
	set_interrupt_gate(0, 9,  &trap9);
	set_interrupt_gate(0, 10,  &trap10);
	set_interrupt_gate(0, 11,  &trap11);
	set_interrupt_gate(0, 12,  &trap12);
	set_interrupt_gate(0, 13,  &trap13);
	set_interrupt_gate(0, 14,  &trap14);
//	set_interrupt_gate(0, 15,  &trap15);
	// vector 15 is reserved by Intel; it falls through to the
	// invalid_exception default installed below
	set_interrupt_gate(0, 16,  &trap16);
	set_interrupt_gate(0, 17,  &trap17);
	set_interrupt_gate(0, 18,  &trap18);
	set_interrupt_gate(0, 19,  &trap19);

	// hardware interrupt vectors 32-55 (ARCH_INTERRUPT_BASE onwards),
	// dispatched through hardware_interrupt() via the table defaults below
	set_interrupt_gate(0, 32,  &trap32);
	set_interrupt_gate(0, 33,  &trap33);
	set_interrupt_gate(0, 34,  &trap34);
	set_interrupt_gate(0, 35,  &trap35);
	set_interrupt_gate(0, 36,  &trap36);
	set_interrupt_gate(0, 37,  &trap37);
	set_interrupt_gate(0, 38,  &trap38);
	set_interrupt_gate(0, 39,  &trap39);
	set_interrupt_gate(0, 40,  &trap40);
	set_interrupt_gate(0, 41,  &trap41);
	set_interrupt_gate(0, 42,  &trap42);
	set_interrupt_gate(0, 43,  &trap43);
	set_interrupt_gate(0, 44,  &trap44);
	set_interrupt_gate(0, 45,  &trap45);
	set_interrupt_gate(0, 46,  &trap46);
	set_interrupt_gate(0, 47,  &trap47);
	set_interrupt_gate(0, 48,  &trap48);
	set_interrupt_gate(0, 49,  &trap49);
	set_interrupt_gate(0, 50,  &trap50);
	set_interrupt_gate(0, 51,  &trap51);
	set_interrupt_gate(0, 52,  &trap52);
	set_interrupt_gate(0, 53,  &trap53);
	set_interrupt_gate(0, 54,  &trap54);
	set_interrupt_gate(0, 55,  &trap55);

	// software interrupts, callable from userland (trap gates keep
	// interrupts enabled); 99 is presumably the syscall vector -- verify
	// against interrupts.h
	set_trap_gate(0, 98, &trap98);	// for performance testing only
	set_trap_gate(0, 99, &trap99);

	// high vectors 251-255, presumably SMP/APIC inter-processor and local
	// APIC interrupts -- verify against interrupts.h
	set_interrupt_gate(0, 251, &trap251);
	set_interrupt_gate(0, 252, &trap252);
	set_interrupt_gate(0, 253, &trap253);
	set_interrupt_gate(0, 254, &trap254);
	set_interrupt_gate(0, 255, &trap255);

	// init interrupt handler table
	table = gInterruptHandlerTable;

	// defaults: exceptions below ARCH_INTERRUPT_BASE are invalid unless
	// overridden below; everything at or above it is a hardware interrupt
	for (i = 0; i < ARCH_INTERRUPT_BASE; i++)
		table[i] = invalid_exception;
	for (i = ARCH_INTERRUPT_BASE; i < INTERRUPT_HANDLER_TABLE_SIZE; i++)
		table[i] = hardware_interrupt;

	// specific exception handlers: fatal_exception panics,
	// unexpected_exception may be recoverable (e.g. delivered to userland)
	table[0] = unexpected_exception;	// Divide Error Exception (#DE)
	table[1] = x86_handle_debug_exception; // Debug Exception (#DB)
	table[2] = fatal_exception;			// NMI Interrupt
	table[3] = x86_handle_breakpoint_exception; // Breakpoint Exception (#BP)
	table[4] = unexpected_exception;	// Overflow Exception (#OF)
	table[5] = unexpected_exception;	// BOUND Range Exceeded Exception (#BR)
	table[6] = unexpected_exception;	// Invalid Opcode Exception (#UD)
	table[7] = fatal_exception;			// Device Not Available Exception (#NM)
	table[8] = x86_double_fault_exception; // Double Fault Exception (#DF)
	table[9] = fatal_exception;			// Coprocessor Segment Overrun
	table[10] = fatal_exception;		// Invalid TSS Exception (#TS)
	table[11] = fatal_exception;		// Segment Not Present (#NP)
	table[12] = fatal_exception;		// Stack Fault Exception (#SS)
	table[13] = unexpected_exception;	// General Protection Exception (#GP)
	table[14] = page_fault_exception;	// Page-Fault Exception (#PF)
	table[16] = unexpected_exception;	// x87 FPU Floating-Point Error (#MF)
	table[17] = unexpected_exception;	// Alignment Check Exception (#AC)
	table[18] = fatal_exception;		// Machine-Check Exception (#MC)
	table[19] = unexpected_exception;	// SIMD Floating-Point Exception (#XF)

	return B_OK;
}
1127 
1128 
1129 status_t
1130 arch_int_init_post_vm(struct kernel_args *args)
1131 {
1132 	ioapic_init(args);
1133 
1134 	// create IDT area for the boot CPU
1135 	area_id area = create_area("idt", (void**)&sIDTs[0], B_EXACT_ADDRESS,
1136 		B_PAGE_SIZE, B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1137 	if (area < 0)
1138 		return area;
1139 
1140 	// create IDTs for the off-boot CPU
1141 	size_t idtSize = 256 * 8;
1142 		// 256 8 bytes-sized descriptors
1143 	int32 cpuCount = smp_get_num_cpus();
1144 	if (cpuCount > 0) {
1145 		size_t areaSize = ROUNDUP(cpuCount * idtSize, B_PAGE_SIZE);
1146 		desc_table* idt;
1147 		area = create_area("idt", (void**)&idt, B_ANY_KERNEL_ADDRESS,
1148 			areaSize, B_CONTIGUOUS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
1149 		if (area < 0)
1150 			return area;
1151 
1152 		for (int32 i = 1; i < cpuCount; i++) {
1153 			sIDTs[i] = idt;
1154 			memcpy(idt, sIDTs[0], idtSize);
1155 			idt += 256;
1156 			// The CPU's IDTR will be set in arch_cpu_init_percpu().
1157 		}
1158 	}
1159 
1160 	return area >= B_OK ? B_OK : area;
1161 }
1162 
1163 
status_t
arch_int_init_post_device_manager(struct kernel_args *args)
{
	// Final interrupt init stage, run after the device manager is up.
	// Nothing to do on x86 -- all controller setup already happened in
	// arch_int_init() and arch_int_init_post_vm().
	return B_OK;
}
1169