xref: /haiku/src/system/kernel/smp.cpp (revision 50b3e74489a1a46fec88df793e4f6780e4de933c)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Functionality for symmetrical multiprocessors */
12 
13 
14 #include <smp.h>
15 
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/debug.h>
21 #include <arch/int.h>
22 #include <arch/smp.h>
23 #include <cpu.h>
24 #include <generic_syscall.h>
25 #include <int.h>
26 #include <spinlock_contention.h>
27 #include <thread.h>
28 #if DEBUG_SPINLOCK_LATENCIES
29 #	include <safemode.h>
30 #endif
31 
32 #include "kernel_debug_config.h"
33 
34 
35 //#define TRACE_SMP
36 #ifdef TRACE_SMP
37 #	define TRACE(x) dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 
43 #undef acquire_spinlock
44 #undef release_spinlock
45 
46 
47 #define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)
48 
49 // These macros define the number of unsuccessful iterations in
50 // acquire_spinlock() and acquire_spinlock_nocheck() after which the
51 // functions panic(), assuming that a deadlock has occurred.
52 #define SPINLOCK_DEADLOCK_COUNT				100000000
53 #define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000
54 
55 
56 struct smp_msg {
57 	struct smp_msg	*next;
58 	int32			message;
59 	uint32			data;
60 	uint32			data2;
61 	uint32			data3;
62 	void			*data_ptr;
63 	uint32			flags;
64 	int32			ref_count;
65 	volatile bool	done;
66 	uint32			proc_bitmap;
67 };
68 
69 #define MAILBOX_LOCAL 1
70 #define MAILBOX_BCAST 2
71 
72 static spinlock boot_cpu_spin[SMP_MAX_CPUS] = { };
73 
74 static struct smp_msg *sFreeMessages = NULL;
75 static volatile int sFreeMessageCount = 0;
76 static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;
77 
78 static struct smp_msg *sCPUMessages[SMP_MAX_CPUS] = { NULL, };
79 static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];
80 
81 static struct smp_msg *sBroadcastMessages = NULL;
82 static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;
83 
84 static bool sICIEnabled = false;
85 static int32 sNumCPUs = 1;
86 
87 static int32 process_pending_ici(int32 currentCPU);
88 
89 
90 #if DEBUG_SPINLOCKS
91 #define NUM_LAST_CALLERS	32
92 
93 static struct {
94 	void		*caller;
95 	spinlock	*lock;
96 } sLastCaller[NUM_LAST_CALLERS];
97 
98 static vint32 sLastIndex = 0;
99 	// Is incremented atomically. Must be taken % NUM_LAST_CALLERS before being
100 	// used as an index into sLastCaller. Note that it has to be cast to uint32
101 	// before applying the modulo operation, since otherwise overflowing would
102 	// yield negative indices.
103 
104 
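/*!	Records \a caller as the most recent acquirer of \a lock in the
	sLastCaller ring buffer, so that dump_spinlock() can later report who
	last grabbed a given lock.
*/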
105 static void
106 push_lock_caller(void *caller, spinlock *lock)
107 {
108 	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;
109 
110 	sLastCaller[index].caller = caller;
111 	sLastCaller[index].lock = lock;
112 }
113 
114 
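/*!	Searches the sLastCaller ring buffer, newest entries first, for the most
	recent caller recorded for \a lock. Returns NULL if none is found.
*/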
115 static void *
116 find_lock_caller(spinlock *lock)
117 {
118 	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;
119 
120 	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
121 		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
122 		if (sLastCaller[index].lock == lock)
123 			return sLastCaller[index].caller;
124 	}
125 
126 	return NULL;
127 }
128 
129 
130 int
131 dump_spinlock(int argc, char** argv)
132 {
133 	if (argc != 2) {
134 		print_debugger_command_usage(argv[0]);
135 		return 0;
136 	}
137 
138 	uint64 address;
139 	if (!evaluate_debug_expression(argv[1], &address, false))
140 		return 0;
141 
142 	spinlock* lock = (spinlock*)(addr_t)address;
143 	kprintf("spinlock %p:\n", lock);
144 	bool locked = B_SPINLOCK_IS_LOCKED(lock);
145 	if (locked) {
146 		kprintf("  locked from %p\n", find_lock_caller(lock));
147 	} else
148 		kprintf("  not locked\n");
149 
150 	return 0;
151 }
152 
153 
154 #endif	// DEBUG_SPINLOCKS
155 
156 
157 #if DEBUG_SPINLOCK_LATENCIES
158 
159 
160 #define NUM_LATENCY_LOCKS	4
161 #define DEBUG_LATENCY		200
162 
163 
164 static struct {
165 	spinlock	*lock;
166 	bigtime_t	timestamp;
167 } sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];
168 
169 static int32 sLatencyIndex[B_MAX_CPU_COUNT];
170 static bool sEnableLatencyCheck;
171 
172 
173 static void
174 push_latency(spinlock* lock)
175 {
176 	if (!sEnableLatencyCheck)
177 		return;
178 
179 	int32 cpu = smp_get_current_cpu();
180 	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;
181 
182 	sLatency[cpu][index].lock = lock;
183 	sLatency[cpu][index].timestamp = system_time();
184 }
185 
186 
187 static void
188 test_latency(spinlock* lock)
189 {
190 	if (!sEnableLatencyCheck)
191 		return;
192 
193 	int32 cpu = smp_get_current_cpu();
194 
195 	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
196 		if (sLatency[cpu][i].lock == lock) {
197 			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
198 			if (diff > DEBUG_LATENCY && diff < 500000) {
199 				panic("spinlock %p was held for %lld usecs (%d allowed)\n",
200 					lock, diff, DEBUG_LATENCY);
201 			}
202 
203 			sLatency[cpu][i].lock = NULL;
204 		}
205 	}
206 }
207 
208 
209 #endif	// DEBUG_SPINLOCK_LATENCIES
210 
211 
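/*!	KDL command "ici": dumps the pending broadcast and per-CPU ICI messages,
	including how many of the broadcast messages are already done or no longer
	referenced.
*/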
212 int
213 dump_ici_messages(int argc, char** argv)
214 {
215 	// count broadcast messages
216 	int32 count = 0;
217 	int32 doneCount = 0;
218 	int32 unreferencedCount = 0;
219 	smp_msg* message = sBroadcastMessages;
220 	while (message != NULL) {
221 		count++;
222 		if (message->done)
223 			doneCount++;
224 		if (message->ref_count <= 0)
225 			unreferencedCount++;
226 		message = message->next;
227 	}
228 
229 	kprintf("ICI broadcast messages: %ld, first: %p\n", count,
230 		sBroadcastMessages);
231 	kprintf("  done:         %ld\n", doneCount);
232 	kprintf("  unreferenced: %ld\n", unreferencedCount);
233 
234 	// count per-CPU messages
235 	for (int32 i = 0; i < sNumCPUs; i++) {
236 		count = 0;
237 		message = sCPUMessages[i];
238 		while (message != NULL) {
239 			count++;
240 			message = message->next;
241 		}
242 
243 		kprintf("CPU %ld messages: %ld, first: %p\n", i, count,
244 			sCPUMessages[i]);
245 	}
246 
247 	return 0;
248 }
249 
250 
251 int
252 dump_ici_message(int argc, char** argv)
253 {
254 	if (argc != 2) {
255 		print_debugger_command_usage(argv[0]);
256 		return 0;
257 	}
258 
259 	uint64 address;
260 	if (!evaluate_debug_expression(argv[1], &address, false))
261 		return 0;
262 
263 	smp_msg* message = (smp_msg*)(addr_t)address;
264 	kprintf("ICI message %p:\n", message);
265 	kprintf("  next:        %p\n", message->next);
266 	kprintf("  message:     %ld\n", message->message);
267 	kprintf("  data:        %ld\n", message->data);
268 	kprintf("  data2:       %ld\n", message->data2);
269 	kprintf("  data3:       %ld\n", message->data3);
270 	kprintf("  data_ptr:    %p\n", message->data_ptr);
271 	kprintf("  flags:       %lx\n", message->flags);
272 	kprintf("  ref_count:   %lx\n", message->ref_count);
273 	kprintf("  done:        %s\n", message->done ? "true" : "false");
274 	kprintf("  proc_bitmap: %lx\n", message->proc_bitmap);
275 
276 	return 0;
277 }
278 
279 
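/*!	Processes pending ICI messages for \a currentCPU until none are left. */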
280 static inline void
281 process_all_pending_ici(int32 currentCPU)
282 {
283 	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
284 		;
285 }
286 
287 
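/*!	Acquires \a lock, spinning until it becomes available. Pending ICI
	messages are processed while spinning, so that another CPU waiting for us
	to acknowledge a synchronous ICI cannot deadlock against this acquisition.
	In the default (non-contention-debug) build the function panics if the
	lock cannot be acquired within SPINLOCK_DEADLOCK_COUNT iterations.
	Exported as acquire_spinlock() via the weak alias at the end of this file.
*/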
288 void
289 _acquire_spinlock(spinlock *lock)
290 {
291 #if DEBUG_SPINLOCKS
292 	if (are_interrupts_enabled()) {
293 		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
294 			"enabled", lock);
295 	}
296 #endif
297 
298 	if (sNumCPUs > 1) {
299 		int currentCPU = smp_get_current_cpu();
300 #if B_DEBUG_SPINLOCK_CONTENTION
301 		while (atomic_add(&lock->lock, 1) != 0)
302 			process_all_pending_ici(currentCPU);
303 #else
304 		while (1) {
305 			uint32 count = 0;
306 			while (*lock != 0) {
307 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
308 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
309 						"for a long time!", lock);
310 					count = 0;
311 				}
312 
313 				process_all_pending_ici(currentCPU);
314 				PAUSE();
315 			}
316 			if (atomic_or((int32 *)lock, 1) == 0)
317 				break;
318 		}
319 
320 #	if DEBUG_SPINLOCKS
321 		push_lock_caller(arch_debug_get_caller(), lock);
322 #	endif
323 #endif
324 	} else {
325 #if DEBUG_SPINLOCKS
326 		int32 oldValue;
327 		oldValue = atomic_or((int32 *)lock, 1);
328 		if (oldValue != 0) {
329 			panic("acquire_spinlock: attempt to acquire lock %p twice on "
330 				"non-SMP system (last caller: %p, value %ld)", lock,
331 				find_lock_caller(lock), oldValue);
332 		}
333 
334 		push_lock_caller(arch_debug_get_caller(), lock);
335 #endif
336 	}
337 #if DEBUG_SPINLOCK_LATENCIES
338 	push_latency(lock);
339 #endif
340 }
341 
342 
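/*!	Like _acquire_spinlock(), but does not process pending ICI messages while
	spinning. It is mainly used for the internal mailbox spinlocks, presumably
	since handling an ICI while acquiring those locks could recurse into the
	message processing code. The deadlock panic threshold is raised
	accordingly (SPINLOCK_DEADLOCK_COUNT_NO_CHECK).
*/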
343 static void
344 acquire_spinlock_nocheck(spinlock *lock)
345 {
346 #if DEBUG_SPINLOCKS
347 	if (are_interrupts_enabled()) {
348 		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
349 			"interrupts enabled", lock);
350 	}
351 #endif
352 
353 	if (sNumCPUs > 1) {
354 #if B_DEBUG_SPINLOCK_CONTENTION
355 		while (atomic_add(&lock->lock, 1) != 0) {
356 		}
357 #else
358 		while (1) {
359 			uint32 count = 0;
360 			while (*lock != 0) {
361 				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
362 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
363 						"for a long time!", lock);
364 					count = 0;
365 				}
366 
367 				PAUSE();
368 			}
369 
370 			if (atomic_or((int32 *)lock, 1) == 0)
371 				break;
372 		}
373 #endif
374 	} else {
375 #if DEBUG_SPINLOCKS
376 		if (atomic_or((int32 *)lock, 1) != 0) {
377 			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
378 				"on non-SMP system\n", lock);
379 		}
380 #endif
381 	}
382 }
383 
384 
385 /*!	Equivalent to acquire_spinlock(), save for the \a currentCPU parameter. */
386 static void
387 acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
388 {
389 #if DEBUG_SPINLOCKS
390 	if (are_interrupts_enabled()) {
391 		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
392 			"interrupts enabled", lock);
393 	}
394 #endif
395 
396 	if (sNumCPUs > 1) {
397 #if B_DEBUG_SPINLOCK_CONTENTION
398 		while (atomic_add(&lock->lock, 1) != 0)
399 			process_all_pending_ici(currentCPU);
400 #else
401 		while (1) {
402 			uint32 count = 0;
403 			while (*lock != 0) {
404 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
405 					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
406 						"%p for a long time!", lock);
407 					count = 0;
408 				}
409 
410 				process_all_pending_ici(currentCPU);
411 				PAUSE();
412 			}
413 			if (atomic_or((int32 *)lock, 1) == 0)
414 				break;
415 		}
416 
417 #	if DEBUG_SPINLOCKS
418 		push_lock_caller(arch_debug_get_caller(), lock);
419 #	endif
420 #endif
421 	} else {
422 #if DEBUG_SPINLOCKS
423 		int32 oldValue;
424 		oldValue = atomic_or((int32 *)lock, 1);
425 		if (oldValue != 0) {
426 			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
427 				"non-SMP system (last caller: %p, value %ld)", lock,
428 				find_lock_caller(lock), oldValue);
429 		}
430 
431 		push_lock_caller(arch_debug_get_caller(), lock);
432 #endif
433 	}
434 }
435 
436 
437 void
438 release_spinlock(spinlock *lock)
439 {
440 #if DEBUG_SPINLOCK_LATENCIES
441 	test_latency(lock);
442 #endif
443 
444 	if (sNumCPUs > 1) {
445 		if (are_interrupts_enabled())
446 			panic("release_spinlock: attempt to release lock %p with interrupts enabled\n", lock);
447 #if B_DEBUG_SPINLOCK_CONTENTION
448 		{
449 			int32 count = atomic_and(&lock->lock, 0) - 1;
450 			if (count < 0) {
451 				panic("release_spinlock: lock %p was already released\n", lock);
452 			} else {
453 				// add to the total count -- deal with carry manually
454 				if ((uint32)atomic_add(&lock->count_low, count) + count
455 						< (uint32)count) {
456 					atomic_add(&lock->count_high, 1);
457 				}
458 			}
459 		}
460 #else
461 		if (atomic_and((int32 *)lock, 0) != 1)
462 			panic("release_spinlock: lock %p was already released\n", lock);
463 #endif
464 	} else {
465 #if DEBUG_SPINLOCKS
466 		if (are_interrupts_enabled())
467 			panic("release_spinlock: attempt to release lock %p with interrupts enabled\n", lock);
468 		if (atomic_and((int32 *)lock, 0) != 1)
469 			panic("release_spinlock: lock %p was already released\n", lock);
470 #endif
471 #if DEBUG_SPINLOCK_LATENCIES
472 		test_latency(lock);
473 #endif
474 	}
475 }
476 
477 
478 /*!	Finds a free message and removes it from the free list.
479 	NOTE: This function has the side effect of disabling interrupts.
480 	The return value is the previous interrupt state.
481 */
482 
483 static cpu_status
484 find_free_message(struct smp_msg **msg)
485 {
486 	cpu_status state;
487 
488 	TRACE(("find_free_message: entry\n"));
489 
490 retry:
491 	while (sFreeMessageCount <= 0) {
492 		state = disable_interrupts();
493 		process_all_pending_ici(smp_get_current_cpu());
494 		restore_interrupts(state);
495 		PAUSE();
496 	}
497 	state = disable_interrupts();
498 	acquire_spinlock(&sFreeMessageSpinlock);
499 
500 	if (sFreeMessageCount <= 0) {
501 		// someone grabbed one while we were getting the lock,
502 		// go back to waiting for it
503 		release_spinlock(&sFreeMessageSpinlock);
504 		restore_interrupts(state);
505 		goto retry;
506 	}
507 
508 	*msg = sFreeMessages;
509 	sFreeMessages = (*msg)->next;
510 	sFreeMessageCount--;
511 
512 	release_spinlock(&sFreeMessageSpinlock);
513 
514 	TRACE(("find_free_message: returning msg %p\n", *msg));
515 
516 	return state;
517 }
518 
519 
520 /*!	Similar to find_free_message(), but expects the interrupts to be disabled
521 	already.
522 */
523 static void
524 find_free_message_interrupts_disabled(int32 currentCPU,
525 	struct smp_msg** _message)
526 {
527 	TRACE(("find_free_message_interrupts_disabled: entry\n"));
528 
529 	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
530 	while (sFreeMessageCount <= 0) {
531 		release_spinlock(&sFreeMessageSpinlock);
532 		process_all_pending_ici(currentCPU);
533 		PAUSE();
534 		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
535 	}
536 
537 	*_message = sFreeMessages;
538 	sFreeMessages = (*_message)->next;
539 	sFreeMessageCount--;
540 
541 	release_spinlock(&sFreeMessageSpinlock);
542 
543 	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
544 		*_message));
545 }
546 
547 
548 static void
549 return_free_message(struct smp_msg *msg)
550 {
551 	TRACE(("return_free_message: returning msg %p\n", msg));
552 
553 	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
554 	msg->next = sFreeMessages;
555 	sFreeMessages = msg;
556 	sFreeMessageCount++;
557 	release_spinlock(&sFreeMessageSpinlock);
558 }
559 
560 
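/*!	Returns the next pending ICI message for \a currentCPU, or NULL if there
	is none (or ICIs are not enabled yet). The CPU-local mailbox is checked
	first; a message found there is unlinked right away. Otherwise the
	broadcast mailbox is scanned for a message this CPU has not processed yet,
	which is then marked in its proc_bitmap. \a source_mailbox is set to
	MAILBOX_LOCAL or MAILBOX_BCAST accordingly.
*/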
561 static struct smp_msg *
562 check_for_message(int currentCPU, int *source_mailbox)
563 {
564 	struct smp_msg *msg;
565 
566 	if (!sICIEnabled)
567 		return NULL;
568 
569 	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);
570 	msg = sCPUMessages[currentCPU];
571 	if (msg != NULL) {
572 		sCPUMessages[currentCPU] = msg->next;
573 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
574 		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
575 		*source_mailbox = MAILBOX_LOCAL;
576 	} else {
577 		// try getting one from the broadcast mailbox
578 
579 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
580 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
581 
582 		msg = sBroadcastMessages;
583 		while (msg != NULL) {
584 			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
585 				// we have handled this one already
586 				msg = msg->next;
587 				continue;
588 			}
589 
590 			// mark it so we won't try to process this one again
591 			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
592 			*source_mailbox = MAILBOX_BCAST;
593 			break;
594 		}
595 		release_spinlock(&sBroadcastMessageSpinlock);
596 		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU, msg));
597 	}
598 	return msg;
599 }
600 
601 
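/*!	Decrements \a msg's reference count. The CPU that drops the count to zero
	unlinks broadcast messages from their mailbox, frees an attached data_ptr
	if SMP_MSG_FLAG_FREE_ARG is set, and then either marks the message done
	(for SMP_MSG_FLAG_SYNC messages, which the sender returns to the free
	pool) or returns it to the free pool itself.
*/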
602 static void
603 finish_message_processing(int currentCPU, struct smp_msg *msg, int source_mailbox)
604 {
605 	int old_refcount;
606 
607 	old_refcount = atomic_add(&msg->ref_count, -1);
608 	if (old_refcount == 1) {
609 		// we were the last one to decrement the ref_count
610 		// it's our job to remove it from the list & possibly clean it up
611 		struct smp_msg **mbox = NULL;
612 		spinlock *spinlock = NULL;
613 
614 		// clean up the message from one of the mailboxes
615 		switch (source_mailbox) {
616 			case MAILBOX_BCAST:
617 				mbox = &sBroadcastMessages;
618 				spinlock = &sBroadcastMessageSpinlock;
619 				break;
620 			case MAILBOX_LOCAL:
621 				mbox = &sCPUMessages[currentCPU];
622 				spinlock = &sCPUMessageSpinlock[currentCPU];
623 				break;
624 		}
625 
626 		acquire_spinlock_nocheck(spinlock);
627 
628 		TRACE(("cleaning up message %p\n", msg));
629 
630 		if (source_mailbox != MAILBOX_BCAST) {
631 			// local mailbox -- the message has already been removed in
632 			// check_for_message()
633 		} else if (msg == *mbox) {
634 			(*mbox) = msg->next;
635 		} else {
636 			// we need to walk to find the message in the list.
637 			// we can't use any data found when previously walking through
638 			// the list, since the list may have changed. But, we are guaranteed
639 			// to at least have msg in it.
640 			struct smp_msg *last = NULL;
641 			struct smp_msg *msg1;
642 
643 			msg1 = *mbox;
644 			while (msg1 != NULL && msg1 != msg) {
645 				last = msg1;
646 				msg1 = msg1->next;
647 			}
648 
649 			// by definition, last must be something
650 			if (msg1 == msg && last != NULL)
651 				last->next = msg->next;
652 			else
653 				panic("last == NULL or msg != msg1");
654 		}
655 
656 		release_spinlock(spinlock);
657 
658 		if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
659 			free(msg->data_ptr);
660 
661 		if (msg->flags & SMP_MSG_FLAG_SYNC) {
662 			msg->done = true;
663 			// the caller cpu should now free the message
664 		} else {
665 			// in the !SYNC case, we get to free the message
666 			return_free_message(msg);
667 		}
668 	}
669 }
670 
671 
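/*!	Fetches and handles a single pending ICI message for \a currentCPU.
	Returns B_ENTRY_NOT_FOUND if no message was pending, B_HANDLED_INTERRUPT
	otherwise. For SMP_MSG_CPU_HALT the CPU is trapped in the kernel debugger
	after the message has been accounted for.
*/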
672 static int32
673 process_pending_ici(int32 currentCPU)
674 {
675 	struct smp_msg *msg;
676 	bool haltCPU = false;
677 	int sourceMailbox = 0;
678 	int retval = B_HANDLED_INTERRUPT;
679 
680 	msg = check_for_message(currentCPU, &sourceMailbox);
681 	if (msg == NULL)
682 		return B_ENTRY_NOT_FOUND;
683 
684 	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));
685 
686 	switch (msg->message) {
687 		case SMP_MSG_INVALIDATE_PAGE_RANGE:
688 			arch_cpu_invalidate_TLB_range((addr_t)msg->data, (addr_t)msg->data2);
689 			break;
690 		case SMP_MSG_INVALIDATE_PAGE_LIST:
691 			arch_cpu_invalidate_TLB_list((addr_t *)msg->data, (int)msg->data2);
692 			break;
693 		case SMP_MSG_USER_INVALIDATE_PAGES:
694 			arch_cpu_user_TLB_invalidate();
695 			break;
696 		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
697 			arch_cpu_global_TLB_invalidate();
698 			break;
699 		case SMP_MSG_CPU_HALT:
700 			haltCPU = true;
701 			break;
702 		case SMP_MSG_CALL_FUNCTION:
703 		{
704 			smp_call_func func = (smp_call_func)msg->data_ptr;
705 			func(msg->data, currentCPU, msg->data2, msg->data3);
706 			break;
707 		}
708 		case SMP_MSG_RESCHEDULE:
709 		{
710 			cpu_ent* cpu = thread_get_current_thread()->cpu;
711 			cpu->invoke_scheduler = true;
712 			cpu->invoke_scheduler_if_idle = false;
713 			break;
714 		}
715 		case SMP_MSG_RESCHEDULE_IF_IDLE:
716 		{
717 			cpu_ent* cpu = thread_get_current_thread()->cpu;
718 			if (!cpu->invoke_scheduler) {
719 				cpu->invoke_scheduler = true;
720 				cpu->invoke_scheduler_if_idle = true;
721 			}
722 			break;
723 		}
724 		default:
725 			dprintf("smp_intercpu_int_handler: got unknown message %ld\n", msg->message);
726 	}
727 
728 	// finish dealing with this message, possibly removing it from the list
729 	finish_message_processing(currentCPU, msg, sourceMailbox);
730 
731 	// special case for the halt message
732 	if (haltCPU)
733 		debug_trap_cpu_in_kdl(currentCPU, false);
734 
735 	return retval;
736 }
737 
738 
739 #if B_DEBUG_SPINLOCK_CONTENTION
740 
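/*!	Assembles the 64 bit contention counter from its two 32 bit halves. The
	high word is re-read after the low word and the read retried on a
	mismatch, which yields a consistent snapshot without taking a lock.
*/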
741 static uint64
742 get_spinlock_counter(spinlock* lock)
743 {
744 	uint32 high;
745 	uint32 low;
746 	do {
747 		high = (uint32)atomic_get(&lock->count_high);
748 		low = (uint32)atomic_get(&lock->count_low);
749 	} while (high != atomic_get(&lock->count_high));
750 
751 	return ((uint64)high << 32) | low;
752 }
753 
754 
755 static status_t
756 spinlock_contention_syscall(const char* subsystem, uint32 function,
757 	void* buffer, size_t bufferSize)
758 {
759 	spinlock_contention_info info;
760 
761 	if (function != GET_SPINLOCK_CONTENTION_INFO)
762 		return B_BAD_VALUE;
763 
764 	if (bufferSize < sizeof(spinlock_contention_info))
765 		return B_BAD_VALUE;
766 
767 	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
768 	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);
769 
770 	if (!IS_USER_ADDRESS(buffer)
771 		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
772 		return B_BAD_ADDRESS;
773 	}
774 
775 	return B_OK;
776 }
777 
778 #endif	// B_DEBUG_SPINLOCK_CONTENTION
779 
780 
781 //	#pragma mark -
782 
783 
784 int
785 smp_intercpu_int_handler(int32 cpu)
786 {
787 	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));
788 
789 	process_all_pending_ici(cpu);
790 
791 	TRACE(("smp_intercpu_int_handler: done\n"));
792 
793 	return B_HANDLED_INTERRUPT;
794 }
795 
796 
797 void
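/*!	Sends an ICI message to \a targetCPU and, for SMP_MSG_FLAG_SYNC, spins
	(processing our own pending ICIs) until the target has handled it. Sending
	a message to the calling CPU itself is silently ignored.

	A hypothetical caller flushing a TLB range on another CPU might look like
	this (arguments as interpreted by process_pending_ici()):

		smp_send_ici(targetCPU, SMP_MSG_INVALIDATE_PAGE_RANGE, start, end,
			0, NULL, SMP_MSG_FLAG_SYNC);
*/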
798 smp_send_ici(int32 targetCPU, int32 message, uint32 data, uint32 data2, uint32 data3,
799 	void *data_ptr, uint32 flags)
800 {
801 	struct smp_msg *msg;
802 
803 	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
804 		targetCPU, message, data, data2, data3, data_ptr, flags));
805 
806 	if (sICIEnabled) {
807 		int state;
808 		int currentCPU;
809 
810 		// find_free_message leaves interrupts disabled
811 		state = find_free_message(&msg);
812 
813 		currentCPU = smp_get_current_cpu();
814 		if (targetCPU == currentCPU) {
815 			return_free_message(msg);
816 			restore_interrupts(state);
817 			return; // we can't send an ICI to ourselves
818 		}
819 
820 		// set up the message
821 		msg->message = message;
822 		msg->data = data;
823 		msg->data2 = data2;
824 		msg->data3 = data3;
825 		msg->data_ptr = data_ptr;
826 		msg->ref_count = 1;
827 		msg->flags = flags;
828 		msg->done = false;
829 
830 		// stick it in the appropriate cpu's mailbox
831 		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
832 		msg->next = sCPUMessages[targetCPU];
833 		sCPUMessages[targetCPU] = msg;
834 		release_spinlock(&sCPUMessageSpinlock[targetCPU]);
835 
836 		arch_smp_send_ici(targetCPU);
837 
838 		if (flags & SMP_MSG_FLAG_SYNC) {
839 			// Wait for the target CPU to finish processing the message. The
840 			// interrupt handler sets msg->done for SYNC messages once it has
841 			// removed the message from its mailbox and processed it.
842 			while (msg->done == false) {
843 				process_all_pending_ici(currentCPU);
844 				PAUSE();
845 			}
846 			// for SYNC messages, it's our responsibility to put it
847 			// back into the free list
848 			return_free_message(msg);
849 		}
850 
851 		restore_interrupts(state);
852 	}
853 }
854 
855 
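/*!	Sends an ICI message to all CPUs set in \a cpuMask; the calling CPU is
	always excluded. The message goes into the broadcast mailbox with the
	excluded CPUs pre-marked in its proc_bitmap; currently a full broadcast
	interrupt is sent and non-targeted CPUs simply skip the message.
*/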
856 void
857 smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, uint32 data,
858 	uint32 data2, uint32 data3, void *data_ptr, uint32 flags)
859 {
860 	if (!sICIEnabled)
861 		return;
862 
863 	int currentCPU = smp_get_current_cpu();
864 	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
865 		& (((cpu_mask_t)1 << sNumCPUs) - 1);
866 	if (cpuMask == 0) {
867 		panic("smp_send_multicast_ici(): 0 CPU mask");
868 		return;
869 	}
870 
871 	// count target CPUs
872 	int32 targetCPUs = 0;
873 	for (int32 i = 0; i < sNumCPUs; i++) {
874 		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
875 			targetCPUs++;
876 	}
877 
878 	// find_free_message leaves interrupts disabled
879 	struct smp_msg *msg;
880 	int state = find_free_message(&msg);
881 
882 	msg->message = message;
883 	msg->data = data;
884 	msg->data2 = data2;
885 	msg->data3 = data3;
886 	msg->data_ptr = data_ptr;
887 	msg->ref_count = targetCPUs;
888 	msg->flags = flags;
889 	msg->proc_bitmap = ~cpuMask;
890 	msg->done = false;
891 
892 	// stick it in the broadcast mailbox
893 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
894 	msg->next = sBroadcastMessages;
895 	sBroadcastMessages = msg;
896 	release_spinlock(&sBroadcastMessageSpinlock);
897 
898 	arch_smp_send_broadcast_ici();
899 		// TODO: Introduce a call that only bothers the target CPUs!
900 
901 	if (flags & SMP_MSG_FLAG_SYNC) {
902 		// Wait for the target CPUs to finish processing the message. The
903 		// interrupt handler on the last CPU to process it sets msg->done for
904 		// SYNC messages.
905 		while (msg->done == false) {
906 			process_all_pending_ici(currentCPU);
907 			PAUSE();
908 		}
909 
910 		// for SYNC messages, it's our responsibility to put it
911 		// back into the free list
912 		return_free_message(msg);
913 	}
914 
915 	restore_interrupts(state);
916 }
917 
918 
919 void
920 smp_send_broadcast_ici(int32 message, uint32 data, uint32 data2, uint32 data3,
921 	void *data_ptr, uint32 flags)
922 {
923 	struct smp_msg *msg;
924 
925 	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
926 		smp_get_current_cpu(), message, data, data2, data3, data_ptr, flags));
927 
928 	if (sICIEnabled) {
929 		int state;
930 		int currentCPU;
931 
932 		// find_free_message leaves interrupts disabled
933 		state = find_free_message(&msg);
934 
935 		currentCPU = smp_get_current_cpu();
936 
937 		msg->message = message;
938 		msg->data = data;
939 		msg->data2 = data2;
940 		msg->data3 = data3;
941 		msg->data_ptr = data_ptr;
942 		msg->ref_count = sNumCPUs - 1;
943 		msg->flags = flags;
944 		msg->proc_bitmap = SET_BIT(0, currentCPU);
945 		msg->done = false;
946 
947 		TRACE(("smp_send_broadcast_ici %d: inserting msg %p into broadcast mbox\n",
948 			currentCPU, msg));
949 
950 		// stick it in the appropriate cpu's mailbox
951 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
952 		msg->next = sBroadcastMessages;
953 		sBroadcastMessages = msg;
954 		release_spinlock(&sBroadcastMessageSpinlock);
955 
956 		arch_smp_send_broadcast_ici();
957 
958 		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));
959 
960 		if (flags & SMP_MSG_FLAG_SYNC) {
961 			// Wait for the other CPUs to finish processing the message. The
962 			// interrupt handler on the last CPU to process it sets msg->done
963 			// for SYNC messages.
964 			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));
965 
966 			while (msg->done == false) {
967 				process_all_pending_ici(currentCPU);
968 				PAUSE();
969 			}
970 
971 			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));
972 
973 			// for SYNC messages, it's our responsibility to put it
974 			// back into the free list
975 			return_free_message(msg);
976 		}
977 
978 		restore_interrupts(state);
979 	}
980 
981 	TRACE(("smp_send_broadcast_ici: done\n"));
982 }
983 
984 
985 void
986 smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
987 	uint32 data, uint32 data2, uint32 data3, void *data_ptr, uint32 flags)
988 {
989 	if (!sICIEnabled)
990 		return;
991 
992 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
993 		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
994 		currentCPU, message, data, data2, data3, data_ptr, flags));
995 
996 	struct smp_msg *msg;
997 	find_free_message_interrupts_disabled(currentCPU, &msg);
998 
999 	msg->message = message;
1000 	msg->data = data;
1001 	msg->data2 = data2;
1002 	msg->data3 = data3;
1003 	msg->data_ptr = data_ptr;
1004 	msg->ref_count = sNumCPUs - 1;
1005 	msg->flags = flags;
1006 	msg->proc_bitmap = SET_BIT(0, currentCPU);
1007 	msg->done = false;
1008 
1009 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
1010 		"into broadcast mbox\n", currentCPU, msg));
1011 
1012 	// stick it in the appropriate cpu's mailbox
1013 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1014 	msg->next = sBroadcastMessages;
1015 	sBroadcastMessages = msg;
1016 	release_spinlock(&sBroadcastMessageSpinlock);
1017 
1018 	arch_smp_send_broadcast_ici();
1019 
1020 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
1021 		currentCPU));
1022 
1023 	if (flags & SMP_MSG_FLAG_SYNC) {
1024 		// Wait for the other CPUs to finish processing the message. The
1025 		// interrupt handler on the last CPU to process it sets msg->done for
1026 		// SYNC messages.
1027 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
1028 			"ack\n", currentCPU));
1029 
1030 		while (msg->done == false) {
1031 			process_all_pending_ici(currentCPU);
1032 			PAUSE();
1033 		}
1034 
1035 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
1036 			"message to free list\n", currentCPU));
1037 
1038 		// for SYNC messages, it's our responsibility to put it
1039 		// back into the free list
1040 		return_free_message(msg);
1041 	}
1042 
1043 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
1044 }
1045 
1046 
1047 /*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.
1048 
1049 	\param cpu The index of the calling CPU.
1050 	\param rendezVous A rendez-vous variable to make sure that the boot CPU
1051 		does not return before all other CPUs have started waiting.
1052 	\return \c true on the boot CPU, \c false otherwise.
1053 */
1054 bool
1055 smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
1056 {
1057 	if (cpu == 0) {
1058 		smp_cpu_rendezvous(rendezVous, cpu);
1059 		return true;
1060 	}
1061 
1062 	acquire_spinlock_nocheck(&boot_cpu_spin[cpu]);
1063 	smp_cpu_rendezvous(rendezVous, cpu);
1064 	acquire_spinlock_nocheck(&boot_cpu_spin[cpu]);
1065 	return false;
1066 }
1067 
1068 
1069 void
1070 smp_wake_up_non_boot_cpus()
1071 {
1072 	int i;
1073 
1074 	// ICIs were previously being ignored
1075 	if (sNumCPUs > 1)
1076 		sICIEnabled = true;
1077 
1078 	// resume the non-boot CPUs
1079 	for (i = 1; i < sNumCPUs; i++) {
1080 		release_spinlock(&boot_cpu_spin[i]);
1081 	}
1082 }
1083 
1084 
1085 /*!	Spin until all CPUs have reached the rendez-vous point.
1086 
1087 	The rendez-vous variable \c *var must have been initialized to 0 before the
1088 	function is called. The variable will be non-zero when the function returns.
1089 
1090 	Note that when the function returns on one CPU, it only means that all CPUs
1091 	have already entered the function. It does not mean that the variable can
1092 	already be reset. Only when all CPUs have returned (which would have to be
1093 	ensured via another rendez-vous) can the variable be reset.
1094 */
1095 void
1096 smp_cpu_rendezvous(volatile uint32 *var, int current_cpu)
1097 {
1098 	atomic_or((vint32*)var, 1 << current_cpu);
1099 
1100 	while (*var != (((uint32)1 << sNumCPUs) - 1))
1101 		PAUSE();
1102 }
1103 
1104 
1105 status_t
1106 smp_init(kernel_args *args)
1107 {
1108 	TRACE(("smp_init: entry\n"));
1109 
1110 #if DEBUG_SPINLOCK_LATENCIES
1111 	sEnableLatencyCheck
1112 		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
1113 #endif
1114 
1115 #if DEBUG_SPINLOCKS
1116 	add_debugger_command_etc("spinlock", &dump_spinlock,
1117 		"Dump info on a spinlock",
1118 		"\n"
1119 		"Dumps info on a spinlock.\n", 0);
1120 #endif
1121 	add_debugger_command_etc("ici", &dump_ici_messages,
1122 		"Dump info on pending ICI messages",
1123 		"\n"
1124 		"Dumps info on pending ICI messages.\n", 0);
1125 	add_debugger_command_etc("ici_message", &dump_ici_message,
1126 		"Dump info on an ICI message",
1127 		"\n"
1128 		"Dumps info on an ICI message.\n", 0);
1129 
1130 	if (args->num_cpus > 1) {
1131 		sFreeMessages = NULL;
1132 		sFreeMessageCount = 0;
1133 		for (int i = 0; i < MSG_POOL_SIZE; i++) {
1134 			struct smp_msg *msg
1135 				= (struct smp_msg *)malloc(sizeof(struct smp_msg));
1136 			if (msg == NULL) {
1137 				panic("error creating smp mailboxes\n");
1138 				return B_ERROR;
1139 			}
1140 			memset(msg, 0, sizeof(struct smp_msg));
1141 			msg->next = sFreeMessages;
1142 			sFreeMessages = msg;
1143 			sFreeMessageCount++;
1144 		}
1145 		sNumCPUs = args->num_cpus;
1146 	}
1147 	TRACE(("smp_init: calling arch_smp_init\n"));
1148 
1149 	return arch_smp_init(args);
1150 }
1151 
1152 
1153 status_t
1154 smp_per_cpu_init(kernel_args *args, int32 cpu)
1155 {
1156 	return arch_smp_per_cpu_init(args, cpu);
1157 }
1158 
1159 
1160 status_t
1161 smp_init_post_generic_syscalls(void)
1162 {
1163 #if B_DEBUG_SPINLOCK_CONTENTION
1164 	return register_generic_syscall(SPINLOCK_CONTENTION,
1165 		&spinlock_contention_syscall, 0, 0);
1166 #else
1167 	return B_OK;
1168 #endif
1169 }
1170 
1171 
1172 void
1173 smp_set_num_cpus(int32 numCPUs)
1174 {
1175 	sNumCPUs = numCPUs;
1176 }
1177 
1178 
1179 int32
1180 smp_get_num_cpus()
1181 {
1182 	return sNumCPUs;
1183 }
1184 
1185 
1186 int32
1187 smp_get_current_cpu(void)
1188 {
1189 	return thread_get_current_thread()->cpu->cpu_num;
1190 }
1191 
1192 
1193 //	#pragma mark -
1194 //	public exported functions
1195 
1196 
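/*!	Calls \a func with \a cookie and the CPU index on every CPU:
	asynchronously via ICI on the other CPUs and directly on the calling one.
	A hypothetical use, assuming a suitable per-CPU worker function:

		static void
		sync_cpu_clock(void* cookie, int cpu)
		{
			// per-CPU work goes here
		}

		call_all_cpus(&sync_cpu_clock, NULL);
*/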
1197 void
1198 call_all_cpus(void (*func)(void *, int), void *cookie)
1199 {
1200 	cpu_status state = disable_interrupts();
1201 
1202 	if (smp_get_num_cpus() > 1) {
1203 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1204 			0, 0, (void *)func, SMP_MSG_FLAG_ASYNC);
1205 	}
1206 
1207 	// we need to call this function ourselves as well
1208 	func(cookie, smp_get_current_cpu());
1209 
1210 	restore_interrupts(state);
1211 }
1212 
1213 void
1214 call_all_cpus_sync(void (*func)(void *, int), void *cookie)
1215 {
1216 	cpu_status state = disable_interrupts();
1217 
1218 	if (smp_get_num_cpus() > 1) {
1219 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1220 			0, 0, (void *)func, SMP_MSG_FLAG_SYNC);
1221 	}
1222 
1223 	// we need to call this function ourselves as well
1224 	func(cookie, smp_get_current_cpu());
1225 
1226 	restore_interrupts(state);
1227 }
1228 
1229 
1230 void
1231 memory_read_barrier(void)
1232 {
1233 	arch_cpu_memory_read_barrier();
1234 }
1235 
1236 
1237 void
1238 memory_write_barrier(void)
1239 {
1240 	arch_cpu_memory_write_barrier();
1241 }
1242 
1243 
1244 #pragma weak acquire_spinlock=_acquire_spinlock
1245