xref: /haiku/src/system/kernel/smp.cpp (revision b46615c55ad2c8fe6de54412055a0713da3d610a)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Functionality for symmetric multi-processing (SMP) */
12 
13 
14 #include <smp.h>
15 
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/debug.h>
21 #include <arch/int.h>
22 #include <arch/smp.h>
23 #include <boot/kernel_args.h>
24 #include <cpu.h>
25 #include <generic_syscall.h>
26 #include <int.h>
27 #include <spinlock_contention.h>
28 #include <thread.h>
29 #if DEBUG_SPINLOCK_LATENCIES
30 #	include <safemode.h>
31 #endif
32 
33 #include "kernel_debug_config.h"
34 
35 
36 //#define TRACE_SMP
37 #ifdef TRACE_SMP
38 #	define TRACE(x) dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 
44 #undef acquire_spinlock
45 #undef release_spinlock
46 
47 
48 #define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)
49 
50 // These macros define the number of unsuccessful iterations in
51 // acquire_spinlock() and acquire_spinlock_nocheck() after which the functions
52 // panic(), assuming a deadlock.
53 #define SPINLOCK_DEADLOCK_COUNT				100000000
54 #define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000
55 
56 
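// A single inter-processor message. For broadcast messages, proc_bitmap
// records which CPUs have already picked the message up, while ref_count
// counts the CPUs that still have to process it. The CPU dropping the last
// reference removes the message from its mailbox and either returns it to
// the free list or, for SMP_MSG_FLAG_SYNC messages, sets done so that the
// sender can return it.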
57 struct smp_msg {
58 	struct smp_msg	*next;
59 	int32			message;
60 	uint32			data;
61 	uint32			data2;
62 	uint32			data3;
63 	void			*data_ptr;
64 	uint32			flags;
65 	int32			ref_count;
66 	volatile bool	done;
67 	uint32			proc_bitmap;
68 };
69 
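// Identifies where a message was fetched from: a CPU's private mailbox or
// the shared broadcast mailbox.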
70 enum mailbox_source {
71 	MAILBOX_LOCAL,
72 	MAILBOX_BCAST,
73 };
74 
75 static vint32 sBootCPUSpin = 0;
76 
77 static vint32 sEarlyCPUCall = 0;
78 static void (*sEarlyCPUCallFunction)(void*, int);
79 static void* sEarlyCPUCallCookie;
80 
81 static struct smp_msg* sFreeMessages = NULL;
82 static volatile int sFreeMessageCount = 0;
83 static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;
84 
85 static struct smp_msg* sCPUMessages[SMP_MAX_CPUS] = { NULL, };
86 static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];
87 
88 static struct smp_msg* sBroadcastMessages = NULL;
89 static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;
90 
91 static bool sICIEnabled = false;
92 static int32 sNumCPUs = 1;
93 
94 static int32 process_pending_ici(int32 currentCPU);
95 
96 
97 #if DEBUG_SPINLOCKS
98 #define NUM_LAST_CALLERS	32
99 
100 static struct {
101 	void		*caller;
102 	spinlock	*lock;
103 } sLastCaller[NUM_LAST_CALLERS];
104 
105 static vint32 sLastIndex = 0;
106 	// Is incremented atomically. Must be taken % NUM_LAST_CALLERS before being
107 	// used as an index into sLastCaller. Note that it has to be cast to uint32
108 	// before applying the modulo operation, since otherwise an overflow would
109 	// yield negative indices.
110 
111 
112 static void
113 push_lock_caller(void* caller, spinlock* lock)
114 {
115 	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;
116 
117 	sLastCaller[index].caller = caller;
118 	sLastCaller[index].lock = lock;
119 }
120 
121 
122 static void*
123 find_lock_caller(spinlock* lock)
124 {
125 	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;
126 
127 	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
128 		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
129 		if (sLastCaller[index].lock == lock)
130 			return sLastCaller[index].caller;
131 	}
132 
133 	return NULL;
134 }
135 
136 
137 int
138 dump_spinlock(int argc, char** argv)
139 {
140 	if (argc != 2) {
141 		print_debugger_command_usage(argv[0]);
142 		return 0;
143 	}
144 
145 	uint64 address;
146 	if (!evaluate_debug_expression(argv[1], &address, false))
147 		return 0;
148 
149 	spinlock* lock = (spinlock*)(addr_t)address;
150 	kprintf("spinlock %p:\n", lock);
151 	bool locked = B_SPINLOCK_IS_LOCKED(lock);
152 	if (locked) {
153 		kprintf("  locked from %p\n", find_lock_caller(lock));
154 	} else
155 		kprintf("  not locked\n");
156 
157 	return 0;
158 }
159 
160 
161 #endif	// DEBUG_SPINLOCKS
162 
163 
164 #if DEBUG_SPINLOCK_LATENCIES
165 
166 
167 #define NUM_LATENCY_LOCKS	4
168 #define DEBUG_LATENCY		200
169 
170 
171 static struct {
172 	spinlock	*lock;
173 	bigtime_t	timestamp;
174 } sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];
175 
176 static int32 sLatencyIndex[B_MAX_CPU_COUNT];
177 static bool sEnableLatencyCheck;
178 
179 
180 static void
181 push_latency(spinlock* lock)
182 {
183 	if (!sEnableLatencyCheck)
184 		return;
185 
186 	int32 cpu = smp_get_current_cpu();
187 	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;
188 
189 	sLatency[cpu][index].lock = lock;
190 	sLatency[cpu][index].timestamp = system_time();
191 }
192 
193 
194 static void
195 test_latency(spinlock* lock)
196 {
197 	if (!sEnableLatencyCheck)
198 		return;
199 
200 	int32 cpu = smp_get_current_cpu();
201 
202 	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
203 		if (sLatency[cpu][i].lock == lock) {
204 			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
205 			if (diff > DEBUG_LATENCY && diff < 500000) {
206 				panic("spinlock %p was held for %lld usecs (%d allowed)\n",
207 					lock, diff, DEBUG_LATENCY);
208 			}
209 
210 			sLatency[cpu][i].lock = NULL;
211 		}
212 	}
213 }
214 
215 
216 #endif	// DEBUG_SPINLOCK_LATENCIES
217 
218 
219 int
220 dump_ici_messages(int argc, char** argv)
221 {
222 	// count broadcast messages
223 	int32 count = 0;
224 	int32 doneCount = 0;
225 	int32 unreferencedCount = 0;
226 	smp_msg* message = sBroadcastMessages;
227 	while (message != NULL) {
228 		count++;
229 		if (message->done)
230 			doneCount++;
231 		if (message->ref_count <= 0)
232 			unreferencedCount++;
233 		message = message->next;
234 	}
235 
236 	kprintf("ICI broadcast messages: %ld, first: %p\n", count,
237 		sBroadcastMessages);
238 	kprintf("  done:         %ld\n", doneCount);
239 	kprintf("  unreferenced: %ld\n", unreferencedCount);
240 
241 	// count per-CPU messages
242 	for (int32 i = 0; i < sNumCPUs; i++) {
243 		count = 0;
244 		message = sCPUMessages[i];
245 		while (message != NULL) {
246 			count++;
247 			message = message->next;
248 		}
249 
250 		kprintf("CPU %ld messages: %ld, first: %p\n", i, count,
251 			sCPUMessages[i]);
252 	}
253 
254 	return 0;
255 }
256 
257 
258 int
259 dump_ici_message(int argc, char** argv)
260 {
261 	if (argc != 2) {
262 		print_debugger_command_usage(argv[0]);
263 		return 0;
264 	}
265 
266 	uint64 address;
267 	if (!evaluate_debug_expression(argv[1], &address, false))
268 		return 0;
269 
270 	smp_msg* message = (smp_msg*)(addr_t)address;
271 	kprintf("ICI message %p:\n", message);
272 	kprintf("  next:        %p\n", message->next);
273 	kprintf("  message:     %ld\n", message->message);
274 	kprintf("  data:        %ld\n", message->data);
275 	kprintf("  data2:       %ld\n", message->data2);
276 	kprintf("  data3:       %ld\n", message->data3);
277 	kprintf("  data_ptr:    %p\n", message->data_ptr);
278 	kprintf("  flags:       %lx\n", message->flags);
279 	kprintf("  ref_count:   %ld\n", message->ref_count);
280 	kprintf("  done:        %s\n", message->done ? "true" : "false");
281 	kprintf("  proc_bitmap: %lx\n", message->proc_bitmap);
282 
283 	return 0;
284 }
285 
286 
287 static inline void
288 process_all_pending_ici(int32 currentCPU)
289 {
290 	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
291 		;
292 }
293 
294 
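// While spinning on a contended lock, pending ICIs are processed. This
// prevents a deadlock with the current lock holder in case that holder is
// itself waiting for us to acknowledge a synchronous inter-CPU message.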
295 void
296 acquire_spinlock(spinlock* lock)
297 {
298 #if DEBUG_SPINLOCKS
299 	if (are_interrupts_enabled()) {
300 		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
301 			"enabled", lock);
302 	}
303 #endif
304 
305 	if (sNumCPUs > 1) {
306 		int currentCPU = smp_get_current_cpu();
307 #if B_DEBUG_SPINLOCK_CONTENTION
308 		while (atomic_add(&lock->lock, 1) != 0)
309 			process_all_pending_ici(currentCPU);
310 #else
311 		while (1) {
312 			uint32 count = 0;
313 			while (*lock != 0) {
314 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
315 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
316 						"for a long time!", lock);
317 					count = 0;
318 				}
319 
320 				process_all_pending_ici(currentCPU);
321 				PAUSE();
322 			}
323 			if (atomic_or((int32*)lock, 1) == 0)
324 				break;
325 		}
326 
327 #	if DEBUG_SPINLOCKS
328 		push_lock_caller(arch_debug_get_caller(), lock);
329 #	endif
330 #endif
331 	} else {
332 #if DEBUG_SPINLOCKS
333 		int32 oldValue;
334 		oldValue = atomic_or((int32*)lock, 1);
335 		if (oldValue != 0) {
336 			panic("acquire_spinlock: attempt to acquire lock %p twice on "
337 				"non-SMP system (last caller: %p, value %ld)", lock,
338 				find_lock_caller(lock), oldValue);
339 		}
340 
341 		push_lock_caller(arch_debug_get_caller(), lock);
342 #endif
343 	}
344 #if DEBUG_SPINLOCK_LATENCIES
345 	push_latency(lock);
346 #endif
347 }
348 
349 
350 static void
351 acquire_spinlock_nocheck(spinlock *lock)
352 {
353 #if DEBUG_SPINLOCKS
354 	if (are_interrupts_enabled()) {
355 		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
356 			"interrupts enabled", lock);
357 	}
358 #endif
359 
360 	if (sNumCPUs > 1) {
361 #if B_DEBUG_SPINLOCK_CONTENTION
362 		while (atomic_add(&lock->lock, 1) != 0) {
363 		}
364 #else
365 		while (1) {
366 			uint32 count = 0;
367 			while (*lock != 0) {
368 				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
369 					panic("acquire_spinlock_nocheck(): Failed to acquire "
370 						"spinlock %p for a long time!", lock);
371 					count = 0;
372 				}
373 
374 				PAUSE();
375 			}
376 
377 			if (atomic_or((int32*)lock, 1) == 0)
378 				break;
379 		}
380 #endif
381 	} else {
382 #if DEBUG_SPINLOCKS
383 		if (atomic_or((int32*)lock, 1) != 0) {
384 			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
385 				"on non-SMP system\n", lock);
386 		}
387 #endif
388 	}
389 }
390 
391 
392 /*!	Equivalent to acquire_spinlock(), save for the currentCPU parameter. */
393 static void
394 acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
395 {
396 #if DEBUG_SPINLOCKS
397 	if (are_interrupts_enabled()) {
398 		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
399 			"interrupts enabled", lock);
400 	}
401 #endif
402 
403 	if (sNumCPUs > 1) {
404 #if B_DEBUG_SPINLOCK_CONTENTION
405 		while (atomic_add(&lock->lock, 1) != 0)
406 			process_all_pending_ici(currentCPU);
407 #else
408 		while (1) {
409 			uint32 count = 0;
410 			while (*lock != 0) {
411 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
412 					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
413 						"%p for a long time!", lock);
414 					count = 0;
415 				}
416 
417 				process_all_pending_ici(currentCPU);
418 				PAUSE();
419 			}
420 			if (atomic_or((int32*)lock, 1) == 0)
421 				break;
422 		}
423 
424 #	if DEBUG_SPINLOCKS
425 		push_lock_caller(arch_debug_get_caller(), lock);
426 #	endif
427 #endif
428 	} else {
429 #if DEBUG_SPINLOCKS
430 		int32 oldValue;
431 		oldValue = atomic_or((int32*)lock, 1);
432 		if (oldValue != 0) {
433 			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
434 				"non-SMP system (last caller: %p, value %ld)", lock,
435 				find_lock_caller(lock), oldValue);
436 		}
437 
438 		push_lock_caller(arch_debug_get_caller(), lock);
439 #endif
440 	}
441 }
442 
443 
444 void
445 release_spinlock(spinlock *lock)
446 {
447 #if DEBUG_SPINLOCK_LATENCIES
448 	test_latency(lock);
449 #endif
450 
451 	if (sNumCPUs > 1) {
452 		if (are_interrupts_enabled())
453 			panic("release_spinlock: attempt to release lock %p with "
454 				"interrupts enabled\n", lock);
455 #if B_DEBUG_SPINLOCK_CONTENTION
456 		{
457 			int32 count = atomic_and(&lock->lock, 0) - 1;
458 			if (count < 0) {
459 				panic("release_spinlock: lock %p was already released\n", lock);
460 			} else {
461 				// add to the total count -- deal with carry manually
462 				if ((uint32)atomic_add(&lock->count_low, count) + count
463 						< (uint32)count) {
464 					atomic_add(&lock->count_high, 1);
465 				}
466 			}
467 		}
468 #else
469 		if (atomic_and((int32*)lock, 0) != 1)
470 			panic("release_spinlock: lock %p was already released\n", lock);
471 #endif
472 	} else {
473 #if DEBUG_SPINLOCKS
474 		if (are_interrupts_enabled()) {
475 			panic("release_spinlock: attempt to release lock %p with "
476 				"interrupts enabled\n", lock);
477 		}
478 		if (atomic_and((int32*)lock, 0) != 1)
479 			panic("release_spinlock: lock %p was already released\n", lock);
480 #endif
481 #if DEBUG_SPINLOCK_LATENCIES
482 		test_latency(lock);
483 #endif
484 	}
485 }
486 
487 
488 /*!	Finds a free message and removes it from the free list.
489 	NOTE: this function has the side effect of disabling interrupts;
490 	the return value is the former interrupt state.
491 */
492 static cpu_status
493 find_free_message(struct smp_msg** msg)
494 {
495 	cpu_status state;
496 
497 	TRACE(("find_free_message: entry\n"));
498 
499 retry:
500 	while (sFreeMessageCount <= 0) {
501 		state = disable_interrupts();
502 		process_all_pending_ici(smp_get_current_cpu());
503 		restore_interrupts(state);
504 		PAUSE();
505 	}
506 	state = disable_interrupts();
507 	acquire_spinlock(&sFreeMessageSpinlock);
508 
509 	if (sFreeMessageCount <= 0) {
510 		// someone grabbed one while we were getting the lock,
511 		// go back to waiting for it
512 		release_spinlock(&sFreeMessageSpinlock);
513 		restore_interrupts(state);
514 		goto retry;
515 	}
516 
517 	*msg = sFreeMessages;
518 	sFreeMessages = (*msg)->next;
519 	sFreeMessageCount--;
520 
521 	release_spinlock(&sFreeMessageSpinlock);
522 
523 	TRACE(("find_free_message: returning msg %p\n", *msg));
524 
525 	return state;
526 }
527 
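// A minimal sketch of the intended call pattern (compare smp_send_ici()
// below): the interrupt state returned by find_free_message() has to be
// restored by the caller once the message has been queued (and, for sync
// messages, acknowledged):
//
//	struct smp_msg* msg;
//	cpu_status state = find_free_message(&msg);
//	// ... fill in and queue the message ...
//	restore_interrupts(state);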
528 
529 /*!	Similar to find_free_message(), but expects the interrupts to be disabled
530 	already.
531 */
532 static void
533 find_free_message_interrupts_disabled(int32 currentCPU,
534 	struct smp_msg** _message)
535 {
536 	TRACE(("find_free_message_interrupts_disabled: entry\n"));
537 
538 	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
539 	while (sFreeMessageCount <= 0) {
540 		release_spinlock(&sFreeMessageSpinlock);
541 		process_all_pending_ici(currentCPU);
542 		PAUSE();
543 		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
544 	}
545 
546 	*_message = sFreeMessages;
547 	sFreeMessages = (*_message)->next;
548 	sFreeMessageCount--;
549 
550 	release_spinlock(&sFreeMessageSpinlock);
551 
552 	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
553 		*_message));
554 }
555 
556 
557 static void
558 return_free_message(struct smp_msg* msg)
559 {
560 	TRACE(("return_free_message: returning msg %p\n", msg));
561 
562 	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
563 	msg->next = sFreeMessages;
564 	sFreeMessages = msg;
565 	sFreeMessageCount++;
566 	release_spinlock(&sFreeMessageSpinlock);
567 }
568 
569 
570 static struct smp_msg*
571 check_for_message(int currentCPU, mailbox_source& sourceMailbox)
572 {
573 	if (!sICIEnabled)
574 		return NULL;
575 
576 	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);
577 
578 	struct smp_msg* msg = sCPUMessages[currentCPU];
579 	if (msg != NULL) {
580 		sCPUMessages[currentCPU] = msg->next;
581 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
582 		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
583 		sourceMailbox = MAILBOX_LOCAL;
584 	} else {
585 		// try getting one from the broadcast mailbox
586 
587 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
588 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
589 
590 		msg = sBroadcastMessages;
591 		while (msg != NULL) {
592 			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
593 				// we have handled this one already
594 				msg = msg->next;
595 				continue;
596 			}
597 
598 			// mark it so we won't try to process this one again
599 			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
600 			sourceMailbox = MAILBOX_BCAST;
601 			break;
602 		}
603 		release_spinlock(&sBroadcastMessageSpinlock);
604 
605 		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU,
606 			msg));
607 	}
608 	return msg;
609 }
610 
611 
612 static void
613 finish_message_processing(int currentCPU, struct smp_msg* msg,
614 	mailbox_source sourceMailbox)
615 {
616 	if (atomic_add(&msg->ref_count, -1) != 1)
617 		return;
618 
619 	// we were the last one to decrement the ref_count
620 	// it's our job to remove it from the list & possibly clean it up
621 	struct smp_msg** mbox;
622 	spinlock* spinlock;
623 
624 	// clean up the message from one of the mailboxes
625 	if (sourceMailbox == MAILBOX_BCAST) {
626 		mbox = &sBroadcastMessages;
627 		spinlock = &sBroadcastMessageSpinlock;
628 	} else {
629 		mbox = &sCPUMessages[currentCPU];
630 		spinlock = &sCPUMessageSpinlock[currentCPU];
631 	}
632 
633 	acquire_spinlock_nocheck(spinlock);
634 
635 	TRACE(("cleaning up message %p\n", msg));
636 
637 	if (sourceMailbox != MAILBOX_BCAST) {
638 		// local mailbox -- the message has already been removed in
639 		// check_for_message()
640 	} else if (msg == *mbox) {
641 		*mbox = msg->next;
642 	} else {
643 		// We need to walk the list to find the message. We can't reuse any
644 		// data from a previous walk through the list, since the list may have
645 		// changed in the meantime. But we are guaranteed that msg is still
646 		// in it.
647 		struct smp_msg* last = NULL;
648 		struct smp_msg* msg1;
649 
650 		msg1 = *mbox;
651 		while (msg1 != NULL && msg1 != msg) {
652 			last = msg1;
653 			msg1 = msg1->next;
654 		}
655 
656 		// by definition, last must be something
657 		if (msg1 == msg && last != NULL)
658 			last->next = msg->next;
659 		else
660 			panic("last == NULL or msg != msg1");
661 	}
662 
663 	release_spinlock(spinlock);
664 
665 	if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
666 		free(msg->data_ptr);
667 
668 	if ((msg->flags & SMP_MSG_FLAG_SYNC) != 0) {
669 		msg->done = true;
670 		// the caller cpu should now free the message
671 	} else {
672 		// in the !SYNC case, we get to free the message
673 		return_free_message(msg);
674 	}
675 }
676 
677 
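// Fetches and handles a single pending ICI message addressed at the given
// CPU. Returns B_ENTRY_NOT_FOUND if no message was pending, B_OK otherwise.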
678 static status_t
679 process_pending_ici(int32 currentCPU)
680 {
681 	mailbox_source sourceMailbox;
682 	struct smp_msg* msg = check_for_message(currentCPU, sourceMailbox);
683 	if (msg == NULL)
684 		return B_ENTRY_NOT_FOUND;
685 
686 	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));
687 
688 	bool haltCPU = false;
689 
690 	switch (msg->message) {
691 		case SMP_MSG_INVALIDATE_PAGE_RANGE:
692 			arch_cpu_invalidate_TLB_range((addr_t)msg->data,
693 				(addr_t)msg->data2);
694 			break;
695 		case SMP_MSG_INVALIDATE_PAGE_LIST:
696 			arch_cpu_invalidate_TLB_list((addr_t*)msg->data, (int)msg->data2);
697 			break;
698 		case SMP_MSG_USER_INVALIDATE_PAGES:
699 			arch_cpu_user_TLB_invalidate();
700 			break;
701 		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
702 			arch_cpu_global_TLB_invalidate();
703 			break;
704 		case SMP_MSG_CPU_HALT:
705 			haltCPU = true;
706 			break;
707 		case SMP_MSG_CALL_FUNCTION:
708 		{
709 			smp_call_func func = (smp_call_func)msg->data_ptr;
710 			func(msg->data, currentCPU, msg->data2, msg->data3);
711 			break;
712 		}
713 		case SMP_MSG_RESCHEDULE:
714 		{
715 			cpu_ent* cpu = thread_get_current_thread()->cpu;
716 			cpu->invoke_scheduler = true;
717 			cpu->invoke_scheduler_if_idle = false;
718 			break;
719 		}
720 		case SMP_MSG_RESCHEDULE_IF_IDLE:
721 		{
722 			cpu_ent* cpu = thread_get_current_thread()->cpu;
723 			if (!cpu->invoke_scheduler) {
724 				cpu->invoke_scheduler = true;
725 				cpu->invoke_scheduler_if_idle = true;
726 			}
727 			break;
728 		}
729 
730 		default:
731 			dprintf("smp_intercpu_int_handler: got unknown message %ld\n",
732 				msg->message);
733 			break;
734 	}
735 
736 	// finish dealing with this message, possibly removing it from the list
737 	finish_message_processing(currentCPU, msg, sourceMailbox);
738 
739 	// special case for the halt message
740 	if (haltCPU)
741 		debug_trap_cpu_in_kdl(currentCPU, false);
742 
743 	return B_OK;
744 }
745 
746 
747 #if B_DEBUG_SPINLOCK_CONTENTION
748 
749 
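// Reads the 64-bit contention counter lock-free: the high word is sampled
// before and after the low word, and the read is retried until both samples
// agree, so that a carry between the two words cannot be missed.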
750 static uint64
751 get_spinlock_counter(spinlock* lock)
752 {
753 	uint32 high;
754 	uint32 low;
755 	do {
756 		high = (uint32)atomic_get(&lock->count_high);
757 		low = (uint32)atomic_get(&lock->count_low);
758 	} while (high != atomic_get(&lock->count_high));
759 
760 	return ((uint64)high << 32) | low;
761 }
762 
763 
764 static status_t
765 spinlock_contention_syscall(const char* subsystem, uint32 function,
766 	void* buffer, size_t bufferSize)
767 {
768 	spinlock_contention_info info;
769 
770 	if (function != GET_SPINLOCK_CONTENTION_INFO)
771 		return B_BAD_VALUE;
772 
773 	if (bufferSize < sizeof(spinlock_contention_info))
774 		return B_BAD_VALUE;
775 
776 	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
777 	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);
778 
779 	if (!IS_USER_ADDRESS(buffer)
780 		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
781 		return B_BAD_ADDRESS;
782 	}
783 
784 	return B_OK;
785 }
786 
787 
788 #endif	// B_DEBUG_SPINLOCK_CONTENTION
789 
790 
791 static void
792 process_early_cpu_call(int32 cpu)
793 {
794 	sEarlyCPUCallFunction(sEarlyCPUCallCookie, cpu);
795 	atomic_and(&sEarlyCPUCall, ~(uint32)(1 << cpu));
796 }
797 
798 
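// Early (pre-ICI) cross-CPU call mechanism: the boot CPU publishes the
// function and cookie, then sets a bit in sEarlyCPUCall for every other CPU.
// The non-boot CPUs are still spinning in smp_trap_non_boot_cpus() at that
// point; each one notices its bit, runs the function via
// process_early_cpu_call(), and clears the bit again.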
799 static void
800 call_all_cpus_early(void (*function)(void*, int), void* cookie)
801 {
802 	if (sNumCPUs > 1) {
803 		sEarlyCPUCallFunction = function;
804 		sEarlyCPUCallCookie = cookie;
805 
806 		uint32 cpuMask = (1 << sNumCPUs) - 2;
807 			// all CPUs but the boot cpu
808 
809 		sEarlyCPUCall = cpuMask;
810 
811 		// wait for all CPUs to finish
812 		while ((sEarlyCPUCall & cpuMask) != 0)
813 			PAUSE();
814 	}
815 
816 	function(cookie, 0);
817 }
818 
819 
820 //	#pragma mark -
821 
822 
823 int
824 smp_intercpu_int_handler(int32 cpu)
825 {
826 	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));
827 
828 	process_all_pending_ici(cpu);
829 
830 	TRACE(("smp_intercpu_int_handler: done\n"));
831 
832 	return B_HANDLED_INTERRUPT;
833 }
834 
835 
836 void
837 smp_send_ici(int32 targetCPU, int32 message, uint32 data, uint32 data2,
838 	uint32 data3, void* dataPointer, uint32 flags)
839 {
840 	struct smp_msg *msg;
841 
842 	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, "
843 		"data3 0x%lx, ptr %p, flags 0x%lx\n", targetCPU, message, data, data2,
844 		data3, dataPointer, flags));
845 
846 	if (sICIEnabled) {
847 		int state;
848 		int currentCPU;
849 
850 		// find_free_message leaves interrupts disabled
851 		state = find_free_message(&msg);
852 
853 		currentCPU = smp_get_current_cpu();
854 		if (targetCPU == currentCPU) {
855 			return_free_message(msg);
856 			restore_interrupts(state);
857 			return; // we can't send an ICI to ourselves
858 		}
859 
860 		// set up the message
861 		msg->message = message;
862 		msg->data = data;
863 		msg->data2 = data2;
864 		msg->data3 = data3;
865 		msg->data_ptr = dataPointer;
866 		msg->ref_count = 1;
867 		msg->flags = flags;
868 		msg->done = false;
869 
870 		// stick it in the appropriate cpu's mailbox
871 		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
872 		msg->next = sCPUMessages[targetCPU];
873 		sCPUMessages[targetCPU] = msg;
874 		release_spinlock(&sCPUMessageSpinlock[targetCPU]);
875 
876 		arch_smp_send_ici(targetCPU);
877 
878 		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
879 			// wait for the other cpu to finish processing it: the target CPU
880 			// sets msg->done once it has removed the message from its mailbox
881 			// and dropped its reference
882 			while (msg->done == false) {
883 				process_all_pending_ici(currentCPU);
884 				PAUSE();
885 			}
886 			// for SYNC messages, it's our responsibility to put it
887 			// back into the free list
888 			return_free_message(msg);
889 		}
890 
891 		restore_interrupts(state);
892 	}
893 }
894 
895 
896 void
897 smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, uint32 data,
898 	uint32 data2, uint32 data3, void *dataPointer, uint32 flags)
899 {
900 	if (!sICIEnabled)
901 		return;
902 
903 	int currentCPU = smp_get_current_cpu();
904 	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
905 		& (((cpu_mask_t)1 << sNumCPUs) - 1);
906 	if (cpuMask == 0) {
907 		panic("smp_send_multicast_ici(): 0 CPU mask");
908 		return;
909 	}
910 
911 	// count target CPUs
912 	int32 targetCPUs = 0;
913 	for (int32 i = 0; i < sNumCPUs; i++) {
914 		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
915 			targetCPUs++;
916 	}
917 
918 	// find_free_message leaves interrupts disabled
919 	struct smp_msg *msg;
920 	int state = find_free_message(&msg);
921 
922 	msg->message = message;
923 	msg->data = data;
924 	msg->data2 = data2;
925 	msg->data3 = data3;
926 	msg->data_ptr = dataPointer;
927 	msg->ref_count = targetCPUs;
928 	msg->flags = flags;
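	// pre-mark all non-target CPUs as "already handled" in the bitmap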
929 	msg->proc_bitmap = ~cpuMask;
930 	msg->done = false;
931 
932 	// stick it in the broadcast mailbox
933 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
934 	msg->next = sBroadcastMessages;
935 	sBroadcastMessages = msg;
936 	release_spinlock(&sBroadcastMessageSpinlock);
937 
938 	arch_smp_send_broadcast_ici();
939 		// TODO: Introduce a call that only bothers the target CPUs!
940 
941 	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
942 		// wait for the other cpus to finish processing it: the last CPU to
943 		// drop its reference removes the message from the broadcast mailbox
944 		// and sets msg->done
945 		while (msg->done == false) {
946 			process_all_pending_ici(currentCPU);
947 			PAUSE();
948 		}
949 
950 		// for SYNC messages, it's our responsibility to put it
951 		// back into the free list
952 		return_free_message(msg);
953 	}
954 
955 	restore_interrupts(state);
956 }
957 
958 
959 void
960 smp_send_broadcast_ici(int32 message, uint32 data, uint32 data2, uint32 data3,
961 	void *dataPointer, uint32 flags)
962 {
963 	struct smp_msg *msg;
964 
965 	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 "
966 		"0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n", smp_get_current_cpu(),
967 		message, data, data2, data3, dataPointer, flags));
968 
969 	if (sICIEnabled) {
970 		int state;
971 		int currentCPU;
972 
973 		// find_free_message leaves interrupts disabled
974 		state = find_free_message(&msg);
975 
976 		currentCPU = smp_get_current_cpu();
977 
978 		msg->message = message;
979 		msg->data = data;
980 		msg->data2 = data2;
981 		msg->data3 = data3;
982 		msg->data_ptr = dataPointer;
983 		msg->ref_count = sNumCPUs - 1;
984 		msg->flags = flags;
985 		msg->proc_bitmap = SET_BIT(0, currentCPU);
986 		msg->done = false;
987 
988 		TRACE(("smp_send_broadcast_ici %d: inserting msg %p into broadcast "
989 			"mbox\n", currentCPU, msg));
990 
991 		// stick it in the broadcast mailbox
992 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
993 		msg->next = sBroadcastMessages;
994 		sBroadcastMessages = msg;
995 		release_spinlock(&sBroadcastMessageSpinlock);
996 
997 		arch_smp_send_broadcast_ici();
998 
999 		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));
1000 
1001 		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
1002 			// wait for the other cpus to finish processing it: the last CPU
1003 			// to drop its reference removes the message from the broadcast
1004 			// mailbox and sets msg->done
1005 			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));
1006 
1007 			while (msg->done == false) {
1008 				process_all_pending_ici(currentCPU);
1009 				PAUSE();
1010 			}
1011 
1012 			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));
1013 
1014 			// for SYNC messages, it's our responsibility to put it
1015 			// back into the free list
1016 			return_free_message(msg);
1017 		}
1018 
1019 		restore_interrupts(state);
1020 	}
1021 
1022 	TRACE(("smp_send_broadcast_ici: done\n"));
1023 }
1024 
1025 
1026 void
1027 smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
1028 	uint32 data, uint32 data2, uint32 data3, void *dataPointer, uint32 flags)
1029 {
1030 	if (!sICIEnabled)
1031 		return;
1032 
1033 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
1034 		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
1035 		currentCPU, message, data, data2, data3, dataPointer, flags));
1036 
1037 	struct smp_msg *msg;
1038 	find_free_message_interrupts_disabled(currentCPU, &msg);
1039 
1040 	msg->message = message;
1041 	msg->data = data;
1042 	msg->data2 = data2;
1043 	msg->data3 = data3;
1044 	msg->data_ptr = dataPointer;
1045 	msg->ref_count = sNumCPUs - 1;
1046 	msg->flags = flags;
1047 	msg->proc_bitmap = SET_BIT(0, currentCPU);
1048 	msg->done = false;
1049 
1050 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
1051 		"into broadcast mbox\n", currentCPU, msg));
1052 
1053 	// stick it in the broadcast mailbox
1054 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1055 	msg->next = sBroadcastMessages;
1056 	sBroadcastMessages = msg;
1057 	release_spinlock(&sBroadcastMessageSpinlock);
1058 
1059 	arch_smp_send_broadcast_ici();
1060 
1061 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
1062 		currentCPU));
1063 
1064 	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
1065 		// wait for the other cpus to finish processing it: the last CPU to
1066 		// drop its reference removes the message from the broadcast mailbox
1067 		// and sets msg->done
1068 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
1069 			"ack\n", currentCPU));
1070 
1071 		while (msg->done == false) {
1072 			process_all_pending_ici(currentCPU);
1073 			PAUSE();
1074 		}
1075 
1076 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
1077 			"message to free list\n", currentCPU));
1078 
1079 		// for SYNC messages, it's our responsibility to put it
1080 		// back into the free list
1081 		return_free_message(msg);
1082 	}
1083 
1084 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
1085 }
1086 
1087 
1088 /*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.
1089 
1090 	\param cpu The index of the calling CPU.
1091 	\param rendezVous A rendez-vous variable to make sure that the boot CPU
1092 		does not return before all other CPUs have started waiting.
1093 	\return \c true on the boot CPU, \c false otherwise.
1094 */
1095 bool
1096 smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
1097 {
1098 	if (cpu == 0) {
1099 		smp_cpu_rendezvous(rendezVous, cpu);
1100 		return true;
1101 	}
1102 
1103 	smp_cpu_rendezvous(rendezVous, cpu);
1104 
1105 	while (sBootCPUSpin == 0) {
1106 		if ((sEarlyCPUCall & (1 << cpu)) != 0)
1107 			process_early_cpu_call(cpu);
1108 
1109 		PAUSE();
1110 	}
1111 
1112 	return false;
1113 }
1114 
1115 
1116 void
1117 smp_wake_up_non_boot_cpus()
1118 {
1119 	// ICIs were previously being ignored
1120 	if (sNumCPUs > 1)
1121 		sICIEnabled = true;
1122 
1123 	// resume the non-boot CPUs
1124 	sBootCPUSpin = 1;
1125 }
1126 
1127 
1128 /*!	Spin until all CPUs have reached the rendez-vous point.
1129 
1130 	The rendez-vous variable \c *var must have been initialized to 0 before the
1131 	function is called. The variable will be non-zero when the function returns.
1132 
1133 	Note that when the function returns on one CPU, it only means that all CPUs
1134 	have already entered the function. It does not mean that the variable can
1135 	already be reset. Only when all CPUs have returned (which would have to be
1136 	ensured via another rendez-vous) can the variable be reset.
1137 */
1138 void
1139 smp_cpu_rendezvous(volatile uint32* var, int current_cpu)
1140 {
1141 	atomic_or((vint32*)var, 1 << current_cpu);
1142 
1143 	while (*var != (((uint32)1 << sNumCPUs) - 1))
1144 		PAUSE();
1145 }
1146 
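// A minimal usage sketch (the variable name is hypothetical): every CPU calls
// the rendez-vous on a shared, zero-initialized variable and only continues
// once all CPUs have checked in:
//
//	static uint32 sRendezVous = 0;
//	...
//	smp_cpu_rendezvous(&sRendezVous, smp_get_current_cpu());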
1147 
1148 status_t
1149 smp_init(kernel_args* args)
1150 {
1151 	TRACE(("smp_init: entry\n"));
1152 
1153 #if DEBUG_SPINLOCK_LATENCIES
1154 	sEnableLatencyCheck
1155 		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
1156 #endif
1157 
1158 #if DEBUG_SPINLOCKS
1159 	add_debugger_command_etc("spinlock", &dump_spinlock,
1160 		"Dump info on a spinlock",
1161 		"\n"
1162 		"Dumps info on a spinlock.\n", 0);
1163 #endif
1164 	add_debugger_command_etc("ici", &dump_ici_messages,
1165 		"Dump info on pending ICI messages",
1166 		"\n"
1167 		"Dumps info on pending ICI messages.\n", 0);
1168 	add_debugger_command_etc("ici_message", &dump_ici_message,
1169 		"Dump info on an ICI message",
1170 		"\n"
1171 		"Dumps info on an ICI message.\n", 0);
1172 
1173 	if (args->num_cpus > 1) {
1174 		sFreeMessages = NULL;
1175 		sFreeMessageCount = 0;
1176 		for (int i = 0; i < MSG_POOL_SIZE; i++) {
1177 			struct smp_msg* msg
1178 				= (struct smp_msg*)malloc(sizeof(struct smp_msg));
1179 			if (msg == NULL) {
1180 				panic("error creating smp mailboxes\n");
1181 				return B_ERROR;
1182 			}
1183 			memset(msg, 0, sizeof(struct smp_msg));
1184 			msg->next = sFreeMessages;
1185 			sFreeMessages = msg;
1186 			sFreeMessageCount++;
1187 		}
1188 		sNumCPUs = args->num_cpus;
1189 	}
1190 	TRACE(("smp_init: calling arch_smp_init\n"));
1191 
1192 	return arch_smp_init(args);
1193 }
1194 
1195 
1196 status_t
1197 smp_per_cpu_init(kernel_args* args, int32 cpu)
1198 {
1199 	return arch_smp_per_cpu_init(args, cpu);
1200 }
1201 
1202 
1203 status_t
1204 smp_init_post_generic_syscalls(void)
1205 {
1206 #if B_DEBUG_SPINLOCK_CONTENTION
1207 	return register_generic_syscall(SPINLOCK_CONTENTION,
1208 		&spinlock_contention_syscall, 0, 0);
1209 #else
1210 	return B_OK;
1211 #endif
1212 }
1213 
1214 
1215 void
1216 smp_set_num_cpus(int32 numCPUs)
1217 {
1218 	sNumCPUs = numCPUs;
1219 }
1220 
1221 
1222 int32
1223 smp_get_num_cpus()
1224 {
1225 	return sNumCPUs;
1226 }
1227 
1228 
1229 int32
1230 smp_get_current_cpu(void)
1231 {
1232 	return thread_get_current_thread()->cpu->cpu_num;
1233 }
1234 
1235 
1236 // #pragma mark - public exported functions
1237 
1238 
1239 void
1240 call_all_cpus(void (*func)(void*, int), void* cookie)
1241 {
1242 	// if inter-CPU communication is not yet enabled, use the early mechanism
1243 	if (!sICIEnabled) {
1244 		call_all_cpus_early(func, cookie);
1245 		return;
1246 	}
1247 
1248 	cpu_status state = disable_interrupts();
1249 
1250 	if (smp_get_num_cpus() > 1) {
1251 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1252 			0, 0, (void*)func, SMP_MSG_FLAG_ASYNC);
1253 	}
1254 
1255 	// we need to call this function ourselves as well
1256 	func(cookie, smp_get_current_cpu());
1257 
1258 	restore_interrupts(state);
1259 }
1260 
1261 
1262 void
1263 call_all_cpus_sync(void (*func)(void*, int), void* cookie)
1264 {
1265 	// if inter-CPU communication is not yet enabled, use the early mechanism
1266 	if (!sICIEnabled) {
1267 		call_all_cpus_early(func, cookie);
1268 		return;
1269 	}
1270 
1271 	cpu_status state = disable_interrupts();
1272 
1273 	if (smp_get_num_cpus() > 1) {
1274 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1275 			0, 0, (void*)func, SMP_MSG_FLAG_SYNC);
1276 	}
1277 
1278 	// we need to call this function ourselves as well
1279 	func(cookie, smp_get_current_cpu());
1280 
1281 	restore_interrupts(state);
1282 }
1283 
1284 
1285 void
1286 memory_read_barrier(void)
1287 {
1288 	arch_cpu_memory_read_barrier();
1289 }
1290 
1291 
1292 void
1293 memory_write_barrier(void)
1294 {
1295 	arch_cpu_memory_write_barrier();
1296 }
1297