xref: /haiku/src/system/kernel/smp.cpp (revision 0d452c8f34013b611a54c746a71c05e28796eae2)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Functionality for symmetrical multi-processors */
12 
13 
14 #include <smp.h>
15 
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/debug.h>
21 #include <arch/int.h>
22 #include <arch/smp.h>
23 #include <boot/kernel_args.h>
24 #include <cpu.h>
25 #include <generic_syscall.h>
26 #include <int.h>
27 #include <spinlock_contention.h>
28 #include <thread.h>
29 #if DEBUG_SPINLOCK_LATENCIES
30 #	include <safemode.h>
31 #endif
32 
33 #include "kernel_debug_config.h"
34 
35 
36 //#define TRACE_SMP
37 #ifdef TRACE_SMP
38 #	define TRACE(x) dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 
44 #undef try_acquire_spinlock
45 #undef acquire_spinlock
46 #undef release_spinlock
47 
48 
49 #define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)
50 
51 // These macros define the number of unsuccessful iterations in
52 // acquire_spinlock() and acquire_spinlock_nocheck() after which the functions
53 // panic(), assuming a deadlock has occurred.
54 #define SPINLOCK_DEADLOCK_COUNT				100000000
55 #define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000
56 
57 
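// An inter-CPU interrupt (ICI) message. Messages are allocated from a fixed
// pool (sFreeMessages) and delivered either through a per-CPU mailbox
// (sCPUMessages) or the broadcast mailbox (sBroadcastMessages). "message" is
// one of the SMP_MSG_* codes, "data"/"data2"/"data3"/"data_ptr" carry the
// payload, and "flags" holds SMP_MSG_FLAG_* bits. "ref_count" is the number
// of CPUs that still have to process the message, "proc_bitmap" marks the
// CPUs that have already picked up (or are not targeted by) a broadcast
// message, and "done" is set for SMP_MSG_FLAG_SYNC messages once processing
// has finished, so that the sender can stop spinning.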
58 struct smp_msg {
59 	struct smp_msg	*next;
60 	int32			message;
61 	uint32			data;
62 	uint32			data2;
63 	uint32			data3;
64 	void			*data_ptr;
65 	uint32			flags;
66 	int32			ref_count;
67 	volatile bool	done;
68 	uint32			proc_bitmap;
69 };
70 
71 enum mailbox_source {
72 	MAILBOX_LOCAL,
73 	MAILBOX_BCAST,
74 };
75 
76 static vint32 sBootCPUSpin = 0;
77 
78 static vint32 sEarlyCPUCall = 0;
79 static void (*sEarlyCPUCallFunction)(void*, int);
80 void* sEarlyCPUCallCookie;
81 
82 static struct smp_msg* sFreeMessages = NULL;
83 static volatile int sFreeMessageCount = 0;
84 static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;
85 
86 static struct smp_msg* sCPUMessages[SMP_MAX_CPUS] = { NULL, };
87 static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];
88 
89 static struct smp_msg* sBroadcastMessages = NULL;
90 static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;
91 
92 static bool sICIEnabled = false;
93 static int32 sNumCPUs = 1;
94 
95 static int32 process_pending_ici(int32 currentCPU);
96 
97 
98 #if DEBUG_SPINLOCKS
99 #define NUM_LAST_CALLERS	32
100 
101 static struct {
102 	void		*caller;
103 	spinlock	*lock;
104 } sLastCaller[NUM_LAST_CALLERS];
105 
106 static vint32 sLastIndex = 0;
107 	// Is incremented atomically. Must be taken modulo NUM_LAST_CALLERS before
108 	// being used as an index into sLastCaller. Note that it has to be cast to
109 	// uint32 before applying the modulo operation, since otherwise, after an
110 	// overflow, negative indices would result.
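	// For example (illustrative): once sLastIndex has overflowed to
	// -2147483647, (uint32)-2147483647 % 32 == 2147483649 % 32 == 1, a valid
	// index, whereas -2147483647 % 32 == -31 in C++, an invalid negative
	// index.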
111 
112 
113 static void
114 push_lock_caller(void* caller, spinlock* lock)
115 {
116 	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;
117 
118 	sLastCaller[index].caller = caller;
119 	sLastCaller[index].lock = lock;
120 }
121 
122 
123 static void*
124 find_lock_caller(spinlock* lock)
125 {
126 	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;
127 
128 	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
129 		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
130 		if (sLastCaller[index].lock == lock)
131 			return sLastCaller[index].caller;
132 	}
133 
134 	return NULL;
135 }
136 
137 
138 int
139 dump_spinlock(int argc, char** argv)
140 {
141 	if (argc != 2) {
142 		print_debugger_command_usage(argv[0]);
143 		return 0;
144 	}
145 
146 	uint64 address;
147 	if (!evaluate_debug_expression(argv[1], &address, false))
148 		return 0;
149 
150 	spinlock* lock = (spinlock*)(addr_t)address;
151 	kprintf("spinlock %p:\n", lock);
152 	bool locked = B_SPINLOCK_IS_LOCKED(lock);
153 	if (locked)
154 		kprintf("  locked from %p\n", find_lock_caller(lock));
155 	else
156 		kprintf("  not locked\n");
157 
158 	return 0;
159 }
160 
161 
162 #endif	// DEBUG_SPINLOCKS
163 
164 
165 #if DEBUG_SPINLOCK_LATENCIES
166 
167 
168 #define NUM_LATENCY_LOCKS	4
169 #define DEBUG_LATENCY		200
170 
171 
172 static struct {
173 	spinlock	*lock;
174 	bigtime_t	timestamp;
175 } sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];
176 
177 static int32 sLatencyIndex[B_MAX_CPU_COUNT];
178 static bool sEnableLatencyCheck;
179 
180 
181 static void
182 push_latency(spinlock* lock)
183 {
184 	if (!sEnableLatencyCheck)
185 		return;
186 
187 	int32 cpu = smp_get_current_cpu();
188 	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;
189 
190 	sLatency[cpu][index].lock = lock;
191 	sLatency[cpu][index].timestamp = system_time();
192 }
193 
194 
195 static void
196 test_latency(spinlock* lock)
197 {
198 	if (!sEnableLatencyCheck)
199 		return;
200 
201 	int32 cpu = smp_get_current_cpu();
202 
203 	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
204 		if (sLatency[cpu][i].lock == lock) {
205 			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
206 			if (diff > DEBUG_LATENCY && diff < 500000) {
207 				panic("spinlock %p was held for %lld usecs (%d allowed)\n",
208 					lock, diff, DEBUG_LATENCY);
209 			}
210 
211 			sLatency[cpu][i].lock = NULL;
212 		}
213 	}
214 }
215 
216 
217 #endif	// DEBUG_SPINLOCK_LATENCIES
218 
219 
220 int
221 dump_ici_messages(int argc, char** argv)
222 {
223 	// count broadcast messages
224 	int32 count = 0;
225 	int32 doneCount = 0;
226 	int32 unreferencedCount = 0;
227 	smp_msg* message = sBroadcastMessages;
228 	while (message != NULL) {
229 		count++;
230 		if (message->done)
231 			doneCount++;
232 		if (message->ref_count <= 0)
233 			unreferencedCount++;
234 		message = message->next;
235 	}
236 
237 	kprintf("ICI broadcast messages: %ld, first: %p\n", count,
238 		sBroadcastMessages);
239 	kprintf("  done:         %ld\n", doneCount);
240 	kprintf("  unreferenced: %ld\n", unreferencedCount);
241 
242 	// count per-CPU messages
243 	for (int32 i = 0; i < sNumCPUs; i++) {
244 		count = 0;
245 		message = sCPUMessages[i];
246 		while (message != NULL) {
247 			count++;
248 			message = message->next;
249 		}
250 
251 		kprintf("CPU %ld messages: %ld, first: %p\n", i, count,
252 			sCPUMessages[i]);
253 	}
254 
255 	return 0;
256 }
257 
258 
259 int
260 dump_ici_message(int argc, char** argv)
261 {
262 	if (argc != 2) {
263 		print_debugger_command_usage(argv[0]);
264 		return 0;
265 	}
266 
267 	uint64 address;
268 	if (!evaluate_debug_expression(argv[1], &address, false))
269 		return 0;
270 
271 	smp_msg* message = (smp_msg*)(addr_t)address;
272 	kprintf("ICI message %p:\n", message);
273 	kprintf("  next:        %p\n", message->next);
274 	kprintf("  message:     %ld\n", message->message);
275 	kprintf("  data:        %ld\n", message->data);
276 	kprintf("  data2:       %ld\n", message->data2);
277 	kprintf("  data3:       %ld\n", message->data3);
278 	kprintf("  data_ptr:    %p\n", message->data_ptr);
279 	kprintf("  flags:       %lx\n", message->flags);
280 	kprintf("  ref_count:   %ld\n", message->ref_count);
281 	kprintf("  done:        %s\n", message->done ? "true" : "false");
282 	kprintf("  proc_bitmap: %lx\n", message->proc_bitmap);
283 
284 	return 0;
285 }
286 
287 
288 static inline void
289 process_all_pending_ici(int32 currentCPU)
290 {
291 	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
292 		;
293 }
294 
295 
296 bool
297 try_acquire_spinlock(spinlock* lock)
298 {
299 #if DEBUG_SPINLOCKS
300 	if (are_interrupts_enabled()) {
301 		panic("try_acquire_spinlock: attempt to acquire lock %p with "
302 			"interrupts enabled", lock);
303 	}
304 #endif
305 
306 #if B_DEBUG_SPINLOCK_CONTENTION
307 	if (atomic_add(&lock->lock, 1) != 0)
308 		return false;
309 #else
310 	if (atomic_or((int32*)lock, 1) != 0)
311 		return false;
312 
313 #	if DEBUG_SPINLOCKS
314 	push_lock_caller(arch_debug_get_caller(), lock);
315 #	endif
316 #endif
317 
318 	return true;
319 }
320 
321 
322 void
323 acquire_spinlock(spinlock* lock)
324 {
325 #if DEBUG_SPINLOCKS
326 	if (are_interrupts_enabled()) {
327 		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
328 			"enabled", lock);
329 	}
330 #endif
331 
332 	if (sNumCPUs > 1) {
333 		int currentCPU = smp_get_current_cpu();
334 #if B_DEBUG_SPINLOCK_CONTENTION
335 		while (atomic_add(&lock->lock, 1) != 0)
336 			process_all_pending_ici(currentCPU);
337 #else
338 		while (1) {
339 			uint32 count = 0;
340 			while (*lock != 0) {
341 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
342 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
343 						"for a long time!", lock);
344 					count = 0;
345 				}
346 
347 				process_all_pending_ici(currentCPU);
348 				PAUSE();
349 			}
350 			if (atomic_or((int32*)lock, 1) == 0)
351 				break;
352 		}
353 
354 #	if DEBUG_SPINLOCKS
355 		push_lock_caller(arch_debug_get_caller(), lock);
356 #	endif
357 #endif
358 	} else {
359 #if DEBUG_SPINLOCKS
360 		int32 oldValue;
361 		oldValue = atomic_or((int32*)lock, 1);
362 		if (oldValue != 0) {
363 			panic("acquire_spinlock: attempt to acquire lock %p twice on "
364 				"non-SMP system (last caller: %p, value %ld)", lock,
365 				find_lock_caller(lock), oldValue);
366 		}
367 
368 		push_lock_caller(arch_debug_get_caller(), lock);
369 #endif
370 	}
371 #if DEBUG_SPINLOCK_LATENCIES
372 	push_latency(lock);
373 #endif
374 }
375 
376 
377 static void
378 acquire_spinlock_nocheck(spinlock *lock)
379 {
380 #if DEBUG_SPINLOCKS
381 	if (are_interrupts_enabled()) {
382 		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
383 			"interrupts enabled", lock);
384 	}
385 #endif
386 
387 	if (sNumCPUs > 1) {
388 #if B_DEBUG_SPINLOCK_CONTENTION
389 		while (atomic_add(&lock->lock, 1) != 0) {
390 		}
391 #else
392 		while (1) {
393 			uint32 count = 0;
394 			while (*lock != 0) {
395 				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
396 					panic("acquire_spinlock_nocheck(): Failed to acquire "
397 						"spinlock %p for a long time!", lock);
398 					count = 0;
399 				}
400 
401 				PAUSE();
402 			}
403 
404 			if (atomic_or((int32*)lock, 1) == 0)
405 				break;
406 		}
407 #endif
408 	} else {
409 #if DEBUG_SPINLOCKS
410 		if (atomic_or((int32*)lock, 1) != 0) {
411 			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
412 				"on non-SMP system\n", lock);
413 		}
414 #endif
415 	}
416 }
417 
418 
419 /*!	Equivalent to acquire_spinlock(), save for the additional \a currentCPU
	parameter. */
420 static void
421 acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
422 {
423 #if DEBUG_SPINLOCKS
424 	if (are_interrupts_enabled()) {
425 		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
426 			"interrupts enabled", lock);
427 	}
428 #endif
429 
430 	if (sNumCPUs > 1) {
431 #if B_DEBUG_SPINLOCK_CONTENTION
432 		while (atomic_add(&lock->lock, 1) != 0)
433 			process_all_pending_ici(currentCPU);
434 #else
435 		while (1) {
436 			uint32 count = 0;
437 			while (*lock != 0) {
438 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
439 					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
440 						"%p for a long time!", lock);
441 					count = 0;
442 				}
443 
444 				process_all_pending_ici(currentCPU);
445 				PAUSE();
446 			}
447 			if (atomic_or((int32*)lock, 1) == 0)
448 				break;
449 		}
450 
451 #	if DEBUG_SPINLOCKS
452 		push_lock_caller(arch_debug_get_caller(), lock);
453 #	endif
454 #endif
455 	} else {
456 #if DEBUG_SPINLOCKS
457 		int32 oldValue;
458 		oldValue = atomic_or((int32*)lock, 1);
459 		if (oldValue != 0) {
460 			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
461 				"non-SMP system (last caller: %p, value %ld)", lock,
462 				find_lock_caller(lock), oldValue);
463 		}
464 
465 		push_lock_caller(arch_debug_get_caller(), lock);
466 #endif
467 	}
468 }
469 
470 
471 void
472 release_spinlock(spinlock *lock)
473 {
474 #if DEBUG_SPINLOCK_LATENCIES
475 	test_latency(lock);
476 #endif
477 
478 	if (sNumCPUs > 1) {
479 		if (are_interrupts_enabled())
480 			panic("release_spinlock: attempt to release lock %p with "
481 				"interrupts enabled\n", lock);
482 #if B_DEBUG_SPINLOCK_CONTENTION
483 		{
484 			int32 count = atomic_and(&lock->lock, 0) - 1;
485 			if (count < 0) {
486 				panic("release_spinlock: lock %p was already released\n", lock);
487 			} else {
488 				// add to the total count -- deal with carry manually
489 				if ((uint32)atomic_add(&lock->count_low, count) + count
490 						< (uint32)count) {
491 					atomic_add(&lock->count_high, 1);
492 				}
493 			}
494 		}
495 #else
496 		if (atomic_and((int32*)lock, 0) != 1)
497 			panic("release_spinlock: lock %p was already released\n", lock);
498 #endif
499 	} else {
500 #if DEBUG_SPINLOCKS
501 		if (are_interrupts_enabled()) {
502 			panic("release_spinlock: attempt to release lock %p with "
503 				"interrupts enabled\n", lock);
504 		}
505 		if (atomic_and((int32*)lock, 0) != 1)
506 			panic("release_spinlock: lock %p was already released\n", lock);
507 #endif
508 #if DEBUG_SPINLOCK_LATENCIES
509 		test_latency(lock);
510 #endif
511 	}
512 }
513 
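// A minimal usage sketch (not part of the original file): spinlocks must be
// acquired with interrupts disabled, so a typical caller looks like this
// ("sSomeLock" is a hypothetical spinlock initialized with
// B_SPINLOCK_INITIALIZER):
//
//	cpu_status state = disable_interrupts();
//	acquire_spinlock(&sSomeLock);
//	// ... critical section ...
//	release_spinlock(&sSomeLock);
//	restore_interrupts(state);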
514 
515 /*!	Finds a free message and returns it in \a msg.
516 	NOTE: This has the side effect of disabling interrupts.
517 	The return value is the former interrupt state.
518 */
519 static cpu_status
520 find_free_message(struct smp_msg** msg)
521 {
522 	cpu_status state;
523 
524 	TRACE(("find_free_message: entry\n"));
525 
526 retry:
527 	while (sFreeMessageCount <= 0) {
528 		state = disable_interrupts();
529 		process_all_pending_ici(smp_get_current_cpu());
530 		restore_interrupts(state);
531 		PAUSE();
532 	}
533 	state = disable_interrupts();
534 	acquire_spinlock(&sFreeMessageSpinlock);
535 
536 	if (sFreeMessageCount <= 0) {
537 		// someone grabbed one while we were getting the lock,
538 		// go back to waiting for it
539 		release_spinlock(&sFreeMessageSpinlock);
540 		restore_interrupts(state);
541 		goto retry;
542 	}
543 
544 	*msg = sFreeMessages;
545 	sFreeMessages = (*msg)->next;
546 	sFreeMessageCount--;
547 
548 	release_spinlock(&sFreeMessageSpinlock);
549 
550 	TRACE(("find_free_message: returning msg %p\n", *msg));
551 
552 	return state;
553 }
554 
555 
556 /*!	Similar to find_free_message(), but expects the interrupts to be disabled
557 	already.
558 */
559 static void
560 find_free_message_interrupts_disabled(int32 currentCPU,
561 	struct smp_msg** _message)
562 {
563 	TRACE(("find_free_message_interrupts_disabled: entry\n"));
564 
565 	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
566 	while (sFreeMessageCount <= 0) {
567 		release_spinlock(&sFreeMessageSpinlock);
568 		process_all_pending_ici(currentCPU);
569 		PAUSE();
570 		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
571 	}
572 
573 	*_message = sFreeMessages;
574 	sFreeMessages = (*_message)->next;
575 	sFreeMessageCount--;
576 
577 	release_spinlock(&sFreeMessageSpinlock);
578 
579 	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
580 		*_message));
581 }
582 
583 
584 static void
585 return_free_message(struct smp_msg* msg)
586 {
587 	TRACE(("return_free_message: returning msg %p\n", msg));
588 
589 	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
590 	msg->next = sFreeMessages;
591 	sFreeMessages = msg;
592 	sFreeMessageCount++;
593 	release_spinlock(&sFreeMessageSpinlock);
594 }
595 
596 
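/*!	Pops the next pending ICI message for \a currentCPU, checking its private
	mailbox first and then the broadcast mailbox. Local messages are unlinked
	right away; broadcast messages remain queued (only marked as handled in
	their proc_bitmap) until the last targeted CPU has finished with them in
	finish_message_processing(). On success \a sourceMailbox is set to the
	mailbox the message came from. Returns NULL if ICIs are disabled or no
	message is pending.
*/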
597 static struct smp_msg*
598 check_for_message(int currentCPU, mailbox_source& sourceMailbox)
599 {
600 	if (!sICIEnabled)
601 		return NULL;
602 
603 	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);
604 
605 	struct smp_msg* msg = sCPUMessages[currentCPU];
606 	if (msg != NULL) {
607 		sCPUMessages[currentCPU] = msg->next;
608 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
609 		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
610 		sourceMailbox = MAILBOX_LOCAL;
611 	} else {
612 		// try getting one from the broadcast mailbox
613 
614 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
615 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
616 
617 		msg = sBroadcastMessages;
618 		while (msg != NULL) {
619 			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
620 				// we have handled this one already
621 				msg = msg->next;
622 				continue;
623 			}
624 
625 			// mark it so we won't try to process this one again
626 			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
627 			sourceMailbox = MAILBOX_BCAST;
628 			break;
629 		}
630 		release_spinlock(&sBroadcastMessageSpinlock);
631 
632 		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU,
633 			msg));
634 	}
635 	return msg;
636 }
637 
638 
639 static void
640 finish_message_processing(int currentCPU, struct smp_msg* msg,
641 	mailbox_source sourceMailbox)
642 {
643 	if (atomic_add(&msg->ref_count, -1) != 1)
644 		return;
645 
646 	// we were the last one to decrement the ref_count
647 	// it's our job to remove it from the list & possibly clean it up
648 	struct smp_msg** mbox;
649 	spinlock* spinlock;
650 
651 	// clean up the message from one of the mailboxes
652 	if (sourceMailbox == MAILBOX_BCAST) {
653 		mbox = &sBroadcastMessages;
654 		spinlock = &sBroadcastMessageSpinlock;
655 	} else {
656 		mbox = &sCPUMessages[currentCPU];
657 		spinlock = &sCPUMessageSpinlock[currentCPU];
658 	}
659 
660 	acquire_spinlock_nocheck(spinlock);
661 
662 	TRACE(("cleaning up message %p\n", msg));
663 
664 	if (sourceMailbox != MAILBOX_BCAST) {
665 		// local mailbox -- the message has already been removed in
666 		// check_for_message()
667 	} else if (msg == *mbox) {
668 		*mbox = msg->next;
669 	} else {
670 		// We need to walk the list to find the message. We can't reuse any
671 		// data from a previous walk through the list, since the list may have
672 		// changed in the meantime. But we are guaranteed that it still
673 		// contains msg.
674 		struct smp_msg* last = NULL;
675 		struct smp_msg* msg1;
676 
677 		msg1 = *mbox;
678 		while (msg1 != NULL && msg1 != msg) {
679 			last = msg1;
680 			msg1 = msg1->next;
681 		}
682 
683 		// since msg is not the list head (handled above), last must be non-NULL
684 		if (msg1 == msg && last != NULL)
685 			last->next = msg->next;
686 		else
687 			panic("last == NULL or msg != msg1");
688 	}
689 
690 	release_spinlock(spinlock);
691 
692 	if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
693 		free(msg->data_ptr);
694 
695 	if ((msg->flags & SMP_MSG_FLAG_SYNC) != 0) {
696 		msg->done = true;
697 		// the caller cpu should now free the message
698 	} else {
699 		// in the !SYNC case, we get to free the message
700 		return_free_message(msg);
701 	}
702 }
703 
704 
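/*!	Processes a single pending ICI message addressed to \a currentCPU, if any.
	Returns B_ENTRY_NOT_FOUND when there is nothing to process -- which
	process_all_pending_ici() uses as its termination condition -- and B_OK
	otherwise.
*/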
705 static status_t
706 process_pending_ici(int32 currentCPU)
707 {
708 	mailbox_source sourceMailbox;
709 	struct smp_msg* msg = check_for_message(currentCPU, sourceMailbox);
710 	if (msg == NULL)
711 		return B_ENTRY_NOT_FOUND;
712 
713 	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));
714 
715 	bool haltCPU = false;
716 
717 	switch (msg->message) {
718 		case SMP_MSG_INVALIDATE_PAGE_RANGE:
719 			arch_cpu_invalidate_TLB_range((addr_t)msg->data,
720 				(addr_t)msg->data2);
721 			break;
722 		case SMP_MSG_INVALIDATE_PAGE_LIST:
723 			arch_cpu_invalidate_TLB_list((addr_t*)msg->data, (int)msg->data2);
724 			break;
725 		case SMP_MSG_USER_INVALIDATE_PAGES:
726 			arch_cpu_user_TLB_invalidate();
727 			break;
728 		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
729 			arch_cpu_global_TLB_invalidate();
730 			break;
731 		case SMP_MSG_CPU_HALT:
732 			haltCPU = true;
733 			break;
734 		case SMP_MSG_CALL_FUNCTION:
735 		{
736 			smp_call_func func = (smp_call_func)msg->data_ptr;
737 			func(msg->data, currentCPU, msg->data2, msg->data3);
738 			break;
739 		}
740 		case SMP_MSG_RESCHEDULE:
741 		{
742 			cpu_ent* cpu = thread_get_current_thread()->cpu;
743 			cpu->invoke_scheduler = true;
744 			cpu->invoke_scheduler_if_idle = false;
745 			break;
746 		}
747 		case SMP_MSG_RESCHEDULE_IF_IDLE:
748 		{
749 			cpu_ent* cpu = thread_get_current_thread()->cpu;
750 			if (!cpu->invoke_scheduler) {
751 				cpu->invoke_scheduler = true;
752 				cpu->invoke_scheduler_if_idle = true;
753 			}
754 			break;
755 		}
756 
757 		default:
758 			dprintf("smp_intercpu_int_handler: got unknown message %ld\n",
759 				msg->message);
760 			break;
761 	}
762 
763 	// finish dealing with this message, possibly removing it from the list
764 	finish_message_processing(currentCPU, msg, sourceMailbox);
765 
766 	// special case for the halt message
767 	if (haltCPU)
768 		debug_trap_cpu_in_kdl(currentCPU, false);
769 
770 	return B_OK;
771 }
772 
773 
774 #if B_DEBUG_SPINLOCK_CONTENTION
775 
776 
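/*!	Returns the 64 bit contention counter of \a lock. Since only 32 bits can
	be read atomically at a time, the high word is re-read until it has not
	changed across the read of the low word, yielding a consistent value.
*/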
777 static uint64
778 get_spinlock_counter(spinlock* lock)
779 {
780 	uint32 high;
781 	uint32 low;
782 	do {
783 		high = (uint32)atomic_get(&lock->count_high);
784 		low = (uint32)atomic_get(&lock->count_low);
785 	} while (high != atomic_get(&lock->count_high));
786 
787 	return ((uint64)high << 32) | low;
788 }
789 
790 
791 static status_t
792 spinlock_contention_syscall(const char* subsystem, uint32 function,
793 	void* buffer, size_t bufferSize)
794 {
795 	spinlock_contention_info info;
796 
797 	if (function != GET_SPINLOCK_CONTENTION_INFO)
798 		return B_BAD_VALUE;
799 
800 	if (bufferSize < sizeof(spinlock_contention_info))
801 		return B_BAD_VALUE;
802 
803 	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
804 	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);
805 
806 	if (!IS_USER_ADDRESS(buffer)
807 		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
808 		return B_BAD_ADDRESS;
809 	}
810 
811 	return B_OK;
812 }
813 
814 
815 #endif	// B_DEBUG_SPINLOCK_CONTENTION
816 
817 
818 static void
819 process_early_cpu_call(int32 cpu)
820 {
821 	sEarlyCPUCallFunction(sEarlyCPUCallCookie, cpu);
822 	atomic_and(&sEarlyCPUCall, ~(uint32)(1 << cpu));
823 }
824 
825 
826 static void
827 call_all_cpus_early(void (*function)(void*, int), void* cookie)
828 {
829 	if (sNumCPUs > 1) {
830 		sEarlyCPUCallFunction = function;
831 		sEarlyCPUCallCookie = cookie;
832 
833 		uint32 cpuMask = (1 << sNumCPUs) - 2;
834 			// all CPUs but the boot cpu
835 
836 		sEarlyCPUCall = cpuMask;
837 
838 		// wait for all CPUs to finish
839 		while ((sEarlyCPUCall & cpuMask) != 0)
840 			PAUSE();
841 	}
842 
843 	function(cookie, 0);
844 }
845 
846 
847 //	#pragma mark -
848 
849 
850 int
851 smp_intercpu_int_handler(int32 cpu)
852 {
853 	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));
854 
855 	process_all_pending_ici(cpu);
856 
857 	TRACE(("smp_intercpu_int_handler: done\n"));
858 
859 	return B_HANDLED_INTERRUPT;
860 }
861 
862 
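/*!	Sends an ICI message to \a targetCPU; sending to the current CPU is
	silently ignored. For example (an illustrative sketch, with hypothetical
	myFunction/myCookie), to run a function synchronously on another CPU:

	smp_send_ici(targetCPU, SMP_MSG_CALL_FUNCTION, (uint32)myCookie, 0, 0,
		(void*)&myFunction, SMP_MSG_FLAG_SYNC);

	With SMP_MSG_FLAG_SYNC the call returns only after the target CPU has
	processed the message.
*/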
863 void
864 smp_send_ici(int32 targetCPU, int32 message, uint32 data, uint32 data2,
865 	uint32 data3, void* dataPointer, uint32 flags)
866 {
867 	struct smp_msg *msg;
868 
869 	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, "
870 		"data3 0x%lx, ptr %p, flags 0x%lx\n", targetCPU, message, data, data2,
871 		data3, dataPointer, flags));
872 
873 	if (sICIEnabled) {
874 		int state;
875 		int currentCPU;
876 
877 		// find_free_message leaves interrupts disabled
878 		state = find_free_message(&msg);
879 
880 		currentCPU = smp_get_current_cpu();
881 		if (targetCPU == currentCPU) {
882 			return_free_message(msg);
883 			restore_interrupts(state);
884 			return; // nope, can't send an ICI to the current CPU
885 		}
886 
887 		// set up the message
888 		msg->message = message;
889 		msg->data = data;
890 		msg->data2 = data2;
891 		msg->data3 = data3;
892 		msg->data_ptr = dataPointer;
893 		msg->ref_count = 1;
894 		msg->flags = flags;
895 		msg->done = false;
896 
897 		// stick it in the appropriate cpu's mailbox
898 		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
899 		msg->next = sCPUMessages[targetCPU];
900 		sCPUMessages[targetCPU] = msg;
901 		release_spinlock(&sCPUMessageSpinlock[targetCPU]);
902 
903 		arch_smp_send_ici(targetCPU);
904 
905 		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
906 			// Wait for the other CPU to finish processing it: once its
907 			// interrupt handler has removed the message from the mailbox and
908 			// processed it, it sets msg->done (for SYNC messages).
909 			while (msg->done == false) {
910 				process_all_pending_ici(currentCPU);
911 				PAUSE();
912 			}
913 			// for SYNC messages, it's our responsibility to put it
914 			// back into the free list
915 			return_free_message(msg);
916 		}
917 
918 		restore_interrupts(state);
919 	}
920 }
921 
922 
923 void
924 smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, uint32 data,
925 	uint32 data2, uint32 data3, void *dataPointer, uint32 flags)
926 {
927 	if (!sICIEnabled)
928 		return;
929 
930 	int currentCPU = smp_get_current_cpu();
931 	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
932 		& (((cpu_mask_t)1 << sNumCPUs) - 1);
933 	if (cpuMask == 0) {
934 		panic("smp_send_multicast_ici(): 0 CPU mask");
935 		return;
936 	}
937 
938 	// count target CPUs
939 	int32 targetCPUs = 0;
940 	for (int32 i = 0; i < sNumCPUs; i++) {
941 		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
942 			targetCPUs++;
943 	}
944 
945 	// find_free_message leaves interrupts disabled
946 	struct smp_msg *msg;
947 	int state = find_free_message(&msg);
948 
949 	msg->message = message;
950 	msg->data = data;
951 	msg->data2 = data2;
952 	msg->data3 = data3;
953 	msg->data_ptr = dataPointer;
954 	msg->ref_count = targetCPUs;
955 	msg->flags = flags;
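	// Mark all CPUs that are not targeted as if they had already handled the
	// message, so that only the CPUs in cpuMask pick it up from the broadcast
	// mailbox.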
956 	msg->proc_bitmap = ~cpuMask;
957 	msg->done = false;
958 
959 	// stick it in the broadcast mailbox
960 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
961 	msg->next = sBroadcastMessages;
962 	sBroadcastMessages = msg;
963 	release_spinlock(&sBroadcastMessageSpinlock);
964 
965 	arch_smp_send_broadcast_ici();
966 		// TODO: Introduce a call that only bothers the target CPUs!
967 
968 	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
969 		// Wait for the target CPUs to finish processing it: the last CPU to
970 		// process the message sets msg->done (this is only done for SYNC
971 		// messages, after the message has been removed from the mailbox).
972 		while (msg->done == false) {
973 			process_all_pending_ici(currentCPU);
974 			PAUSE();
975 		}
976 
977 		// for SYNC messages, it's our responsibility to put it
978 		// back into the free list
979 		return_free_message(msg);
980 	}
981 
982 	restore_interrupts(state);
983 }
984 
985 
986 void
987 smp_send_broadcast_ici(int32 message, uint32 data, uint32 data2, uint32 data3,
988 	void *dataPointer, uint32 flags)
989 {
990 	struct smp_msg *msg;
991 
992 	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 "
993 		"0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n", smp_get_current_cpu(),
994 		message, data, data2, data3, dataPointer, flags));
995 
996 	if (sICIEnabled) {
997 		int state;
998 		int currentCPU;
999 
1000 		// find_free_message leaves interrupts disabled
1001 		state = find_free_message(&msg);
1002 
1003 		currentCPU = smp_get_current_cpu();
1004 
1005 		msg->message = message;
1006 		msg->data = data;
1007 		msg->data2 = data2;
1008 		msg->data3 = data3;
1009 		msg->data_ptr = dataPointer;
1010 		msg->ref_count = sNumCPUs - 1;
1011 		msg->flags = flags;
1012 		msg->proc_bitmap = SET_BIT(0, currentCPU);
1013 		msg->done = false;
1014 
1015 		TRACE(("smp_send_broadcast_ici %d: inserting msg %p into broadcast "
1016 			"mbox\n", currentCPU, msg));
1017 
1018 		// stick it in the appropriate cpu's mailbox
1019 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1020 		msg->next = sBroadcastMessages;
1021 		sBroadcastMessages = msg;
1022 		release_spinlock(&sBroadcastMessageSpinlock);
1023 
1024 		arch_smp_send_broadcast_ici();
1025 
1026 		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));
1027 
1028 		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
1029 			// Wait for the other CPUs to finish processing it: the last CPU
1030 			// to process the message sets msg->done (this is only done for
1031 			// SYNC messages, after the message has been removed from the list).
1032 			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));
1033 
1034 			while (msg->done == false) {
1035 				process_all_pending_ici(currentCPU);
1036 				PAUSE();
1037 			}
1038 
1039 			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));
1040 
1041 			// for SYNC messages, it's our responsibility to put it
1042 			// back into the free list
1043 			return_free_message(msg);
1044 		}
1045 
1046 		restore_interrupts(state);
1047 	}
1048 
1049 	TRACE(("smp_send_broadcast_ici: done\n"));
1050 }
1051 
1052 
1053 void
1054 smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
1055 	uint32 data, uint32 data2, uint32 data3, void *dataPointer, uint32 flags)
1056 {
1057 	if (!sICIEnabled)
1058 		return;
1059 
1060 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
1061 		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
1062 		currentCPU, message, data, data2, data3, dataPointer, flags));
1063 
1064 	struct smp_msg *msg;
1065 	find_free_message_interrupts_disabled(currentCPU, &msg);
1066 
1067 	msg->message = message;
1068 	msg->data = data;
1069 	msg->data2 = data2;
1070 	msg->data3 = data3;
1071 	msg->data_ptr = dataPointer;
1072 	msg->ref_count = sNumCPUs - 1;
1073 	msg->flags = flags;
1074 	msg->proc_bitmap = SET_BIT(0, currentCPU);
1075 	msg->done = false;
1076 
1077 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
1078 		"into broadcast mbox\n", currentCPU, msg));
1079 
1080 	// stick it in the appropriate cpu's mailbox
1081 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1082 	msg->next = sBroadcastMessages;
1083 	sBroadcastMessages = msg;
1084 	release_spinlock(&sBroadcastMessageSpinlock);
1085 
1086 	arch_smp_send_broadcast_ici();
1087 
1088 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
1089 		currentCPU));
1090 
1091 	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
1092 		// Wait for the other CPUs to finish processing it: the last CPU to
1093 		// process the message sets msg->done (this is only done for SYNC
1094 		// messages, after the message has been removed from the list).
1095 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
1096 			"ack\n", currentCPU));
1097 
1098 		while (msg->done == false) {
1099 			process_all_pending_ici(currentCPU);
1100 			PAUSE();
1101 		}
1102 
1103 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
1104 			"message to free list\n", currentCPU));
1105 
1106 		// for SYNC messages, it's our responsibility to put it
1107 		// back into the free list
1108 		return_free_message(msg);
1109 	}
1110 
1111 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
1112 }
1113 
1114 
1115 /*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.
1116 
1117 	\param cpu The index of the calling CPU.
1118 	\param rendezVous A rendez-vous variable to make sure that the boot CPU
1119 		does not return before all other CPUs have started waiting.
1120 	\return \c true on the boot CPU, \c false otherwise.
1121 */
1122 bool
1123 smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
1124 {
1125 	if (cpu == 0) {
1126 		smp_cpu_rendezvous(rendezVous, cpu);
1127 		return true;
1128 	}
1129 
1130 	smp_cpu_rendezvous(rendezVous, cpu);
1131 
1132 	while (sBootCPUSpin == 0) {
1133 		if ((sEarlyCPUCall & (1 << cpu)) != 0)
1134 			process_early_cpu_call(cpu);
1135 
1136 		PAUSE();
1137 	}
1138 
1139 	return false;
1140 }
1141 
1142 
1143 void
1144 smp_wake_up_non_boot_cpus()
1145 {
1146 	// ICIs were previously being ignored
1147 	if (sNumCPUs > 1)
1148 		sICIEnabled = true;
1149 
1150 	// resume non boot CPUs
1151 	sBootCPUSpin = 1;
1152 }
1153 
1154 
1155 /*!	Spin until all CPUs have reached the rendez-vous point.
1156 
1157 	The rendez-vous variable \c *var must have been initialized to 0 before the
1158 	function is called. The variable will be non-zero when the function returns.
1159 
1160 	Note that when the function returns on one CPU, it only means that all CPUs
1161 	have already entered the function. It does not mean that the variable can
1162 	already be reset. Only when all CPUs have returned (which would have to be
1163 	ensured via another rendez-vous) can the variable be reset.
1164 */
1165 void
1166 smp_cpu_rendezvous(volatile uint32* var, int current_cpu)
1167 {
1168 	atomic_or((vint32*)var, 1 << current_cpu);
1169 
1170 	while (*var != (((uint32)1 << sNumCPUs) - 1))
1171 		PAUSE();
1172 }
1173 
1174 
1175 status_t
1176 smp_init(kernel_args* args)
1177 {
1178 	TRACE(("smp_init: entry\n"));
1179 
1180 #if DEBUG_SPINLOCK_LATENCIES
1181 	sEnableLatencyCheck
1182 		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
1183 #endif
1184 
1185 #if DEBUG_SPINLOCKS
1186 	add_debugger_command_etc("spinlock", &dump_spinlock,
1187 		"Dump info on a spinlock",
1188 		"\n"
1189 		"Dumps info on a spinlock.\n", 0);
1190 #endif
1191 	add_debugger_command_etc("ici", &dump_ici_messages,
1192 		"Dump info on pending ICI messages",
1193 		"\n"
1194 		"Dumps info on pending ICI messages.\n", 0);
1195 	add_debugger_command_etc("ici_message", &dump_ici_message,
1196 		"Dump info on an ICI message",
1197 		"\n"
1198 		"Dumps info on an ICI message.\n", 0);
1199 
1200 	if (args->num_cpus > 1) {
1201 		sFreeMessages = NULL;
1202 		sFreeMessageCount = 0;
1203 		for (int i = 0; i < MSG_POOL_SIZE; i++) {
1204 			struct smp_msg* msg
1205 				= (struct smp_msg*)malloc(sizeof(struct smp_msg));
1206 			if (msg == NULL) {
1207 				panic("error creating smp mailboxes\n");
1208 				return B_ERROR;
1209 			}
1210 			memset(msg, 0, sizeof(struct smp_msg));
1211 			msg->next = sFreeMessages;
1212 			sFreeMessages = msg;
1213 			sFreeMessageCount++;
1214 		}
1215 		sNumCPUs = args->num_cpus;
1216 	}
1217 	TRACE(("smp_init: calling arch_smp_init\n"));
1218 
1219 	return arch_smp_init(args);
1220 }
1221 
1222 
1223 status_t
1224 smp_per_cpu_init(kernel_args* args, int32 cpu)
1225 {
1226 	return arch_smp_per_cpu_init(args, cpu);
1227 }
1228 
1229 
1230 status_t
1231 smp_init_post_generic_syscalls(void)
1232 {
1233 #if B_DEBUG_SPINLOCK_CONTENTION
1234 	return register_generic_syscall(SPINLOCK_CONTENTION,
1235 		&spinlock_contention_syscall, 0, 0);
1236 #else
1237 	return B_OK;
1238 #endif
1239 }
1240 
1241 
1242 void
1243 smp_set_num_cpus(int32 numCPUs)
1244 {
1245 	sNumCPUs = numCPUs;
1246 }
1247 
1248 
1249 int32
1250 smp_get_num_cpus()
1251 {
1252 	return sNumCPUs;
1253 }
1254 
1255 
1256 int32
1257 smp_get_current_cpu(void)
1258 {
1259 	return thread_get_current_thread()->cpu->cpu_num;
1260 }
1261 
1262 
1263 // #pragma mark - public exported functions
1264 
1265 
1266 void
1267 call_all_cpus(void (*func)(void*, int), void* cookie)
1268 {
1269 	cpu_status state = disable_interrupts();
1270 
1271 	// if inter-CPU communication is not yet enabled, use the early mechanism
1272 	if (!sICIEnabled) {
1273 		call_all_cpus_early(func, cookie);
1274 		restore_interrupts(state);
1275 		return;
1276 	}
1277 
1278 	if (smp_get_num_cpus() > 1) {
1279 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1280 			0, 0, (void*)func, SMP_MSG_FLAG_ASYNC);
1281 	}
1282 
1283 	// we need to call this function ourselves as well
1284 	func(cookie, smp_get_current_cpu());
1285 
1286 	restore_interrupts(state);
1287 }
1288 
1289 
1290 void
1291 call_all_cpus_sync(void (*func)(void*, int), void* cookie)
1292 {
1293 	cpu_status state = disable_interrupts();
1294 
1295 	// if inter-CPU communication is not yet enabled, use the early mechanism
1296 	if (!sICIEnabled) {
1297 		call_all_cpus_early(func, cookie);
1298 		restore_interrupts(state);
1299 		return;
1300 	}
1301 
1302 	if (smp_get_num_cpus() > 1) {
1303 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1304 			0, 0, (void*)func, SMP_MSG_FLAG_SYNC);
1305 	}
1306 
1307 	// we need to call this function ourselves as well
1308 	func(cookie, smp_get_current_cpu());
1309 
1310 	restore_interrupts(state);
1311 }
1312 
1313 
1314 void
1315 memory_read_barrier(void)
1316 {
1317 	arch_cpu_memory_read_barrier();
1318 }
1319 
1320 
1321 void
1322 memory_write_barrier(void)
1323 {
1324 	arch_cpu_memory_write_barrier();
1325 }
1326