xref: /haiku/src/system/kernel/smp.cpp (revision 1345706a9ff6ad0dc041339a02d4259998b0765d)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Functionality for symmetric multiprocessing (SMP) */
12 
13 
14 #include <smp.h>
15 
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/debug.h>
21 #include <arch/int.h>
22 #include <arch/smp.h>
23 #include <boot/kernel_args.h>
24 #include <cpu.h>
25 #include <generic_syscall.h>
26 #include <int.h>
27 #include <spinlock_contention.h>
28 #include <thread.h>
29 #if DEBUG_SPINLOCK_LATENCIES
30 #	include <safemode.h>
31 #endif
32 
33 #include "kernel_debug_config.h"
34 
35 
36 //#define TRACE_SMP
37 #ifdef TRACE_SMP
38 #	define TRACE(x) dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 
44 #undef acquire_spinlock
45 #undef release_spinlock
46 
47 
48 #define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)
49 
50 // These macros define the number of unsuccessful iterations in
51 // acquire_spinlock() and acquire_spinlock_nocheck() after which the functions
52 // panic(), assuming that a deadlock has occurred.
53 #define SPINLOCK_DEADLOCK_COUNT				100000000
54 #define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000
55 
56 
57 struct smp_msg {
58 	struct smp_msg	*next;
59 	int32			message;
60 	uint32			data;
61 	uint32			data2;
62 	uint32			data3;
63 	void			*data_ptr;
64 	uint32			flags;
65 	int32			ref_count;
66 	volatile bool	done;
67 	uint32			proc_bitmap;
68 };
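// Life cycle of an smp_msg (as implemented below): a sender takes a message
// from the free list, fills in the payload (message, data..data3, data_ptr,
// flags) and queues it either in a single CPU's mailbox or in the broadcast
// mailbox. ref_count is the number of CPUs that still have to process the
// message; for broadcast messages proc_bitmap marks the CPUs that have
// already handled it. The last CPU to process a message returns it to the
// free list, or, for SMP_MSG_FLAG_SYNC messages, sets "done" so that the
// waiting sender returns it.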
69 
70 #define MAILBOX_LOCAL 1
71 #define MAILBOX_BCAST 2
72 
73 static vint32 sBootCPUSpin = 0;
74 
75 static vint32 sEarlyCPUCall = 0;
76 static void (*sEarlyCPUCallFunction)(void*, int);
77 void* sEarlyCPUCallCookie;
78 
79 static struct smp_msg *sFreeMessages = NULL;
80 static volatile int sFreeMessageCount = 0;
81 static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;
82 
83 static struct smp_msg *sCPUMessages[SMP_MAX_CPUS] = { NULL, };
84 static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];
85 
86 static struct smp_msg *sBroadcastMessages = NULL;
87 static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;
88 
89 static bool sICIEnabled = false;
90 static int32 sNumCPUs = 1;
91 
92 static int32 process_pending_ici(int32 currentCPU);
93 
94 
95 #if DEBUG_SPINLOCKS
96 #define NUM_LAST_CALLERS	32
97 
98 static struct {
99 	void		*caller;
100 	spinlock	*lock;
101 } sLastCaller[NUM_LAST_CALLERS];
102 
103 static vint32 sLastIndex = 0;
104 	// Incremented atomically. Must be taken % NUM_LAST_CALLERS before being
105 	// used as an index into sLastCaller. Note that it has to be cast to uint32
106 	// before applying the modulo operation, since otherwise an overflow would
107 	// yield negative indices.
108 
109 
110 static void
111 push_lock_caller(void *caller, spinlock *lock)
112 {
113 	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;
114 
115 	sLastCaller[index].caller = caller;
116 	sLastCaller[index].lock = lock;
117 }
118 
119 
120 static void *
121 find_lock_caller(spinlock *lock)
122 {
123 	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;
124 
125 	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
126 		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
127 		if (sLastCaller[index].lock == lock)
128 			return sLastCaller[index].caller;
129 	}
130 
131 	return NULL;
132 }
133 
134 
135 int
136 dump_spinlock(int argc, char** argv)
137 {
138 	if (argc != 2) {
139 		print_debugger_command_usage(argv[0]);
140 		return 0;
141 	}
142 
143 	uint64 address;
144 	if (!evaluate_debug_expression(argv[1], &address, false))
145 		return 0;
146 
147 	spinlock* lock = (spinlock*)(addr_t)address;
148 	kprintf("spinlock %p:\n", lock);
149 	bool locked = B_SPINLOCK_IS_LOCKED(lock);
150 	if (locked) {
151 		kprintf("  locked from %p\n", find_lock_caller(lock));
152 	} else
153 		kprintf("  not locked\n");
154 
155 	return 0;
156 }
157 
158 
159 #endif	// DEBUG_SPINLOCKS
160 
161 
162 #if DEBUG_SPINLOCK_LATENCIES
163 
164 
165 #define NUM_LATENCY_LOCKS	4
166 #define DEBUG_LATENCY		200
167 
168 
169 static struct {
170 	spinlock	*lock;
171 	bigtime_t	timestamp;
172 } sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];
173 
174 static int32 sLatencyIndex[B_MAX_CPU_COUNT];
175 static bool sEnableLatencyCheck;
176 
177 
178 static void
179 push_latency(spinlock* lock)
180 {
181 	if (!sEnableLatencyCheck)
182 		return;
183 
184 	int32 cpu = smp_get_current_cpu();
185 	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;
186 
187 	sLatency[cpu][index].lock = lock;
188 	sLatency[cpu][index].timestamp = system_time();
189 }
190 
191 
192 static void
193 test_latency(spinlock* lock)
194 {
195 	if (!sEnableLatencyCheck)
196 		return;
197 
198 	int32 cpu = smp_get_current_cpu();
199 
200 	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
201 		if (sLatency[cpu][i].lock == lock) {
202 			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
203 			if (diff > DEBUG_LATENCY && diff < 500000) {
204 				panic("spinlock %p was held for %lld usecs (%d allowed)\n",
205 					lock, diff, DEBUG_LATENCY);
206 			}
207 
208 			sLatency[cpu][i].lock = NULL;
209 		}
210 	}
211 }
212 
213 
214 #endif	// DEBUG_SPINLOCK_LATENCIES
215 
216 
217 int
218 dump_ici_messages(int argc, char** argv)
219 {
220 	// count broadcast messages
221 	int32 count = 0;
222 	int32 doneCount = 0;
223 	int32 unreferencedCount = 0;
224 	smp_msg* message = sBroadcastMessages;
225 	while (message != NULL) {
226 		count++;
227 		if (message->done)
228 			doneCount++;
229 		if (message->ref_count <= 0)
230 			unreferencedCount++;
231 		message = message->next;
232 	}
233 
234 	kprintf("ICI broadcast messages: %ld, first: %p\n", count,
235 		sBroadcastMessages);
236 	kprintf("  done:         %ld\n", doneCount);
237 	kprintf("  unreferenced: %ld\n", unreferencedCount);
238 
239 	// count per-CPU messages
240 	for (int32 i = 0; i < sNumCPUs; i++) {
241 		count = 0;
242 		message = sCPUMessages[i];
243 		while (message != NULL) {
244 			count++;
245 			message = message->next;
246 		}
247 
248 		kprintf("CPU %ld messages: %ld, first: %p\n", i, count,
249 			sCPUMessages[i]);
250 	}
251 
252 	return 0;
253 }
254 
255 
256 int
257 dump_ici_message(int argc, char** argv)
258 {
259 	if (argc != 2) {
260 		print_debugger_command_usage(argv[0]);
261 		return 0;
262 	}
263 
264 	uint64 address;
265 	if (!evaluate_debug_expression(argv[1], &address, false))
266 		return 0;
267 
268 	smp_msg* message = (smp_msg*)(addr_t)address;
269 	kprintf("ICI message %p:\n", message);
270 	kprintf("  next:        %p\n", message->next);
271 	kprintf("  message:     %ld\n", message->message);
272 	kprintf("  data:        %ld\n", message->data);
273 	kprintf("  data2:       %ld\n", message->data2);
274 	kprintf("  data3:       %ld\n", message->data3);
275 	kprintf("  data_ptr:    %p\n", message->data_ptr);
276 	kprintf("  flags:       %lx\n", message->flags);
277 	kprintf("  ref_count:   %lx\n", message->ref_count);
278 	kprintf("  done:        %s\n", message->done ? "true" : "false");
279 	kprintf("  proc_bitmap: %lx\n", message->proc_bitmap);
280 
281 	return 0;
282 }
283 
284 
285 static inline void
286 process_all_pending_ici(int32 currentCPU)
287 {
288 	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
289 		;
290 }
291 
292 
293 void
294 _acquire_spinlock(spinlock *lock)
295 {
296 #if DEBUG_SPINLOCKS
297 	if (are_interrupts_enabled()) {
298 		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
299 			"enabled", lock);
300 	}
301 #endif
302 
303 	if (sNumCPUs > 1) {
304 		int currentCPU = smp_get_current_cpu();
305 #if B_DEBUG_SPINLOCK_CONTENTION
306 		while (atomic_add(&lock->lock, 1) != 0)
307 			process_all_pending_ici(currentCPU);
308 #else
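		// Test-and-test-and-set: spin by just reading the lock (cheap and
		// cache-friendly) and only attempt the atomic acquisition once it
		// appears to be free; pending ICIs are processed while spinning so
		// that other CPUs waiting for this one don't deadlock.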
309 		while (1) {
310 			uint32 count = 0;
311 			while (*lock != 0) {
312 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
313 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
314 						"for a long time!", lock);
315 					count = 0;
316 				}
317 
318 				process_all_pending_ici(currentCPU);
319 				PAUSE();
320 			}
321 			if (atomic_or((int32 *)lock, 1) == 0)
322 				break;
323 		}
324 
325 #	if DEBUG_SPINLOCKS
326 		push_lock_caller(arch_debug_get_caller(), lock);
327 #	endif
328 #endif
329 	} else {
330 #if DEBUG_SPINLOCKS
331 		int32 oldValue;
332 		oldValue = atomic_or((int32 *)lock, 1);
333 		if (oldValue != 0) {
334 			panic("acquire_spinlock: attempt to acquire lock %p twice on "
335 				"non-SMP system (last caller: %p, value %ld)", lock,
336 				find_lock_caller(lock), oldValue);
337 		}
338 
339 		push_lock_caller(arch_debug_get_caller(), lock);
340 #endif
341 	}
342 #if DEBUG_SPINLOCK_LATENCIES
343 	push_latency(lock);
344 #endif
345 }
346 
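// A minimal usage sketch (illustrative only -- "sExampleLock" is a
// hypothetical lock, not one defined in this file): spinlocks must be
// acquired and released with interrupts disabled, typically like this:
//
//	static spinlock sExampleLock = B_SPINLOCK_INITIALIZER;
//
//	cpu_status state = disable_interrupts();
//	acquire_spinlock(&sExampleLock);
//	// ... critical section ...
//	release_spinlock(&sExampleLock);
//	restore_interrupts(state);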
347 
348 static void
349 acquire_spinlock_nocheck(spinlock *lock)
350 {
351 #if DEBUG_SPINLOCKS
352 	if (are_interrupts_enabled()) {
353 		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
354 			"interrupts enabled", lock);
355 	}
356 #endif
357 
358 	if (sNumCPUs > 1) {
359 #if B_DEBUG_SPINLOCK_CONTENTION
360 		while (atomic_add(&lock->lock, 1) != 0) {
361 		}
362 #else
363 		while (1) {
364 			uint32 count = 0;
365 			while (*lock != 0) {
366 				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
367 					panic("acquire_spinlock_nocheck(): Failed to acquire "
368 						"spinlock %p for a long time!", lock);
369 					count = 0;
370 				}
371 
372 				PAUSE();
373 			}
374 
375 			if (atomic_or((int32 *)lock, 1) == 0)
376 				break;
377 		}
378 #endif
379 	} else {
380 #if DEBUG_SPINLOCKS
381 		if (atomic_or((int32 *)lock, 1) != 0) {
382 			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
383 				"on non-SMP system\n", lock);
384 		}
385 #endif
386 	}
387 }
388 
389 
390 /*!	Equivalent to acquire_spinlock(), save for the \a currentCPU parameter. */
391 static void
392 acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
393 {
394 #if DEBUG_SPINLOCKS
395 	if (are_interrupts_enabled()) {
396 		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
397 			"interrupts enabled", lock);
398 	}
399 #endif
400 
401 	if (sNumCPUs > 1) {
402 #if B_DEBUG_SPINLOCK_CONTENTION
403 		while (atomic_add(&lock->lock, 1) != 0)
404 			process_all_pending_ici(currentCPU);
405 #else
406 		while (1) {
407 			uint32 count = 0;
408 			while (*lock != 0) {
409 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
410 					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
411 						"%p for a long time!", lock);
412 					count = 0;
413 				}
414 
415 				process_all_pending_ici(currentCPU);
416 				PAUSE();
417 			}
418 			if (atomic_or((int32 *)lock, 1) == 0)
419 				break;
420 		}
421 
422 #	if DEBUG_SPINLOCKS
423 		push_lock_caller(arch_debug_get_caller(), lock);
424 #	endif
425 #endif
426 	} else {
427 #if DEBUG_SPINLOCKS
428 		int32 oldValue;
429 		oldValue = atomic_or((int32 *)lock, 1);
430 		if (oldValue != 0) {
431 			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
432 				"non-SMP system (last caller: %p, value %ld)", lock,
433 				find_lock_caller(lock), oldValue);
434 		}
435 
436 		push_lock_caller(arch_debug_get_caller(), lock);
437 #endif
438 	}
439 }
440 
441 
442 void
443 release_spinlock(spinlock *lock)
444 {
445 #if DEBUG_SPINLOCK_LATENCIES
446 	test_latency(lock);
447 #endif
448 
449 	if (sNumCPUs > 1) {
450 		if (are_interrupts_enabled())
451 			panic("release_spinlock: attempt to release lock %p with interrupts enabled\n", lock);
452 #if B_DEBUG_SPINLOCK_CONTENTION
453 		{
454 			int32 count = atomic_and(&lock->lock, 0) - 1;
455 			if (count < 0) {
456 				panic("release_spinlock: lock %p was already released\n", lock);
457 			} else {
458 				// add to the total count -- deal with carry manually
459 				if ((uint32)atomic_add(&lock->count_low, count) + count
460 						< (uint32)count) {
461 					atomic_add(&lock->count_high, 1);
462 				}
463 			}
464 		}
465 #else
466 		if (atomic_and((int32 *)lock, 0) != 1)
467 			panic("release_spinlock: lock %p was already released\n", lock);
468 #endif
469 	} else {
470 #if DEBUG_SPINLOCKS
471 		if (are_interrupts_enabled())
472 			panic("release_spinlock: attempt to release lock %p with interrupts enabled\n", lock);
473 		if (atomic_and((int32 *)lock, 0) != 1)
474 			panic("release_spinlock: lock %p was already released\n", lock);
475 #endif
476 #if DEBUG_SPINLOCK_LATENCIES
477 		test_latency(lock);
478 #endif
479 	}
480 }
481 
482 
483 /** Finds a free message and gets it.
484  *	NOTE: Has the side effect of disabling interrupts;
485  *	the return value is the former interrupt state.
486  */
487 
488 static cpu_status
489 find_free_message(struct smp_msg **msg)
490 {
491 	cpu_status state;
492 
493 	TRACE(("find_free_message: entry\n"));
494 
495 retry:
496 	while (sFreeMessageCount <= 0) {
497 		state = disable_interrupts();
498 		process_all_pending_ici(smp_get_current_cpu());
499 		restore_interrupts(state);
500 		PAUSE();
501 	}
502 	state = disable_interrupts();
503 	acquire_spinlock(&sFreeMessageSpinlock);
504 
505 	if (sFreeMessageCount <= 0) {
506 		// someone grabbed one while we were getting the lock,
507 		// go back to waiting for it
508 		release_spinlock(&sFreeMessageSpinlock);
509 		restore_interrupts(state);
510 		goto retry;
511 	}
512 
513 	*msg = sFreeMessages;
514 	sFreeMessages = (*msg)->next;
515 	sFreeMessageCount--;
516 
517 	release_spinlock(&sFreeMessageSpinlock);
518 
519 	TRACE(("find_free_message: returning msg %p\n", *msg));
520 
521 	return state;
522 }
523 
524 
525 /*!	Similar to find_free_message(), but expects the interrupts to be disabled
526 	already.
527 */
528 static void
529 find_free_message_interrupts_disabled(int32 currentCPU,
530 	struct smp_msg** _message)
531 {
532 	TRACE(("find_free_message_interrupts_disabled: entry\n"));
533 
534 	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
535 	while (sFreeMessageCount <= 0) {
536 		release_spinlock(&sFreeMessageSpinlock);
537 		process_all_pending_ici(currentCPU);
538 		PAUSE();
539 		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
540 	}
541 
542 	*_message = sFreeMessages;
543 	sFreeMessages = (*_message)->next;
544 	sFreeMessageCount--;
545 
546 	release_spinlock(&sFreeMessageSpinlock);
547 
548 	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
549 		*_message));
550 }
551 
552 
553 static void
554 return_free_message(struct smp_msg *msg)
555 {
556 	TRACE(("return_free_message: returning msg %p\n", msg));
557 
558 	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
559 	msg->next = sFreeMessages;
560 	sFreeMessages = msg;
561 	sFreeMessageCount++;
562 	release_spinlock(&sFreeMessageSpinlock);
563 }
564 
565 
566 static struct smp_msg *
567 check_for_message(int currentCPU, int *source_mailbox)
568 {
569 	struct smp_msg *msg;
570 
571 	if (!sICIEnabled)
572 		return NULL;
573 
574 	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);
575 	msg = sCPUMessages[currentCPU];
576 	if (msg != NULL) {
577 		sCPUMessages[currentCPU] = msg->next;
578 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
579 		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
580 		*source_mailbox = MAILBOX_LOCAL;
581 	} else {
582 		// try getting one from the broadcast mailbox
583 
584 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
585 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
586 
587 		msg = sBroadcastMessages;
588 		while (msg != NULL) {
589 			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
590 				// we have handled this one already
591 				msg = msg->next;
592 				continue;
593 			}
594 
595 			// mark it so we won't try to process this one again
596 			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
597 			*source_mailbox = MAILBOX_BCAST;
598 			break;
599 		}
600 		release_spinlock(&sBroadcastMessageSpinlock);
601 		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU, msg));
602 	}
603 	return msg;
604 }
605 
606 
607 static void
608 finish_message_processing(int currentCPU, struct smp_msg *msg, int source_mailbox)
609 {
610 	int old_refcount;
611 
612 	old_refcount = atomic_add(&msg->ref_count, -1);
613 	if (old_refcount == 1) {
614 		// we were the last one to decrement the ref_count
615 		// it's our job to remove it from the list & possibly clean it up
616 		struct smp_msg **mbox = NULL;
617 		spinlock *spinlock = NULL;
618 
619 		// clean up the message from one of the mailboxes
620 		switch (source_mailbox) {
621 			case MAILBOX_BCAST:
622 				mbox = &sBroadcastMessages;
623 				spinlock = &sBroadcastMessageSpinlock;
624 				break;
625 			case MAILBOX_LOCAL:
626 				mbox = &sCPUMessages[currentCPU];
627 				spinlock = &sCPUMessageSpinlock[currentCPU];
628 				break;
629 		}
630 
631 		acquire_spinlock_nocheck(spinlock);
632 
633 		TRACE(("cleaning up message %p\n", msg));
634 
635 		if (source_mailbox != MAILBOX_BCAST) {
636 			// local mailbox -- the message has already been removed in
637 			// check_for_message()
638 		} else if (msg == *mbox) {
639 			(*mbox) = msg->next;
640 		} else {
641 			// We need to walk the list to find the message. We can't reuse
642 			// any data gathered when previously walking through the list,
643 			// since the list may have changed. But we are guaranteed that
644 			// msg is still in it.
645 			struct smp_msg *last = NULL;
646 			struct smp_msg *msg1;
647 
648 			msg1 = *mbox;
649 			while (msg1 != NULL && msg1 != msg) {
650 				last = msg1;
651 				msg1 = msg1->next;
652 			}
653 
654 			// since msg was not the head of the list, last must be non-NULL
655 			if (msg1 == msg && last != NULL)
656 				last->next = msg->next;
657 			else
658 				panic("last == NULL or msg != msg1");
659 		}
660 
661 		release_spinlock(spinlock);
662 
663 		if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
664 			free(msg->data_ptr);
665 
666 		if (msg->flags & SMP_MSG_FLAG_SYNC) {
667 			msg->done = true;
668 			// the caller cpu should now free the message
669 		} else {
670 			// in the !SYNC case, we get to free the message
671 			return_free_message(msg);
672 		}
673 	}
674 }
675 
676 
677 static int32
678 process_pending_ici(int32 currentCPU)
679 {
680 	struct smp_msg *msg;
681 	bool haltCPU = false;
682 	int sourceMailbox = 0;
683 	int retval = B_HANDLED_INTERRUPT;
684 
685 	msg = check_for_message(currentCPU, &sourceMailbox);
686 	if (msg == NULL)
687 		return B_ENTRY_NOT_FOUND;
688 
689 	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));
690 
691 	switch (msg->message) {
692 		case SMP_MSG_INVALIDATE_PAGE_RANGE:
693 			arch_cpu_invalidate_TLB_range((addr_t)msg->data, (addr_t)msg->data2);
694 			break;
695 		case SMP_MSG_INVALIDATE_PAGE_LIST:
696 			arch_cpu_invalidate_TLB_list((addr_t *)msg->data, (int)msg->data2);
697 			break;
698 		case SMP_MSG_USER_INVALIDATE_PAGES:
699 			arch_cpu_user_TLB_invalidate();
700 			break;
701 		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
702 			arch_cpu_global_TLB_invalidate();
703 			break;
704 		case SMP_MSG_CPU_HALT:
705 			haltCPU = true;
706 			break;
707 		case SMP_MSG_CALL_FUNCTION:
708 		{
709 			smp_call_func func = (smp_call_func)msg->data_ptr;
710 			func(msg->data, currentCPU, msg->data2, msg->data3);
711 			break;
712 		}
713 		case SMP_MSG_RESCHEDULE:
714 		{
715 			cpu_ent* cpu = thread_get_current_thread()->cpu;
716 			cpu->invoke_scheduler = true;
717 			cpu->invoke_scheduler_if_idle = false;
718 			break;
719 		}
720 		case SMP_MSG_RESCHEDULE_IF_IDLE:
721 		{
722 			cpu_ent* cpu = thread_get_current_thread()->cpu;
723 			if (!cpu->invoke_scheduler) {
724 				cpu->invoke_scheduler = true;
725 				cpu->invoke_scheduler_if_idle = true;
726 			}
727 			break;
728 		}
729 		default:
730 			dprintf("smp_intercpu_int_handler: got unknown message %ld\n", msg->message);
731 	}
732 
733 	// finish dealing with this message, possibly removing it from the list
734 	finish_message_processing(currentCPU, msg, sourceMailbox);
735 
736 	// special case for the halt message
737 	if (haltCPU)
738 		debug_trap_cpu_in_kdl(currentCPU, false);
739 
740 	return retval;
741 }
742 
743 
744 #if B_DEBUG_SPINLOCK_CONTENTION
745 
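// Reads the 64 bit contention counter without locking: the high word is read
// before and after the low word, and the read is retried if it changed in
// between (i.e. if a carry from count_low into count_high occurred meanwhile).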
746 static uint64
747 get_spinlock_counter(spinlock* lock)
748 {
749 	uint32 high;
750 	uint32 low;
751 	do {
752 		high = (uint32)atomic_get(&lock->count_high);
753 		low = (uint32)atomic_get(&lock->count_low);
754 	} while (high != atomic_get(&lock->count_high));
755 
756 	return ((uint64)high << 32) | low;
757 }
758 
759 
760 static status_t
761 spinlock_contention_syscall(const char* subsystem, uint32 function,
762 	void* buffer, size_t bufferSize)
763 {
764 	spinlock_contention_info info;
765 
766 	if (function != GET_SPINLOCK_CONTENTION_INFO)
767 		return B_BAD_VALUE;
768 
769 	if (bufferSize < sizeof(spinlock_contention_info))
770 		return B_BAD_VALUE;
771 
772 	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
773 	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);
774 
775 	if (!IS_USER_ADDRESS(buffer)
776 		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
777 		return B_BAD_ADDRESS;
778 	}
779 
780 	return B_OK;
781 }
782 
783 #endif	// B_DEBUG_SPINLOCK_CONTENTION
784 
785 
786 static void
787 process_early_cpu_call(int32 cpu)
788 {
789 	sEarlyCPUCallFunction(sEarlyCPUCallCookie, cpu);
790 	atomic_and(&sEarlyCPUCall, ~(uint32)(1 << cpu));
791 }
792 
793 
794 static void
795 call_all_cpus_early(void (*function)(void*, int), void* cookie)
796 {
797 	if (sNumCPUs > 1) {
798 		sEarlyCPUCallFunction = function;
799 		sEarlyCPUCallCookie = cookie;
800 
801 		uint32 cpuMask = (1 << sNumCPUs) - 2;
802 			// all CPUs but the boot cpu
803 
804 		sEarlyCPUCall = cpuMask;
805 
806 		// wait for all CPUs to finish
807 		while ((sEarlyCPUCall & cpuMask) != 0)
808 			PAUSE();
809 	}
810 
811 	function(cookie, 0);
812 }
813 
814 
815 //	#pragma mark -
816 
817 
818 int
819 smp_intercpu_int_handler(int32 cpu)
820 {
821 	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));
822 
823 	process_all_pending_ici(cpu);
824 
825 	TRACE(("smp_intercpu_int_handler: done\n"));
826 
827 	return B_HANDLED_INTERRUPT;
828 }
829 
830 
831 void
832 smp_send_ici(int32 targetCPU, int32 message, uint32 data, uint32 data2, uint32 data3,
833 	void *data_ptr, uint32 flags)
834 {
835 	struct smp_msg *msg;
836 
837 	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
838 		targetCPU, message, data, data2, data3, data_ptr, flags));
839 
840 	if (sICIEnabled) {
841 		int state;
842 		int currentCPU;
843 
844 		// find_free_message leaves interrupts disabled
845 		state = find_free_message(&msg);
846 
847 		currentCPU = smp_get_current_cpu();
848 		if (targetCPU == currentCPU) {
849 			return_free_message(msg);
850 			restore_interrupts(state);
851 			return; // we cannot send an ICI to the current CPU
852 		}
853 
854 		// set up the message
855 		msg->message = message;
856 		msg->data = data;
857 		msg->data2 = data2;
858 		msg->data3 = data3;
859 		msg->data_ptr = data_ptr;
860 		msg->ref_count = 1;
861 		msg->flags = flags;
862 		msg->done = false;
863 
864 		// stick it in the appropriate cpu's mailbox
865 		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
866 		msg->next = sCPUMessages[targetCPU];
867 		sCPUMessages[targetCPU] = msg;
868 		release_spinlock(&sCPUMessageSpinlock[targetCPU]);
869 
870 		arch_smp_send_ici(targetCPU);
871 
872 		if (flags & SMP_MSG_FLAG_SYNC) {
873 			// Wait for the target CPU to finish processing the message: its
874 			// interrupt handler sets msg->done once the message has been removed
875 			// from the mailbox and the reference count has dropped to zero.
876 			while (msg->done == false) {
877 				process_all_pending_ici(currentCPU);
878 				PAUSE();
879 			}
880 			// for SYNC messages, it's our responsibility to put it
881 			// back into the free list
882 			return_free_message(msg);
883 		}
884 
885 		restore_interrupts(state);
886 	}
887 }
888 
889 
890 void
891 smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, uint32 data,
892 	uint32 data2, uint32 data3, void *data_ptr, uint32 flags)
893 {
894 	if (!sICIEnabled)
895 		return;
896 
897 	int currentCPU = smp_get_current_cpu();
898 	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
899 		& (((cpu_mask_t)1 << sNumCPUs) - 1);
900 	if (cpuMask == 0) {
901 		panic("smp_send_multicast_ici(): 0 CPU mask");
902 		return;
903 	}
904 
905 	// count target CPUs
906 	int32 targetCPUs = 0;
907 	for (int32 i = 0; i < sNumCPUs; i++) {
908 		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
909 			targetCPUs++;
910 	}
911 
912 	// find_free_message leaves interrupts disabled
913 	struct smp_msg *msg;
914 	int state = find_free_message(&msg);
915 
916 	msg->message = message;
917 	msg->data = data;
918 	msg->data2 = data2;
919 	msg->data3 = data3;
920 	msg->data_ptr = data_ptr;
921 	msg->ref_count = targetCPUs;
922 	msg->flags = flags;
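	// Mark all CPUs outside cpuMask as if they had already handled the
	// message, so that only the target CPUs pick it up from the broadcast
	// mailbox.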
923 	msg->proc_bitmap = ~cpuMask;
924 	msg->done = false;
925 
926 	// stick it in the broadcast mailbox
927 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
928 	msg->next = sBroadcastMessages;
929 	sBroadcastMessages = msg;
930 	release_spinlock(&sBroadcastMessageSpinlock);
931 
932 	arch_smp_send_broadcast_ici();
933 		// TODO: Introduce a call that only bothers the target CPUs!
934 
935 	if (flags & SMP_MSG_FLAG_SYNC) {
936 		// Wait for the target CPUs to finish processing the message: the last
937 		// CPU to handle it sets msg->done once the message has been removed
938 		// from the mailbox and the reference count has dropped to zero.
939 		while (msg->done == false) {
940 			process_all_pending_ici(currentCPU);
941 			PAUSE();
942 		}
943 
944 		// for SYNC messages, it's our responsibility to put it
945 		// back into the free list
946 		return_free_message(msg);
947 	}
948 
949 	restore_interrupts(state);
950 }
951 
952 
953 void
954 smp_send_broadcast_ici(int32 message, uint32 data, uint32 data2, uint32 data3,
955 	void *data_ptr, uint32 flags)
956 {
957 	struct smp_msg *msg;
958 
959 	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
960 		smp_get_current_cpu(), message, data, data2, data3, data_ptr, flags));
961 
962 	if (sICIEnabled) {
963 		int state;
964 		int currentCPU;
965 
966 		// find_free_message leaves interrupts disabled
967 		state = find_free_message(&msg);
968 
969 		currentCPU = smp_get_current_cpu();
970 
971 		msg->message = message;
972 		msg->data = data;
973 		msg->data2 = data2;
974 		msg->data3 = data3;
975 		msg->data_ptr = data_ptr;
976 		msg->ref_count = sNumCPUs - 1;
977 		msg->flags = flags;
978 		msg->proc_bitmap = SET_BIT(0, currentCPU);
979 		msg->done = false;
980 
981 		TRACE(("smp_send_broadcast_ici %d: inserting msg %p into broadcast mbox\n",
982 			currentCPU, msg));
983 
984 		// stick it in the broadcast mailbox
985 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
986 		msg->next = sBroadcastMessages;
987 		sBroadcastMessages = msg;
988 		release_spinlock(&sBroadcastMessageSpinlock);
989 
990 		arch_smp_send_broadcast_ici();
991 
992 		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));
993 
994 		if (flags & SMP_MSG_FLAG_SYNC) {
995 			// Wait for the other CPUs to finish processing the message: the
996 			// last CPU to handle it sets msg->done once the message has been
997 			// removed from the mailbox and the reference count has dropped to zero.
998 			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));
999 
1000 			while (msg->done == false) {
1001 				process_all_pending_ici(currentCPU);
1002 				PAUSE();
1003 			}
1004 
1005 			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));
1006 
1007 			// for SYNC messages, it's our responsibility to put it
1008 			// back into the free list
1009 			return_free_message(msg);
1010 		}
1011 
1012 		restore_interrupts(state);
1013 	}
1014 
1015 	TRACE(("smp_send_broadcast_ici: done\n"));
1016 }
1017 
1018 
1019 void
1020 smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
1021 	uint32 data, uint32 data2, uint32 data3, void *data_ptr, uint32 flags)
1022 {
1023 	if (!sICIEnabled)
1024 		return;
1025 
1026 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
1027 		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
1028 		currentCPU, message, data, data2, data3, data_ptr, flags));
1029 
1030 	struct smp_msg *msg;
1031 	find_free_message_interrupts_disabled(currentCPU, &msg);
1032 
1033 	msg->message = message;
1034 	msg->data = data;
1035 	msg->data2 = data2;
1036 	msg->data3 = data3;
1037 	msg->data_ptr = data_ptr;
1038 	msg->ref_count = sNumCPUs - 1;
1039 	msg->flags = flags;
1040 	msg->proc_bitmap = SET_BIT(0, currentCPU);
1041 	msg->done = false;
1042 
1043 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
1044 		"into broadcast mbox\n", currentCPU, msg));
1045 
1046 	// stick it in the broadcast mailbox
1047 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1048 	msg->next = sBroadcastMessages;
1049 	sBroadcastMessages = msg;
1050 	release_spinlock(&sBroadcastMessageSpinlock);
1051 
1052 	arch_smp_send_broadcast_ici();
1053 
1054 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
1055 		currentCPU));
1056 
1057 	if (flags & SMP_MSG_FLAG_SYNC) {
1058 		// Wait for the other CPUs to finish processing the message: the last
1059 		// CPU to handle it sets msg->done once the message has been removed
1060 		// from the mailbox and the reference count has dropped to zero.
1061 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
1062 			"ack\n", currentCPU));
1063 
1064 		while (msg->done == false) {
1065 			process_all_pending_ici(currentCPU);
1066 			PAUSE();
1067 		}
1068 
1069 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
1070 			"message to free list\n", currentCPU));
1071 
1072 		// for SYNC messages, it's our responsibility to put it
1073 		// back into the free list
1074 		return_free_message(msg);
1075 	}
1076 
1077 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
1078 }
1079 
1080 
1081 /*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.
1082 
1083 	\param cpu The index of the calling CPU.
1084 	\param rendezVous A rendez-vous variable to make sure that the boot CPU
1085 		does not return before all other CPUs have started waiting.
1086 	\return \c true on the boot CPU, \c false otherwise.
1087 */
1088 bool
1089 smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
1090 {
1091 	if (cpu == 0) {
1092 		smp_cpu_rendezvous(rendezVous, cpu);
1093 		return true;
1094 	}
1095 
1096 	smp_cpu_rendezvous(rendezVous, cpu);
1097 
1098 	while (sBootCPUSpin == 0) {
1099 		if ((sEarlyCPUCall & (1 << cpu)) != 0)
1100 			process_early_cpu_call(cpu);
1101 
1102 		PAUSE();
1103 	}
1104 
1105 	return false;
1106 }
1107 
1108 
1109 void
1110 smp_wake_up_non_boot_cpus()
1111 {
1112 	// ICIs were previously being ignored
1113 	if (sNumCPUs > 1)
1114 		sICIEnabled = true;
1115 
1116 	// resume non boot CPUs
1117 	sBootCPUSpin = 1;
1118 }
1119 
1120 
1121 /*!	Spin until all CPUs have reached the rendez-vous point.
1122 
1123 	The rendez-vous variable \c *var must have been initialized to 0 before the
1124 	function is called. The variable will be non-zero when the function returns.
1125 
1126 	Note that when the function returns on one CPU, it only means that all CPUs
1127 	have already entered the function. It does not mean that the variable can
1128 	already be reset. Only when all CPUs have returned (which would have to be
1129 	ensured via another rendez-vous) can the variable be reset.
1130 */
1131 void
1132 smp_cpu_rendezvous(volatile uint32 *var, int current_cpu)
1133 {
1134 	atomic_or((vint32*)var, 1 << current_cpu);
1135 
1136 	while (*var != (((uint32)1 << sNumCPUs) - 1))
1137 		PAUSE();
1138 }
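// A minimal usage sketch (illustrative only -- "sRendezvous", "sRendezvous2"
// and "cpu" are hypothetical): since the variable may only be reset after all
// CPUs have left the rendez-vous, reusing it typically requires a second
// rendez-vous:
//
//	static uint32 sRendezvous = 0;
//	static uint32 sRendezvous2 = 0;
//
//	smp_cpu_rendezvous(&sRendezvous, cpu);	// wait until all CPUs arrive
//	// ... per-CPU work ...
//	smp_cpu_rendezvous(&sRendezvous2, cpu);	// now sRendezvous may be reset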
1139 
1140 
1141 status_t
1142 smp_init(kernel_args *args)
1143 {
1144 	TRACE(("smp_init: entry\n"));
1145 
1146 #if DEBUG_SPINLOCK_LATENCIES
1147 	sEnableLatencyCheck
1148 		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
1149 #endif
1150 
1151 #if DEBUG_SPINLOCKS
1152 	add_debugger_command_etc("spinlock", &dump_spinlock,
1153 		"Dump info on a spinlock",
1154 		"\n"
1155 		"Dumps info on a spinlock.\n", 0);
1156 #endif
1157 	add_debugger_command_etc("ici", &dump_ici_messages,
1158 		"Dump info on pending ICI messages",
1159 		"\n"
1160 		"Dumps info on pending ICI messages.\n", 0);
1161 	add_debugger_command_etc("ici_message", &dump_ici_message,
1162 		"Dump info on an ICI message",
1163 		"\n"
1164 		"Dumps info on an ICI message.\n", 0);
1165 
1166 	if (args->num_cpus > 1) {
1167 		sFreeMessages = NULL;
1168 		sFreeMessageCount = 0;
1169 		for (int i = 0; i < MSG_POOL_SIZE; i++) {
1170 			struct smp_msg *msg
1171 				= (struct smp_msg *)malloc(sizeof(struct smp_msg));
1172 			if (msg == NULL) {
1173 				panic("error creating smp mailboxes\n");
1174 				return B_ERROR;
1175 			}
1176 			memset(msg, 0, sizeof(struct smp_msg));
1177 			msg->next = sFreeMessages;
1178 			sFreeMessages = msg;
1179 			sFreeMessageCount++;
1180 		}
1181 		sNumCPUs = args->num_cpus;
1182 	}
1183 	TRACE(("smp_init: calling arch_smp_init\n"));
1184 
1185 	return arch_smp_init(args);
1186 }
1187 
1188 
1189 status_t
1190 smp_per_cpu_init(kernel_args *args, int32 cpu)
1191 {
1192 	return arch_smp_per_cpu_init(args, cpu);
1193 }
1194 
1195 
1196 status_t
1197 smp_init_post_generic_syscalls(void)
1198 {
1199 #if B_DEBUG_SPINLOCK_CONTENTION
1200 	return register_generic_syscall(SPINLOCK_CONTENTION,
1201 		&spinlock_contention_syscall, 0, 0);
1202 #else
1203 	return B_OK;
1204 #endif
1205 }
1206 
1207 
1208 void
1209 smp_set_num_cpus(int32 numCPUs)
1210 {
1211 	sNumCPUs = numCPUs;
1212 }
1213 
1214 
1215 int32
1216 smp_get_num_cpus()
1217 {
1218 	return sNumCPUs;
1219 }
1220 
1221 
1222 int32
1223 smp_get_current_cpu(void)
1224 {
1225 	return thread_get_current_thread()->cpu->cpu_num;
1226 }
1227 
1228 
1229 //	#pragma mark -
1230 //	public exported functions
1231 
1232 
1233 void
1234 call_all_cpus(void (*func)(void *, int), void *cookie)
1235 {
1236 	// if inter-CPU communication is not yet enabled, use the early mechanism
1237 	if (!sICIEnabled) {
1238 		call_all_cpus_early(func, cookie);
1239 		return;
1240 	}
1241 
1242 	cpu_status state = disable_interrupts();
1243 
1244 	if (smp_get_num_cpus() > 1) {
1245 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1246 			0, 0, (void *)func, SMP_MSG_FLAG_ASYNC);
1247 	}
1248 
1249 	// we need to call this function ourselves as well
1250 	func(cookie, smp_get_current_cpu());
1251 
1252 	restore_interrupts(state);
1253 }
1254 
1255 void
1256 call_all_cpus_sync(void (*func)(void *, int), void *cookie)
1257 {
1258 	// if inter-CPU communication is not yet enabled, use the early mechanism
1259 	if (!sICIEnabled) {
1260 		call_all_cpus_early(func, cookie);
1261 		return;
1262 	}
1263 
1264 	cpu_status state = disable_interrupts();
1265 
1266 	if (smp_get_num_cpus() > 1) {
1267 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1268 			0, 0, (void *)func, SMP_MSG_FLAG_SYNC);
1269 	}
1270 
1271 	// we need to call this function ourselves as well
1272 	func(cookie, smp_get_current_cpu());
1273 
1274 	restore_interrupts(state);
1275 }
1276 
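// A minimal usage sketch (illustrative only -- "flush_something" is a
// hypothetical per-CPU callback): run a function on every CPU, including the
// calling one, and wait until all of them have finished:
//
//	static void
//	flush_something(void* cookie, int cpu)
//	{
//		// per-CPU work goes here
//	}
//
//	call_all_cpus_sync(&flush_something, NULL);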
1277 
1278 void
1279 memory_read_barrier(void)
1280 {
1281 	arch_cpu_memory_read_barrier();
1282 }
1283 
1284 
1285 void
1286 memory_write_barrier(void)
1287 {
1288 	arch_cpu_memory_write_barrier();
1289 }
1290 
1291 
1292 #pragma weak acquire_spinlock=_acquire_spinlock
1293