xref: /haiku/src/system/kernel/smp.cpp (revision a5bf12376daeded4049521eb17a6cc41192250d9)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Functionality for symmetric multiprocessing (SMP) */
12 
13 
14 #include <smp.h>
15 
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/debug.h>
21 #include <arch/int.h>
22 #include <arch/smp.h>
23 #include <cpu.h>
24 #include <generic_syscall.h>
25 #include <int.h>
26 #include <spinlock_contention.h>
27 #include <thread.h>
28 #if DEBUG_SPINLOCK_LATENCIES
29 #	include <safemode.h>
30 #endif
31 
32 #include "kernel_debug_config.h"
33 
34 
35 //#define TRACE_SMP
36 #ifdef TRACE_SMP
37 #	define TRACE(x) dprintf x
38 #else
39 #	define TRACE(x) ;
40 #endif
41 
42 
43 #undef acquire_spinlock
44 #undef release_spinlock
45 
46 
47 #define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)
48 
49 // These macros define the number of unsuccessful iterations in
50 // acquire_spinlock() and acquire_spinlock_nocheck() after which the
51 // functions panic(), assuming a deadlock has occurred.
52 #define SPINLOCK_DEADLOCK_COUNT				100000000
53 #define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000
54 
55 
56 struct smp_msg {
57 	struct smp_msg	*next;
58 	int32			message;
59 	uint32			data;
60 	uint32			data2;
61 	uint32			data3;
62 	void			*data_ptr;
63 	uint32			flags;
64 	int32			ref_count;
65 	volatile bool	done;
66 	uint32			proc_bitmap;
67 };
68 
69 #define MAILBOX_LOCAL 1
70 #define MAILBOX_BCAST 2
71 
72 static vint32 sBootCPUSpin = 0;
73 
74 static vint32 sEarlyCPUCall = 0;
75 static void (*sEarlyCPUCallFunction)(void*, int);
76 static void* sEarlyCPUCallCookie;
77 
78 static struct smp_msg *sFreeMessages = NULL;
79 static volatile int sFreeMessageCount = 0;
80 static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;
81 
82 static struct smp_msg *sCPUMessages[SMP_MAX_CPUS] = { NULL, };
83 static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];
84 
85 static struct smp_msg *sBroadcastMessages = NULL;
86 static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;
87 
88 static bool sICIEnabled = false;
89 static int32 sNumCPUs = 1;
90 
91 static int32 process_pending_ici(int32 currentCPU);
92 
93 
94 #if DEBUG_SPINLOCKS
95 #define NUM_LAST_CALLERS	32
96 
97 static struct {
98 	void		*caller;
99 	spinlock	*lock;
100 } sLastCaller[NUM_LAST_CALLERS];
101 
102 static vint32 sLastIndex = 0;
103 	// Is incremented atomically. Must be taken % NUM_LAST_CALLERS before
104 	// being used as an index into sLastCaller. Note that it has to be cast
105 	// to uint32 before applying the modulo operation, since otherwise,
106 	// after overflowing, it would yield negative indices.
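	// For example, once sLastIndex has wrapped around to a negative value
	// such as -2147483640, "-2147483640 % 32" evaluates to -24 in C, while
	// "(uint32)-2147483640 % 32" yields the intended in-range index 8.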
107 
108 
109 static void
110 push_lock_caller(void *caller, spinlock *lock)
111 {
112 	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;
113 
114 	sLastCaller[index].caller = caller;
115 	sLastCaller[index].lock = lock;
116 }
117 
118 
119 static void *
120 find_lock_caller(spinlock *lock)
121 {
122 	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;
123 
124 	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
125 		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
126 		if (sLastCaller[index].lock == lock)
127 			return sLastCaller[index].caller;
128 	}
129 
130 	return NULL;
131 }
132 
133 
134 int
135 dump_spinlock(int argc, char** argv)
136 {
137 	if (argc != 2) {
138 		print_debugger_command_usage(argv[0]);
139 		return 0;
140 	}
141 
142 	uint64 address;
143 	if (!evaluate_debug_expression(argv[1], &address, false))
144 		return 0;
145 
146 	spinlock* lock = (spinlock*)(addr_t)address;
147 	kprintf("spinlock %p:\n", lock);
148 	bool locked = B_SPINLOCK_IS_LOCKED(lock);
149 	if (locked) {
150 		kprintf("  locked from %p\n", find_lock_caller(lock));
151 	} else
152 		kprintf("  not locked\n");
153 
154 	return 0;
155 }
156 
157 
158 #endif	// DEBUG_SPINLOCKS
159 
160 
161 #if DEBUG_SPINLOCK_LATENCIES
162 
163 
164 #define NUM_LATENCY_LOCKS	4
165 #define DEBUG_LATENCY		200
166 
167 
168 static struct {
169 	spinlock	*lock;
170 	bigtime_t	timestamp;
171 } sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];
172 
173 static int32 sLatencyIndex[B_MAX_CPU_COUNT];
174 static bool sEnableLatencyCheck;
175 
176 
177 static void
178 push_latency(spinlock* lock)
179 {
180 	if (!sEnableLatencyCheck)
181 		return;
182 
183 	int32 cpu = smp_get_current_cpu();
184 	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;
185 
186 	sLatency[cpu][index].lock = lock;
187 	sLatency[cpu][index].timestamp = system_time();
188 }
189 
190 
191 static void
192 test_latency(spinlock* lock)
193 {
194 	if (!sEnableLatencyCheck)
195 		return;
196 
197 	int32 cpu = smp_get_current_cpu();
198 
199 	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
200 		if (sLatency[cpu][i].lock == lock) {
201 			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
202 			if (diff > DEBUG_LATENCY && diff < 500000) {
203 				panic("spinlock %p were held for %lld usecs (%d allowed)\n",
204 					lock, diff, DEBUG_LATENCY);
205 			}
206 
207 			sLatency[cpu][i].lock = NULL;
208 		}
209 	}
210 }
211 
212 
213 #endif	// DEBUG_SPINLOCK_LATENCIES
214 
215 
216 int
217 dump_ici_messages(int argc, char** argv)
218 {
219 	// count broadcast messages
220 	int32 count = 0;
221 	int32 doneCount = 0;
222 	int32 unreferencedCount = 0;
223 	smp_msg* message = sBroadcastMessages;
224 	while (message != NULL) {
225 		count++;
226 		if (message->done)
227 			doneCount++;
228 		if (message->ref_count <= 0)
229 			unreferencedCount++;
230 		message = message->next;
231 	}
232 
233 	kprintf("ICI broadcast messages: %ld, first: %p\n", count,
234 		sBroadcastMessages);
235 	kprintf("  done:         %ld\n", doneCount);
236 	kprintf("  unreferenced: %ld\n", unreferencedCount);
237 
238 	// count per-CPU messages
239 	for (int32 i = 0; i < sNumCPUs; i++) {
240 		count = 0;
241 		message = sCPUMessages[i];
242 		while (message != NULL) {
243 			count++;
244 			message = message->next;
245 		}
246 
247 		kprintf("CPU %ld messages: %ld, first: %p\n", i, count,
248 			sCPUMessages[i]);
249 	}
250 
251 	return 0;
252 }
253 
254 
255 int
256 dump_ici_message(int argc, char** argv)
257 {
258 	if (argc != 2) {
259 		print_debugger_command_usage(argv[0]);
260 		return 0;
261 	}
262 
263 	uint64 address;
264 	if (!evaluate_debug_expression(argv[1], &address, false))
265 		return 0;
266 
267 	smp_msg* message = (smp_msg*)(addr_t)address;
268 	kprintf("ICI message %p:\n", message);
269 	kprintf("  next:        %p\n", message->next);
270 	kprintf("  message:     %ld\n", message->message);
271 	kprintf("  data:        %ld\n", message->data);
272 	kprintf("  data2:       %ld\n", message->data2);
273 	kprintf("  data3:       %ld\n", message->data3);
274 	kprintf("  data_ptr:    %p\n", message->data_ptr);
275 	kprintf("  flags:       %lx\n", message->flags);
276 	kprintf("  ref_count:   %lx\n", message->ref_count);
277 	kprintf("  done:        %s\n", message->done ? "true" : "false");
278 	kprintf("  proc_bitmap: %lx\n", message->proc_bitmap);
279 
280 	return 0;
281 }
282 
283 
284 static inline void
285 process_all_pending_ici(int32 currentCPU)
286 {
287 	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
288 		;
289 }
290 
291 
292 void
293 _acquire_spinlock(spinlock *lock)
294 {
295 #if DEBUG_SPINLOCKS
296 	if (are_interrupts_enabled()) {
297 		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
298 			"enabled", lock);
299 	}
300 #endif
301 
302 	if (sNumCPUs > 1) {
303 		int currentCPU = smp_get_current_cpu();
304 #if B_DEBUG_SPINLOCK_CONTENTION
305 		while (atomic_add(&lock->lock, 1) != 0)
306 			process_all_pending_ici(currentCPU);
307 #else
308 		while (1) {
309 			uint32 count = 0;
310 			while (*lock != 0) {
311 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
312 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
313 						"for a long time!", lock);
314 					count = 0;
315 				}
316 
317 				process_all_pending_ici(currentCPU);
318 				PAUSE();
319 			}
320 			if (atomic_or((int32 *)lock, 1) == 0)
321 				break;
322 		}
323 
324 #	if DEBUG_SPINLOCKS
325 		push_lock_caller(arch_debug_get_caller(), lock);
326 #	endif
327 #endif
328 	} else {
329 #if DEBUG_SPINLOCKS
330 		int32 oldValue;
331 		oldValue = atomic_or((int32 *)lock, 1);
332 		if (oldValue != 0) {
333 			panic("acquire_spinlock: attempt to acquire lock %p twice on "
334 				"non-SMP system (last caller: %p, value %ld)", lock,
335 				find_lock_caller(lock), oldValue);
336 		}
337 
338 		push_lock_caller(arch_debug_get_caller(), lock);
339 #endif
340 	}
341 #if DEBUG_SPINLOCK_LATENCIES
342 	push_latency(lock);
343 #endif
344 }
345 
346 
347 static void
348 acquire_spinlock_nocheck(spinlock *lock)
349 {
350 #if DEBUG_SPINLOCKS
351 	if (are_interrupts_enabled()) {
352 		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
353 			"interrupts enabled", lock);
354 	}
355 #endif
356 
357 	if (sNumCPUs > 1) {
358 #if B_DEBUG_SPINLOCK_CONTENTION
359 		while (atomic_add(&lock->lock, 1) != 0) {
360 		}
361 #else
362 		while (1) {
363 			uint32 count = 0;
364 			while (*lock != 0) {
365 				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
366 					panic("acquire_spinlock_nocheck(): Failed to acquire "
367 						"spinlock %p for a long time!", lock);
368 					count = 0;
369 				}
370 
371 				PAUSE();
372 			}
373 
374 			if (atomic_or((int32 *)lock, 1) == 0)
375 				break;
376 		}
377 #endif
378 	} else {
379 #if DEBUG_SPINLOCKS
380 		if (atomic_or((int32 *)lock, 1) != 0) {
381 			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
382 				"on non-SMP system\n", lock);
383 		}
384 #endif
385 	}
386 }
387 
388 
389 /*!	Equivalent to acquire_spinlock(), save for currentCPU parameter. */
390 static void
391 acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
392 {
393 #if DEBUG_SPINLOCKS
394 	if (are_interrupts_enabled()) {
395 		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
396 			"interrupts enabled", lock);
397 	}
398 #endif
399 
400 	if (sNumCPUs > 1) {
401 #if B_DEBUG_SPINLOCK_CONTENTION
402 		while (atomic_add(&lock->lock, 1) != 0)
403 			process_all_pending_ici(currentCPU);
404 #else
405 		while (1) {
406 			uint32 count = 0;
407 			while (*lock != 0) {
408 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
409 					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
410 						"%p for a long time!", lock);
411 					count = 0;
412 				}
413 
414 				process_all_pending_ici(currentCPU);
415 				PAUSE();
416 			}
417 			if (atomic_or((int32 *)lock, 1) == 0)
418 				break;
419 		}
420 
421 #	if DEBUG_SPINLOCKS
422 		push_lock_caller(arch_debug_get_caller(), lock);
423 #	endif
424 #endif
425 	} else {
426 #if DEBUG_SPINLOCKS
427 		int32 oldValue;
428 		oldValue = atomic_or((int32 *)lock, 1);
429 		if (oldValue != 0) {
430 			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
431 				"non-SMP system (last caller: %p, value %ld)", lock,
432 				find_lock_caller(lock), oldValue);
433 		}
434 
435 		push_lock_caller(arch_debug_get_caller(), lock);
436 #endif
437 	}
438 }
439 
440 
441 void
442 release_spinlock(spinlock *lock)
443 {
444 #if DEBUG_SPINLOCK_LATENCIES
445 	test_latency(lock);
446 #endif
447 
448 	if (sNumCPUs > 1) {
449 		if (are_interrupts_enabled())
450 			panic("release_spinlock: attempt to release lock %p with interrupts enabled\n", lock);
451 #if B_DEBUG_SPINLOCK_CONTENTION
452 		{
453 			int32 count = atomic_and(&lock->lock, 0) - 1;
454 			if (count < 0) {
455 				panic("release_spinlock: lock %p was already released\n", lock);
456 			} else {
457 				// add to the total count -- deal with carry manually
458 				if ((uint32)atomic_add(&lock->count_low, count) + count
459 						< (uint32)count) {
460 					atomic_add(&lock->count_high, 1);
461 				}
462 			}
463 		}
464 #else
465 		if (atomic_and((int32 *)lock, 0) != 1)
466 			panic("release_spinlock: lock %p was already released\n", lock);
467 #endif
468 	} else {
469 #if DEBUG_SPINLOCKS
470 		if (are_interrupts_enabled())
471 			panic("release_spinlock: attempt to release lock %p with interrupts enabled\n", lock);
472 		if (atomic_and((int32 *)lock, 0) != 1)
473 			panic("release_spinlock: lock %p was already released\n", lock);
474 #endif
475 #if DEBUG_SPINLOCK_LATENCIES
476 		test_latency(lock);
477 #endif
478 	}
479 }
480 
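/*	A minimal usage sketch for the spinlock primitives above (hypothetical
	caller; sExampleLock and the guarded data are illustrative only). Since
	acquire_spinlock() panics when called with interrupts enabled (with
	DEBUG_SPINLOCKS), the usual pattern disables interrupts around the
	critical section, just as find_free_message() below does:

		static spinlock sExampleLock = B_SPINLOCK_INITIALIZER;

		cpu_status state = disable_interrupts();
		acquire_spinlock(&sExampleLock);
		// ... access the data protected by sExampleLock ...
		release_spinlock(&sExampleLock);
		restore_interrupts(state);
*/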
481 
482 /** Finds a free message and returns it in \a msg.
483  *	NOTE: This function has the side effect of disabling interrupts;
484  *	the return value is the former interrupt state.
485  */
486 
487 static cpu_status
488 find_free_message(struct smp_msg **msg)
489 {
490 	cpu_status state;
491 
492 	TRACE(("find_free_message: entry\n"));
493 
494 retry:
495 	while (sFreeMessageCount <= 0) {
496 		state = disable_interrupts();
497 		process_all_pending_ici(smp_get_current_cpu());
498 		restore_interrupts(state);
499 		PAUSE();
500 	}
501 	state = disable_interrupts();
502 	acquire_spinlock(&sFreeMessageSpinlock);
503 
504 	if (sFreeMessageCount <= 0) {
505 		// someone grabbed one while we were getting the lock,
506 		// go back to waiting for it
507 		release_spinlock(&sFreeMessageSpinlock);
508 		restore_interrupts(state);
509 		goto retry;
510 	}
511 
512 	*msg = sFreeMessages;
513 	sFreeMessages = (*msg)->next;
514 	sFreeMessageCount--;
515 
516 	release_spinlock(&sFreeMessageSpinlock);
517 
518 	TRACE(("find_free_message: returning msg %p\n", *msg));
519 
520 	return state;
521 }
522 
523 
524 /*!	Similar to find_free_message(), but expects the interrupts to be disabled
525 	already.
526 */
527 static void
528 find_free_message_interrupts_disabled(int32 currentCPU,
529 	struct smp_msg** _message)
530 {
531 	TRACE(("find_free_message_interrupts_disabled: entry\n"));
532 
533 	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
534 	while (sFreeMessageCount <= 0) {
535 		release_spinlock(&sFreeMessageSpinlock);
536 		process_all_pending_ici(currentCPU);
537 		PAUSE();
538 		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
539 	}
540 
541 	*_message = sFreeMessages;
542 	sFreeMessages = (*_message)->next;
543 	sFreeMessageCount--;
544 
545 	release_spinlock(&sFreeMessageSpinlock);
546 
547 	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
548 		*_message));
549 }
550 
551 
552 static void
553 return_free_message(struct smp_msg *msg)
554 {
555 	TRACE(("return_free_message: returning msg %p\n", msg));
556 
557 	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
558 	msg->next = sFreeMessages;
559 	sFreeMessages = msg;
560 	sFreeMessageCount++;
561 	release_spinlock(&sFreeMessageSpinlock);
562 }
563 
564 
565 static struct smp_msg *
566 check_for_message(int currentCPU, int *source_mailbox)
567 {
568 	struct smp_msg *msg;
569 
570 	if (!sICIEnabled)
571 		return NULL;
572 
573 	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);
574 	msg = sCPUMessages[currentCPU];
575 	if (msg != NULL) {
576 		sCPUMessages[currentCPU] = msg->next;
577 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
578 		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
579 		*source_mailbox = MAILBOX_LOCAL;
580 	} else {
581 		// try getting one from the broadcast mailbox
582 
583 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
584 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
585 
586 		msg = sBroadcastMessages;
587 		while (msg != NULL) {
588 			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
589 				// we have handled this one already
590 				msg = msg->next;
591 				continue;
592 			}
593 
594 			// mark it so we won't try to process this one again
595 			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
596 			*source_mailbox = MAILBOX_BCAST;
597 			break;
598 		}
599 		release_spinlock(&sBroadcastMessageSpinlock);
600 		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU, msg));
601 	}
602 	return msg;
603 }
604 
605 
606 static void
607 finish_message_processing(int currentCPU, struct smp_msg *msg, int source_mailbox)
608 {
609 	int old_refcount;
610 
611 	old_refcount = atomic_add(&msg->ref_count, -1);
612 	if (old_refcount == 1) {
613 		// we were the last one to decrement the ref_count
614 		// it's our job to remove it from the list & possibly clean it up
615 		struct smp_msg **mbox = NULL;
616 		spinlock *spinlock = NULL;
617 
618 		// clean up the message from one of the mailboxes
619 		switch (source_mailbox) {
620 			case MAILBOX_BCAST:
621 				mbox = &sBroadcastMessages;
622 				spinlock = &sBroadcastMessageSpinlock;
623 				break;
624 			case MAILBOX_LOCAL:
625 				mbox = &sCPUMessages[currentCPU];
626 				spinlock = &sCPUMessageSpinlock[currentCPU];
627 				break;
628 		}
629 
630 		acquire_spinlock_nocheck(spinlock);
631 
632 		TRACE(("cleaning up message %p\n", msg));
633 
634 		if (source_mailbox != MAILBOX_BCAST) {
635 			// local mailbox -- the message has already been removed in
636 			// check_for_message()
637 		} else if (msg == *mbox) {
638 			(*mbox) = msg->next;
639 		} else {
640 			// we need to walk the list to find the message.
641 			// we can't use any data found when previously walking through
642 			// the list, since the list may have changed. But we are guaranteed
643 			// to at least have msg in it.
644 			struct smp_msg *last = NULL;
645 			struct smp_msg *msg1;
646 
647 			msg1 = *mbox;
648 			while (msg1 != NULL && msg1 != msg) {
649 				last = msg1;
650 				msg1 = msg1->next;
651 			}
652 
653 			// by definition, last must be non-NULL here
654 			if (msg1 == msg && last != NULL)
655 				last->next = msg->next;
656 			else
657 				panic("last == NULL or msg != msg1");
658 		}
659 
660 		release_spinlock(spinlock);
661 
662 		if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
663 			free(msg->data_ptr);
664 
665 		if (msg->flags & SMP_MSG_FLAG_SYNC) {
666 			msg->done = true;
667 			// the caller cpu should now free the message
668 		} else {
669 			// in the !SYNC case, we get to free the message
670 			return_free_message(msg);
671 		}
672 	}
673 }
674 
675 
676 static int32
677 process_pending_ici(int32 currentCPU)
678 {
679 	struct smp_msg *msg;
680 	bool haltCPU = false;
681 	int sourceMailbox = 0;
682 	int retval = B_HANDLED_INTERRUPT;
683 
684 	msg = check_for_message(currentCPU, &sourceMailbox);
685 	if (msg == NULL)
686 		return B_ENTRY_NOT_FOUND;
687 
688 	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));
689 
690 	switch (msg->message) {
691 		case SMP_MSG_INVALIDATE_PAGE_RANGE:
692 			arch_cpu_invalidate_TLB_range((addr_t)msg->data, (addr_t)msg->data2);
693 			break;
694 		case SMP_MSG_INVALIDATE_PAGE_LIST:
695 			arch_cpu_invalidate_TLB_list((addr_t *)msg->data, (int)msg->data2);
696 			break;
697 		case SMP_MSG_USER_INVALIDATE_PAGES:
698 			arch_cpu_user_TLB_invalidate();
699 			break;
700 		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
701 			arch_cpu_global_TLB_invalidate();
702 			break;
703 		case SMP_MSG_CPU_HALT:
704 			haltCPU = true;
705 			break;
706 		case SMP_MSG_CALL_FUNCTION:
707 		{
708 			smp_call_func func = (smp_call_func)msg->data_ptr;
709 			func(msg->data, currentCPU, msg->data2, msg->data3);
710 			break;
711 		}
712 		case SMP_MSG_RESCHEDULE:
713 		{
714 			cpu_ent* cpu = thread_get_current_thread()->cpu;
715 			cpu->invoke_scheduler = true;
716 			cpu->invoke_scheduler_if_idle = false;
717 			break;
718 		}
719 		case SMP_MSG_RESCHEDULE_IF_IDLE:
720 		{
721 			cpu_ent* cpu = thread_get_current_thread()->cpu;
722 			if (!cpu->invoke_scheduler) {
723 				cpu->invoke_scheduler = true;
724 				cpu->invoke_scheduler_if_idle = true;
725 			}
726 			break;
727 		}
728 		default:
729 			dprintf("smp_intercpu_int_handler: got unknown message %ld\n", msg->message);
730 	}
731 
732 	// finish dealing with this message, possibly removing it from the list
733 	finish_message_processing(currentCPU, msg, sourceMailbox);
734 
735 	// special case for the halt message
736 	if (haltCPU)
737 		debug_trap_cpu_in_kdl(currentCPU, false);
738 
739 	return retval;
740 }
741 
742 
743 #if B_DEBUG_SPINLOCK_CONTENTION
744 
745 static uint64
746 get_spinlock_counter(spinlock* lock)
747 {
748 	uint32 high;
749 	uint32 low;
750 	do {
751 		high = (uint32)atomic_get(&lock->count_high);
752 		low = (uint32)atomic_get(&lock->count_low);
753 	} while (high != atomic_get(&lock->count_high));
754 
755 	return ((uint64)high << 32) | low;
756 }
757 
758 
759 static status_t
760 spinlock_contention_syscall(const char* subsystem, uint32 function,
761 	void* buffer, size_t bufferSize)
762 {
763 	spinlock_contention_info info;
764 
765 	if (function != GET_SPINLOCK_CONTENTION_INFO)
766 		return B_BAD_VALUE;
767 
768 	if (bufferSize < sizeof(spinlock_contention_info))
769 		return B_BAD_VALUE;
770 
771 	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
772 	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);
773 
774 	if (!IS_USER_ADDRESS(buffer)
775 		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
776 		return B_BAD_ADDRESS;
777 	}
778 
779 	return B_OK;
780 }
781 
782 #endif	// B_DEBUG_SPINLOCK_CONTENTION
783 
784 
785 static void
786 process_early_cpu_call(int32 cpu)
787 {
788 	sEarlyCPUCallFunction(sEarlyCPUCallCookie, cpu);
789 	atomic_and(&sEarlyCPUCall, ~(uint32)(1 << cpu));
790 }
791 
792 
793 static void
794 call_all_cpus_early(void (*function)(void*, int), void* cookie)
795 {
796 	if (sNumCPUs > 1) {
797 		sEarlyCPUCallFunction = function;
798 		sEarlyCPUCallCookie = cookie;
799 
800 		uint32 cpuMask = (1 << sNumCPUs) - 2;
801 			// all CPUs but the boot cpu
802 
803 		sEarlyCPUCall = cpuMask;
804 
805 		// wait for all CPUs to finish
806 		while ((sEarlyCPUCall & cpuMask) != 0)
807 			PAUSE();
808 	}
809 
810 	function(cookie, 0);
811 }
812 
813 
814 //	#pragma mark -
815 
816 
817 int
818 smp_intercpu_int_handler(int32 cpu)
819 {
820 	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));
821 
822 	process_all_pending_ici(cpu);
823 
824 	TRACE(("smp_intercpu_int_handler: done\n"));
825 
826 	return B_HANDLED_INTERRUPT;
827 }
828 
829 
830 void
831 smp_send_ici(int32 targetCPU, int32 message, uint32 data, uint32 data2, uint32 data3,
832 	void *data_ptr, uint32 flags)
833 {
834 	struct smp_msg *msg;
835 
836 	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
837 		targetCPU, message, data, data2, data3, data_ptr, flags));
838 
839 	if (sICIEnabled) {
840 		int state;
841 		int currentCPU;
842 
843 		// find_free_message leaves interrupts disabled
844 		state = find_free_message(&msg);
845 
846 		currentCPU = smp_get_current_cpu();
847 		if (targetCPU == currentCPU) {
848 			return_free_message(msg);
849 			restore_interrupts(state);
850 			return; // we can't send an ICI to ourselves
851 		}
852 
853 		// set up the message
854 		msg->message = message;
855 		msg->data = data;
856 		msg->data2 = data2;
857 		msg->data3 = data3;
858 		msg->data_ptr = data_ptr;
859 		msg->ref_count = 1;
860 		msg->flags = flags;
861 		msg->done = false;
862 
863 		// stick it in the appropriate cpu's mailbox
864 		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
865 		msg->next = sCPUMessages[targetCPU];
866 		sCPUMessages[targetCPU] = msg;
867 		release_spinlock(&sCPUMessageSpinlock[targetCPU]);
868 
869 		arch_smp_send_ici(targetCPU);
870 
871 		if (flags & SMP_MSG_FLAG_SYNC) {
872 			// wait for the target CPU to finish processing it; for sync
873 			// messages the interrupt handler sets msg->done after it has
874 			// removed the message from the mailbox
875 			while (msg->done == false) {
876 				process_all_pending_ici(currentCPU);
877 				PAUSE();
878 			}
879 			// for SYNC messages, it's our responsibility to put it
880 			// back into the free list
881 			return_free_message(msg);
882 		}
883 
884 		restore_interrupts(state);
885 	}
886 }
887 
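/*	A usage sketch for smp_send_ici() (hypothetical caller; the target CPU and
	address range are illustrative only): asking another CPU to invalidate a
	TLB range and waiting for it to finish. With SMP_MSG_FLAG_SYNC the call
	only returns after the target CPU has processed the message.

		smp_send_ici(targetCPU, SMP_MSG_INVALIDATE_PAGE_RANGE,
			(uint32)startAddress, (uint32)endAddress, 0, NULL,
			SMP_MSG_FLAG_SYNC);
*/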
888 
889 void
890 smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, uint32 data,
891 	uint32 data2, uint32 data3, void *data_ptr, uint32 flags)
892 {
893 	if (!sICIEnabled)
894 		return;
895 
896 	int currentCPU = smp_get_current_cpu();
897 	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
898 		& (((cpu_mask_t)1 << sNumCPUs) - 1);
899 	if (cpuMask == 0) {
900 		panic("smp_send_multicast_ici(): 0 CPU mask");
901 		return;
902 	}
903 
904 	// count target CPUs
905 	int32 targetCPUs = 0;
906 	for (int32 i = 0; i < sNumCPUs; i++) {
907 		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
908 			targetCPUs++;
909 	}
910 
911 	// find_free_message leaves interrupts disabled
912 	struct smp_msg *msg;
913 	int state = find_free_message(&msg);
914 
915 	msg->message = message;
916 	msg->data = data;
917 	msg->data2 = data2;
918 	msg->data3 = data3;
919 	msg->data_ptr = data_ptr;
920 	msg->ref_count = targetCPUs;
921 	msg->flags = flags;
922 	msg->proc_bitmap = ~cpuMask;
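		// non-target CPUs are marked as if they had already handled the message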
923 	msg->done = false;
924 
925 	// stick it in the broadcast mailbox
926 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
927 	msg->next = sBroadcastMessages;
928 	sBroadcastMessages = msg;
929 	release_spinlock(&sBroadcastMessageSpinlock);
930 
931 	arch_smp_send_broadcast_ici();
932 		// TODO: Introduce a call that only bothers the target CPUs!
933 
934 	if (flags & SMP_MSG_FLAG_SYNC) {
935 		// wait for the other CPUs to finish processing it; for sync
936 		// messages the interrupt handler sets msg->done once the last
937 		// CPU has removed the message from the mailbox
938 		while (msg->done == false) {
939 			process_all_pending_ici(currentCPU);
940 			PAUSE();
941 		}
942 
943 		// for SYNC messages, it's our responsibility to put it
944 		// back into the free list
945 		return_free_message(msg);
946 	}
947 
948 	restore_interrupts(state);
949 }
950 
951 
952 void
953 smp_send_broadcast_ici(int32 message, uint32 data, uint32 data2, uint32 data3,
954 	void *data_ptr, uint32 flags)
955 {
956 	struct smp_msg *msg;
957 
958 	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
959 		smp_get_current_cpu(), message, data, data2, data3, data_ptr, flags));
960 
961 	if (sICIEnabled) {
962 		int state;
963 		int currentCPU;
964 
965 		// find_free_message leaves interrupts disabled
966 		state = find_free_message(&msg);
967 
968 		currentCPU = smp_get_current_cpu();
969 
970 		msg->message = message;
971 		msg->data = data;
972 		msg->data2 = data2;
973 		msg->data3 = data3;
974 		msg->data_ptr = data_ptr;
975 		msg->ref_count = sNumCPUs - 1;
976 		msg->flags = flags;
977 		msg->proc_bitmap = SET_BIT(0, currentCPU);
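			// the sending CPU is marked as having already handled the message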
978 		msg->done = false;
979 
980 		TRACE(("smp_send_broadcast_ici%d: inserting msg %p into broadcast mbox\n",
981 			currentCPU, msg));
982 
983 		// stick it in the appropriate cpu's mailbox
984 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
985 		msg->next = sBroadcastMessages;
986 		sBroadcastMessages = msg;
987 		release_spinlock(&sBroadcastMessageSpinlock);
988 
989 		arch_smp_send_broadcast_ici();
990 
991 		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));
992 
993 		if (flags & SMP_MSG_FLAG_SYNC) {
994 			// wait for the other CPUs to finish processing it; for sync
995 			// messages the interrupt handler sets msg->done once the last
996 			// CPU has removed the message from the mailbox
997 			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));
998 
999 			while (msg->done == false) {
1000 				process_all_pending_ici(currentCPU);
1001 				PAUSE();
1002 			}
1003 
1004 			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));
1005 
1006 			// for SYNC messages, it's our responsibility to put it
1007 			// back into the free list
1008 			return_free_message(msg);
1009 		}
1010 
1011 		restore_interrupts(state);
1012 	}
1013 
1014 	TRACE(("smp_send_broadcast_ici: done\n"));
1015 }
1016 
1017 
1018 void
1019 smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
1020 	uint32 data, uint32 data2, uint32 data3, void *data_ptr, uint32 flags)
1021 {
1022 	if (!sICIEnabled)
1023 		return;
1024 
1025 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
1026 		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
1027 		currentCPU, message, data, data2, data3, data_ptr, flags));
1028 
1029 	struct smp_msg *msg;
1030 	find_free_message_interrupts_disabled(currentCPU, &msg);
1031 
1032 	msg->message = message;
1033 	msg->data = data;
1034 	msg->data2 = data2;
1035 	msg->data3 = data3;
1036 	msg->data_ptr = data_ptr;
1037 	msg->ref_count = sNumCPUs - 1;
1038 	msg->flags = flags;
1039 	msg->proc_bitmap = SET_BIT(0, currentCPU);
1040 	msg->done = false;
1041 
1042 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
1043 		"into broadcast mbox\n", currentCPU, msg));
1044 
1045 	// stick it in the appropriate cpu's mailbox
1046 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1047 	msg->next = sBroadcastMessages;
1048 	sBroadcastMessages = msg;
1049 	release_spinlock(&sBroadcastMessageSpinlock);
1050 
1051 	arch_smp_send_broadcast_ici();
1052 
1053 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
1054 		currentCPU));
1055 
1056 	if (flags & SMP_MSG_FLAG_SYNC) {
1057 		// wait for the other CPUs to finish processing it; for sync
1058 		// messages the interrupt handler sets msg->done once the last
1059 		// CPU has removed the message from the mailbox
1060 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
1061 			"ack\n", currentCPU));
1062 
1063 		while (msg->done == false) {
1064 			process_all_pending_ici(currentCPU);
1065 			PAUSE();
1066 		}
1067 
1068 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
1069 			"message to free list\n", currentCPU));
1070 
1071 		// for SYNC messages, it's our responsibility to put it
1072 		// back into the free list
1073 		return_free_message(msg);
1074 	}
1075 
1076 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
1077 }
1078 
1079 
1080 /*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.
1081 
1082 	\param cpu The index of the calling CPU.
1083 	\param rendezVous A rendez-vous variable to make sure that the boot CPU
1084 		does not return before all other CPUs have started waiting.
1085 	\return \c true on the boot CPU, \c false otherwise.
1086 */
1087 bool
1088 smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
1089 {
1090 	if (cpu == 0) {
1091 		smp_cpu_rendezvous(rendezVous, cpu);
1092 		return true;
1093 	}
1094 
1095 	smp_cpu_rendezvous(rendezVous, cpu);
1096 
1097 	while (sBootCPUSpin == 0) {
1098 		if ((sEarlyCPUCall & (1 << cpu)) != 0)
1099 			process_early_cpu_call(cpu);
1100 
1101 		PAUSE();
1102 	}
1103 
1104 	return false;
1105 }
1106 
1107 
1108 void
1109 smp_wake_up_non_boot_cpus()
1110 {
1111 	// ICIs were previously being ignored
1112 	if (sNumCPUs > 1)
1113 		sICIEnabled = true;
1114 
1115 	// resume non boot CPUs
1116 	sBootCPUSpin = 1;
1117 }
1118 
1119 
1120 /*!	Spin until all CPUs have reached the rendez-vous point.
1121 
1122 	The rendez-vous variable \c *var must have been initialized to 0 before the
1123 	function is called. The variable will be non-zero when the function returns.
1124 
1125 	Note that when the function returns on one CPU, it only means that all CPUs
1126 	have already entered the function. It does not mean that the variable can
1127 	already be reset. Only when all CPUs have returned (which would have to be
1128 	ensured via another rendez-vous) can the variable be reset.
1129 */
1130 void
1131 smp_cpu_rendezvous(volatile uint32 *var, int current_cpu)
1132 {
1133 	atomic_or((vint32*)var, 1 << current_cpu);
1134 
1135 	while (*var != (((uint32)1 << sNumCPUs) - 1))
1136 		PAUSE();
1137 }
1138 
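/*	A usage sketch (hypothetical caller): synchronizing all CPUs twice, using
	a second rendez-vous variable so that the first one may safely be reset
	afterwards, as described above. Both variables are assumed to start at 0
	and \c cpu is the index of the calling CPU.

		static uint32 sRendezvous1 = 0;
		static uint32 sRendezvous2 = 0;

		smp_cpu_rendezvous(&sRendezvous1, cpu);
		// at this point all CPUs have entered the first rendez-vous
		smp_cpu_rendezvous(&sRendezvous2, cpu);
		// now all CPUs have left the first one, so sRendezvous1 may be reset
*/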
1139 
1140 status_t
1141 smp_init(kernel_args *args)
1142 {
1143 	TRACE(("smp_init: entry\n"));
1144 
1145 #if DEBUG_SPINLOCK_LATENCIES
1146 	sEnableLatencyCheck
1147 		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
1148 #endif
1149 
1150 #if DEBUG_SPINLOCKS
1151 	add_debugger_command_etc("spinlock", &dump_spinlock,
1152 		"Dump info on a spinlock",
1153 		"\n"
1154 		"Dumps info on a spinlock.\n", 0);
1155 #endif
1156 	add_debugger_command_etc("ici", &dump_ici_messages,
1157 		"Dump info on pending ICI messages",
1158 		"\n"
1159 		"Dumps info on pending ICI messages.\n", 0);
1160 	add_debugger_command_etc("ici_message", &dump_ici_message,
1161 		"Dump info on an ICI message",
1162 		"\n"
1163 		"Dumps info on an ICI message.\n", 0);
1164 
1165 	if (args->num_cpus > 1) {
1166 		sFreeMessages = NULL;
1167 		sFreeMessageCount = 0;
1168 		for (int i = 0; i < MSG_POOL_SIZE; i++) {
1169 			struct smp_msg *msg
1170 				= (struct smp_msg *)malloc(sizeof(struct smp_msg));
1171 			if (msg == NULL) {
1172 				panic("error creating smp mailboxes\n");
1173 				return B_ERROR;
1174 			}
1175 			memset(msg, 0, sizeof(struct smp_msg));
1176 			msg->next = sFreeMessages;
1177 			sFreeMessages = msg;
1178 			sFreeMessageCount++;
1179 		}
1180 		sNumCPUs = args->num_cpus;
1181 	}
1182 	TRACE(("smp_init: calling arch_smp_init\n"));
1183 
1184 	return arch_smp_init(args);
1185 }
1186 
1187 
1188 status_t
1189 smp_per_cpu_init(kernel_args *args, int32 cpu)
1190 {
1191 	return arch_smp_per_cpu_init(args, cpu);
1192 }
1193 
1194 
1195 status_t
1196 smp_init_post_generic_syscalls(void)
1197 {
1198 #if B_DEBUG_SPINLOCK_CONTENTION
1199 	return register_generic_syscall(SPINLOCK_CONTENTION,
1200 		&spinlock_contention_syscall, 0, 0);
1201 #else
1202 	return B_OK;
1203 #endif
1204 }
1205 
1206 
1207 void
1208 smp_set_num_cpus(int32 numCPUs)
1209 {
1210 	sNumCPUs = numCPUs;
1211 }
1212 
1213 
1214 int32
1215 smp_get_num_cpus()
1216 {
1217 	return sNumCPUs;
1218 }
1219 
1220 
1221 int32
1222 smp_get_current_cpu(void)
1223 {
1224 	return thread_get_current_thread()->cpu->cpu_num;
1225 }
1226 
1227 
1228 //	#pragma mark -
1229 //	public exported functions
1230 
1231 
1232 void
1233 call_all_cpus(void (*func)(void *, int), void *cookie)
1234 {
1235 	// if inter-CPU communication is not yet enabled, use the early mechanism
1236 	if (!sICIEnabled) {
1237 		call_all_cpus_early(func, cookie);
1238 		return;
1239 	}
1240 
1241 	cpu_status state = disable_interrupts();
1242 
1243 	if (smp_get_num_cpus() > 1) {
1244 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1245 			0, 0, (void *)func, SMP_MSG_FLAG_ASYNC);
1246 	}
1247 
1248 	// we need to call this function ourselves as well
1249 	func(cookie, smp_get_current_cpu());
1250 
1251 	restore_interrupts(state);
1252 }
1253 
1254 void
1255 call_all_cpus_sync(void (*func)(void *, int), void *cookie)
1256 {
1257 	// if inter-CPU communication is not yet enabled, use the early mechanism
1258 	if (!sICIEnabled) {
1259 		call_all_cpus_early(func, cookie);
1260 		return;
1261 	}
1262 
1263 	cpu_status state = disable_interrupts();
1264 
1265 	if (smp_get_num_cpus() > 1) {
1266 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (uint32)cookie,
1267 			0, 0, (void *)func, SMP_MSG_FLAG_SYNC);
1268 	}
1269 
1270 	// we need to call this function ourselves as well
1271 	func(cookie, smp_get_current_cpu());
1272 
1273 	restore_interrupts(state);
1274 }
1275 
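/*	A usage sketch for call_all_cpus_sync() above (the callback and cookie are
	hypothetical): running a function on every CPU, including the calling one,
	and returning only after all CPUs have executed it.

		static void
		flush_example_cache(void* cookie, int cpu)
		{
			// per-CPU work goes here; cpu is the index of the executing CPU
		}

		call_all_cpus_sync(&flush_example_cache, NULL);
*/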
1276 
1277 void
1278 memory_read_barrier(void)
1279 {
1280 	arch_cpu_memory_read_barrier();
1281 }
1282 
1283 
1284 void
1285 memory_write_barrier(void)
1286 {
1287 	arch_cpu_memory_write_barrier();
1288 }
1289 
1290 
1291 #pragma weak acquire_spinlock=_acquire_spinlock
1292