xref: /haiku/src/system/kernel/smp.cpp (revision 82b4b371721bb24f2697b5c17360380cf11aef9c)
1 /*
2  * Copyright 2008-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Functionality for symetrical multi-processors */
12 
13 
14 #include <smp.h>
15 
16 #include <stdlib.h>
17 #include <string.h>
18 
19 #include <arch/cpu.h>
20 #include <arch/debug.h>
21 #include <arch/int.h>
22 #include <arch/smp.h>
23 #include <boot/kernel_args.h>
24 #include <cpu.h>
25 #include <generic_syscall.h>
26 #include <int.h>
27 #include <spinlock_contention.h>
28 #include <thread.h>
29 #if DEBUG_SPINLOCK_LATENCIES
30 #	include <safemode.h>
31 #endif
32 
33 #include "kernel_debug_config.h"
34 
35 
36 //#define TRACE_SMP
37 #ifdef TRACE_SMP
38 #	define TRACE(x) dprintf x
39 #else
40 #	define TRACE(x) ;
41 #endif
42 
43 
44 #undef try_acquire_spinlock
45 #undef acquire_spinlock
46 #undef release_spinlock
47 
48 
49 #define MSG_POOL_SIZE (SMP_MAX_CPUS * 4)
50 
51 // These macros define the number of unsuccessful iterations in
52 // acquire_spinlock() and acquire_spinlock_nocheck() after which the functions
53 // panic(), assuming a deadlock.
54 #define SPINLOCK_DEADLOCK_COUNT				100000000
55 #define SPINLOCK_DEADLOCK_COUNT_NO_CHECK	2000000000
56 
57 
58 struct smp_msg {
59 	struct smp_msg	*next;
60 	int32			message;
61 	addr_t			data;
62 	addr_t			data2;
63 	addr_t			data3;
64 	void			*data_ptr;
65 	uint32			flags;
66 	int32			ref_count;
67 	volatile bool	done;
68 	uint32			proc_bitmap;
69 };
70 
71 enum mailbox_source {
72 	MAILBOX_LOCAL,
73 	MAILBOX_BCAST,
74 };
75 
76 static vint32 sBootCPUSpin = 0;
77 
78 static vint32 sEarlyCPUCall = 0;
79 static void (*sEarlyCPUCallFunction)(void*, int);
80 void* sEarlyCPUCallCookie;
81 
82 static struct smp_msg* sFreeMessages = NULL;
83 static volatile int sFreeMessageCount = 0;
84 static spinlock sFreeMessageSpinlock = B_SPINLOCK_INITIALIZER;
85 
86 static struct smp_msg* sCPUMessages[SMP_MAX_CPUS] = { NULL, };
87 static spinlock sCPUMessageSpinlock[SMP_MAX_CPUS];
88 
89 static struct smp_msg* sBroadcastMessages = NULL;
90 static spinlock sBroadcastMessageSpinlock = B_SPINLOCK_INITIALIZER;
91 
92 static bool sICIEnabled = false;
93 static int32 sNumCPUs = 1;
94 
95 static int32 process_pending_ici(int32 currentCPU);
96 
97 
98 #if DEBUG_SPINLOCKS
99 #define NUM_LAST_CALLERS	32
100 
101 static struct {
102 	void		*caller;
103 	spinlock	*lock;
104 } sLastCaller[NUM_LAST_CALLERS];
105 
106 static vint32 sLastIndex = 0;
107 	// Is incremented atomically. Must be % NUM_LAST_CALLERS before being used
108 	// as index into sLastCaller. Note, that it has to be casted to uint32
109 	// before applying the modulo operation, since otherwise after overflowing
110 	// that would yield negative indices.
111 
112 
113 static void
114 push_lock_caller(void* caller, spinlock* lock)
115 {
116 	int32 index = (uint32)atomic_add(&sLastIndex, 1) % NUM_LAST_CALLERS;
117 
118 	sLastCaller[index].caller = caller;
119 	sLastCaller[index].lock = lock;
120 }
121 
122 
123 static void*
124 find_lock_caller(spinlock* lock)
125 {
126 	int32 lastIndex = (uint32)sLastIndex % NUM_LAST_CALLERS;
127 
128 	for (int32 i = 0; i < NUM_LAST_CALLERS; i++) {
129 		int32 index = (NUM_LAST_CALLERS + lastIndex - 1 - i) % NUM_LAST_CALLERS;
130 		if (sLastCaller[index].lock == lock)
131 			return sLastCaller[index].caller;
132 	}
133 
134 	return NULL;
135 }
136 
137 
138 int
139 dump_spinlock(int argc, char** argv)
140 {
141 	if (argc != 2) {
142 		print_debugger_command_usage(argv[0]);
143 		return 0;
144 	}
145 
146 	uint64 address;
147 	if (!evaluate_debug_expression(argv[1], &address, false))
148 		return 0;
149 
150 	spinlock* lock = (spinlock*)(addr_t)address;
151 	kprintf("spinlock %p:\n", lock);
152 	bool locked = B_SPINLOCK_IS_LOCKED(lock);
153 	if (locked) {
154 		kprintf("  locked from %p\n", find_lock_caller(lock));
155 	} else
156 		kprintf("  not locked\n");
157 
158 	return 0;
159 }
160 
161 
162 #endif	// DEBUG_SPINLOCKS
163 
164 
165 #if DEBUG_SPINLOCK_LATENCIES
166 
167 
168 #define NUM_LATENCY_LOCKS	4
169 #define DEBUG_LATENCY		200
170 
171 
172 static struct {
173 	spinlock	*lock;
174 	bigtime_t	timestamp;
175 } sLatency[B_MAX_CPU_COUNT][NUM_LATENCY_LOCKS];
176 
177 static int32 sLatencyIndex[B_MAX_CPU_COUNT];
178 static bool sEnableLatencyCheck;
179 
180 
181 static void
182 push_latency(spinlock* lock)
183 {
184 	if (!sEnableLatencyCheck)
185 		return;
186 
187 	int32 cpu = smp_get_current_cpu();
188 	int32 index = (++sLatencyIndex[cpu]) % NUM_LATENCY_LOCKS;
189 
190 	sLatency[cpu][index].lock = lock;
191 	sLatency[cpu][index].timestamp = system_time();
192 }
193 
194 
195 static void
196 test_latency(spinlock* lock)
197 {
198 	if (!sEnableLatencyCheck)
199 		return;
200 
201 	int32 cpu = smp_get_current_cpu();
202 
203 	for (int32 i = 0; i < NUM_LATENCY_LOCKS; i++) {
204 		if (sLatency[cpu][i].lock == lock) {
205 			bigtime_t diff = system_time() - sLatency[cpu][i].timestamp;
206 			if (diff > DEBUG_LATENCY && diff < 500000) {
207 				panic("spinlock %p were held for %lld usecs (%d allowed)\n",
208 					lock, diff, DEBUG_LATENCY);
209 			}
210 
211 			sLatency[cpu][i].lock = NULL;
212 		}
213 	}
214 }
215 
216 
217 #endif	// DEBUG_SPINLOCK_LATENCIES
218 
219 
220 int
221 dump_ici_messages(int argc, char** argv)
222 {
223 	// count broadcast messages
224 	int32 count = 0;
225 	int32 doneCount = 0;
226 	int32 unreferencedCount = 0;
227 	smp_msg* message = sBroadcastMessages;
228 	while (message != NULL) {
229 		count++;
230 		if (message->done)
231 			doneCount++;
232 		if (message->ref_count <= 0)
233 			unreferencedCount++;
234 		message = message->next;
235 	}
236 
237 	kprintf("ICI broadcast messages: %" B_PRId32 ", first: %p\n", count,
238 		sBroadcastMessages);
239 	kprintf("  done:         %" B_PRId32 "\n", doneCount);
240 	kprintf("  unreferenced: %" B_PRId32 "\n", unreferencedCount);
241 
242 	// count per-CPU messages
243 	for (int32 i = 0; i < sNumCPUs; i++) {
244 		count = 0;
245 		message = sCPUMessages[i];
246 		while (message != NULL) {
247 			count++;
248 			message = message->next;
249 		}
250 
251 		kprintf("CPU %" B_PRId32 " messages: %" B_PRId32 ", first: %p\n", i,
252 			count, sCPUMessages[i]);
253 	}
254 
255 	return 0;
256 }
257 
258 
259 int
260 dump_ici_message(int argc, char** argv)
261 {
262 	if (argc != 2) {
263 		print_debugger_command_usage(argv[0]);
264 		return 0;
265 	}
266 
267 	uint64 address;
268 	if (!evaluate_debug_expression(argv[1], &address, false))
269 		return 0;
270 
271 	smp_msg* message = (smp_msg*)(addr_t)address;
272 	kprintf("ICI message %p:\n", message);
273 	kprintf("  next:        %p\n", message->next);
274 	kprintf("  message:     %" B_PRId32 "\n", message->message);
275 	kprintf("  data:        0x%lx\n", message->data);
276 	kprintf("  data2:       0x%lx\n", message->data2);
277 	kprintf("  data3:       0x%lx\n", message->data3);
278 	kprintf("  data_ptr:    %p\n", message->data_ptr);
279 	kprintf("  flags:       %" B_PRIx32 "\n", message->flags);
280 	kprintf("  ref_count:   %" B_PRIx32 "\n", message->ref_count);
281 	kprintf("  done:        %s\n", message->done ? "true" : "false");
282 	kprintf("  proc_bitmap: %" B_PRIx32 "\n", message->proc_bitmap);
283 
284 	return 0;
285 }
286 
287 
288 static inline void
289 process_all_pending_ici(int32 currentCPU)
290 {
291 	while (process_pending_ici(currentCPU) != B_ENTRY_NOT_FOUND)
292 		;
293 }
294 
295 
296 bool
297 try_acquire_spinlock(spinlock* lock)
298 {
299 #if DEBUG_SPINLOCKS
300 	if (are_interrupts_enabled()) {
301 		panic("try_acquire_spinlock: attempt to acquire lock %p with "
302 			"interrupts enabled", lock);
303 	}
304 #endif
305 
306 #if B_DEBUG_SPINLOCK_CONTENTION
307 	if (atomic_add(&lock->lock, 1) != 0)
308 		return false;
309 #else
310 	if (atomic_or((int32*)lock, 1) != 0)
311 		return false;
312 
313 #	if DEBUG_SPINLOCKS
314 	push_lock_caller(arch_debug_get_caller(), lock);
315 #	endif
316 #endif
317 
318 	return true;
319 }
320 
321 
322 void
323 acquire_spinlock(spinlock* lock)
324 {
325 #if DEBUG_SPINLOCKS
326 	if (are_interrupts_enabled()) {
327 		panic("acquire_spinlock: attempt to acquire lock %p with interrupts "
328 			"enabled", lock);
329 	}
330 #endif
331 
332 	if (sNumCPUs > 1) {
333 		int currentCPU = smp_get_current_cpu();
334 #if B_DEBUG_SPINLOCK_CONTENTION
335 		while (atomic_add(&lock->lock, 1) != 0)
336 			process_all_pending_ici(currentCPU);
337 #else
338 		while (1) {
339 			uint32 count = 0;
340 			while (*lock != 0) {
341 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
342 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
343 						"for a long time!", lock);
344 					count = 0;
345 				}
346 
347 				process_all_pending_ici(currentCPU);
348 				PAUSE();
349 			}
350 			if (atomic_or((int32*)lock, 1) == 0)
351 				break;
352 		}
353 
354 #	if DEBUG_SPINLOCKS
355 		push_lock_caller(arch_debug_get_caller(), lock);
356 #	endif
357 #endif
358 	} else {
359 #if DEBUG_SPINLOCKS
360 		int32 oldValue;
361 		oldValue = atomic_or((int32*)lock, 1);
362 		if (oldValue != 0) {
363 			panic("acquire_spinlock: attempt to acquire lock %p twice on "
364 				"non-SMP system (last caller: %p, value %" B_PRId32 ")", lock,
365 				find_lock_caller(lock), oldValue);
366 		}
367 
368 		push_lock_caller(arch_debug_get_caller(), lock);
369 #endif
370 	}
371 #if DEBUG_SPINLOCK_LATENCIES
372 	push_latency(lock);
373 #endif
374 }
375 
376 
377 static void
378 acquire_spinlock_nocheck(spinlock *lock)
379 {
380 #if DEBUG_SPINLOCKS
381 	if (are_interrupts_enabled()) {
382 		panic("acquire_spinlock_nocheck: attempt to acquire lock %p with "
383 			"interrupts enabled", lock);
384 	}
385 #endif
386 
387 	if (sNumCPUs > 1) {
388 #if B_DEBUG_SPINLOCK_CONTENTION
389 		while (atomic_add(&lock->lock, 1) != 0) {
390 		}
391 #else
392 		while (1) {
393 			uint32 count = 0;
394 			while (*lock != 0) {
395 				if (++count == SPINLOCK_DEADLOCK_COUNT_NO_CHECK) {
396 					panic("acquire_spinlock(): Failed to acquire spinlock %p "
397 						"for a long time!", lock);
398 					count = 0;
399 				}
400 
401 				PAUSE();
402 			}
403 
404 			if (atomic_or((int32*)lock, 1) == 0)
405 				break;
406 		}
407 #endif
408 	} else {
409 #if DEBUG_SPINLOCKS
410 		if (atomic_or((int32*)lock, 1) != 0) {
411 			panic("acquire_spinlock_nocheck: attempt to acquire lock %p twice "
412 				"on non-SMP system\n", lock);
413 		}
414 #endif
415 	}
416 }
417 
418 
419 /*!	Equivalent to acquire_spinlock(), save for currentCPU parameter. */
420 static void
421 acquire_spinlock_cpu(int32 currentCPU, spinlock *lock)
422 {
423 #if DEBUG_SPINLOCKS
424 	if (are_interrupts_enabled()) {
425 		panic("acquire_spinlock_cpu: attempt to acquire lock %p with "
426 			"interrupts enabled", lock);
427 	}
428 #endif
429 
430 	if (sNumCPUs > 1) {
431 #if B_DEBUG_SPINLOCK_CONTENTION
432 		while (atomic_add(&lock->lock, 1) != 0)
433 			process_all_pending_ici(currentCPU);
434 #else
435 		while (1) {
436 			uint32 count = 0;
437 			while (*lock != 0) {
438 				if (++count == SPINLOCK_DEADLOCK_COUNT) {
439 					panic("acquire_spinlock_cpu(): Failed to acquire spinlock "
440 						"%p for a long time!", lock);
441 					count = 0;
442 				}
443 
444 				process_all_pending_ici(currentCPU);
445 				PAUSE();
446 			}
447 			if (atomic_or((int32*)lock, 1) == 0)
448 				break;
449 		}
450 
451 #	if DEBUG_SPINLOCKS
452 		push_lock_caller(arch_debug_get_caller(), lock);
453 #	endif
454 #endif
455 	} else {
456 #if DEBUG_SPINLOCKS
457 		int32 oldValue;
458 		oldValue = atomic_or((int32*)lock, 1);
459 		if (oldValue != 0) {
460 			panic("acquire_spinlock_cpu(): attempt to acquire lock %p twice on "
461 				"non-SMP system (last caller: %p, value %" B_PRId32 ")", lock,
462 				find_lock_caller(lock), oldValue);
463 		}
464 
465 		push_lock_caller(arch_debug_get_caller(), lock);
466 #endif
467 	}
468 }
469 
470 
471 void
472 release_spinlock(spinlock *lock)
473 {
474 #if DEBUG_SPINLOCK_LATENCIES
475 	test_latency(lock);
476 #endif
477 
478 	if (sNumCPUs > 1) {
479 		if (are_interrupts_enabled())
480 			panic("release_spinlock: attempt to release lock %p with "
481 				"interrupts enabled\n", lock);
482 #if B_DEBUG_SPINLOCK_CONTENTION
483 		{
484 			int32 count = atomic_and(&lock->lock, 0) - 1;
485 			if (count < 0) {
486 				panic("release_spinlock: lock %p was already released\n", lock);
487 			} else {
488 				// add to the total count -- deal with carry manually
489 				if ((uint32)atomic_add(&lock->count_low, count) + count
490 						< (uint32)count) {
491 					atomic_add(&lock->count_high, 1);
492 				}
493 			}
494 		}
495 #else
496 		if (atomic_and((int32*)lock, 0) != 1)
497 			panic("release_spinlock: lock %p was already released\n", lock);
498 #endif
499 	} else {
500 #if DEBUG_SPINLOCKS
501 		if (are_interrupts_enabled()) {
502 			panic("release_spinlock: attempt to release lock %p with "
503 				"interrupts enabled\n", lock);
504 		}
505 		if (atomic_and((int32*)lock, 0) != 1)
506 			panic("release_spinlock: lock %p was already released\n", lock);
507 #endif
508 #if DEBUG_SPINLOCK_LATENCIES
509 		test_latency(lock);
510 #endif
511 	}
512 }
513 
514 
515 /*!	Finds a free message and gets it.
516 	NOTE: has side effect of disabling interrupts
517 	return value is the former interrupt state
518 */
519 static cpu_status
520 find_free_message(struct smp_msg** msg)
521 {
522 	cpu_status state;
523 
524 	TRACE(("find_free_message: entry\n"));
525 
526 retry:
527 	while (sFreeMessageCount <= 0) {
528 		state = disable_interrupts();
529 		process_all_pending_ici(smp_get_current_cpu());
530 		restore_interrupts(state);
531 		PAUSE();
532 	}
533 	state = disable_interrupts();
534 	acquire_spinlock(&sFreeMessageSpinlock);
535 
536 	if (sFreeMessageCount <= 0) {
537 		// someone grabbed one while we were getting the lock,
538 		// go back to waiting for it
539 		release_spinlock(&sFreeMessageSpinlock);
540 		restore_interrupts(state);
541 		goto retry;
542 	}
543 
544 	*msg = sFreeMessages;
545 	sFreeMessages = (*msg)->next;
546 	sFreeMessageCount--;
547 
548 	release_spinlock(&sFreeMessageSpinlock);
549 
550 	TRACE(("find_free_message: returning msg %p\n", *msg));
551 
552 	return state;
553 }
554 
555 
556 /*!	Similar to find_free_message(), but expects the interrupts to be disabled
557 	already.
558 */
559 static void
560 find_free_message_interrupts_disabled(int32 currentCPU,
561 	struct smp_msg** _message)
562 {
563 	TRACE(("find_free_message_interrupts_disabled: entry\n"));
564 
565 	acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
566 	while (sFreeMessageCount <= 0) {
567 		release_spinlock(&sFreeMessageSpinlock);
568 		process_all_pending_ici(currentCPU);
569 		PAUSE();
570 		acquire_spinlock_cpu(currentCPU, &sFreeMessageSpinlock);
571 	}
572 
573 	*_message = sFreeMessages;
574 	sFreeMessages = (*_message)->next;
575 	sFreeMessageCount--;
576 
577 	release_spinlock(&sFreeMessageSpinlock);
578 
579 	TRACE(("find_free_message_interrupts_disabled: returning msg %p\n",
580 		*_message));
581 }
582 
583 
584 static void
585 return_free_message(struct smp_msg* msg)
586 {
587 	TRACE(("return_free_message: returning msg %p\n", msg));
588 
589 	acquire_spinlock_nocheck(&sFreeMessageSpinlock);
590 	msg->next = sFreeMessages;
591 	sFreeMessages = msg;
592 	sFreeMessageCount++;
593 	release_spinlock(&sFreeMessageSpinlock);
594 }
595 
596 
597 static struct smp_msg*
598 check_for_message(int currentCPU, mailbox_source& sourceMailbox)
599 {
600 	if (!sICIEnabled)
601 		return NULL;
602 
603 	acquire_spinlock_nocheck(&sCPUMessageSpinlock[currentCPU]);
604 
605 	struct smp_msg* msg = sCPUMessages[currentCPU];
606 	if (msg != NULL) {
607 		sCPUMessages[currentCPU] = msg->next;
608 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
609 		TRACE((" cpu %d: found msg %p in cpu mailbox\n", currentCPU, msg));
610 		sourceMailbox = MAILBOX_LOCAL;
611 	} else {
612 		// try getting one from the broadcast mailbox
613 
614 		release_spinlock(&sCPUMessageSpinlock[currentCPU]);
615 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
616 
617 		msg = sBroadcastMessages;
618 		while (msg != NULL) {
619 			if (CHECK_BIT(msg->proc_bitmap, currentCPU) != 0) {
620 				// we have handled this one already
621 				msg = msg->next;
622 				continue;
623 			}
624 
625 			// mark it so we wont try to process this one again
626 			msg->proc_bitmap = SET_BIT(msg->proc_bitmap, currentCPU);
627 			sourceMailbox = MAILBOX_BCAST;
628 			break;
629 		}
630 		release_spinlock(&sBroadcastMessageSpinlock);
631 
632 		TRACE((" cpu %d: found msg %p in broadcast mailbox\n", currentCPU,
633 			msg));
634 	}
635 	return msg;
636 }
637 
638 
639 static void
640 finish_message_processing(int currentCPU, struct smp_msg* msg,
641 	mailbox_source sourceMailbox)
642 {
643 	if (atomic_add(&msg->ref_count, -1) != 1)
644 		return;
645 
646 	// we were the last one to decrement the ref_count
647 	// it's our job to remove it from the list & possibly clean it up
648 	struct smp_msg** mbox;
649 	spinlock* spinlock;
650 
651 	// clean up the message from one of the mailboxes
652 	if (sourceMailbox == MAILBOX_BCAST) {
653 		mbox = &sBroadcastMessages;
654 		spinlock = &sBroadcastMessageSpinlock;
655 	} else {
656 		mbox = &sCPUMessages[currentCPU];
657 		spinlock = &sCPUMessageSpinlock[currentCPU];
658 	}
659 
660 	acquire_spinlock_nocheck(spinlock);
661 
662 	TRACE(("cleaning up message %p\n", msg));
663 
664 	if (sourceMailbox != MAILBOX_BCAST) {
665 		// local mailbox -- the message has already been removed in
666 		// check_for_message()
667 	} else if (msg == *mbox) {
668 		*mbox = msg->next;
669 	} else {
670 		// we need to walk to find the message in the list.
671 		// we can't use any data found when previously walking through
672 		// the list, since the list may have changed. But, we are guaranteed
673 		// to at least have msg in it.
674 		struct smp_msg* last = NULL;
675 		struct smp_msg* msg1;
676 
677 		msg1 = *mbox;
678 		while (msg1 != NULL && msg1 != msg) {
679 			last = msg1;
680 			msg1 = msg1->next;
681 		}
682 
683 		// by definition, last must be something
684 		if (msg1 == msg && last != NULL)
685 			last->next = msg->next;
686 		else
687 			panic("last == NULL or msg != msg1");
688 	}
689 
690 	release_spinlock(spinlock);
691 
692 	if ((msg->flags & SMP_MSG_FLAG_FREE_ARG) != 0 && msg->data_ptr != NULL)
693 		free(msg->data_ptr);
694 
695 	if ((msg->flags & SMP_MSG_FLAG_SYNC) != 0) {
696 		msg->done = true;
697 		// the caller cpu should now free the message
698 	} else {
699 		// in the !SYNC case, we get to free the message
700 		return_free_message(msg);
701 	}
702 }
703 
704 
705 static status_t
706 process_pending_ici(int32 currentCPU)
707 {
708 	mailbox_source sourceMailbox;
709 	struct smp_msg* msg = check_for_message(currentCPU, sourceMailbox);
710 	if (msg == NULL)
711 		return B_ENTRY_NOT_FOUND;
712 
713 	TRACE(("  cpu %ld message = %ld\n", currentCPU, msg->message));
714 
715 	bool haltCPU = false;
716 
717 	switch (msg->message) {
718 		case SMP_MSG_INVALIDATE_PAGE_RANGE:
719 			arch_cpu_invalidate_TLB_range(msg->data, msg->data2);
720 			break;
721 		case SMP_MSG_INVALIDATE_PAGE_LIST:
722 			arch_cpu_invalidate_TLB_list((addr_t*)msg->data, (int)msg->data2);
723 			break;
724 		case SMP_MSG_USER_INVALIDATE_PAGES:
725 			arch_cpu_user_TLB_invalidate();
726 			break;
727 		case SMP_MSG_GLOBAL_INVALIDATE_PAGES:
728 			arch_cpu_global_TLB_invalidate();
729 			break;
730 		case SMP_MSG_CPU_HALT:
731 			haltCPU = true;
732 			break;
733 		case SMP_MSG_CALL_FUNCTION:
734 		{
735 			smp_call_func func = (smp_call_func)msg->data_ptr;
736 			func(msg->data, currentCPU, msg->data2, msg->data3);
737 			break;
738 		}
739 		case SMP_MSG_RESCHEDULE:
740 		{
741 			cpu_ent* cpu = thread_get_current_thread()->cpu;
742 			cpu->invoke_scheduler = true;
743 			cpu->invoke_scheduler_if_idle = false;
744 			break;
745 		}
746 		case SMP_MSG_RESCHEDULE_IF_IDLE:
747 		{
748 			cpu_ent* cpu = thread_get_current_thread()->cpu;
749 			if (!cpu->invoke_scheduler) {
750 				cpu->invoke_scheduler = true;
751 				cpu->invoke_scheduler_if_idle = true;
752 			}
753 			break;
754 		}
755 
756 		default:
757 			dprintf("smp_intercpu_int_handler: got unknown message %" B_PRId32 "\n",
758 				msg->message);
759 			break;
760 	}
761 
762 	// finish dealing with this message, possibly removing it from the list
763 	finish_message_processing(currentCPU, msg, sourceMailbox);
764 
765 	// special case for the halt message
766 	if (haltCPU)
767 		debug_trap_cpu_in_kdl(currentCPU, false);
768 
769 	return B_OK;
770 }
771 
772 
773 #if B_DEBUG_SPINLOCK_CONTENTION
774 
775 
776 static uint64
777 get_spinlock_counter(spinlock* lock)
778 {
779 	uint32 high;
780 	uint32 low;
781 	do {
782 		high = (uint32)atomic_get(&lock->count_high);
783 		low = (uint32)atomic_get(&lock->count_low);
784 	} while (high != atomic_get(&lock->count_high));
785 
786 	return ((uint64)high << 32) | low;
787 }
788 
789 
790 static status_t
791 spinlock_contention_syscall(const char* subsystem, uint32 function,
792 	void* buffer, size_t bufferSize)
793 {
794 	spinlock_contention_info info;
795 
796 	if (function != GET_SPINLOCK_CONTENTION_INFO)
797 		return B_BAD_VALUE;
798 
799 	if (bufferSize < sizeof(spinlock_contention_info))
800 		return B_BAD_VALUE;
801 
802 	info.thread_spinlock_counter = get_spinlock_counter(&gThreadSpinlock);
803 	info.team_spinlock_counter = get_spinlock_counter(&gTeamSpinlock);
804 
805 	if (!IS_USER_ADDRESS(buffer)
806 		|| user_memcpy(buffer, &info, sizeof(info)) != B_OK) {
807 		return B_BAD_ADDRESS;
808 	}
809 
810 	return B_OK;
811 }
812 
813 
814 #endif	// B_DEBUG_SPINLOCK_CONTENTION
815 
816 
817 static void
818 process_early_cpu_call(int32 cpu)
819 {
820 	sEarlyCPUCallFunction(sEarlyCPUCallCookie, cpu);
821 	atomic_and(&sEarlyCPUCall, ~(uint32)(1 << cpu));
822 }
823 
824 
825 static void
826 call_all_cpus_early(void (*function)(void*, int), void* cookie)
827 {
828 	if (sNumCPUs > 1) {
829 		sEarlyCPUCallFunction = function;
830 		sEarlyCPUCallCookie = cookie;
831 
832 		uint32 cpuMask = (1 << sNumCPUs) - 2;
833 			// all CPUs but the boot cpu
834 
835 		sEarlyCPUCall = cpuMask;
836 
837 		// wait for all CPUs to finish
838 		while ((sEarlyCPUCall & cpuMask) != 0)
839 			PAUSE();
840 	}
841 
842 	function(cookie, 0);
843 }
844 
845 
846 //	#pragma mark -
847 
848 
849 int
850 smp_intercpu_int_handler(int32 cpu)
851 {
852 	TRACE(("smp_intercpu_int_handler: entry on cpu %ld\n", cpu));
853 
854 	process_all_pending_ici(cpu);
855 
856 	TRACE(("smp_intercpu_int_handler: done\n"));
857 
858 	return B_HANDLED_INTERRUPT;
859 }
860 
861 
862 void
863 smp_send_ici(int32 targetCPU, int32 message, addr_t data, addr_t data2,
864 	addr_t data3, void* dataPointer, addr_t flags)
865 {
866 	struct smp_msg *msg;
867 
868 	TRACE(("smp_send_ici: target 0x%lx, mess 0x%lx, data 0x%lx, data2 0x%lx, "
869 		"data3 0x%lx, ptr %p, flags 0x%lx\n", targetCPU, message, data, data2,
870 		data3, dataPointer, flags));
871 
872 	if (sICIEnabled) {
873 		int state;
874 		int currentCPU;
875 
876 		// find_free_message leaves interrupts disabled
877 		state = find_free_message(&msg);
878 
879 		currentCPU = smp_get_current_cpu();
880 		if (targetCPU == currentCPU) {
881 			return_free_message(msg);
882 			restore_interrupts(state);
883 			return; // nope, cant do that
884 		}
885 
886 		// set up the message
887 		msg->message = message;
888 		msg->data = data;
889 		msg->data2 = data2;
890 		msg->data3 = data3;
891 		msg->data_ptr = dataPointer;
892 		msg->ref_count = 1;
893 		msg->flags = flags;
894 		msg->done = false;
895 
896 		// stick it in the appropriate cpu's mailbox
897 		acquire_spinlock_nocheck(&sCPUMessageSpinlock[targetCPU]);
898 		msg->next = sCPUMessages[targetCPU];
899 		sCPUMessages[targetCPU] = msg;
900 		release_spinlock(&sCPUMessageSpinlock[targetCPU]);
901 
902 		arch_smp_send_ici(targetCPU);
903 
904 		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
905 			// wait for the other cpu to finish processing it
906 			// the interrupt handler will ref count it to <0
907 			// if the message is sync after it has removed it from the mailbox
908 			while (msg->done == false) {
909 				process_all_pending_ici(currentCPU);
910 				PAUSE();
911 			}
912 			// for SYNC messages, it's our responsibility to put it
913 			// back into the free list
914 			return_free_message(msg);
915 		}
916 
917 		restore_interrupts(state);
918 	}
919 }
920 
921 
922 void
923 smp_send_multicast_ici(cpu_mask_t cpuMask, int32 message, addr_t data,
924 	addr_t data2, addr_t data3, void *dataPointer, uint32 flags)
925 {
926 	if (!sICIEnabled)
927 		return;
928 
929 	int currentCPU = smp_get_current_cpu();
930 	cpuMask &= ~((cpu_mask_t)1 << currentCPU)
931 		& (((cpu_mask_t)1 << sNumCPUs) - 1);
932 	if (cpuMask == 0) {
933 		panic("smp_send_multicast_ici(): 0 CPU mask");
934 		return;
935 	}
936 
937 	// count target CPUs
938 	int32 targetCPUs = 0;
939 	for (int32 i = 0; i < sNumCPUs; i++) {
940 		if ((cpuMask & (cpu_mask_t)1 << i) != 0)
941 			targetCPUs++;
942 	}
943 
944 	// find_free_message leaves interrupts disabled
945 	struct smp_msg *msg;
946 	int state = find_free_message(&msg);
947 
948 	msg->message = message;
949 	msg->data = data;
950 	msg->data2 = data2;
951 	msg->data3 = data3;
952 	msg->data_ptr = dataPointer;
953 	msg->ref_count = targetCPUs;
954 	msg->flags = flags;
955 	msg->proc_bitmap = ~cpuMask;
956 	msg->done = false;
957 
958 	// stick it in the broadcast mailbox
959 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
960 	msg->next = sBroadcastMessages;
961 	sBroadcastMessages = msg;
962 	release_spinlock(&sBroadcastMessageSpinlock);
963 
964 	arch_smp_send_broadcast_ici();
965 		// TODO: Introduce a call that only bothers the target CPUs!
966 
967 	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
968 		// wait for the other cpus to finish processing it
969 		// the interrupt handler will ref count it to <0
970 		// if the message is sync after it has removed it from the mailbox
971 		while (msg->done == false) {
972 			process_all_pending_ici(currentCPU);
973 			PAUSE();
974 		}
975 
976 		// for SYNC messages, it's our responsibility to put it
977 		// back into the free list
978 		return_free_message(msg);
979 	}
980 
981 	restore_interrupts(state);
982 }
983 
984 
985 void
986 smp_send_broadcast_ici(int32 message, addr_t data, addr_t data2, addr_t data3,
987 	void *dataPointer, uint32 flags)
988 {
989 	struct smp_msg *msg;
990 
991 	TRACE(("smp_send_broadcast_ici: cpu %ld mess 0x%lx, data 0x%lx, data2 "
992 		"0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n", smp_get_current_cpu(),
993 		message, data, data2, data3, dataPointer, flags));
994 
995 	if (sICIEnabled) {
996 		int state;
997 		int currentCPU;
998 
999 		// find_free_message leaves interrupts disabled
1000 		state = find_free_message(&msg);
1001 
1002 		currentCPU = smp_get_current_cpu();
1003 
1004 		msg->message = message;
1005 		msg->data = data;
1006 		msg->data2 = data2;
1007 		msg->data3 = data3;
1008 		msg->data_ptr = dataPointer;
1009 		msg->ref_count = sNumCPUs - 1;
1010 		msg->flags = flags;
1011 		msg->proc_bitmap = SET_BIT(0, currentCPU);
1012 		msg->done = false;
1013 
1014 		TRACE(("smp_send_broadcast_ici%d: inserting msg %p into broadcast "
1015 			"mbox\n", currentCPU, msg));
1016 
1017 		// stick it in the appropriate cpu's mailbox
1018 		acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1019 		msg->next = sBroadcastMessages;
1020 		sBroadcastMessages = msg;
1021 		release_spinlock(&sBroadcastMessageSpinlock);
1022 
1023 		arch_smp_send_broadcast_ici();
1024 
1025 		TRACE(("smp_send_broadcast_ici: sent interrupt\n"));
1026 
1027 		if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
1028 			// wait for the other cpus to finish processing it
1029 			// the interrupt handler will ref count it to <0
1030 			// if the message is sync after it has removed it from the mailbox
1031 			TRACE(("smp_send_broadcast_ici: waiting for ack\n"));
1032 
1033 			while (msg->done == false) {
1034 				process_all_pending_ici(currentCPU);
1035 				PAUSE();
1036 			}
1037 
1038 			TRACE(("smp_send_broadcast_ici: returning message to free list\n"));
1039 
1040 			// for SYNC messages, it's our responsibility to put it
1041 			// back into the free list
1042 			return_free_message(msg);
1043 		}
1044 
1045 		restore_interrupts(state);
1046 	}
1047 
1048 	TRACE(("smp_send_broadcast_ici: done\n"));
1049 }
1050 
1051 
1052 void
1053 smp_send_broadcast_ici_interrupts_disabled(int32 currentCPU, int32 message,
1054 	addr_t data, addr_t data2, addr_t data3, void *dataPointer, uint32 flags)
1055 {
1056 	if (!sICIEnabled)
1057 		return;
1058 
1059 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: cpu %ld mess 0x%lx, "
1060 		"data 0x%lx, data2 0x%lx, data3 0x%lx, ptr %p, flags 0x%lx\n",
1061 		currentCPU, message, data, data2, data3, dataPointer, flags));
1062 
1063 	struct smp_msg *msg;
1064 	find_free_message_interrupts_disabled(currentCPU, &msg);
1065 
1066 	msg->message = message;
1067 	msg->data = data;
1068 	msg->data2 = data2;
1069 	msg->data3 = data3;
1070 	msg->data_ptr = dataPointer;
1071 	msg->ref_count = sNumCPUs - 1;
1072 	msg->flags = flags;
1073 	msg->proc_bitmap = SET_BIT(0, currentCPU);
1074 	msg->done = false;
1075 
1076 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: inserting msg %p "
1077 		"into broadcast mbox\n", currentCPU, msg));
1078 
1079 	// stick it in the appropriate cpu's mailbox
1080 	acquire_spinlock_nocheck(&sBroadcastMessageSpinlock);
1081 	msg->next = sBroadcastMessages;
1082 	sBroadcastMessages = msg;
1083 	release_spinlock(&sBroadcastMessageSpinlock);
1084 
1085 	arch_smp_send_broadcast_ici();
1086 
1087 	TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: sent interrupt\n",
1088 		currentCPU));
1089 
1090 	if ((flags & SMP_MSG_FLAG_SYNC) != 0) {
1091 		// wait for the other cpus to finish processing it
1092 		// the interrupt handler will ref count it to <0
1093 		// if the message is sync after it has removed it from the mailbox
1094 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: waiting for "
1095 			"ack\n", currentCPU));
1096 
1097 		while (msg->done == false) {
1098 			process_all_pending_ici(currentCPU);
1099 			PAUSE();
1100 		}
1101 
1102 		TRACE(("smp_send_broadcast_ici_interrupts_disabled %ld: returning "
1103 			"message to free list\n", currentCPU));
1104 
1105 		// for SYNC messages, it's our responsibility to put it
1106 		// back into the free list
1107 		return_free_message(msg);
1108 	}
1109 
1110 	TRACE(("smp_send_broadcast_ici_interrupts_disabled: done\n"));
1111 }
1112 
1113 
1114 /*!	Spin on non-boot CPUs until smp_wake_up_non_boot_cpus() has been called.
1115 
1116 	\param cpu The index of the calling CPU.
1117 	\param rendezVous A rendez-vous variable to make sure that the boot CPU
1118 		does not return before all other CPUs have started waiting.
1119 	\return \c true on the boot CPU, \c false otherwise.
1120 */
1121 bool
1122 smp_trap_non_boot_cpus(int32 cpu, uint32* rendezVous)
1123 {
1124 	if (cpu == 0) {
1125 		smp_cpu_rendezvous(rendezVous, cpu);
1126 		return true;
1127 	}
1128 
1129 	smp_cpu_rendezvous(rendezVous, cpu);
1130 
1131 	while (sBootCPUSpin == 0) {
1132 		if ((sEarlyCPUCall & (1 << cpu)) != 0)
1133 			process_early_cpu_call(cpu);
1134 
1135 		PAUSE();
1136 	}
1137 
1138 	return false;
1139 }
1140 
1141 
1142 void
1143 smp_wake_up_non_boot_cpus()
1144 {
1145 	// ICIs were previously being ignored
1146 	if (sNumCPUs > 1)
1147 		sICIEnabled = true;
1148 
1149 	// resume non boot CPUs
1150 	sBootCPUSpin = 1;
1151 }
1152 
1153 
1154 /*!	Spin until all CPUs have reached the rendez-vous point.
1155 
1156 	The rendez-vous variable \c *var must have been initialized to 0 before the
1157 	function is called. The variable will be non-null when the function returns.
1158 
1159 	Note that when the function returns on one CPU, it only means that all CPU
1160 	have already entered the function. It does not mean that the variable can
1161 	already be reset. Only when all CPUs have returned (which would have to be
1162 	ensured via another rendez-vous) the variable can be reset.
1163 */
1164 void
1165 smp_cpu_rendezvous(volatile uint32* var, int current_cpu)
1166 {
1167 	atomic_or((vint32*)var, 1 << current_cpu);
1168 
1169 	while (*var != (((uint32)1 << sNumCPUs) - 1))
1170 		PAUSE();
1171 }
1172 
1173 
1174 status_t
1175 smp_init(kernel_args* args)
1176 {
1177 	TRACE(("smp_init: entry\n"));
1178 
1179 #if DEBUG_SPINLOCK_LATENCIES
1180 	sEnableLatencyCheck
1181 		= !get_safemode_boolean(B_SAFEMODE_DISABLE_LATENCY_CHECK, false);
1182 #endif
1183 
1184 #if DEBUG_SPINLOCKS
1185 	add_debugger_command_etc("spinlock", &dump_spinlock,
1186 		"Dump info on a spinlock",
1187 		"\n"
1188 		"Dumps info on a spinlock.\n", 0);
1189 #endif
1190 	add_debugger_command_etc("ici", &dump_ici_messages,
1191 		"Dump info on pending ICI messages",
1192 		"\n"
1193 		"Dumps info on pending ICI messages.\n", 0);
1194 	add_debugger_command_etc("ici_message", &dump_ici_message,
1195 		"Dump info on an ICI message",
1196 		"\n"
1197 		"Dumps info on an ICI message.\n", 0);
1198 
1199 	if (args->num_cpus > 1) {
1200 		sFreeMessages = NULL;
1201 		sFreeMessageCount = 0;
1202 		for (int i = 0; i < MSG_POOL_SIZE; i++) {
1203 			struct smp_msg* msg
1204 				= (struct smp_msg*)malloc(sizeof(struct smp_msg));
1205 			if (msg == NULL) {
1206 				panic("error creating smp mailboxes\n");
1207 				return B_ERROR;
1208 			}
1209 			memset(msg, 0, sizeof(struct smp_msg));
1210 			msg->next = sFreeMessages;
1211 			sFreeMessages = msg;
1212 			sFreeMessageCount++;
1213 		}
1214 		sNumCPUs = args->num_cpus;
1215 	}
1216 	TRACE(("smp_init: calling arch_smp_init\n"));
1217 
1218 	return arch_smp_init(args);
1219 }
1220 
1221 
1222 status_t
1223 smp_per_cpu_init(kernel_args* args, int32 cpu)
1224 {
1225 	return arch_smp_per_cpu_init(args, cpu);
1226 }
1227 
1228 
1229 status_t
1230 smp_init_post_generic_syscalls(void)
1231 {
1232 #if B_DEBUG_SPINLOCK_CONTENTION
1233 	return register_generic_syscall(SPINLOCK_CONTENTION,
1234 		&spinlock_contention_syscall, 0, 0);
1235 #else
1236 	return B_OK;
1237 #endif
1238 }
1239 
1240 
1241 void
1242 smp_set_num_cpus(int32 numCPUs)
1243 {
1244 	sNumCPUs = numCPUs;
1245 }
1246 
1247 
1248 int32
1249 smp_get_num_cpus()
1250 {
1251 	return sNumCPUs;
1252 }
1253 
1254 
1255 int32
1256 smp_get_current_cpu(void)
1257 {
1258 	return thread_get_current_thread()->cpu->cpu_num;
1259 }
1260 
1261 
1262 // #pragma mark - public exported functions
1263 
1264 
1265 void
1266 call_all_cpus(void (*func)(void*, int), void* cookie)
1267 {
1268 	cpu_status state = disable_interrupts();
1269 
1270 	// if inter-CPU communication is not yet enabled, use the early mechanism
1271 	if (!sICIEnabled) {
1272 		call_all_cpus_early(func, cookie);
1273 		restore_interrupts(state);
1274 		return;
1275 	}
1276 
1277 	if (smp_get_num_cpus() > 1) {
1278 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (addr_t)cookie,
1279 			0, 0, (void*)func, SMP_MSG_FLAG_ASYNC);
1280 	}
1281 
1282 	// we need to call this function ourselves as well
1283 	func(cookie, smp_get_current_cpu());
1284 
1285 	restore_interrupts(state);
1286 }
1287 
1288 
1289 void
1290 call_all_cpus_sync(void (*func)(void*, int), void* cookie)
1291 {
1292 	cpu_status state = disable_interrupts();
1293 
1294 	// if inter-CPU communication is not yet enabled, use the early mechanism
1295 	if (!sICIEnabled) {
1296 		call_all_cpus_early(func, cookie);
1297 		restore_interrupts(state);
1298 		return;
1299 	}
1300 
1301 	if (smp_get_num_cpus() > 1) {
1302 		smp_send_broadcast_ici(SMP_MSG_CALL_FUNCTION, (addr_t)cookie,
1303 			0, 0, (void*)func, SMP_MSG_FLAG_SYNC);
1304 	}
1305 
1306 	// we need to call this function ourselves as well
1307 	func(cookie, smp_get_current_cpu());
1308 
1309 	restore_interrupts(state);
1310 }
1311 
1312 
1313 void
1314 memory_read_barrier(void)
1315 {
1316 	arch_cpu_memory_read_barrier();
1317 }
1318 
1319 
1320 void
1321 memory_write_barrier(void)
1322 {
1323 	arch_cpu_memory_write_barrier();
1324 }
1325