/*-
 * Copyright (c) 2000 Doug Rabson
 * Copyright (c) 2014 Jeff Roberson
 * Copyright (c) 2016 Matthew Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#ifndef __HAIKU__
#include <sys/cpuset.h>
#include <sys/interrupt.h>
#endif
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#ifndef __HAIKU__
#include <sys/sched.h>
#endif
#include <sys/smp.h>
#include <sys/gtaskqueue.h>
#ifndef __HAIKU__
#include <sys/unistd.h>
#endif
#include <machine/stdarg.h>

static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
static void	gtaskqueue_thread_enqueue(void *);
static void	gtaskqueue_thread_loop(void *arg);
static int	task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
static void	gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);

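/*
 * Statically defined task groups: "softirq" is sized at one queue per
 * CPU for deferred interrupt work, and "config" is a single queue for
 * configuration tasks.
 */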
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);

struct gtaskqueue_busy {
	struct gtask	*tb_running;
	TAILQ_ENTRY(gtaskqueue_busy) tb_link;
};

static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

struct gtaskqueue {
	STAILQ_HEAD(, gtask)	tq_queue;
	gtaskqueue_enqueue_fn	tq_enqueue;
	void			*tq_context;
	char			*tq_name;
	TAILQ_HEAD(, gtaskqueue_busy) tq_active;
	struct mtx		tq_mutex;
#ifdef __HAIKU__
	sem_id			tq_sem;
#endif
	struct thread		**tq_threads;
	int			tq_tcount;
	int			tq_spin;
	int			tq_flags;
	int			tq_callouts;
	taskqueue_callback_fn	tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
	void			*tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};

#define	TQ_FLAGS_ACTIVE		(1 << 0)
#define	TQ_FLAGS_BLOCKED	(1 << 1)
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	(1 << 2)

#define	DT_CALLOUT_ARMED	(1 << 0)
#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)

#ifdef INVARIANTS
static void
gtask_dump(struct gtask *gtask)
{
	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	       gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
}
#endif

static __inline int
TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
    int t)
{
	if (tq->tq_spin)
		return (msleep_spin(p, m, wm, t));
	return (msleep(p, m, pri, wm, t));
}

static struct gtaskqueue *
_gtaskqueue_create(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context,
		 int mtxflags, const char *mtxname __unused)
{
	struct gtaskqueue *queue;
	char *tq_name;

	tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
	if (!tq_name)
		return (NULL);

	snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");

	queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
	if (!queue) {
		free(tq_name, M_GTASKQUEUE);
		return (NULL);
	}

	STAILQ_INIT(&queue->tq_queue);
	TAILQ_INIT(&queue->tq_active);
	queue->tq_enqueue = enqueue;
	queue->tq_context = context;
	queue->tq_name = tq_name;
	queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
	queue->tq_flags |= TQ_FLAGS_ACTIVE;
	if (enqueue == gtaskqueue_thread_enqueue)
		queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
	mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
#ifdef __HAIKU__
	queue->tq_sem = create_sem(0, tq_name);
#endif

	return (queue);
}


/*
 * Signal a taskqueue thread to terminate.
 */
static void
gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
{

	while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
		wakeup(tq);
		TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
	}
}

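/*
 * Tear down a queue: mark it inactive, wait for its threads and any
 * armed callouts to finish, then release all associated resources.
 */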
static void
gtaskqueue_free(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
	gtaskqueue_terminate(queue->tq_threads, queue);
	KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
	KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
	mtx_destroy(&queue->tq_mutex);
#ifdef __HAIKU__
	delete_sem(queue->tq_sem);
#endif
	free(queue->tq_threads, M_GTASKQUEUE);
	free(queue->tq_name, M_GTASKQUEUE);
	free(queue, M_GTASKQUEUE);
}

/*
 * Prevent a grouptask from being enqueued, then wait for any pending
 * or running instance of it to complete.
 */
void
grouptask_block(struct grouptask *grouptask)
{
	struct gtaskqueue *queue = grouptask->gt_taskqueue;
	struct gtask *gtask = &grouptask->gt_task;

#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	gtask->ta_flags |= TASK_NOENQUEUE;
	gtaskqueue_drain_locked(queue, gtask);
	TQ_UNLOCK(queue);
}

void
grouptask_unblock(struct grouptask *grouptask)
{
	struct gtaskqueue *queue = grouptask->gt_taskqueue;
	struct gtask *gtask = &grouptask->gt_task;

#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	gtask->ta_flags &= ~TASK_NOENQUEUE;
	TQ_UNLOCK(queue);
}

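/*
 * Enqueue a task.  Returns 0 if the task was queued (or was already
 * pending) and EAGAIN if enqueueing is blocked via grouptask_block().
 */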
int
grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
{
#ifdef INVARIANTS
	if (queue == NULL) {
		gtask_dump(gtask);
		panic("queue == NULL");
	}
#endif
	TQ_LOCK(queue);
	if (gtask->ta_flags & TASK_ENQUEUED) {
		TQ_UNLOCK(queue);
		return (0);
	}
	if (gtask->ta_flags & TASK_NOENQUEUE) {
		TQ_UNLOCK(queue);
		return (EAGAIN);
	}
	STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
	gtask->ta_flags |= TASK_ENQUEUED;
	TQ_UNLOCK(queue);
	if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
		queue->tq_enqueue(queue->tq_context);
	return (0);
}

static void
gtaskqueue_task_nop_fn(void *context)
{
}

/*
 * Block until all currently queued tasks in this taskqueue
 * have begun execution.  Tasks queued during execution of
 * this function are ignored.
 */
static void
gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
{
	struct gtask t_barrier;

	if (STAILQ_EMPTY(&queue->tq_queue))
		return;

	/*
	 * Enqueue our barrier after all current tasks, but with
	 * the highest priority so that newly queued tasks cannot
	 * pass it.  Because of the high priority, we can not use
	 * taskqueue_enqueue_locked directly (which drops the lock
	 * anyway) so just insert it at tail while we have the
	 * queue lock.
	 */
	GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
	STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
	t_barrier.ta_flags |= TASK_ENQUEUED;

	/*
	 * Once the barrier has executed, all previously queued tasks
	 * have completed or are currently executing.
	 */
	while (t_barrier.ta_flags & TASK_ENQUEUED)
		TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
}

/*
 * Block until all currently executing tasks for this taskqueue
 * complete.  Tasks that begin execution during the execution
 * of this function are ignored.
 */
static void
gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb_marker, *tb_first;

	if (TAILQ_EMPTY(&queue->tq_active))
		return;

	/* Block gtaskqueue_terminate(). */
	queue->tq_callouts++;

	/*
	 * Wait for all currently executing taskqueue threads
	 * to go idle.
	 */
	tb_marker.tb_running = TB_DRAIN_WAITER;
	TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
	while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
		TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
	TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);

	/*
	 * Wake up any other drain waiter that happened to queue up
	 * without any intervening active thread.
	 */
	tb_first = TAILQ_FIRST(&queue->tq_active);
	if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
		wakeup(tb_first);

	/* Release gtaskqueue_terminate(). */
	queue->tq_callouts--;
	if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
		wakeup_one(queue->tq_threads);
}

void
gtaskqueue_block(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags |= TQ_FLAGS_BLOCKED;
	TQ_UNLOCK(queue);
}

void
gtaskqueue_unblock(struct gtaskqueue *queue)
{

	TQ_LOCK(queue);
	queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
	if (!STAILQ_EMPTY(&queue->tq_queue))
		queue->tq_enqueue(queue->tq_context);
	TQ_UNLOCK(queue);
}

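/*
 * Main dispatch loop: pop tasks off the queue one at a time and run
 * them with the queue lock dropped.  A gtaskqueue_busy record stays on
 * tq_active for the duration so drain operations can see the task.
 */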
static void
gtaskqueue_run_locked(struct gtaskqueue *queue)
{
	struct gtaskqueue_busy tb;
	struct gtaskqueue_busy *tb_first;
	struct gtask *gtask;

	KASSERT(queue != NULL, ("tq is NULL"));
	TQ_ASSERT_LOCKED(queue);
	tb.tb_running = NULL;

	while (STAILQ_FIRST(&queue->tq_queue)) {
		TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);

		/*
		 * Carefully remove the first task from the queue and
		 * clear its TASK_ENQUEUED flag.
		 */
		gtask = STAILQ_FIRST(&queue->tq_queue);
		KASSERT(gtask != NULL, ("task is NULL"));
		STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
		gtask->ta_flags &= ~TASK_ENQUEUED;
		tb.tb_running = gtask;
		TQ_UNLOCK(queue);

		KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
		gtask->ta_func(gtask->ta_context);

		TQ_LOCK(queue);
		tb.tb_running = NULL;
		wakeup(gtask);

		TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
		tb_first = TAILQ_FIRST(&queue->tq_active);
		if (tb_first != NULL &&
		    tb_first->tb_running == TB_DRAIN_WAITER)
			wakeup(tb_first);
	}
}

static int
task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
{
	struct gtaskqueue_busy *tb;

	TQ_ASSERT_LOCKED(queue);
	TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
		if (tb->tb_running == gtask)
			return (1);
	}
	return (0);
}

static int
gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
{

	if (gtask->ta_flags & TASK_ENQUEUED)
		STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
	gtask->ta_flags &= ~TASK_ENQUEUED;
	return (task_is_running(queue, gtask) ? EBUSY : 0);
}

int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int error;

	TQ_LOCK(queue);
	error = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);

	return (error);
}

static void
gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
{
	while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
		TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
}

void
gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
{
#ifndef __HAIKU__
	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
#endif

	TQ_LOCK(queue);
	gtaskqueue_drain_locked(queue, gtask);
	TQ_UNLOCK(queue);
}

void
gtaskqueue_drain_all(struct gtaskqueue *queue)
{
#ifndef __HAIKU__
	if (!queue->tq_spin)
		WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
#endif

	TQ_LOCK(queue);
	gtaskqueue_drain_tq_queue(queue);
	gtaskqueue_drain_tq_active(queue);
	TQ_UNLOCK(queue);
}

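/*
 * Create "count" kernel threads to service the queue.  "mask", when
 * non-NULL, pins the threads to a CPU set; that path is compiled out
 * on Haiku, which has no cpuset_setthread().
 */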
static int
_gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    void *mask, const char *name, va_list ap)
{
	char ktname[19 + 1];
	struct thread *td;
	struct gtaskqueue *tq;
	int i, error;

	if (count <= 0)
		return (EINVAL);

	vsnprintf(ktname, sizeof(ktname), name, ap);
	tq = *tqp;

	tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
	    M_NOWAIT | M_ZERO);
	if (tq->tq_threads == NULL) {
		printf("%s: no memory for %s threads\n", __func__, ktname);
		return (ENOMEM);
	}

	for (i = 0; i < count; i++) {
		if (count == 1)
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], 0, 0, "%s", ktname);
		else
			error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
			    &tq->tq_threads[i], 0, 0,
			    "%s_%d", ktname, i);
		if (error) {
			/* should be ok to continue, taskqueue_free will dtrt */
			printf("%s: kthread_add(%s): error %d\n", __func__,
			    ktname, error);
			tq->tq_threads[i] = NULL;		/* paranoid */
		} else
			tq->tq_tcount++;
	}
	for (i = 0; i < count; i++) {
		if (tq->tq_threads[i] == NULL)
			continue;
		td = tq->tq_threads[i];
#ifndef __HAIKU__
		if (mask) {
			error = cpuset_setthread(td->td_tid, mask);
			/*
			 * Failing to pin is rarely an actual fatal error;
			 * it'll just affect performance.
			 */
			if (error)
				printf("%s: curthread=%llu: can't pin; "
				    "error=%d\n",
				    __func__,
				    (unsigned long long) td->td_tid,
				    error);
		}
#endif
		thread_lock(td);
		sched_prio(td, pri);
		sched_add(td, SRQ_BORING);
		thread_unlock(td);
	}

	return (0);
}

static int
gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
    const char *name, ...)
{
	va_list ap;
	int error;

	va_start(ap, name);
	error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
	va_end(ap);
	return (error);
}

static inline void
gtaskqueue_run_callback(struct gtaskqueue *tq,
    enum taskqueue_callback_type cb_type)
{
	taskqueue_callback_fn tq_callback;

	TQ_ASSERT_UNLOCKED(tq);
	tq_callback = tq->tq_callbacks[cb_type];
	if (tq_callback != NULL)
		tq_callback(tq->tq_cb_contexts[cb_type]);
}


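/*
 * Per-thread service loop.  On FreeBSD an idle thread sleeps on the
 * queue itself and is woken by wakeup_one(); the Haiku port instead
 * blocks on tq_sem, which gtaskqueue_thread_enqueue() releases.
 */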
static void
gtaskqueue_thread_loop(void *arg)
{
	struct gtaskqueue **tqp, *tq;

	tqp = arg;
	tq = *tqp;
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
	TQ_LOCK(tq);
	while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
		/* XXX ? */
		gtaskqueue_run_locked(tq);
		/*
		 * Because gtaskqueue_run_locked() can drop tq_mutex, we
		 * need to check whether TQ_FLAGS_ACTIVE was cleared in the
		 * meantime, in which case we missed a wakeup.
		 */
		if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
			break;
#ifndef __HAIKU__
		TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
#else
		TQ_UNLOCK(tq);
		acquire_sem(tq->tq_sem);
		TQ_LOCK(tq);
#endif
	}
	gtaskqueue_run_locked(tq);
	/*
	 * This thread is on its way out, so just drop the lock temporarily
	 * in order to call the shutdown callback.  This allows the callback
	 * to look at the taskqueue, even just before it dies.
	 */
	TQ_UNLOCK(tq);
	gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
	TQ_LOCK(tq);

	/* rendezvous with thread that asked us to terminate */
	tq->tq_tcount--;
	wakeup_one(tq->tq_threads);
	TQ_UNLOCK(tq);
	kthread_exit();
}

static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp, *tq;

	tqp = context;
	tq = *tqp;
#ifndef __HAIKU__
	wakeup_one(tq);
#else
	release_sem_etc(tq->tq_sem, 1, B_DO_NOT_RESCHEDULE);
#endif
}


static struct gtaskqueue *
gtaskqueue_create_fast(const char *name, int mflags,
		 taskqueue_enqueue_fn enqueue, void *context)
{
	return _gtaskqueue_create(name, mflags, enqueue, context,
			MTX_SPIN, "fast_taskqueue");
}


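/*
 * A taskqgroup spreads grouptasks across a set of per-CPU queues.
 * Each taskqgroup_cpu pairs one gtaskqueue with the list of grouptasks
 * currently assigned to it.
 */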
struct taskqgroup_cpu {
	LIST_HEAD(, grouptask)	tgc_tasks;
	struct gtaskqueue	*tgc_taskq;
	int	tgc_cnt;
	int	tgc_cpu;
};

struct taskqgroup {
	struct taskqgroup_cpu tqg_queue[MAXCPU];
	struct mtx	tqg_lock;
	const char *	tqg_name;
	int		tqg_adjusting;
	int		tqg_stride;
	int		tqg_cnt;
};

struct taskq_bind_task {
	struct gtask bt_task;
	int	bt_cpuid;
};

static void
taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
{
	struct taskqgroup_cpu *qcpu;

	qcpu = &qgroup->tqg_queue[idx];
	LIST_INIT(&qcpu->tgc_tasks);
	qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
	    gtaskqueue_thread_enqueue, &qcpu->tgc_taskq);
	MPASS(qcpu->tgc_taskq);
	gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
	    "%s_%d", qgroup->tqg_name, idx);
	qcpu->tgc_cpu = cpu;
}

static void
taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
{

	gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
}

/*
 * Find the queue with the fewest tasks that does not already service
 * the given uniq identifier.
 */
static int
taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
{
	struct grouptask *n;
	int i, idx, mincnt;
	int strict;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);
#ifndef __HAIKU__
	if (qgroup->tqg_cnt == 0)
#else
	KASSERT(qgroup->tqg_cnt > 0, ("qgroup(%p)->tqg_cnt is %d!", qgroup, qgroup->tqg_cnt));
	if (qgroup->tqg_cnt == 1)
#endif
		return (0);
	idx = -1;
	mincnt = INT_MAX;
	/*
	 * Two passes: first scan for a queue with the fewest tasks that
	 * does not already service this uniq id.  If that fails, simply
	 * pick the queue with the fewest total tasks.
	 */
	for (strict = 1; mincnt == INT_MAX; strict = 0) {
		for (i = 0; i < qgroup->tqg_cnt; i++) {
			if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
				continue;
			if (strict) {
				LIST_FOREACH(n,
				    &qgroup->tqg_queue[i].tgc_tasks, gt_list)
					if (n->gt_uniq == uniq)
						break;
				if (n != NULL)
					continue;
			}
			mincnt = qgroup->tqg_queue[i].tgc_cnt;
			idx = i;
		}
	}
	if (idx == -1)
		panic("taskqgroup_find: Failed to pick a qid.");

	return (idx);
}

/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */

static int tqg_smp_started = 0;

static void
tqg_record_smp_started(void *arg)
{
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
	tqg_record_smp_started, NULL);

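/*
 * Attach a grouptask to the least-loaded queue in the group.  If an
 * IRQ is given and SMP is up, also bind that interrupt to the chosen
 * queue's CPU (FreeBSD only; Haiku has no intr_setaffinity()).
 */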
void
taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
    void *uniq, int irq, const char *name)
{
#ifndef __HAIKU__
	cpuset_t mask;
#endif
	int qid, error;

	gtask->gt_uniq = uniq;
	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
	gtask->gt_irq = irq;
	gtask->gt_cpu = -1;
	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, uniq);
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	if (irq != -1 && tqg_smp_started) {
		gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
#ifndef __HAIKU__
		CPU_ZERO(&mask);
		CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
#endif
		mtx_unlock(&qgroup->tqg_lock);
#ifndef __HAIKU__
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
#endif
	} else
		mtx_unlock(&qgroup->tqg_lock);
}

static void
taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
#ifndef __HAIKU__
	cpuset_t mask;
#endif
	int qid, cpu, error;

	mtx_lock(&qgroup->tqg_lock);
	qid = taskqgroup_find(qgroup, gtask->gt_uniq);
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
#ifndef __HAIKU__
	if (gtask->gt_irq != -1) {
		mtx_unlock(&qgroup->tqg_lock);

		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);
		error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
		mtx_lock(&qgroup->tqg_lock);
		if (error)
			printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
	}
#endif
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);
}

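/*
 * Like taskqgroup_attach(), but place the grouptask on the queue bound
 * to the given CPU.  Before SMP starts, fall back to queue 0.
 */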
int
taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
	void *uniq, int cpu, int irq, const char *name)
{
#ifndef __HAIKU__
	cpuset_t mask;
#endif
	int i, qid, error;

	qid = -1;
	gtask->gt_uniq = uniq;
	snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
	gtask->gt_irq = irq;
	gtask->gt_cpu = cpu;
	mtx_lock(&qgroup->tqg_lock);
	if (tqg_smp_started) {
		for (i = 0; i < qgroup->tqg_cnt; i++)
			if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
				qid = i;
				break;
			}
		if (qid == -1) {
			mtx_unlock(&qgroup->tqg_lock);
			printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
			return (EINVAL);
		}
	} else
		qid = 0;
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	cpu = qgroup->tqg_queue[qid].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);

#ifndef __HAIKU__
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	if (irq != -1 && tqg_smp_started) {
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed: %d\n", __func__, error);
	}
#endif
	return (0);
}

static int
taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
{
#ifndef __HAIKU__
	cpuset_t mask;
#endif
	int i, qid, irq, cpu, error;

	qid = -1;
	irq = gtask->gt_irq;
	cpu = gtask->gt_cpu;
	MPASS(tqg_smp_started);
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
			qid = i;
			break;
		}
	if (qid == -1) {
		mtx_unlock(&qgroup->tqg_lock);
		printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
		return (EINVAL);
	}
	qgroup->tqg_queue[qid].tgc_cnt++;
	LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
	MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
	gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
	mtx_unlock(&qgroup->tqg_lock);

#ifndef __HAIKU__
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	if (irq != -1) {
		error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
		if (error)
			printf("%s: setaffinity failed: %d\n", __func__, error);
	}
#endif
	return (0);
}

void
taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
{
	int i;

	grouptask_block(gtask);
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++)
		if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
			break;
	if (i == qgroup->tqg_cnt)
		panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
	qgroup->tqg_queue[i].tgc_cnt--;
	LIST_REMOVE(gtask, gt_list);
	mtx_unlock(&qgroup->tqg_lock);
	gtask->gt_taskqueue = NULL;
	gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
}

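/*
 * Task body used by taskqgroup_bind(): runs on each queue in the group
 * and pins the executing thread to its assigned CPU.
 */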
static void
taskqgroup_binder(void *ctx)
{
	struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
#ifndef __HAIKU__
	cpuset_t mask;
	int error;

	CPU_ZERO(&mask);
	CPU_SET(gtask->bt_cpuid, &mask);
	error = cpuset_setthread(curthread->td_tid, &mask);
	thread_lock(curthread);
	sched_bind(curthread, gtask->bt_cpuid);
	thread_unlock(curthread);

	if (error)
		printf("%s: setaffinity failed: %d\n", __func__,
		    error);
#endif
	free(gtask, M_DEVBUF);
}

static void
taskqgroup_bind(struct taskqgroup *qgroup)
{
	struct taskq_bind_task *gtask;
	int i;

	/*
	 * Bind taskqueue threads to specific CPUs, if they have been assigned
	 * one.
	 */
	if (qgroup->tqg_cnt == 1)
		return;

	for (i = 0; i < qgroup->tqg_cnt; i++) {
		gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
		GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
		gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
		grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
		    &gtask->bt_task);
	}
}

static void
taskqgroup_config_init(void *arg)
{
	struct taskqgroup *qgroup = qgroup_config;
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);

	LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
	    grouptask, gt_list);
	qgroup->tqg_queue[0].tgc_cnt = 0;
	taskqgroup_cpu_create(qgroup, 0, 0);

	qgroup->tqg_cnt = 1;
	qgroup->tqg_stride = 1;
}

SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
	taskqgroup_config_init, NULL);

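/*
 * Resize the group to "cnt" queues whose CPUs are "stride" apart:
 * create or remove per-CPU queues as needed, then redistribute every
 * attached grouptask across the new set.
 */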
static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
	struct grouptask *gtask;
	int i, k, old_cnt, old_cpu, cpu;

	mtx_assert(&qgroup->tqg_lock, MA_OWNED);

	if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
		printf("%s: failed cnt: %d stride: %d "
		    "mp_ncpus: %d tqg_smp_started: %d\n",
		    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
		return (EINVAL);
	}
	if (qgroup->tqg_adjusting) {
		printf("%s failed: adjusting\n", __func__);
		return (EBUSY);
	}
	qgroup->tqg_adjusting = 1;
	old_cnt = qgroup->tqg_cnt;
	old_cpu = 0;
	if (old_cnt < cnt)
		old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
	mtx_unlock(&qgroup->tqg_lock);
	/*
	 * Set up queue for tasks added before boot.
	 */
	if (old_cnt == 0) {
		LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
		    grouptask, gt_list);
		qgroup->tqg_queue[0].tgc_cnt = 0;
	}

	/*
	 * Create queues for any newly added taskq threads.
	 */
	cpu = old_cpu;
	for (i = old_cnt; i < cnt; i++) {
		taskqgroup_cpu_create(qgroup, i, cpu);

		for (k = 0; k < stride; k++)
			cpu = CPU_NEXT(cpu);
	}
	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_cnt = cnt;
	qgroup->tqg_stride = stride;

	/*
	 * Adjust drivers to use new taskqs.
	 */
	for (i = 0; i < old_cnt; i++) {
		while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
			LIST_REMOVE(gtask, gt_list);
			qgroup->tqg_queue[i].tgc_cnt--;
			LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
		}
	}
	mtx_unlock(&qgroup->tqg_lock);

	while ((gtask = LIST_FIRST(&gtask_head))) {
		LIST_REMOVE(gtask, gt_list);
		if (gtask->gt_cpu == -1)
			taskqgroup_attach_deferred(qgroup, gtask);
		else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
			taskqgroup_attach_deferred(qgroup, gtask);
	}

#ifdef INVARIANTS
	mtx_lock(&qgroup->tqg_lock);
	for (i = 0; i < qgroup->tqg_cnt; i++) {
		MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
		LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
			MPASS(gtask->gt_taskqueue != NULL);
	}
	mtx_unlock(&qgroup->tqg_lock);
#endif
	/*
	 * Remove queues if the taskq thread count has been reduced.
	 */
	for (i = cnt; i < old_cnt; i++)
		taskqgroup_cpu_remove(qgroup, i);

	taskqgroup_bind(qgroup);

	mtx_lock(&qgroup->tqg_lock);
	qgroup->tqg_adjusting = 0;

	return (0);
}

int
taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
	int error;

	mtx_lock(&qgroup->tqg_lock);
	error = _taskqgroup_adjust(qgroup, cnt, stride);
	mtx_unlock(&qgroup->tqg_lock);

	return (error);
}

struct taskqgroup *
taskqgroup_create(const char *name)
{
	struct taskqgroup *qgroup;

	qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
	mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
	qgroup->tqg_name = name;
	LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);

	return (qgroup);
}

void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}

void
taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
	const char *name)
{

	GROUPTASK_INIT(gtask, 0, fn, ctx);
	taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
}

void
taskqgroup_config_gtask_deinit(struct grouptask *gtask)
{
	taskqgroup_detach(qgroup_config, gtask);
}
1123