1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer,
12 * without modification.
13 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
14 * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
15 * redistribution must be conditioned upon including a substantially
16 * similar Disclaimer requirement for further binary redistribution.
17 *
18 * NO WARRANTY
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
22 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23 * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
24 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
27 * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
29 * THE POSSIBILITY OF SUCH DAMAGES.
30 */
31
32 #include <sys/cdefs.h>
33 /*
34 * Driver for the Atheros Wireless LAN controller.
35 *
36 * This software is derived from work of Atsushi Onoe; his contribution
37 * is greatly appreciated.
38 */
39
40 #include "opt_inet.h"
41 #include "opt_ath.h"
42 /*
43 * This is needed for register operations which are performed
44 * by the driver - eg, calls to ath_hal_gettsf32().
45 *
46 * It's also required for any AH_DEBUG checks in here, eg the
47 * module dependencies.
48 */
49 #include "opt_ah.h"
50 #include "opt_wlan.h"
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/sysctl.h>
55 #include <sys/mbuf.h>
56 #include <sys/malloc.h>
57 #include <sys/lock.h>
58 #include <sys/mutex.h>
59 #include <sys/kernel.h>
60 #include <sys/socket.h>
61 #include <sys/sockio.h>
62 #include <sys/errno.h>
63 #include <sys/callout.h>
64 #include <sys/bus.h>
65 #include <sys/endian.h>
66 #include <sys/kthread.h>
67 #include <sys/taskqueue.h>
68 #include <sys/priv.h>
69 #include <sys/module.h>
70 #include <sys/ktr.h>
71 #include <sys/smp.h> /* for mp_ncpus */
72
73 #include <machine/bus.h>
74
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <net/if_dl.h>
78 #include <net/if_media.h>
79 #include <net/if_types.h>
80 #include <net/if_arp.h>
81 #include <net/ethernet.h>
82 #include <net/if_llc.h>
83
84 #include <net80211/ieee80211_var.h>
85 #include <net80211/ieee80211_regdomain.h>
86 #ifdef IEEE80211_SUPPORT_SUPERG
87 #include <net80211/ieee80211_superg.h>
88 #endif
89 #ifdef IEEE80211_SUPPORT_TDMA
90 #include <net80211/ieee80211_tdma.h>
91 #endif
92
93 #include <net/bpf.h>
94
95 #ifdef INET
96 #include <netinet/in.h>
97 #include <netinet/if_ether.h>
98 #endif
99
100 #include <dev/ath/if_athvar.h>
101 #include <dev/ath/ath_hal/ah_devid.h> /* XXX for softled */
102 #include <dev/ath/ath_hal/ah_diagcodes.h>
103
104 #include <dev/ath/if_ath_debug.h>
105 #include <dev/ath/if_ath_misc.h>
106 #include <dev/ath/if_ath_tsf.h>
107 #include <dev/ath/if_ath_tx.h>
108 #include <dev/ath/if_ath_sysctl.h>
109 #include <dev/ath/if_ath_led.h>
110 #include <dev/ath/if_ath_keycache.h>
111 #include <dev/ath/if_ath_rx.h>
112 #include <dev/ath/if_ath_beacon.h>
113 #include <dev/ath/if_athdfs.h>
114 #include <dev/ath/if_ath_descdma.h>
115
116 #ifdef ATH_TX99_DIAG
117 #include <dev/ath/ath_tx99/ath_tx99.h>
118 #endif
119
120 #include <dev/ath/if_ath_tx_edma.h>
121
122 #ifdef ATH_DEBUG_ALQ
123 #include <dev/ath/if_ath_alq.h>
124 #endif
125
/*
 * some general macros
 *
 * INCR()/DECR() step the ring index _l forward/backward by one and mask
 * the result with (_sz - 1), so the wrap-around is only correct when _sz
 * is a power of two.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to exactly
 * one statement and stays intact as the body of an unbraced if/else or
 * loop.  _l is evaluated more than once; don't pass an expression with
 * side effects.
 */
#define	INCR(_l, _sz)	do { (_l)++; (_l) &= ((_sz) - 1); } while (0)
#define	DECR(_l, _sz)	do { (_l)--; (_l) &= ((_sz) - 1); } while (0)

/*
 * XXX doesn't belong here, and should be tunable
 */
#define	ATH_TXSTATUS_RING_SIZE	512
136
137 MALLOC_DECLARE(M_ATHDEV);
138
139 static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
140
141 #ifdef ATH_DEBUG_ALQ
142 static void
ath_tx_alq_edma_push(struct ath_softc * sc,int txq,int nframes,int fifo_depth,int frame_cnt)143 ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes,
144 int fifo_depth, int frame_cnt)
145 {
146 struct if_ath_alq_tx_fifo_push aq;
147
148 aq.txq = htobe32(txq);
149 aq.nframes = htobe32(nframes);
150 aq.fifo_depth = htobe32(fifo_depth);
151 aq.frame_cnt = htobe32(frame_cnt);
152
153 if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH,
154 sizeof(aq),
155 (const char *) &aq);
156 }
157 #endif /* ATH_DEBUG_ALQ */
158
159 /*
160 * XXX TODO: push an aggregate as a single FIFO slot, even though
161 * it may not meet the TXOP for say, DBA-gated traffic in TDMA mode.
162 *
163 * The TX completion code handles a TX FIFO slot having multiple frames,
164 * aggregate or otherwise, but it may just make things easier to deal
165 * with.
166 *
167 * XXX TODO: track the number of aggregate subframes and put that in the
168 * push alq message.
169 */
170 static void
ath_tx_edma_push_staging_list(struct ath_softc * sc,struct ath_txq * txq,int limit)171 ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq,
172 int limit)
173 {
174 struct ath_buf *bf, *bf_last;
175 struct ath_buf *bfi, *bfp;
176 int i, sqdepth;
177 TAILQ_HEAD(axq_q_f_s, ath_buf) sq;
178
179 ATH_TXQ_LOCK_ASSERT(txq);
180
181 DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
182 "%s: called; TXQ=%d, fifo.depth=%d, axq_q empty=%d\n",
183 __func__,
184 txq->axq_qnum,
185 txq->axq_fifo_depth,
186 !! (TAILQ_EMPTY(&txq->axq_q)));
187
188 /*
189 * Don't bother doing any work if it's full.
190 */
191 if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
192 return;
193
194 if (TAILQ_EMPTY(&txq->axq_q))
195 return;
196
197 TAILQ_INIT(&sq);
198
199 /*
200 * First pass - walk sq, queue up to 'limit' entries,
201 * subtract them from the staging queue.
202 */
203 sqdepth = 0;
204 for (i = 0; i < limit; i++) {
205 /* Grab the head entry */
206 bf = ATH_TXQ_FIRST(txq);
207 if (bf == NULL)
208 break;
209 ATH_TXQ_REMOVE(txq, bf, bf_list);
210
211 /* Queue it into our staging list */
212 TAILQ_INSERT_TAIL(&sq, bf, bf_list);
213
214 /* Ensure the flags are cleared */
215 bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND);
216 sqdepth++;
217 }
218
219 /*
220 * Ok, so now we have a staging list of up to 'limit'
221 * frames from the txq. Now let's wrap that up
222 * into its own list and pass that to the hardware
223 * as one FIFO entry.
224 */
225
226 bf = TAILQ_FIRST(&sq);
227 bf_last = TAILQ_LAST(&sq, axq_q_s);
228
229 /*
230 * Ok, so here's the gymnastics reqiured to make this
231 * all sensible.
232 */
233
234 /*
235 * Tag the first/last buffer appropriately.
236 */
237 bf->bf_flags |= ATH_BUF_FIFOPTR;
238 bf_last->bf_flags |= ATH_BUF_FIFOEND;
239
240 /*
241 * Walk the descriptor list and link them appropriately.
242 */
243 bfp = NULL;
244 TAILQ_FOREACH(bfi, &sq, bf_list) {
245 if (bfp != NULL) {
246 ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds,
247 bfi->bf_daddr);
248 }
249 bfp = bfi;
250 }
251
252 i = 0;
253 TAILQ_FOREACH(bfi, &sq, bf_list) {
254 #ifdef ATH_DEBUG
255 if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
256 ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0);
257 #endif/* ATH_DEBUG */
258 #ifdef ATH_DEBUG_ALQ
259 if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
260 ath_tx_alq_post(sc, bfi);
261 #endif /* ATH_DEBUG_ALQ */
262 i++;
263 }
264
265 /*
266 * We now need to push this set of frames onto the tail
267 * of the FIFO queue. We don't adjust the aggregate
268 * count, only the queue depth counter(s).
269 * We also need to blank the link pointer now.
270 */
271
272 TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list);
273 /* Bump total queue tracking in FIFO queue */
274 txq->fifo.axq_depth += sqdepth;
275
276 /* Bump FIFO queue */
277 txq->axq_fifo_depth++;
278 DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
279 "%s: queued %d packets; depth=%d, fifo depth=%d\n",
280 __func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth);
281
282 /* Push the first entry into the hardware */
283 ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
284
285 /* Push start on the DMA if it's not already started */
286 ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
287
288 #ifdef ATH_DEBUG_ALQ
289 ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth,
290 txq->axq_fifo_depth,
291 txq->fifo.axq_depth);
292 #endif /* ATH_DEBUG_ALQ */
293 }
294
295 #define TX_BATCH_SIZE 32
296
297 /*
298 * Push some frames into the TX FIFO if we have space.
299 */
300 static void
ath_edma_tx_fifo_fill(struct ath_softc * sc,struct ath_txq * txq)301 ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
302 {
303
304 ATH_TXQ_LOCK_ASSERT(txq);
305
306 DPRINTF(sc, ATH_DEBUG_TX_PROC,
307 "%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n",
308 __func__,
309 txq->axq_qnum,
310 txq->fifo.axq_depth,
311 txq->axq_fifo_depth,
312 txq->axq_depth,
313 txq->axq_aggr_depth);
314
315 /*
316 * For now, push up to 32 frames per TX FIFO slot.
317 * If more are in the hardware queue then they'll
318 * get populated when we try to send another frame
319 * or complete a frame - so at most there'll be
320 * 32 non-AMPDU frames per node/TID anyway.
321 *
322 * Note that the hardware staging queue will limit
323 * how many frames in total we will have pushed into
324 * here.
325 *
326 * Later on, we'll want to push less frames into
327 * the TX FIFO since we don't want to necessarily
328 * fill tens or hundreds of milliseconds of potential
329 * frames.
330 *
331 * However, we need more frames right now because of
332 * how the MAC implements the frame scheduling policy.
333 * It only ungates a single FIFO entry at a time,
334 * and will run that until CHNTIME expires or the
335 * end of that FIFO entry descriptor list is reached.
336 * So for TDMA we suffer a big performance penalty -
337 * single TX FIFO entries mean the MAC only sends out
338 * one frame per DBA event, which turned out on average
339 * 6ms per TX frame.
340 *
341 * So, for aggregates it's okay - it'll push two at a
342 * time and this will just do them more efficiently.
343 * For non-aggregates it'll do 4 at a time, up to the
344 * non-aggr limit (non_aggr, which is 32.) They should
345 * be time based rather than a hard count, but I also
346 * do need sleep.
347 */
348
349 /*
350 * Do some basic, basic batching to the hardware
351 * queue.
352 *
353 * If we have TX_BATCH_SIZE entries in the staging
354 * queue, then let's try to send them all in one hit.
355 *
356 * Ensure we don't push more than TX_BATCH_SIZE worth
357 * in, otherwise we end up draining 8 slots worth of
358 * 32 frames into the hardware queue and then we don't
359 * attempt to push more frames in until we empty the
360 * FIFO.
361 */
362 if (txq->axq_depth >= TX_BATCH_SIZE / 2 &&
363 txq->fifo.axq_depth <= TX_BATCH_SIZE) {
364 ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
365 }
366
367 /*
368 * Aggregate check: if we have less than two FIFO slots
369 * busy and we have some aggregate frames, queue it.
370 *
371 * Now, ideally we'd just check to see if the scheduler
372 * has given us aggregate frames and push them into the FIFO
373 * as individual slots, as honestly we should just be pushing
374 * a single aggregate in as one FIFO slot.
375 *
376 * Let's do that next once I know this works.
377 */
378 else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2)
379 ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
380
381 /*
382 *
383 * If we have less, and the TXFIFO isn't empty, let's
384 * wait until we've finished sending the FIFO.
385 *
386 * If we have less, and the TXFIFO is empty, then
387 * send them.
388 */
389 else if (txq->axq_fifo_depth == 0) {
390 ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
391 }
392 }
393
394 /*
395 * Re-initialise the DMA FIFO with the current contents of
396 * said TXQ.
397 *
398 * This should only be called as part of the chip reset path, as it
399 * assumes the FIFO is currently empty.
400 */
401 static void
ath_edma_dma_restart(struct ath_softc * sc,struct ath_txq * txq)402 ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
403 {
404 struct ath_buf *bf;
405 int i = 0;
406 int fifostart = 1;
407 int old_fifo_depth;
408
409 DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
410 __func__,
411 txq->axq_qnum);
412
413 ATH_TXQ_LOCK_ASSERT(txq);
414
415 /*
416 * Let's log if the tracked FIFO depth doesn't match
417 * what we actually push in.
418 */
419 old_fifo_depth = txq->axq_fifo_depth;
420 txq->axq_fifo_depth = 0;
421
422 /*
423 * Walk the FIFO staging list, looking for "head" entries.
424 * Since we may have a partially completed list of frames,
425 * we push the first frame we see into the FIFO and re-mark
426 * it as the head entry. We then skip entries until we see
427 * FIFO end, at which point we get ready to push another
428 * entry into the FIFO.
429 */
430 TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
431 /*
432 * If we're looking for FIFOEND and we haven't found
433 * it, skip.
434 *
435 * If we're looking for FIFOEND and we've found it,
436 * reset for another descriptor.
437 */
438 #ifdef ATH_DEBUG
439 if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
440 ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
441 #endif/* ATH_DEBUG */
442 #ifdef ATH_DEBUG_ALQ
443 if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
444 ath_tx_alq_post(sc, bf);
445 #endif /* ATH_DEBUG_ALQ */
446
447 if (fifostart == 0) {
448 if (bf->bf_flags & ATH_BUF_FIFOEND)
449 fifostart = 1;
450 continue;
451 }
452
453 /* Make sure we're not overflowing the FIFO! */
454 if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
455 device_printf(sc->sc_dev,
456 "%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
457 __func__,
458 txq->axq_qnum,
459 txq->axq_fifo_depth);
460 }
461
462 #if 0
463 DPRINTF(sc, ATH_DEBUG_RESET,
464 "%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
465 __func__,
466 txq->axq_qnum,
467 txq->axq_fifo_depth,
468 bf,
469 !! (bf->bf_flags & ATH_BUF_FIFOPTR),
470 !! (bf->bf_flags & ATH_BUF_FIFOEND));
471 #endif
472
473 /*
474 * Set this to be the first buffer in the FIFO
475 * list - even if it's also the last buffer in
476 * a FIFO list!
477 */
478 bf->bf_flags |= ATH_BUF_FIFOPTR;
479
480 /* Push it into the FIFO and bump the FIFO count */
481 ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
482 txq->axq_fifo_depth++;
483
484 /*
485 * If this isn't the last entry either, let's
486 * clear fifostart so we continue looking for
487 * said last entry.
488 */
489 if (! (bf->bf_flags & ATH_BUF_FIFOEND))
490 fifostart = 0;
491 i++;
492 }
493
494 /* Only bother starting the queue if there's something in it */
495 if (i > 0)
496 ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
497
498 DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
499 __func__,
500 txq->axq_qnum,
501 old_fifo_depth,
502 txq->axq_fifo_depth);
503
504 /* And now, let's check! */
505 if (txq->axq_fifo_depth != old_fifo_depth) {
506 device_printf(sc->sc_dev,
507 "%s: Q%d: FIFO depth should be %d, is %d\n",
508 __func__,
509 txq->axq_qnum,
510 old_fifo_depth,
511 txq->axq_fifo_depth);
512 }
513 }
514
515 /*
516 * Hand off this frame to a hardware queue.
517 *
518 * Things are a bit hairy in the EDMA world. The TX FIFO is only
519 * 8 entries deep, so we need to keep track of exactly what we've
520 * pushed into the FIFO and what's just sitting in the TX queue,
521 * waiting to go out.
522 *
523 * So this is split into two halves - frames get appended to the
524 * TXQ; then a scheduler is called to push some frames into the
525 * actual TX FIFO.
526 */
527 static void
ath_edma_xmit_handoff_hw(struct ath_softc * sc,struct ath_txq * txq,struct ath_buf * bf)528 ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
529 struct ath_buf *bf)
530 {
531
532 ATH_TXQ_LOCK(txq);
533
534 KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
535 ("%s: busy status 0x%x", __func__, bf->bf_flags));
536
537 /*
538 * XXX TODO: write a hard-coded check to ensure that
539 * the queue id in the TX descriptor matches txq->axq_qnum.
540 */
541
542 /* Update aggr stats */
543 if (bf->bf_state.bfs_aggr)
544 txq->axq_aggr_depth++;
545
546 /* Push and update frame stats */
547 ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
548
549 /*
550 * Finally, call the FIFO schedule routine to schedule some
551 * frames to the FIFO.
552 */
553 ath_edma_tx_fifo_fill(sc, txq);
554 ATH_TXQ_UNLOCK(txq);
555 }
556
557 /*
558 * Hand off this frame to a multicast software queue.
559 *
560 * The EDMA TX CABQ will get a list of chained frames, chained
561 * together using the next pointer. The single head of that
562 * particular queue is pushed to the hardware CABQ.
563 */
564 static void
ath_edma_xmit_handoff_mcast(struct ath_softc * sc,struct ath_txq * txq,struct ath_buf * bf)565 ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
566 struct ath_buf *bf)
567 {
568
569 ATH_TX_LOCK_ASSERT(sc);
570 KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
571 ("%s: busy status 0x%x", __func__, bf->bf_flags));
572
573 ATH_TXQ_LOCK(txq);
574 /*
575 * XXX this is mostly duplicated in ath_tx_handoff_mcast().
576 */
577 if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
578 struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
579 struct ieee80211_frame *wh;
580
581 /* mark previous frame */
582 wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
583 wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
584
585 /* re-sync buffer to memory */
586 bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
587 BUS_DMASYNC_PREWRITE);
588
589 /* link descriptor */
590 ath_hal_settxdesclink(sc->sc_ah,
591 bf_last->bf_lastds,
592 bf->bf_daddr);
593 }
594 #ifdef ATH_DEBUG_ALQ
595 if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
596 ath_tx_alq_post(sc, bf);
597 #endif /* ATH_DEBUG_ALQ */
598 ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
599 ATH_TXQ_UNLOCK(txq);
600 }
601
602 /*
603 * Handoff this frame to the hardware.
604 *
605 * For the multicast queue, this will treat it as a software queue
606 * and append it to the list, after updating the MORE_DATA flag
607 * in the previous frame. The cabq processing code will ensure
608 * that the queue contents gets transferred over.
609 *
610 * For the hardware queues, this will queue a frame to the queue
611 * like before, then populate the FIFO from that. Since the
612 * EDMA hardware has 8 FIFO slots per TXQ, this ensures that
613 * frames such as management frames don't get prematurely dropped.
614 *
615 * This does imply that a similar flush-hwq-to-fifoq method will
616 * need to be called from the processq function, before the
617 * per-node software scheduler is called.
618 */
619 static void
ath_edma_xmit_handoff(struct ath_softc * sc,struct ath_txq * txq,struct ath_buf * bf)620 ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
621 struct ath_buf *bf)
622 {
623
624 DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
625 "%s: called; bf=%p, txq=%p, qnum=%d\n",
626 __func__,
627 bf,
628 txq,
629 txq->axq_qnum);
630
631 if (txq->axq_qnum == ATH_TXQ_SWQ)
632 ath_edma_xmit_handoff_mcast(sc, txq, bf);
633 else
634 ath_edma_xmit_handoff_hw(sc, txq, bf);
635 }
636
637 static int
ath_edma_setup_txfifo(struct ath_softc * sc,int qnum)638 ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
639 {
640 struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
641
642 te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
643 M_ATHDEV,
644 M_NOWAIT | M_ZERO);
645 if (te->m_fifo == NULL) {
646 device_printf(sc->sc_dev, "%s: malloc failed\n",
647 __func__);
648 return (-ENOMEM);
649 }
650
651 /*
652 * Set initial "empty" state.
653 */
654 te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;
655
656 return (0);
657 }
658
659 static int
ath_edma_free_txfifo(struct ath_softc * sc,int qnum)660 ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
661 {
662 struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
663
664 /* XXX TODO: actually deref the ath_buf entries? */
665 free(te->m_fifo, M_ATHDEV);
666 return (0);
667 }
668
669 static int
ath_edma_dma_txsetup(struct ath_softc * sc)670 ath_edma_dma_txsetup(struct ath_softc *sc)
671 {
672 int error;
673 int i;
674
675 error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
676 NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
677 if (error != 0)
678 return (error);
679
680 ath_hal_setuptxstatusring(sc->sc_ah,
681 (void *) sc->sc_txsdma.dd_desc,
682 sc->sc_txsdma.dd_desc_paddr,
683 ATH_TXSTATUS_RING_SIZE);
684
685 for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
686 ath_edma_setup_txfifo(sc, i);
687 }
688
689 return (0);
690 }
691
692 static int
ath_edma_dma_txteardown(struct ath_softc * sc)693 ath_edma_dma_txteardown(struct ath_softc *sc)
694 {
695 int i;
696
697 for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
698 ath_edma_free_txfifo(sc, i);
699 }
700
701 ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);
702 return (0);
703 }
704
705 /*
706 * Drain all TXQs, potentially after completing the existing completed
707 * frames.
708 */
709 static void
ath_edma_tx_drain(struct ath_softc * sc,ATH_RESET_TYPE reset_type)710 ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
711 {
712 int i;
713
714 DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
715
716 (void) ath_stoptxdma(sc);
717
718 /*
719 * If reset type is noloss, the TX FIFO needs to be serviced
720 * and those frames need to be handled.
721 *
722 * Otherwise, just toss everything in each TX queue.
723 */
724 if (reset_type == ATH_RESET_NOLOSS) {
725 ath_edma_tx_processq(sc, 0);
726 for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
727 if (ATH_TXQ_SETUP(sc, i)) {
728 ATH_TXQ_LOCK(&sc->sc_txq[i]);
729 /*
730 * Free the holding buffer; DMA is now
731 * stopped.
732 */
733 ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
734 /*
735 * Reset the link pointer to NULL; there's
736 * no frames to chain DMA to.
737 */
738 sc->sc_txq[i].axq_link = NULL;
739 ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
740 }
741 }
742 } else {
743 for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
744 if (ATH_TXQ_SETUP(sc, i))
745 ath_tx_draintxq(sc, &sc->sc_txq[i]);
746 }
747 }
748
749 /* XXX dump out the TX completion FIFO contents */
750
751 /* XXX dump out the frames */
752
753 sc->sc_wd_timer = 0;
754 }
755
756 /*
757 * TX completion tasklet.
758 */
759
760 static void
ath_edma_tx_proc(void * arg,int npending)761 ath_edma_tx_proc(void *arg, int npending)
762 {
763 struct ath_softc *sc = (struct ath_softc *) arg;
764
765 ATH_PCU_LOCK(sc);
766 sc->sc_txproc_cnt++;
767 ATH_PCU_UNLOCK(sc);
768
769 ATH_LOCK(sc);
770 ath_power_set_power_state(sc, HAL_PM_AWAKE);
771 ATH_UNLOCK(sc);
772
773 #if 0
774 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
775 __func__, npending);
776 #endif
777 ath_edma_tx_processq(sc, 1);
778
779 ATH_PCU_LOCK(sc);
780 sc->sc_txproc_cnt--;
781 ATH_PCU_UNLOCK(sc);
782
783 ATH_LOCK(sc);
784 ath_power_restore_power_state(sc);
785 ATH_UNLOCK(sc);
786
787 ath_tx_kick(sc);
788 }
789
790 /*
791 * Process the TX status queue.
792 */
793 static void
ath_edma_tx_processq(struct ath_softc * sc,int dosched)794 ath_edma_tx_processq(struct ath_softc *sc, int dosched)
795 {
796 struct ath_hal *ah = sc->sc_ah;
797 HAL_STATUS status;
798 struct ath_tx_status ts;
799 struct ath_txq *txq;
800 struct ath_buf *bf;
801 struct ieee80211_node *ni;
802 int nacked = 0;
803 int idx;
804 int i;
805
806 #ifdef ATH_DEBUG
807 /* XXX */
808 uint32_t txstatus[32];
809 #endif
810
811 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called\n", __func__);
812
813 for (idx = 0; ; idx++) {
814 bzero(&ts, sizeof(ts));
815
816 ATH_TXSTATUS_LOCK(sc);
817 #ifdef ATH_DEBUG
818 ath_hal_gettxrawtxdesc(ah, txstatus);
819 #endif
820 status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
821 ATH_TXSTATUS_UNLOCK(sc);
822
823 if (status == HAL_EINPROGRESS) {
824 DPRINTF(sc, ATH_DEBUG_TX_PROC,
825 "%s: (%d): EINPROGRESS\n",
826 __func__, idx);
827 break;
828 }
829
830 #ifdef ATH_DEBUG
831 if (sc->sc_debug & ATH_DEBUG_TX_PROC)
832 if (ts.ts_queue_id != sc->sc_bhalq)
833 ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
834 idx, (status == HAL_OK));
835 #endif
836
837 /*
838 * If there is an error with this descriptor, continue
839 * processing.
840 *
841 * XXX TBD: log some statistics?
842 */
843 if (status == HAL_EIO) {
844 device_printf(sc->sc_dev, "%s: invalid TX status?\n",
845 __func__);
846 break;
847 }
848
849 #if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG)
850 if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) {
851 if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
852 sc->sc_tx_statuslen,
853 (char *) txstatus);
854 }
855 #endif /* ATH_DEBUG_ALQ */
856
857 /*
858 * At this point we have a valid status descriptor.
859 * The QID and descriptor ID (which currently isn't set)
860 * is part of the status.
861 *
862 * We then assume that the descriptor in question is the
863 * -head- of the given QID. Eventually we should verify
864 * this by using the descriptor ID.
865 */
866
867 /*
868 * The beacon queue is not currently a "real" queue.
869 * Frames aren't pushed onto it and the lock isn't setup.
870 * So skip it for now; the beacon handling code will
871 * free and alloc more beacon buffers as appropriate.
872 */
873 if (ts.ts_queue_id == sc->sc_bhalq)
874 continue;
875
876 txq = &sc->sc_txq[ts.ts_queue_id];
877
878 ATH_TXQ_LOCK(txq);
879 bf = ATH_TXQ_FIRST(&txq->fifo);
880
881 /*
882 * Work around the situation where I'm seeing notifications
883 * for Q1 when no frames are available. That needs to be
884 * debugged but not by crashing _here_.
885 */
886 if (bf == NULL) {
887 device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
888 __func__,
889 ts.ts_queue_id);
890 ATH_TXQ_UNLOCK(txq);
891 continue;
892 }
893
894 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
895 __func__,
896 ts.ts_queue_id, bf,
897 !! (bf->bf_flags & ATH_BUF_FIFOPTR),
898 !! (bf->bf_flags & ATH_BUF_FIFOEND));
899
900 /* XXX TODO: actually output debugging info about this */
901
902 #if 0
903 /* XXX assert the buffer/descriptor matches the status descid */
904 if (ts.ts_desc_id != bf->bf_descid) {
905 device_printf(sc->sc_dev,
906 "%s: mismatched descid (qid=%d, tsdescid=%d, "
907 "bfdescid=%d\n",
908 __func__,
909 ts.ts_queue_id,
910 ts.ts_desc_id,
911 bf->bf_descid);
912 }
913 #endif
914
915 /* This removes the buffer and decrements the queue depth */
916 ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
917 if (bf->bf_state.bfs_aggr)
918 txq->axq_aggr_depth--;
919
920 /*
921 * If this was the end of a FIFO set, decrement FIFO depth
922 */
923 if (bf->bf_flags & ATH_BUF_FIFOEND)
924 txq->axq_fifo_depth--;
925
926 /*
927 * If this isn't the final buffer in a FIFO set, mark
928 * the buffer as busy so it goes onto the holding queue.
929 */
930 if (! (bf->bf_flags & ATH_BUF_FIFOEND))
931 bf->bf_flags |= ATH_BUF_BUSY;
932
933 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
934 __func__,
935 txq->axq_qnum,
936 txq->axq_fifo_depth,
937 txq->fifo.axq_depth);
938
939 /* XXX assert FIFO depth >= 0 */
940 ATH_TXQ_UNLOCK(txq);
941
942 /*
943 * Outside of the TX lock - if the buffer is end
944 * end buffer in this FIFO, we don't need a holding
945 * buffer any longer.
946 */
947 if (bf->bf_flags & ATH_BUF_FIFOEND) {
948 ATH_TXQ_LOCK(txq);
949 ath_txq_freeholdingbuf(sc, txq);
950 ATH_TXQ_UNLOCK(txq);
951 }
952
953 /*
954 * First we need to make sure ts_rate is valid.
955 *
956 * Pre-EDMA chips pass the whole TX descriptor to
957 * the proctxdesc function which will then fill out
958 * ts_rate based on the ts_finaltsi (final TX index)
959 * in the TX descriptor. However the TX completion
960 * FIFO doesn't have this information. So here we
961 * do a separate HAL call to populate that information.
962 *
963 * The same problem exists with ts_longretry.
964 * The FreeBSD HAL corrects ts_longretry in the HAL layer;
965 * the AR9380 HAL currently doesn't. So until the HAL
966 * is imported and this can be added, we correct for it
967 * here.
968 */
969 /* XXX TODO */
970 /* XXX faked for now. Ew. */
971 if (ts.ts_finaltsi < 4) {
972 ts.ts_rate =
973 bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
974 switch (ts.ts_finaltsi) {
975 case 3: ts.ts_longretry +=
976 bf->bf_state.bfs_rc[2].tries;
977 case 2: ts.ts_longretry +=
978 bf->bf_state.bfs_rc[1].tries;
979 case 1: ts.ts_longretry +=
980 bf->bf_state.bfs_rc[0].tries;
981 }
982 } else {
983 device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
984 __func__,
985 ts.ts_finaltsi);
986 ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
987 }
988
989 /*
990 * XXX This is terrible.
991 *
992 * Right now, some code uses the TX status that is
993 * passed in here, but the completion handlers in the
994 * software TX path also use bf_status.ds_txstat.
995 * Ew. That should all go away.
996 *
997 * XXX It's also possible the rate control completion
998 * routine is called twice.
999 */
1000 memcpy(&bf->bf_status, &ts, sizeof(ts));
1001
1002 ni = bf->bf_node;
1003
1004 /* Update RSSI */
1005 /* XXX duplicate from ath_tx_processq */
1006 if (ni != NULL && ts.ts_status == 0 &&
1007 ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
1008 nacked++;
1009 sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
1010 ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
1011 ts.ts_rssi);
1012 ATH_RSSI_LPF(ATH_NODE(ni)->an_node_stats.ns_avgtxrssi,
1013 ts.ts_rssi);
1014 }
1015
1016 /* Handle frame completion and rate control update */
1017 ath_tx_process_buf_completion(sc, txq, &ts, bf);
1018
1019 /* NB: bf is invalid at this point */
1020 }
1021
1022 sc->sc_wd_timer = 0;
1023
1024 /*
1025 * XXX It's inefficient to do this if the FIFO queue is full,
1026 * but there's no easy way right now to only populate
1027 * the txq task for _one_ TXQ. This should be fixed.
1028 */
1029 if (dosched) {
1030 /* Attempt to schedule more hardware frames to the TX FIFO */
1031 for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
1032 if (ATH_TXQ_SETUP(sc, i)) {
1033 ATH_TX_LOCK(sc);
1034 ath_txq_sched(sc, &sc->sc_txq[i]);
1035 ATH_TX_UNLOCK(sc);
1036
1037 ATH_TXQ_LOCK(&sc->sc_txq[i]);
1038 ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]);
1039 ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
1040 }
1041 }
1042 /* Kick software scheduler */
1043 ath_tx_swq_kick(sc);
1044 }
1045
1046 DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: end\n", __func__);
1047 }
1048
1049 static void
ath_edma_attach_comp_func(struct ath_softc * sc)1050 ath_edma_attach_comp_func(struct ath_softc *sc)
1051 {
1052
1053 TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc);
1054 }
1055
1056 void
ath_xmit_setup_edma(struct ath_softc * sc)1057 ath_xmit_setup_edma(struct ath_softc *sc)
1058 {
1059
1060 /* Fetch EDMA field and buffer sizes */
1061 (void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen);
1062 (void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen);
1063 (void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps);
1064
1065 if (bootverbose) {
1066 device_printf(sc->sc_dev, "TX descriptor length: %d\n",
1067 sc->sc_tx_desclen);
1068 device_printf(sc->sc_dev, "TX status length: %d\n",
1069 sc->sc_tx_statuslen);
1070 device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
1071 sc->sc_tx_nmaps);
1072 }
1073
1074 sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
1075 sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
1076 sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
1077
1078 sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;
1079 sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
1080 sc->sc_tx.xmit_drain = ath_edma_tx_drain;
1081 }
1082