1 /*- 2 * Copyright (c) 2014-2018, Matthew Macy <mmacy@mattmacy.io> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Neither the name of Matthew Macy nor the names of its 12 * contributors may be used to endorse or promote products derived from 13 * this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #include <stdlib.h> 30 __FBSDID("$FreeBSD$"); 31 32 #ifndef __HAIKU__ 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_acpi.h" 36 #include "opt_sched.h" 37 #endif 38 39 #include <sys/param.h> 40 #include <sys/types.h> 41 #include <sys/bus.h> 42 #include <sys/eventhandler.h> 43 #ifndef __HAIKU__ 44 #include <sys/jail.h> 45 #endif 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sx.h> 50 #include <sys/module.h> 51 #include <sys/kobj.h> 52 #include <sys/rman.h> 53 #include <sys/sbuf.h> 54 #include <sys/smp.h> 55 #include <sys/socket.h> 56 #include <sys/sockio.h> 57 #include <sys/sysctl.h> 58 #include <sys/syslog.h> 59 #include <sys/taskqueue.h> 60 #include <sys/limits.h> 61 62 #include <net/if.h> 63 #include <net/if_var.h> 64 #include <net/if_types.h> 65 #include <net/if_media.h> 66 #include <net/bpf.h> 67 #include <net/ethernet.h> 68 #include <net/if_vlan_var.h> 69 #include <net/mp_ring.h> 70 #include <net/vnet.h> 71 #include <net/debugnet.h> 72 73 #include <netinet/in.h> 74 #ifndef __HAIKU__ 75 #include <netinet/in_pcb.h> 76 #include <netinet/tcp_lro.h> 77 #include <netinet/in_systm.h> 78 #endif 79 #include <netinet/if_ether.h> 80 #include <netinet/ip.h> 81 #include <netinet/ip6.h> 82 #include <netinet/tcp.h> 83 #include <netinet/ip_var.h> 84 #ifndef __HAIKU__ 85 #include <netinet6/ip6_var.h> 86 #endif 87 88 #include <machine/bus.h> 89 #ifndef __HAIKU__ 90 #include <machine/in_cksum.h> 91 #endif 92 93 #include <vm/vm.h> 94 #include <vm/pmap.h> 95 96 #include <dev/led/led.h> 97 #include <dev/pci/pcireg.h> 98 #include <dev/pci/pcivar.h> 99 #ifndef __HAIKU__ 100 #include <dev/pci/pci_private.h> 101 #endif 102 103 #include <net/iflib.h> 104 #include <net/iflib_private.h> 105 106 #include <ifdi_if.h> 107 #include <device_if.h> 108 109 #ifdef PCI_IOV 110 #include <dev/pci/pci_iov.h> 111 #endif 112 113 #include <sys/bitstring.h> 114 115 /* 116 * enable accounting of every mbuf as it comes in to and goes out of 117 * iflib's software descriptor references 118 */ 119 #define MEMORY_LOGGING 0 120 /* 121 * Enable mbuf 
vectors for compressing long mbuf chains 122 */ 123 124 /* 125 * NB: 126 * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead 127 * we prefetch needs to be determined by the time spent in m_free vis a vis 128 * the cost of a prefetch. This will of course vary based on the workload: 129 * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which 130 * is quite expensive, thus suggesting very little prefetch. 131 * - small packet forwarding which is just returning a single mbuf to 132 * UMA will typically be very fast vis a vis the cost of a memory 133 * access. 134 */ 135 136 /* 137 * File organization: 138 * - private structures 139 * - iflib private utility functions 140 * - ifnet functions 141 * - vlan registry and other exported functions 142 * - iflib public core functions 143 * 144 * 145 */ 146 MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); 147 148 #define IFLIB_RXEOF_MORE (1U << 0) 149 #define IFLIB_RXEOF_EMPTY (2U << 0) 150 151 struct iflib_txq; 152 typedef struct iflib_txq *iflib_txq_t; 153 struct iflib_rxq; 154 typedef struct iflib_rxq *iflib_rxq_t; 155 struct iflib_fl; 156 typedef struct iflib_fl *iflib_fl_t; 157 158 struct iflib_ctx; 159 160 static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); 161 static void iflib_timer(void *arg); 162 static void iflib_tqg_detach(if_ctx_t ctx); 163 164 typedef struct iflib_filter_info { 165 driver_filter_t *ifi_filter; 166 void *ifi_filter_arg; 167 struct grouptask *ifi_task; 168 void *ifi_ctx; 169 } *iflib_filter_info_t; 170 171 struct iflib_ctx { 172 KOBJ_FIELDS; 173 /* 174 * Pointer to hardware driver's softc 175 */ 176 void *ifc_softc; 177 device_t ifc_dev; 178 if_t ifc_ifp; 179 180 #ifndef __HAIKU__ 181 cpuset_t ifc_cpus; 182 #endif 183 if_shared_ctx_t ifc_sctx; 184 struct if_softc_ctx ifc_softc_ctx; 185 186 struct sx ifc_ctx_sx; 187 struct mtx ifc_state_mtx; 188 189 iflib_txq_t ifc_txqs; 190 iflib_rxq_t ifc_rxqs; 191 uint32_t ifc_if_flags; 192 uint32_t ifc_flags; 193 uint32_t ifc_max_fl_buf_size; 194 uint32_t ifc_rx_mbuf_sz; 195 196 int ifc_link_state; 197 int ifc_watchdog_events; 198 struct cdev *ifc_led_dev; 199 struct resource *ifc_msix_mem; 200 201 struct if_irq ifc_legacy_irq; 202 struct grouptask ifc_admin_task; 203 struct grouptask ifc_vflr_task; 204 struct iflib_filter_info ifc_filter_info; 205 struct ifmedia ifc_media; 206 struct ifmedia *ifc_mediap; 207 208 struct sysctl_oid *ifc_sysctl_node; 209 uint16_t ifc_sysctl_ntxqs; 210 uint16_t ifc_sysctl_nrxqs; 211 uint16_t ifc_sysctl_qs_eq_override; 212 uint16_t ifc_sysctl_rx_budget; 213 uint16_t ifc_sysctl_tx_abdicate; 214 uint16_t ifc_sysctl_core_offset; 215 #define CORE_OFFSET_UNSPECIFIED 0xffff 216 uint8_t ifc_sysctl_separate_txrx; 217 uint8_t ifc_sysctl_use_logical_cores; 218 bool ifc_cpus_are_physical_cores; 219 220 qidx_t ifc_sysctl_ntxds[8]; 221 qidx_t ifc_sysctl_nrxds[8]; 222 struct if_txrx ifc_txrx; 223 #define isc_txd_encap ifc_txrx.ift_txd_encap 224 #define isc_txd_flush ifc_txrx.ift_txd_flush 225 #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update 226 #define isc_rxd_available ifc_txrx.ift_rxd_available 227 #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get 228 #define isc_rxd_refill ifc_txrx.ift_rxd_refill 229 #define isc_rxd_flush ifc_txrx.ift_rxd_flush 230 #define isc_legacy_intr ifc_txrx.ift_legacy_intr 231 eventhandler_tag ifc_vlan_attach_event; 232 eventhandler_tag ifc_vlan_detach_event; 233 struct ether_addr ifc_mac; 234 }; 235 236 void * 237 iflib_get_softc(if_ctx_t ctx) 238 { 239 240 return 
(ctx->ifc_softc); 241 } 242 243 device_t 244 iflib_get_dev(if_ctx_t ctx) 245 { 246 247 return (ctx->ifc_dev); 248 } 249 250 if_t 251 iflib_get_ifp(if_ctx_t ctx) 252 { 253 254 return (ctx->ifc_ifp); 255 } 256 257 struct ifmedia * 258 iflib_get_media(if_ctx_t ctx) 259 { 260 261 return (ctx->ifc_mediap); 262 } 263 264 uint32_t 265 iflib_get_flags(if_ctx_t ctx) 266 { 267 return (ctx->ifc_flags); 268 } 269 270 void 271 iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) 272 { 273 274 bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); 275 } 276 277 if_softc_ctx_t 278 iflib_get_softc_ctx(if_ctx_t ctx) 279 { 280 281 return (&ctx->ifc_softc_ctx); 282 } 283 284 if_shared_ctx_t 285 iflib_get_sctx(if_ctx_t ctx) 286 { 287 288 return (ctx->ifc_sctx); 289 } 290 291 #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) 292 #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) 293 #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) 294 295 #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) 296 #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) 297 298 typedef struct iflib_sw_rx_desc_array { 299 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 300 struct mbuf **ifsd_m; /* pkthdr mbufs */ 301 caddr_t *ifsd_cl; /* direct cluster pointer for rx */ 302 bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ 303 } iflib_rxsd_array_t; 304 305 typedef struct iflib_sw_tx_desc_array { 306 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 307 bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ 308 struct mbuf **ifsd_m; /* pkthdr mbufs */ 309 } if_txsd_vec_t; 310 311 /* magic number that should be high enough for any hardware */ 312 #define IFLIB_MAX_TX_SEGS 128 313 #define IFLIB_RX_COPY_THRESH 128 314 #define IFLIB_MAX_RX_REFRESH 32 315 /* The minimum descriptors per second before we start coalescing */ 316 #define IFLIB_MIN_DESC_SEC 16384 317 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 318 #define IFLIB_QUEUE_IDLE 0 319 #define IFLIB_QUEUE_HUNG 1 320 #define IFLIB_QUEUE_WORKING 2 321 /* maximum number of txqs that can share an rx interrupt */ 322 #define IFLIB_MAX_TX_SHARED_INTR 4 323 324 /* this should really scale with ring size - this is a fairly arbitrary value */ 325 #define TX_BATCH_SIZE 32 326 327 #define IFLIB_RESTART_BUDGET 8 328 329 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ 330 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ 331 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) 332 333 struct iflib_txq { 334 qidx_t ift_in_use; 335 qidx_t ift_cidx; 336 qidx_t ift_cidx_processed; 337 qidx_t ift_pidx; 338 uint8_t ift_gen; 339 uint8_t ift_br_offset; 340 uint16_t ift_npending; 341 uint16_t ift_db_pending; 342 uint16_t ift_rs_pending; 343 /* implicit pad */ 344 uint8_t ift_txd_size[8]; 345 uint64_t ift_processed; 346 uint64_t ift_cleaned; 347 uint64_t ift_cleaned_prev; 348 #if MEMORY_LOGGING 349 uint64_t ift_enqueued; 350 uint64_t ift_dequeued; 351 #endif 352 uint64_t ift_no_tx_dma_setup; 353 uint64_t ift_no_desc_avail; 354 uint64_t ift_mbuf_defrag_failed; 355 uint64_t ift_mbuf_defrag; 356 uint64_t ift_map_failed; 357 uint64_t ift_txd_encap_efbig; 358 uint64_t ift_pullups; 359 uint64_t ift_last_timer_tick; 360 361 struct mtx ift_mtx; 362 struct mtx ift_db_mtx; 363 364 /* constant values */ 365 if_ctx_t ift_ctx; 366 struct ifmp_ring *ift_br; 367 struct grouptask ift_task; 368 qidx_t ift_size; 369 uint16_t ift_id; 370 struct callout ift_timer; 371 #ifdef DEV_NETMAP 372 struct callout ift_netmap_timer; 373 
#endif /* DEV_NETMAP */ 374 375 if_txsd_vec_t ift_sds; 376 uint8_t ift_qstatus; 377 uint8_t ift_closed; 378 uint8_t ift_update_freq; 379 struct iflib_filter_info ift_filter_info; 380 bus_dma_tag_t ift_buf_tag; 381 bus_dma_tag_t ift_tso_buf_tag; 382 iflib_dma_info_t ift_ifdi; 383 #define MTX_NAME_LEN 32 384 char ift_mtx_name[MTX_NAME_LEN]; 385 bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); 386 #ifdef IFLIB_DIAGNOSTICS 387 uint64_t ift_cpu_exec_count[256]; 388 #endif 389 } __aligned(CACHE_LINE_SIZE); 390 391 struct iflib_fl { 392 qidx_t ifl_cidx; 393 qidx_t ifl_pidx; 394 qidx_t ifl_credits; 395 uint8_t ifl_gen; 396 uint8_t ifl_rxd_size; 397 #if MEMORY_LOGGING 398 uint64_t ifl_m_enqueued; 399 uint64_t ifl_m_dequeued; 400 uint64_t ifl_cl_enqueued; 401 uint64_t ifl_cl_dequeued; 402 #endif 403 /* implicit pad */ 404 bitstr_t *ifl_rx_bitmap; 405 qidx_t ifl_fragidx; 406 /* constant */ 407 qidx_t ifl_size; 408 uint16_t ifl_buf_size; 409 uint16_t ifl_cltype; 410 #ifndef __HAIKU__ 411 uma_zone_t ifl_zone; 412 #endif 413 iflib_rxsd_array_t ifl_sds; 414 iflib_rxq_t ifl_rxq; 415 uint8_t ifl_id; 416 bus_dma_tag_t ifl_buf_tag; 417 iflib_dma_info_t ifl_ifdi; 418 uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); 419 qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; 420 } __aligned(CACHE_LINE_SIZE); 421 422 static inline qidx_t 423 get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) 424 { 425 qidx_t used; 426 427 if (pidx > cidx) 428 used = pidx - cidx; 429 else if (pidx < cidx) 430 used = size - cidx + pidx; 431 else if (gen == 0 && pidx == cidx) 432 used = 0; 433 else if (gen == 1 && pidx == cidx) 434 used = size; 435 else 436 panic("bad state"); 437 438 return (used); 439 } 440 441 #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) 442 443 #define IDXDIFF(head, tail, wrap) \ 444 ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) 445 446 struct iflib_rxq { 447 if_ctx_t ifr_ctx; 448 iflib_fl_t ifr_fl; 449 uint64_t ifr_rx_irq; 450 #ifndef __HAIKU__ 451 struct pfil_head *pfil; 452 #else 453 #define PFIL_PASS 0 454 #endif 455 /* 456 * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is 457 * the completion queue consumer index. Otherwise it's unused. 
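	 * (IFLIB_HAS_RXCQ is set by drivers whose hardware reports RX
	 * completions in a queue separate from the free list descriptors;
	 * see its use in iflib_netmap_rxsync() and iflib_fast_intr_rxtx()
	 * below.)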
458 */ 459 qidx_t ifr_cq_cidx; 460 uint16_t ifr_id; 461 uint8_t ifr_nfl; 462 uint8_t ifr_ntxqirq; 463 uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; 464 uint8_t ifr_fl_offset; 465 #ifndef __HAIKU__ 466 struct lro_ctrl ifr_lc; 467 #endif 468 struct grouptask ifr_task; 469 struct callout ifr_watchdog; 470 struct iflib_filter_info ifr_filter_info; 471 iflib_dma_info_t ifr_ifdi; 472 473 /* dynamically allocate if any drivers need a value substantially larger than this */ 474 struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); 475 #ifdef IFLIB_DIAGNOSTICS 476 uint64_t ifr_cpu_exec_count[256]; 477 #endif 478 } __aligned(CACHE_LINE_SIZE); 479 480 typedef struct if_rxsd { 481 caddr_t *ifsd_cl; 482 iflib_fl_t ifsd_fl; 483 } *if_rxsd_t; 484 485 /* multiple of word size */ 486 #ifdef __LP64__ 487 #define PKT_INFO_SIZE 6 488 #define RXD_INFO_SIZE 5 489 #define PKT_TYPE uint64_t 490 #else 491 #define PKT_INFO_SIZE 11 492 #define RXD_INFO_SIZE 8 493 #define PKT_TYPE uint32_t 494 #endif 495 #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) 496 #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) 497 498 typedef struct if_pkt_info_pad { 499 PKT_TYPE pkt_val[PKT_INFO_SIZE]; 500 } *if_pkt_info_pad_t; 501 typedef struct if_rxd_info_pad { 502 PKT_TYPE rxd_val[RXD_INFO_SIZE]; 503 } *if_rxd_info_pad_t; 504 505 CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); 506 CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); 507 508 static inline void 509 pkt_info_zero(if_pkt_info_t pi) 510 { 511 if_pkt_info_pad_t pi_pad; 512 513 pi_pad = (if_pkt_info_pad_t)pi; 514 pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; 515 pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; 516 #ifndef __LP64__ 517 pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; 518 pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; 519 #endif 520 } 521 522 #ifndef __HAIKU__ 523 static device_method_t iflib_pseudo_methods[] = { 524 DEVMETHOD(device_attach, noop_attach), 525 DEVMETHOD(device_detach, iflib_pseudo_detach), 526 DEVMETHOD_END 527 }; 528 529 driver_t iflib_pseudodriver = { 530 "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), 531 }; 532 #endif 533 534 static inline void 535 rxd_info_zero(if_rxd_info_t ri) 536 { 537 if_rxd_info_pad_t ri_pad; 538 int i; 539 540 ri_pad = (if_rxd_info_pad_t)ri; 541 for (i = 0; i < RXD_LOOP_BOUND; i += 4) { 542 ri_pad->rxd_val[i] = 0; 543 ri_pad->rxd_val[i+1] = 0; 544 ri_pad->rxd_val[i+2] = 0; 545 ri_pad->rxd_val[i+3] = 0; 546 } 547 #ifdef __LP64__ 548 ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; 549 #endif 550 } 551 552 /* 553 * Only allow a single packet to take up most 1/nth of the tx ring 554 */ 555 #define MAX_SINGLE_PACKET_FRACTION 12 556 #define IF_BAD_DMA (bus_addr_t)-1 557 558 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) 559 560 #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") 561 #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) 562 #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) 563 #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) 564 565 #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) 566 #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) 567 #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) 568 #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) 569 570 #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) 571 #define 
CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) 572 573 void 574 iflib_set_detach(if_ctx_t ctx) 575 { 576 STATE_LOCK(ctx); 577 ctx->ifc_flags |= IFC_IN_DETACH; 578 STATE_UNLOCK(ctx); 579 } 580 581 /* Our boot-time initialization hook */ 582 static int iflib_module_event_handler(module_t, int, void *); 583 584 #ifndef __HAIKU__ 585 static moduledata_t iflib_moduledata = { 586 "iflib", 587 iflib_module_event_handler, 588 NULL 589 }; 590 #endif 591 592 DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); 593 MODULE_VERSION(iflib, 1); 594 595 MODULE_DEPEND(iflib, pci, 1, 1, 1); 596 MODULE_DEPEND(iflib, ether, 1, 1, 1); 597 598 TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); 599 TASKQGROUP_DEFINE(if_config_tqg, 1, 1); 600 601 #ifndef IFLIB_DEBUG_COUNTERS 602 #ifdef INVARIANTS 603 #define IFLIB_DEBUG_COUNTERS 1 604 #else 605 #define IFLIB_DEBUG_COUNTERS 0 606 #endif /* !INVARIANTS */ 607 #endif 608 609 static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 610 "iflib driver parameters"); 611 612 /* 613 * XXX need to ensure that this can't accidentally cause the head to be moved backwards 614 */ 615 static int iflib_min_tx_latency = 0; 616 SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, 617 &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); 618 static int iflib_no_tx_batch = 0; 619 SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, 620 &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); 621 static int iflib_timer_default = 1000; 622 SYSCTL_INT(_net_iflib, OID_AUTO, timer_default, CTLFLAG_RW, 623 &iflib_timer_default, 0, "number of ticks between iflib_timer calls"); 624 625 626 #if IFLIB_DEBUG_COUNTERS 627 628 static int iflib_tx_seen; 629 static int iflib_tx_sent; 630 static int iflib_tx_encap; 631 static int iflib_rx_allocs; 632 static int iflib_fl_refills; 633 static int iflib_fl_refills_large; 634 static int iflib_tx_frees; 635 636 SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, 637 &iflib_tx_seen, 0, "# TX mbufs seen"); 638 SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, 639 &iflib_tx_sent, 0, "# TX mbufs sent"); 640 SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, 641 &iflib_tx_encap, 0, "# TX mbufs encapped"); 642 SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, 643 &iflib_tx_frees, 0, "# TX frees"); 644 SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, 645 &iflib_rx_allocs, 0, "# RX allocations"); 646 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, 647 &iflib_fl_refills, 0, "# refills"); 648 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, 649 &iflib_fl_refills_large, 0, "# large refills"); 650 651 static int iflib_txq_drain_flushing; 652 static int iflib_txq_drain_oactive; 653 static int iflib_txq_drain_notready; 654 655 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, 656 &iflib_txq_drain_flushing, 0, "# drain flushes"); 657 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, 658 &iflib_txq_drain_oactive, 0, "# drain oactives"); 659 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, 660 &iflib_txq_drain_notready, 0, "# drain notready"); 661 662 static int iflib_encap_load_mbuf_fail; 663 static int iflib_encap_pad_mbuf_fail; 664 static int iflib_encap_txq_avail_fail; 665 static int iflib_encap_txd_encap_fail; 666 667 SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, 668 &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); 669 
SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, 670 &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); 671 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, 672 &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); 673 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, 674 &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); 675 676 static int iflib_task_fn_rxs; 677 static int iflib_rx_intr_enables; 678 static int iflib_fast_intrs; 679 static int iflib_rx_unavail; 680 static int iflib_rx_ctx_inactive; 681 static int iflib_rx_if_input; 682 static int iflib_rxd_flush; 683 684 static int iflib_verbose_debug; 685 686 SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, 687 &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); 688 SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, 689 &iflib_rx_intr_enables, 0, "# RX intr enables"); 690 SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, 691 &iflib_fast_intrs, 0, "# fast_intr calls"); 692 SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, 693 &iflib_rx_unavail, 0, "# times rxeof called with no available data"); 694 SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, 695 &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); 696 SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, 697 &iflib_rx_if_input, 0, "# times rxeof called if_input"); 698 SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, 699 &iflib_rxd_flush, 0, "# times rxd_flush called"); 700 SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, 701 &iflib_verbose_debug, 0, "enable verbose debugging"); 702 703 #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) 704 static void 705 iflib_debug_reset(void) 706 { 707 iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = 708 iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = 709 iflib_txq_drain_flushing = iflib_txq_drain_oactive = 710 iflib_txq_drain_notready = 711 iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = 712 iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = 713 iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = 714 iflib_rx_unavail = 715 iflib_rx_ctx_inactive = iflib_rx_if_input = 716 iflib_rxd_flush = 0; 717 } 718 719 #else 720 #define DBG_COUNTER_INC(name) 721 static void iflib_debug_reset(void) {} 722 #endif 723 724 #define IFLIB_DEBUG 0 725 726 static void iflib_tx_structures_free(if_ctx_t ctx); 727 static void iflib_rx_structures_free(if_ctx_t ctx); 728 static int iflib_queues_alloc(if_ctx_t ctx); 729 static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); 730 static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); 731 static int iflib_qset_structures_setup(if_ctx_t ctx); 732 static int iflib_msix_init(if_ctx_t ctx); 733 static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); 734 static void iflib_txq_check_drain(iflib_txq_t txq, int budget); 735 static uint32_t iflib_txq_can_drain(struct ifmp_ring *); 736 #ifdef ALTQ 737 static void iflib_altq_if_start(if_t ifp); 738 static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); 739 #endif 740 static int iflib_register(if_ctx_t); 741 static void iflib_deregister(if_ctx_t); 742 static void iflib_unregister_vlan_handlers(if_ctx_t ctx); 743 static uint16_t iflib_get_mbuf_size_for(unsigned int size); 744 static void iflib_init_locked(if_ctx_t ctx); 745 static void 
iflib_add_device_sysctl_pre(if_ctx_t ctx); 746 static void iflib_add_device_sysctl_post(if_ctx_t ctx); 747 static void iflib_ifmp_purge(iflib_txq_t txq); 748 static void _iflib_pre_assert(if_softc_ctx_t scctx); 749 static void iflib_if_init_locked(if_ctx_t ctx); 750 static void iflib_free_intr_mem(if_ctx_t ctx); 751 #ifndef __NO_STRICT_ALIGNMENT 752 static struct mbuf * iflib_fixup_rx(struct mbuf *m); 753 #endif 754 755 #ifndef __HAIKU__ 756 static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = 757 SLIST_HEAD_INITIALIZER(cpu_offsets); 758 struct cpu_offset { 759 SLIST_ENTRY(cpu_offset) entries; 760 cpuset_t set; 761 unsigned int refcount; 762 uint16_t next_cpuid; 763 }; 764 static struct mtx cpu_offset_mtx; 765 MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", 766 MTX_DEF); 767 #endif 768 769 DEBUGNET_DEFINE(iflib); 770 771 static int 772 iflib_num_rx_descs(if_ctx_t ctx) 773 { 774 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 775 if_shared_ctx_t sctx = ctx->ifc_sctx; 776 uint16_t first_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; 777 778 return scctx->isc_nrxd[first_rxq]; 779 } 780 781 static int 782 iflib_num_tx_descs(if_ctx_t ctx) 783 { 784 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 785 if_shared_ctx_t sctx = ctx->ifc_sctx; 786 uint16_t first_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; 787 788 return scctx->isc_ntxd[first_txq]; 789 } 790 791 #ifdef DEV_NETMAP 792 #include <sys/selinfo.h> 793 #include <net/netmap.h> 794 #include <dev/netmap/netmap_kern.h> 795 796 MODULE_DEPEND(iflib, netmap, 1, 1, 1); 797 798 static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init); 799 static void iflib_netmap_timer(void *arg); 800 801 /* 802 * device-specific sysctl variables: 803 * 804 * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. 805 * During regular operations the CRC is stripped, but on some 806 * hardware reception of frames not multiple of 64 is slower, 807 * so using crcstrip=0 helps in benchmarks. 808 * 809 * iflib_rx_miss, iflib_rx_miss_bufs: 810 * count packets that might be missed due to lost interrupts. 811 */ 812 SYSCTL_DECL(_dev_netmap); 813 /* 814 * The xl driver by default strips CRCs and we do not override it. 815 */ 816 817 int iflib_crcstrip = 1; 818 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, 819 CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); 820 821 int iflib_rx_miss, iflib_rx_miss_bufs; 822 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, 823 CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); 824 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, 825 CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); 826 827 /* 828 * Register/unregister. We are already under netmap lock. 829 * Only called on the first register or the last unregister. 830 */ 831 static int 832 iflib_netmap_register(struct netmap_adapter *na, int onoff) 833 { 834 if_t ifp = na->ifp; 835 if_ctx_t ctx = ifp->if_softc; 836 int status; 837 838 CTX_LOCK(ctx); 839 if (!CTX_IS_VF(ctx)) 840 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); 841 842 iflib_stop(ctx); 843 844 /* 845 * Enable (or disable) netmap flags, and intercept (or restore) 846 * ifp->if_transmit. This is done once the device has been stopped 847 * to prevent race conditions. Also, this must be done after 848 * calling netmap_disable_all_rings() and before calling 849 * netmap_enable_all_rings(), so that these two functions see the 850 * updated state of the NAF_NETMAP_ON bit. 
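	 * (In this file those two calls are expected to happen inside
	 * iflib_stop() and iflib_init_locked() respectively, which is why
	 * the flag update sits between the two calls below.)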
	 */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}

	iflib_init_locked(ctx);
	IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ?
	status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1;
	if (status)
		nm_clear_native_flags(na);
	CTX_UNLOCK(ctx);
	return (status);
}

static int
iflib_netmap_config(struct netmap_adapter *na, struct nm_config_info *info)
{
	if_t ifp = na->ifp;
	if_ctx_t ctx = ifp->if_softc;
	iflib_rxq_t rxq = &ctx->ifc_rxqs[0];
	iflib_fl_t fl = &rxq->ifr_fl[0];

	info->num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets;
	info->num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets;
	info->num_tx_descs = iflib_num_tx_descs(ctx);
	info->num_rx_descs = iflib_num_rx_descs(ctx);
	info->rx_buf_maxsize = fl->ifl_buf_size;
	nm_prinf("txr %u rxr %u txd %u rxd %u rbufsz %u",
	    info->num_tx_rings, info->num_rx_rings, info->num_tx_descs,
	    info->num_rx_descs, info->rx_buf_maxsize);

	return 0;
}

static int
netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	struct netmap_ring *ring = kring->ring;
	bus_dmamap_t *map;
	struct if_rxd_update iru;
	if_ctx_t ctx = rxq->ifr_ctx;
	iflib_fl_t fl = &rxq->ifr_fl[0];
	u_int nic_i_first, nic_i;
	u_int nm_i;
	int i, n;
#if IFLIB_DEBUG_COUNTERS
	int rf_count = 0;
#endif

	/*
	 * This function is used both at initialization and in rxsync.
	 * At initialization we need to prepare (with isc_rxd_refill())
	 * all the netmap buffers currently owned by the kernel, in
	 * such a way as to keep fl->ifl_pidx and kring->nr_hwcur in sync
	 * (except for kring->nkr_hwofs). These may be fewer than
	 * kring->nkr_num_slots if netmap_reset() was called while an
	 * application using the kring still owned some buffers.
	 * At rxsync time, both indexes point to the next buffer to be
	 * refilled.
	 * In any case we publish (with isc_rxd_flush()) up to
	 * (fl->ifl_pidx - 1) % N (included), to avoid the NIC tail/prod
	 * pointer overrunning the head/cons pointer, although this is
	 * not necessary for some NICs (e.g. vmx).
	 */
	if (__predict_false(init)) {
		n = kring->nkr_num_slots - nm_kr_rxspace(kring);
	} else {
		n = kring->rhead - kring->nr_hwcur;
		if (n == 0)
			return (0); /* Nothing to do. */
		if (n < 0)
			n += kring->nkr_num_slots;
	}

	iru_init(&iru, rxq, 0 /* flid */);
	map = fl->ifl_sds.ifsd_map;
	nic_i = fl->ifl_pidx;
	nm_i = netmap_idx_n2k(kring, nic_i);
	if (__predict_false(init)) {
		/*
		 * On init/reset, nic_i must be 0, and we must
		 * start to refill from hwtail (see netmap_reset()).
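		 * In both cases the two index spaces are related by
		 * nm_i == (nic_i + kring->nkr_hwofs) % kring->nkr_num_slots
		 * (see the rxsync comment below); the MPASS() checks that
		 * follow rely on netmap_idx_n2k() applying exactly that
		 * offset.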
938 */ 939 MPASS(nic_i == 0); 940 MPASS(nm_i == kring->nr_hwtail); 941 } else 942 MPASS(nm_i == kring->nr_hwcur); 943 DBG_COUNTER_INC(fl_refills); 944 while (n > 0) { 945 #if IFLIB_DEBUG_COUNTERS 946 if (++rf_count == 9) 947 DBG_COUNTER_INC(fl_refills_large); 948 #endif 949 nic_i_first = nic_i; 950 for (i = 0; n > 0 && i < IFLIB_MAX_RX_REFRESH; n--, i++) { 951 struct netmap_slot *slot = &ring->slot[nm_i]; 952 void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); 953 954 MPASS(i < IFLIB_MAX_RX_REFRESH); 955 956 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 957 return netmap_ring_reinit(kring); 958 959 fl->ifl_rxd_idxs[i] = nic_i; 960 961 if (__predict_false(init)) { 962 netmap_load_map(na, fl->ifl_buf_tag, 963 map[nic_i], addr); 964 } else if (slot->flags & NS_BUF_CHANGED) { 965 /* buffer has changed, reload map */ 966 netmap_reload_map(na, fl->ifl_buf_tag, 967 map[nic_i], addr); 968 } 969 bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i], 970 BUS_DMASYNC_PREREAD); 971 slot->flags &= ~NS_BUF_CHANGED; 972 973 nm_i = nm_next(nm_i, lim); 974 nic_i = nm_next(nic_i, lim); 975 } 976 977 iru.iru_pidx = nic_i_first; 978 iru.iru_count = i; 979 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 980 } 981 fl->ifl_pidx = nic_i; 982 /* 983 * At the end of the loop we must have refilled everything 984 * we could possibly refill. 985 */ 986 MPASS(nm_i == kring->rhead); 987 kring->nr_hwcur = nm_i; 988 989 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 990 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 991 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, 992 nm_prev(nic_i, lim)); 993 DBG_COUNTER_INC(rxd_flush); 994 995 return (0); 996 } 997 998 #define NETMAP_TX_TIMER_US 90 999 1000 /* 1001 * Reconcile kernel and user view of the transmit ring. 1002 * 1003 * All information is in the kring. 1004 * Userspace wants to send packets up to the one before kring->rhead, 1005 * kernel knows kring->nr_hwcur is the first unsent packet. 1006 * 1007 * Here we push packets out (as many as possible), and possibly 1008 * reclaim buffers from previously completed transmission. 1009 * 1010 * The caller (netmap) guarantees that there is only one instance 1011 * running at any time. Any interference with other driver 1012 * methods should be handled by the individual drivers. 1013 */ 1014 static int 1015 iflib_netmap_txsync(struct netmap_kring *kring, int flags) 1016 { 1017 struct netmap_adapter *na = kring->na; 1018 if_t ifp = na->ifp; 1019 struct netmap_ring *ring = kring->ring; 1020 u_int nm_i; /* index into the netmap kring */ 1021 u_int nic_i; /* index into the NIC ring */ 1022 u_int n; 1023 u_int const lim = kring->nkr_num_slots - 1; 1024 u_int const head = kring->rhead; 1025 struct if_pkt_info pi; 1026 int tx_pkts = 0, tx_bytes = 0; 1027 1028 /* 1029 * interrupts on every tx packet are expensive so request 1030 * them every half ring, or where NS_REPORT is set 1031 */ 1032 u_int report_frequency = kring->nkr_num_slots >> 1; 1033 /* device-specific */ 1034 if_ctx_t ctx = ifp->if_softc; 1035 iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; 1036 1037 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 1038 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1039 1040 /* 1041 * First part: process new packets to send. 1042 * nm_i is the current index in the netmap kring, 1043 * nic_i is the corresponding index in the NIC ring. 1044 * 1045 * If we have packets to send (nm_i != head) 1046 * iterate over the netmap ring, fetch length and update 1047 * the corresponding slot in the NIC ring. 
Some drivers also
	 * need to update the buffer's physical address in the NIC slot
	 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
	 *
	 * The netmap_reload_map() call is especially expensive,
	 * even when (as in this case) the tag is 0, so only do it
	 * when the buffer has actually changed.
	 *
	 * If possible do not set the report/intr bit on all slots,
	 * but only a few times per ring or when NS_REPORT is set.
	 *
	 * Finally, on 10G and faster drivers, it might be useful
	 * to prefetch the next slot and txr entry.
	 */

	nm_i = kring->nr_hwcur;
	if (nm_i != head) {	/* we have new packets to send */
		uint32_t pkt_len = 0, seg_idx = 0;
		int nic_i_start = -1, flags = 0;
		pkt_info_zero(&pi);
		pi.ipi_segs = txq->ift_segs;
		pi.ipi_qsidx = kring->ring_id;
		nic_i = netmap_idx_k2n(kring, nm_i);

		__builtin_prefetch(&ring->slot[nm_i]);
		__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
		__builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);

		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);

			flags |= (slot->flags & NS_REPORT ||
			    nic_i == 0 || nic_i == report_frequency) ?
			    IPI_TX_INTR : 0;

			/*
			 * If this is the first packet fragment, save the
			 * index of the first NIC slot for later.
			 */
			if (nic_i_start < 0)
				nic_i_start = nic_i;

			pi.ipi_segs[seg_idx].ds_addr = paddr;
			pi.ipi_segs[seg_idx].ds_len = len;
			if (len) {
				pkt_len += len;
				seg_idx++;
			}

			if (!(slot->flags & NS_MOREFRAG)) {
				pi.ipi_len = pkt_len;
				pi.ipi_nsegs = seg_idx;
				pi.ipi_pidx = nic_i_start;
				pi.ipi_ndescs = 0;
				pi.ipi_flags = flags;

				/* Prepare the NIC TX ring. */
				ctx->isc_txd_encap(ctx->ifc_softc, &pi);
				DBG_COUNTER_INC(tx_encap);

				/* Update transmit counters */
				tx_bytes += pi.ipi_len;
				tx_pkts++;

				/* Reinit per-packet info for the next one. */
				flags = seg_idx = pkt_len = 0;
				nic_i_start = -1;
			}

			/* prefetch for next round */
			__builtin_prefetch(&ring->slot[nm_i + 1]);
			__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
			__builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);

			NM_CHECK_ADDR_LEN(na, addr, len);

			if (slot->flags & NS_BUF_CHANGED) {
				/* buffer has changed, reload map */
				netmap_reload_map(na, txq->ift_buf_tag,
				    txq->ift_sds.ifsd_map[nic_i], addr);
			}
			/* make sure changes to the buffer are synced */
			bus_dmamap_sync(txq->ift_buf_tag,
			    txq->ift_sds.ifsd_map[nic_i],
			    BUS_DMASYNC_PREWRITE);

			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED | NS_MOREFRAG);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = nm_i;

		/* synchronize the NIC ring */
		bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 *
	 * If there are unclaimed buffers, attempt to reclaim them.
	 * If we don't manage to reclaim them all, and TX IRQs are not in use,
	 * trigger a per-tx-queue timer to try again later.
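	 * (The timer fires iflib_netmap_timer() after roughly
	 * NETMAP_TX_TIMER_US microseconds, which just kicks the netmap
	 * application via netmap_tx_irq() so it can call txsync again.)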
	 */
	if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) {
		if (iflib_tx_credits_update(ctx, txq)) {
			/* some tx completed, increment avail */
			nic_i = txq->ift_cidx_processed;
			kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
		}
	}

	if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ))
		if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) {
			callout_reset_sbt_on(&txq->ift_netmap_timer,
			    NETMAP_TX_TIMER_US * SBT_1US, SBT_1US,
			    iflib_netmap_timer, txq,
			    txq->ift_netmap_timer.c_cpu, 0);
		}

	if_inc_counter(ifp, IFCOUNTER_OBYTES, tx_bytes);
	if_inc_counter(ifp, IFCOUNTER_OPACKETS, tx_pkts);

	return (0);
}

/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
 */
static int
iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct netmap_ring *ring = kring->ring;
	if_t ifp = na->ifp;
	uint32_t nm_i;	/* index into the netmap ring */
	uint32_t nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
	int i = 0, rx_bytes = 0, rx_pkts = 0;

	if_ctx_t ctx = ifp->if_softc;
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
	iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
	iflib_fl_t fl = &rxq->ifr_fl[0];
	struct if_rxd_info ri;
	qidx_t *cidxp;

	/*
	 * netmap only uses free list 0, to avoid out of order consumption
	 * of receive buffers
	 */

	bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

	/*
	 * First part: import newly received packets.
	 *
	 * nm_i is the index of the next free slot in the netmap ring,
	 * nic_i is the index of the next received packet in the NIC ring
	 * (or in the free list 0 if IFLIB_HAS_RXCQ is set), and they may
	 * differ in case if_init() has been called while
	 * in netmap mode. For the receive ring we have
	 *
	 *	nic_i = fl->ifl_cidx;
	 *	nm_i = kring->nr_hwtail (previous)
	 * and
	 *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
	 *
	 * fl->ifl_cidx is set to 0 on a ring reinit
	 */
	if (netmap_no_pendintr || force_update) {
		uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
		bool have_rxcq = sctx->isc_flags & IFLIB_HAS_RXCQ;
		int crclen = iflib_crcstrip ? 0 : 4;
		int error, avail;

		/*
		 * For the free list consumer index, we use the same
		 * logic as in iflib_rxeof().
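		 * That is, with IFLIB_HAS_RXCQ the consumer index lives in
		 * rxq->ifr_cq_cidx and is advanced to the value the driver
		 * reports back in ri.iri_cidx (wrapping at isc_nrxd[0]);
		 * otherwise it is simply free list 0's ifl_cidx.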
1246 */ 1247 if (have_rxcq) 1248 cidxp = &rxq->ifr_cq_cidx; 1249 else 1250 cidxp = &fl->ifl_cidx; 1251 avail = ctx->isc_rxd_available(ctx->ifc_softc, 1252 rxq->ifr_id, *cidxp, USHRT_MAX); 1253 1254 nic_i = fl->ifl_cidx; 1255 nm_i = netmap_idx_n2k(kring, nic_i); 1256 MPASS(nm_i == kring->nr_hwtail); 1257 for (n = 0; avail > 0 && nm_i != hwtail_lim; n++, avail--) { 1258 rxd_info_zero(&ri); 1259 ri.iri_frags = rxq->ifr_frags; 1260 ri.iri_qsidx = kring->ring_id; 1261 ri.iri_ifp = ctx->ifc_ifp; 1262 ri.iri_cidx = *cidxp; 1263 1264 error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 1265 for (i = 0; i < ri.iri_nfrags; i++) { 1266 if (error) { 1267 ring->slot[nm_i].len = 0; 1268 ring->slot[nm_i].flags = 0; 1269 } else { 1270 ring->slot[nm_i].len = ri.iri_frags[i].irf_len; 1271 if (i == (ri.iri_nfrags - 1)) { 1272 ring->slot[nm_i].len -= crclen; 1273 ring->slot[nm_i].flags = 0; 1274 1275 /* Update receive counters */ 1276 rx_bytes += ri.iri_len; 1277 rx_pkts++; 1278 } else 1279 ring->slot[nm_i].flags = NS_MOREFRAG; 1280 } 1281 1282 bus_dmamap_sync(fl->ifl_buf_tag, 1283 fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); 1284 nm_i = nm_next(nm_i, lim); 1285 fl->ifl_cidx = nic_i = nm_next(nic_i, lim); 1286 } 1287 1288 if (have_rxcq) { 1289 *cidxp = ri.iri_cidx; 1290 while (*cidxp >= scctx->isc_nrxd[0]) 1291 *cidxp -= scctx->isc_nrxd[0]; 1292 } 1293 1294 } 1295 if (n) { /* update the state variables */ 1296 if (netmap_no_pendintr && !force_update) { 1297 /* diagnostics */ 1298 iflib_rx_miss ++; 1299 iflib_rx_miss_bufs += n; 1300 } 1301 kring->nr_hwtail = nm_i; 1302 } 1303 kring->nr_kflags &= ~NKR_PENDINTR; 1304 } 1305 /* 1306 * Second part: skip past packets that userspace has released. 1307 * (kring->nr_hwcur to head excluded), 1308 * and make the buffers available for reception. 
1309 * As usual nm_i is the index in the netmap ring, 1310 * nic_i is the index in the NIC ring, and 1311 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 1312 */ 1313 netmap_fl_refill(rxq, kring, false); 1314 1315 if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); 1316 if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); 1317 1318 return (0); 1319 } 1320 1321 static void 1322 iflib_netmap_intr(struct netmap_adapter *na, int onoff) 1323 { 1324 if_ctx_t ctx = na->ifp->if_softc; 1325 1326 CTX_LOCK(ctx); 1327 if (onoff) { 1328 IFDI_INTR_ENABLE(ctx); 1329 } else { 1330 IFDI_INTR_DISABLE(ctx); 1331 } 1332 CTX_UNLOCK(ctx); 1333 } 1334 1335 static int 1336 iflib_netmap_attach(if_ctx_t ctx) 1337 { 1338 struct netmap_adapter na; 1339 1340 bzero(&na, sizeof(na)); 1341 1342 na.ifp = ctx->ifc_ifp; 1343 na.na_flags = NAF_BDG_MAYSLEEP | NAF_MOREFRAG; 1344 MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); 1345 MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); 1346 1347 na.num_tx_desc = iflib_num_tx_descs(ctx); 1348 na.num_rx_desc = iflib_num_rx_descs(ctx); 1349 na.nm_txsync = iflib_netmap_txsync; 1350 na.nm_rxsync = iflib_netmap_rxsync; 1351 na.nm_register = iflib_netmap_register; 1352 na.nm_intr = iflib_netmap_intr; 1353 na.nm_config = iflib_netmap_config; 1354 na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; 1355 na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; 1356 return (netmap_attach(&na)); 1357 } 1358 1359 static int 1360 iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) 1361 { 1362 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1363 struct netmap_slot *slot; 1364 1365 slot = netmap_reset(na, NR_TX, txq->ift_id, 0); 1366 if (slot == NULL) 1367 return (0); 1368 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { 1369 /* 1370 * In netmap mode, set the map for the packet buffer. 1371 * NOTE: Some drivers (not this one) also need to set 1372 * the physical buffer address in the NIC ring. 1373 * netmap_idx_n2k() maps a nic index, i, into the corresponding 1374 * netmap slot index, si 1375 */ 1376 int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); 1377 netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], 1378 NMB(na, slot + si)); 1379 } 1380 return (1); 1381 } 1382 1383 static int 1384 iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) 1385 { 1386 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1387 struct netmap_kring *kring; 1388 struct netmap_slot *slot; 1389 1390 slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); 1391 if (slot == NULL) 1392 return (0); 1393 kring = na->rx_rings[rxq->ifr_id]; 1394 netmap_fl_refill(rxq, kring, true); 1395 return (1); 1396 } 1397 1398 static void 1399 iflib_netmap_timer(void *arg) 1400 { 1401 iflib_txq_t txq = arg; 1402 if_ctx_t ctx = txq->ift_ctx; 1403 1404 /* 1405 * Wake up the netmap application, to give it a chance to 1406 * call txsync and reclaim more completed TX buffers. 
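	 * This callout is armed by iflib_netmap_txsync() only when the
	 * driver is not using TX interrupts for netmap (IFC_NETMAP_TX_IRQ
	 * is clear) and some buffers were left unreclaimed.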
1407 */ 1408 netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); 1409 } 1410 1411 #define iflib_netmap_detach(ifp) netmap_detach(ifp) 1412 1413 #else 1414 #define iflib_netmap_txq_init(ctx, txq) (0) 1415 #define iflib_netmap_rxq_init(ctx, rxq) (0) 1416 #define iflib_netmap_detach(ifp) 1417 #define netmap_enable_all_rings(ifp) 1418 #define netmap_disable_all_rings(ifp) 1419 1420 #define iflib_netmap_attach(ctx) (0) 1421 #define netmap_rx_irq(ifp, qid, budget) (0) 1422 #endif 1423 1424 #if defined(__i386__) || defined(__amd64__) 1425 static __inline void 1426 prefetch(void *x) 1427 { 1428 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1429 } 1430 static __inline void 1431 prefetch2cachelines(void *x) 1432 { 1433 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1434 #if (CACHE_LINE_SIZE < 128) 1435 __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); 1436 #endif 1437 } 1438 #else 1439 #define prefetch(x) 1440 #define prefetch2cachelines(x) 1441 #endif 1442 1443 static void 1444 iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) 1445 { 1446 iflib_fl_t fl; 1447 1448 fl = &rxq->ifr_fl[flid]; 1449 iru->iru_paddrs = fl->ifl_bus_addrs; 1450 iru->iru_idxs = fl->ifl_rxd_idxs; 1451 iru->iru_qsidx = rxq->ifr_id; 1452 iru->iru_buf_size = fl->ifl_buf_size; 1453 iru->iru_flidx = fl->ifl_id; 1454 } 1455 1456 static void 1457 _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) 1458 { 1459 if (err) 1460 return; 1461 *(bus_addr_t *) arg = segs[0].ds_addr; 1462 } 1463 1464 int 1465 iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) 1466 { 1467 int err; 1468 device_t dev = ctx->ifc_dev; 1469 1470 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1471 align, 0, /* alignment, bounds */ 1472 BUS_SPACE_MAXADDR, /* lowaddr */ 1473 BUS_SPACE_MAXADDR, /* highaddr */ 1474 NULL, NULL, /* filter, filterarg */ 1475 size, /* maxsize */ 1476 1, /* nsegments */ 1477 size, /* maxsegsize */ 1478 BUS_DMA_ALLOCNOW, /* flags */ 1479 NULL, /* lockfunc */ 1480 NULL, /* lockarg */ 1481 &dma->idi_tag); 1482 if (err) { 1483 device_printf(dev, 1484 "%s: bus_dma_tag_create failed: %d\n", 1485 __func__, err); 1486 goto fail_0; 1487 } 1488 1489 err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, 1490 BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); 1491 if (err) { 1492 device_printf(dev, 1493 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 1494 __func__, (uintmax_t)size, err); 1495 goto fail_1; 1496 } 1497 1498 dma->idi_paddr = IF_BAD_DMA; 1499 err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, 1500 size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); 1501 if (err || dma->idi_paddr == IF_BAD_DMA) { 1502 device_printf(dev, 1503 "%s: bus_dmamap_load failed: %d\n", 1504 __func__, err); 1505 goto fail_2; 1506 } 1507 1508 dma->idi_size = size; 1509 return (0); 1510 1511 fail_2: 1512 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1513 fail_1: 1514 bus_dma_tag_destroy(dma->idi_tag); 1515 fail_0: 1516 dma->idi_tag = NULL; 1517 1518 return (err); 1519 } 1520 1521 int 1522 iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) 1523 { 1524 if_shared_ctx_t sctx = ctx->ifc_sctx; 1525 1526 KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); 1527 1528 return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); 1529 } 1530 1531 int 1532 iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, 
iflib_dma_info_t *dmalist, int mapflags, int count) 1533 { 1534 int i, err; 1535 iflib_dma_info_t *dmaiter; 1536 1537 dmaiter = dmalist; 1538 for (i = 0; i < count; i++, dmaiter++) { 1539 if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) 1540 break; 1541 } 1542 if (err) 1543 iflib_dma_free_multi(dmalist, i); 1544 return (err); 1545 } 1546 1547 void 1548 iflib_dma_free(iflib_dma_info_t dma) 1549 { 1550 if (dma->idi_tag == NULL) 1551 return; 1552 if (dma->idi_paddr != IF_BAD_DMA) { 1553 bus_dmamap_sync(dma->idi_tag, dma->idi_map, 1554 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1555 bus_dmamap_unload(dma->idi_tag, dma->idi_map); 1556 dma->idi_paddr = IF_BAD_DMA; 1557 } 1558 if (dma->idi_vaddr != NULL) { 1559 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1560 dma->idi_vaddr = NULL; 1561 } 1562 bus_dma_tag_destroy(dma->idi_tag); 1563 dma->idi_tag = NULL; 1564 } 1565 1566 void 1567 iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) 1568 { 1569 int i; 1570 iflib_dma_info_t *dmaiter = dmalist; 1571 1572 for (i = 0; i < count; i++, dmaiter++) 1573 iflib_dma_free(*dmaiter); 1574 } 1575 1576 static int 1577 iflib_fast_intr(void *arg) 1578 { 1579 iflib_filter_info_t info = arg; 1580 struct grouptask *gtask = info->ifi_task; 1581 int result; 1582 1583 DBG_COUNTER_INC(fast_intrs); 1584 if (info->ifi_filter != NULL) { 1585 result = info->ifi_filter(info->ifi_filter_arg); 1586 if ((result & FILTER_SCHEDULE_THREAD) == 0) 1587 return (result); 1588 } 1589 1590 GROUPTASK_ENQUEUE(gtask); 1591 return (FILTER_HANDLED); 1592 } 1593 1594 static int 1595 iflib_fast_intr_rxtx(void *arg) 1596 { 1597 iflib_filter_info_t info = arg; 1598 struct grouptask *gtask = info->ifi_task; 1599 if_ctx_t ctx; 1600 iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; 1601 iflib_txq_t txq; 1602 void *sc; 1603 int i, cidx, result; 1604 qidx_t txqid; 1605 bool intr_enable, intr_legacy; 1606 1607 DBG_COUNTER_INC(fast_intrs); 1608 if (info->ifi_filter != NULL) { 1609 result = info->ifi_filter(info->ifi_filter_arg); 1610 if ((result & FILTER_SCHEDULE_THREAD) == 0) 1611 return (result); 1612 } 1613 1614 ctx = rxq->ifr_ctx; 1615 sc = ctx->ifc_softc; 1616 intr_enable = false; 1617 intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY); 1618 MPASS(rxq->ifr_ntxqirq); 1619 for (i = 0; i < rxq->ifr_ntxqirq; i++) { 1620 txqid = rxq->ifr_txqid[i]; 1621 txq = &ctx->ifc_txqs[txqid]; 1622 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 1623 BUS_DMASYNC_POSTREAD); 1624 if (!ctx->isc_txd_credits_update(sc, txqid, false)) { 1625 if (intr_legacy) 1626 intr_enable = true; 1627 else 1628 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); 1629 continue; 1630 } 1631 GROUPTASK_ENQUEUE(&txq->ift_task); 1632 } 1633 if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) 1634 cidx = rxq->ifr_cq_cidx; 1635 else 1636 cidx = rxq->ifr_fl[0].ifl_cidx; 1637 if (iflib_rxd_avail(ctx, rxq, cidx, 1)) 1638 GROUPTASK_ENQUEUE(gtask); 1639 else { 1640 if (intr_legacy) 1641 intr_enable = true; 1642 else 1643 IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 1644 DBG_COUNTER_INC(rx_intr_enables); 1645 } 1646 if (intr_enable) 1647 IFDI_INTR_ENABLE(ctx); 1648 return (FILTER_HANDLED); 1649 } 1650 1651 static int 1652 iflib_fast_intr_ctx(void *arg) 1653 { 1654 iflib_filter_info_t info = arg; 1655 struct grouptask *gtask = info->ifi_task; 1656 int result; 1657 1658 DBG_COUNTER_INC(fast_intrs); 1659 if (info->ifi_filter != NULL) { 1660 result = info->ifi_filter(info->ifi_filter_arg); 1661 if ((result & FILTER_SCHEDULE_THREAD) == 0) 1662 return (result); 1663 } 
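	/*
	 * Either there is no driver filter or it asked for the thread to
	 * run (FILTER_SCHEDULE_THREAD), so hand off to the group task.
	 */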
1664 1665 GROUPTASK_ENQUEUE(gtask); 1666 return (FILTER_HANDLED); 1667 } 1668 1669 static int 1670 _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, 1671 driver_filter_t filter, driver_intr_t handler, void *arg, 1672 const char *name) 1673 { 1674 struct resource *res; 1675 void *tag = NULL; 1676 device_t dev = ctx->ifc_dev; 1677 int flags, i, rc; 1678 1679 flags = RF_ACTIVE; 1680 if (ctx->ifc_flags & IFC_LEGACY) 1681 flags |= RF_SHAREABLE; 1682 MPASS(rid < 512); 1683 i = rid; 1684 res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags); 1685 if (res == NULL) { 1686 device_printf(dev, 1687 "failed to allocate IRQ for rid %d, name %s.\n", rid, name); 1688 return (ENOMEM); 1689 } 1690 irq->ii_res = res; 1691 KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); 1692 rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, 1693 filter, handler, arg, &tag); 1694 if (rc != 0) { 1695 device_printf(dev, 1696 "failed to setup interrupt for rid %d, name %s: %d\n", 1697 rid, name ? name : "unknown", rc); 1698 return (rc); 1699 } else if (name) 1700 bus_describe_intr(dev, res, tag, "%s", name); 1701 1702 irq->ii_tag = tag; 1703 return (0); 1704 } 1705 1706 /********************************************************************* 1707 * 1708 * Allocate DMA resources for TX buffers as well as memory for the TX 1709 * mbuf map. TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a 1710 * iflib_sw_tx_desc_array structure, storing all the information that 1711 * is needed to transmit a packet on the wire. This is called only 1712 * once at attach, setup is done every reset. 1713 * 1714 **********************************************************************/ 1715 static int 1716 iflib_txsd_alloc(iflib_txq_t txq) 1717 { 1718 if_ctx_t ctx = txq->ift_ctx; 1719 if_shared_ctx_t sctx = ctx->ifc_sctx; 1720 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1721 device_t dev = ctx->ifc_dev; 1722 bus_size_t tsomaxsize; 1723 int err, nsegments, ntsosegments; 1724 bool tso; 1725 1726 nsegments = scctx->isc_tx_nsegments; 1727 ntsosegments = scctx->isc_tx_tso_segments_max; 1728 tsomaxsize = scctx->isc_tx_tso_size_max; 1729 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU) 1730 tsomaxsize += sizeof(struct ether_vlan_header); 1731 MPASS(scctx->isc_ntxd[0] > 0); 1732 MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); 1733 MPASS(nsegments > 0); 1734 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) { 1735 MPASS(ntsosegments > 0); 1736 MPASS(sctx->isc_tso_maxsize >= tsomaxsize); 1737 } 1738 1739 /* 1740 * Set up DMA tags for TX buffers. 
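	 * Two tags are created: ift_buf_tag for regular frames, bounded by
	 * isc_tx_maxsize/isc_tx_maxsegsize with isc_tx_nsegments segments,
	 * and (when IFCAP_TSO is supported) ift_tso_buf_tag sized for the
	 * larger TSO limits computed above.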
1741 */ 1742 if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1743 1, 0, /* alignment, bounds */ 1744 BUS_SPACE_MAXADDR, /* lowaddr */ 1745 BUS_SPACE_MAXADDR, /* highaddr */ 1746 NULL, NULL, /* filter, filterarg */ 1747 sctx->isc_tx_maxsize, /* maxsize */ 1748 nsegments, /* nsegments */ 1749 sctx->isc_tx_maxsegsize, /* maxsegsize */ 1750 0, /* flags */ 1751 NULL, /* lockfunc */ 1752 NULL, /* lockfuncarg */ 1753 &txq->ift_buf_tag))) { 1754 device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); 1755 device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", 1756 (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); 1757 goto fail; 1758 } 1759 tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; 1760 if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1761 1, 0, /* alignment, bounds */ 1762 BUS_SPACE_MAXADDR, /* lowaddr */ 1763 BUS_SPACE_MAXADDR, /* highaddr */ 1764 NULL, NULL, /* filter, filterarg */ 1765 tsomaxsize, /* maxsize */ 1766 ntsosegments, /* nsegments */ 1767 sctx->isc_tso_maxsegsize,/* maxsegsize */ 1768 0, /* flags */ 1769 NULL, /* lockfunc */ 1770 NULL, /* lockfuncarg */ 1771 &txq->ift_tso_buf_tag))) { 1772 device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", 1773 err); 1774 goto fail; 1775 } 1776 1777 /* Allocate memory for the TX mbuf map. */ 1778 if (!(txq->ift_sds.ifsd_m = 1779 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1780 scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1781 device_printf(dev, "Unable to allocate TX mbuf map memory\n"); 1782 err = ENOMEM; 1783 goto fail; 1784 } 1785 1786 /* 1787 * Create the DMA maps for TX buffers. 1788 */ 1789 if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( 1790 sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], 1791 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 1792 device_printf(dev, 1793 "Unable to allocate TX buffer DMA map memory\n"); 1794 err = ENOMEM; 1795 goto fail; 1796 } 1797 if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( 1798 sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], 1799 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 1800 device_printf(dev, 1801 "Unable to allocate TSO TX buffer map memory\n"); 1802 err = ENOMEM; 1803 goto fail; 1804 } 1805 for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { 1806 err = bus_dmamap_create(txq->ift_buf_tag, 0, 1807 &txq->ift_sds.ifsd_map[i]); 1808 if (err != 0) { 1809 device_printf(dev, "Unable to create TX DMA map\n"); 1810 goto fail; 1811 } 1812 if (!tso) 1813 continue; 1814 err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, 1815 &txq->ift_sds.ifsd_tso_map[i]); 1816 if (err != 0) { 1817 device_printf(dev, "Unable to create TSO TX DMA map\n"); 1818 goto fail; 1819 } 1820 } 1821 return (0); 1822 fail: 1823 /* We free all, it handles case where we are in the middle */ 1824 iflib_tx_structures_free(ctx); 1825 return (err); 1826 } 1827 1828 static void 1829 iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) 1830 { 1831 bus_dmamap_t map; 1832 1833 if (txq->ift_sds.ifsd_map != NULL) { 1834 map = txq->ift_sds.ifsd_map[i]; 1835 bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); 1836 bus_dmamap_unload(txq->ift_buf_tag, map); 1837 bus_dmamap_destroy(txq->ift_buf_tag, map); 1838 txq->ift_sds.ifsd_map[i] = NULL; 1839 } 1840 1841 if (txq->ift_sds.ifsd_tso_map != NULL) { 1842 map = txq->ift_sds.ifsd_tso_map[i]; 1843 bus_dmamap_sync(txq->ift_tso_buf_tag, map, 1844 BUS_DMASYNC_POSTWRITE); 1845 bus_dmamap_unload(txq->ift_tso_buf_tag, map); 1846 
bus_dmamap_destroy(txq->ift_tso_buf_tag, map); 1847 txq->ift_sds.ifsd_tso_map[i] = NULL; 1848 } 1849 } 1850 1851 static void 1852 iflib_txq_destroy(iflib_txq_t txq) 1853 { 1854 if_ctx_t ctx = txq->ift_ctx; 1855 1856 for (int i = 0; i < txq->ift_size; i++) 1857 iflib_txsd_destroy(ctx, txq, i); 1858 1859 if (txq->ift_br != NULL) { 1860 ifmp_ring_free(txq->ift_br); 1861 txq->ift_br = NULL; 1862 } 1863 1864 mtx_destroy(&txq->ift_mtx); 1865 1866 if (txq->ift_sds.ifsd_map != NULL) { 1867 free(txq->ift_sds.ifsd_map, M_IFLIB); 1868 txq->ift_sds.ifsd_map = NULL; 1869 } 1870 if (txq->ift_sds.ifsd_tso_map != NULL) { 1871 free(txq->ift_sds.ifsd_tso_map, M_IFLIB); 1872 txq->ift_sds.ifsd_tso_map = NULL; 1873 } 1874 if (txq->ift_sds.ifsd_m != NULL) { 1875 free(txq->ift_sds.ifsd_m, M_IFLIB); 1876 txq->ift_sds.ifsd_m = NULL; 1877 } 1878 if (txq->ift_buf_tag != NULL) { 1879 bus_dma_tag_destroy(txq->ift_buf_tag); 1880 txq->ift_buf_tag = NULL; 1881 } 1882 if (txq->ift_tso_buf_tag != NULL) { 1883 bus_dma_tag_destroy(txq->ift_tso_buf_tag); 1884 txq->ift_tso_buf_tag = NULL; 1885 } 1886 if (txq->ift_ifdi != NULL) { 1887 free(txq->ift_ifdi, M_IFLIB); 1888 } 1889 } 1890 1891 static void 1892 iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) 1893 { 1894 struct mbuf **mp; 1895 1896 mp = &txq->ift_sds.ifsd_m[i]; 1897 if (*mp == NULL) 1898 return; 1899 1900 if (txq->ift_sds.ifsd_map != NULL) { 1901 bus_dmamap_sync(txq->ift_buf_tag, 1902 txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); 1903 bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); 1904 } 1905 if (txq->ift_sds.ifsd_tso_map != NULL) { 1906 bus_dmamap_sync(txq->ift_tso_buf_tag, 1907 txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); 1908 bus_dmamap_unload(txq->ift_tso_buf_tag, 1909 txq->ift_sds.ifsd_tso_map[i]); 1910 } 1911 m_freem(*mp); 1912 DBG_COUNTER_INC(tx_frees); 1913 *mp = NULL; 1914 } 1915 1916 static int 1917 iflib_txq_setup(iflib_txq_t txq) 1918 { 1919 if_ctx_t ctx = txq->ift_ctx; 1920 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1921 if_shared_ctx_t sctx = ctx->ifc_sctx; 1922 iflib_dma_info_t di; 1923 int i; 1924 1925 /* Set number of descriptors available */ 1926 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 1927 /* XXX make configurable */ 1928 txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; 1929 1930 /* Reset indices */ 1931 txq->ift_cidx_processed = 0; 1932 txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; 1933 txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; 1934 1935 for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) 1936 bzero((void *)di->idi_vaddr, di->idi_size); 1937 1938 IFDI_TXQ_SETUP(ctx, txq->ift_id); 1939 for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) 1940 bus_dmamap_sync(di->idi_tag, di->idi_map, 1941 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1942 return (0); 1943 } 1944 1945 /********************************************************************* 1946 * 1947 * Allocate DMA resources for RX buffers as well as memory for the RX 1948 * mbuf map, direct RX cluster pointer map and RX cluster bus address 1949 * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and 1950 * RX cluster map are kept in a iflib_sw_rx_desc_array structure. 1951 * Since we use use one entry in iflib_sw_rx_desc_array per received 1952 * packet, the maximum number of entries we'll need is equal to the 1953 * number of hardware receive descriptors that we've allocated. 
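 *
 * The per-descriptor state lives in parallel arrays indexed by the
 * descriptor slot, so slot i is described by (roughly):
 *
 *	ifsd_m[i]	mbuf header attached to the cluster in slot i
 *	ifsd_cl[i]	receive cluster (data buffer) for slot i
 *	ifsd_ba[i]	bus address the cluster was DMA-loaded at
 *	ifsd_map[i]	bus_dmamap_t used to load/unload that cluster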
1954 * 1955 **********************************************************************/ 1956 static int 1957 iflib_rxsd_alloc(iflib_rxq_t rxq) 1958 { 1959 if_ctx_t ctx = rxq->ifr_ctx; 1960 if_shared_ctx_t sctx = ctx->ifc_sctx; 1961 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1962 device_t dev = ctx->ifc_dev; 1963 iflib_fl_t fl; 1964 int err; 1965 1966 MPASS(scctx->isc_nrxd[0] > 0); 1967 MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); 1968 1969 fl = rxq->ifr_fl; 1970 for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { 1971 fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ 1972 /* Set up DMA tag for RX buffers. */ 1973 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1974 1, 0, /* alignment, bounds */ 1975 BUS_SPACE_MAXADDR, /* lowaddr */ 1976 BUS_SPACE_MAXADDR, /* highaddr */ 1977 NULL, NULL, /* filter, filterarg */ 1978 sctx->isc_rx_maxsize, /* maxsize */ 1979 sctx->isc_rx_nsegments, /* nsegments */ 1980 sctx->isc_rx_maxsegsize, /* maxsegsize */ 1981 0, /* flags */ 1982 NULL, /* lockfunc */ 1983 NULL, /* lockarg */ 1984 &fl->ifl_buf_tag); 1985 if (err) { 1986 device_printf(dev, 1987 "Unable to allocate RX DMA tag: %d\n", err); 1988 goto fail; 1989 } 1990 1991 /* Allocate memory for the RX mbuf map. */ 1992 if (!(fl->ifl_sds.ifsd_m = 1993 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1994 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1995 device_printf(dev, 1996 "Unable to allocate RX mbuf map memory\n"); 1997 err = ENOMEM; 1998 goto fail; 1999 } 2000 2001 /* Allocate memory for the direct RX cluster pointer map. */ 2002 if (!(fl->ifl_sds.ifsd_cl = 2003 (caddr_t *) malloc(sizeof(caddr_t) * 2004 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 2005 device_printf(dev, 2006 "Unable to allocate RX cluster map memory\n"); 2007 err = ENOMEM; 2008 goto fail; 2009 } 2010 2011 /* Allocate memory for the RX cluster bus address map. */ 2012 if (!(fl->ifl_sds.ifsd_ba = 2013 (bus_addr_t *) malloc(sizeof(bus_addr_t) * 2014 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 2015 device_printf(dev, 2016 "Unable to allocate RX bus address map memory\n"); 2017 err = ENOMEM; 2018 goto fail; 2019 } 2020 2021 /* 2022 * Create the DMA maps for RX buffers. 
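 * One bus_dmamap_t is created per receive descriptor and reused for the
 * lifetime of the queue: iflib_fl_refill() loads a fresh cluster into
 * the map (ifl_rx_bitmap tracks which slots currently have a cluster
 * posted), while iflib_fl_bufs_free() and iflib_rx_sds_free() sync,
 * unload and eventually destroy it.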
2023 */ 2024 if (!(fl->ifl_sds.ifsd_map = 2025 (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 2026 device_printf(dev, 2027 "Unable to allocate RX buffer DMA map memory\n"); 2028 err = ENOMEM; 2029 goto fail; 2030 } 2031 for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { 2032 err = bus_dmamap_create(fl->ifl_buf_tag, 0, 2033 &fl->ifl_sds.ifsd_map[i]); 2034 if (err != 0) { 2035 device_printf(dev, "Unable to create RX buffer DMA map\n"); 2036 goto fail; 2037 } 2038 } 2039 } 2040 return (0); 2041 2042 fail: 2043 iflib_rx_structures_free(ctx); 2044 return (err); 2045 } 2046 2047 /* 2048 * Internal service routines 2049 */ 2050 2051 struct rxq_refill_cb_arg { 2052 int error; 2053 bus_dma_segment_t seg; 2054 int nseg; 2055 }; 2056 2057 static void 2058 _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 2059 { 2060 struct rxq_refill_cb_arg *cb_arg = arg; 2061 2062 cb_arg->error = error; 2063 cb_arg->seg = segs[0]; 2064 cb_arg->nseg = nseg; 2065 } 2066 2067 /** 2068 * iflib_fl_refill - refill an rxq free-buffer list 2069 * @ctx: the iflib context 2070 * @fl: the free list to refill 2071 * @count: the number of new buffers to allocate 2072 * 2073 * (Re)populate an rxq free-buffer list with up to @count new packet buffers. 2074 * The caller must assure that @count does not exceed the queue's capacity 2075 * minus one (since we always leave a descriptor unavailable). 2076 */ 2077 static uint8_t 2078 iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) 2079 { 2080 struct if_rxd_update iru; 2081 struct rxq_refill_cb_arg cb_arg; 2082 struct mbuf *m; 2083 caddr_t cl, *sd_cl; 2084 struct mbuf **sd_m; 2085 bus_dmamap_t *sd_map; 2086 bus_addr_t bus_addr, *sd_ba; 2087 int err, frag_idx, i, idx, n, pidx; 2088 qidx_t credits; 2089 2090 MPASS(count <= fl->ifl_size - fl->ifl_credits - 1); 2091 2092 sd_m = fl->ifl_sds.ifsd_m; 2093 sd_map = fl->ifl_sds.ifsd_map; 2094 sd_cl = fl->ifl_sds.ifsd_cl; 2095 sd_ba = fl->ifl_sds.ifsd_ba; 2096 pidx = fl->ifl_pidx; 2097 idx = pidx; 2098 frag_idx = fl->ifl_fragidx; 2099 credits = fl->ifl_credits; 2100 2101 i = 0; 2102 n = count; 2103 MPASS(n > 0); 2104 MPASS(credits + n <= fl->ifl_size); 2105 2106 if (pidx < fl->ifl_cidx) 2107 MPASS(pidx + n <= fl->ifl_cidx); 2108 if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) 2109 MPASS(fl->ifl_gen == 0); 2110 if (pidx > fl->ifl_cidx) 2111 MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); 2112 2113 DBG_COUNTER_INC(fl_refills); 2114 if (n > 8) 2115 DBG_COUNTER_INC(fl_refills_large); 2116 iru_init(&iru, fl->ifl_rxq, fl->ifl_id); 2117 while (n-- > 0) { 2118 /* 2119 * We allocate an uninitialized mbuf + cluster, mbuf is 2120 * initialized after rx. 
2121 * 2122 * If the cluster is still set then we know a minimum sized 2123 * packet was received 2124 */ 2125 bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, 2126 &frag_idx); 2127 if (frag_idx < 0) 2128 bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); 2129 MPASS(frag_idx >= 0); 2130 if ((cl = sd_cl[frag_idx]) == NULL) { 2131 #ifndef __HAIKU__ 2132 cl = uma_zalloc(fl->ifl_zone, M_NOWAIT); 2133 if (__predict_false(cl == NULL)) 2134 #else 2135 if ((cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) 2136 #endif 2137 break; 2138 2139 cb_arg.error = 0; 2140 MPASS(sd_map != NULL); 2141 err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx], 2142 cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 2143 BUS_DMA_NOWAIT); 2144 if (__predict_false(err != 0 || cb_arg.error)) { 2145 #ifndef __HAIKU__ 2146 uma_zfree(fl->ifl_zone, cl); 2147 #else 2148 m_free(cl); 2149 #endif 2150 break; 2151 } 2152 2153 sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; 2154 sd_cl[frag_idx] = cl; 2155 #if MEMORY_LOGGING 2156 fl->ifl_cl_enqueued++; 2157 #endif 2158 } else { 2159 bus_addr = sd_ba[frag_idx]; 2160 } 2161 bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], 2162 BUS_DMASYNC_PREREAD); 2163 2164 if (sd_m[frag_idx] == NULL) { 2165 m = m_gethdr(M_NOWAIT, MT_NOINIT); 2166 if (__predict_false(m == NULL)) 2167 break; 2168 sd_m[frag_idx] = m; 2169 } 2170 bit_set(fl->ifl_rx_bitmap, frag_idx); 2171 #if MEMORY_LOGGING 2172 fl->ifl_m_enqueued++; 2173 #endif 2174 2175 DBG_COUNTER_INC(rx_allocs); 2176 fl->ifl_rxd_idxs[i] = frag_idx; 2177 fl->ifl_bus_addrs[i] = bus_addr; 2178 credits++; 2179 i++; 2180 MPASS(credits <= fl->ifl_size); 2181 if (++idx == fl->ifl_size) { 2182 #ifdef INVARIANTS 2183 fl->ifl_gen = 1; 2184 #endif 2185 idx = 0; 2186 } 2187 if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { 2188 iru.iru_pidx = pidx; 2189 iru.iru_count = i; 2190 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 2191 fl->ifl_pidx = idx; 2192 fl->ifl_credits = credits; 2193 pidx = idx; 2194 i = 0; 2195 } 2196 } 2197 2198 if (n < count - 1) { 2199 if (i != 0) { 2200 iru.iru_pidx = pidx; 2201 iru.iru_count = i; 2202 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 2203 fl->ifl_pidx = idx; 2204 fl->ifl_credits = credits; 2205 } 2206 DBG_COUNTER_INC(rxd_flush); 2207 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2208 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2209 ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, 2210 fl->ifl_id, fl->ifl_pidx); 2211 if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) { 2212 fl->ifl_fragidx = frag_idx + 1; 2213 if (fl->ifl_fragidx == fl->ifl_size) 2214 fl->ifl_fragidx = 0; 2215 } else { 2216 fl->ifl_fragidx = frag_idx; 2217 } 2218 } 2219 2220 return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY); 2221 } 2222 2223 static inline uint8_t 2224 iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl) 2225 { 2226 /* 2227 * We leave an unused descriptor to avoid pidx to catch up with cidx. 2228 * This is important as it confuses most NICs. For instance, 2229 * Intel NICs have (per receive ring) RDH and RDT registers, where 2230 * RDH points to the next receive descriptor to be used by the NIC, 2231 * and RDT for the next receive descriptor to be published by the 2232 * driver to the NIC (RDT - 1 is thus the last valid one). 2233 * The condition RDH == RDT means no descriptors are available to 2234 * the NIC, and thus it would be ambiguous if it also meant that 2235 * all the descriptors are available to the NIC. 
2236 */ 2237 int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; 2238 #ifdef INVARIANTS 2239 int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; 2240 #endif 2241 2242 MPASS(fl->ifl_credits <= fl->ifl_size); 2243 MPASS(reclaimable == delta); 2244 2245 if (reclaimable > 0) 2246 return (iflib_fl_refill(ctx, fl, reclaimable)); 2247 return (0); 2248 } 2249 2250 uint8_t 2251 iflib_in_detach(if_ctx_t ctx) 2252 { 2253 bool in_detach; 2254 2255 STATE_LOCK(ctx); 2256 in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); 2257 STATE_UNLOCK(ctx); 2258 return (in_detach); 2259 } 2260 2261 static void 2262 iflib_fl_bufs_free(iflib_fl_t fl) 2263 { 2264 iflib_dma_info_t idi = fl->ifl_ifdi; 2265 bus_dmamap_t sd_map; 2266 uint32_t i; 2267 2268 for (i = 0; i < fl->ifl_size; i++) { 2269 struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; 2270 caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; 2271 2272 if (*sd_cl != NULL) { 2273 sd_map = fl->ifl_sds.ifsd_map[i]; 2274 bus_dmamap_sync(fl->ifl_buf_tag, sd_map, 2275 BUS_DMASYNC_POSTREAD); 2276 bus_dmamap_unload(fl->ifl_buf_tag, sd_map); 2277 #ifndef __HAIKU__ 2278 uma_zfree(fl->ifl_zone, *sd_cl); 2279 #else 2280 struct mbuf* mb = m_get(0, MT_DATA); 2281 m_cljset(mb, *sd_cl, fl->ifl_cltype); 2282 m_free(mb); 2283 #endif 2284 *sd_cl = NULL; 2285 if (*sd_m != NULL) { 2286 m_init(*sd_m, M_NOWAIT, MT_DATA, 0); 2287 #ifndef __HAIKU__ 2288 m_free_raw(*sd_m); 2289 #else 2290 m_free(*sd_m); 2291 #endif 2292 *sd_m = NULL; 2293 } 2294 } else { 2295 MPASS(*sd_m == NULL); 2296 } 2297 #if MEMORY_LOGGING 2298 fl->ifl_m_dequeued++; 2299 fl->ifl_cl_dequeued++; 2300 #endif 2301 } 2302 #ifdef INVARIANTS 2303 for (i = 0; i < fl->ifl_size; i++) { 2304 MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); 2305 MPASS(fl->ifl_sds.ifsd_m[i] == NULL); 2306 } 2307 #endif 2308 /* 2309 * Reset free list values 2310 */ 2311 fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; 2312 bzero(idi->idi_vaddr, idi->idi_size); 2313 } 2314 2315 /********************************************************************* 2316 * 2317 * Initialize a free list and its buffers. 2318 * 2319 **********************************************************************/ 2320 static int 2321 iflib_fl_setup(iflib_fl_t fl) 2322 { 2323 iflib_rxq_t rxq = fl->ifl_rxq; 2324 if_ctx_t ctx = rxq->ifr_ctx; 2325 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2326 int qidx; 2327 2328 bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); 2329 /* 2330 ** Free current RX buffer structs and their mbufs 2331 */ 2332 iflib_fl_bufs_free(fl); 2333 /* Now replenish the mbufs */ 2334 MPASS(fl->ifl_credits == 0); 2335 qidx = rxq->ifr_fl_offset + fl->ifl_id; 2336 if (scctx->isc_rxd_buf_size[qidx] != 0) 2337 fl->ifl_buf_size = scctx->isc_rxd_buf_size[qidx]; 2338 else 2339 fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz; 2340 /* 2341 * ifl_buf_size may be a driver-supplied value, so pull it up 2342 * to the selected mbuf size. 2343 */ 2344 fl->ifl_buf_size = iflib_get_mbuf_size_for(fl->ifl_buf_size); 2345 if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) 2346 ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; 2347 fl->ifl_cltype = m_gettype(fl->ifl_buf_size); 2348 #ifndef __HAIKU__ 2349 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 2350 #endif 2351 2352 /* 2353 * Avoid pre-allocating zillions of clusters to an idle card 2354 * potentially speeding up attach. In any case make sure 2355 * to leave a descriptor unavailable. See the comment in 2356 * iflib_fl_refill_all(). 
2357 */ 2358 MPASS(fl->ifl_size > 0); 2359 (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size - 1)); 2360 if (min(128, fl->ifl_size - 1) != fl->ifl_credits) 2361 return (ENOBUFS); 2362 /* 2363 * handle failure 2364 */ 2365 MPASS(rxq != NULL); 2366 MPASS(fl->ifl_ifdi != NULL); 2367 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2368 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2369 return (0); 2370 } 2371 2372 /********************************************************************* 2373 * 2374 * Free receive ring data structures 2375 * 2376 **********************************************************************/ 2377 static void 2378 iflib_rx_sds_free(iflib_rxq_t rxq) 2379 { 2380 iflib_fl_t fl; 2381 int i, j; 2382 2383 if (rxq->ifr_fl != NULL) { 2384 for (i = 0; i < rxq->ifr_nfl; i++) { 2385 fl = &rxq->ifr_fl[i]; 2386 if (fl->ifl_buf_tag != NULL) { 2387 if (fl->ifl_sds.ifsd_map != NULL) { 2388 for (j = 0; j < fl->ifl_size; j++) { 2389 bus_dmamap_sync( 2390 fl->ifl_buf_tag, 2391 fl->ifl_sds.ifsd_map[j], 2392 BUS_DMASYNC_POSTREAD); 2393 bus_dmamap_unload( 2394 fl->ifl_buf_tag, 2395 fl->ifl_sds.ifsd_map[j]); 2396 bus_dmamap_destroy( 2397 fl->ifl_buf_tag, 2398 fl->ifl_sds.ifsd_map[j]); 2399 } 2400 } 2401 bus_dma_tag_destroy(fl->ifl_buf_tag); 2402 fl->ifl_buf_tag = NULL; 2403 } 2404 free(fl->ifl_sds.ifsd_m, M_IFLIB); 2405 free(fl->ifl_sds.ifsd_cl, M_IFLIB); 2406 free(fl->ifl_sds.ifsd_ba, M_IFLIB); 2407 free(fl->ifl_sds.ifsd_map, M_IFLIB); 2408 free(fl->ifl_rx_bitmap, M_IFLIB); 2409 fl->ifl_sds.ifsd_m = NULL; 2410 fl->ifl_sds.ifsd_cl = NULL; 2411 fl->ifl_sds.ifsd_ba = NULL; 2412 fl->ifl_sds.ifsd_map = NULL; 2413 fl->ifl_rx_bitmap = NULL; 2414 } 2415 free(rxq->ifr_fl, M_IFLIB); 2416 rxq->ifr_fl = NULL; 2417 free(rxq->ifr_ifdi, M_IFLIB); 2418 rxq->ifr_ifdi = NULL; 2419 rxq->ifr_cq_cidx = 0; 2420 } 2421 } 2422 2423 /* 2424 * Timer routine 2425 */ 2426 static void 2427 iflib_timer(void *arg) 2428 { 2429 iflib_txq_t txq = arg; 2430 if_ctx_t ctx = txq->ift_ctx; 2431 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2432 uint64_t this_tick = ticks; 2433 2434 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) 2435 return; 2436 2437 /* 2438 ** Check on the state of the TX queue(s), this 2439 ** can be done without the lock because its RO 2440 ** and the HUNG state will be static if set. 
2441 */ 2442 if (this_tick - txq->ift_last_timer_tick >= iflib_timer_default) { 2443 txq->ift_last_timer_tick = this_tick; 2444 IFDI_TIMER(ctx, txq->ift_id); 2445 if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && 2446 ((txq->ift_cleaned_prev == txq->ift_cleaned) || 2447 (sctx->isc_pause_frames == 0))) 2448 goto hung; 2449 2450 if (txq->ift_qstatus != IFLIB_QUEUE_IDLE && 2451 ifmp_ring_is_stalled(txq->ift_br)) { 2452 KASSERT(ctx->ifc_link_state == LINK_STATE_UP, 2453 ("queue can't be marked as hung if interface is down")); 2454 txq->ift_qstatus = IFLIB_QUEUE_HUNG; 2455 } 2456 txq->ift_cleaned_prev = txq->ift_cleaned; 2457 } 2458 /* handle any laggards */ 2459 if (txq->ift_db_pending) 2460 GROUPTASK_ENQUEUE(&txq->ift_task); 2461 2462 sctx->isc_pause_frames = 0; 2463 if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) 2464 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, 2465 txq, txq->ift_timer.c_cpu); 2466 return; 2467 2468 hung: 2469 device_printf(ctx->ifc_dev, 2470 "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", 2471 txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); 2472 STATE_LOCK(ctx); 2473 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2474 ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); 2475 iflib_admin_intr_deferred(ctx); 2476 STATE_UNLOCK(ctx); 2477 } 2478 2479 static uint16_t 2480 iflib_get_mbuf_size_for(unsigned int size) 2481 { 2482 2483 if (size <= MCLBYTES) 2484 return (MCLBYTES); 2485 else 2486 return (MJUMPAGESIZE); 2487 } 2488 2489 static void 2490 iflib_calc_rx_mbuf_sz(if_ctx_t ctx) 2491 { 2492 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2493 2494 /* 2495 * XXX don't set the max_frame_size to larger 2496 * than the hardware can handle 2497 */ 2498 ctx->ifc_rx_mbuf_sz = 2499 iflib_get_mbuf_size_for(sctx->isc_max_frame_size); 2500 } 2501 2502 uint32_t 2503 iflib_get_rx_mbuf_sz(if_ctx_t ctx) 2504 { 2505 2506 return (ctx->ifc_rx_mbuf_sz); 2507 } 2508 2509 static void 2510 iflib_init_locked(if_ctx_t ctx) 2511 { 2512 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2513 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2514 if_t ifp = ctx->ifc_ifp; 2515 iflib_fl_t fl; 2516 iflib_txq_t txq; 2517 iflib_rxq_t rxq; 2518 int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; 2519 2520 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2521 IFDI_INTR_DISABLE(ctx); 2522 2523 /* 2524 * See iflib_stop(). Useful in case iflib_init_locked() is 2525 * called without first calling iflib_stop(). 
2526 */ 2527 netmap_disable_all_rings(ifp); 2528 2529 tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); 2530 tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); 2531 /* Set hardware offload abilities */ 2532 if_clearhwassist(ifp); 2533 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 2534 if_sethwassistbits(ifp, tx_ip_csum_flags, 0); 2535 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) 2536 if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); 2537 if (if_getcapenable(ifp) & IFCAP_TSO4) 2538 if_sethwassistbits(ifp, CSUM_IP_TSO, 0); 2539 if (if_getcapenable(ifp) & IFCAP_TSO6) 2540 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); 2541 2542 for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { 2543 CALLOUT_LOCK(txq); 2544 callout_stop(&txq->ift_timer); 2545 #ifdef DEV_NETMAP 2546 callout_stop(&txq->ift_netmap_timer); 2547 #endif /* DEV_NETMAP */ 2548 CALLOUT_UNLOCK(txq); 2549 iflib_netmap_txq_init(ctx, txq); 2550 } 2551 2552 /* 2553 * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so 2554 * that drivers can use the value when setting up the hardware receive 2555 * buffers. 2556 */ 2557 iflib_calc_rx_mbuf_sz(ctx); 2558 2559 #ifdef INVARIANTS 2560 i = if_getdrvflags(ifp); 2561 #endif 2562 IFDI_INIT(ctx); 2563 MPASS(if_getdrvflags(ifp) == i); 2564 for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { 2565 if (iflib_netmap_rxq_init(ctx, rxq) > 0) { 2566 /* This rxq is in netmap mode. Skip normal init. */ 2567 continue; 2568 } 2569 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 2570 if (iflib_fl_setup(fl)) { 2571 device_printf(ctx->ifc_dev, 2572 "setting up free list %d failed - " 2573 "check cluster settings\n", j); 2574 goto done; 2575 } 2576 } 2577 } 2578 done: 2579 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2580 IFDI_INTR_ENABLE(ctx); 2581 txq = ctx->ifc_txqs; 2582 for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) 2583 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, 2584 txq->ift_timer.c_cpu); 2585 2586 /* Re-enable txsync/rxsync. */ 2587 netmap_enable_all_rings(ifp); 2588 } 2589 2590 static int 2591 iflib_media_change(if_t ifp) 2592 { 2593 if_ctx_t ctx = if_getsoftc(ifp); 2594 int err; 2595 2596 CTX_LOCK(ctx); 2597 if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) 2598 iflib_if_init_locked(ctx); 2599 CTX_UNLOCK(ctx); 2600 return (err); 2601 } 2602 2603 static void 2604 iflib_media_status(if_t ifp, struct ifmediareq *ifmr) 2605 { 2606 if_ctx_t ctx = if_getsoftc(ifp); 2607 2608 CTX_LOCK(ctx); 2609 IFDI_UPDATE_ADMIN_STATUS(ctx); 2610 IFDI_MEDIA_STATUS(ctx, ifmr); 2611 CTX_UNLOCK(ctx); 2612 } 2613 2614 void 2615 iflib_stop(if_ctx_t ctx) 2616 { 2617 iflib_txq_t txq = ctx->ifc_txqs; 2618 iflib_rxq_t rxq = ctx->ifc_rxqs; 2619 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2620 if_shared_ctx_t sctx = ctx->ifc_sctx; 2621 iflib_dma_info_t di; 2622 iflib_fl_t fl; 2623 int i, j; 2624 2625 /* Tell the stack that the interface is no longer active */ 2626 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2627 2628 IFDI_INTR_DISABLE(ctx); 2629 DELAY(1000); 2630 IFDI_STOP(ctx); 2631 DELAY(1000); 2632 2633 /* 2634 * Stop any pending txsync/rxsync and prevent new ones 2635 * form starting. Processes blocked in poll() will get 2636 * POLLERR. 2637 */ 2638 netmap_disable_all_rings(ctx->ifc_ifp); 2639 2640 iflib_debug_reset(); 2641 /* Wait for current tx queue users to exit to disarm watchdog timer. 
*/ 2642 for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { 2643 /* make sure all transmitters have completed before proceeding XXX */ 2644 2645 CALLOUT_LOCK(txq); 2646 callout_stop(&txq->ift_timer); 2647 #ifdef DEV_NETMAP 2648 callout_stop(&txq->ift_netmap_timer); 2649 #endif /* DEV_NETMAP */ 2650 CALLOUT_UNLOCK(txq); 2651 2652 /* clean any enqueued buffers */ 2653 iflib_ifmp_purge(txq); 2654 /* Free any existing tx buffers. */ 2655 for (j = 0; j < txq->ift_size; j++) { 2656 iflib_txsd_free(ctx, txq, j); 2657 } 2658 txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; 2659 txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; 2660 txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; 2661 txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; 2662 txq->ift_pullups = 0; 2663 ifmp_ring_reset_stats(txq->ift_br); 2664 for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) 2665 bzero((void *)di->idi_vaddr, di->idi_size); 2666 } 2667 for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { 2668 /* make sure all transmitters have completed before proceeding XXX */ 2669 2670 rxq->ifr_cq_cidx = 0; 2671 for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) 2672 bzero((void *)di->idi_vaddr, di->idi_size); 2673 /* also resets the free lists pidx/cidx */ 2674 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 2675 iflib_fl_bufs_free(fl); 2676 } 2677 } 2678 2679 static inline caddr_t 2680 calc_next_rxd(iflib_fl_t fl, int cidx) 2681 { 2682 qidx_t size; 2683 int nrxd; 2684 caddr_t start, end, cur, next; 2685 2686 nrxd = fl->ifl_size; 2687 size = fl->ifl_rxd_size; 2688 start = fl->ifl_ifdi->idi_vaddr; 2689 2690 if (__predict_false(size == 0)) 2691 return (start); 2692 cur = start + size*cidx; 2693 end = start + size*nrxd; 2694 next = CACHE_PTR_NEXT(cur); 2695 return (next < end ? 
next : start); 2696 } 2697 2698 static inline void 2699 prefetch_pkts(iflib_fl_t fl, int cidx) 2700 { 2701 int nextptr; 2702 int nrxd = fl->ifl_size; 2703 caddr_t next_rxd; 2704 2705 nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); 2706 prefetch(&fl->ifl_sds.ifsd_m[nextptr]); 2707 prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); 2708 next_rxd = calc_next_rxd(fl, cidx); 2709 prefetch(next_rxd); 2710 prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); 2711 prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); 2712 prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); 2713 prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); 2714 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); 2715 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); 2716 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); 2717 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); 2718 } 2719 2720 static struct mbuf * 2721 rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, 2722 int *pf_rv, if_rxd_info_t ri) 2723 { 2724 bus_dmamap_t map; 2725 iflib_fl_t fl; 2726 caddr_t payload; 2727 struct mbuf *m; 2728 int flid, cidx, len, next; 2729 2730 map = NULL; 2731 flid = irf->irf_flid; 2732 cidx = irf->irf_idx; 2733 fl = &rxq->ifr_fl[flid]; 2734 sd->ifsd_fl = fl; 2735 m = fl->ifl_sds.ifsd_m[cidx]; 2736 sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; 2737 fl->ifl_credits--; 2738 #if MEMORY_LOGGING 2739 fl->ifl_m_dequeued++; 2740 #endif 2741 if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) 2742 prefetch_pkts(fl, cidx); 2743 next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); 2744 prefetch(&fl->ifl_sds.ifsd_map[next]); 2745 map = fl->ifl_sds.ifsd_map[cidx]; 2746 2747 bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); 2748 2749 #ifndef __HAIKU__ 2750 if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL && 2751 irf->irf_len != 0) { 2752 payload = *sd->ifsd_cl; 2753 payload += ri->iri_pad; 2754 len = ri->iri_len - ri->iri_pad; 2755 *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, 2756 len | PFIL_MEMPTR | PFIL_IN, NULL); 2757 switch (*pf_rv) { 2758 case PFIL_DROPPED: 2759 case PFIL_CONSUMED: 2760 /* 2761 * The filter ate it. Everything is recycled. 2762 */ 2763 m = NULL; 2764 unload = 0; 2765 break; 2766 case PFIL_REALLOCED: 2767 /* 2768 * The filter copied it. Everything is recycled. 
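 * (pfil_mem2mbuf() returns the mbuf the filter allocated for its copy,
 * so the original cluster, its DMA mapping and the free-list mbuf all
 * stay in place for reuse, as in the consumed/dropped cases above.)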
2769 */ 2770 m = pfil_mem2mbuf(payload); 2771 unload = 0; 2772 break; 2773 case PFIL_PASS: 2774 /* 2775 * Filter said it was OK, so receive like 2776 * normal 2777 */ 2778 fl->ifl_sds.ifsd_m[cidx] = NULL; 2779 break; 2780 default: 2781 MPASS(0); 2782 } 2783 } else 2784 #endif 2785 { 2786 fl->ifl_sds.ifsd_m[cidx] = NULL; 2787 if (pf_rv != NULL) 2788 *pf_rv = PFIL_PASS; 2789 } 2790 2791 if (unload && irf->irf_len != 0) 2792 bus_dmamap_unload(fl->ifl_buf_tag, map); 2793 fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); 2794 if (__predict_false(fl->ifl_cidx == 0)) 2795 fl->ifl_gen = 0; 2796 bit_clear(fl->ifl_rx_bitmap, cidx); 2797 return (m); 2798 } 2799 2800 static struct mbuf * 2801 assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) 2802 { 2803 struct mbuf *m, *mh, *mt; 2804 caddr_t cl; 2805 int *pf_rv_ptr, flags, i, padlen; 2806 bool consumed; 2807 2808 i = 0; 2809 mh = NULL; 2810 consumed = false; 2811 *pf_rv = PFIL_PASS; 2812 pf_rv_ptr = pf_rv; 2813 do { 2814 m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, 2815 pf_rv_ptr, ri); 2816 2817 MPASS(*sd->ifsd_cl != NULL); 2818 2819 /* 2820 * Exclude zero-length frags & frags from 2821 * packets the filter has consumed or dropped 2822 */ 2823 if (ri->iri_frags[i].irf_len == 0 || consumed || 2824 #ifndef __HAIKU__ 2825 *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED 2826 #else 2827 0 2828 #endif 2829 ) { 2830 if (mh == NULL) { 2831 /* everything saved here */ 2832 consumed = true; 2833 pf_rv_ptr = NULL; 2834 continue; 2835 } 2836 /* XXX we can save the cluster here, but not the mbuf */ 2837 m_init(m, M_NOWAIT, MT_DATA, 0); 2838 m_free(m); 2839 continue; 2840 } 2841 if (mh == NULL) { 2842 flags = M_PKTHDR|M_EXT; 2843 mh = mt = m; 2844 padlen = ri->iri_pad; 2845 } else { 2846 flags = M_EXT; 2847 mt->m_next = m; 2848 mt = m; 2849 /* assuming padding is only on the first fragment */ 2850 padlen = 0; 2851 } 2852 cl = *sd->ifsd_cl; 2853 *sd->ifsd_cl = NULL; 2854 2855 /* Can these two be made one ? */ 2856 m_init(m, M_NOWAIT, MT_DATA, flags); 2857 m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); 2858 /* 2859 * These must follow m_init and m_cljset 2860 */ 2861 m->m_data += padlen; 2862 ri->iri_len -= padlen; 2863 m->m_len = ri->iri_frags[i].irf_len; 2864 } while (++i < ri->iri_nfrags); 2865 2866 return (mh); 2867 } 2868 2869 /* 2870 * Process one software descriptor 2871 */ 2872 static struct mbuf * 2873 iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) 2874 { 2875 struct if_rxsd sd; 2876 struct mbuf *m; 2877 int pf_rv; 2878 2879 /* should I merge this back in now that the two paths are basically duplicated? 
*/ 2880 if (ri->iri_nfrags == 1 && 2881 ri->iri_frags[0].irf_len != 0 && 2882 ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { 2883 m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, 2884 &pf_rv, ri); 2885 if (pf_rv != PFIL_PASS 2886 #ifndef __HAIKU__ 2887 && pf_rv != PFIL_REALLOCED 2888 #endif 2889 ) 2890 return (m); 2891 if (pf_rv == PFIL_PASS) { 2892 m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); 2893 #ifndef __NO_STRICT_ALIGNMENT 2894 if (!IP_ALIGNED(m)) 2895 m->m_data += 2; 2896 #endif 2897 memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); 2898 m->m_len = ri->iri_frags[0].irf_len; 2899 } 2900 } else { 2901 m = assemble_segments(rxq, ri, &sd, &pf_rv); 2902 if (m == NULL) 2903 return (NULL); 2904 if (pf_rv != PFIL_PASS 2905 #ifndef __HAIKU__ 2906 && pf_rv != PFIL_REALLOCED 2907 #endif 2908 ) 2909 return (m); 2910 } 2911 m->m_pkthdr.len = ri->iri_len; 2912 m->m_pkthdr.rcvif = ri->iri_ifp; 2913 m->m_flags |= ri->iri_flags; 2914 m->m_pkthdr.ether_vtag = ri->iri_vtag; 2915 m->m_pkthdr.flowid = ri->iri_flowid; 2916 M_HASHTYPE_SET(m, ri->iri_rsstype); 2917 m->m_pkthdr.csum_flags = ri->iri_csum_flags; 2918 m->m_pkthdr.csum_data = ri->iri_csum_data; 2919 return (m); 2920 } 2921 2922 #if defined(INET6) || defined(INET) 2923 static void 2924 iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) 2925 { 2926 CURVNET_SET(lc->ifp->if_vnet); 2927 #if defined(INET6) 2928 *v6 = V_ip6_forwarding; 2929 #endif 2930 #if defined(INET) 2931 *v4 = V_ipforwarding; 2932 #endif 2933 CURVNET_RESTORE(); 2934 } 2935 2936 /* 2937 * Returns true if it's possible this packet could be LROed. 2938 * if it returns false, it is guaranteed that tcp_lro_rx() 2939 * would not return zero. 2940 */ 2941 static bool 2942 iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) 2943 { 2944 #ifndef __HAIKU__ 2945 struct ether_header *eh; 2946 2947 eh = mtod(m, struct ether_header *); 2948 switch (eh->ether_type) { 2949 #if defined(INET6) 2950 case htons(ETHERTYPE_IPV6): 2951 return (!v6_forwarding); 2952 #endif 2953 #if defined (INET) 2954 case htons(ETHERTYPE_IP): 2955 return (!v4_forwarding); 2956 #endif 2957 } 2958 #endif 2959 2960 return false; 2961 } 2962 #else 2963 static void 2964 iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) 2965 { 2966 } 2967 #endif 2968 2969 static void 2970 _task_fn_rx_watchdog(void *context) 2971 { 2972 iflib_rxq_t rxq = context; 2973 2974 GROUPTASK_ENQUEUE(&rxq->ifr_task); 2975 } 2976 2977 static uint8_t 2978 iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) 2979 { 2980 if_t ifp; 2981 if_ctx_t ctx = rxq->ifr_ctx; 2982 if_shared_ctx_t sctx = ctx->ifc_sctx; 2983 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2984 int avail, i; 2985 qidx_t *cidxp; 2986 struct if_rxd_info ri; 2987 int err, budget_left, rx_bytes, rx_pkts; 2988 iflib_fl_t fl; 2989 int lro_enabled; 2990 bool v4_forwarding, v6_forwarding, lro_possible; 2991 uint8_t retval = 0; 2992 2993 /* 2994 * XXX early demux data packets so that if_input processing only handles 2995 * acks in interrupt context 2996 */ 2997 struct mbuf *m, *mh, *mt, *mf; 2998 2999 #ifndef __HAIKU__ 3000 NET_EPOCH_ASSERT(); 3001 #endif 3002 3003 lro_possible = v4_forwarding = v6_forwarding = false; 3004 ifp = ctx->ifc_ifp; 3005 mh = mt = NULL; 3006 MPASS(budget > 0); 3007 rx_pkts = rx_bytes = 0; 3008 if (sctx->isc_flags & IFLIB_HAS_RXCQ) 3009 cidxp = &rxq->ifr_cq_cidx; 3010 else 3011 cidxp = &rxq->ifr_fl[0].ifl_cidx; 3012 if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { 3013 for (i 
= 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 3014 retval |= iflib_fl_refill_all(ctx, fl); 3015 DBG_COUNTER_INC(rx_unavail); 3016 return (retval); 3017 } 3018 3019 /* pfil needs the vnet to be set */ 3020 CURVNET_SET_QUIET(ifp->if_vnet); 3021 for (budget_left = budget; budget_left > 0 && avail > 0;) { 3022 if (__predict_false(!CTX_ACTIVE(ctx))) { 3023 DBG_COUNTER_INC(rx_ctx_inactive); 3024 break; 3025 } 3026 /* 3027 * Reset client set fields to their default values 3028 */ 3029 rxd_info_zero(&ri); 3030 ri.iri_qsidx = rxq->ifr_id; 3031 ri.iri_cidx = *cidxp; 3032 ri.iri_ifp = ifp; 3033 ri.iri_frags = rxq->ifr_frags; 3034 err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 3035 3036 if (err) 3037 goto err; 3038 rx_pkts += 1; 3039 rx_bytes += ri.iri_len; 3040 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 3041 *cidxp = ri.iri_cidx; 3042 /* Update our consumer index */ 3043 /* XXX NB: shurd - check if this is still safe */ 3044 while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) 3045 rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; 3046 /* was this only a completion queue message? */ 3047 if (__predict_false(ri.iri_nfrags == 0)) 3048 continue; 3049 } 3050 MPASS(ri.iri_nfrags != 0); 3051 MPASS(ri.iri_len != 0); 3052 3053 /* will advance the cidx on the corresponding free lists */ 3054 m = iflib_rxd_pkt_get(rxq, &ri); 3055 avail--; 3056 budget_left--; 3057 if (avail == 0 && budget_left) 3058 avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); 3059 3060 if (__predict_false(m == NULL)) 3061 continue; 3062 3063 /* imm_pkt: -- cxgb */ 3064 if (mh == NULL) 3065 mh = mt = m; 3066 else { 3067 mt->m_nextpkt = m; 3068 mt = m; 3069 } 3070 } 3071 CURVNET_RESTORE(); 3072 /* make sure that we can refill faster than drain */ 3073 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 3074 retval |= iflib_fl_refill_all(ctx, fl); 3075 3076 lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); 3077 #ifndef __HAIKU__ 3078 if (lro_enabled) 3079 iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); 3080 #endif 3081 mt = mf = NULL; 3082 while (mh != NULL) { 3083 m = mh; 3084 mh = mh->m_nextpkt; 3085 m->m_nextpkt = NULL; 3086 #ifndef __NO_STRICT_ALIGNMENT 3087 if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) 3088 continue; 3089 #endif 3090 #ifndef __HAIKU__ 3091 #if defined(INET6) || defined(INET) 3092 if (lro_enabled) { 3093 if (!lro_possible) { 3094 lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); 3095 if (lro_possible && mf != NULL) { 3096 ifp->if_input(ifp, mf); 3097 DBG_COUNTER_INC(rx_if_input); 3098 mt = mf = NULL; 3099 } 3100 } 3101 if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == 3102 (CSUM_L4_CALC|CSUM_L4_VALID)) { 3103 if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) 3104 continue; 3105 } 3106 } 3107 #endif 3108 if (lro_possible) { 3109 ifp->if_input(ifp, m); 3110 DBG_COUNTER_INC(rx_if_input); 3111 continue; 3112 } 3113 #else /* __HAIKU __*/ 3114 if (mf != NULL) { 3115 ifp->if_input(ifp, mf); 3116 DBG_COUNTER_INC(rx_if_input); 3117 mt = mf = NULL; 3118 } 3119 ifp->if_input(ifp, m); 3120 DBG_COUNTER_INC(rx_if_input); 3121 continue; 3122 #endif 3123 3124 if (mf == NULL) 3125 mf = m; 3126 if (mt != NULL) 3127 mt->m_nextpkt = m; 3128 mt = m; 3129 } 3130 if (mf != NULL) { 3131 ifp->if_input(ifp, mf); 3132 DBG_COUNTER_INC(rx_if_input); 3133 } 3134 3135 if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); 3136 if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); 3137 3138 /* 3139 * Flush any outstanding LRO work 3140 */ 3141 #if defined(INET6) || 
defined(INET) 3142 #ifndef __HAIKU__ 3143 tcp_lro_flush_all(&rxq->ifr_lc); 3144 #endif 3145 #endif 3146 if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0) 3147 retval |= IFLIB_RXEOF_MORE; 3148 return (retval); 3149 err: 3150 STATE_LOCK(ctx); 3151 ctx->ifc_flags |= IFC_DO_RESET; 3152 iflib_admin_intr_deferred(ctx); 3153 STATE_UNLOCK(ctx); 3154 return (0); 3155 } 3156 3157 #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) 3158 static inline qidx_t 3159 txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) 3160 { 3161 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 3162 qidx_t minthresh = txq->ift_size / 8; 3163 if (in_use > 4*minthresh) 3164 return (notify_count); 3165 if (in_use > 2*minthresh) 3166 return (notify_count >> 1); 3167 if (in_use > minthresh) 3168 return (notify_count >> 3); 3169 return (0); 3170 } 3171 3172 static inline qidx_t 3173 txq_max_rs_deferred(iflib_txq_t txq) 3174 { 3175 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 3176 qidx_t minthresh = txq->ift_size / 8; 3177 if (txq->ift_in_use > 4*minthresh) 3178 return (notify_count); 3179 if (txq->ift_in_use > 2*minthresh) 3180 return (notify_count >> 1); 3181 if (txq->ift_in_use > minthresh) 3182 return (notify_count >> 2); 3183 return (2); 3184 } 3185 3186 #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) 3187 #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) 3188 3189 #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) 3190 #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) 3191 #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) 3192 3193 /* forward compatibility for cxgb */ 3194 #define FIRST_QSET(ctx) 0 3195 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) 3196 #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) 3197 #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) 3198 #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) 3199 3200 /* XXX we should be setting this to something other than zero */ 3201 #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) 3202 #define MAX_TX_DESC(ctx) MAX((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ 3203 (ctx)->ifc_softc_ctx.isc_tx_nsegments) 3204 3205 static inline bool 3206 iflib_txd_db_check(iflib_txq_t txq, int ring) 3207 { 3208 if_ctx_t ctx = txq->ift_ctx; 3209 qidx_t dbval, max; 3210 3211 max = TXQ_MAX_DB_DEFERRED(txq, txq->ift_in_use); 3212 3213 /* force || threshold exceeded || at the edge of the ring */ 3214 if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= MAX_TX_DESC(ctx) + 2)) { 3215 3216 /* 3217 * 'npending' is used if the card's doorbell is in terms of the number of descriptors 3218 * pending flush (BRCM). 'pidx' is used in cases where the card's doorbeel uses the 3219 * producer index explicitly (INTC). 3220 */ 3221 dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; 3222 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3223 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3224 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); 3225 3226 /* 3227 * Absent bugs there are zero packets pending so reset pending counts to zero. 
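 * (How often we actually ring is governed by TXQ_MAX_DB_DEFERRED(): the
 * busier the ring is relative to ift_size/8, the more descriptors are
 * allowed to accumulate before a doorbell write, trading a little
 * latency for fewer MMIO writes.)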
3228 */ 3229 txq->ift_db_pending = txq->ift_npending = 0; 3230 return (true); 3231 } 3232 return (false); 3233 } 3234 3235 #ifdef PKT_DEBUG 3236 static void 3237 print_pkt(if_pkt_info_t pi) 3238 { 3239 printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", 3240 pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); 3241 printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", 3242 pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); 3243 printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", 3244 pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); 3245 } 3246 #endif 3247 3248 #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) 3249 #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) 3250 #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) 3251 #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) 3252 3253 static int 3254 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) 3255 { 3256 if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; 3257 struct ether_vlan_header *eh; 3258 struct mbuf *m; 3259 3260 m = *mp; 3261 if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && 3262 M_WRITABLE(m) == 0) { 3263 if ((m = m_dup(m, M_NOWAIT)) == NULL) { 3264 return (ENOMEM); 3265 } else { 3266 m_freem(*mp); 3267 DBG_COUNTER_INC(tx_frees); 3268 *mp = m; 3269 } 3270 } 3271 3272 /* 3273 * Determine where frame payload starts. 3274 * Jump over vlan headers if already present, 3275 * helpful for QinQ too. 3276 */ 3277 if (__predict_false(m->m_len < sizeof(*eh))) { 3278 txq->ift_pullups++; 3279 if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) 3280 return (ENOMEM); 3281 } 3282 eh = mtod(m, struct ether_vlan_header *); 3283 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 3284 pi->ipi_etype = ntohs(eh->evl_proto); 3285 pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3286 } else { 3287 pi->ipi_etype = ntohs(eh->evl_encap_proto); 3288 pi->ipi_ehdrlen = ETHER_HDR_LEN; 3289 } 3290 3291 switch (pi->ipi_etype) { 3292 #ifdef INET 3293 case ETHERTYPE_IP: 3294 { 3295 struct mbuf *n; 3296 struct ip *ip = NULL; 3297 struct tcphdr *th = NULL; 3298 int minthlen; 3299 3300 minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); 3301 if (__predict_false(m->m_len < minthlen)) { 3302 /* 3303 * if this code bloat is causing too much of a hit 3304 * move it to a separate function and mark it noinline 3305 */ 3306 if (m->m_len == pi->ipi_ehdrlen) { 3307 n = m->m_next; 3308 MPASS(n); 3309 if (n->m_len >= sizeof(*ip)) { 3310 ip = (struct ip *)n->m_data; 3311 if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3312 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3313 } else { 3314 txq->ift_pullups++; 3315 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 3316 return (ENOMEM); 3317 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3318 } 3319 } else { 3320 txq->ift_pullups++; 3321 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 3322 return (ENOMEM); 3323 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3324 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3325 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3326 } 3327 } else { 3328 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3329 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3330 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3331 } 3332 pi->ipi_ip_hlen = ip->ip_hl << 2; 3333 pi->ipi_ipproto = 
ip->ip_p; 3334 pi->ipi_flags |= IPI_TX_IPV4; 3335 3336 /* TCP checksum offload may require TCP header length */ 3337 if (IS_TX_OFFLOAD4(pi)) { 3338 if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { 3339 if (__predict_false(th == NULL)) { 3340 txq->ift_pullups++; 3341 if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) 3342 return (ENOMEM); 3343 th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); 3344 } 3345 pi->ipi_tcp_hflags = th->th_flags; 3346 pi->ipi_tcp_hlen = th->th_off << 2; 3347 pi->ipi_tcp_seq = th->th_seq; 3348 } 3349 if (IS_TSO4(pi)) { 3350 if (__predict_false(ip->ip_p != IPPROTO_TCP)) 3351 return (ENXIO); 3352 /* 3353 * TSO always requires hardware checksum offload. 3354 */ 3355 pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); 3356 th->th_sum = in_pseudo(ip->ip_src.s_addr, 3357 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 3358 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 3359 if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { 3360 ip->ip_sum = 0; 3361 ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); 3362 } 3363 } 3364 } 3365 if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) 3366 ip->ip_sum = 0; 3367 3368 break; 3369 } 3370 #endif 3371 #ifdef INET6 3372 case ETHERTYPE_IPV6: 3373 { 3374 struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); 3375 struct tcphdr *th; 3376 pi->ipi_ip_hlen = sizeof(struct ip6_hdr); 3377 3378 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { 3379 txq->ift_pullups++; 3380 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) 3381 return (ENOMEM); 3382 } 3383 th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); 3384 3385 /* XXX-BZ this will go badly in case of ext hdrs. */ 3386 pi->ipi_ipproto = ip6->ip6_nxt; 3387 pi->ipi_flags |= IPI_TX_IPV6; 3388 3389 /* TCP checksum offload may require TCP header length */ 3390 if (IS_TX_OFFLOAD6(pi)) { 3391 if (pi->ipi_ipproto == IPPROTO_TCP) { 3392 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { 3393 txq->ift_pullups++; 3394 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) 3395 return (ENOMEM); 3396 } 3397 pi->ipi_tcp_hflags = th->th_flags; 3398 pi->ipi_tcp_hlen = th->th_off << 2; 3399 pi->ipi_tcp_seq = th->th_seq; 3400 } 3401 if (IS_TSO6(pi)) { 3402 if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) 3403 return (ENXIO); 3404 /* 3405 * TSO always requires hardware checksum offload. 
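 * As in the IPv4 case above, the TCP checksum field is pre-seeded with
 * a pseudo-header sum computed over a zero length
 * (in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0)), which is what TSO engines
 * generally expect so that they can fold in the per-segment length
 * themselves.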
3406 */ 3407 pi->ipi_csum_flags |= CSUM_IP6_TCP; 3408 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 3409 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 3410 } 3411 } 3412 break; 3413 } 3414 #endif 3415 default: 3416 pi->ipi_csum_flags &= ~CSUM_OFFLOAD; 3417 pi->ipi_ip_hlen = 0; 3418 break; 3419 } 3420 *mp = m; 3421 3422 return (0); 3423 } 3424 3425 /* 3426 * If dodgy hardware rejects the scatter gather chain we've handed it 3427 * we'll need to remove the mbuf chain from ifsg_m[] before we can add the 3428 * m_defrag'd mbufs 3429 */ 3430 static __noinline struct mbuf * 3431 iflib_remove_mbuf(iflib_txq_t txq) 3432 { 3433 int ntxd, pidx; 3434 struct mbuf *m, **ifsd_m; 3435 3436 ifsd_m = txq->ift_sds.ifsd_m; 3437 ntxd = txq->ift_size; 3438 pidx = txq->ift_pidx & (ntxd - 1); 3439 ifsd_m = txq->ift_sds.ifsd_m; 3440 m = ifsd_m[pidx]; 3441 ifsd_m[pidx] = NULL; 3442 bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); 3443 if (txq->ift_sds.ifsd_tso_map != NULL) 3444 bus_dmamap_unload(txq->ift_tso_buf_tag, 3445 txq->ift_sds.ifsd_tso_map[pidx]); 3446 #if MEMORY_LOGGING 3447 txq->ift_dequeued++; 3448 #endif 3449 return (m); 3450 } 3451 3452 static inline caddr_t 3453 calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) 3454 { 3455 qidx_t size; 3456 int ntxd; 3457 caddr_t start, end, cur, next; 3458 3459 ntxd = txq->ift_size; 3460 size = txq->ift_txd_size[qid]; 3461 start = txq->ift_ifdi[qid].idi_vaddr; 3462 3463 if (__predict_false(size == 0)) 3464 return (start); 3465 cur = start + size*cidx; 3466 end = start + size*ntxd; 3467 next = CACHE_PTR_NEXT(cur); 3468 return (next < end ? next : start); 3469 } 3470 3471 /* 3472 * Pad an mbuf to ensure a minimum ethernet frame size. 3473 * min_frame_size is the frame size (less CRC) to pad the mbuf to 3474 */ 3475 static __noinline int 3476 iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) 3477 { 3478 /* 3479 * 18 is enough bytes to pad an ARP packet to 46 bytes, and 3480 * and ARP message is the smallest common payload I can think of 3481 */ 3482 static char pad[18]; /* just zeros */ 3483 int n; 3484 struct mbuf *new_head; 3485 3486 if (!M_WRITABLE(*m_head)) { 3487 new_head = m_dup(*m_head, M_NOWAIT); 3488 if (new_head == NULL) { 3489 m_freem(*m_head); 3490 device_printf(dev, "cannot pad short frame, m_dup() failed"); 3491 DBG_COUNTER_INC(encap_pad_mbuf_fail); 3492 DBG_COUNTER_INC(tx_frees); 3493 return ENOMEM; 3494 } 3495 m_freem(*m_head); 3496 *m_head = new_head; 3497 } 3498 3499 for (n = min_frame_size - (*m_head)->m_pkthdr.len; 3500 n > 0; n -= sizeof(pad)) 3501 if (!m_append(*m_head, min(n, sizeof(pad)), pad)) 3502 break; 3503 3504 if (n > 0) { 3505 m_freem(*m_head); 3506 device_printf(dev, "cannot pad short frame\n"); 3507 DBG_COUNTER_INC(encap_pad_mbuf_fail); 3508 DBG_COUNTER_INC(tx_frees); 3509 return (ENOBUFS); 3510 } 3511 3512 return 0; 3513 } 3514 3515 static int 3516 iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) 3517 { 3518 if_ctx_t ctx; 3519 if_shared_ctx_t sctx; 3520 if_softc_ctx_t scctx; 3521 bus_dma_tag_t buf_tag; 3522 bus_dma_segment_t *segs; 3523 struct mbuf *m_head, **ifsd_m; 3524 void *next_txd; 3525 bus_dmamap_t map; 3526 struct if_pkt_info pi; 3527 int remap = 0; 3528 int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; 3529 3530 ctx = txq->ift_ctx; 3531 sctx = ctx->ifc_sctx; 3532 scctx = &ctx->ifc_softc_ctx; 3533 segs = txq->ift_segs; 3534 ntxd = txq->ift_size; 3535 m_head = *m_headp; 3536 map = NULL; 3537 3538 /* 3539 * If we're doing TSO the next descriptor to clean may be quite 
far ahead 3540 */ 3541 cidx = txq->ift_cidx; 3542 pidx = txq->ift_pidx; 3543 if (ctx->ifc_flags & IFC_PREFETCH) { 3544 next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); 3545 if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { 3546 next_txd = calc_next_txd(txq, cidx, 0); 3547 prefetch(next_txd); 3548 } 3549 3550 /* prefetch the next cache line of mbuf pointers and flags */ 3551 prefetch(&txq->ift_sds.ifsd_m[next]); 3552 prefetch(&txq->ift_sds.ifsd_map[next]); 3553 next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); 3554 } 3555 map = txq->ift_sds.ifsd_map[pidx]; 3556 ifsd_m = txq->ift_sds.ifsd_m; 3557 3558 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3559 buf_tag = txq->ift_tso_buf_tag; 3560 max_segs = scctx->isc_tx_tso_segments_max; 3561 map = txq->ift_sds.ifsd_tso_map[pidx]; 3562 MPASS(buf_tag != NULL); 3563 MPASS(max_segs > 0); 3564 } else { 3565 buf_tag = txq->ift_buf_tag; 3566 max_segs = scctx->isc_tx_nsegments; 3567 map = txq->ift_sds.ifsd_map[pidx]; 3568 } 3569 if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && 3570 __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { 3571 err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); 3572 if (err) { 3573 DBG_COUNTER_INC(encap_txd_encap_fail); 3574 return err; 3575 } 3576 } 3577 m_head = *m_headp; 3578 3579 pkt_info_zero(&pi); 3580 pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); 3581 pi.ipi_pidx = pidx; 3582 pi.ipi_qsidx = txq->ift_id; 3583 pi.ipi_len = m_head->m_pkthdr.len; 3584 pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; 3585 pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0; 3586 3587 /* deliberate bitwise OR to make one condition */ 3588 if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { 3589 if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) { 3590 DBG_COUNTER_INC(encap_txd_encap_fail); 3591 return (err); 3592 } 3593 m_head = *m_headp; 3594 } 3595 3596 retry: 3597 err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs, 3598 BUS_DMA_NOWAIT); 3599 defrag: 3600 if (__predict_false(err)) { 3601 switch (err) { 3602 case EFBIG: 3603 /* try collapse once and defrag once */ 3604 if (remap == 0) { 3605 m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); 3606 /* try defrag if collapsing fails */ 3607 if (m_head == NULL) 3608 remap++; 3609 } 3610 if (remap == 1) { 3611 txq->ift_mbuf_defrag++; 3612 m_head = m_defrag(*m_headp, M_NOWAIT); 3613 } 3614 /* 3615 * remap should never be >1 unless bus_dmamap_load_mbuf_sg 3616 * failed to map an mbuf that was run through m_defrag 3617 */ 3618 MPASS(remap <= 1); 3619 if (__predict_false(m_head == NULL || remap > 1)) 3620 goto defrag_failed; 3621 remap++; 3622 *m_headp = m_head; 3623 goto retry; 3624 break; 3625 case ENOMEM: 3626 txq->ift_no_tx_dma_setup++; 3627 break; 3628 default: 3629 txq->ift_no_tx_dma_setup++; 3630 m_freem(*m_headp); 3631 DBG_COUNTER_INC(tx_frees); 3632 *m_headp = NULL; 3633 break; 3634 } 3635 txq->ift_map_failed++; 3636 DBG_COUNTER_INC(encap_load_mbuf_fail); 3637 DBG_COUNTER_INC(encap_txd_encap_fail); 3638 return (err); 3639 } 3640 ifsd_m[pidx] = m_head; 3641 /* 3642 * XXX assumes a 1 to 1 relationship between segments and 3643 * descriptors - this does not hold true on all drivers, e.g. 
3644 * cxgb 3645 */ 3646 if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { 3647 txq->ift_no_desc_avail++; 3648 bus_dmamap_unload(buf_tag, map); 3649 DBG_COUNTER_INC(encap_txq_avail_fail); 3650 DBG_COUNTER_INC(encap_txd_encap_fail); 3651 if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) 3652 GROUPTASK_ENQUEUE(&txq->ift_task); 3653 return (ENOBUFS); 3654 } 3655 /* 3656 * On Intel cards we can greatly reduce the number of TX interrupts 3657 * we see by only setting report status on every Nth descriptor. 3658 * However, this also means that the driver will need to keep track 3659 * of the descriptors that RS was set on to check them for the DD bit. 3660 */ 3661 txq->ift_rs_pending += nsegs + 1; 3662 if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || 3663 iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) { 3664 pi.ipi_flags |= IPI_TX_INTR; 3665 txq->ift_rs_pending = 0; 3666 } 3667 3668 pi.ipi_segs = segs; 3669 pi.ipi_nsegs = nsegs; 3670 3671 MPASS(pidx >= 0 && pidx < txq->ift_size); 3672 #ifdef PKT_DEBUG 3673 print_pkt(&pi); 3674 #endif 3675 if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { 3676 bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); 3677 DBG_COUNTER_INC(tx_encap); 3678 MPASS(pi.ipi_new_pidx < txq->ift_size); 3679 3680 ndesc = pi.ipi_new_pidx - pi.ipi_pidx; 3681 if (pi.ipi_new_pidx < pi.ipi_pidx) { 3682 ndesc += txq->ift_size; 3683 txq->ift_gen = 1; 3684 } 3685 /* 3686 * drivers can need as many as 3687 * two sentinels 3688 */ 3689 MPASS(ndesc <= pi.ipi_nsegs + 2); 3690 MPASS(pi.ipi_new_pidx != pidx); 3691 MPASS(ndesc > 0); 3692 txq->ift_in_use += ndesc; 3693 txq->ift_db_pending += ndesc; 3694 3695 /* 3696 * We update the last software descriptor again here because there may 3697 * be a sentinel and/or there may be more mbufs than segments 3698 */ 3699 txq->ift_pidx = pi.ipi_new_pidx; 3700 txq->ift_npending += pi.ipi_ndescs; 3701 } else { 3702 *m_headp = m_head = iflib_remove_mbuf(txq); 3703 if (err == EFBIG) { 3704 txq->ift_txd_encap_efbig++; 3705 if (remap < 2) { 3706 remap = 1; 3707 goto defrag; 3708 } 3709 } 3710 goto defrag_failed; 3711 } 3712 /* 3713 * err can't possibly be non-zero here, so we don't neet to test it 3714 * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail). 
3715 */ 3716 return (err); 3717 3718 defrag_failed: 3719 txq->ift_mbuf_defrag_failed++; 3720 txq->ift_map_failed++; 3721 m_freem(*m_headp); 3722 DBG_COUNTER_INC(tx_frees); 3723 *m_headp = NULL; 3724 DBG_COUNTER_INC(encap_txd_encap_fail); 3725 return (ENOMEM); 3726 } 3727 3728 static void 3729 iflib_tx_desc_free(iflib_txq_t txq, int n) 3730 { 3731 uint32_t qsize, cidx, mask, gen; 3732 struct mbuf *m, **ifsd_m; 3733 bool do_prefetch; 3734 3735 cidx = txq->ift_cidx; 3736 gen = txq->ift_gen; 3737 qsize = txq->ift_size; 3738 mask = qsize-1; 3739 ifsd_m = txq->ift_sds.ifsd_m; 3740 do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); 3741 3742 while (n-- > 0) { 3743 if (do_prefetch) { 3744 prefetch(ifsd_m[(cidx + 3) & mask]); 3745 prefetch(ifsd_m[(cidx + 4) & mask]); 3746 } 3747 if ((m = ifsd_m[cidx]) != NULL) { 3748 prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); 3749 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 3750 bus_dmamap_sync(txq->ift_tso_buf_tag, 3751 txq->ift_sds.ifsd_tso_map[cidx], 3752 BUS_DMASYNC_POSTWRITE); 3753 bus_dmamap_unload(txq->ift_tso_buf_tag, 3754 txq->ift_sds.ifsd_tso_map[cidx]); 3755 } else { 3756 bus_dmamap_sync(txq->ift_buf_tag, 3757 txq->ift_sds.ifsd_map[cidx], 3758 BUS_DMASYNC_POSTWRITE); 3759 bus_dmamap_unload(txq->ift_buf_tag, 3760 txq->ift_sds.ifsd_map[cidx]); 3761 } 3762 /* XXX we don't support any drivers that batch packets yet */ 3763 MPASS(m->m_nextpkt == NULL); 3764 m_freem(m); 3765 ifsd_m[cidx] = NULL; 3766 #if MEMORY_LOGGING 3767 txq->ift_dequeued++; 3768 #endif 3769 DBG_COUNTER_INC(tx_frees); 3770 } 3771 if (__predict_false(++cidx == qsize)) { 3772 cidx = 0; 3773 gen = 0; 3774 } 3775 } 3776 txq->ift_cidx = cidx; 3777 txq->ift_gen = gen; 3778 } 3779 3780 static __inline int 3781 iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) 3782 { 3783 int reclaim; 3784 if_ctx_t ctx = txq->ift_ctx; 3785 3786 KASSERT(thresh >= 0, ("invalid threshold to reclaim")); 3787 MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); 3788 3789 /* 3790 * Need a rate-limiting check so that this isn't called every time 3791 */ 3792 iflib_tx_credits_update(ctx, txq); 3793 reclaim = DESC_RECLAIMABLE(txq); 3794 3795 if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { 3796 #ifdef INVARIANTS 3797 if (iflib_verbose_debug) { 3798 printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, 3799 txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, 3800 reclaim, thresh); 3801 } 3802 #endif 3803 return (0); 3804 } 3805 iflib_tx_desc_free(txq, reclaim); 3806 txq->ift_cleaned += reclaim; 3807 txq->ift_in_use -= reclaim; 3808 3809 return (reclaim); 3810 } 3811 3812 static struct mbuf ** 3813 _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) 3814 { 3815 int next, size; 3816 struct mbuf **items; 3817 3818 size = r->size; 3819 next = (cidx + CACHE_PTR_INCREMENT) & (size-1); 3820 items = __DEVOLATILE(struct mbuf **, &r->items[0]); 3821 3822 prefetch(items[(cidx + offset) & (size-1)]); 3823 if (remaining > 1) { 3824 prefetch2cachelines(&items[next]); 3825 prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); 3826 prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); 3827 prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); 3828 } 3829 return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); 3830 } 3831 3832 static void 3833 iflib_txq_check_drain(iflib_txq_t txq, int budget) 3834 { 3835 3836 ifmp_ring_check_drainage(txq->ift_br, budget); 3837 } 3838 
3839 static uint32_t 3840 iflib_txq_can_drain(struct ifmp_ring *r) 3841 { 3842 iflib_txq_t txq = r->cookie; 3843 if_ctx_t ctx = txq->ift_ctx; 3844 3845 if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) 3846 return (1); 3847 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3848 BUS_DMASYNC_POSTREAD); 3849 return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, 3850 false)); 3851 } 3852 3853 static uint32_t 3854 iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3855 { 3856 iflib_txq_t txq = r->cookie; 3857 if_ctx_t ctx = txq->ift_ctx; 3858 if_t ifp = ctx->ifc_ifp; 3859 struct mbuf *m, **mp; 3860 int avail, bytes_sent, skipped, count, err, i; 3861 int mcast_sent, pkt_sent, reclaimed; 3862 bool do_prefetch, rang, ring; 3863 3864 if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || 3865 !LINK_ACTIVE(ctx))) { 3866 DBG_COUNTER_INC(txq_drain_notready); 3867 return (0); 3868 } 3869 reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 3870 rang = iflib_txd_db_check(txq, reclaimed && txq->ift_db_pending); 3871 avail = IDXDIFF(pidx, cidx, r->size); 3872 3873 if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { 3874 /* 3875 * The driver is unloading so we need to free all pending packets. 3876 */ 3877 DBG_COUNTER_INC(txq_drain_flushing); 3878 for (i = 0; i < avail; i++) { 3879 if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) 3880 m_freem(r->items[(cidx + i) & (r->size-1)]); 3881 r->items[(cidx + i) & (r->size-1)] = NULL; 3882 } 3883 return (avail); 3884 } 3885 3886 if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { 3887 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3888 CALLOUT_LOCK(txq); 3889 callout_stop(&txq->ift_timer); 3890 CALLOUT_UNLOCK(txq); 3891 DBG_COUNTER_INC(txq_drain_oactive); 3892 return (0); 3893 } 3894 3895 /* 3896 * If we've reclaimed any packets this queue cannot be hung. 3897 */ 3898 if (reclaimed) 3899 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3900 skipped = mcast_sent = bytes_sent = pkt_sent = 0; 3901 count = MIN(avail, TX_BATCH_SIZE); 3902 #ifdef INVARIANTS 3903 if (iflib_verbose_debug) 3904 printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, 3905 avail, ctx->ifc_flags, TXQ_AVAIL(txq)); 3906 #endif 3907 do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); 3908 err = 0; 3909 for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx) + 2; i++) { 3910 int rem = do_prefetch ? count - i : 0; 3911 3912 mp = _ring_peek_one(r, cidx, i, rem); 3913 MPASS(mp != NULL && *mp != NULL); 3914 3915 /* 3916 * Completion interrupts will use the address of the txq 3917 * as a sentinel to enqueue _something_ in order to acquire 3918 * the lock on the mp_ring (there's no direct lock call). 3919 * We obviously have to check for these sentinel cases 3920 * and skip them. 3921 */ 3922 if (__predict_false(*mp == (struct mbuf *)txq)) { 3923 skipped++; 3924 continue; 3925 } 3926 err = iflib_encap(txq, mp); 3927 if (__predict_false(err)) { 3928 /* no room - bail out */ 3929 if (err == ENOBUFS) 3930 break; 3931 skipped++; 3932 /* we can't send this packet - skip it */ 3933 continue; 3934 } 3935 pkt_sent++; 3936 m = *mp; 3937 DBG_COUNTER_INC(tx_sent); 3938 bytes_sent += m->m_pkthdr.len; 3939 mcast_sent += !!(m->m_flags & M_MCAST); 3940 3941 if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) 3942 break; 3943 ETHER_BPF_MTAP(ifp, m); 3944 rang = iflib_txd_db_check(txq, false); 3945 } 3946 3947 /* deliberate use of bitwise or to avoid gratuitous short-circuit */ 3948 ring = rang ? 
false : (iflib_min_tx_latency | err); 3949 iflib_txd_db_check(txq, ring); 3950 if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); 3951 if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); 3952 if (mcast_sent) 3953 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); 3954 #ifdef INVARIANTS 3955 if (iflib_verbose_debug) 3956 printf("consumed=%d\n", skipped + pkt_sent); 3957 #endif 3958 return (skipped + pkt_sent); 3959 } 3960 3961 static uint32_t 3962 iflib_txq_drain_always(struct ifmp_ring *r) 3963 { 3964 return (1); 3965 } 3966 3967 static uint32_t 3968 iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3969 { 3970 int i, avail; 3971 struct mbuf **mp; 3972 iflib_txq_t txq; 3973 3974 txq = r->cookie; 3975 3976 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3977 CALLOUT_LOCK(txq); 3978 callout_stop(&txq->ift_timer); 3979 CALLOUT_UNLOCK(txq); 3980 3981 avail = IDXDIFF(pidx, cidx, r->size); 3982 for (i = 0; i < avail; i++) { 3983 mp = _ring_peek_one(r, cidx, i, avail - i); 3984 if (__predict_false(*mp == (struct mbuf *)txq)) 3985 continue; 3986 m_freem(*mp); 3987 DBG_COUNTER_INC(tx_frees); 3988 } 3989 MPASS(ifmp_ring_is_stalled(r) == 0); 3990 return (avail); 3991 } 3992 3993 static void 3994 iflib_ifmp_purge(iflib_txq_t txq) 3995 { 3996 struct ifmp_ring *r; 3997 3998 r = txq->ift_br; 3999 r->drain = iflib_txq_drain_free; 4000 r->can_drain = iflib_txq_drain_always; 4001 4002 ifmp_ring_check_drainage(r, r->size); 4003 4004 r->drain = iflib_txq_drain; 4005 r->can_drain = iflib_txq_can_drain; 4006 } 4007 4008 static void 4009 _task_fn_tx(void *context) 4010 { 4011 iflib_txq_t txq = context; 4012 if_ctx_t ctx = txq->ift_ctx; 4013 if_t ifp = ctx->ifc_ifp; 4014 int abdicate = ctx->ifc_sysctl_tx_abdicate; 4015 4016 #ifdef IFLIB_DIAGNOSTICS 4017 txq->ift_cpu_exec_count[curcpu]++; 4018 #endif 4019 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 4020 return; 4021 #ifdef DEV_NETMAP 4022 if ((if_getcapenable(ifp) & IFCAP_NETMAP) && 4023 netmap_tx_irq(ifp, txq->ift_id)) 4024 goto skip_ifmp; 4025 #endif 4026 #ifdef ALTQ 4027 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 4028 iflib_altq_if_start(ifp); 4029 #endif 4030 if (txq->ift_db_pending) 4031 ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); 4032 else if (!abdicate) 4033 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 4034 /* 4035 * When abdicating, we always need to check drainage, not just when we don't enqueue 4036 */ 4037 if (abdicate) 4038 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 4039 #ifdef DEV_NETMAP 4040 skip_ifmp: 4041 #endif 4042 if (ctx->ifc_flags & IFC_LEGACY) 4043 IFDI_INTR_ENABLE(ctx); 4044 else 4045 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); 4046 } 4047 4048 static void 4049 _task_fn_rx(void *context) 4050 { 4051 iflib_rxq_t rxq = context; 4052 if_ctx_t ctx = rxq->ifr_ctx; 4053 uint8_t more; 4054 uint16_t budget; 4055 #ifdef DEV_NETMAP 4056 u_int work = 0; 4057 int nmirq; 4058 #endif 4059 4060 #ifdef IFLIB_DIAGNOSTICS 4061 rxq->ifr_cpu_exec_count[curcpu]++; 4062 #endif 4063 DBG_COUNTER_INC(task_fn_rxs); 4064 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 4065 return; 4066 #ifdef DEV_NETMAP 4067 nmirq = netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work); 4068 if (nmirq != NM_IRQ_PASS) { 4069 more = (nmirq == NM_IRQ_RESCHED) ? 
IFLIB_RXEOF_MORE : 0; 4070 goto skip_rxeof; 4071 } 4072 #endif 4073 budget = ctx->ifc_sysctl_rx_budget; 4074 if (budget == 0) 4075 budget = 16; /* XXX */ 4076 more = iflib_rxeof(rxq, budget); 4077 #ifdef DEV_NETMAP 4078 skip_rxeof: 4079 #endif 4080 if ((more & IFLIB_RXEOF_MORE) == 0) { 4081 if (ctx->ifc_flags & IFC_LEGACY) 4082 IFDI_INTR_ENABLE(ctx); 4083 else 4084 IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 4085 DBG_COUNTER_INC(rx_intr_enables); 4086 } 4087 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 4088 return; 4089 4090 if (more & IFLIB_RXEOF_MORE) 4091 GROUPTASK_ENQUEUE(&rxq->ifr_task); 4092 else if (more & IFLIB_RXEOF_EMPTY) 4093 #ifndef __HAIKU__ 4094 callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); 4095 #else 4096 callout_reset(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); 4097 #endif 4098 } 4099 4100 static void 4101 _task_fn_admin(void *context) 4102 { 4103 if_ctx_t ctx = context; 4104 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 4105 iflib_txq_t txq; 4106 int i; 4107 bool oactive, running, do_reset, do_watchdog, in_detach; 4108 4109 STATE_LOCK(ctx); 4110 running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); 4111 oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); 4112 do_reset = (ctx->ifc_flags & IFC_DO_RESET); 4113 do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); 4114 in_detach = (ctx->ifc_flags & IFC_IN_DETACH); 4115 ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); 4116 STATE_UNLOCK(ctx); 4117 4118 if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) 4119 return; 4120 if (in_detach) 4121 return; 4122 4123 CTX_LOCK(ctx); 4124 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 4125 CALLOUT_LOCK(txq); 4126 callout_stop(&txq->ift_timer); 4127 CALLOUT_UNLOCK(txq); 4128 } 4129 if (do_watchdog) { 4130 ctx->ifc_watchdog_events++; 4131 IFDI_WATCHDOG_RESET(ctx); 4132 } 4133 IFDI_UPDATE_ADMIN_STATUS(ctx); 4134 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 4135 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, 4136 txq->ift_timer.c_cpu); 4137 } 4138 IFDI_LINK_INTR_ENABLE(ctx); 4139 if (do_reset) 4140 iflib_if_init_locked(ctx); 4141 CTX_UNLOCK(ctx); 4142 4143 if (LINK_ACTIVE(ctx) == 0) 4144 return; 4145 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) 4146 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 4147 } 4148 4149 static void 4150 _task_fn_iov(void *context) 4151 { 4152 if_ctx_t ctx = context; 4153 4154 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && 4155 !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) 4156 return; 4157 4158 CTX_LOCK(ctx); 4159 IFDI_VFLR_HANDLE(ctx); 4160 CTX_UNLOCK(ctx); 4161 } 4162 4163 static int 4164 iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 4165 { 4166 int err; 4167 if_int_delay_info_t info; 4168 if_ctx_t ctx; 4169 4170 info = (if_int_delay_info_t)arg1; 4171 ctx = info->iidi_ctx; 4172 info->iidi_req = req; 4173 info->iidi_oidp = oidp; 4174 CTX_LOCK(ctx); 4175 err = IFDI_SYSCTL_INT_DELAY(ctx, info); 4176 CTX_UNLOCK(ctx); 4177 return (err); 4178 } 4179 4180 /********************************************************************* 4181 * 4182 * IFNET FUNCTIONS 4183 * 4184 **********************************************************************/ 4185 4186 static void 4187 iflib_if_init_locked(if_ctx_t ctx) 4188 { 4189 iflib_stop(ctx); 4190 iflib_init_locked(ctx); 4191 } 4192 4193 static void 4194 iflib_if_init(void *arg) 4195 { 4196 if_ctx_t ctx = arg; 4197 4198 
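	/*
	 * Full reinitialization path: take the context lock so that the
	 * stop/init sequence cannot race with ioctl or admin-task work.
	 */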
CTX_LOCK(ctx); 4199 iflib_if_init_locked(ctx); 4200 CTX_UNLOCK(ctx); 4201 } 4202 4203 static int 4204 iflib_if_transmit(if_t ifp, struct mbuf *m) 4205 { 4206 if_ctx_t ctx = if_getsoftc(ifp); 4207 4208 iflib_txq_t txq; 4209 int err, qidx; 4210 int abdicate = ctx->ifc_sysctl_tx_abdicate; 4211 4212 if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { 4213 DBG_COUNTER_INC(tx_frees); 4214 m_freem(m); 4215 return (ENETDOWN); 4216 } 4217 4218 MPASS(m->m_nextpkt == NULL); 4219 /* ALTQ-enabled interfaces always use queue 0. */ 4220 qidx = 0; 4221 if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd)) 4222 qidx = QIDX(ctx, m); 4223 /* 4224 * XXX calculate buf_ring based on flowid (divvy up bits?) 4225 */ 4226 txq = &ctx->ifc_txqs[qidx]; 4227 4228 #ifdef DRIVER_BACKPRESSURE 4229 if (txq->ift_closed) { 4230 while (m != NULL) { 4231 next = m->m_nextpkt; 4232 m->m_nextpkt = NULL; 4233 m_freem(m); 4234 DBG_COUNTER_INC(tx_frees); 4235 m = next; 4236 } 4237 return (ENOBUFS); 4238 } 4239 #endif 4240 #ifdef notyet 4241 qidx = count = 0; 4242 mp = marr; 4243 next = m; 4244 do { 4245 count++; 4246 next = next->m_nextpkt; 4247 } while (next != NULL); 4248 4249 if (count > nitems(marr)) 4250 if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { 4251 /* XXX check nextpkt */ 4252 m_freem(m); 4253 /* XXX simplify for now */ 4254 DBG_COUNTER_INC(tx_frees); 4255 return (ENOBUFS); 4256 } 4257 for (next = m, i = 0; next != NULL; i++) { 4258 mp[i] = next; 4259 next = next->m_nextpkt; 4260 mp[i]->m_nextpkt = NULL; 4261 } 4262 #endif 4263 DBG_COUNTER_INC(tx_seen); 4264 err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate); 4265 4266 if (abdicate) 4267 GROUPTASK_ENQUEUE(&txq->ift_task); 4268 if (err) { 4269 if (!abdicate) 4270 GROUPTASK_ENQUEUE(&txq->ift_task); 4271 /* support forthcoming later */ 4272 #ifdef DRIVER_BACKPRESSURE 4273 txq->ift_closed = TRUE; 4274 #endif 4275 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 4276 m_freem(m); 4277 DBG_COUNTER_INC(tx_frees); 4278 } 4279 4280 return (err); 4281 } 4282 4283 #ifdef ALTQ 4284 /* 4285 * The overall approach to integrating iflib with ALTQ is to continue to use 4286 * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware 4287 * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring 4288 * is redundant/unnecessary, but doing so minimizes the amount of 4289 * ALTQ-specific code required in iflib. It is assumed that the overhead of 4290 * redundantly queueing to an intermediate mp_ring is swamped by the 4291 * performance limitations inherent in using ALTQ. 4292 * 4293 * When ALTQ support is compiled in, all iflib drivers will use a transmit 4294 * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the 4295 * given interface. If ALTQ is enabled for an interface, then all 4296 * transmitted packets for that interface will be submitted to the ALTQ 4297 * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit() 4298 * implementation because it uses IFQ_HANDOFF(), which will duplicatively 4299 * update stats that the iflib machinery handles, and which is sensitive to 4300 * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start() 4301 * will be installed as the start routine for use by ALTQ facilities that 4302 * need to trigger queue drains on a scheduled basis. 
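 * In short, the TX path with ALTQ enabled becomes: iflib_altq_if_transmit()
 * -> IFQ_ENQUEUE() -> iflib_altq_if_start() -> IFQ_DEQUEUE() ->
 * iflib_if_transmit() -> mp_ring -> iflib_txq_drain() -> hardware ring.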
4303 * 4304 */ 4305 static void 4306 iflib_altq_if_start(if_t ifp) 4307 { 4308 struct ifaltq *ifq = &ifp->if_snd; 4309 struct mbuf *m; 4310 4311 IFQ_LOCK(ifq); 4312 IFQ_DEQUEUE_NOLOCK(ifq, m); 4313 while (m != NULL) { 4314 iflib_if_transmit(ifp, m); 4315 IFQ_DEQUEUE_NOLOCK(ifq, m); 4316 } 4317 IFQ_UNLOCK(ifq); 4318 } 4319 4320 static int 4321 iflib_altq_if_transmit(if_t ifp, struct mbuf *m) 4322 { 4323 int err; 4324 4325 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 4326 IFQ_ENQUEUE(&ifp->if_snd, m, err); 4327 if (err == 0) 4328 iflib_altq_if_start(ifp); 4329 } else 4330 err = iflib_if_transmit(ifp, m); 4331 4332 return (err); 4333 } 4334 #endif /* ALTQ */ 4335 4336 static void 4337 iflib_if_qflush(if_t ifp) 4338 { 4339 if_ctx_t ctx = if_getsoftc(ifp); 4340 iflib_txq_t txq = ctx->ifc_txqs; 4341 int i; 4342 4343 STATE_LOCK(ctx); 4344 ctx->ifc_flags |= IFC_QFLUSH; 4345 STATE_UNLOCK(ctx); 4346 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 4347 while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) 4348 iflib_txq_check_drain(txq, 0); 4349 STATE_LOCK(ctx); 4350 ctx->ifc_flags &= ~IFC_QFLUSH; 4351 STATE_UNLOCK(ctx); 4352 4353 /* 4354 * When ALTQ is enabled, this will also take care of purging the 4355 * ALTQ queue(s). 4356 */ 4357 if_qflush(ifp); 4358 } 4359 4360 #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ 4361 IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ 4362 IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ 4363 IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG) 4364 4365 static int 4366 iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) 4367 { 4368 if_ctx_t ctx = if_getsoftc(ifp); 4369 struct ifreq *ifr = (struct ifreq *)data; 4370 #if defined(INET) || defined(INET6) 4371 struct ifaddr *ifa = (struct ifaddr *)data; 4372 #endif 4373 bool avoid_reset = false; 4374 int err = 0, reinit = 0, bits; 4375 4376 switch (command) { 4377 case SIOCSIFADDR: 4378 #ifdef INET 4379 if (ifa->ifa_addr->sa_family == AF_INET) 4380 avoid_reset = true; 4381 #endif 4382 #ifdef INET6 4383 if (ifa->ifa_addr->sa_family == AF_INET6) 4384 avoid_reset = true; 4385 #endif 4386 /* 4387 ** Calling init results in link renegotiation, 4388 ** so we avoid doing it when possible. 
4389 */ 4390 if (avoid_reset) { 4391 if_setflagbits(ifp, IFF_UP,0); 4392 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 4393 reinit = 1; 4394 #ifdef INET 4395 if (!(if_getflags(ifp) & IFF_NOARP)) 4396 arp_ifinit(ifp, ifa); 4397 #endif 4398 } else 4399 err = ether_ioctl(ifp, command, data); 4400 break; 4401 case SIOCSIFMTU: 4402 CTX_LOCK(ctx); 4403 if (ifr->ifr_mtu == if_getmtu(ifp)) { 4404 CTX_UNLOCK(ctx); 4405 break; 4406 } 4407 bits = if_getdrvflags(ifp); 4408 /* stop the driver and free any clusters before proceeding */ 4409 iflib_stop(ctx); 4410 4411 if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { 4412 STATE_LOCK(ctx); 4413 if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) 4414 ctx->ifc_flags |= IFC_MULTISEG; 4415 else 4416 ctx->ifc_flags &= ~IFC_MULTISEG; 4417 STATE_UNLOCK(ctx); 4418 err = if_setmtu(ifp, ifr->ifr_mtu); 4419 } 4420 iflib_init_locked(ctx); 4421 STATE_LOCK(ctx); 4422 if_setdrvflags(ifp, bits); 4423 STATE_UNLOCK(ctx); 4424 CTX_UNLOCK(ctx); 4425 break; 4426 case SIOCSIFFLAGS: 4427 CTX_LOCK(ctx); 4428 if (if_getflags(ifp) & IFF_UP) { 4429 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4430 if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & 4431 (IFF_PROMISC | IFF_ALLMULTI)) { 4432 CTX_UNLOCK(ctx); 4433 err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); 4434 CTX_LOCK(ctx); 4435 } 4436 } else 4437 reinit = 1; 4438 } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4439 iflib_stop(ctx); 4440 } 4441 ctx->ifc_if_flags = if_getflags(ifp); 4442 CTX_UNLOCK(ctx); 4443 break; 4444 case SIOCADDMULTI: 4445 case SIOCDELMULTI: 4446 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4447 CTX_LOCK(ctx); 4448 IFDI_INTR_DISABLE(ctx); 4449 IFDI_MULTI_SET(ctx); 4450 IFDI_INTR_ENABLE(ctx); 4451 CTX_UNLOCK(ctx); 4452 } 4453 break; 4454 case SIOCSIFMEDIA: 4455 CTX_LOCK(ctx); 4456 IFDI_MEDIA_SET(ctx); 4457 CTX_UNLOCK(ctx); 4458 /* FALLTHROUGH */ 4459 case SIOCGIFMEDIA: 4460 #ifndef __HAIKU__ 4461 case SIOCGIFXMEDIA: 4462 #endif 4463 err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); 4464 break; 4465 #ifndef __HAIKU__ 4466 case SIOCGI2C: 4467 { 4468 struct ifi2creq i2c; 4469 4470 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4471 if (err != 0) 4472 break; 4473 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { 4474 err = EINVAL; 4475 break; 4476 } 4477 if (i2c.len > sizeof(i2c.data)) { 4478 err = EINVAL; 4479 break; 4480 } 4481 4482 if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) 4483 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4484 sizeof(i2c)); 4485 break; 4486 } 4487 #endif 4488 case SIOCSIFCAP: 4489 { 4490 int mask, setmask, oldmask; 4491 4492 oldmask = if_getcapenable(ifp); 4493 mask = ifr->ifr_reqcap ^ oldmask; 4494 mask &= ctx->ifc_softc_ctx.isc_capabilities | IFCAP_MEXTPG; 4495 setmask = 0; 4496 #ifdef TCP_OFFLOAD 4497 setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); 4498 #endif 4499 setmask |= (mask & IFCAP_FLAGS); 4500 setmask |= (mask & IFCAP_WOL); 4501 4502 /* 4503 * If any RX csum has changed, change all the ones that 4504 * are supported by the driver. 
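	 * For example, a request that toggles only IFCAP_RXCSUM will also
	 * toggle IFCAP_RXCSUM_IPV6 when the hardware supports both.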
4505 */ 4506 if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 4507 setmask |= ctx->ifc_softc_ctx.isc_capabilities & 4508 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); 4509 } 4510 4511 /* 4512 * want to ensure that traffic has stopped before we change any of the flags 4513 */ 4514 if (setmask) { 4515 CTX_LOCK(ctx); 4516 bits = if_getdrvflags(ifp); 4517 if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) 4518 iflib_stop(ctx); 4519 STATE_LOCK(ctx); 4520 if_togglecapenable(ifp, setmask); 4521 STATE_UNLOCK(ctx); 4522 if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) 4523 iflib_init_locked(ctx); 4524 STATE_LOCK(ctx); 4525 if_setdrvflags(ifp, bits); 4526 STATE_UNLOCK(ctx); 4527 CTX_UNLOCK(ctx); 4528 } 4529 if_vlancap(ifp); 4530 break; 4531 } 4532 case SIOCGPRIVATE_0: 4533 case SIOCSDRVSPEC: 4534 case SIOCGDRVSPEC: 4535 CTX_LOCK(ctx); 4536 err = IFDI_PRIV_IOCTL(ctx, command, data); 4537 CTX_UNLOCK(ctx); 4538 break; 4539 default: 4540 err = ether_ioctl(ifp, command, data); 4541 break; 4542 } 4543 if (reinit) 4544 iflib_if_init(ctx); 4545 return (err); 4546 } 4547 4548 static uint64_t 4549 iflib_if_get_counter(if_t ifp, ift_counter cnt) 4550 { 4551 if_ctx_t ctx = if_getsoftc(ifp); 4552 4553 return (IFDI_GET_COUNTER(ctx, cnt)); 4554 } 4555 4556 /********************************************************************* 4557 * 4558 * OTHER FUNCTIONS EXPORTED TO THE STACK 4559 * 4560 **********************************************************************/ 4561 4562 static void 4563 iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) 4564 { 4565 if_ctx_t ctx = if_getsoftc(ifp); 4566 4567 if ((void *)ctx != arg) 4568 return; 4569 4570 if ((vtag == 0) || (vtag > 4095)) 4571 return; 4572 4573 if (iflib_in_detach(ctx)) 4574 return; 4575 4576 CTX_LOCK(ctx); 4577 /* Driver may need all untagged packets to be flushed */ 4578 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4579 iflib_stop(ctx); 4580 IFDI_VLAN_REGISTER(ctx, vtag); 4581 /* Re-init to load the changes, if required */ 4582 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4583 iflib_init_locked(ctx); 4584 CTX_UNLOCK(ctx); 4585 } 4586 4587 static void 4588 iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) 4589 { 4590 if_ctx_t ctx = if_getsoftc(ifp); 4591 4592 if ((void *)ctx != arg) 4593 return; 4594 4595 if ((vtag == 0) || (vtag > 4095)) 4596 return; 4597 4598 CTX_LOCK(ctx); 4599 /* Driver may need all tagged packets to be flushed */ 4600 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4601 iflib_stop(ctx); 4602 IFDI_VLAN_UNREGISTER(ctx, vtag); 4603 /* Re-init to load the changes, if required */ 4604 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4605 iflib_init_locked(ctx); 4606 CTX_UNLOCK(ctx); 4607 } 4608 4609 static void 4610 iflib_led_func(void *arg, int onoff) 4611 { 4612 if_ctx_t ctx = arg; 4613 4614 CTX_LOCK(ctx); 4615 IFDI_LED_FUNC(ctx, onoff); 4616 CTX_UNLOCK(ctx); 4617 } 4618 4619 /********************************************************************* 4620 * 4621 * BUS FUNCTION DEFINITIONS 4622 * 4623 **********************************************************************/ 4624 4625 int 4626 iflib_device_probe(device_t dev) 4627 { 4628 const pci_vendor_info_t *ent; 4629 if_shared_ctx_t sctx; 4630 uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; 4631 uint16_t pci_vendor_id; 4632 4633 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 4634 return (ENOTSUP); 4635 4636 pci_vendor_id = pci_get_vendor(dev); 4637 pci_device_id = pci_get_device(dev); 4638 
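	/*
	 * Subvendor, subdevice and revision are gathered here and matched
	 * below; a table entry that leaves any of them at zero treats that
	 * field as a wildcard.
	 */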
pci_subvendor_id = pci_get_subvendor(dev); 4639 pci_subdevice_id = pci_get_subdevice(dev); 4640 pci_rev_id = pci_get_revid(dev); 4641 if (sctx->isc_parse_devinfo != NULL) 4642 sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); 4643 4644 ent = sctx->isc_vendor_info; 4645 while (ent->pvi_vendor_id != 0) { 4646 if (pci_vendor_id != ent->pvi_vendor_id) { 4647 ent++; 4648 continue; 4649 } 4650 if ((pci_device_id == ent->pvi_device_id) && 4651 ((pci_subvendor_id == ent->pvi_subvendor_id) || 4652 (ent->pvi_subvendor_id == 0)) && 4653 ((pci_subdevice_id == ent->pvi_subdevice_id) || 4654 (ent->pvi_subdevice_id == 0)) && 4655 ((pci_rev_id == ent->pvi_rev_id) || 4656 (ent->pvi_rev_id == 0))) { 4657 device_set_desc_copy(dev, ent->pvi_name); 4658 /* this needs to be changed to zero if the bus probing code 4659 * ever stops re-probing on best match because the sctx 4660 * may have its values over written by register calls 4661 * in subsequent probes 4662 */ 4663 return (BUS_PROBE_DEFAULT); 4664 } 4665 ent++; 4666 } 4667 return (ENXIO); 4668 } 4669 4670 int 4671 iflib_device_probe_vendor(device_t dev) 4672 { 4673 int probe; 4674 4675 probe = iflib_device_probe(dev); 4676 #ifndef __HAIKU__ 4677 if (probe == BUS_PROBE_DEFAULT) 4678 return (BUS_PROBE_VENDOR); 4679 else 4680 #endif 4681 return (probe); 4682 } 4683 4684 static void 4685 iflib_reset_qvalues(if_ctx_t ctx) 4686 { 4687 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4688 if_shared_ctx_t sctx = ctx->ifc_sctx; 4689 device_t dev = ctx->ifc_dev; 4690 int i; 4691 4692 if (ctx->ifc_sysctl_ntxqs != 0) 4693 scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; 4694 if (ctx->ifc_sysctl_nrxqs != 0) 4695 scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; 4696 4697 for (i = 0; i < sctx->isc_ntxqs; i++) { 4698 if (ctx->ifc_sysctl_ntxds[i] != 0) 4699 scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; 4700 else 4701 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4702 } 4703 4704 for (i = 0; i < sctx->isc_nrxqs; i++) { 4705 if (ctx->ifc_sysctl_nrxds[i] != 0) 4706 scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; 4707 else 4708 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4709 } 4710 4711 for (i = 0; i < sctx->isc_nrxqs; i++) { 4712 if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { 4713 device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", 4714 i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); 4715 scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; 4716 } 4717 if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { 4718 device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", 4719 i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); 4720 scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; 4721 } 4722 if (!powerof2(scctx->isc_nrxd[i])) { 4723 device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", 4724 i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); 4725 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4726 } 4727 } 4728 4729 for (i = 0; i < sctx->isc_ntxqs; i++) { 4730 if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { 4731 device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", 4732 i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); 4733 scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; 4734 } 4735 if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { 4736 device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", 4737 i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); 4738 scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; 4739 } 4740 if (!powerof2(scctx->isc_ntxd[i])) { 4741 
device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n", 4742 i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); 4743 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4744 } 4745 } 4746 } 4747 4748 static void 4749 iflib_add_pfil(if_ctx_t ctx) 4750 { 4751 #ifndef __HAIKU__ 4752 struct pfil_head *pfil; 4753 struct pfil_head_args pa; 4754 iflib_rxq_t rxq; 4755 int i; 4756 4757 pa.pa_version = PFIL_VERSION; 4758 pa.pa_flags = PFIL_IN; 4759 pa.pa_type = PFIL_TYPE_ETHERNET; 4760 pa.pa_headname = ctx->ifc_ifp->if_xname; 4761 pfil = pfil_head_register(&pa); 4762 4763 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 4764 rxq->pfil = pfil; 4765 } 4766 #endif 4767 } 4768 4769 static void 4770 iflib_rem_pfil(if_ctx_t ctx) 4771 { 4772 #ifndef __HAIKU__ 4773 struct pfil_head *pfil; 4774 iflib_rxq_t rxq; 4775 int i; 4776 4777 rxq = ctx->ifc_rxqs; 4778 pfil = rxq->pfil; 4779 for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { 4780 rxq->pfil = NULL; 4781 } 4782 pfil_head_unregister(pfil); 4783 #endif 4784 } 4785 4786 4787 #ifndef __HAIKU__ 4788 /* 4789 * Advance forward by n members of the cpuset ctx->ifc_cpus starting from 4790 * cpuid and wrapping as necessary. 4791 */ 4792 static unsigned int 4793 cpuid_advance(if_ctx_t ctx, unsigned int cpuid, unsigned int n) 4794 { 4795 unsigned int first_valid; 4796 unsigned int last_valid; 4797 4798 /* cpuid should always be in the valid set */ 4799 MPASS(CPU_ISSET(cpuid, &ctx->ifc_cpus)); 4800 4801 /* valid set should never be empty */ 4802 MPASS(!CPU_EMPTY(&ctx->ifc_cpus)); 4803 4804 first_valid = CPU_FFS(&ctx->ifc_cpus) - 1; 4805 last_valid = CPU_FLS(&ctx->ifc_cpus) - 1; 4806 n = n % CPU_COUNT(&ctx->ifc_cpus); 4807 while (n > 0) { 4808 do { 4809 cpuid++; 4810 if (cpuid > last_valid) 4811 cpuid = first_valid; 4812 } while (!CPU_ISSET(cpuid, &ctx->ifc_cpus)); 4813 n--; 4814 } 4815 4816 return (cpuid); 4817 } 4818 #endif 4819 4820 #if defined(SMP) && defined(SCHED_ULE) 4821 extern struct cpu_group *cpu_top; /* CPU topology */ 4822 4823 static int 4824 find_child_with_core(int cpu, struct cpu_group *grp) 4825 { 4826 int i; 4827 4828 if (grp->cg_children == 0) 4829 return -1; 4830 4831 MPASS(grp->cg_child); 4832 for (i = 0; i < grp->cg_children; i++) { 4833 if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) 4834 return i; 4835 } 4836 4837 return -1; 4838 } 4839 4840 4841 /* 4842 * Find an L2 neighbor of the given CPU or return -1 if none found. This 4843 * does not distinguish among multiple L2 neighbors if the given CPU has 4844 * more than one (it will always return the same result in that case). 4845 */ 4846 static int 4847 find_l2_neighbor(int cpu) 4848 { 4849 struct cpu_group *grp; 4850 int i; 4851 4852 grp = cpu_top; 4853 if (grp == NULL) 4854 return -1; 4855 4856 /* 4857 * Find the smallest CPU group that contains the given core. 4858 */ 4859 i = 0; 4860 while ((i = find_child_with_core(cpu, grp)) != -1) { 4861 /* 4862 * If the smallest group containing the given CPU has less 4863 * than two members, we conclude the given CPU has no 4864 * L2 neighbor. 4865 */ 4866 if (grp->cg_child[i].cg_count <= 1) 4867 return (-1); 4868 grp = &grp->cg_child[i]; 4869 } 4870 4871 /* Must share L2. */ 4872 if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) 4873 return -1; 4874 4875 /* 4876 * Select the first member of the set that isn't the reference 4877 * CPU, which at this point is guaranteed to exist. 
4878 */ 4879 for (i = 0; i < CPU_SETSIZE; i++) { 4880 if (CPU_ISSET(i, &grp->cg_mask) && i != cpu) 4881 return (i); 4882 } 4883 4884 /* Should never be reached */ 4885 return (-1); 4886 } 4887 4888 #else 4889 static int 4890 find_l2_neighbor(int cpu) 4891 { 4892 4893 return (-1); 4894 } 4895 #endif 4896 4897 #ifndef __HAIKU__ 4898 /* 4899 * CPU mapping behaviors 4900 * --------------------- 4901 * 'separate txrx' refers to the separate_txrx sysctl 4902 * 'use logical' refers to the use_logical_cores sysctl 4903 * 'INTR CPUS' indicates whether bus_get_cpus(INTR_CPUS) succeeded 4904 * 4905 * separate use INTR 4906 * txrx logical CPUS result 4907 * ---------- --------- ------ ------------------------------------------------ 4908 * - - X RX and TX queues mapped to consecutive physical 4909 * cores with RX/TX pairs on same core and excess 4910 * of either following 4911 * - X X RX and TX queues mapped to consecutive cores 4912 * of any type with RX/TX pairs on same core and 4913 * excess of either following 4914 * X - X RX and TX queues mapped to consecutive physical 4915 * cores; all RX then all TX 4916 * X X X RX queues mapped to consecutive physical cores 4917 * first, then TX queues mapped to L2 neighbor of 4918 * the corresponding RX queue if one exists, 4919 * otherwise to consecutive physical cores 4920 * - n/a - RX and TX queues mapped to consecutive cores of 4921 * any type with RX/TX pairs on same core and excess 4922 * of either following 4923 * X n/a - RX and TX queues mapped to consecutive cores of 4924 * any type; all RX then all TX 4925 */ 4926 static unsigned int 4927 get_cpuid_for_queue(if_ctx_t ctx, unsigned int base_cpuid, unsigned int qid, 4928 bool is_tx) 4929 { 4930 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4931 unsigned int core_index; 4932 4933 if (ctx->ifc_sysctl_separate_txrx) { 4934 /* 4935 * When using separate CPUs for TX and RX, the assignment 4936 * will always be of a consecutive CPU out of the set of 4937 * context CPUs, except for the specific case where the 4938 * context CPUs are physical cores, the use of logical cores 4939 * has been enabled, the assignment is for TX, the TX qid 4940 * corresponds to an RX qid, and the CPU assigned to the 4941 * corresponding RX queue has an L2 neighbor. 4942 */ 4943 if (ctx->ifc_sysctl_use_logical_cores && 4944 ctx->ifc_cpus_are_physical_cores && 4945 is_tx && qid < scctx->isc_nrxqsets) { 4946 int l2_neighbor; 4947 unsigned int rx_cpuid; 4948 4949 rx_cpuid = cpuid_advance(ctx, base_cpuid, qid); 4950 l2_neighbor = find_l2_neighbor(rx_cpuid); 4951 if (l2_neighbor != -1) { 4952 return (l2_neighbor); 4953 } 4954 /* 4955 * ... else fall through to the normal 4956 * consecutive-after-RX assignment scheme. 4957 * 4958 * Note that we are assuming that all RX queue CPUs 4959 * have an L2 neighbor, or all do not. If a mixed 4960 * scenario is possible, we will have to keep track 4961 * separately of how many queues prior to this one 4962 * were not able to be assigned to an L2 neighbor. 4963 */ 4964 } 4965 if (is_tx) 4966 core_index = scctx->isc_nrxqsets + qid; 4967 else 4968 core_index = qid; 4969 } else { 4970 core_index = qid; 4971 } 4972 4973 return (cpuid_advance(ctx, base_cpuid, core_index)); 4974 } 4975 #else 4976 #define get_cpuid_for_queue(...) 
CPU_FIRST() 4977 #endif 4978 4979 static uint16_t 4980 get_ctx_core_offset(if_ctx_t ctx) 4981 { 4982 #ifndef __HAIKU__ 4983 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4984 struct cpu_offset *op; 4985 cpuset_t assigned_cpus; 4986 unsigned int cores_consumed; 4987 unsigned int base_cpuid = ctx->ifc_sysctl_core_offset; 4988 unsigned int first_valid; 4989 unsigned int last_valid; 4990 unsigned int i; 4991 4992 first_valid = CPU_FFS(&ctx->ifc_cpus) - 1; 4993 last_valid = CPU_FLS(&ctx->ifc_cpus) - 1; 4994 4995 if (base_cpuid != CORE_OFFSET_UNSPECIFIED) { 4996 /* 4997 * Align the user-chosen base CPU ID to the next valid CPU 4998 * for this device. If the chosen base CPU ID is smaller 4999 * than the first valid CPU or larger than the last valid 5000 * CPU, we assume the user does not know what the valid 5001 * range is for this device and is thinking in terms of a 5002 * zero-based reference frame, and so we shift the given 5003 * value into the valid range (and wrap accordingly) so the 5004 * intent is translated to the proper frame of reference. 5005 * If the base CPU ID is within the valid first/last, but 5006 * does not correspond to a valid CPU, it is advanced to the 5007 * next valid CPU (wrapping if necessary). 5008 */ 5009 if (base_cpuid < first_valid || base_cpuid > last_valid) { 5010 /* shift from zero-based to first_valid-based */ 5011 base_cpuid += first_valid; 5012 /* wrap to range [first_valid, last_valid] */ 5013 base_cpuid = (base_cpuid - first_valid) % 5014 (last_valid - first_valid + 1); 5015 } 5016 if (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) { 5017 /* 5018 * base_cpuid is in [first_valid, last_valid], but 5019 * not a member of the valid set. In this case, 5020 * there will always be a member of the valid set 5021 * with a CPU ID that is greater than base_cpuid, 5022 * and we simply advance to it. 5023 */ 5024 while (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) 5025 base_cpuid++; 5026 } 5027 return (base_cpuid); 5028 } 5029 5030 /* 5031 * Determine how many cores will be consumed by performing the CPU 5032 * assignments and counting how many of the assigned CPUs correspond 5033 * to CPUs in the set of context CPUs. This is done using the CPU 5034 * ID first_valid as the base CPU ID, as the base CPU must be within 5035 * the set of context CPUs. 5036 * 5037 * Note not all assigned CPUs will be in the set of context CPUs 5038 * when separate CPUs are being allocated to TX and RX queues, 5039 * assignment to logical cores has been enabled, the set of context 5040 * CPUs contains only physical CPUs, and TX queues are mapped to L2 5041 * neighbors of CPUs that RX queues have been mapped to - in this 5042 * case we do only want to count how many CPUs in the set of context 5043 * CPUs have been consumed, as that determines the next CPU in that 5044 * set to start allocating at for the next device for which 5045 * core_offset is not set. 
5046 */ 5047 CPU_ZERO(&assigned_cpus); 5048 for (i = 0; i < scctx->isc_ntxqsets; i++) 5049 CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, true), 5050 &assigned_cpus); 5051 for (i = 0; i < scctx->isc_nrxqsets; i++) 5052 CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, false), 5053 &assigned_cpus); 5054 CPU_AND(&assigned_cpus, &ctx->ifc_cpus); 5055 cores_consumed = CPU_COUNT(&assigned_cpus); 5056 5057 mtx_lock(&cpu_offset_mtx); 5058 SLIST_FOREACH(op, &cpu_offsets, entries) { 5059 if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { 5060 base_cpuid = op->next_cpuid; 5061 op->next_cpuid = cpuid_advance(ctx, op->next_cpuid, 5062 cores_consumed); 5063 MPASS(op->refcount < UINT_MAX); 5064 op->refcount++; 5065 break; 5066 } 5067 } 5068 if (base_cpuid == CORE_OFFSET_UNSPECIFIED) { 5069 base_cpuid = first_valid; 5070 op = malloc(sizeof(struct cpu_offset), M_IFLIB, 5071 M_NOWAIT | M_ZERO); 5072 if (op == NULL) { 5073 device_printf(ctx->ifc_dev, 5074 "allocation for cpu offset failed.\n"); 5075 } else { 5076 op->next_cpuid = cpuid_advance(ctx, base_cpuid, 5077 cores_consumed); 5078 op->refcount = 1; 5079 CPU_COPY(&ctx->ifc_cpus, &op->set); 5080 SLIST_INSERT_HEAD(&cpu_offsets, op, entries); 5081 } 5082 } 5083 mtx_unlock(&cpu_offset_mtx); 5084 5085 return (base_cpuid); 5086 #else 5087 return 0; 5088 #endif 5089 } 5090 5091 static void 5092 unref_ctx_core_offset(if_ctx_t ctx) 5093 { 5094 #ifndef __HAIKU__ 5095 struct cpu_offset *op, *top; 5096 5097 mtx_lock(&cpu_offset_mtx); 5098 SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { 5099 if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { 5100 MPASS(op->refcount > 0); 5101 op->refcount--; 5102 if (op->refcount == 0) { 5103 SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); 5104 free(op, M_IFLIB); 5105 } 5106 break; 5107 } 5108 } 5109 mtx_unlock(&cpu_offset_mtx); 5110 #endif 5111 } 5112 5113 int 5114 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) 5115 { 5116 if_ctx_t ctx; 5117 if_t ifp; 5118 if_softc_ctx_t scctx; 5119 kobjop_desc_t kobj_desc; 5120 kobj_method_t *kobj_method; 5121 int err, msix, rid; 5122 int num_txd, num_rxd; 5123 5124 ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); 5125 5126 if (sc == NULL) { 5127 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 5128 device_set_softc(dev, ctx); 5129 ctx->ifc_flags |= IFC_SC_ALLOCATED; 5130 } 5131 5132 ctx->ifc_sctx = sctx; 5133 ctx->ifc_dev = dev; 5134 ctx->ifc_softc = sc; 5135 5136 if ((err = iflib_register(ctx)) != 0) { 5137 device_printf(dev, "iflib_register failed %d\n", err); 5138 goto fail_ctx_free; 5139 } 5140 iflib_add_device_sysctl_pre(ctx); 5141 5142 scctx = &ctx->ifc_softc_ctx; 5143 ifp = ctx->ifc_ifp; 5144 5145 iflib_reset_qvalues(ctx); 5146 CTX_LOCK(ctx); 5147 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 5148 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 5149 goto fail_unlock; 5150 } 5151 _iflib_pre_assert(scctx); 5152 ctx->ifc_txrx = *scctx->isc_txrx; 5153 5154 if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) 5155 ctx->ifc_mediap = scctx->isc_media; 5156 5157 #ifdef INVARIANTS 5158 if (scctx->isc_capabilities & IFCAP_TXCSUM) 5159 MPASS(scctx->isc_tx_csum_flags); 5160 #endif 5161 5162 if_setcapabilities(ifp, 5163 scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_MEXTPG); 5164 if_setcapenable(ifp, 5165 scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_MEXTPG); 5166 5167 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 5168 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 5169 if 
(scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 5170 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 5171 5172 num_txd = iflib_num_tx_descs(ctx); 5173 num_rxd = iflib_num_rx_descs(ctx); 5174 5175 /* XXX change for per-queue sizes */ 5176 device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", 5177 num_txd, num_rxd); 5178 5179 if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) 5180 scctx->isc_tx_nsegments = max(1, num_txd / 5181 MAX_SINGLE_PACKET_FRACTION); 5182 if (scctx->isc_tx_tso_segments_max > num_txd / 5183 MAX_SINGLE_PACKET_FRACTION) 5184 scctx->isc_tx_tso_segments_max = max(1, 5185 num_txd / MAX_SINGLE_PACKET_FRACTION); 5186 5187 /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ 5188 if (if_getcapabilities(ifp) & IFCAP_TSO) { 5189 #ifndef __HAIKU__ 5190 /* 5191 * The stack can't handle a TSO size larger than IP_MAXPACKET, 5192 * but some MACs do. 5193 */ 5194 if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, 5195 IP_MAXPACKET)); 5196 /* 5197 * Take maximum number of m_pullup(9)'s in iflib_parse_header() 5198 * into account. In the worst case, each of these calls will 5199 * add another mbuf and, thus, the requirement for another DMA 5200 * segment. So for best performance, it doesn't make sense to 5201 * advertise a maximum of TSO segments that typically will 5202 * require defragmentation in iflib_encap(). 5203 */ 5204 if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); 5205 if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); 5206 #endif 5207 } 5208 if (scctx->isc_rss_table_size == 0) 5209 scctx->isc_rss_table_size = 64; 5210 scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; 5211 5212 GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); 5213 /* XXX format name */ 5214 taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, 5215 NULL, NULL, "admin"); 5216 5217 #ifndef __HAIKU__ 5218 /* Set up cpu set. If it fails, use the set of all CPUs. */ 5219 if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { 5220 device_printf(dev, "Unable to fetch CPU list\n"); 5221 CPU_COPY(&all_cpus, &ctx->ifc_cpus); 5222 ctx->ifc_cpus_are_physical_cores = false; 5223 } else 5224 ctx->ifc_cpus_are_physical_cores = true; 5225 MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); 5226 #endif 5227 5228 /* 5229 ** Now set up MSI or MSI-X, should return us the number of supported 5230 ** vectors (will be 1 for a legacy interrupt and MSI). 5231 */ 5232 if (sctx->isc_flags & IFLIB_SKIP_MSIX) { 5233 msix = scctx->isc_vectors; 5234 } else if (scctx->isc_msix_bar != 0) 5235 /* 5236 * The simple fact that isc_msix_bar is not 0 does not mean we 5237 * have a good value there that is known to work. 5238 */ 5239 msix = iflib_msix_init(ctx); 5240 else { 5241 scctx->isc_vectors = 1; 5242 scctx->isc_ntxqsets = 1; 5243 scctx->isc_nrxqsets = 1; 5244 scctx->isc_intr = IFLIB_INTR_LEGACY; 5245 msix = 0; 5246 } 5247 /* Get memory for the station queues */ 5248 if ((err = iflib_queues_alloc(ctx))) { 5249 device_printf(dev, "Unable to allocate queue memory\n"); 5250 goto fail_intr_free; 5251 } 5252 5253 if ((err = iflib_qset_structures_setup(ctx))) 5254 goto fail_queues; 5255 5256 /* 5257 * Now that we know how many queues there are, get the core offset. 
5258 */ 5259 ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); 5260 5261 if (msix > 1) { 5262 /* 5263 * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable 5264 * aren't the default NULL implementation. 5265 */ 5266 kobj_desc = &ifdi_rx_queue_intr_enable_desc; 5267 #ifdef __HAIKU__ 5268 kobj_method = kobj_lookup_method(ctx->ops.cls, NULL, 5269 #else 5270 kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, 5271 #endif 5272 kobj_desc); 5273 if (kobj_method == &kobj_desc->deflt) { 5274 device_printf(dev, 5275 "MSI-X requires ifdi_rx_queue_intr_enable method"); 5276 err = EOPNOTSUPP; 5277 goto fail_queues; 5278 } 5279 kobj_desc = &ifdi_tx_queue_intr_enable_desc; 5280 #ifdef __HAIKU__ 5281 kobj_method = kobj_lookup_method(ctx->ops.cls, NULL, 5282 #else 5283 kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, 5284 #endif 5285 kobj_desc); 5286 if (kobj_method == &kobj_desc->deflt) { 5287 device_printf(dev, 5288 "MSI-X requires ifdi_tx_queue_intr_enable method"); 5289 err = EOPNOTSUPP; 5290 goto fail_queues; 5291 } 5292 5293 /* 5294 * Assign the MSI-X vectors. 5295 * Note that the default NULL ifdi_msix_intr_assign method will 5296 * fail here, too. 5297 */ 5298 err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); 5299 if (err != 0) { 5300 device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", 5301 err); 5302 goto fail_queues; 5303 } 5304 } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { 5305 rid = 0; 5306 if (scctx->isc_intr == IFLIB_INTR_MSI) { 5307 MPASS(msix == 1); 5308 rid = 1; 5309 } 5310 if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { 5311 device_printf(dev, "iflib_legacy_setup failed %d\n", err); 5312 goto fail_queues; 5313 } 5314 } else { 5315 device_printf(dev, 5316 "Cannot use iflib with only 1 MSI-X interrupt!\n"); 5317 err = ENODEV; 5318 goto fail_queues; 5319 } 5320 5321 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5322 5323 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5324 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5325 goto fail_detach; 5326 } 5327 5328 /* 5329 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 5330 * This must appear after the call to ether_ifattach() because 5331 * ether_ifattach() sets if_hdrlen to the default value. 
5332 */ 5333 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5334 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 5335 5336 if ((err = iflib_netmap_attach(ctx))) { 5337 device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); 5338 goto fail_detach; 5339 } 5340 *ctxp = ctx; 5341 5342 DEBUGNET_SET(ctx->ifc_ifp, iflib); 5343 5344 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5345 iflib_add_device_sysctl_post(ctx); 5346 iflib_add_pfil(ctx); 5347 ctx->ifc_flags |= IFC_INIT_DONE; 5348 CTX_UNLOCK(ctx); 5349 5350 return (0); 5351 5352 fail_detach: 5353 ether_ifdetach(ctx->ifc_ifp); 5354 fail_queues: 5355 iflib_tqg_detach(ctx); 5356 iflib_tx_structures_free(ctx); 5357 iflib_rx_structures_free(ctx); 5358 IFDI_DETACH(ctx); 5359 IFDI_QUEUES_FREE(ctx); 5360 fail_intr_free: 5361 iflib_free_intr_mem(ctx); 5362 fail_unlock: 5363 CTX_UNLOCK(ctx); 5364 iflib_deregister(ctx); 5365 fail_ctx_free: 5366 device_set_softc(ctx->ifc_dev, NULL); 5367 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5368 free(ctx->ifc_softc, M_IFLIB); 5369 free(ctx, M_IFLIB); 5370 return (err); 5371 } 5372 5373 int 5374 iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, 5375 struct iflib_cloneattach_ctx *clctx) 5376 { 5377 int num_txd, num_rxd; 5378 int err; 5379 if_ctx_t ctx; 5380 if_t ifp; 5381 if_softc_ctx_t scctx; 5382 int i; 5383 void *sc; 5384 5385 ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); 5386 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 5387 ctx->ifc_flags |= IFC_SC_ALLOCATED; 5388 if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) 5389 ctx->ifc_flags |= IFC_PSEUDO; 5390 5391 ctx->ifc_sctx = sctx; 5392 ctx->ifc_softc = sc; 5393 ctx->ifc_dev = dev; 5394 5395 if ((err = iflib_register(ctx)) != 0) { 5396 device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); 5397 goto fail_ctx_free; 5398 } 5399 iflib_add_device_sysctl_pre(ctx); 5400 5401 scctx = &ctx->ifc_softc_ctx; 5402 ifp = ctx->ifc_ifp; 5403 5404 iflib_reset_qvalues(ctx); 5405 CTX_LOCK(ctx); 5406 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 5407 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 5408 goto fail_unlock; 5409 } 5410 #ifndef __HAIKU__ 5411 if (sctx->isc_flags & IFLIB_GEN_MAC) 5412 ether_gen_addr(ifp, &ctx->ifc_mac); 5413 #endif 5414 if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, 5415 clctx->cc_params)) != 0) { 5416 device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); 5417 goto fail_unlock; 5418 } 5419 #ifdef INVARIANTS 5420 if (scctx->isc_capabilities & IFCAP_TXCSUM) 5421 MPASS(scctx->isc_tx_csum_flags); 5422 #endif 5423 5424 if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); 5425 if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); 5426 5427 ifp->if_flags |= IFF_NOGROUP; 5428 if (sctx->isc_flags & IFLIB_PSEUDO) { 5429 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); 5430 ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); 5431 if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) { 5432 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5433 } else { 5434 if_attach(ctx->ifc_ifp); 5435 bpfattach(ctx->ifc_ifp, DLT_NULL, sizeof(u_int32_t)); 5436 } 5437 5438 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5439 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5440 goto fail_detach; 5441 } 5442 *ctxp = ctx; 5443 5444 /* 5445 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 
5446 * This must appear after the call to ether_ifattach() because 5447 * ether_ifattach() sets if_hdrlen to the default value. 5448 */ 5449 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5450 if_setifheaderlen(ifp, 5451 sizeof(struct ether_vlan_header)); 5452 5453 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5454 iflib_add_device_sysctl_post(ctx); 5455 ctx->ifc_flags |= IFC_INIT_DONE; 5456 CTX_UNLOCK(ctx); 5457 return (0); 5458 } 5459 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 5460 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); 5461 ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); 5462 5463 _iflib_pre_assert(scctx); 5464 ctx->ifc_txrx = *scctx->isc_txrx; 5465 5466 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 5467 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 5468 if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 5469 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 5470 5471 num_txd = iflib_num_tx_descs(ctx); 5472 num_rxd = iflib_num_rx_descs(ctx); 5473 5474 /* XXX change for per-queue sizes */ 5475 device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", 5476 num_txd, num_rxd); 5477 5478 if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) 5479 scctx->isc_tx_nsegments = max(1, num_txd / 5480 MAX_SINGLE_PACKET_FRACTION); 5481 if (scctx->isc_tx_tso_segments_max > num_txd / 5482 MAX_SINGLE_PACKET_FRACTION) 5483 scctx->isc_tx_tso_segments_max = max(1, 5484 num_txd / MAX_SINGLE_PACKET_FRACTION); 5485 5486 /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ 5487 if (if_getcapabilities(ifp) & IFCAP_TSO) { 5488 #ifndef __HAIKU__ 5489 /* 5490 * The stack can't handle a TSO size larger than IP_MAXPACKET, 5491 * but some MACs do. 5492 */ 5493 if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, 5494 IP_MAXPACKET)); 5495 /* 5496 * Take maximum number of m_pullup(9)'s in iflib_parse_header() 5497 * into account. In the worst case, each of these calls will 5498 * add another mbuf and, thus, the requirement for another DMA 5499 * segment. So for best performance, it doesn't make sense to 5500 * advertise a maximum of TSO segments that typically will 5501 * require defragmentation in iflib_encap(). 5502 */ 5503 if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); 5504 if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); 5505 #endif 5506 } 5507 if (scctx->isc_rss_table_size == 0) 5508 scctx->isc_rss_table_size = 64; 5509 scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; 5510 5511 GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); 5512 /* XXX format name */ 5513 taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, 5514 NULL, NULL, "admin"); 5515 5516 /* XXX --- can support > 1 -- but keep it simple for now */ 5517 scctx->isc_intr = IFLIB_INTR_LEGACY; 5518 5519 /* Get memory for the station queues */ 5520 if ((err = iflib_queues_alloc(ctx))) { 5521 device_printf(dev, "Unable to allocate queue memory\n"); 5522 goto fail_iflib_detach; 5523 } 5524 5525 if ((err = iflib_qset_structures_setup(ctx))) { 5526 device_printf(dev, "qset structure setup failed %d\n", err); 5527 goto fail_queues; 5528 } 5529 5530 /* 5531 * XXX What if anything do we want to do about interrupts? 
5532 */ 5533 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5534 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5535 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5536 goto fail_detach; 5537 } 5538 5539 /* 5540 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 5541 * This must appear after the call to ether_ifattach() because 5542 * ether_ifattach() sets if_hdrlen to the default value. 5543 */ 5544 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5545 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 5546 5547 /* XXX handle more than one queue */ 5548 for (i = 0; i < scctx->isc_nrxqsets; i++) 5549 IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); 5550 5551 *ctxp = ctx; 5552 5553 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5554 iflib_add_device_sysctl_post(ctx); 5555 ctx->ifc_flags |= IFC_INIT_DONE; 5556 CTX_UNLOCK(ctx); 5557 5558 return (0); 5559 fail_detach: 5560 ether_ifdetach(ctx->ifc_ifp); 5561 fail_queues: 5562 iflib_tqg_detach(ctx); 5563 iflib_tx_structures_free(ctx); 5564 iflib_rx_structures_free(ctx); 5565 fail_iflib_detach: 5566 IFDI_DETACH(ctx); 5567 IFDI_QUEUES_FREE(ctx); 5568 fail_unlock: 5569 CTX_UNLOCK(ctx); 5570 iflib_deregister(ctx); 5571 fail_ctx_free: 5572 free(ctx->ifc_softc, M_IFLIB); 5573 free(ctx, M_IFLIB); 5574 return (err); 5575 } 5576 5577 int 5578 iflib_pseudo_deregister(if_ctx_t ctx) 5579 { 5580 if_t ifp = ctx->ifc_ifp; 5581 if_shared_ctx_t sctx = ctx->ifc_sctx; 5582 5583 /* Unregister VLAN event handlers early */ 5584 iflib_unregister_vlan_handlers(ctx); 5585 5586 if ((sctx->isc_flags & IFLIB_PSEUDO) && 5587 (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) { 5588 bpfdetach(ifp); 5589 if_detach(ifp); 5590 } else { 5591 ether_ifdetach(ifp); 5592 } 5593 5594 iflib_tqg_detach(ctx); 5595 iflib_tx_structures_free(ctx); 5596 iflib_rx_structures_free(ctx); 5597 IFDI_DETACH(ctx); 5598 IFDI_QUEUES_FREE(ctx); 5599 5600 iflib_deregister(ctx); 5601 5602 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5603 free(ctx->ifc_softc, M_IFLIB); 5604 free(ctx, M_IFLIB); 5605 return (0); 5606 } 5607 5608 int 5609 iflib_device_attach(device_t dev) 5610 { 5611 if_ctx_t ctx; 5612 if_shared_ctx_t sctx; 5613 5614 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 5615 return (ENOTSUP); 5616 5617 pci_enable_busmaster(dev); 5618 5619 return (iflib_device_register(dev, NULL, sctx, &ctx)); 5620 } 5621 5622 int 5623 iflib_device_deregister(if_ctx_t ctx) 5624 { 5625 if_t ifp = ctx->ifc_ifp; 5626 device_t dev = ctx->ifc_dev; 5627 5628 /* Make sure VLANS are not using driver */ 5629 if (if_vlantrunkinuse(ifp)) { 5630 device_printf(dev, "Vlan in use, detach first\n"); 5631 return (EBUSY); 5632 } 5633 #ifdef PCI_IOV 5634 if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) { 5635 device_printf(dev, "SR-IOV in use; detach first.\n"); 5636 return (EBUSY); 5637 } 5638 #endif 5639 5640 STATE_LOCK(ctx); 5641 ctx->ifc_flags |= IFC_IN_DETACH; 5642 STATE_UNLOCK(ctx); 5643 5644 /* Unregister VLAN handlers before calling iflib_stop() */ 5645 iflib_unregister_vlan_handlers(ctx); 5646 5647 iflib_netmap_detach(ifp); 5648 ether_ifdetach(ifp); 5649 5650 CTX_LOCK(ctx); 5651 iflib_stop(ctx); 5652 CTX_UNLOCK(ctx); 5653 5654 iflib_rem_pfil(ctx); 5655 if (ctx->ifc_led_dev != NULL) 5656 led_destroy(ctx->ifc_led_dev); 5657 5658 iflib_tqg_detach(ctx); 5659 iflib_tx_structures_free(ctx); 5660 iflib_rx_structures_free(ctx); 5661 5662 CTX_LOCK(ctx); 5663 IFDI_DETACH(ctx); 5664 IFDI_QUEUES_FREE(ctx); 5665 CTX_UNLOCK(ctx); 5666 5667 /* ether_ifdetach calls if_qflush 
- lock must be destroy afterwards*/ 5668 iflib_free_intr_mem(ctx); 5669 5670 bus_generic_detach(dev); 5671 5672 iflib_deregister(ctx); 5673 5674 device_set_softc(ctx->ifc_dev, NULL); 5675 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5676 free(ctx->ifc_softc, M_IFLIB); 5677 unref_ctx_core_offset(ctx); 5678 free(ctx, M_IFLIB); 5679 return (0); 5680 } 5681 5682 static void 5683 iflib_tqg_detach(if_ctx_t ctx) 5684 { 5685 iflib_txq_t txq; 5686 iflib_rxq_t rxq; 5687 int i; 5688 struct taskqgroup *tqg; 5689 5690 /* XXX drain any dependent tasks */ 5691 tqg = qgroup_if_io_tqg; 5692 for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { 5693 callout_drain(&txq->ift_timer); 5694 #ifdef DEV_NETMAP 5695 callout_drain(&txq->ift_netmap_timer); 5696 #endif /* DEV_NETMAP */ 5697 if (txq->ift_task.gt_uniq != NULL) 5698 taskqgroup_detach(tqg, &txq->ift_task); 5699 } 5700 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 5701 if (rxq->ifr_task.gt_uniq != NULL) 5702 taskqgroup_detach(tqg, &rxq->ifr_task); 5703 } 5704 tqg = qgroup_if_config_tqg; 5705 if (ctx->ifc_admin_task.gt_uniq != NULL) 5706 taskqgroup_detach(tqg, &ctx->ifc_admin_task); 5707 if (ctx->ifc_vflr_task.gt_uniq != NULL) 5708 taskqgroup_detach(tqg, &ctx->ifc_vflr_task); 5709 } 5710 5711 static void 5712 iflib_free_intr_mem(if_ctx_t ctx) 5713 { 5714 5715 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { 5716 iflib_irq_free(ctx, &ctx->ifc_legacy_irq); 5717 } 5718 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { 5719 pci_release_msi(ctx->ifc_dev); 5720 } 5721 if (ctx->ifc_msix_mem != NULL) { 5722 bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, 5723 rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); 5724 ctx->ifc_msix_mem = NULL; 5725 } 5726 } 5727 5728 int 5729 iflib_device_detach(device_t dev) 5730 { 5731 if_ctx_t ctx = device_get_softc(dev); 5732 5733 return (iflib_device_deregister(ctx)); 5734 } 5735 5736 int 5737 iflib_device_suspend(device_t dev) 5738 { 5739 if_ctx_t ctx = device_get_softc(dev); 5740 5741 CTX_LOCK(ctx); 5742 IFDI_SUSPEND(ctx); 5743 CTX_UNLOCK(ctx); 5744 5745 return bus_generic_suspend(dev); 5746 } 5747 int 5748 iflib_device_shutdown(device_t dev) 5749 { 5750 if_ctx_t ctx = device_get_softc(dev); 5751 5752 CTX_LOCK(ctx); 5753 IFDI_SHUTDOWN(ctx); 5754 CTX_UNLOCK(ctx); 5755 5756 return bus_generic_suspend(dev); 5757 } 5758 5759 int 5760 iflib_device_resume(device_t dev) 5761 { 5762 if_ctx_t ctx = device_get_softc(dev); 5763 iflib_txq_t txq = ctx->ifc_txqs; 5764 5765 CTX_LOCK(ctx); 5766 IFDI_RESUME(ctx); 5767 iflib_if_init_locked(ctx); 5768 CTX_UNLOCK(ctx); 5769 for (int i = 0; i < NTXQSETS(ctx); i++, txq++) 5770 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 5771 5772 return (bus_generic_resume(dev)); 5773 } 5774 5775 int 5776 iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) 5777 { 5778 int error; 5779 if_ctx_t ctx = device_get_softc(dev); 5780 5781 CTX_LOCK(ctx); 5782 error = IFDI_IOV_INIT(ctx, num_vfs, params); 5783 CTX_UNLOCK(ctx); 5784 5785 return (error); 5786 } 5787 5788 void 5789 iflib_device_iov_uninit(device_t dev) 5790 { 5791 if_ctx_t ctx = device_get_softc(dev); 5792 5793 CTX_LOCK(ctx); 5794 IFDI_IOV_UNINIT(ctx); 5795 CTX_UNLOCK(ctx); 5796 } 5797 5798 int 5799 iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) 5800 { 5801 int error; 5802 if_ctx_t ctx = device_get_softc(dev); 5803 5804 CTX_LOCK(ctx); 5805 error = IFDI_IOV_VF_ADD(ctx, vfnum, params); 5806 CTX_UNLOCK(ctx); 5807 5808 return (error); 5809 } 5810 5811 
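/*
 * The bus entry points above are normally wired into a driver's method
 * table rather than called directly.  A minimal sketch for a hypothetical
 * "foo" driver (the foo_* names are illustrative only, not part of iflib):
 *
 *	static device_method_t foo_methods[] = {
 *		DEVMETHOD(device_register, foo_register),
 *		DEVMETHOD(device_probe, iflib_device_probe),
 *		DEVMETHOD(device_attach, iflib_device_attach),
 *		DEVMETHOD(device_detach, iflib_device_detach),
 *		DEVMETHOD(device_shutdown, iflib_device_shutdown),
 *		DEVMETHOD(device_suspend, iflib_device_suspend),
 *		DEVMETHOD(device_resume, iflib_device_resume),
 *		DEVMETHOD_END
 *	};
 *	static driver_t foo_driver = {
 *		"foo", foo_methods, sizeof(struct foo_softc)
 *	};
 *
 * where foo_register() returns the driver's if_shared_ctx_t, as consumed
 * by DEVICE_REGISTER() in iflib_device_probe() above.
 */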
/********************************************************************* 5812 * 5813 * MODULE FUNCTION DEFINITIONS 5814 * 5815 **********************************************************************/ 5816 5817 /* 5818 * - Start a fast taskqueue thread for each core 5819 * - Start a taskqueue for control operations 5820 */ 5821 static int 5822 iflib_module_init(void) 5823 { 5824 iflib_timer_default = hz / 2; 5825 return (0); 5826 } 5827 5828 static int 5829 iflib_module_event_handler(module_t mod, int what, void *arg) 5830 { 5831 int err; 5832 5833 switch (what) { 5834 case MOD_LOAD: 5835 if ((err = iflib_module_init()) != 0) 5836 return (err); 5837 break; 5838 case MOD_UNLOAD: 5839 return (EBUSY); 5840 default: 5841 return (EOPNOTSUPP); 5842 } 5843 5844 return (0); 5845 } 5846 5847 /********************************************************************* 5848 * 5849 * PUBLIC FUNCTION DEFINITIONS 5850 * ordered as in iflib.h 5851 * 5852 **********************************************************************/ 5853 5854 static void 5855 _iflib_assert(if_shared_ctx_t sctx) 5856 { 5857 int i; 5858 5859 MPASS(sctx->isc_tx_maxsize); 5860 MPASS(sctx->isc_tx_maxsegsize); 5861 5862 MPASS(sctx->isc_rx_maxsize); 5863 MPASS(sctx->isc_rx_nsegments); 5864 MPASS(sctx->isc_rx_maxsegsize); 5865 5866 MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); 5867 for (i = 0; i < sctx->isc_nrxqs; i++) { 5868 MPASS(sctx->isc_nrxd_min[i]); 5869 MPASS(powerof2(sctx->isc_nrxd_min[i])); 5870 MPASS(sctx->isc_nrxd_max[i]); 5871 MPASS(powerof2(sctx->isc_nrxd_max[i])); 5872 MPASS(sctx->isc_nrxd_default[i]); 5873 MPASS(powerof2(sctx->isc_nrxd_default[i])); 5874 } 5875 5876 MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); 5877 for (i = 0; i < sctx->isc_ntxqs; i++) { 5878 MPASS(sctx->isc_ntxd_min[i]); 5879 MPASS(powerof2(sctx->isc_ntxd_min[i])); 5880 MPASS(sctx->isc_ntxd_max[i]); 5881 MPASS(powerof2(sctx->isc_ntxd_max[i])); 5882 MPASS(sctx->isc_ntxd_default[i]); 5883 MPASS(powerof2(sctx->isc_ntxd_default[i])); 5884 } 5885 } 5886 5887 static void 5888 _iflib_pre_assert(if_softc_ctx_t scctx) 5889 { 5890 5891 MPASS(scctx->isc_txrx->ift_txd_encap); 5892 MPASS(scctx->isc_txrx->ift_txd_flush); 5893 MPASS(scctx->isc_txrx->ift_txd_credits_update); 5894 MPASS(scctx->isc_txrx->ift_rxd_available); 5895 MPASS(scctx->isc_txrx->ift_rxd_pkt_get); 5896 MPASS(scctx->isc_txrx->ift_rxd_refill); 5897 MPASS(scctx->isc_txrx->ift_rxd_flush); 5898 } 5899 5900 static int 5901 iflib_register(if_ctx_t ctx) 5902 { 5903 if_shared_ctx_t sctx = ctx->ifc_sctx; 5904 driver_t *driver = sctx->isc_driver; 5905 device_t dev = ctx->ifc_dev; 5906 if_t ifp; 5907 u_char type; 5908 int iflags; 5909 5910 if ((sctx->isc_flags & IFLIB_PSEUDO) == 0) 5911 _iflib_assert(sctx); 5912 5913 CTX_LOCK_INIT(ctx); 5914 STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); 5915 if (sctx->isc_flags & IFLIB_PSEUDO) { 5916 if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) 5917 type = IFT_ETHER; 5918 else 5919 type = IFT_PPP; 5920 } else 5921 type = IFT_ETHER; 5922 ifp = ctx->ifc_ifp = if_alloc(type); 5923 if (ifp == NULL) { 5924 device_printf(dev, "can not allocate ifnet structure\n"); 5925 return (ENOMEM); 5926 } 5927 5928 /* 5929 * Initialize our context's device specific methods 5930 */ 5931 kobj_init((kobj_t) ctx, (kobj_class_t) driver); 5932 kobj_class_compile((kobj_class_t) driver); 5933 5934 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 5935 if_setsoftc(ifp, ctx); 5936 if_setdev(ifp, dev); 5937 if_setinitfn(ifp, iflib_if_init); 5938 if_setioctlfn(ifp, iflib_if_ioctl); 
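	/*
	 * With ALTQ compiled in, packets are routed through the legacy
	 * if_start/if_snd path so ALTQ can classify and queue them before
	 * iflib transmits; otherwise iflib's multiqueue if_transmit entry
	 * point is installed directly.
	 */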
#ifdef ALTQ
	if_setstartfn(ifp, iflib_altq_if_start);
	if_settransmitfn(ifp, iflib_altq_if_transmit);
	if_setsendqready(ifp);
#else
	if_settransmitfn(ifp, iflib_if_transmit);
#endif
	if_setqflushfn(ifp, iflib_if_qflush);
#ifndef __HAIKU__
	iflags = IFF_MULTICAST | IFF_KNOWSEPOCH;
#else
	iflags = IFF_MULTICAST;
#endif

	if ((sctx->isc_flags & IFLIB_PSEUDO) &&
	    (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0)
		iflags |= IFF_POINTOPOINT;
	else
		iflags |= IFF_BROADCAST | IFF_SIMPLEX;
	if_setflags(ifp, iflags);
	ctx->ifc_vlan_attach_event =
	    EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx,
	    EVENTHANDLER_PRI_FIRST);
	ctx->ifc_vlan_detach_event =
	    EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx,
	    EVENTHANDLER_PRI_FIRST);

	if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) {
		ctx->ifc_mediap = &ctx->ifc_media;
		ifmedia_init(ctx->ifc_mediap, IFM_IMASK,
		    iflib_media_change, iflib_media_status);
	}
	return (0);
}

static void
iflib_unregister_vlan_handlers(if_ctx_t ctx)
{
	/* Unregister VLAN events */
	if (ctx->ifc_vlan_attach_event != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event);
		ctx->ifc_vlan_attach_event = NULL;
	}
	if (ctx->ifc_vlan_detach_event != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event);
		ctx->ifc_vlan_detach_event = NULL;
	}
}

static void
iflib_deregister(if_ctx_t ctx)
{
	if_t ifp = ctx->ifc_ifp;

	/* Remove all media */
	ifmedia_removeall(&ctx->ifc_media);

	/* Ensure that VLAN event handlers are unregistered */
	iflib_unregister_vlan_handlers(ctx);

#ifndef __HAIKU__
	/* Release kobject reference */
	kobj_delete((kobj_t) ctx, NULL);
#endif

	/* Free the ifnet structure */
	if_free(ifp);

	STATE_LOCK_DESTROY(ctx);

	/* ether_ifdetach calls if_qflush - lock must be destroyed afterwards */
	CTX_LOCK_DESTROY(ctx);
}

static int
iflib_queues_alloc(if_ctx_t ctx)
{
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
	device_t dev = ctx->ifc_dev;
	int nrxqsets = scctx->isc_nrxqsets;
	int ntxqsets = scctx->isc_ntxqsets;
	iflib_txq_t txq;
	iflib_rxq_t rxq;
	iflib_fl_t fl = NULL;
	int i, j, cpu, err, txconf, rxconf;
	iflib_dma_info_t ifdip;
	uint32_t *rxqsizes = scctx->isc_rxqsizes;
	uint32_t *txqsizes = scctx->isc_txqsizes;
	uint8_t nrxqs = sctx->isc_nrxqs;
	uint8_t ntxqs = sctx->isc_ntxqs;
	int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1;
	int fl_offset = (sctx->isc_flags & IFLIB_HAS_RXCQ ?
1 : 0); 6033 caddr_t *vaddrs; 6034 uint64_t *paddrs; 6035 6036 KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); 6037 KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); 6038 KASSERT(nrxqs >= fl_offset + nfree_lists, 6039 ("there must be at least a rxq for each free list")); 6040 6041 /* Allocate the TX ring struct memory */ 6042 if (!(ctx->ifc_txqs = 6043 (iflib_txq_t) malloc(sizeof(struct iflib_txq) * 6044 ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 6045 device_printf(dev, "Unable to allocate TX ring memory\n"); 6046 err = ENOMEM; 6047 goto fail; 6048 } 6049 6050 /* Now allocate the RX */ 6051 if (!(ctx->ifc_rxqs = 6052 (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * 6053 nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 6054 device_printf(dev, "Unable to allocate RX ring memory\n"); 6055 err = ENOMEM; 6056 goto rx_fail; 6057 } 6058 6059 txq = ctx->ifc_txqs; 6060 rxq = ctx->ifc_rxqs; 6061 6062 /* 6063 * XXX handle allocation failure 6064 */ 6065 for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { 6066 /* Set up some basics */ 6067 6068 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, 6069 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 6070 device_printf(dev, 6071 "Unable to allocate TX DMA info memory\n"); 6072 err = ENOMEM; 6073 goto err_tx_desc; 6074 } 6075 txq->ift_ifdi = ifdip; 6076 for (j = 0; j < ntxqs; j++, ifdip++) { 6077 if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { 6078 device_printf(dev, 6079 "Unable to allocate TX descriptors\n"); 6080 err = ENOMEM; 6081 goto err_tx_desc; 6082 } 6083 txq->ift_txd_size[j] = scctx->isc_txd_size[j]; 6084 bzero((void *)ifdip->idi_vaddr, txqsizes[j]); 6085 } 6086 txq->ift_ctx = ctx; 6087 txq->ift_id = i; 6088 if (sctx->isc_flags & IFLIB_HAS_TXCQ) { 6089 txq->ift_br_offset = 1; 6090 } else { 6091 txq->ift_br_offset = 0; 6092 } 6093 6094 if (iflib_txsd_alloc(txq)) { 6095 device_printf(dev, "Critical Failure setting up TX buffers\n"); 6096 err = ENOMEM; 6097 goto err_tx_desc; 6098 } 6099 6100 /* Initialize the TX lock */ 6101 snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", 6102 device_get_nameunit(dev), txq->ift_id); 6103 mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); 6104 callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); 6105 #ifndef __HAIKU__ 6106 txq->ift_timer.c_cpu = cpu; 6107 #endif 6108 #ifdef DEV_NETMAP 6109 callout_init_mtx(&txq->ift_netmap_timer, &txq->ift_mtx, 0); 6110 txq->ift_netmap_timer.c_cpu = cpu; 6111 #endif /* DEV_NETMAP */ 6112 6113 err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, 6114 iflib_txq_can_drain, M_IFLIB, M_WAITOK); 6115 if (err) { 6116 /* XXX free any allocated rings */ 6117 device_printf(dev, "Unable to allocate buf_ring\n"); 6118 goto err_tx_desc; 6119 } 6120 } 6121 6122 for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { 6123 /* Set up some basics */ 6124 callout_init(&rxq->ifr_watchdog, 1); 6125 6126 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, 6127 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 6128 device_printf(dev, 6129 "Unable to allocate RX DMA info memory\n"); 6130 err = ENOMEM; 6131 goto err_tx_desc; 6132 } 6133 6134 rxq->ifr_ifdi = ifdip; 6135 /* XXX this needs to be changed if #rx queues != #tx queues */ 6136 rxq->ifr_ntxqirq = 1; 6137 rxq->ifr_txqid[0] = i; 6138 for (j = 0; j < nrxqs; j++, ifdip++) { 6139 if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { 6140 device_printf(dev, 6141 "Unable to allocate RX descriptors\n"); 6142 err = ENOMEM; 6143 goto err_tx_desc; 
6144 } 6145 bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); 6146 } 6147 rxq->ifr_ctx = ctx; 6148 rxq->ifr_id = i; 6149 rxq->ifr_fl_offset = fl_offset; 6150 rxq->ifr_nfl = nfree_lists; 6151 if (!(fl = 6152 (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { 6153 device_printf(dev, "Unable to allocate free list memory\n"); 6154 err = ENOMEM; 6155 goto err_tx_desc; 6156 } 6157 rxq->ifr_fl = fl; 6158 for (j = 0; j < nfree_lists; j++) { 6159 fl[j].ifl_rxq = rxq; 6160 fl[j].ifl_id = j; 6161 fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; 6162 fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; 6163 } 6164 /* Allocate receive buffers for the ring */ 6165 if (iflib_rxsd_alloc(rxq)) { 6166 device_printf(dev, 6167 "Critical Failure setting up receive buffers\n"); 6168 err = ENOMEM; 6169 goto err_rx_desc; 6170 } 6171 6172 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 6173 fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, 6174 M_WAITOK); 6175 } 6176 6177 /* TXQs */ 6178 vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 6179 paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 6180 for (i = 0; i < ntxqsets; i++) { 6181 iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; 6182 6183 for (j = 0; j < ntxqs; j++, di++) { 6184 vaddrs[i*ntxqs + j] = di->idi_vaddr; 6185 paddrs[i*ntxqs + j] = di->idi_paddr; 6186 } 6187 } 6188 if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { 6189 device_printf(ctx->ifc_dev, 6190 "Unable to allocate device TX queue\n"); 6191 iflib_tx_structures_free(ctx); 6192 free(vaddrs, M_IFLIB); 6193 free(paddrs, M_IFLIB); 6194 goto err_rx_desc; 6195 } 6196 free(vaddrs, M_IFLIB); 6197 free(paddrs, M_IFLIB); 6198 6199 /* RXQs */ 6200 vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 6201 paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 6202 for (i = 0; i < nrxqsets; i++) { 6203 iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; 6204 6205 for (j = 0; j < nrxqs; j++, di++) { 6206 vaddrs[i*nrxqs + j] = di->idi_vaddr; 6207 paddrs[i*nrxqs + j] = di->idi_paddr; 6208 } 6209 } 6210 if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { 6211 device_printf(ctx->ifc_dev, 6212 "Unable to allocate device RX queue\n"); 6213 iflib_tx_structures_free(ctx); 6214 free(vaddrs, M_IFLIB); 6215 free(paddrs, M_IFLIB); 6216 goto err_rx_desc; 6217 } 6218 free(vaddrs, M_IFLIB); 6219 free(paddrs, M_IFLIB); 6220 6221 return (0); 6222 6223 /* XXX handle allocation failure changes */ 6224 err_rx_desc: 6225 err_tx_desc: 6226 rx_fail: 6227 if (ctx->ifc_rxqs != NULL) 6228 free(ctx->ifc_rxqs, M_IFLIB); 6229 ctx->ifc_rxqs = NULL; 6230 if (ctx->ifc_txqs != NULL) 6231 free(ctx->ifc_txqs, M_IFLIB); 6232 ctx->ifc_txqs = NULL; 6233 fail: 6234 return (err); 6235 } 6236 6237 static int 6238 iflib_tx_structures_setup(if_ctx_t ctx) 6239 { 6240 iflib_txq_t txq = ctx->ifc_txqs; 6241 int i; 6242 6243 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 6244 iflib_txq_setup(txq); 6245 6246 return (0); 6247 } 6248 6249 static void 6250 iflib_tx_structures_free(if_ctx_t ctx) 6251 { 6252 iflib_txq_t txq = ctx->ifc_txqs; 6253 if_shared_ctx_t sctx = ctx->ifc_sctx; 6254 int i, j; 6255 6256 for (i = 0; i < NTXQSETS(ctx); i++, txq++) { 6257 for (j = 0; j < sctx->isc_ntxqs; j++) 6258 iflib_dma_free(&txq->ift_ifdi[j]); 6259 iflib_txq_destroy(txq); 6260 } 6261 free(ctx->ifc_txqs, M_IFLIB); 6262 ctx->ifc_txqs = NULL; 6263 } 6264 6265 
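/*
 * Note on the driver-facing contract of iflib_queues_alloc() above (a
 * sketch, not part of the build): the vaddrs/paddrs arrays handed to
 * IFDI_TX_QUEUES_ALLOC()/IFDI_RX_QUEUES_ALLOC() are flat, with queue j of
 * qset i at index (i * nqs + j).  A hypothetical driver callback would
 * consume them roughly as follows ("foo_*" names are illustrative only):
 *
 *	static int
 *	foo_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 *	    uint64_t *paddrs, int ntxqs, int ntxqsets)
 *	{
 *		struct foo_softc *sc = iflib_get_softc(ctx);
 *		int i;
 *
 *		for (i = 0; i < ntxqsets; i++) {
 *			sc->tx_rings[i].desc_base = vaddrs[i * ntxqs];
 *			sc->tx_rings[i].desc_paddr = paddrs[i * ntxqs];
 *		}
 *		return (0);
 *	}
 */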
/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
static int
iflib_rx_structures_setup(if_ctx_t ctx)
{
	iflib_rxq_t rxq = ctx->ifc_rxqs;
	int q;
#if defined(INET6) || defined(INET)
	int err, i;
#endif

	for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) {
#if defined(INET6) || defined(INET)
		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) {
			err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp,
			    TCP_LRO_ENTRIES, min(1024,
			    ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]));
			if (err != 0) {
				device_printf(ctx->ifc_dev,
				    "LRO Initialization failed!\n");
				goto fail;
			}
		}
#endif
		IFDI_RXQ_SETUP(ctx, rxq->ifr_id);
	}
	return (0);
#if defined(INET6) || defined(INET)
fail:
	/*
	 * Free LRO resources allocated so far; we only handle the rings
	 * that completed, since the failing case will have cleaned up
	 * after itself.  'q' failed, so it's the terminus.
	 */
	rxq = ctx->ifc_rxqs;
	for (i = 0; i < q; ++i, rxq++) {
		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
			tcp_lro_free(&rxq->ifr_lc);
	}
	return (err);
#endif
}

/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
static void
iflib_rx_structures_free(if_ctx_t ctx)
{
	iflib_rxq_t rxq = ctx->ifc_rxqs;
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	int i, j;

	for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
		for (j = 0; j < sctx->isc_nrxqs; j++)
			iflib_dma_free(&rxq->ifr_ifdi[j]);
		iflib_rx_sds_free(rxq);
#if defined(INET6) || defined(INET)
		if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO)
			tcp_lro_free(&rxq->ifr_lc);
#endif
	}
	free(ctx->ifc_rxqs, M_IFLIB);
	ctx->ifc_rxqs = NULL;
}

static int
iflib_qset_structures_setup(if_ctx_t ctx)
{
	int err;

	/*
	 * It is expected that the caller takes care of freeing queues if this
	 * fails.
	 */
	if ((err = iflib_tx_structures_setup(ctx)) != 0) {
		device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err);
		return (err);
	}

	if ((err = iflib_rx_structures_setup(ctx)) != 0)
		device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err);

	return (err);
}

int
iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
    driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name)
{

	return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
}

/* Just to avoid copy/paste */
static inline int
iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type,
    int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq,
    const char *name)
{
	device_t dev;
	unsigned int base_cpuid, cpuid;
	int err;

	dev = ctx->ifc_dev;
	base_cpuid = ctx->ifc_sysctl_core_offset;
	cpuid = get_cpuid_for_queue(ctx, base_cpuid, qid, type == IFLIB_INTR_TX);
	err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev,
	    irq ?
irq->ii_res : NULL, name); 6379 if (err) { 6380 device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); 6381 return (err); 6382 } 6383 #ifdef notyet 6384 if (cpuid > ctx->ifc_cpuid_highest) 6385 ctx->ifc_cpuid_highest = cpuid; 6386 #endif 6387 return (0); 6388 } 6389 6390 int 6391 iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, 6392 iflib_intr_type_t type, driver_filter_t *filter, 6393 void *filter_arg, int qid, const char *name) 6394 { 6395 device_t dev; 6396 struct grouptask *gtask; 6397 struct taskqgroup *tqg; 6398 iflib_filter_info_t info; 6399 gtask_fn_t *fn; 6400 int tqrid, err; 6401 driver_filter_t *intr_fast; 6402 void *q; 6403 6404 info = &ctx->ifc_filter_info; 6405 tqrid = rid; 6406 6407 switch (type) { 6408 /* XXX merge tx/rx for netmap? */ 6409 case IFLIB_INTR_TX: 6410 q = &ctx->ifc_txqs[qid]; 6411 info = &ctx->ifc_txqs[qid].ift_filter_info; 6412 gtask = &ctx->ifc_txqs[qid].ift_task; 6413 tqg = qgroup_if_io_tqg; 6414 fn = _task_fn_tx; 6415 intr_fast = iflib_fast_intr; 6416 GROUPTASK_INIT(gtask, 0, fn, q); 6417 ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; 6418 break; 6419 case IFLIB_INTR_RX: 6420 q = &ctx->ifc_rxqs[qid]; 6421 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 6422 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6423 tqg = qgroup_if_io_tqg; 6424 fn = _task_fn_rx; 6425 intr_fast = iflib_fast_intr; 6426 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6427 break; 6428 case IFLIB_INTR_RXTX: 6429 q = &ctx->ifc_rxqs[qid]; 6430 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 6431 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6432 tqg = qgroup_if_io_tqg; 6433 fn = _task_fn_rx; 6434 intr_fast = iflib_fast_intr_rxtx; 6435 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6436 break; 6437 case IFLIB_INTR_ADMIN: 6438 q = ctx; 6439 tqrid = -1; 6440 info = &ctx->ifc_filter_info; 6441 gtask = &ctx->ifc_admin_task; 6442 tqg = qgroup_if_config_tqg; 6443 fn = _task_fn_admin; 6444 intr_fast = iflib_fast_intr_ctx; 6445 break; 6446 default: 6447 device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", 6448 __func__); 6449 return (EINVAL); 6450 } 6451 6452 info->ifi_filter = filter; 6453 info->ifi_filter_arg = filter_arg; 6454 info->ifi_task = gtask; 6455 info->ifi_ctx = q; 6456 6457 dev = ctx->ifc_dev; 6458 err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); 6459 if (err != 0) { 6460 device_printf(dev, "_iflib_irq_alloc failed %d\n", err); 6461 return (err); 6462 } 6463 if (type == IFLIB_INTR_ADMIN) 6464 return (0); 6465 6466 if (tqrid != -1) { 6467 err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, 6468 name); 6469 if (err) 6470 return (err); 6471 } else { 6472 taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); 6473 } 6474 6475 return (0); 6476 } 6477 6478 void 6479 iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) 6480 { 6481 device_t dev; 6482 struct grouptask *gtask; 6483 struct taskqgroup *tqg; 6484 gtask_fn_t *fn; 6485 void *q; 6486 int err; 6487 6488 switch (type) { 6489 case IFLIB_INTR_TX: 6490 q = &ctx->ifc_txqs[qid]; 6491 gtask = &ctx->ifc_txqs[qid].ift_task; 6492 tqg = qgroup_if_io_tqg; 6493 fn = _task_fn_tx; 6494 GROUPTASK_INIT(gtask, 0, fn, q); 6495 break; 6496 case IFLIB_INTR_RX: 6497 q = &ctx->ifc_rxqs[qid]; 6498 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6499 tqg = qgroup_if_io_tqg; 6500 fn = _task_fn_rx; 6501 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6502 break; 6503 case IFLIB_INTR_IOV: 6504 q = ctx; 6505 gtask = &ctx->ifc_vflr_task; 6506 tqg = qgroup_if_config_tqg; 6507 fn = _task_fn_iov; 6508 
GROUPTASK_INIT(gtask, 0, fn, q); 6509 break; 6510 default: 6511 panic("unknown net intr type"); 6512 } 6513 err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); 6514 if (err) { 6515 dev = ctx->ifc_dev; 6516 taskqgroup_attach(tqg, gtask, q, dev, irq ? irq->ii_res : NULL, 6517 name); 6518 } 6519 } 6520 6521 void 6522 iflib_irq_free(if_ctx_t ctx, if_irq_t irq) 6523 { 6524 #ifdef __HAIKU__ 6525 if (!ctx || !irq) 6526 return; 6527 #endif 6528 6529 if (irq->ii_tag) 6530 bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); 6531 6532 if (irq->ii_res) 6533 bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, 6534 rman_get_rid(irq->ii_res), irq->ii_res); 6535 } 6536 6537 static int 6538 iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) 6539 { 6540 iflib_txq_t txq = ctx->ifc_txqs; 6541 iflib_rxq_t rxq = ctx->ifc_rxqs; 6542 if_irq_t irq = &ctx->ifc_legacy_irq; 6543 iflib_filter_info_t info; 6544 device_t dev; 6545 struct grouptask *gtask; 6546 struct resource *res; 6547 struct taskqgroup *tqg; 6548 void *q; 6549 int err, tqrid; 6550 bool rx_only; 6551 6552 q = &ctx->ifc_rxqs[0]; 6553 info = &rxq[0].ifr_filter_info; 6554 gtask = &rxq[0].ifr_task; 6555 tqg = qgroup_if_io_tqg; 6556 tqrid = *rid; 6557 rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0; 6558 6559 ctx->ifc_flags |= IFC_LEGACY; 6560 info->ifi_filter = filter; 6561 info->ifi_filter_arg = filter_arg; 6562 info->ifi_task = gtask; 6563 info->ifi_ctx = rx_only ? ctx : q; 6564 6565 dev = ctx->ifc_dev; 6566 /* We allocate a single interrupt resource */ 6567 err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx : 6568 iflib_fast_intr_rxtx, NULL, info, name); 6569 if (err != 0) 6570 return (err); 6571 NET_GROUPTASK_INIT(gtask, 0, _task_fn_rx, q); 6572 res = irq->ii_res; 6573 taskqgroup_attach(tqg, gtask, q, dev, res, name); 6574 6575 GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); 6576 taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, 6577 "tx"); 6578 return (0); 6579 } 6580 6581 void 6582 iflib_led_create(if_ctx_t ctx) 6583 { 6584 6585 ctx->ifc_led_dev = led_create(iflib_led_func, ctx, 6586 device_get_nameunit(ctx->ifc_dev)); 6587 } 6588 6589 void 6590 iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) 6591 { 6592 6593 GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); 6594 } 6595 6596 void 6597 iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) 6598 { 6599 6600 GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); 6601 } 6602 6603 void 6604 iflib_admin_intr_deferred(if_ctx_t ctx) 6605 { 6606 6607 MPASS(ctx->ifc_admin_task.gt_taskqueue != NULL); 6608 GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); 6609 } 6610 6611 void 6612 iflib_iov_intr_deferred(if_ctx_t ctx) 6613 { 6614 6615 GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); 6616 } 6617 6618 void 6619 iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) 6620 { 6621 6622 taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, 6623 name); 6624 } 6625 6626 void 6627 iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, 6628 const char *name) 6629 { 6630 6631 GROUPTASK_INIT(gtask, 0, fn, ctx); 6632 taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, 6633 name); 6634 } 6635 6636 void 6637 iflib_config_gtask_deinit(struct grouptask *gtask) 6638 { 6639 6640 taskqgroup_detach(qgroup_if_config_tqg, gtask); 6641 } 6642 6643 void 6644 iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) 6645 { 6646 if_t ifp = 
ctx->ifc_ifp; 6647 iflib_txq_t txq = ctx->ifc_txqs; 6648 6649 if_setbaudrate(ifp, baudrate); 6650 if (baudrate >= IF_Gbps(10)) { 6651 STATE_LOCK(ctx); 6652 ctx->ifc_flags |= IFC_PREFETCH; 6653 STATE_UNLOCK(ctx); 6654 } 6655 /* If link down, disable watchdog */ 6656 if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { 6657 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) 6658 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 6659 } 6660 ctx->ifc_link_state = link_state; 6661 if_link_state_change(ifp, link_state); 6662 } 6663 6664 static int 6665 iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) 6666 { 6667 int credits; 6668 #ifdef INVARIANTS 6669 int credits_pre = txq->ift_cidx_processed; 6670 #endif 6671 6672 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 6673 BUS_DMASYNC_POSTREAD); 6674 if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) 6675 return (0); 6676 6677 txq->ift_processed += credits; 6678 txq->ift_cidx_processed += credits; 6679 6680 MPASS(credits_pre + credits == txq->ift_cidx_processed); 6681 if (txq->ift_cidx_processed >= txq->ift_size) 6682 txq->ift_cidx_processed -= txq->ift_size; 6683 return (credits); 6684 } 6685 6686 static int 6687 iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) 6688 { 6689 iflib_fl_t fl; 6690 u_int i; 6691 6692 for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) 6693 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 6694 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 6695 return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, 6696 budget)); 6697 } 6698 6699 void 6700 iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, 6701 const char *description, if_int_delay_info_t info, 6702 int offset, int value) 6703 { 6704 info->iidi_ctx = ctx; 6705 info->iidi_offset = offset; 6706 info->iidi_value = value; 6707 SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), 6708 SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), 6709 OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 6710 info, 0, iflib_sysctl_int_delay, "I", description); 6711 } 6712 6713 struct sx * 6714 iflib_ctx_lock_get(if_ctx_t ctx) 6715 { 6716 6717 return (&ctx->ifc_ctx_sx); 6718 } 6719 6720 static int 6721 iflib_msix_init(if_ctx_t ctx) 6722 { 6723 device_t dev = ctx->ifc_dev; 6724 if_shared_ctx_t sctx = ctx->ifc_sctx; 6725 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 6726 int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; 6727 int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; 6728 6729 iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; 6730 iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; 6731 6732 if (bootverbose) 6733 device_printf(dev, "msix_init qsets capped at %d\n", 6734 imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); 6735 6736 /* Override by tuneable */ 6737 if (scctx->isc_disable_msix) 6738 goto msi; 6739 6740 /* First try MSI-X */ 6741 if ((msgs = pci_msix_count(dev)) == 0) { 6742 if (bootverbose) 6743 device_printf(dev, "MSI-X not supported or disabled\n"); 6744 goto msi; 6745 } 6746 6747 bar = ctx->ifc_softc_ctx.isc_msix_bar; 6748 /* 6749 * bar == -1 => "trust me I know what I'm doing" 6750 * Some drivers are for hardware that is so shoddily 6751 * documented that no one knows which bars are which 6752 * so the developer has to map all bars. This hack 6753 * allows shoddy garbage to use MSI-X in this framework. 
6754 */ 6755 if (bar != -1) { 6756 ctx->ifc_msix_mem = bus_alloc_resource_any(dev, 6757 SYS_RES_MEMORY, &bar, RF_ACTIVE); 6758 if (ctx->ifc_msix_mem == NULL) { 6759 device_printf(dev, "Unable to map MSI-X table\n"); 6760 goto msi; 6761 } 6762 } 6763 6764 admincnt = sctx->isc_admin_intrcnt; 6765 #if IFLIB_DEBUG 6766 /* use only 1 qset in debug mode */ 6767 queuemsgs = min(msgs - admincnt, 1); 6768 #else 6769 queuemsgs = msgs - admincnt; 6770 #endif 6771 #ifdef RSS 6772 queues = imin(queuemsgs, rss_getnumbuckets()); 6773 #else 6774 queues = queuemsgs; 6775 #endif 6776 #ifndef __HAIKU__ 6777 queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); 6778 if (bootverbose) 6779 device_printf(dev, 6780 "intr CPUs: %d queue msgs: %d admincnt: %d\n", 6781 CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); 6782 #endif 6783 #ifdef RSS 6784 /* If we're doing RSS, clamp at the number of RSS buckets */ 6785 if (queues > rss_getnumbuckets()) 6786 queues = rss_getnumbuckets(); 6787 #endif 6788 if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) 6789 rx_queues = iflib_num_rx_queues; 6790 else 6791 rx_queues = queues; 6792 6793 if (rx_queues > scctx->isc_nrxqsets) 6794 rx_queues = scctx->isc_nrxqsets; 6795 6796 /* 6797 * We want this to be all logical CPUs by default 6798 */ 6799 if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) 6800 tx_queues = iflib_num_tx_queues; 6801 else 6802 tx_queues = mp_ncpus; 6803 6804 if (tx_queues > scctx->isc_ntxqsets) 6805 tx_queues = scctx->isc_ntxqsets; 6806 6807 if (ctx->ifc_sysctl_qs_eq_override == 0) { 6808 #ifdef INVARIANTS 6809 if (tx_queues != rx_queues) 6810 device_printf(dev, 6811 "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", 6812 min(rx_queues, tx_queues), min(rx_queues, tx_queues)); 6813 #endif 6814 tx_queues = min(rx_queues, tx_queues); 6815 rx_queues = min(rx_queues, tx_queues); 6816 } 6817 6818 vectors = rx_queues + admincnt; 6819 if (msgs < vectors) { 6820 device_printf(dev, 6821 "insufficient number of MSI-X vectors " 6822 "(supported %d, need %d)\n", msgs, vectors); 6823 goto msi; 6824 } 6825 6826 device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, 6827 tx_queues); 6828 msgs = vectors; 6829 if ((err = pci_alloc_msix(dev, &vectors)) == 0) { 6830 if (vectors != msgs) { 6831 device_printf(dev, 6832 "Unable to allocate sufficient MSI-X vectors " 6833 "(got %d, need %d)\n", vectors, msgs); 6834 pci_release_msi(dev); 6835 if (bar != -1) { 6836 bus_release_resource(dev, SYS_RES_MEMORY, bar, 6837 ctx->ifc_msix_mem); 6838 ctx->ifc_msix_mem = NULL; 6839 } 6840 goto msi; 6841 } 6842 device_printf(dev, "Using MSI-X interrupts with %d vectors\n", 6843 vectors); 6844 scctx->isc_vectors = vectors; 6845 scctx->isc_nrxqsets = rx_queues; 6846 scctx->isc_ntxqsets = tx_queues; 6847 scctx->isc_intr = IFLIB_INTR_MSIX; 6848 6849 return (vectors); 6850 } else { 6851 device_printf(dev, 6852 "failed to allocate %d MSI-X vectors, err: %d\n", vectors, 6853 err); 6854 if (bar != -1) { 6855 bus_release_resource(dev, SYS_RES_MEMORY, bar, 6856 ctx->ifc_msix_mem); 6857 ctx->ifc_msix_mem = NULL; 6858 } 6859 } 6860 6861 msi: 6862 vectors = pci_msi_count(dev); 6863 scctx->isc_nrxqsets = 1; 6864 scctx->isc_ntxqsets = 1; 6865 scctx->isc_vectors = vectors; 6866 if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { 6867 device_printf(dev,"Using an MSI interrupt\n"); 6868 scctx->isc_intr = IFLIB_INTR_MSI; 6869 } else { 6870 scctx->isc_vectors = 1; 6871 device_printf(dev,"Using a Legacy interrupt\n"); 6872 scctx->isc_intr = 
IFLIB_INTR_LEGACY; 6873 } 6874 6875 return (vectors); 6876 } 6877 6878 static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; 6879 6880 #ifndef __HAIKU__ 6881 static int 6882 mp_ring_state_handler(SYSCTL_HANDLER_ARGS) 6883 { 6884 int rc; 6885 uint16_t *state = ((uint16_t *)oidp->oid_arg1); 6886 struct sbuf *sb; 6887 const char *ring_state = "UNKNOWN"; 6888 6889 /* XXX needed ? */ 6890 rc = sysctl_wire_old_buffer(req, 0); 6891 MPASS(rc == 0); 6892 if (rc != 0) 6893 return (rc); 6894 sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); 6895 MPASS(sb != NULL); 6896 if (sb == NULL) 6897 return (ENOMEM); 6898 if (state[3] <= 3) 6899 ring_state = ring_states[state[3]]; 6900 6901 sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", 6902 state[0], state[1], state[2], ring_state); 6903 rc = sbuf_finish(sb); 6904 sbuf_delete(sb); 6905 return(rc); 6906 } 6907 #endif 6908 6909 enum iflib_ndesc_handler { 6910 IFLIB_NTXD_HANDLER, 6911 IFLIB_NRXD_HANDLER, 6912 }; 6913 6914 static int 6915 mp_ndesc_handler(SYSCTL_HANDLER_ARGS) 6916 { 6917 if_ctx_t ctx = (void *)arg1; 6918 enum iflib_ndesc_handler type = arg2; 6919 char buf[256] = {0}; 6920 qidx_t *ndesc; 6921 char *p, *next; 6922 int nqs, rc, i; 6923 6924 nqs = 8; 6925 switch(type) { 6926 case IFLIB_NTXD_HANDLER: 6927 ndesc = ctx->ifc_sysctl_ntxds; 6928 if (ctx->ifc_sctx) 6929 nqs = ctx->ifc_sctx->isc_ntxqs; 6930 break; 6931 case IFLIB_NRXD_HANDLER: 6932 ndesc = ctx->ifc_sysctl_nrxds; 6933 if (ctx->ifc_sctx) 6934 nqs = ctx->ifc_sctx->isc_nrxqs; 6935 break; 6936 default: 6937 printf("%s: unhandled type\n", __func__); 6938 return (EINVAL); 6939 } 6940 if (nqs == 0) 6941 nqs = 8; 6942 6943 for (i=0; i<8; i++) { 6944 if (i >= nqs) 6945 break; 6946 if (i) 6947 strcat(buf, ","); 6948 sprintf(strchr(buf, 0), "%d", ndesc[i]); 6949 } 6950 6951 rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); 6952 if (rc || req->newptr == NULL) 6953 return rc; 6954 6955 for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; 6956 i++, p = strsep(&next, " ,")) { 6957 ndesc[i] = strtoul(p, NULL, 10); 6958 } 6959 6960 return(rc); 6961 } 6962 6963 #define NAME_BUFLEN 32 6964 static void 6965 iflib_add_device_sysctl_pre(if_ctx_t ctx) 6966 { 6967 #ifndef __HAIKU__ 6968 device_t dev = iflib_get_dev(ctx); 6969 struct sysctl_oid_list *child, *oid_list; 6970 struct sysctl_ctx_list *ctx_list; 6971 struct sysctl_oid *node; 6972 6973 ctx_list = device_get_sysctl_ctx(dev); 6974 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 6975 ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", 6976 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); 6977 oid_list = SYSCTL_CHILDREN(node); 6978 6979 SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", 6980 CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 6981 "driver version"); 6982 6983 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", 6984 CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, 6985 "# of txqs to use, 0 => use default #"); 6986 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", 6987 CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, 6988 "# of rxqs to use, 0 => use default #"); 6989 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", 6990 CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, 6991 "permit #txq != #rxq"); 6992 SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", 6993 CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, 6994 "disable MSI-X (default 0)"); 6995 SYSCTL_ADD_U16(ctx_list, oid_list, 
OID_AUTO, "rx_budget", 6996 CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, 6997 "set the RX budget"); 6998 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", 6999 CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, 7000 "cause TX to abdicate instead of running to completion"); 7001 ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; 7002 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", 7003 CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, 7004 "offset to start using cores at"); 7005 SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", 7006 CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, 7007 "use separate cores for TX and RX"); 7008 SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores", 7009 CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0, 7010 "try to make use of logical cores for TX and RX"); 7011 7012 /* XXX change for per-queue sizes */ 7013 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", 7014 CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, 7015 IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", 7016 "list of # of TX descriptors to use, 0 = use default #"); 7017 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", 7018 CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, 7019 IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", 7020 "list of # of RX descriptors to use, 0 = use default #"); 7021 #endif 7022 } 7023 7024 static void 7025 iflib_add_device_sysctl_post(if_ctx_t ctx) 7026 { 7027 #ifndef __HAIKU__ 7028 if_shared_ctx_t sctx = ctx->ifc_sctx; 7029 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 7030 device_t dev = iflib_get_dev(ctx); 7031 struct sysctl_oid_list *child; 7032 struct sysctl_ctx_list *ctx_list; 7033 iflib_fl_t fl; 7034 iflib_txq_t txq; 7035 iflib_rxq_t rxq; 7036 int i, j; 7037 char namebuf[NAME_BUFLEN]; 7038 char *qfmt; 7039 struct sysctl_oid *queue_node, *fl_node, *node; 7040 struct sysctl_oid_list *queue_list, *fl_list; 7041 ctx_list = device_get_sysctl_ctx(dev); 7042 7043 node = ctx->ifc_sysctl_node; 7044 child = SYSCTL_CHILDREN(node); 7045 7046 if (scctx->isc_ntxqsets > 100) 7047 qfmt = "txq%03d"; 7048 else if (scctx->isc_ntxqsets > 10) 7049 qfmt = "txq%02d"; 7050 else 7051 qfmt = "txq%d"; 7052 for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { 7053 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 7054 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 7055 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 7056 queue_list = SYSCTL_CHILDREN(queue_node); 7057 SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu", 7058 CTLFLAG_RD, 7059 &txq->ift_task.gt_cpu, 0, "cpu this queue is bound to"); 7060 #if MEMORY_LOGGING 7061 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", 7062 CTLFLAG_RD, 7063 &txq->ift_dequeued, "total mbufs freed"); 7064 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", 7065 CTLFLAG_RD, 7066 &txq->ift_enqueued, "total mbufs enqueued"); 7067 #endif 7068 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", 7069 CTLFLAG_RD, 7070 &txq->ift_mbuf_defrag, "# of times m_defrag was called"); 7071 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", 7072 CTLFLAG_RD, 7073 &txq->ift_pullups, "# of times m_pullup was called"); 7074 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", 7075 CTLFLAG_RD, 7076 &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); 7077 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", 7078 CTLFLAG_RD, 7079 &txq->ift_no_desc_avail, "# of times no descriptors were 
available"); 7080 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", 7081 CTLFLAG_RD, 7082 &txq->ift_map_failed, "# of times DMA map failed"); 7083 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", 7084 CTLFLAG_RD, 7085 &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); 7086 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", 7087 CTLFLAG_RD, 7088 &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); 7089 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", 7090 CTLFLAG_RD, 7091 &txq->ift_pidx, 1, "Producer Index"); 7092 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", 7093 CTLFLAG_RD, 7094 &txq->ift_cidx, 1, "Consumer Index"); 7095 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", 7096 CTLFLAG_RD, 7097 &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); 7098 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", 7099 CTLFLAG_RD, 7100 &txq->ift_in_use, 1, "descriptors in use"); 7101 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", 7102 CTLFLAG_RD, 7103 &txq->ift_processed, "descriptors procesed for clean"); 7104 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", 7105 CTLFLAG_RD, 7106 &txq->ift_cleaned, "total cleaned"); 7107 SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", 7108 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 7109 __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, 7110 mp_ring_state_handler, "A", "soft ring state"); 7111 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", 7112 CTLFLAG_RD, &txq->ift_br->enqueues, 7113 "# of enqueues to the mp_ring for this queue"); 7114 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", 7115 CTLFLAG_RD, &txq->ift_br->drops, 7116 "# of drops in the mp_ring for this queue"); 7117 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", 7118 CTLFLAG_RD, &txq->ift_br->starts, 7119 "# of normal consumer starts in the mp_ring for this queue"); 7120 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", 7121 CTLFLAG_RD, &txq->ift_br->stalls, 7122 "# of consumer stalls in the mp_ring for this queue"); 7123 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", 7124 CTLFLAG_RD, &txq->ift_br->restarts, 7125 "# of consumer restarts in the mp_ring for this queue"); 7126 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", 7127 CTLFLAG_RD, &txq->ift_br->abdications, 7128 "# of consumer abdications in the mp_ring for this queue"); 7129 } 7130 7131 if (scctx->isc_nrxqsets > 100) 7132 qfmt = "rxq%03d"; 7133 else if (scctx->isc_nrxqsets > 10) 7134 qfmt = "rxq%02d"; 7135 else 7136 qfmt = "rxq%d"; 7137 for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { 7138 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 7139 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 7140 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 7141 queue_list = SYSCTL_CHILDREN(queue_node); 7142 SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu", 7143 CTLFLAG_RD, 7144 &rxq->ifr_task.gt_cpu, 0, "cpu this queue is bound to"); 7145 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 7146 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", 7147 CTLFLAG_RD, 7148 &rxq->ifr_cq_cidx, 1, "Consumer Index"); 7149 } 7150 7151 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 7152 snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); 7153 fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, 7154 
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist Name"); 7155 fl_list = SYSCTL_CHILDREN(fl_node); 7156 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", 7157 CTLFLAG_RD, 7158 &fl->ifl_pidx, 1, "Producer Index"); 7159 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", 7160 CTLFLAG_RD, 7161 &fl->ifl_cidx, 1, "Consumer Index"); 7162 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", 7163 CTLFLAG_RD, 7164 &fl->ifl_credits, 1, "credits available"); 7165 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "buf_size", 7166 CTLFLAG_RD, 7167 &fl->ifl_buf_size, 1, "buffer size"); 7168 #if MEMORY_LOGGING 7169 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", 7170 CTLFLAG_RD, 7171 &fl->ifl_m_enqueued, "mbufs allocated"); 7172 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", 7173 CTLFLAG_RD, 7174 &fl->ifl_m_dequeued, "mbufs freed"); 7175 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", 7176 CTLFLAG_RD, 7177 &fl->ifl_cl_enqueued, "clusters allocated"); 7178 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", 7179 CTLFLAG_RD, 7180 &fl->ifl_cl_dequeued, "clusters freed"); 7181 #endif 7182 } 7183 } 7184 #endif 7185 } 7186 7187 void 7188 iflib_request_reset(if_ctx_t ctx) 7189 { 7190 7191 STATE_LOCK(ctx); 7192 ctx->ifc_flags |= IFC_DO_RESET; 7193 STATE_UNLOCK(ctx); 7194 } 7195 7196 #ifndef __NO_STRICT_ALIGNMENT 7197 static struct mbuf * 7198 iflib_fixup_rx(struct mbuf *m) 7199 { 7200 struct mbuf *n; 7201 7202 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { 7203 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); 7204 m->m_data += ETHER_HDR_LEN; 7205 n = m; 7206 } else { 7207 MGETHDR(n, M_NOWAIT, MT_DATA); 7208 if (n == NULL) { 7209 m_freem(m); 7210 return (NULL); 7211 } 7212 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); 7213 m->m_data += ETHER_HDR_LEN; 7214 m->m_len -= ETHER_HDR_LEN; 7215 n->m_len = ETHER_HDR_LEN; 7216 M_MOVE_PKTHDR(n, m); 7217 n->m_next = m; 7218 } 7219 return (n); 7220 } 7221 #endif 7222 7223 #ifdef DEBUGNET 7224 static void 7225 iflib_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) 7226 { 7227 if_ctx_t ctx; 7228 7229 ctx = if_getsoftc(ifp); 7230 CTX_LOCK(ctx); 7231 *nrxr = NRXQSETS(ctx); 7232 *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; 7233 *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; 7234 CTX_UNLOCK(ctx); 7235 } 7236 7237 static void 7238 iflib_debugnet_event(if_t ifp, enum debugnet_ev event) 7239 { 7240 if_ctx_t ctx; 7241 if_softc_ctx_t scctx; 7242 iflib_fl_t fl; 7243 iflib_rxq_t rxq; 7244 int i, j; 7245 7246 ctx = if_getsoftc(ifp); 7247 scctx = &ctx->ifc_softc_ctx; 7248 7249 switch (event) { 7250 case DEBUGNET_START: 7251 #ifndef __HAIKU__ 7252 for (i = 0; i < scctx->isc_nrxqsets; i++) { 7253 rxq = &ctx->ifc_rxqs[i]; 7254 for (j = 0; j < rxq->ifr_nfl; j++) { 7255 fl = rxq->ifr_fl; 7256 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 7257 } 7258 } 7259 iflib_no_tx_batch = 1; 7260 break; 7261 #endif 7262 default: 7263 break; 7264 } 7265 } 7266 7267 static int 7268 iflib_debugnet_transmit(if_t ifp, struct mbuf *m) 7269 { 7270 if_ctx_t ctx; 7271 iflib_txq_t txq; 7272 int error; 7273 7274 ctx = if_getsoftc(ifp); 7275 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 7276 IFF_DRV_RUNNING) 7277 return (EBUSY); 7278 7279 txq = &ctx->ifc_txqs[0]; 7280 error = iflib_encap(txq, &m); 7281 if (error == 0) 7282 (void)iflib_txd_db_check(txq, true); 7283 return (error); 7284 } 7285 7286 static int 7287 iflib_debugnet_poll(if_t ifp, int count) 7288 { 7289 struct epoch_tracker et; 7290 if_ctx_t ctx; 7291 if_softc_ctx_t scctx; 7292 
iflib_txq_t txq; 7293 int i; 7294 7295 ctx = if_getsoftc(ifp); 7296 scctx = &ctx->ifc_softc_ctx; 7297 7298 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 7299 IFF_DRV_RUNNING) 7300 return (EBUSY); 7301 7302 txq = &ctx->ifc_txqs[0]; 7303 (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 7304 7305 NET_EPOCH_ENTER(et); 7306 for (i = 0; i < scctx->isc_nrxqsets; i++) 7307 (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); 7308 NET_EPOCH_EXIT(et); 7309 return (0); 7310 } 7311 #endif /* DEBUGNET */ 7312