1 /*- 2 * Copyright (c) 2014-2018, Matthew Macy <mmacy@mattmacy.io> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 11 * 2. Neither the name of Matthew Macy nor the names of its 12 * contributors may be used to endorse or promote products derived from 13 * this software without specific prior written permission. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 #include <stdlib.h> 30 __FBSDID("$FreeBSD$"); 31 32 #ifndef __HAIKU__ 33 #include "opt_inet.h" 34 #include "opt_inet6.h" 35 #include "opt_acpi.h" 36 #include "opt_sched.h" 37 #endif 38 39 #include <sys/param.h> 40 #include <sys/types.h> 41 #include <sys/bus.h> 42 #include <sys/eventhandler.h> 43 #ifndef __HAIKU__ 44 #include <sys/jail.h> 45 #endif 46 #include <sys/kernel.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/sx.h> 50 #include <sys/module.h> 51 #include <sys/kobj.h> 52 #include <sys/rman.h> 53 #include <sys/sbuf.h> 54 #include <sys/smp.h> 55 #include <sys/socket.h> 56 #include <sys/sockio.h> 57 #include <sys/sysctl.h> 58 #include <sys/syslog.h> 59 #include <sys/taskqueue.h> 60 #include <sys/limits.h> 61 62 #include <net/if.h> 63 #include <net/if_var.h> 64 #include <net/if_types.h> 65 #include <net/if_media.h> 66 #include <net/bpf.h> 67 #include <net/ethernet.h> 68 #include <net/if_vlan_var.h> 69 #include <net/mp_ring.h> 70 #include <net/vnet.h> 71 #include <net/debugnet.h> 72 73 #include <netinet/in.h> 74 #ifndef __HAIKU__ 75 #include <netinet/in_pcb.h> 76 #include <netinet/tcp_lro.h> 77 #include <netinet/in_systm.h> 78 #endif 79 #include <netinet/if_ether.h> 80 #include <netinet/ip.h> 81 #include <netinet/ip6.h> 82 #include <netinet/tcp.h> 83 #include <netinet/ip_var.h> 84 #ifndef __HAIKU__ 85 #include <netinet6/ip6_var.h> 86 #endif 87 88 #include <machine/bus.h> 89 #ifndef __HAIKU__ 90 #include <machine/in_cksum.h> 91 #endif 92 93 #include <vm/vm.h> 94 #include <vm/pmap.h> 95 96 #include <dev/led/led.h> 97 #include <dev/pci/pcireg.h> 98 #include <dev/pci/pcivar.h> 99 #ifndef __HAIKU__ 100 #include <dev/pci/pci_private.h> 101 #endif 102 103 #include <net/iflib.h> 104 #include <net/iflib_private.h> 105 106 #include <ifdi_if.h> 107 #include <device_if.h> 108 109 #ifdef PCI_IOV 110 #include <dev/pci/pci_iov.h> 111 #endif 112 113 #include <sys/bitstring.h> 114 115 /* 116 * enable accounting of every mbuf as it comes in to and goes out of 117 * iflib's software descriptor references 118 */ 119 #define MEMORY_LOGGING 0 120 /* 121 * Enable mbuf 
vectors for compressing long mbuf chains 122 */ 123 124 /* 125 * NB: 126 * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead 127 * we prefetch needs to be determined by the time spent in m_free vis a vis 128 * the cost of a prefetch. This will of course vary based on the workload: 129 * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which 130 * is quite expensive, thus suggesting very little prefetch. 131 * - small packet forwarding which is just returning a single mbuf to 132 * UMA will typically be very fast vis a vis the cost of a memory 133 * access. 134 */ 135 136 /* 137 * File organization: 138 * - private structures 139 * - iflib private utility functions 140 * - ifnet functions 141 * - vlan registry and other exported functions 142 * - iflib public core functions 143 * 144 * 145 */ 146 MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); 147 148 #define IFLIB_RXEOF_MORE (1U << 0) 149 #define IFLIB_RXEOF_EMPTY (2U << 0) 150 151 struct iflib_txq; 152 typedef struct iflib_txq *iflib_txq_t; 153 struct iflib_rxq; 154 typedef struct iflib_rxq *iflib_rxq_t; 155 struct iflib_fl; 156 typedef struct iflib_fl *iflib_fl_t; 157 158 struct iflib_ctx; 159 160 static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); 161 static void iflib_timer(void *arg); 162 static void iflib_tqg_detach(if_ctx_t ctx); 163 164 typedef struct iflib_filter_info { 165 driver_filter_t *ifi_filter; 166 void *ifi_filter_arg; 167 struct grouptask *ifi_task; 168 void *ifi_ctx; 169 } *iflib_filter_info_t; 170 171 struct iflib_ctx { 172 KOBJ_FIELDS; 173 /* 174 * Pointer to hardware driver's softc 175 */ 176 void *ifc_softc; 177 device_t ifc_dev; 178 if_t ifc_ifp; 179 180 #ifndef __HAIKU__ 181 cpuset_t ifc_cpus; 182 #endif 183 if_shared_ctx_t ifc_sctx; 184 struct if_softc_ctx ifc_softc_ctx; 185 186 struct sx ifc_ctx_sx; 187 struct mtx ifc_state_mtx; 188 189 iflib_txq_t ifc_txqs; 190 iflib_rxq_t ifc_rxqs; 191 uint32_t ifc_if_flags; 192 uint32_t ifc_flags; 193 uint32_t ifc_max_fl_buf_size; 194 uint32_t ifc_rx_mbuf_sz; 195 196 int ifc_link_state; 197 int ifc_watchdog_events; 198 struct cdev *ifc_led_dev; 199 struct resource *ifc_msix_mem; 200 201 struct if_irq ifc_legacy_irq; 202 struct grouptask ifc_admin_task; 203 struct grouptask ifc_vflr_task; 204 struct iflib_filter_info ifc_filter_info; 205 struct ifmedia ifc_media; 206 struct ifmedia *ifc_mediap; 207 208 struct sysctl_oid *ifc_sysctl_node; 209 uint16_t ifc_sysctl_ntxqs; 210 uint16_t ifc_sysctl_nrxqs; 211 uint16_t ifc_sysctl_qs_eq_override; 212 uint16_t ifc_sysctl_rx_budget; 213 uint16_t ifc_sysctl_tx_abdicate; 214 uint16_t ifc_sysctl_core_offset; 215 #define CORE_OFFSET_UNSPECIFIED 0xffff 216 uint8_t ifc_sysctl_separate_txrx; 217 uint8_t ifc_sysctl_use_logical_cores; 218 bool ifc_cpus_are_physical_cores; 219 220 qidx_t ifc_sysctl_ntxds[8]; 221 qidx_t ifc_sysctl_nrxds[8]; 222 struct if_txrx ifc_txrx; 223 #define isc_txd_encap ifc_txrx.ift_txd_encap 224 #define isc_txd_flush ifc_txrx.ift_txd_flush 225 #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update 226 #define isc_rxd_available ifc_txrx.ift_rxd_available 227 #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get 228 #define isc_rxd_refill ifc_txrx.ift_rxd_refill 229 #define isc_rxd_flush ifc_txrx.ift_rxd_flush 230 #define isc_legacy_intr ifc_txrx.ift_legacy_intr 231 eventhandler_tag ifc_vlan_attach_event; 232 eventhandler_tag ifc_vlan_detach_event; 233 struct ether_addr ifc_mac; 234 }; 235 236 void * 237 iflib_get_softc(if_ctx_t ctx) 238 { 239 240 return 
(ctx->ifc_softc); 241 } 242 243 device_t 244 iflib_get_dev(if_ctx_t ctx) 245 { 246 247 return (ctx->ifc_dev); 248 } 249 250 if_t 251 iflib_get_ifp(if_ctx_t ctx) 252 { 253 254 return (ctx->ifc_ifp); 255 } 256 257 struct ifmedia * 258 iflib_get_media(if_ctx_t ctx) 259 { 260 261 return (ctx->ifc_mediap); 262 } 263 264 uint32_t 265 iflib_get_flags(if_ctx_t ctx) 266 { 267 return (ctx->ifc_flags); 268 } 269 270 void 271 iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) 272 { 273 274 bcopy(mac, ctx->ifc_mac.octet, ETHER_ADDR_LEN); 275 } 276 277 if_softc_ctx_t 278 iflib_get_softc_ctx(if_ctx_t ctx) 279 { 280 281 return (&ctx->ifc_softc_ctx); 282 } 283 284 if_shared_ctx_t 285 iflib_get_sctx(if_ctx_t ctx) 286 { 287 288 return (ctx->ifc_sctx); 289 } 290 291 #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) 292 #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) 293 #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) 294 295 #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) 296 #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) 297 298 typedef struct iflib_sw_rx_desc_array { 299 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 300 struct mbuf **ifsd_m; /* pkthdr mbufs */ 301 caddr_t *ifsd_cl; /* direct cluster pointer for rx */ 302 bus_addr_t *ifsd_ba; /* bus addr of cluster for rx */ 303 } iflib_rxsd_array_t; 304 305 typedef struct iflib_sw_tx_desc_array { 306 bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ 307 bus_dmamap_t *ifsd_tso_map; /* bus_dma maps for TSO packet */ 308 struct mbuf **ifsd_m; /* pkthdr mbufs */ 309 } if_txsd_vec_t; 310 311 /* magic number that should be high enough for any hardware */ 312 #define IFLIB_MAX_TX_SEGS 128 313 #define IFLIB_RX_COPY_THRESH 128 314 #define IFLIB_MAX_RX_REFRESH 32 315 /* The minimum descriptors per second before we start coalescing */ 316 #define IFLIB_MIN_DESC_SEC 16384 317 #define IFLIB_DEFAULT_TX_UPDATE_FREQ 16 318 #define IFLIB_QUEUE_IDLE 0 319 #define IFLIB_QUEUE_HUNG 1 320 #define IFLIB_QUEUE_WORKING 2 321 /* maximum number of txqs that can share an rx interrupt */ 322 #define IFLIB_MAX_TX_SHARED_INTR 4 323 324 /* this should really scale with ring size - this is a fairly arbitrary value */ 325 #define TX_BATCH_SIZE 32 326 327 #define IFLIB_RESTART_BUDGET 8 328 329 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ 330 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ 331 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) 332 333 struct iflib_txq { 334 qidx_t ift_in_use; 335 qidx_t ift_cidx; 336 qidx_t ift_cidx_processed; 337 qidx_t ift_pidx; 338 uint8_t ift_gen; 339 uint8_t ift_br_offset; 340 uint16_t ift_npending; 341 uint16_t ift_db_pending; 342 uint16_t ift_rs_pending; 343 /* implicit pad */ 344 uint8_t ift_txd_size[8]; 345 uint64_t ift_processed; 346 uint64_t ift_cleaned; 347 uint64_t ift_cleaned_prev; 348 #if MEMORY_LOGGING 349 uint64_t ift_enqueued; 350 uint64_t ift_dequeued; 351 #endif 352 uint64_t ift_no_tx_dma_setup; 353 uint64_t ift_no_desc_avail; 354 uint64_t ift_mbuf_defrag_failed; 355 uint64_t ift_mbuf_defrag; 356 uint64_t ift_map_failed; 357 uint64_t ift_txd_encap_efbig; 358 uint64_t ift_pullups; 359 uint64_t ift_last_timer_tick; 360 361 struct mtx ift_mtx; 362 struct mtx ift_db_mtx; 363 364 /* constant values */ 365 if_ctx_t ift_ctx; 366 struct ifmp_ring *ift_br; 367 struct grouptask ift_task; 368 qidx_t ift_size; 369 uint16_t ift_id; 370 struct callout ift_timer; 371 #ifdef DEV_NETMAP 372 struct callout ift_netmap_timer; 373 
#endif /* DEV_NETMAP */ 374 375 if_txsd_vec_t ift_sds; 376 uint8_t ift_qstatus; 377 uint8_t ift_closed; 378 uint8_t ift_update_freq; 379 struct iflib_filter_info ift_filter_info; 380 bus_dma_tag_t ift_buf_tag; 381 bus_dma_tag_t ift_tso_buf_tag; 382 iflib_dma_info_t ift_ifdi; 383 #define MTX_NAME_LEN 32 384 char ift_mtx_name[MTX_NAME_LEN]; 385 bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); 386 #ifdef IFLIB_DIAGNOSTICS 387 uint64_t ift_cpu_exec_count[256]; 388 #endif 389 } __aligned(CACHE_LINE_SIZE); 390 391 struct iflib_fl { 392 qidx_t ifl_cidx; 393 qidx_t ifl_pidx; 394 qidx_t ifl_credits; 395 uint8_t ifl_gen; 396 uint8_t ifl_rxd_size; 397 #if MEMORY_LOGGING 398 uint64_t ifl_m_enqueued; 399 uint64_t ifl_m_dequeued; 400 uint64_t ifl_cl_enqueued; 401 uint64_t ifl_cl_dequeued; 402 #endif 403 /* implicit pad */ 404 bitstr_t *ifl_rx_bitmap; 405 qidx_t ifl_fragidx; 406 /* constant */ 407 qidx_t ifl_size; 408 uint16_t ifl_buf_size; 409 uint16_t ifl_cltype; 410 #ifndef __HAIKU__ 411 uma_zone_t ifl_zone; 412 #endif 413 iflib_rxsd_array_t ifl_sds; 414 iflib_rxq_t ifl_rxq; 415 uint8_t ifl_id; 416 bus_dma_tag_t ifl_buf_tag; 417 iflib_dma_info_t ifl_ifdi; 418 uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); 419 qidx_t ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH]; 420 } __aligned(CACHE_LINE_SIZE); 421 422 static inline qidx_t 423 get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen) 424 { 425 qidx_t used; 426 427 if (pidx > cidx) 428 used = pidx - cidx; 429 else if (pidx < cidx) 430 used = size - cidx + pidx; 431 else if (gen == 0 && pidx == cidx) 432 used = 0; 433 else if (gen == 1 && pidx == cidx) 434 used = size; 435 else 436 panic("bad state"); 437 438 return (used); 439 } 440 441 #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) 442 443 #define IDXDIFF(head, tail, wrap) \ 444 ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) 445 446 struct iflib_rxq { 447 if_ctx_t ifr_ctx; 448 iflib_fl_t ifr_fl; 449 uint64_t ifr_rx_irq; 450 #ifndef __HAIKU__ 451 struct pfil_head *pfil; 452 #else 453 #define PFIL_PASS 0 454 #endif 455 /* 456 * If there is a separate completion queue (IFLIB_HAS_RXCQ), this is 457 * the completion queue consumer index. Otherwise it's unused. 
458 */ 459 qidx_t ifr_cq_cidx; 460 uint16_t ifr_id; 461 uint8_t ifr_nfl; 462 uint8_t ifr_ntxqirq; 463 uint8_t ifr_txqid[IFLIB_MAX_TX_SHARED_INTR]; 464 uint8_t ifr_fl_offset; 465 #ifndef __HAIKU__ 466 struct lro_ctrl ifr_lc; 467 #endif 468 struct grouptask ifr_task; 469 struct callout ifr_watchdog; 470 struct iflib_filter_info ifr_filter_info; 471 iflib_dma_info_t ifr_ifdi; 472 473 /* dynamically allocate if any drivers need a value substantially larger than this */ 474 struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); 475 #ifdef IFLIB_DIAGNOSTICS 476 uint64_t ifr_cpu_exec_count[256]; 477 #endif 478 } __aligned(CACHE_LINE_SIZE); 479 480 typedef struct if_rxsd { 481 caddr_t *ifsd_cl; 482 iflib_fl_t ifsd_fl; 483 } *if_rxsd_t; 484 485 /* multiple of word size */ 486 #ifdef __LP64__ 487 #define PKT_INFO_SIZE 6 488 #define RXD_INFO_SIZE 5 489 #define PKT_TYPE uint64_t 490 #else 491 #define PKT_INFO_SIZE 11 492 #define RXD_INFO_SIZE 8 493 #define PKT_TYPE uint32_t 494 #endif 495 #define PKT_LOOP_BOUND ((PKT_INFO_SIZE/3)*3) 496 #define RXD_LOOP_BOUND ((RXD_INFO_SIZE/4)*4) 497 498 typedef struct if_pkt_info_pad { 499 PKT_TYPE pkt_val[PKT_INFO_SIZE]; 500 } *if_pkt_info_pad_t; 501 typedef struct if_rxd_info_pad { 502 PKT_TYPE rxd_val[RXD_INFO_SIZE]; 503 } *if_rxd_info_pad_t; 504 505 CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info)); 506 CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info)); 507 508 static inline void 509 pkt_info_zero(if_pkt_info_t pi) 510 { 511 if_pkt_info_pad_t pi_pad; 512 513 pi_pad = (if_pkt_info_pad_t)pi; 514 pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0; 515 pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0; 516 #ifndef __LP64__ 517 pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0; 518 pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0; 519 #endif 520 } 521 522 #ifndef __HAIKU__ 523 static device_method_t iflib_pseudo_methods[] = { 524 DEVMETHOD(device_attach, noop_attach), 525 DEVMETHOD(device_detach, iflib_pseudo_detach), 526 DEVMETHOD_END 527 }; 528 529 driver_t iflib_pseudodriver = { 530 "iflib_pseudo", iflib_pseudo_methods, sizeof(struct iflib_ctx), 531 }; 532 #endif 533 534 static inline void 535 rxd_info_zero(if_rxd_info_t ri) 536 { 537 if_rxd_info_pad_t ri_pad; 538 int i; 539 540 ri_pad = (if_rxd_info_pad_t)ri; 541 for (i = 0; i < RXD_LOOP_BOUND; i += 4) { 542 ri_pad->rxd_val[i] = 0; 543 ri_pad->rxd_val[i+1] = 0; 544 ri_pad->rxd_val[i+2] = 0; 545 ri_pad->rxd_val[i+3] = 0; 546 } 547 #ifdef __LP64__ 548 ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0; 549 #endif 550 } 551 552 /* 553 * Only allow a single packet to take up most 1/nth of the tx ring 554 */ 555 #define MAX_SINGLE_PACKET_FRACTION 12 556 #define IF_BAD_DMA (bus_addr_t)-1 557 558 #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) 559 560 #define CTX_LOCK_INIT(_sc) sx_init(&(_sc)->ifc_ctx_sx, "iflib ctx lock") 561 #define CTX_LOCK(ctx) sx_xlock(&(ctx)->ifc_ctx_sx) 562 #define CTX_UNLOCK(ctx) sx_xunlock(&(ctx)->ifc_ctx_sx) 563 #define CTX_LOCK_DESTROY(ctx) sx_destroy(&(ctx)->ifc_ctx_sx) 564 565 #define STATE_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_state_mtx, _name, "iflib state lock", MTX_DEF) 566 #define STATE_LOCK(ctx) mtx_lock(&(ctx)->ifc_state_mtx) 567 #define STATE_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_state_mtx) 568 #define STATE_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_state_mtx) 569 570 #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) 571 #define 
CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) 572 573 void 574 iflib_set_detach(if_ctx_t ctx) 575 { 576 STATE_LOCK(ctx); 577 ctx->ifc_flags |= IFC_IN_DETACH; 578 STATE_UNLOCK(ctx); 579 } 580 581 /* Our boot-time initialization hook */ 582 static int iflib_module_event_handler(module_t, int, void *); 583 584 #ifndef __HAIKU__ 585 static moduledata_t iflib_moduledata = { 586 "iflib", 587 iflib_module_event_handler, 588 NULL 589 }; 590 #endif 591 592 DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); 593 MODULE_VERSION(iflib, 1); 594 595 MODULE_DEPEND(iflib, pci, 1, 1, 1); 596 MODULE_DEPEND(iflib, ether, 1, 1, 1); 597 598 TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1); 599 TASKQGROUP_DEFINE(if_config_tqg, 1, 1); 600 601 #ifndef IFLIB_DEBUG_COUNTERS 602 #ifdef INVARIANTS 603 #define IFLIB_DEBUG_COUNTERS 1 604 #else 605 #define IFLIB_DEBUG_COUNTERS 0 606 #endif /* !INVARIANTS */ 607 #endif 608 609 static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 610 "iflib driver parameters"); 611 612 /* 613 * XXX need to ensure that this can't accidentally cause the head to be moved backwards 614 */ 615 static int iflib_min_tx_latency = 0; 616 SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, 617 &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); 618 static int iflib_no_tx_batch = 0; 619 SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW, 620 &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput"); 621 static int iflib_timer_default = 1000; 622 SYSCTL_INT(_net_iflib, OID_AUTO, timer_default, CTLFLAG_RW, 623 &iflib_timer_default, 0, "number of ticks between iflib_timer calls"); 624 625 626 #if IFLIB_DEBUG_COUNTERS 627 628 static int iflib_tx_seen; 629 static int iflib_tx_sent; 630 static int iflib_tx_encap; 631 static int iflib_rx_allocs; 632 static int iflib_fl_refills; 633 static int iflib_fl_refills_large; 634 static int iflib_tx_frees; 635 636 SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, 637 &iflib_tx_seen, 0, "# TX mbufs seen"); 638 SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, 639 &iflib_tx_sent, 0, "# TX mbufs sent"); 640 SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, 641 &iflib_tx_encap, 0, "# TX mbufs encapped"); 642 SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, 643 &iflib_tx_frees, 0, "# TX frees"); 644 SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, 645 &iflib_rx_allocs, 0, "# RX allocations"); 646 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, 647 &iflib_fl_refills, 0, "# refills"); 648 SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, 649 &iflib_fl_refills_large, 0, "# large refills"); 650 651 static int iflib_txq_drain_flushing; 652 static int iflib_txq_drain_oactive; 653 static int iflib_txq_drain_notready; 654 655 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD, 656 &iflib_txq_drain_flushing, 0, "# drain flushes"); 657 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, 658 &iflib_txq_drain_oactive, 0, "# drain oactives"); 659 SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, 660 &iflib_txq_drain_notready, 0, "# drain notready"); 661 662 static int iflib_encap_load_mbuf_fail; 663 static int iflib_encap_pad_mbuf_fail; 664 static int iflib_encap_txq_avail_fail; 665 static int iflib_encap_txd_encap_fail; 666 667 SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, 668 &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); 669 
SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD, 670 &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures"); 671 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, 672 &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); 673 SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, 674 &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); 675 676 static int iflib_task_fn_rxs; 677 static int iflib_rx_intr_enables; 678 static int iflib_fast_intrs; 679 static int iflib_rx_unavail; 680 static int iflib_rx_ctx_inactive; 681 static int iflib_rx_if_input; 682 static int iflib_rxd_flush; 683 684 static int iflib_verbose_debug; 685 686 SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, 687 &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); 688 SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, 689 &iflib_rx_intr_enables, 0, "# RX intr enables"); 690 SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, 691 &iflib_fast_intrs, 0, "# fast_intr calls"); 692 SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, 693 &iflib_rx_unavail, 0, "# times rxeof called with no available data"); 694 SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, 695 &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); 696 SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, 697 &iflib_rx_if_input, 0, "# times rxeof called if_input"); 698 SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, 699 &iflib_rxd_flush, 0, "# times rxd_flush called"); 700 SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, 701 &iflib_verbose_debug, 0, "enable verbose debugging"); 702 703 #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) 704 static void 705 iflib_debug_reset(void) 706 { 707 iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs = 708 iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees = 709 iflib_txq_drain_flushing = iflib_txq_drain_oactive = 710 iflib_txq_drain_notready = 711 iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail = 712 iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail = 713 iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs = 714 iflib_rx_unavail = 715 iflib_rx_ctx_inactive = iflib_rx_if_input = 716 iflib_rxd_flush = 0; 717 } 718 719 #else 720 #define DBG_COUNTER_INC(name) 721 static void iflib_debug_reset(void) {} 722 #endif 723 724 #define IFLIB_DEBUG 0 725 726 static void iflib_tx_structures_free(if_ctx_t ctx); 727 static void iflib_rx_structures_free(if_ctx_t ctx); 728 static int iflib_queues_alloc(if_ctx_t ctx); 729 static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); 730 static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget); 731 static int iflib_qset_structures_setup(if_ctx_t ctx); 732 static int iflib_msix_init(if_ctx_t ctx); 733 static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, const char *str); 734 static void iflib_txq_check_drain(iflib_txq_t txq, int budget); 735 static uint32_t iflib_txq_can_drain(struct ifmp_ring *); 736 #ifdef ALTQ 737 static void iflib_altq_if_start(if_t ifp); 738 static int iflib_altq_if_transmit(if_t ifp, struct mbuf *m); 739 #endif 740 static int iflib_register(if_ctx_t); 741 static void iflib_deregister(if_ctx_t); 742 static void iflib_unregister_vlan_handlers(if_ctx_t ctx); 743 static uint16_t iflib_get_mbuf_size_for(unsigned int size); 744 static void iflib_init_locked(if_ctx_t ctx); 745 static void 
iflib_add_device_sysctl_pre(if_ctx_t ctx); 746 static void iflib_add_device_sysctl_post(if_ctx_t ctx); 747 static void iflib_ifmp_purge(iflib_txq_t txq); 748 static void _iflib_pre_assert(if_softc_ctx_t scctx); 749 static void iflib_if_init_locked(if_ctx_t ctx); 750 static void iflib_free_intr_mem(if_ctx_t ctx); 751 #ifndef __NO_STRICT_ALIGNMENT 752 static struct mbuf * iflib_fixup_rx(struct mbuf *m); 753 #endif 754 755 #ifndef __HAIKU__ 756 static SLIST_HEAD(cpu_offset_list, cpu_offset) cpu_offsets = 757 SLIST_HEAD_INITIALIZER(cpu_offsets); 758 struct cpu_offset { 759 SLIST_ENTRY(cpu_offset) entries; 760 cpuset_t set; 761 unsigned int refcount; 762 uint16_t next_cpuid; 763 }; 764 static struct mtx cpu_offset_mtx; 765 MTX_SYSINIT(iflib_cpu_offset, &cpu_offset_mtx, "iflib_cpu_offset lock", 766 MTX_DEF); 767 #endif 768 769 DEBUGNET_DEFINE(iflib); 770 771 static int 772 iflib_num_rx_descs(if_ctx_t ctx) 773 { 774 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 775 if_shared_ctx_t sctx = ctx->ifc_sctx; 776 uint16_t first_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; 777 778 return scctx->isc_nrxd[first_rxq]; 779 } 780 781 static int 782 iflib_num_tx_descs(if_ctx_t ctx) 783 { 784 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 785 if_shared_ctx_t sctx = ctx->ifc_sctx; 786 uint16_t first_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; 787 788 return scctx->isc_ntxd[first_txq]; 789 } 790 791 #ifdef DEV_NETMAP 792 #include <sys/selinfo.h> 793 #include <net/netmap.h> 794 #include <dev/netmap/netmap_kern.h> 795 796 MODULE_DEPEND(iflib, netmap, 1, 1, 1); 797 798 static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init); 799 static void iflib_netmap_timer(void *arg); 800 801 /* 802 * device-specific sysctl variables: 803 * 804 * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. 805 * During regular operations the CRC is stripped, but on some 806 * hardware reception of frames not multiple of 64 is slower, 807 * so using crcstrip=0 helps in benchmarks. 808 * 809 * iflib_rx_miss, iflib_rx_miss_bufs: 810 * count packets that might be missed due to lost interrupts. 811 */ 812 SYSCTL_DECL(_dev_netmap); 813 /* 814 * The xl driver by default strips CRCs and we do not override it. 815 */ 816 817 int iflib_crcstrip = 1; 818 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, 819 CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on RX frames"); 820 821 int iflib_rx_miss, iflib_rx_miss_bufs; 822 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, 823 CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed RX intr"); 824 SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, 825 CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed RX intr bufs"); 826 827 /* 828 * Register/unregister. We are already under netmap lock. 829 * Only called on the first register or the last unregister. 830 */ 831 static int 832 iflib_netmap_register(struct netmap_adapter *na, int onoff) 833 { 834 if_t ifp = na->ifp; 835 if_ctx_t ctx = ifp->if_softc; 836 int status; 837 838 CTX_LOCK(ctx); 839 if (!CTX_IS_VF(ctx)) 840 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); 841 842 iflib_stop(ctx); 843 844 /* 845 * Enable (or disable) netmap flags, and intercept (or restore) 846 * ifp->if_transmit. This is done once the device has been stopped 847 * to prevent race conditions. Also, this must be done after 848 * calling netmap_disable_all_rings() and before calling 849 * netmap_enable_all_rings(), so that these two functions see the 850 * updated state of the NAF_NETMAP_ON bit. 
851 */ 852 if (onoff) { 853 nm_set_native_flags(na); 854 } else { 855 nm_clear_native_flags(na); 856 } 857 858 iflib_init_locked(ctx); 859 IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); // XXX why twice ? 860 status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1; 861 if (status) 862 nm_clear_native_flags(na); 863 CTX_UNLOCK(ctx); 864 return (status); 865 } 866 867 static int 868 iflib_netmap_config(struct netmap_adapter *na, struct nm_config_info *info) 869 { 870 if_t ifp = na->ifp; 871 if_ctx_t ctx = ifp->if_softc; 872 iflib_rxq_t rxq = &ctx->ifc_rxqs[0]; 873 iflib_fl_t fl = &rxq->ifr_fl[0]; 874 875 info->num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; 876 info->num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; 877 info->num_tx_descs = iflib_num_tx_descs(ctx); 878 info->num_rx_descs = iflib_num_rx_descs(ctx); 879 info->rx_buf_maxsize = fl->ifl_buf_size; 880 nm_prinf("txr %u rxr %u txd %u rxd %u rbufsz %u", 881 info->num_tx_rings, info->num_rx_rings, info->num_tx_descs, 882 info->num_rx_descs, info->rx_buf_maxsize); 883 884 return 0; 885 } 886 887 static int 888 netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, bool init) 889 { 890 struct netmap_adapter *na = kring->na; 891 u_int const lim = kring->nkr_num_slots - 1; 892 struct netmap_ring *ring = kring->ring; 893 bus_dmamap_t *map; 894 struct if_rxd_update iru; 895 if_ctx_t ctx = rxq->ifr_ctx; 896 iflib_fl_t fl = &rxq->ifr_fl[0]; 897 u_int nic_i_first, nic_i; 898 u_int nm_i; 899 int i, n; 900 #if IFLIB_DEBUG_COUNTERS 901 int rf_count = 0; 902 #endif 903 904 /* 905 * This function is used both at initialization and in rxsync. 906 * At initialization we need to prepare (with isc_rxd_refill()) 907 * all the netmap buffers currently owned by the kernel, in 908 * such a way to keep fl->ifl_pidx and kring->nr_hwcur in sync 909 * (except for kring->nkr_hwofs). These may be less than 910 * kring->nkr_num_slots if netmap_reset() was called while 911 * an application using the kring that still owned some 912 * buffers. 913 * At rxsync time, both indexes point to the next buffer to be 914 * refilled. 915 * In any case we publish (with isc_rxd_flush()) up to 916 * (fl->ifl_pidx - 1) % N (included), to avoid the NIC tail/prod 917 * pointer to overrun the head/cons pointer, although this is 918 * not necessary for some NICs (e.g. vmx). 919 */ 920 if (__predict_false(init)) { 921 n = kring->nkr_num_slots - nm_kr_rxspace(kring); 922 } else { 923 n = kring->rhead - kring->nr_hwcur; 924 if (n == 0) 925 return (0); /* Nothing to do. */ 926 if (n < 0) 927 n += kring->nkr_num_slots; 928 } 929 930 iru_init(&iru, rxq, 0 /* flid */); 931 map = fl->ifl_sds.ifsd_map; 932 nic_i = fl->ifl_pidx; 933 nm_i = netmap_idx_n2k(kring, nic_i); 934 if (__predict_false(init)) { 935 /* 936 * On init/reset, nic_i must be 0, and we must 937 * start to refill from hwtail (see netmap_reset()). 
938 */ 939 MPASS(nic_i == 0); 940 MPASS(nm_i == kring->nr_hwtail); 941 } else 942 MPASS(nm_i == kring->nr_hwcur); 943 DBG_COUNTER_INC(fl_refills); 944 while (n > 0) { 945 #if IFLIB_DEBUG_COUNTERS 946 if (++rf_count == 9) 947 DBG_COUNTER_INC(fl_refills_large); 948 #endif 949 nic_i_first = nic_i; 950 for (i = 0; n > 0 && i < IFLIB_MAX_RX_REFRESH; n--, i++) { 951 struct netmap_slot *slot = &ring->slot[nm_i]; 952 void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[i]); 953 954 MPASS(i < IFLIB_MAX_RX_REFRESH); 955 956 if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ 957 return netmap_ring_reinit(kring); 958 959 fl->ifl_rxd_idxs[i] = nic_i; 960 961 if (__predict_false(init)) { 962 netmap_load_map(na, fl->ifl_buf_tag, 963 map[nic_i], addr); 964 } else if (slot->flags & NS_BUF_CHANGED) { 965 /* buffer has changed, reload map */ 966 netmap_reload_map(na, fl->ifl_buf_tag, 967 map[nic_i], addr); 968 } 969 bus_dmamap_sync(fl->ifl_buf_tag, map[nic_i], 970 BUS_DMASYNC_PREREAD); 971 slot->flags &= ~NS_BUF_CHANGED; 972 973 nm_i = nm_next(nm_i, lim); 974 nic_i = nm_next(nic_i, lim); 975 } 976 977 iru.iru_pidx = nic_i_first; 978 iru.iru_count = i; 979 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 980 } 981 fl->ifl_pidx = nic_i; 982 /* 983 * At the end of the loop we must have refilled everything 984 * we could possibly refill. 985 */ 986 MPASS(nm_i == kring->rhead); 987 kring->nr_hwcur = nm_i; 988 989 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 990 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 991 ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, 992 nm_prev(nic_i, lim)); 993 DBG_COUNTER_INC(rxd_flush); 994 995 return (0); 996 } 997 998 #define NETMAP_TX_TIMER_US 90 999 1000 /* 1001 * Reconcile kernel and user view of the transmit ring. 1002 * 1003 * All information is in the kring. 1004 * Userspace wants to send packets up to the one before kring->rhead, 1005 * kernel knows kring->nr_hwcur is the first unsent packet. 1006 * 1007 * Here we push packets out (as many as possible), and possibly 1008 * reclaim buffers from previously completed transmission. 1009 * 1010 * The caller (netmap) guarantees that there is only one instance 1011 * running at any time. Any interference with other driver 1012 * methods should be handled by the individual drivers. 1013 */ 1014 static int 1015 iflib_netmap_txsync(struct netmap_kring *kring, int flags) 1016 { 1017 struct netmap_adapter *na = kring->na; 1018 if_t ifp = na->ifp; 1019 struct netmap_ring *ring = kring->ring; 1020 u_int nm_i; /* index into the netmap kring */ 1021 u_int nic_i; /* index into the NIC ring */ 1022 u_int n; 1023 u_int const lim = kring->nkr_num_slots - 1; 1024 u_int const head = kring->rhead; 1025 struct if_pkt_info pi; 1026 int tx_pkts = 0, tx_bytes = 0; 1027 1028 /* 1029 * interrupts on every tx packet are expensive so request 1030 * them every half ring, or where NS_REPORT is set 1031 */ 1032 u_int report_frequency = kring->nkr_num_slots >> 1; 1033 /* device-specific */ 1034 if_ctx_t ctx = ifp->if_softc; 1035 iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; 1036 1037 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 1038 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 1039 1040 /* 1041 * First part: process new packets to send. 1042 * nm_i is the current index in the netmap kring, 1043 * nic_i is the corresponding index in the NIC ring. 1044 * 1045 * If we have packets to send (nm_i != head) 1046 * iterate over the netmap ring, fetch length and update 1047 * the corresponding slot in the NIC ring. 
Some drivers also
 * need to update the buffer's physical address in the NIC slot
 * even if NS_BUF_CHANGED is not set (PNMB computes the addresses).
 *
 * The netmap_reload_map() call is especially expensive,
 * even when (as in this case) the tag is 0, so do it only
 * when the buffer has actually changed.
 *
 * If possible do not set the report/intr bit on all slots,
 * but only a few times per ring or when NS_REPORT is set.
 *
 * Finally, on 10G and faster drivers, it might be useful
 * to prefetch the next slot and txr entry.
 */

    nm_i = kring->nr_hwcur;
    if (nm_i != head) {	/* we have new packets to send */
        uint32_t pkt_len = 0, seg_idx = 0;
        int nic_i_start = -1, flags = 0;
        pkt_info_zero(&pi);
        pi.ipi_segs = txq->ift_segs;
        pi.ipi_qsidx = kring->ring_id;
        nic_i = netmap_idx_k2n(kring, nm_i);

        __builtin_prefetch(&ring->slot[nm_i]);
        __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
        __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);

        for (n = 0; nm_i != head; n++) {
            struct netmap_slot *slot = &ring->slot[nm_i];
            u_int len = slot->len;
            uint64_t paddr;
            void *addr = PNMB(na, slot, &paddr);

            flags |= (slot->flags & NS_REPORT ||
                nic_i == 0 || nic_i == report_frequency) ?
                IPI_TX_INTR : 0;

            /*
             * If this is the first packet fragment, save the
             * index of the first NIC slot for later.
             */
            if (nic_i_start < 0)
                nic_i_start = nic_i;

            pi.ipi_segs[seg_idx].ds_addr = paddr;
            pi.ipi_segs[seg_idx].ds_len = len;
            if (len) {
                pkt_len += len;
                seg_idx++;
            }

            if (!(slot->flags & NS_MOREFRAG)) {
                pi.ipi_len = pkt_len;
                pi.ipi_nsegs = seg_idx;
                pi.ipi_pidx = nic_i_start;
                pi.ipi_ndescs = 0;
                pi.ipi_flags = flags;

                /* Prepare the NIC TX ring. */
                ctx->isc_txd_encap(ctx->ifc_softc, &pi);
                DBG_COUNTER_INC(tx_encap);

                /* Update transmit counters */
                tx_bytes += pi.ipi_len;
                tx_pkts++;

                /* Reinit per-packet info for the next one. */
                flags = seg_idx = pkt_len = 0;
                nic_i_start = -1;
            }

            /* prefetch for next round */
            __builtin_prefetch(&ring->slot[nm_i + 1]);
            __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
            __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);

            NM_CHECK_ADDR_LEN(na, addr, len);

            if (slot->flags & NS_BUF_CHANGED) {
                /* buffer has changed, reload map */
                netmap_reload_map(na, txq->ift_buf_tag,
                    txq->ift_sds.ifsd_map[nic_i], addr);
            }
            /* make sure changes to the buffer are synced */
            bus_dmamap_sync(txq->ift_buf_tag,
                txq->ift_sds.ifsd_map[nic_i],
                BUS_DMASYNC_PREWRITE);

            slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED | NS_MOREFRAG);
            nm_i = nm_next(nm_i, lim);
            nic_i = nm_next(nic_i, lim);
        }
        kring->nr_hwcur = nm_i;

        /* synchronize the NIC ring */
        bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        /* (re)start the tx unit up to slot nic_i (excluded) */
        ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i);
    }

    /*
     * Second part: reclaim buffers for completed transmissions.
     *
     * If there are unclaimed buffers, attempt to reclaim them.
     * If we don't manage to reclaim them all, and TX IRQs are not in use,
     * trigger a per-tx-queue timer to try again later.
     */
    if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) {
        if (iflib_tx_credits_update(ctx, txq)) {
            /* some tx completed, increment avail */
            nic_i = txq->ift_cidx_processed;
            kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
        }
    }

    if (!(ctx->ifc_flags & IFC_NETMAP_TX_IRQ))
        if (kring->nr_hwtail != nm_prev(kring->nr_hwcur, lim)) {
            callout_reset_sbt_on(&txq->ift_netmap_timer,
                NETMAP_TX_TIMER_US * SBT_1US, SBT_1US,
                iflib_netmap_timer, txq,
                txq->ift_netmap_timer.c_cpu, 0);
        }

    if_inc_counter(ifp, IFCOUNTER_OBYTES, tx_bytes);
    if_inc_counter(ifp, IFCOUNTER_OPACKETS, tx_pkts);

    return (0);
}
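/*
 * Illustrative sketch (not compiled): roughly how a driver-side
 * ift_txd_encap callback consumes the if_pkt_info that the txsync loop
 * above fills in.  The descriptor layout ("struct hypothetical_txd") and
 * the softc fields are invented for the example; only the if_pkt_info
 * members (ipi_segs, ipi_nsegs, ipi_pidx, ipi_new_pidx, ipi_flags,
 * ipi_qsidx) come from iflib itself.
 */
#if 0
struct hypothetical_txd {		/* invented descriptor format */
    uint64_t	addr;
    uint32_t	len;
    uint32_t	flags;
#define	HTXD_EOP	0x1		/* end of packet */
#define	HTXD_INTR	0x2		/* interrupt on completion */
};

static int
example_txd_encap(void *arg, if_pkt_info_t pi)
{
    struct example_softc *sc = arg;	/* hypothetical softc */
    struct hypothetical_txd *ring = sc->tx_rings[pi->ipi_qsidx];
    qidx_t pidx = pi->ipi_pidx;
    int i;

    for (i = 0; i < pi->ipi_nsegs; i++) {
        ring[pidx].addr = htole64(pi->ipi_segs[i].ds_addr);
        ring[pidx].len = htole32(pi->ipi_segs[i].ds_len);
        ring[pidx].flags = 0;
        if (i == pi->ipi_nsegs - 1) {
            ring[pidx].flags |= htole32(HTXD_EOP);
            if (pi->ipi_flags & IPI_TX_INTR)
                ring[pidx].flags |= htole32(HTXD_INTR);
        }
        /* power-of-2 ring assumed for the example */
        pidx = (pidx + 1) & (sc->tx_nslots - 1);
    }
    /* Tell iflib where the next free descriptor now is. */
    pi->ipi_new_pidx = pidx;
    return (0);
}
#endif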
/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocation, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
 */
static int
iflib_netmap_rxsync(struct netmap_kring *kring, int flags)
{
    struct netmap_adapter *na = kring->na;
    struct netmap_ring *ring = kring->ring;
    if_t ifp = na->ifp;
    uint32_t nm_i;	/* index into the netmap ring */
    uint32_t nic_i;	/* index into the NIC ring */
    u_int n;
    u_int const lim = kring->nkr_num_slots - 1;
    int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
    int i = 0, rx_bytes = 0, rx_pkts = 0;

    if_ctx_t ctx = ifp->if_softc;
    if_shared_ctx_t sctx = ctx->ifc_sctx;
    if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
    iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id];
    iflib_fl_t fl = &rxq->ifr_fl[0];
    struct if_rxd_info ri;
    qidx_t *cidxp;

    /*
     * netmap only uses free list 0, to avoid out of order consumption
     * of receive buffers
     */

    bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
        BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

    /*
     * First part: import newly received packets.
     *
     * nm_i is the index of the next free slot in the netmap ring,
     * nic_i is the index of the next received packet in the NIC ring
     * (or in the free list 0 if IFLIB_HAS_RXCQ is set), and they may
     * differ in case if_init() has been called while
     * in netmap mode. For the receive ring we have
     *
     *	nic_i = fl->ifl_cidx;
     *	nm_i = kring->nr_hwtail (previous)
     * and
     *	nm_i == (nic_i + kring->nkr_hwofs) % ring_size
     *
     * fl->ifl_cidx is set to 0 on a ring reinit
     */
    if (netmap_no_pendintr || force_update) {
        uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
        bool have_rxcq = sctx->isc_flags & IFLIB_HAS_RXCQ;
        int crclen = iflib_crcstrip ? 0 : 4;
        int error, avail;

        /*
         * For the free list consumer index, we use the same
         * logic as in iflib_rxeof().
1246 */ 1247 if (have_rxcq) 1248 cidxp = &rxq->ifr_cq_cidx; 1249 else 1250 cidxp = &fl->ifl_cidx; 1251 avail = ctx->isc_rxd_available(ctx->ifc_softc, 1252 rxq->ifr_id, *cidxp, USHRT_MAX); 1253 1254 nic_i = fl->ifl_cidx; 1255 nm_i = netmap_idx_n2k(kring, nic_i); 1256 MPASS(nm_i == kring->nr_hwtail); 1257 for (n = 0; avail > 0 && nm_i != hwtail_lim; n++, avail--) { 1258 rxd_info_zero(&ri); 1259 ri.iri_frags = rxq->ifr_frags; 1260 ri.iri_qsidx = kring->ring_id; 1261 ri.iri_ifp = ctx->ifc_ifp; 1262 ri.iri_cidx = *cidxp; 1263 1264 error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 1265 for (i = 0; i < ri.iri_nfrags; i++) { 1266 if (error) { 1267 ring->slot[nm_i].len = 0; 1268 ring->slot[nm_i].flags = 0; 1269 } else { 1270 ring->slot[nm_i].len = ri.iri_frags[i].irf_len; 1271 if (i == (ri.iri_nfrags - 1)) { 1272 ring->slot[nm_i].len -= crclen; 1273 ring->slot[nm_i].flags = 0; 1274 1275 /* Update receive counters */ 1276 rx_bytes += ri.iri_len; 1277 rx_pkts++; 1278 } else 1279 ring->slot[nm_i].flags = NS_MOREFRAG; 1280 } 1281 1282 bus_dmamap_sync(fl->ifl_buf_tag, 1283 fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); 1284 nm_i = nm_next(nm_i, lim); 1285 fl->ifl_cidx = nic_i = nm_next(nic_i, lim); 1286 } 1287 1288 if (have_rxcq) { 1289 *cidxp = ri.iri_cidx; 1290 while (*cidxp >= scctx->isc_nrxd[0]) 1291 *cidxp -= scctx->isc_nrxd[0]; 1292 } 1293 1294 } 1295 if (n) { /* update the state variables */ 1296 if (netmap_no_pendintr && !force_update) { 1297 /* diagnostics */ 1298 iflib_rx_miss ++; 1299 iflib_rx_miss_bufs += n; 1300 } 1301 kring->nr_hwtail = nm_i; 1302 } 1303 kring->nr_kflags &= ~NKR_PENDINTR; 1304 } 1305 /* 1306 * Second part: skip past packets that userspace has released. 1307 * (kring->nr_hwcur to head excluded), 1308 * and make the buffers available for reception. 
1309 * As usual nm_i is the index in the netmap ring, 1310 * nic_i is the index in the NIC ring, and 1311 * nm_i == (nic_i + kring->nkr_hwofs) % ring_size 1312 */ 1313 netmap_fl_refill(rxq, kring, false); 1314 1315 if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); 1316 if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); 1317 1318 return (0); 1319 } 1320 1321 static void 1322 iflib_netmap_intr(struct netmap_adapter *na, int onoff) 1323 { 1324 if_ctx_t ctx = na->ifp->if_softc; 1325 1326 CTX_LOCK(ctx); 1327 if (onoff) { 1328 IFDI_INTR_ENABLE(ctx); 1329 } else { 1330 IFDI_INTR_DISABLE(ctx); 1331 } 1332 CTX_UNLOCK(ctx); 1333 } 1334 1335 static int 1336 iflib_netmap_attach(if_ctx_t ctx) 1337 { 1338 struct netmap_adapter na; 1339 1340 bzero(&na, sizeof(na)); 1341 1342 na.ifp = ctx->ifc_ifp; 1343 na.na_flags = NAF_BDG_MAYSLEEP | NAF_MOREFRAG; 1344 MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); 1345 MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); 1346 1347 na.num_tx_desc = iflib_num_tx_descs(ctx); 1348 na.num_rx_desc = iflib_num_rx_descs(ctx); 1349 na.nm_txsync = iflib_netmap_txsync; 1350 na.nm_rxsync = iflib_netmap_rxsync; 1351 na.nm_register = iflib_netmap_register; 1352 na.nm_intr = iflib_netmap_intr; 1353 na.nm_config = iflib_netmap_config; 1354 na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; 1355 na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; 1356 return (netmap_attach(&na)); 1357 } 1358 1359 static int 1360 iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) 1361 { 1362 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1363 struct netmap_slot *slot; 1364 1365 slot = netmap_reset(na, NR_TX, txq->ift_id, 0); 1366 if (slot == NULL) 1367 return (0); 1368 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { 1369 /* 1370 * In netmap mode, set the map for the packet buffer. 1371 * NOTE: Some drivers (not this one) also need to set 1372 * the physical buffer address in the NIC ring. 1373 * netmap_idx_n2k() maps a nic index, i, into the corresponding 1374 * netmap slot index, si 1375 */ 1376 int si = netmap_idx_n2k(na->tx_rings[txq->ift_id], i); 1377 netmap_load_map(na, txq->ift_buf_tag, txq->ift_sds.ifsd_map[i], 1378 NMB(na, slot + si)); 1379 } 1380 return (1); 1381 } 1382 1383 static int 1384 iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) 1385 { 1386 struct netmap_adapter *na = NA(ctx->ifc_ifp); 1387 struct netmap_kring *kring; 1388 struct netmap_slot *slot; 1389 1390 slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); 1391 if (slot == NULL) 1392 return (0); 1393 kring = na->rx_rings[rxq->ifr_id]; 1394 netmap_fl_refill(rxq, kring, true); 1395 return (1); 1396 } 1397 1398 static void 1399 iflib_netmap_timer(void *arg) 1400 { 1401 iflib_txq_t txq = arg; 1402 if_ctx_t ctx = txq->ift_ctx; 1403 1404 /* 1405 * Wake up the netmap application, to give it a chance to 1406 * call txsync and reclaim more completed TX buffers. 
1407 */ 1408 netmap_tx_irq(ctx->ifc_ifp, txq->ift_id); 1409 } 1410 1411 #define iflib_netmap_detach(ifp) netmap_detach(ifp) 1412 1413 #else 1414 #define iflib_netmap_txq_init(ctx, txq) (0) 1415 #define iflib_netmap_rxq_init(ctx, rxq) (0) 1416 #define iflib_netmap_detach(ifp) 1417 #define netmap_enable_all_rings(ifp) 1418 #define netmap_disable_all_rings(ifp) 1419 1420 #define iflib_netmap_attach(ctx) (0) 1421 #define netmap_rx_irq(ifp, qid, budget) (0) 1422 #endif 1423 1424 #if defined(__i386__) || defined(__amd64__) 1425 static __inline void 1426 prefetch(void *x) 1427 { 1428 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1429 } 1430 static __inline void 1431 prefetch2cachelines(void *x) 1432 { 1433 __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); 1434 #if (CACHE_LINE_SIZE < 128) 1435 __asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long))))); 1436 #endif 1437 } 1438 #else 1439 #define prefetch(x) 1440 #define prefetch2cachelines(x) 1441 #endif 1442 1443 static void 1444 iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid) 1445 { 1446 iflib_fl_t fl; 1447 1448 fl = &rxq->ifr_fl[flid]; 1449 iru->iru_paddrs = fl->ifl_bus_addrs; 1450 iru->iru_idxs = fl->ifl_rxd_idxs; 1451 iru->iru_qsidx = rxq->ifr_id; 1452 iru->iru_buf_size = fl->ifl_buf_size; 1453 iru->iru_flidx = fl->ifl_id; 1454 } 1455 1456 static void 1457 _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) 1458 { 1459 if (err) 1460 return; 1461 *(bus_addr_t *) arg = segs[0].ds_addr; 1462 } 1463 1464 int 1465 iflib_dma_alloc_align(if_ctx_t ctx, int size, int align, iflib_dma_info_t dma, int mapflags) 1466 { 1467 int err; 1468 device_t dev = ctx->ifc_dev; 1469 1470 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1471 align, 0, /* alignment, bounds */ 1472 BUS_SPACE_MAXADDR, /* lowaddr */ 1473 BUS_SPACE_MAXADDR, /* highaddr */ 1474 NULL, NULL, /* filter, filterarg */ 1475 size, /* maxsize */ 1476 1, /* nsegments */ 1477 size, /* maxsegsize */ 1478 BUS_DMA_ALLOCNOW, /* flags */ 1479 NULL, /* lockfunc */ 1480 NULL, /* lockarg */ 1481 &dma->idi_tag); 1482 if (err) { 1483 device_printf(dev, 1484 "%s: bus_dma_tag_create failed: %d\n", 1485 __func__, err); 1486 goto fail_0; 1487 } 1488 1489 err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, 1490 BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); 1491 if (err) { 1492 device_printf(dev, 1493 "%s: bus_dmamem_alloc(%ju) failed: %d\n", 1494 __func__, (uintmax_t)size, err); 1495 goto fail_1; 1496 } 1497 1498 dma->idi_paddr = IF_BAD_DMA; 1499 err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, 1500 size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); 1501 if (err || dma->idi_paddr == IF_BAD_DMA) { 1502 device_printf(dev, 1503 "%s: bus_dmamap_load failed: %d\n", 1504 __func__, err); 1505 goto fail_2; 1506 } 1507 1508 dma->idi_size = size; 1509 return (0); 1510 1511 fail_2: 1512 bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); 1513 fail_1: 1514 bus_dma_tag_destroy(dma->idi_tag); 1515 fail_0: 1516 dma->idi_tag = NULL; 1517 1518 return (err); 1519 } 1520 1521 int 1522 iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) 1523 { 1524 if_shared_ctx_t sctx = ctx->ifc_sctx; 1525 1526 KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); 1527 1528 return (iflib_dma_alloc_align(ctx, size, sctx->isc_q_align, dma, mapflags)); 1529 } 1530 1531 int 1532 iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, 
iflib_dma_info_t *dmalist, int mapflags, int count)
{
    int i, err = 0;	/* keep err defined if count is 0 */
    iflib_dma_info_t *dmaiter;

    dmaiter = dmalist;
    for (i = 0; i < count; i++, dmaiter++) {
        if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0)
            break;
    }
    if (err)
        iflib_dma_free_multi(dmalist, i);
    return (err);
}

void
iflib_dma_free(iflib_dma_info_t dma)
{
    if (dma->idi_tag == NULL)
        return;
    if (dma->idi_paddr != IF_BAD_DMA) {
        bus_dmamap_sync(dma->idi_tag, dma->idi_map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(dma->idi_tag, dma->idi_map);
        dma->idi_paddr = IF_BAD_DMA;
    }
    if (dma->idi_vaddr != NULL) {
        bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map);
        dma->idi_vaddr = NULL;
    }
    bus_dma_tag_destroy(dma->idi_tag);
    dma->idi_tag = NULL;
}

void
iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count)
{
    int i;
    iflib_dma_info_t *dmaiter = dmalist;

    for (i = 0; i < count; i++, dmaiter++)
        iflib_dma_free(*dmaiter);
}

static int
iflib_fast_intr(void *arg)
{
    iflib_filter_info_t info = arg;
    struct grouptask *gtask = info->ifi_task;
    int result;

    DBG_COUNTER_INC(fast_intrs);
    if (info->ifi_filter != NULL) {
        result = info->ifi_filter(info->ifi_filter_arg);
        if ((result & FILTER_SCHEDULE_THREAD) == 0)
            return (result);
    }

    GROUPTASK_ENQUEUE(gtask);
    return (FILTER_HANDLED);
}

static int
iflib_fast_intr_rxtx(void *arg)
{
    iflib_filter_info_t info = arg;
    struct grouptask *gtask = info->ifi_task;
    if_ctx_t ctx;
    iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx;
    iflib_txq_t txq;
    void *sc;
    int i, cidx, result;
    qidx_t txqid;
    bool intr_enable, intr_legacy;

    DBG_COUNTER_INC(fast_intrs);
    if (info->ifi_filter != NULL) {
        result = info->ifi_filter(info->ifi_filter_arg);
        if ((result & FILTER_SCHEDULE_THREAD) == 0)
            return (result);
    }

    ctx = rxq->ifr_ctx;
    sc = ctx->ifc_softc;
    intr_enable = false;
    intr_legacy = !!(ctx->ifc_flags & IFC_LEGACY);
    MPASS(rxq->ifr_ntxqirq);
    for (i = 0; i < rxq->ifr_ntxqirq; i++) {
        txqid = rxq->ifr_txqid[i];
        txq = &ctx->ifc_txqs[txqid];
        bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map,
            BUS_DMASYNC_POSTREAD);
        if (!ctx->isc_txd_credits_update(sc, txqid, false)) {
            if (intr_legacy)
                intr_enable = true;
            else
                IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid);
            continue;
        }
        GROUPTASK_ENQUEUE(&txq->ift_task);
    }
    if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ)
        cidx = rxq->ifr_cq_cidx;
    else
        cidx = rxq->ifr_fl[0].ifl_cidx;
    if (iflib_rxd_avail(ctx, rxq, cidx, 1))
        GROUPTASK_ENQUEUE(gtask);
    else {
        if (intr_legacy)
            intr_enable = true;
        else
            IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id);
        DBG_COUNTER_INC(rx_intr_enables);
    }
    if (intr_enable)
        IFDI_INTR_ENABLE(ctx);
    return (FILTER_HANDLED);
}

static int
iflib_fast_intr_ctx(void *arg)
{
    iflib_filter_info_t info = arg;
    struct grouptask *gtask = info->ifi_task;
    int result;

    DBG_COUNTER_INC(fast_intrs);
    if (info->ifi_filter != NULL) {
        result = info->ifi_filter(info->ifi_filter_arg);
        if ((result & FILTER_SCHEDULE_THREAD) == 0)
            return (result);
    }

    GROUPTASK_ENQUEUE(gtask);
    return (FILTER_HANDLED);
}
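/*
 * Illustrative sketch (not compiled): what a driver-supplied ifi_filter
 * typically does for the fast-interrupt paths above.  It runs in hard
 * interrupt context, so it only acknowledges/masks the interrupt in
 * hardware and then tells iflib whether to enqueue the queue's group
 * task.  The register access helper, register name, and per-queue
 * structure are invented for the example.
 */
#if 0
static int
example_queue_intr_filter(void *arg)
{
    struct example_rx_queue *que = arg;	/* hypothetical per-queue state */

    /* Ack and mask this queue's vector; details are device specific. */
    example_write_reg(que->sc, EXAMPLE_REG_EIMC, 1u << que->msix_vector);

    /*
     * Returning FILTER_SCHEDULE_THREAD makes iflib_fast_intr*() run
     * GROUPTASK_ENQUEUE() for this queue; returning FILTER_HANDLED
     * would suppress the task and finish interrupt handling here.
     */
    return (FILTER_SCHEDULE_THREAD);
}
#endif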
static int
_iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
    driver_filter_t filter, driver_intr_t handler, void *arg,
    const char *name)
{
    struct resource *res;
    void *tag = NULL;
    device_t dev = ctx->ifc_dev;
    int flags, i, rc;

    flags = RF_ACTIVE;
    if (ctx->ifc_flags & IFC_LEGACY)
        flags |= RF_SHAREABLE;
    MPASS(rid < 512);
    i = rid;
    res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, flags);
    if (res == NULL) {
        device_printf(dev,
            "failed to allocate IRQ for rid %d, name %s.\n", rid, name);
        return (ENOMEM);
    }
    irq->ii_res = res;
    KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL"));
    rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET,
        filter, handler, arg, &tag);
    if (rc != 0) {
        device_printf(dev,
            "failed to setup interrupt for rid %d, name %s: %d\n",
            rid, name ? name : "unknown", rc);
        return (rc);
    } else if (name)
        bus_describe_intr(dev, res, tag, "%s", name);

    irq->ii_tag = tag;
    return (0);
}

/*********************************************************************
 *
 *  Allocate DMA resources for TX buffers as well as memory for the TX
 *  mbuf map.  TX DMA maps (non-TSO/TSO) and TX mbuf map are kept in a
 *  iflib_sw_tx_desc_array structure, storing all the information that
 *  is needed to transmit a packet on the wire.  This is called only
 *  once at attach; setup is done on every reset.
 *
 **********************************************************************/
static int
iflib_txsd_alloc(iflib_txq_t txq)
{
    if_ctx_t ctx = txq->ift_ctx;
    if_shared_ctx_t sctx = ctx->ifc_sctx;
    if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
    device_t dev = ctx->ifc_dev;
    bus_size_t tsomaxsize;
    int err, nsegments, ntsosegments;
    bool tso;

    nsegments = scctx->isc_tx_nsegments;
    ntsosegments = scctx->isc_tx_tso_segments_max;
    tsomaxsize = scctx->isc_tx_tso_size_max;
    if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_VLAN_MTU)
        tsomaxsize += sizeof(struct ether_vlan_header);
    MPASS(scctx->isc_ntxd[0] > 0);
    MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
    MPASS(nsegments > 0);
    if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) {
        MPASS(ntsosegments > 0);
        MPASS(sctx->isc_tso_maxsize >= tsomaxsize);
    }

    /*
     * Set up DMA tags for TX buffers.
1741 */ 1742 if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1743 1, 0, /* alignment, bounds */ 1744 BUS_SPACE_MAXADDR, /* lowaddr */ 1745 BUS_SPACE_MAXADDR, /* highaddr */ 1746 NULL, NULL, /* filter, filterarg */ 1747 sctx->isc_tx_maxsize, /* maxsize */ 1748 nsegments, /* nsegments */ 1749 sctx->isc_tx_maxsegsize, /* maxsegsize */ 1750 0, /* flags */ 1751 NULL, /* lockfunc */ 1752 NULL, /* lockfuncarg */ 1753 &txq->ift_buf_tag))) { 1754 device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); 1755 device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", 1756 (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); 1757 goto fail; 1758 } 1759 tso = (if_getcapabilities(ctx->ifc_ifp) & IFCAP_TSO) != 0; 1760 if (tso && (err = bus_dma_tag_create(bus_get_dma_tag(dev), 1761 1, 0, /* alignment, bounds */ 1762 BUS_SPACE_MAXADDR, /* lowaddr */ 1763 BUS_SPACE_MAXADDR, /* highaddr */ 1764 NULL, NULL, /* filter, filterarg */ 1765 tsomaxsize, /* maxsize */ 1766 ntsosegments, /* nsegments */ 1767 sctx->isc_tso_maxsegsize,/* maxsegsize */ 1768 0, /* flags */ 1769 NULL, /* lockfunc */ 1770 NULL, /* lockfuncarg */ 1771 &txq->ift_tso_buf_tag))) { 1772 device_printf(dev, "Unable to allocate TSO TX DMA tag: %d\n", 1773 err); 1774 goto fail; 1775 } 1776 1777 /* Allocate memory for the TX mbuf map. */ 1778 if (!(txq->ift_sds.ifsd_m = 1779 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1780 scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1781 device_printf(dev, "Unable to allocate TX mbuf map memory\n"); 1782 err = ENOMEM; 1783 goto fail; 1784 } 1785 1786 /* 1787 * Create the DMA maps for TX buffers. 1788 */ 1789 if ((txq->ift_sds.ifsd_map = (bus_dmamap_t *)malloc( 1790 sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], 1791 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 1792 device_printf(dev, 1793 "Unable to allocate TX buffer DMA map memory\n"); 1794 err = ENOMEM; 1795 goto fail; 1796 } 1797 if (tso && (txq->ift_sds.ifsd_tso_map = (bus_dmamap_t *)malloc( 1798 sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], 1799 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 1800 device_printf(dev, 1801 "Unable to allocate TSO TX buffer map memory\n"); 1802 err = ENOMEM; 1803 goto fail; 1804 } 1805 for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { 1806 err = bus_dmamap_create(txq->ift_buf_tag, 0, 1807 &txq->ift_sds.ifsd_map[i]); 1808 if (err != 0) { 1809 device_printf(dev, "Unable to create TX DMA map\n"); 1810 goto fail; 1811 } 1812 if (!tso) 1813 continue; 1814 err = bus_dmamap_create(txq->ift_tso_buf_tag, 0, 1815 &txq->ift_sds.ifsd_tso_map[i]); 1816 if (err != 0) { 1817 device_printf(dev, "Unable to create TSO TX DMA map\n"); 1818 goto fail; 1819 } 1820 } 1821 return (0); 1822 fail: 1823 /* We free all, it handles case where we are in the middle */ 1824 iflib_tx_structures_free(ctx); 1825 return (err); 1826 } 1827 1828 static void 1829 iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) 1830 { 1831 bus_dmamap_t map; 1832 1833 if (txq->ift_sds.ifsd_map != NULL) { 1834 map = txq->ift_sds.ifsd_map[i]; 1835 bus_dmamap_sync(txq->ift_buf_tag, map, BUS_DMASYNC_POSTWRITE); 1836 bus_dmamap_unload(txq->ift_buf_tag, map); 1837 bus_dmamap_destroy(txq->ift_buf_tag, map); 1838 txq->ift_sds.ifsd_map[i] = NULL; 1839 } 1840 1841 if (txq->ift_sds.ifsd_tso_map != NULL) { 1842 map = txq->ift_sds.ifsd_tso_map[i]; 1843 bus_dmamap_sync(txq->ift_tso_buf_tag, map, 1844 BUS_DMASYNC_POSTWRITE); 1845 bus_dmamap_unload(txq->ift_tso_buf_tag, map); 1846 
bus_dmamap_destroy(txq->ift_tso_buf_tag, map); 1847 txq->ift_sds.ifsd_tso_map[i] = NULL; 1848 } 1849 } 1850 1851 static void 1852 iflib_txq_destroy(iflib_txq_t txq) 1853 { 1854 if_ctx_t ctx = txq->ift_ctx; 1855 1856 for (int i = 0; i < txq->ift_size; i++) 1857 iflib_txsd_destroy(ctx, txq, i); 1858 1859 if (txq->ift_br != NULL) { 1860 ifmp_ring_free(txq->ift_br); 1861 txq->ift_br = NULL; 1862 } 1863 1864 mtx_destroy(&txq->ift_mtx); 1865 1866 if (txq->ift_sds.ifsd_map != NULL) { 1867 free(txq->ift_sds.ifsd_map, M_IFLIB); 1868 txq->ift_sds.ifsd_map = NULL; 1869 } 1870 if (txq->ift_sds.ifsd_tso_map != NULL) { 1871 free(txq->ift_sds.ifsd_tso_map, M_IFLIB); 1872 txq->ift_sds.ifsd_tso_map = NULL; 1873 } 1874 if (txq->ift_sds.ifsd_m != NULL) { 1875 free(txq->ift_sds.ifsd_m, M_IFLIB); 1876 txq->ift_sds.ifsd_m = NULL; 1877 } 1878 if (txq->ift_buf_tag != NULL) { 1879 bus_dma_tag_destroy(txq->ift_buf_tag); 1880 txq->ift_buf_tag = NULL; 1881 } 1882 if (txq->ift_tso_buf_tag != NULL) { 1883 bus_dma_tag_destroy(txq->ift_tso_buf_tag); 1884 txq->ift_tso_buf_tag = NULL; 1885 } 1886 if (txq->ift_ifdi != NULL) { 1887 free(txq->ift_ifdi, M_IFLIB); 1888 } 1889 } 1890 1891 static void 1892 iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) 1893 { 1894 struct mbuf **mp; 1895 1896 mp = &txq->ift_sds.ifsd_m[i]; 1897 if (*mp == NULL) 1898 return; 1899 1900 if (txq->ift_sds.ifsd_map != NULL) { 1901 bus_dmamap_sync(txq->ift_buf_tag, 1902 txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); 1903 bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[i]); 1904 } 1905 if (txq->ift_sds.ifsd_tso_map != NULL) { 1906 bus_dmamap_sync(txq->ift_tso_buf_tag, 1907 txq->ift_sds.ifsd_tso_map[i], BUS_DMASYNC_POSTWRITE); 1908 bus_dmamap_unload(txq->ift_tso_buf_tag, 1909 txq->ift_sds.ifsd_tso_map[i]); 1910 } 1911 m_freem(*mp); 1912 DBG_COUNTER_INC(tx_frees); 1913 *mp = NULL; 1914 } 1915 1916 static int 1917 iflib_txq_setup(iflib_txq_t txq) 1918 { 1919 if_ctx_t ctx = txq->ift_ctx; 1920 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1921 if_shared_ctx_t sctx = ctx->ifc_sctx; 1922 iflib_dma_info_t di; 1923 int i; 1924 1925 /* Set number of descriptors available */ 1926 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 1927 /* XXX make configurable */ 1928 txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ; 1929 1930 /* Reset indices */ 1931 txq->ift_cidx_processed = 0; 1932 txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; 1933 txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; 1934 1935 for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) 1936 bzero((void *)di->idi_vaddr, di->idi_size); 1937 1938 IFDI_TXQ_SETUP(ctx, txq->ift_id); 1939 for (i = 0, di = txq->ift_ifdi; i < sctx->isc_ntxqs; i++, di++) 1940 bus_dmamap_sync(di->idi_tag, di->idi_map, 1941 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 1942 return (0); 1943 } 1944 1945 /********************************************************************* 1946 * 1947 * Allocate DMA resources for RX buffers as well as memory for the RX 1948 * mbuf map, direct RX cluster pointer map and RX cluster bus address 1949 * map. RX DMA map, RX mbuf map, direct RX cluster pointer map and 1950 * RX cluster map are kept in a iflib_sw_rx_desc_array structure. 1951 * Since we use use one entry in iflib_sw_rx_desc_array per received 1952 * packet, the maximum number of entries we'll need is equal to the 1953 * number of hardware receive descriptors that we've allocated. 
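 *
 * An informal sketch of the layout (no new state, just the arrays
 * allocated below): for a free list sized
 * N = scctx->isc_nrxd[rxq->ifr_fl_offset], every descriptor slot i has
 *
 *	ifsd_m[i]	the mbuf header paired with slot i
 *	ifsd_cl[i]	the receive cluster for slot i
 *	ifsd_ba[i]	the bus address the cluster was loaded at
 *	ifsd_map[i]	the busdma map used to load/unload that cluster
 *
 * Keeping these as parallel arrays (rather than an array of structs)
 * also keeps the mbuf and cluster pointer arrays dense for the
 * prefetching done later in prefetch_pkts().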
1954 * 1955 **********************************************************************/ 1956 static int 1957 iflib_rxsd_alloc(iflib_rxq_t rxq) 1958 { 1959 if_ctx_t ctx = rxq->ifr_ctx; 1960 if_shared_ctx_t sctx = ctx->ifc_sctx; 1961 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 1962 device_t dev = ctx->ifc_dev; 1963 iflib_fl_t fl; 1964 int err; 1965 1966 MPASS(scctx->isc_nrxd[0] > 0); 1967 MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); 1968 1969 fl = rxq->ifr_fl; 1970 for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { 1971 fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ 1972 /* Set up DMA tag for RX buffers. */ 1973 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1974 1, 0, /* alignment, bounds */ 1975 BUS_SPACE_MAXADDR, /* lowaddr */ 1976 BUS_SPACE_MAXADDR, /* highaddr */ 1977 NULL, NULL, /* filter, filterarg */ 1978 sctx->isc_rx_maxsize, /* maxsize */ 1979 sctx->isc_rx_nsegments, /* nsegments */ 1980 sctx->isc_rx_maxsegsize, /* maxsegsize */ 1981 0, /* flags */ 1982 NULL, /* lockfunc */ 1983 NULL, /* lockarg */ 1984 &fl->ifl_buf_tag); 1985 if (err) { 1986 device_printf(dev, 1987 "Unable to allocate RX DMA tag: %d\n", err); 1988 goto fail; 1989 } 1990 1991 /* Allocate memory for the RX mbuf map. */ 1992 if (!(fl->ifl_sds.ifsd_m = 1993 (struct mbuf **) malloc(sizeof(struct mbuf *) * 1994 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 1995 device_printf(dev, 1996 "Unable to allocate RX mbuf map memory\n"); 1997 err = ENOMEM; 1998 goto fail; 1999 } 2000 2001 /* Allocate memory for the direct RX cluster pointer map. */ 2002 if (!(fl->ifl_sds.ifsd_cl = 2003 (caddr_t *) malloc(sizeof(caddr_t) * 2004 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 2005 device_printf(dev, 2006 "Unable to allocate RX cluster map memory\n"); 2007 err = ENOMEM; 2008 goto fail; 2009 } 2010 2011 /* Allocate memory for the RX cluster bus address map. */ 2012 if (!(fl->ifl_sds.ifsd_ba = 2013 (bus_addr_t *) malloc(sizeof(bus_addr_t) * 2014 scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 2015 device_printf(dev, 2016 "Unable to allocate RX bus address map memory\n"); 2017 err = ENOMEM; 2018 goto fail; 2019 } 2020 2021 /* 2022 * Create the DMA maps for RX buffers. 
2023 */ 2024 if (!(fl->ifl_sds.ifsd_map = 2025 (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { 2026 device_printf(dev, 2027 "Unable to allocate RX buffer DMA map memory\n"); 2028 err = ENOMEM; 2029 goto fail; 2030 } 2031 for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { 2032 err = bus_dmamap_create(fl->ifl_buf_tag, 0, 2033 &fl->ifl_sds.ifsd_map[i]); 2034 if (err != 0) { 2035 device_printf(dev, "Unable to create RX buffer DMA map\n"); 2036 goto fail; 2037 } 2038 } 2039 } 2040 return (0); 2041 2042 fail: 2043 iflib_rx_structures_free(ctx); 2044 return (err); 2045 } 2046 2047 /* 2048 * Internal service routines 2049 */ 2050 2051 struct rxq_refill_cb_arg { 2052 int error; 2053 bus_dma_segment_t seg; 2054 int nseg; 2055 }; 2056 2057 static void 2058 _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) 2059 { 2060 struct rxq_refill_cb_arg *cb_arg = arg; 2061 2062 cb_arg->error = error; 2063 cb_arg->seg = segs[0]; 2064 cb_arg->nseg = nseg; 2065 } 2066 2067 /** 2068 * iflib_fl_refill - refill an rxq free-buffer list 2069 * @ctx: the iflib context 2070 * @fl: the free list to refill 2071 * @count: the number of new buffers to allocate 2072 * 2073 * (Re)populate an rxq free-buffer list with up to @count new packet buffers. 2074 * The caller must assure that @count does not exceed the queue's capacity 2075 * minus one (since we always leave a descriptor unavailable). 2076 */ 2077 static uint8_t 2078 iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) 2079 { 2080 struct if_rxd_update iru; 2081 struct rxq_refill_cb_arg cb_arg; 2082 struct mbuf *m; 2083 caddr_t cl, *sd_cl; 2084 struct mbuf **sd_m; 2085 bus_dmamap_t *sd_map; 2086 bus_addr_t bus_addr, *sd_ba; 2087 int err, frag_idx, i, idx, n, pidx; 2088 qidx_t credits; 2089 2090 MPASS(count <= fl->ifl_size - fl->ifl_credits - 1); 2091 2092 sd_m = fl->ifl_sds.ifsd_m; 2093 sd_map = fl->ifl_sds.ifsd_map; 2094 sd_cl = fl->ifl_sds.ifsd_cl; 2095 sd_ba = fl->ifl_sds.ifsd_ba; 2096 pidx = fl->ifl_pidx; 2097 idx = pidx; 2098 frag_idx = fl->ifl_fragidx; 2099 credits = fl->ifl_credits; 2100 2101 i = 0; 2102 n = count; 2103 MPASS(n > 0); 2104 MPASS(credits + n <= fl->ifl_size); 2105 2106 if (pidx < fl->ifl_cidx) 2107 MPASS(pidx + n <= fl->ifl_cidx); 2108 if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) 2109 MPASS(fl->ifl_gen == 0); 2110 if (pidx > fl->ifl_cidx) 2111 MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); 2112 2113 DBG_COUNTER_INC(fl_refills); 2114 if (n > 8) 2115 DBG_COUNTER_INC(fl_refills_large); 2116 iru_init(&iru, fl->ifl_rxq, fl->ifl_id); 2117 while (n-- > 0) { 2118 /* 2119 * We allocate an uninitialized mbuf + cluster, mbuf is 2120 * initialized after rx. 
2121 * 2122 * If the cluster is still set then we know a minimum sized 2123 * packet was received 2124 */ 2125 bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, 2126 &frag_idx); 2127 if (frag_idx < 0) 2128 bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); 2129 MPASS(frag_idx >= 0); 2130 if ((cl = sd_cl[frag_idx]) == NULL) { 2131 #ifndef __HAIKU__ 2132 cl = uma_zalloc(fl->ifl_zone, M_NOWAIT); 2133 if (__predict_false(cl == NULL)) 2134 #else 2135 if ((cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) 2136 #endif 2137 break; 2138 2139 cb_arg.error = 0; 2140 MPASS(sd_map != NULL); 2141 err = bus_dmamap_load(fl->ifl_buf_tag, sd_map[frag_idx], 2142 cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 2143 BUS_DMA_NOWAIT); 2144 if (__predict_false(err != 0 || cb_arg.error)) { 2145 #ifndef __HAIKU__ 2146 uma_zfree(fl->ifl_zone, cl); 2147 #else 2148 m_free(cl); 2149 #endif 2150 break; 2151 } 2152 2153 sd_ba[frag_idx] = bus_addr = cb_arg.seg.ds_addr; 2154 sd_cl[frag_idx] = cl; 2155 #if MEMORY_LOGGING 2156 fl->ifl_cl_enqueued++; 2157 #endif 2158 } else { 2159 bus_addr = sd_ba[frag_idx]; 2160 } 2161 bus_dmamap_sync(fl->ifl_buf_tag, sd_map[frag_idx], 2162 BUS_DMASYNC_PREREAD); 2163 2164 if (sd_m[frag_idx] == NULL) { 2165 m = m_gethdr(M_NOWAIT, MT_NOINIT); 2166 if (__predict_false(m == NULL)) 2167 break; 2168 sd_m[frag_idx] = m; 2169 } 2170 bit_set(fl->ifl_rx_bitmap, frag_idx); 2171 #if MEMORY_LOGGING 2172 fl->ifl_m_enqueued++; 2173 #endif 2174 2175 DBG_COUNTER_INC(rx_allocs); 2176 fl->ifl_rxd_idxs[i] = frag_idx; 2177 fl->ifl_bus_addrs[i] = bus_addr; 2178 credits++; 2179 i++; 2180 MPASS(credits <= fl->ifl_size); 2181 if (++idx == fl->ifl_size) { 2182 #ifdef INVARIANTS 2183 fl->ifl_gen = 1; 2184 #endif 2185 idx = 0; 2186 } 2187 if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { 2188 iru.iru_pidx = pidx; 2189 iru.iru_count = i; 2190 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 2191 fl->ifl_pidx = idx; 2192 fl->ifl_credits = credits; 2193 pidx = idx; 2194 i = 0; 2195 } 2196 } 2197 2198 if (n < count - 1) { 2199 if (i != 0) { 2200 iru.iru_pidx = pidx; 2201 iru.iru_count = i; 2202 ctx->isc_rxd_refill(ctx->ifc_softc, &iru); 2203 fl->ifl_pidx = idx; 2204 fl->ifl_credits = credits; 2205 } 2206 DBG_COUNTER_INC(rxd_flush); 2207 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2208 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2209 ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, 2210 fl->ifl_id, fl->ifl_pidx); 2211 if (__predict_true(bit_test(fl->ifl_rx_bitmap, frag_idx))) { 2212 fl->ifl_fragidx = frag_idx + 1; 2213 if (fl->ifl_fragidx == fl->ifl_size) 2214 fl->ifl_fragidx = 0; 2215 } else { 2216 fl->ifl_fragidx = frag_idx; 2217 } 2218 } 2219 2220 return (n == -1 ? 0 : IFLIB_RXEOF_EMPTY); 2221 } 2222 2223 static inline uint8_t 2224 iflib_fl_refill_all(if_ctx_t ctx, iflib_fl_t fl) 2225 { 2226 /* 2227 * We leave an unused descriptor to avoid pidx to catch up with cidx. 2228 * This is important as it confuses most NICs. For instance, 2229 * Intel NICs have (per receive ring) RDH and RDT registers, where 2230 * RDH points to the next receive descriptor to be used by the NIC, 2231 * and RDT for the next receive descriptor to be published by the 2232 * driver to the NIC (RDT - 1 is thus the last valid one). 2233 * The condition RDH == RDT means no descriptors are available to 2234 * the NIC, and thus it would be ambiguous if it also meant that 2235 * all the descriptors are available to the NIC. 
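 *
 * A small worked example (numbers purely illustrative): with
 * ifl_size = 1024 and ifl_credits = 1000 the computation below yields
 *
 *	reclaimable = 1024 - 1000 - 1 = 23
 *
 * so at most 23 fresh buffers are handed to iflib_fl_refill(), and one
 * slot always stays empty so the producer index can never catch up
 * with the consumer index on a full ring.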
2236 */ 2237 int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; 2238 #ifdef INVARIANTS 2239 int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; 2240 #endif 2241 2242 MPASS(fl->ifl_credits <= fl->ifl_size); 2243 MPASS(reclaimable == delta); 2244 2245 if (reclaimable > 0) 2246 return (iflib_fl_refill(ctx, fl, reclaimable)); 2247 return (0); 2248 } 2249 2250 uint8_t 2251 iflib_in_detach(if_ctx_t ctx) 2252 { 2253 bool in_detach; 2254 2255 STATE_LOCK(ctx); 2256 in_detach = !!(ctx->ifc_flags & IFC_IN_DETACH); 2257 STATE_UNLOCK(ctx); 2258 return (in_detach); 2259 } 2260 2261 static void 2262 iflib_fl_bufs_free(iflib_fl_t fl) 2263 { 2264 iflib_dma_info_t idi = fl->ifl_ifdi; 2265 bus_dmamap_t sd_map; 2266 uint32_t i; 2267 2268 for (i = 0; i < fl->ifl_size; i++) { 2269 struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; 2270 caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; 2271 2272 if (*sd_cl != NULL) { 2273 sd_map = fl->ifl_sds.ifsd_map[i]; 2274 bus_dmamap_sync(fl->ifl_buf_tag, sd_map, 2275 BUS_DMASYNC_POSTREAD); 2276 bus_dmamap_unload(fl->ifl_buf_tag, sd_map); 2277 #ifndef __HAIKU__ 2278 uma_zfree(fl->ifl_zone, *sd_cl); 2279 #else 2280 struct mbuf* mb = m_get(0, MT_DATA); 2281 m_cljset(mb, *sd_cl, fl->ifl_cltype); 2282 m_free(mb); 2283 #endif 2284 *sd_cl = NULL; 2285 if (*sd_m != NULL) { 2286 m_init(*sd_m, M_NOWAIT, MT_DATA, 0); 2287 #ifndef __HAIKU__ 2288 m_free_raw(*sd_m); 2289 #else 2290 m_free(*sd_m); 2291 #endif 2292 *sd_m = NULL; 2293 } 2294 } else { 2295 MPASS(*sd_m == NULL); 2296 } 2297 #if MEMORY_LOGGING 2298 fl->ifl_m_dequeued++; 2299 fl->ifl_cl_dequeued++; 2300 #endif 2301 } 2302 #ifdef INVARIANTS 2303 for (i = 0; i < fl->ifl_size; i++) { 2304 MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); 2305 MPASS(fl->ifl_sds.ifsd_m[i] == NULL); 2306 } 2307 #endif 2308 /* 2309 * Reset free list values 2310 */ 2311 fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; 2312 bzero(idi->idi_vaddr, idi->idi_size); 2313 } 2314 2315 /********************************************************************* 2316 * 2317 * Initialize a free list and its buffers. 2318 * 2319 **********************************************************************/ 2320 static int 2321 iflib_fl_setup(iflib_fl_t fl) 2322 { 2323 iflib_rxq_t rxq = fl->ifl_rxq; 2324 if_ctx_t ctx = rxq->ifr_ctx; 2325 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2326 int qidx; 2327 2328 bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); 2329 /* 2330 ** Free current RX buffer structs and their mbufs 2331 */ 2332 iflib_fl_bufs_free(fl); 2333 /* Now replenish the mbufs */ 2334 MPASS(fl->ifl_credits == 0); 2335 qidx = rxq->ifr_fl_offset + fl->ifl_id; 2336 if (scctx->isc_rxd_buf_size[qidx] != 0) 2337 fl->ifl_buf_size = scctx->isc_rxd_buf_size[qidx]; 2338 else 2339 fl->ifl_buf_size = ctx->ifc_rx_mbuf_sz; 2340 /* 2341 * ifl_buf_size may be a driver-supplied value, so pull it up 2342 * to the selected mbuf size. 2343 */ 2344 fl->ifl_buf_size = iflib_get_mbuf_size_for(fl->ifl_buf_size); 2345 if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) 2346 ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; 2347 fl->ifl_cltype = m_gettype(fl->ifl_buf_size); 2348 #ifndef __HAIKU__ 2349 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 2350 #endif 2351 2352 /* 2353 * Avoid pre-allocating zillions of clusters to an idle card 2354 * potentially speeding up attach. In any case make sure 2355 * to leave a descriptor unavailable. See the comment in 2356 * iflib_fl_refill_all(). 
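 *
 * Concretely, the call below posts only min(128, ifl_size - 1) buffers:
 * e.g. (illustrative figure) a 4096-entry free list gets 128 clusters
 * at init time, and the remainder is replenished on demand by
 * iflib_fl_refill_all() as the rx path consumes descriptors.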
2357 */ 2358 MPASS(fl->ifl_size > 0); 2359 (void)iflib_fl_refill(ctx, fl, min(128, fl->ifl_size - 1)); 2360 if (min(128, fl->ifl_size - 1) != fl->ifl_credits) 2361 return (ENOBUFS); 2362 /* 2363 * handle failure 2364 */ 2365 MPASS(rxq != NULL); 2366 MPASS(fl->ifl_ifdi != NULL); 2367 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 2368 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2369 return (0); 2370 } 2371 2372 /********************************************************************* 2373 * 2374 * Free receive ring data structures 2375 * 2376 **********************************************************************/ 2377 static void 2378 iflib_rx_sds_free(iflib_rxq_t rxq) 2379 { 2380 iflib_fl_t fl; 2381 int i, j; 2382 2383 if (rxq->ifr_fl != NULL) { 2384 for (i = 0; i < rxq->ifr_nfl; i++) { 2385 fl = &rxq->ifr_fl[i]; 2386 if (fl->ifl_buf_tag != NULL) { 2387 if (fl->ifl_sds.ifsd_map != NULL) { 2388 for (j = 0; j < fl->ifl_size; j++) { 2389 bus_dmamap_sync( 2390 fl->ifl_buf_tag, 2391 fl->ifl_sds.ifsd_map[j], 2392 BUS_DMASYNC_POSTREAD); 2393 bus_dmamap_unload( 2394 fl->ifl_buf_tag, 2395 fl->ifl_sds.ifsd_map[j]); 2396 bus_dmamap_destroy( 2397 fl->ifl_buf_tag, 2398 fl->ifl_sds.ifsd_map[j]); 2399 } 2400 } 2401 bus_dma_tag_destroy(fl->ifl_buf_tag); 2402 fl->ifl_buf_tag = NULL; 2403 } 2404 free(fl->ifl_sds.ifsd_m, M_IFLIB); 2405 free(fl->ifl_sds.ifsd_cl, M_IFLIB); 2406 free(fl->ifl_sds.ifsd_ba, M_IFLIB); 2407 free(fl->ifl_sds.ifsd_map, M_IFLIB); 2408 free(fl->ifl_rx_bitmap, M_IFLIB); 2409 fl->ifl_sds.ifsd_m = NULL; 2410 fl->ifl_sds.ifsd_cl = NULL; 2411 fl->ifl_sds.ifsd_ba = NULL; 2412 fl->ifl_sds.ifsd_map = NULL; 2413 fl->ifl_rx_bitmap = NULL; 2414 } 2415 free(rxq->ifr_fl, M_IFLIB); 2416 rxq->ifr_fl = NULL; 2417 free(rxq->ifr_ifdi, M_IFLIB); 2418 rxq->ifr_ifdi = NULL; 2419 rxq->ifr_cq_cidx = 0; 2420 } 2421 } 2422 2423 /* 2424 * Timer routine 2425 */ 2426 static void 2427 iflib_timer(void *arg) 2428 { 2429 iflib_txq_t txq = arg; 2430 if_ctx_t ctx = txq->ift_ctx; 2431 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2432 uint64_t this_tick = ticks; 2433 2434 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) 2435 return; 2436 2437 /* 2438 ** Check on the state of the TX queue(s), this 2439 ** can be done without the lock because its RO 2440 ** and the HUNG state will be static if set. 
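 **
 ** Roughly, the test below amounts to (informal pseudo-condition):
 **
 **	hung = (qstatus == IFLIB_QUEUE_HUNG) &&
 **	       (nothing was cleaned since the last pass ||
 **	        no pause frames were seen);
 **
 ** i.e. a queue already flagged as stalled that made no forward
 ** progress over a full timer interval triggers the watchdog reset.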
2441 */ 2442 if (this_tick - txq->ift_last_timer_tick >= iflib_timer_default) { 2443 txq->ift_last_timer_tick = this_tick; 2444 IFDI_TIMER(ctx, txq->ift_id); 2445 if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && 2446 ((txq->ift_cleaned_prev == txq->ift_cleaned) || 2447 (sctx->isc_pause_frames == 0))) 2448 goto hung; 2449 2450 if (txq->ift_qstatus != IFLIB_QUEUE_IDLE && 2451 ifmp_ring_is_stalled(txq->ift_br)) { 2452 KASSERT(ctx->ifc_link_state == LINK_STATE_UP, 2453 ("queue can't be marked as hung if interface is down")); 2454 txq->ift_qstatus = IFLIB_QUEUE_HUNG; 2455 } 2456 txq->ift_cleaned_prev = txq->ift_cleaned; 2457 } 2458 /* handle any laggards */ 2459 if (txq->ift_db_pending) 2460 GROUPTASK_ENQUEUE(&txq->ift_task); 2461 2462 sctx->isc_pause_frames = 0; 2463 if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) 2464 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, 2465 txq, txq->ift_timer.c_cpu); 2466 return; 2467 2468 hung: 2469 device_printf(ctx->ifc_dev, 2470 "Watchdog timeout (TX: %d desc avail: %d pidx: %d) -- resetting\n", 2471 txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); 2472 STATE_LOCK(ctx); 2473 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2474 ctx->ifc_flags |= (IFC_DO_WATCHDOG|IFC_DO_RESET); 2475 iflib_admin_intr_deferred(ctx); 2476 STATE_UNLOCK(ctx); 2477 } 2478 2479 static uint16_t 2480 iflib_get_mbuf_size_for(unsigned int size) 2481 { 2482 2483 if (size <= MCLBYTES) 2484 return (MCLBYTES); 2485 else 2486 return (MJUMPAGESIZE); 2487 } 2488 2489 static void 2490 iflib_calc_rx_mbuf_sz(if_ctx_t ctx) 2491 { 2492 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2493 2494 /* 2495 * XXX don't set the max_frame_size to larger 2496 * than the hardware can handle 2497 */ 2498 ctx->ifc_rx_mbuf_sz = 2499 iflib_get_mbuf_size_for(sctx->isc_max_frame_size); 2500 } 2501 2502 uint32_t 2503 iflib_get_rx_mbuf_sz(if_ctx_t ctx) 2504 { 2505 2506 return (ctx->ifc_rx_mbuf_sz); 2507 } 2508 2509 static void 2510 iflib_init_locked(if_ctx_t ctx) 2511 { 2512 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 2513 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2514 if_t ifp = ctx->ifc_ifp; 2515 iflib_fl_t fl; 2516 iflib_txq_t txq; 2517 iflib_rxq_t rxq; 2518 int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; 2519 2520 if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2521 IFDI_INTR_DISABLE(ctx); 2522 2523 /* 2524 * See iflib_stop(). Useful in case iflib_init_locked() is 2525 * called without first calling iflib_stop(). 
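 *
 * The (re)init sequence that follows is, in rough order: translate the
 * enabled IFCAP_* capabilities into hardware-assist CSUM_* flags, stop
 * the per-txq timers, compute the rx mbuf size, call IFDI_INIT() into
 * the driver, set up each rx free list, mark the interface running,
 * re-enable interrupts, re-arm the tx timers, and finally re-enable
 * the netmap rings.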
2526 */ 2527 netmap_disable_all_rings(ifp); 2528 2529 tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); 2530 tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); 2531 /* Set hardware offload abilities */ 2532 if_clearhwassist(ifp); 2533 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 2534 if_sethwassistbits(ifp, tx_ip_csum_flags, 0); 2535 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) 2536 if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); 2537 if (if_getcapenable(ifp) & IFCAP_TSO4) 2538 if_sethwassistbits(ifp, CSUM_IP_TSO, 0); 2539 if (if_getcapenable(ifp) & IFCAP_TSO6) 2540 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); 2541 2542 for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { 2543 CALLOUT_LOCK(txq); 2544 callout_stop(&txq->ift_timer); 2545 #ifdef DEV_NETMAP 2546 callout_stop(&txq->ift_netmap_timer); 2547 #endif /* DEV_NETMAP */ 2548 CALLOUT_UNLOCK(txq); 2549 iflib_netmap_txq_init(ctx, txq); 2550 } 2551 2552 /* 2553 * Calculate a suitable Rx mbuf size prior to calling IFDI_INIT, so 2554 * that drivers can use the value when setting up the hardware receive 2555 * buffers. 2556 */ 2557 iflib_calc_rx_mbuf_sz(ctx); 2558 2559 #ifdef INVARIANTS 2560 i = if_getdrvflags(ifp); 2561 #endif 2562 IFDI_INIT(ctx); 2563 MPASS(if_getdrvflags(ifp) == i); 2564 for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { 2565 if (iflib_netmap_rxq_init(ctx, rxq) > 0) { 2566 /* This rxq is in netmap mode. Skip normal init. */ 2567 continue; 2568 } 2569 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 2570 if (iflib_fl_setup(fl)) { 2571 device_printf(ctx->ifc_dev, 2572 "setting up free list %d failed - " 2573 "check cluster settings\n", j); 2574 goto done; 2575 } 2576 } 2577 } 2578 done: 2579 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); 2580 IFDI_INTR_ENABLE(ctx); 2581 txq = ctx->ifc_txqs; 2582 for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) 2583 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, 2584 txq->ift_timer.c_cpu); 2585 2586 /* Re-enable txsync/rxsync. */ 2587 netmap_enable_all_rings(ifp); 2588 } 2589 2590 static int 2591 iflib_media_change(if_t ifp) 2592 { 2593 if_ctx_t ctx = if_getsoftc(ifp); 2594 int err; 2595 2596 CTX_LOCK(ctx); 2597 if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) 2598 iflib_if_init_locked(ctx); 2599 CTX_UNLOCK(ctx); 2600 return (err); 2601 } 2602 2603 static void 2604 iflib_media_status(if_t ifp, struct ifmediareq *ifmr) 2605 { 2606 if_ctx_t ctx = if_getsoftc(ifp); 2607 2608 CTX_LOCK(ctx); 2609 IFDI_UPDATE_ADMIN_STATUS(ctx); 2610 IFDI_MEDIA_STATUS(ctx, ifmr); 2611 CTX_UNLOCK(ctx); 2612 } 2613 2614 void 2615 iflib_stop(if_ctx_t ctx) 2616 { 2617 iflib_txq_t txq = ctx->ifc_txqs; 2618 iflib_rxq_t rxq = ctx->ifc_rxqs; 2619 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2620 if_shared_ctx_t sctx = ctx->ifc_sctx; 2621 iflib_dma_info_t di; 2622 iflib_fl_t fl; 2623 int i, j; 2624 2625 /* Tell the stack that the interface is no longer active */ 2626 if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); 2627 2628 IFDI_INTR_DISABLE(ctx); 2629 DELAY(1000); 2630 IFDI_STOP(ctx); 2631 DELAY(1000); 2632 2633 /* 2634 * Stop any pending txsync/rxsync and prevent new ones 2635 * form starting. Processes blocked in poll() will get 2636 * POLLERR. 2637 */ 2638 netmap_disable_all_rings(ctx->ifc_ifp); 2639 2640 iflib_debug_reset(); 2641 /* Wait for current tx queue users to exit to disarm watchdog timer. 
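 * Per tx queue this means: stop the timer callouts, purge whatever is
 * still queued on the mp_ring, free the software descriptors (mbufs and
 * their DMA maps), zero the statistics and ring indices, and wipe the
 * descriptor memory.  The rx queues then get their completion-queue
 * index reset and their free lists drained.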
*/ 2642 for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { 2643 /* make sure all transmitters have completed before proceeding XXX */ 2644 2645 CALLOUT_LOCK(txq); 2646 callout_stop(&txq->ift_timer); 2647 #ifdef DEV_NETMAP 2648 callout_stop(&txq->ift_netmap_timer); 2649 #endif /* DEV_NETMAP */ 2650 CALLOUT_UNLOCK(txq); 2651 2652 /* clean any enqueued buffers */ 2653 iflib_ifmp_purge(txq); 2654 /* Free any existing tx buffers. */ 2655 for (j = 0; j < txq->ift_size; j++) { 2656 iflib_txsd_free(ctx, txq, j); 2657 } 2658 txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; 2659 txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; 2660 txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; 2661 txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; 2662 txq->ift_pullups = 0; 2663 ifmp_ring_reset_stats(txq->ift_br); 2664 for (j = 0, di = txq->ift_ifdi; j < sctx->isc_ntxqs; j++, di++) 2665 bzero((void *)di->idi_vaddr, di->idi_size); 2666 } 2667 for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { 2668 /* make sure all receivers have completed before proceeding XXX */ 2669 2670 rxq->ifr_cq_cidx = 0; 2671 for (j = 0, di = rxq->ifr_ifdi; j < sctx->isc_nrxqs; j++, di++) 2672 bzero((void *)di->idi_vaddr, di->idi_size); 2673 /* also resets the free lists pidx/cidx */ 2674 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 2675 iflib_fl_bufs_free(fl); 2676 } 2677 } 2678 2679 static inline caddr_t 2680 calc_next_rxd(iflib_fl_t fl, int cidx) 2681 { 2682 qidx_t size; 2683 int nrxd; 2684 caddr_t start, end, cur, next; 2685 2686 nrxd = fl->ifl_size; 2687 size = fl->ifl_rxd_size; 2688 start = fl->ifl_ifdi->idi_vaddr; 2689 2690 if (__predict_false(size == 0)) 2691 return (start); 2692 cur = start + size*cidx; 2693 end = start + size*nrxd; 2694 next = CACHE_PTR_NEXT(cur); 2695 return (next < end ?
next : start); 2696 } 2697 2698 static inline void 2699 prefetch_pkts(iflib_fl_t fl, int cidx) 2700 { 2701 int nextptr; 2702 int nrxd = fl->ifl_size; 2703 caddr_t next_rxd; 2704 2705 nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); 2706 prefetch(&fl->ifl_sds.ifsd_m[nextptr]); 2707 prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); 2708 next_rxd = calc_next_rxd(fl, cidx); 2709 prefetch(next_rxd); 2710 prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); 2711 prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); 2712 prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); 2713 prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); 2714 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); 2715 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); 2716 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); 2717 prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); 2718 } 2719 2720 static struct mbuf * 2721 rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, bool unload, if_rxsd_t sd, 2722 int *pf_rv, if_rxd_info_t ri) 2723 { 2724 bus_dmamap_t map; 2725 iflib_fl_t fl; 2726 caddr_t payload; 2727 struct mbuf *m; 2728 int flid, cidx, len, next; 2729 2730 map = NULL; 2731 flid = irf->irf_flid; 2732 cidx = irf->irf_idx; 2733 fl = &rxq->ifr_fl[flid]; 2734 sd->ifsd_fl = fl; 2735 m = fl->ifl_sds.ifsd_m[cidx]; 2736 sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; 2737 fl->ifl_credits--; 2738 #if MEMORY_LOGGING 2739 fl->ifl_m_dequeued++; 2740 #endif 2741 if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) 2742 prefetch_pkts(fl, cidx); 2743 next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); 2744 prefetch(&fl->ifl_sds.ifsd_map[next]); 2745 map = fl->ifl_sds.ifsd_map[cidx]; 2746 2747 bus_dmamap_sync(fl->ifl_buf_tag, map, BUS_DMASYNC_POSTREAD); 2748 2749 #ifndef __HAIKU__ 2750 if (rxq->pfil != NULL && PFIL_HOOKED_IN(rxq->pfil) && pf_rv != NULL && 2751 irf->irf_len != 0) { 2752 payload = *sd->ifsd_cl; 2753 payload += ri->iri_pad; 2754 len = ri->iri_len - ri->iri_pad; 2755 *pf_rv = pfil_run_hooks(rxq->pfil, payload, ri->iri_ifp, 2756 len | PFIL_MEMPTR | PFIL_IN, NULL); 2757 switch (*pf_rv) { 2758 case PFIL_DROPPED: 2759 case PFIL_CONSUMED: 2760 /* 2761 * The filter ate it. Everything is recycled. 2762 */ 2763 m = NULL; 2764 unload = 0; 2765 break; 2766 case PFIL_REALLOCED: 2767 /* 2768 * The filter copied it. Everything is recycled. 
2769 */ 2770 m = pfil_mem2mbuf(payload); 2771 unload = 0; 2772 break; 2773 case PFIL_PASS: 2774 /* 2775 * Filter said it was OK, so receive like 2776 * normal 2777 */ 2778 fl->ifl_sds.ifsd_m[cidx] = NULL; 2779 break; 2780 default: 2781 MPASS(0); 2782 } 2783 } else 2784 #endif 2785 { 2786 fl->ifl_sds.ifsd_m[cidx] = NULL; 2787 if (pf_rv != NULL) 2788 *pf_rv = PFIL_PASS; 2789 } 2790 2791 if (unload && irf->irf_len != 0) 2792 bus_dmamap_unload(fl->ifl_buf_tag, map); 2793 fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); 2794 if (__predict_false(fl->ifl_cidx == 0)) 2795 fl->ifl_gen = 0; 2796 bit_clear(fl->ifl_rx_bitmap, cidx); 2797 return (m); 2798 } 2799 2800 static struct mbuf * 2801 assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd, int *pf_rv) 2802 { 2803 struct mbuf *m, *mh, *mt; 2804 caddr_t cl; 2805 int *pf_rv_ptr, flags, i, padlen; 2806 bool consumed; 2807 2808 i = 0; 2809 mh = NULL; 2810 consumed = false; 2811 *pf_rv = PFIL_PASS; 2812 pf_rv_ptr = pf_rv; 2813 do { 2814 m = rxd_frag_to_sd(rxq, &ri->iri_frags[i], !consumed, sd, 2815 pf_rv_ptr, ri); 2816 2817 MPASS(*sd->ifsd_cl != NULL); 2818 2819 /* 2820 * Exclude zero-length frags & frags from 2821 * packets the filter has consumed or dropped 2822 */ 2823 if (ri->iri_frags[i].irf_len == 0 || consumed || 2824 #ifndef __HAIKU__ 2825 *pf_rv == PFIL_CONSUMED || *pf_rv == PFIL_DROPPED 2826 #else 2827 0 2828 #endif 2829 ) { 2830 if (mh == NULL) { 2831 /* everything saved here */ 2832 consumed = true; 2833 pf_rv_ptr = NULL; 2834 continue; 2835 } 2836 /* XXX we can save the cluster here, but not the mbuf */ 2837 m_init(m, M_NOWAIT, MT_DATA, 0); 2838 m_free(m); 2839 continue; 2840 } 2841 if (mh == NULL) { 2842 flags = M_PKTHDR|M_EXT; 2843 mh = mt = m; 2844 padlen = ri->iri_pad; 2845 } else { 2846 flags = M_EXT; 2847 mt->m_next = m; 2848 mt = m; 2849 /* assuming padding is only on the first fragment */ 2850 padlen = 0; 2851 } 2852 cl = *sd->ifsd_cl; 2853 *sd->ifsd_cl = NULL; 2854 2855 /* Can these two be made one ? */ 2856 m_init(m, M_NOWAIT, MT_DATA, flags); 2857 m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); 2858 /* 2859 * These must follow m_init and m_cljset 2860 */ 2861 m->m_data += padlen; 2862 ri->iri_len -= padlen; 2863 m->m_len = ri->iri_frags[i].irf_len; 2864 } while (++i < ri->iri_nfrags); 2865 2866 return (mh); 2867 } 2868 2869 /* 2870 * Process one software descriptor 2871 */ 2872 static struct mbuf * 2873 iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) 2874 { 2875 struct if_rxsd sd; 2876 struct mbuf *m; 2877 int pf_rv; 2878 2879 /* should I merge this back in now that the two paths are basically duplicated? 
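 *
 * The split is: a single-fragment packet small enough to fit in an
 * mbuf header (irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) is copied
 * out and its receive cluster stays attached to the free list slot for
 * reuse; anything larger, or spanning multiple fragments, goes through
 * assemble_segments() and takes ownership of its cluster(s) instead.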
*/ 2880 if (ri->iri_nfrags == 1 && 2881 ri->iri_frags[0].irf_len != 0 && 2882 ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { 2883 m = rxd_frag_to_sd(rxq, &ri->iri_frags[0], false, &sd, 2884 &pf_rv, ri); 2885 if (pf_rv != PFIL_PASS 2886 #ifndef __HAIKU__ 2887 && pf_rv != PFIL_REALLOCED 2888 #endif 2889 ) 2890 return (m); 2891 if (pf_rv == PFIL_PASS) { 2892 m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); 2893 #ifndef __NO_STRICT_ALIGNMENT 2894 if (!IP_ALIGNED(m)) 2895 m->m_data += 2; 2896 #endif 2897 memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); 2898 m->m_len = ri->iri_frags[0].irf_len; 2899 } 2900 } else { 2901 m = assemble_segments(rxq, ri, &sd, &pf_rv); 2902 if (m == NULL) 2903 return (NULL); 2904 if (pf_rv != PFIL_PASS 2905 #ifndef __HAIKU__ 2906 && pf_rv != PFIL_REALLOCED 2907 #endif 2908 ) 2909 return (m); 2910 } 2911 m->m_pkthdr.len = ri->iri_len; 2912 m->m_pkthdr.rcvif = ri->iri_ifp; 2913 m->m_flags |= ri->iri_flags; 2914 m->m_pkthdr.ether_vtag = ri->iri_vtag; 2915 m->m_pkthdr.flowid = ri->iri_flowid; 2916 M_HASHTYPE_SET(m, ri->iri_rsstype); 2917 m->m_pkthdr.csum_flags = ri->iri_csum_flags; 2918 m->m_pkthdr.csum_data = ri->iri_csum_data; 2919 return (m); 2920 } 2921 2922 #if defined(INET6) || defined(INET) 2923 static void 2924 iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) 2925 { 2926 CURVNET_SET(lc->ifp->if_vnet); 2927 #if defined(INET6) 2928 *v6 = V_ip6_forwarding; 2929 #endif 2930 #if defined(INET) 2931 *v4 = V_ipforwarding; 2932 #endif 2933 CURVNET_RESTORE(); 2934 } 2935 2936 /* 2937 * Returns true if it's possible this packet could be LROed. 2938 * if it returns false, it is guaranteed that tcp_lro_rx() 2939 * would not return zero. 2940 */ 2941 static bool 2942 iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) 2943 { 2944 #ifndef __HAIKU__ 2945 struct ether_header *eh; 2946 2947 eh = mtod(m, struct ether_header *); 2948 switch (eh->ether_type) { 2949 #if defined(INET6) 2950 case htons(ETHERTYPE_IPV6): 2951 return (!v6_forwarding); 2952 #endif 2953 #if defined (INET) 2954 case htons(ETHERTYPE_IP): 2955 return (!v4_forwarding); 2956 #endif 2957 } 2958 #endif 2959 2960 return false; 2961 } 2962 #else 2963 static void 2964 iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) 2965 { 2966 } 2967 #endif 2968 2969 static void 2970 _task_fn_rx_watchdog(void *context) 2971 { 2972 iflib_rxq_t rxq = context; 2973 2974 GROUPTASK_ENQUEUE(&rxq->ifr_task); 2975 } 2976 2977 static uint8_t 2978 iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) 2979 { 2980 if_t ifp; 2981 if_ctx_t ctx = rxq->ifr_ctx; 2982 if_shared_ctx_t sctx = ctx->ifc_sctx; 2983 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 2984 int avail, i; 2985 qidx_t *cidxp; 2986 struct if_rxd_info ri; 2987 int err, budget_left, rx_bytes, rx_pkts; 2988 iflib_fl_t fl; 2989 int lro_enabled; 2990 bool v4_forwarding, v6_forwarding, lro_possible; 2991 uint8_t retval = 0; 2992 2993 /* 2994 * XXX early demux data packets so that if_input processing only handles 2995 * acks in interrupt context 2996 */ 2997 struct mbuf *m, *mh, *mt, *mf; 2998 2999 #ifndef __HAIKU__ 3000 NET_EPOCH_ASSERT(); 3001 #endif 3002 3003 lro_possible = v4_forwarding = v6_forwarding = false; 3004 ifp = ctx->ifc_ifp; 3005 mh = mt = NULL; 3006 MPASS(budget > 0); 3007 rx_pkts = rx_bytes = 0; 3008 if (sctx->isc_flags & IFLIB_HAS_RXCQ) 3009 cidxp = &rxq->ifr_cq_cidx; 3010 else 3011 cidxp = &rxq->ifr_fl[0].ifl_cidx; 3012 if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { 3013 for (i 
= 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 3014 retval |= iflib_fl_refill_all(ctx, fl); 3015 DBG_COUNTER_INC(rx_unavail); 3016 return (retval); 3017 } 3018 3019 /* pfil needs the vnet to be set */ 3020 CURVNET_SET_QUIET(ifp->if_vnet); 3021 for (budget_left = budget; budget_left > 0 && avail > 0;) { 3022 if (__predict_false(!CTX_ACTIVE(ctx))) { 3023 DBG_COUNTER_INC(rx_ctx_inactive); 3024 break; 3025 } 3026 /* 3027 * Reset client set fields to their default values 3028 */ 3029 rxd_info_zero(&ri); 3030 ri.iri_qsidx = rxq->ifr_id; 3031 ri.iri_cidx = *cidxp; 3032 ri.iri_ifp = ifp; 3033 ri.iri_frags = rxq->ifr_frags; 3034 err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); 3035 3036 if (err) 3037 goto err; 3038 rx_pkts += 1; 3039 rx_bytes += ri.iri_len; 3040 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 3041 *cidxp = ri.iri_cidx; 3042 /* Update our consumer index */ 3043 /* XXX NB: shurd - check if this is still safe */ 3044 while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) 3045 rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; 3046 /* was this only a completion queue message? */ 3047 if (__predict_false(ri.iri_nfrags == 0)) 3048 continue; 3049 } 3050 MPASS(ri.iri_nfrags != 0); 3051 MPASS(ri.iri_len != 0); 3052 3053 /* will advance the cidx on the corresponding free lists */ 3054 m = iflib_rxd_pkt_get(rxq, &ri); 3055 avail--; 3056 budget_left--; 3057 if (avail == 0 && budget_left) 3058 avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); 3059 3060 if (__predict_false(m == NULL)) 3061 continue; 3062 3063 /* imm_pkt: -- cxgb */ 3064 if (mh == NULL) 3065 mh = mt = m; 3066 else { 3067 mt->m_nextpkt = m; 3068 mt = m; 3069 } 3070 } 3071 CURVNET_RESTORE(); 3072 /* make sure that we can refill faster than drain */ 3073 for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) 3074 retval |= iflib_fl_refill_all(ctx, fl); 3075 3076 lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); 3077 #ifndef __HAIKU__ 3078 if (lro_enabled) 3079 iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); 3080 #endif 3081 mt = mf = NULL; 3082 while (mh != NULL) { 3083 m = mh; 3084 mh = mh->m_nextpkt; 3085 m->m_nextpkt = NULL; 3086 #ifndef __NO_STRICT_ALIGNMENT 3087 if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) 3088 continue; 3089 #endif 3090 #ifndef __HAIKU__ 3091 #if defined(INET6) || defined(INET) 3092 if (lro_enabled) { 3093 if (!lro_possible) { 3094 lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); 3095 if (lro_possible && mf != NULL) { 3096 ifp->if_input(ifp, mf); 3097 DBG_COUNTER_INC(rx_if_input); 3098 mt = mf = NULL; 3099 } 3100 } 3101 if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == 3102 (CSUM_L4_CALC|CSUM_L4_VALID)) { 3103 if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) 3104 continue; 3105 } 3106 } 3107 #endif 3108 if (lro_possible) { 3109 ifp->if_input(ifp, m); 3110 DBG_COUNTER_INC(rx_if_input); 3111 continue; 3112 } 3113 #endif 3114 3115 if (mf == NULL) 3116 mf = m; 3117 if (mt != NULL) 3118 mt->m_nextpkt = m; 3119 mt = m; 3120 } 3121 if (mf != NULL) { 3122 ifp->if_input(ifp, mf); 3123 DBG_COUNTER_INC(rx_if_input); 3124 } 3125 3126 if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); 3127 if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); 3128 3129 /* 3130 * Flush any outstanding LRO work 3131 */ 3132 #if defined(INET6) || defined(INET) 3133 #ifndef __HAIKU__ 3134 tcp_lro_flush_all(&rxq->ifr_lc); 3135 #endif 3136 #endif 3137 if (avail != 0 || iflib_rxd_avail(ctx, rxq, *cidxp, 1) != 0) 3138 retval |= IFLIB_RXEOF_MORE; 3139 return (retval); 3140 err: 
3141 STATE_LOCK(ctx); 3142 ctx->ifc_flags |= IFC_DO_RESET; 3143 iflib_admin_intr_deferred(ctx); 3144 STATE_UNLOCK(ctx); 3145 return (0); 3146 } 3147 3148 #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) 3149 static inline qidx_t 3150 txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) 3151 { 3152 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 3153 qidx_t minthresh = txq->ift_size / 8; 3154 if (in_use > 4*minthresh) 3155 return (notify_count); 3156 if (in_use > 2*minthresh) 3157 return (notify_count >> 1); 3158 if (in_use > minthresh) 3159 return (notify_count >> 3); 3160 return (0); 3161 } 3162 3163 static inline qidx_t 3164 txq_max_rs_deferred(iflib_txq_t txq) 3165 { 3166 qidx_t notify_count = TXD_NOTIFY_COUNT(txq); 3167 qidx_t minthresh = txq->ift_size / 8; 3168 if (txq->ift_in_use > 4*minthresh) 3169 return (notify_count); 3170 if (txq->ift_in_use > 2*minthresh) 3171 return (notify_count >> 1); 3172 if (txq->ift_in_use > minthresh) 3173 return (notify_count >> 2); 3174 return (2); 3175 } 3176 3177 #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) 3178 #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) 3179 3180 #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) 3181 #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) 3182 #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) 3183 3184 /* forward compatibility for cxgb */ 3185 #define FIRST_QSET(ctx) 0 3186 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) 3187 #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) 3188 #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) 3189 #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) 3190 3191 /* XXX we should be setting this to something other than zero */ 3192 #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) 3193 #define MAX_TX_DESC(ctx) MAX((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max, \ 3194 (ctx)->ifc_softc_ctx.isc_tx_nsegments) 3195 3196 static inline bool 3197 iflib_txd_db_check(iflib_txq_t txq, int ring) 3198 { 3199 if_ctx_t ctx = txq->ift_ctx; 3200 qidx_t dbval, max; 3201 3202 max = TXQ_MAX_DB_DEFERRED(txq, txq->ift_in_use); 3203 3204 /* force || threshold exceeded || at the edge of the ring */ 3205 if (ring || (txq->ift_db_pending >= max) || (TXQ_AVAIL(txq) <= MAX_TX_DESC(ctx) + 2)) { 3206 3207 /* 3208 * 'npending' is used if the card's doorbell is in terms of the number of descriptors 3209 * pending flush (BRCM). 'pidx' is used in cases where the card's doorbeel uses the 3210 * producer index explicitly (INTC). 3211 */ 3212 dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; 3213 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3214 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3215 ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); 3216 3217 /* 3218 * Absent bugs there are zero packets pending so reset pending counts to zero. 
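 *
 * For a sense of scale (illustrative numbers only): with a 1024-entry
 * ring and an update frequency of 16, TXD_NOTIFY_COUNT() is 63, so
 * txq_max_db_deferred() lets 63, 31 or 7 descriptors accumulate before
 * ringing the doorbell as the ring runs above 1/2, 1/4 or 1/8
 * occupancy respectively, while a mostly idle ring is flushed at once.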
3219 */ 3220 txq->ift_db_pending = txq->ift_npending = 0; 3221 return (true); 3222 } 3223 return (false); 3224 } 3225 3226 #ifdef PKT_DEBUG 3227 static void 3228 print_pkt(if_pkt_info_t pi) 3229 { 3230 printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", 3231 pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); 3232 printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", 3233 pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); 3234 printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", 3235 pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); 3236 } 3237 #endif 3238 3239 #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) 3240 #define IS_TX_OFFLOAD4(pi) ((pi)->ipi_csum_flags & (CSUM_IP_TCP | CSUM_IP_TSO)) 3241 #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) 3242 #define IS_TX_OFFLOAD6(pi) ((pi)->ipi_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_TSO)) 3243 3244 static int 3245 iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) 3246 { 3247 if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; 3248 struct ether_vlan_header *eh; 3249 struct mbuf *m; 3250 3251 m = *mp; 3252 if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && 3253 M_WRITABLE(m) == 0) { 3254 if ((m = m_dup(m, M_NOWAIT)) == NULL) { 3255 return (ENOMEM); 3256 } else { 3257 m_freem(*mp); 3258 DBG_COUNTER_INC(tx_frees); 3259 *mp = m; 3260 } 3261 } 3262 3263 /* 3264 * Determine where frame payload starts. 3265 * Jump over vlan headers if already present, 3266 * helpful for QinQ too. 3267 */ 3268 if (__predict_false(m->m_len < sizeof(*eh))) { 3269 txq->ift_pullups++; 3270 if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) 3271 return (ENOMEM); 3272 } 3273 eh = mtod(m, struct ether_vlan_header *); 3274 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 3275 pi->ipi_etype = ntohs(eh->evl_proto); 3276 pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; 3277 } else { 3278 pi->ipi_etype = ntohs(eh->evl_encap_proto); 3279 pi->ipi_ehdrlen = ETHER_HDR_LEN; 3280 } 3281 3282 switch (pi->ipi_etype) { 3283 #ifdef INET 3284 case ETHERTYPE_IP: 3285 { 3286 struct mbuf *n; 3287 struct ip *ip = NULL; 3288 struct tcphdr *th = NULL; 3289 int minthlen; 3290 3291 minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); 3292 if (__predict_false(m->m_len < minthlen)) { 3293 /* 3294 * if this code bloat is causing too much of a hit 3295 * move it to a separate function and mark it noinline 3296 */ 3297 if (m->m_len == pi->ipi_ehdrlen) { 3298 n = m->m_next; 3299 MPASS(n); 3300 if (n->m_len >= sizeof(*ip)) { 3301 ip = (struct ip *)n->m_data; 3302 if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3303 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3304 } else { 3305 txq->ift_pullups++; 3306 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 3307 return (ENOMEM); 3308 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3309 } 3310 } else { 3311 txq->ift_pullups++; 3312 if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) 3313 return (ENOMEM); 3314 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3315 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3316 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3317 } 3318 } else { 3319 ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); 3320 if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) 3321 th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); 3322 } 3323 pi->ipi_ip_hlen = ip->ip_hl << 2; 3324 pi->ipi_ipproto = 
ip->ip_p; 3325 pi->ipi_flags |= IPI_TX_IPV4; 3326 3327 /* TCP checksum offload may require TCP header length */ 3328 if (IS_TX_OFFLOAD4(pi)) { 3329 if (__predict_true(pi->ipi_ipproto == IPPROTO_TCP)) { 3330 if (__predict_false(th == NULL)) { 3331 txq->ift_pullups++; 3332 if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) 3333 return (ENOMEM); 3334 th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); 3335 } 3336 pi->ipi_tcp_hflags = th->th_flags; 3337 pi->ipi_tcp_hlen = th->th_off << 2; 3338 pi->ipi_tcp_seq = th->th_seq; 3339 } 3340 if (IS_TSO4(pi)) { 3341 if (__predict_false(ip->ip_p != IPPROTO_TCP)) 3342 return (ENXIO); 3343 /* 3344 * TSO always requires hardware checksum offload. 3345 */ 3346 pi->ipi_csum_flags |= (CSUM_IP_TCP | CSUM_IP); 3347 th->th_sum = in_pseudo(ip->ip_src.s_addr, 3348 ip->ip_dst.s_addr, htons(IPPROTO_TCP)); 3349 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 3350 if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { 3351 ip->ip_sum = 0; 3352 ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); 3353 } 3354 } 3355 } 3356 if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) 3357 ip->ip_sum = 0; 3358 3359 break; 3360 } 3361 #endif 3362 #ifdef INET6 3363 case ETHERTYPE_IPV6: 3364 { 3365 struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); 3366 struct tcphdr *th; 3367 pi->ipi_ip_hlen = sizeof(struct ip6_hdr); 3368 3369 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { 3370 txq->ift_pullups++; 3371 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) 3372 return (ENOMEM); 3373 } 3374 th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); 3375 3376 /* XXX-BZ this will go badly in case of ext hdrs. */ 3377 pi->ipi_ipproto = ip6->ip6_nxt; 3378 pi->ipi_flags |= IPI_TX_IPV6; 3379 3380 /* TCP checksum offload may require TCP header length */ 3381 if (IS_TX_OFFLOAD6(pi)) { 3382 if (pi->ipi_ipproto == IPPROTO_TCP) { 3383 if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { 3384 txq->ift_pullups++; 3385 if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) 3386 return (ENOMEM); 3387 } 3388 pi->ipi_tcp_hflags = th->th_flags; 3389 pi->ipi_tcp_hlen = th->th_off << 2; 3390 pi->ipi_tcp_seq = th->th_seq; 3391 } 3392 if (IS_TSO6(pi)) { 3393 if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) 3394 return (ENXIO); 3395 /* 3396 * TSO always requires hardware checksum offload. 
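 *
 * As in the IPv4 case above, the TCP checksum field is pre-seeded with
 * the pseudo-header sum so the NIC can complete the per-segment
 * checksum in hardware.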
3397 */ 3398 pi->ipi_csum_flags |= CSUM_IP6_TCP; 3399 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); 3400 pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; 3401 } 3402 } 3403 break; 3404 } 3405 #endif 3406 default: 3407 pi->ipi_csum_flags &= ~CSUM_OFFLOAD; 3408 pi->ipi_ip_hlen = 0; 3409 break; 3410 } 3411 *mp = m; 3412 3413 return (0); 3414 } 3415 3416 /* 3417 * If dodgy hardware rejects the scatter gather chain we've handed it 3418 * we'll need to remove the mbuf chain from ifsg_m[] before we can add the 3419 * m_defrag'd mbufs 3420 */ 3421 static __noinline struct mbuf * 3422 iflib_remove_mbuf(iflib_txq_t txq) 3423 { 3424 int ntxd, pidx; 3425 struct mbuf *m, **ifsd_m; 3426 3427 ifsd_m = txq->ift_sds.ifsd_m; 3428 ntxd = txq->ift_size; 3429 pidx = txq->ift_pidx & (ntxd - 1); 3430 ifsd_m = txq->ift_sds.ifsd_m; 3431 m = ifsd_m[pidx]; 3432 ifsd_m[pidx] = NULL; 3433 bus_dmamap_unload(txq->ift_buf_tag, txq->ift_sds.ifsd_map[pidx]); 3434 if (txq->ift_sds.ifsd_tso_map != NULL) 3435 bus_dmamap_unload(txq->ift_tso_buf_tag, 3436 txq->ift_sds.ifsd_tso_map[pidx]); 3437 #if MEMORY_LOGGING 3438 txq->ift_dequeued++; 3439 #endif 3440 return (m); 3441 } 3442 3443 static inline caddr_t 3444 calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) 3445 { 3446 qidx_t size; 3447 int ntxd; 3448 caddr_t start, end, cur, next; 3449 3450 ntxd = txq->ift_size; 3451 size = txq->ift_txd_size[qid]; 3452 start = txq->ift_ifdi[qid].idi_vaddr; 3453 3454 if (__predict_false(size == 0)) 3455 return (start); 3456 cur = start + size*cidx; 3457 end = start + size*ntxd; 3458 next = CACHE_PTR_NEXT(cur); 3459 return (next < end ? next : start); 3460 } 3461 3462 /* 3463 * Pad an mbuf to ensure a minimum ethernet frame size. 3464 * min_frame_size is the frame size (less CRC) to pad the mbuf to 3465 */ 3466 static __noinline int 3467 iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) 3468 { 3469 /* 3470 * 18 is enough bytes to pad an ARP packet to 46 bytes, and 3471 * and ARP message is the smallest common payload I can think of 3472 */ 3473 static char pad[18]; /* just zeros */ 3474 int n; 3475 struct mbuf *new_head; 3476 3477 if (!M_WRITABLE(*m_head)) { 3478 new_head = m_dup(*m_head, M_NOWAIT); 3479 if (new_head == NULL) { 3480 m_freem(*m_head); 3481 device_printf(dev, "cannot pad short frame, m_dup() failed"); 3482 DBG_COUNTER_INC(encap_pad_mbuf_fail); 3483 DBG_COUNTER_INC(tx_frees); 3484 return ENOMEM; 3485 } 3486 m_freem(*m_head); 3487 *m_head = new_head; 3488 } 3489 3490 for (n = min_frame_size - (*m_head)->m_pkthdr.len; 3491 n > 0; n -= sizeof(pad)) 3492 if (!m_append(*m_head, min(n, sizeof(pad)), pad)) 3493 break; 3494 3495 if (n > 0) { 3496 m_freem(*m_head); 3497 device_printf(dev, "cannot pad short frame\n"); 3498 DBG_COUNTER_INC(encap_pad_mbuf_fail); 3499 DBG_COUNTER_INC(tx_frees); 3500 return (ENOBUFS); 3501 } 3502 3503 return 0; 3504 } 3505 3506 static int 3507 iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) 3508 { 3509 if_ctx_t ctx; 3510 if_shared_ctx_t sctx; 3511 if_softc_ctx_t scctx; 3512 bus_dma_tag_t buf_tag; 3513 bus_dma_segment_t *segs; 3514 struct mbuf *m_head, **ifsd_m; 3515 void *next_txd; 3516 bus_dmamap_t map; 3517 struct if_pkt_info pi; 3518 int remap = 0; 3519 int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; 3520 3521 ctx = txq->ift_ctx; 3522 sctx = ctx->ifc_sctx; 3523 scctx = &ctx->ifc_softc_ctx; 3524 segs = txq->ift_segs; 3525 ntxd = txq->ift_size; 3526 m_head = *m_headp; 3527 map = NULL; 3528 3529 /* 3530 * If we're doing TSO the next descriptor to clean may be quite 
far ahead 3531 */ 3532 cidx = txq->ift_cidx; 3533 pidx = txq->ift_pidx; 3534 if (ctx->ifc_flags & IFC_PREFETCH) { 3535 next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); 3536 if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { 3537 next_txd = calc_next_txd(txq, cidx, 0); 3538 prefetch(next_txd); 3539 } 3540 3541 /* prefetch the next cache line of mbuf pointers and flags */ 3542 prefetch(&txq->ift_sds.ifsd_m[next]); 3543 prefetch(&txq->ift_sds.ifsd_map[next]); 3544 next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); 3545 } 3546 map = txq->ift_sds.ifsd_map[pidx]; 3547 ifsd_m = txq->ift_sds.ifsd_m; 3548 3549 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { 3550 buf_tag = txq->ift_tso_buf_tag; 3551 max_segs = scctx->isc_tx_tso_segments_max; 3552 map = txq->ift_sds.ifsd_tso_map[pidx]; 3553 MPASS(buf_tag != NULL); 3554 MPASS(max_segs > 0); 3555 } else { 3556 buf_tag = txq->ift_buf_tag; 3557 max_segs = scctx->isc_tx_nsegments; 3558 map = txq->ift_sds.ifsd_map[pidx]; 3559 } 3560 if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && 3561 __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { 3562 err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); 3563 if (err) { 3564 DBG_COUNTER_INC(encap_txd_encap_fail); 3565 return err; 3566 } 3567 } 3568 m_head = *m_headp; 3569 3570 pkt_info_zero(&pi); 3571 pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); 3572 pi.ipi_pidx = pidx; 3573 pi.ipi_qsidx = txq->ift_id; 3574 pi.ipi_len = m_head->m_pkthdr.len; 3575 pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; 3576 pi.ipi_vtag = M_HAS_VLANTAG(m_head) ? m_head->m_pkthdr.ether_vtag : 0; 3577 3578 /* deliberate bitwise OR to make one condition */ 3579 if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { 3580 if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) { 3581 DBG_COUNTER_INC(encap_txd_encap_fail); 3582 return (err); 3583 } 3584 m_head = *m_headp; 3585 } 3586 3587 retry: 3588 err = bus_dmamap_load_mbuf_sg(buf_tag, map, m_head, segs, &nsegs, 3589 BUS_DMA_NOWAIT); 3590 defrag: 3591 if (__predict_false(err)) { 3592 switch (err) { 3593 case EFBIG: 3594 /* try collapse once and defrag once */ 3595 if (remap == 0) { 3596 m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); 3597 /* try defrag if collapsing fails */ 3598 if (m_head == NULL) 3599 remap++; 3600 } 3601 if (remap == 1) { 3602 txq->ift_mbuf_defrag++; 3603 m_head = m_defrag(*m_headp, M_NOWAIT); 3604 } 3605 /* 3606 * remap should never be >1 unless bus_dmamap_load_mbuf_sg 3607 * failed to map an mbuf that was run through m_defrag 3608 */ 3609 MPASS(remap <= 1); 3610 if (__predict_false(m_head == NULL || remap > 1)) 3611 goto defrag_failed; 3612 remap++; 3613 *m_headp = m_head; 3614 goto retry; 3615 break; 3616 case ENOMEM: 3617 txq->ift_no_tx_dma_setup++; 3618 break; 3619 default: 3620 txq->ift_no_tx_dma_setup++; 3621 m_freem(*m_headp); 3622 DBG_COUNTER_INC(tx_frees); 3623 *m_headp = NULL; 3624 break; 3625 } 3626 txq->ift_map_failed++; 3627 DBG_COUNTER_INC(encap_load_mbuf_fail); 3628 DBG_COUNTER_INC(encap_txd_encap_fail); 3629 return (err); 3630 } 3631 ifsd_m[pidx] = m_head; 3632 /* 3633 * XXX assumes a 1 to 1 relationship between segments and 3634 * descriptors - this does not hold true on all drivers, e.g. 
3635 * cxgb 3636 */ 3637 if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { 3638 txq->ift_no_desc_avail++; 3639 bus_dmamap_unload(buf_tag, map); 3640 DBG_COUNTER_INC(encap_txq_avail_fail); 3641 DBG_COUNTER_INC(encap_txd_encap_fail); 3642 if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) 3643 GROUPTASK_ENQUEUE(&txq->ift_task); 3644 return (ENOBUFS); 3645 } 3646 /* 3647 * On Intel cards we can greatly reduce the number of TX interrupts 3648 * we see by only setting report status on every Nth descriptor. 3649 * However, this also means that the driver will need to keep track 3650 * of the descriptors that RS was set on to check them for the DD bit. 3651 */ 3652 txq->ift_rs_pending += nsegs + 1; 3653 if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || 3654 iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs) <= MAX_TX_DESC(ctx) + 2) { 3655 pi.ipi_flags |= IPI_TX_INTR; 3656 txq->ift_rs_pending = 0; 3657 } 3658 3659 pi.ipi_segs = segs; 3660 pi.ipi_nsegs = nsegs; 3661 3662 MPASS(pidx >= 0 && pidx < txq->ift_size); 3663 #ifdef PKT_DEBUG 3664 print_pkt(&pi); 3665 #endif 3666 if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { 3667 bus_dmamap_sync(buf_tag, map, BUS_DMASYNC_PREWRITE); 3668 DBG_COUNTER_INC(tx_encap); 3669 MPASS(pi.ipi_new_pidx < txq->ift_size); 3670 3671 ndesc = pi.ipi_new_pidx - pi.ipi_pidx; 3672 if (pi.ipi_new_pidx < pi.ipi_pidx) { 3673 ndesc += txq->ift_size; 3674 txq->ift_gen = 1; 3675 } 3676 /* 3677 * drivers can need as many as 3678 * two sentinels 3679 */ 3680 MPASS(ndesc <= pi.ipi_nsegs + 2); 3681 MPASS(pi.ipi_new_pidx != pidx); 3682 MPASS(ndesc > 0); 3683 txq->ift_in_use += ndesc; 3684 txq->ift_db_pending += ndesc; 3685 3686 /* 3687 * We update the last software descriptor again here because there may 3688 * be a sentinel and/or there may be more mbufs than segments 3689 */ 3690 txq->ift_pidx = pi.ipi_new_pidx; 3691 txq->ift_npending += pi.ipi_ndescs; 3692 } else { 3693 *m_headp = m_head = iflib_remove_mbuf(txq); 3694 if (err == EFBIG) { 3695 txq->ift_txd_encap_efbig++; 3696 if (remap < 2) { 3697 remap = 1; 3698 goto defrag; 3699 } 3700 } 3701 goto defrag_failed; 3702 } 3703 /* 3704 * err can't possibly be non-zero here, so we don't neet to test it 3705 * to see if we need to DBG_COUNTER_INC(encap_txd_encap_fail). 
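 *
 * For reference, the EFBIG recovery ladder above is: first try
 * m_collapse() to squeeze the chain into at most max_segs segments,
 * then fall back to m_defrag() into a freshly allocated chain, and if
 * the mapping still fails, give up via defrag_failed and drop the
 * packet.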
3706 */ 3707 return (err); 3708 3709 defrag_failed: 3710 txq->ift_mbuf_defrag_failed++; 3711 txq->ift_map_failed++; 3712 m_freem(*m_headp); 3713 DBG_COUNTER_INC(tx_frees); 3714 *m_headp = NULL; 3715 DBG_COUNTER_INC(encap_txd_encap_fail); 3716 return (ENOMEM); 3717 } 3718 3719 static void 3720 iflib_tx_desc_free(iflib_txq_t txq, int n) 3721 { 3722 uint32_t qsize, cidx, mask, gen; 3723 struct mbuf *m, **ifsd_m; 3724 bool do_prefetch; 3725 3726 cidx = txq->ift_cidx; 3727 gen = txq->ift_gen; 3728 qsize = txq->ift_size; 3729 mask = qsize-1; 3730 ifsd_m = txq->ift_sds.ifsd_m; 3731 do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); 3732 3733 while (n-- > 0) { 3734 if (do_prefetch) { 3735 prefetch(ifsd_m[(cidx + 3) & mask]); 3736 prefetch(ifsd_m[(cidx + 4) & mask]); 3737 } 3738 if ((m = ifsd_m[cidx]) != NULL) { 3739 prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); 3740 if (m->m_pkthdr.csum_flags & CSUM_TSO) { 3741 bus_dmamap_sync(txq->ift_tso_buf_tag, 3742 txq->ift_sds.ifsd_tso_map[cidx], 3743 BUS_DMASYNC_POSTWRITE); 3744 bus_dmamap_unload(txq->ift_tso_buf_tag, 3745 txq->ift_sds.ifsd_tso_map[cidx]); 3746 } else { 3747 bus_dmamap_sync(txq->ift_buf_tag, 3748 txq->ift_sds.ifsd_map[cidx], 3749 BUS_DMASYNC_POSTWRITE); 3750 bus_dmamap_unload(txq->ift_buf_tag, 3751 txq->ift_sds.ifsd_map[cidx]); 3752 } 3753 /* XXX we don't support any drivers that batch packets yet */ 3754 MPASS(m->m_nextpkt == NULL); 3755 m_freem(m); 3756 ifsd_m[cidx] = NULL; 3757 #if MEMORY_LOGGING 3758 txq->ift_dequeued++; 3759 #endif 3760 DBG_COUNTER_INC(tx_frees); 3761 } 3762 if (__predict_false(++cidx == qsize)) { 3763 cidx = 0; 3764 gen = 0; 3765 } 3766 } 3767 txq->ift_cidx = cidx; 3768 txq->ift_gen = gen; 3769 } 3770 3771 static __inline int 3772 iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) 3773 { 3774 int reclaim; 3775 if_ctx_t ctx = txq->ift_ctx; 3776 3777 KASSERT(thresh >= 0, ("invalid threshold to reclaim")); 3778 MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); 3779 3780 /* 3781 * Need a rate-limiting check so that this isn't called every time 3782 */ 3783 iflib_tx_credits_update(ctx, txq); 3784 reclaim = DESC_RECLAIMABLE(txq); 3785 3786 if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { 3787 #ifdef INVARIANTS 3788 if (iflib_verbose_debug) { 3789 printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, 3790 txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, 3791 reclaim, thresh); 3792 } 3793 #endif 3794 return (0); 3795 } 3796 iflib_tx_desc_free(txq, reclaim); 3797 txq->ift_cleaned += reclaim; 3798 txq->ift_in_use -= reclaim; 3799 3800 return (reclaim); 3801 } 3802 3803 static struct mbuf ** 3804 _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) 3805 { 3806 int next, size; 3807 struct mbuf **items; 3808 3809 size = r->size; 3810 next = (cidx + CACHE_PTR_INCREMENT) & (size-1); 3811 items = __DEVOLATILE(struct mbuf **, &r->items[0]); 3812 3813 prefetch(items[(cidx + offset) & (size-1)]); 3814 if (remaining > 1) { 3815 prefetch2cachelines(&items[next]); 3816 prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); 3817 prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); 3818 prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); 3819 } 3820 return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); 3821 } 3822 3823 static void 3824 iflib_txq_check_drain(iflib_txq_t txq, int budget) 3825 { 3826 3827 ifmp_ring_check_drainage(txq->ift_br, budget); 3828 } 3829 
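/*
 * The mp_ring callbacks below (can_drain/drain) are where iflib actually
 * pulls packets off the software transmit ring.  The following is a
 * minimal, self-contained sketch of the index arithmetic and the
 * sentinel-skip behaviour those callbacks rely on; it is illustrative
 * only (the names drain_model, idxdiff and SENTINEL are invented here,
 * not iflib API) and is kept under "#if 0" so it is never built.
 */
#if 0	/* not compiled: standalone illustration only */
#include <stdio.h>

#define RING_SIZE	16			/* must be a power of two */
#define SENTINEL	((void *)-1)		/* stands in for the txq pointer */

/* Same idea as IDXDIFF(): distance from cidx to pidx on a wrapping ring. */
static inline int
idxdiff(int pidx, int cidx, int size)
{
	return ((pidx - cidx) & (size - 1));
}

/*
 * Walk the occupied slots the way iflib_txq_drain() does: everything
 * between cidx and pidx is a candidate, but slots holding the sentinel
 * (enqueued only to kick the ring from a completion interrupt) are
 * skipped rather than transmitted.
 */
static int
drain_model(void *items[], int cidx, int pidx)
{
	int avail = idxdiff(pidx, cidx, RING_SIZE);
	int consumed = 0;

	for (int i = 0; i < avail; i++) {
		void *item = items[(cidx + i) & (RING_SIZE - 1)];

		if (item == SENTINEL)
			continue;	/* wake-up token, nothing to send */
		/* ... encapsulate and hand "item" to the NIC here ... */
		consumed++;
	}
	return (consumed);
}

int
main(void)
{
	void *items[RING_SIZE] = { 0 };

	items[14] = "pkt A";
	items[15] = SENTINEL;
	items[0] = "pkt B";		/* ring has wrapped around */
	printf("consumed %d\n", drain_model(items, 14, 1));	/* -> 2 */
	return (0);
}
#endif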
3830 static uint32_t 3831 iflib_txq_can_drain(struct ifmp_ring *r) 3832 { 3833 iflib_txq_t txq = r->cookie; 3834 if_ctx_t ctx = txq->ift_ctx; 3835 3836 if (TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) 3837 return (1); 3838 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 3839 BUS_DMASYNC_POSTREAD); 3840 return (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, 3841 false)); 3842 } 3843 3844 static uint32_t 3845 iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3846 { 3847 iflib_txq_t txq = r->cookie; 3848 if_ctx_t ctx = txq->ift_ctx; 3849 if_t ifp = ctx->ifc_ifp; 3850 struct mbuf *m, **mp; 3851 int avail, bytes_sent, skipped, count, err, i; 3852 int mcast_sent, pkt_sent, reclaimed; 3853 bool do_prefetch, rang, ring; 3854 3855 if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || 3856 !LINK_ACTIVE(ctx))) { 3857 DBG_COUNTER_INC(txq_drain_notready); 3858 return (0); 3859 } 3860 reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 3861 rang = iflib_txd_db_check(txq, reclaimed && txq->ift_db_pending); 3862 avail = IDXDIFF(pidx, cidx, r->size); 3863 3864 if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { 3865 /* 3866 * The driver is unloading so we need to free all pending packets. 3867 */ 3868 DBG_COUNTER_INC(txq_drain_flushing); 3869 for (i = 0; i < avail; i++) { 3870 if (__predict_true(r->items[(cidx + i) & (r->size-1)] != (void *)txq)) 3871 m_freem(r->items[(cidx + i) & (r->size-1)]); 3872 r->items[(cidx + i) & (r->size-1)] = NULL; 3873 } 3874 return (avail); 3875 } 3876 3877 if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { 3878 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3879 CALLOUT_LOCK(txq); 3880 callout_stop(&txq->ift_timer); 3881 CALLOUT_UNLOCK(txq); 3882 DBG_COUNTER_INC(txq_drain_oactive); 3883 return (0); 3884 } 3885 3886 /* 3887 * If we've reclaimed any packets this queue cannot be hung. 3888 */ 3889 if (reclaimed) 3890 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3891 skipped = mcast_sent = bytes_sent = pkt_sent = 0; 3892 count = MIN(avail, TX_BATCH_SIZE); 3893 #ifdef INVARIANTS 3894 if (iflib_verbose_debug) 3895 printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, 3896 avail, ctx->ifc_flags, TXQ_AVAIL(txq)); 3897 #endif 3898 do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); 3899 err = 0; 3900 for (i = 0; i < count && TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx) + 2; i++) { 3901 int rem = do_prefetch ? count - i : 0; 3902 3903 mp = _ring_peek_one(r, cidx, i, rem); 3904 MPASS(mp != NULL && *mp != NULL); 3905 3906 /* 3907 * Completion interrupts will use the address of the txq 3908 * as a sentinel to enqueue _something_ in order to acquire 3909 * the lock on the mp_ring (there's no direct lock call). 3910 * We obviously whave to check for these sentinel cases 3911 * and skip them. 3912 */ 3913 if (__predict_false(*mp == (struct mbuf *)txq)) { 3914 skipped++; 3915 continue; 3916 } 3917 err = iflib_encap(txq, mp); 3918 if (__predict_false(err)) { 3919 /* no room - bail out */ 3920 if (err == ENOBUFS) 3921 break; 3922 skipped++; 3923 /* we can't send this packet - skip it */ 3924 continue; 3925 } 3926 pkt_sent++; 3927 m = *mp; 3928 DBG_COUNTER_INC(tx_sent); 3929 bytes_sent += m->m_pkthdr.len; 3930 mcast_sent += !!(m->m_flags & M_MCAST); 3931 3932 if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) 3933 break; 3934 ETHER_BPF_MTAP(ifp, m); 3935 rang = iflib_txd_db_check(txq, false); 3936 } 3937 3938 /* deliberate use of bitwise or to avoid gratuitous short-circuit */ 3939 ring = rang ? 
false : (iflib_min_tx_latency | err); 3940 iflib_txd_db_check(txq, ring); 3941 if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); 3942 if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); 3943 if (mcast_sent) 3944 if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); 3945 #ifdef INVARIANTS 3946 if (iflib_verbose_debug) 3947 printf("consumed=%d\n", skipped + pkt_sent); 3948 #endif 3949 return (skipped + pkt_sent); 3950 } 3951 3952 static uint32_t 3953 iflib_txq_drain_always(struct ifmp_ring *r) 3954 { 3955 return (1); 3956 } 3957 3958 static uint32_t 3959 iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) 3960 { 3961 int i, avail; 3962 struct mbuf **mp; 3963 iflib_txq_t txq; 3964 3965 txq = r->cookie; 3966 3967 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 3968 CALLOUT_LOCK(txq); 3969 callout_stop(&txq->ift_timer); 3970 CALLOUT_UNLOCK(txq); 3971 3972 avail = IDXDIFF(pidx, cidx, r->size); 3973 for (i = 0; i < avail; i++) { 3974 mp = _ring_peek_one(r, cidx, i, avail - i); 3975 if (__predict_false(*mp == (struct mbuf *)txq)) 3976 continue; 3977 m_freem(*mp); 3978 DBG_COUNTER_INC(tx_frees); 3979 } 3980 MPASS(ifmp_ring_is_stalled(r) == 0); 3981 return (avail); 3982 } 3983 3984 static void 3985 iflib_ifmp_purge(iflib_txq_t txq) 3986 { 3987 struct ifmp_ring *r; 3988 3989 r = txq->ift_br; 3990 r->drain = iflib_txq_drain_free; 3991 r->can_drain = iflib_txq_drain_always; 3992 3993 ifmp_ring_check_drainage(r, r->size); 3994 3995 r->drain = iflib_txq_drain; 3996 r->can_drain = iflib_txq_can_drain; 3997 } 3998 3999 static void 4000 _task_fn_tx(void *context) 4001 { 4002 iflib_txq_t txq = context; 4003 if_ctx_t ctx = txq->ift_ctx; 4004 if_t ifp = ctx->ifc_ifp; 4005 int abdicate = ctx->ifc_sysctl_tx_abdicate; 4006 4007 #ifdef IFLIB_DIAGNOSTICS 4008 txq->ift_cpu_exec_count[curcpu]++; 4009 #endif 4010 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 4011 return; 4012 #ifdef DEV_NETMAP 4013 if ((if_getcapenable(ifp) & IFCAP_NETMAP) && 4014 netmap_tx_irq(ifp, txq->ift_id)) 4015 goto skip_ifmp; 4016 #endif 4017 #ifdef ALTQ 4018 if (ALTQ_IS_ENABLED(&ifp->if_snd)) 4019 iflib_altq_if_start(ifp); 4020 #endif 4021 if (txq->ift_db_pending) 4022 ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE, abdicate); 4023 else if (!abdicate) 4024 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 4025 /* 4026 * When abdicating, we always need to check drainage, not just when we don't enqueue 4027 */ 4028 if (abdicate) 4029 ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); 4030 #ifdef DEV_NETMAP 4031 skip_ifmp: 4032 #endif 4033 if (ctx->ifc_flags & IFC_LEGACY) 4034 IFDI_INTR_ENABLE(ctx); 4035 else 4036 IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); 4037 } 4038 4039 static void 4040 _task_fn_rx(void *context) 4041 { 4042 iflib_rxq_t rxq = context; 4043 if_ctx_t ctx = rxq->ifr_ctx; 4044 uint8_t more; 4045 uint16_t budget; 4046 #ifdef DEV_NETMAP 4047 u_int work = 0; 4048 int nmirq; 4049 #endif 4050 4051 #ifdef IFLIB_DIAGNOSTICS 4052 rxq->ifr_cpu_exec_count[curcpu]++; 4053 #endif 4054 DBG_COUNTER_INC(task_fn_rxs); 4055 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 4056 return; 4057 #ifdef DEV_NETMAP 4058 nmirq = netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work); 4059 if (nmirq != NM_IRQ_PASS) { 4060 more = (nmirq == NM_IRQ_RESCHED) ? 
IFLIB_RXEOF_MORE : 0; 4061 goto skip_rxeof; 4062 } 4063 #endif 4064 budget = ctx->ifc_sysctl_rx_budget; 4065 if (budget == 0) 4066 budget = 16; /* XXX */ 4067 more = iflib_rxeof(rxq, budget); 4068 #ifdef DEV_NETMAP 4069 skip_rxeof: 4070 #endif 4071 if ((more & IFLIB_RXEOF_MORE) == 0) { 4072 if (ctx->ifc_flags & IFC_LEGACY) 4073 IFDI_INTR_ENABLE(ctx); 4074 else 4075 IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); 4076 DBG_COUNTER_INC(rx_intr_enables); 4077 } 4078 if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) 4079 return; 4080 4081 if (more & IFLIB_RXEOF_MORE) 4082 GROUPTASK_ENQUEUE(&rxq->ifr_task); 4083 else if (more & IFLIB_RXEOF_EMPTY) 4084 #ifndef __HAIKU__ 4085 callout_reset_curcpu(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); 4086 #else 4087 callout_reset(&rxq->ifr_watchdog, 1, &_task_fn_rx_watchdog, rxq); 4088 #endif 4089 } 4090 4091 static void 4092 _task_fn_admin(void *context) 4093 { 4094 if_ctx_t ctx = context; 4095 if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; 4096 iflib_txq_t txq; 4097 int i; 4098 bool oactive, running, do_reset, do_watchdog, in_detach; 4099 4100 STATE_LOCK(ctx); 4101 running = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING); 4102 oactive = (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE); 4103 do_reset = (ctx->ifc_flags & IFC_DO_RESET); 4104 do_watchdog = (ctx->ifc_flags & IFC_DO_WATCHDOG); 4105 in_detach = (ctx->ifc_flags & IFC_IN_DETACH); 4106 ctx->ifc_flags &= ~(IFC_DO_RESET|IFC_DO_WATCHDOG); 4107 STATE_UNLOCK(ctx); 4108 4109 if ((!running && !oactive) && !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) 4110 return; 4111 if (in_detach) 4112 return; 4113 4114 CTX_LOCK(ctx); 4115 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 4116 CALLOUT_LOCK(txq); 4117 callout_stop(&txq->ift_timer); 4118 CALLOUT_UNLOCK(txq); 4119 } 4120 if (do_watchdog) { 4121 ctx->ifc_watchdog_events++; 4122 IFDI_WATCHDOG_RESET(ctx); 4123 } 4124 IFDI_UPDATE_ADMIN_STATUS(ctx); 4125 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { 4126 callout_reset_on(&txq->ift_timer, iflib_timer_default, iflib_timer, txq, 4127 txq->ift_timer.c_cpu); 4128 } 4129 IFDI_LINK_INTR_ENABLE(ctx); 4130 if (do_reset) 4131 iflib_if_init_locked(ctx); 4132 CTX_UNLOCK(ctx); 4133 4134 if (LINK_ACTIVE(ctx) == 0) 4135 return; 4136 for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) 4137 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 4138 } 4139 4140 static void 4141 _task_fn_iov(void *context) 4142 { 4143 if_ctx_t ctx = context; 4144 4145 if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) && 4146 !(ctx->ifc_sctx->isc_flags & IFLIB_ADMIN_ALWAYS_RUN)) 4147 return; 4148 4149 CTX_LOCK(ctx); 4150 IFDI_VFLR_HANDLE(ctx); 4151 CTX_UNLOCK(ctx); 4152 } 4153 4154 static int 4155 iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) 4156 { 4157 int err; 4158 if_int_delay_info_t info; 4159 if_ctx_t ctx; 4160 4161 info = (if_int_delay_info_t)arg1; 4162 ctx = info->iidi_ctx; 4163 info->iidi_req = req; 4164 info->iidi_oidp = oidp; 4165 CTX_LOCK(ctx); 4166 err = IFDI_SYSCTL_INT_DELAY(ctx, info); 4167 CTX_UNLOCK(ctx); 4168 return (err); 4169 } 4170 4171 /********************************************************************* 4172 * 4173 * IFNET FUNCTIONS 4174 * 4175 **********************************************************************/ 4176 4177 static void 4178 iflib_if_init_locked(if_ctx_t ctx) 4179 { 4180 iflib_stop(ctx); 4181 iflib_init_locked(ctx); 4182 } 4183 4184 static void 4185 iflib_if_init(void *arg) 4186 { 4187 if_ctx_t ctx = arg; 4188 4189 
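    /* if_init callback: serialize the full stop/re-init cycle on the ctx lock. */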
    CTX_LOCK(ctx);
    iflib_if_init_locked(ctx);
    CTX_UNLOCK(ctx);
}

static int
iflib_if_transmit(if_t ifp, struct mbuf *m)
{
    if_ctx_t ctx = if_getsoftc(ifp);

    iflib_txq_t txq;
    int err, qidx;
    int abdicate = ctx->ifc_sysctl_tx_abdicate;

    if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) {
        DBG_COUNTER_INC(tx_frees);
        m_freem(m);
        return (ENETDOWN);
    }

    MPASS(m->m_nextpkt == NULL);
    /* ALTQ-enabled interfaces always use queue 0. */
    qidx = 0;
    if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m) && !ALTQ_IS_ENABLED(&ifp->if_snd))
        qidx = QIDX(ctx, m);
    /*
     * XXX calculate buf_ring based on flowid (divvy up bits?)
     */
    txq = &ctx->ifc_txqs[qidx];

#ifdef DRIVER_BACKPRESSURE
    if (txq->ift_closed) {
        while (m != NULL) {
            next = m->m_nextpkt;
            m->m_nextpkt = NULL;
            m_freem(m);
            DBG_COUNTER_INC(tx_frees);
            m = next;
        }
        return (ENOBUFS);
    }
#endif
#ifdef notyet
    qidx = count = 0;
    mp = marr;
    next = m;
    do {
        count++;
        next = next->m_nextpkt;
    } while (next != NULL);

    if (count > nitems(marr))
        if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) {
            /* XXX check nextpkt */
            m_freem(m);
            /* XXX simplify for now */
            DBG_COUNTER_INC(tx_frees);
            return (ENOBUFS);
        }
    for (next = m, i = 0; next != NULL; i++) {
        mp[i] = next;
        next = next->m_nextpkt;
        mp[i]->m_nextpkt = NULL;
    }
#endif
    DBG_COUNTER_INC(tx_seen);
    err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE, abdicate);

    if (abdicate)
        GROUPTASK_ENQUEUE(&txq->ift_task);
    if (err) {
        if (!abdicate)
            GROUPTASK_ENQUEUE(&txq->ift_task);
        /* support forthcoming later */
#ifdef DRIVER_BACKPRESSURE
        txq->ift_closed = TRUE;
#endif
        ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE);
        m_freem(m);
        DBG_COUNTER_INC(tx_frees);
    }

    return (err);
}

#ifdef ALTQ
/*
 * The overall approach to integrating iflib with ALTQ is to continue to use
 * the iflib mp_ring machinery between the ALTQ queue(s) and the hardware
 * ring. Technically, when using ALTQ, queueing to an intermediate mp_ring
 * is redundant/unnecessary, but doing so minimizes the amount of
 * ALTQ-specific code required in iflib. It is assumed that the overhead of
 * redundantly queueing to an intermediate mp_ring is swamped by the
 * performance limitations inherent in using ALTQ.
 *
 * When ALTQ support is compiled in, all iflib drivers will use a transmit
 * routine, iflib_altq_if_transmit(), that checks if ALTQ is enabled for the
 * given interface. If ALTQ is enabled for an interface, then all
 * transmitted packets for that interface will be submitted to the ALTQ
 * subsystem via IFQ_ENQUEUE(). We don't use the legacy if_transmit()
 * implementation because it uses IFQ_HANDOFF(), which will duplicatively
 * update stats that the iflib machinery handles, and which is sensitive to
 * the disused IFF_DRV_OACTIVE flag. Additionally, iflib_altq_if_start()
 * will be installed as the start routine for use by ALTQ facilities that
 * need to trigger queue drains on a scheduled basis.
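 *
 * In short, when ALTQ is enabled the transmit path becomes roughly:
 *
 *	iflib_altq_if_transmit() -> IFQ_ENQUEUE() -> iflib_altq_if_start()
 *	    -> iflib_if_transmit() -> mp_ring -> hardware descriptor ring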
4294 * 4295 */ 4296 static void 4297 iflib_altq_if_start(if_t ifp) 4298 { 4299 struct ifaltq *ifq = &ifp->if_snd; 4300 struct mbuf *m; 4301 4302 IFQ_LOCK(ifq); 4303 IFQ_DEQUEUE_NOLOCK(ifq, m); 4304 while (m != NULL) { 4305 iflib_if_transmit(ifp, m); 4306 IFQ_DEQUEUE_NOLOCK(ifq, m); 4307 } 4308 IFQ_UNLOCK(ifq); 4309 } 4310 4311 static int 4312 iflib_altq_if_transmit(if_t ifp, struct mbuf *m) 4313 { 4314 int err; 4315 4316 if (ALTQ_IS_ENABLED(&ifp->if_snd)) { 4317 IFQ_ENQUEUE(&ifp->if_snd, m, err); 4318 if (err == 0) 4319 iflib_altq_if_start(ifp); 4320 } else 4321 err = iflib_if_transmit(ifp, m); 4322 4323 return (err); 4324 } 4325 #endif /* ALTQ */ 4326 4327 static void 4328 iflib_if_qflush(if_t ifp) 4329 { 4330 if_ctx_t ctx = if_getsoftc(ifp); 4331 iflib_txq_t txq = ctx->ifc_txqs; 4332 int i; 4333 4334 STATE_LOCK(ctx); 4335 ctx->ifc_flags |= IFC_QFLUSH; 4336 STATE_UNLOCK(ctx); 4337 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 4338 while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) 4339 iflib_txq_check_drain(txq, 0); 4340 STATE_LOCK(ctx); 4341 ctx->ifc_flags &= ~IFC_QFLUSH; 4342 STATE_UNLOCK(ctx); 4343 4344 /* 4345 * When ALTQ is enabled, this will also take care of purging the 4346 * ALTQ queue(s). 4347 */ 4348 if_qflush(ifp); 4349 } 4350 4351 #define IFCAP_FLAGS (IFCAP_HWCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ 4352 IFCAP_TSO | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ 4353 IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | \ 4354 IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_MEXTPG) 4355 4356 static int 4357 iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) 4358 { 4359 if_ctx_t ctx = if_getsoftc(ifp); 4360 struct ifreq *ifr = (struct ifreq *)data; 4361 #if defined(INET) || defined(INET6) 4362 struct ifaddr *ifa = (struct ifaddr *)data; 4363 #endif 4364 bool avoid_reset = false; 4365 int err = 0, reinit = 0, bits; 4366 4367 switch (command) { 4368 case SIOCSIFADDR: 4369 #ifdef INET 4370 if (ifa->ifa_addr->sa_family == AF_INET) 4371 avoid_reset = true; 4372 #endif 4373 #ifdef INET6 4374 if (ifa->ifa_addr->sa_family == AF_INET6) 4375 avoid_reset = true; 4376 #endif 4377 /* 4378 ** Calling init results in link renegotiation, 4379 ** so we avoid doing it when possible. 
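		** (For example, simply assigning an IPv4 or IPv6 address
		** should not force the link to renegotiate.)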
4380 */ 4381 if (avoid_reset) { 4382 if_setflagbits(ifp, IFF_UP,0); 4383 if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) 4384 reinit = 1; 4385 #ifdef INET 4386 if (!(if_getflags(ifp) & IFF_NOARP)) 4387 arp_ifinit(ifp, ifa); 4388 #endif 4389 } else 4390 err = ether_ioctl(ifp, command, data); 4391 break; 4392 case SIOCSIFMTU: 4393 CTX_LOCK(ctx); 4394 if (ifr->ifr_mtu == if_getmtu(ifp)) { 4395 CTX_UNLOCK(ctx); 4396 break; 4397 } 4398 bits = if_getdrvflags(ifp); 4399 /* stop the driver and free any clusters before proceeding */ 4400 iflib_stop(ctx); 4401 4402 if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { 4403 STATE_LOCK(ctx); 4404 if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) 4405 ctx->ifc_flags |= IFC_MULTISEG; 4406 else 4407 ctx->ifc_flags &= ~IFC_MULTISEG; 4408 STATE_UNLOCK(ctx); 4409 err = if_setmtu(ifp, ifr->ifr_mtu); 4410 } 4411 iflib_init_locked(ctx); 4412 STATE_LOCK(ctx); 4413 if_setdrvflags(ifp, bits); 4414 STATE_UNLOCK(ctx); 4415 CTX_UNLOCK(ctx); 4416 break; 4417 case SIOCSIFFLAGS: 4418 CTX_LOCK(ctx); 4419 if (if_getflags(ifp) & IFF_UP) { 4420 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4421 if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & 4422 (IFF_PROMISC | IFF_ALLMULTI)) { 4423 CTX_UNLOCK(ctx); 4424 err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); 4425 CTX_LOCK(ctx); 4426 } 4427 } else 4428 reinit = 1; 4429 } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4430 iflib_stop(ctx); 4431 } 4432 ctx->ifc_if_flags = if_getflags(ifp); 4433 CTX_UNLOCK(ctx); 4434 break; 4435 case SIOCADDMULTI: 4436 case SIOCDELMULTI: 4437 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 4438 CTX_LOCK(ctx); 4439 IFDI_INTR_DISABLE(ctx); 4440 IFDI_MULTI_SET(ctx); 4441 IFDI_INTR_ENABLE(ctx); 4442 CTX_UNLOCK(ctx); 4443 } 4444 break; 4445 case SIOCSIFMEDIA: 4446 CTX_LOCK(ctx); 4447 IFDI_MEDIA_SET(ctx); 4448 CTX_UNLOCK(ctx); 4449 /* FALLTHROUGH */ 4450 case SIOCGIFMEDIA: 4451 #ifndef __HAIKU__ 4452 case SIOCGIFXMEDIA: 4453 #endif 4454 err = ifmedia_ioctl(ifp, ifr, ctx->ifc_mediap, command); 4455 break; 4456 #ifndef __HAIKU__ 4457 case SIOCGI2C: 4458 { 4459 struct ifi2creq i2c; 4460 4461 err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); 4462 if (err != 0) 4463 break; 4464 if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { 4465 err = EINVAL; 4466 break; 4467 } 4468 if (i2c.len > sizeof(i2c.data)) { 4469 err = EINVAL; 4470 break; 4471 } 4472 4473 if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) 4474 err = copyout(&i2c, ifr_data_get_ptr(ifr), 4475 sizeof(i2c)); 4476 break; 4477 } 4478 #endif 4479 case SIOCSIFCAP: 4480 { 4481 int mask, setmask, oldmask; 4482 4483 oldmask = if_getcapenable(ifp); 4484 mask = ifr->ifr_reqcap ^ oldmask; 4485 mask &= ctx->ifc_softc_ctx.isc_capabilities | IFCAP_MEXTPG; 4486 setmask = 0; 4487 #ifdef TCP_OFFLOAD 4488 setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); 4489 #endif 4490 setmask |= (mask & IFCAP_FLAGS); 4491 setmask |= (mask & IFCAP_WOL); 4492 4493 /* 4494 * If any RX csum has changed, change all the ones that 4495 * are supported by the driver. 
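		 * For example, if the hardware supports both IFCAP_RXCSUM and
		 * IFCAP_RXCSUM_IPV6 and only one of them is being toggled,
		 * the other supported bit is folded into setmask as well, so
		 * the IPv4 and IPv6 RX checksum settings are always switched
		 * together.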
4496 */ 4497 if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 4498 setmask |= ctx->ifc_softc_ctx.isc_capabilities & 4499 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); 4500 } 4501 4502 /* 4503 * want to ensure that traffic has stopped before we change any of the flags 4504 */ 4505 if (setmask) { 4506 CTX_LOCK(ctx); 4507 bits = if_getdrvflags(ifp); 4508 if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) 4509 iflib_stop(ctx); 4510 STATE_LOCK(ctx); 4511 if_togglecapenable(ifp, setmask); 4512 STATE_UNLOCK(ctx); 4513 if (bits & IFF_DRV_RUNNING && setmask & ~IFCAP_WOL) 4514 iflib_init_locked(ctx); 4515 STATE_LOCK(ctx); 4516 if_setdrvflags(ifp, bits); 4517 STATE_UNLOCK(ctx); 4518 CTX_UNLOCK(ctx); 4519 } 4520 if_vlancap(ifp); 4521 break; 4522 } 4523 case SIOCGPRIVATE_0: 4524 case SIOCSDRVSPEC: 4525 case SIOCGDRVSPEC: 4526 CTX_LOCK(ctx); 4527 err = IFDI_PRIV_IOCTL(ctx, command, data); 4528 CTX_UNLOCK(ctx); 4529 break; 4530 default: 4531 err = ether_ioctl(ifp, command, data); 4532 break; 4533 } 4534 if (reinit) 4535 iflib_if_init(ctx); 4536 return (err); 4537 } 4538 4539 static uint64_t 4540 iflib_if_get_counter(if_t ifp, ift_counter cnt) 4541 { 4542 if_ctx_t ctx = if_getsoftc(ifp); 4543 4544 return (IFDI_GET_COUNTER(ctx, cnt)); 4545 } 4546 4547 /********************************************************************* 4548 * 4549 * OTHER FUNCTIONS EXPORTED TO THE STACK 4550 * 4551 **********************************************************************/ 4552 4553 static void 4554 iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) 4555 { 4556 if_ctx_t ctx = if_getsoftc(ifp); 4557 4558 if ((void *)ctx != arg) 4559 return; 4560 4561 if ((vtag == 0) || (vtag > 4095)) 4562 return; 4563 4564 if (iflib_in_detach(ctx)) 4565 return; 4566 4567 CTX_LOCK(ctx); 4568 /* Driver may need all untagged packets to be flushed */ 4569 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4570 iflib_stop(ctx); 4571 IFDI_VLAN_REGISTER(ctx, vtag); 4572 /* Re-init to load the changes, if required */ 4573 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4574 iflib_init_locked(ctx); 4575 CTX_UNLOCK(ctx); 4576 } 4577 4578 static void 4579 iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) 4580 { 4581 if_ctx_t ctx = if_getsoftc(ifp); 4582 4583 if ((void *)ctx != arg) 4584 return; 4585 4586 if ((vtag == 0) || (vtag > 4095)) 4587 return; 4588 4589 CTX_LOCK(ctx); 4590 /* Driver may need all tagged packets to be flushed */ 4591 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4592 iflib_stop(ctx); 4593 IFDI_VLAN_UNREGISTER(ctx, vtag); 4594 /* Re-init to load the changes, if required */ 4595 if (IFDI_NEEDS_RESTART(ctx, IFLIB_RESTART_VLAN_CONFIG)) 4596 iflib_init_locked(ctx); 4597 CTX_UNLOCK(ctx); 4598 } 4599 4600 static void 4601 iflib_led_func(void *arg, int onoff) 4602 { 4603 if_ctx_t ctx = arg; 4604 4605 CTX_LOCK(ctx); 4606 IFDI_LED_FUNC(ctx, onoff); 4607 CTX_UNLOCK(ctx); 4608 } 4609 4610 /********************************************************************* 4611 * 4612 * BUS FUNCTION DEFINITIONS 4613 * 4614 **********************************************************************/ 4615 4616 int 4617 iflib_device_probe(device_t dev) 4618 { 4619 const pci_vendor_info_t *ent; 4620 if_shared_ctx_t sctx; 4621 uint16_t pci_device_id, pci_rev_id, pci_subdevice_id, pci_subvendor_id; 4622 uint16_t pci_vendor_id; 4623 4624 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 4625 return (ENOTSUP); 4626 4627 pci_vendor_id = pci_get_vendor(dev); 4628 pci_device_id = pci_get_device(dev); 4629 
pci_subvendor_id = pci_get_subvendor(dev); 4630 pci_subdevice_id = pci_get_subdevice(dev); 4631 pci_rev_id = pci_get_revid(dev); 4632 if (sctx->isc_parse_devinfo != NULL) 4633 sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); 4634 4635 ent = sctx->isc_vendor_info; 4636 while (ent->pvi_vendor_id != 0) { 4637 if (pci_vendor_id != ent->pvi_vendor_id) { 4638 ent++; 4639 continue; 4640 } 4641 if ((pci_device_id == ent->pvi_device_id) && 4642 ((pci_subvendor_id == ent->pvi_subvendor_id) || 4643 (ent->pvi_subvendor_id == 0)) && 4644 ((pci_subdevice_id == ent->pvi_subdevice_id) || 4645 (ent->pvi_subdevice_id == 0)) && 4646 ((pci_rev_id == ent->pvi_rev_id) || 4647 (ent->pvi_rev_id == 0))) { 4648 device_set_desc_copy(dev, ent->pvi_name); 4649 /* this needs to be changed to zero if the bus probing code 4650 * ever stops re-probing on best match because the sctx 4651 * may have its values over written by register calls 4652 * in subsequent probes 4653 */ 4654 return (BUS_PROBE_DEFAULT); 4655 } 4656 ent++; 4657 } 4658 return (ENXIO); 4659 } 4660 4661 int 4662 iflib_device_probe_vendor(device_t dev) 4663 { 4664 int probe; 4665 4666 probe = iflib_device_probe(dev); 4667 #ifndef __HAIKU__ 4668 if (probe == BUS_PROBE_DEFAULT) 4669 return (BUS_PROBE_VENDOR); 4670 else 4671 #endif 4672 return (probe); 4673 } 4674 4675 static void 4676 iflib_reset_qvalues(if_ctx_t ctx) 4677 { 4678 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4679 if_shared_ctx_t sctx = ctx->ifc_sctx; 4680 device_t dev = ctx->ifc_dev; 4681 int i; 4682 4683 if (ctx->ifc_sysctl_ntxqs != 0) 4684 scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; 4685 if (ctx->ifc_sysctl_nrxqs != 0) 4686 scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; 4687 4688 for (i = 0; i < sctx->isc_ntxqs; i++) { 4689 if (ctx->ifc_sysctl_ntxds[i] != 0) 4690 scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; 4691 else 4692 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4693 } 4694 4695 for (i = 0; i < sctx->isc_nrxqs; i++) { 4696 if (ctx->ifc_sysctl_nrxds[i] != 0) 4697 scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; 4698 else 4699 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4700 } 4701 4702 for (i = 0; i < sctx->isc_nrxqs; i++) { 4703 if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { 4704 device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", 4705 i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); 4706 scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; 4707 } 4708 if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { 4709 device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", 4710 i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); 4711 scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; 4712 } 4713 if (!powerof2(scctx->isc_nrxd[i])) { 4714 device_printf(dev, "nrxd%d: %d is not a power of 2 - using default value of %d\n", 4715 i, scctx->isc_nrxd[i], sctx->isc_nrxd_default[i]); 4716 scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; 4717 } 4718 } 4719 4720 for (i = 0; i < sctx->isc_ntxqs; i++) { 4721 if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { 4722 device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", 4723 i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); 4724 scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; 4725 } 4726 if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { 4727 device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", 4728 i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); 4729 scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; 4730 } 4731 if (!powerof2(scctx->isc_ntxd[i])) { 4732 
device_printf(dev, "ntxd%d: %d is not a power of 2 - using default value of %d\n", 4733 i, scctx->isc_ntxd[i], sctx->isc_ntxd_default[i]); 4734 scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; 4735 } 4736 } 4737 } 4738 4739 static void 4740 iflib_add_pfil(if_ctx_t ctx) 4741 { 4742 #ifndef __HAIKU__ 4743 struct pfil_head *pfil; 4744 struct pfil_head_args pa; 4745 iflib_rxq_t rxq; 4746 int i; 4747 4748 pa.pa_version = PFIL_VERSION; 4749 pa.pa_flags = PFIL_IN; 4750 pa.pa_type = PFIL_TYPE_ETHERNET; 4751 pa.pa_headname = ctx->ifc_ifp->if_xname; 4752 pfil = pfil_head_register(&pa); 4753 4754 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 4755 rxq->pfil = pfil; 4756 } 4757 #endif 4758 } 4759 4760 static void 4761 iflib_rem_pfil(if_ctx_t ctx) 4762 { 4763 #ifndef __HAIKU__ 4764 struct pfil_head *pfil; 4765 iflib_rxq_t rxq; 4766 int i; 4767 4768 rxq = ctx->ifc_rxqs; 4769 pfil = rxq->pfil; 4770 for (i = 0; i < NRXQSETS(ctx); i++, rxq++) { 4771 rxq->pfil = NULL; 4772 } 4773 pfil_head_unregister(pfil); 4774 #endif 4775 } 4776 4777 4778 #ifndef __HAIKU__ 4779 /* 4780 * Advance forward by n members of the cpuset ctx->ifc_cpus starting from 4781 * cpuid and wrapping as necessary. 4782 */ 4783 static unsigned int 4784 cpuid_advance(if_ctx_t ctx, unsigned int cpuid, unsigned int n) 4785 { 4786 unsigned int first_valid; 4787 unsigned int last_valid; 4788 4789 /* cpuid should always be in the valid set */ 4790 MPASS(CPU_ISSET(cpuid, &ctx->ifc_cpus)); 4791 4792 /* valid set should never be empty */ 4793 MPASS(!CPU_EMPTY(&ctx->ifc_cpus)); 4794 4795 first_valid = CPU_FFS(&ctx->ifc_cpus) - 1; 4796 last_valid = CPU_FLS(&ctx->ifc_cpus) - 1; 4797 n = n % CPU_COUNT(&ctx->ifc_cpus); 4798 while (n > 0) { 4799 do { 4800 cpuid++; 4801 if (cpuid > last_valid) 4802 cpuid = first_valid; 4803 } while (!CPU_ISSET(cpuid, &ctx->ifc_cpus)); 4804 n--; 4805 } 4806 4807 return (cpuid); 4808 } 4809 #endif 4810 4811 #if defined(SMP) && defined(SCHED_ULE) 4812 extern struct cpu_group *cpu_top; /* CPU topology */ 4813 4814 static int 4815 find_child_with_core(int cpu, struct cpu_group *grp) 4816 { 4817 int i; 4818 4819 if (grp->cg_children == 0) 4820 return -1; 4821 4822 MPASS(grp->cg_child); 4823 for (i = 0; i < grp->cg_children; i++) { 4824 if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask)) 4825 return i; 4826 } 4827 4828 return -1; 4829 } 4830 4831 4832 /* 4833 * Find an L2 neighbor of the given CPU or return -1 if none found. This 4834 * does not distinguish among multiple L2 neighbors if the given CPU has 4835 * more than one (it will always return the same result in that case). 4836 */ 4837 static int 4838 find_l2_neighbor(int cpu) 4839 { 4840 struct cpu_group *grp; 4841 int i; 4842 4843 grp = cpu_top; 4844 if (grp == NULL) 4845 return -1; 4846 4847 /* 4848 * Find the smallest CPU group that contains the given core. 4849 */ 4850 i = 0; 4851 while ((i = find_child_with_core(cpu, grp)) != -1) { 4852 /* 4853 * If the smallest group containing the given CPU has less 4854 * than two members, we conclude the given CPU has no 4855 * L2 neighbor. 4856 */ 4857 if (grp->cg_child[i].cg_count <= 1) 4858 return (-1); 4859 grp = &grp->cg_child[i]; 4860 } 4861 4862 /* Must share L2. */ 4863 if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE) 4864 return -1; 4865 4866 /* 4867 * Select the first member of the set that isn't the reference 4868 * CPU, which at this point is guaranteed to exist. 
4869 */ 4870 for (i = 0; i < CPU_SETSIZE; i++) { 4871 if (CPU_ISSET(i, &grp->cg_mask) && i != cpu) 4872 return (i); 4873 } 4874 4875 /* Should never be reached */ 4876 return (-1); 4877 } 4878 4879 #else 4880 static int 4881 find_l2_neighbor(int cpu) 4882 { 4883 4884 return (-1); 4885 } 4886 #endif 4887 4888 #ifndef __HAIKU__ 4889 /* 4890 * CPU mapping behaviors 4891 * --------------------- 4892 * 'separate txrx' refers to the separate_txrx sysctl 4893 * 'use logical' refers to the use_logical_cores sysctl 4894 * 'INTR CPUS' indicates whether bus_get_cpus(INTR_CPUS) succeeded 4895 * 4896 * separate use INTR 4897 * txrx logical CPUS result 4898 * ---------- --------- ------ ------------------------------------------------ 4899 * - - X RX and TX queues mapped to consecutive physical 4900 * cores with RX/TX pairs on same core and excess 4901 * of either following 4902 * - X X RX and TX queues mapped to consecutive cores 4903 * of any type with RX/TX pairs on same core and 4904 * excess of either following 4905 * X - X RX and TX queues mapped to consecutive physical 4906 * cores; all RX then all TX 4907 * X X X RX queues mapped to consecutive physical cores 4908 * first, then TX queues mapped to L2 neighbor of 4909 * the corresponding RX queue if one exists, 4910 * otherwise to consecutive physical cores 4911 * - n/a - RX and TX queues mapped to consecutive cores of 4912 * any type with RX/TX pairs on same core and excess 4913 * of either following 4914 * X n/a - RX and TX queues mapped to consecutive cores of 4915 * any type; all RX then all TX 4916 */ 4917 static unsigned int 4918 get_cpuid_for_queue(if_ctx_t ctx, unsigned int base_cpuid, unsigned int qid, 4919 bool is_tx) 4920 { 4921 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4922 unsigned int core_index; 4923 4924 if (ctx->ifc_sysctl_separate_txrx) { 4925 /* 4926 * When using separate CPUs for TX and RX, the assignment 4927 * will always be of a consecutive CPU out of the set of 4928 * context CPUs, except for the specific case where the 4929 * context CPUs are phsyical cores, the use of logical cores 4930 * has been enabled, the assignment is for TX, the TX qid 4931 * corresponds to an RX qid, and the CPU assigned to the 4932 * corresponding RX queue has an L2 neighbor. 4933 */ 4934 if (ctx->ifc_sysctl_use_logical_cores && 4935 ctx->ifc_cpus_are_physical_cores && 4936 is_tx && qid < scctx->isc_nrxqsets) { 4937 int l2_neighbor; 4938 unsigned int rx_cpuid; 4939 4940 rx_cpuid = cpuid_advance(ctx, base_cpuid, qid); 4941 l2_neighbor = find_l2_neighbor(rx_cpuid); 4942 if (l2_neighbor != -1) { 4943 return (l2_neighbor); 4944 } 4945 /* 4946 * ... else fall through to the normal 4947 * consecutive-after-RX assignment scheme. 4948 * 4949 * Note that we are assuming that all RX queue CPUs 4950 * have an L2 neighbor, or all do not. If a mixed 4951 * scenario is possible, we will have to keep track 4952 * separately of how many queues prior to this one 4953 * were not able to be assigned to an L2 neighbor. 4954 */ 4955 } 4956 if (is_tx) 4957 core_index = scctx->isc_nrxqsets + qid; 4958 else 4959 core_index = qid; 4960 } else { 4961 core_index = qid; 4962 } 4963 4964 return (cpuid_advance(ctx, base_cpuid, core_index)); 4965 } 4966 #else 4967 #define get_cpuid_for_queue(...) 
CPU_FIRST() 4968 #endif 4969 4970 static uint16_t 4971 get_ctx_core_offset(if_ctx_t ctx) 4972 { 4973 #ifndef __HAIKU__ 4974 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 4975 struct cpu_offset *op; 4976 cpuset_t assigned_cpus; 4977 unsigned int cores_consumed; 4978 unsigned int base_cpuid = ctx->ifc_sysctl_core_offset; 4979 unsigned int first_valid; 4980 unsigned int last_valid; 4981 unsigned int i; 4982 4983 first_valid = CPU_FFS(&ctx->ifc_cpus) - 1; 4984 last_valid = CPU_FLS(&ctx->ifc_cpus) - 1; 4985 4986 if (base_cpuid != CORE_OFFSET_UNSPECIFIED) { 4987 /* 4988 * Align the user-chosen base CPU ID to the next valid CPU 4989 * for this device. If the chosen base CPU ID is smaller 4990 * than the first valid CPU or larger than the last valid 4991 * CPU, we assume the user does not know what the valid 4992 * range is for this device and is thinking in terms of a 4993 * zero-based reference frame, and so we shift the given 4994 * value into the valid range (and wrap accordingly) so the 4995 * intent is translated to the proper frame of reference. 4996 * If the base CPU ID is within the valid first/last, but 4997 * does not correspond to a valid CPU, it is advanced to the 4998 * next valid CPU (wrapping if necessary). 4999 */ 5000 if (base_cpuid < first_valid || base_cpuid > last_valid) { 5001 /* shift from zero-based to first_valid-based */ 5002 base_cpuid += first_valid; 5003 /* wrap to range [first_valid, last_valid] */ 5004 base_cpuid = (base_cpuid - first_valid) % 5005 (last_valid - first_valid + 1); 5006 } 5007 if (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) { 5008 /* 5009 * base_cpuid is in [first_valid, last_valid], but 5010 * not a member of the valid set. In this case, 5011 * there will always be a member of the valid set 5012 * with a CPU ID that is greater than base_cpuid, 5013 * and we simply advance to it. 5014 */ 5015 while (!CPU_ISSET(base_cpuid, &ctx->ifc_cpus)) 5016 base_cpuid++; 5017 } 5018 return (base_cpuid); 5019 } 5020 5021 /* 5022 * Determine how many cores will be consumed by performing the CPU 5023 * assignments and counting how many of the assigned CPUs correspond 5024 * to CPUs in the set of context CPUs. This is done using the CPU 5025 * ID first_valid as the base CPU ID, as the base CPU must be within 5026 * the set of context CPUs. 5027 * 5028 * Note not all assigned CPUs will be in the set of context CPUs 5029 * when separate CPUs are being allocated to TX and RX queues, 5030 * assignment to logical cores has been enabled, the set of context 5031 * CPUs contains only physical CPUs, and TX queues are mapped to L2 5032 * neighbors of CPUs that RX queues have been mapped to - in this 5033 * case we do only want to count how many CPUs in the set of context 5034 * CPUs have been consumed, as that determines the next CPU in that 5035 * set to start allocating at for the next device for which 5036 * core_offset is not set. 
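	 * As a purely hypothetical example: with 4 RX and 4 TX queue sets,
	 * separate_txrx disabled, and a context set of 8 physical cores,
	 * each RX/TX pair is assigned to the same core, so only 4 CPUs are
	 * counted as consumed and the next device sharing this CPU set
	 * starts 4 CPUs further along.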
5037 */ 5038 CPU_ZERO(&assigned_cpus); 5039 for (i = 0; i < scctx->isc_ntxqsets; i++) 5040 CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, true), 5041 &assigned_cpus); 5042 for (i = 0; i < scctx->isc_nrxqsets; i++) 5043 CPU_SET(get_cpuid_for_queue(ctx, first_valid, i, false), 5044 &assigned_cpus); 5045 CPU_AND(&assigned_cpus, &ctx->ifc_cpus); 5046 cores_consumed = CPU_COUNT(&assigned_cpus); 5047 5048 mtx_lock(&cpu_offset_mtx); 5049 SLIST_FOREACH(op, &cpu_offsets, entries) { 5050 if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { 5051 base_cpuid = op->next_cpuid; 5052 op->next_cpuid = cpuid_advance(ctx, op->next_cpuid, 5053 cores_consumed); 5054 MPASS(op->refcount < UINT_MAX); 5055 op->refcount++; 5056 break; 5057 } 5058 } 5059 if (base_cpuid == CORE_OFFSET_UNSPECIFIED) { 5060 base_cpuid = first_valid; 5061 op = malloc(sizeof(struct cpu_offset), M_IFLIB, 5062 M_NOWAIT | M_ZERO); 5063 if (op == NULL) { 5064 device_printf(ctx->ifc_dev, 5065 "allocation for cpu offset failed.\n"); 5066 } else { 5067 op->next_cpuid = cpuid_advance(ctx, base_cpuid, 5068 cores_consumed); 5069 op->refcount = 1; 5070 CPU_COPY(&ctx->ifc_cpus, &op->set); 5071 SLIST_INSERT_HEAD(&cpu_offsets, op, entries); 5072 } 5073 } 5074 mtx_unlock(&cpu_offset_mtx); 5075 5076 return (base_cpuid); 5077 #else 5078 return 0; 5079 #endif 5080 } 5081 5082 static void 5083 unref_ctx_core_offset(if_ctx_t ctx) 5084 { 5085 #ifndef __HAIKU__ 5086 struct cpu_offset *op, *top; 5087 5088 mtx_lock(&cpu_offset_mtx); 5089 SLIST_FOREACH_SAFE(op, &cpu_offsets, entries, top) { 5090 if (CPU_CMP(&ctx->ifc_cpus, &op->set) == 0) { 5091 MPASS(op->refcount > 0); 5092 op->refcount--; 5093 if (op->refcount == 0) { 5094 SLIST_REMOVE(&cpu_offsets, op, cpu_offset, entries); 5095 free(op, M_IFLIB); 5096 } 5097 break; 5098 } 5099 } 5100 mtx_unlock(&cpu_offset_mtx); 5101 #endif 5102 } 5103 5104 int 5105 iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) 5106 { 5107 if_ctx_t ctx; 5108 if_t ifp; 5109 if_softc_ctx_t scctx; 5110 kobjop_desc_t kobj_desc; 5111 kobj_method_t *kobj_method; 5112 int err, msix, rid; 5113 int num_txd, num_rxd; 5114 5115 ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); 5116 5117 if (sc == NULL) { 5118 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 5119 device_set_softc(dev, ctx); 5120 ctx->ifc_flags |= IFC_SC_ALLOCATED; 5121 } 5122 5123 ctx->ifc_sctx = sctx; 5124 ctx->ifc_dev = dev; 5125 ctx->ifc_softc = sc; 5126 5127 if ((err = iflib_register(ctx)) != 0) { 5128 device_printf(dev, "iflib_register failed %d\n", err); 5129 goto fail_ctx_free; 5130 } 5131 iflib_add_device_sysctl_pre(ctx); 5132 5133 scctx = &ctx->ifc_softc_ctx; 5134 ifp = ctx->ifc_ifp; 5135 5136 iflib_reset_qvalues(ctx); 5137 CTX_LOCK(ctx); 5138 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 5139 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 5140 goto fail_unlock; 5141 } 5142 _iflib_pre_assert(scctx); 5143 ctx->ifc_txrx = *scctx->isc_txrx; 5144 5145 if (sctx->isc_flags & IFLIB_DRIVER_MEDIA) 5146 ctx->ifc_mediap = scctx->isc_media; 5147 5148 #ifdef INVARIANTS 5149 if (scctx->isc_capabilities & IFCAP_TXCSUM) 5150 MPASS(scctx->isc_tx_csum_flags); 5151 #endif 5152 5153 if_setcapabilities(ifp, 5154 scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_MEXTPG); 5155 if_setcapenable(ifp, 5156 scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_MEXTPG); 5157 5158 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 5159 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 5160 if 
(scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 5161 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 5162 5163 num_txd = iflib_num_tx_descs(ctx); 5164 num_rxd = iflib_num_rx_descs(ctx); 5165 5166 /* XXX change for per-queue sizes */ 5167 device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", 5168 num_txd, num_rxd); 5169 5170 if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) 5171 scctx->isc_tx_nsegments = max(1, num_txd / 5172 MAX_SINGLE_PACKET_FRACTION); 5173 if (scctx->isc_tx_tso_segments_max > num_txd / 5174 MAX_SINGLE_PACKET_FRACTION) 5175 scctx->isc_tx_tso_segments_max = max(1, 5176 num_txd / MAX_SINGLE_PACKET_FRACTION); 5177 5178 /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ 5179 if (if_getcapabilities(ifp) & IFCAP_TSO) { 5180 #ifndef __HAIKU__ 5181 /* 5182 * The stack can't handle a TSO size larger than IP_MAXPACKET, 5183 * but some MACs do. 5184 */ 5185 if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, 5186 IP_MAXPACKET)); 5187 /* 5188 * Take maximum number of m_pullup(9)'s in iflib_parse_header() 5189 * into account. In the worst case, each of these calls will 5190 * add another mbuf and, thus, the requirement for another DMA 5191 * segment. So for best performance, it doesn't make sense to 5192 * advertize a maximum of TSO segments that typically will 5193 * require defragmentation in iflib_encap(). 5194 */ 5195 if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); 5196 if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); 5197 #endif 5198 } 5199 if (scctx->isc_rss_table_size == 0) 5200 scctx->isc_rss_table_size = 64; 5201 scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; 5202 5203 GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); 5204 /* XXX format name */ 5205 taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, 5206 NULL, NULL, "admin"); 5207 5208 #ifndef __HAIKU__ 5209 /* Set up cpu set. If it fails, use the set of all CPUs. */ 5210 if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { 5211 device_printf(dev, "Unable to fetch CPU list\n"); 5212 CPU_COPY(&all_cpus, &ctx->ifc_cpus); 5213 ctx->ifc_cpus_are_physical_cores = false; 5214 } else 5215 ctx->ifc_cpus_are_physical_cores = true; 5216 MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); 5217 #endif 5218 5219 /* 5220 ** Now set up MSI or MSI-X, should return us the number of supported 5221 ** vectors (will be 1 for a legacy interrupt and MSI). 5222 */ 5223 if (sctx->isc_flags & IFLIB_SKIP_MSIX) { 5224 msix = scctx->isc_vectors; 5225 } else if (scctx->isc_msix_bar != 0) 5226 /* 5227 * The simple fact that isc_msix_bar is not 0 does not mean we 5228 * we have a good value there that is known to work. 5229 */ 5230 msix = iflib_msix_init(ctx); 5231 else { 5232 scctx->isc_vectors = 1; 5233 scctx->isc_ntxqsets = 1; 5234 scctx->isc_nrxqsets = 1; 5235 scctx->isc_intr = IFLIB_INTR_LEGACY; 5236 msix = 0; 5237 } 5238 /* Get memory for the station queues */ 5239 if ((err = iflib_queues_alloc(ctx))) { 5240 device_printf(dev, "Unable to allocate queue memory\n"); 5241 goto fail_intr_free; 5242 } 5243 5244 if ((err = iflib_qset_structures_setup(ctx))) 5245 goto fail_queues; 5246 5247 /* 5248 * Now that we know how many queues there are, get the core offset. 
5249 */ 5250 ctx->ifc_sysctl_core_offset = get_ctx_core_offset(ctx); 5251 5252 if (msix > 1) { 5253 /* 5254 * When using MSI-X, ensure that ifdi_{r,t}x_queue_intr_enable 5255 * aren't the default NULL implementation. 5256 */ 5257 kobj_desc = &ifdi_rx_queue_intr_enable_desc; 5258 #ifdef __HAIKU__ 5259 kobj_method = kobj_lookup_method(ctx->ops.cls, NULL, 5260 #else 5261 kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, 5262 #endif 5263 kobj_desc); 5264 if (kobj_method == &kobj_desc->deflt) { 5265 device_printf(dev, 5266 "MSI-X requires ifdi_rx_queue_intr_enable method"); 5267 err = EOPNOTSUPP; 5268 goto fail_queues; 5269 } 5270 kobj_desc = &ifdi_tx_queue_intr_enable_desc; 5271 #ifdef __HAIKU__ 5272 kobj_method = kobj_lookup_method(ctx->ops.cls, NULL, 5273 #else 5274 kobj_method = kobj_lookup_method(((kobj_t)ctx)->ops->cls, NULL, 5275 #endif 5276 kobj_desc); 5277 if (kobj_method == &kobj_desc->deflt) { 5278 device_printf(dev, 5279 "MSI-X requires ifdi_tx_queue_intr_enable method"); 5280 err = EOPNOTSUPP; 5281 goto fail_queues; 5282 } 5283 5284 /* 5285 * Assign the MSI-X vectors. 5286 * Note that the default NULL ifdi_msix_intr_assign method will 5287 * fail here, too. 5288 */ 5289 err = IFDI_MSIX_INTR_ASSIGN(ctx, msix); 5290 if (err != 0) { 5291 device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", 5292 err); 5293 goto fail_queues; 5294 } 5295 } else if (scctx->isc_intr != IFLIB_INTR_MSIX) { 5296 rid = 0; 5297 if (scctx->isc_intr == IFLIB_INTR_MSI) { 5298 MPASS(msix == 1); 5299 rid = 1; 5300 } 5301 if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { 5302 device_printf(dev, "iflib_legacy_setup failed %d\n", err); 5303 goto fail_queues; 5304 } 5305 } else { 5306 device_printf(dev, 5307 "Cannot use iflib with only 1 MSI-X interrupt!\n"); 5308 err = ENODEV; 5309 goto fail_queues; 5310 } 5311 5312 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5313 5314 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5315 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5316 goto fail_detach; 5317 } 5318 5319 /* 5320 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 5321 * This must appear after the call to ether_ifattach() because 5322 * ether_ifattach() sets if_hdrlen to the default value. 
5323 */ 5324 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5325 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 5326 5327 if ((err = iflib_netmap_attach(ctx))) { 5328 device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); 5329 goto fail_detach; 5330 } 5331 *ctxp = ctx; 5332 5333 DEBUGNET_SET(ctx->ifc_ifp, iflib); 5334 5335 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5336 iflib_add_device_sysctl_post(ctx); 5337 iflib_add_pfil(ctx); 5338 ctx->ifc_flags |= IFC_INIT_DONE; 5339 CTX_UNLOCK(ctx); 5340 5341 return (0); 5342 5343 fail_detach: 5344 ether_ifdetach(ctx->ifc_ifp); 5345 fail_queues: 5346 iflib_tqg_detach(ctx); 5347 iflib_tx_structures_free(ctx); 5348 iflib_rx_structures_free(ctx); 5349 IFDI_DETACH(ctx); 5350 IFDI_QUEUES_FREE(ctx); 5351 fail_intr_free: 5352 iflib_free_intr_mem(ctx); 5353 fail_unlock: 5354 CTX_UNLOCK(ctx); 5355 iflib_deregister(ctx); 5356 fail_ctx_free: 5357 device_set_softc(ctx->ifc_dev, NULL); 5358 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5359 free(ctx->ifc_softc, M_IFLIB); 5360 free(ctx, M_IFLIB); 5361 return (err); 5362 } 5363 5364 int 5365 iflib_pseudo_register(device_t dev, if_shared_ctx_t sctx, if_ctx_t *ctxp, 5366 struct iflib_cloneattach_ctx *clctx) 5367 { 5368 int num_txd, num_rxd; 5369 int err; 5370 if_ctx_t ctx; 5371 if_t ifp; 5372 if_softc_ctx_t scctx; 5373 int i; 5374 void *sc; 5375 5376 ctx = malloc(sizeof(*ctx), M_IFLIB, M_WAITOK|M_ZERO); 5377 sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); 5378 ctx->ifc_flags |= IFC_SC_ALLOCATED; 5379 if (sctx->isc_flags & (IFLIB_PSEUDO|IFLIB_VIRTUAL)) 5380 ctx->ifc_flags |= IFC_PSEUDO; 5381 5382 ctx->ifc_sctx = sctx; 5383 ctx->ifc_softc = sc; 5384 ctx->ifc_dev = dev; 5385 5386 if ((err = iflib_register(ctx)) != 0) { 5387 device_printf(dev, "%s: iflib_register failed %d\n", __func__, err); 5388 goto fail_ctx_free; 5389 } 5390 iflib_add_device_sysctl_pre(ctx); 5391 5392 scctx = &ctx->ifc_softc_ctx; 5393 ifp = ctx->ifc_ifp; 5394 5395 iflib_reset_qvalues(ctx); 5396 CTX_LOCK(ctx); 5397 if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { 5398 device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); 5399 goto fail_unlock; 5400 } 5401 #ifndef __HAIKU__ 5402 if (sctx->isc_flags & IFLIB_GEN_MAC) 5403 ether_gen_addr(ifp, &ctx->ifc_mac); 5404 #endif 5405 if ((err = IFDI_CLONEATTACH(ctx, clctx->cc_ifc, clctx->cc_name, 5406 clctx->cc_params)) != 0) { 5407 device_printf(dev, "IFDI_CLONEATTACH failed %d\n", err); 5408 goto fail_unlock; 5409 } 5410 #ifdef INVARIANTS 5411 if (scctx->isc_capabilities & IFCAP_TXCSUM) 5412 MPASS(scctx->isc_tx_csum_flags); 5413 #endif 5414 5415 if_setcapabilities(ifp, scctx->isc_capabilities | IFCAP_HWSTATS | IFCAP_LINKSTATE); 5416 if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS | IFCAP_LINKSTATE); 5417 5418 ifp->if_flags |= IFF_NOGROUP; 5419 if (sctx->isc_flags & IFLIB_PSEUDO) { 5420 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); 5421 ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); 5422 if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) { 5423 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5424 } else { 5425 if_attach(ctx->ifc_ifp); 5426 bpfattach(ctx->ifc_ifp, DLT_NULL, sizeof(u_int32_t)); 5427 } 5428 5429 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5430 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5431 goto fail_detach; 5432 } 5433 *ctxp = ctx; 5434 5435 /* 5436 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 
5437 * This must appear after the call to ether_ifattach() because 5438 * ether_ifattach() sets if_hdrlen to the default value. 5439 */ 5440 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5441 if_setifheaderlen(ifp, 5442 sizeof(struct ether_vlan_header)); 5443 5444 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5445 iflib_add_device_sysctl_post(ctx); 5446 ctx->ifc_flags |= IFC_INIT_DONE; 5447 CTX_UNLOCK(ctx); 5448 return (0); 5449 } 5450 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); 5451 ifmedia_add(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO, 0, NULL); 5452 ifmedia_set(ctx->ifc_mediap, IFM_ETHER | IFM_AUTO); 5453 5454 _iflib_pre_assert(scctx); 5455 ctx->ifc_txrx = *scctx->isc_txrx; 5456 5457 if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) 5458 scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; 5459 if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) 5460 scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; 5461 5462 num_txd = iflib_num_tx_descs(ctx); 5463 num_rxd = iflib_num_rx_descs(ctx); 5464 5465 /* XXX change for per-queue sizes */ 5466 device_printf(dev, "Using %d TX descriptors and %d RX descriptors\n", 5467 num_txd, num_rxd); 5468 5469 if (scctx->isc_tx_nsegments > num_txd / MAX_SINGLE_PACKET_FRACTION) 5470 scctx->isc_tx_nsegments = max(1, num_txd / 5471 MAX_SINGLE_PACKET_FRACTION); 5472 if (scctx->isc_tx_tso_segments_max > num_txd / 5473 MAX_SINGLE_PACKET_FRACTION) 5474 scctx->isc_tx_tso_segments_max = max(1, 5475 num_txd / MAX_SINGLE_PACKET_FRACTION); 5476 5477 /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ 5478 if (if_getcapabilities(ifp) & IFCAP_TSO) { 5479 #ifndef __HAIKU__ 5480 /* 5481 * The stack can't handle a TSO size larger than IP_MAXPACKET, 5482 * but some MACs do. 5483 */ 5484 if_sethwtsomax(ifp, min(scctx->isc_tx_tso_size_max, 5485 IP_MAXPACKET)); 5486 /* 5487 * Take maximum number of m_pullup(9)'s in iflib_parse_header() 5488 * into account. In the worst case, each of these calls will 5489 * add another mbuf and, thus, the requirement for another DMA 5490 * segment. So for best performance, it doesn't make sense to 5491 * advertize a maximum of TSO segments that typically will 5492 * require defragmentation in iflib_encap(). 5493 */ 5494 if_sethwtsomaxsegcount(ifp, scctx->isc_tx_tso_segments_max - 3); 5495 if_sethwtsomaxsegsize(ifp, scctx->isc_tx_tso_segsize_max); 5496 #endif 5497 } 5498 if (scctx->isc_rss_table_size == 0) 5499 scctx->isc_rss_table_size = 64; 5500 scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; 5501 5502 GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); 5503 /* XXX format name */ 5504 taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, 5505 NULL, NULL, "admin"); 5506 5507 /* XXX --- can support > 1 -- but keep it simple for now */ 5508 scctx->isc_intr = IFLIB_INTR_LEGACY; 5509 5510 /* Get memory for the station queues */ 5511 if ((err = iflib_queues_alloc(ctx))) { 5512 device_printf(dev, "Unable to allocate queue memory\n"); 5513 goto fail_iflib_detach; 5514 } 5515 5516 if ((err = iflib_qset_structures_setup(ctx))) { 5517 device_printf(dev, "qset structure setup failed %d\n", err); 5518 goto fail_queues; 5519 } 5520 5521 /* 5522 * XXX What if anything do we want to do about interrupts? 
5523 */ 5524 ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac.octet); 5525 if ((err = IFDI_ATTACH_POST(ctx)) != 0) { 5526 device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); 5527 goto fail_detach; 5528 } 5529 5530 /* 5531 * Tell the upper layer(s) if IFCAP_VLAN_MTU is supported. 5532 * This must appear after the call to ether_ifattach() because 5533 * ether_ifattach() sets if_hdrlen to the default value. 5534 */ 5535 if (if_getcapabilities(ifp) & IFCAP_VLAN_MTU) 5536 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header)); 5537 5538 /* XXX handle more than one queue */ 5539 for (i = 0; i < scctx->isc_nrxqsets; i++) 5540 IFDI_RX_CLSET(ctx, 0, i, ctx->ifc_rxqs[i].ifr_fl[0].ifl_sds.ifsd_cl); 5541 5542 *ctxp = ctx; 5543 5544 if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); 5545 iflib_add_device_sysctl_post(ctx); 5546 ctx->ifc_flags |= IFC_INIT_DONE; 5547 CTX_UNLOCK(ctx); 5548 5549 return (0); 5550 fail_detach: 5551 ether_ifdetach(ctx->ifc_ifp); 5552 fail_queues: 5553 iflib_tqg_detach(ctx); 5554 iflib_tx_structures_free(ctx); 5555 iflib_rx_structures_free(ctx); 5556 fail_iflib_detach: 5557 IFDI_DETACH(ctx); 5558 IFDI_QUEUES_FREE(ctx); 5559 fail_unlock: 5560 CTX_UNLOCK(ctx); 5561 iflib_deregister(ctx); 5562 fail_ctx_free: 5563 free(ctx->ifc_softc, M_IFLIB); 5564 free(ctx, M_IFLIB); 5565 return (err); 5566 } 5567 5568 int 5569 iflib_pseudo_deregister(if_ctx_t ctx) 5570 { 5571 if_t ifp = ctx->ifc_ifp; 5572 if_shared_ctx_t sctx = ctx->ifc_sctx; 5573 5574 /* Unregister VLAN event handlers early */ 5575 iflib_unregister_vlan_handlers(ctx); 5576 5577 if ((sctx->isc_flags & IFLIB_PSEUDO) && 5578 (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) { 5579 bpfdetach(ifp); 5580 if_detach(ifp); 5581 } else { 5582 ether_ifdetach(ifp); 5583 } 5584 5585 iflib_tqg_detach(ctx); 5586 iflib_tx_structures_free(ctx); 5587 iflib_rx_structures_free(ctx); 5588 IFDI_DETACH(ctx); 5589 IFDI_QUEUES_FREE(ctx); 5590 5591 iflib_deregister(ctx); 5592 5593 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5594 free(ctx->ifc_softc, M_IFLIB); 5595 free(ctx, M_IFLIB); 5596 return (0); 5597 } 5598 5599 int 5600 iflib_device_attach(device_t dev) 5601 { 5602 if_ctx_t ctx; 5603 if_shared_ctx_t sctx; 5604 5605 if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) 5606 return (ENOTSUP); 5607 5608 pci_enable_busmaster(dev); 5609 5610 return (iflib_device_register(dev, NULL, sctx, &ctx)); 5611 } 5612 5613 int 5614 iflib_device_deregister(if_ctx_t ctx) 5615 { 5616 if_t ifp = ctx->ifc_ifp; 5617 device_t dev = ctx->ifc_dev; 5618 5619 /* Make sure VLANS are not using driver */ 5620 if (if_vlantrunkinuse(ifp)) { 5621 device_printf(dev, "Vlan in use, detach first\n"); 5622 return (EBUSY); 5623 } 5624 #ifdef PCI_IOV 5625 if (!CTX_IS_VF(ctx) && pci_iov_detach(dev) != 0) { 5626 device_printf(dev, "SR-IOV in use; detach first.\n"); 5627 return (EBUSY); 5628 } 5629 #endif 5630 5631 STATE_LOCK(ctx); 5632 ctx->ifc_flags |= IFC_IN_DETACH; 5633 STATE_UNLOCK(ctx); 5634 5635 /* Unregister VLAN handlers before calling iflib_stop() */ 5636 iflib_unregister_vlan_handlers(ctx); 5637 5638 iflib_netmap_detach(ifp); 5639 ether_ifdetach(ifp); 5640 5641 CTX_LOCK(ctx); 5642 iflib_stop(ctx); 5643 CTX_UNLOCK(ctx); 5644 5645 iflib_rem_pfil(ctx); 5646 if (ctx->ifc_led_dev != NULL) 5647 led_destroy(ctx->ifc_led_dev); 5648 5649 iflib_tqg_detach(ctx); 5650 iflib_tx_structures_free(ctx); 5651 iflib_rx_structures_free(ctx); 5652 5653 CTX_LOCK(ctx); 5654 IFDI_DETACH(ctx); 5655 IFDI_QUEUES_FREE(ctx); 5656 CTX_UNLOCK(ctx); 5657 5658 /* ether_ifdetach calls if_qflush 
- lock must be destroy afterwards*/ 5659 iflib_free_intr_mem(ctx); 5660 5661 bus_generic_detach(dev); 5662 5663 iflib_deregister(ctx); 5664 5665 device_set_softc(ctx->ifc_dev, NULL); 5666 if (ctx->ifc_flags & IFC_SC_ALLOCATED) 5667 free(ctx->ifc_softc, M_IFLIB); 5668 unref_ctx_core_offset(ctx); 5669 free(ctx, M_IFLIB); 5670 return (0); 5671 } 5672 5673 static void 5674 iflib_tqg_detach(if_ctx_t ctx) 5675 { 5676 iflib_txq_t txq; 5677 iflib_rxq_t rxq; 5678 int i; 5679 struct taskqgroup *tqg; 5680 5681 /* XXX drain any dependent tasks */ 5682 tqg = qgroup_if_io_tqg; 5683 for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { 5684 callout_drain(&txq->ift_timer); 5685 #ifdef DEV_NETMAP 5686 callout_drain(&txq->ift_netmap_timer); 5687 #endif /* DEV_NETMAP */ 5688 if (txq->ift_task.gt_uniq != NULL) 5689 taskqgroup_detach(tqg, &txq->ift_task); 5690 } 5691 for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { 5692 if (rxq->ifr_task.gt_uniq != NULL) 5693 taskqgroup_detach(tqg, &rxq->ifr_task); 5694 } 5695 tqg = qgroup_if_config_tqg; 5696 if (ctx->ifc_admin_task.gt_uniq != NULL) 5697 taskqgroup_detach(tqg, &ctx->ifc_admin_task); 5698 if (ctx->ifc_vflr_task.gt_uniq != NULL) 5699 taskqgroup_detach(tqg, &ctx->ifc_vflr_task); 5700 } 5701 5702 static void 5703 iflib_free_intr_mem(if_ctx_t ctx) 5704 { 5705 5706 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { 5707 iflib_irq_free(ctx, &ctx->ifc_legacy_irq); 5708 } 5709 if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { 5710 pci_release_msi(ctx->ifc_dev); 5711 } 5712 if (ctx->ifc_msix_mem != NULL) { 5713 bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, 5714 rman_get_rid(ctx->ifc_msix_mem), ctx->ifc_msix_mem); 5715 ctx->ifc_msix_mem = NULL; 5716 } 5717 } 5718 5719 int 5720 iflib_device_detach(device_t dev) 5721 { 5722 if_ctx_t ctx = device_get_softc(dev); 5723 5724 return (iflib_device_deregister(ctx)); 5725 } 5726 5727 int 5728 iflib_device_suspend(device_t dev) 5729 { 5730 if_ctx_t ctx = device_get_softc(dev); 5731 5732 CTX_LOCK(ctx); 5733 IFDI_SUSPEND(ctx); 5734 CTX_UNLOCK(ctx); 5735 5736 return bus_generic_suspend(dev); 5737 } 5738 int 5739 iflib_device_shutdown(device_t dev) 5740 { 5741 if_ctx_t ctx = device_get_softc(dev); 5742 5743 CTX_LOCK(ctx); 5744 IFDI_SHUTDOWN(ctx); 5745 CTX_UNLOCK(ctx); 5746 5747 return bus_generic_suspend(dev); 5748 } 5749 5750 int 5751 iflib_device_resume(device_t dev) 5752 { 5753 if_ctx_t ctx = device_get_softc(dev); 5754 iflib_txq_t txq = ctx->ifc_txqs; 5755 5756 CTX_LOCK(ctx); 5757 IFDI_RESUME(ctx); 5758 iflib_if_init_locked(ctx); 5759 CTX_UNLOCK(ctx); 5760 for (int i = 0; i < NTXQSETS(ctx); i++, txq++) 5761 iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); 5762 5763 return (bus_generic_resume(dev)); 5764 } 5765 5766 int 5767 iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) 5768 { 5769 int error; 5770 if_ctx_t ctx = device_get_softc(dev); 5771 5772 CTX_LOCK(ctx); 5773 error = IFDI_IOV_INIT(ctx, num_vfs, params); 5774 CTX_UNLOCK(ctx); 5775 5776 return (error); 5777 } 5778 5779 void 5780 iflib_device_iov_uninit(device_t dev) 5781 { 5782 if_ctx_t ctx = device_get_softc(dev); 5783 5784 CTX_LOCK(ctx); 5785 IFDI_IOV_UNINIT(ctx); 5786 CTX_UNLOCK(ctx); 5787 } 5788 5789 int 5790 iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) 5791 { 5792 int error; 5793 if_ctx_t ctx = device_get_softc(dev); 5794 5795 CTX_LOCK(ctx); 5796 error = IFDI_IOV_VF_ADD(ctx, vfnum, params); 5797 CTX_UNLOCK(ctx); 5798 5799 return (error); 5800 } 5801 5802 
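/*
 * A typical hardware driver plugs the bus entry points above directly into
 * its device_method_t table.  A sketch of that glue (the foo_* names are
 * placeholders, not part of iflib):
 *
 *	static device_method_t foo_methods[] = {
 *		DEVMETHOD(device_register, foo_register),
 *		DEVMETHOD(device_probe, iflib_device_probe),
 *		DEVMETHOD(device_attach, iflib_device_attach),
 *		DEVMETHOD(device_detach, iflib_device_detach),
 *		DEVMETHOD(device_shutdown, iflib_device_shutdown),
 *		DEVMETHOD(device_suspend, iflib_device_suspend),
 *		DEVMETHOD(device_resume, iflib_device_resume),
 *		DEVMETHOD_END
 *	};
 *	static driver_t foo_driver = {
 *		"foo", foo_methods, sizeof(struct foo_softc)
 *	};
 */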
/********************************************************************* 5803 * 5804 * MODULE FUNCTION DEFINITIONS 5805 * 5806 **********************************************************************/ 5807 5808 /* 5809 * - Start a fast taskqueue thread for each core 5810 * - Start a taskqueue for control operations 5811 */ 5812 static int 5813 iflib_module_init(void) 5814 { 5815 iflib_timer_default = hz / 2; 5816 return (0); 5817 } 5818 5819 static int 5820 iflib_module_event_handler(module_t mod, int what, void *arg) 5821 { 5822 int err; 5823 5824 switch (what) { 5825 case MOD_LOAD: 5826 if ((err = iflib_module_init()) != 0) 5827 return (err); 5828 break; 5829 case MOD_UNLOAD: 5830 return (EBUSY); 5831 default: 5832 return (EOPNOTSUPP); 5833 } 5834 5835 return (0); 5836 } 5837 5838 /********************************************************************* 5839 * 5840 * PUBLIC FUNCTION DEFINITIONS 5841 * ordered as in iflib.h 5842 * 5843 **********************************************************************/ 5844 5845 static void 5846 _iflib_assert(if_shared_ctx_t sctx) 5847 { 5848 int i; 5849 5850 MPASS(sctx->isc_tx_maxsize); 5851 MPASS(sctx->isc_tx_maxsegsize); 5852 5853 MPASS(sctx->isc_rx_maxsize); 5854 MPASS(sctx->isc_rx_nsegments); 5855 MPASS(sctx->isc_rx_maxsegsize); 5856 5857 MPASS(sctx->isc_nrxqs >= 1 && sctx->isc_nrxqs <= 8); 5858 for (i = 0; i < sctx->isc_nrxqs; i++) { 5859 MPASS(sctx->isc_nrxd_min[i]); 5860 MPASS(powerof2(sctx->isc_nrxd_min[i])); 5861 MPASS(sctx->isc_nrxd_max[i]); 5862 MPASS(powerof2(sctx->isc_nrxd_max[i])); 5863 MPASS(sctx->isc_nrxd_default[i]); 5864 MPASS(powerof2(sctx->isc_nrxd_default[i])); 5865 } 5866 5867 MPASS(sctx->isc_ntxqs >= 1 && sctx->isc_ntxqs <= 8); 5868 for (i = 0; i < sctx->isc_ntxqs; i++) { 5869 MPASS(sctx->isc_ntxd_min[i]); 5870 MPASS(powerof2(sctx->isc_ntxd_min[i])); 5871 MPASS(sctx->isc_ntxd_max[i]); 5872 MPASS(powerof2(sctx->isc_ntxd_max[i])); 5873 MPASS(sctx->isc_ntxd_default[i]); 5874 MPASS(powerof2(sctx->isc_ntxd_default[i])); 5875 } 5876 } 5877 5878 static void 5879 _iflib_pre_assert(if_softc_ctx_t scctx) 5880 { 5881 5882 MPASS(scctx->isc_txrx->ift_txd_encap); 5883 MPASS(scctx->isc_txrx->ift_txd_flush); 5884 MPASS(scctx->isc_txrx->ift_txd_credits_update); 5885 MPASS(scctx->isc_txrx->ift_rxd_available); 5886 MPASS(scctx->isc_txrx->ift_rxd_pkt_get); 5887 MPASS(scctx->isc_txrx->ift_rxd_refill); 5888 MPASS(scctx->isc_txrx->ift_rxd_flush); 5889 } 5890 5891 static int 5892 iflib_register(if_ctx_t ctx) 5893 { 5894 if_shared_ctx_t sctx = ctx->ifc_sctx; 5895 driver_t *driver = sctx->isc_driver; 5896 device_t dev = ctx->ifc_dev; 5897 if_t ifp; 5898 u_char type; 5899 int iflags; 5900 5901 if ((sctx->isc_flags & IFLIB_PSEUDO) == 0) 5902 _iflib_assert(sctx); 5903 5904 CTX_LOCK_INIT(ctx); 5905 STATE_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); 5906 if (sctx->isc_flags & IFLIB_PSEUDO) { 5907 if (sctx->isc_flags & IFLIB_PSEUDO_ETHER) 5908 type = IFT_ETHER; 5909 else 5910 type = IFT_PPP; 5911 } else 5912 type = IFT_ETHER; 5913 ifp = ctx->ifc_ifp = if_alloc(type); 5914 if (ifp == NULL) { 5915 device_printf(dev, "can not allocate ifnet structure\n"); 5916 return (ENOMEM); 5917 } 5918 5919 /* 5920 * Initialize our context's device specific methods 5921 */ 5922 kobj_init((kobj_t) ctx, (kobj_class_t) driver); 5923 kobj_class_compile((kobj_class_t) driver); 5924 5925 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 5926 if_setsoftc(ifp, ctx); 5927 if_setdev(ifp, dev); 5928 if_setinitfn(ifp, iflib_if_init); 5929 if_setioctlfn(ifp, iflib_if_ioctl); 
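	/*
	 * With ALTQ compiled in, transmit is routed through an if_start()
	 * style path whenever a queueing discipline is active so ALTQ can
	 * reorder packets; otherwise iflib's normal if_transmit() path is
	 * installed directly.
	 */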
5930 #ifdef ALTQ 5931 if_setstartfn(ifp, iflib_altq_if_start); 5932 if_settransmitfn(ifp, iflib_altq_if_transmit); 5933 if_setsendqready(ifp); 5934 #else 5935 if_settransmitfn(ifp, iflib_if_transmit); 5936 #endif 5937 if_setqflushfn(ifp, iflib_if_qflush); 5938 #ifndef __HAIKU__ 5939 iflags = IFF_MULTICAST | IFF_KNOWSEPOCH; 5940 #else 5941 iflags = IFF_MULTICAST; 5942 #endif 5943 5944 if ((sctx->isc_flags & IFLIB_PSEUDO) && 5945 (sctx->isc_flags & IFLIB_PSEUDO_ETHER) == 0) 5946 iflags |= IFF_POINTOPOINT; 5947 else 5948 iflags |= IFF_BROADCAST | IFF_SIMPLEX; 5949 if_setflags(ifp, iflags); 5950 ctx->ifc_vlan_attach_event = 5951 EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, 5952 EVENTHANDLER_PRI_FIRST); 5953 ctx->ifc_vlan_detach_event = 5954 EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, 5955 EVENTHANDLER_PRI_FIRST); 5956 5957 if ((sctx->isc_flags & IFLIB_DRIVER_MEDIA) == 0) { 5958 ctx->ifc_mediap = &ctx->ifc_media; 5959 ifmedia_init(ctx->ifc_mediap, IFM_IMASK, 5960 iflib_media_change, iflib_media_status); 5961 } 5962 return (0); 5963 } 5964 5965 static void 5966 iflib_unregister_vlan_handlers(if_ctx_t ctx) 5967 { 5968 /* Unregister VLAN events */ 5969 if (ctx->ifc_vlan_attach_event != NULL) { 5970 EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); 5971 ctx->ifc_vlan_attach_event = NULL; 5972 } 5973 if (ctx->ifc_vlan_detach_event != NULL) { 5974 EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); 5975 ctx->ifc_vlan_detach_event = NULL; 5976 } 5977 5978 } 5979 5980 static void 5981 iflib_deregister(if_ctx_t ctx) 5982 { 5983 if_t ifp = ctx->ifc_ifp; 5984 5985 /* Remove all media */ 5986 ifmedia_removeall(&ctx->ifc_media); 5987 5988 /* Ensure that VLAN event handlers are unregistered */ 5989 iflib_unregister_vlan_handlers(ctx); 5990 5991 #ifndef __HAIKU__ 5992 /* Release kobject reference */ 5993 kobj_delete((kobj_t) ctx, NULL); 5994 #endif 5995 5996 /* Free the ifnet structure */ 5997 if_free(ifp); 5998 5999 STATE_LOCK_DESTROY(ctx); 6000 6001 /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ 6002 CTX_LOCK_DESTROY(ctx); 6003 } 6004 6005 static int 6006 iflib_queues_alloc(if_ctx_t ctx) 6007 { 6008 if_shared_ctx_t sctx = ctx->ifc_sctx; 6009 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 6010 device_t dev = ctx->ifc_dev; 6011 int nrxqsets = scctx->isc_nrxqsets; 6012 int ntxqsets = scctx->isc_ntxqsets; 6013 iflib_txq_t txq; 6014 iflib_rxq_t rxq; 6015 iflib_fl_t fl = NULL; 6016 int i, j, cpu, err, txconf, rxconf; 6017 iflib_dma_info_t ifdip; 6018 uint32_t *rxqsizes = scctx->isc_rxqsizes; 6019 uint32_t *txqsizes = scctx->isc_txqsizes; 6020 uint8_t nrxqs = sctx->isc_nrxqs; 6021 uint8_t ntxqs = sctx->isc_ntxqs; 6022 int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; 6023 int fl_offset = (sctx->isc_flags & IFLIB_HAS_RXCQ ? 
1 : 0); 6024 caddr_t *vaddrs; 6025 uint64_t *paddrs; 6026 6027 KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); 6028 KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); 6029 KASSERT(nrxqs >= fl_offset + nfree_lists, 6030 ("there must be at least a rxq for each free list")); 6031 6032 /* Allocate the TX ring struct memory */ 6033 if (!(ctx->ifc_txqs = 6034 (iflib_txq_t) malloc(sizeof(struct iflib_txq) * 6035 ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 6036 device_printf(dev, "Unable to allocate TX ring memory\n"); 6037 err = ENOMEM; 6038 goto fail; 6039 } 6040 6041 /* Now allocate the RX */ 6042 if (!(ctx->ifc_rxqs = 6043 (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * 6044 nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { 6045 device_printf(dev, "Unable to allocate RX ring memory\n"); 6046 err = ENOMEM; 6047 goto rx_fail; 6048 } 6049 6050 txq = ctx->ifc_txqs; 6051 rxq = ctx->ifc_rxqs; 6052 6053 /* 6054 * XXX handle allocation failure 6055 */ 6056 for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { 6057 /* Set up some basics */ 6058 6059 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, 6060 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 6061 device_printf(dev, 6062 "Unable to allocate TX DMA info memory\n"); 6063 err = ENOMEM; 6064 goto err_tx_desc; 6065 } 6066 txq->ift_ifdi = ifdip; 6067 for (j = 0; j < ntxqs; j++, ifdip++) { 6068 if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, 0)) { 6069 device_printf(dev, 6070 "Unable to allocate TX descriptors\n"); 6071 err = ENOMEM; 6072 goto err_tx_desc; 6073 } 6074 txq->ift_txd_size[j] = scctx->isc_txd_size[j]; 6075 bzero((void *)ifdip->idi_vaddr, txqsizes[j]); 6076 } 6077 txq->ift_ctx = ctx; 6078 txq->ift_id = i; 6079 if (sctx->isc_flags & IFLIB_HAS_TXCQ) { 6080 txq->ift_br_offset = 1; 6081 } else { 6082 txq->ift_br_offset = 0; 6083 } 6084 6085 if (iflib_txsd_alloc(txq)) { 6086 device_printf(dev, "Critical Failure setting up TX buffers\n"); 6087 err = ENOMEM; 6088 goto err_tx_desc; 6089 } 6090 6091 /* Initialize the TX lock */ 6092 snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:TX(%d):callout", 6093 device_get_nameunit(dev), txq->ift_id); 6094 mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); 6095 callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); 6096 #ifndef __HAIKU__ 6097 txq->ift_timer.c_cpu = cpu; 6098 #endif 6099 #ifdef DEV_NETMAP 6100 callout_init_mtx(&txq->ift_netmap_timer, &txq->ift_mtx, 0); 6101 txq->ift_netmap_timer.c_cpu = cpu; 6102 #endif /* DEV_NETMAP */ 6103 6104 err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, 6105 iflib_txq_can_drain, M_IFLIB, M_WAITOK); 6106 if (err) { 6107 /* XXX free any allocated rings */ 6108 device_printf(dev, "Unable to allocate buf_ring\n"); 6109 goto err_tx_desc; 6110 } 6111 } 6112 6113 for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { 6114 /* Set up some basics */ 6115 callout_init(&rxq->ifr_watchdog, 1); 6116 6117 if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, 6118 M_IFLIB, M_NOWAIT | M_ZERO)) == NULL) { 6119 device_printf(dev, 6120 "Unable to allocate RX DMA info memory\n"); 6121 err = ENOMEM; 6122 goto err_tx_desc; 6123 } 6124 6125 rxq->ifr_ifdi = ifdip; 6126 /* XXX this needs to be changed if #rx queues != #tx queues */ 6127 rxq->ifr_ntxqirq = 1; 6128 rxq->ifr_txqid[0] = i; 6129 for (j = 0; j < nrxqs; j++, ifdip++) { 6130 if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, 0)) { 6131 device_printf(dev, 6132 "Unable to allocate RX descriptors\n"); 6133 err = ENOMEM; 6134 goto err_tx_desc; 
6135 } 6136 bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); 6137 } 6138 rxq->ifr_ctx = ctx; 6139 rxq->ifr_id = i; 6140 rxq->ifr_fl_offset = fl_offset; 6141 rxq->ifr_nfl = nfree_lists; 6142 if (!(fl = 6143 (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { 6144 device_printf(dev, "Unable to allocate free list memory\n"); 6145 err = ENOMEM; 6146 goto err_tx_desc; 6147 } 6148 rxq->ifr_fl = fl; 6149 for (j = 0; j < nfree_lists; j++) { 6150 fl[j].ifl_rxq = rxq; 6151 fl[j].ifl_id = j; 6152 fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; 6153 fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; 6154 } 6155 /* Allocate receive buffers for the ring */ 6156 if (iflib_rxsd_alloc(rxq)) { 6157 device_printf(dev, 6158 "Critical Failure setting up receive buffers\n"); 6159 err = ENOMEM; 6160 goto err_rx_desc; 6161 } 6162 6163 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) 6164 fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, 6165 M_WAITOK); 6166 } 6167 6168 /* TXQs */ 6169 vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 6170 paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); 6171 for (i = 0; i < ntxqsets; i++) { 6172 iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; 6173 6174 for (j = 0; j < ntxqs; j++, di++) { 6175 vaddrs[i*ntxqs + j] = di->idi_vaddr; 6176 paddrs[i*ntxqs + j] = di->idi_paddr; 6177 } 6178 } 6179 if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { 6180 device_printf(ctx->ifc_dev, 6181 "Unable to allocate device TX queue\n"); 6182 iflib_tx_structures_free(ctx); 6183 free(vaddrs, M_IFLIB); 6184 free(paddrs, M_IFLIB); 6185 goto err_rx_desc; 6186 } 6187 free(vaddrs, M_IFLIB); 6188 free(paddrs, M_IFLIB); 6189 6190 /* RXQs */ 6191 vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 6192 paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); 6193 for (i = 0; i < nrxqsets; i++) { 6194 iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; 6195 6196 for (j = 0; j < nrxqs; j++, di++) { 6197 vaddrs[i*nrxqs + j] = di->idi_vaddr; 6198 paddrs[i*nrxqs + j] = di->idi_paddr; 6199 } 6200 } 6201 if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { 6202 device_printf(ctx->ifc_dev, 6203 "Unable to allocate device RX queue\n"); 6204 iflib_tx_structures_free(ctx); 6205 free(vaddrs, M_IFLIB); 6206 free(paddrs, M_IFLIB); 6207 goto err_rx_desc; 6208 } 6209 free(vaddrs, M_IFLIB); 6210 free(paddrs, M_IFLIB); 6211 6212 return (0); 6213 6214 /* XXX handle allocation failure changes */ 6215 err_rx_desc: 6216 err_tx_desc: 6217 rx_fail: 6218 if (ctx->ifc_rxqs != NULL) 6219 free(ctx->ifc_rxqs, M_IFLIB); 6220 ctx->ifc_rxqs = NULL; 6221 if (ctx->ifc_txqs != NULL) 6222 free(ctx->ifc_txqs, M_IFLIB); 6223 ctx->ifc_txqs = NULL; 6224 fail: 6225 return (err); 6226 } 6227 6228 static int 6229 iflib_tx_structures_setup(if_ctx_t ctx) 6230 { 6231 iflib_txq_t txq = ctx->ifc_txqs; 6232 int i; 6233 6234 for (i = 0; i < NTXQSETS(ctx); i++, txq++) 6235 iflib_txq_setup(txq); 6236 6237 return (0); 6238 } 6239 6240 static void 6241 iflib_tx_structures_free(if_ctx_t ctx) 6242 { 6243 iflib_txq_t txq = ctx->ifc_txqs; 6244 if_shared_ctx_t sctx = ctx->ifc_sctx; 6245 int i, j; 6246 6247 for (i = 0; i < NTXQSETS(ctx); i++, txq++) { 6248 for (j = 0; j < sctx->isc_ntxqs; j++) 6249 iflib_dma_free(&txq->ift_ifdi[j]); 6250 iflib_txq_destroy(txq); 6251 } 6252 free(ctx->ifc_txqs, M_IFLIB); 6253 ctx->ifc_txqs = NULL; 6254 } 6255 6256 
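/*
 * Example (illustrative sketch): iflib_queues_alloc() above hands the
 * descriptor ring addresses to the driver as flat arrays indexed by
 * [qset * nqs + q], one entry per ring in each queue set (so with a single
 * ring per set the index is simply the qset number).  A driver's
 * ifdi_tx_queues_alloc callback usually just records those addresses in
 * its softc.  All hyp_* names are hypothetical.
 *
 *	static int
 *	hyp_if_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
 *	    uint64_t *paddrs, int ntxqs, int ntxqsets)
 *	{
 *		struct hyp_softc *sc = iflib_get_softc(ctx);
 *		int i;
 *
 *		for (i = 0; i < ntxqsets; i++) {
 *			struct hyp_tx_ring *txr = &sc->tx_rings[i];
 *
 *			txr->tx_base = (struct hyp_tx_desc *)vaddrs[i * ntxqs];
 *			txr->tx_paddr = paddrs[i * ntxqs];
 *			txr->ctx = ctx;
 *		}
 *		return (0);
 *	}
 */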
/********************************************************************* 6257 * 6258 * Initialize all receive rings. 6259 * 6260 **********************************************************************/ 6261 static int 6262 iflib_rx_structures_setup(if_ctx_t ctx) 6263 { 6264 iflib_rxq_t rxq = ctx->ifc_rxqs; 6265 int q; 6266 #if defined(INET6) || defined(INET) 6267 int err, i; 6268 #endif 6269 6270 for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { 6271 #if defined(INET6) || defined(INET) 6272 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) { 6273 err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, 6274 TCP_LRO_ENTRIES, min(1024, 6275 ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset])); 6276 if (err != 0) { 6277 device_printf(ctx->ifc_dev, 6278 "LRO Initialization failed!\n"); 6279 goto fail; 6280 } 6281 } 6282 #endif 6283 IFDI_RXQ_SETUP(ctx, rxq->ifr_id); 6284 } 6285 return (0); 6286 #if defined(INET6) || defined(INET) 6287 fail: 6288 /* 6289 * Free LRO resources allocated so far, we will only handle 6290 * the rings that completed, the failing case will have 6291 * cleaned up for itself. 'q' failed, so its the terminus. 6292 */ 6293 rxq = ctx->ifc_rxqs; 6294 for (i = 0; i < q; ++i, rxq++) { 6295 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) 6296 tcp_lro_free(&rxq->ifr_lc); 6297 } 6298 return (err); 6299 #endif 6300 } 6301 6302 /********************************************************************* 6303 * 6304 * Free all receive rings. 6305 * 6306 **********************************************************************/ 6307 static void 6308 iflib_rx_structures_free(if_ctx_t ctx) 6309 { 6310 iflib_rxq_t rxq = ctx->ifc_rxqs; 6311 if_shared_ctx_t sctx = ctx->ifc_sctx; 6312 int i, j; 6313 6314 for (i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { 6315 for (j = 0; j < sctx->isc_nrxqs; j++) 6316 iflib_dma_free(&rxq->ifr_ifdi[j]); 6317 iflib_rx_sds_free(rxq); 6318 #if defined(INET6) || defined(INET) 6319 if (if_getcapabilities(ctx->ifc_ifp) & IFCAP_LRO) 6320 tcp_lro_free(&rxq->ifr_lc); 6321 #endif 6322 } 6323 free(ctx->ifc_rxqs, M_IFLIB); 6324 ctx->ifc_rxqs = NULL; 6325 } 6326 6327 static int 6328 iflib_qset_structures_setup(if_ctx_t ctx) 6329 { 6330 int err; 6331 6332 /* 6333 * It is expected that the caller takes care of freeing queues if this 6334 * fails. 6335 */ 6336 if ((err = iflib_tx_structures_setup(ctx)) != 0) { 6337 device_printf(ctx->ifc_dev, "iflib_tx_structures_setup failed: %d\n", err); 6338 return (err); 6339 } 6340 6341 if ((err = iflib_rx_structures_setup(ctx)) != 0) 6342 device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); 6343 6344 return (err); 6345 } 6346 6347 int 6348 iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, 6349 driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, const char *name) 6350 { 6351 6352 return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); 6353 } 6354 6355 /* Just to avoid copy/paste */ 6356 static inline int 6357 iflib_irq_set_affinity(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, 6358 int qid, struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, 6359 const char *name) 6360 { 6361 device_t dev; 6362 unsigned int base_cpuid, cpuid; 6363 int err; 6364 6365 dev = ctx->ifc_dev; 6366 base_cpuid = ctx->ifc_sysctl_core_offset; 6367 cpuid = get_cpuid_for_queue(ctx, base_cpuid, qid, type == IFLIB_INTR_TX); 6368 err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, dev, 6369 irq ? 
irq->ii_res : NULL, name); 6370 if (err) { 6371 device_printf(dev, "taskqgroup_attach_cpu failed %d\n", err); 6372 return (err); 6373 } 6374 #ifdef notyet 6375 if (cpuid > ctx->ifc_cpuid_highest) 6376 ctx->ifc_cpuid_highest = cpuid; 6377 #endif 6378 return (0); 6379 } 6380 6381 int 6382 iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, 6383 iflib_intr_type_t type, driver_filter_t *filter, 6384 void *filter_arg, int qid, const char *name) 6385 { 6386 device_t dev; 6387 struct grouptask *gtask; 6388 struct taskqgroup *tqg; 6389 iflib_filter_info_t info; 6390 gtask_fn_t *fn; 6391 int tqrid, err; 6392 driver_filter_t *intr_fast; 6393 void *q; 6394 6395 info = &ctx->ifc_filter_info; 6396 tqrid = rid; 6397 6398 switch (type) { 6399 /* XXX merge tx/rx for netmap? */ 6400 case IFLIB_INTR_TX: 6401 q = &ctx->ifc_txqs[qid]; 6402 info = &ctx->ifc_txqs[qid].ift_filter_info; 6403 gtask = &ctx->ifc_txqs[qid].ift_task; 6404 tqg = qgroup_if_io_tqg; 6405 fn = _task_fn_tx; 6406 intr_fast = iflib_fast_intr; 6407 GROUPTASK_INIT(gtask, 0, fn, q); 6408 ctx->ifc_flags |= IFC_NETMAP_TX_IRQ; 6409 break; 6410 case IFLIB_INTR_RX: 6411 q = &ctx->ifc_rxqs[qid]; 6412 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 6413 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6414 tqg = qgroup_if_io_tqg; 6415 fn = _task_fn_rx; 6416 intr_fast = iflib_fast_intr; 6417 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6418 break; 6419 case IFLIB_INTR_RXTX: 6420 q = &ctx->ifc_rxqs[qid]; 6421 info = &ctx->ifc_rxqs[qid].ifr_filter_info; 6422 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6423 tqg = qgroup_if_io_tqg; 6424 fn = _task_fn_rx; 6425 intr_fast = iflib_fast_intr_rxtx; 6426 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6427 break; 6428 case IFLIB_INTR_ADMIN: 6429 q = ctx; 6430 tqrid = -1; 6431 info = &ctx->ifc_filter_info; 6432 gtask = &ctx->ifc_admin_task; 6433 tqg = qgroup_if_config_tqg; 6434 fn = _task_fn_admin; 6435 intr_fast = iflib_fast_intr_ctx; 6436 break; 6437 default: 6438 device_printf(ctx->ifc_dev, "%s: unknown net intr type\n", 6439 __func__); 6440 return (EINVAL); 6441 } 6442 6443 info->ifi_filter = filter; 6444 info->ifi_filter_arg = filter_arg; 6445 info->ifi_task = gtask; 6446 info->ifi_ctx = q; 6447 6448 dev = ctx->ifc_dev; 6449 err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); 6450 if (err != 0) { 6451 device_printf(dev, "_iflib_irq_alloc failed %d\n", err); 6452 return (err); 6453 } 6454 if (type == IFLIB_INTR_ADMIN) 6455 return (0); 6456 6457 if (tqrid != -1) { 6458 err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, 6459 name); 6460 if (err) 6461 return (err); 6462 } else { 6463 taskqgroup_attach(tqg, gtask, q, dev, irq->ii_res, name); 6464 } 6465 6466 return (0); 6467 } 6468 6469 void 6470 iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, const char *name) 6471 { 6472 device_t dev; 6473 struct grouptask *gtask; 6474 struct taskqgroup *tqg; 6475 gtask_fn_t *fn; 6476 void *q; 6477 int err; 6478 6479 switch (type) { 6480 case IFLIB_INTR_TX: 6481 q = &ctx->ifc_txqs[qid]; 6482 gtask = &ctx->ifc_txqs[qid].ift_task; 6483 tqg = qgroup_if_io_tqg; 6484 fn = _task_fn_tx; 6485 GROUPTASK_INIT(gtask, 0, fn, q); 6486 break; 6487 case IFLIB_INTR_RX: 6488 q = &ctx->ifc_rxqs[qid]; 6489 gtask = &ctx->ifc_rxqs[qid].ifr_task; 6490 tqg = qgroup_if_io_tqg; 6491 fn = _task_fn_rx; 6492 NET_GROUPTASK_INIT(gtask, 0, fn, q); 6493 break; 6494 case IFLIB_INTR_IOV: 6495 q = ctx; 6496 gtask = &ctx->ifc_vflr_task; 6497 tqg = qgroup_if_config_tqg; 6498 fn = _task_fn_iov; 6499 
GROUPTASK_INIT(gtask, 0, fn, q); 6500 break; 6501 default: 6502 panic("unknown net intr type"); 6503 } 6504 err = iflib_irq_set_affinity(ctx, irq, type, qid, gtask, tqg, q, name); 6505 if (err) { 6506 dev = ctx->ifc_dev; 6507 taskqgroup_attach(tqg, gtask, q, dev, irq ? irq->ii_res : NULL, 6508 name); 6509 } 6510 } 6511 6512 void 6513 iflib_irq_free(if_ctx_t ctx, if_irq_t irq) 6514 { 6515 #ifdef __HAIKU__ 6516 if (!ctx || !irq) 6517 return; 6518 #endif 6519 6520 if (irq->ii_tag) 6521 bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); 6522 6523 if (irq->ii_res) 6524 bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, 6525 rman_get_rid(irq->ii_res), irq->ii_res); 6526 } 6527 6528 static int 6529 iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, const char *name) 6530 { 6531 iflib_txq_t txq = ctx->ifc_txqs; 6532 iflib_rxq_t rxq = ctx->ifc_rxqs; 6533 if_irq_t irq = &ctx->ifc_legacy_irq; 6534 iflib_filter_info_t info; 6535 device_t dev; 6536 struct grouptask *gtask; 6537 struct resource *res; 6538 struct taskqgroup *tqg; 6539 void *q; 6540 int err, tqrid; 6541 bool rx_only; 6542 6543 q = &ctx->ifc_rxqs[0]; 6544 info = &rxq[0].ifr_filter_info; 6545 gtask = &rxq[0].ifr_task; 6546 tqg = qgroup_if_io_tqg; 6547 tqrid = *rid; 6548 rx_only = (ctx->ifc_sctx->isc_flags & IFLIB_SINGLE_IRQ_RX_ONLY) != 0; 6549 6550 ctx->ifc_flags |= IFC_LEGACY; 6551 info->ifi_filter = filter; 6552 info->ifi_filter_arg = filter_arg; 6553 info->ifi_task = gtask; 6554 info->ifi_ctx = rx_only ? ctx : q; 6555 6556 dev = ctx->ifc_dev; 6557 /* We allocate a single interrupt resource */ 6558 err = _iflib_irq_alloc(ctx, irq, tqrid, rx_only ? iflib_fast_intr_ctx : 6559 iflib_fast_intr_rxtx, NULL, info, name); 6560 if (err != 0) 6561 return (err); 6562 NET_GROUPTASK_INIT(gtask, 0, _task_fn_rx, q); 6563 res = irq->ii_res; 6564 taskqgroup_attach(tqg, gtask, q, dev, res, name); 6565 6566 GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); 6567 taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, dev, res, 6568 "tx"); 6569 return (0); 6570 } 6571 6572 void 6573 iflib_led_create(if_ctx_t ctx) 6574 { 6575 6576 ctx->ifc_led_dev = led_create(iflib_led_func, ctx, 6577 device_get_nameunit(ctx->ifc_dev)); 6578 } 6579 6580 void 6581 iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) 6582 { 6583 6584 GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); 6585 } 6586 6587 void 6588 iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) 6589 { 6590 6591 GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); 6592 } 6593 6594 void 6595 iflib_admin_intr_deferred(if_ctx_t ctx) 6596 { 6597 6598 MPASS(ctx->ifc_admin_task.gt_taskqueue != NULL); 6599 GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); 6600 } 6601 6602 void 6603 iflib_iov_intr_deferred(if_ctx_t ctx) 6604 { 6605 6606 GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); 6607 } 6608 6609 void 6610 iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, const char *name) 6611 { 6612 6613 taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, NULL, NULL, 6614 name); 6615 } 6616 6617 void 6618 iflib_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn, 6619 const char *name) 6620 { 6621 6622 GROUPTASK_INIT(gtask, 0, fn, ctx); 6623 taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, NULL, NULL, 6624 name); 6625 } 6626 6627 void 6628 iflib_config_gtask_deinit(struct grouptask *gtask) 6629 { 6630 6631 taskqgroup_detach(qgroup_if_config_tqg, gtask); 6632 } 6633 6634 void 6635 iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) 6636 { 6637 if_t ifp = 
ctx->ifc_ifp; 6638 iflib_txq_t txq = ctx->ifc_txqs; 6639 6640 if_setbaudrate(ifp, baudrate); 6641 if (baudrate >= IF_Gbps(10)) { 6642 STATE_LOCK(ctx); 6643 ctx->ifc_flags |= IFC_PREFETCH; 6644 STATE_UNLOCK(ctx); 6645 } 6646 /* If link down, disable watchdog */ 6647 if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { 6648 for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) 6649 txq->ift_qstatus = IFLIB_QUEUE_IDLE; 6650 } 6651 ctx->ifc_link_state = link_state; 6652 if_link_state_change(ifp, link_state); 6653 } 6654 6655 static int 6656 iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) 6657 { 6658 int credits; 6659 #ifdef INVARIANTS 6660 int credits_pre = txq->ift_cidx_processed; 6661 #endif 6662 6663 bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, 6664 BUS_DMASYNC_POSTREAD); 6665 if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) 6666 return (0); 6667 6668 txq->ift_processed += credits; 6669 txq->ift_cidx_processed += credits; 6670 6671 MPASS(credits_pre + credits == txq->ift_cidx_processed); 6672 if (txq->ift_cidx_processed >= txq->ift_size) 6673 txq->ift_cidx_processed -= txq->ift_size; 6674 return (credits); 6675 } 6676 6677 static int 6678 iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) 6679 { 6680 iflib_fl_t fl; 6681 u_int i; 6682 6683 for (i = 0, fl = &rxq->ifr_fl[0]; i < rxq->ifr_nfl; i++, fl++) 6684 bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, 6685 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 6686 return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, 6687 budget)); 6688 } 6689 6690 void 6691 iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, 6692 const char *description, if_int_delay_info_t info, 6693 int offset, int value) 6694 { 6695 info->iidi_ctx = ctx; 6696 info->iidi_offset = offset; 6697 info->iidi_value = value; 6698 SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), 6699 SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), 6700 OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 6701 info, 0, iflib_sysctl_int_delay, "I", description); 6702 } 6703 6704 struct sx * 6705 iflib_ctx_lock_get(if_ctx_t ctx) 6706 { 6707 6708 return (&ctx->ifc_ctx_sx); 6709 } 6710 6711 static int 6712 iflib_msix_init(if_ctx_t ctx) 6713 { 6714 device_t dev = ctx->ifc_dev; 6715 if_shared_ctx_t sctx = ctx->ifc_sctx; 6716 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 6717 int admincnt, bar, err, iflib_num_rx_queues, iflib_num_tx_queues; 6718 int msgs, queuemsgs, queues, rx_queues, tx_queues, vectors; 6719 6720 iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; 6721 iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; 6722 6723 if (bootverbose) 6724 device_printf(dev, "msix_init qsets capped at %d\n", 6725 imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); 6726 6727 /* Override by tuneable */ 6728 if (scctx->isc_disable_msix) 6729 goto msi; 6730 6731 /* First try MSI-X */ 6732 if ((msgs = pci_msix_count(dev)) == 0) { 6733 if (bootverbose) 6734 device_printf(dev, "MSI-X not supported or disabled\n"); 6735 goto msi; 6736 } 6737 6738 bar = ctx->ifc_softc_ctx.isc_msix_bar; 6739 /* 6740 * bar == -1 => "trust me I know what I'm doing" 6741 * Some drivers are for hardware that is so shoddily 6742 * documented that no one knows which bars are which 6743 * so the developer has to map all bars. This hack 6744 * allows shoddy garbage to use MSI-X in this framework. 
6745 */ 6746 if (bar != -1) { 6747 ctx->ifc_msix_mem = bus_alloc_resource_any(dev, 6748 SYS_RES_MEMORY, &bar, RF_ACTIVE); 6749 if (ctx->ifc_msix_mem == NULL) { 6750 device_printf(dev, "Unable to map MSI-X table\n"); 6751 goto msi; 6752 } 6753 } 6754 6755 admincnt = sctx->isc_admin_intrcnt; 6756 #if IFLIB_DEBUG 6757 /* use only 1 qset in debug mode */ 6758 queuemsgs = min(msgs - admincnt, 1); 6759 #else 6760 queuemsgs = msgs - admincnt; 6761 #endif 6762 #ifdef RSS 6763 queues = imin(queuemsgs, rss_getnumbuckets()); 6764 #else 6765 queues = queuemsgs; 6766 #endif 6767 #ifndef __HAIKU__ 6768 queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); 6769 if (bootverbose) 6770 device_printf(dev, 6771 "intr CPUs: %d queue msgs: %d admincnt: %d\n", 6772 CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); 6773 #endif 6774 #ifdef RSS 6775 /* If we're doing RSS, clamp at the number of RSS buckets */ 6776 if (queues > rss_getnumbuckets()) 6777 queues = rss_getnumbuckets(); 6778 #endif 6779 if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) 6780 rx_queues = iflib_num_rx_queues; 6781 else 6782 rx_queues = queues; 6783 6784 if (rx_queues > scctx->isc_nrxqsets) 6785 rx_queues = scctx->isc_nrxqsets; 6786 6787 /* 6788 * We want this to be all logical CPUs by default 6789 */ 6790 if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) 6791 tx_queues = iflib_num_tx_queues; 6792 else 6793 tx_queues = mp_ncpus; 6794 6795 if (tx_queues > scctx->isc_ntxqsets) 6796 tx_queues = scctx->isc_ntxqsets; 6797 6798 if (ctx->ifc_sysctl_qs_eq_override == 0) { 6799 #ifdef INVARIANTS 6800 if (tx_queues != rx_queues) 6801 device_printf(dev, 6802 "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", 6803 min(rx_queues, tx_queues), min(rx_queues, tx_queues)); 6804 #endif 6805 tx_queues = min(rx_queues, tx_queues); 6806 rx_queues = min(rx_queues, tx_queues); 6807 } 6808 6809 vectors = rx_queues + admincnt; 6810 if (msgs < vectors) { 6811 device_printf(dev, 6812 "insufficient number of MSI-X vectors " 6813 "(supported %d, need %d)\n", msgs, vectors); 6814 goto msi; 6815 } 6816 6817 device_printf(dev, "Using %d RX queues %d TX queues\n", rx_queues, 6818 tx_queues); 6819 msgs = vectors; 6820 if ((err = pci_alloc_msix(dev, &vectors)) == 0) { 6821 if (vectors != msgs) { 6822 device_printf(dev, 6823 "Unable to allocate sufficient MSI-X vectors " 6824 "(got %d, need %d)\n", vectors, msgs); 6825 pci_release_msi(dev); 6826 if (bar != -1) { 6827 bus_release_resource(dev, SYS_RES_MEMORY, bar, 6828 ctx->ifc_msix_mem); 6829 ctx->ifc_msix_mem = NULL; 6830 } 6831 goto msi; 6832 } 6833 device_printf(dev, "Using MSI-X interrupts with %d vectors\n", 6834 vectors); 6835 scctx->isc_vectors = vectors; 6836 scctx->isc_nrxqsets = rx_queues; 6837 scctx->isc_ntxqsets = tx_queues; 6838 scctx->isc_intr = IFLIB_INTR_MSIX; 6839 6840 return (vectors); 6841 } else { 6842 device_printf(dev, 6843 "failed to allocate %d MSI-X vectors, err: %d\n", vectors, 6844 err); 6845 if (bar != -1) { 6846 bus_release_resource(dev, SYS_RES_MEMORY, bar, 6847 ctx->ifc_msix_mem); 6848 ctx->ifc_msix_mem = NULL; 6849 } 6850 } 6851 6852 msi: 6853 vectors = pci_msi_count(dev); 6854 scctx->isc_nrxqsets = 1; 6855 scctx->isc_ntxqsets = 1; 6856 scctx->isc_vectors = vectors; 6857 if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { 6858 device_printf(dev,"Using an MSI interrupt\n"); 6859 scctx->isc_intr = IFLIB_INTR_MSI; 6860 } else { 6861 scctx->isc_vectors = 1; 6862 device_printf(dev,"Using a Legacy interrupt\n"); 6863 scctx->isc_intr = 
IFLIB_INTR_LEGACY; 6864 } 6865 6866 return (vectors); 6867 } 6868 6869 static const char *ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; 6870 6871 #ifndef __HAIKU__ 6872 static int 6873 mp_ring_state_handler(SYSCTL_HANDLER_ARGS) 6874 { 6875 int rc; 6876 uint16_t *state = ((uint16_t *)oidp->oid_arg1); 6877 struct sbuf *sb; 6878 const char *ring_state = "UNKNOWN"; 6879 6880 /* XXX needed ? */ 6881 rc = sysctl_wire_old_buffer(req, 0); 6882 MPASS(rc == 0); 6883 if (rc != 0) 6884 return (rc); 6885 sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); 6886 MPASS(sb != NULL); 6887 if (sb == NULL) 6888 return (ENOMEM); 6889 if (state[3] <= 3) 6890 ring_state = ring_states[state[3]]; 6891 6892 sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", 6893 state[0], state[1], state[2], ring_state); 6894 rc = sbuf_finish(sb); 6895 sbuf_delete(sb); 6896 return(rc); 6897 } 6898 #endif 6899 6900 enum iflib_ndesc_handler { 6901 IFLIB_NTXD_HANDLER, 6902 IFLIB_NRXD_HANDLER, 6903 }; 6904 6905 static int 6906 mp_ndesc_handler(SYSCTL_HANDLER_ARGS) 6907 { 6908 if_ctx_t ctx = (void *)arg1; 6909 enum iflib_ndesc_handler type = arg2; 6910 char buf[256] = {0}; 6911 qidx_t *ndesc; 6912 char *p, *next; 6913 int nqs, rc, i; 6914 6915 nqs = 8; 6916 switch(type) { 6917 case IFLIB_NTXD_HANDLER: 6918 ndesc = ctx->ifc_sysctl_ntxds; 6919 if (ctx->ifc_sctx) 6920 nqs = ctx->ifc_sctx->isc_ntxqs; 6921 break; 6922 case IFLIB_NRXD_HANDLER: 6923 ndesc = ctx->ifc_sysctl_nrxds; 6924 if (ctx->ifc_sctx) 6925 nqs = ctx->ifc_sctx->isc_nrxqs; 6926 break; 6927 default: 6928 printf("%s: unhandled type\n", __func__); 6929 return (EINVAL); 6930 } 6931 if (nqs == 0) 6932 nqs = 8; 6933 6934 for (i=0; i<8; i++) { 6935 if (i >= nqs) 6936 break; 6937 if (i) 6938 strcat(buf, ","); 6939 sprintf(strchr(buf, 0), "%d", ndesc[i]); 6940 } 6941 6942 rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); 6943 if (rc || req->newptr == NULL) 6944 return rc; 6945 6946 for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; 6947 i++, p = strsep(&next, " ,")) { 6948 ndesc[i] = strtoul(p, NULL, 10); 6949 } 6950 6951 return(rc); 6952 } 6953 6954 #define NAME_BUFLEN 32 6955 static void 6956 iflib_add_device_sysctl_pre(if_ctx_t ctx) 6957 { 6958 #ifndef __HAIKU__ 6959 device_t dev = iflib_get_dev(ctx); 6960 struct sysctl_oid_list *child, *oid_list; 6961 struct sysctl_ctx_list *ctx_list; 6962 struct sysctl_oid *node; 6963 6964 ctx_list = device_get_sysctl_ctx(dev); 6965 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); 6966 ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", 6967 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "IFLIB fields"); 6968 oid_list = SYSCTL_CHILDREN(node); 6969 6970 SYSCTL_ADD_CONST_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", 6971 CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 6972 "driver version"); 6973 6974 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", 6975 CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, 6976 "# of txqs to use, 0 => use default #"); 6977 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", 6978 CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, 6979 "# of rxqs to use, 0 => use default #"); 6980 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", 6981 CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, 6982 "permit #txq != #rxq"); 6983 SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", 6984 CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, 6985 "disable MSI-X (default 0)"); 6986 SYSCTL_ADD_U16(ctx_list, oid_list, 
OID_AUTO, "rx_budget", 6987 CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, 6988 "set the RX budget"); 6989 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "tx_abdicate", 6990 CTLFLAG_RWTUN, &ctx->ifc_sysctl_tx_abdicate, 0, 6991 "cause TX to abdicate instead of running to completion"); 6992 ctx->ifc_sysctl_core_offset = CORE_OFFSET_UNSPECIFIED; 6993 SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "core_offset", 6994 CTLFLAG_RDTUN, &ctx->ifc_sysctl_core_offset, 0, 6995 "offset to start using cores at"); 6996 SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "separate_txrx", 6997 CTLFLAG_RDTUN, &ctx->ifc_sysctl_separate_txrx, 0, 6998 "use separate cores for TX and RX"); 6999 SYSCTL_ADD_U8(ctx_list, oid_list, OID_AUTO, "use_logical_cores", 7000 CTLFLAG_RDTUN, &ctx->ifc_sysctl_use_logical_cores, 0, 7001 "try to make use of logical cores for TX and RX"); 7002 7003 /* XXX change for per-queue sizes */ 7004 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", 7005 CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, 7006 IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", 7007 "list of # of TX descriptors to use, 0 = use default #"); 7008 SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", 7009 CTLTYPE_STRING | CTLFLAG_RWTUN | CTLFLAG_NEEDGIANT, ctx, 7010 IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", 7011 "list of # of RX descriptors to use, 0 = use default #"); 7012 #endif 7013 } 7014 7015 static void 7016 iflib_add_device_sysctl_post(if_ctx_t ctx) 7017 { 7018 #ifndef __HAIKU__ 7019 if_shared_ctx_t sctx = ctx->ifc_sctx; 7020 if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; 7021 device_t dev = iflib_get_dev(ctx); 7022 struct sysctl_oid_list *child; 7023 struct sysctl_ctx_list *ctx_list; 7024 iflib_fl_t fl; 7025 iflib_txq_t txq; 7026 iflib_rxq_t rxq; 7027 int i, j; 7028 char namebuf[NAME_BUFLEN]; 7029 char *qfmt; 7030 struct sysctl_oid *queue_node, *fl_node, *node; 7031 struct sysctl_oid_list *queue_list, *fl_list; 7032 ctx_list = device_get_sysctl_ctx(dev); 7033 7034 node = ctx->ifc_sysctl_node; 7035 child = SYSCTL_CHILDREN(node); 7036 7037 if (scctx->isc_ntxqsets > 100) 7038 qfmt = "txq%03d"; 7039 else if (scctx->isc_ntxqsets > 10) 7040 qfmt = "txq%02d"; 7041 else 7042 qfmt = "txq%d"; 7043 for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { 7044 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 7045 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 7046 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 7047 queue_list = SYSCTL_CHILDREN(queue_node); 7048 SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu", 7049 CTLFLAG_RD, 7050 &txq->ift_task.gt_cpu, 0, "cpu this queue is bound to"); 7051 #if MEMORY_LOGGING 7052 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", 7053 CTLFLAG_RD, 7054 &txq->ift_dequeued, "total mbufs freed"); 7055 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", 7056 CTLFLAG_RD, 7057 &txq->ift_enqueued, "total mbufs enqueued"); 7058 #endif 7059 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", 7060 CTLFLAG_RD, 7061 &txq->ift_mbuf_defrag, "# of times m_defrag was called"); 7062 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", 7063 CTLFLAG_RD, 7064 &txq->ift_pullups, "# of times m_pullup was called"); 7065 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", 7066 CTLFLAG_RD, 7067 &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); 7068 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", 7069 CTLFLAG_RD, 7070 &txq->ift_no_desc_avail, "# of times no descriptors were 
available"); 7071 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", 7072 CTLFLAG_RD, 7073 &txq->ift_map_failed, "# of times DMA map failed"); 7074 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", 7075 CTLFLAG_RD, 7076 &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); 7077 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", 7078 CTLFLAG_RD, 7079 &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); 7080 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", 7081 CTLFLAG_RD, 7082 &txq->ift_pidx, 1, "Producer Index"); 7083 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", 7084 CTLFLAG_RD, 7085 &txq->ift_cidx, 1, "Consumer Index"); 7086 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", 7087 CTLFLAG_RD, 7088 &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); 7089 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", 7090 CTLFLAG_RD, 7091 &txq->ift_in_use, 1, "descriptors in use"); 7092 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", 7093 CTLFLAG_RD, 7094 &txq->ift_processed, "descriptors procesed for clean"); 7095 SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", 7096 CTLFLAG_RD, 7097 &txq->ift_cleaned, "total cleaned"); 7098 SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", 7099 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT, 7100 __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, 7101 mp_ring_state_handler, "A", "soft ring state"); 7102 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", 7103 CTLFLAG_RD, &txq->ift_br->enqueues, 7104 "# of enqueues to the mp_ring for this queue"); 7105 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", 7106 CTLFLAG_RD, &txq->ift_br->drops, 7107 "# of drops in the mp_ring for this queue"); 7108 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", 7109 CTLFLAG_RD, &txq->ift_br->starts, 7110 "# of normal consumer starts in the mp_ring for this queue"); 7111 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", 7112 CTLFLAG_RD, &txq->ift_br->stalls, 7113 "# of consumer stalls in the mp_ring for this queue"); 7114 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", 7115 CTLFLAG_RD, &txq->ift_br->restarts, 7116 "# of consumer restarts in the mp_ring for this queue"); 7117 SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", 7118 CTLFLAG_RD, &txq->ift_br->abdications, 7119 "# of consumer abdications in the mp_ring for this queue"); 7120 } 7121 7122 if (scctx->isc_nrxqsets > 100) 7123 qfmt = "rxq%03d"; 7124 else if (scctx->isc_nrxqsets > 10) 7125 qfmt = "rxq%02d"; 7126 else 7127 qfmt = "rxq%d"; 7128 for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { 7129 snprintf(namebuf, NAME_BUFLEN, qfmt, i); 7130 queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, 7131 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Queue Name"); 7132 queue_list = SYSCTL_CHILDREN(queue_node); 7133 SYSCTL_ADD_INT(ctx_list, queue_list, OID_AUTO, "cpu", 7134 CTLFLAG_RD, 7135 &rxq->ifr_task.gt_cpu, 0, "cpu this queue is bound to"); 7136 if (sctx->isc_flags & IFLIB_HAS_RXCQ) { 7137 SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", 7138 CTLFLAG_RD, 7139 &rxq->ifr_cq_cidx, 1, "Consumer Index"); 7140 } 7141 7142 for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { 7143 snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); 7144 fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, 7145 
CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "freelist Name"); 7146 fl_list = SYSCTL_CHILDREN(fl_node); 7147 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", 7148 CTLFLAG_RD, 7149 &fl->ifl_pidx, 1, "Producer Index"); 7150 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", 7151 CTLFLAG_RD, 7152 &fl->ifl_cidx, 1, "Consumer Index"); 7153 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", 7154 CTLFLAG_RD, 7155 &fl->ifl_credits, 1, "credits available"); 7156 SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "buf_size", 7157 CTLFLAG_RD, 7158 &fl->ifl_buf_size, 1, "buffer size"); 7159 #if MEMORY_LOGGING 7160 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", 7161 CTLFLAG_RD, 7162 &fl->ifl_m_enqueued, "mbufs allocated"); 7163 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", 7164 CTLFLAG_RD, 7165 &fl->ifl_m_dequeued, "mbufs freed"); 7166 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", 7167 CTLFLAG_RD, 7168 &fl->ifl_cl_enqueued, "clusters allocated"); 7169 SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", 7170 CTLFLAG_RD, 7171 &fl->ifl_cl_dequeued, "clusters freed"); 7172 #endif 7173 } 7174 } 7175 #endif 7176 } 7177 7178 void 7179 iflib_request_reset(if_ctx_t ctx) 7180 { 7181 7182 STATE_LOCK(ctx); 7183 ctx->ifc_flags |= IFC_DO_RESET; 7184 STATE_UNLOCK(ctx); 7185 } 7186 7187 #ifndef __NO_STRICT_ALIGNMENT 7188 static struct mbuf * 7189 iflib_fixup_rx(struct mbuf *m) 7190 { 7191 struct mbuf *n; 7192 7193 if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { 7194 bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); 7195 m->m_data += ETHER_HDR_LEN; 7196 n = m; 7197 } else { 7198 MGETHDR(n, M_NOWAIT, MT_DATA); 7199 if (n == NULL) { 7200 m_freem(m); 7201 return (NULL); 7202 } 7203 bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); 7204 m->m_data += ETHER_HDR_LEN; 7205 m->m_len -= ETHER_HDR_LEN; 7206 n->m_len = ETHER_HDR_LEN; 7207 M_MOVE_PKTHDR(n, m); 7208 n->m_next = m; 7209 } 7210 return (n); 7211 } 7212 #endif 7213 7214 #ifdef DEBUGNET 7215 static void 7216 iflib_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) 7217 { 7218 if_ctx_t ctx; 7219 7220 ctx = if_getsoftc(ifp); 7221 CTX_LOCK(ctx); 7222 *nrxr = NRXQSETS(ctx); 7223 *ncl = ctx->ifc_rxqs[0].ifr_fl->ifl_size; 7224 *clsize = ctx->ifc_rxqs[0].ifr_fl->ifl_buf_size; 7225 CTX_UNLOCK(ctx); 7226 } 7227 7228 static void 7229 iflib_debugnet_event(if_t ifp, enum debugnet_ev event) 7230 { 7231 if_ctx_t ctx; 7232 if_softc_ctx_t scctx; 7233 iflib_fl_t fl; 7234 iflib_rxq_t rxq; 7235 int i, j; 7236 7237 ctx = if_getsoftc(ifp); 7238 scctx = &ctx->ifc_softc_ctx; 7239 7240 switch (event) { 7241 case DEBUGNET_START: 7242 #ifndef __HAIKU__ 7243 for (i = 0; i < scctx->isc_nrxqsets; i++) { 7244 rxq = &ctx->ifc_rxqs[i]; 7245 for (j = 0; j < rxq->ifr_nfl; j++) { 7246 fl = rxq->ifr_fl; 7247 fl->ifl_zone = m_getzone(fl->ifl_buf_size); 7248 } 7249 } 7250 iflib_no_tx_batch = 1; 7251 break; 7252 #endif 7253 default: 7254 break; 7255 } 7256 } 7257 7258 static int 7259 iflib_debugnet_transmit(if_t ifp, struct mbuf *m) 7260 { 7261 if_ctx_t ctx; 7262 iflib_txq_t txq; 7263 int error; 7264 7265 ctx = if_getsoftc(ifp); 7266 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 7267 IFF_DRV_RUNNING) 7268 return (EBUSY); 7269 7270 txq = &ctx->ifc_txqs[0]; 7271 error = iflib_encap(txq, &m); 7272 if (error == 0) 7273 (void)iflib_txd_db_check(txq, true); 7274 return (error); 7275 } 7276 7277 static int 7278 iflib_debugnet_poll(if_t ifp, int count) 7279 { 7280 struct epoch_tracker et; 7281 if_ctx_t ctx; 7282 if_softc_ctx_t scctx; 7283 
iflib_txq_t txq; 7284 int i; 7285 7286 ctx = if_getsoftc(ifp); 7287 scctx = &ctx->ifc_softc_ctx; 7288 7289 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 7290 IFF_DRV_RUNNING) 7291 return (EBUSY); 7292 7293 txq = &ctx->ifc_txqs[0]; 7294 (void)iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); 7295 7296 NET_EPOCH_ENTER(et); 7297 for (i = 0; i < scctx->isc_nrxqsets; i++) 7298 (void)iflib_rxeof(&ctx->ifc_rxqs[i], 16 /* XXX */); 7299 NET_EPOCH_EXIT(et); 7300 return (0); 7301 } 7302 #endif /* DEBUGNET */ 7303
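/*
 * Example (illustrative sketch): iflib_add_int_delay_sysctl() above is a
 * convenience for exposing an interrupt-moderation setting under the
 * device's sysctl tree (dev.<driver>.<unit>.<name>).  The hyp_* names and
 * register offset are hypothetical.
 *
 *	static struct if_int_delay_info hyp_rx_itr;
 *
 *	static void
 *	hyp_add_sysctls(if_ctx_t ctx)
 *	{
 *		iflib_add_int_delay_sysctl(ctx, "rx_int_delay",
 *		    "RX interrupt moderation (usecs)", &hyp_rx_itr,
 *		    HYP_RITR_REG, 64);
 *	}
 *
 * When the sysctl is written, the registered handler hands the update back
 * to the driver so it can program the hardware register identified by the
 * stored offset.
 */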