1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2016 Intel Corporation
3 */
4
5 #include <sys/queue.h>
6
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10 #include <errno.h>
11 #include <stdint.h>
12 #include <stdarg.h>
13 #include <inttypes.h>
14
15 #include <rte_interrupts.h>
16 #include <rte_byteorder.h>
17 #include <rte_common.h>
18 #include <rte_log.h>
19 #include <rte_debug.h>
20 #include <rte_pci.h>
21 #include <rte_memory.h>
22 #include <rte_memcpy.h>
23 #include <rte_memzone.h>
24 #include <rte_launch.h>
25 #include <rte_eal.h>
26 #include <rte_per_lcore.h>
27 #include <rte_lcore.h>
28 #include <rte_atomic.h>
29 #include <rte_branch_prediction.h>
30 #include <rte_mempool.h>
31 #include <rte_malloc.h>
32 #include <rte_mbuf.h>
33 #include <rte_ether.h>
34 #include <ethdev_driver.h>
35 #include <rte_prefetch.h>
36 #include <rte_udp.h>
37 #include <rte_tcp.h>
38 #include <rte_sctp.h>
39 #include <rte_net.h>
40 #include <rte_string_fns.h>
41
42 #include "e1000_logs.h"
43 #include "base/e1000_api.h"
44 #include "e1000_ethdev.h"
45
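/*
 * Note: dcbf() below issues the AArch64 "dc cvac" instruction (clean data
 * cache line by VA to point of coherency). It is used to flush TX payload
 * data out of the CPU cache before the NIC DMAs it. This is a
 * platform-specific assumption and only builds on arm64 targets.
 */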
46 #define dcbf(p) { asm volatile("dc cvac, %0" : : "r"(p) : "memory"); }
47
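/*
 * The symbols below are assumed to be provided by platform glue outside
 * this file: per-controller physical (_p) and virtual (_v) addresses of
 * externally allocated descriptor/buffer areas, plus the MMIO base of the
 * first controller. The _r/_t entries replace the memzone-backed RX/TX
 * rings in the queue setup functions further down.
 */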
48 extern uint32_t igb_gbd_addr_b_p[4];
49 extern uint32_t igb_gbd_addr_r_p[4];
50 extern uint32_t igb_gbd_addr_t_p[4];
51 extern uint32_t igb_gbd_addr_x_p[4];
52
53 extern void *igb_gbd_addr_b_v[4];
54 extern void *igb_gbd_addr_t_v[4];
55 extern void *igb_gbd_addr_r_v[4];
56 extern void *igb_gbd_addr_x_v[4];
57 extern uint64_t base_hw_addr;
58
59 #ifdef RTE_LIBRTE_IEEE1588
60 #define IGB_TX_IEEE1588_TMST RTE_MBUF_F_TX_IEEE1588_TMST
61 #else
62 #define IGB_TX_IEEE1588_TMST 0
63 #endif
64 /* Bit mask to indicate which bits are required for building the TX context */
65 #define IGB_TX_OFFLOAD_MASK (RTE_MBUF_F_TX_OUTER_IPV6 | \
66 RTE_MBUF_F_TX_OUTER_IPV4 | \
67 RTE_MBUF_F_TX_IPV6 | \
68 RTE_MBUF_F_TX_IPV4 | \
69 RTE_MBUF_F_TX_VLAN | \
70 RTE_MBUF_F_TX_IP_CKSUM | \
71 RTE_MBUF_F_TX_L4_MASK | \
72 RTE_MBUF_F_TX_TCP_SEG | \
73 IGB_TX_IEEE1588_TMST)
74
75 #define IGB_TX_OFFLOAD_NOTSUP_MASK \
76 (RTE_MBUF_F_TX_OFFLOAD_MASK ^ IGB_TX_OFFLOAD_MASK)
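/* Any ol_flags bit set in IGB_TX_OFFLOAD_NOTSUP_MASK is rejected with
 * ENOTSUP by eth_igb_prep_pkts() below. */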
77
78 /**
79 * Structure associated with each descriptor of the RX ring of a RX queue.
80 */
81 struct igb_rx_entry {
82 struct rte_mbuf *mbuf; /**< mbuf associated with RX descriptor. */
83 };
84
85 /**
86 * Structure associated with each descriptor of the TX ring of a TX queue.
87 */
88 struct igb_tx_entry {
89 struct rte_mbuf *mbuf; /**< mbuf associated with TX desc, if any. */
90 uint16_t next_id; /**< Index of next descriptor in ring. */
91 uint16_t last_id; /**< Index of last scattered descriptor. */
92 };
93
94 /**
95 * rx queue flags
96 */
97 enum igb_rxq_flags {
98 IGB_RXQ_FLAG_LB_BSWAP_VLAN = 0x01,
99 };
100
101 /**
102 * Structure associated with each RX queue.
103 */
104 struct igb_rx_queue {
105 struct rte_mempool *mb_pool; /**< mbuf pool to populate RX ring. */
106 volatile union e1000_adv_rx_desc *rx_ring; /**< RX ring virtual address. */
107 uint64_t rx_ring_phys_addr; /**< RX ring DMA address. */
108 volatile uint32_t *rdt_reg_addr; /**< RDT register address. */
109 volatile uint32_t *rdh_reg_addr; /**< RDH register address. */
110 struct igb_rx_entry *sw_ring; /**< address of RX software ring. */
111 struct rte_mbuf *pkt_first_seg; /**< First segment of current packet. */
112 struct rte_mbuf *pkt_last_seg; /**< Last segment of current packet. */
113 uint16_t nb_rx_desc; /**< number of RX descriptors. */
114 uint16_t rx_tail; /**< current value of RDT register. */
115 uint16_t nb_rx_hold; /**< number of held free RX desc. */
116 uint16_t rx_free_thresh; /**< max free RX desc to hold. */
117 uint16_t queue_id; /**< RX queue index. */
118 uint16_t reg_idx; /**< RX queue register index. */
119 uint16_t port_id; /**< Device port identifier. */
120 uint8_t pthresh; /**< Prefetch threshold register. */
121 uint8_t hthresh; /**< Host threshold register. */
122 uint8_t wthresh; /**< Write-back threshold register. */
123 uint8_t crc_len; /**< 0 if CRC stripped, 4 otherwise. */
124 uint8_t drop_en; /**< If not 0, set SRRCTL.Drop_En. */
125 uint32_t flags; /**< RX flags. */
126 uint64_t offloads; /**< offloads of RTE_ETH_RX_OFFLOAD_* */
127 const struct rte_memzone *mz;
128 };
129
130 /**
131 * Hardware context number
132 */
133 enum igb_advctx_num {
134 IGB_CTX_0 = 0, /**< CTX0 */
135 IGB_CTX_1 = 1, /**< CTX1 */
136 IGB_CTX_NUM = 2, /**< CTX_NUM */
137 };
138
139 /** Offload features */
140 union igb_tx_offload {
141 uint64_t data;
142 struct {
143 uint64_t l3_len:9; /**< L3 (IP) Header Length. */
144 uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
145 uint64_t vlan_tci:16; /**< VLAN Tag Control Identifier(CPU order). */
146 uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
147 uint64_t tso_segsz:16; /**< TCP TSO segment size. */
148
149 /* uint64_t unused:8; */
150 };
151 };
152
153 /*
154 * Compare mask for igb_tx_offload.data,
155 * should be in sync with igb_tx_offload layout.
156 */
157 #define TX_MACIP_LEN_CMP_MASK 0x000000000000FFFFULL /**< L2L3 header mask. */
158 #define TX_VLAN_CMP_MASK 0x00000000FFFF0000ULL /**< Vlan mask. */
159 #define TX_TCP_LEN_CMP_MASK 0x000000FF00000000ULL /**< TCP header mask. */
160 #define TX_TSO_MSS_CMP_MASK 0x00FFFF0000000000ULL /**< TSO segsz mask. */
161 /** Mac + IP + TCP + Mss mask. */
162 #define TX_TSO_CMP_MASK \
163 (TX_MACIP_LEN_CMP_MASK | TX_TCP_LEN_CMP_MASK | TX_TSO_MSS_CMP_MASK)
164
165 /**
166 * Structure to check if a new context needs to be built
167 */
168 struct igb_advctx_info {
169 uint64_t flags; /**< ol_flags related to context build. */
170 /** tx offload: vlan, tso, l2-l3-l4 lengths. */
171 union igb_tx_offload tx_offload;
172 /** compare mask for tx offload. */
173 union igb_tx_offload tx_offload_mask;
174 };
175
176 /**
177 * Structure associated with each TX queue.
178 */
179 struct igb_tx_queue {
180 volatile union e1000_adv_tx_desc *tx_ring; /**< TX ring address */
181 uint64_t tx_ring_phys_addr; /**< TX ring DMA address. */
182 struct igb_tx_entry *sw_ring; /**< virtual address of SW ring. */
183 volatile uint32_t *tdt_reg_addr; /**< Address of TDT register. */
184 uint32_t txd_type; /**< Device-specific TXD type */
185 uint16_t nb_tx_desc; /**< number of TX descriptors. */
186 uint16_t tx_tail; /**< Current value of TDT register. */
187 uint16_t tx_head;
188 /**< Index of first used TX descriptor. */
189 uint16_t queue_id; /**< TX queue index. */
190 uint16_t reg_idx; /**< TX queue register index. */
191 uint16_t port_id; /**< Device port identifier. */
192 uint8_t pthresh; /**< Prefetch threshold register. */
193 uint8_t hthresh; /**< Host threshold register. */
194 uint8_t wthresh; /**< Write-back threshold register. */
195 uint32_t ctx_curr;
196 /**< Current used hardware descriptor. */
197 uint32_t ctx_start;
198 /**< Start context position for transmit queue. */
199 struct igb_advctx_info ctx_cache[IGB_CTX_NUM];
200 /**< Hardware context history.*/
201 uint64_t offloads; /**< offloads of RTE_ETH_TX_OFFLOAD_* */
202 const struct rte_memzone *mz;
203 };
204
205 #if 1
206 #define RTE_PMD_USE_PREFETCH
207 #endif
208
209 #ifdef RTE_PMD_USE_PREFETCH
210 #define rte_igb_prefetch(p) rte_prefetch0(p)
211 #else
212 #define rte_igb_prefetch(p) do {} while(0)
213 #endif
214
215 #ifdef RTE_PMD_PACKET_PREFETCH
216 #define rte_packet_prefetch(p) rte_prefetch1(p)
217 #else
218 #define rte_packet_prefetch(p) do {} while(0)
219 #endif
220
221 /*
222 * Macro for VMDq feature for 1 GbE NIC.
223 */
224 #define E1000_VMOLR_SIZE (8)
225 #define IGB_TSO_MAX_HDRLEN (512)
226 #define IGB_TSO_MAX_MSS (9216)
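/* TSO requests exceeding these header-length/MSS limits are downgraded to
 * plain TCP checksum offload by check_tso_para(), and rejected with EINVAL
 * by eth_igb_prep_pkts(). */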
227
228 /*********************************************************************
229 *
230 * TX function
231 *
232 **********************************************************************/
233
234 /*
235 * There are some hardware limitations for TCP segmentation offload. We
236 * should check whether the parameters are valid.
237 */
238 static inline uint64_t
239 check_tso_para(uint64_t ol_req, union igb_tx_offload ol_para)
240 {
241 if (!(ol_req & RTE_MBUF_F_TX_TCP_SEG))
242 return ol_req;
243 if ((ol_para.tso_segsz > IGB_TSO_MAX_MSS) || (ol_para.l2_len +
244 ol_para.l3_len + ol_para.l4_len > IGB_TSO_MAX_HDRLEN)) {
245 ol_req &= ~RTE_MBUF_F_TX_TCP_SEG;
246 ol_req |= RTE_MBUF_F_TX_TCP_CKSUM;
247 }
248 return ol_req;
249 }
250
251 /*
252 * Advanced context descriptors are almost the same between igb and ixgbe.
253 * This is kept as a separate function to leave room for optimization;
254 * rework is required to go with the pre-defined values.
255 */
256
257 static inline void
258 igbe_set_xmit_ctx(struct igb_tx_queue *txq,
259 volatile struct e1000_adv_tx_context_desc *ctx_txd,
260 uint64_t ol_flags, union igb_tx_offload tx_offload)
261 {
262 uint32_t type_tucmd_mlhl;
263 uint32_t mss_l4len_idx;
264 uint32_t ctx_idx, ctx_curr;
265 uint32_t vlan_macip_lens;
266 union igb_tx_offload tx_offload_mask;
267
268 ctx_curr = txq->ctx_curr;
269 ctx_idx = ctx_curr + txq->ctx_start;
270
271 tx_offload_mask.data = 0;
272 type_tucmd_mlhl = 0;
273
274 /* Specify which HW CTX to upload. */
275 mss_l4len_idx = (ctx_idx << E1000_ADVTXD_IDX_SHIFT);
276
277 if (ol_flags & RTE_MBUF_F_TX_VLAN)
278 tx_offload_mask.data |= TX_VLAN_CMP_MASK;
279
280 /* check if TCP segmentation required for this packet */
281 if (ol_flags & RTE_MBUF_F_TX_TCP_SEG) {
282 /* implies IP cksum in IPv4 */
283 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
284 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4 |
285 E1000_ADVTXD_TUCMD_L4T_TCP |
286 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
287 else
288 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV6 |
289 E1000_ADVTXD_TUCMD_L4T_TCP |
290 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
291
292 tx_offload_mask.data |= TX_TSO_CMP_MASK;
293 mss_l4len_idx |= tx_offload.tso_segsz << E1000_ADVTXD_MSS_SHIFT;
294 mss_l4len_idx |= tx_offload.l4_len << E1000_ADVTXD_L4LEN_SHIFT;
295 } else { /* no TSO, check if hardware checksum is needed */
296 if (ol_flags & (RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_L4_MASK))
297 tx_offload_mask.data |= TX_MACIP_LEN_CMP_MASK;
298
299 if (ol_flags & RTE_MBUF_F_TX_IP_CKSUM)
300 type_tucmd_mlhl = E1000_ADVTXD_TUCMD_IPV4;
301
302 switch (ol_flags & RTE_MBUF_F_TX_L4_MASK) {
303 case RTE_MBUF_F_TX_UDP_CKSUM:
304 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_UDP |
305 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
306 mss_l4len_idx |= sizeof(struct rte_udp_hdr)
307 << E1000_ADVTXD_L4LEN_SHIFT;
308 break;
309 case RTE_MBUF_F_TX_TCP_CKSUM:
310 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_TCP |
311 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
312 mss_l4len_idx |= sizeof(struct rte_tcp_hdr)
313 << E1000_ADVTXD_L4LEN_SHIFT;
314 break;
315 case RTE_MBUF_F_TX_SCTP_CKSUM:
316 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_SCTP |
317 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
318 mss_l4len_idx |= sizeof(struct rte_sctp_hdr)
319 << E1000_ADVTXD_L4LEN_SHIFT;
320 break;
321 default:
322 type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_L4T_RSV |
323 E1000_ADVTXD_DTYP_CTXT | E1000_ADVTXD_DCMD_DEXT;
324 break;
325 }
326 }
327
328 txq->ctx_cache[ctx_curr].flags = ol_flags;
329 txq->ctx_cache[ctx_curr].tx_offload.data =
330 tx_offload_mask.data & tx_offload.data;
331 txq->ctx_cache[ctx_curr].tx_offload_mask = tx_offload_mask;
332
333 ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
334 vlan_macip_lens = (uint32_t)tx_offload.data;
335 ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
336 ctx_txd->mss_l4len_idx = rte_cpu_to_le_32(mss_l4len_idx);
337 ctx_txd->u.seqnum_seed = 0;
338 }
339
340 /*
341 * Check which hardware context can be used. Use the existing match
342 * or create a new context descriptor.
343 */
344 static inline uint32_t
345 what_advctx_update(struct igb_tx_queue *txq, uint64_t flags,
346 union igb_tx_offload tx_offload)
347 {
348 /* If match with the current context */
349 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
350 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
351 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
352 return txq->ctx_curr;
353 }
354
355 /* If match with the second context */
356 txq->ctx_curr ^= 1;
357 if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
358 (txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
359 (txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
360 return txq->ctx_curr;
361 }
362
363 /* Mismatch with both cached contexts: a new context descriptor must be built */
364 return IGB_CTX_NUM;
365 }
366
367 static inline uint32_t
368 tx_desc_cksum_flags_to_olinfo(uint64_t ol_flags)
369 {
370 static const uint32_t l4_olinfo[2] = {0, E1000_ADVTXD_POPTS_TXSM};
371 static const uint32_t l3_olinfo[2] = {0, E1000_ADVTXD_POPTS_IXSM};
372 uint32_t tmp;
373
374 tmp = l4_olinfo[(ol_flags & RTE_MBUF_F_TX_L4_MASK) != RTE_MBUF_F_TX_L4_NO_CKSUM];
375 tmp |= l3_olinfo[(ol_flags & RTE_MBUF_F_TX_IP_CKSUM) != 0];
376 tmp |= l4_olinfo[(ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0];
377 return tmp;
378 }
379
380 static inline uint32_t
381 tx_desc_vlan_flags_to_cmdtype(uint64_t ol_flags)
382 {
383 uint32_t cmdtype;
384 static uint32_t vlan_cmd[2] = {0, E1000_ADVTXD_DCMD_VLE};
385 static uint32_t tso_cmd[2] = {0, E1000_ADVTXD_DCMD_TSE};
386 cmdtype = vlan_cmd[(ol_flags & RTE_MBUF_F_TX_VLAN) != 0];
387 cmdtype |= tso_cmd[(ol_flags & RTE_MBUF_F_TX_TCP_SEG) != 0];
388 return cmdtype;
389 }
390
391 uint16_t
392 eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
393 uint16_t nb_pkts)
394 {
395 struct igb_tx_queue *txq;
396 struct igb_tx_entry *sw_ring;
397 struct igb_tx_entry *txe, *txn;
398 volatile union e1000_adv_tx_desc *txr;
399 volatile union e1000_adv_tx_desc *txd;
400 struct rte_mbuf *tx_pkt;
401 struct rte_mbuf *m_seg;
402 uint64_t buf_dma_addr;
403 uint32_t olinfo_status;
404 uint32_t cmd_type_len;
405 uint32_t pkt_len;
406 uint16_t slen;
407 uint64_t ol_flags;
408 uint16_t tx_end;
409 uint16_t tx_id;
410 uint16_t tx_last;
411 uint16_t nb_tx;
412 uint64_t tx_ol_req;
413 uint32_t new_ctx = 0;
414 uint32_t ctx = 0;
415 union igb_tx_offload tx_offload = {0};
416 uint8_t *data;
417
418 txq = tx_queue;
419 sw_ring = txq->sw_ring;
420 txr = txq->tx_ring;
421 tx_id = txq->tx_tail;
422 txe = &sw_ring[tx_id];
423
424 for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
425 tx_pkt = *tx_pkts++;
426 pkt_len = tx_pkt->pkt_len;
427
428 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
429
430 /*
431 * The number of descriptors that must be allocated for a
432 * packet is the number of segments of that packet, plus 1
433 * Context Descriptor for the VLAN Tag Identifier, if any.
434 * Determine the last TX descriptor to allocate in the TX ring
435 * for the packet, starting from the current position (tx_id)
436 * in the ring.
437 */
438 tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
439
440 ol_flags = tx_pkt->ol_flags;
441 tx_ol_req = ol_flags & IGB_TX_OFFLOAD_MASK;
442
443 /* Check whether a context descriptor needs to be built. */
444 if (tx_ol_req) {
445 tx_offload.l2_len = tx_pkt->l2_len;
446 tx_offload.l3_len = tx_pkt->l3_len;
447 tx_offload.l4_len = tx_pkt->l4_len;
448 tx_offload.vlan_tci = tx_pkt->vlan_tci;
449 tx_offload.tso_segsz = tx_pkt->tso_segsz;
450 tx_ol_req = check_tso_para(tx_ol_req, tx_offload);
451
452 ctx = what_advctx_update(txq, tx_ol_req, tx_offload);
453 /* Only allocate a context descriptor if required. */
454 new_ctx = (ctx == IGB_CTX_NUM);
455 ctx = txq->ctx_curr + txq->ctx_start;
456 tx_last = (uint16_t) (tx_last + new_ctx);
457 }
458 if (tx_last >= txq->nb_tx_desc)
459 tx_last = (uint16_t) (tx_last - txq->nb_tx_desc);
460
461 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u pktlen=%u"
462 " tx_first=%u tx_last=%u",
463 (unsigned) txq->port_id,
464 (unsigned) txq->queue_id,
465 (unsigned) pkt_len,
466 (unsigned) tx_id,
467 (unsigned) tx_last);
468
469 /*
470 * Check if there are enough free descriptors in the TX ring
471 * to transmit the next packet.
472 * This operation is based on the two following rules:
473 *
474 * 1- Only check that the last needed TX descriptor can be
475 * allocated (by construction, if that descriptor is free,
476 * all intermediate ones are also free).
477 *
478 * For this purpose, the index of the last TX descriptor
479 * used for a packet (the "last descriptor" of a packet)
480 * is recorded in the TX entries (the last one included)
481 * that are associated with all TX descriptors allocated
482 * for that packet.
483 *
484 * 2- Avoid allocating the last free TX descriptor of the
485 * ring, in order to never set the TDT register with the
486 * same value stored in parallel by the NIC in the TDH
487 * register, which makes the TX engine of the NIC enter
488 * in a deadlock situation.
489 *
490 * By extension, avoid allocating a free descriptor that
491 * belongs to the last set of free descriptors allocated
492 * to the same packet previously transmitted.
493 */
494
495 /*
496 * The "last descriptor" of the previously sent packet, if any,
497 * which used the last descriptor to allocate.
498 */
499 tx_end = sw_ring[tx_last].last_id;
500
501 /*
502 * The next descriptor following that "last descriptor" in the
503 * ring.
504 */
505 tx_end = sw_ring[tx_end].next_id;
506
507 /*
508 * The "last descriptor" associated with that next descriptor.
509 */
510 tx_end = sw_ring[tx_end].last_id;
511
512 /*
513 * Check that this descriptor is free.
514 */
515 if (! (txr[tx_end].wb.status & E1000_TXD_STAT_DD)) {
516 if (nb_tx == 0)
517 return 0;
518 goto end_of_tx;
519 }
520
521 /*
522 * Set common flags of all TX Data Descriptors.
523 *
524 * The following bits must be set in all Data Descriptors:
525 * - E1000_ADVTXD_DTYP_DATA
526 * - E1000_ADVTXD_DCMD_DEXT
527 *
528 * The following bits must be set in the first Data Descriptor
529 * and are ignored in the other ones:
530 * - E1000_ADVTXD_DCMD_IFCS
531 * - E1000_ADVTXD_MAC_1588
532 * - E1000_ADVTXD_DCMD_VLE
533 *
534 * The following bits must only be set in the last Data
535 * Descriptor:
536 * - E1000_TXD_CMD_EOP
537 *
538 * The following bits can be set in any Data Descriptor, but
539 * are only set in the last Data Descriptor:
540 * - E1000_TXD_CMD_RS
541 */
542 cmd_type_len = txq->txd_type |
543 E1000_ADVTXD_DCMD_IFCS | E1000_ADVTXD_DCMD_DEXT;
544 if (tx_ol_req & RTE_MBUF_F_TX_TCP_SEG)
545 pkt_len -= (tx_pkt->l2_len + tx_pkt->l3_len + tx_pkt->l4_len);
546 olinfo_status = (pkt_len << E1000_ADVTXD_PAYLEN_SHIFT);
547 #if defined(RTE_LIBRTE_IEEE1588)
548 if (ol_flags & RTE_MBUF_F_TX_IEEE1588_TMST)
549 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
550 #endif
551 if (tx_ol_req) {
552 /* Setup TX Advanced context descriptor if required */
553 if (new_ctx) {
554 volatile struct e1000_adv_tx_context_desc *
555 ctx_txd;
556
557 ctx_txd = (volatile struct
558 e1000_adv_tx_context_desc *)
559 &txr[tx_id];
560
561 txn = &sw_ring[txe->next_id];
562 RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
563
564 if (txe->mbuf != NULL) {
565 rte_pktmbuf_free_seg(txe->mbuf);
566 txe->mbuf = NULL;
567 }
568
569 igbe_set_xmit_ctx(txq, ctx_txd, tx_ol_req, tx_offload);
570
571 txe->last_id = tx_last;
572 tx_id = txe->next_id;
573 txe = txn;
574 }
575
576 /* Setup the TX Advanced Data Descriptor */
577 cmd_type_len |= tx_desc_vlan_flags_to_cmdtype(tx_ol_req);
578 olinfo_status |= tx_desc_cksum_flags_to_olinfo(tx_ol_req);
579 olinfo_status |= (ctx << E1000_ADVTXD_IDX_SHIFT);
580 }
581
582 m_seg = tx_pkt;
583 do {
584 txn = &sw_ring[txe->next_id];
585 txd = &txr[tx_id];
586
587 if (txe->mbuf != NULL)
588 rte_pktmbuf_free_seg(txe->mbuf);
589 txe->mbuf = m_seg;
590
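/*
 * Flush each 64-byte cache line of this segment's payload (dcbf) so the
 * NIC's DMA engine reads coherent data; a 64-byte cache line size is
 * assumed here.
 */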
591 data = rte_pktmbuf_mtod(m_seg, uint8_t *);
592 for (int i = 0; i < m_seg->data_len; i += 64) {
593 dcbf(data + i);
594 }
595
596 /*
597 * Set up transmit descriptor.
598 */
599 slen = (uint16_t) m_seg->data_len;
600 buf_dma_addr = rte_mbuf_data_iova(m_seg);
601 txd->read.buffer_addr =
602 rte_cpu_to_le_64(buf_dma_addr);
603 txd->read.cmd_type_len =
604 rte_cpu_to_le_32(cmd_type_len | slen);
605 txd->read.olinfo_status =
606 rte_cpu_to_le_32(olinfo_status);
607 txe->last_id = tx_last;
608 tx_id = txe->next_id;
609 txe = txn;
610 m_seg = m_seg->next;
611 } while (m_seg != NULL);
612
613 /*
614 * The last packet data descriptor needs End Of Packet (EOP)
615 * and Report Status (RS).
616 */
617 txd->read.cmd_type_len |=
618 rte_cpu_to_le_32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
619 }
620 end_of_tx:
621 rte_wmb();
622
623 /*
624 * Set the Transmit Descriptor Tail (TDT).
625 */
626 E1000_PCI_REG_WRITE_RELAXED(txq->tdt_reg_addr, tx_id);
627 PMD_TX_LOG(DEBUG, "port_id=%u queue_id=%u tx_tail=%u nb_tx=%u",
628 (unsigned) txq->port_id, (unsigned) txq->queue_id,
629 (unsigned) tx_id, (unsigned) nb_tx);
630 txq->tx_tail = tx_id;
631
632 return nb_tx;
633 }
634
635 /*********************************************************************
636 *
637 * TX prep functions
638 *
639 **********************************************************************/
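/*
 * eth_igb_prep_pkts() is intended to be called through rte_eth_tx_prepare()
 * before rte_eth_tx_burst(): it validates the TSO limits, rejects
 * unsupported offload flags, and fixes up checksum fields via
 * rte_net_intel_cksum_prepare().
 */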
640 uint16_t
641 eth_igb_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
642 uint16_t nb_pkts)
643 {
644 int i, ret;
645 struct rte_mbuf *m;
646
647 for (i = 0; i < nb_pkts; i++) {
648 m = tx_pkts[i];
649
650 /* Check some limitations for TSO in hardware */
651 if (m->ol_flags & RTE_MBUF_F_TX_TCP_SEG)
652 if ((m->tso_segsz > IGB_TSO_MAX_MSS) ||
653 (m->l2_len + m->l3_len + m->l4_len >
654 IGB_TSO_MAX_HDRLEN)) {
655 rte_errno = EINVAL;
656 return i;
657 }
658
659 if (m->ol_flags & IGB_TX_OFFLOAD_NOTSUP_MASK) {
660 rte_errno = ENOTSUP;
661 return i;
662 }
663
664 #ifdef RTE_ETHDEV_DEBUG_TX
665 ret = rte_validate_tx_offload(m);
666 if (ret != 0) {
667 rte_errno = -ret;
668 return i;
669 }
670 #endif
671 ret = rte_net_intel_cksum_prepare(m);
672 if (ret != 0) {
673 rte_errno = -ret;
674 return i;
675 }
676 }
677
678 return i;
679 }
680
681 /*********************************************************************
682 *
683 * RX functions
684 *
685 **********************************************************************/
686 #define IGB_PACKET_TYPE_IPV4 0X01
687 #define IGB_PACKET_TYPE_IPV4_TCP 0X11
688 #define IGB_PACKET_TYPE_IPV4_UDP 0X21
689 #define IGB_PACKET_TYPE_IPV4_SCTP 0X41
690 #define IGB_PACKET_TYPE_IPV4_EXT 0X03
691 #define IGB_PACKET_TYPE_IPV4_EXT_SCTP 0X43
692 #define IGB_PACKET_TYPE_IPV6 0X04
693 #define IGB_PACKET_TYPE_IPV6_TCP 0X14
694 #define IGB_PACKET_TYPE_IPV6_UDP 0X24
695 #define IGB_PACKET_TYPE_IPV6_EXT 0X0C
696 #define IGB_PACKET_TYPE_IPV6_EXT_TCP 0X1C
697 #define IGB_PACKET_TYPE_IPV6_EXT_UDP 0X2C
698 #define IGB_PACKET_TYPE_IPV4_IPV6 0X05
699 #define IGB_PACKET_TYPE_IPV4_IPV6_TCP 0X15
700 #define IGB_PACKET_TYPE_IPV4_IPV6_UDP 0X25
701 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT 0X0D
702 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP 0X1D
703 #define IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP 0X2D
704 #define IGB_PACKET_TYPE_MAX 0X80
705 #define IGB_PACKET_TYPE_MASK 0X7F
706 #define IGB_PACKET_TYPE_SHIFT 0X04
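/*
 * The values above encode the packet-type bits reported in the RX
 * descriptor's pkt_info field; after shifting by IGB_PACKET_TYPE_SHIFT and
 * masking with IGB_PACKET_TYPE_MASK they index the ptype_table below.
 */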
707 static inline uint32_t
708 igb_rxd_pkt_info_to_pkt_type(uint16_t pkt_info)
709 {
710 static const uint32_t
711 ptype_table[IGB_PACKET_TYPE_MAX] __rte_cache_aligned = {
712 [IGB_PACKET_TYPE_IPV4] = RTE_PTYPE_L2_ETHER |
713 RTE_PTYPE_L3_IPV4,
714 [IGB_PACKET_TYPE_IPV4_EXT] = RTE_PTYPE_L2_ETHER |
715 RTE_PTYPE_L3_IPV4_EXT,
716 [IGB_PACKET_TYPE_IPV6] = RTE_PTYPE_L2_ETHER |
717 RTE_PTYPE_L3_IPV6,
718 [IGB_PACKET_TYPE_IPV4_IPV6] = RTE_PTYPE_L2_ETHER |
719 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
720 RTE_PTYPE_INNER_L3_IPV6,
721 [IGB_PACKET_TYPE_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
722 RTE_PTYPE_L3_IPV6_EXT,
723 [IGB_PACKET_TYPE_IPV4_IPV6_EXT] = RTE_PTYPE_L2_ETHER |
724 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
725 RTE_PTYPE_INNER_L3_IPV6_EXT,
726 [IGB_PACKET_TYPE_IPV4_TCP] = RTE_PTYPE_L2_ETHER |
727 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP,
728 [IGB_PACKET_TYPE_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
729 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP,
730 [IGB_PACKET_TYPE_IPV4_IPV6_TCP] = RTE_PTYPE_L2_ETHER |
731 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
732 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_TCP,
733 [IGB_PACKET_TYPE_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
734 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_TCP,
735 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_TCP] = RTE_PTYPE_L2_ETHER |
736 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
737 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_TCP,
738 [IGB_PACKET_TYPE_IPV4_UDP] = RTE_PTYPE_L2_ETHER |
739 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_UDP,
740 [IGB_PACKET_TYPE_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
741 RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP,
742 [IGB_PACKET_TYPE_IPV4_IPV6_UDP] = RTE_PTYPE_L2_ETHER |
743 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
744 RTE_PTYPE_INNER_L3_IPV6 | RTE_PTYPE_INNER_L4_UDP,
745 [IGB_PACKET_TYPE_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
746 RTE_PTYPE_L3_IPV6_EXT | RTE_PTYPE_L4_UDP,
747 [IGB_PACKET_TYPE_IPV4_IPV6_EXT_UDP] = RTE_PTYPE_L2_ETHER |
748 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_TUNNEL_IP |
749 RTE_PTYPE_INNER_L3_IPV6_EXT | RTE_PTYPE_INNER_L4_UDP,
750 [IGB_PACKET_TYPE_IPV4_SCTP] = RTE_PTYPE_L2_ETHER |
751 RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_SCTP,
752 [IGB_PACKET_TYPE_IPV4_EXT_SCTP] = RTE_PTYPE_L2_ETHER |
753 RTE_PTYPE_L3_IPV4_EXT | RTE_PTYPE_L4_SCTP,
754 };
755 if (unlikely(pkt_info & E1000_RXDADV_PKTTYPE_ETQF))
756 return RTE_PTYPE_UNKNOWN;
757
758 pkt_info = (pkt_info >> IGB_PACKET_TYPE_SHIFT) & IGB_PACKET_TYPE_MASK;
759
760 return ptype_table[pkt_info];
761 }
762
763 static inline uint64_t
764 rx_desc_hlen_type_rss_to_pkt_flags(struct igb_rx_queue *rxq, uint32_t hl_tp_rs)
765 {
766 uint64_t pkt_flags = ((hl_tp_rs & 0x0F) == 0) ? 0 : RTE_MBUF_F_RX_RSS_HASH;
767
768 #if defined(RTE_LIBRTE_IEEE1588)
769 static uint32_t ip_pkt_etqf_map[8] = {
770 0, 0, 0, RTE_MBUF_F_RX_IEEE1588_PTP,
771 0, 0, 0, 0,
772 };
773
774 struct rte_eth_dev dev = rte_eth_devices[rxq->port_id];
775 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev.data->dev_private);
776
777 /* EtherType is in bits 8:10 in Packet Type, and not in the default 0:2 */
778 if (hw->mac.type == e1000_i210)
779 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 12) & 0x07];
780 else
781 pkt_flags |= ip_pkt_etqf_map[(hl_tp_rs >> 4) & 0x07];
782 #else
783 RTE_SET_USED(rxq);
784 #endif
785
786 return pkt_flags;
787 }
788
789 static inline uint64_t
790 rx_desc_status_to_pkt_flags(uint32_t rx_status)
791 {
792 uint64_t pkt_flags;
793
794 /* Check if VLAN present */
795 pkt_flags = ((rx_status & E1000_RXD_STAT_VP) ?
796 RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED : 0);
797
798 #if defined(RTE_LIBRTE_IEEE1588)
799 if (rx_status & E1000_RXD_STAT_TMST)
800 pkt_flags = pkt_flags | RTE_MBUF_F_RX_IEEE1588_TMST;
801 #endif
802 return pkt_flags;
803 }
804
805 static inline uint64_t
806 rx_desc_error_to_pkt_flags(uint32_t rx_status)
807 {
808 /*
809 * Bit 30: IPE, IPv4 checksum error
810 * Bit 29: L4I, L4I integrity error
811 */
812
813 static uint64_t error_to_pkt_flags_map[4] = {
814 RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
815 RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
816 RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_GOOD,
817 RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD
818 };
819 return error_to_pkt_flags_map[(rx_status >>
820 E1000_RXD_ERR_CKSUM_BIT) & E1000_RXD_ERR_CKSUM_MSK];
821 }
822
823 uint16_t
824 eth_igb_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
825 uint16_t nb_pkts)
826 {
827 struct igb_rx_queue *rxq;
828 volatile union e1000_adv_rx_desc *rx_ring;
829 volatile union e1000_adv_rx_desc *rxdp;
830 struct igb_rx_entry *sw_ring;
831 struct igb_rx_entry *rxe;
832 struct rte_mbuf *rxm;
833 struct rte_mbuf *nmb;
834 union e1000_adv_rx_desc rxd;
835 uint64_t dma_addr;
836 uint32_t staterr;
837 uint32_t hlen_type_rss;
838 uint16_t pkt_len;
839 uint16_t rx_id;
840 uint16_t nb_rx;
841 uint16_t nb_hold;
842 uint64_t pkt_flags;
843
844 nb_rx = 0;
845 nb_hold = 0;
846 rxq = rx_queue;
847 rx_id = rxq->rx_tail;
848 rx_ring = rxq->rx_ring;
849 sw_ring = rxq->sw_ring;
850 while (nb_rx < nb_pkts) {
851 /*
852 * The order of operations here is important as the DD status
853 * bit must not be read after any other descriptor fields.
854 * rx_ring and rxdp are pointing to volatile data so the order
855 * of accesses cannot be reordered by the compiler. If they were
856 * not volatile, they could be reordered which could lead to
857 * using invalid descriptor fields when read from rxd.
858 */
859 rxdp = &rx_ring[rx_id];
860 staterr = rxdp->wb.upper.status_error;
861 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
862 break;
863 rxd = *rxdp;
864
865 /*
866 * End of packet.
867 *
868 * If the E1000_RXD_STAT_EOP flag is not set, the RX packet is
869 * likely to be invalid and to be dropped by the various
870 * validation checks performed by the network stack.
871 *
872 * Allocate a new mbuf to replenish the RX ring descriptor.
873 * If the allocation fails:
874 * - arrange for that RX descriptor to be the first one
875 * being parsed the next time the receive function is
876 * invoked [on the same queue].
877 *
878 * - Stop parsing the RX ring and return immediately.
879 *
880 * This policy does not drop the packet received in the RX
881 * descriptor for which the allocation of a new mbuf failed.
882 * Thus, it allows that packet to be retrieved later if
883 * mbufs have been freed in the meantime.
884 * As a side effect, holding RX descriptors instead of
885 * systematically giving them back to the NIC may lead to
886 * RX ring exhaustion situations.
887 * However, the NIC can gracefully prevent such situations
888 * from happening by sending specific "back-pressure" flow control
889 * frames to its peer(s).
890 */
891 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
892 "staterr=0x%x pkt_len=%u",
893 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
894 (unsigned) rx_id, (unsigned) staterr,
895 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
896
897 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
898 if (nmb == NULL) {
899 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
900 "queue_id=%u", (unsigned) rxq->port_id,
901 (unsigned) rxq->queue_id);
902 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
903 break;
904 }
905
906 nb_hold++;
907 rxe = &sw_ring[rx_id];
908 rx_id++;
909 if (rx_id == rxq->nb_rx_desc)
910 rx_id = 0;
911
912 /* Prefetch next mbuf while processing current one. */
913 rte_igb_prefetch(sw_ring[rx_id].mbuf);
914
915 /*
916 * When the next RX descriptor is on a cache-line boundary,
917 * prefetch the next 4 RX descriptors and the next 8 pointers
918 * to mbufs.
919 */
920 if ((rx_id & 0x3) == 0) {
921 rte_igb_prefetch(&rx_ring[rx_id]);
922 rte_igb_prefetch(&sw_ring[rx_id]);
923 }
924
925 rxm = rxe->mbuf;
926 rxe->mbuf = nmb;
927 dma_addr =
928 rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
929 rxdp->read.hdr_addr = 0;
930 rxdp->read.pkt_addr = dma_addr;
931
932 /*
933 * Initialize the returned mbuf.
934 * 1) setup generic mbuf fields:
935 * - number of segments,
936 * - next segment,
937 * - packet length,
938 * - RX port identifier.
939 * 2) integrate hardware offload data, if any:
940 * - RSS flag & hash,
941 * - IP checksum flag,
942 * - VLAN TCI, if any,
943 * - error flags.
944 */
945 pkt_len = (uint16_t) (rte_le_to_cpu_16(rxd.wb.upper.length) -
946 rxq->crc_len);
947 rxm->data_off = RTE_PKTMBUF_HEADROOM;
948 rte_packet_prefetch((char *)rxm->buf_addr + rxm->data_off);
949 rxm->nb_segs = 1;
950 rxm->next = NULL;
951 rxm->pkt_len = pkt_len;
952 rxm->data_len = pkt_len;
953 rxm->port = rxq->port_id;
954
955 rxm->hash.rss = rxd.wb.lower.hi_dword.rss;
956 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
957
958 /*
959 * The vlan_tci field is only valid when RTE_MBUF_F_RX_VLAN is
960 * set in the pkt_flags field and must be in CPU byte order.
961 */
962 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
963 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
964 rxm->vlan_tci = rte_be_to_cpu_16(rxd.wb.upper.vlan);
965 } else {
966 rxm->vlan_tci = rte_le_to_cpu_16(rxd.wb.upper.vlan);
967 }
968 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
969 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
970 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
971 rxm->ol_flags = pkt_flags;
972 rxm->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.lower.
973 lo_dword.hs_rss.pkt_info);
974
975 /*
976 * Store the mbuf address into the next entry of the array
977 * of returned packets.
978 */
979 rx_pkts[nb_rx++] = rxm;
980 }
981 rxq->rx_tail = rx_id;
982
983 /*
984 * If the number of free RX descriptors is greater than the RX free
985 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
986 * register.
987 * Update the RDT with the value of the last processed RX descriptor
988 * minus 1, to guarantee that the RDT register is never equal to the
989 * RDH register, which creates a "full" ring situation from the
990 * hardware point of view...
991 */
992 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
993 if (nb_hold > rxq->rx_free_thresh) {
994 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
995 "nb_hold=%u nb_rx=%u",
996 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
997 (unsigned) rx_id, (unsigned) nb_hold,
998 (unsigned) nb_rx);
999 rx_id = (uint16_t) ((rx_id == 0) ?
1000 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1001 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1002 nb_hold = 0;
1003 }
1004 rxq->nb_rx_hold = nb_hold;
1005 return nb_rx;
1006 }
1007
1008 uint16_t
1009 eth_igb_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
1010 uint16_t nb_pkts)
1011 {
1012 struct igb_rx_queue *rxq;
1013 volatile union e1000_adv_rx_desc *rx_ring;
1014 volatile union e1000_adv_rx_desc *rxdp;
1015 struct igb_rx_entry *sw_ring;
1016 struct igb_rx_entry *rxe;
1017 struct rte_mbuf *first_seg;
1018 struct rte_mbuf *last_seg;
1019 struct rte_mbuf *rxm;
1020 struct rte_mbuf *nmb;
1021 union e1000_adv_rx_desc rxd;
1022 uint64_t dma; /* Physical address of mbuf data buffer */
1023 uint32_t staterr;
1024 uint32_t hlen_type_rss;
1025 uint16_t rx_id;
1026 uint16_t nb_rx;
1027 uint16_t nb_hold;
1028 uint16_t data_len;
1029 uint64_t pkt_flags;
1030
1031 nb_rx = 0;
1032 nb_hold = 0;
1033 rxq = rx_queue;
1034 rx_id = rxq->rx_tail;
1035 rx_ring = rxq->rx_ring;
1036 sw_ring = rxq->sw_ring;
1037
1038 /*
1039 * Retrieve RX context of current packet, if any.
1040 */
1041 first_seg = rxq->pkt_first_seg;
1042 last_seg = rxq->pkt_last_seg;
1043
1044 while (nb_rx < nb_pkts) {
1045 next_desc:
1046 /*
1047 * The order of operations here is important as the DD status
1048 * bit must not be read after any other descriptor fields.
1049 * rx_ring and rxdp are pointing to volatile data so the order
1050 * of accesses cannot be reordered by the compiler. If they were
1051 * not volatile, they could be reordered which could lead to
1052 * using invalid descriptor fields when read from rxd.
1053 */
1054 rxdp = &rx_ring[rx_id];
1055 staterr = rxdp->wb.upper.status_error;
1056 if (! (staterr & rte_cpu_to_le_32(E1000_RXD_STAT_DD)))
1057 break;
1058 rxd = *rxdp;
1059
1060 /*
1061 * Descriptor done.
1062 *
1063 * Allocate a new mbuf to replenish the RX ring descriptor.
1064 * If the allocation fails:
1065 * - arrange for that RX descriptor to be the first one
1066 * being parsed the next time the receive function is
1067 * invoked [on the same queue].
1068 *
1069 * - Stop parsing the RX ring and return immediately.
1070 *
1071 * This policy does not drop the packet received in the RX
1072 * descriptor for which the allocation of a new mbuf failed.
1073 * Thus, it allows that packet to be retrieved later if
1074 * mbufs have been freed in the meantime.
1075 * As a side effect, holding RX descriptors instead of
1076 * systematically giving them back to the NIC may lead to
1077 * RX ring exhaustion situations.
1078 * However, the NIC can gracefully prevent such situations
1079 * from happening by sending specific "back-pressure" flow control
1080 * frames to its peer(s).
1081 */
1082 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_id=%u "
1083 "staterr=0x%x data_len=%u",
1084 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1085 (unsigned) rx_id, (unsigned) staterr,
1086 (unsigned) rte_le_to_cpu_16(rxd.wb.upper.length));
1087
1088 nmb = rte_mbuf_raw_alloc(rxq->mb_pool);
1089 if (nmb == NULL) {
1090 PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u "
1091 "queue_id=%u", (unsigned) rxq->port_id,
1092 (unsigned) rxq->queue_id);
1093 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
1094 break;
1095 }
1096
1097 nb_hold++;
1098 rxe = &sw_ring[rx_id];
1099 rx_id++;
1100 if (rx_id == rxq->nb_rx_desc)
1101 rx_id = 0;
1102
1103 /* Prefetch next mbuf while processing current one. */
1104 rte_igb_prefetch(sw_ring[rx_id].mbuf);
1105
1106 /*
1107 * When the next RX descriptor is on a cache-line boundary,
1108 * prefetch the next 4 RX descriptors and the next 8 pointers
1109 * to mbufs.
1110 */
1111 if ((rx_id & 0x3) == 0) {
1112 rte_igb_prefetch(&rx_ring[rx_id]);
1113 rte_igb_prefetch(&sw_ring[rx_id]);
1114 }
1115
1116 /*
1117 * Update RX descriptor with the physical address of the new
1118 * data buffer of the new allocated mbuf.
1119 */
1120 rxm = rxe->mbuf;
1121 rxe->mbuf = nmb;
1122 dma = rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
1123 rxdp->read.pkt_addr = dma;
1124 rxdp->read.hdr_addr = 0;
1125
1126 /*
1127 * Set data length & data buffer address of mbuf.
1128 */
1129 data_len = rte_le_to_cpu_16(rxd.wb.upper.length);
1130 rxm->data_len = data_len;
1131 rxm->data_off = RTE_PKTMBUF_HEADROOM;
1132
1133 /*
1134 * If this is the first buffer of the received packet,
1135 * set the pointer to the first mbuf of the packet and
1136 * initialize its context.
1137 * Otherwise, update the total length and the number of segments
1138 * of the current scattered packet, and update the pointer to
1139 * the last mbuf of the current packet.
1140 */
1141 if (first_seg == NULL) {
1142 first_seg = rxm;
1143 first_seg->pkt_len = data_len;
1144 first_seg->nb_segs = 1;
1145 } else {
1146 first_seg->pkt_len += data_len;
1147 first_seg->nb_segs++;
1148 last_seg->next = rxm;
1149 }
1150
1151 /*
1152 * If this is not the last buffer of the received packet,
1153 * update the pointer to the last mbuf of the current scattered
1154 * packet and continue to parse the RX ring.
1155 */
1156 if (! (staterr & E1000_RXD_STAT_EOP)) {
1157 last_seg = rxm;
1158 goto next_desc;
1159 }
1160
1161 /*
1162 * This is the last buffer of the received packet.
1163 * If the CRC is not stripped by the hardware:
1164 * - Subtract the CRC length from the total packet length.
1165 * - If the last buffer only contains the whole CRC or a part
1166 * of it, free the mbuf associated with the last buffer.
1167 * If part of the CRC is also contained in the previous
1168 * mbuf, subtract the length of that CRC part from the
1169 * data length of the previous mbuf.
1170 */
1171 rxm->next = NULL;
1172 if (unlikely(rxq->crc_len > 0)) {
1173 first_seg->pkt_len -= RTE_ETHER_CRC_LEN;
1174 if (data_len <= RTE_ETHER_CRC_LEN) {
1175 rte_pktmbuf_free_seg(rxm);
1176 first_seg->nb_segs--;
1177 last_seg->data_len = (uint16_t)
1178 (last_seg->data_len -
1179 (RTE_ETHER_CRC_LEN - data_len));
1180 last_seg->next = NULL;
1181 } else
1182 rxm->data_len = (uint16_t)
1183 (data_len - RTE_ETHER_CRC_LEN);
1184 }
1185
1186 /*
1187 * Initialize the first mbuf of the returned packet:
1188 * - RX port identifier,
1189 * - hardware offload data, if any:
1190 * - RSS flag & hash,
1191 * - IP checksum flag,
1192 * - VLAN TCI, if any,
1193 * - error flags.
1194 */
1195 first_seg->port = rxq->port_id;
1196 first_seg->hash.rss = rxd.wb.lower.hi_dword.rss;
1197
1198 /*
1199 * The vlan_tci field is only valid when RTE_MBUF_F_RX_VLAN is
1200 * set in the pkt_flags field and must be in CPU byte order.
1201 */
1202 if ((staterr & rte_cpu_to_le_32(E1000_RXDEXT_STATERR_LB)) &&
1203 (rxq->flags & IGB_RXQ_FLAG_LB_BSWAP_VLAN)) {
1204 first_seg->vlan_tci =
1205 rte_be_to_cpu_16(rxd.wb.upper.vlan);
1206 } else {
1207 first_seg->vlan_tci =
1208 rte_le_to_cpu_16(rxd.wb.upper.vlan);
1209 }
1210 hlen_type_rss = rte_le_to_cpu_32(rxd.wb.lower.lo_dword.data);
1211 pkt_flags = rx_desc_hlen_type_rss_to_pkt_flags(rxq, hlen_type_rss);
1212 pkt_flags = pkt_flags | rx_desc_status_to_pkt_flags(staterr);
1213 pkt_flags = pkt_flags | rx_desc_error_to_pkt_flags(staterr);
1214 first_seg->ol_flags = pkt_flags;
1215 first_seg->packet_type = igb_rxd_pkt_info_to_pkt_type(rxd.wb.
1216 lower.lo_dword.hs_rss.pkt_info);
1217
1218 /* Prefetch data of first segment, if configured to do so. */
1219 rte_packet_prefetch((char *)first_seg->buf_addr +
1220 first_seg->data_off);
1221
1222 /*
1223 * Store the mbuf address into the next entry of the array
1224 * of returned packets.
1225 */
1226 rx_pkts[nb_rx++] = first_seg;
1227
1228 /*
1229 * Setup receipt context for a new packet.
1230 */
1231 first_seg = NULL;
1232 }
1233
1234 /*
1235 * Record index of the next RX descriptor to probe.
1236 */
1237 rxq->rx_tail = rx_id;
1238
1239 /*
1240 * Save receive context.
1241 */
1242 rxq->pkt_first_seg = first_seg;
1243 rxq->pkt_last_seg = last_seg;
1244
1245 /*
1246 * If the number of free RX descriptors is greater than the RX free
1247 * threshold of the queue, advance the Receive Descriptor Tail (RDT)
1248 * register.
1249 * Update the RDT with the value of the last processed RX descriptor
1250 * minus 1, to guarantee that the RDT register is never equal to the
1251 * RDH register, which creates a "full" ring situation from the
1252 * hardware point of view...
1253 */
1254 nb_hold = (uint16_t) (nb_hold + rxq->nb_rx_hold);
1255 if (nb_hold > rxq->rx_free_thresh) {
1256 PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u "
1257 "nb_hold=%u nb_rx=%u",
1258 (unsigned) rxq->port_id, (unsigned) rxq->queue_id,
1259 (unsigned) rx_id, (unsigned) nb_hold,
1260 (unsigned) nb_rx);
1261 rx_id = (uint16_t) ((rx_id == 0) ?
1262 (rxq->nb_rx_desc - 1) : (rx_id - 1));
1263 E1000_PCI_REG_WRITE(rxq->rdt_reg_addr, rx_id);
1264 nb_hold = 0;
1265 }
1266 rxq->nb_rx_hold = nb_hold;
1267 return nb_rx;
1268 }
1269
1270 /*
1271 * Maximum number of Ring Descriptors.
1272 *
1273 * Since RDLEN/TDLEN should be a multiple of 128 bytes, the number of ring
1274 * descriptors should meet the following condition:
1275 * (num_ring_desc * sizeof(struct e1000_rx/tx_desc)) % 128 == 0
1276 */
1277
1278 static void
1279 igb_tx_queue_release_mbufs(struct igb_tx_queue *txq)
1280 {
1281 unsigned i;
1282
1283 if (txq->sw_ring != NULL) {
1284 for (i = 0; i < txq->nb_tx_desc; i++) {
1285 if (txq->sw_ring[i].mbuf != NULL) {
1286 rte_pktmbuf_free_seg(txq->sw_ring[i].mbuf);
1287 txq->sw_ring[i].mbuf = NULL;
1288 }
1289 }
1290 }
1291 }
1292
1293 static void
1294 igb_tx_queue_release(struct igb_tx_queue *txq)
1295 {
1296 if (txq != NULL) {
1297 igb_tx_queue_release_mbufs(txq);
1298 rte_free(txq->sw_ring);
1299 rte_memzone_free(txq->mz);
1300 rte_free(txq);
1301 }
1302 }
1303
1304 void
1305 eth_igb_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1306 {
1307 igb_tx_queue_release(dev->data->tx_queues[qid]);
1308 }
1309
1310 static int
1311 igb_tx_done_cleanup(struct igb_tx_queue *txq, uint32_t free_cnt)
1312 {
1313 struct igb_tx_entry *sw_ring;
1314 volatile union e1000_adv_tx_desc *txr;
1315 uint16_t tx_first; /* First segment analyzed. */
1316 uint16_t tx_id; /* Current segment being processed. */
1317 uint16_t tx_last; /* Last segment in the current packet. */
1318 uint16_t tx_next; /* First segment of the next packet. */
1319 int count = 0;
1320
1321 if (!txq)
1322 return -ENODEV;
1323
1324 sw_ring = txq->sw_ring;
1325 txr = txq->tx_ring;
1326
1327 /* tx_tail is the last sent packet on the sw_ring. Go to the end
1328 * of that packet (the last segment in the packet chain) and
1329 * then the next segment will be the start of the oldest segment
1330 * in the sw_ring. This is the first packet that will be
1331 * attempted to be freed.
1332 */
1333
1334 /* Get last segment in most recently added packet. */
1335 tx_first = sw_ring[txq->tx_tail].last_id;
1336
1337 /* Get the next segment, which is the oldest segment in ring. */
1338 tx_first = sw_ring[tx_first].next_id;
1339
1340 /* Set the current index to the first. */
1341 tx_id = tx_first;
1342
1343 /* Loop through each packet. For each packet, verify that an
1344 * mbuf exists and that the last segment is free. If so, free
1345 * it and move on.
1346 */
1347 while (1) {
1348 tx_last = sw_ring[tx_id].last_id;
1349
1350 if (sw_ring[tx_last].mbuf) {
1351 if (txr[tx_last].wb.status &
1352 E1000_TXD_STAT_DD) {
1353 /* Increment the number of packets
1354 * freed.
1355 */
1356 count++;
1357
1358 /* Get the start of the next packet. */
1359 tx_next = sw_ring[tx_last].next_id;
1360
1361 /* Loop through all segments in a
1362 * packet.
1363 */
1364 do {
1365 if (sw_ring[tx_id].mbuf) {
1366 rte_pktmbuf_free_seg(
1367 sw_ring[tx_id].mbuf);
1368 sw_ring[tx_id].mbuf = NULL;
1369 sw_ring[tx_id].last_id = tx_id;
1370 }
1371
1372 /* Move to the next segment. */
1373 tx_id = sw_ring[tx_id].next_id;
1374
1375 } while (tx_id != tx_next);
1376
1377 if (unlikely(count == (int)free_cnt))
1378 break;
1379 } else {
1380 /* mbuf still in use, nothing left to
1381 * free.
1382 */
1383 break;
1384 }
1385 } else {
1386 /* There are multiple reasons to be here:
1387 * 1) All the packets on the ring have been
1388 * freed - tx_id is equal to tx_first
1389 * and some packets have been freed.
1390 * - Done, exit
1391 * 2) The interface has not sent a ring's worth of
1392 * packets yet, so the segment after tail is
1393 * still empty. Or a previous call to this
1394 * function freed some of the segments but
1395 * not all, so there is a hole in the list.
1396 * Hopefully this is a rare case.
1397 * - Walk the list and find the next mbuf. If
1398 * there isn't one, then done.
1399 */
1400 if (likely(tx_id == tx_first && count != 0))
1401 break;
1402
1403 /* Walk the list and find the next mbuf, if any. */
1404 do {
1405 /* Move to the next segment. */
1406 tx_id = sw_ring[tx_id].next_id;
1407
1408 if (sw_ring[tx_id].mbuf)
1409 break;
1410
1411 } while (tx_id != tx_first);
1412
1413 /* Determine why previous loop bailed. If there
1414 * is not an mbuf, done.
1415 */
1416 if (!sw_ring[tx_id].mbuf)
1417 break;
1418 }
1419 }
1420
1421 return count;
1422 }
1423
1424 int
1425 eth_igb_tx_done_cleanup(void *txq, uint32_t free_cnt)
1426 {
1427 return igb_tx_done_cleanup(txq, free_cnt);
1428 }
1429
1430 static void
1431 igb_reset_tx_queue_stat(struct igb_tx_queue *txq)
1432 {
1433 txq->tx_head = 0;
1434 txq->tx_tail = 0;
1435 txq->ctx_curr = 0;
1436 memset((void*)&txq->ctx_cache, 0,
1437 IGB_CTX_NUM * sizeof(struct igb_advctx_info));
1438 }
1439
1440 static void
1441 igb_reset_tx_queue(struct igb_tx_queue *txq, struct rte_eth_dev *dev)
1442 {
1443 static const union e1000_adv_tx_desc zeroed_desc = {{0}};
1444 struct igb_tx_entry *txe = txq->sw_ring;
1445 uint16_t i, prev;
1446 struct e1000_hw *hw;
1447
1448 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1449 /* Zero out HW ring memory */
1450 for (i = 0; i < txq->nb_tx_desc; i++) {
1451 txq->tx_ring[i] = zeroed_desc;
1452 }
1453
1454 /* Initialize ring entries */
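/*
 * Every descriptor gets DD set in its write-back status so the transmit
 * path sees the whole ring as free, and the sw_ring entries are linked
 * into a circular list via next_id.
 */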
1455 prev = (uint16_t)(txq->nb_tx_desc - 1);
1456 for (i = 0; i < txq->nb_tx_desc; i++) {
1457 volatile union e1000_adv_tx_desc *txd = &(txq->tx_ring[i]);
1458
1459 txd->wb.status = E1000_TXD_STAT_DD;
1460 txe[i].mbuf = NULL;
1461 txe[i].last_id = i;
1462 txe[prev].next_id = i;
1463 prev = i;
1464 }
1465
1466 txq->txd_type = E1000_ADVTXD_DTYP_DATA;
1467 /* 82575 specific, each tx queue will use 2 hw contexts */
1468 if (hw->mac.type == e1000_82575)
1469 txq->ctx_start = txq->queue_id * IGB_CTX_NUM;
1470
1471 igb_reset_tx_queue_stat(txq);
1472 }
1473
1474 uint64_t
1475 igb_get_tx_port_offloads_capa(struct rte_eth_dev *dev)
1476 {
1477 uint64_t tx_offload_capa;
1478
1479 RTE_SET_USED(dev);
1480 tx_offload_capa = RTE_ETH_TX_OFFLOAD_VLAN_INSERT |
1481 RTE_ETH_TX_OFFLOAD_IPV4_CKSUM |
1482 RTE_ETH_TX_OFFLOAD_UDP_CKSUM |
1483 RTE_ETH_TX_OFFLOAD_TCP_CKSUM |
1484 RTE_ETH_TX_OFFLOAD_SCTP_CKSUM |
1485 RTE_ETH_TX_OFFLOAD_TCP_TSO |
1486 RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
1487
1488 return tx_offload_capa;
1489 }
1490
1491 uint64_t
1492 igb_get_tx_queue_offloads_capa(struct rte_eth_dev *dev)
1493 {
1494 uint64_t tx_queue_offload_capa;
1495
1496 tx_queue_offload_capa = igb_get_tx_port_offloads_capa(dev);
1497
1498 return tx_queue_offload_capa;
1499 }
1500
1501 int
1502 eth_igb_tx_queue_setup(struct rte_eth_dev *dev,
1503 uint16_t queue_idx,
1504 uint16_t nb_desc,
1505 unsigned int socket_id,
1506 const struct rte_eth_txconf *tx_conf)
1507 {
1508 const struct rte_memzone *tz;
1509 struct igb_tx_queue *txq;
1510 struct e1000_hw *hw;
1511 uint32_t size;
1512 uint64_t offloads;
1513 uint64_t index;
1514
1515 offloads = tx_conf->offloads | dev->data->dev_conf.txmode.offloads;
1516
1517 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1518
1519 /*
1520 * Validate number of transmit descriptors.
1521 * It must not exceed the hardware maximum, and must be a multiple
1522 * of E1000_ALIGN.
1523 */
1524 if (nb_desc % IGB_TXD_ALIGN != 0 ||
1525 (nb_desc > E1000_MAX_RING_DESC) ||
1526 (nb_desc < E1000_MIN_RING_DESC)) {
1527 return -EINVAL;
1528 }
1529
1530 /*
1531 * The tx_free_thresh and tx_rs_thresh values are not used in the 1G
1532 * driver.
1533 */
1534 if (tx_conf->tx_free_thresh != 0)
1535 PMD_INIT_LOG(INFO, "The tx_free_thresh parameter is not "
1536 "used for the 1G driver.");
1537 if (tx_conf->tx_rs_thresh != 0)
1538 PMD_INIT_LOG(INFO, "The tx_rs_thresh parameter is not "
1539 "used for the 1G driver.");
1540 if (tx_conf->tx_thresh.wthresh == 0 && hw->mac.type != e1000_82576)
1541 PMD_INIT_LOG(INFO, "To improve 1G driver performance, "
1542 "consider setting the TX WTHRESH value to 4, 8, "
1543 "or 16.");
1544
1545 /* Free memory prior to re-allocation if needed */
1546 if (dev->data->tx_queues[queue_idx] != NULL) {
1547 igb_tx_queue_release(dev->data->tx_queues[queue_idx]);
1548 dev->data->tx_queues[queue_idx] = NULL;
1549 }
1550
1551 /* First allocate the tx queue data structure */
1552 txq = rte_zmalloc("ethdev TX queue", sizeof(struct igb_tx_queue),
1553 RTE_CACHE_LINE_SIZE);
1554 if (txq == NULL)
1555 return -ENOMEM;
1556
1557 /*
1558 * Allocate TX ring hardware descriptors. A memzone large enough to
1559 * handle the maximum ring size is allocated in order to allow for
1560 * resizing in later calls to the queue setup function.
1561 */
1562 size = sizeof(union e1000_adv_tx_desc) * E1000_MAX_RING_DESC;
1563 tz = rte_eth_dma_zone_reserve(dev, "tx_ring", queue_idx, size,
1564 E1000_ALIGN, socket_id);
1565 if (tz == NULL) {
1566 igb_tx_queue_release(txq);
1567 return -ENOMEM;
1568 }
1569
1570 txq->mz = tz;
1571 txq->nb_tx_desc = nb_desc;
1572 txq->pthresh = tx_conf->tx_thresh.pthresh;
1573 txq->hthresh = tx_conf->tx_thresh.hthresh;
1574 txq->wthresh = tx_conf->tx_thresh.wthresh;
1575 if (txq->wthresh > 0 && hw->mac.type == e1000_82576)
1576 txq->wthresh = 1;
1577 txq->queue_id = queue_idx;
1578 txq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1579 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1580 txq->port_id = dev->data->port_id;
1581
1582 txq->tdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_TDT(txq->reg_idx));
1583 txq->tx_ring_phys_addr = tz->iova;
1584
1585 txq->tx_ring = (union e1000_adv_tx_desc *) tz->addr;
1586
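/*
 * Out-of-tree modification (assumed platform glue): the memzone-backed TX
 * ring set up above is overridden with an externally provided descriptor
 * area, selected by the controller's MMIO offset from base_hw_addr (a
 * 0x104000 stride per controller is assumed by this layout).
 */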
1587 index = ((uint64_t)hw->hw_addr - base_hw_addr) / 0x104000;
1588 txq->tx_ring_phys_addr = igb_gbd_addr_t_p[index];
1589 txq->tx_ring = (union e1000_adv_tx_desc *)igb_gbd_addr_t_v[index];
1590 printf("hw tx ring size: %d:%ld[0x%lx:%p]\n",
1591 size,
1592 sizeof(union e1000_adv_tx_desc),
1593 txq->tx_ring_phys_addr,
1594 txq->tx_ring);
1595
1596 /* Allocate software ring */
1597 txq->sw_ring = rte_zmalloc("txq->sw_ring",
1598 sizeof(struct igb_tx_entry) * nb_desc,
1599 RTE_CACHE_LINE_SIZE);
1600 if (txq->sw_ring == NULL) {
1601 igb_tx_queue_release(txq);
1602 return -ENOMEM;
1603 }
1604 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1605 txq->sw_ring, txq->tx_ring, txq->tx_ring_phys_addr);
1606
1607 igb_reset_tx_queue(txq, dev);
1608 dev->tx_pkt_burst = eth_igb_xmit_pkts;
1609 dev->tx_pkt_prepare = &eth_igb_prep_pkts;
1610 dev->data->tx_queues[queue_idx] = txq;
1611 txq->offloads = offloads;
1612
1613 return 0;
1614 }
1615
1616 static void
1617 igb_rx_queue_release_mbufs(struct igb_rx_queue *rxq)
1618 {
1619 unsigned i;
1620
1621 if (rxq->sw_ring != NULL) {
1622 for (i = 0; i < rxq->nb_rx_desc; i++) {
1623 if (rxq->sw_ring[i].mbuf != NULL) {
1624 rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
1625 rxq->sw_ring[i].mbuf = NULL;
1626 }
1627 }
1628 }
1629 }
1630
1631 static void
1632 igb_rx_queue_release(struct igb_rx_queue *rxq)
1633 {
1634 if (rxq != NULL) {
1635 igb_rx_queue_release_mbufs(rxq);
1636 rte_free(rxq->sw_ring);
1637 rte_memzone_free(rxq->mz);
1638 rte_free(rxq);
1639 }
1640 }
1641
1642 void
1643 eth_igb_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
1644 {
1645 igb_rx_queue_release(dev->data->rx_queues[qid]);
1646 }
1647
1648 static void
1649 igb_reset_rx_queue(struct igb_rx_queue *rxq)
1650 {
1651 static const union e1000_adv_rx_desc zeroed_desc = {{0}};
1652 unsigned i;
1653
1654 /* Zero out HW ring memory */
1655 for (i = 0; i < rxq->nb_rx_desc; i++) {
1656 rxq->rx_ring[i] = zeroed_desc;
1657 }
1658
1659 rxq->rx_tail = 0;
1660 rxq->pkt_first_seg = NULL;
1661 rxq->pkt_last_seg = NULL;
1662 }
1663
1664 uint64_t
1665 igb_get_rx_port_offloads_capa(struct rte_eth_dev *dev)
1666 {
1667 uint64_t rx_offload_capa;
1668 struct e1000_hw *hw;
1669
1670 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1671
1672 rx_offload_capa = RTE_ETH_RX_OFFLOAD_VLAN_STRIP |
1673 RTE_ETH_RX_OFFLOAD_VLAN_FILTER |
1674 RTE_ETH_RX_OFFLOAD_IPV4_CKSUM |
1675 RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
1676 RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
1677 RTE_ETH_RX_OFFLOAD_KEEP_CRC |
1678 RTE_ETH_RX_OFFLOAD_SCATTER |
1679 RTE_ETH_RX_OFFLOAD_RSS_HASH;
1680
1681 if (hw->mac.type == e1000_i350 ||
1682 hw->mac.type == e1000_i210 ||
1683 hw->mac.type == e1000_i211)
1684 rx_offload_capa |= RTE_ETH_RX_OFFLOAD_VLAN_EXTEND;
1685
1686 return rx_offload_capa;
1687 }
1688
1689 uint64_t
1690 igb_get_rx_queue_offloads_capa(struct rte_eth_dev *dev)
1691 {
1692 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1693 uint64_t rx_queue_offload_capa;
1694
1695 switch (hw->mac.type) {
1696 case e1000_vfadapt_i350:
1697 /*
1698 * As only one Rx queue can be used, let the per-queue offloading
1699 * capability be the same as the per-port offloading capability
1700 * for convenience.
1701 */
1702 rx_queue_offload_capa = igb_get_rx_port_offloads_capa(dev);
1703 break;
1704 default:
1705 rx_queue_offload_capa = 0;
1706 }
1707 return rx_queue_offload_capa;
1708 }
1709
1710 int
1711 eth_igb_rx_queue_setup(struct rte_eth_dev *dev,
1712 uint16_t queue_idx,
1713 uint16_t nb_desc,
1714 unsigned int socket_id,
1715 const struct rte_eth_rxconf *rx_conf,
1716 struct rte_mempool *mp)
1717 {
1718 const struct rte_memzone *rz;
1719 struct igb_rx_queue *rxq;
1720 struct e1000_hw *hw;
1721 unsigned int size;
1722 uint64_t offloads;
1723 uint64_t index;
1724
1725 offloads = rx_conf->offloads | dev->data->dev_conf.rxmode.offloads;
1726
1727 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1728
1729 /*
1730 * Validate number of receive descriptors.
1731 	 * It must not exceed the hardware maximum and must be a multiple
1732 	 * of IGB_RXD_ALIGN.
1733 */
1734 if (nb_desc % IGB_RXD_ALIGN != 0 ||
1735 (nb_desc > E1000_MAX_RING_DESC) ||
1736 (nb_desc < E1000_MIN_RING_DESC)) {
1737 return -EINVAL;
1738 }
1739
1740 /* Free memory prior to re-allocation if needed */
1741 if (dev->data->rx_queues[queue_idx] != NULL) {
1742 igb_rx_queue_release(dev->data->rx_queues[queue_idx]);
1743 dev->data->rx_queues[queue_idx] = NULL;
1744 }
1745
1746 /* First allocate the RX queue data structure. */
1747 rxq = rte_zmalloc("ethdev RX queue", sizeof(struct igb_rx_queue),
1748 RTE_CACHE_LINE_SIZE);
1749 if (rxq == NULL)
1750 return -ENOMEM;
1751 rxq->offloads = offloads;
1752 rxq->mb_pool = mp;
1753 rxq->nb_rx_desc = nb_desc;
1754 rxq->pthresh = rx_conf->rx_thresh.pthresh;
1755 rxq->hthresh = rx_conf->rx_thresh.hthresh;
1756 rxq->wthresh = rx_conf->rx_thresh.wthresh;
1757 if (rxq->wthresh > 0 &&
1758 (hw->mac.type == e1000_82576 || hw->mac.type == e1000_vfadapt_i350))
1759 rxq->wthresh = 1;
1760 rxq->drop_en = rx_conf->rx_drop_en;
1761 rxq->rx_free_thresh = rx_conf->rx_free_thresh;
1762 rxq->queue_id = queue_idx;
1763 rxq->reg_idx = (uint16_t)((RTE_ETH_DEV_SRIOV(dev).active == 0) ?
1764 queue_idx : RTE_ETH_DEV_SRIOV(dev).def_pool_q_idx + queue_idx);
1765 rxq->port_id = dev->data->port_id;
1766 if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
1767 rxq->crc_len = RTE_ETHER_CRC_LEN;
1768 else
1769 rxq->crc_len = 0;
1770
1771 /*
1772 * Allocate RX ring hardware descriptors. A memzone large enough to
1773 * handle the maximum ring size is allocated in order to allow for
1774 * resizing in later calls to the queue setup function.
1775 */
1776 size = sizeof(union e1000_adv_rx_desc) * E1000_MAX_RING_DESC;
1777 rz = rte_eth_dma_zone_reserve(dev, "rx_ring", queue_idx, size,
1778 E1000_ALIGN, socket_id);
1779 if (rz == NULL) {
1780 igb_rx_queue_release(rxq);
1781 return -ENOMEM;
1782 }
1783
1784 rxq->mz = rz;
1785 rxq->rdt_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDT(rxq->reg_idx));
1786 rxq->rdh_reg_addr = E1000_PCI_REG_ADDR(hw, E1000_RDH(rxq->reg_idx));
1787 rxq->rx_ring_phys_addr = rz->iova;
1788 rxq->rx_ring = (union e1000_adv_rx_desc *) rz->addr;
1789
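	/*
	 * Editor's note (out-of-tree change): the memzone-backed ring address
	 * set above is overridden below with a pre-reserved per-port
	 * descriptor buffer.  The port index is derived from the offset of
	 * this port's registers within the contiguous BAR mapping; the
	 * 0x104000 stride is an assumption of this patch, not a documented
	 * device constant.
	 */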
1790 index = ((uint64_t)hw->hw_addr - base_hw_addr) / 0x104000;
1791 rxq->rx_ring_phys_addr = igb_gbd_addr_r_p[index];
1792 rxq->rx_ring = (union e1000_adv_rx_desc *)igb_gbd_addr_r_v[index];
1793 printf("hw rx ring size: %d:%ld[0x%lx:%p]\n",
1794 size,
1795 sizeof(union e1000_adv_rx_desc),
1796 rxq->rx_ring_phys_addr,
1797 rxq->rx_ring);
1798
1799 /* Allocate software ring. */
1800 rxq->sw_ring = rte_zmalloc("rxq->sw_ring",
1801 sizeof(struct igb_rx_entry) * nb_desc,
1802 RTE_CACHE_LINE_SIZE);
1803 if (rxq->sw_ring == NULL) {
1804 igb_rx_queue_release(rxq);
1805 return -ENOMEM;
1806 }
1807 PMD_INIT_LOG(DEBUG, "sw_ring=%p hw_ring=%p dma_addr=0x%"PRIx64,
1808 rxq->sw_ring, rxq->rx_ring, rxq->rx_ring_phys_addr);
1809
1810 dev->data->rx_queues[queue_idx] = rxq;
1811 igb_reset_rx_queue(rxq);
1812
1813 return 0;
1814 }
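/*
 * Editor's note: a hedged application-side sketch of driving the setup
 * above through the public API (example_setup_rx_queue, the pool sizing
 * and the 512 descriptor count are illustrative assumptions); a NULL
 * rx_conf selects the driver defaults.
 */
#if 0
#include <errno.h>
#include <rte_ethdev.h>
#include <rte_mbuf.h>

static int
example_setup_rx_queue(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mempool *mp;

	/* 4096 mbufs with the default data room, on the port's NUMA node. */
	mp = rte_pktmbuf_pool_create("example_rx_pool", 4096, 256, 0,
			RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_eth_dev_socket_id(port_id));
	if (mp == NULL)
		return -ENOMEM;

	return rte_eth_rx_queue_setup(port_id, queue_id, 512,
			rte_eth_dev_socket_id(port_id), NULL, mp);
}
#endif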
1815
1816 uint32_t
1817 eth_igb_rx_queue_count(void *rx_queue)
1818 {
1819 #define IGB_RXQ_SCAN_INTERVAL 4
1820 volatile union e1000_adv_rx_desc *rxdp;
1821 struct igb_rx_queue *rxq;
1822 uint32_t desc = 0;
1823
1824 rxq = rx_queue;
1825 rxdp = &(rxq->rx_ring[rxq->rx_tail]);
1826
1827 while ((desc < rxq->nb_rx_desc) &&
1828 (rxdp->wb.upper.status_error & E1000_RXD_STAT_DD)) {
1829 desc += IGB_RXQ_SCAN_INTERVAL;
1830 rxdp += IGB_RXQ_SCAN_INTERVAL;
1831 if (rxq->rx_tail + desc >= rxq->nb_rx_desc)
1832 rxdp = &(rxq->rx_ring[rxq->rx_tail +
1833 desc - rxq->nb_rx_desc]);
1834 }
1835
1836 return desc;
1837 }
1838
1839 int
1840 eth_igb_rx_descriptor_status(void *rx_queue, uint16_t offset)
1841 {
1842 struct igb_rx_queue *rxq = rx_queue;
1843 volatile uint32_t *status;
1844 uint32_t desc;
1845
1846 if (unlikely(offset >= rxq->nb_rx_desc))
1847 return -EINVAL;
1848
1849 if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold)
1850 return RTE_ETH_RX_DESC_UNAVAIL;
1851
1852 desc = rxq->rx_tail + offset;
1853 if (desc >= rxq->nb_rx_desc)
1854 desc -= rxq->nb_rx_desc;
1855
1856 status = &rxq->rx_ring[desc].wb.upper.status_error;
1857 if (*status & rte_cpu_to_le_32(E1000_RXD_STAT_DD))
1858 return RTE_ETH_RX_DESC_DONE;
1859
1860 return RTE_ETH_RX_DESC_AVAIL;
1861 }
1862
1863 int
1864 eth_igb_tx_descriptor_status(void *tx_queue, uint16_t offset)
1865 {
1866 struct igb_tx_queue *txq = tx_queue;
1867 volatile uint32_t *status;
1868 uint32_t desc;
1869
1870 if (unlikely(offset >= txq->nb_tx_desc))
1871 return -EINVAL;
1872
1873 desc = txq->tx_tail + offset;
1874 if (desc >= txq->nb_tx_desc)
1875 desc -= txq->nb_tx_desc;
1876
1877 status = &txq->tx_ring[desc].wb.status;
1878 if (*status & rte_cpu_to_le_32(E1000_TXD_STAT_DD))
1879 return RTE_ETH_TX_DESC_DONE;
1880
1881 return RTE_ETH_TX_DESC_FULL;
1882 }
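/*
 * Editor's note: the two status helpers above back the generic
 * rte_eth_rx_descriptor_status()/rte_eth_tx_descriptor_status() calls.
 * A hedged usage sketch (the offset of 64 is an illustrative assumption):
 */
#if 0
#include <stdio.h>
#include <rte_ethdev.h>

static void
example_poll_descriptor_status(uint16_t port_id, uint16_t queue_id)
{
	/* Descriptor 64 slots past the current RX tail. */
	if (rte_eth_rx_descriptor_status(port_id, queue_id, 64) ==
			RTE_ETH_RX_DESC_DONE)
		printf("RX descriptor already holds a received packet\n");

	/* Descriptor 64 slots past the current TX tail. */
	if (rte_eth_tx_descriptor_status(port_id, queue_id, 64) ==
			RTE_ETH_TX_DESC_DONE)
		printf("TX descriptor has been written back and is reusable\n");
}
#endif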
1883
1884 void
1885 igb_dev_clear_queues(struct rte_eth_dev *dev)
1886 {
1887 uint16_t i;
1888 struct igb_tx_queue *txq;
1889 struct igb_rx_queue *rxq;
1890
1891 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1892 txq = dev->data->tx_queues[i];
1893 if (txq != NULL) {
1894 igb_tx_queue_release_mbufs(txq);
1895 igb_reset_tx_queue(txq, dev);
1896 }
1897 }
1898
1899 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1900 rxq = dev->data->rx_queues[i];
1901 if (rxq != NULL) {
1902 igb_rx_queue_release_mbufs(rxq);
1903 igb_reset_rx_queue(rxq);
1904 }
1905 }
1906 }
1907
1908 void
1909 igb_dev_free_queues(struct rte_eth_dev *dev)
1910 {
1911 uint16_t i;
1912
1913 for (i = 0; i < dev->data->nb_rx_queues; i++) {
1914 eth_igb_rx_queue_release(dev, i);
1915 dev->data->rx_queues[i] = NULL;
1916 }
1917 dev->data->nb_rx_queues = 0;
1918
1919 for (i = 0; i < dev->data->nb_tx_queues; i++) {
1920 eth_igb_tx_queue_release(dev, i);
1921 dev->data->tx_queues[i] = NULL;
1922 }
1923 dev->data->nb_tx_queues = 0;
1924 }
1925
1926 /**
1927 * Receive Side Scaling (RSS).
1928 * See section 7.1.1.7 in the following document:
1929 * "Intel 82576 GbE Controller Datasheet" - Revision 2.45 October 2009
1930 *
1931 * Principles:
1932 * The source and destination IP addresses of the IP header and the source and
1933 * destination ports of TCP/UDP headers, if any, of received packets are hashed
1934 * against a configurable random key to compute a 32-bit RSS hash result.
1935 * The seven (7) LSBs of the 32-bit hash result are used as an index into a
1936 * 128-entry redirection table (RETA). Each entry of the RETA provides a 3-bit
1937 * RSS output index which is used as the RX queue index where to store the
1938 * received packets.
1939 * The following output is supplied in the RX write-back descriptor:
1940 * - 32-bit result of the Microsoft RSS hash function,
1941 * - 4-bit RSS type field.
1942 */
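/*
 * Editor's note: a hedged sketch of the lookup described above.  The
 * reta[] array and example_reta_lookup() are illustrative stand-ins; the
 * real table lives in the 32 RETA registers programmed below.
 */
#if 0
static uint8_t
example_reta_lookup(uint32_t rss_hash, const uint8_t reta[128])
{
	/* The 7 LSBs of the hash pick one of 128 entries; each entry */
	/* holds a 3-bit RX queue index. */
	return reta[rss_hash & 0x7F] & 0x07;
}
#endif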
1943
1944 /*
1945 * RSS random key supplied in section 7.1.1.7.3 of the Intel 82576 datasheet.
1946 * Used as the default key.
1947 */
1948 static uint8_t rss_intel_key[40] = {
1949 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2,
1950 0x41, 0x67, 0x25, 0x3D, 0x43, 0xA3, 0x8F, 0xB0,
1951 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
1952 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C,
1953 0x6A, 0x42, 0xB7, 0x3B, 0xBE, 0xAC, 0x01, 0xFA,
1954 };
1955
1956 static void
1957 igb_rss_disable(struct rte_eth_dev *dev)
1958 {
1959 struct e1000_hw *hw;
1960 uint32_t mrqc;
1961
1962 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
1963 mrqc = E1000_READ_REG(hw, E1000_MRQC);
1964 mrqc &= ~E1000_MRQC_ENABLE_MASK;
1965 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
1966 }
1967
1968 static void
1969 igb_hw_rss_hash_set(struct e1000_hw *hw, struct rte_eth_rss_conf *rss_conf)
1970 {
1971 uint8_t *hash_key;
1972 uint32_t rss_key;
1973 uint32_t mrqc;
1974 uint64_t rss_hf;
1975 uint16_t i;
1976
1977 hash_key = rss_conf->rss_key;
1978 if (hash_key != NULL) {
1979 /* Fill in RSS hash key */
1980 for (i = 0; i < 10; i++) {
1981 rss_key = hash_key[(i * 4)];
1982 rss_key |= hash_key[(i * 4) + 1] << 8;
1983 rss_key |= hash_key[(i * 4) + 2] << 16;
1984 rss_key |= hash_key[(i * 4) + 3] << 24;
1985 E1000_WRITE_REG_ARRAY(hw, E1000_RSSRK(0), i, rss_key);
1986 }
1987 }
1988
1989 /* Set configured hashing protocols in MRQC register */
1990 rss_hf = rss_conf->rss_hf;
1991 mrqc = E1000_MRQC_ENABLE_RSS_4Q; /* RSS enabled. */
1992 if (rss_hf & RTE_ETH_RSS_IPV4)
1993 mrqc |= E1000_MRQC_RSS_FIELD_IPV4;
1994 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_TCP)
1995 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_TCP;
1996 if (rss_hf & RTE_ETH_RSS_IPV6)
1997 mrqc |= E1000_MRQC_RSS_FIELD_IPV6;
1998 if (rss_hf & RTE_ETH_RSS_IPV6_EX)
1999 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_EX;
2000 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_TCP)
2001 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP;
2002 if (rss_hf & RTE_ETH_RSS_IPV6_TCP_EX)
2003 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2004 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV4_UDP)
2005 mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP;
2006 if (rss_hf & RTE_ETH_RSS_NONFRAG_IPV6_UDP)
2007 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP;
2008 if (rss_hf & RTE_ETH_RSS_IPV6_UDP_EX)
2009 mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP_EX;
2010 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2011 }
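/*
 * Editor's note: the key-filling loop above packs four key bytes per RSSRK
 * register, least-significant byte first.  With the default rss_intel_key,
 * the first register is therefore written as
 * 0x6D | 0x5A << 8 | 0x56 << 16 | 0xDA << 24 = 0xDA565A6D.
 */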
2012
2013 int
2014 eth_igb_rss_hash_update(struct rte_eth_dev *dev,
2015 struct rte_eth_rss_conf *rss_conf)
2016 {
2017 struct e1000_hw *hw;
2018 uint32_t mrqc;
2019 uint64_t rss_hf;
2020
2021 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2022
2023 /*
2024 * Before changing anything, first check that the update RSS operation
2025 * does not attempt to disable RSS, if RSS was enabled at
2026 * initialization time, or does not attempt to enable RSS, if RSS was
2027 * disabled at initialization time.
2028 */
2029 rss_hf = rss_conf->rss_hf & IGB_RSS_OFFLOAD_ALL;
2030 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2031 if (!(mrqc & E1000_MRQC_ENABLE_MASK)) { /* RSS disabled */
2032 if (rss_hf != 0) /* Enable RSS */
2033 return -(EINVAL);
2034 return 0; /* Nothing to do */
2035 }
2036 /* RSS enabled */
2037 if (rss_hf == 0) /* Disable RSS */
2038 return -(EINVAL);
2039 igb_hw_rss_hash_set(hw, rss_conf);
2040 return 0;
2041 }
2042
2043 int eth_igb_rss_hash_conf_get(struct rte_eth_dev *dev,
2044 struct rte_eth_rss_conf *rss_conf)
2045 {
2046 struct e1000_hw *hw;
2047 uint8_t *hash_key;
2048 uint32_t rss_key;
2049 uint32_t mrqc;
2050 uint64_t rss_hf;
2051 uint16_t i;
2052
2053 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2054 hash_key = rss_conf->rss_key;
2055 if (hash_key != NULL) {
2056 /* Return RSS hash key */
2057 for (i = 0; i < 10; i++) {
2058 rss_key = E1000_READ_REG_ARRAY(hw, E1000_RSSRK(0), i);
2059 hash_key[(i * 4)] = rss_key & 0x000000FF;
2060 hash_key[(i * 4) + 1] = (rss_key >> 8) & 0x000000FF;
2061 hash_key[(i * 4) + 2] = (rss_key >> 16) & 0x000000FF;
2062 hash_key[(i * 4) + 3] = (rss_key >> 24) & 0x000000FF;
2063 }
2064 }
2065
2066 /* Get RSS functions configured in MRQC register */
2067 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2068 if ((mrqc & E1000_MRQC_ENABLE_RSS_4Q) == 0) { /* RSS is disabled */
2069 rss_conf->rss_hf = 0;
2070 return 0;
2071 }
2072 rss_hf = 0;
2073 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4)
2074 rss_hf |= RTE_ETH_RSS_IPV4;
2075 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_TCP)
2076 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_TCP;
2077 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6)
2078 rss_hf |= RTE_ETH_RSS_IPV6;
2079 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_EX)
2080 rss_hf |= RTE_ETH_RSS_IPV6_EX;
2081 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP)
2082 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_TCP;
2083 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_TCP_EX)
2084 rss_hf |= RTE_ETH_RSS_IPV6_TCP_EX;
2085 if (mrqc & E1000_MRQC_RSS_FIELD_IPV4_UDP)
2086 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV4_UDP;
2087 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP)
2088 rss_hf |= RTE_ETH_RSS_NONFRAG_IPV6_UDP;
2089 if (mrqc & E1000_MRQC_RSS_FIELD_IPV6_UDP_EX)
2090 rss_hf |= RTE_ETH_RSS_IPV6_UDP_EX;
2091 rss_conf->rss_hf = rss_hf;
2092 return 0;
2093 }
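/*
 * Editor's note: the two functions above implement the generic
 * rte_eth_dev_rss_hash_update()/rte_eth_dev_rss_hash_conf_get() ops.
 * A hedged usage sketch (example_update_rss_hash and the chosen hash
 * fields are illustrative assumptions):
 */
#if 0
#include <rte_ethdev.h>

static int
example_update_rss_hash(uint16_t port_id)
{
	struct rte_eth_rss_conf conf = {
		.rss_key = NULL,	/* keep the currently programmed key */
		.rss_hf = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
	};
	int ret = rte_eth_dev_rss_hash_update(port_id, &conf);

	if (ret != 0)
		return ret;

	/* Read back what the hardware is actually hashing on. */
	return rte_eth_dev_rss_hash_conf_get(port_id, &conf);
}
#endif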
2094
2095 static void
2096 igb_rss_configure(struct rte_eth_dev *dev)
2097 {
2098 struct rte_eth_rss_conf rss_conf;
2099 struct e1000_hw *hw;
2100 uint32_t shift;
2101 uint16_t i;
2102
2103 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2104
2105 /* Fill in redirection table. */
2106 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2107 for (i = 0; i < 128; i++) {
2108 union e1000_reta {
2109 uint32_t dword;
2110 uint8_t bytes[4];
2111 } reta;
2112 uint8_t q_idx;
2113
2114 q_idx = (uint8_t) ((dev->data->nb_rx_queues > 1) ?
2115 i % dev->data->nb_rx_queues : 0);
2116 reta.bytes[i & 3] = (uint8_t) (q_idx << shift);
2117 if ((i & 3) == 3)
2118 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2119 }
2120
2121 /*
2122 * Configure the RSS key and the RSS protocols used to compute
2123 * the RSS hash of input packets.
2124 */
2125 rss_conf = dev->data->dev_conf.rx_adv_conf.rss_conf;
2126 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2127 igb_rss_disable(dev);
2128 return;
2129 }
2130 if (rss_conf.rss_key == NULL)
2131 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2132 igb_hw_rss_hash_set(hw, &rss_conf);
2133 }
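/*
 * Editor's note: igb_rss_configure() is driven by the configuration passed
 * to rte_eth_dev_configure().  A hedged sketch of enabling RSS from the
 * application side (example_configure_rss and the hash field selection are
 * illustrative assumptions):
 */
#if 0
#include <rte_ethdev.h>

static int
example_configure_rss(uint16_t port_id, uint16_t nb_rxq, uint16_t nb_txq)
{
	struct rte_eth_conf conf = {
		.rxmode = { .mq_mode = RTE_ETH_MQ_RX_RSS },
		.rx_adv_conf.rss_conf = {
			.rss_key = NULL,	/* fall back to rss_intel_key */
			.rss_hf = RTE_ETH_RSS_IPV4 |
				RTE_ETH_RSS_NONFRAG_IPV4_UDP,
		},
	};

	return rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &conf);
}
#endif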
2134
2135 /*
2136 * Check if the mac type support VMDq or not.
2137 * Return 1 if it supports, otherwise, return 0.
2138 */
2139 static int
2140 igb_is_vmdq_supported(const struct rte_eth_dev *dev)
2141 {
2142 const struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2143
2144 switch (hw->mac.type) {
2145 case e1000_82576:
2146 case e1000_82580:
2147 case e1000_i350:
2148 return 1;
2149 case e1000_82540:
2150 case e1000_82541:
2151 case e1000_82542:
2152 case e1000_82543:
2153 case e1000_82544:
2154 case e1000_82545:
2155 case e1000_82546:
2156 case e1000_82547:
2157 case e1000_82571:
2158 case e1000_82572:
2159 case e1000_82573:
2160 case e1000_82574:
2161 case e1000_82583:
2162 case e1000_i210:
2163 case e1000_i211:
2164 default:
2165 PMD_INIT_LOG(ERR, "Cannot support VMDq feature");
2166 return 0;
2167 }
2168 }
2169
2170 static int
2171 igb_vmdq_rx_hw_configure(struct rte_eth_dev *dev)
2172 {
2173 struct rte_eth_vmdq_rx_conf *cfg;
2174 struct e1000_hw *hw;
2175 uint32_t mrqc, vt_ctl, vmolr, rctl;
2176 int i;
2177
2178 PMD_INIT_FUNC_TRACE();
2179
2180 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2181 cfg = &dev->data->dev_conf.rx_adv_conf.vmdq_rx_conf;
2182
2183 /* Check if mac type can support VMDq, return value of 0 means NOT support */
2184 if (igb_is_vmdq_supported(dev) == 0)
2185 return -1;
2186
2187 igb_rss_disable(dev);
2188
2189 	/* RCTL: enable VLAN filter */
2190 rctl = E1000_READ_REG(hw, E1000_RCTL);
2191 rctl |= E1000_RCTL_VFE;
2192 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2193
2194 /* MRQC: enable vmdq */
2195 mrqc = E1000_READ_REG(hw, E1000_MRQC);
2196 mrqc |= E1000_MRQC_ENABLE_VMDQ;
2197 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2198
2199 /* VTCTL: pool selection according to VLAN tag */
2200 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL);
2201 if (cfg->enable_default_pool)
2202 vt_ctl |= (cfg->default_pool << E1000_VT_CTL_DEFAULT_POOL_SHIFT);
2203 vt_ctl |= E1000_VT_CTL_IGNORE_MAC;
2204 E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl);
2205
2206 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2207 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2208 vmolr &= ~(E1000_VMOLR_AUPE | E1000_VMOLR_ROMPE |
2209 E1000_VMOLR_ROPE | E1000_VMOLR_BAM |
2210 E1000_VMOLR_MPME);
2211
2212 if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_UNTAG)
2213 vmolr |= E1000_VMOLR_AUPE;
2214 if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_HASH_MC)
2215 vmolr |= E1000_VMOLR_ROMPE;
2216 if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_HASH_UC)
2217 vmolr |= E1000_VMOLR_ROPE;
2218 if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_BROADCAST)
2219 vmolr |= E1000_VMOLR_BAM;
2220 if (cfg->rx_mode & RTE_ETH_VMDQ_ACCEPT_MULTICAST)
2221 vmolr |= E1000_VMOLR_MPME;
2222
2223 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2224 }
2225
2226 /*
2227 	 * VMOLR: set STRVLAN to 1 when IGMAC in VT_CTL is set to 1.
2228 	 * Both 82576 and 82580 support it.
2229 */
2230 if (hw->mac.type != e1000_i350) {
2231 for (i = 0; i < E1000_VMOLR_SIZE; i++) {
2232 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i));
2233 vmolr |= E1000_VMOLR_STRVLAN;
2234 E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr);
2235 }
2236 }
2237
2238 /* VFTA - enable all vlan filters */
2239 for (i = 0; i < IGB_VFTA_SIZE; i++)
2240 E1000_WRITE_REG(hw, (E1000_VFTA+(i*4)), UINT32_MAX);
2241
2242 	/* VFRE: enable RX for all 8 pools; both 82576 and i350 support it */
2243 if (hw->mac.type != e1000_82580)
2244 E1000_WRITE_REG(hw, E1000_VFRE, E1000_MBVFICR_VFREQ_MASK);
2245
2246 /*
2247 * RAH/RAL - allow pools to read specific mac addresses
2248 * In this case, all pools should be able to read from mac addr 0
2249 */
2250 E1000_WRITE_REG(hw, E1000_RAH(0), (E1000_RAH_AV | UINT16_MAX));
2251 E1000_WRITE_REG(hw, E1000_RAL(0), UINT32_MAX);
2252
2253 /* VLVF: set up filters for vlan tags as configured */
2254 for (i = 0; i < cfg->nb_pool_maps; i++) {
2255 /* set vlan id in VF register and set the valid bit */
2256 E1000_WRITE_REG(hw, E1000_VLVF(i), (E1000_VLVF_VLANID_ENABLE |
2257 (cfg->pool_map[i].vlan_id & RTE_ETH_VLAN_ID_MAX) |
2258 ((cfg->pool_map[i].pools << E1000_VLVF_POOLSEL_SHIFT) &
2259 E1000_VLVF_POOLSEL_MASK)));
2260 }
2261
2262 E1000_WRITE_FLUSH(hw);
2263
2264 return 0;
2265 }
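/*
 * Editor's note: the VMDq programming above is selected by the mq_mode and
 * vmdq_rx_conf the application passes at configure time.  A hedged sketch
 * (example_configure_vmdq, the VLAN id 100 and the pool mapping are
 * illustrative assumptions):
 */
#if 0
#include <rte_ethdev.h>

static int
example_configure_vmdq(uint16_t port_id)
{
	struct rte_eth_conf conf = {
		.rxmode = { .mq_mode = RTE_ETH_MQ_RX_VMDQ_ONLY },
		.rx_adv_conf.vmdq_rx_conf = {
			.nb_queue_pools = RTE_ETH_8_POOLS,
			.enable_default_pool = 0,
			.nb_pool_maps = 1,
			/* Steer VLAN 100 to pool 0. */
			.pool_map = { { .vlan_id = 100, .pools = 1ULL << 0 } },
			.rx_mode = RTE_ETH_VMDQ_ACCEPT_UNTAG |
				RTE_ETH_VMDQ_ACCEPT_BROADCAST,
		},
	};

	/* One RX/TX queue per pool on this device family. */
	return rte_eth_dev_configure(port_id, 8, 8, &conf);
}
#endif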
2266
2267
2268 /*********************************************************************
2269 *
2270 * Enable receive unit.
2271 *
2272 **********************************************************************/
2273
2274 static int
2275 igb_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
2276 {
2277 struct igb_rx_entry *rxe = rxq->sw_ring;
2278 uint64_t dma_addr;
2279 unsigned i;
2280
2281 /* Initialize software ring entries. */
2282 for (i = 0; i < rxq->nb_rx_desc; i++) {
2283 volatile union e1000_adv_rx_desc *rxd;
2284 struct rte_mbuf *mbuf = rte_mbuf_raw_alloc(rxq->mb_pool);
2285
2286 if (mbuf == NULL) {
2287 PMD_INIT_LOG(ERR, "RX mbuf alloc failed "
2288 "queue_id=%hu", rxq->queue_id);
2289 return -ENOMEM;
2290 }
2291 dma_addr =
2292 rte_cpu_to_le_64(rte_mbuf_data_iova_default(mbuf));
2293 rxd = &rxq->rx_ring[i];
2294 rxd->read.hdr_addr = 0;
2295 rxd->read.pkt_addr = dma_addr;
2296 rxe[i].mbuf = mbuf;
2297 }
2298
2299 return 0;
2300 }
2301
2302 #define E1000_MRQC_DEF_Q_SHIFT (3)
2303 static int
2304 igb_dev_mq_rx_configure(struct rte_eth_dev *dev)
2305 {
2306 struct e1000_hw *hw =
2307 E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2308 uint32_t mrqc;
2309
2310 if (RTE_ETH_DEV_SRIOV(dev).active == RTE_ETH_8_POOLS) {
2311 /*
2312 * SRIOV active scheme
2313 * FIXME if support RSS together with VMDq & SRIOV
2314 */
2315 mrqc = E1000_MRQC_ENABLE_VMDQ;
2316 /* 011b Def_Q ignore, according to VT_CTL.DEF_PL */
2317 mrqc |= 0x3 << E1000_MRQC_DEF_Q_SHIFT;
2318 E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
2319 	} else if (RTE_ETH_DEV_SRIOV(dev).active == 0) {
2320 /*
2321 * SRIOV inactive scheme
2322 */
2323 switch (dev->data->dev_conf.rxmode.mq_mode) {
2324 case RTE_ETH_MQ_RX_RSS:
2325 igb_rss_configure(dev);
2326 break;
2327 case RTE_ETH_MQ_RX_VMDQ_ONLY:
2328 /*Configure general VMDQ only RX parameters*/
2329 igb_vmdq_rx_hw_configure(dev);
2330 break;
2331 case RTE_ETH_MQ_RX_NONE:
2332 /* if mq_mode is none, disable rss mode.*/
2333 default:
2334 igb_rss_disable(dev);
2335 break;
2336 }
2337 }
2338
2339 return 0;
2340 }
2341
2342 int
2343 eth_igb_rx_init(struct rte_eth_dev *dev)
2344 {
2345 struct rte_eth_rxmode *rxmode;
2346 struct e1000_hw *hw;
2347 struct igb_rx_queue *rxq;
2348 uint32_t rctl;
2349 uint32_t rxcsum;
2350 uint32_t srrctl;
2351 uint16_t buf_size;
2352 uint16_t rctl_bsize;
2353 uint32_t max_len;
2354 uint16_t i;
2355 int ret;
2356
2357 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2358 srrctl = 0;
2359
2360 /*
2361 * Make sure receives are disabled while setting
2362 * up the descriptor ring.
2363 */
2364 rctl = E1000_READ_REG(hw, E1000_RCTL);
2365 E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);
2366
2367 rxmode = &dev->data->dev_conf.rxmode;
2368
2369 /*
2370 * Configure support of jumbo frames, if any.
2371 */
2372 max_len = dev->data->mtu + E1000_ETH_OVERHEAD;
2373 if (dev->data->mtu > RTE_ETHER_MTU) {
2374 rctl |= E1000_RCTL_LPE;
2375
2376 /*
2377 * Set maximum packet length by default, and might be updated
2378 * together with enabling/disabling dual VLAN.
2379 */
2380 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_VLAN_EXTEND)
2381 max_len += VLAN_TAG_SIZE;
2382
2383 E1000_WRITE_REG(hw, E1000_RLPML, max_len);
2384 } else
2385 rctl &= ~E1000_RCTL_LPE;
2386
2387 /* Configure and enable each RX queue. */
2388 rctl_bsize = 0;
2389 dev->rx_pkt_burst = eth_igb_recv_pkts;
2390 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2391 uint64_t bus_addr;
2392 uint32_t rxdctl;
2393
2394 rxq = dev->data->rx_queues[i];
2395
2396 rxq->flags = 0;
2397 /*
2398 * i350 and i354 vlan packets have vlan tags byte swapped.
2399 */
2400 if (hw->mac.type == e1000_i350 || hw->mac.type == e1000_i354) {
2401 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2402 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2403 } else {
2404 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2405 }
2406
2407 /* Allocate buffers for descriptor rings and set up queue */
2408 ret = igb_alloc_rx_queue_mbufs(rxq);
2409 if (ret)
2410 return ret;
2411
2412 /*
2413 * Reset crc_len in case it was changed after queue setup by a
2414 * call to configure
2415 */
2416 if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC)
2417 rxq->crc_len = RTE_ETHER_CRC_LEN;
2418 else
2419 rxq->crc_len = 0;
2420
2421 bus_addr = rxq->rx_ring_phys_addr;
2422 E1000_WRITE_REG(hw, E1000_RDLEN(rxq->reg_idx),
2423 rxq->nb_rx_desc *
2424 sizeof(union e1000_adv_rx_desc));
2425 E1000_WRITE_REG(hw, E1000_RDBAH(rxq->reg_idx),
2426 (uint32_t)(bus_addr >> 32));
2427 E1000_WRITE_REG(hw, E1000_RDBAL(rxq->reg_idx), (uint32_t)bus_addr);
2428
2429 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2430
2431 /*
2432 * Configure RX buffer size.
2433 */
2434 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2435 RTE_PKTMBUF_HEADROOM);
2436 if (buf_size >= 1024) {
2437 /*
2438 * Configure the BSIZEPACKET field of the SRRCTL
2439 * register of the queue.
2440 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2441 * If this field is equal to 0b, then RCTL.BSIZE
2442 * determines the RX packet buffer size.
2443 */
2444 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2445 E1000_SRRCTL_BSIZEPKT_MASK);
2446 buf_size = (uint16_t) ((srrctl &
2447 E1000_SRRCTL_BSIZEPKT_MASK) <<
2448 E1000_SRRCTL_BSIZEPKT_SHIFT);
2449
2450 /* It adds dual VLAN length for supporting dual VLAN */
2451 if ((max_len + 2 * VLAN_TAG_SIZE) > buf_size) {
2452 if (!dev->data->scattered_rx)
2453 PMD_INIT_LOG(DEBUG,
2454 "forcing scatter mode");
2455 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2456 dev->data->scattered_rx = 1;
2457 }
2458 } else {
2459 /*
2460 * Use BSIZE field of the device RCTL register.
2461 */
2462 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2463 rctl_bsize = buf_size;
2464 if (!dev->data->scattered_rx)
2465 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2466 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2467 dev->data->scattered_rx = 1;
2468 }
2469
2470 /* Set if packets are dropped when no descriptors available */
2471 if (rxq->drop_en)
2472 srrctl |= E1000_SRRCTL_DROP_EN;
2473
2474 E1000_WRITE_REG(hw, E1000_SRRCTL(rxq->reg_idx), srrctl);
2475
2476 /* Enable this RX queue. */
2477 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rxq->reg_idx));
2478 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2479 rxdctl &= 0xFFF00000;
2480 rxdctl |= (rxq->pthresh & 0x1F);
2481 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2482 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2483 E1000_WRITE_REG(hw, E1000_RXDCTL(rxq->reg_idx), rxdctl);
2484 }
2485
2486 if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
2487 if (!dev->data->scattered_rx)
2488 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2489 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2490 dev->data->scattered_rx = 1;
2491 }
2492
2493 /*
2494 * Setup BSIZE field of RCTL register, if needed.
2495 * Buffer sizes >= 1024 are not [supposed to be] setup in the RCTL
2496 * register, since the code above configures the SRRCTL register of
2497 * the RX queue in such a case.
2498 * All configurable sizes are:
2499 * 16384: rctl |= (E1000_RCTL_SZ_16384 | E1000_RCTL_BSEX);
2500 * 8192: rctl |= (E1000_RCTL_SZ_8192 | E1000_RCTL_BSEX);
2501 * 4096: rctl |= (E1000_RCTL_SZ_4096 | E1000_RCTL_BSEX);
2502 * 2048: rctl |= E1000_RCTL_SZ_2048;
2503 * 1024: rctl |= E1000_RCTL_SZ_1024;
2504 * 512: rctl |= E1000_RCTL_SZ_512;
2505 * 256: rctl |= E1000_RCTL_SZ_256;
2506 */
2507 if (rctl_bsize > 0) {
2508 if (rctl_bsize >= 512) /* 512 <= buf_size < 1024 - use 512 */
2509 rctl |= E1000_RCTL_SZ_512;
2510 else /* 256 <= buf_size < 512 - use 256 */
2511 rctl |= E1000_RCTL_SZ_256;
2512 }
2513
2514 /*
2515 * Configure RSS if device configured with multiple RX queues.
2516 */
2517 igb_dev_mq_rx_configure(dev);
2518
2519 /* Update the rctl since igb_dev_mq_rx_configure may change its value */
2520 rctl |= E1000_READ_REG(hw, E1000_RCTL);
2521
2522 /*
2523 * Setup the Checksum Register.
2524 * Receive Full-Packet Checksum Offload is mutually exclusive with RSS.
2525 */
2526 rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
2527 rxcsum |= E1000_RXCSUM_PCSD;
2528
2529 /* Enable both L3/L4 rx checksum offload */
2530 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_IPV4_CKSUM)
2531 rxcsum |= E1000_RXCSUM_IPOFL;
2532 else
2533 rxcsum &= ~E1000_RXCSUM_IPOFL;
2534 if (rxmode->offloads &
2535 (RTE_ETH_RX_OFFLOAD_TCP_CKSUM | RTE_ETH_RX_OFFLOAD_UDP_CKSUM))
2536 rxcsum |= E1000_RXCSUM_TUOFL;
2537 else
2538 rxcsum &= ~E1000_RXCSUM_TUOFL;
2539 if (rxmode->offloads & RTE_ETH_RX_OFFLOAD_CHECKSUM)
2540 rxcsum |= E1000_RXCSUM_CRCOFL;
2541 else
2542 rxcsum &= ~E1000_RXCSUM_CRCOFL;
2543
2544 E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
2545
2546 /* Setup the Receive Control Register. */
2547 if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_KEEP_CRC) {
2548 rctl &= ~E1000_RCTL_SECRC; /* Do not Strip Ethernet CRC. */
2549
2550 /* clear STRCRC bit in all queues */
2551 if (hw->mac.type == e1000_i350 ||
2552 hw->mac.type == e1000_i210 ||
2553 hw->mac.type == e1000_i211 ||
2554 hw->mac.type == e1000_i354) {
2555 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2556 rxq = dev->data->rx_queues[i];
2557 uint32_t dvmolr = E1000_READ_REG(hw,
2558 E1000_DVMOLR(rxq->reg_idx));
2559 dvmolr &= ~E1000_DVMOLR_STRCRC;
2560 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2561 }
2562 }
2563 } else {
2564 rctl |= E1000_RCTL_SECRC; /* Strip Ethernet CRC. */
2565
2566 /* set STRCRC bit in all queues */
2567 if (hw->mac.type == e1000_i350 ||
2568 hw->mac.type == e1000_i210 ||
2569 hw->mac.type == e1000_i211 ||
2570 hw->mac.type == e1000_i354) {
2571 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2572 rxq = dev->data->rx_queues[i];
2573 uint32_t dvmolr = E1000_READ_REG(hw,
2574 E1000_DVMOLR(rxq->reg_idx));
2575 dvmolr |= E1000_DVMOLR_STRCRC;
2576 E1000_WRITE_REG(hw, E1000_DVMOLR(rxq->reg_idx), dvmolr);
2577 }
2578 }
2579 }
2580
2581 rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2582 rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
2583 E1000_RCTL_RDMTS_HALF |
2584 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2585
2586 /* Make sure VLAN Filters are off. */
2587 if (dev->data->dev_conf.rxmode.mq_mode != RTE_ETH_MQ_RX_VMDQ_ONLY)
2588 rctl &= ~E1000_RCTL_VFE;
2589 /* Don't store bad packets. */
2590 rctl &= ~E1000_RCTL_SBP;
2591
2592 /* Enable Receives. */
2593 E1000_WRITE_REG(hw, E1000_RCTL, rctl);
2594
2595 /*
2596 * Setup the HW Rx Head and Tail Descriptor Pointers.
2597 * This needs to be done after enable.
2598 */
2599 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2600 rxq = dev->data->rx_queues[i];
2601 E1000_WRITE_REG(hw, E1000_RDH(rxq->reg_idx), 0);
2602 E1000_WRITE_REG(hw, E1000_RDT(rxq->reg_idx), rxq->nb_rx_desc - 1);
2603 }
2604
2605 return 0;
2606 }
2607
2608 /*********************************************************************
2609 *
2610 * Enable transmit unit.
2611 *
2612 **********************************************************************/
2613 void
2614 eth_igb_tx_init(struct rte_eth_dev *dev)
2615 {
2616 struct e1000_hw *hw;
2617 struct igb_tx_queue *txq;
2618 uint32_t tctl;
2619 uint32_t txdctl;
2620 uint16_t i;
2621
2622 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2623
2624 /* Setup the Base and Length of the Tx Descriptor Rings. */
2625 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2626 uint64_t bus_addr;
2627 txq = dev->data->tx_queues[i];
2628 bus_addr = txq->tx_ring_phys_addr;
2629
2630 E1000_WRITE_REG(hw, E1000_TDLEN(txq->reg_idx),
2631 txq->nb_tx_desc *
2632 sizeof(union e1000_adv_tx_desc));
2633 E1000_WRITE_REG(hw, E1000_TDBAH(txq->reg_idx),
2634 (uint32_t)(bus_addr >> 32));
2635 E1000_WRITE_REG(hw, E1000_TDBAL(txq->reg_idx), (uint32_t)bus_addr);
2636
2637 /* Setup the HW Tx Head and Tail descriptor pointers. */
2638 E1000_WRITE_REG(hw, E1000_TDT(txq->reg_idx), 0);
2639 E1000_WRITE_REG(hw, E1000_TDH(txq->reg_idx), 0);
2640
2641 /* Setup Transmit threshold registers. */
2642 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(txq->reg_idx));
2643 txdctl |= txq->pthresh & 0x1F;
2644 txdctl |= ((txq->hthresh & 0x1F) << 8);
2645 txdctl |= ((txq->wthresh & 0x1F) << 16);
2646 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2647 E1000_WRITE_REG(hw, E1000_TXDCTL(txq->reg_idx), txdctl);
2648 }
2649
2650 /* Program the Transmit Control Register. */
2651 tctl = E1000_READ_REG(hw, E1000_TCTL);
2652 tctl &= ~E1000_TCTL_CT;
2653 tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
2654 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));
2655
2656 e1000_config_collision_dist(hw);
2657
2658 /* This write will effectively turn on the transmit unit. */
2659 E1000_WRITE_REG(hw, E1000_TCTL, tctl);
2660 }
2661
2662 /*********************************************************************
2663 *
2664 * Enable VF receive unit.
2665 *
2666 **********************************************************************/
2667 int
2668 eth_igbvf_rx_init(struct rte_eth_dev *dev)
2669 {
2670 struct e1000_hw *hw;
2671 struct igb_rx_queue *rxq;
2672 uint32_t srrctl;
2673 uint16_t buf_size;
2674 uint16_t rctl_bsize;
2675 uint32_t max_len;
2676 uint16_t i;
2677 int ret;
2678
2679 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2680
2681 /* setup MTU */
2682 max_len = dev->data->mtu + E1000_ETH_OVERHEAD;
2683 e1000_rlpml_set_vf(hw, (uint16_t)(max_len + VLAN_TAG_SIZE));
2684
2685 /* Configure and enable each RX queue. */
2686 rctl_bsize = 0;
2687 dev->rx_pkt_burst = eth_igb_recv_pkts;
2688 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2689 uint64_t bus_addr;
2690 uint32_t rxdctl;
2691
2692 rxq = dev->data->rx_queues[i];
2693
2694 rxq->flags = 0;
2695 /*
2696 * i350VF LB vlan packets have vlan tags byte swapped.
2697 */
2698 if (hw->mac.type == e1000_vfadapt_i350) {
2699 rxq->flags |= IGB_RXQ_FLAG_LB_BSWAP_VLAN;
2700 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap required");
2701 } else {
2702 PMD_INIT_LOG(DEBUG, "IGB rx vlan bswap not required");
2703 }
2704
2705 /* Allocate buffers for descriptor rings and set up queue */
2706 ret = igb_alloc_rx_queue_mbufs(rxq);
2707 if (ret)
2708 return ret;
2709
2710 bus_addr = rxq->rx_ring_phys_addr;
2711 E1000_WRITE_REG(hw, E1000_RDLEN(i),
2712 rxq->nb_rx_desc *
2713 sizeof(union e1000_adv_rx_desc));
2714 E1000_WRITE_REG(hw, E1000_RDBAH(i),
2715 (uint32_t)(bus_addr >> 32));
2716 E1000_WRITE_REG(hw, E1000_RDBAL(i), (uint32_t)bus_addr);
2717
2718 srrctl = E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2719
2720 /*
2721 * Configure RX buffer size.
2722 */
2723 buf_size = (uint16_t)(rte_pktmbuf_data_room_size(rxq->mb_pool) -
2724 RTE_PKTMBUF_HEADROOM);
2725 if (buf_size >= 1024) {
2726 /*
2727 * Configure the BSIZEPACKET field of the SRRCTL
2728 * register of the queue.
2729 * Value is in 1 KB resolution, from 1 KB to 127 KB.
2730 * If this field is equal to 0b, then RCTL.BSIZE
2731 * determines the RX packet buffer size.
2732 */
2733 srrctl |= ((buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) &
2734 E1000_SRRCTL_BSIZEPKT_MASK);
2735 buf_size = (uint16_t) ((srrctl &
2736 E1000_SRRCTL_BSIZEPKT_MASK) <<
2737 E1000_SRRCTL_BSIZEPKT_SHIFT);
2738
2739 /* It adds dual VLAN length for supporting dual VLAN */
2740 if ((max_len + 2 * VLAN_TAG_SIZE) > buf_size) {
2741 if (!dev->data->scattered_rx)
2742 PMD_INIT_LOG(DEBUG,
2743 "forcing scatter mode");
2744 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2745 dev->data->scattered_rx = 1;
2746 }
2747 } else {
2748 /*
2749 * Use BSIZE field of the device RCTL register.
2750 */
2751 if ((rctl_bsize == 0) || (rctl_bsize > buf_size))
2752 rctl_bsize = buf_size;
2753 if (!dev->data->scattered_rx)
2754 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2755 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2756 dev->data->scattered_rx = 1;
2757 }
2758
2759 /* Set if packets are dropped when no descriptors available */
2760 if (rxq->drop_en)
2761 srrctl |= E1000_SRRCTL_DROP_EN;
2762
2763 E1000_WRITE_REG(hw, E1000_SRRCTL(i), srrctl);
2764
2765 /* Enable this RX queue. */
2766 rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(i));
2767 rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2768 rxdctl &= 0xFFF00000;
2769 rxdctl |= (rxq->pthresh & 0x1F);
2770 rxdctl |= ((rxq->hthresh & 0x1F) << 8);
2771 if (hw->mac.type == e1000_vfadapt) {
2772 /*
2773 * Workaround of 82576 VF Erratum
2774 * force set WTHRESH to 1
2775 * to avoid Write-Back not triggered sometimes
2776 */
2777 rxdctl |= 0x10000;
2778 PMD_INIT_LOG(DEBUG, "Force set RX WTHRESH to 1 !");
2779 }
2780 else
2781 rxdctl |= ((rxq->wthresh & 0x1F) << 16);
2782 E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl);
2783 }
2784
2785 if (dev->data->dev_conf.rxmode.offloads & RTE_ETH_RX_OFFLOAD_SCATTER) {
2786 if (!dev->data->scattered_rx)
2787 PMD_INIT_LOG(DEBUG, "forcing scatter mode");
2788 dev->rx_pkt_burst = eth_igb_recv_scattered_pkts;
2789 dev->data->scattered_rx = 1;
2790 }
2791
2792 /*
2793 * Setup the HW Rx Head and Tail Descriptor Pointers.
2794 * This needs to be done after enable.
2795 */
2796 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2797 rxq = dev->data->rx_queues[i];
2798 E1000_WRITE_REG(hw, E1000_RDH(i), 0);
2799 E1000_WRITE_REG(hw, E1000_RDT(i), rxq->nb_rx_desc - 1);
2800 }
2801
2802 return 0;
2803 }
2804
2805 /*********************************************************************
2806 *
2807 * Enable VF transmit unit.
2808 *
2809 **********************************************************************/
2810 void
2811 eth_igbvf_tx_init(struct rte_eth_dev *dev)
2812 {
2813 struct e1000_hw *hw;
2814 struct igb_tx_queue *txq;
2815 uint32_t txdctl;
2816 uint16_t i;
2817
2818 hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2819
2820 /* Setup the Base and Length of the Tx Descriptor Rings. */
2821 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2822 uint64_t bus_addr;
2823
2824 txq = dev->data->tx_queues[i];
2825 bus_addr = txq->tx_ring_phys_addr;
2826 E1000_WRITE_REG(hw, E1000_TDLEN(i),
2827 txq->nb_tx_desc *
2828 sizeof(union e1000_adv_tx_desc));
2829 E1000_WRITE_REG(hw, E1000_TDBAH(i),
2830 (uint32_t)(bus_addr >> 32));
2831 E1000_WRITE_REG(hw, E1000_TDBAL(i), (uint32_t)bus_addr);
2832
2833 /* Setup the HW Tx Head and Tail descriptor pointers. */
2834 E1000_WRITE_REG(hw, E1000_TDT(i), 0);
2835 E1000_WRITE_REG(hw, E1000_TDH(i), 0);
2836
2837 /* Setup Transmit threshold registers. */
2838 txdctl = E1000_READ_REG(hw, E1000_TXDCTL(i));
2839 txdctl |= txq->pthresh & 0x1F;
2840 txdctl |= ((txq->hthresh & 0x1F) << 8);
2841 if (hw->mac.type == e1000_82576) {
2842 /*
2843 * Workaround of 82576 VF Erratum
2844 * force set WTHRESH to 1
2845 * to avoid Write-Back not triggered sometimes
2846 */
2847 txdctl |= 0x10000;
2848 PMD_INIT_LOG(DEBUG, "Force set TX WTHRESH to 1 !");
2849 }
2850 else
2851 txdctl |= ((txq->wthresh & 0x1F) << 16);
2852 txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2853 E1000_WRITE_REG(hw, E1000_TXDCTL(i), txdctl);
2854 }
2855
2856 }
2857
2858 void
2859 igb_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2860 struct rte_eth_rxq_info *qinfo)
2861 {
2862 struct igb_rx_queue *rxq;
2863
2864 rxq = dev->data->rx_queues[queue_id];
2865
2866 qinfo->mp = rxq->mb_pool;
2867 qinfo->scattered_rx = dev->data->scattered_rx;
2868 qinfo->nb_desc = rxq->nb_rx_desc;
2869
2870 qinfo->conf.rx_free_thresh = rxq->rx_free_thresh;
2871 qinfo->conf.rx_drop_en = rxq->drop_en;
2872 qinfo->conf.offloads = rxq->offloads;
2873 }
2874
2875 void
2876 igb_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
2877 struct rte_eth_txq_info *qinfo)
2878 {
2879 struct igb_tx_queue *txq;
2880
2881 txq = dev->data->tx_queues[queue_id];
2882
2883 qinfo->nb_desc = txq->nb_tx_desc;
2884
2885 qinfo->conf.tx_thresh.pthresh = txq->pthresh;
2886 qinfo->conf.tx_thresh.hthresh = txq->hthresh;
2887 qinfo->conf.tx_thresh.wthresh = txq->wthresh;
2888 qinfo->conf.offloads = txq->offloads;
2889 }
2890
2891 int
2892 igb_rss_conf_init(struct rte_eth_dev *dev,
2893 struct igb_rte_flow_rss_conf *out,
2894 const struct rte_flow_action_rss *in)
2895 {
2896 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2897
2898 if (in->key_len > RTE_DIM(out->key) ||
2899 ((hw->mac.type == e1000_82576) &&
2900 (in->queue_num > IGB_MAX_RX_QUEUE_NUM_82576)) ||
2901 ((hw->mac.type != e1000_82576) &&
2902 (in->queue_num > IGB_MAX_RX_QUEUE_NUM)))
2903 return -EINVAL;
2904 out->conf = (struct rte_flow_action_rss){
2905 .func = in->func,
2906 .level = in->level,
2907 .types = in->types,
2908 .key_len = in->key_len,
2909 .queue_num = in->queue_num,
2910 .key = memcpy(out->key, in->key, in->key_len),
2911 .queue = memcpy(out->queue, in->queue,
2912 sizeof(*in->queue) * in->queue_num),
2913 };
2914 return 0;
2915 }
2916
2917 int
2918 igb_action_rss_same(const struct rte_flow_action_rss *comp,
2919 const struct rte_flow_action_rss *with)
2920 {
2921 return (comp->func == with->func &&
2922 comp->level == with->level &&
2923 comp->types == with->types &&
2924 comp->key_len == with->key_len &&
2925 comp->queue_num == with->queue_num &&
2926 !memcmp(comp->key, with->key, with->key_len) &&
2927 !memcmp(comp->queue, with->queue,
2928 sizeof(*with->queue) * with->queue_num));
2929 }
2930
2931 int
2932 igb_config_rss_filter(struct rte_eth_dev *dev,
2933 struct igb_rte_flow_rss_conf *conf, bool add)
2934 {
2935 uint32_t shift;
2936 uint16_t i, j;
2937 struct rte_eth_rss_conf rss_conf = {
2938 .rss_key = conf->conf.key_len ?
2939 (void *)(uintptr_t)conf->conf.key : NULL,
2940 .rss_key_len = conf->conf.key_len,
2941 .rss_hf = conf->conf.types,
2942 };
2943 struct e1000_filter_info *filter_info =
2944 E1000_DEV_PRIVATE_TO_FILTER_INFO(dev->data->dev_private);
2945 struct e1000_hw *hw = E1000_DEV_PRIVATE_TO_HW(dev->data->dev_private);
2946
2948
2949 if (!add) {
2950 if (igb_action_rss_same(&filter_info->rss_info.conf,
2951 &conf->conf)) {
2952 igb_rss_disable(dev);
2953 memset(&filter_info->rss_info, 0,
2954 sizeof(struct igb_rte_flow_rss_conf));
2955 return 0;
2956 }
2957 return -EINVAL;
2958 }
2959
2960 if (filter_info->rss_info.conf.queue_num)
2961 return -EINVAL;
2962
2963 /* Fill in redirection table. */
2964 shift = (hw->mac.type == e1000_82575) ? 6 : 0;
2965 for (i = 0, j = 0; i < 128; i++, j++) {
2966 union e1000_reta {
2967 uint32_t dword;
2968 uint8_t bytes[4];
2969 } reta;
2970 uint8_t q_idx;
2971
2972 if (j == conf->conf.queue_num)
2973 j = 0;
2974 q_idx = conf->conf.queue[j];
2975 reta.bytes[i & 3] = (uint8_t)(q_idx << shift);
2976 if ((i & 3) == 3)
2977 E1000_WRITE_REG(hw, E1000_RETA(i >> 2), reta.dword);
2978 }
2979
2980 /* Configure the RSS key and the RSS protocols used to compute
2981 * the RSS hash of input packets.
2982 */
2983 if ((rss_conf.rss_hf & IGB_RSS_OFFLOAD_ALL) == 0) {
2984 igb_rss_disable(dev);
2985 return 0;
2986 }
2987 if (rss_conf.rss_key == NULL)
2988 rss_conf.rss_key = rss_intel_key; /* Default hash key */
2989 igb_hw_rss_hash_set(hw, &rss_conf);
2990
2991 if (igb_rss_conf_init(dev, &filter_info->rss_info, &conf->conf))
2992 return -EINVAL;
2993
2994 return 0;
2995 }
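/*
 * Editor's note: igb_rss_conf_init()/igb_config_rss_filter() above are
 * reached through the rte_flow API.  A hedged sketch of installing an RSS
 * action (example_rss_flow and the hash type selection are illustrative
 * assumptions; whether the empty ETH pattern is accepted depends on the
 * driver's flow parser):
 */
#if 0
#include <rte_flow.h>

static struct rte_flow *
example_rss_flow(uint16_t port_id, const uint16_t *queues, uint16_t nb_queues)
{
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_rss rss = {
		.types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
		.key_len = 0,		/* keep the driver's default key */
		.queue_num = nb_queues,
		.queue = queues,
	};
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif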
2996