/*******************************************************************************
 *
 * Intel Ethernet Controller XL710 Family Linux Driver
 * Copyright(c) 2013 - 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Contact Information:
 * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
 * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 *
 ******************************************************************************/
#include <linux/prefetch.h>
#include <net/busy_poll.h>
#include "i40e.h"
#include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
				u32 td_tag)
{
	return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
			   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
			   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
			   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
			   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
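/* Example: build_ctob(I40E_TXD_CMD, 0, 256, 0) packs DTYPE_DATA, the
 * EOP|RS command bits and a 256 byte buffer size into the single
 * little-endian quad word that the hardware reads as Tx descriptor qword 1.
 */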
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
#define I40E_FD_CLEAN_DELAY 10
/**
 * i40e_program_fdir_filter - Program a Flow Director filter
 * @fdir_data: Packet data that will be filter parameters
 * @raw_packet: the pre-allocated packet buffer for FDir
 * @pf: the PF pointer
 * @add: True for add/update, False for remove
 **/
int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
			     struct i40e_pf *pf, bool add)
{
	struct i40e_filter_program_desc *fdir_desc;
	struct i40e_tx_buffer *tx_buf, *first;
	struct i40e_tx_desc *tx_desc;
	struct i40e_ring *tx_ring;
	unsigned int fpt, dcc;
	struct i40e_vsi *vsi;
	struct device *dev;
	dma_addr_t dma;
	u32 td_cmd = 0;
	u16 delay = 0;
	u16 i;

	/* find existing FDIR VSI */
	vsi = NULL;
	for (i = 0; i < pf->num_alloc_vsi; i++)
		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
			vsi = pf->vsi[i];
	if (!vsi)
		return -ENOENT;

	tx_ring = vsi->tx_rings[0];
	dev = tx_ring->dev;
	/* we need two descriptors to add/del a filter and we can wait */
	do {
		if (I40E_DESC_UNUSED(tx_ring) > 1)
			break;
		msleep_interruptible(1);
		delay++;
	} while (delay < I40E_FD_CLEAN_DELAY);

	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
		return -EAGAIN;
	dma = dma_map_single(dev, raw_packet,
			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, dma))
		goto dma_fail;
93 /* grab the next descriptor */
94 i = tx_ring->next_to_use;
95 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96 first = &tx_ring->tx_bi[i];
97 memset(first, 0, sizeof(struct i40e_tx_buffer));
99 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
101 fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102 I40E_TXD_FLTR_QW0_QINDEX_MASK;
104 fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105 I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
107 fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108 I40E_TXD_FLTR_QW0_PCTYPE_MASK;
110 /* Use LAN VSI Id if not programmed by user */
111 if (fdir_data->dest_vsi == 0)
112 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
115 fpt |= ((u32)fdir_data->dest_vsi <<
116 I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117 I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
119 dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
122 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
125 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126 I40E_TXD_FLTR_QW1_PCMD_SHIFT;
128 dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129 I40E_TXD_FLTR_QW1_DEST_MASK;
131 dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132 I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
134 if (fdir_data->cnt_index != 0) {
135 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136 dcc |= ((u32)fdir_data->cnt_index <<
137 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
141 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142 fdir_desc->rsvd = cpu_to_le32(0);
143 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144 fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
146 /* Now program a dummy descriptor */
147 i = tx_ring->next_to_use;
148 tx_desc = I40E_TX_DESC(tx_ring, i);
149 tx_buf = &tx_ring->tx_bi[i];
151 tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
153 memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
155 /* record length, and DMA address */
156 dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157 dma_unmap_addr_set(tx_buf, dma, dma);
159 tx_desc->buffer_addr = cpu_to_le64(dma);
160 td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
162 tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163 tx_buf->raw_buf = (void *)raw_packet;
165 tx_desc->cmd_type_offset_bsz =
166 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
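	/* The dummy data descriptor carries the raw packet itself; the
	 * filter's match fields are taken from this packet (see the
	 * i40e_add_del_fdir_* helpers below that build it), which is why a
	 * programming descriptor and a data descriptor are needed per filter.
	 */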
168 /* Force memory writes to complete before letting h/w
169 * know there are new descriptors to fetch.
173 /* Mark the data descriptor to be watched */
174 first->next_to_watch = tx_desc;
176 writel(tx_ring->next_to_use, tx_ring->tail);
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
186 * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187 * @vsi: pointer to the targeted VSI
188 * @fd_data: the flow director data required for the FDir descriptor
189 * @add: true adds a filter, false removes it
191 * Returns 0 if the filters were successfully added or removed
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194 struct i40e_fdir_filter *fd_data,
197 struct i40e_pf *pf = vsi->back;
203 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
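	/* The template above is a minimal Ethernet + IPv4 + UDP frame:
	 * 0x08 0x00 is the IPv4 ethertype, 0x45 is version/IHL and 0x11 is
	 * the UDP protocol number.  Addresses and ports are left zero and
	 * patched in below from the user's filter before programming.
	 */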
207 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
210 memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
212 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213 udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214 + sizeof(struct iphdr));
216 ip->daddr = fd_data->dst_ip[0];
217 udp->dest = fd_data->dst_port;
218 ip->saddr = fd_data->src_ip[0];
219 udp->source = fd_data->src_port;
221 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
224 dev_info(&pf->pdev->dev,
225 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226 fd_data->pctype, fd_data->fd_id, ret);
228 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
230 dev_info(&pf->pdev->dev,
231 "Filter OK for PCTYPE %d loc = %d\n",
232 fd_data->pctype, fd_data->fd_id);
234 dev_info(&pf->pdev->dev,
235 "Filter deleted for PCTYPE %d loc = %d\n",
236 fd_data->pctype, fd_data->fd_id);
238 return err ? -EOPNOTSUPP : 0;
241 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
243 * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
244 * @vsi: pointer to the targeted VSI
245 * @fd_data: the flow director data required for the FDir descriptor
246 * @add: true adds a filter, false removes it
248 * Returns 0 if the filters were successfully added or removed
250 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
251 struct i40e_fdir_filter *fd_data,
254 struct i40e_pf *pf = vsi->back;
261 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
262 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
263 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
264 0x0, 0x72, 0, 0, 0, 0};
266 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
269 memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
271 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
272 tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
273 + sizeof(struct iphdr));
275 ip->daddr = fd_data->dst_ip[0];
276 tcp->dest = fd_data->dst_port;
277 ip->saddr = fd_data->src_ip[0];
278 tcp->source = fd_data->src_port;
282 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
283 if (I40E_DEBUG_FD & pf->hw.debug_mask)
284 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
285 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
288 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
289 (pf->fd_tcp_rule - 1) : 0;
290 if (pf->fd_tcp_rule == 0) {
291 pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
292 if (I40E_DEBUG_FD & pf->hw.debug_mask)
293 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
297 fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
298 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
301 dev_info(&pf->pdev->dev,
302 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
303 fd_data->pctype, fd_data->fd_id, ret);
305 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
			dev_info(&pf->pdev->dev,
				 "Filter OK for PCTYPE %d loc = %d\n",
				 fd_data->pctype, fd_data->fd_id);
310 dev_info(&pf->pdev->dev,
311 "Filter deleted for PCTYPE %d loc = %d\n",
312 fd_data->pctype, fd_data->fd_id);
315 return err ? -EOPNOTSUPP : 0;
319 * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
320 * a specific flow spec
321 * @vsi: pointer to the targeted VSI
322 * @fd_data: the flow director data required for the FDir descriptor
323 * @add: true adds a filter, false removes it
325 * Always returns -EOPNOTSUPP
327 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
328 struct i40e_fdir_filter *fd_data,
334 #define I40E_IP_DUMMY_PACKET_LEN 34
336 * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
337 * a specific flow spec
338 * @vsi: pointer to the targeted VSI
339 * @fd_data: the flow director data required for the FDir descriptor
340 * @add: true adds a filter, false removes it
342 * Returns 0 if the filters were successfully added or removed
344 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
345 struct i40e_fdir_filter *fd_data,
348 struct i40e_pf *pf = vsi->back;
354 static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
355 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
358 for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
359 i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
360 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
363 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
364 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
366 ip->saddr = fd_data->src_ip[0];
367 ip->daddr = fd_data->dst_ip[0];
371 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
374 dev_info(&pf->pdev->dev,
375 "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
376 fd_data->pctype, fd_data->fd_id, ret);
378 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
380 dev_info(&pf->pdev->dev,
381 "Filter OK for PCTYPE %d loc = %d\n",
382 fd_data->pctype, fd_data->fd_id);
384 dev_info(&pf->pdev->dev,
385 "Filter deleted for PCTYPE %d loc = %d\n",
386 fd_data->pctype, fd_data->fd_id);
390 return err ? -EOPNOTSUPP : 0;
/**
 * i40e_add_del_fdir - Build raw packets to add/del fdir filter
 * @vsi: pointer to the targeted VSI
 * @input: the filter parameters to add or delete
 * @add: true adds a filter, false removes it
 *
 **/
int i40e_add_del_fdir(struct i40e_vsi *vsi,
		      struct i40e_fdir_filter *input, bool add)
{
	struct i40e_pf *pf = vsi->back;
	int ret;
406 switch (input->flow_type & ~FLOW_EXT) {
408 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
411 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
414 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
417 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
420 switch (input->ip4_proto) {
422 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
425 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
428 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
431 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
436 dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
441 /* The buffer allocated here is freed by the i40e_clean_tx_ring() */
446 * i40e_fd_handle_status - check the Programming Status for FD
447 * @rx_ring: the Rx ring for this descriptor
448 * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
449 * @prog_id: the id originally used for programming
451 * This is used to verify if the FD programming or invalidation
452 * requested by SW to the HW is successful or not and take actions accordingly.
454 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
455 union i40e_rx_desc *rx_desc, u8 prog_id)
457 struct i40e_pf *pf = rx_ring->vsi->back;
458 struct pci_dev *pdev = pf->pdev;
459 u32 fcnt_prog, fcnt_avail;
463 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
464 error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
465 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
467 if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
468 pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
469 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
470 (I40E_DEBUG_FD & pf->hw.debug_mask))
471 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
474 /* Check if the programming error is for ATR.
475 * If so, auto disable ATR and set a state for
476 * flush in progress. Next time we come here if flush is in
477 * progress do nothing, once flush is complete the state will
480 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
484 /* store the current atr filter count */
485 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
487 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
488 (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
489 pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
490 set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
493 /* filter programming failed most likely due to table full */
494 fcnt_prog = i40e_get_global_fd_count(pf);
495 fcnt_avail = pf->fdir_pf_filter_count;
496 /* If ATR is running fcnt_prog can quickly change,
497 * if we are very close to full, it makes sense to disable
498 * FD ATR/SB and then re-enable it when there is room.
500 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
501 if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
502 !(pf->auto_disable_flags &
503 I40E_FLAG_FD_SB_ENABLED)) {
504 if (I40E_DEBUG_FD & pf->hw.debug_mask)
505 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
506 pf->auto_disable_flags |=
507 I40E_FLAG_FD_SB_ENABLED;
511 "FD filter programming failed due to incorrect filter parameters\n");
513 } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
514 if (I40E_DEBUG_FD & pf->hw.debug_mask)
515 dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
516 rx_desc->wb.qword0.hi_dword.fd_id);
521 * i40e_unmap_and_free_tx_resource - Release a Tx buffer
522 * @ring: the ring that owns the buffer
523 * @tx_buffer: the buffer to free
525 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
526 struct i40e_tx_buffer *tx_buffer)
528 if (tx_buffer->skb) {
529 if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
530 kfree(tx_buffer->raw_buf);
532 dev_kfree_skb_any(tx_buffer->skb);
534 if (dma_unmap_len(tx_buffer, len))
535 dma_unmap_single(ring->dev,
536 dma_unmap_addr(tx_buffer, dma),
537 dma_unmap_len(tx_buffer, len),
539 } else if (dma_unmap_len(tx_buffer, len)) {
540 dma_unmap_page(ring->dev,
541 dma_unmap_addr(tx_buffer, dma),
542 dma_unmap_len(tx_buffer, len),
545 tx_buffer->next_to_watch = NULL;
546 tx_buffer->skb = NULL;
547 dma_unmap_len_set(tx_buffer, len, 0);
548 /* tx_buffer must be completely set up in the transmit path */
/**
 * i40e_clean_tx_ring - Free any Tx buffers remaining in the ring
 * @tx_ring: ring to be cleaned
 **/
void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
{
557 unsigned long bi_size;
560 /* ring already cleared, nothing to do */
564 /* Free all the Tx ring sk_buffs */
565 for (i = 0; i < tx_ring->count; i++)
566 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
568 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
569 memset(tx_ring->tx_bi, 0, bi_size);
571 /* Zero out the descriptor ring */
572 memset(tx_ring->desc, 0, tx_ring->size);
574 tx_ring->next_to_use = 0;
575 tx_ring->next_to_clean = 0;
577 if (!tx_ring->netdev)
580 /* cleanup Tx queue statistics */
581 netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
582 tx_ring->queue_index));
586 * i40e_free_tx_resources - Free Tx resources per queue
587 * @tx_ring: Tx descriptor ring for a specific queue
589 * Free all transmit software resources
591 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
593 i40e_clean_tx_ring(tx_ring);
594 kfree(tx_ring->tx_bi);
595 tx_ring->tx_bi = NULL;
598 dma_free_coherent(tx_ring->dev, tx_ring->size,
599 tx_ring->desc, tx_ring->dma);
600 tx_ring->desc = NULL;
/**
 * i40e_get_tx_pending - how many Tx descriptors not processed
 * @ring: the ring of descriptors
 *
 * Since there is no access to the ring head register
 * in XL710, we need to use our local copies
 **/
u32 i40e_get_tx_pending(struct i40e_ring *ring)
{
	u32 head = i40e_get_head(ring);
	u32 tail = readl(ring->tail);

	if (head != tail)
		return (head < tail) ?
			tail - head : (tail + ring->count - head);

	return 0;
}
#define WB_STRIDE 0x3
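/* With WB_STRIDE of 0x3, the cleanup path asks the hardware for an explicit
 * descriptor write-back once fewer than WB_STRIDE + 1 (i.e. 4) descriptors
 * remain pending; see the arm_wb handling in i40e_clean_tx_irq().
 */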
/**
 * i40e_clean_tx_irq - Reclaim resources after transmit completes
 * @tx_ring: Tx ring to clean
 * @budget: how many cleans we're allowed
 *
 * Returns true if there's any budget left (i.e. the clean is finished)
 **/
static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
{
636 u16 i = tx_ring->next_to_clean;
637 struct i40e_tx_buffer *tx_buf;
638 struct i40e_tx_desc *tx_head;
639 struct i40e_tx_desc *tx_desc;
640 unsigned int total_packets = 0;
641 unsigned int total_bytes = 0;
643 tx_buf = &tx_ring->tx_bi[i];
644 tx_desc = I40E_TX_DESC(tx_ring, i);
647 tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
650 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
652 /* if next_to_watch is not set then there is no work pending */
656 /* prevent any other reads prior to eop_desc */
657 read_barrier_depends();
659 /* we have caught up to head, no work left to do */
660 if (tx_head == tx_desc)
663 /* clear next_to_watch to prevent false hangs */
664 tx_buf->next_to_watch = NULL;
666 /* update the statistics for this packet */
667 total_bytes += tx_buf->bytecount;
668 total_packets += tx_buf->gso_segs;
671 dev_consume_skb_any(tx_buf->skb);
673 /* unmap skb header data */
674 dma_unmap_single(tx_ring->dev,
675 dma_unmap_addr(tx_buf, dma),
676 dma_unmap_len(tx_buf, len),
679 /* clear tx_buffer data */
681 dma_unmap_len_set(tx_buf, len, 0);
683 /* unmap remaining buffers */
684 while (tx_desc != eop_desc) {
691 tx_buf = tx_ring->tx_bi;
692 tx_desc = I40E_TX_DESC(tx_ring, 0);
695 /* unmap any remaining paged data */
696 if (dma_unmap_len(tx_buf, len)) {
697 dma_unmap_page(tx_ring->dev,
698 dma_unmap_addr(tx_buf, dma),
699 dma_unmap_len(tx_buf, len),
701 dma_unmap_len_set(tx_buf, len, 0);
705 /* move us one more past the eop_desc for start of next pkt */
711 tx_buf = tx_ring->tx_bi;
712 tx_desc = I40E_TX_DESC(tx_ring, 0);
717 /* update budget accounting */
719 } while (likely(budget));
722 tx_ring->next_to_clean = i;
723 u64_stats_update_begin(&tx_ring->syncp);
724 tx_ring->stats.bytes += total_bytes;
725 tx_ring->stats.packets += total_packets;
726 u64_stats_update_end(&tx_ring->syncp);
727 tx_ring->q_vector->tx.total_bytes += total_bytes;
728 tx_ring->q_vector->tx.total_packets += total_packets;
730 if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
733 /* check to see if there are < 4 descriptors
734 * waiting to be written back, then kick the hardware to force
735 * them to be written back in case we stay in NAPI.
736 * In this mode on X722 we do not enable Interrupt.
738 j = i40e_get_tx_pending(tx_ring);
741 ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
742 !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
743 (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
744 tx_ring->arm_wb = true;
747 netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
748 tx_ring->queue_index),
749 total_packets, total_bytes);
751 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
752 if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
753 (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
754 /* Make sure that anybody stopping the queue after this
755 * sees the new next_to_clean.
758 if (__netif_subqueue_stopped(tx_ring->netdev,
759 tx_ring->queue_index) &&
760 !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
761 netif_wake_subqueue(tx_ring->netdev,
762 tx_ring->queue_index);
763 ++tx_ring->tx_stats.restart_queue;
771 * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
772 * @vsi: the VSI we care about
773 * @q_vector: the vector on which to force writeback
776 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
778 u16 flags = q_vector->tx.ring[0].flags;
780 if (flags & I40E_TXR_FLAGS_WB_ON_ITR) {
783 if (q_vector->arm_wb_state)
786 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK;
789 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
790 vsi->base_vector - 1),
792 q_vector->arm_wb_state = true;
793 } else if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
794 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
795 I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
796 I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
797 I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
798 /* allow 00 to be written to the index */
801 I40E_PFINT_DYN_CTLN(q_vector->v_idx +
802 vsi->base_vector - 1), val);
804 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
805 I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
806 I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
807 I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
808 /* allow 00 to be written to the index */
810 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
815 * i40e_set_new_dynamic_itr - Find new ITR level
816 * @rc: structure containing ring performance data
818 * Returns true if ITR changed, false if not
820 * Stores a new ITR value based on packets and byte counts during
821 * the last interrupt. The advantage of per interrupt computation
822 * is faster updates and more accurate ITR for the current traffic
823 * pattern. Constants in this function were computed based on
824 * theoretical maximum wire speed and thresholds were set based on
825 * testing data as well as attempting to minimize response time
826 * while increasing bulk throughput.
828 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
830 enum i40e_latency_range new_latency_range = rc->latency_range;
831 u32 new_itr = rc->itr;
834 if (rc->total_packets == 0 || !rc->itr)
	/* simple throttle rate management
	 *   0-10MB/s   lowest (100000 ints/s)
	 *  10-20MB/s   low    (20000 ints/s)
	 *  20-1249MB/s bulk   (8000 ints/s)
	 */
842 bytes_per_int = rc->total_bytes / rc->itr;
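	/* bytes_per_int is a rough bytes-per-interrupt-interval figure; the
	 * 10 and 20 thresholds below correspond to the MB/s bands in the
	 * table above.
	 */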
843 switch (new_latency_range) {
844 case I40E_LOWEST_LATENCY:
845 if (bytes_per_int > 10)
846 new_latency_range = I40E_LOW_LATENCY;
848 case I40E_LOW_LATENCY:
849 if (bytes_per_int > 20)
850 new_latency_range = I40E_BULK_LATENCY;
851 else if (bytes_per_int <= 10)
852 new_latency_range = I40E_LOWEST_LATENCY;
854 case I40E_BULK_LATENCY:
855 if (bytes_per_int <= 20)
856 new_latency_range = I40E_LOW_LATENCY;
859 if (bytes_per_int <= 20)
860 new_latency_range = I40E_LOW_LATENCY;
863 rc->latency_range = new_latency_range;
865 switch (new_latency_range) {
866 case I40E_LOWEST_LATENCY:
867 new_itr = I40E_ITR_100K;
869 case I40E_LOW_LATENCY:
870 new_itr = I40E_ITR_20K;
872 case I40E_BULK_LATENCY:
873 new_itr = I40E_ITR_8K;
880 rc->total_packets = 0;
882 if (new_itr != rc->itr) {
891 * i40e_clean_programming_status - clean the programming status descriptor
892 * @rx_ring: the rx ring that has this descriptor
893 * @rx_desc: the rx descriptor written back by HW
895 * Flow director should handle FD_FILTER_STATUS to check its filter programming
896 * status being successful or not and take actions accordingly. FCoE should
897 * handle its context/filter programming/invalidation status and take actions.
900 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
901 union i40e_rx_desc *rx_desc)
906 qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
907 id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
908 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
910 if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
911 i40e_fd_handle_status(rx_ring, rx_desc, id);
913 else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
914 (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
915 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
920 * i40e_setup_tx_descriptors - Allocate the Tx descriptors
921 * @tx_ring: the tx ring to set up
923 * Return 0 on success, negative on error
925 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
927 struct device *dev = tx_ring->dev;
933 /* warn if we are about to overwrite the pointer */
934 WARN_ON(tx_ring->tx_bi);
935 bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
936 tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
940 /* round up to nearest 4K */
941 tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
942 /* add u32 for head writeback, align after this takes care of
943 * guaranteeing this is at least one cache line in size
945 tx_ring->size += sizeof(u32);
946 tx_ring->size = ALIGN(tx_ring->size, 4096);
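	/* The extra u32 is the DMA head write-back location that
	 * i40e_get_head() reads in the clean path in place of a head
	 * register.
	 */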
947 tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
948 &tx_ring->dma, GFP_KERNEL);
949 if (!tx_ring->desc) {
950 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
955 tx_ring->next_to_use = 0;
956 tx_ring->next_to_clean = 0;
960 kfree(tx_ring->tx_bi);
961 tx_ring->tx_bi = NULL;
966 * i40e_clean_rx_ring - Free Rx buffers
967 * @rx_ring: ring to be cleaned
969 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
971 struct device *dev = rx_ring->dev;
972 struct i40e_rx_buffer *rx_bi;
973 unsigned long bi_size;
976 /* ring already cleared, nothing to do */
980 if (ring_is_ps_enabled(rx_ring)) {
981 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
983 rx_bi = &rx_ring->rx_bi[0];
984 if (rx_bi->hdr_buf) {
985 dma_free_coherent(dev,
989 for (i = 0; i < rx_ring->count; i++) {
990 rx_bi = &rx_ring->rx_bi[i];
992 rx_bi->hdr_buf = NULL;
996 /* Free all the Rx ring sk_buffs */
997 for (i = 0; i < rx_ring->count; i++) {
998 rx_bi = &rx_ring->rx_bi[i];
1000 dma_unmap_single(dev,
1002 rx_ring->rx_buf_len,
1007 dev_kfree_skb(rx_bi->skb);
1011 if (rx_bi->page_dma) {
1016 rx_bi->page_dma = 0;
1018 __free_page(rx_bi->page);
1020 rx_bi->page_offset = 0;
1024 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1025 memset(rx_ring->rx_bi, 0, bi_size);
1027 /* Zero out the descriptor ring */
1028 memset(rx_ring->desc, 0, rx_ring->size);
1030 rx_ring->next_to_clean = 0;
1031 rx_ring->next_to_use = 0;
1035 * i40e_free_rx_resources - Free Rx resources
1036 * @rx_ring: ring to clean the resources from
1038 * Free all receive software resources
1040 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1042 i40e_clean_rx_ring(rx_ring);
1043 kfree(rx_ring->rx_bi);
1044 rx_ring->rx_bi = NULL;
1046 if (rx_ring->desc) {
1047 dma_free_coherent(rx_ring->dev, rx_ring->size,
1048 rx_ring->desc, rx_ring->dma);
1049 rx_ring->desc = NULL;
1054 * i40e_alloc_rx_headers - allocate rx header buffers
1055 * @rx_ring: ring to alloc buffers
1057 * Allocate rx header buffers for the entire ring. As these are static,
1058 * this is only called when setting up a new ring.
1060 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1062 struct device *dev = rx_ring->dev;
1063 struct i40e_rx_buffer *rx_bi;
1069 if (rx_ring->rx_bi[0].hdr_buf)
1071 /* Make sure the buffers don't cross cache line boundaries. */
1072 buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
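	/* Example (assuming a 96 byte rx_hdr_len): it rounds up to a 256 byte
	 * slot, so a 512 descriptor ring allocates 512 * 256 = 128K of header
	 * buffers in one coherent block, carved up per descriptor below.
	 */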
1073 buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1077 for (i = 0; i < rx_ring->count; i++) {
1078 rx_bi = &rx_ring->rx_bi[i];
1079 rx_bi->dma = dma + (i * buf_size);
1080 rx_bi->hdr_buf = buffer + (i * buf_size);
1085 * i40e_setup_rx_descriptors - Allocate Rx descriptors
1086 * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1088 * Returns 0 on success, negative on failure
1090 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1092 struct device *dev = rx_ring->dev;
1095 /* warn if we are about to overwrite the pointer */
1096 WARN_ON(rx_ring->rx_bi);
1097 bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1098 rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1099 if (!rx_ring->rx_bi)
1102 u64_stats_init(&rx_ring->syncp);
1104 /* Round up to nearest 4K */
1105 rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1106 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1107 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1108 rx_ring->size = ALIGN(rx_ring->size, 4096);
1109 rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1110 &rx_ring->dma, GFP_KERNEL);
1112 if (!rx_ring->desc) {
1113 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1118 rx_ring->next_to_clean = 0;
1119 rx_ring->next_to_use = 0;
1123 kfree(rx_ring->rx_bi);
1124 rx_ring->rx_bi = NULL;
1129 * i40e_release_rx_desc - Store the new tail and head values
1130 * @rx_ring: ring to bump
1131 * @val: new head index
1133 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1135 rx_ring->next_to_use = val;
1136 /* Force memory writes to complete before letting h/w
1137 * know there are new descriptors to fetch. (Only
1138 * applicable for weak-ordered memory model archs,
1142 writel(val, rx_ring->tail);
1146 * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1147 * @rx_ring: ring to place buffers on
1148 * @cleaned_count: number of buffers to replace
1150 void i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1152 u16 i = rx_ring->next_to_use;
1153 union i40e_rx_desc *rx_desc;
1154 struct i40e_rx_buffer *bi;
1156 /* do nothing if no valid netdev defined */
1157 if (!rx_ring->netdev || !cleaned_count)
1160 while (cleaned_count--) {
1161 rx_desc = I40E_RX_DESC(rx_ring, i);
1162 bi = &rx_ring->rx_bi[i];
1164 if (bi->skb) /* desc is in use */
1167 bi->page = alloc_page(GFP_ATOMIC);
1169 rx_ring->rx_stats.alloc_page_failed++;
1174 if (!bi->page_dma) {
1175 /* use a half page if we're re-using */
1176 bi->page_offset ^= PAGE_SIZE / 2;
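			/* Each page is treated as two half-page buffers;
			 * flipping the offset on re-map reuses the half that
			 * the stack is not holding a reference to.
			 */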
1177 bi->page_dma = dma_map_page(rx_ring->dev,
1182 if (dma_mapping_error(rx_ring->dev,
1184 rx_ring->rx_stats.alloc_page_failed++;
1190 dma_sync_single_range_for_device(rx_ring->dev,
1193 rx_ring->rx_hdr_len,
1195 /* Refresh the desc even if buffer_addrs didn't change
1196 * because each write-back erases this info.
1198 rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1199 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1201 if (i == rx_ring->count)
1206 if (rx_ring->next_to_use != i)
1207 i40e_release_rx_desc(rx_ring, i);
1211 * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1212 * @rx_ring: ring to place buffers on
1213 * @cleaned_count: number of buffers to replace
1215 void i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1217 u16 i = rx_ring->next_to_use;
1218 union i40e_rx_desc *rx_desc;
1219 struct i40e_rx_buffer *bi;
1220 struct sk_buff *skb;
1222 /* do nothing if no valid netdev defined */
1223 if (!rx_ring->netdev || !cleaned_count)
1226 while (cleaned_count--) {
1227 rx_desc = I40E_RX_DESC(rx_ring, i);
1228 bi = &rx_ring->rx_bi[i];
1232 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1233 rx_ring->rx_buf_len);
1235 rx_ring->rx_stats.alloc_buff_failed++;
1238 /* initialize queue mapping */
1239 skb_record_rx_queue(skb, rx_ring->queue_index);
1244 bi->dma = dma_map_single(rx_ring->dev,
1246 rx_ring->rx_buf_len,
1248 if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1249 rx_ring->rx_stats.alloc_buff_failed++;
1255 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1256 rx_desc->read.hdr_addr = 0;
1258 if (i == rx_ring->count)
1263 if (rx_ring->next_to_use != i)
1264 i40e_release_rx_desc(rx_ring, i);
1268 * i40e_receive_skb - Send a completed packet up the stack
1269 * @rx_ring: rx ring in play
1270 * @skb: packet to send up
1271 * @vlan_tag: vlan tag for packet
1273 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1274 struct sk_buff *skb, u16 vlan_tag)
1276 struct i40e_q_vector *q_vector = rx_ring->q_vector;
1278 if (vlan_tag & VLAN_VID_MASK)
1279 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1281 napi_gro_receive(&q_vector->napi, skb);
1285 * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1286 * @vsi: the VSI we care about
1287 * @skb: skb currently being received and modified
1288 * @rx_status: status value of last descriptor in packet
1289 * @rx_error: error value of last descriptor in packet
1290 * @rx_ptype: ptype value of last descriptor in packet
1292 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1293 struct sk_buff *skb,
1298 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1299 bool ipv4 = false, ipv6 = false;
1300 bool ipv4_tunnel, ipv6_tunnel;
1305 ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1306 (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1307 ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1308 (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1310 skb->ip_summed = CHECKSUM_NONE;
1312 /* Rx csum enabled and ip headers found? */
1313 if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1316 /* did the hardware decode the packet and checksum? */
1317 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1320 /* both known and outer_ip must be set for the below code to work */
1321 if (!(decoded.known && decoded.outer_ip))
1324 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1325 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4)
1327 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1328 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1332 (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1333 BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1336 /* likely incorrect csum if alternate IP extension headers found */
1338 rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1339 /* don't increment checksum err here, non-fatal err */
1342 /* there was some L4 error, count error and punt packet to the stack */
1343 if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
	/* handle packets that were not able to be checksummed due
	 * to arrival speed, in this case the stack can compute
	 * the csum.
	 */
1353 /* If VXLAN traffic has an outer UDPv4 checksum we need to check
1354 * it in the driver, hardware does not do it for us.
1355 * Since L3L4P bit was set we assume a valid IHL value (>=5)
1356 * so the total length of IPv4 header is IHL*4 bytes
	 * The UDP_0 bit *may* be set if the *inner* header is UDP
1359 if (!(vsi->back->flags & I40E_FLAG_OUTER_UDP_CSUM_CAPABLE) &&
1361 skb->transport_header = skb->mac_header +
1362 sizeof(struct ethhdr) +
1363 (ip_hdr(skb)->ihl * 4);
1365 /* Add 4 bytes for VLAN tagged packets */
1366 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1367 skb->protocol == htons(ETH_P_8021AD))
1370 if ((ip_hdr(skb)->protocol == IPPROTO_UDP) &&
1371 (udp_hdr(skb)->check != 0)) {
1372 rx_udp_csum = udp_csum(skb);
1374 csum = csum_tcpudp_magic(
1375 iph->saddr, iph->daddr,
1376 (skb->len - skb_transport_offset(skb)),
1377 IPPROTO_UDP, rx_udp_csum);
1379 if (udp_hdr(skb)->check != csum)
1382 } /* else its GRE and so no outer UDP header */
1385 skb->ip_summed = CHECKSUM_UNNECESSARY;
1386 skb->csum_level = ipv4_tunnel || ipv6_tunnel;
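	/* csum_level reports how many encapsulated checksums beyond the
	 * first were also verified; for tunneled frames we report one
	 * extra level.
	 */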
1391 vsi->back->hw_csum_rx_error++;
1395 * i40e_rx_hash - returns the hash value from the Rx descriptor
1396 * @ring: descriptor ring
1397 * @rx_desc: specific descriptor
1399 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1400 union i40e_rx_desc *rx_desc)
1402 const __le64 rss_mask =
1403 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1404 I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1406 if ((ring->netdev->features & NETIF_F_RXHASH) &&
1407 (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1408 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1414 * i40e_ptype_to_hash - get a hash type
1415 * @ptype: the ptype value from the descriptor
1417 * Returns a hash type to be used by skb_set_hash
1419 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1421 struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1424 return PKT_HASH_TYPE_NONE;
1426 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1427 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1428 return PKT_HASH_TYPE_L4;
1429 else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1430 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1431 return PKT_HASH_TYPE_L3;
1433 return PKT_HASH_TYPE_L2;
/**
 * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
 * @rx_ring: Rx ring to clean
 * @budget: how many cleans we're allowed
 *
 * Returns the number of packets cleaned
 **/
static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, int budget)
{
1445 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1446 u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1447 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1448 const int current_node = numa_mem_id();
1449 struct i40e_vsi *vsi = rx_ring->vsi;
1450 u16 i = rx_ring->next_to_clean;
1451 union i40e_rx_desc *rx_desc;
1452 u32 rx_error, rx_status;
1460 struct i40e_rx_buffer *rx_bi;
1461 struct sk_buff *skb;
1463 /* return some buffers to hardware, one at a time is too slow */
1464 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1465 i40e_alloc_rx_buffers_ps(rx_ring, cleaned_count);
1469 i = rx_ring->next_to_clean;
1470 rx_desc = I40E_RX_DESC(rx_ring, i);
1471 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1472 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1473 I40E_RXD_QW1_STATUS_SHIFT;
1475 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1478 /* This memory barrier is needed to keep us from reading
1479 * any other fields out of the rx_desc until we know the
1483 if (i40e_rx_is_programming_status(qword)) {
1484 i40e_clean_programming_status(rx_ring, rx_desc);
1485 I40E_RX_INCREMENT(rx_ring, i);
1488 rx_bi = &rx_ring->rx_bi[i];
1491 skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1492 rx_ring->rx_hdr_len);
1494 rx_ring->rx_stats.alloc_buff_failed++;
1498 /* initialize queue mapping */
1499 skb_record_rx_queue(skb, rx_ring->queue_index);
1500 /* we are reusing so sync this buffer for CPU use */
1501 dma_sync_single_range_for_cpu(rx_ring->dev,
1504 rx_ring->rx_hdr_len,
1507 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1508 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1509 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1510 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1511 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1512 I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1514 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1515 I40E_RXD_QW1_ERROR_SHIFT;
1516 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1517 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1519 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1520 I40E_RXD_QW1_PTYPE_SHIFT;
1521 prefetch(rx_bi->page);
1524 if (rx_hbo || rx_sph) {
1528 len = I40E_RX_HDR_SIZE;
1530 len = rx_header_len;
1531 memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1532 } else if (skb->len == 0) {
1535 len = (rx_packet_len > skb_headlen(skb) ?
1536 skb_headlen(skb) : rx_packet_len);
1537 memcpy(__skb_put(skb, len),
1538 rx_bi->page + rx_bi->page_offset,
1540 rx_bi->page_offset += len;
1541 rx_packet_len -= len;
1544 /* Get the rest of the data if this was a header split */
1545 if (rx_packet_len) {
1546 skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1551 skb->len += rx_packet_len;
1552 skb->data_len += rx_packet_len;
1553 skb->truesize += rx_packet_len;
1555 if ((page_count(rx_bi->page) == 1) &&
1556 (page_to_nid(rx_bi->page) == current_node))
1557 get_page(rx_bi->page);
1561 dma_unmap_page(rx_ring->dev,
1565 rx_bi->page_dma = 0;
1567 I40E_RX_INCREMENT(rx_ring, i);
1570 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1571 struct i40e_rx_buffer *next_buffer;
1573 next_buffer = &rx_ring->rx_bi[i];
1574 next_buffer->skb = skb;
1575 rx_ring->rx_stats.non_eop_descs++;
1579 /* ERR_MASK will only have valid bits if EOP set */
1580 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1581 dev_kfree_skb_any(skb);
1585 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1586 i40e_ptype_to_hash(rx_ptype));
1587 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1588 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1589 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1590 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1591 rx_ring->last_rx_timestamp = jiffies;
1594 /* probably a little skewed due to removing CRC */
1595 total_rx_bytes += skb->len;
1598 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1600 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1602 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1603 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1606 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1607 dev_kfree_skb_any(skb);
1611 skb_mark_napi_id(skb, &rx_ring->q_vector->napi);
1612 i40e_receive_skb(rx_ring, skb, vlan_tag);
1614 rx_desc->wb.qword1.status_error_len = 0;
1616 } while (likely(total_rx_packets < budget));
1618 u64_stats_update_begin(&rx_ring->syncp);
1619 rx_ring->stats.packets += total_rx_packets;
1620 rx_ring->stats.bytes += total_rx_bytes;
1621 u64_stats_update_end(&rx_ring->syncp);
1622 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1623 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1625 return total_rx_packets;
1629 * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1630 * @rx_ring: rx ring to clean
1631 * @budget: how many cleans we're allowed
1633 * Returns number of packets cleaned
1635 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1637 unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1638 u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1639 struct i40e_vsi *vsi = rx_ring->vsi;
1640 union i40e_rx_desc *rx_desc;
1641 u32 rx_error, rx_status;
1648 struct i40e_rx_buffer *rx_bi;
1649 struct sk_buff *skb;
1651 /* return some buffers to hardware, one at a time is too slow */
1652 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1653 i40e_alloc_rx_buffers_1buf(rx_ring, cleaned_count);
1657 i = rx_ring->next_to_clean;
1658 rx_desc = I40E_RX_DESC(rx_ring, i);
1659 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1660 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1661 I40E_RXD_QW1_STATUS_SHIFT;
1663 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1666 /* This memory barrier is needed to keep us from reading
1667 * any other fields out of the rx_desc until we know the
1672 if (i40e_rx_is_programming_status(qword)) {
1673 i40e_clean_programming_status(rx_ring, rx_desc);
1674 I40E_RX_INCREMENT(rx_ring, i);
1677 rx_bi = &rx_ring->rx_bi[i];
1679 prefetch(skb->data);
1681 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1682 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1684 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1685 I40E_RXD_QW1_ERROR_SHIFT;
1686 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1688 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1689 I40E_RXD_QW1_PTYPE_SHIFT;
1693 /* Get the header and possibly the whole packet
1694 * If this is an skb from previous receive dma will be 0
1696 skb_put(skb, rx_packet_len);
1697 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1701 I40E_RX_INCREMENT(rx_ring, i);
1704 !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1705 rx_ring->rx_stats.non_eop_descs++;
1709 /* ERR_MASK will only have valid bits if EOP set */
1710 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1711 dev_kfree_skb_any(skb);
1715 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1716 i40e_ptype_to_hash(rx_ptype));
1717 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1718 i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1719 I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1720 I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1721 rx_ring->last_rx_timestamp = jiffies;
1724 /* probably a little skewed due to removing CRC */
1725 total_rx_bytes += skb->len;
1728 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1730 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1732 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1733 ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1736 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1737 dev_kfree_skb_any(skb);
1741 i40e_receive_skb(rx_ring, skb, vlan_tag);
1743 rx_desc->wb.qword1.status_error_len = 0;
1744 } while (likely(total_rx_packets < budget));
1746 u64_stats_update_begin(&rx_ring->syncp);
1747 rx_ring->stats.packets += total_rx_packets;
1748 rx_ring->stats.bytes += total_rx_bytes;
1749 u64_stats_update_end(&rx_ring->syncp);
1750 rx_ring->q_vector->rx.total_packets += total_rx_packets;
1751 rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1753 return total_rx_packets;
1756 static u32 i40e_buildreg_itr(const int type, const u16 itr)
1760 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1761 I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
1762 (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1763 (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
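	/* The resulting value re-enables the interrupt, clears the PBA bit,
	 * selects which ITR index to update and supplies the new interval.
	 */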
1768 /* a small macro to shorten up some long lines */
1769 #define INTREG I40E_PFINT_DYN_CTLN
1772 * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1773 * @vsi: the VSI we care about
1774 * @q_vector: q_vector for which itr is being updated and interrupt enabled
1777 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1778 struct i40e_q_vector *q_vector)
1780 struct i40e_hw *hw = &vsi->back->hw;
1781 bool rx = false, tx = false;
1785 vector = (q_vector->v_idx + vsi->base_vector);
1787 rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1789 if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) {
1790 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1791 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1794 if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) {
1795 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1796 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1800 /* get the higher of the two ITR adjustments and
1801 * use the same value for both ITR registers
1802 * when in adaptive mode (Rx and/or Tx)
1804 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1806 q_vector->tx.itr = q_vector->rx.itr = itr;
1807 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1809 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1813 /* only need to enable the interrupt once, but need
1814 * to possibly update both ITR values
1817 /* set the INTENA_MSK_MASK so that this first write
1818 * won't actually enable the interrupt, instead just
1819 * updating the ITR (it's bit 31 PF and VF)
1822 /* don't check _DOWN because interrupt isn't being enabled */
1823 wr32(hw, INTREG(vector - 1), rxval);
1826 if (!test_bit(__I40E_DOWN, &vsi->state))
1827 wr32(hw, INTREG(vector - 1), txval);
1831 * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1832 * @napi: napi struct with our devices info in it
1833 * @budget: amount of work driver is allowed to do this pass, in packets
1835 * This function will clean all queues associated with a q_vector.
1837 * Returns the amount of work done
1839 int i40e_napi_poll(struct napi_struct *napi, int budget)
1841 struct i40e_q_vector *q_vector =
1842 container_of(napi, struct i40e_q_vector, napi);
1843 struct i40e_vsi *vsi = q_vector->vsi;
1844 struct i40e_ring *ring;
1845 bool clean_complete = true;
1846 bool arm_wb = false;
1847 int budget_per_ring;
1850 if (test_bit(__I40E_DOWN, &vsi->state)) {
1851 napi_complete(napi);
1855 /* Since the actual Tx work is minimal, we can give the Tx a larger
1856 * budget and be more aggressive about cleaning up the Tx descriptors.
1858 i40e_for_each_ring(ring, q_vector->tx) {
1859 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1860 arm_wb |= ring->arm_wb;
1861 ring->arm_wb = false;
1864 /* Handle case where we are called by netpoll with a budget of 0 */
1868 /* We attempt to distribute budget to each Rx queue fairly, but don't
1869 * allow the budget to go below 1 because that would exit polling early.
1871 budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1873 i40e_for_each_ring(ring, q_vector->rx) {
1876 if (ring_is_ps_enabled(ring))
1877 cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1879 cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
1881 work_done += cleaned;
1882 /* if we didn't clean as many as budgeted, we must be done */
1883 clean_complete &= (budget_per_ring != cleaned);
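		/* A ring that used its entire slice likely has more work
		 * queued, so only report the poll complete when every ring
		 * came in under budget.
		 */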
1886 /* If work not completed, return budget and polling will return */
1887 if (!clean_complete) {
1890 i40e_force_wb(vsi, q_vector);
1894 if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
1895 q_vector->arm_wb_state = false;
1897 /* Work is done so exit the polling mode and re-enable the interrupt */
1898 napi_complete_done(napi, work_done);
1899 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1900 i40e_update_enable_itr(vsi, q_vector);
1901 } else { /* Legacy mode */
1902 struct i40e_hw *hw = &vsi->back->hw;
1903 /* We re-enable the queue 0 cause, but
1904 * don't worry about dynamic_enable
1905 * because we left it on for the other
1906 * possible interrupts during napi
1908 u32 qval = rd32(hw, I40E_QINT_RQCTL(0)) |
1909 I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1911 wr32(hw, I40E_QINT_RQCTL(0), qval);
1912 qval = rd32(hw, I40E_QINT_TQCTL(0)) |
1913 I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1914 wr32(hw, I40E_QINT_TQCTL(0), qval);
1915 i40e_irq_dynamic_enable_icr0(vsi->back);
/**
 * i40e_atr - Add a Flow Director ATR filter
 * @tx_ring: ring to add programming descriptor to
 * @skb: send buffer
 * @tx_flags: send tx flags
 * @protocol: wire protocol
 **/
1927 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1928 u32 tx_flags, __be16 protocol)
1930 struct i40e_filter_program_desc *fdir_desc;
1931 struct i40e_pf *pf = tx_ring->vsi->back;
1933 unsigned char *network;
1935 struct ipv6hdr *ipv6;
1939 u32 flex_ptype, dtype_cmd;
1942 /* make sure ATR is enabled */
1943 if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1946 if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1949 /* if sampling is disabled do nothing */
1950 if (!tx_ring->atr_sample_rate)
1953 if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
1956 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL)) {
1957 /* snag network header to get L4 type and address */
1958 hdr.network = skb_network_header(skb);
1960 /* Currently only IPv4/IPv6 with TCP is supported
1961 * access ihl as u8 to avoid unaligned access on ia64
1963 if (tx_flags & I40E_TX_FLAGS_IPV4)
1964 hlen = (hdr.network[0] & 0x0F) << 2;
1965 else if (protocol == htons(ETH_P_IPV6))
1966 hlen = sizeof(struct ipv6hdr);
1970 hdr.network = skb_inner_network_header(skb);
1971 hlen = skb_inner_network_header_len(skb);
1974 /* Currently only IPv4/IPv6 with TCP is supported
1975 * Note: tx_flags gets modified to reflect inner protocols in
1976 * tx_enable_csum function if encap is enabled.
1978 if ((tx_flags & I40E_TX_FLAGS_IPV4) &&
1979 (hdr.ipv4->protocol != IPPROTO_TCP))
1981 else if ((tx_flags & I40E_TX_FLAGS_IPV6) &&
1982 (hdr.ipv6->nexthdr != IPPROTO_TCP))
1985 th = (struct tcphdr *)(hdr.network + hlen);
1987 /* Due to lack of space, no more new filters can be programmed */
1988 if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1990 if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) {
1991 /* HW ATR eviction will take care of removing filters on FIN
1994 if (th->fin || th->rst)
1998 tx_ring->atr_count++;
2000 /* sample on all syn/fin/rst packets or once every atr sample rate */
2004 (tx_ring->atr_count < tx_ring->atr_sample_rate))
2007 tx_ring->atr_count = 0;
2009 /* grab the next descriptor */
2010 i = tx_ring->next_to_use;
2011 fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2014 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2016 flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2017 I40E_TXD_FLTR_QW0_QINDEX_MASK;
2018 flex_ptype |= (protocol == htons(ETH_P_IP)) ?
2019 (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2020 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2021 (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2022 I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2024 flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2026 dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2028 dtype_cmd |= (th->fin || th->rst) ?
2029 (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2030 I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2031 (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2032 I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2034 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2035 I40E_TXD_FLTR_QW1_DEST_SHIFT;
2037 dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2038 I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2040 dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2041 if (!(tx_flags & I40E_TX_FLAGS_VXLAN_TUNNEL))
2043 ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2044 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2045 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2048 ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2049 I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2050 I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2052 if (pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)
2053 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2055 fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2056 fdir_desc->rsvd = cpu_to_le32(0);
2057 fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2058 fdir_desc->fd_id = cpu_to_le32(0);
/**
 * i40e_tx_prepare_vlan_flags - prepare generic Tx VLAN tagging flags for HW
 * @skb: send buffer
 * @tx_ring: ring to send buffer on
 * @flags: the tx flags to be set
 *
 * Checks the skb and sets up correspondingly several generic transmit flags
 * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
 *
 * Returns an error code if the frame should be dropped; otherwise
 * returns 0 to indicate that the flags have been set properly.
 **/
2074 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2075 struct i40e_ring *tx_ring,
2078 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2079 struct i40e_ring *tx_ring,
2083 __be16 protocol = skb->protocol;
2086 if (protocol == htons(ETH_P_8021Q) &&
2087 !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2088 /* When HW VLAN acceleration is turned off by the user the
2089 * stack sets the protocol to 8021q so that the driver
2090 * can take any steps required to support the SW only
2091 * VLAN handling. In our case the driver doesn't need
2092 * to take any further steps so just set the protocol
2093 * to the encapsulated ethertype.
2095 skb->protocol = vlan_get_protocol(skb);
2099 /* if we have a HW VLAN tag being added, default to the HW one */
2100 if (skb_vlan_tag_present(skb)) {
2101 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2102 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2103 /* else if it is a SW VLAN, check the next protocol and store the tag */
2104 } else if (protocol == htons(ETH_P_8021Q)) {
2105 struct vlan_hdr *vhdr, _vhdr;
2107 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2111 protocol = vhdr->h_vlan_encapsulated_proto;
2112 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2113 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2116 if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2119 /* Insert 802.1p priority into VLAN header */
2120 if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2121 (skb->priority != TC_PRIO_CONTROL)) {
2122 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2123 tx_flags |= (skb->priority & 0x7) <<
2124 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2125 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2126 struct vlan_ethhdr *vhdr;
2129 rc = skb_cow_head(skb, 0);
2132 vhdr = (struct vlan_ethhdr *)skb->data;
2133 vhdr->h_vlan_TCI = htons(tx_flags >>
2134 I40E_TX_FLAGS_VLAN_SHIFT);
2136 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
/**
 * i40e_tso - set up the TSO context descriptor
 * @tx_ring: ptr to the ring to send
 * @skb: ptr to the skb we're sending
 * @hdr_len: ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to the context descriptor's type_cmd_tso_mss field
 * @cd_tunneling: ptr to context descriptor bits
 *
 * Returns 0 if TSO is not needed, 1 if a TSO context was set up,
 * or a negative error code.
 **/
2154 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2155 u8 *hdr_len, u64 *cd_type_cmd_tso_mss,
2158 u32 cd_cmd, cd_tso_len, cd_mss;
2159 struct ipv6hdr *ipv6h;
2160 struct tcphdr *tcph;
2165 if (!skb_is_gso(skb))
2168 err = skb_cow_head(skb, 0);
2172 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
2173 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);
2175 if (iph->version == 4) {
2176 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2177 iph->tot_len = 0;
2178 iph->check = 0;
2179 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
2180 0, IPPROTO_TCP, 0);
2181 } else if (ipv6h->version == 6) {
2182 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
2183 ipv6h->payload_len = 0;
2184 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
2185 0, IPPROTO_TCP, 0);
2186 }
2188 l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
2189 *hdr_len = (skb->encapsulation
2190 ? (skb_inner_transport_header(skb) - skb->data)
2191 : skb_transport_offset(skb)) + l4len;
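/* Worked example (illustrative, not part of the original source): for a
 * non-encapsulated TCP/IPv4 TSO skb with no options, skb_transport_offset()
 * is 14 (Ethernet) + 20 (IPv4) = 34 and tcp_hdrlen() is 20, so *hdr_len is
 * 54; only the payload beyond those headers counts toward the TSO length
 * programmed below.
 */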
2193 /* find the field values */
2194 cd_cmd = I40E_TX_CTX_DESC_TSO;
2195 cd_tso_len = skb->len - *hdr_len;
2196 cd_mss = skb_shinfo(skb)->gso_size;
2197 *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2198 ((u64)cd_tso_len <<
2199 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2200 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2201 return 1;
2202 }
2205 * i40e_tsyn - set up the tsyn context descriptor
2206 * @tx_ring: ptr to the ring to send
2207 * @skb: ptr to the skb we're sending
2208 * @tx_flags: the collected send information
2210 * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2212 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2213 u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2214 {
2215 struct i40e_pf *pf;
2217 if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2218 return 0;
2220 /* Tx timestamps cannot be sampled when doing TSO */
2221 if (tx_flags & I40E_TX_FLAGS_TSO)
2222 return 0;
2224 /* only timestamp the outbound packet if the user has requested it and
2225 * we are not already transmitting a packet to be timestamped
2226 */
2227 pf = i40e_netdev_to_pf(tx_ring->netdev);
2228 if (!(pf->flags & I40E_FLAG_PTP))
2229 return 0;
2231 if (pf->ptp_tx &&
2232 !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2233 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2234 pf->ptp_tx_skb = skb_get(skb);
2235 } else {
2236 return 0;
2237 }
2239 *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2240 I40E_TXD_CTX_QW1_CMD_SHIFT;
2242 return 1;
2243 }
2246 * i40e_tx_enable_csum - Enable Tx checksum offloads
2248 * @tx_flags: pointer to Tx flags currently set
2249 * @td_cmd: Tx descriptor command bits to set
2250 * @td_offset: Tx descriptor header offsets to set
2251 * @cd_tunneling: ptr to context desc bits
2253 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2254 u32 *td_cmd, u32 *td_offset,
2255 struct i40e_ring *tx_ring,
2256 u32 *cd_tunneling)
2257 {
2258 struct ipv6hdr *this_ipv6_hdr;
2259 unsigned int this_tcp_hdrlen;
2260 struct iphdr *this_ip_hdr;
2261 u32 network_hdr_len;
2262 u8 l4_hdr = 0;
2263 struct udphdr *oudph;
2264 struct iphdr *oiph;
2265 u32 l4_tunnel = 0;
2267 if (skb->encapsulation) {
2268 switch (ip_hdr(skb)->protocol) {
2269 case IPPROTO_UDP:
2270 oudph = udp_hdr(skb);
2271 oiph = ip_hdr(skb);
2272 l4_tunnel = I40E_TXD_CTX_UDP_TUNNELING;
2273 *tx_flags |= I40E_TX_FLAGS_VXLAN_TUNNEL;
2274 break;
2275 case IPPROTO_GRE:
2276 l4_tunnel = I40E_TXD_CTX_GRE_TUNNELING;
2277 break;
2278 default:
2279 return;
2280 }
2281 network_hdr_len = skb_inner_network_header_len(skb);
2282 this_ip_hdr = inner_ip_hdr(skb);
2283 this_ipv6_hdr = inner_ipv6_hdr(skb);
2284 this_tcp_hdrlen = inner_tcp_hdrlen(skb);
2286 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2287 if (*tx_flags & I40E_TX_FLAGS_TSO) {
2288 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
2289 ip_hdr(skb)->check = 0;
2290 } else {
2291 *cd_tunneling |=
2292 I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2293 }
2294 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2295 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
2296 if (*tx_flags & I40E_TX_FLAGS_TSO)
2297 ip_hdr(skb)->check = 0;
2300 /* Now set the ctx descriptor fields */
2301 *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
2302 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
2303 l4_tunnel |
2304 ((skb_inner_network_offset(skb) -
2305 skb_transport_offset(skb)) >> 1) <<
2306 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2307 if (this_ip_hdr->version == 6) {
2308 *tx_flags &= ~I40E_TX_FLAGS_IPV4;
2309 *tx_flags |= I40E_TX_FLAGS_IPV6;
2311 if ((tx_ring->flags & I40E_TXR_FLAGS_OUTER_UDP_CSUM) &&
2312 (l4_tunnel == I40E_TXD_CTX_UDP_TUNNELING) &&
2313 (*cd_tunneling & I40E_TXD_CTX_QW0_EXT_IP_MASK)) {
2314 oudph->check = ~csum_tcpudp_magic(oiph->saddr,
2315 oiph->daddr,
2316 (skb->len - skb_transport_offset(skb)),
2317 IPPROTO_UDP, 0);
2318 *cd_tunneling |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2319 }
2320 } else {
2321 network_hdr_len = skb_network_header_len(skb);
2322 this_ip_hdr = ip_hdr(skb);
2323 this_ipv6_hdr = ipv6_hdr(skb);
2324 this_tcp_hdrlen = tcp_hdrlen(skb);
2327 /* Enable IP checksum offloads */
2328 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2329 l4_hdr = this_ip_hdr->protocol;
2330 /* the stack computes the IP header already, the only time we
2331 * need the hardware to recompute it is in the case of TSO.
2332 */
2333 if (*tx_flags & I40E_TX_FLAGS_TSO) {
2334 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
2335 this_ip_hdr->check = 0;
2336 } else {
2337 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
2338 }
2339 /* Now set the td_offset for IP header length */
2340 *td_offset = (network_hdr_len >> 2) <<
2341 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2342 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2343 l4_hdr = this_ipv6_hdr->nexthdr;
2344 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2345 /* Now set the td_offset for IP header length */
2346 *td_offset = (network_hdr_len >> 2) <<
2347 I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2349 /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
2350 *td_offset |= (skb_network_offset(skb) >> 1) <<
2351 I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
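/* Worked example (illustrative, not part of the original source): for an
 * untagged IPv4/TCP frame with no options, MACLEN = 14 bytes >> 1 = 7 words,
 * IPLEN = 20 bytes >> 2 = 5 dwords, and the TCP case below adds
 * L4LEN = 20 bytes >> 2 = 5 dwords, so td_offset carries 7/5/5 in its
 * MACLEN/IPLEN/L4LEN fields.
 */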
2353 /* Enable L4 checksum offloads */
2354 switch (l4_hdr) {
2355 case IPPROTO_TCP:
2356 /* enable checksum offloads */
2357 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2358 *td_offset |= (this_tcp_hdrlen >> 2) <<
2359 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2360 break;
2361 case IPPROTO_SCTP:
2362 /* enable SCTP checksum offload */
2363 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2364 *td_offset |= (sizeof(struct sctphdr) >> 2) <<
2365 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2366 break;
2367 case IPPROTO_UDP:
2368 /* enable UDP checksum offload */
2369 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2370 *td_offset |= (sizeof(struct udphdr) >> 2) <<
2371 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2372 break;
2373 default:
2374 break;
2375 }
2376 }
2379 * i40e_create_tx_ctx - Build the Tx context descriptor
2380 * @tx_ring: ring to create the descriptor on
2381 * @cd_type_cmd_tso_mss: Quad Word 1
2382 * @cd_tunneling: Quad Word 0 - bits 0-31
2383 * @cd_l2tag2: Quad Word 0 - bits 32-63
2385 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2386 const u64 cd_type_cmd_tso_mss,
2387 const u32 cd_tunneling, const u32 cd_l2tag2)
2389 struct i40e_tx_context_desc *context_desc;
2390 int i = tx_ring->next_to_use;
2392 if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2393 !cd_tunneling && !cd_l2tag2)
2396 /* grab the next descriptor */
2397 context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2399 i++;
2400 tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2402 /* cpu_to_le32 and assign to struct fields */
2403 context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2404 context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2405 context_desc->rsvd = cpu_to_le16(0);
2406 context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2410 * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2411 * @tx_ring: the ring to be checked
2412 * @size: the size buffer we want to assure is available
2414 * Returns -EBUSY if a stop is needed, else 0
2416 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2417 {
2418 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2419 /* Memory barrier before checking head and tail */
2420 smp_mb();
2422 /* Check again in case another CPU has just made room available. */
2423 if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2424 return -EBUSY;
2426 /* A reprieve! - use start_queue because it doesn't call schedule */
2427 netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2428 ++tx_ring->tx_stats.restart_queue;
2433 * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2434 * @tx_ring: the ring to be checked
2435 * @size: the size buffer we want to assure is available
2437 * Returns 0 if stop is not needed
2439 #ifdef I40E_FCOE
2440 inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2441 #else
2442 static inline int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2443 #endif
2444 {
2445 if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2446 return 0;
2447 return __i40e_maybe_stop_tx(tx_ring, size);
2448 }
2451 * i40e_chk_linearize - Check if there are more than 8 fragments per packet
2453 * @tx_flags: collected send information
2455 * Note: Our HW can't scatter-gather more than 8 fragments to build
2456 * a packet on the wire and so we need to figure out the cases where we
2457 * need to linearize the skb.
2459 static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
2460 {
2461 struct skb_frag_struct *frag;
2462 bool linearize = false;
2463 unsigned int size = 0;
2464 u16 num_frags;
2465 u16 gso_segs;
2467 num_frags = skb_shinfo(skb)->nr_frags;
2468 gso_segs = skb_shinfo(skb)->gso_segs;
2470 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
2471 u16 j = 0;
2473 if (num_frags < (I40E_MAX_BUFFER_TXD))
2474 goto linearize_chk_done;
2475 /* try the simple math, if we have too many frags per segment */
2476 if (DIV_ROUND_UP((num_frags + gso_segs), gso_segs) >
2477 I40E_MAX_BUFFER_TXD) {
2478 linearize = true;
2479 goto linearize_chk_done;
2480 }
2481 frag = &skb_shinfo(skb)->frags[0];
2482 /* we might still have more fragments per segment */
2483 do {
2484 size += skb_frag_size(frag);
2485 frag++; j++;
2486 if ((size >= skb_shinfo(skb)->gso_size) &&
2487 (j < I40E_MAX_BUFFER_TXD)) {
2488 size = (size % skb_shinfo(skb)->gso_size);
2489 j = (size) ? 1 : 0;
2490 }
2491 if (j == I40E_MAX_BUFFER_TXD) {
2492 linearize = true;
2493 break;
2494 }
2495 num_frags--;
2496 } while (num_frags);
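/* Illustrative walk-through of the loop above (not from the original
 * source): each fragment adds its length to size and bumps j; once a full
 * gso_size worth of data has been consumed the counters restart with the
 * leftover bytes, and if any single segment would span I40E_MAX_BUFFER_TXD
 * descriptors the skb is flagged for linearization, since the hardware
 * cannot scatter-gather more than 8 buffers per wire packet.
 */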
2498 } else if (num_frags >= I40E_MAX_BUFFER_TXD) {
2499 linearize = true;
2500 }
2502 linearize_chk_done:
2503 return linearize;
2504 }
2507 * i40e_tx_map - Build the Tx descriptor
2508 * @tx_ring: ring to send buffer on
2510 * @first: first buffer info buffer to use
2511 * @tx_flags: collected send information
2512 * @hdr_len: size of the packet header
2513 * @td_cmd: the command field in the descriptor
2514 * @td_offset: offset for checksum or crc
2516 #ifdef I40E_FCOE
2517 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2518 struct i40e_tx_buffer *first, u32 tx_flags,
2519 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2520 #else
2521 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2522 struct i40e_tx_buffer *first, u32 tx_flags,
2523 const u8 hdr_len, u32 td_cmd, u32 td_offset)
2524 #endif
2525 {
2526 unsigned int data_len = skb->data_len;
2527 unsigned int size = skb_headlen(skb);
2528 struct skb_frag_struct *frag;
2529 struct i40e_tx_buffer *tx_bi;
2530 struct i40e_tx_desc *tx_desc;
2531 u16 i = tx_ring->next_to_use;
2532 u32 td_tag = 0;
2533 dma_addr_t dma;
2534 u16 gso_segs;
2535 u16 desc_count = 0;
2536 bool tail_bump = true;
2537 bool do_rs = false;
2539 if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2540 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2541 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2542 I40E_TX_FLAGS_VLAN_SHIFT;
2545 if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2546 gso_segs = skb_shinfo(skb)->gso_segs;
2547 else
2548 gso_segs = 1;
2550 /* multiply data chunks by size of headers */
2551 first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
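/* Illustrative arithmetic (not part of the original source): for a TSO skb
 * of 60000 bytes with hdr_len 54 and gso_segs 42, bytecount is
 * 60000 - 54 + 42 * 54 = 62214, i.e. the single set of headers in the skb
 * is replaced by one copy per segment actually put on the wire.
 */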
2552 first->gso_segs = gso_segs;
2554 first->tx_flags = tx_flags;
2556 dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2558 tx_desc = I40E_TX_DESC(tx_ring, i);
2559 tx_bi = first;
2561 for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2562 if (dma_mapping_error(tx_ring->dev, dma))
2563 goto dma_error;
2565 /* record length, and DMA address */
2566 dma_unmap_len_set(tx_bi, len, size);
2567 dma_unmap_addr_set(tx_bi, dma, dma);
2569 tx_desc->buffer_addr = cpu_to_le64(dma);
2571 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2572 tx_desc->cmd_type_offset_bsz =
2573 build_ctob(td_cmd, td_offset,
2574 I40E_MAX_DATA_PER_TXD, td_tag);
2576 tx_desc++;
2577 i++;
2578 desc_count++;
2580 if (i == tx_ring->count) {
2581 tx_desc = I40E_TX_DESC(tx_ring, 0);
2582 i = 0;
2583 }
2585 dma += I40E_MAX_DATA_PER_TXD;
2586 size -= I40E_MAX_DATA_PER_TXD;
2588 tx_desc->buffer_addr = cpu_to_le64(dma);
2589 }
2591 if (likely(!data_len))
2592 break;
2594 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2595 size, td_tag);
2597 tx_desc++;
2598 i++;
2599 desc_count++;
2601 if (i == tx_ring->count) {
2602 tx_desc = I40E_TX_DESC(tx_ring, 0);
2603 i = 0;
2604 }
2606 size = skb_frag_size(frag);
2609 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2610 DMA_TO_DEVICE);
2612 tx_bi = &tx_ring->tx_bi[i];
2613 }
2615 /* set next_to_watch value indicating a packet is present */
2616 first->next_to_watch = tx_desc;
2618 i++;
2619 if (i == tx_ring->count)
2620 i = 0;
2622 tx_ring->next_to_use = i;
2624 netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2625 tx_ring->queue_index),
2626 first->bytecount);
2627 i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2629 /* Algorithm to optimize tail and RS bit setting:
2630 * if xmit_more is supported
2631 * if xmit_more is true
2632 * do not update tail and do not mark RS bit.
2633 * if xmit_more is false and last xmit_more was false
2634 * if every packet spanned less than 4 desc
2635 * then set RS bit on 4th packet and update tail
2638 * update tail and set RS bit on every packet.
2639 * if xmit_more is false and last_xmit_more was true
2640 * update tail and set RS bit.
2642 * Optimization: wmb to be issued only in case of tail update.
2643 * Also optimize the Descriptor WB path for RS bit with the same
2644 * algorithm.
2645 *
2646 * Note: If there are fewer than 4 packets
2647 * pending and interrupts were disabled the service task will
2648 * trigger a force WB.
2649 */
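/* Example flow (illustrative, not part of the original comment): during a
 * burst queued with skb->xmit_more set, the first branch below only records
 * LAST_XMIT_MORE_SET and skips the tail write; the burst's final skb, sent
 * with xmit_more clear, falls through to the else path, sets do_rs and
 * bumps the tail once for the whole burst.
 */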
2650 if (skb->xmit_more &&
2651 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2652 tx_ring->queue_index))) {
2653 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2654 tail_bump = false;
2655 } else if (!skb->xmit_more &&
2656 !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2657 tx_ring->queue_index)) &&
2658 (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2659 (tx_ring->packet_stride < WB_STRIDE) &&
2660 (desc_count < WB_STRIDE)) {
2661 tx_ring->packet_stride++;
2662 } else {
2663 tx_ring->packet_stride = 0;
2664 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2665 do_rs = true;
2666 }
2667 if (do_rs)
2668 tx_ring->packet_stride = 0;
2670 tx_desc->cmd_type_offset_bsz =
2671 build_ctob(td_cmd, td_offset, size, td_tag) |
2672 cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2673 I40E_TX_DESC_CMD_EOP) <<
2674 I40E_TXD_QW1_CMD_SHIFT);
2676 /* notify HW of packet */
2677 if (!tail_bump)
2678 prefetchw(tx_desc + 1);
2680 if (tail_bump) {
2681 /* Force memory writes to complete before letting h/w
2682 * know there are new descriptors to fetch. (Only
2683 * applicable for weak-ordered memory model archs,
2684 * such as IA-64).
2685 */
2686 wmb();
2687 writel(i, tx_ring->tail);
2688 }
2690 return;
2692 dma_error:
2693 dev_info(tx_ring->dev, "TX DMA map failed\n");
2695 /* clear dma mappings for failed tx_bi map */
2696 for (;;) {
2697 tx_bi = &tx_ring->tx_bi[i];
2698 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2699 if (tx_bi == first)
2700 break;
2701 if (i == 0)
2702 i = tx_ring->count;
2703 i--;
2704 }
2706 tx_ring->next_to_use = i;
2710 * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2712 * @tx_ring: ring to send buffer on
2714 * Returns the number of data descriptors needed for this skb. Returns 0 to
2715 * indicate there are not enough descriptors available in this ring since
2716 * we need at least one descriptor.
2718 #ifdef I40E_FCOE
2719 inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2720 struct i40e_ring *tx_ring)
2721 #else
2722 static inline int i40e_xmit_descriptor_count(struct sk_buff *skb,
2723 struct i40e_ring *tx_ring)
2724 #endif
2725 {
2726 unsigned int f;
2727 int count = 0;
2729 /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2730 * + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2731 * + 4 desc gap to avoid the cache line where head is,
2732 * + 1 desc for context descriptor,
2733 * otherwise try next time
2734 */
2735 for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2736 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2738 count += TXD_USE_COUNT(skb_headlen(skb));
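/* Illustrative count (assumes TXD_USE_COUNT() and I40E_MAX_DATA_PER_TXD as
 * defined in i40e_txrx.h): a 256-byte linear area needs one descriptor and
 * each frag small enough to fit in one I40E_MAX_DATA_PER_TXD chunk needs one
 * more, so an skb with four such frags gives count = 5 before the "+ 4 + 1"
 * headroom checked below.
 */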
2739 if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2740 tx_ring->tx_stats.tx_busy++;
2741 return 0;
2742 }
2743 return count;
2744 }
2747 * i40e_xmit_frame_ring - Sends buffer on Tx ring
2749 * @tx_ring: ring to send buffer on
2751 * Returns NETDEV_TX_OK if sent, else an error code
2753 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2754 struct i40e_ring *tx_ring)
2755 {
2756 u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2757 u32 cd_tunneling = 0, cd_l2tag2 = 0;
2758 struct i40e_tx_buffer *first;
2759 u32 td_offset = 0;
2760 u32 tx_flags = 0;
2761 __be16 protocol;
2762 u32 td_cmd = 0;
2763 u8 hdr_len = 0;
2764 int tsyn;
2765 int tso;
2767 if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
2768 return NETDEV_TX_BUSY;
2770 /* prepare the xmit flags */
2771 if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2772 goto out_drop;
2774 /* obtain protocol of skb */
2775 protocol = vlan_get_protocol(skb);
2777 /* record the location of the first descriptor for this packet */
2778 first = &tx_ring->tx_bi[tx_ring->next_to_use];
2780 /* setup IPv4/IPv6 offloads */
2781 if (protocol == htons(ETH_P_IP))
2782 tx_flags |= I40E_TX_FLAGS_IPV4;
2783 else if (protocol == htons(ETH_P_IPV6))
2784 tx_flags |= I40E_TX_FLAGS_IPV6;
2786 tso = i40e_tso(tx_ring, skb, &hdr_len,
2787 &cd_type_cmd_tso_mss, &cd_tunneling);
2789 if (tso < 0)
2790 goto out_drop;
2791 else if (tso)
2792 tx_flags |= I40E_TX_FLAGS_TSO;
2794 tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2796 if (tsyn)
2797 tx_flags |= I40E_TX_FLAGS_TSYN;
2799 if (i40e_chk_linearize(skb, tx_flags)) {
2800 if (skb_linearize(skb))
2801 goto out_drop;
2802 tx_ring->tx_stats.tx_linearize++;
2803 }
2804 skb_tx_timestamp(skb);
2806 /* always enable CRC insertion offload */
2807 td_cmd |= I40E_TX_DESC_CMD_ICRC;
2809 /* Always offload the checksum, since it's in the data descriptor */
2810 if (skb->ip_summed == CHECKSUM_PARTIAL) {
2811 tx_flags |= I40E_TX_FLAGS_CSUM;
2813 i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2814 tx_ring, &cd_tunneling);
2817 i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2818 cd_tunneling, cd_l2tag2);
2820 /* Add Flow Director ATR if it's enabled.
2822 * NOTE: this must always be directly before the data descriptor.
2823 */
2824 i40e_atr(tx_ring, skb, tx_flags, protocol);
2826 i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2827 td_cmd, td_offset);
2829 return NETDEV_TX_OK;
2831 out_drop:
2832 dev_kfree_skb_any(skb);
2833 return NETDEV_TX_OK;
2837 * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2839 * @netdev: network interface device structure
2841 * Returns NETDEV_TX_OK if sent, else an error code
2843 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2845 struct i40e_netdev_priv *np = netdev_priv(netdev);
2846 struct i40e_vsi *vsi = np->vsi;
2847 struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2849 /* hardware can't handle really short frames, hardware padding works
2850 * beyond this point
2851 */
2852 if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2853 return NETDEV_TX_OK;
2855 return i40e_xmit_frame_ring(skb, tx_ring);