Merge tag 'for-linus-20170812' of git://git.infradead.org/linux-mtd

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 77115c25d96fa88e8348bd0e891d03ca964b4d12..2194960d5855c6576ec03c870479344b099ce12b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -26,6 +26,7 @@
 
 #include <linux/prefetch.h>
 #include <net/busy_poll.h>
+#include <linux/bpf_trace.h>
 #include "i40e.h"
 #include "i40e_trace.h"
 #include "i40e_prototype.h"
@@ -629,6 +630,8 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
        if (tx_buffer->skb) {
                if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
                        kfree(tx_buffer->raw_buf);
+               else if (ring_is_xdp(ring))
+                       page_frag_free(tx_buffer->raw_buf);
                else
                        dev_kfree_skb_any(tx_buffer->skb);
                if (dma_unmap_len(tx_buffer, len))
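
For an XDP Tx ring the buffer never carries an skb: tx_buffer->raw_buf points straight into the Rx page that was forwarded with XDP_TX, so page_frag_free() translates that address back to its page and drops the reference, rather than kfree() or dev_kfree_skb_any(). The ring_is_xdp() test comes from the companion i40e_txrx.h change; a rough sketch of it, with the flag name treated as an assumption:

	static inline bool ring_is_xdp(struct i40e_ring *ring)
	{
		/* flag set when the ring was allocated as an XDP Tx ring */
		return !!(ring->flags & I40E_TXR_FLAGS_XDP);
	}
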
@@ -770,8 +773,11 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                total_bytes += tx_buf->bytecount;
                total_packets += tx_buf->gso_segs;
 
-               /* free the skb */
-               napi_consume_skb(tx_buf->skb, napi_budget);
+               /* free the skb/XDP data */
+               if (ring_is_xdp(tx_ring))
+                       page_frag_free(tx_buf->raw_buf);
+               else
+                       napi_consume_skb(tx_buf->skb, napi_budget);
 
                /* unmap skb header data */
                dma_unmap_single(tx_ring->dev,
@@ -847,6 +853,9 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
                        tx_ring->arm_wb = true;
        }
 
+       if (ring_is_xdp(tx_ring))
+               return !!budget;
+
        /* notify netdev of completed buffers */
        netdev_tx_completed_queue(txring_txq(tx_ring),
                                  total_packets, total_bytes);
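
The early return keeps XDP Tx rings away from the byte-queue-limit (BQL) and queue-wake logic that follows: an XDP ring has no real netdev Tx queue behind txring_txq(), and it never ran the "sent" half of the accounting, so completing bytes against it would corrupt BQL state. Roughly, the pairing being preserved looks like this for skb rings (the sent side lives in i40e_tx_map(); treat the exact call site as an assumption):

	/* xmit path, skb rings only */
	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);

	/* clean path, skb rings only */
	netdev_tx_completed_queue(txring_txq(tx_ring),
				  total_packets, total_bytes);
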
@@ -1104,6 +1113,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
        if (!tx_ring->tx_bi)
                goto err;
 
+       u64_stats_init(&tx_ring->syncp);
+
        /* round up to nearest 4K */
        tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
        /* add u32 for head writeback, align after this takes care of
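
u64_stats_init() prepares the per-ring seqcount before any reader walks the counters. On 32-bit kernels the sync object is a real seqcount and readers retry if a writer is mid-update; a typical reader looks roughly like this (field and helper names assumed from elsewhere in the driver):

	unsigned int start;
	u64 packets, bytes;

	do {
		start = u64_stats_fetch_begin_irq(&tx_ring->syncp);
		packets = tx_ring->stats.packets;
		bytes = tx_ring->stats.bytes;
	} while (u64_stats_fetch_retry_irq(&tx_ring->syncp, start));
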
@@ -1195,6 +1206,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 {
        i40e_clean_rx_ring(rx_ring);
+       rx_ring->xdp_prog = NULL;
        kfree(rx_ring->rx_bi);
        rx_ring->rx_bi = NULL;
 
@@ -1241,6 +1253,8 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
 
+       rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
+
        return 0;
 err:
        kfree(rx_ring->rx_bi);
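
Caching vsi->xdp_prog into the ring at descriptor-setup time is what makes the READ_ONCE(rx_ring->xdp_prog) in the hot path below cheap. The attach side is not part of this file; a condensed sketch of how the companion i40e_main.c change is expected to publish a new program (function name, locking and reset handling are assumptions):

	static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog)
	{
		struct bpf_prog *old_prog;
		int i;

		old_prog = xchg(&vsi->xdp_prog, prog);

		/* make the new program visible to every Rx ring */
		for (i = 0; i < vsi->num_queue_pairs; i++)
			WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);

		if (old_prog)
			bpf_prog_put(old_prog);

		return 0;
	}
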
@@ -1593,6 +1607,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
  * i40e_cleanup_headers - Correct empty headers
  * @rx_ring: rx descriptor ring packet is being transacted on
  * @skb: pointer to current skb being fixed
+ * @rx_desc: pointer to the EOP Rx descriptor
  *
  * Also address the case where we are pulling data in on pages only
  * and as such no data is present in the skb header.
@@ -1602,8 +1617,25 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
  *
  * Returns true if an error was encountered and skb was freed.
  **/
-static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
+static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
+                                union i40e_rx_desc *rx_desc)
+
 {
+       /* XDP packets use error pointer so abort at this point */
+       if (IS_ERR(skb))
+               return true;
+
+       /* ERR_MASK will only have valid bits if EOP set, and
+        * what we are doing here is actually checking
+        * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+        * the error field
+        */
+       if (unlikely(i40e_test_staterr(rx_desc,
+                                      BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
+               dev_kfree_skb_any(skb);
+               return true;
+       }
+
        /* if eth_skb_pad returns an error the skb was freed */
        if (eth_skb_pad(skb))
                return true;
@@ -1776,7 +1808,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
  * i40e_construct_skb - Allocate skb and populate it
  * @rx_ring: rx descriptor ring to transact packets on
  * @rx_buffer: rx buffer to pull data from
- * @size: size of buffer to add to skb
+ * @xdp: xdp_buff pointing to the data
  *
  * This function allocates an skb.  It then populates it with the page
  * data from the current receive descriptor, taking care to set up the
@@ -1784,9 +1816,9 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
  */
 static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
                                          struct i40e_rx_buffer *rx_buffer,
-                                         unsigned int size)
+                                         struct xdp_buff *xdp)
 {
-       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+       unsigned int size = xdp->data_end - xdp->data;
 #if (PAGE_SIZE < 8192)
        unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
@@ -1796,9 +1828,9 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
        struct sk_buff *skb;
 
        /* prefetch first cache line of first page */
-       prefetch(va);
+       prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-       prefetch(va + L1_CACHE_BYTES);
+       prefetch(xdp->data + L1_CACHE_BYTES);
 #endif
 
        /* allocate a skb to store the frags */
@@ -1811,10 +1843,11 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
        /* Determine available headroom for copy */
        headlen = size;
        if (headlen > I40E_RX_HDR_SIZE)
-               headlen = eth_get_headlen(va, I40E_RX_HDR_SIZE);
+               headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE);
 
        /* align pull length to size of long to optimize memcpy performance */
-       memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
+       memcpy(__skb_put(skb, headlen), xdp->data,
+              ALIGN(headlen, sizeof(long)));
 
        /* update all of the pointers */
        size -= headlen;
@@ -1841,16 +1874,16 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring,
  * i40e_build_skb - Build skb around an existing buffer
  * @rx_ring: Rx descriptor ring to transact packets on
  * @rx_buffer: Rx buffer to pull data from
- * @size: size of buffer to add to skb
+ * @xdp: xdp_buff pointing to the data
  *
  * This function builds an skb around an existing Rx buffer, taking care
  * to set up the skb correctly and avoid any memcpy overhead.
  */
 static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
                                      struct i40e_rx_buffer *rx_buffer,
-                                     unsigned int size)
+                                     struct xdp_buff *xdp)
 {
-       void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
+       unsigned int size = xdp->data_end - xdp->data;
 #if (PAGE_SIZE < 8192)
        unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
@@ -1860,12 +1893,12 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
        struct sk_buff *skb;
 
        /* prefetch first cache line of first page */
-       prefetch(va);
+       prefetch(xdp->data);
 #if L1_CACHE_BYTES < 128
-       prefetch(va + L1_CACHE_BYTES);
+       prefetch(xdp->data + L1_CACHE_BYTES);
 #endif
        /* build an skb around the page buffer */
-       skb = build_skb(va - I40E_SKB_PAD, truesize);
+       skb = build_skb(xdp->data_hard_start, truesize);
        if (unlikely(!skb))
                return NULL;
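
Passing xdp->data_hard_start to build_skb() preserves the headroom that i40e_rx_offset() reserved in front of the frame, so a later XDP head adjustment and the skb path share the same layout. A sketch of the buffer geometry this relies on:

	/*
	 *  xdp->data_hard_start       xdp->data               xdp->data_end
	 *       |<---- headroom ----->|<------- frame ------->|<- tailroom ->|
	 *        (I40E_SKB_PAD, i.e.                     SKB_DATA_ALIGN(sizeof
	 *         i40e_rx_offset())                  (struct skb_shared_info))
	 *
	 * build_skb() takes data_hard_start as the skb head, so truesize has
	 * to cover headroom + frame + tailroom; on PAGE_SIZE < 8192 that is
	 * simply half a page, which keeps the two half-page buffers of one
	 * Rx page independent.
	 */
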
 
@@ -1944,6 +1977,75 @@ static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
        return true;
 }
 
+#define I40E_XDP_PASS 0
+#define I40E_XDP_CONSUMED 1
+#define I40E_XDP_TX 2
+
+static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+                             struct i40e_ring *xdp_ring);
+
+/**
+ * i40e_run_xdp - run an XDP program
+ * @rx_ring: Rx ring being processed
+ * @xdp: XDP buffer containing the frame
+ **/
+static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
+                                   struct xdp_buff *xdp)
+{
+       int result = I40E_XDP_PASS;
+       struct i40e_ring *xdp_ring;
+       struct bpf_prog *xdp_prog;
+       u32 act;
+
+       rcu_read_lock();
+       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+       if (!xdp_prog)
+               goto xdp_out;
+
+       act = bpf_prog_run_xdp(xdp_prog, xdp);
+       switch (act) {
+       case XDP_PASS:
+               break;
+       case XDP_TX:
+               xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+               result = i40e_xmit_xdp_ring(xdp, xdp_ring);
+               break;
+       default:
+               bpf_warn_invalid_xdp_action(act);
+       case XDP_ABORTED:
+               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+               /* fallthrough -- handle aborts by dropping packet */
+       case XDP_DROP:
+               result = I40E_XDP_CONSUMED;
+               break;
+       }
+xdp_out:
+       rcu_read_unlock();
+       return ERR_PTR(-result);
+}
+
+/**
+ * i40e_rx_buffer_flip - adjust rx_buffer to point to an unused region

+ * @rx_ring: Rx ring
+ * @rx_buffer: Rx buffer to adjust
+ * @size: Size of adjustment
+ **/
+static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
+                               struct i40e_rx_buffer *rx_buffer,
+                               unsigned int size)
+{
+#if (PAGE_SIZE < 8192)
+       unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+
+       rx_buffer->page_offset ^= truesize;
+#else
+       unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
+
+       rx_buffer->page_offset += truesize;
+#endif
+}
+
 /**
  * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @rx_ring: rx descriptor ring to transact packets on
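
For reference, a minimal XDP program that exercises the verdicts handled above: XDP_PASS falls through to the normal skb path, XDP_DROP and XDP_ABORTED recycle the buffer, and XDP_TX (not used by this toy program) would be queued on the XDP Tx ring paired with the Rx queue. This is a hypothetical userspace-built example using libbpf conventions, not part of the patch:

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/in.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_endian.h>

	SEC("xdp")
	int xdp_drop_udp(struct xdp_md *ctx)
	{
		void *data_end = (void *)(long)ctx->data_end;
		void *data = (void *)(long)ctx->data;
		struct ethhdr *eth = data;
		struct iphdr *iph;

		if ((void *)(eth + 1) > data_end)
			return XDP_PASS;
		if (eth->h_proto != bpf_htons(ETH_P_IP))
			return XDP_PASS;

		iph = (void *)(eth + 1);
		if ((void *)(iph + 1) > data_end)
			return XDP_PASS;

		/* drop UDP, let everything else continue to the stack */
		return iph->protocol == IPPROTO_UDP ? XDP_DROP : XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";
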
@@ -1961,11 +2063,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
        unsigned int total_rx_bytes = 0, total_rx_packets = 0;
        struct sk_buff *skb = rx_ring->skb;
        u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
-       bool failure = false;
+       bool failure = false, xdp_xmit = false;
 
        while (likely(total_rx_packets < budget)) {
                struct i40e_rx_buffer *rx_buffer;
                union i40e_rx_desc *rx_desc;
+               struct xdp_buff xdp;
                unsigned int size;
                u16 vlan_tag;
                u8 rx_ptype;
@@ -2006,12 +2109,32 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                rx_buffer = i40e_get_rx_buffer(rx_ring, size);
 
                /* retrieve a buffer from the ring */
-               if (skb)
+               if (!skb) {
+                       xdp.data = page_address(rx_buffer->page) +
+                                  rx_buffer->page_offset;
+                       xdp.data_hard_start = xdp.data -
+                                             i40e_rx_offset(rx_ring);
+                       xdp.data_end = xdp.data + size;
+
+                       skb = i40e_run_xdp(rx_ring, &xdp);
+               }
+
+               if (IS_ERR(skb)) {
+                       if (PTR_ERR(skb) == -I40E_XDP_TX) {
+                               xdp_xmit = true;
+                               i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
+                       } else {
+                               rx_buffer->pagecnt_bias++;
+                       }
+                       total_rx_bytes += size;
+                       total_rx_packets++;
+               } else if (skb) {
                        i40e_add_rx_frag(rx_ring, rx_buffer, skb, size);
-               else if (ring_uses_build_skb(rx_ring))
-                       skb = i40e_build_skb(rx_ring, rx_buffer, size);
-               else
-                       skb = i40e_construct_skb(rx_ring, rx_buffer, size);
+               } else if (ring_uses_build_skb(rx_ring)) {
+                       skb = i40e_build_skb(rx_ring, rx_buffer, &xdp);
+               } else {
+                       skb = i40e_construct_skb(rx_ring, rx_buffer, &xdp);
+               }
 
                /* exit if we failed to retrieve a buffer */
                if (!skb) {
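
The error pointer returned by i40e_run_xdp() is what lets the existing skb checks keep working unchanged; decoded, the branches above mean the following (a summary, constants from this patch):

	/*
	 *  I40E_XDP_PASS (0)     -> ERR_PTR(0) == NULL: fall through and build
	 *                           or extend an skb exactly as before
	 *  I40E_XDP_TX (2)       -> IS_ERR(skb): the frame is already queued on
	 *                           the XDP Tx ring, so flip/advance the buffer
	 *                           offset and ring the doorbell after the loop
	 *  I40E_XDP_CONSUMED (1) -> IS_ERR(skb): dropped; pagecnt_bias++ hands
	 *                           the page reference back so the buffer can
	 *                           be reused immediately
	 */
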
@@ -2026,18 +2149,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                if (i40e_is_non_eop(rx_ring, rx_desc, skb))
                        continue;
 
-               /* ERR_MASK will only have valid bits if EOP set, and
-                * what we are doing here is actually checking
-                * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
-                * the error field
-                */
-               if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
-                       dev_kfree_skb_any(skb);
-                       skb = NULL;
-                       continue;
-               }
-
-               if (i40e_cleanup_headers(rx_ring, skb)) {
+               if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
                        skb = NULL;
                        continue;
                }
@@ -2063,6 +2175,19 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
                total_rx_packets++;
        }
 
+       if (xdp_xmit) {
+               struct i40e_ring *xdp_ring;
+
+               xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
+
+               /* Force memory writes to complete before letting h/w
+                * know there are new descriptors to fetch.
+                */
+               wmb();
+
+               writel(xdp_ring->next_to_use, xdp_ring->tail);
+       }
+
        rx_ring->skb = skb;
 
        u64_stats_update_begin(&rx_ring->syncp);
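
Note the ordering around the doorbell: descriptors queued by i40e_xmit_xdp_ring() during this poll become globally visible before the single tail write, and issuing the MMIO write once per NAPI poll rather than once per frame keeps it off the per-packet path.

	/*
	 * per frame (i40e_xmit_xdp_ring): plain stores fill the descriptor and
	 *                                 advance next_to_use
	 * once per poll (here):           wmb() orders those stores, then one
	 *                                 writel() to the tail register lets
	 *                                 hardware fetch everything queued in
	 *                                 this poll
	 */
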
@@ -2629,8 +2754,10 @@ static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
        if (pf->ptp_tx &&
            !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, pf->state)) {
                skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+               pf->ptp_tx_start = jiffies;
                pf->ptp_tx_skb = skb_get(skb);
        } else {
+               pf->tx_hwtstamp_skipped++;
                return 0;
        }
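
The two new fields give the PTP code enough state to notice a Tx timestamp that hardware never latched: ptp_tx_start records when the slot was claimed, and tx_hwtstamp_skipped counts requests refused while a timestamp was already outstanding. A sketch of the assumed consumer in the companion i40e_ptp.c change (function name, timeout constant and counter outside this hunk are assumptions):

	void i40e_ptp_tx_hang_check(struct i40e_pf *pf)
	{
		if (!pf->ptp_tx_skb)
			return;

		/* I40E_PTP_TX_TIMEOUT is a hypothetical timeout in jiffies */
		if (time_is_before_jiffies(pf->ptp_tx_start +
					   I40E_PTP_TX_TIMEOUT)) {
			dev_kfree_skb_any(pf->ptp_tx_skb);
			pf->ptp_tx_skb = NULL;
			clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
			pf->tx_hwtstamp_timeouts++;
		}
	}
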
 
@@ -2933,10 +3060,12 @@ bool __i40e_chk_linearize(struct sk_buff *skb)
  * @hdr_len:  size of the packet header
  * @td_cmd:   the command field in the descriptor
  * @td_offset: offset for checksum or crc
+ *
+ * Returns 0 on success, -1 on DMA mapping failure

  **/
-static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
-                              struct i40e_tx_buffer *first, u32 tx_flags,
-                              const u8 hdr_len, u32 td_cmd, u32 td_offset)
+static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
+                             struct i40e_tx_buffer *first, u32 tx_flags,
+                             const u8 hdr_len, u32 td_cmd, u32 td_offset)
 {
        unsigned int data_len = skb->data_len;
        unsigned int size = skb_headlen(skb);
@@ -3094,7 +3223,7 @@ do_rs:
                mmiowb();
        }
 
-       return;
+       return 0;
 
 dma_error:
        dev_info(tx_ring->dev, "TX DMA map failed\n");
@@ -3111,6 +3240,61 @@ dma_error:
        }
 
        tx_ring->next_to_use = i;
+
+       return -1;
+}
+
+/**
+ * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring
+ * @xdp: data to transmit
+ * @xdp_ring: XDP Tx ring
+ **/
+static int i40e_xmit_xdp_ring(struct xdp_buff *xdp,
+                             struct i40e_ring *xdp_ring)
+{
+       u32 size = xdp->data_end - xdp->data;
+       u16 i = xdp_ring->next_to_use;
+       struct i40e_tx_buffer *tx_bi;
+       struct i40e_tx_desc *tx_desc;
+       dma_addr_t dma;
+
+       if (!unlikely(I40E_DESC_UNUSED(xdp_ring))) {
+               xdp_ring->tx_stats.tx_busy++;
+               return I40E_XDP_CONSUMED;
+       }
+
+       dma = dma_map_single(xdp_ring->dev, xdp->data, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(xdp_ring->dev, dma))
+               return I40E_XDP_CONSUMED;
+
+       tx_bi = &xdp_ring->tx_bi[i];
+       tx_bi->bytecount = size;
+       tx_bi->gso_segs = 1;
+       tx_bi->raw_buf = xdp->data;
+
+       /* record length, and DMA address */
+       dma_unmap_len_set(tx_bi, len, size);
+       dma_unmap_addr_set(tx_bi, dma, dma);
+
+       tx_desc = I40E_TX_DESC(xdp_ring, i);
+       tx_desc->buffer_addr = cpu_to_le64(dma);
+       tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC
+                                                 | I40E_TXD_CMD,
+                                                 0, size, 0);
+
+       /* Make certain all of the status bits have been updated
+        * before next_to_watch is written.
+        */
+       smp_wmb();
+
+       i++;
+       if (i == xdp_ring->count)
+               i = 0;
+
+       tx_bi->next_to_watch = tx_desc;
+       xdp_ring->next_to_use = i;
+
+       return I40E_XDP_TX;
 }
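
Each XDP_TX frame occupies exactly one data descriptor: I40E_TXD_CMD sets EOP and RS so hardware both ends the packet and reports completion, and ICRC asks for the Ethernet CRC to be inserted. The build_ctob() helper used above packs those fields into the descriptor's second quadword; roughly (the definition lives elsewhere in the driver and is quoted from memory, so treat the details as assumptions):

	static inline __le64 build_ctob(u32 td_cmd, u32 td_offset,
					unsigned int size, u32 td_tag)
	{
		return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
				   ((u64)td_cmd << I40E_TXD_QW1_CMD_SHIFT) |
				   ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
				   ((u64)size << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
				   ((u64)td_tag << I40E_TXD_QW1_L2TAG1_SHIFT));
	}
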
 
 /**
@@ -3211,8 +3395,9 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
         */
        i40e_atr(tx_ring, skb, tx_flags);
 
-       i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
-                   td_cmd, td_offset);
+       if (i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
+                       td_cmd, td_offset))
+               goto cleanup_tx_tstamp;
 
        return NETDEV_TX_OK;
 
@@ -3220,6 +3405,15 @@ out_drop:
        i40e_trace(xmit_frame_ring_drop, first->skb, tx_ring);
        dev_kfree_skb_any(first->skb);
        first->skb = NULL;
+cleanup_tx_tstamp:
+       if (unlikely(tx_flags & I40E_TX_FLAGS_TSYN)) {
+               struct i40e_pf *pf = i40e_netdev_to_pf(tx_ring->netdev);
+
+               dev_kfree_skb_any(pf->ptp_tx_skb);
+               pf->ptp_tx_skb = NULL;
+               clear_bit_unlock(__I40E_PTP_TX_IN_PROGRESS, pf->state);
+       }
+
        return NETDEV_TX_OK;
 }
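
The new cleanup_tx_tstamp label matters because i40e_tsyn() has already claimed the single outstanding PTP Tx slot by the time i40e_tx_map() runs:

	/*
	 * Pairing being undone here: i40e_tsyn() took skb_get(skb) and set
	 * __I40E_PTP_TX_IN_PROGRESS, expecting the Tx timestamp interrupt to
	 * release both.  If i40e_tx_map() fails, that interrupt never comes,
	 * so the reference is dropped and the bit cleared on the spot;
	 * otherwise PTP Tx timestamping would stay blocked until the next
	 * reset.
	 */
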