Merge branch 'xen-netback-control-ring'
author: David S. Miller <davem@davemloft.net>
	Mon, 16 May 2016 17:35:57 +0000 (13:35 -0400)
committer: David S. Miller <davem@davemloft.net>
	Mon, 16 May 2016 17:35:57 +0000 (13:35 -0400)
Paul Durrant says:

====================
xen-netback: support for control ring

My recent patch to import an up-to-date include/xen/interface/io/netif.h
from the Xen Project brought in the necessary definitions to support the
new control shared ring and protocol. This patch series updates xen-netback
to support the new ring.

Patch #1 adds the necessary boilerplate to map the control ring and handle
messages. No implementation of the new protocol is included in this patch
so that it can be kept to a reasonable size.

Patch #2 adds the protocol implementation.

Patch #3 adds support for passing hash values calculated by xen-netback to
capable frontends.

Patch #4 adds support for accepting hash values calculated by capable
frontends and using them to set the socket buffer hash.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
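For reference, the control protocol carries fixed-size messages over the new
shared ring. The request and response layouts below are reproduced (abridged;
the type and status constants are omitted) from the imported
include/xen/interface/io/netif.h, and are the same fields the handlers added
by patch #2 operate on:

	struct xen_netif_ctrl_request {
		uint16_t id;      /* opaque, echoed back in the response */
		uint16_t type;    /* e.g. XEN_NETIF_CTRL_TYPE_SET_HASH_KEY */
		uint32_t data[3]; /* type-specific parameters */
	};

	struct xen_netif_ctrl_response {
		uint16_t id;      /* copied from the matching request */
		uint16_t type;    /* copied from the matching request */
		uint32_t status;  /* e.g. XEN_NETIF_CTRL_STATUS_SUCCESS */
		uint32_t data;    /* optional result datum */
	};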
drivers/net/xen-netback/Makefile
drivers/net/xen-netback/common.h
drivers/net/xen-netback/hash.c [new file with mode: 0644]
drivers/net/xen-netback/interface.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netback/xenbus.c

index e346e8125ef5d4d1797199853f7247043251ee6c..11e02be9db1a0b1ed5e6116bb2d1c3dd3b68fda4 100644 (file)
@@ -1,3 +1,3 @@
 obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
 
-xen-netback-y := netback.o xenbus.o interface.o
+xen-netback-y := netback.o xenbus.o interface.o hash.o
index f44b388464208d25b4d11613a2f8b16862256ec4..84d6cbdd11b2d870c9e6be8a470402bd84ebfb8b 100644 (file)
@@ -220,6 +220,35 @@ struct xenvif_mcast_addr {
 
 #define XEN_NETBK_MCAST_MAX 64
 
+#define XEN_NETBK_MAX_HASH_KEY_SIZE 40
+#define XEN_NETBK_MAX_HASH_MAPPING_SIZE 128
+#define XEN_NETBK_HASH_TAG_SIZE 40
+
+struct xenvif_hash_cache_entry {
+       struct list_head link;
+       struct rcu_head rcu;
+       u8 tag[XEN_NETBK_HASH_TAG_SIZE];
+       unsigned int len;
+       u32 val;
+       int seq;
+};
+
+struct xenvif_hash_cache {
+       spinlock_t lock;
+       struct list_head list;
+       unsigned int count;
+       atomic_t seq;
+};
+
+struct xenvif_hash {
+       unsigned int alg;
+       u32 flags;
+       u8 key[XEN_NETBK_MAX_HASH_KEY_SIZE];
+       u32 mapping[XEN_NETBK_MAX_HASH_MAPPING_SIZE];
+       unsigned int size;
+       struct xenvif_hash_cache cache;
+};
+
 struct xenvif {
        /* Unique identifier for this interface. */
        domid_t          domid;
@@ -251,6 +280,8 @@ struct xenvif {
        unsigned int num_queues; /* active queues, resource allocated */
        unsigned int stalled_queues;
 
+       struct xenvif_hash hash;
+
        struct xenbus_watch credit_watch;
        struct xenbus_watch mcast_ctrl_watch;
 
@@ -260,6 +291,11 @@ struct xenvif {
        struct dentry *xenvif_dbg_root;
 #endif
 
+       struct xen_netif_ctrl_back_ring ctrl;
+       struct task_struct *ctrl_task;
+       wait_queue_head_t ctrl_wq;
+       unsigned int ctrl_irq;
+
        /* Miscellaneous private stuff. */
        struct net_device *dev;
 };
@@ -285,10 +321,15 @@ struct xenvif *xenvif_alloc(struct device *parent,
 int xenvif_init_queue(struct xenvif_queue *queue);
 void xenvif_deinit_queue(struct xenvif_queue *queue);
 
-int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
-                  unsigned long rx_ring_ref, unsigned int tx_evtchn,
-                  unsigned int rx_evtchn);
-void xenvif_disconnect(struct xenvif *vif);
+int xenvif_connect_data(struct xenvif_queue *queue,
+                       unsigned long tx_ring_ref,
+                       unsigned long rx_ring_ref,
+                       unsigned int tx_evtchn,
+                       unsigned int rx_evtchn);
+void xenvif_disconnect_data(struct xenvif *vif);
+int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
+                       unsigned int evtchn);
+void xenvif_disconnect_ctrl(struct xenvif *vif);
 void xenvif_free(struct xenvif *vif);
 
 int xenvif_xenbus_init(void);
@@ -300,10 +341,10 @@ int xenvif_queue_stopped(struct xenvif_queue *queue);
 void xenvif_wake_queue(struct xenvif_queue *queue);
 
 /* (Un)Map communication rings. */
-void xenvif_unmap_frontend_rings(struct xenvif_queue *queue);
-int xenvif_map_frontend_rings(struct xenvif_queue *queue,
-                             grant_ref_t tx_ring_ref,
-                             grant_ref_t rx_ring_ref);
+void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue);
+int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
+                                  grant_ref_t tx_ring_ref,
+                                  grant_ref_t rx_ring_ref);
 
 /* Check for SKBs from frontend and schedule backend processing */
 void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue);
@@ -318,6 +359,8 @@ void xenvif_kick_thread(struct xenvif_queue *queue);
 
 int xenvif_dealloc_kthread(void *data);
 
+int xenvif_ctrl_kthread(void *data);
+
 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
 
 void xenvif_carrier_on(struct xenvif *vif);
@@ -341,6 +384,7 @@ extern bool separate_tx_rx_irq;
 extern unsigned int rx_drain_timeout_msecs;
 extern unsigned int rx_stall_timeout_msecs;
 extern unsigned int xenvif_max_queues;
+extern unsigned int xenvif_hash_cache_size;
 
 #ifdef CONFIG_DEBUG_FS
 extern struct dentry *xen_netback_dbg_root;
@@ -354,4 +398,18 @@ void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
 bool xenvif_mcast_match(struct xenvif *vif, const u8 *addr);
 void xenvif_mcast_addr_list_free(struct xenvif *vif);
 
+/* Hash */
+void xenvif_init_hash(struct xenvif *vif);
+void xenvif_deinit_hash(struct xenvif *vif);
+
+u32 xenvif_set_hash_alg(struct xenvif *vif, u32 alg);
+u32 xenvif_get_hash_flags(struct xenvif *vif, u32 *flags);
+u32 xenvif_set_hash_flags(struct xenvif *vif, u32 flags);
+u32 xenvif_set_hash_key(struct xenvif *vif, u32 gref, u32 len);
+u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size);
+u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len,
+                           u32 off);
+
+void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb);
+
 #endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c
new file mode 100644 (file)
index 0000000..392e392
--- /dev/null
@@ -0,0 +1,384 @@
+/*
+ * Copyright (c) 2016 Citrix Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define XEN_NETIF_DEFINE_TOEPLITZ
+
+#include "common.h"
+#include <linux/vmalloc.h>
+#include <linux/rculist.h>
+
+static void xenvif_del_hash(struct rcu_head *rcu)
+{
+       struct xenvif_hash_cache_entry *entry;
+
+       entry = container_of(rcu, struct xenvif_hash_cache_entry, rcu);
+
+       kfree(entry);
+}
+
+static void xenvif_add_hash(struct xenvif *vif, const u8 *tag,
+                           unsigned int len, u32 val)
+{
+       struct xenvif_hash_cache_entry *new, *entry, *oldest;
+       unsigned long flags;
+       bool found;
+
+       new = kmalloc(sizeof(*entry), GFP_KERNEL);
+       if (!new)
+               return;
+
+       memcpy(new->tag, tag, len);
+       new->len = len;
+       new->val = val;
+
+       spin_lock_irqsave(&vif->hash.cache.lock, flags);
+
+       found = false;
+       oldest = NULL;
+       list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) {
+               /* Make sure we don't add duplicate entries */
+               if (entry->len == len &&
+                   memcmp(entry->tag, tag, len) == 0)
+                       found = true;
+               if (!oldest || entry->seq < oldest->seq)
+                       oldest = entry;
+       }
+
+       if (!found) {
+               new->seq = atomic_inc_return(&vif->hash.cache.seq);
+               list_add_rcu(&new->link, &vif->hash.cache.list);
+
+               if (++vif->hash.cache.count > xenvif_hash_cache_size) {
+                       list_del_rcu(&oldest->link);
+                       vif->hash.cache.count--;
+                       call_rcu(&oldest->rcu, xenvif_del_hash);
+               }
+       }
+
+       spin_unlock_irqrestore(&vif->hash.cache.lock, flags);
+
+       if (found)
+               kfree(new);
+}
+
+static u32 xenvif_new_hash(struct xenvif *vif, const u8 *data,
+                          unsigned int len)
+{
+       u32 val;
+
+       val = xen_netif_toeplitz_hash(vif->hash.key,
+                                     sizeof(vif->hash.key),
+                                     data, len);
+
+       if (xenvif_hash_cache_size != 0)
+               xenvif_add_hash(vif, data, len, val);
+
+       return val;
+}
+
+static void xenvif_flush_hash(struct xenvif *vif)
+{
+       struct xenvif_hash_cache_entry *entry;
+       unsigned long flags;
+
+       if (xenvif_hash_cache_size == 0)
+               return;
+
+       spin_lock_irqsave(&vif->hash.cache.lock, flags);
+
+       list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) {
+               list_del_rcu(&entry->link);
+               vif->hash.cache.count--;
+               call_rcu(&entry->rcu, xenvif_del_hash);
+       }
+
+       spin_unlock_irqrestore(&vif->hash.cache.lock, flags);
+}
+
+static u32 xenvif_find_hash(struct xenvif *vif, const u8 *data,
+                           unsigned int len)
+{
+       struct xenvif_hash_cache_entry *entry;
+       u32 val;
+       bool found;
+
+       if (len >= XEN_NETBK_HASH_TAG_SIZE)
+               return 0;
+
+       if (xenvif_hash_cache_size == 0)
+               return xenvif_new_hash(vif, data, len);
+
+       rcu_read_lock();
+
+       found = false;
+
+       list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) {
+               if (entry->len == len &&
+                   memcmp(entry->tag, data, len) == 0) {
+                       val = entry->val;
+                       entry->seq = atomic_inc_return(&vif->hash.cache.seq);
+                       found = true;
+                       break;
+               }
+       }
+
+       rcu_read_unlock();
+
+       if (!found)
+               val = xenvif_new_hash(vif, data, len);
+
+       return val;
+}
+
+void xenvif_set_skb_hash(struct xenvif *vif, struct sk_buff *skb)
+{
+       struct flow_keys flow;
+       u32 hash = 0;
+       enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
+       u32 flags = vif->hash.flags;
+       bool has_tcp_hdr;
+
+       /* Quick rejection test: If the network protocol doesn't
+        * correspond to any enabled hash type then there's no point
+        * in parsing the packet header.
+        */
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               if (flags & (XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP |
+                            XEN_NETIF_CTRL_HASH_TYPE_IPV4))
+                       break;
+
+               goto done;
+
+       case htons(ETH_P_IPV6):
+               if (flags & (XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP |
+                            XEN_NETIF_CTRL_HASH_TYPE_IPV6))
+                       break;
+
+               goto done;
+
+       default:
+               goto done;
+       }
+
+       memset(&flow, 0, sizeof(flow));
+       if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
+               goto done;
+
+       has_tcp_hdr = (flow.basic.ip_proto == IPPROTO_TCP) &&
+                     !(flow.control.flags & FLOW_DIS_IS_FRAGMENT);
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               if (has_tcp_hdr &&
+                   (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)) {
+                       u8 data[12];
+
+                       memcpy(&data[0], &flow.addrs.v4addrs.src, 4);
+                       memcpy(&data[4], &flow.addrs.v4addrs.dst, 4);
+                       memcpy(&data[8], &flow.ports.src, 2);
+                       memcpy(&data[10], &flow.ports.dst, 2);
+
+                       hash = xenvif_find_hash(vif, data, sizeof(data));
+                       type = PKT_HASH_TYPE_L4;
+               } else if (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV4) {
+                       u8 data[8];
+
+                       memcpy(&data[0], &flow.addrs.v4addrs.src, 4);
+                       memcpy(&data[4], &flow.addrs.v4addrs.dst, 4);
+
+                       hash = xenvif_find_hash(vif, data, sizeof(data));
+                       type = PKT_HASH_TYPE_L3;
+               }
+
+               break;
+
+       case htons(ETH_P_IPV6):
+               if (has_tcp_hdr &&
+                   (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)) {
+                       u8 data[36];
+
+                       memcpy(&data[0], &flow.addrs.v6addrs.src, 16);
+                       memcpy(&data[16], &flow.addrs.v6addrs.dst, 16);
+                       memcpy(&data[32], &flow.ports.src, 2);
+                       memcpy(&data[34], &flow.ports.dst, 2);
+
+                       hash = xenvif_find_hash(vif, data, sizeof(data));
+                       type = PKT_HASH_TYPE_L4;
+               } else if (flags & XEN_NETIF_CTRL_HASH_TYPE_IPV6) {
+                       u8 data[32];
+
+                       memcpy(&data[0], &flow.addrs.v6addrs.src, 16);
+                       memcpy(&data[16], &flow.addrs.v6addrs.dst, 16);
+
+                       hash = xenvif_find_hash(vif, data, sizeof(data));
+                       type = PKT_HASH_TYPE_L3;
+               }
+
+               break;
+       }
+
+done:
+       if (type == PKT_HASH_TYPE_NONE)
+               skb_clear_hash(skb);
+       else
+               __skb_set_sw_hash(skb, hash, type == PKT_HASH_TYPE_L4);
+}
+
+u32 xenvif_set_hash_alg(struct xenvif *vif, u32 alg)
+{
+       switch (alg) {
+       case XEN_NETIF_CTRL_HASH_ALGORITHM_NONE:
+       case XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ:
+               break;
+
+       default:
+               return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+       }
+
+       vif->hash.alg = alg;
+
+       return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+u32 xenvif_get_hash_flags(struct xenvif *vif, u32 *flags)
+{
+       if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
+               return XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
+
+       *flags = XEN_NETIF_CTRL_HASH_TYPE_IPV4 |
+                XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP |
+                XEN_NETIF_CTRL_HASH_TYPE_IPV6 |
+                XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
+
+       return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+u32 xenvif_set_hash_flags(struct xenvif *vif, u32 flags)
+{
+       if (flags & ~(XEN_NETIF_CTRL_HASH_TYPE_IPV4 |
+                     XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP |
+                     XEN_NETIF_CTRL_HASH_TYPE_IPV6 |
+                     XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP))
+               return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+       if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE)
+               return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+       vif->hash.flags = flags;
+
+       return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+u32 xenvif_set_hash_key(struct xenvif *vif, u32 gref, u32 len)
+{
+       u8 *key = vif->hash.key;
+       struct gnttab_copy copy_op = {
+               .source.u.ref = gref,
+               .source.domid = vif->domid,
+               .dest.u.gmfn = virt_to_gfn(key),
+               .dest.domid = DOMID_SELF,
+               .dest.offset = xen_offset_in_page(key),
+               .len = len,
+               .flags = GNTCOPY_source_gref
+       };
+
+       if (len > XEN_NETBK_MAX_HASH_KEY_SIZE)
+               return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+       if (len != 0) {
+               gnttab_batch_copy(&copy_op, 1);
+
+               if (copy_op.status != GNTST_okay)
+                       return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+       }
+
+       /* Clear any remaining key octets */
+       if (len < XEN_NETBK_MAX_HASH_KEY_SIZE)
+               memset(key + len, 0, XEN_NETBK_MAX_HASH_KEY_SIZE - len);
+
+       xenvif_flush_hash(vif);
+
+       return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+u32 xenvif_set_hash_mapping_size(struct xenvif *vif, u32 size)
+{
+       if (size > XEN_NETBK_MAX_HASH_MAPPING_SIZE)
+               return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+       vif->hash.size = size;
+       memset(vif->hash.mapping, 0, sizeof(u32) * size);
+
+       return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+u32 xenvif_set_hash_mapping(struct xenvif *vif, u32 gref, u32 len,
+                           u32 off)
+{
+       u32 *mapping = &vif->hash.mapping[off];
+       struct gnttab_copy copy_op = {
+               .source.u.ref = gref,
+               .source.domid = vif->domid,
+               .dest.u.gmfn = virt_to_gfn(mapping),
+               .dest.domid = DOMID_SELF,
+               .dest.offset = xen_offset_in_page(mapping),
+               .len = len * sizeof(u32),
+               .flags = GNTCOPY_source_gref
+       };
+
+       if ((off + len > vif->hash.size) || copy_op.len > XEN_PAGE_SIZE)
+               return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+       while (len-- != 0)
+               if (mapping[off++] >= vif->num_queues)
+                       return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+
+       if (len != 0) {
+               gnttab_batch_copy(&copy_op, 1);
+
+               if (copy_op.status != GNTST_okay)
+                       return XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER;
+       }
+
+       return XEN_NETIF_CTRL_STATUS_SUCCESS;
+}
+
+void xenvif_init_hash(struct xenvif *vif)
+{
+       if (xenvif_hash_cache_size == 0)
+               return;
+
+       spin_lock_init(&vif->hash.cache.lock);
+       INIT_LIST_HEAD(&vif->hash.cache.list);
+}
+
+void xenvif_deinit_hash(struct xenvif *vif)
+{
+       xenvif_flush_hash(vif);
+}
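The new hash.c above delegates the actual computation to
xen_netif_toeplitz_hash(), which netif.h provides when
XEN_NETIF_DEFINE_TOEPLITZ is defined before inclusion. As an illustration of
what that call computes, here is a minimal standalone sketch of the standard
Toeplitz algorithm (illustrative only; the canonical implementation is the
one in the imported header):

	#include <stdint.h>
	#include <stddef.h>

	/* Toeplitz hash: for every set bit of the input, XOR in the current
	 * 32-bit window of the key, sliding the window left one bit per
	 * input bit. A 40-octet key covers the maximum 36 octets of input
	 * (36 * 8 + 32 = 320 bits), which is why XEN_NETBK_MAX_HASH_KEY_SIZE
	 * is 40 and the largest tag (IPv6 addresses plus ports) is 36.
	 */
	static uint32_t toeplitz_hash(const uint8_t *key, size_t keylen,
				      const uint8_t *data, size_t datalen)
	{
		uint32_t window = 0, hash = 0;
		size_t i, j;

		/* Load the first 32 bits of the key into the window. */
		for (i = 0; i < 4 && i < keylen; i++)
			window |= (uint32_t)key[i] << (24 - 8 * i);

		for (i = 0; i < datalen; i++) {
			uint8_t byte = data[i];

			for (j = 0; j < 8; j++) {
				if (byte & 0x80)
					hash ^= window;
				byte <<= 1;

				/* Slide the key window left by one bit. */
				window <<= 1;
				if (i + 4 < keylen &&
				    (key[i + 4] & (0x80 >> j)))
					window |= 1;
			}
		}

		return hash;
	}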
index f5231a2dd2ac961089029c8acd40f97c3cb7991b..1c7f49b5acc17805f5983ed41839472fa8c8d5d8 100644 (file)
@@ -128,6 +128,15 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id)
+{
+       struct xenvif *vif = dev_id;
+
+       wake_up(&vif->ctrl_wq);
+
+       return IRQ_HANDLED;
+}
+
 int xenvif_queue_stopped(struct xenvif_queue *queue)
 {
        struct net_device *dev = queue->vif->dev;
@@ -142,6 +151,33 @@ void xenvif_wake_queue(struct xenvif_queue *queue)
        netif_tx_wake_queue(netdev_get_tx_queue(dev, id));
 }
 
+static u16 xenvif_select_queue(struct net_device *dev, struct sk_buff *skb,
+                              void *accel_priv,
+                              select_queue_fallback_t fallback)
+{
+       struct xenvif *vif = netdev_priv(dev);
+       unsigned int size = vif->hash.size;
+
+       if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) {
+               u16 index = fallback(dev, skb) % dev->real_num_tx_queues;
+
+               /* Make sure there is no hash information in the socket
+                * buffer otherwise it would be incorrectly forwarded
+                * to the frontend.
+                */
+               skb_clear_hash(skb);
+
+               return index;
+       }
+
+       xenvif_set_skb_hash(vif, skb);
+
+       if (size == 0)
+               return skb_get_hash_raw(skb) % dev->real_num_tx_queues;
+
+       return vif->hash.mapping[skb_get_hash_raw(skb) % size];
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct xenvif *vif = netdev_priv(dev);
@@ -386,6 +422,7 @@ static const struct ethtool_ops xenvif_ethtool_ops = {
 };
 
 static const struct net_device_ops xenvif_netdev_ops = {
+       .ndo_select_queue = xenvif_select_queue,
        .ndo_start_xmit = xenvif_start_xmit,
        .ndo_get_stats  = xenvif_get_stats,
        .ndo_open       = xenvif_open,
@@ -527,9 +564,69 @@ void xenvif_carrier_on(struct xenvif *vif)
        rtnl_unlock();
 }
 
-int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
-                  unsigned long rx_ring_ref, unsigned int tx_evtchn,
-                  unsigned int rx_evtchn)
+int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
+                       unsigned int evtchn)
+{
+       struct net_device *dev = vif->dev;
+       void *addr;
+       struct xen_netif_ctrl_sring *shared;
+       struct task_struct *task;
+       int err = -ENOMEM;
+
+       err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
+                                    &ring_ref, 1, &addr);
+       if (err)
+               goto err;
+
+       shared = (struct xen_netif_ctrl_sring *)addr;
+       BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
+
+       init_waitqueue_head(&vif->ctrl_wq);
+
+       err = bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn,
+                                                   xenvif_ctrl_interrupt,
+                                                   0, dev->name, vif);
+       if (err < 0)
+               goto err_unmap;
+
+       vif->ctrl_irq = err;
+
+       xenvif_init_hash(vif);
+
+       task = kthread_create(xenvif_ctrl_kthread, (void *)vif,
+                             "%s-control", dev->name);
+       if (IS_ERR(task)) {
+               pr_warn("Could not allocate kthread for %s\n", dev->name);
+               err = PTR_ERR(task);
+               goto err_deinit;
+       }
+
+       get_task_struct(task);
+       vif->ctrl_task = task;
+
+       wake_up_process(vif->ctrl_task);
+
+       return 0;
+
+err_deinit:
+       xenvif_deinit_hash(vif);
+       unbind_from_irqhandler(vif->ctrl_irq, vif);
+       vif->ctrl_irq = 0;
+
+err_unmap:
+       xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
+                               vif->ctrl.sring);
+       vif->ctrl.sring = NULL;
+
+err:
+       return err;
+}
+
+int xenvif_connect_data(struct xenvif_queue *queue,
+                       unsigned long tx_ring_ref,
+                       unsigned long rx_ring_ref,
+                       unsigned int tx_evtchn,
+                       unsigned int rx_evtchn)
 {
        struct task_struct *task;
        int err = -ENOMEM;
@@ -538,7 +635,8 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
        BUG_ON(queue->task);
        BUG_ON(queue->dealloc_task);
 
-       err = xenvif_map_frontend_rings(queue, tx_ring_ref, rx_ring_ref);
+       err = xenvif_map_frontend_data_rings(queue, tx_ring_ref,
+                                            rx_ring_ref);
        if (err < 0)
                goto err;
 
@@ -614,7 +712,7 @@ err_tx_unbind:
        unbind_from_irqhandler(queue->tx_irq, queue);
        queue->tx_irq = 0;
 err_unmap:
-       xenvif_unmap_frontend_rings(queue);
+       xenvif_unmap_frontend_data_rings(queue);
        netif_napi_del(&queue->napi);
 err:
        module_put(THIS_MODULE);
@@ -634,7 +732,7 @@ void xenvif_carrier_off(struct xenvif *vif)
        rtnl_unlock();
 }
 
-void xenvif_disconnect(struct xenvif *vif)
+void xenvif_disconnect_data(struct xenvif *vif)
 {
        struct xenvif_queue *queue = NULL;
        unsigned int num_queues = vif->num_queues;
@@ -668,12 +766,34 @@ void xenvif_disconnect(struct xenvif *vif)
                        queue->tx_irq = 0;
                }
 
-               xenvif_unmap_frontend_rings(queue);
+               xenvif_unmap_frontend_data_rings(queue);
        }
 
        xenvif_mcast_addr_list_free(vif);
 }
 
+void xenvif_disconnect_ctrl(struct xenvif *vif)
+{
+       if (vif->ctrl_task) {
+               kthread_stop(vif->ctrl_task);
+               put_task_struct(vif->ctrl_task);
+               vif->ctrl_task = NULL;
+       }
+
+       xenvif_deinit_hash(vif);
+
+       if (vif->ctrl_irq) {
+               unbind_from_irqhandler(vif->ctrl_irq, vif);
+               vif->ctrl_irq = 0;
+       }
+
+       if (vif->ctrl.sring) {
+               xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(vif),
+                                       vif->ctrl.sring);
+               vif->ctrl.sring = NULL;
+       }
+}
+
 /* Reverse the relevant parts of xenvif_init_queue().
  * Used for queue teardown from xenvif_free(), and on the
  * error handling paths in xenbus.c:connect().
index 4412a57ec862287b1acad6ed486b7d7b2bfe1c1c..edbae0b1e8f0ed8e9be929767c01cecf838fa5fd 100644 (file)
@@ -89,6 +89,11 @@ module_param(fatal_skb_slots, uint, 0444);
  */
 #define XEN_NETBACK_TX_COPY_LEN 128
 
+/* This is the maximum number of flows in the hash cache. */
+#define XENVIF_HASH_CACHE_SIZE_DEFAULT 64
+unsigned int xenvif_hash_cache_size = XENVIF_HASH_CACHE_SIZE_DEFAULT;
+module_param_named(hash_cache_size, xenvif_hash_cache_size, uint, 0644);
+MODULE_PARM_DESC(hash_cache_size, "Number of flows in the hash cache");
 
 static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
                               u8 status);
@@ -163,6 +168,8 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
        needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
        if (skb_is_gso(skb))
                needed++;
+       if (skb->sw_hash)
+               needed++;
 
        do {
                prod = queue->rx.sring->req_prod;
@@ -280,6 +287,8 @@ struct gop_frag_copy {
        struct xenvif_rx_meta *meta;
        int head;
        int gso_type;
+       int protocol;
+       int hash_present;
 
        struct page *page;
 };
@@ -326,8 +335,15 @@ static void xenvif_setup_copy_gop(unsigned long gfn,
        npo->copy_off += *len;
        info->meta->size += *len;
 
+       if (!info->head)
+               return;
+
        /* Leave a gap for the GSO descriptor. */
-       if (info->head && ((1 << info->gso_type) & queue->vif->gso_mask))
+       if ((1 << info->gso_type) & queue->vif->gso_mask)
+               queue->rx.req_cons++;
+
+       /* Leave a gap for the hash extra segment. */
+       if (info->hash_present)
                queue->rx.req_cons++;
 
        info->head = 0; /* There must be something in this buffer now */
@@ -362,6 +378,11 @@ static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb
                .npo = npo,
                .head = *head,
                .gso_type = XEN_NETIF_GSO_TYPE_NONE,
+               /* xenvif_set_skb_hash() will have either set a s/w
+                * hash or cleared the hash depending on
+                * whether the frontend wants a hash for this skb.
+                */
+               .hash_present = skb->sw_hash,
        };
        unsigned long bytes;
 
@@ -550,6 +571,7 @@ void xenvif_kick_thread(struct xenvif_queue *queue)
 
 static void xenvif_rx_action(struct xenvif_queue *queue)
 {
+       struct xenvif *vif = queue->vif;
        s8 status;
        u16 flags;
        struct xen_netif_rx_response *resp;
@@ -585,9 +607,10 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
        gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
 
        while ((skb = __skb_dequeue(&rxq)) != NULL) {
+               struct xen_netif_extra_info *extra = NULL;
 
                if ((1 << queue->meta[npo.meta_cons].gso_type) &
-                   queue->vif->gso_prefix_mask) {
+                   vif->gso_prefix_mask) {
                        resp = RING_GET_RESPONSE(&queue->rx,
                                                 queue->rx.rsp_prod_pvt++);
 
@@ -605,7 +628,7 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
                queue->stats.tx_bytes += skb->len;
                queue->stats.tx_packets++;
 
-               status = xenvif_check_gop(queue->vif,
+               status = xenvif_check_gop(vif,
                                          XENVIF_RX_CB(skb)->meta_slots_used,
                                          &npo);
 
@@ -627,21 +650,57 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
                                        flags);
 
                if ((1 << queue->meta[npo.meta_cons].gso_type) &
-                   queue->vif->gso_mask) {
-                       struct xen_netif_extra_info *gso =
-                               (struct xen_netif_extra_info *)
+                   vif->gso_mask) {
+                       extra = (struct xen_netif_extra_info *)
                                RING_GET_RESPONSE(&queue->rx,
                                                  queue->rx.rsp_prod_pvt++);
 
                        resp->flags |= XEN_NETRXF_extra_info;
 
-                       gso->u.gso.type = queue->meta[npo.meta_cons].gso_type;
-                       gso->u.gso.size = queue->meta[npo.meta_cons].gso_size;
-                       gso->u.gso.pad = 0;
-                       gso->u.gso.features = 0;
+                       extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
+                       extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
+                       extra->u.gso.pad = 0;
+                       extra->u.gso.features = 0;
 
-                       gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
-                       gso->flags = 0;
+                       extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
+                       extra->flags = 0;
+               }
+
+               if (skb->sw_hash) {
+                       /* Since the skb got here via xenvif_select_queue()
+                        * we know that the hash has been re-calculated
+                        * according to a configuration set by the frontend
+                        * and therefore we know that it is legitimate to
+                        * pass it to the frontend.
+                        */
+                       if (resp->flags & XEN_NETRXF_extra_info)
+                               extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+                       else
+                               resp->flags |= XEN_NETRXF_extra_info;
+
+                       extra = (struct xen_netif_extra_info *)
+                               RING_GET_RESPONSE(&queue->rx,
+                                                 queue->rx.rsp_prod_pvt++);
+
+                       extra->u.hash.algorithm =
+                               XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
+
+                       if (skb->l4_hash)
+                               extra->u.hash.type =
+                                       skb->protocol == htons(ETH_P_IP) ?
+                                       _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
+                                       _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
+                       else
+                               extra->u.hash.type =
+                                       skb->protocol == htons(ETH_P_IP) ?
+                                       _XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
+                                       _XEN_NETIF_CTRL_HASH_TYPE_IPV6;
+
+                       *(uint32_t *)extra->u.hash.value =
+                               skb_get_hash_raw(skb);
+
+                       extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
+                       extra->flags = 0;
                }
 
                xenvif_add_frag_responses(queue, status,
@@ -1451,6 +1510,33 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue,
                        }
                }
 
+               if (extras[XEN_NETIF_EXTRA_TYPE_HASH - 1].type) {
+                       struct xen_netif_extra_info *extra;
+                       enum pkt_hash_types type = PKT_HASH_TYPE_NONE;
+
+                       extra = &extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
+
+                       switch (extra->u.hash.type) {
+                       case _XEN_NETIF_CTRL_HASH_TYPE_IPV4:
+                       case _XEN_NETIF_CTRL_HASH_TYPE_IPV6:
+                               type = PKT_HASH_TYPE_L3;
+                               break;
+
+                       case _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP:
+                       case _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP:
+                               type = PKT_HASH_TYPE_L4;
+                               break;
+
+                       default:
+                               break;
+                       }
+
+                       if (type != PKT_HASH_TYPE_NONE)
+                               skb_set_hash(skb,
+                                            *(u32 *)extra->u.hash.value,
+                                            type);
+               }
+
                XENVIF_TX_CB(skb)->pending_idx = pending_idx;
 
                __skb_put(skb, data_len);
@@ -1926,7 +2012,7 @@ static inline bool tx_dealloc_work_todo(struct xenvif_queue *queue)
        return queue->dealloc_cons != queue->dealloc_prod;
 }
 
-void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
+void xenvif_unmap_frontend_data_rings(struct xenvif_queue *queue)
 {
        if (queue->tx.sring)
                xenbus_unmap_ring_vfree(xenvif_to_xenbus_device(queue->vif),
@@ -1936,9 +2022,9 @@ void xenvif_unmap_frontend_rings(struct xenvif_queue *queue)
                                        queue->rx.sring);
 }
 
-int xenvif_map_frontend_rings(struct xenvif_queue *queue,
-                             grant_ref_t tx_ring_ref,
-                             grant_ref_t rx_ring_ref)
+int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
+                                  grant_ref_t tx_ring_ref,
+                                  grant_ref_t rx_ring_ref)
 {
        void *addr;
        struct xen_netif_tx_sring *txs;
@@ -1965,7 +2051,7 @@ int xenvif_map_frontend_rings(struct xenvif_queue *queue,
        return 0;
 
 err:
-       xenvif_unmap_frontend_rings(queue);
+       xenvif_unmap_frontend_data_rings(queue);
        return err;
 }
 
@@ -2164,6 +2250,135 @@ int xenvif_dealloc_kthread(void *data)
        return 0;
 }
 
+static void make_ctrl_response(struct xenvif *vif,
+                              const struct xen_netif_ctrl_request *req,
+                              u32 status, u32 data)
+{
+       RING_IDX idx = vif->ctrl.rsp_prod_pvt;
+       struct xen_netif_ctrl_response rsp = {
+               .id = req->id,
+               .type = req->type,
+               .status = status,
+               .data = data,
+       };
+
+       *RING_GET_RESPONSE(&vif->ctrl, idx) = rsp;
+       vif->ctrl.rsp_prod_pvt = ++idx;
+}
+
+static void push_ctrl_response(struct xenvif *vif)
+{
+       int notify;
+
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->ctrl, notify);
+       if (notify)
+               notify_remote_via_irq(vif->ctrl_irq);
+}
+
+static void process_ctrl_request(struct xenvif *vif,
+                                const struct xen_netif_ctrl_request *req)
+{
+       u32 status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED;
+       u32 data = 0;
+
+       switch (req->type) {
+       case XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM:
+               status = xenvif_set_hash_alg(vif, req->data[0]);
+               break;
+
+       case XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS:
+               status = xenvif_get_hash_flags(vif, &data);
+               break;
+
+       case XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS:
+               status = xenvif_set_hash_flags(vif, req->data[0]);
+               break;
+
+       case XEN_NETIF_CTRL_TYPE_SET_HASH_KEY:
+               status = xenvif_set_hash_key(vif, req->data[0],
+                                            req->data[1]);
+               break;
+
+       case XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE:
+               status = XEN_NETIF_CTRL_STATUS_SUCCESS;
+               data = XEN_NETBK_MAX_HASH_MAPPING_SIZE;
+               break;
+
+       case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE:
+               status = xenvif_set_hash_mapping_size(vif,
+                                                     req->data[0]);
+               break;
+
+       case XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING:
+               status = xenvif_set_hash_mapping(vif, req->data[0],
+                                                req->data[1],
+                                                req->data[2]);
+               break;
+
+       default:
+               break;
+       }
+
+       make_ctrl_response(vif, req, status, data);
+       push_ctrl_response(vif);
+}
+
+static void xenvif_ctrl_action(struct xenvif *vif)
+{
+       for (;;) {
+               RING_IDX req_prod, req_cons;
+
+               req_prod = vif->ctrl.sring->req_prod;
+               req_cons = vif->ctrl.req_cons;
+
+               /* Make sure we can see requests before we process them. */
+               rmb();
+
+               if (req_cons == req_prod)
+                       break;
+
+               while (req_cons != req_prod) {
+                       struct xen_netif_ctrl_request req;
+
+                       RING_COPY_REQUEST(&vif->ctrl, req_cons, &req);
+                       req_cons++;
+
+                       process_ctrl_request(vif, &req);
+               }
+
+               vif->ctrl.req_cons = req_cons;
+               vif->ctrl.sring->req_event = req_cons + 1;
+       }
+}
+
+static bool xenvif_ctrl_work_todo(struct xenvif *vif)
+{
+       if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->ctrl)))
+               return 1;
+
+       return 0;
+}
+
+int xenvif_ctrl_kthread(void *data)
+{
+       struct xenvif *vif = data;
+
+       for (;;) {
+               wait_event_interruptible(vif->ctrl_wq,
+                                        xenvif_ctrl_work_todo(vif) ||
+                                        kthread_should_stop());
+               if (kthread_should_stop())
+                       break;
+
+               while (xenvif_ctrl_work_todo(vif))
+                       xenvif_ctrl_action(vif);
+
+               cond_resched();
+       }
+
+       return 0;
+}
+
 static int __init netback_init(void)
 {
        int rc = 0;
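The control kthread above is the consuming end of the ring. For context, a
frontend would produce requests using the standard macros from
include/xen/interface/io/ring.h; the helper below is a hypothetical
frontend-side sketch (not part of this series) showing the shape of one
submission, assuming a mapped xen_netif_ctrl_front_ring and a bound
event-channel irq:

	/* Hypothetical netfront helper: queue one control request and kick
	 * the backend. The caller would then wait for the response carrying
	 * the same id.
	 */
	static void netfront_send_ctrl_request(
		struct xen_netif_ctrl_front_ring *ring,
		unsigned int irq, u16 type, u32 data0, u32 data1, u32 data2)
	{
		struct xen_netif_ctrl_request *req;
		int notify;

		req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
		req->id = (u16)ring->req_prod_pvt; /* echoed in the response */
		req->type = type;
		req->data[0] = data0;
		req->data[1] = data1;
		req->data[2] = data2;
		ring->req_prod_pvt++;

		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(ring, notify);
		if (notify)
			notify_remote_via_irq(irq);
	}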
index bd182cd55dda87c76b3984bbec8cfef493284dd1..6a31f2610c2378185c4274cd9828cd5117ca61ea 100644 (file)
@@ -38,7 +38,8 @@ struct backend_info {
        const char *hotplug_script;
 };
 
-static int connect_rings(struct backend_info *be, struct xenvif_queue *queue);
+static int connect_data_rings(struct backend_info *be,
+                             struct xenvif_queue *queue);
 static void connect(struct backend_info *be);
 static int read_xenbus_vif_flags(struct backend_info *be);
 static int backend_create_xenvif(struct backend_info *be);
@@ -367,6 +368,12 @@ static int netback_probe(struct xenbus_device *dev,
        if (err)
                pr_debug("Error writing multi-queue-max-queues\n");
 
+       err = xenbus_printf(XBT_NIL, dev->nodename,
+                           "feature-ctrl-ring",
+                           "%u", true);
+       if (err)
+               pr_debug("Error writing feature-ctrl-ring\n");
+
        script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL);
        if (IS_ERR(script)) {
                err = PTR_ERR(script);
@@ -457,7 +464,8 @@ static void backend_disconnect(struct backend_info *be)
 #ifdef CONFIG_DEBUG_FS
                xenvif_debugfs_delif(be->vif);
 #endif /* CONFIG_DEBUG_FS */
-               xenvif_disconnect(be->vif);
+               xenvif_disconnect_data(be->vif);
+               xenvif_disconnect_ctrl(be->vif);
        }
 }
 
@@ -825,6 +833,48 @@ static void hotplug_status_changed(struct xenbus_watch *watch,
        kfree(str);
 }
 
+static int connect_ctrl_ring(struct backend_info *be)
+{
+       struct xenbus_device *dev = be->dev;
+       struct xenvif *vif = be->vif;
+       unsigned int val;
+       grant_ref_t ring_ref;
+       unsigned int evtchn;
+       int err;
+
+       err = xenbus_gather(XBT_NIL, dev->otherend,
+                           "ctrl-ring-ref", "%u", &val, NULL);
+       if (err)
+               goto done; /* The frontend does not have a control ring */
+
+       ring_ref = val;
+
+       err = xenbus_gather(XBT_NIL, dev->otherend,
+                           "event-channel-ctrl", "%u", &val, NULL);
+       if (err) {
+               xenbus_dev_fatal(dev, err,
+                                "reading %s/event-channel-ctrl",
+                                dev->otherend);
+               goto fail;
+       }
+
+       evtchn = val;
+
+       err = xenvif_connect_ctrl(vif, ring_ref, evtchn);
+       if (err) {
+               xenbus_dev_fatal(dev, err,
+                                "mapping shared-frame %u port %u",
+                                ring_ref, evtchn);
+               goto fail;
+       }
+
+done:
+       return 0;
+
+fail:
+       return err;
+}
+
 static void connect(struct backend_info *be)
 {
        int err;
@@ -861,6 +911,12 @@ static void connect(struct backend_info *be)
        xen_register_watchers(dev, be->vif);
        read_xenbus_vif_flags(be);
 
+       err = connect_ctrl_ring(be);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "connecting control ring");
+               return;
+       }
+
        /* Use the number of queues requested by the frontend */
        be->vif->queues = vzalloc(requested_num_queues *
                                  sizeof(struct xenvif_queue));
@@ -896,11 +952,12 @@ static void connect(struct backend_info *be)
                queue->remaining_credit = credit_bytes;
                queue->credit_usec = credit_usec;
 
-               err = connect_rings(be, queue);
+               err = connect_data_rings(be, queue);
                if (err) {
-                       /* connect_rings() cleans up after itself on failure,
-                        * but we need to clean up after xenvif_init_queue() here,
-                        * and also clean up any previously initialised queues.
+                       /* connect_data_rings() cleans up after itself on
+                        * failure, but we need to clean up after
+                        * xenvif_init_queue() here, and also clean up any
+                        * previously initialised queues.
                         */
                        xenvif_deinit_queue(queue);
                        be->vif->num_queues = queue_index;
@@ -935,15 +992,17 @@ static void connect(struct backend_info *be)
 
 err:
        if (be->vif->num_queues > 0)
-               xenvif_disconnect(be->vif); /* Clean up existing queues */
+               xenvif_disconnect_data(be->vif); /* Clean up existing queues */
        vfree(be->vif->queues);
        be->vif->queues = NULL;
        be->vif->num_queues = 0;
+       xenvif_disconnect_ctrl(be->vif);
        return;
 }
 
 
-static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
+static int connect_data_rings(struct backend_info *be,
+                             struct xenvif_queue *queue)
 {
        struct xenbus_device *dev = be->dev;
        unsigned int num_queues = queue->vif->num_queues;
@@ -1007,8 +1066,8 @@ static int connect_rings(struct backend_info *be, struct xenvif_queue *queue)
        }
 
        /* Map the shared frame, irq etc. */
-       err = xenvif_connect(queue, tx_ring_ref, rx_ring_ref,
-                            tx_evtchn, rx_evtchn);
+       err = xenvif_connect_data(queue, tx_ring_ref, rx_ring_ref,
+                                 tx_evtchn, rx_evtchn);
        if (err) {
                xenbus_dev_fatal(dev, err,
                                 "mapping shared-frames %lu/%lu port tx %u rx %u",