]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - drivers/infiniband/ulp/ipoib/ipoib_main.c
IB/ipoib: For sendonly join free the multicast group on leave
[karo-tx-linux.git] / drivers / infiniband / ulp / ipoib / ipoib_main.c
index b2943c84a5dda0aecdd8904917f2ebbb02b9b013..babba05d7a0eb707f472d7de3cb06843a0844eff 100644 (file)
@@ -48,6 +48,9 @@
 
 #include <linux/jhash.h>
 #include <net/arp.h>
+#include <net/addrconf.h>
+#include <linux/inetdevice.h>
+#include <rdma/ib_cache.h>
 
 #define DRV_VERSION "1.0.0"
 
@@ -89,13 +92,18 @@ struct workqueue_struct *ipoib_workqueue;
 struct ib_sa_client ipoib_sa_client;
 
 static void ipoib_add_one(struct ib_device *device);
-static void ipoib_remove_one(struct ib_device *device);
+static void ipoib_remove_one(struct ib_device *device, void *client_data);
 static void ipoib_neigh_reclaim(struct rcu_head *rp);
+static struct net_device *ipoib_get_net_dev_by_params(
+               struct ib_device *dev, u8 port, u16 pkey,
+               const union ib_gid *gid, const struct sockaddr *addr,
+               void *client_data);
 
 static struct ib_client ipoib_client = {
        .name   = "ipoib",
        .add    = ipoib_add_one,
-       .remove = ipoib_remove_one
+       .remove = ipoib_remove_one,
+       .get_net_dev_by_params = ipoib_get_net_dev_by_params,
 };
 
 int ipoib_open(struct net_device *dev)
@@ -222,6 +230,225 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
+/*
+ * Check whether a socket address matches a net_device.
+ *
+ * Called with an RCU read lock taken.
+ *
+ * @addr: address to look for. Only AF_INET and AF_INET6 are examined; any
+ *        other address family never matches.
+ * @dev: net_device to test the address against.
+ *
+ * Returns true when inet_confirm_addr() (IPv4) or ipv6_chk_addr() (IPv6)
+ * reports the address for @dev, false otherwise.
+ */
+static bool ipoib_is_dev_match_addr_rcu(const struct sockaddr *addr,
+                                       struct net_device *dev)
+{
+       struct net *net = dev_net(dev);
+       struct in_device *in_dev;
+       /* The address is only read, so keep @addr's const qualifier. */
+       const struct sockaddr_in *addr_in = (const struct sockaddr_in *)addr;
+       const struct sockaddr_in6 *addr_in6 =
+               (const struct sockaddr_in6 *)addr;
+       __be32 ret_addr;
+
+       switch (addr->sa_family) {
+       case AF_INET:
+               /* A device without IPv4 state cannot own the address. */
+               in_dev = in_dev_get(dev);
+               if (!in_dev)
+                       return false;
+
+               ret_addr = inet_confirm_addr(net, in_dev, 0,
+                                            addr_in->sin_addr.s_addr,
+                                            RT_SCOPE_HOST);
+               /* Drop the reference taken by in_dev_get() above. */
+               in_dev_put(in_dev);
+               if (ret_addr)
+                       return true;
+
+               break;
+       case AF_INET6:
+               /* ipv6_chk_addr() is only consulted when CONFIG_IPV6 is on. */
+               if (IS_ENABLED(CONFIG_IPV6) &&
+                   ipv6_chk_addr(net, &addr_in6->sin6_addr, dev, 1))
+                       return true;
+
+               break;
+       }
+       return false;
+}
+
+/**
+ * ipoib_get_master_net_dev - find the master net_device above a net_device
+ * @dev: base IPoIB net_device
+ *
+ * Return: the master net_device, or @dev itself when no master exists. In
+ * both cases a reference is held on the returned device.
+ */
+static struct net_device *ipoib_get_master_net_dev(struct net_device *dev)
+{
+       struct net_device *held;
+
+       rcu_read_lock();
+       held = netdev_master_upper_dev_get_rcu(dev);
+       /* No master on top of us: hand back the base device instead. */
+       if (!held)
+               held = dev;
+       dev_hold(held);
+       rcu_read_unlock();
+
+       return held;
+}
+
+/**
+ * ipoib_get_net_dev_match_addr - find a net_device matching an address
+ * @addr: IP address to look for.
+ * @dev: base IPoIB net_device
+ *
+ * @dev itself is tested first, then each of its upper devices in iteration
+ * order; the first device that matches wins.
+ *
+ * Return: the matching net_device with a reference held, or NULL when
+ * neither @dev nor any of its upper devices matches @addr.
+ */
+static struct net_device *ipoib_get_net_dev_match_addr(
+               const struct sockaddr *addr, struct net_device *dev)
+{
+       struct net_device *match = NULL;
+       struct net_device *upper_dev;
+       struct list_head *pos;
+
+       rcu_read_lock();
+       if (ipoib_is_dev_match_addr_rcu(addr, dev)) {
+               match = dev;
+       } else {
+               netdev_for_each_all_upper_dev_rcu(dev, upper_dev, pos) {
+                       if (ipoib_is_dev_match_addr_rcu(addr, upper_dev)) {
+                               match = upper_dev;
+                               break;
+                       }
+               }
+       }
+
+       /* Take the reference before leaving the RCU read-side section. */
+       if (match)
+               dev_hold(match);
+       rcu_read_unlock();
+
+       return match;
+}
+
+/*
+ * Count the IPoIB netdevs, among @priv and (recursively) its child
+ * interfaces, that match a pkey index, an optional GID and an optional
+ * address.
+ *
+ * @priv: IPoIB device to start from.
+ * @gid: GID compared against the device's local GID; NULL skips the GID test.
+ * @pkey_index: pkey index the device must be using.
+ * @addr: address looked up on the device and its upper devices; when NULL
+ *        the master net_device of the matching device is taken instead.
+ * @nesting: subclass passed to down_read_nested() for @priv->vlan_rwsem;
+ *           increased by one for every level of child interfaces.
+ * @found_net_dev: if it points to NULL on entry it receives the first
+ *                 matching net_device, with a reference held. Later matches
+ *                 are counted but their references are dropped.
+ *
+ * Returns the number of matches; scanning the children stops as soon as more
+ * than one match has been seen.
+ */
+static int ipoib_match_gid_pkey_addr(struct ipoib_dev_priv *priv,
+                                    const union ib_gid *gid,
+                                    u16 pkey_index,
+                                    const struct sockaddr *addr,
+                                    int nesting,
+                                    struct net_device **found_net_dev)
+{
+       struct ipoib_dev_priv *child;
+       struct net_device *candidate;
+       int count = 0;
+
+       /* L2 parameters first: pkey index, then GID when one was given. */
+       if (priv->pkey_index != pkey_index)
+               goto scan_children;
+       if (gid && memcmp(gid, &priv->local_gid, sizeof(*gid)))
+               goto scan_children;
+
+       /* With an address, verify the net_device matches the IP address, as
+        * IPoIB child devices currently share a GID. */
+       candidate = addr ? ipoib_get_net_dev_match_addr(addr, priv->dev)
+                        : ipoib_get_master_net_dev(priv->dev);
+       if (candidate) {
+               ++count;
+               /* Only the first match is reported; drop extra references. */
+               if (*found_net_dev)
+                       dev_put(candidate);
+               else
+                       *found_net_dev = candidate;
+       }
+
+scan_children:
+       /* Check child interfaces */
+       down_read_nested(&priv->vlan_rwsem, nesting);
+       list_for_each_entry(child, &priv->child_intfs, list) {
+               count += ipoib_match_gid_pkey_addr(child, gid, pkey_index,
+                                                  addr, nesting + 1,
+                                                  found_net_dev);
+               if (count > 1)
+                       break;
+       }
+       up_read(&priv->vlan_rwsem);
+
+       return count;
+}
+
+/*
+ * Walk @dev_list and count the net_devs on @port that match @pkey_index,
+ * @gid and @addr (see ipoib_match_gid_pkey_addr() for how NULL @gid and
+ * NULL @addr are treated).
+ *
+ * *@net_dev is reset to NULL first. When at least one match is found it is
+ * set to a matching net_device with a reference held.
+ *
+ * Returns the number of matches found; the walk stops as soon as that number
+ * exceeds one.
+ */
+static int __ipoib_get_net_dev_by_params(struct list_head *dev_list, u8 port,
+                                        u16 pkey_index,
+                                        const union ib_gid *gid,
+                                        const struct sockaddr *addr,
+                                        struct net_device **net_dev)
+{
+       struct ipoib_dev_priv *entry;
+       int found = 0;
+
+       *net_dev = NULL;
+
+       list_for_each_entry(entry, dev_list, list) {
+               /* Only devices sitting on the requested port are examined. */
+               if (entry->port == port) {
+                       found += ipoib_match_gid_pkey_addr(entry, gid,
+                                                          pkey_index, addr,
+                                                          0, net_dev);
+                       if (found > 1)
+                               break;
+               }
+       }
+
+       return found;
+}
+
+/*
+ * ib_client .get_net_dev_by_params callback: find the net_device that
+ * corresponds to the given port, pkey, GID and address.
+ *
+ * @dev: IB device the lookup is for.
+ * @port: port number on @dev.
+ * @pkey: pkey value, translated to a pkey index through the pkey cache.
+ * @gid: GID to match, handed to the lookup helpers unchanged.
+ * @addr: address used to disambiguate when several devices match the L2
+ *        parameters.
+ * @client_data: the list of IPoIB devices for @dev; may be NULL.
+ *
+ * Returns the net_device found by the lookup helpers (which hand it back
+ * with a reference held), or NULL when the port is not an IB port, the pkey
+ * is not in the cache, there is no device list, or nothing matches.
+ */
+static struct net_device *ipoib_get_net_dev_by_params(
+               struct ib_device *dev, u8 port, u16 pkey,
+               const union ib_gid *gid, const struct sockaddr *addr,
+               void *client_data)
+{
+       struct list_head *dev_list = client_data;
+       struct net_device *net_dev;
+       u16 pkey_index;
+       int matches;
+
+       if (!rdma_protocol_ib(dev, port))
+               return NULL;
+
+       if (ib_find_cached_pkey(dev, port, pkey, &pkey_index))
+               return NULL;
+
+       if (!dev_list)
+               return NULL;
+
+       /* See if we can find a unique device matching the L2 parameters */
+       matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
+                                               gid, NULL, &net_dev);
+       if (matches == 0)
+               return NULL;
+       if (matches == 1)
+               return net_dev;
+
+       /* Ambiguous on L2 alone: release the candidate and retry with L3. */
+       dev_put(net_dev);
+
+       /* Couldn't find a unique device with L2 parameters only. Use L3
+        * address to uniquely match the net device */
+       matches = __ipoib_get_net_dev_by_params(dev_list, port, pkey_index,
+                                               gid, addr, &net_dev);
+       if (matches == 0)
+               return NULL;
+
+       /* Still ambiguous: warn, but return the first match anyway. */
+       if (matches != 1)
+               dev_warn_ratelimited(&dev->dev,
+                                    "duplicate IP address detected\n");
+
+       return net_dev;
+}
+
 int ipoib_set_mode(struct net_device *dev, const char *buf)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -922,6 +1149,9 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
        unsigned long dt;
        unsigned long flags;
        int i;
+       LIST_HEAD(remove_list);
+       struct ipoib_mcast *mcast, *tmcast;
+       struct net_device *dev = priv->dev;
 
        if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
                return;
@@ -949,6 +1179,19 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
                                                          lockdep_is_held(&priv->lock))) != NULL) {
                        /* was the neigh idle for two GC periods */
                        if (time_after(neigh_obsolete, neigh->alive)) {
+                               u8 *mgid = neigh->daddr + 4;
+
+                               /* Is this multicast ? */
+                               if (*mgid == 0xff) {
+                                       mcast = __ipoib_mcast_find(dev, mgid);
+
+                                       if (mcast && test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
+                                               list_del(&mcast->list);
+                                               rb_erase(&mcast->rb_node, &priv->multicast_tree);
+                                               list_add_tail(&mcast->list, &remove_list);
+                                       }
+                               }
+
                                rcu_assign_pointer(*np,
                                                   rcu_dereference_protected(neigh->hnext,
                                                                             lockdep_is_held(&priv->lock)));
@@ -964,6 +1207,10 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
 
 out_unlock:
        spin_unlock_irqrestore(&priv->lock, flags);
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+               ipoib_mcast_leave(dev, mcast);
+               ipoib_mcast_free(mcast);
+       }
 }
 
 static void ipoib_reap_neigh(struct work_struct *work)
@@ -1715,12 +1962,11 @@ static void ipoib_add_one(struct ib_device *device)
        ib_set_client_data(device, &ipoib_client, dev_list);
 }
 
-static void ipoib_remove_one(struct ib_device *device)
+static void ipoib_remove_one(struct ib_device *device, void *client_data)
 {
        struct ipoib_dev_priv *priv, *tmp;
-       struct list_head *dev_list;
+       struct list_head *dev_list = client_data;
 
-       dev_list = ib_get_client_data(device, &ipoib_client);
        if (!dev_list)
                return;