Merge branches 'misc-4.7-2', 'ipoib' and 'ib-router' into k.o/for-4.7
author    Doug Ledford <dledford@redhat.com>
          Thu, 26 May 2016 15:55:19 +0000 (11:55 -0400)
committer Doug Ledford <dledford@redhat.com>
          Thu, 26 May 2016 15:55:19 +0000 (11:55 -0400)
18 files changed:
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/multicast.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/hw/qib/qib_mad.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
drivers/infiniband/ulp/ipoib/ipoib_vlan.c
drivers/infiniband/ulp/srpt/ib_srpt.c
include/rdma/ib_mad.h
include/rdma/ib_sa.h
include/uapi/rdma/rdma_netlink.h

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index 26987d9d7e1cdccd5922322e34fd31466d8c064d..edaae9f9853c73b2f990ccbd82ebcee3868010fd 100644
@@ -1,8 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)     := rdma_cm.o
 user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)    := rdma_ucm.o
 
-obj-$(CONFIG_INFINIBAND) +=            ib_core.o ib_mad.o ib_sa.o \
-                                       ib_cm.o iw_cm.o ib_addr.o \
+obj-$(CONFIG_INFINIBAND) +=            ib_core.o ib_cm.o iw_cm.o \
                                        $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=   ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o \
@@ -10,14 +9,11 @@ obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=      ib_uverbs.o ib_ucm.o \
 
 ib_core-y :=                   packer.o ud_header.o verbs.o cq.o rw.o sysfs.o \
                                device.o fmr_pool.o cache.o netlink.o \
-                               roce_gid_mgmt.o mr_pool.o
+                               roce_gid_mgmt.o mr_pool.o addr.o sa_query.o \
+                               multicast.o mad.o smi.o agent.o mad_rmpp.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
 ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
 
-ib_mad-y :=                    mad.o smi.o agent.o mad_rmpp.o
-
-ib_sa-y :=                     sa_query.o multicast.o
-
 ib_cm-y :=                     cm.o
 
 iw_cm-y :=                     iwcm.o iwpm_util.o iwpm_msg.o
@@ -28,8 +24,6 @@ rdma_cm-$(CONFIG_INFINIBAND_ADDR_TRANS_CONFIGFS) += cma_configfs.o
 
 rdma_ucm-y :=                  ucma.o
 
-ib_addr-y :=                   addr.o
-
 ib_umad-y :=                   user_mad.o
 
 ib_ucm-y :=                    ucm.o
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 337353d86cfadec33064e10117539769fcc0f95d..1374541a45287ffd0ed43261126141a533a91815 100644
 #include <net/ip6_route.h>
 #include <rdma/ib_addr.h>
 #include <rdma/ib.h>
+#include <rdma/rdma_netlink.h>
+#include <net/netlink.h>
 
-MODULE_AUTHOR("Sean Hefty");
-MODULE_DESCRIPTION("IB Address Translation");
-MODULE_LICENSE("Dual BSD/GPL");
+#include "core_priv.h"
 
 struct addr_req {
        struct list_head list;
@@ -62,8 +62,11 @@ struct addr_req {
                         struct rdma_dev_addr *addr, void *context);
        unsigned long timeout;
        int status;
+       u32 seq;
 };
 
+static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0);
+
 static void process_req(struct work_struct *work);
 
 static DEFINE_MUTEX(lock);
@@ -71,6 +74,126 @@ static LIST_HEAD(req_list);
 static DECLARE_DELAYED_WORK(work, process_req);
 static struct workqueue_struct *addr_wq;
 
+static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
+       [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
+               .len = sizeof(struct rdma_nla_ls_gid)},
+};
+
+static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
+{
+       struct nlattr *tb[LS_NLA_TYPE_MAX] = {};
+       int ret;
+
+       if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR)
+               return false;
+
+       ret = nla_parse(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh),
+                       nlmsg_len(nlh), ib_nl_addr_policy);
+       if (ret)
+               return false;
+
+       return true;
+}
+
+static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh)
+{
+       const struct nlattr *head, *curr;
+       union ib_gid gid;
+       struct addr_req *req;
+       int len, rem;
+       int found = 0;
+
+       head = (const struct nlattr *)nlmsg_data(nlh);
+       len = nlmsg_len(nlh);
+
+       nla_for_each_attr(curr, head, len, rem) {
+               if (curr->nla_type == LS_NLA_TYPE_DGID)
+                       memcpy(&gid, nla_data(curr), nla_len(curr));
+       }
+
+       mutex_lock(&lock);
+       list_for_each_entry(req, &req_list, list) {
+               if (nlh->nlmsg_seq != req->seq)
+                       continue;
+               /* We set the DGID part, the rest was set earlier */
+               rdma_addr_set_dgid(req->addr, &gid);
+               req->status = 0;
+               found = 1;
+               break;
+       }
+       mutex_unlock(&lock);
+
+       if (!found)
+               pr_info("Couldn't find request waiting for DGID: %pI6\n",
+                       &gid);
+}
+
+int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
+                            struct netlink_callback *cb)
+{
+       const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
+
+       if ((nlh->nlmsg_flags & NLM_F_REQUEST) ||
+           !(NETLINK_CB(skb).sk) ||
+           !netlink_capable(skb, CAP_NET_ADMIN))
+               return -EPERM;
+
+       if (ib_nl_is_good_ip_resp(nlh))
+               ib_nl_process_good_ip_rsep(nlh);
+
+       return skb->len;
+}
+
+static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr,
+                            const void *daddr,
+                            u32 seq, u16 family)
+{
+       struct sk_buff *skb = NULL;
+       struct nlmsghdr *nlh;
+       struct rdma_ls_ip_resolve_header *header;
+       void *data;
+       size_t size;
+       int attrtype;
+       int len;
+
+       if (family == AF_INET) {
+               size = sizeof(struct in_addr);
+               attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4;
+       } else {
+               size = sizeof(struct in6_addr);
+               attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6;
+       }
+
+       len = nla_total_size(sizeof(size));
+       len += NLMSG_ALIGN(sizeof(*header));
+
+       skb = nlmsg_new(len, GFP_KERNEL);
+       if (!skb)
+               return -ENOMEM;
+
+       data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS,
+                           RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST);
+       if (!data) {
+               nlmsg_free(skb);
+               return -ENODATA;
+       }
+
+       /* Construct the family header first */
+       header = (struct rdma_ls_ip_resolve_header *)
+               skb_put(skb, NLMSG_ALIGN(sizeof(*header)));
+       header->ifindex = dev_addr->bound_dev_if;
+       nla_put(skb, attrtype, size, daddr);
+
+       /* Repair the nlmsg header length */
+       nlmsg_end(skb, nlh);
+       ibnl_multicast(skb, nlh, RDMA_NL_GROUP_LS, GFP_KERNEL);
+
+       /* Make the request be retried, so that when the response arrives from
+        * userspace there is a pending request waiting for it.
+        */
+       return -ENODATA;
+}
+
 int rdma_addr_size(struct sockaddr *addr)
 {
        switch (addr->sa_family) {
@@ -199,6 +322,17 @@ static void queue_req(struct addr_req *req)
        mutex_unlock(&lock);
 }
 
+static int ib_nl_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+                         const void *daddr, u32 seq, u16 family)
+{
+       if (ibnl_chk_listeners(RDMA_NL_GROUP_LS))
+               return -EADDRNOTAVAIL;
+
+       /* We fill in what we can, the response will fill the rest */
+       rdma_copy_addr(dev_addr, dst->dev, NULL);
+       return ib_nl_ip_send_msg(dev_addr, daddr, seq, family);
+}
+
 static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
                        const void *daddr)
 {
@@ -223,6 +357,39 @@ static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
        return ret;
 }
 
+static bool has_gateway(struct dst_entry *dst, sa_family_t family)
+{
+       struct rtable *rt;
+       struct rt6_info *rt6;
+
+       if (family == AF_INET) {
+               rt = container_of(dst, struct rtable, dst);
+               return rt->rt_uses_gateway;
+       }
+
+       rt6 = container_of(dst, struct rt6_info, dst);
+       return rt6->rt6i_flags & RTF_GATEWAY;
+}
+
+static int fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr,
+                   const struct sockaddr *dst_in, u32 seq)
+{
+       const struct sockaddr_in *dst_in4 =
+               (const struct sockaddr_in *)dst_in;
+       const struct sockaddr_in6 *dst_in6 =
+               (const struct sockaddr_in6 *)dst_in;
+       const void *daddr = (dst_in->sa_family == AF_INET) ?
+               (const void *)&dst_in4->sin_addr.s_addr :
+               (const void *)&dst_in6->sin6_addr;
+       sa_family_t family = dst_in->sa_family;
+
+       /* Gateway + ARPHRD_INFINIBAND -> IB router */
+       if (has_gateway(dst, family) && dst->dev->type == ARPHRD_INFINIBAND)
+               return ib_nl_fetch_ha(dst, dev_addr, daddr, seq, family);
+       else
+               return dst_fetch_ha(dst, dev_addr, daddr);
+}
+
 static int addr4_resolve(struct sockaddr_in *src_in,
                         const struct sockaddr_in *dst_in,
                         struct rdma_dev_addr *addr,
@@ -246,10 +413,11 @@ static int addr4_resolve(struct sockaddr_in *src_in,
        src_in->sin_family = AF_INET;
        src_in->sin_addr.s_addr = fl4.saddr;
 
-       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
-        * routable) and we could set the network type accordingly.
+       /* If there's a gateway and the device type is not ARPHRD_INFINIBAND,
+        * we're definitely in RoCE v2 (as RoCE v1 isn't routable), so set the
+        * network type accordingly.
         */
-       if (rt->rt_uses_gateway)
+       if (rt->rt_uses_gateway && rt->dst.dev->type != ARPHRD_INFINIBAND)
                addr->network = RDMA_NETWORK_IPV4;
 
        addr->hoplimit = ip4_dst_hoplimit(&rt->dst);
@@ -291,10 +459,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
                src_in->sin6_addr = fl6.saddr;
        }
 
-       /* If there's a gateway, we're definitely in RoCE v2 (as RoCE v1 isn't
-        * routable) and we could set the network type accordingly.
+       /* If there's a gateway and the device type is not ARPHRD_INFINIBAND,
+        * we're definitely in RoCE v2 (as RoCE v1 isn't routable), so set the
+        * network type accordingly.
         */
-       if (rt->rt6i_flags & RTF_GATEWAY)
+       if (rt->rt6i_flags & RTF_GATEWAY &&
+           ip6_dst_idev(dst)->dev->type != ARPHRD_INFINIBAND)
                addr->network = RDMA_NETWORK_IPV6;
 
        addr->hoplimit = ip6_dst_hoplimit(dst);
@@ -317,7 +487,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 
 static int addr_resolve_neigh(struct dst_entry *dst,
                              const struct sockaddr *dst_in,
-                             struct rdma_dev_addr *addr)
+                             struct rdma_dev_addr *addr,
+                             u32 seq)
 {
        if (dst->dev->flags & IFF_LOOPBACK) {
                int ret;
@@ -331,17 +502,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
        }
 
        /* If the device doesn't do ARP internally */
-       if (!(dst->dev->flags & IFF_NOARP)) {
-               const struct sockaddr_in *dst_in4 =
-                       (const struct sockaddr_in *)dst_in;
-               const struct sockaddr_in6 *dst_in6 =
-                       (const struct sockaddr_in6 *)dst_in;
-
-               return dst_fetch_ha(dst, addr,
-                                   dst_in->sa_family == AF_INET ?
-                                   (const void *)&dst_in4->sin_addr.s_addr :
-                                   (const void *)&dst_in6->sin6_addr);
-       }
+       if (!(dst->dev->flags & IFF_NOARP))
+               return fetch_ha(dst, addr, dst_in, seq);
 
        return rdma_copy_addr(addr, dst->dev, NULL);
 }
@@ -349,7 +511,8 @@ static int addr_resolve_neigh(struct dst_entry *dst,
 static int addr_resolve(struct sockaddr *src_in,
                        const struct sockaddr *dst_in,
                        struct rdma_dev_addr *addr,
-                       bool resolve_neigh)
+                       bool resolve_neigh,
+                       u32 seq)
 {
        struct net_device *ndev;
        struct dst_entry *dst;
@@ -366,7 +529,7 @@ static int addr_resolve(struct sockaddr *src_in,
                        return ret;
 
                if (resolve_neigh)
-                       ret = addr_resolve_neigh(&rt->dst, dst_in, addr);
+                       ret = addr_resolve_neigh(&rt->dst, dst_in, addr, seq);
 
                ndev = rt->dst.dev;
                dev_hold(ndev);
@@ -383,7 +546,7 @@ static int addr_resolve(struct sockaddr *src_in,
                        return ret;
 
                if (resolve_neigh)
-                       ret = addr_resolve_neigh(dst, dst_in, addr);
+                       ret = addr_resolve_neigh(dst, dst_in, addr, seq);
 
                ndev = dst->dev;
                dev_hold(ndev);
@@ -412,7 +575,7 @@ static void process_req(struct work_struct *work)
                        src_in = (struct sockaddr *) &req->src_addr;
                        dst_in = (struct sockaddr *) &req->dst_addr;
                        req->status = addr_resolve(src_in, dst_in, req->addr,
-                                                  true);
+                                                  true, req->seq);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
                        else if (req->status == -ENODATA)
@@ -471,8 +634,9 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->context = context;
        req->client = client;
        atomic_inc(&client->refcount);
+       req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
 
-       req->status = addr_resolve(src_in, dst_in, addr, true);
+       req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
        switch (req->status) {
        case 0:
                req->timeout = jiffies;
@@ -510,7 +674,7 @@ int rdma_resolve_ip_route(struct sockaddr *src_addr,
                src_in->sa_family = dst_addr->sa_family;
        }
 
-       return addr_resolve(src_in, dst_addr, addr, false);
+       return addr_resolve(src_in, dst_addr, addr, false, 0);
 }
 EXPORT_SYMBOL(rdma_resolve_ip_route);
 
@@ -634,7 +798,7 @@ static struct notifier_block nb = {
        .notifier_call = netevent_callback
 };
 
-static int __init addr_init(void)
+int addr_init(void)
 {
        addr_wq = create_singlethread_workqueue("ib_addr");
        if (!addr_wq)
@@ -642,15 +806,13 @@ static int __init addr_init(void)
 
        register_netevent_notifier(&nb);
        rdma_addr_register_client(&self);
+
        return 0;
 }
 
-static void __exit addr_cleanup(void)
+void addr_cleanup(void)
 {
        rdma_addr_unregister_client(&self);
        unregister_netevent_notifier(&nb);
        destroy_workqueue(addr_wq);
 }
-
-module_init(addr_init);
-module_exit(addr_cleanup);
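
The handlers above expect the userspace resolver to answer the RDMA_NL_LS_OP_IP_RESOLVE request with a netlink message whose sequence number matches req->seq and which carries a single LS_NLA_TYPE_DGID attribute; because ib_nl_ip_send_msg() returns -ENODATA, the request stays on req_list and process_req() keeps retrying until such a reply arrives or the timeout expires. A hedged sketch of how that reply could be laid out, using only uapi netlink macros; build_ip_resolve_reply() and the raw buffer handling are illustrative, not an existing API:

#include <string.h>
#include <linux/netlink.h>
#include <rdma/rdma_netlink.h>

static int build_ip_resolve_reply(void *buf, __u32 seq, const __u8 dgid[16])
{
	struct nlmsghdr *nlh = buf;
	struct nlattr *nla;

	nlh->nlmsg_type  = RDMA_NL_GET_TYPE(RDMA_NL_LS, RDMA_NL_LS_OP_IP_RESOLVE);
	nlh->nlmsg_flags = 0;		/* no NLM_F_REQUEST, no RDMA_NL_LS_F_ERR */
	nlh->nlmsg_seq   = seq;		/* must match the req->seq chosen by the kernel */
	nlh->nlmsg_pid   = 0;

	nla = (struct nlattr *)((char *)nlh + NLMSG_HDRLEN);
	nla->nla_type = LS_NLA_TYPE_DGID;
	nla->nla_len  = NLA_HDRLEN + 16;	/* 16-byte GID payload */
	memcpy((char *)nla + NLA_HDRLEN, dgid, 16);

	nlh->nlmsg_len = NLMSG_HDRLEN + NLA_ALIGN(nla->nla_len);
	return nlh->nlmsg_len;
}

The sender also needs CAP_NET_ADMIN on its NETLINK_RDMA socket, since ib_nl_handle_ip_res_resp() rejects unprivileged peers.
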
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index eab32215756b935159355e22c8d9bf76f108f873..19d499dcab764bc74307b3bd3617cd7657874792 100644
@@ -137,4 +137,20 @@ static inline bool rdma_is_upper_dev_rcu(struct net_device *dev,
        return _upper == upper;
 }
 
+int addr_init(void);
+void addr_cleanup(void);
+
+int ib_mad_init(void);
+void ib_mad_cleanup(void);
+
+int ib_sa_init(void);
+void ib_sa_cleanup(void);
+
+int ib_nl_handle_resolve_resp(struct sk_buff *skb,
+                             struct netlink_callback *cb);
+int ib_nl_handle_set_timeout(struct sk_buff *skb,
+                            struct netlink_callback *cb);
+int ib_nl_handle_ip_res_resp(struct sk_buff *skb,
+                            struct netlink_callback *cb);
+
 #endif /* _CORE_PRIV_H */
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 10979844026a01bda540a6f99d2cef0ef56b525f..5516fb0703442cafc0c917d3115b8b25e7c71b27 100644
@@ -955,6 +955,29 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
 }
 EXPORT_SYMBOL(ib_get_net_dev_by_params);
 
+static struct ibnl_client_cbs ibnl_ls_cb_table[] = {
+       [RDMA_NL_LS_OP_RESOLVE] = {
+               .dump = ib_nl_handle_resolve_resp,
+               .module = THIS_MODULE },
+       [RDMA_NL_LS_OP_SET_TIMEOUT] = {
+               .dump = ib_nl_handle_set_timeout,
+               .module = THIS_MODULE },
+       [RDMA_NL_LS_OP_IP_RESOLVE] = {
+               .dump = ib_nl_handle_ip_res_resp,
+               .module = THIS_MODULE },
+};
+
+static int ib_add_ibnl_clients(void)
+{
+       return ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ibnl_ls_cb_table),
+                              ibnl_ls_cb_table);
+}
+
+static void ib_remove_ibnl_clients(void)
+{
+       ibnl_remove_client(RDMA_NL_LS);
+}
+
 static int __init ib_core_init(void)
 {
        int ret;
@@ -983,10 +1006,41 @@ static int __init ib_core_init(void)
                goto err_sysfs;
        }
 
+       ret = addr_init();
+       if (ret) {
+               pr_warn("Could't init IB address resolution\n");
+               goto err_ibnl;
+       }
+
+       ret = ib_mad_init();
+       if (ret) {
+               pr_warn("Couldn't init IB MAD\n");
+               goto err_addr;
+       }
+
+       ret = ib_sa_init();
+       if (ret) {
+               pr_warn("Couldn't init SA\n");
+               goto err_mad;
+       }
+
+       if (ib_add_ibnl_clients()) {
+               pr_warn("Couldn't register ibnl clients\n");
+               goto err_sa;
+       }
+
        ib_cache_setup();
 
        return 0;
 
+err_sa:
+       ib_sa_cleanup();
+err_mad:
+       ib_mad_cleanup();
+err_addr:
+       addr_cleanup();
+err_ibnl:
+       ibnl_cleanup();
 err_sysfs:
        class_unregister(&ib_class);
 err_comp:
@@ -999,6 +1053,10 @@ err:
 static void __exit ib_core_cleanup(void)
 {
        ib_cache_cleanup();
+       ib_remove_ibnl_clients();
+       ib_sa_cleanup();
+       ib_mad_cleanup();
+       addr_cleanup();
        ibnl_cleanup();
        class_unregister(&ib_class);
        destroy_workqueue(ib_comp_wq);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 9fa5bf33f5a34261b16a72c9800b55daab921c84..82fb511112da745e3fdbf30fe35bcebfceee7491 100644
 #include "smi.h"
 #include "opa_smi.h"
 #include "agent.h"
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("kernel IB MAD API");
-MODULE_AUTHOR("Hal Rosenstock");
-MODULE_AUTHOR("Sean Hefty");
+#include "core_priv.h"
 
 static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
 static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
@@ -3316,7 +3312,7 @@ static struct ib_client mad_client = {
        .remove = ib_mad_remove_device
 };
 
-static int __init ib_mad_init_module(void)
+int ib_mad_init(void)
 {
        mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
        mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
@@ -3334,10 +3330,7 @@ static int __init ib_mad_init_module(void)
        return 0;
 }
 
-static void __exit ib_mad_cleanup_module(void)
+void ib_mad_cleanup(void)
 {
        ib_unregister_client(&mad_client);
 }
-
-module_init(ib_mad_init_module);
-module_exit(ib_mad_cleanup_module);
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 250937cb9a1a5071f054a24e74414a8c90bd88a0..a83ec28a147b4884492fa960fd721568791553f2 100644
@@ -93,6 +93,18 @@ enum {
 
 struct mcast_member;
 
+/*
+ * There are 4 types of join states:
+ * FullMember, NonMember, SendOnlyNonMember, SendOnlyFullMember.
+ */
+enum {
+       FULLMEMBER_JOIN,
+       NONMEMBER_JOIN,
+       SENDONLY_NONMEBER_JOIN,
+       SENDONLY_FULLMEMBER_JOIN,
+       NUM_JOIN_MEMBERSHIP_TYPES,
+};
+
 struct mcast_group {
        struct ib_sa_mcmember_rec rec;
        struct rb_node          node;
@@ -102,7 +114,7 @@ struct mcast_group {
        struct list_head        pending_list;
        struct list_head        active_list;
        struct mcast_member     *last_join;
-       int                     members[3];
+       int                     members[NUM_JOIN_MEMBERSHIP_TYPES];
        atomic_t                refcount;
        enum mcast_group_state  state;
        struct ib_sa_query      *query;
@@ -220,8 +232,9 @@ static void queue_join(struct mcast_member *member)
 }
 
 /*
- * A multicast group has three types of members: full member, non member, and
- * send only member.  We need to keep track of the number of members of each
+ * A multicast group has four types of members: full member, non member,
+ * sendonly non member and sendonly full member.
+ * We need to keep track of the number of members of each
  * type based on their join state.  Adjust the number of members that belong to
  * the specified join states.
  */
@@ -229,7 +242,7 @@ static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
 {
        int i;
 
-       for (i = 0; i < 3; i++, join_state >>= 1)
+       for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
                if (join_state & 0x1)
                        group->members[i] += inc;
 }
@@ -245,7 +258,7 @@ static u8 get_leave_state(struct mcast_group *group)
        u8 leave_state = 0;
        int i;
 
-       for (i = 0; i < 3; i++)
+       for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++)
                if (!group->members[i])
                        leave_state |= (0x1 << i);
 
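
For illustration, a minimal standalone sketch (not part of this commit) of how the JoinState bitmask maps onto the four membership counters above; the enum values and the shift-and-test loop mirror adjust_membership(), everything else is made up for the example:

#include <stdio.h>

enum {
	FULLMEMBER_JOIN,
	NONMEMBER_JOIN,
	SENDONLY_NONMEBER_JOIN,
	SENDONLY_FULLMEMBER_JOIN,
	NUM_JOIN_MEMBERSHIP_TYPES,
};

/* Same walk as adjust_membership(): bit i of join_state selects members[i]. */
static void adjust_membership(int *members, unsigned char join_state, int inc)
{
	int i;

	for (i = 0; i < NUM_JOIN_MEMBERSHIP_TYPES; i++, join_state >>= 1)
		if (join_state & 0x1)
			members[i] += inc;
}

int main(void)
{
	int members[NUM_JOIN_MEMBERSHIP_TYPES] = { 0 };

	adjust_membership(members, 0x1, 1);	/* FullMember join (bit 0) */
	adjust_membership(members, 0x8, 1);	/* SendOnlyFullMember join (bit 3) */
	printf("full=%d sendonly_full=%d\n",
	       members[FULLMEMBER_JOIN], members[SENDONLY_FULLMEMBER_JOIN]);
	return 0;
}

With all four counters tracked, get_leave_state() reports a leave bit for exactly those join states whose counters have dropped to zero.
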
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 3ebd108bcc5f272165d61e8da10eab916f8c2b41..e95538650dc6fda8b2d413edfabc60eaad116c28 100644
 #include "sa.h"
 #include "core_priv.h"
 
-MODULE_AUTHOR("Roland Dreier");
-MODULE_DESCRIPTION("InfiniBand subnet administration query support");
-MODULE_LICENSE("Dual BSD/GPL");
-
 #define IB_SA_LOCAL_SVC_TIMEOUT_MIN            100
 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT                2000
 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX            200000
@@ -119,6 +115,12 @@ struct ib_sa_guidinfo_query {
        struct ib_sa_query sa_query;
 };
 
+struct ib_sa_classport_info_query {
+       void (*callback)(int, struct ib_class_port_info *, void *);
+       void *context;
+       struct ib_sa_query sa_query;
+};
+
 struct ib_sa_mcmember_query {
        void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
        void *context;
@@ -392,6 +394,82 @@ static const struct ib_field service_rec_table[] = {
          .size_bits    = 2*64 },
 };
 
+#define CLASSPORTINFO_REC_FIELD(field) \
+       .struct_offset_bytes = offsetof(struct ib_class_port_info, field),      \
+       .struct_size_bytes   = sizeof((struct ib_class_port_info *)0)->field,   \
+       .field_name          = "ib_class_port_info:" #field
+
+static const struct ib_field classport_info_rec_table[] = {
+       { CLASSPORTINFO_REC_FIELD(base_version),
+         .offset_words = 0,
+         .offset_bits  = 0,
+         .size_bits    = 8 },
+       { CLASSPORTINFO_REC_FIELD(class_version),
+         .offset_words = 0,
+         .offset_bits  = 8,
+         .size_bits    = 8 },
+       { CLASSPORTINFO_REC_FIELD(capability_mask),
+         .offset_words = 0,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+       { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time),
+         .offset_words = 1,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { CLASSPORTINFO_REC_FIELD(redirect_gid),
+         .offset_words = 2,
+         .offset_bits  = 0,
+         .size_bits    = 128 },
+       { CLASSPORTINFO_REC_FIELD(redirect_tcslfl),
+         .offset_words = 6,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { CLASSPORTINFO_REC_FIELD(redirect_lid),
+         .offset_words = 7,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { CLASSPORTINFO_REC_FIELD(redirect_pkey),
+         .offset_words = 7,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+
+       { CLASSPORTINFO_REC_FIELD(redirect_qp),
+         .offset_words = 8,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { CLASSPORTINFO_REC_FIELD(redirect_qkey),
+         .offset_words = 9,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+
+       { CLASSPORTINFO_REC_FIELD(trap_gid),
+         .offset_words = 10,
+         .offset_bits  = 0,
+         .size_bits    = 128 },
+       { CLASSPORTINFO_REC_FIELD(trap_tcslfl),
+         .offset_words = 14,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+
+       { CLASSPORTINFO_REC_FIELD(trap_lid),
+         .offset_words = 15,
+         .offset_bits  = 0,
+         .size_bits    = 16 },
+       { CLASSPORTINFO_REC_FIELD(trap_pkey),
+         .offset_words = 15,
+         .offset_bits  = 16,
+         .size_bits    = 16 },
+
+       { CLASSPORTINFO_REC_FIELD(trap_hlqp),
+         .offset_words = 16,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+       { CLASSPORTINFO_REC_FIELD(trap_qkey),
+         .offset_words = 17,
+         .offset_bits  = 0,
+         .size_bits    = 32 },
+};
+
 #define GUIDINFO_REC_FIELD(field) \
        .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field),      \
        .struct_size_bytes   = sizeof((struct ib_sa_guidinfo_rec *) 0)->field,  \
@@ -705,8 +783,8 @@ static void ib_nl_request_timeout(struct work_struct *work)
        spin_unlock_irqrestore(&ib_nl_request_lock, flags);
 }
 
-static int ib_nl_handle_set_timeout(struct sk_buff *skb,
-                                   struct netlink_callback *cb)
+int ib_nl_handle_set_timeout(struct sk_buff *skb,
+                            struct netlink_callback *cb)
 {
        const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
        int timeout, delta, abs_delta;
@@ -782,8 +860,8 @@ static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh)
        return 1;
 }
 
-static int ib_nl_handle_resolve_resp(struct sk_buff *skb,
-                                    struct netlink_callback *cb)
+int ib_nl_handle_resolve_resp(struct sk_buff *skb,
+                             struct netlink_callback *cb)
 {
        const struct nlmsghdr *nlh = (struct nlmsghdr *)cb->nlh;
        unsigned long flags;
@@ -838,15 +916,6 @@ resp_out:
        return skb->len;
 }
 
-static struct ibnl_client_cbs ib_sa_cb_table[] = {
-       [RDMA_NL_LS_OP_RESOLVE] = {
-               .dump = ib_nl_handle_resolve_resp,
-               .module = THIS_MODULE },
-       [RDMA_NL_LS_OP_SET_TIMEOUT] = {
-               .dump = ib_nl_handle_set_timeout,
-               .module = THIS_MODULE },
-};
-
 static void free_sm_ah(struct kref *kref)
 {
        struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -1645,6 +1714,97 @@ err1:
 }
 EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
 
+/* Support get SA ClassPortInfo */
+static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query,
+                                             int status,
+                                             struct ib_sa_mad *mad)
+{
+       struct ib_sa_classport_info_query *query =
+               container_of(sa_query, struct ib_sa_classport_info_query, sa_query);
+
+       if (mad) {
+               struct ib_class_port_info rec;
+
+               ib_unpack(classport_info_rec_table,
+                         ARRAY_SIZE(classport_info_rec_table),
+                         mad->data, &rec);
+               query->callback(status, &rec, query->context);
+       } else {
+               query->callback(status, NULL, query->context);
+       }
+}
+
+static void ib_sa_portclass_info_rec_release(struct ib_sa_query *sa_query)
+{
+       kfree(container_of(sa_query, struct ib_sa_classport_info_query,
+                          sa_query));
+}
+
+int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
+                                  struct ib_device *device, u8 port_num,
+                                  int timeout_ms, gfp_t gfp_mask,
+                                  void (*callback)(int status,
+                                                   struct ib_class_port_info *resp,
+                                                   void *context),
+                                  void *context,
+                                  struct ib_sa_query **sa_query)
+{
+       struct ib_sa_classport_info_query *query;
+       struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+       struct ib_sa_port *port;
+       struct ib_mad_agent *agent;
+       struct ib_sa_mad *mad;
+       int ret;
+
+       if (!sa_dev)
+               return -ENODEV;
+
+       port  = &sa_dev->port[port_num - sa_dev->start_port];
+       agent = port->agent;
+
+       query = kzalloc(sizeof(*query), gfp_mask);
+       if (!query)
+               return -ENOMEM;
+
+       query->sa_query.port = port;
+       ret = alloc_mad(&query->sa_query, gfp_mask);
+       if (ret)
+               goto err1;
+
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
+
+       mad = query->sa_query.mad_buf->mad;
+       init_mad(mad, agent);
+
+       query->sa_query.callback = callback ? ib_sa_classport_info_rec_callback : NULL;
+
+       query->sa_query.release  = ib_sa_portclass_info_rec_release;
+       /* support GET only */
+       mad->mad_hdr.method      = IB_MGMT_METHOD_GET;
+       mad->mad_hdr.attr_id     = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO);
+       mad->sa_hdr.comp_mask    = 0;
+       *sa_query = &query->sa_query;
+
+       ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+       if (ret < 0)
+               goto err2;
+
+       return ret;
+
+err2:
+       *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
+       free_mad(&query->sa_query);
+
+err1:
+       kfree(query);
+       return ret;
+}
+EXPORT_SYMBOL(ib_sa_classport_info_rec_query);
+
 static void send_handler(struct ib_mad_agent *agent,
                         struct ib_mad_send_wc *mad_send_wc)
 {
@@ -1794,7 +1954,7 @@ static void ib_sa_remove_one(struct ib_device *device, void *client_data)
        kfree(sa_dev);
 }
 
-static int __init ib_sa_init(void)
+int ib_sa_init(void)
 {
        int ret;
 
@@ -1820,17 +1980,10 @@ static int __init ib_sa_init(void)
                goto err3;
        }
 
-       if (ibnl_add_client(RDMA_NL_LS, ARRAY_SIZE(ib_sa_cb_table),
-                           ib_sa_cb_table)) {
-               pr_err("Failed to add netlink callback\n");
-               ret = -EINVAL;
-               goto err4;
-       }
        INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout);
 
        return 0;
-err4:
-       destroy_workqueue(ib_nl_wq);
+
 err3:
        mcast_cleanup();
 err2:
@@ -1839,9 +1992,8 @@ err1:
        return ret;
 }
 
-static void __exit ib_sa_cleanup(void)
+void ib_sa_cleanup(void)
 {
-       ibnl_remove_client(RDMA_NL_LS);
        cancel_delayed_work(&ib_nl_timed_work);
        flush_workqueue(ib_nl_wq);
        destroy_workqueue(ib_nl_wq);
@@ -1849,6 +2001,3 @@ static void __exit ib_sa_cleanup(void)
        ib_unregister_client(&sa_client);
        idr_destroy(&query_idr);
 }
-
-module_init(ib_sa_init);
-module_exit(ib_sa_cleanup);
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 0bd18375d7df97301a8a037889c54dba87e004e8..d2ac29861af5b6ba42487c6ef3a6c92258713529 100644
@@ -1172,11 +1172,13 @@ static int pma_get_classportinfo(struct ib_pma_mad *pmp,
         * Set the most significant bit of CM2 to indicate support for
         * congestion statistics
         */
-       p->reserved[0] = dd->psxmitwait_supported << 7;
+       ib_set_cpi_capmask2(p,
+                           dd->psxmitwait_supported <<
+                           (31 - IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE));
        /*
         * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
         */
-       p->resp_time_value = 18;
+       ib_set_cpi_resp_time(p, 18);
 
        return reply((struct ib_smp *) pmp);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index caec8e9c46669bf692d34a31735b86c446da1ff0..bab7db6fa9abf8c045b9e1b7e7a55f8996bf3745 100644
@@ -92,6 +92,8 @@ enum {
        IPOIB_FLAG_UMCAST         = 10,
        IPOIB_STOP_NEIGH_GC       = 11,
        IPOIB_NEIGH_TBL_FLUSH     = 12,
+       IPOIB_FLAG_DEV_ADDR_SET   = 13,
+       IPOIB_FLAG_DEV_ADDR_CTRL  = 14,
 
        IPOIB_MAX_BACKOFF_SECONDS = 16,
 
@@ -392,6 +394,7 @@ struct ipoib_dev_priv {
        struct ipoib_ethtool_st ethtool;
        struct timer_list poll_timer;
        unsigned max_send_sge;
+       bool sm_fullmember_sendonly_support;
 };
 
 struct ipoib_ah {
@@ -476,6 +479,7 @@ void ipoib_reap_ah(struct work_struct *work);
 
 void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
+int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv);
 struct ipoib_dev_priv *ipoib_intf_alloc(const char *format);
 
 int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index da5f28c892ca53bb0acf8414bf074ab71cde6327..7e9a77040a24ea75601f748410a906ca9d87062b 100644
@@ -997,6 +997,106 @@ static inline int update_child_pkey(struct ipoib_dev_priv *priv)
        return 0;
 }
 
+/*
+ * Returns true if the device address of the ipoib interface has changed and
+ * the new address is a valid one (i.e. it is in the gid table); returns false
+ * otherwise.
+ */
+static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
+{
+       union ib_gid search_gid;
+       union ib_gid gid0;
+       union ib_gid *netdev_gid;
+       int err;
+       u16 index;
+       u8 port;
+       bool ret = false;
+
+       netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
+       if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
+               return false;
+
+       netif_addr_lock(priv->dev);
+
+       /* The subnet prefix may have changed, update it now so we won't have
+        * to do it later
+        */
+       priv->local_gid.global.subnet_prefix = gid0.global.subnet_prefix;
+       netdev_gid->global.subnet_prefix = gid0.global.subnet_prefix;
+       search_gid.global.subnet_prefix = gid0.global.subnet_prefix;
+
+       search_gid.global.interface_id = priv->local_gid.global.interface_id;
+
+       netif_addr_unlock(priv->dev);
+
+       err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
+                         priv->dev, &port, &index);
+
+       netif_addr_lock(priv->dev);
+
+       if (search_gid.global.interface_id !=
+           priv->local_gid.global.interface_id)
+               /* There was a change while we were looking up the gid, bail
+                * here and let the next work sort this out
+                */
+               goto out;
+
+       /* The next section of code needs some background:
+        * Per IB spec the port GUID can't change if the HCA is powered on.
+        * The port GUID is the basis for the GID at index 0, which in turn
+        * is the basis for the default device address of an ipoib interface.
+        *
+        * so it seems the flow should be:
+        * if user_changed_dev_addr && gid in gid tbl
+        *      set bit dev_addr_set
+        *      return true
+        * else
+        *      return false
+        *
+        * The issue is that there are devices that don't follow the spec,
+        * they change the port GUID when the HCA is powered, so in order
+        * not to break userspace applications, we need to check if the
+        * user wanted to control the device address, and we assume that
+        * if the user sets the device address back to be based on GID
+        * index 0, the user no longer wishes to control it.
+        *
+        * If the user doesn't control the device address,
+        * IPOIB_FLAG_DEV_ADDR_SET is set, and ib_find_gid failed, it means
+        * the port GUID has changed and the GID at index 0 has changed,
+        * so we need to change priv->local_gid and priv->dev->dev_addr
+        * to reflect the new GID.
+        */
+       if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
+               if (!err && port == priv->port) {
+                       set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
+                       if (index == 0)
+                               clear_bit(IPOIB_FLAG_DEV_ADDR_CTRL,
+                                         &priv->flags);
+                       else
+                               set_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags);
+                       ret = true;
+               } else {
+                       ret = false;
+               }
+       } else {
+               if (!err && port == priv->port) {
+                       ret = true;
+               } else {
+                       if (!test_bit(IPOIB_FLAG_DEV_ADDR_CTRL, &priv->flags)) {
+                               memcpy(&priv->local_gid, &gid0,
+                                      sizeof(priv->local_gid));
+                               memcpy(priv->dev->dev_addr + 4, &gid0,
+                                      sizeof(priv->local_gid));
+                               ret = true;
+                       }
+               }
+       }
+
+out:
+       netif_addr_unlock(priv->dev);
+
+       return ret;
+}
+
 static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
                                enum ipoib_flush_level level,
                                int nesting)
@@ -1018,6 +1118,9 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
 
        if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
            level != IPOIB_FLUSH_HEAVY) {
+               /* Make sure the dev_addr is set even if not flushing */
+               if (level == IPOIB_FLUSH_LIGHT)
+                       ipoib_dev_addr_changed_valid(priv);
                ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
                return;
        }
@@ -1029,7 +1132,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
                                update_parent_pkey(priv);
                        else
                                update_child_pkey(priv);
-               }
+               } else if (level == IPOIB_FLUSH_LIGHT)
+                       ipoib_dev_addr_changed_valid(priv);
                ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
                return;
        }
@@ -1081,7 +1185,8 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
                if (level >= IPOIB_FLUSH_NORMAL)
                        ipoib_ib_dev_up(dev);
-               ipoib_mcast_restart_task(&priv->restart_task);
+               if (ipoib_dev_addr_changed_valid(priv))
+                       ipoib_mcast_restart_task(&priv->restart_task);
        }
 }
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 80807d6e5c4cff878f25eaf0861c9c8822843676..2c3fb5337bc1f7d0be83d19f2b714c5d7e11fd6a 100644
@@ -99,6 +99,7 @@ static struct net_device *ipoib_get_net_dev_by_params(
                struct ib_device *dev, u8 port, u16 pkey,
                const union ib_gid *gid, const struct sockaddr *addr,
                void *client_data);
+static int ipoib_set_mac(struct net_device *dev, void *addr);
 
 static struct ib_client ipoib_client = {
        .name   = "ipoib",
@@ -117,6 +118,8 @@ int ipoib_open(struct net_device *dev)
 
        set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
+       priv->sm_fullmember_sendonly_support = false;
+
        if (ipoib_ib_dev_open(dev)) {
                if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                        return 0;
@@ -629,6 +632,77 @@ void ipoib_mark_paths_invalid(struct net_device *dev)
        spin_unlock_irq(&priv->lock);
 }
 
+struct classport_info_context {
+       struct ipoib_dev_priv   *priv;
+       struct completion       done;
+       struct ib_sa_query      *sa_query;
+};
+
+static void classport_info_query_cb(int status, struct ib_class_port_info *rec,
+                                   void *context)
+{
+       struct classport_info_context *cb_ctx = context;
+       struct ipoib_dev_priv *priv;
+
+       WARN_ON(!context);
+
+       priv = cb_ctx->priv;
+
+       if (status || !rec) {
+               pr_debug("device: %s failed query classport_info status: %d\n",
+                        priv->dev->name, status);
+               /* keeps the default, will try next mcast_restart */
+               priv->sm_fullmember_sendonly_support = false;
+               goto out;
+       }
+
+       if (ib_get_cpi_capmask2(rec) &
+           IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT) {
+               pr_debug("device: %s enabled fullmember-sendonly for sendonly MCG\n",
+                        priv->dev->name);
+               priv->sm_fullmember_sendonly_support = true;
+       } else {
+               pr_debug("device: %s disabled fullmember-sendonly for sendonly MCG\n",
+                        priv->dev->name);
+               priv->sm_fullmember_sendonly_support = false;
+       }
+
+out:
+       complete(&cb_ctx->done);
+}
+
+int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv)
+{
+       struct classport_info_context *callback_context;
+       int ret;
+
+       callback_context = kmalloc(sizeof(*callback_context), GFP_KERNEL);
+       if (!callback_context)
+               return -ENOMEM;
+
+       callback_context->priv = priv;
+       init_completion(&callback_context->done);
+
+       ret = ib_sa_classport_info_rec_query(&ipoib_sa_client,
+                                            priv->ca, priv->port, 3000,
+                                            GFP_KERNEL,
+                                            classport_info_query_cb,
+                                            callback_context,
+                                            &callback_context->sa_query);
+       if (ret < 0) {
+               pr_info("%s failed to send ib_sa_classport_info query, ret: %d\n",
+                       priv->dev->name, ret);
+               kfree(callback_context);
+               return ret;
+       }
+
+       /* wait for the callback to finish before returning */
+       wait_for_completion(&callback_context->done);
+       kfree(callback_context);
+
+       return ret;
+}
+
 void ipoib_flush_paths(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -1649,6 +1723,7 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
        .ndo_get_vf_config       = ipoib_get_vf_config,
        .ndo_get_vf_stats        = ipoib_get_vf_stats,
        .ndo_set_vf_guid         = ipoib_set_vf_guid,
+       .ndo_set_mac_address     = ipoib_set_mac,
 };
 
 static const struct net_device_ops ipoib_netdev_ops_vf = {
@@ -1771,6 +1846,70 @@ int ipoib_add_umcast_attr(struct net_device *dev)
        return device_create_file(&dev->dev, &dev_attr_umcast);
 }
 
+static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
+{
+       struct ipoib_dev_priv *child_priv;
+       struct net_device *netdev = priv->dev;
+
+       netif_addr_lock(netdev);
+
+       memcpy(&priv->local_gid.global.interface_id,
+              &gid->global.interface_id,
+              sizeof(gid->global.interface_id));
+       memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
+       clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
+
+       netif_addr_unlock(netdev);
+
+       if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
+               down_read(&priv->vlan_rwsem);
+               list_for_each_entry(child_priv, &priv->child_intfs, list)
+                       set_base_guid(child_priv, gid);
+               up_read(&priv->vlan_rwsem);
+       }
+}
+
+static int ipoib_check_lladdr(struct net_device *dev,
+                             struct sockaddr_storage *ss)
+{
+       union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
+       int ret = 0;
+
+       netif_addr_lock(dev);
+
+       /* Make sure the QPN, reserved and subnet prefix match the current
+        * lladdr, it also makes sure the lladdr is unicast.
+        */
+       if (memcmp(dev->dev_addr, ss->__data,
+                  4 + sizeof(gid->global.subnet_prefix)) ||
+           gid->global.interface_id == 0)
+               ret = -EINVAL;
+
+       netif_addr_unlock(dev);
+
+       return ret;
+}
+
+static int ipoib_set_mac(struct net_device *dev, void *addr)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct sockaddr_storage *ss = addr;
+       int ret;
+
+       if (!(dev->priv_flags & IFF_LIVE_ADDR_CHANGE) && netif_running(dev))
+               return -EBUSY;
+
+       ret = ipoib_check_lladdr(dev, ss);
+       if (ret)
+               return ret;
+
+       set_base_guid(priv, (union ib_gid *)(ss->__data + 4));
+
+       queue_work(ipoib_workqueue, &priv->flush_light);
+
+       return 0;
+}
+
 static ssize_t create_child(struct device *dev,
                            struct device_attribute *attr,
                            const char *buf, size_t count)
@@ -1894,6 +2033,7 @@ static struct net_device *ipoib_add_port(const char *format,
                goto device_init_failed;
        } else
                memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+       set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
 
        result = ipoib_dev_init(priv->dev, hca, port);
        if (result < 0) {
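
As background for the dev_addr + 4 arithmetic used by set_base_guid() and ipoib_check_lladdr() above, a sketch of the 20-byte (INFINIBAND_ALEN) IPoIB link-layer address; the struct name is invented for the illustration:

#include <stdint.h>

/* Illustrative only: layout of the IPoIB hardware address in dev->dev_addr. */
struct ipoib_hw_addr {			/* 20 bytes == INFINIBAND_ALEN */
	uint8_t flags_qpn[4];		/* flags/reserved byte plus 24-bit queue pair number */
	uint8_t gid[16];		/* bytes 0-7 subnet prefix, bytes 8-15 interface id */
};

ipoib_check_lladdr() compares the first 4 + 8 bytes (QPN plus subnet prefix), so a user-supplied address may only change the interface-id half of the GID, which is exactly the half that set_base_guid() copies into priv->local_gid.
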
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 25889311b1e9c8db412f784a77de5aa8ef0a29cb..82fbc9442608f6e10fb0b851a08baf32cfacc5c8 100644
@@ -64,6 +64,9 @@ struct ipoib_mcast_iter {
        unsigned int       send_only;
 };
 
+/* join state that allows creating mcg with sendonly member request */
+#define SENDONLY_FULLMEMBER_JOIN       8
+
 /*
  * This should be called with the priv->lock held
  */
@@ -326,12 +329,23 @@ void ipoib_mcast_carrier_on_task(struct work_struct *work)
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   carrier_on_task);
        struct ib_port_attr attr;
+       int ret;
 
        if (ib_query_port(priv->ca, priv->port, &attr) ||
            attr.state != IB_PORT_ACTIVE) {
                ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                return;
        }
+       /*
+        * Check if we can create sendonly MCGs with the sendonly-fullmember
+        * join state.  This is done here, after successfully joining the
+        * broadcast group, because the broadcast group must always be joined
+        * first and is always re-joined if the SM changes substantially.
+        */
+       ret = ipoib_check_sm_sendonly_fullmember_support(priv);
+       if (ret < 0)
+               pr_debug("%s failed query sm support for sendonly-fullmember (ret: %d)\n",
+                        priv->dev->name, ret);
 
        /*
         * Take rtnl_lock to avoid racing with ipoib_stop() and
@@ -515,22 +529,20 @@ static int ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast)
                rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
 
                /*
-                * Send-only IB Multicast joins do not work at the core
-                * IB layer yet, so we can't use them here.  However,
-                * we are emulating an Ethernet multicast send, which
-                * does not require a multicast subscription and will
-                * still send properly.  The most appropriate thing to
+                * Send-only IB Multicast joins work at the core IB layer but
+                * require specific SM support.
+                * We can use such joins here only if the current SM supports that feature.
+                * If it does not, we emulate an Ethernet multicast send,
+                * which does not require a multicast subscription and will
+                * still send properly. The most appropriate thing to
                 * do is to create the group if it doesn't exist as that
                 * most closely emulates the behavior, from a user space
-                * application perspecitive, of Ethernet multicast
-                * operation.  For now, we do a full join, maybe later
-                * when the core IB layers support send only joins we
-                * will use them.
+                * application perspective, of Ethernet multicast operation.
                 */
-#if 0
-               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                       rec.join_state = 4;
-#endif
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+                   priv->sm_fullmember_sendonly_support)
+                       /* SM supports sendonly-fullmember, otherwise fallback to full-member */
+                       rec.join_state = SENDONLY_FULLMEMBER_JOIN;
        }
        spin_unlock_irq(&priv->lock);
 
@@ -570,11 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
                return;
        }
        priv->local_lid = port_attr.lid;
+       netif_addr_lock(dev);
 
-       if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
-               ipoib_warn(priv, "ib_query_gid() failed\n");
-       else
-               memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+       if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
+               netif_addr_unlock(dev);
+               return;
+       }
+       netif_addr_unlock(dev);
 
        spin_lock_irq(&priv->lock);
        if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index b809c373e40e54598a39bf555571ff80eba25277..1e7cbbaa15bd0c0369c9595517bd5471f3fa4612 100644
@@ -307,5 +307,8 @@ void ipoib_event(struct ib_event_handler *handler,
                queue_work(ipoib_workqueue, &priv->flush_normal);
        } else if (record->event == IB_EVENT_PKEY_CHANGE) {
                queue_work(ipoib_workqueue, &priv->flush_heavy);
+       } else if (record->event == IB_EVENT_GID_CHANGE &&
+                  !test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
+               queue_work(ipoib_workqueue, &priv->flush_light);
        }
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index fca1a882de27d14e6338e0fbe7210c3393dd8f05..64a35595eab83783ade2451f928832548fcb7271 100644
@@ -68,6 +68,8 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
        priv->pkey = pkey;
 
        memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
+       memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
+       set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
        priv->dev->broadcast[8] = pkey >> 8;
        priv->dev->broadcast[9] = pkey & 0xff;
 
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 2843f1ae75bdf50bd647b026a68e077cb884723a..887ebadd47745b83ca4ad1fcfa99c0c9371d30a7 100644
@@ -254,8 +254,8 @@ static void srpt_get_class_port_info(struct ib_dm_mad *mad)
        memset(cif, 0, sizeof(*cif));
        cif->base_version = 1;
        cif->class_version = 1;
-       cif->resp_time_value = 20;
 
+       ib_set_cpi_resp_time(cif, 20);
        mad->mad_hdr.status = 0;
 }
 
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 37dd534cbeab89595280cb2aa0545a1702fd33fd..c8a773ffe23b7f53d6a74b486cc37d4c19e949cf 100644
@@ -239,12 +239,15 @@ struct ib_vendor_mad {
 
 #define IB_MGMT_CLASSPORTINFO_ATTR_ID  cpu_to_be16(0x0001)
 
+#define IB_CLASS_PORT_INFO_RESP_TIME_MASK      0x1F
+#define IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE 5
+
 struct ib_class_port_info {
        u8                      base_version;
        u8                      class_version;
        __be16                  capability_mask;
-       u8                      reserved[3];
-       u8                      resp_time_value;
+         /* 27 bits for cap_mask2, 5 bits for resp_time */
+       __be32                  cap_mask2_resp_time;
        u8                      redirect_gid[16];
        __be32                  redirect_tcslfl;
        __be16                  redirect_lid;
@@ -259,6 +262,59 @@ struct ib_class_port_info {
        __be32                  trap_qkey;
 };
 
+/**
+ * ib_get_cpi_resp_time - Returns the resp_time value from
+ * cap_mask2_resp_time in ib_class_port_info.
+ * @cpi: A struct ib_class_port_info mad.
+ */
+static inline u8 ib_get_cpi_resp_time(struct ib_class_port_info *cpi)
+{
+       return (u8)(be32_to_cpu(cpi->cap_mask2_resp_time) &
+                   IB_CLASS_PORT_INFO_RESP_TIME_MASK);
+}
+
+/**
+ * ib_set_cpi_resp_time - Sets the response time in an
+ * ib_class_port_info mad.
+ * @cpi: A struct ib_class_port_info.
+ * @rtime: The response time to set.
+ */
+static inline void ib_set_cpi_resp_time(struct ib_class_port_info *cpi,
+                                       u8 rtime)
+{
+       cpi->cap_mask2_resp_time =
+               (cpi->cap_mask2_resp_time &
+                cpu_to_be32(~IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
+               cpu_to_be32(rtime & IB_CLASS_PORT_INFO_RESP_TIME_MASK);
+}
+
+/**
+ * ib_get_cpi_capmask2 - Returns the capmask2 value from
+ * cap_mask2_resp_time in ib_class_port_info.
+ * @cpi: A struct ib_class_port_info mad.
+ */
+static inline u32 ib_get_cpi_capmask2(struct ib_class_port_info *cpi)
+{
+       return (be32_to_cpu(cpi->cap_mask2_resp_time) >>
+               IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
+}
+
+/**
+ * ib_set_cpi_capmask2 - Sets the capmask2 in an
+ * ib_class_port_info mad.
+ * @cpi: A struct ib_class_port_info.
+ * @capmask2: The capmask2 to set.
+ */
+static inline void ib_set_cpi_capmask2(struct ib_class_port_info *cpi,
+                                      u32 capmask2)
+{
+       cpi->cap_mask2_resp_time =
+               (cpi->cap_mask2_resp_time &
+                cpu_to_be32(IB_CLASS_PORT_INFO_RESP_TIME_MASK)) |
+               cpu_to_be32(capmask2 <<
+                           IB_CLASS_PORT_INFO_RESP_TIME_FIELD_SIZE);
+}
+
 struct ib_mad_notice_attr {
        u8 generic_type;
        u8 prod_type_msb;
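
A small standalone sketch (illustrative, not kernel code) of the packing the new accessors implement: 27 bits of CapabilityMask2 above 5 bits of response time in one 32-bit word, matching the qib_mad.c change that sets the most significant CapabilityMask2 bit. The macro names below are local to the example, and host byte order stands in for the __be32 field:

#include <assert.h>
#include <stdint.h>

#define RESP_TIME_MASK		0x1F	/* low 5 bits: response time value */
#define RESP_TIME_FIELD_SIZE	5	/* remaining 27 bits: CapabilityMask2 */

int main(void)
{
	uint32_t word = 0;	/* host-order stand-in for cap_mask2_resp_time */
	uint32_t capmask2 = 1u << (31 - RESP_TIME_FIELD_SIZE);	/* MSB of the 27-bit field */

	/* what ib_set_cpi_capmask2() does */
	word = (word & RESP_TIME_MASK) | (capmask2 << RESP_TIME_FIELD_SIZE);
	/* what ib_set_cpi_resp_time() does */
	word = (word & ~RESP_TIME_MASK) | (18 & RESP_TIME_MASK);

	assert((word >> RESP_TIME_FIELD_SIZE) == capmask2);	/* ib_get_cpi_capmask2() */
	assert((word & RESP_TIME_MASK) == 18);			/* ib_get_cpi_resp_time() */
	return 0;
}
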
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index cdc1c81aa275bda38f97f1e71c3c774dfbc8c983..384041669489e196a1732b6419520b2a054ea651 100644
@@ -94,6 +94,8 @@ enum ib_sa_selector {
        IB_SA_BEST = 3
 };
 
+#define IB_SA_CAP_MASK2_SENDONLY_FULL_MEM_SUPPORT      BIT(12)
+
 /*
  * Structures for SA records are named "struct ib_sa_xxx_rec."  No
  * attempt is made to pack structures to match the physical layout of
@@ -439,4 +441,14 @@ int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
                              void *context,
                              struct ib_sa_query **sa_query);
 
+/* Support get SA ClassPortInfo */
+int ib_sa_classport_info_rec_query(struct ib_sa_client *client,
+                                  struct ib_device *device, u8 port_num,
+                                  int timeout_ms, gfp_t gfp_mask,
+                                  void (*callback)(int status,
+                                                   struct ib_class_port_info *resp,
+                                                   void *context),
+                                  void *context,
+                                  struct ib_sa_query **sa_query);
+
 #endif /* IB_SA_H */
diff --git a/include/uapi/rdma/rdma_netlink.h b/include/uapi/rdma/rdma_netlink.h
index 6e373d151cad750ee637fea41800652d02548681..02fe8390c18f6dd6d857a8eac2a855b2174f2cd2 100644
@@ -135,10 +135,12 @@ enum {
  * Local service operations:
  *   RESOLVE - The client requests the local service to resolve a path.
  *   SET_TIMEOUT - The local service requests the client to set the timeout.
+ *   IP_RESOLVE - The client requests the local service to resolve an IP to GID.
  */
 enum {
        RDMA_NL_LS_OP_RESOLVE = 0,
        RDMA_NL_LS_OP_SET_TIMEOUT,
+       RDMA_NL_LS_OP_IP_RESOLVE,
        RDMA_NL_LS_NUM_OPS
 };
 
@@ -176,6 +178,10 @@ struct rdma_ls_resolve_header {
        __u8 path_use;
 };
 
+struct rdma_ls_ip_resolve_header {
+       __u32 ifindex;
+};
+
 /* Local service attribute type */
 #define RDMA_NLA_F_MANDATORY   (1 << 13)
 #define RDMA_NLA_TYPE_MASK     (~(NLA_F_NESTED | NLA_F_NET_BYTEORDER | \
@@ -193,6 +199,8 @@ struct rdma_ls_resolve_header {
  *   TCLASS          u8
  *   PKEY            u16                        cpu
  *   QOS_CLASS       u16                        cpu
+ *   IPV4            u32                        BE
+ *   IPV6            u8[16]                     BE
  */
 enum {
        LS_NLA_TYPE_UNSPEC = 0,
@@ -204,6 +212,8 @@ enum {
        LS_NLA_TYPE_TCLASS,
        LS_NLA_TYPE_PKEY,
        LS_NLA_TYPE_QOS_CLASS,
+       LS_NLA_TYPE_IPV4,
+       LS_NLA_TYPE_IPV6,
        LS_NLA_TYPE_MAX
 };