Merge remote-tracking branch 'rdma/for-next'
author	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 5 Nov 2015 01:03:04 +0000 (12:03 +1100)
committer	Stephen Rothwell <sfr@canb.auug.org.au>
	Thu, 5 Nov 2015 01:03:04 +0000 (12:03 +1100)
Initial roundup of 4.4 merge window candidates

- "Checksum offload support in user space" enablement
- Misc cxgb4 fixes, add T6 support
- Misc usnic fixes
- 32 bit build warning fixes
- Misc ocrdma fixes
- Multicast loopback prevention extension
- Extend the GID cache to store and return attributes of GIDs
- Misc iSER updates
- iSER clustering update
- Network NameSpace support for rdma CM (see the sketch after this list)
- Work Request cleanup series
- New Memory Registration API
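
For the network-namespace item above (see the cma.c and ucma.c hunks further down), here is a minimal sketch of a kernel ULP creating a CM ID under the extended API, which now takes the namespace the ID is bound to as its first argument; ex_handler, ex_create_id and the choice of &init_net are illustrative, not part of the series:

#include <rdma/rdma_cm.h>
#include <net/net_namespace.h>

static int ex_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	return 0;	/* placeholder event handler */
}

static struct rdma_cm_id *ex_create_id(void *context)
{
	/* Previously rdma_create_id(ex_handler, context, ...); kernel
	 * consumers now name the namespace explicitly, while the
	 * user-space path (ucma.c) passes current->nsproxy->net_ns. */
	return rdma_create_id(&init_net, ex_handler, context,
			      RDMA_PS_TCP, IB_QPT_RC);
}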

140 files changed:
MAINTAINERS
drivers/infiniband/core/addr.c
drivers/infiniband/core/agent.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/core_priv.h
drivers/infiniband/core/device.c
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/multicast.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucma.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/uverbs_marshall.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/cxgb3/iwch_cq.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb3/iwch_provider.h
drivers/infiniband/hw/cxgb3/iwch_qp.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/cxgb4/cq.c
drivers/infiniband/hw/cxgb4/device.c
drivers/infiniband/hw/cxgb4/iw_cxgb4.h
drivers/infiniband/hw/cxgb4/mem.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/cxgb4/qp.c
drivers/infiniband/hw/cxgb4/t4.h
drivers/infiniband/hw/mlx4/ah.c
drivers/infiniband/hw/mlx4/cq.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mcg.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx5/cq.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mthca/mthca_av.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/nes/nes_hw.h
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/nes/nes_verbs.h
drivers/infiniband/hw/ocrdma/ocrdma.h
drivers/infiniband/hw/ocrdma/ocrdma_ah.c
drivers/infiniband/hw/ocrdma/ocrdma_hw.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_stats.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
drivers/infiniband/hw/qib/qib_keys.c
drivers/infiniband/hw/qib/qib_mr.c
drivers/infiniband/hw/qib/qib_qp.c
drivers/infiniband/hw/qib/qib_rc.c
drivers/infiniband/hw/qib/qib_ruc.c
drivers/infiniband/hw/qib/qib_uc.c
drivers/infiniband/hw/qib/qib_ud.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/qib/qib_verbs.h
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_initiator.c
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srp/ib_srp.h
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h [new file with mode: 0644]
drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
drivers/net/ethernet/mellanox/mlx4/en_main.c
drivers/net/ethernet/mellanox/mlx4/en_resources.c
drivers/net/ethernet/mellanox/mlx4/fw.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/qp.c
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.h
drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
drivers/staging/rdma/amso1100/c2_qp.c
drivers/staging/rdma/ehca/ehca_reqs.c
drivers/staging/rdma/hfi1/keys.c
drivers/staging/rdma/hfi1/mr.c
drivers/staging/rdma/hfi1/qp.c
drivers/staging/rdma/hfi1/rc.c
drivers/staging/rdma/hfi1/ruc.c
drivers/staging/rdma/hfi1/uc.c
drivers/staging/rdma/hfi1/ud.c
drivers/staging/rdma/hfi1/verbs.c
drivers/staging/rdma/hfi1/verbs.h
drivers/staging/rdma/ipath/ipath_rc.c
drivers/staging/rdma/ipath/ipath_ruc.c
drivers/staging/rdma/ipath/ipath_uc.c
drivers/staging/rdma/ipath/ipath_ud.c
drivers/staging/rdma/ipath/ipath_verbs.c
drivers/staging/rdma/ipath/ipath_verbs.h
include/linux/mlx4/device.h
include/linux/mlx4/qp.h
include/linux/sunrpc/svc_rdma.h
include/rdma/ib_addr.h
include/rdma/ib_cache.h
include/rdma/ib_pack.h
include/rdma/ib_sa.h
include/rdma/ib_verbs.h
include/rdma/rdma_cm.h
include/uapi/rdma/ib_user_verbs.h
net/9p/trans_rdma.c
net/rds/ib.c
net/rds/ib.h
net/rds/ib_cm.c
net/rds/ib_send.c
net/rds/iw.c
net/rds/iw.h
net/rds/iw_cm.c
net/rds/iw_rdma.c
net/rds/iw_send.c
net/rds/rdma_transport.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/svc_rdma_sendto.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
net/sunrpc/xprtrdma/verbs.c
net/sunrpc/xprtrdma/xprt_rdma.h

diff --git a/MAINTAINERS b/MAINTAINERS
index ec706603f4f65a6fe20c6db127244cefa912b8bf..728dcec6f29495ea11f3d9f72cc3d2d313e2b1a0 100644 (file)
@@ -2776,9 +2776,10 @@ S:       Supported
 F:     drivers/net/ethernet/cisco/enic/
 
 CISCO VIC LOW LATENCY NIC DRIVER
-M:     Upinder Malhi <umalhi@cisco.com>
+M:     Christian Benvenuti <benve@cisco.com>
+M:     Dave Goodell <dgoodell@cisco.com>
 S:     Supported
-F:     drivers/infiniband/hw/usnic
+F:     drivers/infiniband/hw/usnic/
 
 CIRRUS LOGIC EP93XX ETHERNET DRIVER
 M:     Hartley Sweeten <hsweeten@visionengravers.com>
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 746cdf56bc76475831cdf39b93a131f24fe804a7..34b1adad07aacf92e9bbfa9621e084f2ec756f74 100644 (file)
@@ -128,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
        int ret = -EADDRNOTAVAIL;
 
        if (dev_addr->bound_dev_if) {
-               dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+               dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
                if (!dev)
                        return -ENODEV;
                ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -138,7 +138,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
 
        switch (addr->sa_family) {
        case AF_INET:
-               dev = ip_dev_find(&init_net,
+               dev = ip_dev_find(dev_addr->net,
                        ((struct sockaddr_in *) addr)->sin_addr.s_addr);
 
                if (!dev)
@@ -149,12 +149,11 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
                        *vlan_id = rdma_vlan_dev_vlan_id(dev);
                dev_put(dev);
                break;
-
 #if IS_ENABLED(CONFIG_IPV6)
        case AF_INET6:
                rcu_read_lock();
-               for_each_netdev_rcu(&init_net, dev) {
-                       if (ipv6_chk_addr(&init_net,
+               for_each_netdev_rcu(dev_addr->net, dev) {
+                       if (ipv6_chk_addr(dev_addr->net,
                                          &((struct sockaddr_in6 *) addr)->sin6_addr,
                                          dev, 1)) {
                                ret = rdma_copy_addr(dev_addr, dev, NULL);
@@ -236,7 +235,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
        fl4.daddr = dst_ip;
        fl4.saddr = src_ip;
        fl4.flowi4_oif = addr->bound_dev_if;
-       rt = ip_route_output_key(&init_net, &fl4);
+       rt = ip_route_output_key(addr->net, &fl4);
        if (IS_ERR(rt)) {
                ret = PTR_ERR(rt);
                goto out;
@@ -278,12 +277,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
        fl6.saddr = src_in->sin6_addr;
        fl6.flowi6_oif = addr->bound_dev_if;
 
-       dst = ip6_route_output(&init_net, NULL, &fl6);
+       dst = ip6_route_output(addr->net, NULL, &fl6);
        if ((ret = dst->error))
                goto put;
 
        if (ipv6_addr_any(&fl6.saddr)) {
-               ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
+               ret = ipv6_dev_get_saddr(addr->net, ip6_dst_idev(dst)->dev,
                                         &fl6.daddr, 0, &fl6.saddr);
                if (ret)
                        goto put;
@@ -458,7 +457,7 @@ static void resolve_cb(int status, struct sockaddr *src_addr,
 }
 
 int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *dmac, u16 *vlan_id)
+                              u8 *dmac, u16 *vlan_id, int if_index)
 {
        int ret = 0;
        struct rdma_dev_addr dev_addr;
@@ -476,6 +475,8 @@ int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgi
        rdma_gid2ip(&dgid_addr._sockaddr, dgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
+       dev_addr.bound_dev_if = if_index;
+       dev_addr.net = &init_net;
 
        ctx.addr = &dev_addr;
        init_completion(&ctx.comp);
@@ -510,6 +511,7 @@ int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
        rdma_gid2ip(&gid_addr._sockaddr, sgid);
 
        memset(&dev_addr, 0, sizeof(dev_addr));
+       dev_addr.net = &init_net;
        ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
        if (ret)
                return ret;
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 0429040304fd478a7ad7833df48c0bdc74c429bc..4fa524dfb6cf27b1dc958baccba4cb005058f6e8 100644 (file)
@@ -126,7 +126,7 @@ void agent_send_response(const struct ib_mad_hdr *mad_hdr, const struct ib_grh *
                mad_send_wr = container_of(send_buf,
                                           struct ib_mad_send_wr_private,
                                           send_buf);
-               mad_send_wr->send_wr.wr.ud.port_num = port_num;
+               mad_send_wr->send_wr.port_num = port_num;
        }
 
        if (ib_post_send_mad(send_buf, NULL)) {
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 87471ef371986c11f59e6761e7566ebec78cc1cd..89bebeada38b9b5f6a09d9dd896c8047fa57d7ad 100644 (file)
@@ -409,10 +409,10 @@ static int ib_cache_gid_find(struct ib_device *ib_dev,
                                        mask, port, index);
 }
 
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
-                             const union ib_gid *gid,
-                             u8 port, struct net_device *ndev,
-                             u16 *index)
+int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
+                              const union ib_gid *gid,
+                              u8 port, struct net_device *ndev,
+                              u16 *index)
 {
        int local_index;
        struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
@@ -438,6 +438,82 @@ int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
 
        return -ENOENT;
 }
+EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+
+/**
+ * ib_find_gid_by_filter - Returns the GID table index where a specified
+ * GID value occurs
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value could be
+ *   searched.
+ * @filter: The filter function is executed on any matching GID in the table.
+ *   If the filter function returns true, the corresponding index is returned,
+ *   otherwise, we continue searching the GID table. It's guaranteed that
+ *   while filter is executed, ndev field is valid and the structure won't
+ *   change. filter is executed in an atomic context. filter must not be NULL.
+ * @index: The index into the cached GID table where the GID was found.  This
+ *   parameter may be NULL.
+ *
+ * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * of which the filter function returns true in the port's GID table.
+ * This function is only supported on RoCE ports.
+ *
+ */
+static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
+                                      const union ib_gid *gid,
+                                      u8 port,
+                                      bool (*filter)(const union ib_gid *,
+                                                     const struct ib_gid_attr *,
+                                                     void *),
+                                      void *context,
+                                      u16 *index)
+{
+       struct ib_gid_table **ports_table = ib_dev->cache.gid_cache;
+       struct ib_gid_table *table;
+       unsigned int i;
+       bool found = false;
+
+       if (!ports_table)
+               return -EOPNOTSUPP;
+
+       if (port < rdma_start_port(ib_dev) ||
+           port > rdma_end_port(ib_dev) ||
+           !rdma_protocol_roce(ib_dev, port))
+               return -EPROTONOSUPPORT;
+
+       table = ports_table[port - rdma_start_port(ib_dev)];
+
+       for (i = 0; i < table->sz; i++) {
+               struct ib_gid_attr attr;
+               unsigned long flags;
+
+               read_lock_irqsave(&table->data_vec[i].lock, flags);
+               if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+                       goto next;
+
+               if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+                       goto next;
+
+               memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
+
+               if (filter(gid, &attr, context))
+                       found = true;
+
+next:
+               read_unlock_irqrestore(&table->data_vec[i].lock, flags);
+
+               if (found)
+                       break;
+       }
+
+       if (!found)
+               return -ENOENT;
+
+       if (index)
+               *index = i;
+       return 0;
+}
 
 static struct ib_gid_table *alloc_gid_table(int sz)
 {
@@ -649,24 +725,44 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
 int ib_get_cached_gid(struct ib_device *device,
                      u8                port_num,
                      int               index,
-                     union ib_gid     *gid)
+                     union ib_gid     *gid,
+                     struct ib_gid_attr *gid_attr)
 {
        if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
                return -EINVAL;
 
-       return __ib_cache_gid_get(device, port_num, index, gid, NULL);
+       return __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
 }
 EXPORT_SYMBOL(ib_get_cached_gid);
 
 int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
+                      struct net_device *ndev,
                       u8               *port_num,
                       u16              *index)
 {
-       return ib_cache_gid_find(device, gid, NULL, port_num, index);
+       return ib_cache_gid_find(device, gid, ndev, port_num, index);
 }
 EXPORT_SYMBOL(ib_find_cached_gid);
 
+int ib_find_gid_by_filter(struct ib_device *device,
+                         const union ib_gid *gid,
+                         u8 port_num,
+                         bool (*filter)(const union ib_gid *gid,
+                                        const struct ib_gid_attr *,
+                                        void *),
+                         void *context, u16 *index)
+{
+       /* Only RoCE GID table supports filter function */
+       if (!rdma_cap_roce_gid_table(device, port_num) && filter)
+               return -EPROTONOSUPPORT;
+
+       return ib_cache_gid_find_by_filter(device, gid,
+                                          port_num, filter,
+                                          context, index);
+}
+EXPORT_SYMBOL(ib_find_gid_by_filter);
+
 int ib_get_cached_pkey(struct ib_device *device,
                       u8                port_num,
                       int               index,
@@ -845,7 +941,7 @@ static void ib_cache_update(struct ib_device *device,
        if (!use_roce_gid_table) {
                for (i = 0;  i < gid_cache->table_len; ++i) {
                        ret = ib_query_gid(device, port, i,
-                                          gid_cache->table + i);
+                                          gid_cache->table + i, NULL);
                        if (ret) {
                                printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
                                       ret, device->name, i);
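
The cache.c diff above also adds a filter-based lookup, ib_find_gid_by_filter(), documented in the added kernel-doc. Below is a minimal sketch of a caller that looks up the cache index of a GID bound to a particular net_device, assuming the declaration is exported through <rdma/ib_cache.h> as the changed-file list suggests (the ex_* names are illustrative):

#include <rdma/ib_cache.h>

/* The filter runs in atomic context while the table entry is locked,
 * so attr->ndev is valid for the duration of the callback. */
static bool ex_gid_on_ndev(const union ib_gid *gid,
			   const struct ib_gid_attr *attr,
			   void *context)
{
	return attr->ndev == context;
}

static int ex_find_gid_index(struct ib_device *device, u8 port,
			     const union ib_gid *gid,
			     struct net_device *ndev, u16 *index)
{
	/* Only RoCE ports support a filter; others get -EPROTONOSUPPORT. */
	return ib_find_gid_by_filter(device, gid, port,
				     ex_gid_on_ndev, ndev, index);
}
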
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 4f918b929eca955532cd5dc0541bc842272a0a3b..0a26dd6d9b19f96d97c9b3b244509892afae7023 100644 (file)
@@ -179,8 +179,6 @@ struct cm_av {
        struct ib_ah_attr ah_attr;
        u16 pkey_index;
        u8 timeout;
-       u8  valid;
-       u8  smac[ETH_ALEN];
 };
 
 struct cm_work {
@@ -361,17 +359,21 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
        unsigned long flags;
        int ret;
        u8 p;
+       struct net_device *ndev = ib_get_ndev_from_path(path);
 
        read_lock_irqsave(&cm.device_lock, flags);
        list_for_each_entry(cm_dev, &cm.device_list, list) {
                if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
-                                       &p, NULL)) {
+                                       ndev, &p, NULL)) {
                        port = cm_dev->port[p-1];
                        break;
                }
        }
        read_unlock_irqrestore(&cm.device_lock, flags);
 
+       if (ndev)
+               dev_put(ndev);
+
        if (!port)
                return -EINVAL;
 
@@ -384,9 +386,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
        ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
                             &av->ah_attr);
        av->timeout = path->packet_life_time + 1;
-       memcpy(av->smac, path->smac, sizeof(av->smac));
 
-       av->valid = 1;
        return 0;
 }
 
@@ -1639,11 +1639,11 @@ static int cm_req_handler(struct cm_work *work)
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
 
        memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
-       work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
        ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
        if (ret) {
                ib_get_cached_gid(work->port->cm_dev->ib_device,
-                                 work->port->port_num, 0, &work->path[0].sgid);
+                                 work->port->port_num, 0, &work->path[0].sgid,
+                                 NULL);
                ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
                               &work->path[0].sgid, sizeof work->path[0].sgid,
                               NULL, 0);
@@ -3618,32 +3618,6 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
                *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
                                IB_QP_DEST_QPN | IB_QP_RQ_PSN;
                qp_attr->ah_attr = cm_id_priv->av.ah_attr;
-               if (!cm_id_priv->av.valid) {
-                       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-                       return -EINVAL;
-               }
-               if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
-                       qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
-                       *qp_attr_mask |= IB_QP_VID;
-               }
-               if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
-                       memcpy(qp_attr->smac, cm_id_priv->av.smac,
-                              sizeof(qp_attr->smac));
-                       *qp_attr_mask |= IB_QP_SMAC;
-               }
-               if (cm_id_priv->alt_av.valid) {
-                       if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
-                               qp_attr->alt_vlan_id =
-                                       cm_id_priv->alt_av.ah_attr.vlan_id;
-                               *qp_attr_mask |= IB_QP_ALT_VID;
-                       }
-                       if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
-                               memcpy(qp_attr->alt_smac,
-                                      cm_id_priv->alt_av.smac,
-                                      sizeof(qp_attr->alt_smac));
-                               *qp_attr_mask |= IB_QP_ALT_SMAC;
-                       }
-               }
                qp_attr->path_mtu = cm_id_priv->path_mtu;
                qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
                qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 36b12d560e17e5a862a2e37d1f56875b46425f4b..7e93eb1f33eb4190cff10170e67ce2a89a53c769 100644 (file)
@@ -44,6 +44,8 @@
 #include <linux/module.h>
 #include <net/route.h>
 
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
 #include <net/tcp.h>
 #include <net/ipv6.h>
 #include <net/ip_fib.h>
@@ -110,22 +112,33 @@ static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
 static struct workqueue_struct *cma_wq;
-static DEFINE_IDR(tcp_ps);
-static DEFINE_IDR(udp_ps);
-static DEFINE_IDR(ipoib_ps);
-static DEFINE_IDR(ib_ps);
+static int cma_pernet_id;
 
-static struct idr *cma_idr(enum rdma_port_space ps)
+struct cma_pernet {
+       struct idr tcp_ps;
+       struct idr udp_ps;
+       struct idr ipoib_ps;
+       struct idr ib_ps;
+};
+
+static struct cma_pernet *cma_pernet(struct net *net)
+{
+       return net_generic(net, cma_pernet_id);
+}
+
+static struct idr *cma_pernet_idr(struct net *net, enum rdma_port_space ps)
 {
+       struct cma_pernet *pernet = cma_pernet(net);
+
        switch (ps) {
        case RDMA_PS_TCP:
-               return &tcp_ps;
+               return &pernet->tcp_ps;
        case RDMA_PS_UDP:
-               return &udp_ps;
+               return &pernet->udp_ps;
        case RDMA_PS_IPOIB:
-               return &ipoib_ps;
+               return &pernet->ipoib_ps;
        case RDMA_PS_IB:
-               return &ib_ps;
+               return &pernet->ib_ps;
        default:
                return NULL;
        }
@@ -145,24 +158,25 @@ struct rdma_bind_list {
        unsigned short          port;
 };
 
-static int cma_ps_alloc(enum rdma_port_space ps,
+static int cma_ps_alloc(struct net *net, enum rdma_port_space ps,
                        struct rdma_bind_list *bind_list, int snum)
 {
-       struct idr *idr = cma_idr(ps);
+       struct idr *idr = cma_pernet_idr(net, ps);
 
        return idr_alloc(idr, bind_list, snum, snum + 1, GFP_KERNEL);
 }
 
-static struct rdma_bind_list *cma_ps_find(enum rdma_port_space ps, int snum)
+static struct rdma_bind_list *cma_ps_find(struct net *net,
+                                         enum rdma_port_space ps, int snum)
 {
-       struct idr *idr = cma_idr(ps);
+       struct idr *idr = cma_pernet_idr(net, ps);
 
        return idr_find(idr, snum);
 }
 
-static void cma_ps_remove(enum rdma_port_space ps, int snum)
+static void cma_ps_remove(struct net *net, enum rdma_port_space ps, int snum)
 {
-       struct idr *idr = cma_idr(ps);
+       struct idr *idr = cma_pernet_idr(net, ps);
 
        idr_remove(idr, snum);
 }
@@ -427,10 +441,11 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 }
 
 static inline int cma_validate_port(struct ib_device *device, u8 port,
-                                     union ib_gid *gid, int dev_type)
+                                     union ib_gid *gid, int dev_type,
+                                     int bound_if_index)
 {
-       u8 found_port;
        int ret = -ENODEV;
+       struct net_device *ndev = NULL;
 
        if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
                return ret;
@@ -438,9 +453,13 @@ static inline int cma_validate_port(struct ib_device *device, u8 port,
        if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
                return ret;
 
-       ret = ib_find_cached_gid(device, gid, &found_port, NULL);
-       if (port != found_port)
-               return -ENODEV;
+       if (dev_type == ARPHRD_ETHER)
+               ndev = dev_get_by_index(&init_net, bound_if_index);
+
+       ret = ib_find_cached_gid_by_port(device, gid, port, ndev, NULL);
+
+       if (ndev)
+               dev_put(ndev);
 
        return ret;
 }
@@ -472,7 +491,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                       &iboe_gid : &gid;
 
                ret = cma_validate_port(cma_dev->device, port, gidp,
-                                       dev_addr->dev_type);
+                                       dev_addr->dev_type,
+                                       dev_addr->bound_dev_if);
                if (!ret) {
                        id_priv->id.port_num = port;
                        goto out;
@@ -490,7 +510,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
                               &iboe_gid : &gid;
 
                        ret = cma_validate_port(cma_dev->device, port, gidp,
-                                               dev_addr->dev_type);
+                                               dev_addr->dev_type,
+                                               dev_addr->bound_dev_if);
                        if (!ret) {
                                id_priv->id.port_num = port;
                                goto out;
@@ -531,7 +552,9 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
                        if (ib_find_cached_pkey(cur_dev->device, p, pkey, &index))
                                continue;
 
-                       for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i, &gid); i++) {
+                       for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
+                                                      &gid, NULL);
+                            i++) {
                                if (!memcmp(&gid, dgid, sizeof(gid))) {
                                        cma_dev = cur_dev;
                                        sgid = gid;
@@ -577,7 +600,8 @@ static int cma_disable_callback(struct rdma_id_private *id_priv,
        return 0;
 }
 
-struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+struct rdma_cm_id *rdma_create_id(struct net *net,
+                                 rdma_cm_event_handler event_handler,
                                  void *context, enum rdma_port_space ps,
                                  enum ib_qp_type qp_type)
 {
@@ -601,6 +625,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
        INIT_LIST_HEAD(&id_priv->listen_list);
        INIT_LIST_HEAD(&id_priv->mc_list);
        get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num);
+       id_priv->id.route.addr.dev_addr.net = get_net(net);
 
        return &id_priv->id;
 }
@@ -718,18 +743,12 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
                goto out;
 
        ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
-                          qp_attr.ah_attr.grh.sgid_index, &sgid);
+                          qp_attr.ah_attr.grh.sgid_index, &sgid, NULL);
        if (ret)
                goto out;
 
        BUG_ON(id_priv->cma_dev->device != id_priv->id.device);
 
-       if (rdma_protocol_roce(id_priv->id.device, id_priv->id.port_num)) {
-               ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
-
-               if (ret)
-                       goto out;
-       }
        if (conn_param)
                qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
        ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -1260,7 +1279,7 @@ static bool cma_match_net_dev(const struct rdma_id_private *id_priv,
                       cma_protocol_roce(&id_priv->id);
 
        return !addr->dev_addr.bound_dev_if ||
-              (net_eq(dev_net(net_dev), &init_net) &&
+              (net_eq(dev_net(net_dev), addr->dev_addr.net) &&
                addr->dev_addr.bound_dev_if == net_dev->ifindex);
 }
 
@@ -1321,7 +1340,8 @@ static struct rdma_id_private *cma_id_from_event(struct ib_cm_id *cm_id,
                }
        }
 
-       bind_list = cma_ps_find(rdma_ps_from_service_id(req.service_id),
+       bind_list = cma_ps_find(*net_dev ? dev_net(*net_dev) : &init_net,
+                               rdma_ps_from_service_id(req.service_id),
                                cma_port_from_service_id(req.service_id));
        id_priv = cma_find_listener(bind_list, cm_id, ib_event, &req, *net_dev);
        if (IS_ERR(id_priv) && *net_dev) {
@@ -1392,6 +1412,7 @@ static void cma_cancel_operation(struct rdma_id_private *id_priv,
 static void cma_release_port(struct rdma_id_private *id_priv)
 {
        struct rdma_bind_list *bind_list = id_priv->bind_list;
+       struct net *net = id_priv->id.route.addr.dev_addr.net;
 
        if (!bind_list)
                return;
@@ -1399,7 +1420,7 @@ static void cma_release_port(struct rdma_id_private *id_priv)
        mutex_lock(&lock);
        hlist_del(&id_priv->node);
        if (hlist_empty(&bind_list->owners)) {
-               cma_ps_remove(bind_list->ps, bind_list->port);
+               cma_ps_remove(net, bind_list->ps, bind_list->port);
                kfree(bind_list);
        }
        mutex_unlock(&lock);
@@ -1458,6 +1479,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
                cma_deref_id(id_priv->id.context);
 
        kfree(id_priv->id.route.path_rec);
+       put_net(id_priv->id.route.addr.dev_addr.net);
        kfree(id_priv);
 }
 EXPORT_SYMBOL(rdma_destroy_id);
@@ -1588,7 +1610,8 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
                      ib_event->param.req_rcvd.primary_path->service_id;
        int ret;
 
-       id = rdma_create_id(listen_id->event_handler, listen_id->context,
+       id = rdma_create_id(listen_id->route.addr.dev_addr.net,
+                           listen_id->event_handler, listen_id->context,
                            listen_id->ps, ib_event->param.req_rcvd.qp_type);
        if (IS_ERR(id))
                return NULL;
@@ -1643,9 +1666,10 @@ static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        const sa_family_t ss_family = listen_id->route.addr.src_addr.ss_family;
+       struct net *net = listen_id->route.addr.dev_addr.net;
        int ret;
 
-       id = rdma_create_id(listen_id->event_handler, listen_id->context,
+       id = rdma_create_id(net, listen_id->event_handler, listen_id->context,
                            listen_id->ps, IB_QPT_UD);
        if (IS_ERR(id))
                return NULL;
@@ -1882,7 +1906,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                return -ECONNABORTED;
 
        /* Create a new RDMA id for the new IW CM ID */
-       new_cm_id = rdma_create_id(listen_id->id.event_handler,
+       new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
+                                  listen_id->id.event_handler,
                                   listen_id->id.context,
                                   RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(new_cm_id)) {
@@ -2010,12 +2035,13 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 {
        struct rdma_id_private *dev_id_priv;
        struct rdma_cm_id *id;
+       struct net *net = id_priv->id.route.addr.dev_addr.net;
        int ret;
 
        if (cma_family(id_priv) == AF_IB && !rdma_cap_ib_cm(cma_dev->device, 1))
                return;
 
-       id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps,
+       id = rdma_create_id(net, cma_listen_handler, id_priv, id_priv->id.ps,
                            id_priv->id.qp_type);
        if (IS_ERR(id))
                return;
@@ -2294,16 +2320,17 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 
        route->num_paths = 1;
 
-       if (addr->dev_addr.bound_dev_if)
+       if (addr->dev_addr.bound_dev_if) {
                ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+               route->path_rec->net = &init_net;
+               route->path_rec->ifindex = addr->dev_addr.bound_dev_if;
+       }
        if (!ndev) {
                ret = -ENODEV;
                goto err2;
        }
 
-       route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
        memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
-       memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
 
        rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
                    &route->path_rec->sgid);
@@ -2426,7 +2453,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
        p = 1;
 
 port_found:
-       ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
+       ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
        if (ret)
                goto out;
 
@@ -2688,7 +2715,8 @@ static int cma_alloc_port(enum rdma_port_space ps,
        if (!bind_list)
                return -ENOMEM;
 
-       ret = cma_ps_alloc(ps, bind_list, snum);
+       ret = cma_ps_alloc(id_priv->id.route.addr.dev_addr.net, ps, bind_list,
+                          snum);
        if (ret < 0)
                goto err;
 
@@ -2707,13 +2735,14 @@ static int cma_alloc_any_port(enum rdma_port_space ps,
        static unsigned int last_used_port;
        int low, high, remaining;
        unsigned int rover;
+       struct net *net = id_priv->id.route.addr.dev_addr.net;
 
-       inet_get_local_port_range(&init_net, &low, &high);
+       inet_get_local_port_range(net, &low, &high);
        remaining = (high - low) + 1;
        rover = prandom_u32() % remaining + low;
 retry:
        if (last_used_port != rover &&
-           !cma_ps_find(ps, (unsigned short)rover)) {
+           !cma_ps_find(net, ps, (unsigned short)rover)) {
                int ret = cma_alloc_port(ps, id_priv, rover);
                /*
                 * Remember previously used port number in order to avoid
@@ -2779,7 +2808,7 @@ static int cma_use_port(enum rdma_port_space ps,
        if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
                return -EACCES;
 
-       bind_list = cma_ps_find(ps, snum);
+       bind_list = cma_ps_find(id_priv->id.route.addr.dev_addr.net, ps, snum);
        if (!bind_list) {
                ret = cma_alloc_port(ps, id_priv, snum);
        } else {
@@ -2971,8 +3000,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
                if (addr->sa_family == AF_INET)
                        id_priv->afonly = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-               else if (addr->sa_family == AF_INET6)
-                       id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
+               else if (addr->sa_family == AF_INET6) {
+                       struct net *net = id_priv->id.route.addr.dev_addr.net;
+
+                       id_priv->afonly = net->ipv6.sysctl.bindv6only;
+               }
 #endif
        }
        ret = cma_get_port(id_priv);
@@ -3777,6 +3809,7 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
        dev_addr = &id_priv->id.route.addr.dev_addr;
 
        if ((dev_addr->bound_dev_if == ndev->ifindex) &&
+           (net_eq(dev_net(ndev), dev_addr->net)) &&
            memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
                printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
                       ndev->name, &id_priv->id);
@@ -3802,9 +3835,6 @@ static int cma_netdev_callback(struct notifier_block *self, unsigned long event,
        struct rdma_id_private *id_priv;
        int ret = NOTIFY_DONE;
 
-       if (dev_net(ndev) != &init_net)
-               return NOTIFY_DONE;
-
        if (event != NETDEV_BONDING_FAILOVER)
                return NOTIFY_DONE;
 
@@ -3999,6 +4029,35 @@ static const struct ibnl_client_cbs cma_cb_table[] = {
                                       .module = THIS_MODULE },
 };
 
+static int cma_init_net(struct net *net)
+{
+       struct cma_pernet *pernet = cma_pernet(net);
+
+       idr_init(&pernet->tcp_ps);
+       idr_init(&pernet->udp_ps);
+       idr_init(&pernet->ipoib_ps);
+       idr_init(&pernet->ib_ps);
+
+       return 0;
+}
+
+static void cma_exit_net(struct net *net)
+{
+       struct cma_pernet *pernet = cma_pernet(net);
+
+       idr_destroy(&pernet->tcp_ps);
+       idr_destroy(&pernet->udp_ps);
+       idr_destroy(&pernet->ipoib_ps);
+       idr_destroy(&pernet->ib_ps);
+}
+
+static struct pernet_operations cma_pernet_operations = {
+       .init = cma_init_net,
+       .exit = cma_exit_net,
+       .id = &cma_pernet_id,
+       .size = sizeof(struct cma_pernet),
+};
+
 static int __init cma_init(void)
 {
        int ret;
@@ -4007,6 +4066,10 @@ static int __init cma_init(void)
        if (!cma_wq)
                return -ENOMEM;
 
+       ret = register_pernet_subsys(&cma_pernet_operations);
+       if (ret)
+               goto err_wq;
+
        ib_sa_register_client(&sa_client);
        rdma_addr_register_client(&addr_client);
        register_netdevice_notifier(&cma_nb);
@@ -4024,6 +4087,7 @@ err:
        unregister_netdevice_notifier(&cma_nb);
        rdma_addr_unregister_client(&addr_client);
        ib_sa_unregister_client(&sa_client);
+err_wq:
        destroy_workqueue(cma_wq);
        return ret;
 }
@@ -4035,11 +4099,8 @@ static void __exit cma_cleanup(void)
        unregister_netdevice_notifier(&cma_nb);
        rdma_addr_unregister_client(&addr_client);
        ib_sa_unregister_client(&sa_client);
+       unregister_pernet_subsys(&cma_pernet_operations);
        destroy_workqueue(cma_wq);
-       idr_destroy(&tcp_ps);
-       idr_destroy(&udp_ps);
-       idr_destroy(&ipoib_ps);
-       idr_destroy(&ib_ps);
 }
 
 module_init(cma_init);
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 70bb36ebb03b8e91ff2cf89445b0a8f52329e591..5cf6eb716f000a7aa07233419a0dcbbce41e0b6d 100644 (file)
@@ -46,8 +46,8 @@ void ib_device_unregister_sysfs(struct ib_device *device);
 void ib_cache_setup(void);
 void ib_cache_cleanup(void);
 
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
-                           struct ib_qp_attr *qp_attr, int *qp_attr_mask);
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+                       struct ib_qp_attr *qp_attr, int *qp_attr_mask);
 
 typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port,
              struct net_device *idev, void *cookie);
@@ -65,11 +65,6 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
                              roce_netdev_callback cb,
                              void *cookie);
 
-int ib_cache_gid_find_by_port(struct ib_device *ib_dev,
-                             const union ib_gid *gid,
-                             u8 port, struct net_device *ndev,
-                             u16 *index);
-
 enum ib_cache_gid_default_mode {
        IB_CACHE_GID_DEFAULT_MODE_SET,
        IB_CACHE_GID_DEFAULT_MODE_DELETE
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 17639117afc6ab8637c7e73a3e2a13e6101a9702..179e8134d57fc13b425254d5162006f9fc201879 100644 (file)
@@ -672,14 +672,20 @@ EXPORT_SYMBOL(ib_query_port);
  * @port_num:Port number to query
  * @index:GID table index to query
  * @gid:Returned GID
+ * @attr: Returned GID attributes related to this GID index (only in RoCE).
+ *   NULL means ignore.
  *
  * ib_query_gid() fetches the specified GID table entry.
  */
 int ib_query_gid(struct ib_device *device,
-                u8 port_num, int index, union ib_gid *gid)
+                u8 port_num, int index, union ib_gid *gid,
+                struct ib_gid_attr *attr)
 {
        if (rdma_cap_roce_gid_table(device, port_num))
-               return ib_get_cached_gid(device, port_num, index, gid);
+               return ib_get_cached_gid(device, port_num, index, gid, attr);
+
+       if (attr)
+               return -EINVAL;
 
        return device->query_gid(device, port_num, index, gid);
 }
@@ -819,27 +825,28 @@ EXPORT_SYMBOL(ib_modify_port);
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @ndev: The ndev related to the GID to search for.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the GID table where the GID was found.  This
  *   parameter may be NULL.
  */
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               u8 *port_num, u16 *index)
+               struct net_device *ndev, u8 *port_num, u16 *index)
 {
        union ib_gid tmp_gid;
        int ret, port, i;
 
        for (port = rdma_start_port(device); port <= rdma_end_port(device); ++port) {
                if (rdma_cap_roce_gid_table(device, port)) {
-                       if (!ib_cache_gid_find_by_port(device, gid, port,
-                                                      NULL, index)) {
+                       if (!ib_find_cached_gid_by_port(device, gid, port,
+                                                       ndev, index)) {
                                *port_num = port;
                                return 0;
                        }
                }
 
                for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
-                       ret = ib_query_gid(device, port, i, &tmp_gid);
+                       ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
                        if (ret)
                                return ret;
                        if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
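
The device.c hunks above extend ib_query_gid() with an optional attribute out-parameter (returned only for RoCE ports, where the GID cache is authoritative). A minimal sketch of a consumer, assuming, as the in-tree callers suggest, that the cache takes a reference on the returned ndev which the caller must drop:

	union ib_gid gid;
	struct ib_gid_attr gid_attr;
	int ret;

	/* Fetch GID 0 of the port together with its cached attributes;
	 * a non-NULL attr on a non-RoCE port now returns -EINVAL. */
	ret = ib_query_gid(device, port_num, 0, &gid, &gid_attr);
	if (!ret && gid_attr.ndev) {
		pr_info("GID 0 is bound to %s\n", gid_attr.ndev->name);
		dev_put(gid_attr.ndev);	/* assumption: the cache held a reference */
	}
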
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 4b5c72311debbe59ae0975ec6d0fa722db13234e..8d8af7a41a30fae4ceb520ce0d11b6f541905c96 100644 (file)
@@ -752,7 +752,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        struct ib_device *device = mad_agent_priv->agent.device;
        u8 port_num;
        struct ib_wc mad_wc;
-       struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
+       struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
        size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
        u16 out_mad_pkey_index = 0;
        u16 drslid;
@@ -761,7 +761,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 
        if (rdma_cap_ib_switch(device) &&
            smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
-               port_num = send_wr->wr.ud.port_num;
+               port_num = send_wr->port_num;
        else
                port_num = mad_agent_priv->agent.port_num;
 
@@ -832,9 +832,9 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
        }
 
        build_smp_wc(mad_agent_priv->agent.qp,
-                    send_wr->wr_id, drslid,
-                    send_wr->wr.ud.pkey_index,
-                    send_wr->wr.ud.port_num, &mad_wc);
+                    send_wr->wr.wr_id, drslid,
+                    send_wr->pkey_index,
+                    send_wr->port_num, &mad_wc);
 
        if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
                mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
@@ -894,7 +894,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 
        local->mad_send_wr = mad_send_wr;
        if (opa) {
-               local->mad_send_wr->send_wr.wr.ud.pkey_index = out_mad_pkey_index;
+               local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
                local->return_wc_byte_len = mad_size;
        }
        /* Reference MAD agent until send side of local completion handled */
@@ -1039,14 +1039,14 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
 
        mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
 
-       mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
-       mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
-       mad_send_wr->send_wr.num_sge = 2;
-       mad_send_wr->send_wr.opcode = IB_WR_SEND;
-       mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
-       mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
-       mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
-       mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
+       mad_send_wr->send_wr.wr.wr_id = (unsigned long) mad_send_wr;
+       mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
+       mad_send_wr->send_wr.wr.num_sge = 2;
+       mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
+       mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
+       mad_send_wr->send_wr.remote_qpn = remote_qpn;
+       mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
+       mad_send_wr->send_wr.pkey_index = pkey_index;
 
        if (rmpp_active) {
                ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
@@ -1151,7 +1151,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
        /* Set WR ID to find mad_send_wr upon completion */
        qp_info = mad_send_wr->mad_agent_priv->qp_info;
-       mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
+       mad_send_wr->send_wr.wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
        mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
 
        mad_agent = mad_send_wr->send_buf.mad_agent;
@@ -1179,7 +1179,7 @@ int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
 
        spin_lock_irqsave(&qp_info->send_queue.lock, flags);
        if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
-               ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
+               ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
                                   &bad_send_wr);
                list = &qp_info->send_queue.list;
        } else {
@@ -1244,7 +1244,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
                 * request associated with the completion
                 */
                next_send_buf = send_buf->next;
-               mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
+               mad_send_wr->send_wr.ah = send_buf->ah;
 
                if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
                    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
@@ -1877,7 +1877,7 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
                                          ((1 << lmc) - 1)));
                } else {
                        if (ib_get_cached_gid(device, port_num,
-                                             attr.grh.sgid_index, &sgid))
+                                             attr.grh.sgid_index, &sgid, NULL))
                                return 0;
                        return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
                                       16);
@@ -2457,7 +2457,7 @@ retry:
        ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
 
        if (queued_send_wr) {
-               ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
+               ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
                                   &bad_send_wr);
                if (ret) {
                        dev_err(&port_priv->device->dev,
@@ -2515,7 +2515,7 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
                        struct ib_send_wr *bad_send_wr;
 
                        mad_send_wr->retry = 0;
-                       ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+                       ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
                                        &bad_send_wr);
                        if (ret)
                                ib_mad_send_done_handler(port_priv, wc);
@@ -2713,7 +2713,7 @@ static void local_completions(struct work_struct *work)
                        build_smp_wc(recv_mad_agent->agent.qp,
                                     (unsigned long) local->mad_send_wr,
                                     be16_to_cpu(IB_LID_PERMISSIVE),
-                                    local->mad_send_wr->send_wr.wr.ud.pkey_index,
+                                    local->mad_send_wr->send_wr.pkey_index,
                                     recv_mad_agent->agent.port_num, &wc);
 
                        local->mad_priv->header.recv_wc.wc = &wc;
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 4a4f7aad09783de0cfd3fb92031b03890925b760..990698a6ab4b7024116ae50c9417d00ce179b41c 100644 (file)
@@ -123,7 +123,7 @@ struct ib_mad_send_wr_private {
        struct ib_mad_send_buf send_buf;
        u64 header_mapping;
        u64 payload_mapping;
-       struct ib_send_wr send_wr;
+       struct ib_ud_wr send_wr;
        struct ib_sge sg_list[IB_MAD_SEND_REQ_MAX_SG];
        __be64 tid;
        unsigned long timeout;
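
The mad.c and mad_priv.h hunks above are part of the Work Request cleanup series: UD-specific fields move out of the old wr.ud union into struct ib_ud_wr, which embeds the generic struct ib_send_wr as its first member. A minimal sketch of a UD post under the new layout (my_cookie, sge, ah, dqpn and pkey_index are placeholders):

	struct ib_ud_wr ud_wr = {
		.wr = {
			.wr_id      = my_cookie,
			.sg_list    = &sge,
			.num_sge    = 1,
			.opcode     = IB_WR_SEND,
			.send_flags = IB_SEND_SIGNALED,
		},
		.ah          = ah,
		.remote_qpn  = dqpn,
		.remote_qkey = IB_QP_SET_QKEY,
		.pkey_index  = pkey_index,
	};
	struct ib_send_wr *bad_wr;
	int ret;

	/* The generic posting interface still takes ib_send_wr; callers
	 * now pass the embedded member, as the mad.c hunks above do. */
	ret = ib_post_send(qp, &ud_wr.wr, &bad_wr);
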
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d38d8b2b2979ddc2bebb243b98b79a04644fc929..bb6685fb08c61483546505f99037b88af6202ad0 100644 (file)
@@ -729,7 +729,8 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
        u16 gid_index;
        u8 p;
 
-       ret = ib_find_cached_gid(device, &rec->port_gid, &p, &gid_index);
+       ret = ib_find_cached_gid(device, &rec->port_gid,
+                                NULL, &p, &gid_index);
        if (ret)
                return ret;
 
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 8c014b33d8e0a5bc97e6e2408b3ded37ddfd06c2..dcdaa79e3f0faa0dcd9288a0f14cf9e4e996210c 100644 (file)
@@ -1007,26 +1007,29 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
        force_grh = rdma_cap_eth_ah(device, port_num);
 
        if (rec->hop_limit > 1 || force_grh) {
+               struct net_device *ndev = ib_get_ndev_from_path(rec);
+
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = rec->dgid;
 
-               ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
+               ret = ib_find_cached_gid(device, &rec->sgid, ndev, &port_num,
                                         &gid_index);
-               if (ret)
+               if (ret) {
+                       if (ndev)
+                               dev_put(ndev);
                        return ret;
+               }
 
                ah_attr->grh.sgid_index    = gid_index;
                ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
                ah_attr->grh.hop_limit     = rec->hop_limit;
                ah_attr->grh.traffic_class = rec->traffic_class;
+               if (ndev)
+                       dev_put(ndev);
        }
        if (force_grh) {
                memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
-               ah_attr->vlan_id = rec->vlan_id;
-       } else {
-               ah_attr->vlan_id = 0xffff;
        }
-
        return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -1150,9 +1153,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 
                ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
                          mad->data, &rec);
-               rec.vlan_id = 0xffff;
+               rec.net = NULL;
+               rec.ifindex = 0;
                memset(rec.dmac, 0, ETH_ALEN);
-               memset(rec.smac, 0, ETH_ALEN);
                query->callback(status, &rec, query->context);
        } else
                query->callback(status, NULL, query->context);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 34cdd74b0a17ed06228bf78f134c2001c976125a..b1f37d4095fa1e15f7402c35a367b00e1f24b6b5 100644 (file)
@@ -289,7 +289,7 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
        union ib_gid gid;
        ssize_t ret;
 
-       ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid);
+       ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
        if (ret)
                return ret;
 
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 30467d10df91b170e40657864354356153545e76..8b5a934e1133d80b42e12d1790b3672d73e4a779 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/slab.h>
 #include <linux/sysctl.h>
 #include <linux/module.h>
+#include <linux/nsproxy.h>
 
 #include <rdma/rdma_user_cm.h>
 #include <rdma/ib_marshall.h>
@@ -472,7 +473,8 @@ static ssize_t ucma_create_id(struct ucma_file *file, const char __user *inbuf,
                return -ENOMEM;
 
        ctx->uid = cmd.uid;
-       ctx->cm_id = rdma_create_id(ucma_event_handler, ctx, cmd.ps, qp_type);
+       ctx->cm_id = rdma_create_id(current->nsproxy->net_ns,
+                                   ucma_event_handler, ctx, cmd.ps, qp_type);
        if (IS_ERR(ctx->cm_id)) {
                ret = PTR_ERR(ctx->cm_id);
                goto err1;
@@ -1211,7 +1213,6 @@ static int ucma_set_ib_path(struct ucma_context *ctx,
                return -EINVAL;
 
        memset(&sa_path, 0, sizeof(sa_path));
-       sa_path.vlan_id = 0xffff;
 
        ib_sa_unpack_path(path_data->path_rec, &sa_path);
        ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1);
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 3863d33c243d80cde7df7c100205d4578aa58d7a..94bbd8c155fcca1daf5f2e9ee0fc65552821ff3f 100644 (file)
@@ -272,5 +272,6 @@ IB_UVERBS_DECLARE_EX_CMD(create_flow);
 IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
 IB_UVERBS_DECLARE_EX_CMD(query_device);
 IB_UVERBS_DECLARE_EX_CMD(create_cq);
+IB_UVERBS_DECLARE_EX_CMD(create_qp);
 
 #endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index be4cb9f04be3349f433084b1f95cc817f83ad63e..94816aeb95a0a186760fbe98de55edc69266a54c 100644 (file)
@@ -1478,7 +1478,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof(cmd)))
                return -EFAULT;
 
-       INIT_UDATA(&ucore, buf, cmd.response, sizeof(cmd), sizeof(resp));
+       INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd), sizeof(resp));
 
        INIT_UDATA(&uhw, buf + sizeof(cmd),
                   (unsigned long)cmd.response + sizeof(resp),
@@ -1741,66 +1741,65 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
        return in_len;
 }
 
-ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
-                           struct ib_device *ib_dev,
-                           const char __user *buf, int in_len,
-                           int out_len)
-{
-       struct ib_uverbs_create_qp      cmd;
-       struct ib_uverbs_create_qp_resp resp;
-       struct ib_udata                 udata;
-       struct ib_uqp_object           *obj;
-       struct ib_device               *device;
-       struct ib_pd                   *pd = NULL;
-       struct ib_xrcd                 *xrcd = NULL;
-       struct ib_uobject              *uninitialized_var(xrcd_uobj);
-       struct ib_cq                   *scq = NULL, *rcq = NULL;
-       struct ib_srq                  *srq = NULL;
-       struct ib_qp                   *qp;
-       struct ib_qp_init_attr          attr;
-       int ret;
-
-       if (out_len < sizeof resp)
-               return -ENOSPC;
-
-       if (copy_from_user(&cmd, buf, sizeof cmd))
-               return -EFAULT;
+static int create_qp(struct ib_uverbs_file *file,
+                    struct ib_udata *ucore,
+                    struct ib_udata *uhw,
+                    struct ib_uverbs_ex_create_qp *cmd,
+                    size_t cmd_sz,
+                    int (*cb)(struct ib_uverbs_file *file,
+                              struct ib_uverbs_ex_create_qp_resp *resp,
+                              struct ib_udata *udata),
+                    void *context)
+{
+       struct ib_uqp_object            *obj;
+       struct ib_device                *device;
+       struct ib_pd                    *pd = NULL;
+       struct ib_xrcd                  *xrcd = NULL;
+       struct ib_uobject               *uninitialized_var(xrcd_uobj);
+       struct ib_cq                    *scq = NULL, *rcq = NULL;
+       struct ib_srq                   *srq = NULL;
+       struct ib_qp                    *qp;
+       char                            *buf;
+       struct ib_qp_init_attr          attr;
+       struct ib_uverbs_ex_create_qp_resp resp;
+       int                             ret;
 
-       if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
+       if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
                return -EPERM;
 
-       INIT_UDATA(&udata, buf + sizeof cmd,
-                  (unsigned long) cmd.response + sizeof resp,
-                  in_len - sizeof cmd, out_len - sizeof resp);
-
        obj = kzalloc(sizeof *obj, GFP_KERNEL);
        if (!obj)
                return -ENOMEM;
 
-       init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
+       init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext,
+                 &qp_lock_class);
        down_write(&obj->uevent.uobject.mutex);
 
-       if (cmd.qp_type == IB_QPT_XRC_TGT) {
-               xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
+       if (cmd->qp_type == IB_QPT_XRC_TGT) {
+               xrcd = idr_read_xrcd(cmd->pd_handle, file->ucontext,
+                                    &xrcd_uobj);
                if (!xrcd) {
                        ret = -EINVAL;
                        goto err_put;
                }
                device = xrcd->device;
        } else {
-               if (cmd.qp_type == IB_QPT_XRC_INI) {
-                       cmd.max_recv_wr = cmd.max_recv_sge = 0;
+               if (cmd->qp_type == IB_QPT_XRC_INI) {
+                       cmd->max_recv_wr = 0;
+                       cmd->max_recv_sge = 0;
                } else {
-                       if (cmd.is_srq) {
-                               srq = idr_read_srq(cmd.srq_handle, file->ucontext);
+                       if (cmd->is_srq) {
+                               srq = idr_read_srq(cmd->srq_handle,
+                                                  file->ucontext);
                                if (!srq || srq->srq_type != IB_SRQT_BASIC) {
                                        ret = -EINVAL;
                                        goto err_put;
                                }
                        }
 
-                       if (cmd.recv_cq_handle != cmd.send_cq_handle) {
-                               rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
+                       if (cmd->recv_cq_handle != cmd->send_cq_handle) {
+                               rcq = idr_read_cq(cmd->recv_cq_handle,
+                                                 file->ucontext, 0);
                                if (!rcq) {
                                        ret = -EINVAL;
                                        goto err_put;
@@ -1808,9 +1807,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
                        }
                }
 
-               scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
+               scq = idr_read_cq(cmd->send_cq_handle, file->ucontext, !!rcq);
                rcq = rcq ?: scq;
-               pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
+               pd  = idr_read_pd(cmd->pd_handle, file->ucontext);
                if (!pd || !scq) {
                        ret = -EINVAL;
                        goto err_put;
@@ -1825,31 +1824,49 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
        attr.recv_cq       = rcq;
        attr.srq           = srq;
        attr.xrcd          = xrcd;
-       attr.sq_sig_type   = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
-       attr.qp_type       = cmd.qp_type;
+       attr.sq_sig_type   = cmd->sq_sig_all ? IB_SIGNAL_ALL_WR :
+                                             IB_SIGNAL_REQ_WR;
+       attr.qp_type       = cmd->qp_type;
        attr.create_flags  = 0;
 
-       attr.cap.max_send_wr     = cmd.max_send_wr;
-       attr.cap.max_recv_wr     = cmd.max_recv_wr;
-       attr.cap.max_send_sge    = cmd.max_send_sge;
-       attr.cap.max_recv_sge    = cmd.max_recv_sge;
-       attr.cap.max_inline_data = cmd.max_inline_data;
+       attr.cap.max_send_wr     = cmd->max_send_wr;
+       attr.cap.max_recv_wr     = cmd->max_recv_wr;
+       attr.cap.max_send_sge    = cmd->max_send_sge;
+       attr.cap.max_recv_sge    = cmd->max_recv_sge;
+       attr.cap.max_inline_data = cmd->max_inline_data;
 
        obj->uevent.events_reported     = 0;
        INIT_LIST_HEAD(&obj->uevent.event_list);
        INIT_LIST_HEAD(&obj->mcast_list);
 
-       if (cmd.qp_type == IB_QPT_XRC_TGT)
+       if (cmd_sz >= offsetof(typeof(*cmd), create_flags) +
+                     sizeof(cmd->create_flags))
+               attr.create_flags = cmd->create_flags;
+
+       if (attr.create_flags & ~IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) {
+               ret = -EINVAL;
+               goto err_put;
+       }
+
+       buf = (void *)cmd + sizeof(*cmd);
+       if (cmd_sz > sizeof(*cmd))
+               if (!(buf[0] == 0 && !memcmp(buf, buf + 1,
+                                            cmd_sz - sizeof(*cmd) - 1))) {
+                       ret = -EINVAL;
+                       goto err_put;
+               }
+
+       if (cmd->qp_type == IB_QPT_XRC_TGT)
                qp = ib_create_qp(pd, &attr);
        else
-               qp = device->create_qp(pd, &attr, &udata);
+               qp = device->create_qp(pd, &attr, uhw);
 
        if (IS_ERR(qp)) {
                ret = PTR_ERR(qp);
                goto err_put;
        }
 
-       if (cmd.qp_type != IB_QPT_XRC_TGT) {
+       if (cmd->qp_type != IB_QPT_XRC_TGT) {
                qp->real_qp       = qp;
                qp->device        = device;
                qp->pd            = pd;
@@ -1875,19 +1892,20 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
                goto err_destroy;
 
        memset(&resp, 0, sizeof resp);
-       resp.qpn             = qp->qp_num;
-       resp.qp_handle       = obj->uevent.uobject.id;
-       resp.max_recv_sge    = attr.cap.max_recv_sge;
-       resp.max_send_sge    = attr.cap.max_send_sge;
-       resp.max_recv_wr     = attr.cap.max_recv_wr;
-       resp.max_send_wr     = attr.cap.max_send_wr;
-       resp.max_inline_data = attr.cap.max_inline_data;
+       resp.base.qpn             = qp->qp_num;
+       resp.base.qp_handle       = obj->uevent.uobject.id;
+       resp.base.max_recv_sge    = attr.cap.max_recv_sge;
+       resp.base.max_send_sge    = attr.cap.max_send_sge;
+       resp.base.max_recv_wr     = attr.cap.max_recv_wr;
+       resp.base.max_send_wr     = attr.cap.max_send_wr;
+       resp.base.max_inline_data = attr.cap.max_inline_data;
 
-       if (copy_to_user((void __user *) (unsigned long) cmd.response,
-                        &resp, sizeof resp)) {
-               ret = -EFAULT;
-               goto err_copy;
-       }
+       resp.response_length = offsetof(typeof(resp), response_length) +
+                              sizeof(resp.response_length);
+
+       ret = cb(file, &resp, ucore);
+       if (ret)
+               goto err_cb;
 
        if (xrcd) {
                obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object,
@@ -1913,9 +1931,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 
        up_write(&obj->uevent.uobject.mutex);
 
-       return in_len;
-
-err_copy:
+       return 0;
+err_cb:
        idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
 
 err_destroy:
@@ -1937,6 +1954,113 @@ err_put:
        return ret;
 }
 
+static int ib_uverbs_create_qp_cb(struct ib_uverbs_file *file,
+                                 struct ib_uverbs_ex_create_qp_resp *resp,
+                                 struct ib_udata *ucore)
+{
+       if (ib_copy_to_udata(ucore, &resp->base, sizeof(resp->base)))
+               return -EFAULT;
+
+       return 0;
+}
+
+ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
+                           struct ib_device *ib_dev,
+                           const char __user *buf, int in_len,
+                           int out_len)
+{
+       struct ib_uverbs_create_qp      cmd;
+       struct ib_uverbs_ex_create_qp   cmd_ex;
+       struct ib_udata                 ucore;
+       struct ib_udata                 uhw;
+       ssize_t resp_size = sizeof(struct ib_uverbs_create_qp_resp);
+       int                             err;
+
+       if (out_len < resp_size)
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof(cmd)))
+               return -EFAULT;
+
+       INIT_UDATA(&ucore, buf, (unsigned long)cmd.response, sizeof(cmd),
+                  resp_size);
+       INIT_UDATA(&uhw, buf + sizeof(cmd),
+                  (unsigned long)cmd.response + resp_size,
+                  in_len - sizeof(cmd), out_len - resp_size);
+
+       memset(&cmd_ex, 0, sizeof(cmd_ex));
+       cmd_ex.user_handle = cmd.user_handle;
+       cmd_ex.pd_handle = cmd.pd_handle;
+       cmd_ex.send_cq_handle = cmd.send_cq_handle;
+       cmd_ex.recv_cq_handle = cmd.recv_cq_handle;
+       cmd_ex.srq_handle = cmd.srq_handle;
+       cmd_ex.max_send_wr = cmd.max_send_wr;
+       cmd_ex.max_recv_wr = cmd.max_recv_wr;
+       cmd_ex.max_send_sge = cmd.max_send_sge;
+       cmd_ex.max_recv_sge = cmd.max_recv_sge;
+       cmd_ex.max_inline_data = cmd.max_inline_data;
+       cmd_ex.sq_sig_all = cmd.sq_sig_all;
+       cmd_ex.qp_type = cmd.qp_type;
+       cmd_ex.is_srq = cmd.is_srq;
+
+       err = create_qp(file, &ucore, &uhw, &cmd_ex,
+                       offsetof(typeof(cmd_ex), is_srq) +
+                       sizeof(cmd.is_srq), ib_uverbs_create_qp_cb,
+                       NULL);
+
+       if (err)
+               return err;
+
+       return in_len;
+}
+
+static int ib_uverbs_ex_create_qp_cb(struct ib_uverbs_file *file,
+                                    struct ib_uverbs_ex_create_qp_resp *resp,
+                                    struct ib_udata *ucore)
+{
+       if (ib_copy_to_udata(ucore, resp, resp->response_length))
+               return -EFAULT;
+
+       return 0;
+}
+
+int ib_uverbs_ex_create_qp(struct ib_uverbs_file *file,
+                          struct ib_device *ib_dev,
+                          struct ib_udata *ucore,
+                          struct ib_udata *uhw)
+{
+       struct ib_uverbs_ex_create_qp_resp resp;
+       struct ib_uverbs_ex_create_qp cmd = {0};
+       int err;
+
+       if (ucore->inlen < (offsetof(typeof(cmd), comp_mask) +
+                           sizeof(cmd.comp_mask)))
+               return -EINVAL;
+
+       err = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen));
+       if (err)
+               return err;
+
+       if (cmd.comp_mask)
+               return -EINVAL;
+
+       if (cmd.reserved)
+               return -EINVAL;
+
+       if (ucore->outlen < (offsetof(typeof(resp), response_length) +
+                            sizeof(resp.response_length)))
+               return -ENOSPC;
+
+       err = create_qp(file, ucore, uhw, &cmd,
+                       min(ucore->inlen, sizeof(cmd)),
+                       ib_uverbs_ex_create_qp_cb, NULL);
+
+       if (err)
+               return err;
+
+       return 0;
+}
+
 ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
                          struct ib_device *ib_dev,
                          const char __user *buf, int in_len, int out_len)
@@ -2221,7 +2345,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
        attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
 
        if (qp->real_qp == qp) {
-               ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
+               ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask);
                if (ret)
                        goto release_qp;
                ret = qp->device->modify_qp(qp, attr,
@@ -2303,6 +2427,12 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
        return in_len;
 }
 
+static void *alloc_wr(size_t wr_size, __u32 num_sge)
+{
+       return kmalloc(ALIGN(wr_size, sizeof (struct ib_sge)) +
+                        num_sge * sizeof (struct ib_sge), GFP_KERNEL);
+};
+
 ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                            struct ib_device *ib_dev,
                            const char __user *buf, int in_len,
@@ -2351,14 +2481,83 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                        goto out_put;
                }
 
-               next = kmalloc(ALIGN(sizeof *next, sizeof (struct ib_sge)) +
-                              user_wr->num_sge * sizeof (struct ib_sge),
-                              GFP_KERNEL);
-               if (!next) {
-                       ret = -ENOMEM;
+               if (is_ud) {
+                       struct ib_ud_wr *ud;
+
+                       if (user_wr->opcode != IB_WR_SEND &&
+                           user_wr->opcode != IB_WR_SEND_WITH_IMM) {
+                               ret = -EINVAL;
+                               goto out_put;
+                       }
+
+                       ud = alloc_wr(sizeof(*ud), user_wr->num_sge);
+                       if (!ud) {
+                               ret = -ENOMEM;
+                               goto out_put;
+                       }
+
+                       ud->ah = idr_read_ah(user_wr->wr.ud.ah, file->ucontext);
+                       if (!ud->ah) {
+                               kfree(ud);
+                               ret = -EINVAL;
+                               goto out_put;
+                       }
+                       ud->remote_qpn = user_wr->wr.ud.remote_qpn;
+                       ud->remote_qkey = user_wr->wr.ud.remote_qkey;
+
+                       next = &ud->wr;
+               } else if (user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+                          user_wr->opcode == IB_WR_RDMA_WRITE ||
+                          user_wr->opcode == IB_WR_RDMA_READ) {
+                       struct ib_rdma_wr *rdma;
+
+                       rdma = alloc_wr(sizeof(*rdma), user_wr->num_sge);
+                       if (!rdma) {
+                               ret = -ENOMEM;
+                               goto out_put;
+                       }
+
+                       rdma->remote_addr = user_wr->wr.rdma.remote_addr;
+                       rdma->rkey = user_wr->wr.rdma.rkey;
+
+                       next = &rdma->wr;
+               } else if (user_wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                          user_wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+                       struct ib_atomic_wr *atomic;
+
+                       atomic = alloc_wr(sizeof(*atomic), user_wr->num_sge);
+                       if (!atomic) {
+                               ret = -ENOMEM;
+                               goto out_put;
+                       }
+
+                       atomic->remote_addr = user_wr->wr.atomic.remote_addr;
+                       atomic->compare_add = user_wr->wr.atomic.compare_add;
+                       atomic->swap = user_wr->wr.atomic.swap;
+                       atomic->rkey = user_wr->wr.atomic.rkey;
+
+                       next = &atomic->wr;
+               } else if (user_wr->opcode == IB_WR_SEND ||
+                          user_wr->opcode == IB_WR_SEND_WITH_IMM ||
+                          user_wr->opcode == IB_WR_SEND_WITH_INV) {
+                       next = alloc_wr(sizeof(*next), user_wr->num_sge);
+                       if (!next) {
+                               ret = -ENOMEM;
+                               goto out_put;
+                       }
+               } else {
+                       ret = -EINVAL;
                        goto out_put;
                }
 
+               if (user_wr->opcode == IB_WR_SEND_WITH_IMM ||
+                   user_wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
+                       next->ex.imm_data =
+                                       (__be32 __force) user_wr->ex.imm_data;
+               } else if (user_wr->opcode == IB_WR_SEND_WITH_INV) {
+                       next->ex.invalidate_rkey = user_wr->ex.invalidate_rkey;
+               }
+
                if (!last)
                        wr = next;
                else
@@ -2371,60 +2570,6 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
                next->opcode     = user_wr->opcode;
                next->send_flags = user_wr->send_flags;
 
-               if (is_ud) {
-                       if (next->opcode != IB_WR_SEND &&
-                           next->opcode != IB_WR_SEND_WITH_IMM) {
-                               ret = -EINVAL;
-                               goto out_put;
-                       }
-
-                       next->wr.ud.ah = idr_read_ah(user_wr->wr.ud.ah,
-                                                    file->ucontext);
-                       if (!next->wr.ud.ah) {
-                               ret = -EINVAL;
-                               goto out_put;
-                       }
-                       next->wr.ud.remote_qpn  = user_wr->wr.ud.remote_qpn;
-                       next->wr.ud.remote_qkey = user_wr->wr.ud.remote_qkey;
-                       if (next->opcode == IB_WR_SEND_WITH_IMM)
-                               next->ex.imm_data =
-                                       (__be32 __force) user_wr->ex.imm_data;
-               } else {
-                       switch (next->opcode) {
-                       case IB_WR_RDMA_WRITE_WITH_IMM:
-                               next->ex.imm_data =
-                                       (__be32 __force) user_wr->ex.imm_data;
-                       case IB_WR_RDMA_WRITE:
-                       case IB_WR_RDMA_READ:
-                               next->wr.rdma.remote_addr =
-                                       user_wr->wr.rdma.remote_addr;
-                               next->wr.rdma.rkey        =
-                                       user_wr->wr.rdma.rkey;
-                               break;
-                       case IB_WR_SEND_WITH_IMM:
-                               next->ex.imm_data =
-                                       (__be32 __force) user_wr->ex.imm_data;
-                               break;
-                       case IB_WR_SEND_WITH_INV:
-                               next->ex.invalidate_rkey =
-                                       user_wr->ex.invalidate_rkey;
-                               break;
-                       case IB_WR_ATOMIC_CMP_AND_SWP:
-                       case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               next->wr.atomic.remote_addr =
-                                       user_wr->wr.atomic.remote_addr;
-                               next->wr.atomic.compare_add =
-                                       user_wr->wr.atomic.compare_add;
-                               next->wr.atomic.swap = user_wr->wr.atomic.swap;
-                               next->wr.atomic.rkey = user_wr->wr.atomic.rkey;
-                       case IB_WR_SEND:
-                               break;
-                       default:
-                               ret = -EINVAL;
-                               goto out_put;
-                       }
-               }
-
                if (next->num_sge) {
                        next->sg_list = (void *) next +
                                ALIGN(sizeof *next, sizeof (struct ib_sge));
@@ -2458,8 +2603,8 @@ out_put:
        put_qp_read(qp);
 
        while (wr) {
-               if (is_ud && wr->wr.ud.ah)
-                       put_ah_read(wr->wr.ud.ah);
+               if (is_ud && ud_wr(wr)->ah)
+                       put_ah_read(ud_wr(wr)->ah);
                next = wr->next;
                kfree(wr);
                wr = next;
@@ -2698,7 +2843,6 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
        attr.grh.sgid_index    = cmd.attr.grh.sgid_index;
        attr.grh.hop_limit     = cmd.attr.grh.hop_limit;
        attr.grh.traffic_class = cmd.attr.grh.traffic_class;
-       attr.vlan_id           = 0;
        memset(&attr.dmac, 0, sizeof(attr.dmac));
        memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16);
 
index c29a660c72fe3674cea593f9cdab483f133edd8f..e3ef28861be63dd453d0fd1b55ec21f939919624 100644
@@ -127,6 +127,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
        [IB_USER_VERBS_EX_CMD_DESTROY_FLOW]     = ib_uverbs_ex_destroy_flow,
        [IB_USER_VERBS_EX_CMD_QUERY_DEVICE]     = ib_uverbs_ex_query_device,
        [IB_USER_VERBS_EX_CMD_CREATE_CQ]        = ib_uverbs_ex_create_cq,
+       [IB_USER_VERBS_EX_CMD_CREATE_QP]        = ib_uverbs_ex_create_qp,
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
index abd97247443e437ffff62e8238610657ca355394..7d2f14c9bbefaf1f3c28eac11b7bcefbc2802927 100644
@@ -141,8 +141,8 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
        dst->preference         = src->preference;
        dst->packet_life_time_selector = src->packet_life_time_selector;
 
-       memset(dst->smac, 0, sizeof(dst->smac));
        memset(dst->dmac, 0, sizeof(dst->dmac));
-       dst->vlan_id = 0xffff;
+       dst->net = NULL;
+       dst->ifindex = 0;
 }
 EXPORT_SYMBOL(ib_copy_path_rec_from_user);
index e1f2c9887f3f48ebc20c0931304ae75c5f65c03f..e2e53f9d7a22c91e71d25320ac74ab178658b5df 100644
@@ -41,6 +41,9 @@
 #include <linux/export.h>
 #include <linux/string.h>
 #include <linux/slab.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <net/addrconf.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
@@ -308,6 +311,35 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 }
 EXPORT_SYMBOL(ib_create_ah);
 
+struct find_gid_index_context {
+       u16 vlan_id;
+};
+
+static bool find_gid_index(const union ib_gid *gid,
+                          const struct ib_gid_attr *gid_attr,
+                          void *context)
+{
+       struct find_gid_index_context *ctx =
+               (struct find_gid_index_context *)context;
+
+       if ((!!(ctx->vlan_id != 0xffff) == !is_vlan_dev(gid_attr->ndev)) ||
+           (is_vlan_dev(gid_attr->ndev) &&
+            vlan_dev_vlan_id(gid_attr->ndev) != ctx->vlan_id))
+               return false;
+
+       return true;
+}
+
+static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
+                                  u16 vlan_id, const union ib_gid *sgid,
+                                  u16 *gid_index)
+{
+       struct find_gid_index_context context = {.vlan_id = vlan_id};
+
+       return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+                                    &context, gid_index);
+}
+
 int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                       const struct ib_wc *wc, const struct ib_grh *grh,
                       struct ib_ah_attr *ah_attr)
@@ -318,21 +350,30 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 
        memset(ah_attr, 0, sizeof *ah_attr);
        if (rdma_cap_eth_ah(device, port_num)) {
+               u16 vlan_id = wc->wc_flags & IB_WC_WITH_VLAN ?
+                               wc->vlan_id : 0xffff;
+
                if (!(wc->wc_flags & IB_WC_GRH))
                        return -EPROTOTYPE;
 
-               if (wc->wc_flags & IB_WC_WITH_SMAC &&
-                   wc->wc_flags & IB_WC_WITH_VLAN) {
-                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
-                       ah_attr->vlan_id = wc->vlan_id;
-               } else {
+               if (!(wc->wc_flags & IB_WC_WITH_SMAC) ||
+                   !(wc->wc_flags & IB_WC_WITH_VLAN)) {
                        ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
-                                       ah_attr->dmac, &ah_attr->vlan_id);
+                                                        ah_attr->dmac,
+                                                        wc->wc_flags & IB_WC_WITH_VLAN ?
+                                                        NULL : &vlan_id,
+                                                        0);
                        if (ret)
                                return ret;
                }
-       } else {
-               ah_attr->vlan_id = 0xffff;
+
+               ret = get_sgid_index_from_eth(device, port_num, vlan_id,
+                                             &grh->dgid, &gid_index);
+               if (ret)
+                       return ret;
+
+               if (wc->wc_flags & IB_WC_WITH_SMAC)
+                       memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
        }
 
        ah_attr->dlid = wc->slid;
@@ -344,10 +385,13 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
                ah_attr->ah_flags = IB_AH_GRH;
                ah_attr->grh.dgid = grh->sgid;
 
-               ret = ib_find_cached_gid(device, &grh->dgid, &port_num,
-                                        &gid_index);
-               if (ret)
-                       return ret;
+               if (!rdma_cap_eth_ah(device, port_num)) {
+                       ret = ib_find_cached_gid_by_port(device, &grh->dgid,
+                                                        port_num, NULL,
+                                                        &gid_index);
+                       if (ret)
+                               return ret;
+               }
 
                ah_attr->grh.sgid_index = (u8) gid_index;
                flow_class = be32_to_cpu(grh->version_tclass_flow);
@@ -617,9 +661,7 @@ EXPORT_SYMBOL(ib_create_qp);
 static const struct {
        int                     valid;
        enum ib_qp_attr_mask    req_param[IB_QPT_MAX];
-       enum ib_qp_attr_mask    req_param_add_eth[IB_QPT_MAX];
        enum ib_qp_attr_mask    opt_param[IB_QPT_MAX];
-       enum ib_qp_attr_mask    opt_param_add_eth[IB_QPT_MAX];
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = {
                [IB_QPS_RESET] = { .valid = 1 },
@@ -700,12 +742,6 @@ static const struct {
                                                IB_QP_MAX_DEST_RD_ATOMIC        |
                                                IB_QP_MIN_RNR_TIMER),
                        },
-                       .req_param_add_eth = {
-                               [IB_QPT_RC]  = (IB_QP_SMAC),
-                               [IB_QPT_UC]  = (IB_QP_SMAC),
-                               [IB_QPT_XRC_INI]  = (IB_QP_SMAC),
-                               [IB_QPT_XRC_TGT]  = (IB_QP_SMAC)
-                       },
                        .opt_param = {
                                 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX               |
                                                 IB_QP_QKEY),
@@ -726,21 +762,7 @@ static const struct {
                                 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX               |
                                                 IB_QP_QKEY),
                         },
-                       .opt_param_add_eth = {
-                               [IB_QPT_RC]  = (IB_QP_ALT_SMAC                  |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID),
-                               [IB_QPT_UC]  = (IB_QP_ALT_SMAC                  |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID),
-                               [IB_QPT_XRC_INI]  = (IB_QP_ALT_SMAC                     |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID),
-                               [IB_QPT_XRC_TGT]  = (IB_QP_ALT_SMAC                     |
-                                               IB_QP_VID                       |
-                                               IB_QP_ALT_VID)
-                       }
-               }
+               },
        },
        [IB_QPS_RTR]   = {
                [IB_QPS_RESET] = { .valid = 1 },
@@ -962,13 +984,6 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
        req_param = qp_state_table[cur_state][next_state].req_param[type];
        opt_param = qp_state_table[cur_state][next_state].opt_param[type];
 
-       if (ll == IB_LINK_LAYER_ETHERNET) {
-               req_param |= qp_state_table[cur_state][next_state].
-                       req_param_add_eth[type];
-               opt_param |= qp_state_table[cur_state][next_state].
-                       opt_param_add_eth[type];
-       }
-
        if ((mask & req_param) != req_param)
                return 0;
 
@@ -979,40 +994,52 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);
 
-int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
-                           struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+int ib_resolve_eth_dmac(struct ib_qp *qp,
+                       struct ib_qp_attr *qp_attr, int *qp_attr_mask)
 {
        int           ret = 0;
-       union ib_gid  sgid;
 
-       if ((*qp_attr_mask & IB_QP_AV)  &&
-           (rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))) {
-               ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
-                                  qp_attr->ah_attr.grh.sgid_index, &sgid);
-               if (ret)
-                       goto out;
+       if (*qp_attr_mask & IB_QP_AV) {
+               if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) ||
+                   qp_attr->ah_attr.port_num > rdma_end_port(qp->device))
+                       return -EINVAL;
+
+               if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num))
+                       return 0;
+
                if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
-                       rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
-                       rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
-                       if (!(*qp_attr_mask & IB_QP_VID))
-                               qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+                       rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw,
+                                       qp_attr->ah_attr.dmac);
                } else {
-                       ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
-                                       qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
-                       if (ret)
-                               goto out;
-                       ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
-                       if (ret)
+                       union ib_gid            sgid;
+                       struct ib_gid_attr      sgid_attr;
+                       int                     ifindex;
+
+                       ret = ib_query_gid(qp->device,
+                                          qp_attr->ah_attr.port_num,
+                                          qp_attr->ah_attr.grh.sgid_index,
+                                          &sgid, &sgid_attr);
+
+                       if (ret || !sgid_attr.ndev) {
+                               if (!ret)
+                                       ret = -ENXIO;
                                goto out;
+                       }
+
+                       ifindex = sgid_attr.ndev->ifindex;
+
+                       ret = rdma_addr_find_dmac_by_grh(&sgid,
+                                                        &qp_attr->ah_attr.grh.dgid,
+                                                        qp_attr->ah_attr.dmac,
+                                                        NULL, ifindex);
+
+                       dev_put(sgid_attr.ndev);
                }
-               *qp_attr_mask |= IB_QP_SMAC;
-               if (qp_attr->vlan_id < 0xFFFF)
-                       *qp_attr_mask |= IB_QP_VID;
        }
 out:
        return ret;
 }
-EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+EXPORT_SYMBOL(ib_resolve_eth_dmac);
 
 
 int ib_modify_qp(struct ib_qp *qp,
@@ -1021,7 +1048,7 @@ int ib_modify_qp(struct ib_qp *qp,
 {
        int ret;
 
-       ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+       ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask);
        if (ret)
                return ret;
 
@@ -1253,31 +1280,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
 }
 EXPORT_SYMBOL(ib_alloc_mr);
 
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(struct ib_device *device,
-                                                         int max_page_list_len)
-{
-       struct ib_fast_reg_page_list *page_list;
-
-       if (!device->alloc_fast_reg_page_list)
-               return ERR_PTR(-ENOSYS);
-
-       page_list = device->alloc_fast_reg_page_list(device, max_page_list_len);
-
-       if (!IS_ERR(page_list)) {
-               page_list->device = device;
-               page_list->max_page_list_len = max_page_list_len;
-       }
-
-       return page_list;
-}
-EXPORT_SYMBOL(ib_alloc_fast_reg_page_list);
-
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
-       page_list->device->free_fast_reg_page_list(page_list);
-}
-EXPORT_SYMBOL(ib_free_fast_reg_page_list);
-
 /* Memory windows */
 
 struct ib_mw *ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
@@ -1469,3 +1471,110 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
                mr->device->check_mr_status(mr, check_mask, mr_status) : -ENOSYS;
 }
 EXPORT_SYMBOL(ib_check_mr_status);
+
+/**
+ * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
+ *     and set it in the memory region.
+ * @mr:            memory region
+ * @sg:            dma mapped scatterlist
+ * @sg_nents:      number of entries in sg
+ * @page_size:     page vector desired page size
+ *
+ * Constraints:
+ * - The first sg element is allowed to have an offset.
+ * - Each sg element must be aligned to page_size (or physically
+ *   contiguous to the previous element). In case an sg element has a
+ *   non contiguous offset, the mapping prefix will not include it.
+ * - The last sg element is allowed to have length less than page_size.
+ * - If sg_nents total byte length exceeds the mr max_num_sg * page_size
+ *   then only max_num_sg entries will be mapped.
+ *
+ * Returns the number of sg elements that were mapped to the memory region.
+ *
+ * After this completes successfully, the memory region
+ * is ready for registration.
+ */
+int ib_map_mr_sg(struct ib_mr *mr,
+                struct scatterlist *sg,
+                int sg_nents,
+                unsigned int page_size)
+{
+       if (unlikely(!mr->device->map_mr_sg))
+               return -ENOSYS;
+
+       mr->page_size = page_size;
+
+       return mr->device->map_mr_sg(mr, sg, sg_nents);
+}
+EXPORT_SYMBOL(ib_map_mr_sg);
+
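/*
 * Illustrative sketch only -- not part of this patch.  It shows how an
 * upper-layer driver might drive ib_map_mr_sg() above together with an
 * IB_WR_REG_MR work request.  The pd, qp, the already DMA-mapped
 * scatterlist and every example_* name are assumptions made for the
 * example, not code taken from this series.
 */
#include <rdma/ib_verbs.h>

static int example_fast_reg(struct ib_pd *pd, struct ib_qp *qp,
			    struct scatterlist *sgl, int nents)
{
	struct ib_reg_wr reg_wr;
	struct ib_send_wr *bad_wr;
	struct ib_mr *mr;
	int n, ret;

	/* Allocate an MR large enough for the whole sg list. */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/*
	 * Map as much of the sg list as fits; a return value smaller than
	 * nents means the remainder must be registered separately.
	 */
	n = ib_map_mr_sg(mr, sgl, nents, PAGE_SIZE);
	if (n <= 0) {
		ret = n ? n : -EINVAL;
		goto out_dereg;
	}

	/* Post the registration work request on the send queue. */
	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

	ret = ib_post_send(qp, &reg_wr.wr, &bad_wr);
	if (ret)
		goto out_dereg;

	return 0;

out_dereg:
	ib_dereg_mr(mr);
	return ret;
}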
+/**
+ * ib_sg_to_pages() - Convert the largest prefix of a sg list
+ *     to a page vector
+ * @mr:            memory region
+ * @sgl:           dma mapped scatterlist
+ * @sg_nents:      number of entries in sg
+ * @set_page:      driver page assignment function pointer
+ *
+ * Core service helper for drivers to convert the largest
+ * prefix of a given sg list to a page vector. The sg list
+ * prefix converted is the prefix that meets the requirements
+ * of ib_map_mr_sg.
+ *
+ * Returns the number of sg elements that were assigned to
+ * a page vector.
+ */
+int ib_sg_to_pages(struct ib_mr *mr,
+                  struct scatterlist *sgl,
+                  int sg_nents,
+                  int (*set_page)(struct ib_mr *, u64))
+{
+       struct scatterlist *sg;
+       u64 last_end_dma_addr = 0, last_page_addr = 0;
+       unsigned int last_page_off = 0;
+       u64 page_mask = ~((u64)mr->page_size - 1);
+       int i;
+
+       mr->iova = sg_dma_address(&sgl[0]);
+       mr->length = 0;
+
+       for_each_sg(sgl, sg, sg_nents, i) {
+               u64 dma_addr = sg_dma_address(sg);
+               unsigned int dma_len = sg_dma_len(sg);
+               u64 end_dma_addr = dma_addr + dma_len;
+               u64 page_addr = dma_addr & page_mask;
+
+               if (i && page_addr != dma_addr) {
+                       if (last_end_dma_addr != dma_addr) {
+                               /* gap */
+                               goto done;
+
+                       } else if (last_page_off + dma_len <= mr->page_size) {
+                               /* chunk this fragment with the last */
+                               mr->length += dma_len;
+                               last_end_dma_addr += dma_len;
+                               last_page_off += dma_len;
+                               continue;
+                       } else {
+                               /* map starting from the next page */
+                               page_addr = last_page_addr + mr->page_size;
+                               dma_len -= mr->page_size - last_page_off;
+                       }
+               }
+
+               do {
+                       if (unlikely(set_page(mr, page_addr)))
+                               goto done;
+                       page_addr += mr->page_size;
+               } while (page_addr < end_dma_addr);
+
+               mr->length += dma_len;
+               last_end_dma_addr = end_dma_addr;
+               last_page_addr = end_dma_addr & page_mask;
+               last_page_off = end_dma_addr & ~page_mask;
+       }
+
+done:
+       return i;
+}
+EXPORT_SYMBOL(ib_sg_to_pages);
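For orientation, a condensed sketch of the driver-side glue that ib_sg_to_pages() expects: the driver's set_page callback records each page address into its own array, and its map_mr_sg entry point resets the counter and delegates to the core helper. The names (example_mr, pages, npages) are invented for illustration and are not from this patch; the iw_cxgb3 hunks below follow the same shape.

#include <rdma/ib_verbs.h>

/* Hypothetical per-driver MR wrapper. */
struct example_mr {
	struct ib_mr	ibmr;
	u64		*pages;		/* page address vector handed to HW */
	u32		npages;
	u32		max_pages;
};

static int example_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct example_mr *mr = container_of(ibmr, struct example_mr, ibmr);

	/* Refuse to overflow the preallocated page vector. */
	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	mr->pages[mr->npages++] = addr;
	return 0;
}

static int example_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			     int sg_nents)
{
	struct example_mr *mr = container_of(ibmr, struct example_mr, ibmr);

	mr->npages = 0;
	return ib_sg_to_pages(ibmr, sg, sg_nents, example_set_page);
}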
index cf5474ae68ff010ae23004c45d4b421a1ff1b49c..cfe404925a399f2be0cd1bdbc23c262ad1b24ca9 100644
@@ -123,7 +123,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
                case T3_FAST_REGISTER:
-                       wc->opcode = IB_WC_FAST_REG_MR;
+                       wc->opcode = IB_WC_REG_MR;
                        break;
                default:
                        printk(KERN_ERR MOD "Unexpected opcode %d "
index 93308c45f298d921fb3fd25b195a2b5970471fe5..c34725ca0bb426606e16708fafab2995490d7187 100644
@@ -463,6 +463,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
                return -EINVAL;
 
        mhp = to_iwch_mr(ib_mr);
+       kfree(mhp->pages);
        rhp = mhp->rhp;
        mmid = mhp->attr.stag >> 8;
        cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
@@ -821,6 +822,12 @@ static struct ib_mr *iwch_alloc_mr(struct ib_pd *pd,
        if (!mhp)
                goto err;
 
+       mhp->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+       if (!mhp->pages) {
+               ret = -ENOMEM;
+               goto pl_err;
+       }
+
        mhp->rhp = rhp;
        ret = iwch_alloc_pbl(mhp, max_num_sg);
        if (ret)
@@ -847,31 +854,34 @@ err3:
 err2:
        iwch_free_pbl(mhp);
 err1:
+       kfree(mhp->pages);
+pl_err:
        kfree(mhp);
 err:
        return ERR_PTR(ret);
 }
 
-static struct ib_fast_reg_page_list *iwch_alloc_fastreg_pbl(
-                                       struct ib_device *device,
-                                       int page_list_len)
+static int iwch_set_page(struct ib_mr *ibmr, u64 addr)
 {
-       struct ib_fast_reg_page_list *page_list;
+       struct iwch_mr *mhp = to_iwch_mr(ibmr);
 
-       page_list = kmalloc(sizeof *page_list + page_list_len * sizeof(u64),
-                           GFP_KERNEL);
-       if (!page_list)
-               return ERR_PTR(-ENOMEM);
+       if (unlikely(mhp->npages == mhp->attr.pbl_size))
+               return -ENOMEM;
 
-       page_list->page_list = (u64 *)(page_list + 1);
-       page_list->max_page_list_len = page_list_len;
+       mhp->pages[mhp->npages++] = addr;
 
-       return page_list;
+       return 0;
 }
 
-static void iwch_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list)
+static int iwch_map_mr_sg(struct ib_mr *ibmr,
+                         struct scatterlist *sg,
+                         int sg_nents)
 {
-       kfree(page_list);
+       struct iwch_mr *mhp = to_iwch_mr(ibmr);
+
+       mhp->npages = 0;
+
+       return ib_sg_to_pages(ibmr, sg, sg_nents, iwch_set_page);
 }
 
 static int iwch_destroy_qp(struct ib_qp *ib_qp)
@@ -1450,8 +1460,7 @@ int iwch_register_device(struct iwch_dev *dev)
        dev->ibdev.bind_mw = iwch_bind_mw;
        dev->ibdev.dealloc_mw = iwch_dealloc_mw;
        dev->ibdev.alloc_mr = iwch_alloc_mr;
-       dev->ibdev.alloc_fast_reg_page_list = iwch_alloc_fastreg_pbl;
-       dev->ibdev.free_fast_reg_page_list = iwch_free_fastreg_pbl;
+       dev->ibdev.map_mr_sg = iwch_map_mr_sg;
        dev->ibdev.attach_mcast = iwch_multicast_attach;
        dev->ibdev.detach_mcast = iwch_multicast_detach;
        dev->ibdev.process_mad = iwch_process_mad;
index 87c14b0c5ac0c9514d9df59ddd39da08cfef8342..2ac85b86a680dea89becaf76c75956cda42a1644 100644
@@ -77,6 +77,8 @@ struct iwch_mr {
        struct iwch_dev *rhp;
        u64 kva;
        struct tpt_attributes attr;
+       u64 *pages;
+       u32 npages;
 };
 
 typedef struct iwch_mw iwch_mw_handle;
index b57c0befd962b837f45b36719663a945caf3cdbe..d0548fc6395eac2343775ea4a4b85d3c83bb98b0 100644
@@ -95,8 +95,8 @@ static int build_rdma_write(union t3_wr *wqe, struct ib_send_wr *wr,
        wqe->write.reserved[0] = 0;
        wqe->write.reserved[1] = 0;
        wqe->write.reserved[2] = 0;
-       wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
-       wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+       wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+       wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
 
        if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) {
                plen = 4;
@@ -137,8 +137,8 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
                wqe->read.local_inv = 0;
        wqe->read.reserved[0] = 0;
        wqe->read.reserved[1] = 0;
-       wqe->read.rem_stag = cpu_to_be32(wr->wr.rdma.rkey);
-       wqe->read.rem_to = cpu_to_be64(wr->wr.rdma.remote_addr);
+       wqe->read.rem_stag = cpu_to_be32(rdma_wr(wr)->rkey);
+       wqe->read.rem_to = cpu_to_be64(rdma_wr(wr)->remote_addr);
        wqe->read.local_stag = cpu_to_be32(wr->sg_list[0].lkey);
        wqe->read.local_len = cpu_to_be32(wr->sg_list[0].length);
        wqe->read.local_to = cpu_to_be64(wr->sg_list[0].addr);
@@ -146,27 +146,28 @@ static int build_rdma_read(union t3_wr *wqe, struct ib_send_wr *wr,
        return 0;
 }
 
-static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
-                               u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
+static int build_memreg(union t3_wr *wqe, struct ib_reg_wr *wr,
+                         u8 *flit_cnt, int *wr_cnt, struct t3_wq *wq)
 {
+       struct iwch_mr *mhp = to_iwch_mr(wr->mr);
        int i;
        __be64 *p;
 
-       if (wr->wr.fast_reg.page_list_len > T3_MAX_FASTREG_DEPTH)
+       if (mhp->npages > T3_MAX_FASTREG_DEPTH)
                return -EINVAL;
        *wr_cnt = 1;
-       wqe->fastreg.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
-       wqe->fastreg.len = cpu_to_be32(wr->wr.fast_reg.length);
-       wqe->fastreg.va_base_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
+       wqe->fastreg.stag = cpu_to_be32(wr->key);
+       wqe->fastreg.len = cpu_to_be32(mhp->ibmr.length);
+       wqe->fastreg.va_base_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
        wqe->fastreg.va_base_lo_fbo =
-                               cpu_to_be32(wr->wr.fast_reg.iova_start & 0xffffffff);
+                               cpu_to_be32(mhp->ibmr.iova & 0xffffffff);
        wqe->fastreg.page_type_perms = cpu_to_be32(
-               V_FR_PAGE_COUNT(wr->wr.fast_reg.page_list_len) |
-               V_FR_PAGE_SIZE(wr->wr.fast_reg.page_shift-12) |
+               V_FR_PAGE_COUNT(mhp->npages) |
+               V_FR_PAGE_SIZE(ilog2(wr->mr->page_size) - 12) |
                V_FR_TYPE(TPT_VATO) |
-               V_FR_PERMS(iwch_ib_to_tpt_access(wr->wr.fast_reg.access_flags)));
+               V_FR_PERMS(iwch_ib_to_tpt_access(wr->access)));
        p = &wqe->fastreg.pbl_addrs[0];
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++, p++) {
+       for (i = 0; i < mhp->npages; i++, p++) {
 
                /* If we need a 2nd WR, then set it up */
                if (i == T3_MAX_FASTREG_FRAG) {
@@ -175,14 +176,14 @@ static int build_fastreg(union t3_wr *wqe, struct ib_send_wr *wr,
                                Q_PTR2IDX((wq->wptr+1), wq->size_log2));
                        build_fw_riwrh((void *)wqe, T3_WR_FASTREG, 0,
                               Q_GENBIT(wq->wptr + 1, wq->size_log2),
-                              0, 1 + wr->wr.fast_reg.page_list_len - T3_MAX_FASTREG_FRAG,
+                              0, 1 + mhp->npages - T3_MAX_FASTREG_FRAG,
                               T3_EOP);
 
                        p = &wqe->pbl_frag.pbl_addrs[0];
                }
-               *p = cpu_to_be64((u64)wr->wr.fast_reg.page_list->page_list[i]);
+               *p = cpu_to_be64((u64)mhp->pages[i]);
        }
-       *flit_cnt = 5 + wr->wr.fast_reg.page_list_len;
+       *flit_cnt = 5 + mhp->npages;
        if (*flit_cnt > 15)
                *flit_cnt = 15;
        return 0;
@@ -414,10 +415,10 @@ int iwch_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        if (!qhp->wq.oldest_read)
                                qhp->wq.oldest_read = sqp;
                        break;
-               case IB_WR_FAST_REG_MR:
+               case IB_WR_REG_MR:
                        t3_wr_opcode = T3_WR_FASTREG;
-                       err = build_fastreg(wqe, wr, &t3_wr_flit_cnt,
-                                                &wr_cnt, &qhp->wq);
+                       err = build_memreg(wqe, reg_wr(wr), &t3_wr_flit_cnt,
+                                          &wr_cnt, &qhp->wq);
                        break;
                case IB_WR_LOCAL_INV:
                        if (wr->send_flags & IB_SEND_FENCE)
index debc39d2cbc2a61d66901dd20ef531703717ed29..c9cffced00ca1df11683b961d0cc48f983056a07 100644
@@ -632,22 +632,18 @@ static void best_mtu(const unsigned short *mtus, unsigned short mtu,
 
 static int send_connect(struct c4iw_ep *ep)
 {
-       struct cpl_act_open_req *req;
-       struct cpl_t5_act_open_req *t5_req;
-       struct cpl_act_open_req6 *req6;
-       struct cpl_t5_act_open_req6 *t5_req6;
+       struct cpl_act_open_req *req = NULL;
+       struct cpl_t5_act_open_req *t5req = NULL;
+       struct cpl_t6_act_open_req *t6req = NULL;
+       struct cpl_act_open_req6 *req6 = NULL;
+       struct cpl_t5_act_open_req6 *t5req6 = NULL;
+       struct cpl_t6_act_open_req6 *t6req6 = NULL;
        struct sk_buff *skb;
        u64 opt0;
        u32 opt2;
        unsigned int mtu_idx;
        int wscale;
-       int wrlen;
-       int sizev4 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
-                               sizeof(struct cpl_act_open_req) :
-                               sizeof(struct cpl_t5_act_open_req);
-       int sizev6 = is_t4(ep->com.dev->rdev.lldi.adapter_type) ?
-                               sizeof(struct cpl_act_open_req6) :
-                               sizeof(struct cpl_t5_act_open_req6);
+       int win, sizev4, sizev6, wrlen;
        struct sockaddr_in *la = (struct sockaddr_in *)
                                 &ep->com.mapped_local_addr;
        struct sockaddr_in *ra = (struct sockaddr_in *)
@@ -656,8 +652,28 @@ static int send_connect(struct c4iw_ep *ep)
                                   &ep->com.mapped_local_addr;
        struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
                                   &ep->com.mapped_remote_addr;
-       int win;
        int ret;
+       enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
+       u32 isn = (prandom_u32() & ~7UL) - 1;
+
+       switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+       case CHELSIO_T4:
+               sizev4 = sizeof(struct cpl_act_open_req);
+               sizev6 = sizeof(struct cpl_act_open_req6);
+               break;
+       case CHELSIO_T5:
+               sizev4 = sizeof(struct cpl_t5_act_open_req);
+               sizev6 = sizeof(struct cpl_t5_act_open_req6);
+               break;
+       case CHELSIO_T6:
+               sizev4 = sizeof(struct cpl_t6_act_open_req);
+               sizev6 = sizeof(struct cpl_t6_act_open_req6);
+               break;
+       default:
+               pr_err("T%d Chip is not supported\n",
+                      CHELSIO_CHIP_VERSION(adapter_type));
+               return -EINVAL;
+       }
 
        wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
                        roundup(sizev4, 16) :
@@ -706,7 +722,10 @@ static int send_connect(struct c4iw_ep *ep)
                opt2 |= SACK_EN_F;
        if (wscale && enable_tcp_window_scaling)
                opt2 |= WND_SCALE_EN_F;
-       if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+       if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
+               if (peer2peer)
+                       isn += 4;
+
                opt2 |= T5_OPT_2_VALID_F;
                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
                opt2 |= T5_ISS_F;
@@ -718,102 +737,109 @@ static int send_connect(struct c4iw_ep *ep)
 
        t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
 
-       if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
-               if (ep->com.remote_addr.ss_family == AF_INET) {
-                       req = (struct cpl_act_open_req *) skb_put(skb, wrlen);
+       if (ep->com.remote_addr.ss_family == AF_INET) {
+               switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+               case CHELSIO_T4:
+                       req = (struct cpl_act_open_req *)skb_put(skb, wrlen);
                        INIT_TP_WR(req, 0);
-                       OPCODE_TID(req) = cpu_to_be32(
-                                       MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
-                                       ((ep->rss_qid << 14) | ep->atid)));
-                       req->local_port = la->sin_port;
-                       req->peer_port = ra->sin_port;
-                       req->local_ip = la->sin_addr.s_addr;
-                       req->peer_ip = ra->sin_addr.s_addr;
-                       req->opt0 = cpu_to_be64(opt0);
+                       break;
+               case CHELSIO_T5:
+                       t5req = (struct cpl_t5_act_open_req *)skb_put(skb,
+                                       wrlen);
+                       INIT_TP_WR(t5req, 0);
+                       req = (struct cpl_act_open_req *)t5req;
+                       break;
+               case CHELSIO_T6:
+                       t6req = (struct cpl_t6_act_open_req *)skb_put(skb,
+                                       wrlen);
+                       INIT_TP_WR(t6req, 0);
+                       req = (struct cpl_act_open_req *)t6req;
+                       t5req = (struct cpl_t5_act_open_req *)t6req;
+                       break;
+               default:
+                       pr_err("T%d Chip is not supported\n",
+                              CHELSIO_CHIP_VERSION(adapter_type));
+                       ret = -EINVAL;
+                       goto clip_release;
+               }
+
+               OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
+                                       ((ep->rss_qid<<14) | ep->atid)));
+               req->local_port = la->sin_port;
+               req->peer_port = ra->sin_port;
+               req->local_ip = la->sin_addr.s_addr;
+               req->peer_ip = ra->sin_addr.s_addr;
+               req->opt0 = cpu_to_be64(opt0);
+
+               if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
                        req->params = cpu_to_be32(cxgb4_select_ntuple(
                                                ep->com.dev->rdev.lldi.ports[0],
                                                ep->l2t));
                        req->opt2 = cpu_to_be32(opt2);
                } else {
+                       t5req->params = cpu_to_be64(FILTER_TUPLE_V(
+                                               cxgb4_select_ntuple(
+                                               ep->com.dev->rdev.lldi.ports[0],
+                                               ep->l2t)));
+                       t5req->rsvd = cpu_to_be32(isn);
+                       PDBG("%s snd_isn %u\n", __func__, t5req->rsvd);
+                       t5req->opt2 = cpu_to_be32(opt2);
+               }
+       } else {
+               switch (CHELSIO_CHIP_VERSION(adapter_type)) {
+               case CHELSIO_T4:
                        req6 = (struct cpl_act_open_req6 *)skb_put(skb, wrlen);
-
                        INIT_TP_WR(req6, 0);
-                       OPCODE_TID(req6) = cpu_to_be32(
-                                          MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
-                                          ((ep->rss_qid<<14)|ep->atid)));
-                       req6->local_port = la6->sin6_port;
-                       req6->peer_port = ra6->sin6_port;
-                       req6->local_ip_hi = *((__be64 *)
-                                               (la6->sin6_addr.s6_addr));
-                       req6->local_ip_lo = *((__be64 *)
-                                               (la6->sin6_addr.s6_addr + 8));
-                       req6->peer_ip_hi = *((__be64 *)
-                                               (ra6->sin6_addr.s6_addr));
-                       req6->peer_ip_lo = *((__be64 *)
-                                               (ra6->sin6_addr.s6_addr + 8));
-                       req6->opt0 = cpu_to_be64(opt0);
+                       break;
+               case CHELSIO_T5:
+                       t5req6 = (struct cpl_t5_act_open_req6 *)skb_put(skb,
+                                       wrlen);
+                       INIT_TP_WR(t5req6, 0);
+                       req6 = (struct cpl_act_open_req6 *)t5req6;
+                       break;
+               case CHELSIO_T6:
+                       t6req6 = (struct cpl_t6_act_open_req6 *)skb_put(skb,
+                                       wrlen);
+                       INIT_TP_WR(t6req6, 0);
+                       req6 = (struct cpl_act_open_req6 *)t6req6;
+                       t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
+                       break;
+               default:
+                       pr_err("T%d Chip is not supported\n",
+                              CHELSIO_CHIP_VERSION(adapter_type));
+                       ret = -EINVAL;
+                       goto clip_release;
+               }
+
+               OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
+                                       ((ep->rss_qid<<14)|ep->atid)));
+               req6->local_port = la6->sin6_port;
+               req6->peer_port = ra6->sin6_port;
+               req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
+               req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
+               req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
+               req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
+               req6->opt0 = cpu_to_be64(opt0);
+
+               if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
                        req6->params = cpu_to_be32(cxgb4_select_ntuple(
                                                ep->com.dev->rdev.lldi.ports[0],
                                                ep->l2t));
                        req6->opt2 = cpu_to_be32(opt2);
-               }
-       } else {
-               u32 isn = (prandom_u32() & ~7UL) - 1;
-
-               if (peer2peer)
-                       isn += 4;
-
-               if (ep->com.remote_addr.ss_family == AF_INET) {
-                       t5_req = (struct cpl_t5_act_open_req *)
-                                skb_put(skb, wrlen);
-                       INIT_TP_WR(t5_req, 0);
-                       OPCODE_TID(t5_req) = cpu_to_be32(
-                                       MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
-                                       ((ep->rss_qid << 14) | ep->atid)));
-                       t5_req->local_port = la->sin_port;
-                       t5_req->peer_port = ra->sin_port;
-                       t5_req->local_ip = la->sin_addr.s_addr;
-                       t5_req->peer_ip = ra->sin_addr.s_addr;
-                       t5_req->opt0 = cpu_to_be64(opt0);
-                       t5_req->params = cpu_to_be64(FILTER_TUPLE_V(
-                                                    cxgb4_select_ntuple(
-                                            ep->com.dev->rdev.lldi.ports[0],
-                                            ep->l2t)));
-                       t5_req->rsvd = cpu_to_be32(isn);
-                       PDBG("%s snd_isn %u\n", __func__,
-                            be32_to_cpu(t5_req->rsvd));
-                       t5_req->opt2 = cpu_to_be32(opt2);
                } else {
-                       t5_req6 = (struct cpl_t5_act_open_req6 *)
-                                 skb_put(skb, wrlen);
-                       INIT_TP_WR(t5_req6, 0);
-                       OPCODE_TID(t5_req6) = cpu_to_be32(
-                                             MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
-                                             ((ep->rss_qid<<14)|ep->atid)));
-                       t5_req6->local_port = la6->sin6_port;
-                       t5_req6->peer_port = ra6->sin6_port;
-                       t5_req6->local_ip_hi = *((__be64 *)
-                                               (la6->sin6_addr.s6_addr));
-                       t5_req6->local_ip_lo = *((__be64 *)
-                                               (la6->sin6_addr.s6_addr + 8));
-                       t5_req6->peer_ip_hi = *((__be64 *)
-                                               (ra6->sin6_addr.s6_addr));
-                       t5_req6->peer_ip_lo = *((__be64 *)
-                                               (ra6->sin6_addr.s6_addr + 8));
-                       t5_req6->opt0 = cpu_to_be64(opt0);
-                       t5_req6->params = cpu_to_be64(FILTER_TUPLE_V(
-                                                       cxgb4_select_ntuple(
+                       t5req6->params = cpu_to_be64(FILTER_TUPLE_V(
+                                               cxgb4_select_ntuple(
                                                ep->com.dev->rdev.lldi.ports[0],
                                                ep->l2t)));
-                       t5_req6->rsvd = cpu_to_be32(isn);
-                       PDBG("%s snd_isn %u\n", __func__,
-                            be32_to_cpu(t5_req6->rsvd));
-                       t5_req6->opt2 = cpu_to_be32(opt2);
+                       t5req6->rsvd = cpu_to_be32(isn);
+                       PDBG("%s snd_isn %u\n", __func__, t5req6->rsvd);
+                       t5req6->opt2 = cpu_to_be32(opt2);
                }
        }
 
        set_bit(ACT_OPEN_REQ, &ep->com.history);
        ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
+clip_release:
        if (ret && ep->com.remote_addr.ss_family == AF_INET6)
                cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
                                   (const u32 *)&la6->sin6_addr.s6_addr, 1);
@@ -1196,6 +1222,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
        if ((status == 0) || (status == -ECONNREFUSED)) {
                if (!ep->tried_with_mpa_v1) {
                        /* this means MPA_v2 is used */
+                       event.ord = ep->ird;
+                       event.ird = ep->ord;
                        event.private_data_len = ep->plen -
                                sizeof(struct mpa_v2_conn_params);
                        event.private_data = ep->mpa_pkt +
@@ -1203,6 +1231,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
                                sizeof(struct mpa_v2_conn_params);
                } else {
                        /* this means MPA_v1 is used */
+                       event.ord = cur_max_read_depth(ep->com.dev);
+                       event.ird = cur_max_read_depth(ep->com.dev);
                        event.private_data_len = ep->plen;
                        event.private_data = ep->mpa_pkt +
                                sizeof(struct mpa_message);
@@ -1265,8 +1295,8 @@ static void established_upcall(struct c4iw_ep *ep)
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        memset(&event, 0, sizeof(event));
        event.event = IW_CM_EVENT_ESTABLISHED;
-       event.ird = ep->ird;
-       event.ord = ep->ord;
+       event.ird = ep->ord;
+       event.ord = ep->ird;
        if (ep->com.cm_id) {
                PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
                ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1898,7 +1928,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
 
 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
                     struct dst_entry *dst, struct c4iw_dev *cdev,
-                    bool clear_mpa_v1)
+                    bool clear_mpa_v1, enum chip_type adapter_type)
 {
        struct neighbour *n;
        int err, step;
@@ -1933,7 +1963,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
                        goto out;
                ep->mtu = pdev->mtu;
                ep->tx_chan = cxgb4_port_chan(pdev);
-               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+               ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
+                                               cxgb4_port_viid(pdev));
                step = cdev->rdev.lldi.ntxq /
                        cdev->rdev.lldi.nchan;
                ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -1952,7 +1983,8 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
                        goto out;
                ep->mtu = dst_mtu(dst);
                ep->tx_chan = cxgb4_port_chan(pdev);
-               ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
+               ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
+                                               cxgb4_port_viid(pdev));
                step = cdev->rdev.lldi.ntxq /
                        cdev->rdev.lldi.nchan;
                ep->txq_idx = cxgb4_port_idx(pdev) * step;
@@ -2025,7 +2057,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
                err = -EHOSTUNREACH;
                goto fail3;
        }
-       err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false);
+       err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
+                       ep->com.dev->rdev.lldi.adapter_type);
        if (err) {
                pr_err("%s - cannot alloc l2e.\n", __func__);
                goto fail4;
@@ -2213,13 +2246,14 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
        int wscale;
        struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
        int win;
+       enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 
        PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
        BUG_ON(skb_cloned(skb));
 
        skb_get(skb);
        rpl = cplhdr(skb);
-       if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+       if (!is_t4(adapter_type)) {
                skb_trim(skb, roundup(sizeof(*rpl5), 16));
                rpl5 = (void *)rpl;
                INIT_TP_WR(rpl5, ep->hwtid);
@@ -2266,12 +2300,16 @@ static void accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
                const struct tcphdr *tcph;
                u32 hlen = ntohl(req->hdr_len);
 
-               tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
-                       IP_HDR_LEN_G(hlen);
+               if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
+                       tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
+                               IP_HDR_LEN_G(hlen);
+               else
+                       tcph = (const void *)(req + 1) +
+                               T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
                if (tcph->ece && tcph->cwr)
                        opt2 |= CCTRL_ECN_V(1);
        }
-       if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
+       if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
                u32 isn = (prandom_u32() & ~7UL) - 1;
                opt2 |= T5_OPT_2_VALID_F;
                opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
@@ -2302,12 +2340,16 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
        return;
 }
 
-static void get_4tuple(struct cpl_pass_accept_req *req, int *iptype,
-                      __u8 *local_ip, __u8 *peer_ip,
+static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
+                      int *iptype, __u8 *local_ip, __u8 *peer_ip,
                       __be16 *local_port, __be16 *peer_port)
 {
-       int eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
-       int ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+       int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+                     ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+                     T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
+       int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
+                    IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
+                    T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
        struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
        struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
        struct tcphdr *tcp = (struct tcphdr *)
@@ -2362,7 +2404,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                goto reject;
        }
 
-       get_4tuple(req, &iptype, local_ip, peer_ip, &local_port, &peer_port);
+       get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
+                  local_ip, peer_ip, &local_port, &peer_port);
 
        /* Find output route */
        if (iptype == 4)  {
@@ -2397,7 +2440,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
                goto reject;
        }
 
-       err = import_ep(child_ep, iptype, peer_ip, dst, dev, false);
+       err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
+                       parent_ep->com.dev->rdev.lldi.adapter_type);
        if (err) {
                printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
                       __func__);
@@ -2929,7 +2973,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
        } else {
                if (peer2peer &&
                    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
-                   (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ord == 0)
+                   (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
                        ep->ird = 1;
        }
 
@@ -3189,7 +3233,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
                goto fail2;
        }
 
-       err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true);
+       err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
+                       ep->com.dev->rdev.lldi.adapter_type);
        if (err) {
                printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
                goto fail3;
@@ -3260,6 +3305,10 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
                                sin->sin_addr.s_addr, sin->sin_port, 0,
                                ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
                        if (err == -EBUSY) {
+                               if (c4iw_fatal_error(&ep->com.dev->rdev)) {
+                                       err = -EIO;
+                                       break;
+                               }
                                set_current_state(TASK_UNINTERRUPTIBLE);
                                schedule_timeout(usecs_to_jiffies(100));
                        }
@@ -3593,20 +3642,23 @@ static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
 
 static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
 {
-       u32 l2info;
-       u16 vlantag, len, hdr_len, eth_hdr_len;
+       __be32 l2info;
+       __be16 hdr_len, vlantag, len;
+       u16 eth_hdr_len;
+       int tcp_hdr_len, ip_hdr_len;
        u8 intf;
        struct cpl_rx_pkt *cpl = cplhdr(skb);
        struct cpl_pass_accept_req *req;
        struct tcp_options_received tmp_opt;
        struct c4iw_dev *dev;
+       enum chip_type type;
 
        dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
        /* Store values from cpl_rx_pkt in temporary location. */
-       vlantag = (__force u16) cpl->vlan;
-       len = (__force u16) cpl->len;
-       l2info  = (__force u32) cpl->l2info;
-       hdr_len = (__force u16) cpl->hdr_len;
+       vlantag = cpl->vlan;
+       len = cpl->len;
+       l2info  = cpl->l2info;
+       hdr_len = cpl->hdr_len;
        intf = cpl->iff;
 
        __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
@@ -3623,20 +3675,28 @@ static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
        memset(req, 0, sizeof(*req));
        req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
                         SYN_MAC_IDX_V(RX_MACIDX_G(
-                        (__force int) htonl(l2info))) |
+                        be32_to_cpu(l2info))) |
                         SYN_XACT_MATCH_F);
-       eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
-                           RX_ETHHDR_LEN_G((__force int)htonl(l2info)) :
-                           RX_T5_ETHHDR_LEN_G((__force int)htonl(l2info));
-       req->hdr_len = cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(
-                                       (__force int) htonl(l2info))) |
-                                  TCP_HDR_LEN_V(RX_TCPHDR_LEN_G(
-                                       (__force int) htons(hdr_len))) |
-                                  IP_HDR_LEN_V(RX_IPHDR_LEN_G(
-                                       (__force int) htons(hdr_len))) |
-                                  ETH_HDR_LEN_V(RX_ETHHDR_LEN_G(eth_hdr_len)));
-       req->vlan = (__force __be16) vlantag;
-       req->len = (__force __be16) len;
+       type = dev->rdev.lldi.adapter_type;
+       tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
+       ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
+       req->hdr_len =
+               cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
+       if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
+               eth_hdr_len = is_t4(type) ?
+                               RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
+                               RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
+               req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
+                                           IP_HDR_LEN_V(ip_hdr_len) |
+                                           ETH_HDR_LEN_V(eth_hdr_len));
+       } else { /* T6 and later */
+               eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
+               req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
+                                           T6_IP_HDR_LEN_V(ip_hdr_len) |
+                                           T6_ETH_HDR_LEN_V(eth_hdr_len));
+       }
+       req->vlan = vlantag;
+       req->len = len;
        req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
                                    PASS_OPEN_TOS_V(tos));
        req->tcpopt.mss = htons(tmp_opt.mss_clamp);
@@ -3755,9 +3815,22 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
                goto reject;
        }
 
-       eth_hdr_len = is_t4(dev->rdev.lldi.adapter_type) ?
-                           RX_ETHHDR_LEN_G(htonl(cpl->l2info)) :
-                           RX_T5_ETHHDR_LEN_G(htonl(cpl->l2info));
+       switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
+       case CHELSIO_T4:
+               eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+               break;
+       case CHELSIO_T5:
+               eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+               break;
+       case CHELSIO_T6:
+               eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
+               break;
+       default:
+               pr_err("T%d Chip is not supported\n",
+                      CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
+               goto reject;
+       }
+
        if (eth_hdr_len == ETH_HLEN) {
                eh = (struct ethhdr *)(req + 1);
                iph = (struct iphdr *)(eh + 1);
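
With T6 support added, the per-chip Ethernet header-length extraction is now open-coded in several spots in cm.c (get_4tuple(), accept_cr(), build_cpl_pass_accept_req() and rx_pkt() above). A minimal sketch of what a shared helper for the rx_pkt() variant could look like; the helper itself is hypothetical and is not part of this series, it only mirrors the switch shown above:

        /* Hypothetical helper: per-chip Ethernet header length from cpl->l2info. */
        static inline int rx_pkt_eth_hdr_len(enum chip_type type, __be32 l2info)
        {
                switch (CHELSIO_CHIP_VERSION(type)) {
                case CHELSIO_T4:
                        return RX_ETHHDR_LEN_G(be32_to_cpu(l2info));
                case CHELSIO_T5:
                        return RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
                case CHELSIO_T6:
                        return RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
                default:
                        return -EINVAL; /* unsupported chip revision */
                }
        }
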
index 92d518382a9fce90c3e1dbae45034675072da274..de9cd6901752fc1e3da38d64f62bfce7cb853501 100644 (file)
@@ -752,7 +752,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
                        wc->opcode = IB_WC_LOCAL_INV;
                        break;
                case FW_RI_FAST_REGISTER:
-                       wc->opcode = IB_WC_FAST_REG_MR;
+                       wc->opcode = IB_WC_REG_MR;
                        break;
                default:
                        printk(KERN_ERR MOD "Unexpected opcode %d "
index 1a297391b54c16c3a954650b57ef164ee471ca57..58fce1742b8d8c33b91dfa89075617a9c5cf34f7 100644 (file)
@@ -962,12 +962,12 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
                devp->rdev.lldi.sge_egrstatuspagesize;
 
        /*
-        * For T5 devices, we map all of BAR2 with WC.
+        * For T5/T6 devices, we map all of BAR2 with WC.
         * For T4 devices with onchip qp mem, we map only that part
         * of BAR2 with WC.
         */
        devp->rdev.bar2_pa = pci_resource_start(devp->rdev.lldi.pdev, 2);
-       if (is_t5(devp->rdev.lldi.adapter_type)) {
+       if (!is_t4(devp->rdev.lldi.adapter_type)) {
                devp->rdev.bar2_kva = ioremap_wc(devp->rdev.bar2_pa,
                        pci_resource_len(devp->rdev.lldi.pdev, 2));
                if (!devp->rdev.bar2_kva) {
@@ -1267,11 +1267,9 @@ static int enable_qp_db(int id, void *p, void *data)
 static void resume_rc_qp(struct c4iw_qp *qp)
 {
        spin_lock(&qp->lock);
-       t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc,
-                     is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+       t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc, NULL);
        qp->wq.sq.wq_pidx_inc = 0;
-       t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc,
-                     is_t5(qp->rhp->rdev.lldi.adapter_type), NULL);
+       t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc, NULL);
        qp->wq.rq.wq_pidx_inc = 0;
        spin_unlock(&qp->lock);
 }
index c7bb38c931a555b034e76580484d79851dd7546c..00e55faa086aa2a30259c5c8b96cc0ae129a4ef6 100644 (file)
@@ -386,6 +386,10 @@ struct c4iw_mr {
        struct c4iw_dev *rhp;
        u64 kva;
        struct tpt_attributes attr;
+       u64 *mpl;
+       dma_addr_t mpl_addr;
+       u32 max_mpl_len;
+       u32 mpl_len;
 };
 
 static inline struct c4iw_mr *to_c4iw_mr(struct ib_mr *ibmr)
@@ -405,20 +409,6 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw)
        return container_of(ibmw, struct c4iw_mw, ibmw);
 }
 
-struct c4iw_fr_page_list {
-       struct ib_fast_reg_page_list ibpl;
-       DEFINE_DMA_UNMAP_ADDR(mapping);
-       dma_addr_t dma_addr;
-       struct c4iw_dev *dev;
-       int pll_len;
-};
-
-static inline struct c4iw_fr_page_list *to_c4iw_fr_page_list(
-                                       struct ib_fast_reg_page_list *ibpl)
-{
-       return container_of(ibpl, struct c4iw_fr_page_list, ibpl);
-}
-
 struct c4iw_cq {
        struct ib_cq ibcq;
        struct c4iw_dev *rhp;
@@ -966,13 +956,12 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param);
 int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len);
 void c4iw_qp_add_ref(struct ib_qp *qp);
 void c4iw_qp_rem_ref(struct ib_qp *qp);
-void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *page_list);
-struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(
-                                       struct ib_device *device,
-                                       int page_list_len);
 struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
                            enum ib_mr_type mr_type,
                            u32 max_num_sg);
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+                  struct scatterlist *sg,
+                  int sg_nents);
 int c4iw_dealloc_mw(struct ib_mw *mw);
 struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
index 026b91ebd5e2e6806acae6b81dd41d56d599b4d2..e1629ab58db7873a3d9a6c044ba7bc65eb4512c6 100644 (file)
@@ -144,7 +144,7 @@ static int _c4iw_write_mem_inline(struct c4iw_rdev *rdev, u32 addr, u32 len,
                if (i == (num_wqe-1)) {
                        req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR) |
                                                    FW_WR_COMPL_F);
-                       req->wr.wr_lo = (__force __be64)&wr_wait;
+                       req->wr.wr_lo = (__force __be64)(unsigned long)&wr_wait;
                } else
                        req->wr.wr_hi = cpu_to_be32(FW_WR_OP_V(FW_ULPTX_WR));
                req->wr.wr_mid = cpu_to_be32(
@@ -863,6 +863,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
        u32 mmid;
        u32 stag = 0;
        int ret = 0;
+       int length = roundup(max_num_sg * sizeof(u64), 32);
 
        if (mr_type != IB_MR_TYPE_MEM_REG ||
            max_num_sg > t4_max_fr_depth(use_dsgl))
@@ -876,6 +877,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
                goto err;
        }
 
+       mhp->mpl = dma_alloc_coherent(&rhp->rdev.lldi.pdev->dev,
+                                     length, &mhp->mpl_addr, GFP_KERNEL);
+       if (!mhp->mpl) {
+               ret = -ENOMEM;
+               goto err_mpl;
+       }
+       mhp->max_mpl_len = length;
+
        mhp->rhp = rhp;
        ret = alloc_pbl(mhp, max_num_sg);
        if (ret)
@@ -905,54 +914,35 @@ err2:
        c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
                              mhp->attr.pbl_size << 3);
 err1:
+       dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+                         mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
+err_mpl:
        kfree(mhp);
 err:
        return ERR_PTR(ret);
 }
 
-struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device,
-                                                    int page_list_len)
+static int c4iw_set_page(struct ib_mr *ibmr, u64 addr)
 {
-       struct c4iw_fr_page_list *c4pl;
-       struct c4iw_dev *dev = to_c4iw_dev(device);
-       dma_addr_t dma_addr;
-       int pll_len = roundup(page_list_len * sizeof(u64), 32);
-
-       c4pl = kmalloc(sizeof(*c4pl), GFP_KERNEL);
-       if (!c4pl)
-               return ERR_PTR(-ENOMEM);
+       struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
 
-       c4pl->ibpl.page_list = dma_alloc_coherent(&dev->rdev.lldi.pdev->dev,
-                                                 pll_len, &dma_addr,
-                                                 GFP_KERNEL);
-       if (!c4pl->ibpl.page_list) {
-               kfree(c4pl);
-               return ERR_PTR(-ENOMEM);
-       }
-       dma_unmap_addr_set(c4pl, mapping, dma_addr);
-       c4pl->dma_addr = dma_addr;
-       c4pl->dev = dev;
-       c4pl->pll_len = pll_len;
+       if (unlikely(mhp->mpl_len == mhp->max_mpl_len))
+               return -ENOMEM;
 
-       PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
-            __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
-            &c4pl->dma_addr);
+       mhp->mpl[mhp->mpl_len++] = addr;
 
-       return &c4pl->ibpl;
+       return 0;
 }
 
-void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl)
+int c4iw_map_mr_sg(struct ib_mr *ibmr,
+                  struct scatterlist *sg,
+                  int sg_nents)
 {
-       struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl);
+       struct c4iw_mr *mhp = to_c4iw_mr(ibmr);
 
-       PDBG("%s c4pl %p pll_len %u page_list %p dma_addr %pad\n",
-            __func__, c4pl, c4pl->pll_len, c4pl->ibpl.page_list,
-            &c4pl->dma_addr);
+       mhp->mpl_len = 0;
 
-       dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev,
-                         c4pl->pll_len,
-                         c4pl->ibpl.page_list, dma_unmap_addr(c4pl, mapping));
-       kfree(c4pl);
+       return ib_sg_to_pages(ibmr, sg, sg_nents, c4iw_set_page);
 }
 
 int c4iw_dereg_mr(struct ib_mr *ib_mr)
@@ -970,6 +960,9 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
        rhp = mhp->rhp;
        mmid = mhp->attr.stag >> 8;
        remove_handle(rhp, &rhp->mmidr, mmid);
+       if (mhp->mpl)
+               dma_free_coherent(&mhp->rhp->rdev.lldi.pdev->dev,
+                                 mhp->max_mpl_len, mhp->mpl, mhp->mpl_addr);
        dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
                       mhp->attr.pbl_addr);
        if (mhp->attr.pbl_size)
index 7746113552e7b37cb38907c38dc5b4fca7fdebd4..0a7d99818b17d13384e32cb81446bd250647f85f 100644 (file)
@@ -209,7 +209,7 @@ static int c4iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
                if (addr >= rdev->oc_mw_pa)
                        vma->vm_page_prot = t4_pgprot_wc(vma->vm_page_prot);
                else {
-                       if (is_t5(rdev->lldi.adapter_type))
+                       if (!is_t4(rdev->lldi.adapter_type))
                                vma->vm_page_prot =
                                        t4_pgprot_wc(vma->vm_page_prot);
                        else
@@ -557,8 +557,7 @@ int c4iw_register_device(struct c4iw_dev *dev)
        dev->ibdev.bind_mw = c4iw_bind_mw;
        dev->ibdev.dealloc_mw = c4iw_dealloc_mw;
        dev->ibdev.alloc_mr = c4iw_alloc_mr;
-       dev->ibdev.alloc_fast_reg_page_list = c4iw_alloc_fastreg_pbl;
-       dev->ibdev.free_fast_reg_page_list = c4iw_free_fastreg_pbl;
+       dev->ibdev.map_mr_sg = c4iw_map_mr_sg;
        dev->ibdev.attach_mcast = c4iw_multicast_attach;
        dev->ibdev.detach_mcast = c4iw_multicast_detach;
        dev->ibdev.process_mad = c4iw_process_mad;
index 6517e1208ccb42dc38cd551a971e531e2eb7cd51..aa515afee7248823428cb2b725bf10c70f4fd82a 100644 (file)
@@ -528,8 +528,8 @@ static int build_rdma_write(struct t4_sq *sq, union t4_wr *wqe,
        if (wr->num_sge > T4_MAX_SEND_SGE)
                return -EINVAL;
        wqe->write.r2 = 0;
-       wqe->write.stag_sink = cpu_to_be32(wr->wr.rdma.rkey);
-       wqe->write.to_sink = cpu_to_be64(wr->wr.rdma.remote_addr);
+       wqe->write.stag_sink = cpu_to_be32(rdma_wr(wr)->rkey);
+       wqe->write.to_sink = cpu_to_be64(rdma_wr(wr)->remote_addr);
        if (wr->num_sge) {
                if (wr->send_flags & IB_SEND_INLINE) {
                        ret = build_immd(sq, wqe->write.u.immd_src, wr,
@@ -566,10 +566,10 @@ static int build_rdma_read(union t4_wr *wqe, struct ib_send_wr *wr, u8 *len16)
        if (wr->num_sge > 1)
                return -EINVAL;
        if (wr->num_sge) {
-               wqe->read.stag_src = cpu_to_be32(wr->wr.rdma.rkey);
-               wqe->read.to_src_hi = cpu_to_be32((u32)(wr->wr.rdma.remote_addr
+               wqe->read.stag_src = cpu_to_be32(rdma_wr(wr)->rkey);
+               wqe->read.to_src_hi = cpu_to_be32((u32)(rdma_wr(wr)->remote_addr
                                                        >> 32));
-               wqe->read.to_src_lo = cpu_to_be32((u32)wr->wr.rdma.remote_addr);
+               wqe->read.to_src_lo = cpu_to_be32((u32)rdma_wr(wr)->remote_addr);
                wqe->read.stag_sink = cpu_to_be32(wr->sg_list[0].lkey);
                wqe->read.plen = cpu_to_be32(wr->sg_list[0].length);
                wqe->read.to_sink_hi = cpu_to_be32((u32)(wr->sg_list[0].addr
@@ -605,47 +605,41 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
        return 0;
 }
 
-static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
-                        struct ib_send_wr *wr, u8 *len16, u8 t5dev)
+static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
+                       struct ib_reg_wr *wr, u8 *len16, u8 t5dev)
 {
-
+       struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
        struct fw_ri_immd *imdp;
        __be64 *p;
        int i;
-       int pbllen = roundup(wr->wr.fast_reg.page_list_len * sizeof(u64), 32);
+       int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
        int rem;
 
-       if (wr->wr.fast_reg.page_list_len >
-           t4_max_fr_depth(use_dsgl))
+       if (mhp->mpl_len > t4_max_fr_depth(use_dsgl))
                return -EINVAL;
 
        wqe->fr.qpbinde_to_dcacpu = 0;
-       wqe->fr.pgsz_shift = wr->wr.fast_reg.page_shift - 12;
+       wqe->fr.pgsz_shift = ilog2(wr->mr->page_size) - 12;
        wqe->fr.addr_type = FW_RI_VA_BASED_TO;
-       wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->wr.fast_reg.access_flags);
+       wqe->fr.mem_perms = c4iw_ib_to_tpt_access(wr->access);
        wqe->fr.len_hi = 0;
-       wqe->fr.len_lo = cpu_to_be32(wr->wr.fast_reg.length);
-       wqe->fr.stag = cpu_to_be32(wr->wr.fast_reg.rkey);
-       wqe->fr.va_hi = cpu_to_be32(wr->wr.fast_reg.iova_start >> 32);
-       wqe->fr.va_lo_fbo = cpu_to_be32(wr->wr.fast_reg.iova_start &
+       wqe->fr.len_lo = cpu_to_be32(mhp->ibmr.length);
+       wqe->fr.stag = cpu_to_be32(wr->key);
+       wqe->fr.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32);
+       wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
                                        0xffffffff);
 
        if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
-               struct c4iw_fr_page_list *c4pl =
-                       to_c4iw_fr_page_list(wr->wr.fast_reg.page_list);
                struct fw_ri_dsgl *sglp;
 
-               for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
-                       wr->wr.fast_reg.page_list->page_list[i] = (__force u64)
-                               cpu_to_be64((u64)
-                               wr->wr.fast_reg.page_list->page_list[i]);
-               }
+               for (i = 0; i < mhp->mpl_len; i++)
+                       mhp->mpl[i] = (__force u64)cpu_to_be64((u64)mhp->mpl[i]);
 
                sglp = (struct fw_ri_dsgl *)(&wqe->fr + 1);
                sglp->op = FW_RI_DATA_DSGL;
                sglp->r1 = 0;
                sglp->nsge = cpu_to_be16(1);
-               sglp->addr0 = cpu_to_be64(c4pl->dma_addr);
+               sglp->addr0 = cpu_to_be64(mhp->mpl_addr);
                sglp->len0 = cpu_to_be32(pbllen);
 
                *len16 = DIV_ROUND_UP(sizeof(wqe->fr) + sizeof(*sglp), 16);
@@ -657,9 +651,8 @@ static int build_fastreg(struct t4_sq *sq, union t4_wr *wqe,
                imdp->immdlen = cpu_to_be32(pbllen);
                p = (__be64 *)(imdp + 1);
                rem = pbllen;
-               for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
-                       *p = cpu_to_be64(
-                               (u64)wr->wr.fast_reg.page_list->page_list[i]);
+               for (i = 0; i < mhp->mpl_len; i++) {
+                       *p = cpu_to_be64((u64)mhp->mpl[i]);
                        rem -= sizeof(*p);
                        if (++p == (__be64 *)&sq->queue[sq->size])
                                p = (__be64 *)sq->queue;
@@ -712,8 +705,7 @@ static int ring_kernel_sq_db(struct c4iw_qp *qhp, u16 inc)
        spin_lock_irqsave(&qhp->rhp->lock, flags);
        spin_lock(&qhp->lock);
        if (qhp->rhp->db_state == NORMAL)
-               t4_ring_sq_db(&qhp->wq, inc,
-                             is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+               t4_ring_sq_db(&qhp->wq, inc, NULL);
        else {
                add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
                qhp->wq.sq.wq_pidx_inc += inc;
@@ -730,8 +722,7 @@ static int ring_kernel_rq_db(struct c4iw_qp *qhp, u16 inc)
        spin_lock_irqsave(&qhp->rhp->lock, flags);
        spin_lock(&qhp->lock);
        if (qhp->rhp->db_state == NORMAL)
-               t4_ring_rq_db(&qhp->wq, inc,
-                             is_t5(qhp->rhp->rdev.lldi.adapter_type), NULL);
+               t4_ring_rq_db(&qhp->wq, inc, NULL);
        else {
                add_to_fc_list(&qhp->rhp->db_fc_list, &qhp->db_fc_entry);
                qhp->wq.rq.wq_pidx_inc += inc;
@@ -813,13 +804,13 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        if (!qhp->wq.sq.oldest_read)
                                qhp->wq.sq.oldest_read = swsqe;
                        break;
-               case IB_WR_FAST_REG_MR:
+               case IB_WR_REG_MR:
                        fw_opcode = FW_RI_FR_NSMR_WR;
                        swsqe->opcode = FW_RI_FAST_REGISTER;
-                       err = build_fastreg(&qhp->wq.sq, wqe, wr, &len16,
-                                           is_t5(
-                                           qhp->rhp->rdev.lldi.adapter_type) ?
-                                           1 : 0);
+                       err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
+                                          is_t5(
+                                          qhp->rhp->rdev.lldi.adapter_type) ?
+                                          1 : 0);
                        break;
                case IB_WR_LOCAL_INV:
                        if (wr->send_flags & IB_SEND_FENCE)
@@ -860,8 +851,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                idx += DIV_ROUND_UP(len16*16, T4_EQ_ENTRY_SIZE);
        }
        if (!qhp->rhp->rdev.status_page->db_off) {
-               t4_ring_sq_db(&qhp->wq, idx,
-                             is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+               t4_ring_sq_db(&qhp->wq, idx, wqe);
                spin_unlock_irqrestore(&qhp->lock, flag);
        } else {
                spin_unlock_irqrestore(&qhp->lock, flag);
@@ -934,8 +924,7 @@ int c4iw_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
                num_wrs--;
        }
        if (!qhp->rhp->rdev.status_page->db_off) {
-               t4_ring_rq_db(&qhp->wq, idx,
-                             is_t5(qhp->rhp->rdev.lldi.adapter_type), wqe);
+               t4_ring_rq_db(&qhp->wq, idx, wqe);
                spin_unlock_irqrestore(&qhp->lock, flag);
        } else {
                spin_unlock_irqrestore(&qhp->lock, flag);
@@ -1875,7 +1864,7 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        attrs.rq_db_inc = attr->rq_psn;
        mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
        mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
-       if (is_t5(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
+       if (!is_t4(to_c4iw_qp(ibqp)->rhp->rdev.lldi.adapter_type) &&
            (mask & (C4IW_QP_ATTR_SQ_DB|C4IW_QP_ATTR_RQ_DB)))
                return -EINVAL;
 
index 274a7ab13befb367cedae3618a29e2ba0591b72a..1092a2d1f607464152fbd2d7e836e180d2131312 100644 (file)
@@ -455,8 +455,7 @@ static inline void pio_copy(u64 __iomem *dst, u64 *src)
        }
 }
 
-static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
-                                union t4_wr *wqe)
+static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, union t4_wr *wqe)
 {
 
        /* Flush host queue memory writes. */
@@ -482,7 +481,7 @@ static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc, u8 t5,
        writel(QID_V(wq->sq.qid) | PIDX_V(inc), wq->db);
 }
 
-static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc, u8 t5,
+static inline void t4_ring_rq_db(struct t4_wq *wq, u16 inc,
                                 union t4_recv_wr *wqe)
 {
 
index 1688a17de4fe1fc6d887d74d703b51dffaff2e08..86af71351d9a5be6cbdd87f3d22ccb7078c82b23 100644 (file)
@@ -76,7 +76,10 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        struct mlx4_dev *dev = ibdev->dev;
        int is_mcast = 0;
        struct in6_addr in6;
-       u16 vlan_tag;
+       u16 vlan_tag = 0xffff;
+       union ib_gid sgid;
+       struct ib_gid_attr gid_attr;
+       int ret;
 
        memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
        if (rdma_is_multicast_addr(&in6)) {
@@ -85,7 +88,17 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
        } else {
                memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
        }
-       vlan_tag = ah_attr->vlan_id;
+       ret = ib_get_cached_gid(pd->device, ah_attr->port_num,
+                               ah_attr->grh.sgid_index, &sgid, &gid_attr);
+       if (ret)
+               return ERR_PTR(ret);
+       memset(ah->av.eth.s_mac, 0, ETH_ALEN);
+       if (gid_attr.ndev) {
+               if (is_vlan_dev(gid_attr.ndev))
+                       vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
+               memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
+               dev_put(gid_attr.ndev);
+       }
        if (vlan_tag < 0x1000)
                vlan_tag |= (ah_attr->sl & 7) << 13;
        ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
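
create_iboe_ah() now derives the VLAN tag and source MAC from the GID cache rather than from ah_attr->vlan_id: ib_get_cached_gid() gained an ib_gid_attr out-parameter that carries the backing net_device. The same lookup pattern as a standalone sketch; the helper is illustrative only:

        #include <linux/if_vlan.h>
        #include <linux/netdevice.h>
        #include <rdma/ib_cache.h>

        /* Resolve VLAN id and source MAC for a cached RoCE GID entry. */
        static int example_gid_l2_fields(struct ib_device *device, u8 port,
                                         int gid_index, u16 *vlan_id, u8 *smac)
        {
                struct ib_gid_attr gid_attr;
                union ib_gid gid;
                int ret;

                *vlan_id = 0xffff;              /* no VLAN */
                ret = ib_get_cached_gid(device, port, gid_index, &gid, &gid_attr);
                if (ret)
                        return ret;

                if (gid_attr.ndev) {
                        if (is_vlan_dev(gid_attr.ndev))
                                *vlan_id = vlan_dev_vlan_id(gid_attr.ndev);
                        memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
                        dev_put(gid_attr.ndev); /* cache took a reference */
                }
                return 0;
        }
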
index 5fd49f9435f9dd8d2496d91394271b6270af2cbd..b88fc8f5ab180c717e827f582187b817f92a0e33 100644 (file)
@@ -818,7 +818,7 @@ repoll:
                        wc->opcode    = IB_WC_LSO;
                        break;
                case MLX4_OPCODE_FMR:
-                       wc->opcode    = IB_WC_FAST_REG_MR;
+                       wc->opcode    = IB_WC_REG_MR;
                        break;
                case MLX4_OPCODE_LOCAL_INVAL:
                        wc->opcode    = IB_WC_LOCAL_INV;
index 1cd75ff0225193c475c9a9debe2ccedcf9bdb8b8..870e56b6b25f5c7837f5a4a358597d748aaf1ef5 100644 (file)
@@ -457,7 +457,8 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
                          struct ib_grh *grh, struct ib_mad *mad)
 {
        struct ib_sge list;
-       struct ib_send_wr wr, *bad_wr;
+       struct ib_ud_wr wr;
+       struct ib_send_wr *bad_wr;
        struct mlx4_ib_demux_pv_ctx *tun_ctx;
        struct mlx4_ib_demux_pv_qp *tun_qp;
        struct mlx4_rcv_tunnel_mad *tun_mad;
@@ -582,18 +583,18 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
        list.length = sizeof (struct mlx4_rcv_tunnel_mad);
        list.lkey = tun_ctx->pd->local_dma_lkey;
 
-       wr.wr.ud.ah = ah;
-       wr.wr.ud.port_num = port;
-       wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
-       wr.wr.ud.remote_qpn = dqpn;
-       wr.next = NULL;
-       wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
-       wr.sg_list = &list;
-       wr.num_sge = 1;
-       wr.opcode = IB_WR_SEND;
-       wr.send_flags = IB_SEND_SIGNALED;
-
-       ret = ib_post_send(src_qp, &wr, &bad_wr);
+       wr.ah = ah;
+       wr.port_num = port;
+       wr.remote_qkey = IB_QP_SET_QKEY;
+       wr.remote_qpn = dqpn;
+       wr.wr.next = NULL;
+       wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
+       wr.wr.sg_list = &list;
+       wr.wr.num_sge = 1;
+       wr.wr.opcode = IB_WR_SEND;
+       wr.wr.send_flags = IB_SEND_SIGNALED;
+
+       ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
 out:
        if (ret)
                ib_destroy_ah(ah);
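
The hunk above shows the recurring shape of the Work Request cleanup series: the UD-specific fields leave the old wr.wr.ud union for a dedicated struct ib_ud_wr that embeds the generic struct ib_send_wr, and the embedded wr member is what gets posted. A minimal sketch of a new-style UD send, with placeholder arguments:

        #include <rdma/ib_verbs.h>

        static int example_post_ud(struct ib_qp *qp, struct ib_ah *ah,
                                   struct ib_sge *sge,
                                   u32 remote_qpn, u32 remote_qkey)
        {
                struct ib_send_wr *bad_wr;
                struct ib_ud_wr wr;

                memset(&wr, 0, sizeof(wr));
                wr.wr.opcode     = IB_WR_SEND;          /* generic fields stay in wr.wr */
                wr.wr.send_flags = IB_SEND_SIGNALED;
                wr.wr.sg_list    = sge;
                wr.wr.num_sge    = 1;
                wr.ah            = ah;                  /* was wr.wr.ud.ah */
                wr.remote_qpn    = remote_qpn;          /* was wr.wr.ud.remote_qpn */
                wr.remote_qkey   = remote_qkey;         /* was wr.wr.ud.remote_qkey */

                return ib_post_send(qp, &wr.wr, &bad_wr);
        }
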
@@ -824,18 +825,29 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 {
        struct mlx4_counter counter_stats;
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
-       int err;
+       struct counter_index *tmp_counter;
+       int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
 
        if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
                return -EINVAL;
 
        memset(&counter_stats, 0, sizeof(counter_stats));
-       err = mlx4_get_counter_stats(dev->dev,
-                                    dev->counters[port_num - 1].index,
-                                    &counter_stats, 0);
-       if (err)
-               err = IB_MAD_RESULT_FAILURE;
-       else {
+       mutex_lock(&dev->counters_table[port_num - 1].mutex);
+       list_for_each_entry(tmp_counter,
+                           &dev->counters_table[port_num - 1].counters_list,
+                           list) {
+               err = mlx4_get_counter_stats(dev->dev,
+                                            tmp_counter->index,
+                                            &counter_stats, 0);
+               if (err) {
+                       err = IB_MAD_RESULT_FAILURE;
+                       stats_avail = 0;
+                       break;
+               }
+               stats_avail = 1;
+       }
+       mutex_unlock(&dev->counters_table[port_num - 1].mutex);
+       if (stats_avail) {
                memset(out_mad->data, 0, sizeof out_mad->data);
                switch (counter_stats.counter_mode & 0xf) {
                case 0:
@@ -1172,10 +1184,11 @@ static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
                         enum ib_qp_type dest_qpt, u16 pkey_index,
                         u32 remote_qpn, u32 qkey, struct ib_ah_attr *attr,
-                        u8 *s_mac, struct ib_mad *mad)
+                        u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
 {
        struct ib_sge list;
-       struct ib_send_wr wr, *bad_wr;
+       struct ib_ud_wr wr;
+       struct ib_send_wr *bad_wr;
        struct mlx4_ib_demux_pv_ctx *sqp_ctx;
        struct mlx4_ib_demux_pv_qp *sqp;
        struct mlx4_mad_snd_buf *sqp_mad;
@@ -1246,22 +1259,25 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
        list.length = sizeof (struct mlx4_mad_snd_buf);
        list.lkey = sqp_ctx->pd->local_dma_lkey;
 
-       wr.wr.ud.ah = ah;
-       wr.wr.ud.port_num = port;
-       wr.wr.ud.pkey_index = wire_pkey_ix;
-       wr.wr.ud.remote_qkey = qkey;
-       wr.wr.ud.remote_qpn = remote_qpn;
-       wr.next = NULL;
-       wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
-       wr.sg_list = &list;
-       wr.num_sge = 1;
-       wr.opcode = IB_WR_SEND;
-       wr.send_flags = IB_SEND_SIGNALED;
+       wr.ah = ah;
+       wr.port_num = port;
+       wr.pkey_index = wire_pkey_ix;
+       wr.remote_qkey = qkey;
+       wr.remote_qpn = remote_qpn;
+       wr.wr.next = NULL;
+       wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
+       wr.wr.sg_list = &list;
+       wr.wr.num_sge = 1;
+       wr.wr.opcode = IB_WR_SEND;
+       wr.wr.send_flags = IB_SEND_SIGNALED;
        if (s_mac)
                memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
+       if (vlan_id < 0x1000)
+               vlan_id |= (attr->sl & 7) << 13;
+       to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
 
 
-       ret = ib_post_send(send_qp, &wr, &bad_wr);
+       ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
 out:
        if (ret)
                ib_destroy_ah(ah);
@@ -1295,6 +1311,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
        u8 *slave_id;
        int slave;
        int port;
+       u16 vlan_id;
 
        /* Get slave that sent this packet */
        if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
@@ -1383,10 +1400,10 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
                fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
 
        memcpy(ah_attr.dmac, tunnel->hdr.mac, 6);
-       ah_attr.vlan_id = be16_to_cpu(tunnel->hdr.vlan);
+       vlan_id = be16_to_cpu(tunnel->hdr.vlan);
        /* if slave have default vlan use it */
        mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
-                                   &ah_attr.vlan_id, &ah_attr.sl);
+                                   &vlan_id, &ah_attr.sl);
 
        mlx4_ib_send_to_wire(dev, slave, ctx->port,
                             is_proxy_qp0(dev, wc->src_qp, slave) ?
@@ -1394,7 +1411,7 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc
                             be16_to_cpu(tunnel->hdr.pkey_index),
                             be32_to_cpu(tunnel->hdr.remote_qpn),
                             be32_to_cpu(tunnel->hdr.qkey),
-                            &ah_attr, wc->smac, &tunnel->mad);
+                            &ah_attr, wc->smac, vlan_id, &tunnel->mad);
 }
 
 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
index efecdf0216d85179c05f6e949d0c7597cf7f4f70..f567160a4a56ed141320b2d153d21f2031cbb464 100644 (file)
@@ -335,7 +335,7 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
        if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
                return index;
 
-       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid);
+       ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, NULL);
        if (ret)
                return ret;
 
@@ -442,6 +442,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
        }
 
+       props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+
        props->vendor_id           = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
                0xffffff;
        props->vendor_part_id      = dev->dev->persist->pdev->device;
@@ -754,7 +756,7 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
        if (!rdma_cap_roce_gid_table(ibdev, port))
                return -ENODEV;
 
-       ret = ib_get_cached_gid(ibdev, port, index, gid);
+       ret = ib_get_cached_gid(ibdev, port, index, gid, NULL);
        if (ret == -EAGAIN) {
                memcpy(gid, &zgid, sizeof(*gid));
                return 0;
@@ -1247,6 +1249,22 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
        return 0;
 }
 
+static void mlx4_ib_delete_counters_table(struct mlx4_ib_dev *ibdev,
+                                         struct mlx4_ib_counters *ctr_table)
+{
+       struct counter_index *counter, *tmp_count;
+
+       mutex_lock(&ctr_table->mutex);
+       list_for_each_entry_safe(counter, tmp_count, &ctr_table->counters_list,
+                                list) {
+               if (counter->allocated)
+                       mlx4_counter_free(ibdev->dev, counter->index);
+               list_del(&counter->list);
+               kfree(counter);
+       }
+       mutex_unlock(&ctr_table->mutex);
+}
+
 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
                   union ib_gid *gid)
 {
@@ -2131,6 +2149,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        int num_req_counters;
        int allocated;
        u32 counter_index;
+       struct counter_index *new_counter_index = NULL;
 
        pr_info_once("%s", mlx4_ib_version);
 
@@ -2247,8 +2266,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        ibdev->ib_dev.rereg_user_mr     = mlx4_ib_rereg_user_mr;
        ibdev->ib_dev.dereg_mr          = mlx4_ib_dereg_mr;
        ibdev->ib_dev.alloc_mr          = mlx4_ib_alloc_mr;
-       ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
-       ibdev->ib_dev.free_fast_reg_page_list  = mlx4_ib_free_fast_reg_page_list;
+       ibdev->ib_dev.map_mr_sg         = mlx4_ib_map_mr_sg;
        ibdev->ib_dev.attach_mcast      = mlx4_ib_mcg_attach;
        ibdev->ib_dev.detach_mcast      = mlx4_ib_mcg_detach;
        ibdev->ib_dev.process_mad       = mlx4_ib_process_mad;
@@ -2293,7 +2311,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 
        ibdev->ib_dev.uverbs_ex_cmd_mask |=
                (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) |
-               (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ);
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) |
+               (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP);
 
        mlx4_ib_alloc_eqs(dev, ibdev);
 
@@ -2302,6 +2321,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
        if (init_node_data(ibdev))
                goto err_map;
 
+       for (i = 0; i < ibdev->num_ports; ++i) {
+               mutex_init(&ibdev->counters_table[i].mutex);
+               INIT_LIST_HEAD(&ibdev->counters_table[i].counters_list);
+       }
+
        num_req_counters = mlx4_is_bonded(dev) ? 1 : ibdev->num_ports;
        for (i = 0; i < num_req_counters; ++i) {
                mutex_init(&ibdev->qp1_proxy_lock[i]);
@@ -2320,15 +2344,34 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                        counter_index = mlx4_get_default_counter_index(dev,
                                                                       i + 1);
                }
-               ibdev->counters[i].index = counter_index;
-               ibdev->counters[i].allocated = allocated;
+               new_counter_index = kmalloc(sizeof(*new_counter_index),
+                                           GFP_KERNEL);
+               if (!new_counter_index) {
+                       if (allocated)
+                               mlx4_counter_free(ibdev->dev, counter_index);
+                       goto err_counter;
+               }
+               new_counter_index->index = counter_index;
+               new_counter_index->allocated = allocated;
+               list_add_tail(&new_counter_index->list,
+                             &ibdev->counters_table[i].counters_list);
+               ibdev->counters_table[i].default_counter = counter_index;
                pr_info("counter index %d for port %d allocated %d\n",
                        counter_index, i + 1, allocated);
        }
        if (mlx4_is_bonded(dev))
                for (i = 1; i < ibdev->num_ports ; ++i) {
-                       ibdev->counters[i].index = ibdev->counters[0].index;
-                       ibdev->counters[i].allocated = 0;
+                       new_counter_index =
+                                       kmalloc(sizeof(struct counter_index),
+                                               GFP_KERNEL);
+                       if (!new_counter_index)
+                               goto err_counter;
+                       new_counter_index->index = counter_index;
+                       new_counter_index->allocated = 0;
+                       list_add_tail(&new_counter_index->list,
+                                     &ibdev->counters_table[i].counters_list);
+                       ibdev->counters_table[i].default_counter =
+                                                               counter_index;
                }
 
        mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
@@ -2437,12 +2480,9 @@ err_steer_qp_release:
                mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
                                      ibdev->steer_qpn_count);
 err_counter:
-       for (i = 0; i < ibdev->num_ports; ++i) {
-               if (ibdev->counters[i].index != -1 &&
-                   ibdev->counters[i].allocated)
-                       mlx4_counter_free(ibdev->dev,
-                                         ibdev->counters[i].index);
-       }
+       for (i = 0; i < ibdev->num_ports; ++i)
+               mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[i]);
+
 err_map:
        iounmap(ibdev->uar_map);
 
@@ -2546,9 +2586,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 
        iounmap(ibdev->uar_map);
        for (p = 0; p < ibdev->num_ports; ++p)
-               if (ibdev->counters[p].index != -1 &&
-                   ibdev->counters[p].allocated)
-                       mlx4_counter_free(ibdev->dev, ibdev->counters[p].index);
+               mlx4_ib_delete_counters_table(ibdev, &ibdev->counters_table[p]);
+
        mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
                mlx4_CLOSE_PORT(dev, p);
 
index 2d5bccd71fc66d121382326655a4180db1692e0b..99451d887266d2bffee04e2a71347f69b4883305 100644 (file)
@@ -222,7 +222,7 @@ static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
        spin_unlock_irqrestore(&dev->sm_lock, flags);
        return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev),
                                    ctx->port, IB_QPT_GSI, 0, 1, IB_QP1_QKEY,
-                                   &ah_attr, NULL, mad);
+                                   &ah_attr, NULL, 0xffff, mad);
 }
 
 static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
index 1e7b23bb2eb0bbb4b95c737585ace30371003494..1caa11edac03347a246467436daa72dd18d5c505 100644 (file)
@@ -129,10 +129,17 @@ struct mlx4_ib_cq {
        struct list_head                recv_qp_list;
 };
 
+#define MLX4_MR_PAGES_ALIGN 0x40
+
 struct mlx4_ib_mr {
        struct ib_mr            ibmr;
+       __be64                  *pages;
+       dma_addr_t              page_map;
+       u32                     npages;
+       u32                     max_pages;
        struct mlx4_mr          mmr;
        struct ib_umem         *umem;
+       void                    *pages_alloc;
 };
 
 struct mlx4_ib_mw {
@@ -140,12 +147,6 @@ struct mlx4_ib_mw {
        struct mlx4_mw          mmw;
 };
 
-struct mlx4_ib_fast_reg_page_list {
-       struct ib_fast_reg_page_list    ibfrpl;
-       __be64                         *mapped_page_list;
-       dma_addr_t                      map;
-};
-
 struct mlx4_ib_fmr {
        struct ib_fmr           ibfmr;
        struct mlx4_fmr         mfmr;
@@ -320,6 +321,7 @@ struct mlx4_ib_qp {
        struct list_head        qps_list;
        struct list_head        cq_recv_list;
        struct list_head        cq_send_list;
+       struct counter_index    *counter_index;
 };
 
 struct mlx4_ib_srq {
@@ -528,10 +530,17 @@ struct mlx4_ib_iov_port {
 };
 
 struct counter_index {
+       struct  list_head       list;
        u32             index;
        u8              allocated;
 };
 
+struct mlx4_ib_counters {
+       struct list_head        counters_list;
+       struct mutex            mutex; /* mutex for accessing counters list */
+       u32                     default_counter;
+};
+
 struct mlx4_ib_dev {
        struct ib_device        ib_dev;
        struct mlx4_dev        *dev;
@@ -550,7 +559,7 @@ struct mlx4_ib_dev {
        struct mutex            cap_mask_mutex;
        bool                    ib_active;
        struct mlx4_ib_iboe     iboe;
-       struct counter_index    counters[MLX4_MAX_PORTS];
+       struct mlx4_ib_counters counters_table[MLX4_MAX_PORTS];
        int                    *eq_table;
        struct kobject         *iov_parent;
        struct kobject         *ports_parent;
@@ -638,11 +647,6 @@ static inline struct mlx4_ib_mw *to_mmw(struct ib_mw *ibmw)
        return container_of(ibmw, struct mlx4_ib_mw, ibmw);
 }
 
-static inline struct mlx4_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
-       return container_of(ibfrpl, struct mlx4_ib_fast_reg_page_list, ibfrpl);
-}
-
 static inline struct mlx4_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
 {
        return container_of(ibfmr, struct mlx4_ib_fmr, ibfmr);
@@ -706,10 +710,9 @@ int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg);
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
-                                                              int page_list_len);
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+                     struct scatterlist *sg,
+                     int sg_nents);
 int mlx4_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period);
 int mlx4_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata);
 struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev,
@@ -813,7 +816,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
                         enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
                         u32 qkey, struct ib_ah_attr *attr, u8 *s_mac,
-                        struct ib_mad *mad);
+                        u16 vlan_id, struct ib_mad *mad);
 
 __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
 
index 2542fd3c1a493e037d6b1e24a4d5b15176ca03f9..4d1e1c632603a7b81e6685cd5ad6020f98a02a47 100644 (file)
@@ -59,7 +59,7 @@ struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
        struct mlx4_ib_mr *mr;
        int err;
 
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
@@ -140,7 +140,7 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
        int err;
        int n;
 
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
@@ -271,11 +271,59 @@ release_mpt_entry:
        return err;
 }
 
+static int
+mlx4_alloc_priv_pages(struct ib_device *device,
+                     struct mlx4_ib_mr *mr,
+                     int max_pages)
+{
+       int size = max_pages * sizeof(u64);
+       int add_size;
+       int ret;
+
+       add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+       mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
+       if (!mr->pages_alloc)
+               return -ENOMEM;
+
+       mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
+
+       mr->page_map = dma_map_single(device->dma_device, mr->pages,
+                                     size, DMA_TO_DEVICE);
+
+       if (dma_mapping_error(device->dma_device, mr->page_map)) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       return 0;
+err:
+       kfree(mr->pages_alloc);
+
+       return ret;
+}
+
+static void
+mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
+{
+       if (mr->pages) {
+               struct ib_device *device = mr->ibmr.device;
+               int size = mr->max_pages * sizeof(u64);
+
+               dma_unmap_single(device->dma_device, mr->page_map,
+                                size, DMA_TO_DEVICE);
+               kfree(mr->pages_alloc);
+               mr->pages = NULL;
+       }
+}
+
 int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 {
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int ret;
 
+       mlx4_free_priv_pages(mr);
+
        ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
        if (ret)
                return ret;
@@ -321,21 +369,21 @@ err_free:
 int mlx4_ib_bind_mw(struct ib_qp *qp, struct ib_mw *mw,
                    struct ib_mw_bind *mw_bind)
 {
-       struct ib_send_wr  wr;
+       struct ib_bind_mw_wr  wr;
        struct ib_send_wr *bad_wr;
        int ret;
 
        memset(&wr, 0, sizeof(wr));
-       wr.opcode               = IB_WR_BIND_MW;
-       wr.wr_id                = mw_bind->wr_id;
-       wr.send_flags           = mw_bind->send_flags;
-       wr.wr.bind_mw.mw        = mw;
-       wr.wr.bind_mw.bind_info = mw_bind->bind_info;
-       wr.wr.bind_mw.rkey      = ib_inc_rkey(mw->rkey);
-
-       ret = mlx4_ib_post_send(qp, &wr, &bad_wr);
+       wr.wr.opcode            = IB_WR_BIND_MW;
+       wr.wr.wr_id             = mw_bind->wr_id;
+       wr.wr.send_flags        = mw_bind->send_flags;
+       wr.mw                   = mw;
+       wr.bind_info            = mw_bind->bind_info;
+       wr.rkey                 = ib_inc_rkey(mw->rkey);
+
+       ret = mlx4_ib_post_send(qp, &wr.wr, &bad_wr);
        if (!ret)
-               mw->rkey = wr.wr.bind_mw.rkey;
+               mw->rkey = wr.rkey;
 
        return ret;
 }
@@ -362,7 +410,7 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
            max_num_sg > MLX4_MAX_FAST_REG_PAGES)
                return ERR_PTR(-EINVAL);
 
-       mr = kmalloc(sizeof *mr, GFP_KERNEL);
+       mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
@@ -371,71 +419,30 @@ struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
        if (err)
                goto err_free;
 
+       err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
+       if (err)
+               goto err_free_mr;
+
+       mr->max_pages = max_num_sg;
+
        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
-               goto err_mr;
+               goto err_free_pl;
 
        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;
 
        return &mr->ibmr;
 
-err_mr:
+err_free_pl:
+       mlx4_free_priv_pages(mr);
+err_free_mr:
        (void) mlx4_mr_free(dev->dev, &mr->mmr);
-
 err_free:
        kfree(mr);
        return ERR_PTR(err);
 }
 
-struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
-                                                              int page_list_len)
-{
-       struct mlx4_ib_dev *dev = to_mdev(ibdev);
-       struct mlx4_ib_fast_reg_page_list *mfrpl;
-       int size = page_list_len * sizeof (u64);
-
-       if (page_list_len > MLX4_MAX_FAST_REG_PAGES)
-               return ERR_PTR(-EINVAL);
-
-       mfrpl = kmalloc(sizeof *mfrpl, GFP_KERNEL);
-       if (!mfrpl)
-               return ERR_PTR(-ENOMEM);
-
-       mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
-       if (!mfrpl->ibfrpl.page_list)
-               goto err_free;
-
-       mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
-                                                    pdev->dev,
-                                                    size, &mfrpl->map,
-                                                    GFP_KERNEL);
-       if (!mfrpl->mapped_page_list)
-               goto err_free;
-
-       WARN_ON(mfrpl->map & 0x3f);
-
-       return &mfrpl->ibfrpl;
-
-err_free:
-       kfree(mfrpl->ibfrpl.page_list);
-       kfree(mfrpl);
-       return ERR_PTR(-ENOMEM);
-}
-
-void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
-       struct mlx4_ib_dev *dev = to_mdev(page_list->device);
-       struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
-       int size = page_list->max_page_list_len * sizeof (u64);
-
-       dma_free_coherent(&dev->dev->persist->pdev->dev, size,
-                         mfrpl->mapped_page_list,
-                         mfrpl->map);
-       kfree(mfrpl->ibfrpl.page_list);
-       kfree(mfrpl);
-}
-
 struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
                                 struct ib_fmr_attr *fmr_attr)
 {
@@ -528,3 +535,37 @@ int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
 
        return err;
 }
+
+static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
+{
+       struct mlx4_ib_mr *mr = to_mmr(ibmr);
+
+       if (unlikely(mr->npages == mr->max_pages))
+               return -ENOMEM;
+
+       mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);
+
+       return 0;
+}
+
+int mlx4_ib_map_mr_sg(struct ib_mr *ibmr,
+                     struct scatterlist *sg,
+                     int sg_nents)
+{
+       struct mlx4_ib_mr *mr = to_mmr(ibmr);
+       int rc;
+
+       mr->npages = 0;
+
+       ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
+                                  sizeof(u64) * mr->max_pages,
+                                  DMA_TO_DEVICE);
+
+       rc = ib_sg_to_pages(ibmr, sg, sg_nents, mlx4_set_page);
+
+       ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
+                                     sizeof(u64) * mr->max_pages,
+                                     DMA_TO_DEVICE);
+
+       return rc;
+}
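A minimal sketch, for context, of how a consumer is expected to drive the new
registration API that mlx4_ib_map_mr_sg() backs; the helper name, the
pd/qp/sg parameters and the access flags below are illustrative only and not
taken from this series (assumes <rdma/ib_verbs.h>):

        static int example_fast_reg(struct ib_pd *pd, struct ib_qp *qp,
                                    struct scatterlist *sg, int sg_nents,
                                    int max_num_sg)
        {
                struct ib_mr *mr;
                struct ib_reg_wr reg_wr;
                struct ib_send_wr *bad_wr;
                int n;

                mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
                if (IS_ERR(mr))
                        return PTR_ERR(mr);

                /* Map the DMA-mapped S/G list; the core calls back into the
                 * driver's map_mr_sg method, mlx4_ib_map_mr_sg() above. */
                n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
                if (n < sg_nents) {
                        ib_dereg_mr(mr);
                        return n < 0 ? n : -EINVAL;
                }

                /* Register the mapping with the new IB_WR_REG_MR work request. */
                memset(&reg_wr, 0, sizeof(reg_wr));
                reg_wr.wr.opcode = IB_WR_REG_MR;
                reg_wr.mr        = mr;
                reg_wr.key       = mr->rkey;
                reg_wr.access    = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;

                return ib_post_send(qp, &reg_wr.wr, &bad_wr);
        }
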
index 4ad9be3ad61c0a780be7c0ce9aa098989226c77e..a2e4ca56da44e5d9cec5ee4b6a02cf6ab9d7356c 100644
@@ -111,7 +111,7 @@ static const __be32 mlx4_ib_opcode[] = {
        [IB_WR_ATOMIC_FETCH_AND_ADD]            = cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
        [IB_WR_SEND_WITH_INV]                   = cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
        [IB_WR_LOCAL_INV]                       = cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
-       [IB_WR_FAST_REG_MR]                     = cpu_to_be32(MLX4_OPCODE_FMR),
+       [IB_WR_REG_MR]                          = cpu_to_be32(MLX4_OPCODE_FMR),
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
        [IB_WR_BIND_MW]                         = cpu_to_be32(MLX4_OPCODE_BIND_MW),
@@ -617,6 +617,18 @@ static int qp0_enabled_vf(struct mlx4_dev *dev, int qpn)
        return 0;
 }
 
+static void mlx4_ib_free_qp_counter(struct mlx4_ib_dev *dev,
+                                   struct mlx4_ib_qp *qp)
+{
+       mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+       mlx4_counter_free(dev->dev, qp->counter_index->index);
+       list_del(&qp->counter_index->list);
+       mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+       kfree(qp->counter_index);
+       qp->counter_index = NULL;
+}
+
 static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                            struct ib_qp_init_attr *init_attr,
                            struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp,
@@ -746,9 +758,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
        } else {
                qp->sq_no_prefetch = 0;
 
-               if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
-                       qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
-
                if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
                        qp->flags |= MLX4_IB_QP_LSO;
 
@@ -822,6 +831,9 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
                        goto err_proxy;
        }
 
+       if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)
+               qp->flags |= MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
+
        err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp, gfp);
        if (err)
                goto err_qpn;
@@ -1086,6 +1098,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 {
        struct mlx4_ib_qp *qp = NULL;
        int err;
+       int sup_u_create_flags = MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK;
        u16 xrcdn = 0;
        gfp_t gfp;
 
@@ -1109,8 +1122,10 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
        }
 
        if (init_attr->create_flags &&
-           (udata ||
-            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP | MLX4_IB_QP_CREATE_USE_GFP_NOIO)) &&
+           ((udata && init_attr->create_flags & ~(sup_u_create_flags)) ||
+            ((init_attr->create_flags & ~(MLX4_IB_SRIOV_SQP |
+                                          MLX4_IB_QP_CREATE_USE_GFP_NOIO |
+                                          MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK)) &&
              init_attr->qp_type != IB_QPT_UD) ||
             ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
              init_attr->qp_type > IB_QPT_GSI)))
@@ -1189,6 +1204,9 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
                mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]);
        }
 
+       if (mqp->counter_index)
+               mlx4_ib_free_qp_counter(dev, mqp);
+
        pd = get_pd(mqp);
        destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
 
@@ -1391,11 +1409,12 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
                         enum ib_qp_attr_mask qp_attr_mask,
                         struct mlx4_ib_qp *mqp,
-                        struct mlx4_qp_path *path, u8 port)
+                        struct mlx4_qp_path *path, u8 port,
+                        u16 vlan_id, u8 *smac)
 {
        return _mlx4_set_path(dev, &qp->ah_attr,
-                             mlx4_mac_to_u64((u8 *)qp->smac),
-                             (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+                             mlx4_mac_to_u64(smac),
+                             vlan_id,
                              path, &mqp->pri, port);
 }
 
@@ -1406,9 +1425,8 @@ static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
                             struct mlx4_qp_path *path, u8 port)
 {
        return _mlx4_set_path(dev, &qp->alt_ah_attr,
-                             mlx4_mac_to_u64((u8 *)qp->alt_smac),
-                             (qp_attr_mask & IB_QP_ALT_VID) ?
-                             qp->alt_vlan_id : 0xffff,
+                             0,
+                             0xffff,
                              path, &mqp->alt, port);
 }
 
@@ -1424,7 +1442,8 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
        }
 }
 
-static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
+static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev,
+                                   struct mlx4_ib_qp *qp,
                                    struct mlx4_qp_context *context)
 {
        u64 u64_mac;
@@ -1447,6 +1466,40 @@ static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *
        return 0;
 }
 
+static int create_qp_lb_counter(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
+{
+       struct counter_index *new_counter_index;
+       int err;
+       u32 tmp_idx;
+
+       if (rdma_port_get_link_layer(&dev->ib_dev, qp->port) !=
+           IB_LINK_LAYER_ETHERNET ||
+           !(qp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK) ||
+           !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK))
+               return 0;
+
+       err = mlx4_counter_alloc(dev->dev, &tmp_idx);
+       if (err)
+               return err;
+
+       new_counter_index = kmalloc(sizeof(*new_counter_index), GFP_KERNEL);
+       if (!new_counter_index) {
+               mlx4_counter_free(dev->dev, tmp_idx);
+               return -ENOMEM;
+       }
+
+       new_counter_index->index = tmp_idx;
+       new_counter_index->allocated = 1;
+       qp->counter_index = new_counter_index;
+
+       mutex_lock(&dev->counters_table[qp->port - 1].mutex);
+       list_add_tail(&new_counter_index->list,
+                     &dev->counters_table[qp->port - 1].counters_list);
+       mutex_unlock(&dev->counters_table[qp->port - 1].mutex);
+
+       return 0;
+}
+
 static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                               const struct ib_qp_attr *attr, int attr_mask,
                               enum ib_qp_state cur_state, enum ib_qp_state new_state)
@@ -1460,6 +1513,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        int sqd_event;
        int steer_qp = 0;
        int err = -EINVAL;
+       int counter_index;
 
        /* APM is not supported under RoCE */
        if (attr_mask & IB_QP_ALT_PATH &&
@@ -1519,6 +1573,9 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
        context->sq_size_stride |= qp->sq.wqe_shift - 4;
 
+       if (new_state == IB_QPS_RESET && qp->counter_index)
+               mlx4_ib_free_qp_counter(dev, qp);
+
        if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
                context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
                context->xrcd = cpu_to_be32((u32) qp->xrcdn);
@@ -1543,10 +1600,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        }
 
        if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) {
-               if (dev->counters[qp->port - 1].index != -1) {
-                       context->pri_path.counter_index =
-                                       dev->counters[qp->port - 1].index;
+               err = create_qp_lb_counter(dev, qp);
+               if (err)
+                       goto out;
+
+               counter_index =
+                       dev->counters_table[qp->port - 1].default_counter;
+               if (qp->counter_index)
+                       counter_index = qp->counter_index->index;
+
+               if (counter_index != -1) {
+                       context->pri_path.counter_index = counter_index;
                        optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
+                       if (qp->counter_index) {
+                               context->pri_path.fl |=
+                                       MLX4_FL_ETH_SRC_CHECK_MC_LB;
+                               context->pri_path.vlan_control |=
+                                       MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+                       }
                } else
                        context->pri_path.counter_index =
                                MLX4_SINK_COUNTER_INDEX(dev->dev);
@@ -1565,9 +1636,33 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
        }
 
        if (attr_mask & IB_QP_AV) {
+               u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 :
+                       attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+               union ib_gid gid;
+               struct ib_gid_attr gid_attr;
+               u16 vlan = 0xffff;
+               u8 smac[ETH_ALEN];
+               int status = 0;
+
+               if (rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
+                   attr->ah_attr.ah_flags & IB_AH_GRH) {
+                       int index = attr->ah_attr.grh.sgid_index;
+
+                       status = ib_get_cached_gid(ibqp->device, port_num,
+                                                  index, &gid, &gid_attr);
+                       if (!status && !memcmp(&gid, &zgid, sizeof(gid)))
+                               status = -ENOENT;
+                       if (!status && gid_attr.ndev) {
+                               vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+                               memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
+                               dev_put(gid_attr.ndev);
+                       }
+               }
+               if (status)
+                       goto out;
+
                if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
-                                 attr_mask & IB_QP_PORT ?
-                                 attr->port_num : qp->port))
+                                 port_num, vlan, smac))
                        goto out;
 
                optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1704,7 +1799,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                        if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_UD ||
                            qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI ||
                            qp->mlx4_ib_qp_type == MLX4_IB_QPT_TUN_GSI) {
-                               err = handle_eth_ud_smac_index(dev, qp, (u8 *)attr->smac, context);
+                               err = handle_eth_ud_smac_index(dev, qp, context);
                                if (err) {
                                        err = -EINVAL;
                                        goto out;
@@ -1848,6 +1943,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
                }
        }
 out:
+       if (err && qp->counter_index)
+               mlx4_ib_free_qp_counter(dev, qp);
        if (err && steer_qp)
                mlx4_ib_steer_qp_reg(dev, qp, 0);
        kfree(context);
@@ -2036,14 +2133,14 @@ static int vf_get_qp0_qkey(struct mlx4_dev *dev, int qpn, u32 *qkey)
 }
 
 static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
-                                 struct ib_send_wr *wr,
+                                 struct ib_ud_wr *wr,
                                  void *wqe, unsigned *mlx_seg_len)
 {
        struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
        struct ib_device *ib_dev = &mdev->ib_dev;
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
-       struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+       struct mlx4_ib_ah *ah = to_mah(wr->ah);
        u16 pkey;
        u32 qkey;
        int send_size;
@@ -2051,13 +2148,13 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        int spc;
        int i;
 
-       if (wr->opcode != IB_WR_SEND)
+       if (wr->wr.opcode != IB_WR_SEND)
                return -EINVAL;
 
        send_size = 0;
 
-       for (i = 0; i < wr->num_sge; ++i)
-               send_size += wr->sg_list[i].length;
+       for (i = 0; i < wr->wr.num_sge; ++i)
+               send_size += wr->wr.sg_list[i].length;
 
        /* for proxy-qp0 sends, need to add in size of tunnel header */
        /* for tunnel-qp0 sends, tunnel header is already in s/g list */
@@ -2082,11 +2179,11 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
        mlx->rlid = sqp->ud_header.lrh.destination_lid;
 
        sqp->ud_header.lrh.virtual_lane    = 0;
-       sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+       sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
        ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
        if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
-               sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+               sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        else
                sqp->ud_header.bth.destination_qpn =
                        cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
@@ -2158,14 +2255,14 @@ static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
        }
 }
 
-static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
+static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
        struct ib_device *ib_dev = sqp->qp.ibqp.device;
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_ctrl_seg *ctrl = wqe;
        struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
-       struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+       struct mlx4_ib_ah *ah = to_mah(wr->ah);
        union ib_gid sgid;
        u16 pkey;
        int send_size;
@@ -2179,8 +2276,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        bool is_grh;
 
        send_size = 0;
-       for (i = 0; i < wr->num_sge; ++i)
-               send_size += wr->sg_list[i].length;
+       for (i = 0; i < wr->wr.num_sge; ++i)
+               send_size += wr->wr.sg_list[i].length;
 
        is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
@@ -2197,7 +2294,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                } else  {
                        err = ib_get_cached_gid(ib_dev,
                                                be32_to_cpu(ah->av.ib.port_pd) >> 24,
-                                               ah->av.ib.gid_index, &sgid);
+                                               ah->av.ib.gid_index, &sgid,
+                                               NULL);
+                       if (!err && !memcmp(&sgid, &zgid, sizeof(sgid)))
+                               err = -ENOENT;
                        if (err)
                                return err;
                }
@@ -2239,7 +2339,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                        ib_get_cached_gid(ib_dev,
                                          be32_to_cpu(ah->av.ib.port_pd) >> 24,
                                          ah->av.ib.gid_index,
-                                         &sqp->ud_header.grh.source_gid);
+                                         &sqp->ud_header.grh.source_gid, NULL);
                }
                memcpy(sqp->ud_header.grh.destination_gid.raw,
                       ah->av.ib.dgid, 16);
@@ -2257,7 +2357,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                mlx->rlid = sqp->ud_header.lrh.destination_lid;
        }
 
-       switch (wr->opcode) {
+       switch (wr->wr.opcode) {
        case IB_WR_SEND:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY;
                sqp->ud_header.immediate_present = 0;
@@ -2265,7 +2365,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode        = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data    = wr->ex.imm_data;
+               sqp->ud_header.immediate_data    = wr->wr.ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -2308,16 +2408,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
                if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                        sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
        }
-       sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+       sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
                ib_get_cached_pkey(ib_dev, sqp->qp.port, sqp->pkey_index, &pkey);
        else
-               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->wr.ud.pkey_index, &pkey);
+               ib_get_cached_pkey(ib_dev, sqp->qp.port, wr->pkey_index, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
-       sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+       sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-       sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
-                                              sqp->qkey : wr->wr.ud.remote_qkey);
+       sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+                                              sqp->qkey : wr->remote_qkey);
        sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
 
        header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
@@ -2405,43 +2505,39 @@ static __be32 convert_access(int acc)
                cpu_to_be32(MLX4_WQE_FMR_PERM_LOCAL_READ);
 }
 
-static void set_fmr_seg(struct mlx4_wqe_fmr_seg *fseg, struct ib_send_wr *wr)
+static void set_reg_seg(struct mlx4_wqe_fmr_seg *fseg,
+                       struct ib_reg_wr *wr)
 {
-       struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
-       int i;
-
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; ++i)
-               mfrpl->mapped_page_list[i] =
-                       cpu_to_be64(wr->wr.fast_reg.page_list->page_list[i] |
-                                   MLX4_MTT_FLAG_PRESENT);
+       struct mlx4_ib_mr *mr = to_mmr(wr->mr);
 
-       fseg->flags             = convert_access(wr->wr.fast_reg.access_flags);
-       fseg->mem_key           = cpu_to_be32(wr->wr.fast_reg.rkey);
-       fseg->buf_list          = cpu_to_be64(mfrpl->map);
-       fseg->start_addr        = cpu_to_be64(wr->wr.fast_reg.iova_start);
-       fseg->reg_len           = cpu_to_be64(wr->wr.fast_reg.length);
+       fseg->flags             = convert_access(wr->access);
+       fseg->mem_key           = cpu_to_be32(wr->key);
+       fseg->buf_list          = cpu_to_be64(mr->page_map);
+       fseg->start_addr        = cpu_to_be64(mr->ibmr.iova);
+       fseg->reg_len           = cpu_to_be64(mr->ibmr.length);
        fseg->offset            = 0; /* XXX -- is this just for ZBVA? */
-       fseg->page_size         = cpu_to_be32(wr->wr.fast_reg.page_shift);
+       fseg->page_size         = cpu_to_be32(ilog2(mr->ibmr.page_size));
        fseg->reserved[0]       = 0;
        fseg->reserved[1]       = 0;
 }
 
-static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg, struct ib_send_wr *wr)
+static void set_bind_seg(struct mlx4_wqe_bind_seg *bseg,
+               struct ib_bind_mw_wr *wr)
 {
        bseg->flags1 =
-               convert_access(wr->wr.bind_mw.bind_info.mw_access_flags) &
+               convert_access(wr->bind_info.mw_access_flags) &
                cpu_to_be32(MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_READ  |
                            MLX4_WQE_FMR_AND_BIND_PERM_REMOTE_WRITE |
                            MLX4_WQE_FMR_AND_BIND_PERM_ATOMIC);
        bseg->flags2 = 0;
-       if (wr->wr.bind_mw.mw->type == IB_MW_TYPE_2)
+       if (wr->mw->type == IB_MW_TYPE_2)
                bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_TYPE_2);
-       if (wr->wr.bind_mw.bind_info.mw_access_flags & IB_ZERO_BASED)
+       if (wr->bind_info.mw_access_flags & IB_ZERO_BASED)
                bseg->flags2 |= cpu_to_be32(MLX4_WQE_BIND_ZERO_BASED);
-       bseg->new_rkey = cpu_to_be32(wr->wr.bind_mw.rkey);
-       bseg->lkey = cpu_to_be32(wr->wr.bind_mw.bind_info.mr->lkey);
-       bseg->addr = cpu_to_be64(wr->wr.bind_mw.bind_info.addr);
-       bseg->length = cpu_to_be64(wr->wr.bind_mw.bind_info.length);
+       bseg->new_rkey = cpu_to_be32(wr->rkey);
+       bseg->lkey = cpu_to_be32(wr->bind_info.mr->lkey);
+       bseg->addr = cpu_to_be64(wr->bind_info.addr);
+       bseg->length = cpu_to_be64(wr->bind_info.length);
 }
 
 static void set_local_inv_seg(struct mlx4_wqe_local_inval_seg *iseg, u32 rkey)
@@ -2458,46 +2554,47 @@ static __always_inline void set_raddr_seg(struct mlx4_wqe_raddr_seg *rseg,
        rseg->reserved = 0;
 }
 
-static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *wr)
+static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg,
+               struct ib_atomic_wr *wr)
 {
-       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
-       } else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+       if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+               aseg->swap_add = cpu_to_be64(wr->swap);
+               aseg->compare  = cpu_to_be64(wr->compare_add);
+       } else if (wr->wr.opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+               aseg->swap_add = cpu_to_be64(wr->compare_add);
+               aseg->compare  = cpu_to_be64(wr->compare_add_mask);
        } else {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+               aseg->swap_add = cpu_to_be64(wr->compare_add);
                aseg->compare  = 0;
        }
 
 }
 
 static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
-                                 struct ib_send_wr *wr)
+                                 struct ib_atomic_wr *wr)
 {
-       aseg->swap_add          = cpu_to_be64(wr->wr.atomic.swap);
-       aseg->swap_add_mask     = cpu_to_be64(wr->wr.atomic.swap_mask);
-       aseg->compare           = cpu_to_be64(wr->wr.atomic.compare_add);
-       aseg->compare_mask      = cpu_to_be64(wr->wr.atomic.compare_add_mask);
+       aseg->swap_add          = cpu_to_be64(wr->swap);
+       aseg->swap_add_mask     = cpu_to_be64(wr->swap_mask);
+       aseg->compare           = cpu_to_be64(wr->compare_add);
+       aseg->compare_mask      = cpu_to_be64(wr->compare_add_mask);
 }
 
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
-                            struct ib_send_wr *wr)
+                            struct ib_ud_wr *wr)
 {
-       memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
-       dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
-       dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
-       dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
-       memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
+       memcpy(dseg->av, &to_mah(wr->ah)->av, sizeof (struct mlx4_av));
+       dseg->dqpn = cpu_to_be32(wr->remote_qpn);
+       dseg->qkey = cpu_to_be32(wr->remote_qkey);
+       dseg->vlan = to_mah(wr->ah)->av.eth.vlan;
+       memcpy(dseg->mac, to_mah(wr->ah)->av.eth.mac, 6);
 }
 
 static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
                                    struct mlx4_wqe_datagram_seg *dseg,
-                                   struct ib_send_wr *wr,
+                                   struct ib_ud_wr *wr,
                                    enum mlx4_ib_qp_type qpt)
 {
-       union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
+       union mlx4_ext_av *av = &to_mah(wr->ah)->av;
        struct mlx4_av sqp_av = {0};
        int port = *((u8 *) &av->ib.port_pd) & 0x3;
 
@@ -2516,18 +2613,18 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
        dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
 }
 
-static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
+static void build_tunnel_header(struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len)
 {
        struct mlx4_wqe_inline_seg *inl = wqe;
        struct mlx4_ib_tunnel_header hdr;
-       struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
+       struct mlx4_ib_ah *ah = to_mah(wr->ah);
        int spc;
        int i;
 
        memcpy(&hdr.av, &ah->av, sizeof hdr.av);
-       hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
-       hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
-       hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+       hdr.remote_qpn = cpu_to_be32(wr->remote_qpn);
+       hdr.pkey_index = cpu_to_be16(wr->pkey_index);
+       hdr.qkey = cpu_to_be32(wr->remote_qkey);
        memcpy(hdr.mac, ah->av.eth.mac, 6);
        hdr.vlan = ah->av.eth.vlan;
 
@@ -2599,22 +2696,22 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
        dseg->addr       = cpu_to_be64(sg->addr);
 }
 
-static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
+static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_ud_wr *wr,
                         struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
                         __be32 *lso_hdr_sz, __be32 *blh)
 {
-       unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);
+       unsigned halign = ALIGN(sizeof *wqe + wr->hlen, 16);
 
        if (unlikely(halign > MLX4_IB_CACHE_LINE_SIZE))
                *blh = cpu_to_be32(1 << 6);
 
        if (unlikely(!(qp->flags & MLX4_IB_QP_LSO) &&
-                    wr->num_sge > qp->sq.max_gs - (halign >> 4)))
+                    wr->wr.num_sge > qp->sq.max_gs - (halign >> 4)))
                return -EINVAL;
 
-       memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
+       memcpy(wqe->header, wr->header, wr->hlen);
 
-       *lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
+       *lso_hdr_sz  = cpu_to_be32(wr->mss << 16 | wr->hlen);
        *lso_seg_len = halign;
        return 0;
 }
@@ -2713,11 +2810,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
                        case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
-                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
+                               set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+                                             atomic_wr(wr)->rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 
-                               set_atomic_seg(wqe, wr);
+                               set_atomic_seg(wqe, atomic_wr(wr));
                                wqe  += sizeof (struct mlx4_wqe_atomic_seg);
 
                                size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2726,11 +2823,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                break;
 
                        case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
-                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
+                               set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+                                             atomic_wr(wr)->rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
 
-                               set_masked_atomic_seg(wqe, wr);
+                               set_masked_atomic_seg(wqe, atomic_wr(wr));
                                wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
 
                                size += (sizeof (struct mlx4_wqe_raddr_seg) +
@@ -2741,8 +2838,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                wqe  += sizeof (struct mlx4_wqe_raddr_seg);
                                size += sizeof (struct mlx4_wqe_raddr_seg) / 16;
                                break;
@@ -2755,18 +2852,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                size += sizeof (struct mlx4_wqe_local_inval_seg) / 16;
                                break;
 
-                       case IB_WR_FAST_REG_MR:
+                       case IB_WR_REG_MR:
                                ctrl->srcrb_flags |=
                                        cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_fmr_seg(wqe, wr);
-                               wqe  += sizeof (struct mlx4_wqe_fmr_seg);
-                               size += sizeof (struct mlx4_wqe_fmr_seg) / 16;
+                               set_reg_seg(wqe, reg_wr(wr));
+                               wqe  += sizeof(struct mlx4_wqe_fmr_seg);
+                               size += sizeof(struct mlx4_wqe_fmr_seg) / 16;
                                break;
 
                        case IB_WR_BIND_MW:
                                ctrl->srcrb_flags |=
                                        cpu_to_be32(MLX4_WQE_CTRL_STRONG_ORDER);
-                               set_bind_seg(wqe, wr);
+                               set_bind_seg(wqe, bind_mw_wr(wr));
                                wqe  += sizeof(struct mlx4_wqe_bind_seg);
                                size += sizeof(struct mlx4_wqe_bind_seg) / 16;
                                break;
@@ -2777,7 +2874,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_TUN_SMI_OWNER:
-                       err =  build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+                       err =  build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+                                       ctrl, &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -2788,19 +2886,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                case MLX4_IB_QPT_TUN_SMI:
                case MLX4_IB_QPT_TUN_GSI:
                        /* this is a UD qp used in MAD responses to slaves. */
-                       set_datagram_seg(wqe, wr);
+                       set_datagram_seg(wqe, ud_wr(wr));
                        /* set the forced-loopback bit in the data seg av */
                        *(__be32 *) wqe |= cpu_to_be32(0x80000000);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
                        break;
                case MLX4_IB_QPT_UD:
-                       set_datagram_seg(wqe, wr);
+                       set_datagram_seg(wqe, ud_wr(wr));
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
 
                        if (wr->opcode == IB_WR_LSO) {
-                               err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz, &blh);
+                               err = build_lso_seg(wqe, ud_wr(wr), qp, &seglen,
+                                               &lso_hdr_sz, &blh);
                                if (unlikely(err)) {
                                        *bad_wr = wr;
                                        goto out;
@@ -2812,7 +2911,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case MLX4_IB_QPT_PROXY_SMI_OWNER:
-                       err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
+                       err = build_sriov_qp0_header(to_msqp(qp), ud_wr(wr),
+                                       ctrl, &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
@@ -2823,7 +2923,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        add_zero_len_inline(wqe);
                        wqe += 16;
                        size++;
-                       build_tunnel_header(wr, wqe, &seglen);
+                       build_tunnel_header(ud_wr(wr), wqe, &seglen);
                        wqe  += seglen;
                        size += seglen / 16;
                        break;
@@ -2833,18 +2933,20 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                         * In this case we first add a UD segment targeting
                         * the tunnel qp, and then add a header with address
                         * information */
-                       set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr,
+                       set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe,
+                                               ud_wr(wr),
                                                qp->mlx4_ib_qp_type);
                        wqe  += sizeof (struct mlx4_wqe_datagram_seg);
                        size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
-                       build_tunnel_header(wr, wqe, &seglen);
+                       build_tunnel_header(ud_wr(wr), wqe, &seglen);
                        wqe  += seglen;
                        size += seglen / 16;
                        break;
 
                case MLX4_IB_QPT_SMI:
                case MLX4_IB_QPT_GSI:
-                       err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
+                       err = build_mlx_header(to_msqp(qp), ud_wr(wr), ctrl,
+                                       &seglen);
                        if (unlikely(err)) {
                                *bad_wr = wr;
                                goto out;
index 2d0dbbf38ceb9f6277bc9c86e726b97a6e33f17d..3dfd287256d628892823173a4794620b4e22849e 100644
@@ -109,8 +109,8 @@ static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
        case IB_WR_LOCAL_INV:
                return IB_WC_LOCAL_INV;
 
-       case IB_WR_FAST_REG_MR:
-               return IB_WC_FAST_REG_MR;
+       case IB_WR_REG_MR:
+               return IB_WC_REG_MR;
 
        default:
                pr_warn("unknown completion status\n");
index 68508d528ba0ecd9ff18768df885aa1b4741ab04..7e97cb55a6bfa9517a726b9be897d17563272a5d 100644
@@ -1425,8 +1425,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        dev->ib_dev.detach_mcast        = mlx5_ib_mcg_detach;
        dev->ib_dev.process_mad         = mlx5_ib_process_mad;
        dev->ib_dev.alloc_mr            = mlx5_ib_alloc_mr;
-       dev->ib_dev.alloc_fast_reg_page_list = mlx5_ib_alloc_fast_reg_page_list;
-       dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
+       dev->ib_dev.map_mr_sg           = mlx5_ib_map_mr_sg;
        dev->ib_dev.check_mr_status     = mlx5_ib_check_mr_status;
        dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
 
index 22123b79d550d6a7e0474501592f36dc6f0b632e..633347260b79454ba10da9de498bc9d68d5a6f9f 100644
@@ -245,6 +245,7 @@ enum mlx5_ib_qp_flags {
 };
 
 struct mlx5_umr_wr {
+       struct ib_send_wr               wr;
        union {
                u64                     virt_addr;
                u64                     offset;
@@ -257,6 +258,11 @@ struct mlx5_umr_wr {
        u32                             mkey;
 };
 
+static inline struct mlx5_umr_wr *umr_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct mlx5_umr_wr, wr);
+}
+
 struct mlx5_shared_mr_info {
        int mr_id;
        struct ib_umem          *umem;
@@ -313,6 +319,11 @@ enum mlx5_ib_mtt_access_flags {
 
 struct mlx5_ib_mr {
        struct ib_mr            ibmr;
+       void                    *descs;
+       dma_addr_t              desc_map;
+       int                     ndescs;
+       int                     max_descs;
+       int                     desc_size;
        struct mlx5_core_mr     mmr;
        struct ib_umem         *umem;
        struct mlx5_shared_mr_info      *smr_info;
@@ -324,12 +335,7 @@ struct mlx5_ib_mr {
        struct mlx5_create_mkey_mbox_out out;
        struct mlx5_core_sig_ctx    *sig;
        int                     live;
-};
-
-struct mlx5_ib_fast_reg_page_list {
-       struct ib_fast_reg_page_list    ibfrpl;
-       __be64                         *mapped_page_list;
-       dma_addr_t                      map;
+       void                    *descs_alloc;
 };
 
 struct mlx5_ib_umr_context {
@@ -358,20 +364,6 @@ enum {
        MLX5_FMR_BUSY,
 };
 
-struct mlx5_ib_fmr {
-       struct ib_fmr                   ibfmr;
-       struct mlx5_core_mr             mr;
-       int                             access_flags;
-       int                             state;
-       /* protect fmr state
-        */
-       spinlock_t                      lock;
-       u64                             wrid;
-       struct ib_send_wr               wr[2];
-       u8                              page_shift;
-       struct ib_fast_reg_page_list    page_list;
-};
-
 struct mlx5_cache_ent {
        struct list_head        head;
        /* sync access to the cahce entry
@@ -456,11 +448,6 @@ static inline struct mlx5_ib_dev *to_mdev(struct ib_device *ibdev)
        return container_of(ibdev, struct mlx5_ib_dev, ib_dev);
 }
 
-static inline struct mlx5_ib_fmr *to_mfmr(struct ib_fmr *ibfmr)
-{
-       return container_of(ibfmr, struct mlx5_ib_fmr, ibfmr);
-}
-
 static inline struct mlx5_ib_cq *to_mcq(struct ib_cq *ibcq)
 {
        return container_of(ibcq, struct mlx5_ib_cq, ibcq);
@@ -501,11 +488,6 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
        return container_of(ibmr, struct mlx5_ib_mr, ibmr);
 }
 
-static inline struct mlx5_ib_fast_reg_page_list *to_mfrpl(struct ib_fast_reg_page_list *ibfrpl)
-{
-       return container_of(ibfrpl, struct mlx5_ib_fast_reg_page_list, ibfrpl);
-}
-
 struct mlx5_ib_ah {
        struct ib_ah            ibah;
        struct mlx5_av          av;
@@ -573,15 +555,9 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg);
-struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
-                                                              int page_list_len);
-void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-struct ib_fmr *mlx5_ib_fmr_alloc(struct ib_pd *pd, int acc,
-                                struct ib_fmr_attr *fmr_attr);
-int mlx5_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-                     int npages, u64 iova);
-int mlx5_ib_unmap_fmr(struct list_head *fmr_list);
-int mlx5_ib_fmr_dealloc(struct ib_fmr *ibfmr);
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+                     struct scatterlist *sg,
+                     int sg_nents);
 int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
                        const struct ib_wc *in_wc, const struct ib_grh *in_grh,
                        const struct ib_mad_hdr *in, size_t in_mad_size,
index 54a15b5d336d00043643c09a99f05b89df2861bd..ec8993a7b3beea5a91370c1f66979dae583f49d4 100644
@@ -687,7 +687,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
                             int access_flags)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
-       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+       struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
        sg->addr = dma;
        sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -715,7 +715,7 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
                               struct ib_send_wr *wr, u32 key)
 {
-       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+       struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
        wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
        wr->opcode = MLX5_IB_WR_UMR;
@@ -752,7 +752,8 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
        struct device *ddev = dev->ib_dev.dma_device;
        struct umr_common *umrc = &dev->umrc;
        struct mlx5_ib_umr_context umr_context;
-       struct ib_send_wr wr, *bad;
+       struct mlx5_umr_wr umrwr;
+       struct ib_send_wr *bad;
        struct mlx5_ib_mr *mr;
        struct ib_sge sg;
        int size;
@@ -798,14 +799,14 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                goto free_pas;
        }
 
-       memset(&wr, 0, sizeof(wr));
-       wr.wr_id = (u64)(unsigned long)&umr_context;
-       prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
-                        virt_addr, len, access_flags);
+       memset(&umrwr, 0, sizeof(umrwr));
+       umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
+       prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
+                        page_shift, virt_addr, len, access_flags);
 
        mlx5_ib_init_umr_context(&umr_context);
        down(&umrc->sem);
-       err = ib_post_send(umrc->qp, &wr, &bad);
+       err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
        if (err) {
                mlx5_ib_warn(dev, "post send failed, err %d\n", err);
                goto unmap_dma;
@@ -851,8 +852,8 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
        int size;
        __be64 *pas;
        dma_addr_t dma;
-       struct ib_send_wr wr, *bad;
-       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+       struct ib_send_wr *bad;
+       struct mlx5_umr_wr wr;
        struct ib_sge sg;
        int err = 0;
        const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
@@ -917,26 +918,26 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
                dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
 
                memset(&wr, 0, sizeof(wr));
-               wr.wr_id = (u64)(unsigned long)&umr_context;
+               wr.wr.wr_id = (u64)(unsigned long)&umr_context;
 
                sg.addr = dma;
                sg.length = ALIGN(npages * sizeof(u64),
                                MLX5_UMR_MTT_ALIGNMENT);
                sg.lkey = dev->umrc.pd->local_dma_lkey;
 
-               wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+               wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
                                MLX5_IB_SEND_UMR_UPDATE_MTT;
-               wr.sg_list = &sg;
-               wr.num_sge = 1;
-               wr.opcode = MLX5_IB_WR_UMR;
-               umrwr->npages = sg.length / sizeof(u64);
-               umrwr->page_shift = PAGE_SHIFT;
-               umrwr->mkey = mr->mmr.key;
-               umrwr->target.offset = start_page_index;
+               wr.wr.sg_list = &sg;
+               wr.wr.num_sge = 1;
+               wr.wr.opcode = MLX5_IB_WR_UMR;
+               wr.npages = sg.length / sizeof(u64);
+               wr.page_shift = PAGE_SHIFT;
+               wr.mkey = mr->mmr.key;
+               wr.target.offset = start_page_index;
 
                mlx5_ib_init_umr_context(&umr_context);
                down(&umrc->sem);
-               err = ib_post_send(umrc->qp, &wr, &bad);
+               err = ib_post_send(umrc->qp, &wr.wr, &bad);
                if (err) {
                        mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
                } else {
@@ -1122,16 +1123,17 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
        struct umr_common *umrc = &dev->umrc;
        struct mlx5_ib_umr_context umr_context;
-       struct ib_send_wr wr, *bad;
+       struct mlx5_umr_wr umrwr;
+       struct ib_send_wr *bad;
        int err;
 
-       memset(&wr, 0, sizeof(wr));
-       wr.wr_id = (u64)(unsigned long)&umr_context;
-       prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);
+       memset(&umrwr.wr, 0, sizeof(umrwr));
+       umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
+       prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
 
        mlx5_ib_init_umr_context(&umr_context);
        down(&umrc->sem);
-       err = ib_post_send(umrc->qp, &wr, &bad);
+       err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
        if (err) {
                up(&umrc->sem);
                mlx5_ib_dbg(dev, "err %d\n", err);
@@ -1151,6 +1153,52 @@ error:
        return err;
 }
 
+static int
+mlx5_alloc_priv_descs(struct ib_device *device,
+                     struct mlx5_ib_mr *mr,
+                     int ndescs,
+                     int desc_size)
+{
+       int size = ndescs * desc_size;
+       int add_size;
+       int ret;
+
+       add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+
+       mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
+       if (!mr->descs_alloc)
+               return -ENOMEM;
+
+       mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
+
+       mr->desc_map = dma_map_single(device->dma_device, mr->descs,
+                                     size, DMA_TO_DEVICE);
+       if (dma_mapping_error(device->dma_device, mr->desc_map)) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       return 0;
+err:
+       kfree(mr->descs_alloc);
+
+       return ret;
+}
+
+static void
+mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
+{
+       if (mr->descs) {
+               struct ib_device *device = mr->ibmr.device;
+               int size = mr->max_descs * mr->desc_size;
+
+               dma_unmap_single(device->dma_device, mr->desc_map,
+                                size, DMA_TO_DEVICE);
+               kfree(mr->descs_alloc);
+               mr->descs = NULL;
+       }
+}
+
 static int clean_mr(struct mlx5_ib_mr *mr)
 {
        struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
@@ -1170,6 +1218,8 @@ static int clean_mr(struct mlx5_ib_mr *mr)
                mr->sig = NULL;
        }
 
+       mlx5_free_priv_descs(mr);
+
        if (!umred) {
                err = destroy_mkey(dev, mr);
                if (err) {
@@ -1259,6 +1309,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
        if (mr_type == IB_MR_TYPE_MEM_REG) {
                access_mode = MLX5_ACCESS_MODE_MTT;
                in->seg.log2_page_size = PAGE_SHIFT;
+
+               err = mlx5_alloc_priv_descs(pd->device, mr,
+                                           ndescs, sizeof(u64));
+               if (err)
+                       goto err_free_in;
+
+               mr->desc_size = sizeof(u64);
+               mr->max_descs = ndescs;
        } else if (mr_type == IB_MR_TYPE_SIGNATURE) {
                u32 psv_index[2];
 
@@ -1315,6 +1373,7 @@ err_destroy_psv:
                        mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
                                     mr->sig->psv_wire.psv_idx);
        }
+       mlx5_free_priv_descs(mr);
 err_free_sig:
        kfree(mr->sig);
 err_free_in:
@@ -1324,48 +1383,6 @@ err_free:
        return ERR_PTR(err);
 }
 
-struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
-                                                              int page_list_len)
-{
-       struct mlx5_ib_fast_reg_page_list *mfrpl;
-       int size = page_list_len * sizeof(u64);
-
-       mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
-       if (!mfrpl)
-               return ERR_PTR(-ENOMEM);
-
-       mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
-       if (!mfrpl->ibfrpl.page_list)
-               goto err_free;
-
-       mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
-                                                    size, &mfrpl->map,
-                                                    GFP_KERNEL);
-       if (!mfrpl->mapped_page_list)
-               goto err_free;
-
-       WARN_ON(mfrpl->map & 0x3f);
-
-       return &mfrpl->ibfrpl;
-
-err_free:
-       kfree(mfrpl->ibfrpl.page_list);
-       kfree(mfrpl);
-       return ERR_PTR(-ENOMEM);
-}
-
-void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
-{
-       struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
-       struct mlx5_ib_dev *dev = to_mdev(page_list->device);
-       int size = page_list->max_page_list_len * sizeof(u64);
-
-       dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
-                         mfrpl->map);
-       kfree(mfrpl->ibfrpl.page_list);
-       kfree(mfrpl);
-}
-
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
                            struct ib_mr_status *mr_status)
 {
@@ -1406,3 +1423,39 @@ int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 done:
        return ret;
 }
+
+static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
+{
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+       __be64 *descs;
+
+       if (unlikely(mr->ndescs == mr->max_descs))
+               return -ENOMEM;
+
+       descs = mr->descs;
+       descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
+
+       return 0;
+}
+
+int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
+                     struct scatterlist *sg,
+                     int sg_nents)
+{
+       struct mlx5_ib_mr *mr = to_mmr(ibmr);
+       int n;
+
+       mr->ndescs = 0;
+
+       ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
+                                  mr->desc_size * mr->max_descs,
+                                  DMA_TO_DEVICE);
+
+       n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+
+       ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
+                                     mr->desc_size * mr->max_descs,
+                                     DMA_TO_DEVICE);
+
+       return n;
+}
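For contrast, a rough sketch of the flow that the page-list verbs deleted
above used to serve; the field names follow the old
ib_send_wr/ib_fast_reg_page_list code removed in this series, while the
helper name and values are purely illustrative:

        static int example_old_fast_reg(struct ib_qp *qp, struct ib_mr *mr,
                                        u64 *page_dma_addrs, int npages,
                                        u64 iova, u32 len)
        {
                struct ib_fast_reg_page_list *frpl;
                struct ib_send_wr fr_wr, *bad_wr;
                int i;

                frpl = ib_alloc_fast_reg_page_list(qp->device, npages);
                if (IS_ERR(frpl))
                        return PTR_ERR(frpl);

                /* The caller had to build the device page list by hand ... */
                for (i = 0; i < npages; i++)
                        frpl->page_list[i] = page_dma_addrs[i];

                /* ... and describe the whole mapping in the work request itself. */
                memset(&fr_wr, 0, sizeof(fr_wr));
                fr_wr.opcode                    = IB_WR_FAST_REG_MR;
                fr_wr.wr.fast_reg.page_list     = frpl;
                fr_wr.wr.fast_reg.page_list_len = npages;
                fr_wr.wr.fast_reg.page_shift    = PAGE_SHIFT;
                fr_wr.wr.fast_reg.length        = len;
                fr_wr.wr.fast_reg.iova_start    = iova;
                fr_wr.wr.fast_reg.rkey          = mr->rkey;
                fr_wr.wr.fast_reg.access_flags  = IB_ACCESS_LOCAL_WRITE;

                /* frpl is released later with ib_free_fast_reg_page_list(). */
                return ib_post_send(qp, &fr_wr, &bad_wr);
        }
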
index 6f521a3418e8e1c69b9cca74fc8443dd05e30dac..307bdbca8938e1d87493d7048b93126bd9261e26 100644
@@ -64,7 +64,7 @@ static const u32 mlx5_ib_opcode[] = {
        [IB_WR_ATOMIC_FETCH_AND_ADD]            = MLX5_OPCODE_ATOMIC_FA,
        [IB_WR_SEND_WITH_INV]                   = MLX5_OPCODE_SEND_INVAL,
        [IB_WR_LOCAL_INV]                       = MLX5_OPCODE_UMR,
-       [IB_WR_FAST_REG_MR]                     = MLX5_OPCODE_UMR,
+       [IB_WR_REG_MR]                          = MLX5_OPCODE_UMR,
        [IB_WR_MASKED_ATOMIC_CMP_AND_SWP]       = MLX5_OPCODE_ATOMIC_MASKED_CS,
        [IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]     = MLX5_OPCODE_ATOMIC_MASKED_FA,
        [MLX5_IB_WR_UMR]                        = MLX5_OPCODE_UMR,
@@ -1838,9 +1838,9 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
                             struct ib_send_wr *wr)
 {
-       memcpy(&dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof(struct mlx5_av));
-       dseg->av.dqp_dct = cpu_to_be32(wr->wr.ud.remote_qpn | MLX5_EXTENDED_UD_AV);
-       dseg->av.key.qkey.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+       memcpy(&dseg->av, &to_mah(ud_wr(wr)->ah)->av, sizeof(struct mlx5_av));
+       dseg->av.dqp_dct = cpu_to_be32(ud_wr(wr)->remote_qpn | MLX5_EXTENDED_UD_AV);
+       dseg->av.key.qkey.qkey = cpu_to_be32(ud_wr(wr)->remote_qkey);
 }
 
 static void set_data_ptr_seg(struct mlx5_wqe_data_seg *dseg, struct ib_sge *sg)
@@ -1896,22 +1896,24 @@ static __be64 sig_mkey_mask(void)
        return cpu_to_be64(result);
 }
 
-static void set_frwr_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
-                                struct ib_send_wr *wr, int li)
+static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
+                               struct mlx5_ib_mr *mr)
 {
-       memset(umr, 0, sizeof(*umr));
-
-       if (li) {
-               umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
-               umr->flags = 1 << 7;
-               return;
-       }
+       int ndescs = mr->ndescs;
 
-       umr->flags = (1 << 5); /* fail if not free */
-       umr->klm_octowords = get_klm_octo(wr->wr.fast_reg.page_list_len);
+       memset(umr, 0, sizeof(*umr));
+       umr->flags = MLX5_UMR_CHECK_NOT_FREE;
+       umr->klm_octowords = get_klm_octo(ndescs);
        umr->mkey_mask = frwr_mkey_mask();
 }
 
+static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr)
+{
+       memset(umr, 0, sizeof(*umr));
+       umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
+       umr->flags = 1 << 7;
+}
+
 static __be64 get_umr_reg_mr_mask(void)
 {
        u64 result;
@@ -1952,7 +1954,7 @@ static __be64 get_umr_update_mtt_mask(void)
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
                                struct ib_send_wr *wr)
 {
-       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+       struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
        memset(umr, 0, sizeof(*umr));
 
@@ -1987,29 +1989,31 @@ static u8 get_umr_flags(int acc)
                MLX5_PERM_LOCAL_READ | MLX5_PERM_UMR_EN;
 }
 
-static void set_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr,
-                            int li, int *writ)
+static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
+                            struct mlx5_ib_mr *mr,
+                            u32 key, int access)
 {
-       memset(seg, 0, sizeof(*seg));
-       if (li) {
-               seg->status = MLX5_MKEY_STATUS_FREE;
-               return;
-       }
+       int ndescs = ALIGN(mr->ndescs, 8) >> 1;
 
-       seg->flags = get_umr_flags(wr->wr.fast_reg.access_flags) |
-                    MLX5_ACCESS_MODE_MTT;
-       *writ = seg->flags & (MLX5_PERM_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE);
-       seg->qpn_mkey7_0 = cpu_to_be32((wr->wr.fast_reg.rkey & 0xff) | 0xffffff00);
+       memset(seg, 0, sizeof(*seg));
+       seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
+       seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
-       seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
-       seg->len = cpu_to_be64(wr->wr.fast_reg.length);
-       seg->xlt_oct_size = cpu_to_be32((wr->wr.fast_reg.page_list_len + 1) / 2);
-       seg->log2_page_size = wr->wr.fast_reg.page_shift;
+       seg->start_addr = cpu_to_be64(mr->ibmr.iova);
+       seg->len = cpu_to_be64(mr->ibmr.length);
+       seg->xlt_oct_size = cpu_to_be32(ndescs);
+       seg->log2_page_size = ilog2(mr->ibmr.page_size);
+}
+
+static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
+{
+       memset(seg, 0, sizeof(*seg));
+       seg->status = MLX5_MKEY_STATUS_FREE;
 }
 
 static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
 {
-       struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+       struct mlx5_umr_wr *umrwr = umr_wr(wr);
 
        memset(seg, 0, sizeof(*seg));
        if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
@@ -2028,21 +2032,14 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
                                       mlx5_mkey_variant(umrwr->mkey));
 }
 
-static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
-                          struct ib_send_wr *wr,
-                          struct mlx5_core_dev *mdev,
-                          struct mlx5_ib_pd *pd,
-                          int writ)
+static void set_reg_data_seg(struct mlx5_wqe_data_seg *dseg,
+                            struct mlx5_ib_mr *mr,
+                            struct mlx5_ib_pd *pd)
 {
-       struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(wr->wr.fast_reg.page_list);
-       u64 *page_list = wr->wr.fast_reg.page_list->page_list;
-       u64 perm = MLX5_EN_RD | (writ ? MLX5_EN_WR : 0);
-       int i;
+       int bcount = mr->desc_size * mr->ndescs;
 
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++)
-               mfrpl->mapped_page_list[i] = cpu_to_be64(page_list[i] | perm);
-       dseg->addr = cpu_to_be64(mfrpl->map);
-       dseg->byte_count = cpu_to_be32(ALIGN(sizeof(u64) * wr->wr.fast_reg.page_list_len, 64));
+       dseg->addr = cpu_to_be64(mr->desc_map);
+       dseg->byte_count = cpu_to_be32(ALIGN(bcount, 64));
        dseg->lkey = cpu_to_be32(pd->ibpd.local_dma_lkey);
 }
 
@@ -2224,22 +2221,22 @@ static int mlx5_set_bsf(struct ib_mr *sig_mr,
        return 0;
 }
 
-static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
-                               void **seg, int *size)
+static int set_sig_data_segment(struct ib_sig_handover_wr *wr,
+                               struct mlx5_ib_qp *qp, void **seg, int *size)
 {
-       struct ib_sig_attrs *sig_attrs = wr->wr.sig_handover.sig_attrs;
-       struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+       struct ib_sig_attrs *sig_attrs = wr->sig_attrs;
+       struct ib_mr *sig_mr = wr->sig_mr;
        struct mlx5_bsf *bsf;
-       u32 data_len = wr->sg_list->length;
-       u32 data_key = wr->sg_list->lkey;
-       u64 data_va = wr->sg_list->addr;
+       u32 data_len = wr->wr.sg_list->length;
+       u32 data_key = wr->wr.sg_list->lkey;
+       u64 data_va = wr->wr.sg_list->addr;
        int ret;
        int wqe_size;
 
-       if (!wr->wr.sig_handover.prot ||
-           (data_key == wr->wr.sig_handover.prot->lkey &&
-            data_va == wr->wr.sig_handover.prot->addr &&
-            data_len == wr->wr.sig_handover.prot->length)) {
+       if (!wr->prot ||
+           (data_key == wr->prot->lkey &&
+            data_va == wr->prot->addr &&
+            data_len == wr->prot->length)) {
                /**
                 * Source domain doesn't contain signature information
                 * or data and protection are interleaved in memory.
@@ -2273,8 +2270,8 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
                struct mlx5_stride_block_ctrl_seg *sblock_ctrl;
                struct mlx5_stride_block_entry *data_sentry;
                struct mlx5_stride_block_entry *prot_sentry;
-               u32 prot_key = wr->wr.sig_handover.prot->lkey;
-               u64 prot_va = wr->wr.sig_handover.prot->addr;
+               u32 prot_key = wr->prot->lkey;
+               u64 prot_va = wr->prot->addr;
                u16 block_size = sig_attrs->mem.sig.dif.pi_interval;
                int prot_size;
 
@@ -2326,16 +2323,16 @@ static int set_sig_data_segment(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
 }
 
 static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
-                                struct ib_send_wr *wr, u32 nelements,
+                                struct ib_sig_handover_wr *wr, u32 nelements,
                                 u32 length, u32 pdn)
 {
-       struct ib_mr *sig_mr = wr->wr.sig_handover.sig_mr;
+       struct ib_mr *sig_mr = wr->sig_mr;
        u32 sig_key = sig_mr->rkey;
        u8 sigerr = to_mmr(sig_mr)->sig->sigerr_count & 1;
 
        memset(seg, 0, sizeof(*seg));
 
-       seg->flags = get_umr_flags(wr->wr.sig_handover.access_flags) |
+       seg->flags = get_umr_flags(wr->access_flags) |
                                   MLX5_ACCESS_MODE_KLM;
        seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00);
        seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 |
@@ -2346,7 +2343,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg,
 }
 
 static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
-                               struct ib_send_wr *wr, u32 nelements)
+                               u32 nelements)
 {
        memset(umr, 0, sizeof(*umr));
 
@@ -2357,37 +2354,37 @@ static void set_sig_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 }
 
 
-static int set_sig_umr_wr(struct ib_send_wr *wr, struct mlx5_ib_qp *qp,
+static int set_sig_umr_wr(struct ib_send_wr *send_wr, struct mlx5_ib_qp *qp,
                          void **seg, int *size)
 {
-       struct mlx5_ib_mr *sig_mr = to_mmr(wr->wr.sig_handover.sig_mr);
+       struct ib_sig_handover_wr *wr = sig_handover_wr(send_wr);
+       struct mlx5_ib_mr *sig_mr = to_mmr(wr->sig_mr);
        u32 pdn = get_pd(qp)->pdn;
        u32 klm_oct_size;
        int region_len, ret;
 
-       if (unlikely(wr->num_sge != 1) ||
-           unlikely(wr->wr.sig_handover.access_flags &
-                    IB_ACCESS_REMOTE_ATOMIC) ||
+       if (unlikely(wr->wr.num_sge != 1) ||
+           unlikely(wr->access_flags & IB_ACCESS_REMOTE_ATOMIC) ||
            unlikely(!sig_mr->sig) || unlikely(!qp->signature_en) ||
            unlikely(!sig_mr->sig->sig_status_checked))
                return -EINVAL;
 
        /* length of the protected region, data + protection */
-       region_len = wr->sg_list->length;
-       if (wr->wr.sig_handover.prot &&
-           (wr->wr.sig_handover.prot->lkey != wr->sg_list->lkey  ||
-            wr->wr.sig_handover.prot->addr != wr->sg_list->addr  ||
-            wr->wr.sig_handover.prot->length != wr->sg_list->length))
-               region_len += wr->wr.sig_handover.prot->length;
+       region_len = wr->wr.sg_list->length;
+       if (wr->prot &&
+           (wr->prot->lkey != wr->wr.sg_list->lkey  ||
+            wr->prot->addr != wr->wr.sg_list->addr  ||
+            wr->prot->length != wr->wr.sg_list->length))
+               region_len += wr->prot->length;
 
        /**
         * KLM octoword size - if protection was provided
         * then we use strided block format (3 octowords),
         * else we use single KLM (1 octoword)
         **/
-       klm_oct_size = wr->wr.sig_handover.prot ? 3 : 1;
+       klm_oct_size = wr->prot ? 3 : 1;
 
-       set_sig_umr_segment(*seg, wr, klm_oct_size);
+       set_sig_umr_segment(*seg, klm_oct_size);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
        if (unlikely((*seg == qp->sq.qend)))
@@ -2433,38 +2430,52 @@ static int set_psv_wr(struct ib_sig_domain *domain,
        return 0;
 }
 
-static int set_frwr_li_wr(void **seg, struct ib_send_wr *wr, int *size,
-                         struct mlx5_core_dev *mdev, struct mlx5_ib_pd *pd, struct mlx5_ib_qp *qp)
+static int set_reg_wr(struct mlx5_ib_qp *qp,
+                     struct ib_reg_wr *wr,
+                     void **seg, int *size)
 {
-       int writ = 0;
-       int li;
+       struct mlx5_ib_mr *mr = to_mmr(wr->mr);
+       struct mlx5_ib_pd *pd = to_mpd(qp->ibqp.pd);
 
-       li = wr->opcode == IB_WR_LOCAL_INV ? 1 : 0;
-       if (unlikely(wr->send_flags & IB_SEND_INLINE))
+       if (unlikely(wr->wr.send_flags & IB_SEND_INLINE)) {
+               mlx5_ib_warn(to_mdev(qp->ibqp.device),
+                            "Invalid IB_SEND_INLINE send flag\n");
                return -EINVAL;
+       }
 
-       set_frwr_umr_segment(*seg, wr, li);
+       set_reg_umr_seg(*seg, mr);
        *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
        *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
        if (unlikely((*seg == qp->sq.qend)))
                *seg = mlx5_get_send_wqe(qp, 0);
-       set_mkey_segment(*seg, wr, li, &writ);
+
+       set_reg_mkey_seg(*seg, mr, wr->key, wr->access);
        *seg += sizeof(struct mlx5_mkey_seg);
        *size += sizeof(struct mlx5_mkey_seg) / 16;
        if (unlikely((*seg == qp->sq.qend)))
                *seg = mlx5_get_send_wqe(qp, 0);
-       if (!li) {
-               if (unlikely(wr->wr.fast_reg.page_list_len >
-                            wr->wr.fast_reg.page_list->max_page_list_len))
-                       return  -ENOMEM;
 
-               set_frwr_pages(*seg, wr, mdev, pd, writ);
-               *seg += sizeof(struct mlx5_wqe_data_seg);
-               *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
-       }
+       set_reg_data_seg(*seg, mr, pd);
+       *seg += sizeof(struct mlx5_wqe_data_seg);
+       *size += (sizeof(struct mlx5_wqe_data_seg) / 16);
+
        return 0;
 }
 
+static void set_linv_wr(struct mlx5_ib_qp *qp, void **seg, int *size)
+{
+       set_linv_umr_seg(*seg);
+       *seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
+       *size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+       set_linv_mkey_seg(*seg);
+       *seg += sizeof(struct mlx5_mkey_seg);
+       *size += sizeof(struct mlx5_mkey_seg) / 16;
+       if (unlikely((*seg == qp->sq.qend)))
+               *seg = mlx5_get_send_wqe(qp, 0);
+}
+
 static void dump_wqe(struct mlx5_ib_qp *qp, int idx, int size_16)
 {
        __be32 *p = NULL;
@@ -2578,7 +2589,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 {
        struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
        struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
-       struct mlx5_core_dev *mdev = dev->mdev;
        struct mlx5_ib_qp *qp = to_mqp(ibqp);
        struct mlx5_ib_mr *mr;
        struct mlx5_wqe_data_seg *dpseg;
@@ -2627,7 +2637,6 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                switch (ibqp->qp_type) {
                case IB_QPT_XRC_INI:
                        xrc = seg;
-                       xrc->xrc_srqn = htonl(wr->xrc_remote_srq_num);
                        seg += sizeof(*xrc);
                        size += sizeof(*xrc) / 16;
                        /* fall through */
@@ -2636,8 +2645,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(seg, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                seg += sizeof(struct mlx5_wqe_raddr_seg);
                                size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
                                break;
@@ -2654,22 +2663,16 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
                                qp->sq.wr_data[idx] = IB_WR_LOCAL_INV;
                                ctrl->imm = cpu_to_be32(wr->ex.invalidate_rkey);
-                               err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
-                               if (err) {
-                                       mlx5_ib_warn(dev, "\n");
-                                       *bad_wr = wr;
-                                       goto out;
-                               }
+                               set_linv_wr(qp, &seg, &size);
                                num_sge = 0;
                                break;
 
-                       case IB_WR_FAST_REG_MR:
+                       case IB_WR_REG_MR:
                                next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
-                               qp->sq.wr_data[idx] = IB_WR_FAST_REG_MR;
-                               ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
-                               err = set_frwr_li_wr(&seg, wr, &size, mdev, to_mpd(ibqp->pd), qp);
+                               qp->sq.wr_data[idx] = IB_WR_REG_MR;
+                               ctrl->imm = cpu_to_be32(reg_wr(wr)->key);
+                               err = set_reg_wr(qp, reg_wr(wr), &seg, &size);
                                if (err) {
-                                       mlx5_ib_warn(dev, "\n");
                                        *bad_wr = wr;
                                        goto out;
                                }
@@ -2678,7 +2681,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                        case IB_WR_REG_SIG_MR:
                                qp->sq.wr_data[idx] = IB_WR_REG_SIG_MR;
-                               mr = to_mmr(wr->wr.sig_handover.sig_mr);
+                               mr = to_mmr(sig_handover_wr(wr)->sig_mr);
 
                                ctrl->imm = cpu_to_be32(mr->ibmr.rkey);
                                err = set_sig_umr_wr(wr, qp, &seg, &size);
@@ -2706,7 +2709,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                        goto out;
                                }
 
-                               err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->mem,
+                               err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->mem,
                                                 mr->sig->psv_memory.psv_idx, &seg,
                                                 &size);
                                if (err) {
@@ -2728,7 +2731,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                }
 
                                next_fence = MLX5_FENCE_MODE_INITIATOR_SMALL;
-                               err = set_psv_wr(&wr->wr.sig_handover.sig_attrs->wire,
+                               err = set_psv_wr(&sig_handover_wr(wr)->sig_attrs->wire,
                                                 mr->sig->psv_wire.psv_idx, &seg,
                                                 &size);
                                if (err) {
@@ -2752,8 +2755,8 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(seg, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(seg, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                seg  += sizeof(struct mlx5_wqe_raddr_seg);
                                size += sizeof(struct mlx5_wqe_raddr_seg) / 16;
                                break;
@@ -2780,7 +2783,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                goto out;
                        }
                        qp->sq.wr_data[idx] = MLX5_IB_WR_UMR;
-                       ctrl->imm = cpu_to_be32(wr->wr.fast_reg.rkey);
+                       ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey);
                        set_reg_umr_segment(seg, wr);
                        seg += sizeof(struct mlx5_wqe_umr_ctrl_seg);
                        size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16;
index 32f6c63154541dec229ec8904d1438fddb88d14d..bcac294042f5e315e6324cbb0478f56d0524049c 100644
@@ -281,7 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
                ib_get_cached_gid(&dev->ib_dev,
                                  be32_to_cpu(ah->av->port_pd) >> 24,
                                  ah->av->gid_index % dev->limits.gid_table_len,
-                                 &header->grh.source_gid);
+                                 &header->grh.source_gid, NULL);
                memcpy(header->grh.destination_gid.raw,
                       ah->av->dgid, 16);
        }
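
The NULL added above is the new optional attribute argument of ib_get_cached_gid(): callers that only need the GID itself pass NULL, while callers that also need the attributes pass a struct ib_gid_attr (see the ocrdma hunks further down). The assumed prototype after this series — not shown in this hunk — is roughly:

	int ib_get_cached_gid(struct ib_device *device, u8 port_num, int index,
			      union ib_gid *gid, struct ib_gid_attr *attr);
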
index e354b2f04ad9a63d1c70163cf035f31aafbdc3c8..35fe506e2cfa892259a4975919b07355be3df345 100644
@@ -1476,7 +1476,7 @@ void mthca_free_qp(struct mthca_dev *dev,
 
 /* Create UD header for an MLX send and build a data segment for it */
 static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
-                           int ind, struct ib_send_wr *wr,
+                           int ind, struct ib_ud_wr *wr,
                            struct mthca_mlx_seg *mlx,
                            struct mthca_data_seg *data)
 {
@@ -1485,10 +1485,10 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        u16 pkey;
 
        ib_ud_header_init(256, /* assume a MAD */ 1, 0, 0,
-                         mthca_ah_grh_present(to_mah(wr->wr.ud.ah)), 0,
+                         mthca_ah_grh_present(to_mah(wr->ah)), 0,
                          &sqp->ud_header);
 
-       err = mthca_read_ah(dev, to_mah(wr->wr.ud.ah), &sqp->ud_header);
+       err = mthca_read_ah(dev, to_mah(wr->ah), &sqp->ud_header);
        if (err)
                return err;
        mlx->flags &= ~cpu_to_be32(MTHCA_NEXT_SOLICIT | 1);
@@ -1499,7 +1499,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        mlx->rlid = sqp->ud_header.lrh.destination_lid;
        mlx->vcrc = 0;
 
-       switch (wr->opcode) {
+       switch (wr->wr.opcode) {
        case IB_WR_SEND:
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
                sqp->ud_header.immediate_present = 0;
@@ -1507,7 +1507,7 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        case IB_WR_SEND_WITH_IMM:
                sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
                sqp->ud_header.immediate_present = 1;
-               sqp->ud_header.immediate_data = wr->ex.imm_data;
+               sqp->ud_header.immediate_data = wr->wr.ex.imm_data;
                break;
        default:
                return -EINVAL;
@@ -1516,18 +1516,18 @@ static int build_mlx_header(struct mthca_dev *dev, struct mthca_sqp *sqp,
        sqp->ud_header.lrh.virtual_lane    = !sqp->qp.ibqp.qp_num ? 15 : 0;
        if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE)
                sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE;
-       sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
+       sqp->ud_header.bth.solicited_event = !!(wr->wr.send_flags & IB_SEND_SOLICITED);
        if (!sqp->qp.ibqp.qp_num)
                ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
                                   sqp->pkey_index, &pkey);
        else
                ib_get_cached_pkey(&dev->ib_dev, sqp->qp.port,
-                                  wr->wr.ud.pkey_index, &pkey);
+                                  wr->pkey_index, &pkey);
        sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
-       sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
+       sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->remote_qpn);
        sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
-       sqp->ud_header.deth.qkey = cpu_to_be32(wr->wr.ud.remote_qkey & 0x80000000 ?
-                                              sqp->qkey : wr->wr.ud.remote_qkey);
+       sqp->ud_header.deth.qkey = cpu_to_be32(wr->remote_qkey & 0x80000000 ?
+                                              sqp->qkey : wr->remote_qkey);
        sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.ibqp.qp_num);
 
        header_size = ib_ud_header_pack(&sqp->ud_header,
@@ -1569,34 +1569,34 @@ static __always_inline void set_raddr_seg(struct mthca_raddr_seg *rseg,
 }
 
 static __always_inline void set_atomic_seg(struct mthca_atomic_seg *aseg,
-                                          struct ib_send_wr *wr)
+                                          struct ib_atomic_wr *wr)
 {
-       if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
-               aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+       if (wr->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+               aseg->swap_add = cpu_to_be64(wr->swap);
+               aseg->compare  = cpu_to_be64(wr->compare_add);
        } else {
-               aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+               aseg->swap_add = cpu_to_be64(wr->compare_add);
                aseg->compare  = 0;
        }
 
 }
 
 static void set_tavor_ud_seg(struct mthca_tavor_ud_seg *useg,
-                            struct ib_send_wr *wr)
+                            struct ib_ud_wr *wr)
 {
-       useg->lkey    = cpu_to_be32(to_mah(wr->wr.ud.ah)->key);
-       useg->av_addr = cpu_to_be64(to_mah(wr->wr.ud.ah)->avdma);
-       useg->dqpn    = cpu_to_be32(wr->wr.ud.remote_qpn);
-       useg->qkey    = cpu_to_be32(wr->wr.ud.remote_qkey);
+       useg->lkey    = cpu_to_be32(to_mah(wr->ah)->key);
+       useg->av_addr = cpu_to_be64(to_mah(wr->ah)->avdma);
+       useg->dqpn    = cpu_to_be32(wr->remote_qpn);
+       useg->qkey    = cpu_to_be32(wr->remote_qkey);
 
 }
 
 static void set_arbel_ud_seg(struct mthca_arbel_ud_seg *useg,
-                            struct ib_send_wr *wr)
+                            struct ib_ud_wr *wr)
 {
-       memcpy(useg->av, to_mah(wr->wr.ud.ah)->av, MTHCA_AV_SIZE);
-       useg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
-       useg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
+       memcpy(useg->av, to_mah(wr->ah)->av, MTHCA_AV_SIZE);
+       useg->dqpn = cpu_to_be32(wr->remote_qpn);
+       useg->qkey = cpu_to_be32(wr->remote_qkey);
 }
 
 int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
@@ -1664,11 +1664,11 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
+                               set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+                                             atomic_wr(wr)->rkey);
                                wqe += sizeof (struct mthca_raddr_seg);
 
-                               set_atomic_seg(wqe, wr);
+                               set_atomic_seg(wqe, atomic_wr(wr));
                                wqe += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;
@@ -1677,8 +1677,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
                        case IB_WR_RDMA_READ:
-                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
@@ -1694,8 +1694,8 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
@@ -1708,13 +1708,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case UD:
-                       set_tavor_ud_seg(wqe, wr);
+                       set_tavor_ud_seg(wqe, ud_wr(wr));
                        wqe  += sizeof (struct mthca_tavor_ud_seg);
                        size += sizeof (struct mthca_tavor_ud_seg) / 16;
                        break;
 
                case MLX:
-                       err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+                       err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
                                               wqe - sizeof (struct mthca_next_seg),
                                               wqe);
                        if (err) {
@@ -2005,11 +2005,11 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_ATOMIC_CMP_AND_SWP:
                        case IB_WR_ATOMIC_FETCH_AND_ADD:
-                               set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
-                                             wr->wr.atomic.rkey);
+                               set_raddr_seg(wqe, atomic_wr(wr)->remote_addr,
+                                             atomic_wr(wr)->rkey);
                                wqe += sizeof (struct mthca_raddr_seg);
 
-                               set_atomic_seg(wqe, wr);
+                               set_atomic_seg(wqe, atomic_wr(wr));
                                wqe  += sizeof (struct mthca_atomic_seg);
                                size += (sizeof (struct mthca_raddr_seg) +
                                         sizeof (struct mthca_atomic_seg)) / 16;
@@ -2018,8 +2018,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
@@ -2035,8 +2035,8 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        switch (wr->opcode) {
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_WRITE_WITH_IMM:
-                               set_raddr_seg(wqe, wr->wr.rdma.remote_addr,
-                                             wr->wr.rdma.rkey);
+                               set_raddr_seg(wqe, rdma_wr(wr)->remote_addr,
+                                             rdma_wr(wr)->rkey);
                                wqe  += sizeof (struct mthca_raddr_seg);
                                size += sizeof (struct mthca_raddr_seg) / 16;
                                break;
@@ -2049,13 +2049,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case UD:
-                       set_arbel_ud_seg(wqe, wr);
+                       set_arbel_ud_seg(wqe, ud_wr(wr));
                        wqe  += sizeof (struct mthca_arbel_ud_seg);
                        size += sizeof (struct mthca_arbel_ud_seg) / 16;
                        break;
 
                case MLX:
-                       err = build_mlx_header(dev, to_msqp(qp), ind, wr,
+                       err = build_mlx_header(dev, to_msqp(qp), ind, ud_wr(wr),
                                               wqe - sizeof (struct mthca_next_seg),
                                               wqe);
                        if (err) {
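
The mthca hunks above (like the mlx5 ones earlier) stop reaching into the old wr->wr.ud/rdma/atomic unions and instead take typed work requests obtained through ud_wr(), rdma_wr() and atomic_wr(). Each typed WR embeds the generic struct ib_send_wr as its first member, so the accessors are plain container_of() casts; a trimmed sketch of the UD variant, assuming only the fields visible in the hunks above:

	/* Assumed shape of the typed UD work request; the real definition
	 * lives in the core headers, not in this diff. */
	struct ib_ud_wr {
		struct ib_send_wr	wr;	/* generic part: opcode, sg_list, send_flags, ... */
		struct ib_ah		*ah;
		u32			remote_qpn;
		u32			remote_qkey;
		u16			pkey_index;
	};

	static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
	{
		return container_of(wr, struct ib_ud_wr, wr);
	}

The generic fields are now reached through wr->wr.*, which is why build_mlx_header() above switches from wr->opcode and wr->ex.imm_data to wr->wr.opcode and wr->wr.ex.imm_data.
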
index d748e4b31b8ddf2de875fbd57e7e3277ed0171f8..c9080208aad2ec4b022f915407d61479de40c41f 100644
@@ -1200,12 +1200,6 @@ struct nes_fast_mr_wqe_pbl {
        dma_addr_t      paddr;
 };
 
-struct nes_ib_fast_reg_page_list {
-       struct ib_fast_reg_page_list    ibfrpl;
-       struct nes_fast_mr_wqe_pbl      nes_wqe_pbl;
-       u64                             pbl;
-};
-
 struct nes_listener {
        struct work_struct      work;
        struct workqueue_struct *wq;
index 44cb513f9a87c0597704422393802cebf70f0f45..137880a19ebe4827bc7f8b419ba6faf9dc50de41 100644
@@ -51,6 +51,7 @@ atomic_t qps_created;
 atomic_t sw_qps_destroyed;
 
 static void nes_unregister_ofa_device(struct nes_ib_device *nesibdev);
+static int nes_dereg_mr(struct ib_mr *ib_mr);
 
 /**
  * nes_alloc_mw
@@ -443,79 +444,46 @@ static struct ib_mr *nes_alloc_mr(struct ib_pd *ibpd,
        } else {
                kfree(nesmr);
                nes_free_resource(nesadapter, nesadapter->allocated_mrs, stag_index);
-               ibmr = ERR_PTR(-ENOMEM);
+               return ERR_PTR(-ENOMEM);
        }
+
+       nesmr->pages = pci_alloc_consistent(nesdev->pcidev,
+                                           max_num_sg * sizeof(u64),
+                                           &nesmr->paddr);
+       if (!nesmr->paddr)
+               goto err;
+
+       nesmr->max_pages = max_num_sg;
+
        return ibmr;
+
+err:
+       nes_dereg_mr(ibmr);
+
+       return ERR_PTR(-ENOMEM);
 }
 
-/*
- * nes_alloc_fast_reg_page_list
- */
-static struct ib_fast_reg_page_list *nes_alloc_fast_reg_page_list(
-                                                       struct ib_device *ibdev,
-                                                       int page_list_len)
+static int nes_set_page(struct ib_mr *ibmr, u64 addr)
 {
-       struct nes_vnic *nesvnic = to_nesvnic(ibdev);
-       struct nes_device *nesdev = nesvnic->nesdev;
-       struct ib_fast_reg_page_list *pifrpl;
-       struct nes_ib_fast_reg_page_list *pnesfrpl;
+       struct nes_mr *nesmr = to_nesmr(ibmr);
 
-       if (page_list_len > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64)))
-               return ERR_PTR(-E2BIG);
-       /*
-        * Allocate the ib_fast_reg_page_list structure, the
-        * nes_fast_bpl structure, and the PLB table.
-        */
-       pnesfrpl = kmalloc(sizeof(struct nes_ib_fast_reg_page_list) +
-                          page_list_len * sizeof(u64), GFP_KERNEL);
-
-       if (!pnesfrpl)
-               return ERR_PTR(-ENOMEM);
+       if (unlikely(nesmr->npages == nesmr->max_pages))
+               return -ENOMEM;
 
-       pifrpl = &pnesfrpl->ibfrpl;
-       pifrpl->page_list = &pnesfrpl->pbl;
-       pifrpl->max_page_list_len = page_list_len;
-       /*
-        * Allocate the WQE PBL
-        */
-       pnesfrpl->nes_wqe_pbl.kva = pci_alloc_consistent(nesdev->pcidev,
-                                                        page_list_len * sizeof(u64),
-                                                        &pnesfrpl->nes_wqe_pbl.paddr);
+       nesmr->pages[nesmr->npages++] = cpu_to_le64(addr);
 
-       if (!pnesfrpl->nes_wqe_pbl.kva) {
-               kfree(pnesfrpl);
-               return ERR_PTR(-ENOMEM);
-       }
-       nes_debug(NES_DBG_MR, "nes_alloc_fast_reg_pbl: nes_frpl = %p, "
-                 "ibfrpl = %p, ibfrpl.page_list = %p, pbl.kva = %p, "
-                 "pbl.paddr = %llx\n", pnesfrpl, &pnesfrpl->ibfrpl,
-                 pnesfrpl->ibfrpl.page_list, pnesfrpl->nes_wqe_pbl.kva,
-                 (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr);
-
-       return pifrpl;
+       return 0;
 }
 
-/*
- * nes_free_fast_reg_page_list
- */
-static void nes_free_fast_reg_page_list(struct ib_fast_reg_page_list *pifrpl)
+static int nes_map_mr_sg(struct ib_mr *ibmr,
+                        struct scatterlist *sg,
+                        int sg_nents)
 {
-       struct nes_vnic *nesvnic = to_nesvnic(pifrpl->device);
-       struct nes_device *nesdev = nesvnic->nesdev;
-       struct nes_ib_fast_reg_page_list *pnesfrpl;
+       struct nes_mr *nesmr = to_nesmr(ibmr);
 
-       pnesfrpl = container_of(pifrpl, struct nes_ib_fast_reg_page_list, ibfrpl);
-       /*
-        * Free the WQE PBL.
-        */
-       pci_free_consistent(nesdev->pcidev,
-                           pifrpl->max_page_list_len * sizeof(u64),
-                           pnesfrpl->nes_wqe_pbl.kva,
-                           pnesfrpl->nes_wqe_pbl.paddr);
-       /*
-        * Free the PBL structure
-        */
-       kfree(pnesfrpl);
+       nesmr->npages = 0;
+
+       return ib_sg_to_pages(ibmr, sg, sg_nents, nes_set_page);
 }
 
 /**
@@ -2683,6 +2651,13 @@ static int nes_dereg_mr(struct ib_mr *ib_mr)
        u16 major_code;
        u16 minor_code;
 
+
+       if (nesmr->pages)
+               pci_free_consistent(nesdev->pcidev,
+                                   nesmr->max_pages * sizeof(u64),
+                                   nesmr->pages,
+                                   nesmr->paddr);
+
        if (nesmr->region) {
                ib_umem_release(nesmr->region);
        }
@@ -3372,9 +3347,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                wqe_misc |= NES_IWARP_SQ_WQE_LOCAL_FENCE;
 
                        set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
-                                           ib_wr->wr.rdma.rkey);
+                                           rdma_wr(ib_wr)->rkey);
                        set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
-                                           ib_wr->wr.rdma.remote_addr);
+                                           rdma_wr(ib_wr)->remote_addr);
 
                        if ((ib_wr->send_flags & IB_SEND_INLINE) &&
                            ((nes_drv_opt & NES_DRV_OPT_NO_INLINE_DATA) == 0) &&
@@ -3409,9 +3384,9 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                        }
 
                        set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX,
-                                           ib_wr->wr.rdma.remote_addr);
+                                           rdma_wr(ib_wr)->remote_addr);
                        set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_STAG_IDX,
-                                           ib_wr->wr.rdma.rkey);
+                                           rdma_wr(ib_wr)->rkey);
                        set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX,
                                            ib_wr->sg_list->length);
                        set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
@@ -3425,19 +3400,13 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                            NES_IWARP_SQ_LOCINV_WQE_INV_STAG_IDX,
                                            ib_wr->ex.invalidate_rkey);
                        break;
-               case IB_WR_FAST_REG_MR:
+               case IB_WR_REG_MR:
                {
-                       int i;
-                       int flags = ib_wr->wr.fast_reg.access_flags;
-                       struct nes_ib_fast_reg_page_list *pnesfrpl =
-                               container_of(ib_wr->wr.fast_reg.page_list,
-                                            struct nes_ib_fast_reg_page_list,
-                                            ibfrpl);
-                       u64 *src_page_list = pnesfrpl->ibfrpl.page_list;
-                       u64 *dst_page_list = pnesfrpl->nes_wqe_pbl.kva;
-
-                       if (ib_wr->wr.fast_reg.page_list_len >
-                           (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
+                       struct nes_mr *mr = to_nesmr(reg_wr(ib_wr)->mr);
+                       int page_shift = ilog2(reg_wr(ib_wr)->mr->page_size);
+                       int flags = reg_wr(ib_wr)->access;
+
+                       if (mr->npages > (NES_4K_PBL_CHUNK_SIZE / sizeof(u64))) {
                                nes_debug(NES_DBG_IW_TX, "SQ_FMR: bad page_list_len\n");
                                err = -EINVAL;
                                break;
@@ -3445,19 +3414,19 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                        wqe_misc = NES_IWARP_SQ_OP_FAST_REG;
                        set_wqe_64bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_VA_FBO_LOW_IDX,
-                                           ib_wr->wr.fast_reg.iova_start);
+                                           mr->ibmr.iova);
                        set_wqe_32bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
-                                           ib_wr->wr.fast_reg.length);
+                                           mr->ibmr.length);
                        set_wqe_32bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
                        set_wqe_32bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
-                                           ib_wr->wr.fast_reg.rkey);
-                       /* Set page size: */
-                       if (ib_wr->wr.fast_reg.page_shift == 12) {
+                                           reg_wr(ib_wr)->key);
+
+                       if (page_shift == 12) {
                                wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_4K;
-                       } else if (ib_wr->wr.fast_reg.page_shift == 21) {
+                       } else if (page_shift == 21) {
                                wqe_misc |= NES_IWARP_SQ_FMR_WQE_PAGE_SIZE_2M;
                        } else {
                                nes_debug(NES_DBG_IW_TX, "Invalid page shift,"
@@ -3465,6 +3434,7 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                err = -EINVAL;
                                break;
                        }
+
                        /* Set access_flags */
                        wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_LOCAL_READ;
                        if (flags & IB_ACCESS_LOCAL_WRITE)
@@ -3480,35 +3450,22 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                wqe_misc |= NES_IWARP_SQ_FMR_WQE_RIGHTS_ENABLE_WINDOW_BIND;
 
                        /* Fill in PBL info: */
-                       if (ib_wr->wr.fast_reg.page_list_len >
-                           pnesfrpl->ibfrpl.max_page_list_len) {
-                               nes_debug(NES_DBG_IW_TX, "Invalid page list length,"
-                                         " ib_wr=%p, value=%u, max=%u\n",
-                                         ib_wr, ib_wr->wr.fast_reg.page_list_len,
-                                         pnesfrpl->ibfrpl.max_page_list_len);
-                               err = -EINVAL;
-                               break;
-                       }
-
                        set_wqe_64bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_PBL_ADDR_LOW_IDX,
-                                           pnesfrpl->nes_wqe_pbl.paddr);
+                                           mr->paddr);
 
                        set_wqe_32bit_value(wqe->wqe_words,
                                            NES_IWARP_SQ_FMR_WQE_PBL_LENGTH_IDX,
-                                           ib_wr->wr.fast_reg.page_list_len * 8);
-
-                       for (i = 0; i < ib_wr->wr.fast_reg.page_list_len; i++)
-                               dst_page_list[i] = cpu_to_le64(src_page_list[i]);
+                                           mr->npages * 8);
 
-                       nes_debug(NES_DBG_IW_TX, "SQ_FMR: iova_start: %llx, "
+                       nes_debug(NES_DBG_IW_TX, "SQ_REG_MR: iova_start: %llx, "
                                  "length: %d, rkey: %0x, pgl_paddr: %llx, "
                                  "page_list_len: %u, wqe_misc: %x\n",
-                                 (unsigned long long) ib_wr->wr.fast_reg.iova_start,
-                                 ib_wr->wr.fast_reg.length,
-                                 ib_wr->wr.fast_reg.rkey,
-                                 (unsigned long long) pnesfrpl->nes_wqe_pbl.paddr,
-                                 ib_wr->wr.fast_reg.page_list_len,
+                                 (unsigned long long) mr->ibmr.iova,
+                                 mr->ibmr.length,
+                                 reg_wr(ib_wr)->key,
+                                 (unsigned long long) mr->paddr,
+                                 mr->npages,
                                  wqe_misc);
                        break;
                }
@@ -3751,7 +3708,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
                                                entry->opcode = IB_WC_LOCAL_INV;
                                                break;
                                        case NES_IWARP_SQ_OP_FAST_REG:
-                                               entry->opcode = IB_WC_FAST_REG_MR;
+                                               entry->opcode = IB_WC_REG_MR;
                                                break;
                                }
 
@@ -3939,8 +3896,7 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
        nesibdev->ibdev.bind_mw = nes_bind_mw;
 
        nesibdev->ibdev.alloc_mr = nes_alloc_mr;
-       nesibdev->ibdev.alloc_fast_reg_page_list = nes_alloc_fast_reg_page_list;
-       nesibdev->ibdev.free_fast_reg_page_list = nes_free_fast_reg_page_list;
+       nesibdev->ibdev.map_mr_sg = nes_map_mr_sg;
 
        nesibdev->ibdev.attach_mcast = nes_multicast_attach;
        nesibdev->ibdev.detach_mcast = nes_multicast_detach;
index 309b31c31ae1ac5e9b7bcd44701f22d65e0a1630..a204b677af22a8d00fba4b5b65bc0620d4cb7a1c 100644
@@ -79,6 +79,10 @@ struct nes_mr {
        u16               pbls_used;
        u8                mode;
        u8                pbl_4k;
+       __le64            *pages;
+       dma_addr_t        paddr;
+       u32               max_pages;
+       u32               npages;
 };
 
 struct nes_hw_pb {
index b4091ab48db0bc86d8edf42cb76661c5d6b44b52..ae80590aabdf7db6df756c0ee07afa4721f6c37c 100644
@@ -55,7 +55,7 @@
 #include <be_roce.h>
 #include "ocrdma_sli.h"
 
-#define OCRDMA_ROCE_DRV_VERSION "10.6.0.0"
+#define OCRDMA_ROCE_DRV_VERSION "11.0.0.0"
 
 #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
 #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -193,6 +193,8 @@ struct ocrdma_mr {
        struct ib_mr ibmr;
        struct ib_umem *umem;
        struct ocrdma_hw_mr hwmr;
+       u64 *pages;
+       u32 npages;
 };
 
 struct ocrdma_stats {
@@ -278,7 +280,6 @@ struct ocrdma_dev {
        u32 hba_port_num;
 
        struct list_head entry;
-       struct rcu_head rcu;
        int id;
        u64 *stag_arr;
        u8 sl; /* service level */
index 44766fee1f4e2cacb2d971f7858ad6e3eb4c4875..9820074be59d73bd1aac4b56d4a38254d2f9b7bb 100644
@@ -45,6 +45,7 @@
 
 #include <rdma/ib_addr.h>
 #include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
 
 #include "ocrdma.h"
 #include "ocrdma_verbs.h"
 
 static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
                        struct ib_ah_attr *attr, union ib_gid *sgid,
-                       int pdid, bool *isvlan)
+                       int pdid, bool *isvlan, u16 vlan_tag)
 {
        int status = 0;
-       u16 vlan_tag;
        struct ocrdma_eth_vlan eth;
        struct ocrdma_grh grh;
        int eth_sz;
@@ -68,7 +68,6 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
        memset(&grh, 0, sizeof(grh));
 
        /* VLAN */
-       vlan_tag = attr->vlan_id;
        if (!vlan_tag || (vlan_tag > 0xFFF))
                vlan_tag = dev->pvid;
        if (vlan_tag || dev->pfc_state) {
@@ -115,9 +114,11 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 {
        u32 *ahid_addr;
-       bool isvlan = false;
        int status;
        struct ocrdma_ah *ah;
+       bool isvlan = false;
+       u16 vlan_tag = 0xffff;
+       struct ib_gid_attr sgid_attr;
        struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
        struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
        union ib_gid sgid;
@@ -135,18 +136,25 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
        if (status)
                goto av_err;
 
-       status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid);
+       status = ib_get_cached_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid,
+                                  &sgid_attr);
        if (status) {
                pr_err("%s(): Failed to query sgid, status = %d\n",
                      __func__, status);
                goto av_conf_err;
        }
+       if (sgid_attr.ndev) {
+               if (is_vlan_dev(sgid_attr.ndev))
+                       vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
+               dev_put(sgid_attr.ndev);
+       }
 
        if ((pd->uctx) &&
            (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
            (!rdma_link_local_addr((struct in6_addr *)attr->grh.dgid.raw))) {
                status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
-                                        attr->dmac, &attr->vlan_id);
+                                                   attr->dmac, &vlan_tag,
+                                                   sgid_attr.ndev->ifindex);
                if (status) {
                        pr_err("%s(): Failed to resolve dmac from gid." 
                                "status = %d\n", __func__, status);
@@ -154,7 +162,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
                }
        }
 
-       status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan);
+       status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
        if (status)
                goto av_conf_err;
 
index aab391a15db429104f52765346455ba07efa424b..30f67bebffa35742189c4fc08b91654f75bfdcfa 100644
@@ -47,6 +47,7 @@
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/ib_cache.h>
 
 #include "ocrdma.h"
 #include "ocrdma_hw.h"
@@ -678,11 +679,33 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
        int dev_event = 0;
        int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
            OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;
+       u16 qpid = cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK;
+       u16 cqid = cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK;
 
-       if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID)
-               qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK];
-       if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
-               cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
+       /*
+        * Some FW versions return wrong qp or cq ids in CQEs.
+        * Check whether the IDs are valid before using them.
+        */
+
+       if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID) {
+               if (qpid < dev->attr.max_qp)
+                       qp = dev->qp_tbl[qpid];
+               if (qp == NULL) {
+                       pr_err("ocrdma%d:Async event - qpid %u is not valid\n",
+                              dev->id, qpid);
+                       return;
+               }
+       }
+
+       if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) {
+               if (cqid < dev->attr.max_cq)
+                       cq = dev->cq_tbl[cqid];
+               if (cq == NULL) {
+                       pr_err("ocrdma%d:Async event - cqid %u is not valid\n",
+                              dev->id, cqid);
+                       return;
+               }
+       }
 
        memset(&ib_evt, 0, sizeof(ib_evt));
 
@@ -2448,6 +2471,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        int status;
        struct ib_ah_attr *ah_attr = &attrs->ah_attr;
        union ib_gid sgid, zgid;
+       struct ib_gid_attr sgid_attr;
        u32 vlan_id = 0xFFFF;
        u8 mac_addr[6];
        struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
@@ -2466,10 +2490,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
        memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
               sizeof(cmd->params.dgid));
-       status = ocrdma_query_gid(&dev->ibdev, 1,
-                       ah_attr->grh.sgid_index, &sgid);
-       if (status)
-               return status;
+
+       status = ib_get_cached_gid(&dev->ibdev, 1, ah_attr->grh.sgid_index,
+                                  &sgid, &sgid_attr);
+       if (!status && sgid_attr.ndev) {
+               vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+               memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
+               dev_put(sgid_attr.ndev);
+       }
 
        memset(&zgid, 0, sizeof(zgid));
        if (!memcmp(&sgid, &zgid, sizeof(zgid)))
@@ -2486,17 +2514,15 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
        ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
        ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
        cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
-       if (attr_mask & IB_QP_VID) {
-               vlan_id = attrs->vlan_id;
-       } else if (dev->pfc_state) {
-               vlan_id = 0;
-               pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
-                       dev->id);
-               pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
-                       dev->id);
-       }
 
        if (vlan_id < 0x1000) {
+               if (dev->pfc_state) {
+                       vlan_id = 0;
+                       pr_err("ocrdma%d:Using VLAN with PFC is recommended\n",
+                              dev->id);
+                       pr_err("ocrdma%d:Using VLAN 0 for this connection\n",
+                              dev->id);
+               }
                cmd->params.vlan_dmac_b4_to_b5 |=
                    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
                cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
index 87aa55df7c8211f6f8fbc441d0133b86878387ec..62b7009daa6c18a4d21002636637b5a2d258b231 100644
@@ -63,8 +63,6 @@ MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION);
 MODULE_AUTHOR("Emulex Corporation");
 MODULE_LICENSE("Dual BSD/GPL");
 
-static LIST_HEAD(ocrdma_dev_list);
-static DEFINE_SPINLOCK(ocrdma_devlist_lock);
 static DEFINE_IDR(ocrdma_dev_id);
 
 void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
@@ -182,8 +180,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;
 
        dev->ibdev.alloc_mr = ocrdma_alloc_mr;
-       dev->ibdev.alloc_fast_reg_page_list = ocrdma_alloc_frmr_page_list;
-       dev->ibdev.free_fast_reg_page_list = ocrdma_free_frmr_page_list;
+       dev->ibdev.map_mr_sg = ocrdma_map_mr_sg;
 
        /* mandatory to support user space verbs consumer. */
        dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
@@ -325,9 +322,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
        for (i = 0; i < ARRAY_SIZE(ocrdma_attributes); i++)
                if (device_create_file(&dev->ibdev.dev, ocrdma_attributes[i]))
                        goto sysfs_err;
-       spin_lock(&ocrdma_devlist_lock);
-       list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
-       spin_unlock(&ocrdma_devlist_lock);
        /* Init stats */
        ocrdma_add_port_stats(dev);
        /* Interrupt Moderation */
@@ -356,9 +350,8 @@ idr_err:
        return NULL;
 }
 
-static void ocrdma_remove_free(struct rcu_head *rcu)
+static void ocrdma_remove_free(struct ocrdma_dev *dev)
 {
-       struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);
 
        idr_remove(&ocrdma_dev_id, dev->id);
        kfree(dev->mbx_cmd);
@@ -375,15 +368,9 @@ static void ocrdma_remove(struct ocrdma_dev *dev)
        ib_unregister_device(&dev->ibdev);
 
        ocrdma_rem_port_stats(dev);
-
-       spin_lock(&ocrdma_devlist_lock);
-       list_del_rcu(&dev->entry);
-       spin_unlock(&ocrdma_devlist_lock);
-
        ocrdma_free_resources(dev);
        ocrdma_cleanup_hw(dev);
-
-       call_rcu(&dev->rcu, ocrdma_remove_free);
+       ocrdma_remove_free(dev);
 }
 
 static int ocrdma_open(struct ocrdma_dev *dev)
index 69334e214571b94ae0305bc8c9821aa0e261b7fe..86c303a620c1660625ebb94f1b54e578856b26ff 100644 (file)
@@ -855,9 +855,9 @@ void ocrdma_rem_port_stats(struct ocrdma_dev *dev)
 {
        if (!dev->dir)
                return;
+       debugfs_remove(dev->dir);
        mutex_destroy(&dev->stats_lock);
        ocrdma_release_stats_mem(dev);
-       debugfs_remove(dev->dir);
 }
 
 void ocrdma_init_debugfs(void)
index 1f3affb6a477156dec43694d80b70b3b8339c27d..583001bcfb8fc8c10a1e088b451d7de12adc2aa8 100644 (file)
@@ -73,7 +73,7 @@ int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
        if (index >= OCRDMA_MAX_SGID)
                return -EINVAL;
 
-       ret = ib_get_cached_gid(ibdev, port, index, sgid);
+       ret = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
        if (ret == -EAGAIN) {
                memcpy(sgid, &zgid, sizeof(*sgid));
                return 0;
@@ -1013,6 +1013,7 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 
        (void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
 
+       kfree(mr->pages);
        ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 
        /* it could be user registered memory. */
@@ -1997,13 +1998,13 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
 {
        struct ocrdma_ewqe_ud_hdr *ud_hdr =
                (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
-       struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
+       struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
 
-       ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
+       ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
        if (qp->qp_type == IB_QPT_GSI)
                ud_hdr->qkey = qp->qkey;
        else
-               ud_hdr->qkey = wr->wr.ud.remote_qkey;
+               ud_hdr->qkey = ud_wr(wr)->remote_qkey;
        ud_hdr->rsvd_ahid = ah->id;
        if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
                hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
@@ -2106,9 +2107,9 @@ static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
        status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
        if (status)
                return status;
-       ext_rw->addr_lo = wr->wr.rdma.remote_addr;
-       ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
-       ext_rw->lrkey = wr->wr.rdma.rkey;
+       ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
+       ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
+       ext_rw->lrkey = rdma_wr(wr)->rkey;
        ext_rw->len = hdr->total_len;
        return 0;
 }
@@ -2126,46 +2127,12 @@ static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
        hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
        hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
 
-       ext_rw->addr_lo = wr->wr.rdma.remote_addr;
-       ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
-       ext_rw->lrkey = wr->wr.rdma.rkey;
+       ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
+       ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
+       ext_rw->lrkey = rdma_wr(wr)->rkey;
        ext_rw->len = hdr->total_len;
 }
 
-static void build_frmr_pbes(struct ib_send_wr *wr, struct ocrdma_pbl *pbl_tbl,
-                           struct ocrdma_hw_mr *hwmr)
-{
-       int i;
-       u64 buf_addr = 0;
-       int num_pbes;
-       struct ocrdma_pbe *pbe;
-
-       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-       num_pbes = 0;
-
-       /* go through the OS phy regions & fill hw pbe entries into pbls. */
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
-               /* number of pbes can be more for one OS buf, when
-                * buffers are of different sizes.
-                * split the ib_buf to one or more pbes.
-                */
-               buf_addr = wr->wr.fast_reg.page_list->page_list[i];
-               pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
-               pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
-               num_pbes += 1;
-               pbe++;
-
-               /* if the pbl is full storing the pbes,
-                * move to next pbl.
-               */
-               if (num_pbes == (hwmr->pbl_size/sizeof(u64))) {
-                       pbl_tbl++;
-                       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
-               }
-       }
-       return;
-}
-
 static int get_encoded_page_size(int pg_sz)
 {
        /* Max size is 256M 4096 << 16 */
@@ -2176,48 +2143,59 @@ static int get_encoded_page_size(int pg_sz)
        return i;
 }
 
-
-static int ocrdma_build_fr(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
-                          struct ib_send_wr *wr)
+static int ocrdma_build_reg(struct ocrdma_qp *qp,
+                           struct ocrdma_hdr_wqe *hdr,
+                           struct ib_reg_wr *wr)
 {
        u64 fbo;
        struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
-       struct ocrdma_mr *mr;
-       struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
+       struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
+       struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
+       struct ocrdma_pbe *pbe;
        u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
+       int num_pbes = 0, i;
 
        wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
 
-       if (wr->wr.fast_reg.page_list_len > dev->attr.max_pages_per_frmr)
-               return -EINVAL;
-
        hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
        hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
 
-       if (wr->wr.fast_reg.page_list_len == 0)
-               BUG();
-       if (wr->wr.fast_reg.access_flags & IB_ACCESS_LOCAL_WRITE)
+       if (wr->access & IB_ACCESS_LOCAL_WRITE)
                hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
-       if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_WRITE)
+       if (wr->access & IB_ACCESS_REMOTE_WRITE)
                hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
-       if (wr->wr.fast_reg.access_flags & IB_ACCESS_REMOTE_READ)
+       if (wr->access & IB_ACCESS_REMOTE_READ)
                hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
-       hdr->lkey = wr->wr.fast_reg.rkey;
-       hdr->total_len = wr->wr.fast_reg.length;
+       hdr->lkey = wr->key;
+       hdr->total_len = mr->ibmr.length;
 
-       fbo = wr->wr.fast_reg.iova_start -
-           (wr->wr.fast_reg.page_list->page_list[0] & PAGE_MASK);
+       fbo = mr->ibmr.iova - mr->pages[0];
 
-       fast_reg->va_hi = upper_32_bits(wr->wr.fast_reg.iova_start);
-       fast_reg->va_lo = (u32) (wr->wr.fast_reg.iova_start & 0xffffffff);
+       fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
+       fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
        fast_reg->fbo_hi = upper_32_bits(fbo);
        fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
-       fast_reg->num_sges = wr->wr.fast_reg.page_list_len;
-       fast_reg->size_sge =
-               get_encoded_page_size(1 << wr->wr.fast_reg.page_shift);
-       mr = (struct ocrdma_mr *) (unsigned long)
-               dev->stag_arr[(hdr->lkey >> 8) & (OCRDMA_MAX_STAG - 1)];
-       build_frmr_pbes(wr, mr->hwmr.pbl_table, &mr->hwmr);
+       fast_reg->num_sges = mr->npages;
+       fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
+
+       pbe = pbl_tbl->va;
+       for (i = 0; i < mr->npages; i++) {
+               u64 buf_addr = mr->pages[i];
+
+               pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
+               pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
+               num_pbes += 1;
+               pbe++;
+
+               /* if the current pbl is full of pbes,
+                * move on to the next pbl.
+                */
+               if (num_pbes == (mr->hwmr.pbl_size/sizeof(u64))) {
+                       pbl_tbl++;
+                       pbe = (struct ocrdma_pbe *)pbl_tbl->va;
+               }
+       }
+
        return 0;
 }
 
@@ -2300,8 +2278,8 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                                OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
                        hdr->lkey = wr->ex.invalidate_rkey;
                        break;
-               case IB_WR_FAST_REG_MR:
-                       status = ocrdma_build_fr(qp, hdr, wr);
+               case IB_WR_REG_MR:
+                       status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
                        break;
                default:
                        status = -EINVAL;
@@ -2567,7 +2545,7 @@ static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
                ibwc->opcode = IB_WC_SEND;
                break;
        case OCRDMA_FR_MR:
-               ibwc->opcode = IB_WC_FAST_REG_MR;
+               ibwc->opcode = IB_WC_REG_MR;
                break;
        case OCRDMA_LKEY_INV:
                ibwc->opcode = IB_WC_LOCAL_INV;
@@ -2933,16 +2911,11 @@ expand_cqe:
        }
 stop_cqe:
        cq->getp = cur_getp;
-       if (cq->deferred_arm) {
-               ocrdma_ring_cq_db(dev, cq->id, true, cq->deferred_sol,
-                                 polled_hw_cqes);
+       if (cq->deferred_arm || polled_hw_cqes) {
+               ocrdma_ring_cq_db(dev, cq->id, cq->deferred_arm,
+                                 cq->deferred_sol, polled_hw_cqes);
                cq->deferred_arm = false;
                cq->deferred_sol = false;
-       } else {
-               /* We need to pop the CQE. No need to arm */
-               ocrdma_ring_cq_db(dev, cq->id, false, cq->deferred_sol,
-                                 polled_hw_cqes);
-               cq->deferred_sol = false;
        }
 
        return i;
@@ -3058,6 +3031,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
+       mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+       if (!mr->pages) {
+               status = -ENOMEM;
+               goto pl_err;
+       }
+
        status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
        if (status)
                goto pbl_err;
@@ -3081,30 +3060,12 @@ struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd,
 mbx_err:
        ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
 pbl_err:
+       kfree(mr->pages);
+pl_err:
        kfree(mr);
        return ERR_PTR(-ENOMEM);
 }
 
-struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
-                                                         *ibdev,
-                                                         int page_list_len)
-{
-       struct ib_fast_reg_page_list *frmr_list;
-       int size;
-
-       size = sizeof(*frmr_list) + (page_list_len * sizeof(u64));
-       frmr_list = kzalloc(size, GFP_KERNEL);
-       if (!frmr_list)
-               return ERR_PTR(-ENOMEM);
-       frmr_list->page_list = (u64 *)(frmr_list + 1);
-       return frmr_list;
-}
-
-void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
-{
-       kfree(page_list);
-}
-
 #define MAX_KERNEL_PBE_SIZE 65536
 static inline int count_kernel_pbes(struct ib_phys_buf *buf_list,
                                    int buf_cnt, u32 *pbe_size)
@@ -3267,3 +3228,26 @@ pbl_err:
        kfree(mr);
        return ERR_PTR(status);
 }
+
+static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
+{
+       struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
+
+       if (unlikely(mr->npages == mr->hwmr.num_pbes))
+               return -ENOMEM;
+
+       mr->pages[mr->npages++] = addr;
+
+       return 0;
+}
+
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+                    struct scatterlist *sg,
+                    int sg_nents)
+{
+       struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
+
+       mr->npages = 0;
+
+       return ib_sg_to_pages(ibmr, sg, sg_nents, ocrdma_set_page);
+}
index 308c16857a5d03e2d3605136abdf8d3b11ad5361..a2f3b4dc20b0363d0a09769aaee2d69dd957ae33 100644 (file)
@@ -125,9 +125,8 @@ struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
 struct ib_mr *ocrdma_alloc_mr(struct ib_pd *pd,
                              enum ib_mr_type mr_type,
                              u32 max_num_sg);
-struct ib_fast_reg_page_list *ocrdma_alloc_frmr_page_list(struct ib_device
-                                                       *ibdev,
-                                                       int page_list_len);
-void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list);
+int ocrdma_map_mr_sg(struct ib_mr *ibmr,
+                    struct scatterlist *sg,
+                    int sg_nents);
 
 #endif                         /* __OCRDMA_VERBS_H__ */
index 5afaa218508d222f901252194c5872a18a79be90..d725c565518dc86aee6354390c06e5224c6596d8 100644 (file)
@@ -336,14 +336,15 @@ bail:
 }
 
 /*
- * Initialize the memory region specified by the work reqeust.
+ * Initialize the memory region specified by the work request.
  */
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
+int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
 {
        struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
        struct qib_pd *pd = to_ipd(qp->ibqp.pd);
-       struct qib_mregion *mr;
-       u32 rkey = wr->wr.fast_reg.rkey;
+       struct qib_mr *mr = to_imr(wr->mr);
+       struct qib_mregion *mrg;
+       u32 key = wr->key;
        unsigned i, n, m;
        int ret = -EINVAL;
        unsigned long flags;
@@ -351,33 +352,33 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
        size_t ps;
 
        spin_lock_irqsave(&rkt->lock, flags);
-       if (pd->user || rkey == 0)
+       if (pd->user || key == 0)
                goto bail;
 
-       mr = rcu_dereference_protected(
-               rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
+       mrg = rcu_dereference_protected(
+               rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
                lockdep_is_held(&rkt->lock));
-       if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
+       if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
                goto bail;
 
-       if (wr->wr.fast_reg.page_list_len > mr->max_segs)
+       if (mr->npages > mrg->max_segs)
                goto bail;
 
-       ps = 1UL << wr->wr.fast_reg.page_shift;
-       if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
+       ps = mr->ibmr.page_size;
+       if (mr->ibmr.length > ps * mr->npages)
                goto bail;
 
-       mr->user_base = wr->wr.fast_reg.iova_start;
-       mr->iova = wr->wr.fast_reg.iova_start;
-       mr->lkey = rkey;
-       mr->length = wr->wr.fast_reg.length;
-       mr->access_flags = wr->wr.fast_reg.access_flags;
-       page_list = wr->wr.fast_reg.page_list->page_list;
+       mrg->user_base = mr->ibmr.iova;
+       mrg->iova = mr->ibmr.iova;
+       mrg->lkey = key;
+       mrg->length = mr->ibmr.length;
+       mrg->access_flags = wr->access;
+       page_list = mr->pages;
        m = 0;
        n = 0;
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
-               mr->map[m]->segs[n].vaddr = (void *) page_list[i];
-               mr->map[m]->segs[n].length = ps;
+       for (i = 0; i < mr->npages; i++) {
+               mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
+               mrg->map[m]->segs[n].length = ps;
                if (++n == QIB_SEGSZ) {
                        m++;
                        n = 0;
index 19220dcb9a3b2a1ea00e4aed2d87eef09269a92d..294f5c706be972b4b6563e37a32064fade665be2 100644 (file)
@@ -303,6 +303,7 @@ int qib_dereg_mr(struct ib_mr *ibmr)
        int ret = 0;
        unsigned long timeout;
 
+       kfree(mr->pages);
        qib_free_lkey(&mr->mr);
 
        qib_put_mr(&mr->mr); /* will set completion if last */
@@ -323,7 +324,7 @@ out:
 
 /*
  * Allocate a memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
+ * IB_WR_REG_MR send work request.
  *
  * Return the memory region on success, otherwise return an errno.
  */
@@ -340,37 +341,38 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
        if (IS_ERR(mr))
                return (struct ib_mr *)mr;
 
+       mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+       if (!mr->pages)
+               goto err;
+
        return &mr->ibmr;
+
+err:
+       qib_dereg_mr(&mr->ibmr);
+       return ERR_PTR(-ENOMEM);
 }
 
-struct ib_fast_reg_page_list *
-qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
+static int qib_set_page(struct ib_mr *ibmr, u64 addr)
 {
-       unsigned size = page_list_len * sizeof(u64);
-       struct ib_fast_reg_page_list *pl;
-
-       if (size > PAGE_SIZE)
-               return ERR_PTR(-EINVAL);
-
-       pl = kzalloc(sizeof(*pl), GFP_KERNEL);
-       if (!pl)
-               return ERR_PTR(-ENOMEM);
+       struct qib_mr *mr = to_imr(ibmr);
 
-       pl->page_list = kzalloc(size, GFP_KERNEL);
-       if (!pl->page_list)
-               goto err_free;
+       if (unlikely(mr->npages == mr->mr.max_segs))
+               return -ENOMEM;
 
-       return pl;
+       mr->pages[mr->npages++] = addr;
 
-err_free:
-       kfree(pl);
-       return ERR_PTR(-ENOMEM);
+       return 0;
 }
 
-void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
+int qib_map_mr_sg(struct ib_mr *ibmr,
+                 struct scatterlist *sg,
+                 int sg_nents)
 {
-       kfree(pl->page_list);
-       kfree(pl);
+       struct qib_mr *mr = to_imr(ibmr);
+
+       mr->npages = 0;
+
+       return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
 }
 
 /**
index 4fa88ba2963e6ba21186ae5eb095ea531b741e97..40f85bb3e0d3bdce5289a5c8c9c2418df33e4ca4 100644 (file)
@@ -436,7 +436,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
                        if (qp->ibqp.qp_type == IB_QPT_UD ||
                            qp->ibqp.qp_type == IB_QPT_SMI ||
                            qp->ibqp.qp_type == IB_QPT_GSI)
-                               atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+                               atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
                        if (++qp->s_last >= qp->s_size)
                                qp->s_last = 0;
                }
index 4544d6f88ad77c7f7c69fd6e6d4a138188f493b3..e6b7556d522108951bf38ebc6750b991eb232285 100644 (file)
@@ -373,10 +373,11 @@ int qib_make_rc_req(struct qib_qp *qp)
                                qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
                                goto bail;
                        }
+
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / sizeof(u32);
                        wqe->lpsn = wqe->psn;
@@ -386,15 +387,15 @@ int qib_make_rc_req(struct qib_qp *qp)
                                len = pmtu;
                                break;
                        }
-                       if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+                       if (wqe->rdma_wr.wr.opcode == IB_WR_RDMA_WRITE)
                                qp->s_state = OP(RDMA_WRITE_ONLY);
                        else {
-                               qp->s_state =
-                                       OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+                               qp->s_state = OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
                                /* Immediate data comes after RETH */
-                               ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+                               ohdr->u.rc.imm_data =
+                                       wqe->rdma_wr.wr.ex.imm_data;
                                hwords += 1;
-                               if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+                               if (wqe->rdma_wr.wr.send_flags & IB_SEND_SOLICITED)
                                        bth0 |= IB_BTH_SOLICITED;
                        }
                        bth2 |= IB_BTH_REQ_ACK;
@@ -424,10 +425,11 @@ int qib_make_rc_req(struct qib_qp *qp)
                                        qp->s_next_psn += (len - 1) / pmtu;
                                wqe->lpsn = qp->s_next_psn++;
                        }
+
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        qp->s_state = OP(RDMA_READ_REQUEST);
                        hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -455,24 +457,24 @@ int qib_make_rc_req(struct qib_qp *qp)
                                        qp->s_lsn++;
                                wqe->lpsn = wqe->psn;
                        }
-                       if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+                       if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
                                qp->s_state = OP(COMPARE_SWAP);
                                ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.swap);
+                                       wqe->atomic_wr.swap);
                                ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.compare_add);
+                                       wqe->atomic_wr.compare_add);
                        } else {
                                qp->s_state = OP(FETCH_ADD);
                                ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.compare_add);
+                                       wqe->atomic_wr.compare_add);
                                ohdr->u.atomic_eth.compare_data = 0;
                        }
                        ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-                               wqe->wr.wr.atomic.remote_addr >> 32);
+                               wqe->atomic_wr.remote_addr >> 32);
                        ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-                               wqe->wr.wr.atomic.remote_addr);
+                               wqe->atomic_wr.remote_addr);
                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
-                               wqe->wr.wr.atomic.rkey);
+                               wqe->atomic_wr.rkey);
                        hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
                        ss = NULL;
                        len = 0;
@@ -597,9 +599,9 @@ int qib_make_rc_req(struct qib_qp *qp)
                 */
                len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
                ohdr->u.rc.reth.vaddr =
-                       cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+                       cpu_to_be64(wqe->rdma_wr.remote_addr + len);
                ohdr->u.rc.reth.rkey =
-                       cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                       cpu_to_be32(wqe->rdma_wr.rkey);
                ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
                qp->s_state = OP(RDMA_READ_REQUEST);
                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
index 22e356ca8058af1511d8a2a4af4947bd2c0fc892..b1aa21bdd484486b50a1c6dd02d6945bacb8dffa 100644 (file)
@@ -459,8 +459,8 @@ again:
                if (wqe->length == 0)
                        break;
                if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-                                         wqe->wr.wr.rdma.remote_addr,
-                                         wqe->wr.wr.rdma.rkey,
+                                         wqe->rdma_wr.remote_addr,
+                                         wqe->rdma_wr.rkey,
                                          IB_ACCESS_REMOTE_WRITE)))
                        goto acc_err;
                qp->r_sge.sg_list = NULL;
@@ -472,8 +472,8 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
                        goto inv_err;
                if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-                                         wqe->wr.wr.rdma.remote_addr,
-                                         wqe->wr.wr.rdma.rkey,
+                                         wqe->rdma_wr.remote_addr,
+                                         wqe->rdma_wr.rkey,
                                          IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                release = 0;
@@ -490,18 +490,18 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
                        goto inv_err;
                if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-                                         wqe->wr.wr.atomic.remote_addr,
-                                         wqe->wr.wr.atomic.rkey,
+                                         wqe->atomic_wr.remote_addr,
+                                         wqe->atomic_wr.rkey,
                                          IB_ACCESS_REMOTE_ATOMIC)))
                        goto acc_err;
                /* Perform atomic OP and save result. */
                maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = wqe->wr.wr.atomic.compare_add;
+               sdata = wqe->atomic_wr.compare_add;
                *(u64 *) sqp->s_sge.sge.vaddr =
-                       (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+                       (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
                        (u64) atomic64_add_return(sdata, maddr) - sdata :
                        (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     sdata, wqe->wr.wr.atomic.swap);
+                                     sdata, wqe->atomic_wr.swap);
                qib_put_mr(qp->r_sge.sge.mr);
                qp->r_sge.num_sge = 0;
                goto send_comp;
@@ -785,7 +785,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
        if (qp->ibqp.qp_type == IB_QPT_UD ||
            qp->ibqp.qp_type == IB_QPT_SMI ||
            qp->ibqp.qp_type == IB_QPT_GSI)
-               atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+               atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
 
        /* See ch. 11.2.4.1 and 10.7.3.1 */
        if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
index aa3a8035bb68f257a0400bbedd8655e93584c7cb..06a564589c35d1212065cae4e8c77c4ec3647b1c 100644 (file)
@@ -129,9 +129,9 @@ int qib_make_uc_req(struct qib_qp *qp)
                case IB_WR_RDMA_WRITE:
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / 4;
                        if (len > pmtu) {
index 26243b722b5e979c1324471b6e17871d3ef22540..59193f67ea78780fc4be1dd367814f1634f849ac 100644 (file)
@@ -59,7 +59,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
        u32 length;
        enum ib_qp_type sqptype, dqptype;
 
-       qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+       qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
        if (!qp) {
                ibp->n_pkt_drops++;
                return;
@@ -76,7 +76,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
                goto drop;
        }
 
-       ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+       ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
        ppd = ppd_from_ibp(ibp);
 
        if (qp->ibqp.qp_num > 1) {
@@ -106,8 +106,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
        if (qp->ibqp.qp_num) {
                u32 qkey;
 
-               qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
-                       sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+               qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+                       sqp->qkey : swqe->ud_wr.remote_qkey;
                if (unlikely(qkey != qp->qkey)) {
                        u16 lid;
 
@@ -210,7 +210,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
        wc.qp = &qp->ibqp;
        wc.src_qp = sqp->ibqp.qp_num;
        wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
-               swqe->wr.wr.ud.pkey_index : 0;
+               swqe->ud_wr.pkey_index : 0;
        wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
        wc.sl = ah_attr->sl;
        wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
@@ -277,7 +277,7 @@ int qib_make_ud_req(struct qib_qp *qp)
        /* Construct the header. */
        ibp = to_iport(qp->ibqp.device, qp->port_num);
        ppd = ppd_from_ibp(ibp);
-       ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+       ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
        if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
                if (ah_attr->dlid != QIB_PERMISSIVE_LID)
                        this_cpu_inc(ibp->pmastats->n_multicast_xmit);
@@ -363,7 +363,7 @@ int qib_make_ud_req(struct qib_qp *qp)
        bth0 |= extra_bytes << 20;
        bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
                qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
-                            wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+                            wqe->ud_wr.pkey_index : qp->s_pkey_index);
        ohdr->bth[0] = cpu_to_be32(bth0);
        /*
         * Use the multicast QP if the destination LID is a multicast LID.
@@ -371,14 +371,14 @@ int qib_make_ud_req(struct qib_qp *qp)
        ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
                ah_attr->dlid != QIB_PERMISSIVE_LID ?
                cpu_to_be32(QIB_MULTICAST_QPN) :
-               cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+               cpu_to_be32(wqe->ud_wr.remote_qpn);
        ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
        /*
         * Qkeys with the high order bit set mean use the
         * qkey from the QP context instead of the WR (see 10.2.5).
         */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
-                                        qp->qkey : wqe->wr.wr.ud.remote_qkey);
+       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+                                        qp->qkey : wqe->ud_wr.remote_qkey);
        ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
index 3dcc4985b60ff861d5d7e2b5c9a8a9c780bccf2d..de6cb6fcda8df3d5a1060aceecf20c832ba81ace 100644 (file)
@@ -362,8 +362,8 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
         * undefined operations.
         * Make sure buffer is large enough to hold the result for atomics.
         */
-       if (wr->opcode == IB_WR_FAST_REG_MR) {
-               if (qib_fast_reg_mr(qp, wr))
+       if (wr->opcode == IB_WR_REG_MR) {
+               if (qib_reg_mr(qp, reg_wr(wr)))
                        goto bail_inval;
        } else if (qp->ibqp.qp_type == IB_QPT_UC) {
                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
@@ -374,7 +374,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
                    wr->opcode != IB_WR_SEND_WITH_IMM)
                        goto bail_inval;
                /* Check UD destination address PD */
-               if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+               if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
                        goto bail_inval;
        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
                goto bail_inval;
@@ -397,7 +397,23 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
        rkt = &to_idev(qp->ibqp.device)->lk_table;
        pd = to_ipd(qp->ibqp.pd);
        wqe = get_swqe_ptr(qp, qp->s_head);
-       wqe->wr = *wr;
+
+       if (qp->ibqp.qp_type != IB_QPT_UC &&
+           qp->ibqp.qp_type != IB_QPT_RC)
+               memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+       else if (wr->opcode == IB_WR_REG_MR)
+               memcpy(&wqe->reg_wr, reg_wr(wr),
+                       sizeof(wqe->reg_wr));
+       else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+                wr->opcode == IB_WR_RDMA_WRITE ||
+                wr->opcode == IB_WR_RDMA_READ)
+               memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+       else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+               memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+       else
+               memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
        wqe->length = 0;
        j = 0;
        if (wr->num_sge) {
@@ -426,7 +442,7 @@ static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
                                  qp->port_num - 1)->ibmtu)
                goto bail_inval_free;
        else
-               atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+               atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
        wqe->ssn = qp->s_ssn++;
        qp->s_head = next;
 
@@ -2244,8 +2260,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
        ibdev->reg_user_mr = qib_reg_user_mr;
        ibdev->dereg_mr = qib_dereg_mr;
        ibdev->alloc_mr = qib_alloc_mr;
-       ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
-       ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
+       ibdev->map_mr_sg = qib_map_mr_sg;
        ibdev->alloc_fmr = qib_alloc_fmr;
        ibdev->map_phys_fmr = qib_map_phys_fmr;
        ibdev->unmap_fmr = qib_unmap_fmr;
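
qib_post_one_send() above now copies only the union member selected by the
opcode, because the extended work requests are distinct types that embed the
legacy struct ib_send_wr as a member named wr. The ud_wr(), rdma_wr(),
atomic_wr() and reg_wr() helpers used throughout this diff are expected to be
thin container_of() wrappers in include/rdma/ib_verbs.h, roughly:

static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
{
        return container_of(wr, struct ib_rdma_wr, wr);
}

static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
{
        return container_of(wr, struct ib_ud_wr, wr);
}

Downcasting this way is only valid when the posted request really was built
as the corresponding extended type, which is why the copy above dispatches on
the opcode and QP type first.
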
index a08df70e85038a220a0dce5a3ccc30f9d165bcd5..2baf5ad251ed24a02682051058e6d1a406419e04 100644 (file)
@@ -330,6 +330,8 @@ struct qib_mr {
        struct ib_mr ibmr;
        struct ib_umem *umem;
        struct qib_mregion mr;  /* must be last */
+       u64 *pages;
+       u32 npages;
 };
 
 /*
@@ -338,7 +340,13 @@ struct qib_mr {
  * in qp->s_max_sge.
  */
 struct qib_swqe {
-       struct ib_send_wr wr;   /* don't use wr.sg_list */
+       union {
+               struct ib_send_wr wr;   /* don't use wr.sg_list */
+               struct ib_ud_wr ud_wr;
+               struct ib_reg_wr reg_wr;
+               struct ib_rdma_wr rdma_wr;
+               struct ib_atomic_wr atomic_wr;
+       };
        u32 psn;                /* first packet sequence number */
        u32 lpsn;               /* last packet sequence number */
        u32 ssn;                /* send sequence number */
@@ -1038,12 +1046,11 @@ struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
                           enum ib_mr_type mr_type,
                           u32 max_entries);
 
-struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
-                               struct ib_device *ibdev, int page_list_len);
-
-void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+int qib_map_mr_sg(struct ib_mr *ibmr,
+                 struct scatterlist *sg,
+                 int sg_nents);
 
-int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
 
 struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                             struct ib_fmr_attr *fmr_attr);
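
The qib_swqe union above mirrors what consumers now build on their side: one
typed work request per operation, posted through the embedded struct
ib_send_wr. A sketch of posting an RDMA WRITE in the new style (the helper
name and parameters are illustrative; the local buffer is assumed to be DMA
mapped already):

static int example_post_rdma_write(struct ib_qp *qp, struct ib_pd *pd,
                                   u64 dma_addr, u32 len, u64 raddr, u32 rkey)
{
        struct ib_rdma_wr wr;
        struct ib_send_wr *bad_wr;
        struct ib_sge sge = {
                .addr   = dma_addr,
                .length = len,
                .lkey   = pd->local_dma_lkey,
        };

        memset(&wr, 0, sizeof(wr));
        wr.wr.opcode     = IB_WR_RDMA_WRITE;    /* selects the rdma_wr member */
        wr.wr.sg_list    = &sge;
        wr.wr.num_sge    = 1;
        wr.wr.send_flags = IB_SEND_SIGNALED;
        wr.remote_addr   = raddr;
        wr.rkey          = rkey;

        return ib_post_send(qp, &wr.wr, &bad_wr);
}
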
index 0c15bd885035ee5fe5110e1317cafe09e762fe07..565c881a44ba069f69f64da6b1a215fdc3bf7f19 100644 (file)
@@ -343,16 +343,15 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
        netdev = pci_get_drvdata(dev);
 
        us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
-       if (IS_ERR_OR_NULL(us_ibdev)) {
+       if (!us_ibdev) {
                usnic_err("Device %s context alloc failed\n",
                                netdev_name(pci_get_drvdata(dev)));
-               return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+               return ERR_PTR(-EFAULT);
        }
 
        us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
-       if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
-               usnic_err("Failed to alloc ufdev for %s with err %ld\n",
-                               pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+       if (!us_ibdev->ufdev) {
+               usnic_err("Failed to alloc ufdev for %s\n", pci_name(dev));
                goto err_dealloc;
        }
 
index 85dc3f989ff72aa565c85efb4d07fdc7772d3c93..fcea3a24d3eb310ef0ee3c573a3ef9e161c8e809 100644 (file)
@@ -236,8 +236,8 @@ create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
 
        /* Create Flow Handle */
        qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
-       if (IS_ERR_OR_NULL(qp_flow)) {
-               err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+       if (!qp_flow) {
+               err = -ENOMEM;
                goto out_dealloc_flow;
        }
        qp_flow->flow = flow;
@@ -311,8 +311,8 @@ create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
 
        /* Create qp_flow */
        qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
-       if (IS_ERR_OR_NULL(qp_flow)) {
-               err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+       if (!qp_flow) {
+               err = -ENOMEM;
                goto out_dealloc_flow;
        }
        qp_flow->flow = flow;
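
kzalloc() and ib_alloc_device() report failure with a plain NULL, never an
ERR_PTR()-encoded pointer, so the removed IS_ERR_OR_NULL()/PTR_ERR() branches
could only ever have produced the fallback errno. The two error-return
conventions, as two independent fragments (variable names are illustrative):

        /* NULL-returning allocators: test for NULL, pick the errno. */
        qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
        if (!qp_flow)
                return ERR_PTR(-ENOMEM);

        /* ERR_PTR-returning APIs: test with IS_ERR(), propagate PTR_ERR(). */
        mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
        if (IS_ERR(mr))
                return PTR_ERR(mr);
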
index edc5b8565d6d9eb5ba6e22e1baefd21d53986b5f..3ede103097547d4355646d322b5570ebb421d2ec 100644 (file)
@@ -360,7 +360,7 @@ struct ipoib_dev_priv {
        unsigned             tx_head;
        unsigned             tx_tail;
        struct ib_sge        tx_sge[MAX_SKB_FRAGS + 1];
-       struct ib_send_wr    tx_wr;
+       struct ib_ud_wr      tx_wr;
        unsigned             tx_outstanding;
        struct ib_wc         send_wc[MAX_SEND_CQE];
 
@@ -528,7 +528,7 @@ static inline void ipoib_build_sge(struct ipoib_dev_priv *priv,
                priv->tx_sge[i + off].addr = mapping[i + off];
                priv->tx_sge[i + off].length = skb_frag_size(&frags[i]);
        }
-       priv->tx_wr.num_sge          = nr_frags + off;
+       priv->tx_wr.wr.num_sge       = nr_frags + off;
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
index c78dc1638030093298c28e13605c86845ce1fcfc..3ae9726efb9837512a62214705bf5e8e9561a02c 100644 (file)
@@ -700,9 +700,9 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 
        ipoib_build_sge(priv, tx_req);
 
-       priv->tx_wr.wr_id       = wr_id | IPOIB_OP_CM;
+       priv->tx_wr.wr.wr_id    = wr_id | IPOIB_OP_CM;
 
-       return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
+       return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
 }
 
 void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
index d266667ca9b82273dd4b7abb4856f69b29f65174..5ea0c14070d1f2d8af36a05c15206c63f406b97d 100644 (file)
@@ -518,19 +518,19 @@ static inline int post_send(struct ipoib_dev_priv *priv,
 
        ipoib_build_sge(priv, tx_req);
 
-       priv->tx_wr.wr_id            = wr_id;
-       priv->tx_wr.wr.ud.remote_qpn = qpn;
-       priv->tx_wr.wr.ud.ah         = address;
+       priv->tx_wr.wr.wr_id    = wr_id;
+       priv->tx_wr.remote_qpn  = qpn;
+       priv->tx_wr.ah          = address;
 
        if (head) {
-               priv->tx_wr.wr.ud.mss    = skb_shinfo(skb)->gso_size;
-               priv->tx_wr.wr.ud.header = head;
-               priv->tx_wr.wr.ud.hlen   = hlen;
-               priv->tx_wr.opcode       = IB_WR_LSO;
+               priv->tx_wr.mss         = skb_shinfo(skb)->gso_size;
+               priv->tx_wr.header      = head;
+               priv->tx_wr.hlen        = hlen;
+               priv->tx_wr.wr.opcode   = IB_WR_LSO;
        } else
-               priv->tx_wr.opcode       = IB_WR_SEND;
+               priv->tx_wr.wr.opcode   = IB_WR_SEND;
 
-       return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
+       return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
 }
 
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -583,9 +583,9 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
        }
 
        if (skb->ip_summed == CHECKSUM_PARTIAL)
-               priv->tx_wr.send_flags |= IB_SEND_IP_CSUM;
+               priv->tx_wr.wr.send_flags |= IB_SEND_IP_CSUM;
        else
-               priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+               priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
 
        if (++priv->tx_outstanding == ipoib_sendq_size) {
                ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
index babba05d7a0eb707f472d7de3cb06843a0844eff..7d3281866ffcd520d4bca543a9ad0544f2ec159f 100644 (file)
@@ -461,7 +461,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
                netdev_update_features(dev);
                dev_set_mtu(dev, ipoib_cm_max_mtu(dev));
                rtnl_unlock();
-               priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+               priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
 
                ipoib_flush_paths(dev);
                rtnl_lock();
@@ -1860,7 +1860,7 @@ static struct net_device *ipoib_add_port(const char *format,
        priv->dev->broadcast[8] = priv->pkey >> 8;
        priv->dev->broadcast[9] = priv->pkey & 0xff;
 
-       result = ib_query_gid(hca, port, 0, &priv->local_gid);
+       result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
        if (result) {
                printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
                       hca->name, port, result);
index d750a86042f3d8da0736c23a52f41737b329c37d..f357ca67a41cd859b2ff5e704621f72813b91fa8 100644 (file)
@@ -245,7 +245,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
                priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                spin_unlock_irq(&priv->lock);
-               priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
+               priv->tx_wr.remote_qkey = priv->qkey;
                set_qkey = 1;
        }
 
@@ -561,7 +561,7 @@ void ipoib_mcast_join_task(struct work_struct *work)
        }
        priv->local_lid = port_attr.lid;
 
-       if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
+       if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid, NULL))
                ipoib_warn(priv, "ib_query_gid() failed\n");
        else
                memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
index 78845b6e8b812737477ce68dcbc6c1712477d23d..d48c5bae78774663c17e72ed1e4c87475e1005ad 100644 (file)
@@ -221,9 +221,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
                priv->tx_sge[i].lkey = priv->pd->local_dma_lkey;
 
-       priv->tx_wr.opcode      = IB_WR_SEND;
-       priv->tx_wr.sg_list     = priv->tx_sge;
-       priv->tx_wr.send_flags  = IB_SEND_SIGNALED;
+       priv->tx_wr.wr.opcode           = IB_WR_SEND;
+       priv->tx_wr.wr.sg_list          = priv->tx_sge;
+       priv->tx_wr.wr.send_flags       = IB_SEND_SIGNALED;
 
        priv->rx_sge[0].lkey = priv->pd->local_dma_lkey;
 
index f58ff96b6cbb9778153b4ab79794f8a206fa9343..9080161e01af1614afa5796a134d7fd890a1be45 100644 (file)
@@ -111,7 +111,7 @@ module_param_named(pi_guard, iser_pi_guard, int, S_IRUGO);
 MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
 
 /*
- * iscsi_iser_recv() - Process a successfull recv completion
+ * iscsi_iser_recv() - Process a successful recv completion
  * @conn:         iscsi connection
  * @hdr:          iscsi header
  * @rx_data:      buffer containing receive data payload
@@ -126,7 +126,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 {
        int rc = 0;
        int datalen;
-       int ahslen;
 
        /* verify PDU length */
        datalen = ntoh24(hdr->dlength);
@@ -141,9 +140,6 @@ iscsi_iser_recv(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                iser_dbg("aligned datalen (%d) hdr, %d (IB)\n",
                        datalen, rx_data_len);
 
-       /* read AHS */
-       ahslen = hdr->hlength * 4;
-
        rc = iscsi_complete_pdu(conn, hdr, rx_data, rx_data_len);
        if (rc && rc != ISCSI_ERR_NO_SCSI_CMD)
                goto error;
@@ -766,9 +762,7 @@ iscsi_iser_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *s
        stats->r2t_pdus = conn->r2t_pdus_cnt; /* always 0 */
        stats->tmfcmd_pdus = conn->tmfcmd_pdus_cnt;
        stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
-       stats->custom_length = 1;
-       strcpy(stats->custom[0].desc, "fmr_unalign_cnt");
-       stats->custom[0].value = conn->fmr_unalign_cnt;
+       stats->custom_length = 0;
 }
 
 static int iscsi_iser_get_ep_param(struct iscsi_endpoint *ep,
@@ -973,6 +967,13 @@ static umode_t iser_attr_is_visible(int param_type, int param)
        return 0;
 }
 
+static int iscsi_iser_slave_alloc(struct scsi_device *sdev)
+{
+       blk_queue_virt_boundary(sdev->request_queue, ~MASK_4K);
+
+       return 0;
+}
+
 static struct scsi_host_template iscsi_iser_sht = {
        .module                 = THIS_MODULE,
        .name                   = "iSCSI Initiator over iSER",
@@ -985,7 +986,8 @@ static struct scsi_host_template iscsi_iser_sht = {
        .eh_device_reset_handler= iscsi_eh_device_reset,
        .eh_target_reset_handler = iscsi_eh_recover_target,
        .target_alloc           = iscsi_target_alloc,
-       .use_clustering         = DISABLE_CLUSTERING,
+       .use_clustering         = ENABLE_CLUSTERING,
+       .slave_alloc            = iscsi_iser_slave_alloc,
        .proc_name              = "iscsi_iser",
        .this_id                = -1,
        .track_queue_depth      = 1,
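
iSER now advertises a virtual boundary to the block layer instead of bouncing
misaligned buffers (the bounce-buffer path is deleted from iser_memory.c
further down). With a mask of ~MASK_4K, which works out to 4K minus one under
the driver's usual MASK_4K definition, the block layer splits requests so
that, roughly speaking, the resulting scatterlist has no gaps inside that
granularity, and every element can be fed straight into the registration page
list. The same hook for a hypothetical driver, as a sketch:

static int example_slave_alloc(struct scsi_device *sdev)
{
        /* Keep SG elements 4K-aligned internally (mask = page size - 1). */
        blk_queue_virt_boundary(sdev->request_queue, SZ_4K - 1);
        return 0;
}
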
index a5edd6ede692c7be3d1c6da2f355b062cea9e43e..8a5998e6a407997906e45864ac9799fdac468084 100644 (file)
@@ -227,18 +227,13 @@ enum iser_data_dir {
  * @size:         num entries of this sg
  * @data_len:     total beffer byte len
  * @dma_nents:    returned by dma_map_sg
- * @orig_sg:      pointer to the original sg list (in case
- *                we used a copy)
- * @orig_size:    num entris of orig sg list
  */
 struct iser_data_buf {
        struct scatterlist *sg;
-       unsigned int       size;
+       int                size;
        unsigned long      data_len;
        unsigned int       dma_nents;
-       struct scatterlist *orig_sg;
-       unsigned int       orig_size;
-  };
+};
 
 /* fwd declarations */
 struct iser_device;
@@ -300,7 +295,11 @@ struct iser_tx_desc {
        int                          num_sge;
        bool                         mapped;
        u8                           wr_idx;
-       struct ib_send_wr            wrs[ISER_MAX_WRS];
+       union iser_wr {
+               struct ib_send_wr               send;
+               struct ib_reg_wr                fast_reg;
+               struct ib_sig_handover_wr       sig;
+       } wrs[ISER_MAX_WRS];
        struct iser_mem_reg          data_reg;
        struct iser_mem_reg          prot_reg;
        struct ib_sig_attrs          sig_attrs;
@@ -413,7 +412,6 @@ struct iser_device {
  *
  * @mr:         memory region
  * @fmr_pool:   pool of fmrs
- * @frpl:       fast reg page list used by frwrs
  * @page_vec:   fast reg page list used by fmr pool
  * @mr_valid:   is mr valid indicator
  */
@@ -422,10 +420,7 @@ struct iser_reg_resources {
                struct ib_mr             *mr;
                struct ib_fmr_pool       *fmr_pool;
        };
-       union {
-               struct ib_fast_reg_page_list     *frpl;
-               struct iser_page_vec             *page_vec;
-       };
+       struct iser_page_vec             *page_vec;
        u8                                mr_valid:1;
 };
 
@@ -712,11 +707,11 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 static inline struct ib_send_wr *
 iser_tx_next_wr(struct iser_tx_desc *tx_desc)
 {
-       struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx];
+       struct ib_send_wr *cur_wr = &tx_desc->wrs[tx_desc->wr_idx].send;
        struct ib_send_wr *last_wr;
 
        if (tx_desc->wr_idx) {
-               last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1];
+               last_wr = &tx_desc->wrs[tx_desc->wr_idx - 1].send;
                last_wr->next = cur_wr;
        }
        tx_desc->wr_idx++;
index d511879d8cdfc862765f871b0876b43307e3b7c7..ffd00c42072959ed6747b5094e159cb34532675f 100644 (file)
@@ -661,48 +661,14 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
 
 void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 {
-       int is_rdma_data_aligned = 1;
-       int is_rdma_prot_aligned = 1;
        int prot_count = scsi_prot_sg_count(iser_task->sc);
 
-       /* if we were reading, copy back to unaligned sglist,
-        * anyway dma_unmap and free the copy
-        */
-       if (iser_task->data[ISER_DIR_IN].orig_sg) {
-               is_rdma_data_aligned = 0;
-               iser_finalize_rdma_unaligned_sg(iser_task,
-                                               &iser_task->data[ISER_DIR_IN],
-                                               ISER_DIR_IN);
-       }
-
-       if (iser_task->data[ISER_DIR_OUT].orig_sg) {
-               is_rdma_data_aligned = 0;
-               iser_finalize_rdma_unaligned_sg(iser_task,
-                                               &iser_task->data[ISER_DIR_OUT],
-                                               ISER_DIR_OUT);
-       }
-
-       if (iser_task->prot[ISER_DIR_IN].orig_sg) {
-               is_rdma_prot_aligned = 0;
-               iser_finalize_rdma_unaligned_sg(iser_task,
-                                               &iser_task->prot[ISER_DIR_IN],
-                                               ISER_DIR_IN);
-       }
-
-       if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
-               is_rdma_prot_aligned = 0;
-               iser_finalize_rdma_unaligned_sg(iser_task,
-                                               &iser_task->prot[ISER_DIR_OUT],
-                                               ISER_DIR_OUT);
-       }
-
        if (iser_task->dir[ISER_DIR_IN]) {
                iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
-               if (is_rdma_data_aligned)
-                       iser_dma_unmap_task_data(iser_task,
-                                                &iser_task->data[ISER_DIR_IN],
-                                                DMA_FROM_DEVICE);
-               if (prot_count && is_rdma_prot_aligned)
+               iser_dma_unmap_task_data(iser_task,
+                                        &iser_task->data[ISER_DIR_IN],
+                                        DMA_FROM_DEVICE);
+               if (prot_count)
                        iser_dma_unmap_task_data(iser_task,
                                                 &iser_task->prot[ISER_DIR_IN],
                                                 DMA_FROM_DEVICE);
@@ -710,11 +676,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
 
        if (iser_task->dir[ISER_DIR_OUT]) {
                iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
-               if (is_rdma_data_aligned)
-                       iser_dma_unmap_task_data(iser_task,
-                                                &iser_task->data[ISER_DIR_OUT],
-                                                DMA_TO_DEVICE);
-               if (prot_count && is_rdma_prot_aligned)
+               iser_dma_unmap_task_data(iser_task,
+                                        &iser_task->data[ISER_DIR_OUT],
+                                        DMA_TO_DEVICE);
+               if (prot_count)
                        iser_dma_unmap_task_data(iser_task,
                                                 &iser_task->prot[ISER_DIR_OUT],
                                                 DMA_TO_DEVICE);
index 4c46d67d37a13100b60c6daa0a0b01b8f6855608..ea765fb9664d36759ff1a90d11272c1bc13ca457 100644 (file)
@@ -88,113 +88,6 @@ int iser_assign_reg_ops(struct iser_device *device)
        return 0;
 }
 
-static void
-iser_free_bounce_sg(struct iser_data_buf *data)
-{
-       struct scatterlist *sg;
-       int count;
-
-       for_each_sg(data->sg, sg, data->size, count)
-               __free_page(sg_page(sg));
-
-       kfree(data->sg);
-
-       data->sg = data->orig_sg;
-       data->size = data->orig_size;
-       data->orig_sg = NULL;
-       data->orig_size = 0;
-}
-
-static int
-iser_alloc_bounce_sg(struct iser_data_buf *data)
-{
-       struct scatterlist *sg;
-       struct page *page;
-       unsigned long length = data->data_len;
-       int i = 0, nents = DIV_ROUND_UP(length, PAGE_SIZE);
-
-       sg = kcalloc(nents, sizeof(*sg), GFP_ATOMIC);
-       if (!sg)
-               goto err;
-
-       sg_init_table(sg, nents);
-       while (length) {
-               u32 page_len = min_t(u32, length, PAGE_SIZE);
-
-               page = alloc_page(GFP_ATOMIC);
-               if (!page)
-                       goto err;
-
-               sg_set_page(&sg[i], page, page_len, 0);
-               length -= page_len;
-               i++;
-       }
-
-       data->orig_sg = data->sg;
-       data->orig_size = data->size;
-       data->sg = sg;
-       data->size = nents;
-
-       return 0;
-
-err:
-       for (; i > 0; i--)
-               __free_page(sg_page(&sg[i - 1]));
-       kfree(sg);
-
-       return -ENOMEM;
-}
-
-static void
-iser_copy_bounce(struct iser_data_buf *data, bool to_buffer)
-{
-       struct scatterlist *osg, *bsg = data->sg;
-       void *oaddr, *baddr;
-       unsigned int left = data->data_len;
-       unsigned int bsg_off = 0;
-       int i;
-
-       for_each_sg(data->orig_sg, osg, data->orig_size, i) {
-               unsigned int copy_len, osg_off = 0;
-
-               oaddr = kmap_atomic(sg_page(osg)) + osg->offset;
-               copy_len = min(left, osg->length);
-               while (copy_len) {
-                       unsigned int len = min(copy_len, bsg->length - bsg_off);
-
-                       baddr = kmap_atomic(sg_page(bsg)) + bsg->offset;
-                       if (to_buffer)
-                               memcpy(baddr + bsg_off, oaddr + osg_off, len);
-                       else
-                               memcpy(oaddr + osg_off, baddr + bsg_off, len);
-
-                       kunmap_atomic(baddr - bsg->offset);
-                       osg_off += len;
-                       bsg_off += len;
-                       copy_len -= len;
-
-                       if (bsg_off >= bsg->length) {
-                               bsg = sg_next(bsg);
-                               bsg_off = 0;
-                       }
-               }
-               kunmap_atomic(oaddr - osg->offset);
-               left -= osg_off;
-       }
-}
-
-static inline void
-iser_copy_from_bounce(struct iser_data_buf *data)
-{
-       iser_copy_bounce(data, false);
-}
-
-static inline void
-iser_copy_to_bounce(struct iser_data_buf *data)
-{
-       iser_copy_bounce(data, true);
-}
-
 struct iser_fr_desc *
 iser_reg_desc_get_fr(struct ib_conn *ib_conn)
 {
@@ -238,62 +131,6 @@ iser_reg_desc_put_fmr(struct ib_conn *ib_conn,
 {
 }
 
-/**
- * iser_start_rdma_unaligned_sg
- */
-static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
-                                       struct iser_data_buf *data,
-                                       enum iser_data_dir cmd_dir)
-{
-       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
-       int rc;
-
-       rc = iser_alloc_bounce_sg(data);
-       if (rc) {
-               iser_err("Failed to allocate bounce for data len %lu\n",
-                        data->data_len);
-               return rc;
-       }
-
-       if (cmd_dir == ISER_DIR_OUT)
-               iser_copy_to_bounce(data);
-
-       data->dma_nents = ib_dma_map_sg(dev, data->sg, data->size,
-                                       (cmd_dir == ISER_DIR_OUT) ?
-                                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
-       if (!data->dma_nents) {
-               iser_err("Got dma_nents %d, something went wrong...\n",
-                        data->dma_nents);
-               rc = -ENOMEM;
-               goto err;
-       }
-
-       return 0;
-err:
-       iser_free_bounce_sg(data);
-       return rc;
-}
-
-/**
- * iser_finalize_rdma_unaligned_sg
- */
-
-void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
-                                    struct iser_data_buf *data,
-                                    enum iser_data_dir cmd_dir)
-{
-       struct ib_device *dev = iser_task->iser_conn->ib_conn.device->ib_device;
-
-       ib_dma_unmap_sg(dev, data->sg, data->size,
-                       (cmd_dir == ISER_DIR_OUT) ?
-                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-       if (cmd_dir == ISER_DIR_IN)
-               iser_copy_from_bounce(data);
-
-       iser_free_bounce_sg(data);
-}
-
 #define IS_4K_ALIGNED(addr)    ((((unsigned long)addr) & ~MASK_4K) == 0)
 
 /**
@@ -355,64 +192,6 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
        return cur_page;
 }
 
-
-/**
- * iser_data_buf_aligned_len - Tries to determine the maximal correctly aligned
- * for RDMA sub-list of a scatter-gather list of memory buffers, and  returns
- * the number of entries which are aligned correctly. Supports the case where
- * consecutive SG elements are actually fragments of the same physical page.
- */
-static int iser_data_buf_aligned_len(struct iser_data_buf *data,
-                                    struct ib_device *ibdev,
-                                    unsigned sg_tablesize)
-{
-       struct scatterlist *sg, *sgl, *next_sg = NULL;
-       u64 start_addr, end_addr;
-       int i, ret_len, start_check = 0;
-
-       if (data->dma_nents == 1)
-               return 1;
-
-       sgl = data->sg;
-       start_addr  = ib_sg_dma_address(ibdev, sgl);
-
-       if (unlikely(sgl[0].offset &&
-                    data->data_len >= sg_tablesize * PAGE_SIZE)) {
-               iser_dbg("can't register length %lx with offset %x "
-                        "fall to bounce buffer\n", data->data_len,
-                        sgl[0].offset);
-               return 0;
-       }
-
-       for_each_sg(sgl, sg, data->dma_nents, i) {
-               if (start_check && !IS_4K_ALIGNED(start_addr))
-                       break;
-
-               next_sg = sg_next(sg);
-               if (!next_sg)
-                       break;
-
-               end_addr    = start_addr + ib_sg_dma_len(ibdev, sg);
-               start_addr  = ib_sg_dma_address(ibdev, next_sg);
-
-               if (end_addr == start_addr) {
-                       start_check = 0;
-                       continue;
-               } else
-                       start_check = 1;
-
-               if (!IS_4K_ALIGNED(end_addr))
-                       break;
-       }
-       ret_len = (next_sg) ? i : i+1;
-
-       if (unlikely(ret_len != data->dma_nents))
-               iser_warn("rdma alignment violation (%d/%d aligned)\n",
-                         ret_len, data->dma_nents);
-
-       return ret_len;
-}
-
 static void iser_data_buf_dump(struct iser_data_buf *data,
                               struct ib_device *ibdev)
 {
@@ -483,31 +262,6 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem,
        return 0;
 }
 
-static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
-                             struct iser_data_buf *mem,
-                             enum iser_data_dir cmd_dir)
-{
-       struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
-       struct iser_device *device = iser_task->iser_conn->ib_conn.device;
-
-       iscsi_conn->fmr_unalign_cnt++;
-
-       if (iser_debug_level > 0)
-               iser_data_buf_dump(mem, device->ib_device);
-
-       /* unmap the command data before accessing it */
-       iser_dma_unmap_task_data(iser_task, mem,
-                                (cmd_dir == ISER_DIR_OUT) ?
-                                DMA_TO_DEVICE : DMA_FROM_DEVICE);
-
-       /* allocate copy buf, if we are writing, copy the */
-       /* unaligned scatterlist, dma map the copy        */
-       if (iser_start_rdma_unaligned_sg(iser_task, mem, cmd_dir) != 0)
-               return -ENOMEM;
-
-       return 0;
-}
-
 /**
  * iser_reg_page_vec - Register physical memory
  *
@@ -683,7 +437,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 {
        struct iser_tx_desc *tx_desc = &iser_task->desc;
        struct ib_sig_attrs *sig_attrs = &tx_desc->sig_attrs;
-       struct ib_send_wr *wr;
+       struct ib_sig_handover_wr *wr;
        int ret;
 
        memset(sig_attrs, 0, sizeof(*sig_attrs));
@@ -693,26 +447,24 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
 
        iser_set_prot_checks(iser_task->sc, &sig_attrs->check_mask);
 
-       if (!pi_ctx->sig_mr_valid) {
-               wr = iser_tx_next_wr(tx_desc);
-               iser_inv_rkey(wr, pi_ctx->sig_mr);
-       }
-
-       wr = iser_tx_next_wr(tx_desc);
-       wr->opcode = IB_WR_REG_SIG_MR;
-       wr->wr_id = ISER_FASTREG_LI_WRID;
-       wr->sg_list = &data_reg->sge;
-       wr->num_sge = 1;
-       wr->send_flags = 0;
-       wr->wr.sig_handover.sig_attrs = sig_attrs;
-       wr->wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+       if (!pi_ctx->sig_mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), pi_ctx->sig_mr);
+
+       wr = sig_handover_wr(iser_tx_next_wr(tx_desc));
+       wr->wr.opcode = IB_WR_REG_SIG_MR;
+       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.sg_list = &data_reg->sge;
+       wr->wr.num_sge = 1;
+       wr->wr.send_flags = 0;
+       wr->sig_attrs = sig_attrs;
+       wr->sig_mr = pi_ctx->sig_mr;
        if (scsi_prot_sg_count(iser_task->sc))
-               wr->wr.sig_handover.prot = &prot_reg->sge;
+               wr->prot = &prot_reg->sge;
        else
-               wr->wr.sig_handover.prot = NULL;
-       wr->wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE |
-                                          IB_ACCESS_REMOTE_READ |
-                                          IB_ACCESS_REMOTE_WRITE;
+               wr->prot = NULL;
+       wr->access_flags = IB_ACCESS_LOCAL_WRITE |
+                          IB_ACCESS_REMOTE_READ |
+                          IB_ACCESS_REMOTE_WRITE;
        pi_ctx->sig_mr_valid = 0;
 
        sig_reg->sge.lkey = pi_ctx->sig_mr->lkey;
@@ -720,7 +472,7 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task,
        sig_reg->sge.addr = 0;
        sig_reg->sge.length = scsi_transfer_length(iser_task->sc);
 
-       iser_dbg("sig reg: lkey: 0x%x, rkey: 0x%x, addr: 0x%llx, length: %u\n",
+       iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=%u\n",
                 sig_reg->sge.lkey, sig_reg->rkey, sig_reg->sge.addr,
                 sig_reg->sge.length);
 err:
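
This hunk is part of the work-request cleanup series: the signature-handover parameters move out of the old ib_send_wr.wr union into a dedicated struct ib_sig_handover_wr that embeds the generic send WR. A minimal sketch of the new pattern follows; names such as data_sge, prot_sge and sig_mr are placeholders, and the sig_attrs setup, rkey-invalidate chaining and error handling shown in the driver are elided.

        struct ib_sig_handover_wr wr;

        memset(&wr, 0, sizeof(wr));
        wr.wr.opcode = IB_WR_REG_SIG_MR;
        wr.wr.sg_list = &data_sge;      /* the already-registered data buffer */
        wr.wr.num_sge = 1;
        wr.sig_attrs = &sig_attrs;      /* filled in from the SCSI command */
        wr.sig_mr = sig_mr;
        wr.prot = prot_sge;             /* may be NULL when there is no protection SG */
        wr.access_flags = IB_ACCESS_LOCAL_WRITE |
                          IB_ACCESS_REMOTE_READ |
                          IB_ACCESS_REMOTE_WRITE;
        /* chained onto the send list and posted later with ib_post_send() */
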
@@ -732,69 +484,41 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task,
                            struct iser_reg_resources *rsc,
                            struct iser_mem_reg *reg)
 {
-       struct ib_conn *ib_conn = &iser_task->iser_conn->ib_conn;
-       struct iser_device *device = ib_conn->device;
-       struct ib_mr *mr = rsc->mr;
-       struct ib_fast_reg_page_list *frpl = rsc->frpl;
        struct iser_tx_desc *tx_desc = &iser_task->desc;
-       struct ib_send_wr *wr;
-       int offset, size, plen;
+       struct ib_mr *mr = rsc->mr;
+       struct ib_reg_wr *wr;
+       int n;
 
-       plen = iser_sg_to_page_vec(mem, device->ib_device, frpl->page_list,
-                                  &offset, &size);
-       if (plen * SIZE_4K < size) {
-               iser_err("fast reg page_list too short to hold this SG\n");
-               return -EINVAL;
-       }
+       if (!rsc->mr_valid)
+               iser_inv_rkey(iser_tx_next_wr(tx_desc), mr);
 
-       if (!rsc->mr_valid) {
-               wr = iser_tx_next_wr(tx_desc);
-               iser_inv_rkey(wr, mr);
+       n = ib_map_mr_sg(mr, mem->sg, mem->size, SIZE_4K);
+       if (unlikely(n != mem->size)) {
+               iser_err("failed to map sg (%d/%d)\n",
+                        n, mem->size);
+               return n < 0 ? n : -EINVAL;
        }
 
-       wr = iser_tx_next_wr(tx_desc);
-       wr->opcode = IB_WR_FAST_REG_MR;
-       wr->wr_id = ISER_FASTREG_LI_WRID;
-       wr->send_flags = 0;
-       wr->wr.fast_reg.iova_start = frpl->page_list[0] + offset;
-       wr->wr.fast_reg.page_list = frpl;
-       wr->wr.fast_reg.page_list_len = plen;
-       wr->wr.fast_reg.page_shift = SHIFT_4K;
-       wr->wr.fast_reg.length = size;
-       wr->wr.fast_reg.rkey = mr->rkey;
-       wr->wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE  |
-                                       IB_ACCESS_REMOTE_WRITE |
-                                       IB_ACCESS_REMOTE_READ);
+       wr = reg_wr(iser_tx_next_wr(tx_desc));
+       wr->wr.opcode = IB_WR_REG_MR;
+       wr->wr.wr_id = ISER_FASTREG_LI_WRID;
+       wr->wr.send_flags = 0;
+       wr->wr.num_sge = 0;
+       wr->mr = mr;
+       wr->key = mr->rkey;
+       wr->access = IB_ACCESS_LOCAL_WRITE  |
+                    IB_ACCESS_REMOTE_WRITE |
+                    IB_ACCESS_REMOTE_READ;
+
        rsc->mr_valid = 0;
 
        reg->sge.lkey = mr->lkey;
        reg->rkey = mr->rkey;
-       reg->sge.addr = frpl->page_list[0] + offset;
-       reg->sge.length = size;
+       reg->sge.addr = mr->iova;
+       reg->sge.length = mr->length;
 
-       iser_dbg("fast reg: lkey=0x%x, rkey=0x%x, addr=0x%llx,"
-                " length=0x%x\n", reg->sge.lkey, reg->rkey,
-                reg->sge.addr, reg->sge.length);
-
-       return 0;
-}
-
-static int
-iser_handle_unaligned_buf(struct iscsi_iser_task *task,
-                         struct iser_data_buf *mem,
-                         enum iser_data_dir dir)
-{
-       struct iser_conn *iser_conn = task->iser_conn;
-       struct iser_device *device = iser_conn->ib_conn.device;
-       int err, aligned_len;
-
-       aligned_len = iser_data_buf_aligned_len(mem, device->ib_device,
-                                               iser_conn->scsi_sg_tablesize);
-       if (aligned_len != mem->dma_nents) {
-               err = fall_to_bounce_buf(task, mem, dir);
-               if (err)
-                       return err;
-       }
+       iser_dbg("lkey=0x%x rkey=0x%x addr=0x%llx length=0x%x\n",
+                reg->sge.lkey, reg->rkey, reg->sge.addr, reg->sge.length);
 
        return 0;
 }
@@ -841,10 +565,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
        bool use_dma_key;
        int err;
 
-       err = iser_handle_unaligned_buf(task, mem, dir);
-       if (unlikely(err))
-               return err;
-
        use_dma_key = (mem->dma_nents == 1 && !iser_always_reg &&
                       scsi_get_prot_op(task->sc) == SCSI_PROT_NORMAL);
 
@@ -867,10 +587,6 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *task,
 
                if (scsi_prot_sg_count(task->sc)) {
                        mem = &task->prot[dir];
-                       err = iser_handle_unaligned_buf(task, mem, dir);
-                       if (unlikely(err))
-                               goto err_reg;
-
                        err = iser_reg_prot_sg(task, mem, desc,
                                               use_dma_key, prot_reg);
                        if (unlikely(err))
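
Taken together, the iser_memory.c hunks above drop the driver-built fast-reg page list in favour of the new memory registration API: the scatterlist is handed to the core with ib_map_mr_sg() and the MR is then registered through an IB_WR_REG_MR work request. A rough sketch of that pattern, with placeholder names (mr, sg, sg_nents) and the driver's descriptor management elided:

        struct ib_reg_wr wr;
        int n;

        /* the core walks the SG list and builds the MR's page list */
        n = ib_map_mr_sg(mr, sg, sg_nents, SIZE_4K);
        if (n != sg_nents)
                return n < 0 ? n : -EINVAL;     /* short map: SG list did not fit */

        memset(&wr, 0, sizeof(wr));
        wr.wr.opcode = IB_WR_REG_MR;
        wr.mr = mr;
        wr.key = mr->rkey;
        wr.access = IB_ACCESS_LOCAL_WRITE |
                    IB_ACCESS_REMOTE_READ |
                    IB_ACCESS_REMOTE_WRITE;

        /* once the WR executes, mr->iova and mr->length describe the
         * registered range, so the data sge is taken straight from the MR */

SIZE_4K is iser's 4 KB page-size constant, as used in the hunk above.
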
index 85132d867bc86fcfcd99b7065e9f746301422de1..a93070210109699909075e02212fc6ae70710960 100644
@@ -293,35 +293,21 @@ iser_alloc_reg_res(struct ib_device *ib_device,
 {
        int ret;
 
-       res->frpl = ib_alloc_fast_reg_page_list(ib_device, size);
-       if (IS_ERR(res->frpl)) {
-               ret = PTR_ERR(res->frpl);
-               iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
-                        ret);
-               return PTR_ERR(res->frpl);
-       }
-
        res->mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, size);
        if (IS_ERR(res->mr)) {
                ret = PTR_ERR(res->mr);
                iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
-               goto fast_reg_mr_failure;
+               return ret;
        }
        res->mr_valid = 1;
 
        return 0;
-
-fast_reg_mr_failure:
-       ib_free_fast_reg_page_list(res->frpl);
-
-       return ret;
 }
 
 static void
 iser_free_reg_res(struct iser_reg_resources *rsc)
 {
        ib_dereg_mr(rsc->mr);
-       ib_free_fast_reg_page_list(rsc->frpl);
 }
 
 static int
@@ -1017,7 +1003,7 @@ int iser_connect(struct iser_conn   *iser_conn,
        ib_conn->beacon.wr_id = ISER_BEACON_WRID;
        ib_conn->beacon.opcode = IB_WR_SEND;
 
-       ib_conn->cma_id = rdma_create_id(iser_cma_handler,
+       ib_conn->cma_id = rdma_create_id(&init_net, iser_cma_handler,
                                         (void *)iser_conn,
                                         RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ib_conn->cma_id)) {
@@ -1135,7 +1121,7 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
        wr->opcode = IB_WR_SEND;
        wr->send_flags = signal ? IB_SEND_SIGNALED : 0;
 
-       ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0], &bad_wr);
+       ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr);
        if (ib_ret)
                iser_err("ib_post_send failed, ret:%d opcode:%d\n",
                         ib_ret, bad_wr->opcode);
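
The rdma_create_id() change in this file comes from the new network-namespace support in the RDMA CM: every CM ID is now created in an explicit struct net. Kernel ULPs that are not namespace aware simply pass &init_net; a before/after sketch with placeholder handler and ctx:

        /* old API */
        cma_id = rdma_create_id(handler, ctx, RDMA_PS_TCP, IB_QPT_RC);

        /* with namespace support */
        cma_id = rdma_create_id(&init_net, handler, ctx, RDMA_PS_TCP, IB_QPT_RC);

The other change here, posting &tx_desc->wrs[0].send instead of &tx_desc->wrs[0], reflects the tx descriptor's WR array now holding a union of the typed work-request structs, with .send as the plain ib_send_wr member.
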
index aa59037d75040b7d6e1126bb620339c1c603bc89..dfbbbb28090b2301c7742fd28b145b62054a76af 100644
@@ -473,10 +473,8 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn)
        list_for_each_entry_safe(fr_desc, tmp,
                                 &isert_conn->fr_pool, list) {
                list_del(&fr_desc->list);
-               ib_free_fast_reg_page_list(fr_desc->data_frpl);
                ib_dereg_mr(fr_desc->data_mr);
                if (fr_desc->pi_ctx) {
-                       ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
                        ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
                        ib_dereg_mr(fr_desc->pi_ctx->sig_mr);
                        kfree(fr_desc->pi_ctx);
@@ -504,22 +502,13 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
                return -ENOMEM;
        }
 
-       pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
-                                           ISCSI_ISER_SG_TABLESIZE);
-       if (IS_ERR(pi_ctx->prot_frpl)) {
-               isert_err("Failed to allocate prot frpl err=%ld\n",
-                         PTR_ERR(pi_ctx->prot_frpl));
-               ret = PTR_ERR(pi_ctx->prot_frpl);
-               goto err_pi_ctx;
-       }
-
        pi_ctx->prot_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
                                      ISCSI_ISER_SG_TABLESIZE);
        if (IS_ERR(pi_ctx->prot_mr)) {
                isert_err("Failed to allocate prot frmr err=%ld\n",
                          PTR_ERR(pi_ctx->prot_mr));
                ret = PTR_ERR(pi_ctx->prot_mr);
-               goto err_prot_frpl;
+               goto err_pi_ctx;
        }
        desc->ind |= ISERT_PROT_KEY_VALID;
 
@@ -539,8 +528,6 @@ isert_create_pi_ctx(struct fast_reg_descriptor *desc,
 
 err_prot_mr:
        ib_dereg_mr(pi_ctx->prot_mr);
-err_prot_frpl:
-       ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
 err_pi_ctx:
        kfree(pi_ctx);
 
@@ -551,34 +538,18 @@ static int
 isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
                     struct fast_reg_descriptor *fr_desc)
 {
-       int ret;
-
-       fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
-                                                        ISCSI_ISER_SG_TABLESIZE);
-       if (IS_ERR(fr_desc->data_frpl)) {
-               isert_err("Failed to allocate data frpl err=%ld\n",
-                         PTR_ERR(fr_desc->data_frpl));
-               return PTR_ERR(fr_desc->data_frpl);
-       }
-
        fr_desc->data_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
                                       ISCSI_ISER_SG_TABLESIZE);
        if (IS_ERR(fr_desc->data_mr)) {
                isert_err("Failed to allocate data frmr err=%ld\n",
                          PTR_ERR(fr_desc->data_mr));
-               ret = PTR_ERR(fr_desc->data_mr);
-               goto err_data_frpl;
+               return PTR_ERR(fr_desc->data_mr);
        }
        fr_desc->ind |= ISERT_DATA_KEY_VALID;
 
        isert_dbg("Created fr_desc %p\n", fr_desc);
 
        return 0;
-
-err_data_frpl:
-       ib_free_fast_reg_page_list(fr_desc->data_frpl);
-
-       return ret;
 }
 
 static int
@@ -1579,7 +1550,6 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
        struct iser_hdr *iser_hdr = &rx_desc->iser_header;
        uint64_t read_va = 0, write_va = 0;
        uint32_t read_stag = 0, write_stag = 0;
-       int rc;
 
        switch (iser_hdr->flags & 0xF0) {
        case ISCSI_CTRL:
@@ -1606,8 +1576,8 @@ isert_rx_do_work(struct iser_rx_desc *rx_desc, struct isert_conn *isert_conn)
                break;
        }
 
-       rc = isert_rx_opcode(isert_conn, rx_desc,
-                            read_stag, read_va, write_stag, write_va);
+       isert_rx_opcode(isert_conn, rx_desc,
+                       read_stag, read_va, write_stag, write_va);
 }
 
 static void
@@ -1716,10 +1686,10 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
                isert_unmap_data_buf(isert_conn, &wr->data);
        }
 
-       if (wr->send_wr) {
+       if (wr->rdma_wr) {
                isert_dbg("Cmd %p free send_wr\n", isert_cmd);
-               kfree(wr->send_wr);
-               wr->send_wr = NULL;
+               kfree(wr->rdma_wr);
+               wr->rdma_wr = NULL;
        }
 
        if (wr->ib_sge) {
@@ -1754,7 +1724,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
        }
 
        wr->ib_sge = NULL;
-       wr->send_wr = NULL;
+       wr->rdma_wr = NULL;
 }
 
 static void
@@ -1923,7 +1893,7 @@ isert_completion_rdma_write(struct iser_tx_desc *tx_desc,
        }
 
        device->unreg_rdma_mem(isert_cmd, isert_conn);
-       wr->send_wr_num = 0;
+       wr->rdma_wr_num = 0;
        if (ret)
                transport_send_check_condition_and_sense(se_cmd,
                                                         se_cmd->pi_err, 0);
@@ -1951,7 +1921,7 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
        iscsit_stop_dataout_timer(cmd);
        device->unreg_rdma_mem(isert_cmd, isert_conn);
        cmd->write_data_done = wr->data.len;
-       wr->send_wr_num = 0;
+       wr->rdma_wr_num = 0;
 
        isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
        spin_lock_bh(&cmd->istate_lock);
@@ -2403,7 +2373,7 @@ isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 
 static int
 isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
-                   struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
+                   struct ib_sge *ib_sge, struct ib_rdma_wr *rdma_wr,
                    u32 data_left, u32 offset)
 {
        struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
@@ -2418,8 +2388,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
        sg_nents = min(cmd->se_cmd.t_data_nents - sg_off, isert_conn->max_sge);
        page_off = offset % PAGE_SIZE;
 
-       send_wr->sg_list = ib_sge;
-       send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
+       rdma_wr->wr.sg_list = ib_sge;
+       rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
        /*
         * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
         */
@@ -2444,11 +2414,11 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
        }
 
-       send_wr->num_sge = ++i;
+       rdma_wr->wr.num_sge = ++i;
        isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
-                 send_wr->sg_list, send_wr->num_sge);
+                 rdma_wr->wr.sg_list, rdma_wr->wr.num_sge);
 
-       return send_wr->num_sge;
+       return rdma_wr->wr.num_sge;
 }
 
 static int
@@ -2459,7 +2429,7 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = conn->context;
        struct isert_data_buf *data = &wr->data;
-       struct ib_send_wr *send_wr;
+       struct ib_rdma_wr *rdma_wr;
        struct ib_sge *ib_sge;
        u32 offset, data_len, data_left, rdma_write_max, va_offset = 0;
        int ret = 0, i, ib_sge_cnt;
@@ -2484,11 +2454,11 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
        }
        wr->ib_sge = ib_sge;
 
-       wr->send_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
-       wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
+       wr->rdma_wr_num = DIV_ROUND_UP(data->nents, isert_conn->max_sge);
+       wr->rdma_wr = kzalloc(sizeof(struct ib_rdma_wr) * wr->rdma_wr_num,
                                GFP_KERNEL);
-       if (!wr->send_wr) {
-               isert_dbg("Unable to allocate wr->send_wr\n");
+       if (!wr->rdma_wr) {
+               isert_dbg("Unable to allocate wr->rdma_wr\n");
                ret = -ENOMEM;
                goto unmap_cmd;
        }
@@ -2496,31 +2466,31 @@ isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
        wr->isert_cmd = isert_cmd;
        rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
 
-       for (i = 0; i < wr->send_wr_num; i++) {
-               send_wr = &isert_cmd->rdma_wr.send_wr[i];
+       for (i = 0; i < wr->rdma_wr_num; i++) {
+               rdma_wr = &isert_cmd->rdma_wr.rdma_wr[i];
                data_len = min(data_left, rdma_write_max);
 
-               send_wr->send_flags = 0;
+               rdma_wr->wr.send_flags = 0;
                if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
-                       send_wr->opcode = IB_WR_RDMA_WRITE;
-                       send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset;
-                       send_wr->wr.rdma.rkey = isert_cmd->read_stag;
-                       if (i + 1 == wr->send_wr_num)
-                               send_wr->next = &isert_cmd->tx_desc.send_wr;
+                       rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+                       rdma_wr->remote_addr = isert_cmd->read_va + offset;
+                       rdma_wr->rkey = isert_cmd->read_stag;
+                       if (i + 1 == wr->rdma_wr_num)
+                               rdma_wr->wr.next = &isert_cmd->tx_desc.send_wr;
                        else
-                               send_wr->next = &wr->send_wr[i + 1];
+                               rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
                } else {
-                       send_wr->opcode = IB_WR_RDMA_READ;
-                       send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
-                       send_wr->wr.rdma.rkey = isert_cmd->write_stag;
-                       if (i + 1 == wr->send_wr_num)
-                               send_wr->send_flags = IB_SEND_SIGNALED;
+                       rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+                       rdma_wr->remote_addr = isert_cmd->write_va + va_offset;
+                       rdma_wr->rkey = isert_cmd->write_stag;
+                       if (i + 1 == wr->rdma_wr_num)
+                               rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
                        else
-                               send_wr->next = &wr->send_wr[i + 1];
+                               rdma_wr->wr.next = &wr->rdma_wr[i + 1].wr;
                }
 
                ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
-                                       send_wr, data_len, offset);
+                                       rdma_wr, data_len, offset);
                ib_sge += ib_sge_cnt;
 
                offset += data_len;
@@ -2535,45 +2505,6 @@ unmap_cmd:
        return ret;
 }
 
-static int
-isert_map_fr_pagelist(struct ib_device *ib_dev,
-                     struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
-{
-       u64 start_addr, end_addr, page, chunk_start = 0;
-       struct scatterlist *tmp_sg;
-       int i = 0, new_chunk, last_ent, n_pages;
-
-       n_pages = 0;
-       new_chunk = 1;
-       last_ent = sg_nents - 1;
-       for_each_sg(sg_start, tmp_sg, sg_nents, i) {
-               start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
-               if (new_chunk)
-                       chunk_start = start_addr;
-               end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
-
-               isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
-                         i, (unsigned long long)tmp_sg->dma_address,
-                         tmp_sg->length);
-
-               if ((end_addr & ~PAGE_MASK) && i < last_ent) {
-                       new_chunk = 0;
-                       continue;
-               }
-               new_chunk = 1;
-
-               page = chunk_start & PAGE_MASK;
-               do {
-                       fr_pl[n_pages++] = page;
-                       isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
-                                 n_pages - 1, page);
-                       page += PAGE_SIZE;
-               } while (page < end_addr);
-       }
-
-       return n_pages;
-}
-
 static inline void
 isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
 {
@@ -2599,11 +2530,9 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
        struct isert_device *device = isert_conn->device;
        struct ib_device *ib_dev = device->ib_device;
        struct ib_mr *mr;
-       struct ib_fast_reg_page_list *frpl;
-       struct ib_send_wr fr_wr, inv_wr;
-       struct ib_send_wr *bad_wr, *wr = NULL;
-       int ret, pagelist_len;
-       u32 page_off;
+       struct ib_reg_wr reg_wr;
+       struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
+       int ret, n;
 
        if (mem->dma_nents == 1) {
                sge->lkey = device->pd->local_dma_lkey;
@@ -2614,45 +2543,41 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
                return 0;
        }
 
-       if (ind == ISERT_DATA_KEY_VALID) {
+       if (ind == ISERT_DATA_KEY_VALID)
                /* Registering data buffer */
                mr = fr_desc->data_mr;
-               frpl = fr_desc->data_frpl;
-       } else {
+       else
                /* Registering protection buffer */
                mr = fr_desc->pi_ctx->prot_mr;
-               frpl = fr_desc->pi_ctx->prot_frpl;
-       }
-
-       page_off = mem->offset % PAGE_SIZE;
-
-       isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
-                 fr_desc, mem->nents, mem->offset);
-
-       pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
-                                            &frpl->page_list[0]);
 
        if (!(fr_desc->ind & ind)) {
                isert_inv_rkey(&inv_wr, mr);
                wr = &inv_wr;
        }
 
-       /* Prepare FASTREG WR */
-       memset(&fr_wr, 0, sizeof(fr_wr));
-       fr_wr.wr_id = ISER_FASTREG_LI_WRID;
-       fr_wr.opcode = IB_WR_FAST_REG_MR;
-       fr_wr.wr.fast_reg.iova_start = frpl->page_list[0] + page_off;
-       fr_wr.wr.fast_reg.page_list = frpl;
-       fr_wr.wr.fast_reg.page_list_len = pagelist_len;
-       fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fr_wr.wr.fast_reg.length = mem->len;
-       fr_wr.wr.fast_reg.rkey = mr->rkey;
-       fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
+       n = ib_map_mr_sg(mr, mem->sg, mem->nents, PAGE_SIZE);
+       if (unlikely(n != mem->nents)) {
+               isert_err("failed to map mr sg (%d/%d)\n",
+                        n, mem->nents);
+               return n < 0 ? n : -EINVAL;
+       }
+
+       isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
+                 fr_desc, mem->nents, mem->offset);
+
+       reg_wr.wr.next = NULL;
+       reg_wr.wr.opcode = IB_WR_REG_MR;
+       reg_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
+       reg_wr.wr.send_flags = 0;
+       reg_wr.wr.num_sge = 0;
+       reg_wr.mr = mr;
+       reg_wr.key = mr->lkey;
+       reg_wr.access = IB_ACCESS_LOCAL_WRITE;
 
        if (!wr)
-               wr = &fr_wr;
+               wr = &reg_wr.wr;
        else
-               wr->next = &fr_wr;
+               wr->next = &reg_wr.wr;
 
        ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
        if (ret) {
@@ -2662,8 +2587,8 @@ isert_fast_reg_mr(struct isert_conn *isert_conn,
        fr_desc->ind &= ~ind;
 
        sge->lkey = mr->lkey;
-       sge->addr = frpl->page_list[0] + page_off;
-       sge->length = mem->len;
+       sge->addr = mr->iova;
+       sge->length = mr->length;
 
        isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
                  sge->addr, sge->length, sge->lkey);
@@ -2733,8 +2658,8 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
                 struct isert_rdma_wr *rdma_wr,
                 struct fast_reg_descriptor *fr_desc)
 {
-       struct ib_send_wr sig_wr, inv_wr;
-       struct ib_send_wr *bad_wr, *wr = NULL;
+       struct ib_sig_handover_wr sig_wr;
+       struct ib_send_wr inv_wr, *bad_wr, *wr = NULL;
        struct pi_context *pi_ctx = fr_desc->pi_ctx;
        struct ib_sig_attrs sig_attrs;
        int ret;
@@ -2752,20 +2677,20 @@ isert_reg_sig_mr(struct isert_conn *isert_conn,
        }
 
        memset(&sig_wr, 0, sizeof(sig_wr));
-       sig_wr.opcode = IB_WR_REG_SIG_MR;
-       sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-       sig_wr.sg_list = &rdma_wr->ib_sg[DATA];
-       sig_wr.num_sge = 1;
-       sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
-       sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
-       sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
+       sig_wr.wr.opcode = IB_WR_REG_SIG_MR;
+       sig_wr.wr.wr_id = ISER_FASTREG_LI_WRID;
+       sig_wr.wr.sg_list = &rdma_wr->ib_sg[DATA];
+       sig_wr.wr.num_sge = 1;
+       sig_wr.access_flags = IB_ACCESS_LOCAL_WRITE;
+       sig_wr.sig_attrs = &sig_attrs;
+       sig_wr.sig_mr = pi_ctx->sig_mr;
        if (se_cmd->t_prot_sg)
-               sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT];
+               sig_wr.prot = &rdma_wr->ib_sg[PROT];
 
        if (!wr)
-               wr = &sig_wr;
+               wr = &sig_wr.wr;
        else
-               wr->next = &sig_wr;
+               wr->next = &sig_wr.wr;
 
        ret = ib_post_send(isert_conn->qp, wr, &bad_wr);
        if (ret) {
@@ -2859,7 +2784,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
        struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = conn->context;
        struct fast_reg_descriptor *fr_desc = NULL;
-       struct ib_send_wr *send_wr;
+       struct ib_rdma_wr *rdma_wr;
        struct ib_sge *ib_sg;
        u32 offset;
        int ret = 0;
@@ -2900,26 +2825,26 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 
        memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
        wr->ib_sge = &wr->s_ib_sge;
-       wr->send_wr_num = 1;
-       memset(&wr->s_send_wr, 0, sizeof(*send_wr));
-       wr->send_wr = &wr->s_send_wr;
+       wr->rdma_wr_num = 1;
+       memset(&wr->s_rdma_wr, 0, sizeof(wr->s_rdma_wr));
+       wr->rdma_wr = &wr->s_rdma_wr;
        wr->isert_cmd = isert_cmd;
 
-       send_wr = &isert_cmd->rdma_wr.s_send_wr;
-       send_wr->sg_list = &wr->s_ib_sge;
-       send_wr->num_sge = 1;
-       send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
+       rdma_wr = &isert_cmd->rdma_wr.s_rdma_wr;
+       rdma_wr->wr.sg_list = &wr->s_ib_sge;
+       rdma_wr->wr.num_sge = 1;
+       rdma_wr->wr.wr_id = (uintptr_t)&isert_cmd->tx_desc;
        if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
-               send_wr->opcode = IB_WR_RDMA_WRITE;
-               send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
-               send_wr->wr.rdma.rkey = isert_cmd->read_stag;
-               send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
+               rdma_wr->wr.opcode = IB_WR_RDMA_WRITE;
+               rdma_wr->remote_addr = isert_cmd->read_va;
+               rdma_wr->rkey = isert_cmd->read_stag;
+               rdma_wr->wr.send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
                                      0 : IB_SEND_SIGNALED;
        } else {
-               send_wr->opcode = IB_WR_RDMA_READ;
-               send_wr->wr.rdma.remote_addr = isert_cmd->write_va;
-               send_wr->wr.rdma.rkey = isert_cmd->write_stag;
-               send_wr->send_flags = IB_SEND_SIGNALED;
+               rdma_wr->wr.opcode = IB_WR_RDMA_READ;
+               rdma_wr->remote_addr = isert_cmd->write_va;
+               rdma_wr->rkey = isert_cmd->write_stag;
+               rdma_wr->wr.send_flags = IB_SEND_SIGNALED;
        }
 
        return 0;
@@ -2967,8 +2892,8 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
                isert_init_send_wr(isert_conn, isert_cmd,
                                   &isert_cmd->tx_desc.send_wr);
-               isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
-               wr->send_wr_num += 1;
+               isert_cmd->rdma_wr.s_rdma_wr.wr.next = &isert_cmd->tx_desc.send_wr;
+               wr->rdma_wr_num += 1;
 
                rc = isert_post_recv(isert_conn, isert_cmd->rx_desc);
                if (rc) {
@@ -2977,7 +2902,7 @@ isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
                }
        }
 
-       rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
+       rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
        if (rc)
                isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
 
@@ -3011,7 +2936,7 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
                return rc;
        }
 
-       rc = ib_post_send(isert_conn->qp, wr->send_wr, &wr_failed);
+       rc = ib_post_send(isert_conn->qp, &wr->rdma_wr->wr, &wr_failed);
        if (rc)
                isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
 
@@ -3097,7 +3022,7 @@ isert_setup_id(struct isert_np *isert_np)
        sa = (struct sockaddr *)&np->np_sockaddr;
        isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa);
 
-       id = rdma_create_id(isert_cma_handler, isert_np,
+       id = rdma_create_id(&init_net, isert_cma_handler, isert_np,
                            RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(id)) {
                isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
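
Most of the churn in ib_isert.c above is the same work-request cleanup applied to RDMA READ/WRITE: requests are built as struct ib_rdma_wr, which embeds the generic ib_send_wr and carries remote_addr/rkey directly instead of the old wr.rdma union. A minimal sketch of posting one signalled RDMA WRITE in the new layout (placeholder names, single SGE, no chaining):

        struct ib_rdma_wr rdma_wr;
        struct ib_send_wr *bad_wr;
        int ret;

        memset(&rdma_wr, 0, sizeof(rdma_wr));
        rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
        rdma_wr.wr.wr_id = my_wr_id;            /* placeholder completion cookie */
        rdma_wr.wr.sg_list = &sge;              /* local buffer */
        rdma_wr.wr.num_sge = 1;
        rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
        rdma_wr.remote_addr = remote_va;        /* peer-advertised address */
        rdma_wr.rkey = remote_rkey;             /* peer-advertised rkey */

        ret = ib_post_send(qp, &rdma_wr.wr, &bad_wr);
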
index c5b99bcecbcff806945be8baa9cc6dbeca46418a..3d7fbc47c3434c32308136b7faec65590b74d439 100644
@@ -84,14 +84,12 @@ enum isert_indicator {
 
 struct pi_context {
        struct ib_mr                   *prot_mr;
-       struct ib_fast_reg_page_list   *prot_frpl;
        struct ib_mr                   *sig_mr;
 };
 
 struct fast_reg_descriptor {
        struct list_head                list;
        struct ib_mr                   *data_mr;
-       struct ib_fast_reg_page_list   *data_frpl;
        u8                              ind;
        struct pi_context              *pi_ctx;
 };
@@ -117,9 +115,9 @@ struct isert_rdma_wr {
        enum iser_ib_op_code    iser_ib_op;
        struct ib_sge           *ib_sge;
        struct ib_sge           s_ib_sge;
-       int                     send_wr_num;
-       struct ib_send_wr       *send_wr;
-       struct ib_send_wr       s_send_wr;
+       int                     rdma_wr_num;
+       struct ib_rdma_wr       *rdma_wr;
+       struct ib_rdma_wr       s_rdma_wr;
        struct ib_sge           ib_sg[3];
        struct isert_data_buf   data;
        struct isert_data_buf   prot;
index b481490ad25756f6de36cd718c0983be751c8e5c..32f79624dd28565d3846384f24049e435aef83e7 100644
@@ -340,8 +340,6 @@ static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
                return;
 
        for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
-               if (d->frpl)
-                       ib_free_fast_reg_page_list(d->frpl);
                if (d->mr)
                        ib_dereg_mr(d->mr);
        }
@@ -362,7 +360,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
        struct srp_fr_pool *pool;
        struct srp_fr_desc *d;
        struct ib_mr *mr;
-       struct ib_fast_reg_page_list *frpl;
        int i, ret = -EINVAL;
 
        if (pool_size <= 0)
@@ -385,12 +382,6 @@ static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
                        goto destroy_pool;
                }
                d->mr = mr;
-               frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
-               if (IS_ERR(frpl)) {
-                       ret = PTR_ERR(frpl);
-                       goto destroy_pool;
-               }
-               d->frpl = frpl;
                list_add_tail(&d->entry, &pool->free_list);
        }
 
@@ -849,11 +840,12 @@ static void srp_free_req_data(struct srp_target_port *target,
 
        for (i = 0; i < target->req_ring_size; ++i) {
                req = &ch->req_ring[i];
-               if (dev->use_fast_reg)
+               if (dev->use_fast_reg) {
                        kfree(req->fr_list);
-               else
+               } else {
                        kfree(req->fmr_list);
-               kfree(req->map_page);
+                       kfree(req->map_page);
+               }
                if (req->indirect_dma_addr) {
                        ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
                                            target->indirect_size,
@@ -887,14 +879,15 @@ static int srp_alloc_req_data(struct srp_rdma_ch *ch)
                                  GFP_KERNEL);
                if (!mr_list)
                        goto out;
-               if (srp_dev->use_fast_reg)
+               if (srp_dev->use_fast_reg) {
                        req->fr_list = mr_list;
-               else
+               } else {
                        req->fmr_list = mr_list;
-               req->map_page = kmalloc(srp_dev->max_pages_per_mr *
-                                       sizeof(void *), GFP_KERNEL);
-               if (!req->map_page)
-                       goto out;
+                       req->map_page = kmalloc(srp_dev->max_pages_per_mr *
+                                               sizeof(void *), GFP_KERNEL);
+                       if (!req->map_page)
+                               goto out;
+               }
                req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
                if (!req->indirect_desc)
                        goto out;
@@ -1286,6 +1279,17 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
        if (state->fmr.next >= state->fmr.end)
                return -ENOMEM;
 
+       WARN_ON_ONCE(!dev->use_fmr);
+
+       if (state->npages == 0)
+               return 0;
+
+       if (state->npages == 1 && target->global_mr) {
+               srp_map_desc(state, state->base_dma_addr, state->dma_len,
+                            target->global_mr->rkey);
+               goto reset_state;
+       }
+
        fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
                                   state->npages, io_addr);
        if (IS_ERR(fmr))
@@ -1297,6 +1301,10 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
        srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
                     state->dma_len, fmr->fmr->rkey);
 
+reset_state:
+       state->npages = 0;
+       state->dma_len = 0;
+
        return 0;
 }
 
@@ -1306,13 +1314,26 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_send_wr *bad_wr;
-       struct ib_send_wr wr;
+       struct ib_reg_wr wr;
        struct srp_fr_desc *desc;
        u32 rkey;
+       int n, err;
 
        if (state->fr.next >= state->fr.end)
                return -ENOMEM;
 
+       WARN_ON_ONCE(!dev->use_fast_reg);
+
+       if (state->sg_nents == 0)
+               return 0;
+
+       if (state->sg_nents == 1 && target->global_mr) {
+               srp_map_desc(state, sg_dma_address(state->sg),
+                            sg_dma_len(state->sg),
+                            target->global_mr->rkey);
+               return 1;
+       }
+
        desc = srp_fr_pool_get(ch->fr_pool);
        if (!desc)
                return -ENOMEM;
@@ -1320,56 +1341,33 @@ static int srp_map_finish_fr(struct srp_map_state *state,
        rkey = ib_inc_rkey(desc->mr->rkey);
        ib_update_fast_reg_key(desc->mr, rkey);
 
-       memcpy(desc->frpl->page_list, state->pages,
-              sizeof(state->pages[0]) * state->npages);
-
-       memset(&wr, 0, sizeof(wr));
-       wr.opcode = IB_WR_FAST_REG_MR;
-       wr.wr_id = FAST_REG_WR_ID_MASK;
-       wr.wr.fast_reg.iova_start = state->base_dma_addr;
-       wr.wr.fast_reg.page_list = desc->frpl;
-       wr.wr.fast_reg.page_list_len = state->npages;
-       wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
-       wr.wr.fast_reg.length = state->dma_len;
-       wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
-                                      IB_ACCESS_REMOTE_READ |
-                                      IB_ACCESS_REMOTE_WRITE);
-       wr.wr.fast_reg.rkey = desc->mr->lkey;
+       n = ib_map_mr_sg(desc->mr, state->sg, state->sg_nents,
+                        dev->mr_page_size);
+       if (unlikely(n < 0))
+               return n;
+
+       wr.wr.next = NULL;
+       wr.wr.opcode = IB_WR_REG_MR;
+       wr.wr.wr_id = FAST_REG_WR_ID_MASK;
+       wr.wr.num_sge = 0;
+       wr.wr.send_flags = 0;
+       wr.mr = desc->mr;
+       wr.key = desc->mr->rkey;
+       wr.access = (IB_ACCESS_LOCAL_WRITE |
+                    IB_ACCESS_REMOTE_READ |
+                    IB_ACCESS_REMOTE_WRITE);
 
        *state->fr.next++ = desc;
        state->nmdesc++;
 
-       srp_map_desc(state, state->base_dma_addr, state->dma_len,
-                    desc->mr->rkey);
+       srp_map_desc(state, desc->mr->iova,
+                    desc->mr->length, desc->mr->rkey);
 
-       return ib_post_send(ch->qp, &wr, &bad_wr);
-}
+       err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
+       if (unlikely(err))
+               return err;
 
-static int srp_finish_mapping(struct srp_map_state *state,
-                             struct srp_rdma_ch *ch)
-{
-       struct srp_target_port *target = ch->target;
-       struct srp_device *dev = target->srp_host->srp_dev;
-       int ret = 0;
-
-       WARN_ON_ONCE(!dev->use_fast_reg && !dev->use_fmr);
-
-       if (state->npages == 0)
-               return 0;
-
-       if (state->npages == 1 && target->global_mr)
-               srp_map_desc(state, state->base_dma_addr, state->dma_len,
-                            target->global_mr->rkey);
-       else
-               ret = dev->use_fast_reg ? srp_map_finish_fr(state, ch) :
-                       srp_map_finish_fmr(state, ch);
-
-       if (ret == 0) {
-               state->npages = 0;
-               state->dma_len = 0;
-       }
-
-       return ret;
+       return n;
 }
 
 static int srp_map_sg_entry(struct srp_map_state *state,
@@ -1389,7 +1387,7 @@ static int srp_map_sg_entry(struct srp_map_state *state,
        while (dma_len) {
                unsigned offset = dma_addr & ~dev->mr_page_mask;
                if (state->npages == dev->max_pages_per_mr || offset != 0) {
-                       ret = srp_finish_mapping(state, ch);
+                       ret = srp_map_finish_fmr(state, ch);
                        if (ret)
                                return ret;
                }
@@ -1411,51 +1409,83 @@ static int srp_map_sg_entry(struct srp_map_state *state,
         */
        ret = 0;
        if (len != dev->mr_page_size)
-               ret = srp_finish_mapping(state, ch);
+               ret = srp_map_finish_fmr(state, ch);
        return ret;
 }
 
-static int srp_map_sg(struct srp_map_state *state, struct srp_rdma_ch *ch,
-                     struct srp_request *req, struct scatterlist *scat,
-                     int count)
+static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
+                         struct srp_request *req, struct scatterlist *scat,
+                         int count)
 {
-       struct srp_target_port *target = ch->target;
-       struct srp_device *dev = target->srp_host->srp_dev;
        struct scatterlist *sg;
        int i, ret;
 
-       state->desc     = req->indirect_desc;
-       state->pages    = req->map_page;
-       if (dev->use_fast_reg) {
-               state->fr.next = req->fr_list;
-               state->fr.end = req->fr_list + target->cmd_sg_cnt;
-       } else if (dev->use_fmr) {
-               state->fmr.next = req->fmr_list;
-               state->fmr.end = req->fmr_list + target->cmd_sg_cnt;
-       }
+       state->desc = req->indirect_desc;
+       state->pages = req->map_page;
+       state->fmr.next = req->fmr_list;
+       state->fmr.end = req->fmr_list + ch->target->cmd_sg_cnt;
 
-       if (dev->use_fast_reg || dev->use_fmr) {
-               for_each_sg(scat, sg, count, i) {
-                       ret = srp_map_sg_entry(state, ch, sg, i);
-                       if (ret)
-                               goto out;
-               }
-               ret = srp_finish_mapping(state, ch);
+       for_each_sg(scat, sg, count, i) {
+               ret = srp_map_sg_entry(state, ch, sg, i);
                if (ret)
-                       goto out;
-       } else {
-               for_each_sg(scat, sg, count, i) {
-                       srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
-                                    ib_sg_dma_len(dev->dev, sg),
-                                    target->global_mr->rkey);
-               }
+                       return ret;
        }
 
+       ret = srp_map_finish_fmr(state, ch);
+       if (ret)
+               return ret;
+
        req->nmdesc = state->nmdesc;
-       ret = 0;
 
-out:
-       return ret;
+       return 0;
+}
+
+static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
+                        struct srp_request *req, struct scatterlist *scat,
+                        int count)
+{
+       state->desc = req->indirect_desc;
+       state->fr.next = req->fr_list;
+       state->fr.end = req->fr_list + ch->target->cmd_sg_cnt;
+       state->sg = scat;
+       state->sg_nents = scsi_sg_count(req->scmnd);
+
+       while (state->sg_nents) {
+               int i, n;
+
+               n = srp_map_finish_fr(state, ch);
+               if (unlikely(n < 0))
+                       return n;
+
+               state->sg_nents -= n;
+               for (i = 0; i < n; i++)
+                       state->sg = sg_next(state->sg);
+       }
+
+       req->nmdesc = state->nmdesc;
+
+       return 0;
+}
+
+static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
+                         struct srp_request *req, struct scatterlist *scat,
+                         int count)
+{
+       struct srp_target_port *target = ch->target;
+       struct srp_device *dev = target->srp_host->srp_dev;
+       struct scatterlist *sg;
+       int i;
+
+       state->desc = req->indirect_desc;
+       for_each_sg(scat, sg, count, i) {
+               srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
+                            ib_sg_dma_len(dev->dev, sg),
+                            target->global_mr->rkey);
+       }
+
+       req->nmdesc = state->nmdesc;
+
+       return 0;
 }
 
 /*
@@ -1474,6 +1504,7 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
        struct srp_map_state state;
        struct srp_direct_buf idb_desc;
        u64 idb_pages[1];
+       struct scatterlist idb_sg[1];
        int ret;
 
        memset(&state, 0, sizeof(state));
@@ -1481,20 +1512,32 @@ static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
        state.gen.next = next_mr;
        state.gen.end = end_mr;
        state.desc = &idb_desc;
-       state.pages = idb_pages;
-       state.pages[0] = (req->indirect_dma_addr &
-                         dev->mr_page_mask);
-       state.npages = 1;
        state.base_dma_addr = req->indirect_dma_addr;
        state.dma_len = idb_len;
-       ret = srp_finish_mapping(&state, ch);
-       if (ret < 0)
-               goto out;
+
+       if (dev->use_fast_reg) {
+               state.sg = idb_sg;
+               state.sg_nents = 1;
+               sg_set_buf(idb_sg, req->indirect_desc, idb_len);
+               idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
+               ret = srp_map_finish_fr(&state, ch);
+               if (ret < 0)
+                       return ret;
+       } else if (dev->use_fmr) {
+               state.pages = idb_pages;
+               state.pages[0] = (req->indirect_dma_addr &
+                                 dev->mr_page_mask);
+               state.npages = 1;
+               ret = srp_map_finish_fmr(&state, ch);
+               if (ret < 0)
+                       return ret;
+       } else {
+               return -EINVAL;
+       }
 
        *idb_rkey = idb_desc.key;
 
-out:
-       return ret;
+       return 0;
 }
 
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
@@ -1563,7 +1606,12 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
                                   target->indirect_size, DMA_TO_DEVICE);
 
        memset(&state, 0, sizeof(state));
-       srp_map_sg(&state, ch, req, scat, count);
+       if (dev->use_fast_reg)
+               srp_map_sg_fr(&state, ch, req, scat, count);
+       else if (dev->use_fmr)
+               srp_map_sg_fmr(&state, ch, req, scat, count);
+       else
+               srp_map_sg_dma(&state, ch, req, scat, count);
 
        /* We've mapped the request, now pull as much of the indirect
         * descriptor table as we can into the command buffer. If this
@@ -3213,7 +3261,7 @@ static ssize_t srp_create_target(struct device *dev,
        INIT_WORK(&target->tl_err_work, srp_tl_err_work);
        INIT_WORK(&target->remove_work, srp_remove_work);
        spin_lock_init(&target->lock);
-       ret = ib_query_gid(ibdev, host->port, 0, &target->sgid);
+       ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
        if (ret)
                goto out;
 
index 3608f2e4819c412ee742bca4499cd84de32aa343..87a2a919dc43877141426e6d373d12727dc4b65e 100644
@@ -242,7 +242,6 @@ struct srp_iu {
 struct srp_fr_desc {
        struct list_head                entry;
        struct ib_mr                    *mr;
-       struct ib_fast_reg_page_list    *frpl;
 };
 
 /**
@@ -294,11 +293,17 @@ struct srp_map_state {
                } gen;
        };
        struct srp_direct_buf  *desc;
-       u64                    *pages;
+       union {
+               u64                     *pages;
+               struct scatterlist      *sg;
+       };
        dma_addr_t              base_dma_addr;
        u32                     dma_len;
        u32                     total_len;
-       unsigned int            npages;
+       union {
+               unsigned int    npages;
+               int             sg_nents;
+       };
        unsigned int            nmdesc;
        unsigned int            ndesc;
 };
index f6fe0414139beeafa3ddfba0ed33bf1ebc6a7489..47c4022fda7680e3107710bb703e83fa61affc15 100644
@@ -546,7 +546,8 @@ static int srpt_refresh_port(struct srpt_port *sport)
        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;
 
-       ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
+       ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid,
+                          NULL);
        if (ret)
                goto err_query_port;
 
@@ -2822,7 +2823,7 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                              struct srpt_send_ioctx *ioctx)
 {
-       struct ib_send_wr wr;
+       struct ib_rdma_wr wr;
        struct ib_send_wr *bad_wr;
        struct rdma_iu *riu;
        int i;
@@ -2850,29 +2851,29 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
 
        for (i = 0; i < n_rdma; ++i, ++riu) {
                if (dir == DMA_FROM_DEVICE) {
-                       wr.opcode = IB_WR_RDMA_WRITE;
-                       wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
+                       wr.wr.opcode = IB_WR_RDMA_WRITE;
+                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
                                                SRPT_RDMA_WRITE_LAST :
                                                SRPT_RDMA_MID,
                                                ioctx->ioctx.index);
                } else {
-                       wr.opcode = IB_WR_RDMA_READ;
-                       wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
+                       wr.wr.opcode = IB_WR_RDMA_READ;
+                       wr.wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
                                                SRPT_RDMA_READ_LAST :
                                                SRPT_RDMA_MID,
                                                ioctx->ioctx.index);
                }
-               wr.next = NULL;
-               wr.wr.rdma.remote_addr = riu->raddr;
-               wr.wr.rdma.rkey = riu->rkey;
-               wr.num_sge = riu->sge_cnt;
-               wr.sg_list = riu->sge;
+               wr.wr.next = NULL;
+               wr.remote_addr = riu->raddr;
+               wr.rkey = riu->rkey;
+               wr.wr.num_sge = riu->sge_cnt;
+               wr.wr.sg_list = riu->sge;
 
                /* only get completion event for the last rdma write */
                if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
-                       wr.send_flags = IB_SEND_SIGNALED;
+                       wr.wr.send_flags = IB_SEND_SIGNALED;
 
-               ret = ib_post_send(ch->qp, &wr, &bad_wr);
+               ret = ib_post_send(ch->qp, &wr.wr, &bad_wr);
                if (ret)
                        break;
        }
@@ -2881,11 +2882,11 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                 __func__, __LINE__, ret, i, n_rdma);
        if (ret && i > 0) {
-               wr.num_sge = 0;
-               wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
-               wr.send_flags = IB_SEND_SIGNALED;
+               wr.wr.num_sge = 0;
+               wr.wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
+               wr.wr.send_flags = IB_SEND_SIGNALED;
                while (ch->state == CH_LIVE &&
-                       ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
+                       ib_post_send(ch->qp, &wr.wr, &bad_wr) != 0) {
                        pr_info("Trying to abort failed RDMA transfer [%d]\n",
                                ioctx->ioctx.index);
                        msleep(1000);
index 634e50c8c5ef766ae144a2c51344294751902dea..6f268518b37f0ec160d0fe6998942b6042823489 100644
@@ -49,6 +49,7 @@
 #include <linux/etherdevice.h>
 #include <linux/net_tstamp.h>
 #include <asm/io.h>
+#include "t4_chip_type.h"
 #include "cxgb4_uld.h"
 
 #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
@@ -291,31 +292,6 @@ struct pci_params {
        unsigned char width;
 };
 
-#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
-#define CHELSIO_CHIP_FPGA          0x100
-#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
-#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
-
-#define CHELSIO_T4             0x4
-#define CHELSIO_T5             0x5
-#define CHELSIO_T6             0x6
-
-enum chip_type {
-       T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
-       T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
-       T4_FIRST_REV    = T4_A1,
-       T4_LAST_REV     = T4_A2,
-
-       T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
-       T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
-       T5_FIRST_REV    = T5_A0,
-       T5_LAST_REV     = T5_A1,
-
-       T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0),
-       T6_FIRST_REV    = T6_A0,
-       T6_LAST_REV     = T6_A0,
-};
-
 struct devlog_params {
        u32 memtype;                    /* which memory (EDC0, EDC1, MC) */
        u32 start;                      /* start of log in firmware memory */
@@ -909,21 +885,6 @@ static inline int is_offload(const struct adapter *adap)
        return adap->params.offload;
 }
 
-static inline int is_t6(enum chip_type chip)
-{
-       return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6;
-}
-
-static inline int is_t5(enum chip_type chip)
-{
-       return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5;
-}
-
-static inline int is_t4(enum chip_type chip)
-{
-       return CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4;
-}
-
 static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
 {
        return readl(adap->regs + reg_addr);
index 2cf81857a2971b280005715992c0842e4f21385f..0d147610a06f13819bd1425984f6ec6b3c34e81a 100644 (file)
@@ -1940,6 +1940,28 @@ unsigned int cxgb4_best_aligned_mtu(const unsigned short *mtus,
 }
 EXPORT_SYMBOL(cxgb4_best_aligned_mtu);
 
+/**
+ *     cxgb4_tp_smt_idx - Get the Source Mac Table index for this VI
+ *     @chip: chip type
+ *     @viid: VI id of the given port
+ *
+ *     Return the SMT index for this VI.
+ */
+unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid)
+{
+       /* In T4/T5, SMT contains 256 SMAC entries organized in
+        * 128 rows of 2 entries each.
+        * In T6, SMT contains 256 SMAC entries in 256 rows.
+        * TODO: The below code needs to be updated when we add support
+        * for 256 VFs.
+        */
+       if (CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5)
+               return ((viid & 0x7f) << 1);
+       else
+               return (viid & 0x7f);
+}
+EXPORT_SYMBOL(cxgb4_tp_smt_idx);
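
As a worked example of the arithmetic in cxgb4_tp_smt_idx() above (the VI id is hypothetical): for viid = 0x85, viid & 0x7f is 5, so the SMT index is 5 << 1 = 10 on T4/T5, which pack two SMAC entries per row, and simply 5 on T6, which keeps one entry per row.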
+
 /**
  *     cxgb4_port_chan - get the HW channel of a port
  *     @dev: the net device for the port
index c3a8be5541e7cfdbc55f41b81f12917d8561b2a4..cf711d5f15bec5498204bc97475932a72a28ea75 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/skbuff.h>
 #include <linux/inetdevice.h>
 #include <linux/atomic.h>
+#include "cxgb4.h"
 
 /* CPL message priority levels */
 enum {
@@ -290,6 +291,7 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
+unsigned int cxgb4_tp_smt_idx(enum chip_type chip, unsigned int viid);
 unsigned int cxgb4_port_idx(const struct net_device *dev);
 unsigned int cxgb4_best_mtu(const unsigned short *mtus, unsigned short mtu,
                            unsigned int *idx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h b/drivers/net/ethernet/chelsio/cxgb4/t4_chip_type.h
new file mode 100644 (file)
index 0000000..54b7181
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * This file is part of the Chelsio T4 Ethernet driver for Linux.
+ *
+ * Copyright (c) 2003-2015 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __T4_CHIP_TYPE_H__
+#define __T4_CHIP_TYPE_H__
+
+#define CHELSIO_T4             0x4
+#define CHELSIO_T5             0x5
+#define CHELSIO_T6             0x6
+
+/* We code the Chelsio T4 Family "Chip Code" as a tuple:
+ *
+ *     (Chip Version, Chip Revision)
+ *
+ * where:
+ *
+ *     Chip Version: is T4, T5, etc.
+ *     Chip Revision: is the FAB "spin" of the Chip Version.
+ */
+#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
+#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
+#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
+
+enum chip_type {
+       T4_A1 = CHELSIO_CHIP_CODE(CHELSIO_T4, 1),
+       T4_A2 = CHELSIO_CHIP_CODE(CHELSIO_T4, 2),
+       T4_FIRST_REV    = T4_A1,
+       T4_LAST_REV     = T4_A2,
+
+       T5_A0 = CHELSIO_CHIP_CODE(CHELSIO_T5, 0),
+       T5_A1 = CHELSIO_CHIP_CODE(CHELSIO_T5, 1),
+       T5_FIRST_REV    = T5_A0,
+       T5_LAST_REV     = T5_A1,
+
+       T6_A0 = CHELSIO_CHIP_CODE(CHELSIO_T6, 0),
+       T6_FIRST_REV    = T6_A0,
+       T6_LAST_REV     = T6_A0,
+};
+
+static inline int is_t4(enum chip_type chip)
+{
+       return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T4);
+}
+
+static inline int is_t5(enum chip_type chip)
+{
+       return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T5);
+}
+
+static inline int is_t6(enum chip_type chip)
+{
+       return (CHELSIO_CHIP_VERSION(chip) == CHELSIO_T6);
+}
+
+#endif /* __T4_CHIP_TYPE_H__ */
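
To make the (version, revision) nibble encoding described above concrete, here is a small self-contained check of the macros copied from this new header; it is an illustration only, not part of the patch:

	#include <assert.h>

	#define CHELSIO_CHIP_CODE(version, revision) (((version) << 4) | (revision))
	#define CHELSIO_CHIP_VERSION(code) (((code) >> 4) & 0xf)
	#define CHELSIO_CHIP_RELEASE(code) ((code) & 0xf)
	#define CHELSIO_T5 0x5

	int main(void)
	{
		int code = CHELSIO_CHIP_CODE(CHELSIO_T5, 1);	/* T5_A1 */

		assert(code == 0x51);				/* version in the high nibble */
		assert(CHELSIO_CHIP_VERSION(code) == CHELSIO_T5);
		assert(CHELSIO_CHIP_RELEASE(code) == 1);	/* revision in the low nibble */
		return 0;
	}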
index b99144afd4ecc8958961acf2673c53bdcd8fa8df..a072d341e205bf272418d0104b084a0898281e7e 100644 (file)
@@ -417,6 +417,21 @@ struct cpl_t5_act_open_req {
        __be64 params;
 };
 
+struct cpl_t6_act_open_req {
+       WR_HDR;
+       union opcode_tid ot;
+       __be16 local_port;
+       __be16 peer_port;
+       __be32 local_ip;
+       __be32 peer_ip;
+       __be64 opt0;
+       __be32 rsvd;
+       __be32 opt2;
+       __be64 params;
+       __be32 rsvd2;
+       __be32 opt3;
+};
+
 struct cpl_act_open_req6 {
        WR_HDR;
        union opcode_tid ot;
@@ -446,6 +461,23 @@ struct cpl_t5_act_open_req6 {
        __be64 params;
 };
 
+struct cpl_t6_act_open_req6 {
+       WR_HDR;
+       union opcode_tid ot;
+       __be16 local_port;
+       __be16 peer_port;
+       __be64 local_ip_hi;
+       __be64 local_ip_lo;
+       __be64 peer_ip_hi;
+       __be64 peer_ip_lo;
+       __be64 opt0;
+       __be32 rsvd;
+       __be32 opt2;
+       __be64 params;
+       __be32 rsvd2;
+       __be32 opt3;
+};
+
 struct cpl_act_open_rpl {
        union opcode_tid ot;
        __be32 atid_status;
@@ -504,6 +536,19 @@ struct cpl_pass_establish {
 #define TCPOPT_MSS_M   0xF
 #define TCPOPT_MSS_G(x)        (((x) >> TCPOPT_MSS_S) & TCPOPT_MSS_M)
 
+#define T6_TCP_HDR_LEN_S   8
+#define T6_TCP_HDR_LEN_V(x) ((x) << T6_TCP_HDR_LEN_S)
+#define T6_TCP_HDR_LEN_G(x) (((x) >> T6_TCP_HDR_LEN_S) & TCP_HDR_LEN_M)
+
+#define T6_IP_HDR_LEN_S    14
+#define T6_IP_HDR_LEN_V(x) ((x) << T6_IP_HDR_LEN_S)
+#define T6_IP_HDR_LEN_G(x) (((x) >> T6_IP_HDR_LEN_S) & IP_HDR_LEN_M)
+
+#define T6_ETH_HDR_LEN_S    24
+#define T6_ETH_HDR_LEN_M    0xFF
+#define T6_ETH_HDR_LEN_V(x) ((x) << T6_ETH_HDR_LEN_S)
+#define T6_ETH_HDR_LEN_G(x) (((x) >> T6_ETH_HDR_LEN_S) & T6_ETH_HDR_LEN_M)
+
 struct cpl_act_establish {
        union opcode_tid ot;
        __be32 rsvd;
@@ -833,6 +878,9 @@ struct cpl_rx_pkt {
        __be16 err_vec;
 };
 
+#define RX_T6_ETHHDR_LEN_M    0xFF
+#define RX_T6_ETHHDR_LEN_G(x) (((x) >> RX_ETHHDR_LEN_S) & RX_T6_ETHHDR_LEN_M)
+
 #define RXF_PSH_S    20
 #define RXF_PSH_V(x) ((x) << RXF_PSH_S)
 #define RXF_PSH_F    RXF_PSH_V(1U)
index a946e4bf71d2a18cce11bb497ec9b737653ae652..005f910ec955ecdaaa398c6edcc6d95c01d6c2c4 100644 (file)
@@ -123,6 +123,28 @@ void mlx4_en_update_loopback_state(struct net_device *dev,
         */
        if (mlx4_is_mfunc(priv->mdev->dev) || priv->validate_loopback)
                priv->flags |= MLX4_EN_FLAG_ENABLE_HW_LOOPBACK;
+
+       mutex_lock(&priv->mdev->state_lock);
+       if (priv->mdev->dev->caps.flags2 &
+           MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB &&
+           priv->rss_map.indir_qp.qpn) {
+               int i;
+               int err = 0;
+               int loopback = !!(features & NETIF_F_LOOPBACK);
+
+               for (i = 0; i < priv->rx_ring_num; i++) {
+                       int ret;
+
+                       ret = mlx4_en_change_mcast_lb(priv,
+                                                     &priv->rss_map.qps[i],
+                                                     loopback);
+                       if (!err)
+                               err = ret;
+               }
+               if (err)
+                       mlx4_warn(priv->mdev, "failed to change mcast loopback\n");
+       }
+       mutex_unlock(&priv->mdev->state_lock);
 }
 
 static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
index e482fa1bb7410e2eff0f2ed59948bd5fbd7decd8..12aab5a659d33e4c098a6b8bedf2d4014798e14c 100644 (file)
@@ -69,6 +69,15 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
        context->pri_path.counter_index = priv->counter_index;
        context->cqn_send = cpu_to_be32(cqn);
        context->cqn_recv = cpu_to_be32(cqn);
+       if (!rss &&
+           (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_LB_SRC_CHK) &&
+           context->pri_path.counter_index !=
+                           MLX4_SINK_COUNTER_INDEX(mdev->dev)) {
+               /* disable multicast loopback to qp with same counter */
+               if (!(dev->features & NETIF_F_LOOPBACK))
+                       context->pri_path.fl |= MLX4_FL_ETH_SRC_CHECK_MC_LB;
+               context->pri_path.control |= MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
+       }
        context->db_rec_addr = cpu_to_be64(priv->res.db.dma << 2);
        if (!(dev->features & NETIF_F_HW_VLAN_CTAG_RX))
                context->param3 |= cpu_to_be32(1 << 30);
@@ -80,6 +89,22 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
        }
 }
 
+int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
+                           int loopback)
+{
+       int ret;
+       struct mlx4_update_qp_params qp_params;
+
+       memset(&qp_params, 0, sizeof(qp_params));
+       if (!loopback)
+               qp_params.flags = MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB;
+
+       ret = mlx4_update_qp(priv->mdev->dev, qp->qpn,
+                            MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB,
+                            &qp_params);
+
+       return ret;
+}
 
 int mlx4_en_map_buffer(struct mlx4_buf *buf)
 {
index f13a4d7bbf9597535e5f6271dea3769389bc90b6..90db94e83fdeef52023a542539a5a396c2a9a80c 100644 (file)
@@ -155,6 +155,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
                [27] = "Port beacon support",
                [28] = "RX-ALL support",
                [29] = "802.1ad offload support",
+               [31] = "Modifying loopback source checks using UPDATE_QP support",
+               [32] = "Loopback source checks support",
        };
        int i;
 
@@ -964,6 +966,10 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
        MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
        if (field32 & (1 << 16))
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
+       if (field32 & (1 << 18))
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB;
+       if (field32 & (1 << 19))
+               dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_LB_SRC_CHK;
        if (field32 & (1 << 26))
                dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_VLAN_CONTROL;
        if (field32 & (1 << 20))
index defcf8c395bface7f024043cc51484bd7a4f3820..c41f15102ae0b7cbfd1351431ada14a223f1624e 100644 (file)
@@ -798,7 +798,8 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
 int mlx4_en_map_buffer(struct mlx4_buf *buf);
 void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
-
+int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp,
+                           int loopback);
 void mlx4_en_calc_rx_buf(struct net_device *dev);
 int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
 void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
index 3311f35d08e0719381a6e267e8cdf0acf9198b4d..168823dde79f3dd48596bdad5a4ea72a95ed0404 100644 (file)
@@ -436,6 +436,23 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
                cmd->qp_context.pri_path.grh_mylmc = params->smac_index;
        }
 
+       if (attr & MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB) {
+               if (!(dev->caps.flags2
+                     & MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+                       mlx4_warn(dev,
+                                 "Trying to set src check LB, but it isn't supported\n");
+                       err = -ENOTSUPP;
+                       goto out;
+               }
+               pri_addr_path_mask |=
+                       1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB;
+               if (params->flags &
+                   MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB) {
+                       cmd->qp_context.pri_path.fl |=
+                               MLX4_FL_ETH_SRC_CHECK_MC_LB;
+               }
+       }
+
        if (attr & MLX4_UPDATE_QP_VSD) {
                qp_mask |= 1ULL << MLX4_UPD_QP_MASK_VSD;
                if (params->flags & MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE)
@@ -458,7 +475,7 @@ int mlx4_update_qp(struct mlx4_dev *dev, u32 qpn,
        err = mlx4_cmd(dev, mailbox->dma, qpn & 0xffffff, 0,
                       MLX4_CMD_UPDATE_QP, MLX4_CMD_TIME_CLASS_A,
                       MLX4_CMD_NATIVE);
-
+out:
        mlx4_free_cmd_mailbox(dev, mailbox);
        return err;
 }
index ac4b99ab1f851c41d1fa108dcbe104f4c307cf48..9813d34f3e5b78e0b3d4a99e253b710278bc7171 100644 (file)
@@ -770,9 +770,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
                        }
                }
 
+               /* preserve IF_COUNTER flag */
+               qpc->pri_path.vlan_control &=
+                       MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER;
                if (vp_oper->state.link_state == IFLA_VF_LINK_STATE_DISABLE &&
                    dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_UPDATE_QP) {
-                       qpc->pri_path.vlan_control =
+                       qpc->pri_path.vlan_control |=
                                MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
                                MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED |
                                MLX4_VLAN_CTRL_ETH_TX_BLOCK_UNTAGGED |
@@ -780,12 +783,12 @@ static int update_vport_qp_param(struct mlx4_dev *dev,
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED |
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
                } else if (0 != vp_oper->state.default_vlan) {
-                       qpc->pri_path.vlan_control =
+                       qpc->pri_path.vlan_control |=
                                MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED |
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED;
                } else { /* priority tagged */
-                       qpc->pri_path.vlan_control =
+                       qpc->pri_path.vlan_control |=
                                MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED |
                                MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED;
                }
@@ -3764,9 +3767,6 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
        update_gid(dev, inbox, (u8)slave);
        adjust_proxy_tun_qkey(dev, vhcr, qpc);
        orig_sched_queue = qpc->pri_path.sched_queue;
-       err = update_vport_qp_param(dev, inbox, slave, qpn);
-       if (err)
-               return err;
 
        err = get_res(dev, slave, qpn, RES_QP, &qp);
        if (err)
@@ -3776,6 +3776,10 @@ int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
                goto out;
        }
 
+       err = update_vport_qp_param(dev, inbox, slave, qpn);
+       if (err)
+               goto out;
+
        err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
 out:
        /* if no error, save sched queue value passed in by VF. This is
@@ -4210,7 +4214,9 @@ static int add_eth_header(struct mlx4_dev *dev, int slave,
 
 }
 
-#define MLX4_UPD_QP_PATH_MASK_SUPPORTED (1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX)
+#define MLX4_UPD_QP_PATH_MASK_SUPPORTED      (                                \
+       1ULL << MLX4_UPD_QP_PATH_MASK_MAC_INDEX                     |\
+       1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)
 int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
                           struct mlx4_vhcr *vhcr,
                           struct mlx4_cmd_mailbox *inbox,
@@ -4233,6 +4239,16 @@ int mlx4_UPDATE_QP_wrapper(struct mlx4_dev *dev, int slave,
            (pri_addr_path_mask & ~MLX4_UPD_QP_PATH_MASK_SUPPORTED))
                return -EPERM;
 
+       if ((pri_addr_path_mask &
+            (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB)) &&
+               !(dev->caps.flags2 &
+                 MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB)) {
+                       mlx4_warn(dev,
+                                 "Src check LB for slave %d isn't supported\n",
+                                  slave);
+               return -ENOTSUPP;
+       }
+
        /* Just change the smac for the QP */
        err = get_res(dev, slave, qpn, RES_QP, &rqp);
        if (err) {
index f4b6c33ac318ca3b9a5623d7665dc5a2d07b123f..993d1ff9ba21987612362d122b7082242314b313 100644 (file)
@@ -128,7 +128,9 @@ extern kib_tunables_t  kiblnd_tunables;
                                     IBLND_CREDIT_HIGHWATER_V1 : \
                                     *kiblnd_tunables.kib_peercredits_hiw) /* when eagerly to return credits */
 
-#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(cb, dev, ps, qpt)
+#define kiblnd_rdma_create_id(cb, dev, ps, qpt) rdma_create_id(&init_net, \
+                                                              cb, dev, \
+                                                              ps, qpt)
 
 static inline int
 kiblnd_concurrent_sends_v1(void)
@@ -525,7 +527,7 @@ typedef struct kib_tx                         /* transmit message */
        __u64                  tx_msgaddr;    /* message buffer (I/O addr) */
        DECLARE_PCI_UNMAP_ADDR(tx_msgunmap);  /* for dma_unmap_single() */
        int                    tx_nwrq;       /* # send work items */
-       struct ib_send_wr      *tx_wrq;       /* send work items... */
+       struct ib_rdma_wr      *tx_wrq;       /* send work items... */
        struct ib_sge          *tx_sge;       /* ...and their memory */
        kib_rdma_desc_t        *tx_rd;        /* rdma descriptor */
        int                    tx_nfrags;     /* # entries in... */
index a23a6d956a4d02efd2552db614706cd45ea2849b..a34f1707c16700accd94ace637cc30f29a5be49e 100644 (file)
@@ -834,7 +834,7 @@ kiblnd_post_tx_locked(kib_conn_t *conn, kib_tx_t *tx, int credit)
                /* close_conn will launch failover */
                rc = -ENETDOWN;
        } else {
-               rc = ib_post_send(conn->ibc_cmid->qp, tx->tx_wrq, &bad_wrq);
+               rc = ib_post_send(conn->ibc_cmid->qp, &tx->tx_wrq->wr, &bad_wrq);
        }
 
        conn->ibc_last_send = jiffies;
@@ -1008,7 +1008,7 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
 {
        kib_hca_dev_t *hdev = tx->tx_pool->tpo_hdev;
        struct ib_sge *sge = &tx->tx_sge[tx->tx_nwrq];
-       struct ib_send_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
+       struct ib_rdma_wr *wrq = &tx->tx_wrq[tx->tx_nwrq];
        int nob = offsetof(kib_msg_t, ibm_u) + body_nob;
        struct ib_mr *mr;
 
@@ -1027,12 +1027,12 @@ kiblnd_init_tx_msg(lnet_ni_t *ni, kib_tx_t *tx, int type, int body_nob)
 
        memset(wrq, 0, sizeof(*wrq));
 
-       wrq->next       = NULL;
-       wrq->wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
-       wrq->sg_list    = sge;
-       wrq->num_sge    = 1;
-       wrq->opcode     = IB_WR_SEND;
-       wrq->send_flags = IB_SEND_SIGNALED;
+       wrq->wr.next       = NULL;
+       wrq->wr.wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_TX);
+       wrq->wr.sg_list    = sge;
+       wrq->wr.num_sge    = 1;
+       wrq->wr.opcode     = IB_WR_SEND;
+       wrq->wr.send_flags = IB_SEND_SIGNALED;
 
        tx->tx_nwrq++;
 }
@@ -1044,7 +1044,7 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
        kib_msg_t *ibmsg = tx->tx_msg;
        kib_rdma_desc_t *srcrd = tx->tx_rd;
        struct ib_sge *sge = &tx->tx_sge[0];
-       struct ib_send_wr *wrq = &tx->tx_wrq[0];
+       struct ib_rdma_wr *wrq = &tx->tx_wrq[0], *next;
        int rc  = resid;
        int srcidx;
        int dstidx;
@@ -1090,16 +1090,17 @@ kiblnd_init_rdma(kib_conn_t *conn, kib_tx_t *tx, int type,
                sge->length = wrknob;
 
                wrq = &tx->tx_wrq[tx->tx_nwrq];
+               next = wrq + 1;
 
-               wrq->next       = wrq + 1;
-               wrq->wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
-               wrq->sg_list    = sge;
-               wrq->num_sge    = 1;
-               wrq->opcode     = IB_WR_RDMA_WRITE;
-               wrq->send_flags = 0;
+               wrq->wr.next       = &next->wr;
+               wrq->wr.wr_id      = kiblnd_ptr2wreqid(tx, IBLND_WID_RDMA);
+               wrq->wr.sg_list    = sge;
+               wrq->wr.num_sge    = 1;
+               wrq->wr.opcode     = IB_WR_RDMA_WRITE;
+               wrq->wr.send_flags = 0;
 
-               wrq->wr.rdma.remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
-               wrq->wr.rdma.rkey        = kiblnd_rd_frag_key(dstrd, dstidx);
+               wrq->remote_addr = kiblnd_rd_frag_addr(dstrd, dstidx);
+               wrq->rkey        = kiblnd_rd_frag_key(dstrd, dstidx);
 
                srcidx = kiblnd_rd_consume_frag(srcrd, srcidx, wrknob);
                dstidx = kiblnd_rd_consume_frag(dstrd, dstidx, wrknob);
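
The ko2iblnd change above also shows the one subtle point of chaining under the new API: the generic next pointer must reference the embedded wr of the following ib_rdma_wr, not the container itself. A minimal sketch of linking an array of ib_rdma_wr for a single post; the helper name and parameters are hypothetical:

	/* Sketch only; assumes the ib_rdma_wr layout introduced by this series. */
	static void example_chain_rdma_wrs(struct ib_rdma_wr *wrq, int nwrq)
	{
		int i;

		for (i = 0; i < nwrq; i++)
			/* the generic next pointer must reference the embedded wr */
			wrq[i].wr.next = (i + 1 < nwrq) ? &wrq[i + 1].wr : NULL;
	}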
index 86708dee58b1405c21694b5ea72a53808e2a3c1e..4c43ca935cc7ad5cf1686759e7b663a367ea76b8 100644 (file)
@@ -860,9 +860,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                                flags |= SQ_READ_FENCE;
                        }
                        wr.sqwr.rdma_write.remote_stag =
-                           cpu_to_be32(ib_wr->wr.rdma.rkey);
+                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
                        wr.sqwr.rdma_write.remote_to =
-                           cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
                        err = move_sgl((struct c2_data_addr *)
                                       & (wr.sqwr.rdma_write.data),
                                       ib_wr->sg_list,
@@ -889,9 +889,9 @@ int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
                        wr.sqwr.rdma_read.local_to =
                            cpu_to_be64(ib_wr->sg_list->addr);
                        wr.sqwr.rdma_read.remote_stag =
-                           cpu_to_be32(ib_wr->wr.rdma.rkey);
+                           cpu_to_be32(rdma_wr(ib_wr)->rkey);
                        wr.sqwr.rdma_read.remote_to =
-                           cpu_to_be64(ib_wr->wr.rdma.remote_addr);
+                           cpu_to_be64(rdma_wr(ib_wr)->remote_addr);
                        wr.sqwr.rdma_read.length =
                            cpu_to_be32(ib_wr->sg_list->length);
                        break;
index 47f94984353de9afac694d51806696d5c6d3d7c8..10e2074384f5d83019b222e711b96459441bf378 100644 (file)
@@ -110,19 +110,19 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
 /* need ib_mad struct */
 #include <rdma/ib_mad.h>
 
-static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+static void trace_ud_wr(const struct ib_ud_wr *ud_wr)
 {
        int idx;
        int j;
-       while (send_wr) {
-               struct ib_mad_hdr *mad_hdr = send_wr->wr.ud.mad_hdr;
-               struct ib_sge *sge = send_wr->sg_list;
-               ehca_gen_dbg("send_wr#%x wr_id=%lx num_sge=%x "
-                            "send_flags=%x opcode=%x", idx, send_wr->wr_id,
-                            send_wr->num_sge, send_wr->send_flags,
-                            send_wr->opcode);
+       while (ud_wr) {
+               struct ib_mad_hdr *mad_hdr = ud_wr->mad_hdr;
+               struct ib_sge *sge = ud_wr->wr.sg_list;
+               ehca_gen_dbg("ud_wr#%x wr_id=%lx num_sge=%x "
+                            "send_flags=%x opcode=%x", idx, ud_wr->wr.wr_id,
+                            ud_wr->wr.num_sge, ud_wr->wr.send_flags,
+                            ud_wr->wr.opcode);
                if (mad_hdr) {
-                       ehca_gen_dbg("send_wr#%x mad_hdr base_version=%x "
+                       ehca_gen_dbg("ud_wr#%x mad_hdr base_version=%x "
                                     "mgmt_class=%x class_version=%x method=%x "
                                     "status=%x class_specific=%x tid=%lx "
                                     "attr_id=%x resv=%x attr_mod=%x",
@@ -134,33 +134,33 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
                                     mad_hdr->resv,
                                     mad_hdr->attr_mod);
                }
-               for (j = 0; j < send_wr->num_sge; j++) {
+               for (j = 0; j < ud_wr->wr.num_sge; j++) {
                        u8 *data = __va(sge->addr);
-                       ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
+                       ehca_gen_dbg("ud_wr#%x sge#%x addr=%p length=%x "
                                     "lkey=%x",
                                     idx, j, data, sge->length, sge->lkey);
                        /* assume length is n*16 */
-                       ehca_dmp(data, sge->length, "send_wr#%x sge#%x",
+                       ehca_dmp(data, sge->length, "ud_wr#%x sge#%x",
                                 idx, j);
                        sge++;
                } /* eof for j */
                idx++;
-               send_wr = send_wr->next;
-       } /* eof while send_wr */
+               ud_wr = ud_wr(ud_wr->wr.next);
+       } /* eof while ud_wr */
 }
 
 #endif /* DEBUG_GSI_SEND_WR */
 
 static inline int ehca_write_swqe(struct ehca_qp *qp,
                                  struct ehca_wqe *wqe_p,
-                                 const struct ib_send_wr *send_wr,
+                                 struct ib_send_wr *send_wr,
                                  u32 sq_map_idx,
                                  int hidden)
 {
        u32 idx;
        u64 dma_length;
        struct ehca_av *my_av;
-       u32 remote_qkey = send_wr->wr.ud.remote_qkey;
+       u32 remote_qkey;
        struct ehca_qmap_entry *qmap_entry = &qp->sq_map.map[sq_map_idx];
 
        if (unlikely((send_wr->num_sge < 0) ||
@@ -223,20 +223,21 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
                /* no break is intentional here */
        case IB_QPT_UD:
                /* IB 1.2 spec C10-15 compliance */
-               if (send_wr->wr.ud.remote_qkey & 0x80000000)
+               remote_qkey = ud_wr(send_wr)->remote_qkey;
+               if (remote_qkey & 0x80000000)
                        remote_qkey = qp->qkey;
 
-               wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+               wqe_p->destination_qp_number = ud_wr(send_wr)->remote_qpn << 8;
                wqe_p->local_ee_context_qkey = remote_qkey;
-               if (unlikely(!send_wr->wr.ud.ah)) {
-                       ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+               if (unlikely(!ud_wr(send_wr)->ah)) {
+                       ehca_gen_err("ud_wr(send_wr) is NULL. qp=%p", qp);
                        return -EINVAL;
                }
-               if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
+               if (unlikely(ud_wr(send_wr)->remote_qpn == 0)) {
                        ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
                        return -EINVAL;
                }
-               my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+               my_av = container_of(ud_wr(send_wr)->ah, struct ehca_av, ib_ah);
                wqe_p->u.ud_av.ud_av = my_av->av;
 
                /*
@@ -255,9 +256,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
                    qp->qp_type == IB_QPT_GSI)
                        wqe_p->u.ud_av.ud_av.pmtu = 1;
                if (qp->qp_type == IB_QPT_GSI) {
-                       wqe_p->pkeyi = send_wr->wr.ud.pkey_index;
+                       wqe_p->pkeyi = ud_wr(send_wr)->pkey_index;
 #ifdef DEBUG_GSI_SEND_WR
-                       trace_send_wr_ud(send_wr);
+                       trace_ud_wr(ud_wr(send_wr));
 #endif /* DEBUG_GSI_SEND_WR */
                }
                break;
@@ -269,8 +270,8 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
        case IB_QPT_RC:
                /* TODO: atomic not implemented */
                wqe_p->u.nud.remote_virtual_address =
-                       send_wr->wr.rdma.remote_addr;
-               wqe_p->u.nud.rkey = send_wr->wr.rdma.rkey;
+                       rdma_wr(send_wr)->remote_addr;
+               wqe_p->u.nud.rkey = rdma_wr(send_wr)->rkey;
 
                /*
                 * omitted checking of IB_SEND_INLINE
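
Drivers such as ehca above still receive a plain struct ib_send_wr in their post_send path and recover the type-specific fields through the rdma_wr()/ud_wr()/atomic_wr() accessors used throughout this series. A sketch of what such an accessor amounts to; treat the exact upstream definition as an assumption, and the helper name here as hypothetical:

	/* Sketch of the downcast pattern relied on by the conversions above. */
	static inline struct ib_rdma_wr *example_rdma_wr(struct ib_send_wr *wr)
	{
		return container_of(wr, struct ib_rdma_wr, wr);
	}

	/* usage inside a post_send handler, e.g. for IB_WR_RDMA_WRITE:
	 *	u64 raddr = example_rdma_wr(send_wr)->remote_addr;
	 *	u32 rkey  = example_rdma_wr(send_wr)->rkey;
	 */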
index f6eff177ace1e02f4f5947462785796829937ccd..cb4e6087dfdb263669292140d65da108d825b86e 100644 (file)
@@ -354,58 +354,3 @@ bail:
        rcu_read_unlock();
        return 0;
 }
-
-/*
- * Initialize the memory region specified by the work request.
- */
-int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr)
-{
-       struct hfi1_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-       struct hfi1_pd *pd = to_ipd(qp->ibqp.pd);
-       struct hfi1_mregion *mr;
-       u32 rkey = wr->wr.fast_reg.rkey;
-       unsigned i, n, m;
-       int ret = -EINVAL;
-       unsigned long flags;
-       u64 *page_list;
-       size_t ps;
-
-       spin_lock_irqsave(&rkt->lock, flags);
-       if (pd->user || rkey == 0)
-               goto bail;
-
-       mr = rcu_dereference_protected(
-               rkt->table[(rkey >> (32 - hfi1_lkey_table_size))],
-               lockdep_is_held(&rkt->lock));
-       if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
-               goto bail;
-
-       if (wr->wr.fast_reg.page_list_len > mr->max_segs)
-               goto bail;
-
-       ps = 1UL << wr->wr.fast_reg.page_shift;
-       if (wr->wr.fast_reg.length > ps * wr->wr.fast_reg.page_list_len)
-               goto bail;
-
-       mr->user_base = wr->wr.fast_reg.iova_start;
-       mr->iova = wr->wr.fast_reg.iova_start;
-       mr->lkey = rkey;
-       mr->length = wr->wr.fast_reg.length;
-       mr->access_flags = wr->wr.fast_reg.access_flags;
-       page_list = wr->wr.fast_reg.page_list->page_list;
-       m = 0;
-       n = 0;
-       for (i = 0; i < wr->wr.fast_reg.page_list_len; i++) {
-               mr->map[m]->segs[n].vaddr = (void *) page_list[i];
-               mr->map[m]->segs[n].length = ps;
-               if (++n == HFI1_SEGSZ) {
-                       m++;
-                       n = 0;
-               }
-       }
-
-       ret = 0;
-bail:
-       spin_unlock_irqrestore(&rkt->lock, flags);
-       return ret;
-}
index bd64e4f986f9c2d64f807d2fa9c8b43a9e7b4058..402bd64141769e263e7bc22e67ca86e197464360 100644 (file)
@@ -344,9 +344,10 @@ out:
 
 /*
  * Allocate a memory region usable with the
- * IB_WR_FAST_REG_MR send work request.
+ * IB_WR_REG_MR send work request.
  *
  * Return the memory region on success, otherwise return an errno.
+ * FIXME: IB_WR_REG_MR is not supported
  */
 struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
                            enum ib_mr_type mr_type,
@@ -364,36 +365,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
        return &mr->ibmr;
 }
 
-struct ib_fast_reg_page_list *
-hfi1_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
-{
-       unsigned size = page_list_len * sizeof(u64);
-       struct ib_fast_reg_page_list *pl;
-
-       if (size > PAGE_SIZE)
-               return ERR_PTR(-EINVAL);
-
-       pl = kzalloc(sizeof(*pl), GFP_KERNEL);
-       if (!pl)
-               return ERR_PTR(-ENOMEM);
-
-       pl->page_list = kzalloc(size, GFP_KERNEL);
-       if (!pl->page_list)
-               goto err_free;
-
-       return pl;
-
-err_free:
-       kfree(pl);
-       return ERR_PTR(-ENOMEM);
-}
-
-void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl)
-{
-       kfree(pl->page_list);
-       kfree(pl);
-}
-
 /**
  * hfi1_alloc_fmr - allocate a fast memory region
  * @pd: the protection domain for this memory region
index df1fa56eaf851bdfd1bb6feaa160ab196a0ceb6f..f8c36166962f304f451808daf902fbfd765df0cb 100644 (file)
@@ -422,7 +422,7 @@ static void clear_mr_refs(struct hfi1_qp *qp, int clr_sends)
                        if (qp->ibqp.qp_type == IB_QPT_UD ||
                            qp->ibqp.qp_type == IB_QPT_SMI ||
                            qp->ibqp.qp_type == IB_QPT_GSI)
-                               atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+                               atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
                        if (++qp->s_last >= qp->s_size)
                                qp->s_last = 0;
                }
index 632dd5ba7dfdbc36c34c8b7afdc4396477d8b169..fd0ac608c62dca582bef0ef50604c1a2be6fb644 100644 (file)
@@ -404,9 +404,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
                                goto bail;
                        }
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / sizeof(u32);
                        wqe->lpsn = wqe->psn;
@@ -455,9 +455,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
                                wqe->lpsn = qp->s_next_psn++;
                        }
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        qp->s_state = OP(RDMA_READ_REQUEST);
                        hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -488,21 +488,21 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
                        if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
                                qp->s_state = OP(COMPARE_SWAP);
                                ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.swap);
+                                       wqe->atomic_wr.swap);
                                ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.compare_add);
+                                       wqe->atomic_wr.compare_add);
                        } else {
                                qp->s_state = OP(FETCH_ADD);
                                ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.compare_add);
+                                       wqe->atomic_wr.compare_add);
                                ohdr->u.atomic_eth.compare_data = 0;
                        }
                        ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-                               wqe->wr.wr.atomic.remote_addr >> 32);
+                               wqe->atomic_wr.remote_addr >> 32);
                        ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-                               wqe->wr.wr.atomic.remote_addr);
+                               wqe->atomic_wr.remote_addr);
                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
-                               wqe->wr.wr.atomic.rkey);
+                               wqe->atomic_wr.rkey);
                        hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
                        ss = NULL;
                        len = 0;
@@ -629,9 +629,9 @@ int hfi1_make_rc_req(struct hfi1_qp *qp)
                 */
                len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu;
                ohdr->u.rc.reth.vaddr =
-                       cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+                       cpu_to_be64(wqe->rdma_wr.remote_addr + len);
                ohdr->u.rc.reth.rkey =
-                       cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                       cpu_to_be32(wqe->rdma_wr.rkey);
                ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
                qp->s_state = OP(RDMA_READ_REQUEST);
                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
index a4115288db666d26a8d2811d2f839100db74acfd..d614474770b31cf7f3bf4424bb85d010858399d7 100644 (file)
@@ -481,8 +481,8 @@ again:
                if (wqe->length == 0)
                        break;
                if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
-                                          wqe->wr.wr.rdma.remote_addr,
-                                          wqe->wr.wr.rdma.rkey,
+                                          wqe->rdma_wr.remote_addr,
+                                          wqe->rdma_wr.rkey,
                                           IB_ACCESS_REMOTE_WRITE)))
                        goto acc_err;
                qp->r_sge.sg_list = NULL;
@@ -494,8 +494,8 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
                        goto inv_err;
                if (unlikely(!hfi1_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
-                                          wqe->wr.wr.rdma.remote_addr,
-                                          wqe->wr.wr.rdma.rkey,
+                                          wqe->rdma_wr.remote_addr,
+                                          wqe->rdma_wr.rkey,
                                           IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                release = 0;
@@ -512,18 +512,18 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
                        goto inv_err;
                if (unlikely(!hfi1_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
-                                          wqe->wr.wr.atomic.remote_addr,
-                                          wqe->wr.wr.atomic.rkey,
+                                          wqe->atomic_wr.remote_addr,
+                                          wqe->atomic_wr.rkey,
                                           IB_ACCESS_REMOTE_ATOMIC)))
                        goto acc_err;
                /* Perform atomic OP and save result. */
                maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = wqe->wr.wr.atomic.compare_add;
+               sdata = wqe->atomic_wr.compare_add;
                *(u64 *) sqp->s_sge.sge.vaddr =
                        (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
                        (u64) atomic64_add_return(sdata, maddr) - sdata :
                        (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     sdata, wqe->wr.wr.atomic.swap);
+                                     sdata, wqe->atomic_wr.swap);
                hfi1_put_mr(qp->r_sge.sge.mr);
                qp->r_sge.num_sge = 0;
                goto send_comp;
@@ -913,7 +913,7 @@ void hfi1_send_complete(struct hfi1_qp *qp, struct hfi1_swqe *wqe,
        if (qp->ibqp.qp_type == IB_QPT_UD ||
            qp->ibqp.qp_type == IB_QPT_SMI ||
            qp->ibqp.qp_type == IB_QPT_GSI)
-               atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+               atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
 
        /* See ch. 11.2.4.1 and 10.7.3.1 */
        if (!(qp->s_flags & HFI1_S_SIGNAL_REQ_WR) ||
index b536f397737c74bc1007d0fe22d83a86b1bda76b..6095039c4485a642736c694590e3d6320086378b 100644 (file)
@@ -147,9 +147,9 @@ int hfi1_make_uc_req(struct hfi1_qp *qp)
                case IB_WR_RDMA_WRITE:
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / 4;
                        if (len > pmtu) {
index d40d1a1e10aa919be1270b795c2ffa406b8a2b39..5a9c784bec04c5ecbcc64fd8fbbde11b70520d8b 100644 (file)
@@ -80,7 +80,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
 
        rcu_read_lock();
 
-       qp = hfi1_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+       qp = hfi1_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
        if (!qp) {
                ibp->n_pkt_drops++;
                rcu_read_unlock();
@@ -98,7 +98,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
                goto drop;
        }
 
-       ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+       ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
        ppd = ppd_from_ibp(ibp);
 
        if (qp->ibqp.qp_num > 1) {
@@ -128,8 +128,8 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
        if (qp->ibqp.qp_num) {
                u32 qkey;
 
-               qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
-                       sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+               qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
+                       sqp->qkey : swqe->ud_wr.remote_qkey;
                if (unlikely(qkey != qp->qkey)) {
                        u16 lid;
 
@@ -234,7 +234,7 @@ static void ud_loopback(struct hfi1_qp *sqp, struct hfi1_swqe *swqe)
        if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
                if (sqp->ibqp.qp_type == IB_QPT_GSI ||
                    sqp->ibqp.qp_type == IB_QPT_SMI)
-                       wc.pkey_index = swqe->wr.wr.ud.pkey_index;
+                       wc.pkey_index = swqe->ud_wr.pkey_index;
                else
                        wc.pkey_index = sqp->s_pkey_index;
        } else {
@@ -309,7 +309,7 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
        /* Construct the header. */
        ibp = to_iport(qp->ibqp.device, qp->port_num);
        ppd = ppd_from_ibp(ibp);
-       ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+       ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
        if (ah_attr->dlid < HFI1_MULTICAST_LID_BASE ||
            ah_attr->dlid == HFI1_PERMISSIVE_LID) {
                lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
@@ -401,18 +401,18 @@ int hfi1_make_ud_req(struct hfi1_qp *qp)
                bth0 |= IB_BTH_SOLICITED;
        bth0 |= extra_bytes << 20;
        if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
-               bth0 |= hfi1_get_pkey(ibp, wqe->wr.wr.ud.pkey_index);
+               bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
        else
                bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
        ohdr->bth[0] = cpu_to_be32(bth0);
-       ohdr->bth[1] = cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+       ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
        ohdr->bth[2] = cpu_to_be32(mask_psn(qp->s_next_psn++));
        /*
         * Qkeys with the high order bit set mean use the
         * qkey from the QP context instead of the WR (see 10.2.5).
         */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
-                                        qp->qkey : wqe->wr.wr.ud.remote_qkey);
+       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+                                        qp->qkey : wqe->ud_wr.remote_qkey);
        ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
        /* disarm any ahg */
        qp->s_hdr->ahgcount = 0;
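
The qkey comment above (IB spec 10.2.5) relies on the cast to int: a remote_qkey with the high-order bit set compares negative and selects the QP's own qkey instead. A tiny standalone illustration with hypothetical values:

	/* Sketch of the high-bit q_key rule used above. */
	static unsigned int example_effective_qkey(unsigned int wr_qkey,
						   unsigned int qp_qkey)
	{
		/* high-order bit set => negative as int => use the QP's qkey */
		return (int)wr_qkey < 0 ? qp_qkey : wr_qkey;
	}
	/* example_effective_qkey(0x80010203, 0x11111111) == 0x11111111 */
	/* example_effective_qkey(0x00010203, 0x11111111) == 0x00010203 */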
index 41bb59eb001c72fe214a4114047fdf03156f9007..6e2da7ee6d2f69c9828a1de861a152f25726c017 100644 (file)
@@ -380,9 +380,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
         * undefined operations.
         * Make sure buffer is large enough to hold the result for atomics.
         */
-       if (wr->opcode == IB_WR_FAST_REG_MR) {
-               return -EINVAL;
-       } else if (qp->ibqp.qp_type == IB_QPT_UC) {
+       if (qp->ibqp.qp_type == IB_QPT_UC) {
                if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
                        return -EINVAL;
        } else if (qp->ibqp.qp_type != IB_QPT_RC) {
@@ -391,7 +389,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
                    wr->opcode != IB_WR_SEND_WITH_IMM)
                        return -EINVAL;
                /* Check UD destination address PD */
-               if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+               if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
                        return -EINVAL;
        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
                return -EINVAL;
@@ -412,7 +410,21 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
        rkt = &to_idev(qp->ibqp.device)->lk_table;
        pd = to_ipd(qp->ibqp.pd);
        wqe = get_swqe_ptr(qp, qp->s_head);
-       wqe->wr = *wr;
+
+
+       if (qp->ibqp.qp_type != IB_QPT_UC &&
+           qp->ibqp.qp_type != IB_QPT_RC)
+               memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+       else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+                wr->opcode == IB_WR_RDMA_WRITE ||
+                wr->opcode == IB_WR_RDMA_READ)
+               memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+       else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+               memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+       else
+               memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
        wqe->length = 0;
        j = 0;
        if (wr->num_sge) {
@@ -438,7 +450,7 @@ static int post_one_send(struct hfi1_qp *qp, struct ib_send_wr *wr)
                if (wqe->length > 0x80000000U)
                        goto bail_inval_free;
        } else {
-               struct hfi1_ah *ah = to_iah(wr->wr.ud.ah);
+               struct hfi1_ah *ah = to_iah(ud_wr(wr)->ah);
 
                atomic_inc(&ah->refcount);
        }
@@ -2048,8 +2060,6 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
        ibdev->reg_user_mr = hfi1_reg_user_mr;
        ibdev->dereg_mr = hfi1_dereg_mr;
        ibdev->alloc_mr = hfi1_alloc_mr;
-       ibdev->alloc_fast_reg_page_list = hfi1_alloc_fast_reg_page_list;
-       ibdev->free_fast_reg_page_list = hfi1_free_fast_reg_page_list;
        ibdev->alloc_fmr = hfi1_alloc_fmr;
        ibdev->map_phys_fmr = hfi1_map_phys_fmr;
        ibdev->unmap_fmr = hfi1_unmap_fmr;
index ed903a93baf70f01cccf62f7225b0976007ea9d5..159ec08bfcd8d613e92f54c390df05f1c188d14d 100644 (file)
@@ -348,7 +348,12 @@ struct hfi1_mr {
  * in qp->s_max_sge.
  */
 struct hfi1_swqe {
-       struct ib_send_wr wr;   /* don't use wr.sg_list */
+       union {
+               struct ib_send_wr wr;   /* don't use wr.sg_list */
+               struct ib_rdma_wr rdma_wr;
+               struct ib_atomic_wr atomic_wr;
+               struct ib_ud_wr ud_wr;
+       };
        u32 psn;                /* first packet sequence number */
        u32 lpsn;               /* last packet sequence number */
        u32 ssn;                /* send sequence number */
@@ -1020,13 +1025,6 @@ struct ib_mr *hfi1_alloc_mr(struct ib_pd *pd,
                            enum ib_mr_type mr_type,
                            u32 max_entries);
 
-struct ib_fast_reg_page_list *hfi1_alloc_fast_reg_page_list(
-                               struct ib_device *ibdev, int page_list_len);
-
-void hfi1_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
-
-int hfi1_fast_reg_mr(struct hfi1_qp *qp, struct ib_send_wr *wr);
-
 struct ib_fmr *hfi1_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                              struct ib_fmr_attr *fmr_attr);
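
The hfi1_swqe union introduced above works because every new WR type embeds the generic struct ib_send_wr as its first member, so wqe->wr keeps aliasing that header no matter which member post_one_send() copied in; that is why checks like wqe->wr.opcode elsewhere in this patch remain valid. One way to state the invariant as a compile-time check, assuming the structure definitions from this series; this is a sketch, not part of the patch:

	#include <stddef.h>
	/* Assumes struct ib_rdma_wr/ib_atomic_wr/ib_ud_wr from this series. */
	_Static_assert(offsetof(struct ib_rdma_wr, wr) == 0,
		       "generic header must be the first member");
	_Static_assert(offsetof(struct ib_atomic_wr, wr) == 0,
		       "generic header must be the first member");
	_Static_assert(offsetof(struct ib_ud_wr, wr) == 0,
		       "generic header must be the first member");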
 
index 79b3dbc9717923bfdbc215e78e10fcf9feb43a06..d4aa53574e576c83024e39822038edbfd3565212 100644 (file)
@@ -350,9 +350,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                                goto bail;
                        }
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / sizeof(u32);
                        wqe->lpsn = wqe->psn;
@@ -401,9 +401,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                                wqe->lpsn = qp->s_next_psn++;
                        }
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        qp->s_state = OP(RDMA_READ_REQUEST);
                        hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
@@ -433,21 +433,21 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                        if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
                                qp->s_state = OP(COMPARE_SWAP);
                                ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.swap);
+                                       wqe->atomic_wr.swap);
                                ohdr->u.atomic_eth.compare_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.compare_add);
+                                       wqe->atomic_wr.compare_add);
                        } else {
                                qp->s_state = OP(FETCH_ADD);
                                ohdr->u.atomic_eth.swap_data = cpu_to_be64(
-                                       wqe->wr.wr.atomic.compare_add);
+                                       wqe->atomic_wr.compare_add);
                                ohdr->u.atomic_eth.compare_data = 0;
                        }
                        ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
-                               wqe->wr.wr.atomic.remote_addr >> 32);
+                               wqe->atomic_wr.remote_addr >> 32);
                        ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
-                               wqe->wr.wr.atomic.remote_addr);
+                               wqe->atomic_wr.remote_addr);
                        ohdr->u.atomic_eth.rkey = cpu_to_be32(
-                               wqe->wr.wr.atomic.rkey);
+                               wqe->atomic_wr.rkey);
                        hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
                        ss = NULL;
                        len = 0;
@@ -567,9 +567,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
                ipath_init_restart(qp, wqe);
                len = ((qp->s_psn - wqe->psn) & IPATH_PSN_MASK) * pmtu;
                ohdr->u.rc.reth.vaddr =
-                       cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+                       cpu_to_be64(wqe->rdma_wr.remote_addr + len);
                ohdr->u.rc.reth.rkey =
-                       cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                       cpu_to_be32(wqe->rdma_wr.rkey);
                ohdr->u.rc.reth.length = cpu_to_be32(qp->s_len);
                qp->s_state = OP(RDMA_READ_REQUEST);
                hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
index 1f95bbaf7602289c79854a2919f966cce35e618a..46af8b03d3d461f23c3bade19230df36e0d67b20 100644 (file)
@@ -353,8 +353,8 @@ again:
                if (wqe->length == 0)
                        break;
                if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
-                                           wqe->wr.wr.rdma.remote_addr,
-                                           wqe->wr.wr.rdma.rkey,
+                                           wqe->rdma_wr.remote_addr,
+                                           wqe->rdma_wr.rkey,
                                            IB_ACCESS_REMOTE_WRITE)))
                        goto acc_err;
                break;
@@ -363,8 +363,8 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
                        goto inv_err;
                if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
-                                           wqe->wr.wr.rdma.remote_addr,
-                                           wqe->wr.wr.rdma.rkey,
+                                           wqe->rdma_wr.remote_addr,
+                                           wqe->rdma_wr.rkey,
                                            IB_ACCESS_REMOTE_READ)))
                        goto acc_err;
                qp->r_sge.sge = wqe->sg_list[0];
@@ -377,18 +377,18 @@ again:
                if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
                        goto inv_err;
                if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
-                                           wqe->wr.wr.atomic.remote_addr,
-                                           wqe->wr.wr.atomic.rkey,
+                                           wqe->atomic_wr.remote_addr,
+                                           wqe->atomic_wr.rkey,
                                            IB_ACCESS_REMOTE_ATOMIC)))
                        goto acc_err;
                /* Perform atomic OP and save result. */
                maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
-               sdata = wqe->wr.wr.atomic.compare_add;
+               sdata = wqe->atomic_wr.compare_add;
                *(u64 *) sqp->s_sge.sge.vaddr =
                        (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
                        (u64) atomic64_add_return(sdata, maddr) - sdata :
                        (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
-                                     sdata, wqe->wr.wr.atomic.swap);
+                                     sdata, wqe->atomic_wr.swap);
                goto send_comp;
 
        default:
index 22e60998f1a7cacd3c9d2e841db946ae31b2e3cd..0246b30280b9c2bc2f673ffd6c980adefbadf39a 100644 (file)
@@ -126,9 +126,9 @@ int ipath_make_uc_req(struct ipath_qp *qp)
                case IB_WR_RDMA_WRITE:
                case IB_WR_RDMA_WRITE_WITH_IMM:
                        ohdr->u.rc.reth.vaddr =
-                               cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+                               cpu_to_be64(wqe->rdma_wr.remote_addr);
                        ohdr->u.rc.reth.rkey =
-                               cpu_to_be32(wqe->wr.wr.rdma.rkey);
+                               cpu_to_be32(wqe->rdma_wr.rkey);
                        ohdr->u.rc.reth.length = cpu_to_be32(len);
                        hwords += sizeof(struct ib_reth) / 4;
                        if (len > pmtu) {
index e8a2a915251e81d1236b8cb1abec368bbaf911a0..3ffc1565d03d877db7853626f268ee45e8ce4eac 100644 (file)
@@ -65,7 +65,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
        u32 rlen;
        u32 length;
 
-       qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
+       qp = ipath_lookup_qpn(&dev->qp_table, swqe->ud_wr.remote_qpn);
        if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
                dev->n_pkt_drops++;
                goto done;
@@ -77,8 +77,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
         * qkey from the QP context instead of the WR (see 10.2.5).
         */
        if (unlikely(qp->ibqp.qp_num &&
-                    ((int) swqe->wr.wr.ud.remote_qkey < 0 ?
-                     sqp->qkey : swqe->wr.wr.ud.remote_qkey) != qp->qkey)) {
+                    ((int) swqe->ud_wr.remote_qkey < 0 ?
+                     sqp->qkey : swqe->ud_wr.remote_qkey) != qp->qkey)) {
                /* XXX OK to lose a count once in a while. */
                dev->qkey_violations++;
                dev->n_pkt_drops++;
@@ -175,7 +175,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
        } else
                spin_unlock_irqrestore(&rq->lock, flags);
 
-       ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+       ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
        if (ah_attr->ah_flags & IB_AH_GRH) {
                ipath_copy_sge(&rsge, &ah_attr->grh, sizeof(struct ib_grh));
                wc.wc_flags |= IB_WC_GRH;
@@ -225,7 +225,7 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
        wc.port_num = 1;
        /* Signal completion event if the solicited bit is set. */
        ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-                      swqe->wr.send_flags & IB_SEND_SOLICITED);
+                      swqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED);
 drop:
        if (atomic_dec_and_test(&qp->refcount))
                wake_up(&qp->wait);
@@ -280,7 +280,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
                next_cur = 0;
 
        /* Construct the header. */
-       ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+       ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
        if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE) {
                if (ah_attr->dlid != IPATH_PERMISSIVE_LID)
                        dev->n_multicast_xmit++;
@@ -322,7 +322,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
        qp->s_wqe = wqe;
        qp->s_sge.sge = wqe->sg_list[0];
        qp->s_sge.sg_list = wqe->sg_list + 1;
-       qp->s_sge.num_sge = wqe->wr.num_sge;
+       qp->s_sge.num_sge = wqe->ud_wr.wr.num_sge;
 
        if (ah_attr->ah_flags & IB_AH_GRH) {
                /* Header size in 32-bit words. */
@@ -340,9 +340,9 @@ int ipath_make_ud_req(struct ipath_qp *qp)
                lrh0 = IPATH_LRH_BTH;
                ohdr = &qp->s_hdr.u.oth;
        }
-       if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+       if (wqe->ud_wr.wr.opcode == IB_WR_SEND_WITH_IMM) {
                qp->s_hdrwords++;
-               ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+               ohdr->u.ud.imm_data = wqe->ud_wr.wr.ex.imm_data;
                bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
        } else
                bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
@@ -360,7 +360,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
                qp->s_hdr.lrh[3] = cpu_to_be16(lid);
        } else
                qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
-       if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+       if (wqe->ud_wr.wr.send_flags & IB_SEND_SOLICITED)
                bth0 |= 1 << 23;
        bth0 |= extra_bytes << 20;
        bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? IPATH_DEFAULT_P_KEY :
@@ -372,14 +372,14 @@ int ipath_make_ud_req(struct ipath_qp *qp)
        ohdr->bth[1] = ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
                ah_attr->dlid != IPATH_PERMISSIVE_LID ?
                cpu_to_be32(IPATH_MULTICAST_QPN) :
-               cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+               cpu_to_be32(wqe->ud_wr.remote_qpn);
        ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & IPATH_PSN_MASK);
        /*
         * Qkeys with the high order bit set mean use the
         * qkey from the QP context instead of the WR (see 10.2.5).
         */
-       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
-                                        qp->qkey : wqe->wr.wr.ud.remote_qkey);
+       ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
+                                        qp->qkey : wqe->ud_wr.remote_qkey);
        ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 done:
index ed2bbc2f7eaeb76ab9a21913b0fbe1f887bc3895..29e91796fb10965de9d2861fa9aaa4f2e1942a7e 100644 (file)
@@ -374,7 +374,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
                    wr->opcode != IB_WR_SEND_WITH_IMM)
                        goto bail_inval;
                /* Check UD destination address PD */
-               if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+               if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
                        goto bail_inval;
        } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
                goto bail_inval;
@@ -395,7 +395,20 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
        }
 
        wqe = get_swqe_ptr(qp, qp->s_head);
-       wqe->wr = *wr;
+
+       if (qp->ibqp.qp_type != IB_QPT_UC &&
+           qp->ibqp.qp_type != IB_QPT_RC)
+               memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+       else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+                wr->opcode == IB_WR_RDMA_WRITE ||
+                wr->opcode == IB_WR_RDMA_READ)
+               memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+       else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+               memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+       else
+               memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
        wqe->length = 0;
        if (wr->num_sge) {
                acc = wr->opcode >= IB_WR_RDMA_READ ?
index ec167e545e15c3df3d9d61dbad49d7152bbe3846..0a90a56870ab0653022b098d2eda6df510b59ad0 100644 (file)
@@ -277,7 +277,13 @@ struct ipath_mr {
  * in qp->s_max_sge.
  */
 struct ipath_swqe {
-       struct ib_send_wr wr;   /* don't use wr.sg_list */
+       union {
+               struct ib_send_wr wr;   /* don't use wr.sg_list */
+               struct ib_ud_wr ud_wr;
+               struct ib_rdma_wr rdma_wr;
+               struct ib_atomic_wr atomic_wr;
+       };
+
        u32 psn;                /* first packet sequence number */
        u32 lpsn;               /* last packet sequence number */
        u32 ssn;                /* send sequence number */
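Because struct ib_ud_wr, ib_rdma_wr and ib_atomic_wr each embed struct ib_send_wr as their first member, the union above keeps the generic header readable through wqe->wr no matter which member ipath_post_one_send() copied in. A minimal sketch of that property, assuming only the ipath_verbs.h definitions from this patch (the helper name is hypothetical):

static inline u32 ipath_swqe_num_sge(const struct ipath_swqe *wqe)
{
	/* ud_wr.wr, rdma_wr.wr and atomic_wr.wr all start at the same
	 * offset as wqe->wr, so the common fields stay valid whichever
	 * union member was filled in by ipath_post_one_send(). */
	return wqe->wr.num_sge;
}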
index 5a8677bafe0408bad140320471e38c100e4bf33a..7501626ab5293414c29df692edb6fa7122871c84 100644 (file)
@@ -214,6 +214,8 @@ enum {
        MLX4_DEV_CAP_FLAG2_IGNORE_FCS           = 1LL <<  28,
        MLX4_DEV_CAP_FLAG2_PHV_EN               = 1LL <<  29,
        MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN      = 1LL <<  30,
+       MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31,
+       MLX4_DEV_CAP_FLAG2_LB_SRC_CHK           = 1ULL << 32,
 };
 
 enum {
index de45a51b3f041d28e644cc6a0ce011d6cd2d4fb6..fe052e234906da97e88206d0b05db2442bced61f 100644 (file)
@@ -135,7 +135,10 @@ struct mlx4_rss_context {
 
 struct mlx4_qp_path {
        u8                      fl;
-       u8                      vlan_control;
+       union {
+               u8                      vlan_control;
+               u8                      control;
+       };
        u8                      disable_pkey_check;
        u8                      pkey_index;
        u8                      counter_index;
@@ -156,9 +159,16 @@ struct mlx4_qp_path {
 };
 
 enum { /* fl */
-       MLX4_FL_CV      = 1 << 6,
-       MLX4_FL_ETH_HIDE_CQE_VLAN       = 1 << 2
+       MLX4_FL_CV      = 1 << 6,
+       MLX4_FL_ETH_HIDE_CQE_VLAN       = 1 << 2,
+       MLX4_FL_ETH_SRC_CHECK_MC_LB     = 1 << 1,
+       MLX4_FL_ETH_SRC_CHECK_UC_LB     = 1 << 0,
 };
+
+enum { /* control */
+       MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER      = 1 << 7,
+};
+
 enum { /* vlan_control */
        MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED      = 1 << 6,
        MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */
@@ -254,6 +264,8 @@ enum {
        MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE               = 14 + 32,
        MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX          = 15 + 32,
        MLX4_UPD_QP_PATH_MASK_FVL_RX                    = 16 + 32,
+       MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB       = 18 + 32,
+       MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB       = 19 + 32,
 };
 
 enum { /* param3 */
@@ -436,11 +448,13 @@ enum mlx4_update_qp_attr {
        MLX4_UPDATE_QP_VSD              = 1 << 1,
        MLX4_UPDATE_QP_RATE_LIMIT       = 1 << 2,
        MLX4_UPDATE_QP_QOS_VPORT        = 1 << 3,
-       MLX4_UPDATE_QP_SUPPORTED_ATTRS  = (1 << 4) - 1
+       MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB      = 1 << 4,
+       MLX4_UPDATE_QP_SUPPORTED_ATTRS  = (1 << 5) - 1
 };
 
 enum mlx4_update_qp_params_flags {
-       MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE          = 1 << 0,
+       MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB     = 1 << 0,
+       MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE          = 1 << 1,
 };
 
 struct mlx4_update_qp_params {
index fb4013edcf5732cf3b777b5bc2ac29b0ac8d1bba..f869807a0d0e2ca93629a7d25092f268dbc8f520 100644 (file)
@@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge {
 };
 struct svc_rdma_fastreg_mr {
        struct ib_mr *mr;
-       void *kva;
-       struct ib_fast_reg_page_list *page_list;
-       int page_list_len;
+       struct scatterlist *sg;
+       int sg_nents;
        unsigned long access_flags;
-       unsigned long map_len;
        enum dma_data_direction direction;
        struct list_head frmr_list;
 };
index fde33ac6b58a1e4eccb52512055f546d3a59e2a6..11528591d0d714f3ea1004566cb799952a9ca5bd 100644 (file)
@@ -47,6 +47,7 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
 #include <net/ipv6.h>
+#include <net/net_namespace.h>
 
 struct rdma_addr_client {
        atomic_t refcount;
@@ -64,6 +65,16 @@ void rdma_addr_register_client(struct rdma_addr_client *client);
  */
 void rdma_addr_unregister_client(struct rdma_addr_client *client);
 
+/**
+ * struct rdma_dev_addr - Contains resolved RDMA hardware addresses
+ * @src_dev_addr:      Source MAC address.
+ * @dst_dev_addr:      Destination MAC address.
+ * @broadcast:         Broadcast address of the device.
+ * @dev_type:          The interface hardware type of the device.
+ * @bound_dev_if:      An optional device interface index.
+ * @transport:         The transport type used.
+ * @net:               Network namespace containing the bound_dev_if net_dev.
+ */
 struct rdma_dev_addr {
        unsigned char src_dev_addr[MAX_ADDR_LEN];
        unsigned char dst_dev_addr[MAX_ADDR_LEN];
@@ -71,11 +82,14 @@ struct rdma_dev_addr {
        unsigned short dev_type;
        int bound_dev_if;
        enum rdma_transport_type transport;
+       struct net *net;
 };
 
 /**
  * rdma_translate_ip - Translate a local IP address to an RDMA hardware
  *   address.
+ *
+ * The dev_addr->net field must be initialized.
  */
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
                      u16 *vlan_id);
@@ -90,7 +104,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
  * @dst_addr: The destination address to resolve.
  * @addr: A reference to a data location that will receive the resolved
  *   addresses.  The data location must remain valid until the callback has
- *   been invoked.
+ *   been invoked. The net field of the addr struct must be valid.
  * @timeout_ms: Amount of time to wait for the address resolution to complete.
  * @callback: Call invoked once address resolution has completed, timed out,
  *   or been canceled.  A status of 0 indicates success.
@@ -112,7 +126,7 @@ int rdma_addr_size(struct sockaddr *addr);
 
 int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
 int rdma_addr_find_dmac_by_grh(const union ib_gid *sgid, const union ib_gid *dgid,
-                              u8 *smac, u16 *vlan_id);
+                              u8 *smac, u16 *vlan_id, int if_index);
 
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
index bd92130f4ac5803a60e0c058bc19d26faca9f948..269a27cf0a46f37c7fd9fdc3fc24df2d5070c2d2 100644 (file)
@@ -43,6 +43,8 @@
  * @port_num: The port number of the device to query.
  * @index: The index into the cached GID table to query.
  * @gid: The GID value found at the specified index.
+ * @attr: The GID attribute found at the specified index (only in RoCE).
+ *   NULL means ignore (output parameter).
  *
  * ib_get_cached_gid() fetches the specified GID table entry stored in
  * the local software cache.
 int ib_get_cached_gid(struct ib_device    *device,
                      u8                   port_num,
                      int                  index,
-                     union ib_gid        *gid);
+                     union ib_gid        *gid,
+                     struct ib_gid_attr  *attr);
 
 /**
  * ib_find_cached_gid - Returns the port number and GID table index where
  *   a specified GID value occurs.
  * @device: The device to query.
  * @gid: The GID value to search for.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
  * @port_num: The port number of the device where the GID value was found.
  * @index: The index into the cached GID table where the GID was found.  This
  *   parameter may be NULL.
@@ -64,11 +68,39 @@ int ib_get_cached_gid(struct ib_device    *device,
  * ib_find_cached_gid() searches for the specified GID value in
  * the local software cache.
  */
-int ib_find_cached_gid(struct ib_device   *device,
+int ib_find_cached_gid(struct ib_device *device,
                       const union ib_gid *gid,
-                      u8                 *port_num,
-                      u16                *index);
+                      struct net_device *ndev,
+                      u8               *port_num,
+                      u16              *index);
 
+/**
+ * ib_find_cached_gid_by_port - Returns the GID table index where a specified
+ * GID value occurs
+ * @device: The device to query.
+ * @gid: The GID value to search for.
+ * @port_num: The port number of the device where the GID value should be
+ *   searched.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ * @index: The index into the cached GID table where the GID was found.  This
+ *   parameter may be NULL.
+ *
+ * ib_find_cached_gid_by_port() searches for the specified GID value in
+ * the local software cache.
+ */
+int ib_find_cached_gid_by_port(struct ib_device *device,
+                              const union ib_gid *gid,
+                              u8               port_num,
+                              struct net_device *ndev,
+                              u16              *index);
+
+int ib_find_gid_by_filter(struct ib_device *device,
+                         const union ib_gid *gid,
+                         u8 port_num,
+                         bool (*filter)(const union ib_gid *gid,
+                                        const struct ib_gid_attr *,
+                                        void *),
+                         void *context, u16 *index);
 /**
  * ib_get_cached_pkey - Returns a cached PKey table entry
  * @device: The device to query.
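The new attribute output parameter is what lets RoCE consumers recover the net_device bound to a GID entry. A minimal sketch of a lookup under these declarations; the device pointer, port and index are placeholders, and the ndev member of struct ib_gid_attr (plus the reference the cache is assumed to take on it) comes from this series rather than from anything shown above:

#include <linux/netdevice.h>
#include <rdma/ib_cache.h>

static int example_read_gid(struct ib_device *device, u8 port, int index)
{
	union ib_gid gid;
	struct ib_gid_attr attr;
	int ret;

	ret = ib_get_cached_gid(device, port, index, &gid, &attr);
	if (ret)
		return ret;

	/* For RoCE entries the attribute carries the bound net_device;
	 * assuming the cache holds a reference, drop it when done. */
	if (attr.ndev)
		dev_put(attr.ndev);

	return 0;
}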
index 709a5331e6b9d2ff04ba262d377b53f6819426c9..e99d8f9a4551d9889501c645e04139fd0f32a3f8 100644 (file)
@@ -76,7 +76,7 @@ enum {
        IB_OPCODE_UC                                = 0x20,
        IB_OPCODE_RD                                = 0x40,
        IB_OPCODE_UD                                = 0x60,
-       /* per IBTA 3.1 Table 38, A10.3.2 */
+       /* per IBTA 1.3 vol 1 Table 38, A10.3.2 */
        IB_OPCODE_CNP                               = 0x80,
 
        /* operations -- just used to define real constants */
index 7e071a6abb34cb977ad26c304bddfcf22a89903a..301969552d0a51e34dcd872daa303a6339721916 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/compiler.h>
 
 #include <linux/atomic.h>
+#include <linux/netdevice.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_mad.h>
@@ -154,11 +155,18 @@ struct ib_sa_path_rec {
        u8           packet_life_time_selector;
        u8           packet_life_time;
        u8           preference;
-       u8           smac[ETH_ALEN];
        u8           dmac[ETH_ALEN];
-       u16          vlan_id;
+       /* ignored in IB */
+       int          ifindex;
+       /* ignored in IB */
+       struct net  *net;
 };
 
+static inline struct net_device *ib_get_ndev_from_path(struct ib_sa_path_rec *rec)
+{
+       return rec->net ? dev_get_by_index(rec->net, rec->ifindex) : NULL;
+}
+
 #define IB_SA_MCMEMBER_REC_MGID                                IB_SA_COMP_MASK( 0)
 #define IB_SA_MCMEMBER_REC_PORT_GID                    IB_SA_COMP_MASK( 1)
 #define IB_SA_MCMEMBER_REC_QKEY                                IB_SA_COMP_MASK( 2)
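With smac and vlan_id dropped from the path record, RoCE users carry the egress interface as a (net, ifindex) pair and resolve it through ib_get_ndev_from_path() above. A short sketch under those definitions; the caller and the pr_debug message are hypothetical:

#include <linux/netdevice.h>
#include <rdma/ib_sa.h>

static void example_path_ndev(struct ib_sa_path_rec *rec)
{
	/* dev_get_by_index() inside the helper takes a reference,
	 * so a non-NULL result must be released with dev_put(). */
	struct net_device *ndev = ib_get_ndev_from_path(rec);

	if (ndev) {
		pr_debug("path resolves to %s\n", ndev->name);
		dev_put(ndev);
	}
}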
index 7845fae6f2df1bd7c362174c104b6efbaaf62547..324e9bf8e66cc3bfb5a4873cbf9480d4a1832859 100644 (file)
@@ -137,6 +137,8 @@ enum ib_device_cap_flags {
        IB_DEVICE_BLOCK_MULTICAST_LOOPBACK = (1<<22),
        IB_DEVICE_MEM_WINDOW_TYPE_2A    = (1<<23),
        IB_DEVICE_MEM_WINDOW_TYPE_2B    = (1<<24),
+       IB_DEVICE_RC_IP_CSUM            = (1<<25),
+       IB_DEVICE_RAW_IP_CSUM           = (1<<26),
        IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
        IB_DEVICE_SIGNATURE_HANDOVER    = (1<<30),
        IB_DEVICE_ON_DEMAND_PAGING      = (1<<31),
@@ -697,7 +699,6 @@ struct ib_ah_attr {
        u8                      ah_flags;
        u8                      port_num;
        u8                      dmac[ETH_ALEN];
-       u16                     vlan_id;
 };
 
 enum ib_wc_status {
@@ -736,7 +737,7 @@ enum ib_wc_opcode {
        IB_WC_BIND_MW,
        IB_WC_LSO,
        IB_WC_LOCAL_INV,
-       IB_WC_FAST_REG_MR,
+       IB_WC_REG_MR,
        IB_WC_MASKED_COMP_SWAP,
        IB_WC_MASKED_FETCH_ADD,
 /*
@@ -873,7 +874,6 @@ enum ib_qp_create_flags {
        IB_QP_CREATE_RESERVED_END               = 1 << 31,
 };
 
-
 /*
  * Note: users may not call ib_close_qp or ib_destroy_qp from the event_handler
  * callback to destroy the passed in QP.
@@ -957,10 +957,10 @@ enum ib_qp_attr_mask {
        IB_QP_PATH_MIG_STATE            = (1<<18),
        IB_QP_CAP                       = (1<<19),
        IB_QP_DEST_QPN                  = (1<<20),
-       IB_QP_SMAC                      = (1<<21),
-       IB_QP_ALT_SMAC                  = (1<<22),
-       IB_QP_VID                       = (1<<23),
-       IB_QP_ALT_VID                   = (1<<24),
+       IB_QP_RESERVED1                 = (1<<21),
+       IB_QP_RESERVED2                 = (1<<22),
+       IB_QP_RESERVED3                 = (1<<23),
+       IB_QP_RESERVED4                 = (1<<24),
 };
 
 enum ib_qp_state {
@@ -1010,10 +1010,6 @@ struct ib_qp_attr {
        u8                      rnr_retry;
        u8                      alt_port_num;
        u8                      alt_timeout;
-       u8                      smac[ETH_ALEN];
-       u8                      alt_smac[ETH_ALEN];
-       u16                     vlan_id;
-       u16                     alt_vlan_id;
 };
 
 enum ib_wr_opcode {
@@ -1028,7 +1024,7 @@ enum ib_wr_opcode {
        IB_WR_SEND_WITH_INV,
        IB_WR_RDMA_READ_WITH_INV,
        IB_WR_LOCAL_INV,
-       IB_WR_FAST_REG_MR,
+       IB_WR_REG_MR,
        IB_WR_MASKED_ATOMIC_CMP_AND_SWP,
        IB_WR_MASKED_ATOMIC_FETCH_AND_ADD,
        IB_WR_BIND_MW,
@@ -1066,12 +1062,6 @@ struct ib_sge {
        u32     lkey;
 };
 
-struct ib_fast_reg_page_list {
-       struct ib_device       *device;
-       u64                    *page_list;
-       unsigned int            max_page_list_len;
-};
-
 /**
  * struct ib_mw_bind_info - Parameters for a memory window bind operation.
  * @mr: A memory region to bind the memory window to.
@@ -1100,54 +1090,89 @@ struct ib_send_wr {
                __be32          imm_data;
                u32             invalidate_rkey;
        } ex;
-       union {
-               struct {
-                       u64     remote_addr;
-                       u32     rkey;
-               } rdma;
-               struct {
-                       u64     remote_addr;
-                       u64     compare_add;
-                       u64     swap;
-                       u64     compare_add_mask;
-                       u64     swap_mask;
-                       u32     rkey;
-               } atomic;
-               struct {
-                       struct ib_ah *ah;
-                       void   *header;
-                       int     hlen;
-                       int     mss;
-                       u32     remote_qpn;
-                       u32     remote_qkey;
-                       u16     pkey_index; /* valid for GSI only */
-                       u8      port_num;   /* valid for DR SMPs on switch only */
-               } ud;
-               struct {
-                       u64                             iova_start;
-                       struct ib_fast_reg_page_list   *page_list;
-                       unsigned int                    page_shift;
-                       unsigned int                    page_list_len;
-                       u32                             length;
-                       int                             access_flags;
-                       u32                             rkey;
-               } fast_reg;
-               struct {
-                       struct ib_mw            *mw;
-                       /* The new rkey for the memory window. */
-                       u32                      rkey;
-                       struct ib_mw_bind_info   bind_info;
-               } bind_mw;
-               struct {
-                       struct ib_sig_attrs    *sig_attrs;
-                       struct ib_mr           *sig_mr;
-                       int                     access_flags;
-                       struct ib_sge          *prot;
-               } sig_handover;
-       } wr;
-       u32                     xrc_remote_srq_num;     /* XRC TGT QPs only */
 };
 
+struct ib_rdma_wr {
+       struct ib_send_wr       wr;
+       u64                     remote_addr;
+       u32                     rkey;
+};
+
+static inline struct ib_rdma_wr *rdma_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct ib_rdma_wr, wr);
+}
+
+struct ib_atomic_wr {
+       struct ib_send_wr       wr;
+       u64                     remote_addr;
+       u64                     compare_add;
+       u64                     swap;
+       u64                     compare_add_mask;
+       u64                     swap_mask;
+       u32                     rkey;
+};
+
+static inline struct ib_atomic_wr *atomic_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct ib_atomic_wr, wr);
+}
+
+struct ib_ud_wr {
+       struct ib_send_wr       wr;
+       struct ib_ah            *ah;
+       void                    *header;
+       int                     hlen;
+       int                     mss;
+       u32                     remote_qpn;
+       u32                     remote_qkey;
+       u16                     pkey_index; /* valid for GSI only */
+       u8                      port_num;   /* valid for DR SMPs on switch only */
+};
+
+static inline struct ib_ud_wr *ud_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct ib_ud_wr, wr);
+}
+
+struct ib_reg_wr {
+       struct ib_send_wr       wr;
+       struct ib_mr            *mr;
+       u32                     key;
+       int                     access;
+};
+
+static inline struct ib_reg_wr *reg_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct ib_reg_wr, wr);
+}
+
+struct ib_bind_mw_wr {
+       struct ib_send_wr       wr;
+       struct ib_mw            *mw;
+       /* The new rkey for the memory window. */
+       u32                     rkey;
+       struct ib_mw_bind_info  bind_info;
+};
+
+static inline struct ib_bind_mw_wr *bind_mw_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct ib_bind_mw_wr, wr);
+}
+
+struct ib_sig_handover_wr {
+       struct ib_send_wr       wr;
+       struct ib_sig_attrs    *sig_attrs;
+       struct ib_mr           *sig_mr;
+       int                     access_flags;
+       struct ib_sge          *prot;
+};
+
+static inline struct ib_sig_handover_wr *sig_handover_wr(struct ib_send_wr *wr)
+{
+       return container_of(wr, struct ib_sig_handover_wr, wr);
+}
+
 struct ib_recv_wr {
        struct ib_recv_wr      *next;
        u64                     wr_id;
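With the per-opcode union gone from struct ib_send_wr, senders build the derived request type and hand its embedded header to ib_post_send(); providers recover the extra fields through the container_of() helpers above. A minimal sketch of a single RDMA WRITE under these definitions, where the QP, keys, addresses and length are hypothetical inputs:

#include <rdma/ib_verbs.h>

static int example_post_rdma_write(struct ib_qp *qp, u64 laddr, u32 lkey,
				   u64 raddr, u32 rkey, u32 len)
{
	struct ib_sge sge = {
		.addr   = laddr,
		.length = len,
		.lkey   = lkey,
	};
	struct ib_rdma_wr wr = {
		.wr = {
			.opcode     = IB_WR_RDMA_WRITE,
			.send_flags = IB_SEND_SIGNALED,
			.sg_list    = &sge,
			.num_sge    = 1,
		},
		.remote_addr = raddr,
		.rkey        = rkey,
	};
	struct ib_send_wr *bad_wr;

	/* The generic header is what gets posted; the driver uses
	 * rdma_wr() to reach remote_addr and rkey. */
	return ib_post_send(qp, &wr.wr, &bad_wr);
}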
@@ -1334,6 +1359,9 @@ struct ib_mr {
        struct ib_uobject *uobject;
        u32                lkey;
        u32                rkey;
+       u64                iova;
+       u32                length;
+       unsigned int       page_size;
        atomic_t           usecnt; /* count number of MWs */
 };
 
@@ -1718,9 +1746,9 @@ struct ib_device {
        struct ib_mr *             (*alloc_mr)(struct ib_pd *pd,
                                               enum ib_mr_type mr_type,
                                               u32 max_num_sg);
-       struct ib_fast_reg_page_list * (*alloc_fast_reg_page_list)(struct ib_device *device,
-                                                                  int page_list_len);
-       void                       (*free_fast_reg_page_list)(struct ib_fast_reg_page_list *page_list);
+       int                        (*map_mr_sg)(struct ib_mr *mr,
+                                               struct scatterlist *sg,
+                                               int sg_nents);
        int                        (*rereg_phys_mr)(struct ib_mr *mr,
                                                    int mr_rereg_mask,
                                                    struct ib_pd *pd,
@@ -2176,7 +2204,8 @@ static inline bool rdma_cap_roce_gid_table(const struct ib_device *device,
 }
 
 int ib_query_gid(struct ib_device *device,
-                u8 port_num, int index, union ib_gid *gid);
+                u8 port_num, int index, union ib_gid *gid,
+                struct ib_gid_attr *attr);
 
 int ib_query_pkey(struct ib_device *device,
                  u8 port_num, u16 index, u16 *pkey);
@@ -2190,7 +2219,7 @@ int ib_modify_port(struct ib_device *device,
                   struct ib_port_modify *port_modify);
 
 int ib_find_gid(struct ib_device *device, union ib_gid *gid,
-               u8 *port_num, u16 *index);
+               struct net_device *ndev, u8 *port_num, u16 *index);
 
 int ib_find_pkey(struct ib_device *device,
                 u8 port_num, u16 pkey, u16 *index);
@@ -2828,33 +2857,6 @@ struct ib_mr *ib_alloc_mr(struct ib_pd *pd,
                          enum ib_mr_type mr_type,
                          u32 max_num_sg);
 
-/**
- * ib_alloc_fast_reg_page_list - Allocates a page list array
- * @device - ib device pointer.
- * @page_list_len - size of the page list array to be allocated.
- *
- * This allocates and returns a struct ib_fast_reg_page_list * and a
- * page_list array that is at least page_list_len in size.  The actual
- * size is returned in max_page_list_len.  The caller is responsible
- * for initializing the contents of the page_list array before posting
- * a send work request with the IB_WC_FAST_REG_MR opcode.
- *
- * The page_list array entries must be translated using one of the
- * ib_dma_*() functions just like the addresses passed to
- * ib_map_phys_fmr().  Once the ib_post_send() is issued, the struct
- * ib_fast_reg_page_list must not be modified by the caller until the
- * IB_WC_FAST_REG_MR work request completes.
- */
-struct ib_fast_reg_page_list *ib_alloc_fast_reg_page_list(
-                               struct ib_device *device, int page_list_len);
-
-/**
- * ib_free_fast_reg_page_list - Deallocates a previously allocated
- *   page list array.
- * @page_list - struct ib_fast_reg_page_list pointer to be deallocated.
- */
-void ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list);
-
 /**
  * ib_update_fast_reg_key - updates the key portion of the fast_reg MR
  *   R_Key and L_Key.
@@ -3023,4 +3025,28 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u8 port,
                                            u16 pkey, const union ib_gid *gid,
                                            const struct sockaddr *addr);
 
+int ib_map_mr_sg(struct ib_mr *mr,
+                struct scatterlist *sg,
+                int sg_nents,
+                unsigned int page_size);
+
+static inline int
+ib_map_mr_sg_zbva(struct ib_mr *mr,
+                 struct scatterlist *sg,
+                 int sg_nents,
+                 unsigned int page_size)
+{
+       int n;
+
+       n = ib_map_mr_sg(mr, sg, sg_nents, page_size);
+       mr->iova = 0;
+
+       return n;
+}
+
+int ib_sg_to_pages(struct ib_mr *mr,
+                  struct scatterlist *sgl,
+                  int sg_nents,
+                  int (*set_page)(struct ib_mr *, u64));
+
 #endif /* IB_VERBS_H */
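Together with IB_WR_REG_MR and struct ib_reg_wr above, these helpers replace the fast-reg page-list flow: the caller DMA-maps a scatterlist, lets ib_map_mr_sg() build the MR's page table, then posts the registration request. A hedged sketch of that sequence; pd, qp, the scatterlist, completion handling and the choice of PAGE_SIZE are placeholders rather than anything mandated above:

#include <linux/err.h>
#include <rdma/ib_verbs.h>

static int example_reg_mr(struct ib_pd *pd, struct ib_qp *qp,
			  struct scatterlist *sg, int sg_nents)
{
	struct ib_mr *mr;
	struct ib_reg_wr wr = { };
	struct ib_send_wr *bad_wr;
	int n, ret;

	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, sg_nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Build the MR page table from the (already DMA-mapped) sg list;
	 * anything short of sg_nents mapped entries is treated as failure. */
	n = ib_map_mr_sg(mr, sg, sg_nents, PAGE_SIZE);
	if (n != sg_nents) {
		ret = n < 0 ? n : -EINVAL;
		goto out_dereg;
	}

	wr.wr.opcode = IB_WR_REG_MR;
	wr.wr.send_flags = IB_SEND_SIGNALED;
	wr.mr = mr;
	wr.key = mr->rkey;
	wr.access = IB_ACCESS_LOCAL_WRITE |
		    IB_ACCESS_REMOTE_READ |
		    IB_ACCESS_REMOTE_WRITE;

	ret = ib_post_send(qp, &wr.wr, &bad_wr);
	if (ret)
		goto out_dereg;
	return 0;

out_dereg:
	ib_dereg_mr(mr);
	return ret;
}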
index c92522c192d26df9401d061121fb743935ef9b45..463ec0ccbe517239954ebed1b900b8e2035570cb 100644 (file)
@@ -160,13 +160,17 @@ struct rdma_cm_id {
 /**
  * rdma_create_id - Create an RDMA identifier.
  *
+ * @net: The network namespace in which to create the new id.
  * @event_handler: User callback invoked to report events associated with the
  *   returned rdma_id.
  * @context: User specified context associated with the id.
  * @ps: RDMA port space.
  * @qp_type: type of queue pair associated with the id.
+ *
+ * The id holds a reference on the network namespace until it is destroyed.
  */
-struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
+struct rdma_cm_id *rdma_create_id(struct net *net,
+                                 rdma_cm_event_handler event_handler,
                                  void *context, enum rdma_port_space ps,
                                  enum ib_qp_type qp_type);
 
index 978841eeaff10e1cffc5d1c6e37fbe175c3db197..8126c143a519f2d499d536d2d6e10b2587e9290f 100644 (file)
@@ -92,6 +92,7 @@ enum {
 enum {
        IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
        IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ,
+       IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP,
        IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
        IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 };
@@ -516,6 +517,25 @@ struct ib_uverbs_create_qp {
        __u64 driver_data[0];
 };
 
+struct ib_uverbs_ex_create_qp {
+       __u64 user_handle;
+       __u32 pd_handle;
+       __u32 send_cq_handle;
+       __u32 recv_cq_handle;
+       __u32 srq_handle;
+       __u32 max_send_wr;
+       __u32 max_recv_wr;
+       __u32 max_send_sge;
+       __u32 max_recv_sge;
+       __u32 max_inline_data;
+       __u8  sq_sig_all;
+       __u8  qp_type;
+       __u8  is_srq;
+       __u8 reserved;
+       __u32 comp_mask;
+       __u32 create_flags;
+};
+
 struct ib_uverbs_open_qp {
        __u64 response;
        __u64 user_handle;
@@ -538,6 +558,12 @@ struct ib_uverbs_create_qp_resp {
        __u32 reserved;
 };
 
+struct ib_uverbs_ex_create_qp_resp {
+       struct ib_uverbs_create_qp_resp base;
+       __u32 comp_mask;
+       __u32 response_length;
+};
+
 /*
  * This struct needs to remain a multiple of 8 bytes to keep the
  * alignment of the modify QP parameters.
index ba1210253f5ec077dc01fbd1e8fc6bf2102253ef..52b4a2f993f2c91e8fdeb82e6d826467f34ec03c 100644 (file)
@@ -655,8 +655,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
                return -ENOMEM;
 
        /* Create the RDMA CM ID */
-       rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
-                                    IB_QPT_RC);
+       rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
+                                    RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(rdma->cm_id))
                goto error;
 
index a833ab7898fe7306968e4ac20553afbee6955f8b..f222885ac0c7397ed08c26106e68157d91267ff6 100644 (file)
@@ -336,7 +336,7 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
        /* Create a CMA ID and try to bind it. This catches both
         * IB and iWARP capable NICs.
         */
-       cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+       cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
 
index f17d095678907b588c717b48e3d4602cab401199..b3fdebb57460392ae9a751bfaae2e218ff9b6e17 100644 (file)
@@ -75,7 +75,11 @@ struct rds_ib_connect_private {
 
 struct rds_ib_send_work {
        void                    *s_op;
-       struct ib_send_wr       s_wr;
+       union {
+               struct ib_send_wr       s_wr;
+               struct ib_rdma_wr       s_rdma_wr;
+               struct ib_atomic_wr     s_atomic_wr;
+       };
        struct ib_sge           s_sge[RDS_IB_MAX_SGE];
        unsigned long           s_queued;
 };
index 2b2370e7f356f5db4d23df09356796d0649ee063..da5a7fb98c77abf0c43f0c4825657874eda89ba3 100644 (file)
@@ -668,7 +668,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
 
        /* XXX I wonder what effect the port space has */
        /* delegate cm event handler to rdma_transport */
-       ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+       ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
                                     RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ic->i_cm_id)) {
                ret = PTR_ERR(ic->i_cm_id);
index 670882c752e9470e6016fc51b0375006f4a94780..eac30bf486d747ce5a78f001de2d65cc5fc7d891 100644 (file)
@@ -777,23 +777,23 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
        send->s_queued = jiffies;
 
        if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
-               send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
-               send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
-               send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
-               send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
-               send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
+               send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
+               send->s_atomic_wr.compare_add = op->op_m_cswp.compare;
+               send->s_atomic_wr.swap = op->op_m_cswp.swap;
+               send->s_atomic_wr.compare_add_mask = op->op_m_cswp.compare_mask;
+               send->s_atomic_wr.swap_mask = op->op_m_cswp.swap_mask;
        } else { /* FADD */
-               send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
-               send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
-               send->s_wr.wr.atomic.swap = 0;
-               send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
-               send->s_wr.wr.atomic.swap_mask = 0;
+               send->s_atomic_wr.wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
+               send->s_atomic_wr.compare_add = op->op_m_fadd.add;
+               send->s_atomic_wr.swap = 0;
+               send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask;
+               send->s_atomic_wr.swap_mask = 0;
        }
        nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
-       send->s_wr.num_sge = 1;
-       send->s_wr.next = NULL;
-       send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
-       send->s_wr.wr.atomic.rkey = op->op_rkey;
+       send->s_atomic_wr.wr.num_sge = 1;
+       send->s_atomic_wr.wr.next = NULL;
+       send->s_atomic_wr.remote_addr = op->op_remote_addr;
+       send->s_atomic_wr.rkey = op->op_rkey;
        send->s_op = op;
        rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
 
@@ -818,11 +818,11 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
        if (nr_sig)
                atomic_add(nr_sig, &ic->i_signaled_sends);
 
-       failed_wr = &send->s_wr;
-       ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
+       failed_wr = &send->s_atomic_wr.wr;
+       ret = ib_post_send(ic->i_cm_id->qp, &send->s_atomic_wr.wr, &failed_wr);
        rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
-                send, &send->s_wr, ret, failed_wr);
-       BUG_ON(failed_wr != &send->s_wr);
+                send, &send->s_atomic_wr, ret, failed_wr);
+       BUG_ON(failed_wr != &send->s_atomic_wr.wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
                       "returned %d\n", &conn->c_faddr, ret);
@@ -831,9 +831,9 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
                goto out;
        }
 
-       if (unlikely(failed_wr != &send->s_wr)) {
+       if (unlikely(failed_wr != &send->s_atomic_wr.wr)) {
                printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
-               BUG_ON(failed_wr != &send->s_wr);
+               BUG_ON(failed_wr != &send->s_atomic_wr.wr);
        }
 
 out:
@@ -904,22 +904,23 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
 
                send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
-               send->s_wr.wr.rdma.remote_addr = remote_addr;
-               send->s_wr.wr.rdma.rkey = op->op_rkey;
+               send->s_rdma_wr.remote_addr = remote_addr;
+               send->s_rdma_wr.rkey = op->op_rkey;
 
                if (num_sge > max_sge) {
-                       send->s_wr.num_sge = max_sge;
+                       send->s_rdma_wr.wr.num_sge = max_sge;
                        num_sge -= max_sge;
                } else {
-                       send->s_wr.num_sge = num_sge;
+                       send->s_rdma_wr.wr.num_sge = num_sge;
                }
 
-               send->s_wr.next = NULL;
+               send->s_rdma_wr.wr.next = NULL;
 
                if (prev)
-                       prev->s_wr.next = &send->s_wr;
+                       prev->s_rdma_wr.wr.next = &send->s_rdma_wr.wr;
 
-               for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+               for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+                    scat != &op->op_sg[op->op_count]; j++) {
                        len = ib_sg_dma_len(ic->i_cm_id->device, scat);
                        send->s_sge[j].addr =
                                 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -934,7 +935,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                }
 
                rdsdebug("send %p wr %p num_sge %u next %p\n", send,
-                       &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+                       &send->s_rdma_wr.wr,
+                       send->s_rdma_wr.wr.num_sge,
+                       send->s_rdma_wr.wr.next);
 
                prev = send;
                if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -955,11 +958,11 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
        if (nr_sig)
                atomic_add(nr_sig, &ic->i_signaled_sends);
 
-       failed_wr = &first->s_wr;
-       ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+       failed_wr = &first->s_rdma_wr.wr;
+       ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
        rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
-                first, &first->s_wr, ret, failed_wr);
-       BUG_ON(failed_wr != &first->s_wr);
+                first, &first->s_rdma_wr.wr, ret, failed_wr);
+       BUG_ON(failed_wr != &first->s_rdma_wr.wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
                       "returned %d\n", &conn->c_faddr, ret);
@@ -968,9 +971,9 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                goto out;
        }
 
-       if (unlikely(failed_wr != &first->s_wr)) {
+       if (unlikely(failed_wr != &first->s_rdma_wr.wr)) {
                printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
-               BUG_ON(failed_wr != &first->s_wr);
+               BUG_ON(failed_wr != &first->s_rdma_wr.wr);
        }
 
 
index 3df0295c6659c305751b14e4585e2504fbb7a90f..576f1825fc55769f7242c75c771a50a6e8e5e93b 100644 (file)
@@ -223,7 +223,7 @@ static int rds_iw_laddr_check(struct net *net, __be32 addr)
        /* Create a CMA ID and try to bind it. This catches both
         * IB and iWARP capable NICs.
         */
-       cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+       cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
 
index cbe6674e31ee52f6c9fd4380e3f0942127ede7c9..5af01d1758b3914d076088c8906d085b177febe4 100644 (file)
@@ -74,10 +74,13 @@ struct rds_iw_send_work {
        struct rm_rdma_op       *s_op;
        struct rds_iw_mapping   *s_mapping;
        struct ib_mr            *s_mr;
-       struct ib_fast_reg_page_list *s_page_list;
        unsigned char           s_remap_count;
 
-       struct ib_send_wr       s_wr;
+       union {
+               struct ib_send_wr       s_send_wr;
+               struct ib_rdma_wr       s_rdma_wr;
+               struct ib_reg_wr        s_reg_wr;
+       };
        struct ib_sge           s_sge[RDS_IW_MAX_SGE];
        unsigned long           s_queued;
 };
@@ -195,7 +198,7 @@ struct rds_iw_device {
 
 /* Magic WR_ID for ACKs */
 #define RDS_IW_ACK_WR_ID       ((u64)0xffffffffffffffffULL)
-#define RDS_IW_FAST_REG_WR_ID  ((u64)0xefefefefefefefefULL)
+#define RDS_IW_REG_WR_ID       ((u64)0xefefefefefefefefULL)
 #define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
 
 struct rds_iw_statistics {
index a6553a6fb2bc2e5053152b620851790379673d40..aea4c911bc765c1288adc233640e654f200f6f7f 100644 (file)
@@ -524,7 +524,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
 
        /* XXX I wonder what effect the port space has */
        /* delegate cm event handler to rdma_transport */
-       ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
+       ic->i_cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, conn,
                                     RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(ic->i_cm_id)) {
                ret = PTR_ERR(ic->i_cm_id);
index d3d4454ffc84c6603397976e8c027b62c3300fbe..b09a40c1adceebf170617da3826c82002b04807a 100644 (file)
@@ -47,7 +47,6 @@ struct rds_iw_mr {
        struct rdma_cm_id       *cm_id;
 
        struct ib_mr    *mr;
-       struct ib_fast_reg_page_list *page_list;
 
        struct rds_iw_mapping   mapping;
        unsigned char           remap_count;
@@ -77,8 +76,8 @@ struct rds_iw_mr_pool {
 
 static void rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
 static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
                          struct rds_iw_mr *ibmr,
                          struct scatterlist *sg, unsigned int nents);
 static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
@@ -258,19 +257,18 @@ static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
        sg->bytes = 0;
 }
 
-static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
-                       struct rds_iw_scatterlist *sg)
+static int rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
+                                 struct rds_iw_scatterlist *sg)
 {
        struct ib_device *dev = rds_iwdev->dev;
-       u64 *dma_pages = NULL;
-       int i, j, ret;
+       int i, ret;
 
        WARN_ON(sg->dma_len);
 
        sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
        if (unlikely(!sg->dma_len)) {
                printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
-               return ERR_PTR(-EBUSY);
+               return -EBUSY;
        }
 
        sg->bytes = 0;
@@ -303,31 +301,14 @@ static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
        if (sg->dma_npages > fastreg_message_size)
                goto out_unmap;
 
-       dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
-       if (!dma_pages) {
-               ret = -ENOMEM;
-               goto out_unmap;
-       }
 
-       for (i = j = 0; i < sg->dma_len; ++i) {
-               unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
-               u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
-               u64 end_addr;
 
-               end_addr = dma_addr + dma_len;
-               dma_addr &= ~PAGE_MASK;
-               for (; dma_addr < end_addr; dma_addr += PAGE_SIZE)
-                       dma_pages[j++] = dma_addr;
-               BUG_ON(j > sg->dma_npages);
-       }
-
-       return dma_pages;
+       return 0;
 
 out_unmap:
        ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
        sg->dma_len = 0;
-       kfree(dma_pages);
-       return ERR_PTR(ret);
+       return ret;
 }
 
 
@@ -440,7 +421,7 @@ static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
        INIT_LIST_HEAD(&ibmr->mapping.m_list);
        ibmr->mapping.m_mr = ibmr;
 
-       err = rds_iw_init_fastreg(pool, ibmr);
+       err = rds_iw_init_reg(pool, ibmr);
        if (err)
                goto out_no_cigar;
 
@@ -620,7 +601,7 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
        ibmr->cm_id = cm_id;
        ibmr->device = rds_iwdev;
 
-       ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
+       ret = rds_iw_map_reg(rds_iwdev->mr_pool, ibmr, sg, nents);
        if (ret == 0)
                *key_ret = ibmr->mr->rkey;
        else
@@ -636,7 +617,7 @@ out:
 }
 
 /*
- * iWARP fastreg handling
+ * iWARP reg handling
  *
  * The life cycle of a fastreg registration is a bit different from
  * FMRs.
@@ -648,7 +629,7 @@ out:
  * This creates a bit of a problem for us, as we do not have the destination
  * IP in GET_MR, so the connection must be setup prior to the GET_MR call for
  * RDMA to be correctly setup.  If a fastreg request is present, rds_iw_xmit
- * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
+ * will try to queue a LOCAL_INV (if needed) and a REG_MR work request
  * before queuing the SEND. When completions for these arrive, they are
  * dispatched and the MR has a bit set showing that RDMA can be performed.
  *
@@ -657,11 +638,10 @@ out:
  * The expectation there is that this invalidation step includes ALL
  * PREVIOUSLY FREED MRs.
  */
-static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
-                               struct rds_iw_mr *ibmr)
+static int rds_iw_init_reg(struct rds_iw_mr_pool *pool,
+                          struct rds_iw_mr *ibmr)
 {
        struct rds_iw_device *rds_iwdev = pool->device;
-       struct ib_fast_reg_page_list *page_list = NULL;
        struct ib_mr *mr;
        int err;
 
@@ -674,55 +654,44 @@ static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
                return err;
        }
 
-       /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
-        * is not filled in.
-        */
-       page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
-       if (IS_ERR(page_list)) {
-               err = PTR_ERR(page_list);
-
-               printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
-               ib_dereg_mr(mr);
-               return err;
-       }
-
-       ibmr->page_list = page_list;
        ibmr->mr = mr;
        return 0;
 }
 
-static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
+static int rds_iw_rdma_reg_mr(struct rds_iw_mapping *mapping)
 {
        struct rds_iw_mr *ibmr = mapping->m_mr;
-       struct ib_send_wr f_wr, *failed_wr;
-       int ret;
+       struct rds_iw_scatterlist *m_sg = &mapping->m_sg;
+       struct ib_reg_wr reg_wr;
+       struct ib_send_wr *failed_wr;
+       int ret, n;
+
+       n = ib_map_mr_sg_zbva(ibmr->mr, m_sg->list, m_sg->len, PAGE_SIZE);
+       if (unlikely(n != m_sg->len))
+               return n < 0 ? n : -EINVAL;
+
+       reg_wr.wr.next = NULL;
+       reg_wr.wr.opcode = IB_WR_REG_MR;
+       reg_wr.wr.wr_id = RDS_IW_REG_WR_ID;
+       reg_wr.wr.num_sge = 0;
+       reg_wr.mr = ibmr->mr;
+       reg_wr.key = mapping->m_rkey;
+       reg_wr.access = IB_ACCESS_LOCAL_WRITE |
+                       IB_ACCESS_REMOTE_READ |
+                       IB_ACCESS_REMOTE_WRITE;
 
        /*
-        * Perform a WR for the fast_reg_mr. Each individual page
+        * Perform a WR for the reg_mr. Each individual page
         * in the sg list is added to the fast reg page list and placed
-        * inside the fast_reg_mr WR.  The key used is a rolling 8bit
+        * inside the reg_mr WR.  The key used is a rolling 8bit
         * counter, which should guarantee uniqueness.
         */
        ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
        mapping->m_rkey = ibmr->mr->rkey;
 
-       memset(&f_wr, 0, sizeof(f_wr));
-       f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
-       f_wr.opcode = IB_WR_FAST_REG_MR;
-       f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
-       f_wr.wr.fast_reg.rkey = mapping->m_rkey;
-       f_wr.wr.fast_reg.page_list = ibmr->page_list;
-       f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
-       f_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
-                               IB_ACCESS_REMOTE_READ |
-                               IB_ACCESS_REMOTE_WRITE;
-       f_wr.wr.fast_reg.iova_start = 0;
-       f_wr.send_flags = IB_SEND_SIGNALED;
-
-       failed_wr = &f_wr;
-       ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
-       BUG_ON(failed_wr != &f_wr);
+       failed_wr = &reg_wr.wr;
+       ret = ib_post_send(ibmr->cm_id->qp, &reg_wr.wr, &failed_wr);
+       BUG_ON(failed_wr != &reg_wr.wr);
        if (ret)
                printk_ratelimited(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
                        __func__, __LINE__, ret);
@@ -754,21 +723,20 @@ out:
        return ret;
 }
 
-static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
-                       struct rds_iw_mr *ibmr,
-                       struct scatterlist *sg,
-                       unsigned int sg_len)
+static int rds_iw_map_reg(struct rds_iw_mr_pool *pool,
+                         struct rds_iw_mr *ibmr,
+                         struct scatterlist *sg,
+                         unsigned int sg_len)
 {
        struct rds_iw_device *rds_iwdev = pool->device;
        struct rds_iw_mapping *mapping = &ibmr->mapping;
        u64 *dma_pages;
-       int i, ret = 0;
+       int ret = 0;
 
        rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
 
-       dma_pages = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
-       if (IS_ERR(dma_pages)) {
-               ret = PTR_ERR(dma_pages);
+       ret = rds_iw_map_scatterlist(rds_iwdev, &mapping->m_sg);
+       if (ret) {
                dma_pages = NULL;
                goto out;
        }
@@ -778,10 +746,7 @@ static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
                goto out;
        }
 
-       for (i = 0; i < mapping->m_sg.dma_npages; ++i)
-               ibmr->page_list->page_list[i] = dma_pages[i];
-
-       ret = rds_iw_rdma_build_fastreg(mapping);
+       ret = rds_iw_rdma_reg_mr(mapping);
        if (ret)
                goto out;
 
@@ -867,8 +832,6 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
 static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
                struct rds_iw_mr *ibmr)
 {
-       if (ibmr->page_list)
-               ib_free_fast_reg_page_list(ibmr->page_list);
        if (ibmr->mr)
                ib_dereg_mr(ibmr->mr);
 }
index 86152ec3b8879a2dcf6eb41f85fa1cd746f5cb00..e20bd503f4bd5c87363b79dbd9e3ca0e0f8e2f4d 100644 (file)
@@ -137,13 +137,13 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
                send->s_op = NULL;
                send->s_mapping = NULL;
 
-               send->s_wr.next = NULL;
-               send->s_wr.wr_id = i;
-               send->s_wr.sg_list = send->s_sge;
-               send->s_wr.num_sge = 1;
-               send->s_wr.opcode = IB_WR_SEND;
-               send->s_wr.send_flags = 0;
-               send->s_wr.ex.imm_data = 0;
+               send->s_send_wr.next = NULL;
+               send->s_send_wr.wr_id = i;
+               send->s_send_wr.sg_list = send->s_sge;
+               send->s_send_wr.num_sge = 1;
+               send->s_send_wr.opcode = IB_WR_SEND;
+               send->s_send_wr.send_flags = 0;
+               send->s_send_wr.ex.imm_data = 0;
 
                sge = rds_iw_data_sge(ic, send->s_sge);
                sge->lkey = 0;
@@ -159,13 +159,6 @@ void rds_iw_send_init_ring(struct rds_iw_connection *ic)
                        printk(KERN_WARNING "RDS/IW: ib_alloc_mr failed\n");
                        break;
                }
-
-               send->s_page_list = ib_alloc_fast_reg_page_list(
-                       ic->i_cm_id->device, fastreg_message_size);
-               if (IS_ERR(send->s_page_list)) {
-                       printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
-                       break;
-               }
        }
 }
 
@@ -177,9 +170,7 @@ void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
        for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
                BUG_ON(!send->s_mr);
                ib_dereg_mr(send->s_mr);
-               BUG_ON(!send->s_page_list);
-               ib_free_fast_reg_page_list(send->s_page_list);
-               if (send->s_wr.opcode == 0xdead)
+               if (send->s_send_wr.opcode == 0xdead)
                        continue;
                if (send->s_rm)
                        rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
@@ -227,7 +218,7 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
                        continue;
                }
 
-               if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
+               if (wc.opcode == IB_WC_REG_MR && wc.wr_id == RDS_IW_REG_WR_ID) {
                        ic->i_fastreg_posted = 1;
                        continue;
                }
@@ -247,12 +238,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
                        send = &ic->i_sends[oldest];
 
                        /* In the error case, wc.opcode sometimes contains garbage */
-                       switch (send->s_wr.opcode) {
+                       switch (send->s_send_wr.opcode) {
                        case IB_WR_SEND:
                                if (send->s_rm)
                                        rds_iw_send_unmap_rm(ic, send, wc.status);
                                break;
-                       case IB_WR_FAST_REG_MR:
+                       case IB_WR_REG_MR:
                        case IB_WR_RDMA_WRITE:
                        case IB_WR_RDMA_READ:
                        case IB_WR_RDMA_READ_WITH_INV:
@@ -262,12 +253,12 @@ void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
                        default:
                                printk_ratelimited(KERN_NOTICE
                                                "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
-                                               __func__, send->s_wr.opcode);
+                                               __func__, send->s_send_wr.opcode);
                                break;
                        }
 
-                       send->s_wr.opcode = 0xdead;
-                       send->s_wr.num_sge = 1;
+                       send->s_send_wr.opcode = 0xdead;
+                       send->s_send_wr.num_sge = 1;
                        if (time_after(jiffies, send->s_queued + HZ/2))
                                rds_iw_stats_inc(s_iw_tx_stalled);
 
@@ -455,10 +446,10 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
 
        WARN_ON(pos != send - ic->i_sends);
 
-       send->s_wr.send_flags = send_flags;
-       send->s_wr.opcode = IB_WR_SEND;
-       send->s_wr.num_sge = 2;
-       send->s_wr.next = NULL;
+       send->s_send_wr.send_flags = send_flags;
+       send->s_send_wr.opcode = IB_WR_SEND;
+       send->s_send_wr.num_sge = 2;
+       send->s_send_wr.next = NULL;
        send->s_queued = jiffies;
        send->s_op = NULL;
 
@@ -472,7 +463,7 @@ rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
        } else {
                /* We're sending a packet with no payload. There is only
                 * one SGE */
-               send->s_wr.num_sge = 1;
+               send->s_send_wr.num_sge = 1;
                sge = &send->s_sge[0];
        }
 
@@ -672,23 +663,23 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
                 */
                if (ic->i_unsignaled_wrs-- == 0) {
                        ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
-                       send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+                       send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
                }
 
                ic->i_unsignaled_bytes -= len;
                if (ic->i_unsignaled_bytes <= 0) {
                        ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
-                       send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+                       send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
                }
 
                /*
                 * Always signal the last one if we're stopping due to flow control.
                 */
                if (flow_controlled && i == (work_alloc-1))
-                       send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+                       send->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
 
                rdsdebug("send %p wr %p num_sge %u next %p\n", send,
-                        &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+                        &send->s_send_wr, send->s_send_wr.num_sge, send->s_send_wr.next);
 
                sent += len;
                rm->data.op_dmaoff += len;
@@ -722,7 +713,7 @@ add_header:
                }
 
                if (prev)
-                       prev->s_wr.next = &send->s_wr;
+                       prev->s_send_wr.next = &send->s_send_wr;
                prev = send;
 
                pos = (pos + 1) % ic->i_send_ring.w_nr;
@@ -736,7 +727,7 @@ add_header:
        /* if we finished the message then send completion owns it */
        if (scat == &rm->data.op_sg[rm->data.op_count]) {
                prev->s_rm = ic->i_rm;
-               prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
+               prev->s_send_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
                ic->i_rm = NULL;
        }
 
@@ -748,11 +739,11 @@ add_header:
                rds_iw_send_add_credits(conn, credit_alloc - i);
 
        /* XXX need to worry about failed_wr and partial sends. */
-       failed_wr = &first->s_wr;
-       ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+       failed_wr = &first->s_send_wr;
+       ret = ib_post_send(ic->i_cm_id->qp, &first->s_send_wr, &failed_wr);
        rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
-                first, &first->s_wr, ret, failed_wr);
-       BUG_ON(failed_wr != &first->s_wr);
+                first, &first->s_send_wr, ret, failed_wr);
+       BUG_ON(failed_wr != &first->s_send_wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
                       "returned %d\n", &conn->c_faddr, ret);
@@ -770,24 +761,26 @@ out:
        return ret;
 }
 
-static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
+static int rds_iw_build_send_reg(struct rds_iw_send_work *send,
+                                struct scatterlist *sg,
+                                int sg_nents)
 {
-       BUG_ON(nent > send->s_page_list->max_page_list_len);
-       /*
-        * Perform a WR for the fast_reg_mr. Each individual page
-        * in the sg list is added to the fast reg page list and placed
-        * inside the fast_reg_mr WR.
-        */
-       send->s_wr.opcode = IB_WR_FAST_REG_MR;
-       send->s_wr.wr.fast_reg.length = len;
-       send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
-       send->s_wr.wr.fast_reg.page_list = send->s_page_list;
-       send->s_wr.wr.fast_reg.page_list_len = nent;
-       send->s_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
-       send->s_wr.wr.fast_reg.iova_start = sg_addr;
+       int n;
+
+       n = ib_map_mr_sg(send->s_mr, sg, sg_nents, PAGE_SIZE);
+       if (unlikely(n != sg_nents))
+               return n < 0 ? n : -EINVAL;
+
+       send->s_reg_wr.wr.opcode = IB_WR_REG_MR;
+       send->s_reg_wr.wr.wr_id = 0;
+       send->s_reg_wr.wr.num_sge = 0;
+       send->s_reg_wr.mr = send->s_mr;
+       send->s_reg_wr.key = send->s_mr->rkey;
+       send->s_reg_wr.access = IB_ACCESS_REMOTE_WRITE;
 
        ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
+
+       return 0;
 }
 
 int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
@@ -808,6 +801,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
        int sent;
        int ret;
        int num_sge;
+       int sg_nents;
 
        rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
 
@@ -861,9 +855,10 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
        scat = &op->op_sg[0];
        sent = 0;
        num_sge = op->op_count;
+       sg_nents = 0;
 
        for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
-               send->s_wr.send_flags = 0;
+               send->s_rdma_wr.wr.send_flags = 0;
                send->s_queued = jiffies;
 
                /*
@@ -872,7 +867,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                 */
                if (ic->i_unsignaled_wrs-- == 0) {
                        ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
-                       send->s_wr.send_flags = IB_SEND_SIGNALED;
+                       send->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
                }
 
                /* To avoid the need to have the plumbing to invalidate the fastreg_mr used
@@ -880,30 +875,31 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
                 */
                if (op->op_write)
-                       send->s_wr.opcode = IB_WR_RDMA_WRITE;
+                       send->s_rdma_wr.wr.opcode = IB_WR_RDMA_WRITE;
                else
-                       send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+                       send->s_rdma_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
 
-               send->s_wr.wr.rdma.remote_addr = remote_addr;
-               send->s_wr.wr.rdma.rkey = op->op_rkey;
+               send->s_rdma_wr.remote_addr = remote_addr;
+               send->s_rdma_wr.rkey = op->op_rkey;
                send->s_op = op;
 
                if (num_sge > rds_iwdev->max_sge) {
-                       send->s_wr.num_sge = rds_iwdev->max_sge;
+                       send->s_rdma_wr.wr.num_sge = rds_iwdev->max_sge;
                        num_sge -= rds_iwdev->max_sge;
                } else
-                       send->s_wr.num_sge = num_sge;
+                       send->s_rdma_wr.wr.num_sge = num_sge;
 
-               send->s_wr.next = NULL;
+               send->s_rdma_wr.wr.next = NULL;
 
                if (prev)
-                       prev->s_wr.next = &send->s_wr;
+                       prev->s_send_wr.next = &send->s_rdma_wr.wr;
 
-               for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
+               for (j = 0; j < send->s_rdma_wr.wr.num_sge &&
+                    scat != &op->op_sg[op->op_count]; j++) {
                        len = ib_sg_dma_len(ic->i_cm_id->device, scat);
 
-                       if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
-                               send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
+                       if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV)
+                               sg_nents++;
                        else {
                                send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
                                send->s_sge[j].length = len;
@@ -917,15 +913,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
                        scat++;
                }
 
-               if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
-                       send->s_wr.num_sge = 1;
+               if (send->s_rdma_wr.wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
+                       send->s_rdma_wr.wr.num_sge = 1;
                        send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
                        send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
                        send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
                }
 
                rdsdebug("send %p wr %p num_sge %u next %p\n", send,
-                       &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
+                       &send->s_rdma_wr,
+                       send->s_rdma_wr.wr.num_sge,
+                       send->s_rdma_wr.wr.next);
 
                prev = send;
                if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
@@ -934,7 +932,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
 
        /* if we finished the message then send completion owns it */
        if (scat == &op->op_sg[op->op_count])
-               first->s_wr.send_flags = IB_SEND_SIGNALED;
+               first->s_rdma_wr.wr.send_flags = IB_SEND_SIGNALED;
 
        if (i < work_alloc) {
                rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
@@ -948,16 +946,20 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
         * fastreg_mr (or possibly a dma_mr)
         */
        if (!op->op_write) {
-               rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
-                       op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
+               ret = rds_iw_build_send_reg(&ic->i_sends[fr_pos],
+                                           &op->op_sg[0], sg_nents);
+               if (ret) {
+                       printk(KERN_WARNING "RDS/IW: failed to reg send mem\n");
+                       goto out;
+               }
                work_alloc++;
        }
 
-       failed_wr = &first->s_wr;
-       ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
+       failed_wr = &first->s_rdma_wr.wr;
+       ret = ib_post_send(ic->i_cm_id->qp, &first->s_rdma_wr.wr, &failed_wr);
        rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
-                first, &first->s_wr, ret, failed_wr);
-       BUG_ON(failed_wr != &first->s_wr);
+                first, &first->s_rdma_wr, ret, failed_wr);
+       BUG_ON(failed_wr != &first->s_rdma_wr.wr);
        if (ret) {
                printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
                       "returned %d\n", &conn->c_faddr, ret);
index b9b40af5345b6f42a6ac8e7baea1405ac7c20fda..9c1fed81bf0f73927cc97a09c92e66fd926f288a 100644 (file)
@@ -142,8 +142,8 @@ static int rds_rdma_listen_init(void)
        struct rdma_cm_id *cm_id;
        int ret;
 
-       cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
-                              IB_QPT_RC);
+       cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler, NULL,
+                              RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(cm_id)) {
                ret = PTR_ERR(cm_id);
                printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
index 0a362397e434fe9a126bcd94a3f2bba99b89ea2f..88cf9e7269c2bd0d626bc64448254be73f602940 100644 (file)
@@ -151,9 +151,13 @@ __frwr_init(struct rpcrdma_mw *r, struct ib_pd *pd, struct ib_device *device,
        f->fr_mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, depth);
        if (IS_ERR(f->fr_mr))
                goto out_mr_err;
-       f->fr_pgl = ib_alloc_fast_reg_page_list(device, depth);
-       if (IS_ERR(f->fr_pgl))
+
+       f->sg = kcalloc(depth, sizeof(*f->sg), GFP_KERNEL);
+       if (!f->sg)
                goto out_list_err;
+
+       sg_init_table(f->sg, depth);
+
        return 0;
 
 out_mr_err:
@@ -163,9 +167,9 @@ out_mr_err:
        return rc;
 
 out_list_err:
-       rc = PTR_ERR(f->fr_pgl);
-       dprintk("RPC:       %s: ib_alloc_fast_reg_page_list status %i\n",
-               __func__, rc);
+       rc = -ENOMEM;
+       dprintk("RPC:       %s: sg allocation failure\n",
+               __func__);
        ib_dereg_mr(f->fr_mr);
        return rc;
 }
@@ -179,7 +183,7 @@ __frwr_release(struct rpcrdma_mw *r)
        if (rc)
                dprintk("RPC:       %s: ib_dereg_mr status %i\n",
                        __func__, rc);
-       ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+       kfree(r->r.frmr.sg);
 }
 
 static int
@@ -315,13 +319,10 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        struct rpcrdma_mw *mw;
        struct rpcrdma_frmr *frmr;
        struct ib_mr *mr;
-       struct ib_send_wr fastreg_wr, *bad_wr;
+       struct ib_reg_wr reg_wr;
+       struct ib_send_wr *bad_wr;
+       int rc, i, n, dma_nents;
        u8 key;
-       int len, pageoff;
-       int i, rc;
-       int seg_len;
-       u64 pa;
-       int page_no;
 
        mw = seg1->rl_mw;
        seg1->rl_mw = NULL;
@@ -334,64 +335,80 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
        } while (mw->r.frmr.fr_state != FRMR_IS_INVALID);
        frmr = &mw->r.frmr;
        frmr->fr_state = FRMR_IS_VALID;
+       mr = frmr->fr_mr;
 
-       pageoff = offset_in_page(seg1->mr_offset);
-       seg1->mr_offset -= pageoff;     /* start of page */
-       seg1->mr_len += pageoff;
-       len = -pageoff;
        if (nsegs > ia->ri_max_frmr_depth)
                nsegs = ia->ri_max_frmr_depth;
 
-       for (page_no = i = 0; i < nsegs;) {
-               rpcrdma_map_one(device, seg, direction);
-               pa = seg->mr_dma;
-               for (seg_len = seg->mr_len; seg_len > 0; seg_len -= PAGE_SIZE) {
-                       frmr->fr_pgl->page_list[page_no++] = pa;
-                       pa += PAGE_SIZE;
-               }
-               len += seg->mr_len;
+       for (i = 0; i < nsegs;) {
+               if (seg->mr_page)
+                       sg_set_page(&frmr->sg[i],
+                                   seg->mr_page,
+                                   seg->mr_len,
+                                   offset_in_page(seg->mr_offset));
+               else
+                       sg_set_buf(&frmr->sg[i], seg->mr_offset,
+                                  seg->mr_len);
+
                ++seg;
                ++i;
+
                /* Check for holes */
                if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }
-       dprintk("RPC:       %s: Using frmr %p to map %d segments (%d bytes)\n",
-               __func__, mw, i, len);
-
-       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-       fastreg_wr.wr_id = (unsigned long)(void *)mw;
-       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.wr.fast_reg.iova_start = seg1->mr_dma + pageoff;
-       fastreg_wr.wr.fast_reg.page_list = frmr->fr_pgl;
-       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fastreg_wr.wr.fast_reg.page_list_len = page_no;
-       fastreg_wr.wr.fast_reg.length = len;
-       fastreg_wr.wr.fast_reg.access_flags = writing ?
-                               IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
-                               IB_ACCESS_REMOTE_READ;
-       mr = frmr->fr_mr;
+       frmr->sg_nents = i;
+
+       dma_nents = ib_dma_map_sg(device, frmr->sg, frmr->sg_nents, direction);
+       if (!dma_nents) {
+               pr_err("RPC:       %s: failed to dma map sg %p sg_nents %u\n",
+                      __func__, frmr->sg, frmr->sg_nents);
+               return -ENOMEM;
+       }
+
+       n = ib_map_mr_sg(mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+       if (unlikely(n != frmr->sg_nents)) {
+               pr_err("RPC:       %s: failed to map mr %p (%u/%u)\n",
+                      __func__, frmr->fr_mr, n, frmr->sg_nents);
+               rc = n < 0 ? n : -EINVAL;
+               goto out_senderr;
+       }
+
+       dprintk("RPC:       %s: Using frmr %p to map %u segments (%u bytes)\n",
+               __func__, mw, frmr->sg_nents, mr->length);
+
        key = (u8)(mr->rkey & 0x000000FF);
        ib_update_fast_reg_key(mr, ++key);
-       fastreg_wr.wr.fast_reg.rkey = mr->rkey;
+
+       reg_wr.wr.next = NULL;
+       reg_wr.wr.opcode = IB_WR_REG_MR;
+       reg_wr.wr.wr_id = (uintptr_t)mw;
+       reg_wr.wr.num_sge = 0;
+       reg_wr.wr.send_flags = 0;
+       reg_wr.mr = mr;
+       reg_wr.key = mr->rkey;
+       reg_wr.access = writing ?
+                       IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
+                       IB_ACCESS_REMOTE_READ;
 
        DECR_CQCOUNT(&r_xprt->rx_ep);
-       rc = ib_post_send(ia->ri_id->qp, &fastreg_wr, &bad_wr);
+       rc = ib_post_send(ia->ri_id->qp, &reg_wr.wr, &bad_wr);
        if (rc)
                goto out_senderr;
 
+       seg1->mr_dir = direction;
        seg1->rl_mw = mw;
        seg1->mr_rkey = mr->rkey;
-       seg1->mr_base = seg1->mr_dma + pageoff;
-       seg1->mr_nsegs = i;
-       seg1->mr_len = len;
-       return i;
+       seg1->mr_base = mr->iova;
+       seg1->mr_nsegs = frmr->sg_nents;
+       seg1->mr_len = mr->length;
+
+       return frmr->sg_nents;
 
 out_senderr:
        dprintk("RPC:       %s: ib_post_send status %i\n", __func__, rc);
-       while (i--)
-               rpcrdma_unmap_one(device, --seg);
+       ib_dma_unmap_sg(device, frmr->sg, dma_nents, direction);
        __frwr_queue_recovery(mw);
        return rc;
 }
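
The rewritten frwr_op_map() above no longer flattens segments into a page-list array; it assembles a scatterlist (sg_set_page() for page-backed segments, sg_set_buf() for kernel buffers), DMA-maps it in one call, and hands it to ib_map_mr_sg(). After a successful map, mr->iova and mr->length describe the registered range. Below is a hedged sketch of the scatterlist assembly, with an illustrative segment layout that is not the rpcrdma_mr_seg used by the patch.

	/* Illustrative segment descriptor and helper; only the scatterlist
	 * and DMA-mapping calls mirror the hunks above.
	 */
	struct seg_sketch {
		struct page	*page;		/* NULL for a kernel buffer */
		void		*buf;
		unsigned int	len;
		unsigned int	offset;
	};

	static int build_frwr_sg_sketch(struct ib_device *dev,
					struct scatterlist *sg,
					struct seg_sketch *segs, int nsegs,
					enum dma_data_direction dir)
	{
		int i;

		sg_init_table(sg, nsegs);
		for (i = 0; i < nsegs; i++) {
			if (segs[i].page)
				sg_set_page(&sg[i], segs[i].page,
					    segs[i].len, segs[i].offset);
			else
				sg_set_buf(&sg[i], segs[i].buf, segs[i].len);
		}

		/* One mapping call for the whole list; 0 means failure. */
		return ib_dma_map_sg(dev, sg, nsegs, dir) ? 0 : -ENOMEM;
	}
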
@@ -405,22 +422,22 @@ frwr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
        struct rpcrdma_mr_seg *seg1 = seg;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_mw *mw = seg1->rl_mw;
+       struct rpcrdma_frmr *frmr = &mw->r.frmr;
        struct ib_send_wr invalidate_wr, *bad_wr;
        int rc, nsegs = seg->mr_nsegs;
 
        dprintk("RPC:       %s: FRMR %p\n", __func__, mw);
 
        seg1->rl_mw = NULL;
-       mw->r.frmr.fr_state = FRMR_IS_INVALID;
+       frmr->fr_state = FRMR_IS_INVALID;
 
        memset(&invalidate_wr, 0, sizeof(invalidate_wr));
        invalidate_wr.wr_id = (unsigned long)(void *)mw;
        invalidate_wr.opcode = IB_WR_LOCAL_INV;
-       invalidate_wr.ex.invalidate_rkey = mw->r.frmr.fr_mr->rkey;
+       invalidate_wr.ex.invalidate_rkey = frmr->fr_mr->rkey;
        DECR_CQCOUNT(&r_xprt->rx_ep);
 
-       while (seg1->mr_nsegs--)
-               rpcrdma_unmap_one(ia->ri_device, seg++);
+       ib_dma_unmap_sg(ia->ri_device, frmr->sg, frmr->sg_nents, seg1->mr_dir);
        read_lock(&ia->ri_qplock);
        rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr);
        read_unlock(&ia->ri_qplock);
index f0c3ff67ca987427136baebf67034ad3bf58a27f..ff4f01e527ecc08a1480ecba8f00d41a90a76571 100644 (file)
@@ -126,7 +126,7 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                        u64 rs_offset,
                        bool last)
 {
-       struct ib_send_wr read_wr;
+       struct ib_rdma_wr read_wr;
        int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
        struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
        int ret, read, pno;
@@ -180,16 +180,16 @@ int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
                clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
        memset(&read_wr, 0, sizeof(read_wr));
-       read_wr.wr_id = (unsigned long)ctxt;
-       read_wr.opcode = IB_WR_RDMA_READ;
-       ctxt->wr_op = read_wr.opcode;
-       read_wr.send_flags = IB_SEND_SIGNALED;
-       read_wr.wr.rdma.rkey = rs_handle;
-       read_wr.wr.rdma.remote_addr = rs_offset;
-       read_wr.sg_list = ctxt->sge;
-       read_wr.num_sge = pages_needed;
-
-       ret = svc_rdma_send(xprt, &read_wr);
+       read_wr.wr.wr_id = (unsigned long)ctxt;
+       read_wr.wr.opcode = IB_WR_RDMA_READ;
+       ctxt->wr_op = read_wr.wr.opcode;
+       read_wr.wr.send_flags = IB_SEND_SIGNALED;
+       read_wr.rkey = rs_handle;
+       read_wr.remote_addr = rs_offset;
+       read_wr.wr.sg_list = ctxt->sge;
+       read_wr.wr.num_sge = pages_needed;
+
+       ret = svc_rdma_send(xprt, &read_wr.wr);
        if (ret) {
                pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
                set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
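
These hunks are part of the work-request cleanup: the opcode-specific unions inside struct ib_send_wr have been split into wrapper types, so an RDMA READ or WRITE is now built in a struct ib_rdma_wr whose first member, wr, is the generic send WR, and ib_post_send() is handed &read_wr.wr. A hedged sketch of posting a single read with the containerized layout; the helper and its parameters are assumptions.

	static int post_rdma_read_sketch(struct ib_qp *qp, struct ib_sge *sge,
					 int num_sge, u64 remote_addr, u32 rkey)
	{
		struct ib_rdma_wr read_wr;
		struct ib_send_wr *bad_wr;

		memset(&read_wr, 0, sizeof(read_wr));
		read_wr.wr.opcode = IB_WR_RDMA_READ;
		read_wr.wr.send_flags = IB_SEND_SIGNALED;
		read_wr.wr.sg_list = sge;
		read_wr.wr.num_sge = num_sge;
		/* RDMA-specific fields now live outside the generic WR. */
		read_wr.remote_addr = remote_addr;
		read_wr.rkey = rkey;

		return ib_post_send(qp, &read_wr.wr, &bad_wr);
	}
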
@@ -219,14 +219,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
                         u64 rs_offset,
                         bool last)
 {
-       struct ib_send_wr read_wr;
+       struct ib_rdma_wr read_wr;
        struct ib_send_wr inv_wr;
-       struct ib_send_wr fastreg_wr;
+       struct ib_reg_wr reg_wr;
        u8 key;
-       int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+       int nents = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
        struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
        struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
-       int ret, read, pno;
+       int ret, read, pno, dma_nents, n;
        u32 pg_off = *page_offset;
        u32 pg_no = *page_no;
 
@@ -235,17 +235,14 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
 
        ctxt->direction = DMA_FROM_DEVICE;
        ctxt->frmr = frmr;
-       pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
-       read = min_t(int, (pages_needed << PAGE_SHIFT) - *page_offset,
-                    rs_length);
+       nents = min_t(unsigned int, nents, xprt->sc_frmr_pg_list_len);
+       read = min_t(int, (nents << PAGE_SHIFT) - *page_offset, rs_length);
 
-       frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
        frmr->direction = DMA_FROM_DEVICE;
        frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
-       frmr->map_len = pages_needed << PAGE_SHIFT;
-       frmr->page_list_len = pages_needed;
+       frmr->sg_nents = nents;
 
-       for (pno = 0; pno < pages_needed; pno++) {
+       for (pno = 0; pno < nents; pno++) {
                int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
 
                head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
@@ -253,17 +250,12 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
                head->arg.len += len;
                if (!pg_off)
                        head->count++;
+
+               sg_set_page(&frmr->sg[pno], rqstp->rq_arg.pages[pg_no],
+                           len, pg_off);
+
                rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
                rqstp->rq_next_page = rqstp->rq_respages + 1;
-               frmr->page_list->page_list[pno] =
-                       ib_dma_map_page(xprt->sc_cm_id->device,
-                                       head->arg.pages[pg_no], 0,
-                                       PAGE_SIZE, DMA_FROM_DEVICE);
-               ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
-                                          frmr->page_list->page_list[pno]);
-               if (ret)
-                       goto err;
-               atomic_inc(&xprt->sc_dma_used);
 
                /* adjust offset and wrap to next page if needed */
                pg_off += len;
@@ -279,43 +271,57 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
        else
                clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
+       dma_nents = ib_dma_map_sg(xprt->sc_cm_id->device,
+                                 frmr->sg, frmr->sg_nents,
+                                 frmr->direction);
+       if (!dma_nents) {
+               pr_err("svcrdma: failed to dma map sg %p\n",
+                      frmr->sg);
+               return -ENOMEM;
+       }
+       atomic_inc(&xprt->sc_dma_used);
+
+       n = ib_map_mr_sg(frmr->mr, frmr->sg, frmr->sg_nents, PAGE_SIZE);
+       if (unlikely(n != frmr->sg_nents)) {
+               pr_err("svcrdma: failed to map mr %p (%d/%d elements)\n",
+                      frmr->mr, n, frmr->sg_nents);
+               return n < 0 ? n : -EINVAL;
+       }
+
        /* Bump the key */
        key = (u8)(frmr->mr->lkey & 0x000000FF);
        ib_update_fast_reg_key(frmr->mr, ++key);
 
-       ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+       ctxt->sge[0].addr = frmr->mr->iova;
        ctxt->sge[0].lkey = frmr->mr->lkey;
-       ctxt->sge[0].length = read;
+       ctxt->sge[0].length = frmr->mr->length;
        ctxt->count = 1;
        ctxt->read_hdr = head;
 
-       /* Prepare FASTREG WR */
-       memset(&fastreg_wr, 0, sizeof(fastreg_wr));
-       fastreg_wr.opcode = IB_WR_FAST_REG_MR;
-       fastreg_wr.send_flags = IB_SEND_SIGNALED;
-       fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
-       fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
-       fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
-       fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
-       fastreg_wr.wr.fast_reg.length = frmr->map_len;
-       fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
-       fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
-       fastreg_wr.next = &read_wr;
+       /* Prepare REG WR */
+       reg_wr.wr.opcode = IB_WR_REG_MR;
+       reg_wr.wr.wr_id = 0;
+       reg_wr.wr.send_flags = IB_SEND_SIGNALED;
+       reg_wr.wr.num_sge = 0;
+       reg_wr.mr = frmr->mr;
+       reg_wr.key = frmr->mr->lkey;
+       reg_wr.access = frmr->access_flags;
+       reg_wr.wr.next = &read_wr.wr;
 
        /* Prepare RDMA_READ */
        memset(&read_wr, 0, sizeof(read_wr));
-       read_wr.send_flags = IB_SEND_SIGNALED;
-       read_wr.wr.rdma.rkey = rs_handle;
-       read_wr.wr.rdma.remote_addr = rs_offset;
-       read_wr.sg_list = ctxt->sge;
-       read_wr.num_sge = 1;
+       read_wr.wr.send_flags = IB_SEND_SIGNALED;
+       read_wr.rkey = rs_handle;
+       read_wr.remote_addr = rs_offset;
+       read_wr.wr.sg_list = ctxt->sge;
+       read_wr.wr.num_sge = 1;
        if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
-               read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
-               read_wr.wr_id = (unsigned long)ctxt;
-               read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+               read_wr.wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+               read_wr.wr.wr_id = (unsigned long)ctxt;
+               read_wr.wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
        } else {
-               read_wr.opcode = IB_WR_RDMA_READ;
-               read_wr.next = &inv_wr;
+               read_wr.wr.opcode = IB_WR_RDMA_READ;
+               read_wr.wr.next = &inv_wr;
                /* Prepare invalidate */
                memset(&inv_wr, 0, sizeof(inv_wr));
                inv_wr.wr_id = (unsigned long)ctxt;
@@ -323,10 +329,10 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
                inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
                inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
        }
-       ctxt->wr_op = read_wr.opcode;
+       ctxt->wr_op = read_wr.wr.opcode;
 
        /* Post the chain */
-       ret = svc_rdma_send(xprt, &fastreg_wr);
+       ret = svc_rdma_send(xprt, &reg_wr.wr);
        if (ret) {
                pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
                set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
@@ -340,7 +346,8 @@ int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
        atomic_inc(&rdma_stat_read);
        return ret;
  err:
-       svc_rdma_unmap_dma(ctxt);
+       ib_dma_unmap_sg(xprt->sc_cm_id->device,
+                       frmr->sg, frmr->sg_nents, frmr->direction);
        svc_rdma_put_context(ctxt, 0);
        svc_rdma_put_frmr(xprt, frmr);
        return ret;
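
In the FRMR read path above, the registration, the RDMA READ, and (when the device lacks read-with-invalidate) the local invalidate are still posted as one chain, now linked through the embedded generic WRs. A hedged sketch of the chaining, assuming the three WRs have already been filled in as in the hunks; the helper name is illustrative.

	static int post_read_chain_sketch(struct ib_qp *qp,
					  struct ib_reg_wr *reg_wr,
					  struct ib_rdma_wr *read_wr,
					  struct ib_send_wr *inv_wr)
	{
		struct ib_send_wr *bad_wr;

		reg_wr->wr.next = &read_wr->wr;
		/* inv_wr is NULL when IB_WR_RDMA_READ_WITH_INV is used. */
		read_wr->wr.next = inv_wr;

		return ib_post_send(qp, &reg_wr->wr, &bad_wr);
	}
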
index 1dfae83170650ec26d53973e6627acdaa78dcddf..969a1ab75fc3c5fb8011157e4f57e8d08f560b42 100644 (file)
@@ -217,7 +217,7 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
                      u32 xdr_off, int write_len,
                      struct svc_rdma_req_map *vec)
 {
-       struct ib_send_wr write_wr;
+       struct ib_rdma_wr write_wr;
        struct ib_sge *sge;
        int xdr_sge_no;
        int sge_no;
@@ -282,17 +282,17 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
        /* Prepare WRITE WR */
        memset(&write_wr, 0, sizeof write_wr);
        ctxt->wr_op = IB_WR_RDMA_WRITE;
-       write_wr.wr_id = (unsigned long)ctxt;
-       write_wr.sg_list = &sge[0];
-       write_wr.num_sge = sge_no;
-       write_wr.opcode = IB_WR_RDMA_WRITE;
-       write_wr.send_flags = IB_SEND_SIGNALED;
-       write_wr.wr.rdma.rkey = rmr;
-       write_wr.wr.rdma.remote_addr = to;
+       write_wr.wr.wr_id = (unsigned long)ctxt;
+       write_wr.wr.sg_list = &sge[0];
+       write_wr.wr.num_sge = sge_no;
+       write_wr.wr.opcode = IB_WR_RDMA_WRITE;
+       write_wr.wr.send_flags = IB_SEND_SIGNALED;
+       write_wr.rkey = rmr;
+       write_wr.remote_addr = to;
 
        /* Post It */
        atomic_inc(&rdma_stat_write);
-       if (svc_rdma_send(xprt, &write_wr))
+       if (svc_rdma_send(xprt, &write_wr.wr))
                goto err;
        return write_len - bc;
  err:
index a133b1e5b5f62a0543a7ab0c2e2fb362b726a5bd..b348b4adef29a48246709cc7f32cf576865753eb 100644 (file)
@@ -750,8 +750,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
        if (!cma_xprt)
                return ERR_PTR(-ENOMEM);
 
-       listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
-                                  IB_QPT_RC);
+       listen_id = rdma_create_id(&init_net, rdma_listen_handler, cma_xprt,
+                                  RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(listen_id)) {
                ret = PTR_ERR(listen_id);
                dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
@@ -790,7 +790,7 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
 {
        struct ib_mr *mr;
-       struct ib_fast_reg_page_list *pl;
+       struct scatterlist *sg;
        struct svc_rdma_fastreg_mr *frmr;
        u32 num_sg;
 
@@ -803,13 +803,14 @@ static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt)
        if (IS_ERR(mr))
                goto err_free_frmr;
 
-       pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device,
-                                        num_sg);
-       if (IS_ERR(pl))
+       sg = kcalloc(RPCSVC_MAXPAGES, sizeof(*sg), GFP_KERNEL);
+       if (!sg)
                goto err_free_mr;
 
+       sg_init_table(sg, RPCSVC_MAXPAGES);
+
        frmr->mr = mr;
-       frmr->page_list = pl;
+       frmr->sg = sg;
        INIT_LIST_HEAD(&frmr->frmr_list);
        return frmr;
 
@@ -829,8 +830,8 @@ static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt)
                frmr = list_entry(xprt->sc_frmr_q.next,
                                  struct svc_rdma_fastreg_mr, frmr_list);
                list_del_init(&frmr->frmr_list);
+               kfree(frmr->sg);
                ib_dereg_mr(frmr->mr);
-               ib_free_fast_reg_page_list(frmr->page_list);
                kfree(frmr);
        }
 }
@@ -844,8 +845,7 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
                frmr = list_entry(rdma->sc_frmr_q.next,
                                  struct svc_rdma_fastreg_mr, frmr_list);
                list_del_init(&frmr->frmr_list);
-               frmr->map_len = 0;
-               frmr->page_list_len = 0;
+               frmr->sg_nents = 0;
        }
        spin_unlock_bh(&rdma->sc_frmr_q_lock);
        if (frmr)
@@ -854,25 +854,13 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
        return rdma_alloc_frmr(rdma);
 }
 
-static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
-                          struct svc_rdma_fastreg_mr *frmr)
-{
-       int page_no;
-       for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
-               dma_addr_t addr = frmr->page_list->page_list[page_no];
-               if (ib_dma_mapping_error(frmr->mr->device, addr))
-                       continue;
-               atomic_dec(&xprt->sc_dma_used);
-               ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
-                                 frmr->direction);
-       }
-}
-
 void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
                       struct svc_rdma_fastreg_mr *frmr)
 {
        if (frmr) {
-               frmr_unmap_dma(rdma, frmr);
+               ib_dma_unmap_sg(rdma->sc_cm_id->device,
+                               frmr->sg, frmr->sg_nents, frmr->direction);
+               atomic_dec(&rdma->sc_dma_used);
                spin_lock_bh(&rdma->sc_frmr_q_lock);
                WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
                list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
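
With the page list gone, the svcrdma fast-registration MR now carries a plain scatterlist: rdma_alloc_frmr() kcallocs and sg_init_table()s it, rdma_dealloc_frmr_q() kfrees it after deregistering the MR, and svc_rdma_put_frmr() unmaps it with a single ib_dma_unmap_sg() call. A hedged sketch of that allocate/release lifecycle; the helper names are illustrative.

	static struct scatterlist *frmr_sg_alloc_sketch(unsigned int depth)
	{
		struct scatterlist *sg;

		sg = kcalloc(depth, sizeof(*sg), GFP_KERNEL);
		if (!sg)
			return NULL;

		sg_init_table(sg, depth);
		return sg;
	}

	static void frmr_sg_release_sketch(struct ib_device *dev,
					   struct scatterlist *sg,
					   int sg_nents,
					   enum dma_data_direction dir)
	{
		if (sg_nents)
			ib_dma_unmap_sg(dev, sg, sg_nents, dir);
		kfree(sg);
	}
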
index 93883ffb86e0d0e6244f33d1558d11ac0f1d134b..eadd1655145a3bc5b81bdefb7015792fb3be566a 100644 (file)
@@ -378,7 +378,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
 
        init_completion(&ia->ri_done);
 
-       id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
+       id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
+                           IB_QPT_RC);
        if (IS_ERR(id)) {
                rc = PTR_ERR(id);
                dprintk("RPC:       %s: rdma_create_id() failed %i\n",
index f8dd17be9f43cefe89470cd2c9c9954f76a93ca0..ac7f8d4f632a9e923fdcf3fdbd8f628ad044d34b 100644 (file)
@@ -201,7 +201,8 @@ enum rpcrdma_frmr_state {
 };
 
 struct rpcrdma_frmr {
-       struct ib_fast_reg_page_list    *fr_pgl;
+       struct scatterlist              *sg;
+       int                             sg_nents;
        struct ib_mr                    *fr_mr;
        enum rpcrdma_frmr_state         fr_state;
        struct work_struct              fr_work;