Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
author    Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 8 Aug 2017 18:42:33 +0000 (11:42 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
          Tue, 8 Aug 2017 18:42:33 +0000 (11:42 -0700)
Pull rdma fixes from Doug Ledford:
 "Third set of -rc fixes for 4.13 cycle

   - small set of miscellaneous fixes

   - a reasonably sizable set of IPoIB fixes that deal with multiple
     long-standing issues"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/hns: checking for IS_ERR() instead of NULL
  RDMA/mlx5: Fix existence check for extended address vector
  IB/uverbs: Fix device cleanup
  RDMA/uverbs: Prevent leak of reserved field
  IB/core: Fix race condition in resolving IP to MAC
  IB/ipoib: Notify on modify QP failure only when relevant
  Revert "IB/core: Allow QP state transition from reset to error"
  IB/ipoib: Remove double pointer assigning
  IB/ipoib: Clean error paths in add port
  IB/ipoib: Add get statistics support to SRIOV VF
  IB/ipoib: Add multicast packets statistics
  IB/ipoib: Set IPOIB_NEIGH_TBL_FLUSH after flushed completion initialization
  IB/ipoib: Prevent setting negative values to max_nonsrq_conn_qp
  IB/ipoib: Make sure no in-flight joins while leaving that mcast
  IB/ipoib: Use cancel_delayed_work_sync when needed
  IB/ipoib: Fix race between light events and interface restart

13 files changed:
drivers/infiniband/core/addr.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/hns/hns_roce_hw_v1.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
include/linux/mlx5/qp.h

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 01236cef7bfb1affe07e4214cf6d8baf6ca2a2a1..437522ca97b4b62fd79b8e84fa643ff9c4751ccd 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -61,6 +61,7 @@ struct addr_req {
        void (*callback)(int status, struct sockaddr *src_addr,
                         struct rdma_dev_addr *addr, void *context);
        unsigned long timeout;
+       struct delayed_work work;
        int status;
        u32 seq;
 };
@@ -295,7 +296,7 @@ int rdma_translate_ip(const struct sockaddr *addr,
 }
 EXPORT_SYMBOL(rdma_translate_ip);
 
-static void set_timeout(unsigned long time)
+static void set_timeout(struct delayed_work *delayed_work, unsigned long time)
 {
        unsigned long delay;
 
@@ -303,7 +304,7 @@ static void set_timeout(unsigned long time)
        if ((long)delay < 0)
                delay = 0;
 
-       mod_delayed_work(addr_wq, &work, delay);
+       mod_delayed_work(addr_wq, delayed_work, delay);
 }
 
 static void queue_req(struct addr_req *req)
@@ -318,8 +319,7 @@ static void queue_req(struct addr_req *req)
 
        list_add(&req->list, &temp_req->list);
 
-       if (req_list.next == &req->list)
-               set_timeout(req->timeout);
+       set_timeout(&req->work, req->timeout);
        mutex_unlock(&lock);
 }
 
@@ -574,6 +574,37 @@ static int addr_resolve(struct sockaddr *src_in,
        return ret;
 }
 
+static void process_one_req(struct work_struct *_work)
+{
+       struct addr_req *req;
+       struct sockaddr *src_in, *dst_in;
+
+       mutex_lock(&lock);
+       req = container_of(_work, struct addr_req, work.work);
+
+       if (req->status == -ENODATA) {
+               src_in = (struct sockaddr *)&req->src_addr;
+               dst_in = (struct sockaddr *)&req->dst_addr;
+               req->status = addr_resolve(src_in, dst_in, req->addr,
+                                          true, req->seq);
+               if (req->status && time_after_eq(jiffies, req->timeout)) {
+                       req->status = -ETIMEDOUT;
+               } else if (req->status == -ENODATA) {
+                       /* requeue the work for retrying again */
+                       set_timeout(&req->work, req->timeout);
+                       mutex_unlock(&lock);
+                       return;
+               }
+       }
+       list_del(&req->list);
+       mutex_unlock(&lock);
+
+       req->callback(req->status, (struct sockaddr *)&req->src_addr,
+               req->addr, req->context);
+       put_client(req->client);
+       kfree(req);
+}
+
 static void process_req(struct work_struct *work)
 {
        struct addr_req *req, *temp_req;
@@ -591,20 +622,23 @@ static void process_req(struct work_struct *work)
                                                   true, req->seq);
                        if (req->status && time_after_eq(jiffies, req->timeout))
                                req->status = -ETIMEDOUT;
-                       else if (req->status == -ENODATA)
+                       else if (req->status == -ENODATA) {
+                               set_timeout(&req->work, req->timeout);
                                continue;
+                       }
                }
                list_move_tail(&req->list, &done_list);
        }
 
-       if (!list_empty(&req_list)) {
-               req = list_entry(req_list.next, struct addr_req, list);
-               set_timeout(req->timeout);
-       }
        mutex_unlock(&lock);
 
        list_for_each_entry_safe(req, temp_req, &done_list, list) {
                list_del(&req->list);
+               /* It is safe to cancel other work items from this work item
+                * because at a time there can be only one work item running
+                * with this single threaded work queue.
+                */
+               cancel_delayed_work(&req->work);
                req->callback(req->status, (struct sockaddr *) &req->src_addr,
                        req->addr, req->context);
                put_client(req->client);
@@ -647,6 +681,7 @@ int rdma_resolve_ip(struct rdma_addr_client *client,
        req->context = context;
        req->client = client;
        atomic_inc(&client->refcount);
+       INIT_DELAYED_WORK(&req->work, process_one_req);
        req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq);
 
        req->status = addr_resolve(src_in, dst_in, addr, true, req->seq);
@@ -701,7 +736,7 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
                        req->status = -ECANCELED;
                        req->timeout = jiffies;
                        list_move(&req->list, &req_list);
-                       set_timeout(req->timeout);
+                       set_timeout(&req->work, req->timeout);
                        break;
                }
        }
@@ -807,9 +842,8 @@ static int netevent_callback(struct notifier_block *self, unsigned long event,
        if (event == NETEVENT_NEIGH_UPDATE) {
                struct neighbour *neigh = ctx;
 
-               if (neigh->nud_state & NUD_VALID) {
-                       set_timeout(jiffies);
-               }
+               if (neigh->nud_state & NUD_VALID)
+                       set_timeout(&work, jiffies);
        }
        return 0;
 }
@@ -820,7 +854,7 @@ static struct notifier_block nb = {
 
 int addr_init(void)
 {
-       addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0);
+       addr_wq = alloc_ordered_workqueue("ib_addr", WQ_MEM_RECLAIM);
        if (!addr_wq)
                return -ENOMEM;
 
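The addr.c changes above replace the single shared delayed_work with one
delayed_work per request, running on an ordered (i.e. single-threaded)
workqueue so that one work item can safely cancel its siblings.  A minimal
sketch of the pattern -- my_req, try_resolve() and finish_req() are
hypothetical names, not the in-tree identifiers:

    struct my_req {
            struct delayed_work work;       /* one work item per request */
            unsigned long timeout;          /* absolute deadline, jiffies */
    };

    static struct workqueue_struct *my_wq;

    static void my_req_handler(struct work_struct *_work)
    {
            struct my_req *req = container_of(_work, struct my_req, work.work);

            if (try_resolve(req) == -ENODATA &&
                time_before(jiffies, req->timeout)) {
                    /* transient failure: re-arm this request's own work */
                    mod_delayed_work(my_wq, &req->work, HZ / 4);
                    return;
            }
            finish_req(req);                /* callback, put refs, kfree */
    }

    /* alloc_ordered_workqueue() guarantees at most one work item executes
     * at a time, which is what makes the cancel_delayed_work() call in
     * process_req() above race-free. */
    my_wq = alloc_ordered_workqueue("my_wq", WQ_MEM_RECLAIM);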
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 2c98533a0203b084fb198a3eb8088a0bac59522c..c551d2b275fdf339310a087bef9c6e821d7c7e09 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1153,7 +1153,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
                            int out_len)
 {
        struct ib_uverbs_resize_cq      cmd;
-       struct ib_uverbs_resize_cq_resp resp;
+       struct ib_uverbs_resize_cq_resp resp = {};
        struct ib_udata                 udata;
        struct ib_cq                    *cq;
        int                             ret = -EINVAL;
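The "= {}" initializer is the entire fix: the handler copies resp back to
userspace, and the reserved field it never writes would otherwise carry
uninitialized kernel stack bytes.  Illustration with a hypothetical
response struct:

    struct resp {
            __u32 cqe;
            __u32 reserved;         /* never assigned by the handler */
    } resp = {};                    /* every member starts zeroed, so the
                                     * later copy to userspace cannot leak
                                     * stale stack contents */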
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 3d2609608f589625d0077167fa2e66a00430b89f..c023e2c81b8f2b06443452f91edcc506b46b6d17 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -250,6 +250,7 @@ void ib_uverbs_release_file(struct kref *ref)
        if (atomic_dec_and_test(&file->device->refcount))
                ib_uverbs_comp_dev(file->device);
 
+       kobject_put(&file->device->kobj);
        kfree(file);
 }
 
@@ -917,7 +918,6 @@ err:
 static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
        struct ib_uverbs_file *file = filp->private_data;
-       struct ib_uverbs_device *dev = file->device;
 
        mutex_lock(&file->cleanup_mutex);
        if (file->ucontext) {
@@ -939,7 +939,6 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
                         ib_uverbs_release_async_event_file);
 
        kref_put(&file->ref, ib_uverbs_release_file);
-       kobject_put(&dev->kobj);
 
        return 0;
 }
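The two uverbs_main.c hunks move the kobject_put() from close() into the
kref release callback, so the reference pinning the parent device is only
dropped when the last holder of the file is gone.  A sketch of the
corrected lifetime, with hypothetical names:

    static void my_file_release(struct kref *ref)
    {
            struct my_file *file = container_of(ref, struct my_file, ref);

            /* last reference to the file: only now may the parent
             * device's kobject be released */
            kobject_put(&file->device->kobj);
            kfree(file);
    }

    static int my_close(struct inode *inode, struct file *filp)
    {
            struct my_file *file = filp->private_data;

            /* no kobject_put() here: other kref holders may still be
             * dereferencing file->device */
            kref_put(&file->ref, my_file_release);
            return 0;
    }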
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index fb98ed67d5bc684b8cc0b941d7140986b95aa99b..7f8fe443df46f5b562ac3b2561e19226e3ab6b68 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -895,7 +895,6 @@ static const struct {
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
        [IB_QPS_RESET] = {
                [IB_QPS_RESET] = { .valid = 1 },
-               [IB_QPS_ERR] =   { .valid = 1 },
                [IB_QPS_INIT]  = {
                        .valid = 1,
                        .req_param = {
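With the RESET -> ERR entry reverted, that transition is invalid again.
Conceptually the table is consulted like this (a sketch of the check that
ib_modify_qp_is_ok() performs, not its real signature):

    static bool transition_ok(enum ib_qp_state cur, enum ib_qp_state next)
    {
            return qp_state_table[cur][next].valid;
    }

so a RESET -> ERR modify now fails, which is why the ipoib_ib.c hunk below
downgrades exactly that failure to a debug message.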
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 23fad6d969440bd2bd50a0c8b0dbafe8a92f4ae9..2540b65e242cebcf5b7c9fd60f936bc35bbf019b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -733,7 +733,7 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
                        continue;
 
                free_mr->mr_free_qp[i] = hns_roce_v1_create_lp_qp(hr_dev, pd);
-               if (IS_ERR(free_mr->mr_free_qp[i])) {
+               if (!free_mr->mr_free_qp[i]) {
                        dev_err(dev, "Create loop qp failed!\n");
                        goto create_lp_qp_failed;
                }
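hns_roce_v1_create_lp_qp() signals failure by returning NULL, and IS_ERR()
is always false for NULL, so the old check could never fire.  The two
kernel error-return conventions must not be mixed; sketch with hypothetical
callees:

    t = returns_null_on_failure();          /* NULL convention */
    if (!t)
            return -ENOMEM;

    t = returns_err_ptr_on_failure();       /* ERR_PTR() convention */
    if (IS_ERR(t))
            return PTR_ERR(t);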
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index ae0746754008798fc0c4ab7e940f736c376a72f1..3d701c7a4c9140e488b7427d9d901a4ea77d2786 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -939,7 +939,7 @@ static int mlx5_ib_mr_initiator_pfault_handler(
 
        if (qp->ibqp.qp_type != IB_QPT_RC) {
                av = *wqe;
-               if (av->dqp_dct & be32_to_cpu(MLX5_WQE_AV_EXT))
+               if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
                        *wqe += sizeof(struct mlx5_av);
                else
                        *wqe += sizeof(struct mlx5_base_av);
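av->dqp_dct is a big-endian field, so the CPU-order constant must be
converted with cpu_to_be32() before masking, and MLX5_EXTENDED_UD_AV is
the bit that actually marks an extended address vector (the unused
MLX5_WQE_AV_EXT define is dropped from qp.h at the end of this diff).
The general pattern for testing a flag in a __be32 field:

    /* convert the constant once; byte-swapping the field instead would
     * also defeat sparse's endianness checking */
    if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
            /* extended address vector present */;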
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index ff50a7bd66d864506ec65aef1b63f45ce5d36e36..7ac25059c40f94aad951b28351cf425ebe573197 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -336,6 +336,7 @@ struct ipoib_dev_priv {
        unsigned long flags;
 
        struct rw_semaphore vlan_rwsem;
+       struct mutex mcast_mutex;
 
        struct rb_root  path_tree;
        struct list_head path_list;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index f87d104837dcfab7f0e35b5b7fcae1e021599bfc..d69410c2ed97bdeceb17aedb2a7fe6049c59c310 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -511,7 +511,6 @@ static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
        case IB_CM_REQ_RECEIVED:
                return ipoib_cm_req_handler(cm_id, event);
        case IB_CM_DREQ_RECEIVED:
-               p = cm_id->context;
                ib_send_cm_drep(cm_id, NULL, 0);
                /* Fall through */
        case IB_CM_REJ_RECEIVED:
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 7871379342f48fa77b2e6e8279ca774b4c49ad2f..184a22f4802773efc67131093f4ab4fcc89cd276 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -52,7 +52,8 @@ static const struct ipoib_stats ipoib_gstrings_stats[] = {
        IPOIB_NETDEV_STAT(tx_bytes),
        IPOIB_NETDEV_STAT(tx_errors),
        IPOIB_NETDEV_STAT(rx_dropped),
-       IPOIB_NETDEV_STAT(tx_dropped)
+       IPOIB_NETDEV_STAT(tx_dropped),
+       IPOIB_NETDEV_STAT(multicast),
 };
 
 #define IPOIB_GLOBAL_STATS_LEN ARRAY_SIZE(ipoib_gstrings_stats)
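The new "multicast" entry exposes dev->stats.multicast (incremented in the
ipoib_ib.c receive path below) through ethtool -S.  The IPOIB_NETDEV_STAT()
macro ties each string to a field of the standard netdev stats structure;
a hypothetical reduction of it:

    #define IPOIB_NETDEV_STAT(m) { \
                    .stat_string = #m, \
                    .stat_offset = offsetof(struct rtnl_link_stats64, m) }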
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 57a9655e844deb1cc2eb57d9485f98e195368ac5..2e075377242e2baccc54cda5859d5b3ba7e768d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -256,6 +256,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 
        ++dev->stats.rx_packets;
        dev->stats.rx_bytes += skb->len;
+       if (skb->pkt_type == PACKET_MULTICAST)
+               dev->stats.multicast++;
 
        skb->dev = dev;
        if ((dev->features & NETIF_F_RXCSUM) &&
@@ -709,6 +711,27 @@ static int recvs_pending(struct net_device *dev)
        return pending;
 }
 
+static void check_qp_movement_and_print(struct ipoib_dev_priv *priv,
+                                       struct ib_qp *qp,
+                                       enum ib_qp_state new_state)
+{
+       struct ib_qp_attr qp_attr;
+       struct ib_qp_init_attr query_init_attr;
+       int ret;
+
+       ret = ib_query_qp(qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+       if (ret) {
+               ipoib_warn(priv, "%s: Failed to query QP\n", __func__);
+               return;
+       }
+       /* print according to the new-state and the previous state.*/
+       if (new_state == IB_QPS_ERR && qp_attr.qp_state == IB_QPS_RESET)
+               ipoib_dbg(priv, "Failed modify QP, IB_QPS_RESET to IB_QPS_ERR, acceptable\n");
+       else
+               ipoib_warn(priv, "Failed to modify QP to state: %d from state: %d\n",
+                          new_state, qp_attr.qp_state);
+}
+
 int ipoib_ib_dev_stop_default(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -728,7 +751,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
         */
        qp_attr.qp_state = IB_QPS_ERR;
        if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
-               ipoib_warn(priv, "Failed to modify QP to ERROR state\n");
+               check_qp_movement_and_print(priv, priv->qp, IB_QPS_ERR);
 
        /* Wait for all sends and receives to complete */
        begin = jiffies;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4ce315c92b480fa705c30b33ffd7253b4cfded3b..6c77df34869dfb719d66787f6ccbb7637b042d36 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1560,6 +1560,7 @@ static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
        int i, wait_flushed = 0;
 
        init_completion(&priv->ntbl.flushed);
+       set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
        spin_lock_irqsave(&priv->lock, flags);
 
@@ -1604,7 +1605,6 @@ static void ipoib_neigh_hash_uninit(struct net_device *dev)
 
        ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
        init_completion(&priv->ntbl.deleted);
-       set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
 
        /* Stop GC if called at init fail need to cancel work */
        stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
@@ -1847,6 +1847,7 @@ static const struct net_device_ops ipoib_netdev_ops_vf = {
        .ndo_tx_timeout          = ipoib_timeout,
        .ndo_set_rx_mode         = ipoib_set_mcast_list,
        .ndo_get_iflink          = ipoib_get_iflink,
+       .ndo_get_stats64         = ipoib_get_stats,
 };
 
 void ipoib_setup_common(struct net_device *dev)
@@ -1877,6 +1878,7 @@ static void ipoib_build_priv(struct net_device *dev)
        priv->dev = dev;
        spin_lock_init(&priv->lock);
        init_rwsem(&priv->vlan_rwsem);
+       mutex_init(&priv->mcast_mutex);
 
        INIT_LIST_HEAD(&priv->path_list);
        INIT_LIST_HEAD(&priv->child_intfs);
@@ -2173,14 +2175,14 @@ static struct net_device *ipoib_add_port(const char *format,
        priv->dev->dev_id = port - 1;
 
        result = ib_query_port(hca, port, &attr);
-       if (!result)
-               priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
-       else {
+       if (result) {
                printk(KERN_WARNING "%s: ib_query_port %d failed\n",
                       hca->name, port);
                goto device_init_failed;
        }
 
+       priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+
        /* MTU will be reset when mcast join happens */
        priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
        priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
@@ -2211,12 +2213,14 @@ static struct net_device *ipoib_add_port(const char *format,
                printk(KERN_WARNING "%s: ib_query_gid port %d failed (ret = %d)\n",
                       hca->name, port, result);
                goto device_init_failed;
-       } else
-               memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
+       }
+
+       memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
+              sizeof(union ib_gid));
        set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
 
        result = ipoib_dev_init(priv->dev, hca, port);
-       if (result < 0) {
+       if (result) {
                printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
                       hca->name, port, result);
                goto device_init_failed;
@@ -2365,6 +2369,7 @@ static int __init ipoib_init_module(void)
        ipoib_sendq_size = max3(ipoib_sendq_size, 2 * MAX_SEND_CQE, IPOIB_MIN_QUEUE_SIZE);
 #ifdef CONFIG_INFINIBAND_IPOIB_CM
        ipoib_max_conn_qp = min(ipoib_max_conn_qp, IPOIB_CM_MAX_CONN_QP);
+       ipoib_max_conn_qp = max(ipoib_max_conn_qp, 0);
 #endif
 
        /*
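The min()/max() pair in ipoib_init_module() pins the module parameter into
[0, IPOIB_CM_MAX_CONN_QP], so a user-supplied negative max_nonsrq_conn_qp
can no longer reach the connected-mode QP accounting.  The same effect
could be written with the kernel's clamp() helper:

    ipoib_max_conn_qp = clamp(ipoib_max_conn_qp, 0, IPOIB_CM_MAX_CONN_QP);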
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 057f58e6afca249744f2d9013021e3c1c5d6417f..93e149efc1f5fc0382b61dcfc9f84d786d8b52ca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -684,15 +684,10 @@ void ipoib_mcast_start_thread(struct net_device *dev)
 int ipoib_mcast_stop_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = ipoib_priv(dev);
-       unsigned long flags;
 
        ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-       spin_lock_irqsave(&priv->lock, flags);
-       cancel_delayed_work(&priv->mcast_task);
-       spin_unlock_irqrestore(&priv->lock, flags);
-
-       flush_workqueue(priv->wq);
+       cancel_delayed_work_sync(&priv->mcast_task);
 
        return 0;
 }
@@ -748,6 +743,14 @@ void ipoib_mcast_remove_list(struct list_head *remove_list)
 {
        struct ipoib_mcast *mcast, *tmcast;
 
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
+       list_for_each_entry_safe(mcast, tmcast, remove_list, list)
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+                       wait_for_completion(&mcast->done);
+
        list_for_each_entry_safe(mcast, tmcast, remove_list, list) {
                ipoib_mcast_leave(mcast->dev, mcast);
                ipoib_mcast_free(mcast);
@@ -838,6 +841,7 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
        struct ipoib_mcast *mcast, *tmcast;
        unsigned long flags;
 
+       mutex_lock(&priv->mcast_mutex);
        ipoib_dbg_mcast(priv, "flushing multicast list\n");
 
        spin_lock_irqsave(&priv->lock, flags);
@@ -856,15 +860,8 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /*
-        * make sure the in-flight joins have finished before we attempt
-        * to leave
-        */
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-                       wait_for_completion(&mcast->done);
-
        ipoib_mcast_remove_list(&remove_list);
+       mutex_unlock(&priv->mcast_mutex);
 }
 
 static int ipoib_mcast_addr_is_valid(const u8 *addr, const u8 *broadcast)
@@ -982,14 +979,6 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        netif_addr_unlock(dev);
        local_irq_restore(flags);
 
-       /*
-        * make sure the in-flight joins have finished before we attempt
-        * to leave
-        */
-       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-                       wait_for_completion(&mcast->done);
-
        ipoib_mcast_remove_list(&remove_list);
 
        /*
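Two patterns are at work in the ipoib_multicast.c hunks: the wait for
in-flight joins moves into ipoib_mcast_remove_list() itself, so every
caller of that helper now blocks until BUSY groups complete before leaving
and freeing them; and the lock/cancel/flush sequence collapses into a
single call, since the _sync variant both deactivates a pending timer and
waits for a running instance to finish:

    /* old: three steps, and flush_workqueue() stalls on unrelated work */
    spin_lock_irqsave(&priv->lock, flags);
    cancel_delayed_work(&priv->mcast_task);
    spin_unlock_irqrestore(&priv->lock, flags);
    flush_workqueue(priv->wq);

    /* new: cancel and wait for this one work item only */
    cancel_delayed_work_sync(&priv->mcast_task);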
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 6f41270d80c03128bdeb60e5c6fc1b6ca2b5fe54..f378dc0e7eaf4db75eab8606f03df4e9269602e4 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -212,7 +212,6 @@ struct mlx5_wqe_ctrl_seg {
 #define MLX5_WQE_CTRL_OPCODE_MASK 0xff
 #define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
 #define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
-#define MLX5_WQE_AV_EXT 0x80000000
 
 enum {
        MLX5_ETH_WQE_L3_INNER_CSUM      = 1 << 4,