Merge branches 'cve-fixup', 'ipoib', 'iser', 'misc-4.1', 'or-mlx4' and 'srp' into...
author     Doug Ledford <dledford@redhat.com>
           Wed, 15 Apr 2015 20:24:49 +0000 (16:24 -0400)
committer  Doug Ledford <dledford@redhat.com>
           Wed, 15 Apr 2015 20:24:49 +0000 (16:24 -0400)
24 files changed:
Documentation/filesystems/nfs/nfs-rdma.txt
MAINTAINERS
drivers/infiniband/core/umem.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/hw/mlx4/alias_GUID.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/mlx4_ib.h
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/sysfs.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_cm.c
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/ipoib/ipoib_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/infiniband/ulp/srpt/ib_srpt.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/main.c
drivers/net/ethernet/mellanox/mlx4/mlx4.h
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
include/linux/mlx4/device.h

diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
index 724043858b0834f874aff57df4155f1da3dffad8..95c13aa575ff32eebeb4938f435376ca0595a439 100644
@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
     To further test the InfiniBand software stack, use IPoIB (this
     assumes you have two IB hosts named host1 and host2):
 
-    host1$ ifconfig ib0 a.b.c.x
-    host2$ ifconfig ib0 a.b.c.y
+    host1$ ip link set dev ib0 up
+    host1$ ip address add dev ib0 a.b.c.x
+    host2$ ip link set dev ib0 up
+    host2$ ip address add dev ib0 a.b.c.y
     host1$ ping a.b.c.y
     host2$ ping a.b.c.x
 
@@ -229,7 +231,8 @@ NFS/RDMA Setup
 
     $ modprobe ib_mthca
     $ modprobe ib_ipoib
-    $ ifconfig ib0 a.b.c.d
+    $ ip li set dev ib0 up
+    $ ip addr add dev ib0 a.b.c.d
 
     NOTE: use unique addresses for the client and server
 
diff --git a/MAINTAINERS b/MAINTAINERS
index c72a7baec55cc5fe02675525a241291454d9e605..1b9e38d023181eb09c1aa617f8125291647ea0f4 100644
@@ -8791,6 +8791,15 @@ W:       http://www.emulex.com
 S:     Supported
 F:     drivers/net/ethernet/emulex/benet/
 
+EMULEX ONECONNECT ROCE DRIVER
+M:     Selvin Xavier <selvin.xavier@emulex.com>
+M:     Devesh Sharma <devesh.sharma@emulex.com>
+M:     Mitesh Ahuja <mitesh.ahuja@emulex.com>
+L:     linux-rdma@vger.kernel.org
+W:     http://www.emulex.com
+S:     Supported
+F:     drivers/infiniband/hw/ocrdma/
+
 SFC NETWORK DRIVER
 M:     Solarflare linux maintainers <linux-net-drivers@solarflare.com>
 M:     Shradha Shah <sshah@solarflare.com>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 8c014b5dab4c82ff805a744c89655555a56094fc..38acb3cfc545f91658f0d465b69914de99f281b2 100644
@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        if (dmasync)
                dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
 
+       if (!size)
+               return ERR_PTR(-EINVAL);
+
        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
         */
-       if ((PAGE_ALIGN(addr + size) <= size) ||
-           (PAGE_ALIGN(addr + size) <= addr))
+       if (((addr + size) < addr) ||
+           PAGE_ALIGN(addr + size) < (addr + size))
                return ERR_PTR(-EINVAL);
 
        if (!can_do_mlock())
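
For reference, the reworked check in ib_umem_get() above rejects any request where addr + size wraps the address space, or where page-aligning the end address would wrap. A minimal stand-alone sketch of the same test (hypothetical helper, not part of this patch):

    #include <stdbool.h>
    #include <stddef.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

    /* Mirrors the new ib_umem_get() validation: a zero-length region,
     * a region whose end wraps around, or one whose page-aligned end
     * wraps, is rejected before any pages are pinned.
     */
    static bool umem_range_valid(unsigned long addr, size_t size)
    {
            if (!size)
                    return false;
            if (addr + size < addr)                    /* end wrapped */
                    return false;
            if (PAGE_ALIGN(addr + size) < addr + size) /* align wrapped */
                    return false;
            return true;
    }
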
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 259dcc7779f5e01bc95b66ca90e64d20f7c94087..88cce9bb72fea78a7d03f9c556b394807ca87215 100644
@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                kfree(uqp);
        }
 
+       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
+               struct ib_srq *srq = uobj->object;
+               struct ib_uevent_object *uevent =
+                       container_of(uobj, struct ib_uevent_object, uobject);
+
+               idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
+               ib_destroy_srq(srq);
+               ib_uverbs_release_uevent(file, uevent);
+               kfree(uevent);
+       }
+
        list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
                struct ib_cq *cq = uobj->object;
                struct ib_uverbs_event_file *ev_file = cq->cq_context;
@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
                kfree(ucq);
        }
 
-       list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
-               struct ib_srq *srq = uobj->object;
-               struct ib_uevent_object *uevent =
-                       container_of(uobj, struct ib_uevent_object, uobject);
-
-               idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
-               ib_destroy_srq(srq);
-               ib_uverbs_release_uevent(file, uevent);
-               kfree(uevent);
-       }
-
        list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
                struct ib_mr *mr = uobj->object;
 
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
index a31e031afd87486ce0c876c541d4efc7cb981f8c..0f00204d2ecea0e19f0a9917431e4bb8cbbdfa49 100644
@@ -58,14 +58,19 @@ struct mlx4_alias_guid_work_context {
        int                     query_id;
        struct list_head        list;
        int                     block_num;
+       ib_sa_comp_mask         guid_indexes;
+       u8                      method;
 };
 
 struct mlx4_next_alias_guid_work {
        u8 port;
        u8 block_num;
+       u8 method;
        struct mlx4_sriov_alias_guid_info_rec_det rec_det;
 };
 
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                    int *resched_delay_sec);
 
 void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
                                         u8 port_num, u8 *p_data)
@@ -118,6 +123,57 @@ ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
        return IB_SA_COMP_MASK(4 + index);
 }
 
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                   int port,  int slave_init)
+{
+       __be64 curr_guid, required_guid;
+       int record_num = slave / 8;
+       int index = slave % 8;
+       int port_index = port - 1;
+       unsigned long flags;
+       int do_work = 0;
+
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+       if (dev->sriov.alias_guid.ports_guid[port_index].state_flags &
+           GUID_STATE_NEED_PORT_INIT)
+               goto unlock;
+       if (!slave_init) {
+               curr_guid = *(__be64 *)&dev->sriov.
+                       alias_guid.ports_guid[port_index].
+                       all_rec_per_port[record_num].
+                       all_recs[GUID_REC_SIZE * index];
+               if (curr_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL) ||
+                   !curr_guid)
+                       goto unlock;
+               required_guid = cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
+       } else {
+               required_guid = mlx4_get_admin_guid(dev->dev, slave, port);
+               if (required_guid == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       goto unlock;
+       }
+       *(__be64 *)&dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].
+               all_recs[GUID_REC_SIZE * index] = required_guid;
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].guid_indexes
+               |= mlx4_ib_get_aguid_comp_mask_from_ix(index);
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].status
+               = MLX4_GUID_INFO_STATUS_IDLE;
+       /* set to run immediately */
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].time_to_run = 0;
+       dev->sriov.alias_guid.ports_guid[port_index].
+               all_rec_per_port[record_num].
+               guids_retry_schedule[index] = 0;
+       do_work = 1;
+unlock:
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+
+       if (do_work)
+               mlx4_ib_init_alias_guid_work(dev, port_index);
+}
+
 /*
  * Whenever new GUID is set/unset (guid table change) create event and
  * notify the relevant slave (master also should be notified).
@@ -138,10 +194,15 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
        enum slave_port_state prev_state;
        __be64 tmp_cur_ag, form_cache_ag;
        enum slave_port_gen_event gen_event;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec;
+       unsigned long flags;
+       __be64 required_value;
 
        if (!mlx4_is_master(dev->dev))
                return;
 
+       rec = &dev->sriov.alias_guid.ports_guid[port_num - 1].
+                       all_rec_per_port[block_num];
        guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
                                   ports_guid[port_num - 1].
                                   all_rec_per_port[block_num].guid_indexes);
@@ -166,8 +227,27 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
                 */
                if (tmp_cur_ag != form_cache_ag)
                        continue;
-               mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
 
+               spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+               required_value = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+
+               if (required_value == cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       required_value = 0;
+
+               if (tmp_cur_ag == required_value) {
+                       rec->guid_indexes = rec->guid_indexes &
+                              ~mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               } else {
+                       /* may notify port down if value is 0 */
+                       if (tmp_cur_ag != MLX4_NOT_SET_GUID) {
+                               spin_unlock_irqrestore(&dev->sriov.
+                                       alias_guid.ag_work_lock, flags);
+                               continue;
+                       }
+               }
+               spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock,
+                                      flags);
+               mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
                /*2 cases: Valid GUID, and Invalid Guid*/
 
                if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
@@ -188,10 +268,14 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
                        set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
                                                      MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
                                                      &gen_event);
-                       pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
-                                slave_id, port_num);
-                       mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
-                                                      MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+                       if (gen_event == SLAVE_PORT_GEN_EVENT_DOWN) {
+                               pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
+                                        slave_id, port_num);
+                               mlx4_gen_port_state_change_eqe(dev->dev,
+                                                              slave_id,
+                                                              port_num,
+                                                              MLX4_PORT_CHANGE_SUBTYPE_DOWN);
+                       }
                }
        }
 }
@@ -206,6 +290,9 @@ static void aliasguid_query_handler(int status,
        int i;
        struct mlx4_sriov_alias_guid_info_rec_det *rec;
        unsigned long flags, flags1;
+       ib_sa_comp_mask declined_guid_indexes = 0;
+       ib_sa_comp_mask applied_guid_indexes = 0;
+       unsigned int resched_delay_sec = 0;
 
        if (!context)
                return;
@@ -216,9 +303,9 @@ static void aliasguid_query_handler(int status,
                all_rec_per_port[cb_ctx->block_num];
 
        if (status) {
-               rec->status = MLX4_GUID_INFO_STATUS_IDLE;
                pr_debug("(port: %d) failed: status = %d\n",
                         cb_ctx->port, status);
+               rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
                goto out;
        }
 
@@ -235,57 +322,101 @@ static void aliasguid_query_handler(int status,
        rec = &dev->sriov.alias_guid.ports_guid[port_index].
                all_rec_per_port[guid_rec->block_num];
 
-       rec->status = MLX4_GUID_INFO_STATUS_SET;
-       rec->method = MLX4_GUID_INFO_RECORD_SET;
-
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
        for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
-               __be64 tmp_cur_ag;
-               tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
+               __be64 sm_response, required_val;
+
+               if (!(cb_ctx->guid_indexes &
+                       mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+                       continue;
+               sm_response = *(__be64 *)&guid_rec->guid_info_list
+                               [i * GUID_REC_SIZE];
+               required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE];
+               if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) {
+                       if (required_val ==
+                           cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                               goto next_entry;
+
+                       /* A new value was set till we got the response */
+                       pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
+                                be64_to_cpu(required_val),
+                                i, guid_rec->block_num);
+                       goto entry_declined;
+               }
+
                /* check if the SM didn't assign one of the records.
-                * if it didn't, if it was not sysadmin request:
-                * ask the SM to give a new GUID, (instead of the driver request).
+                * if it didn't, re-ask for.
                 */
-               if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
-                       mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
-                                    "block_num: %d was declined by SM, "
-                                    "ownership by %d (0 = driver, 1=sysAdmin,"
-                                    " 2=None)\n", __func__, i,
-                                    guid_rec->block_num, rec->ownership);
-                       if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
-                               /* if it is driver assign, asks for new GUID from SM*/
-                               *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
-                                       MLX4_NOT_SET_GUID;
-
-                               /* Mark the record as not assigned, and let it
-                                * be sent again in the next work sched.*/
-                               rec->status = MLX4_GUID_INFO_STATUS_IDLE;
-                               rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
-                       }
+               if (sm_response == MLX4_NOT_SET_GUID) {
+                       if (rec->guids_retry_schedule[i] == 0)
+                               mlx4_ib_warn(&dev->ib_dev,
+                                            "%s:Record num %d in  block_num: %d was declined by SM\n",
+                                            __func__, i,
+                                            guid_rec->block_num);
+                       goto entry_declined;
                } else {
                       /* properly assigned record. */
                       /* We save the GUID we just got from the SM in the
                        * admin_guid in order to be persistent, and in the
                        * request from the sm the process will ask for the same GUID */
-                       if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
-                           tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
-                               /* the sysadmin assignment failed.*/
-                               mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
-                                            " admin guid after SysAdmin "
-                                            "configuration. "
-                                            "Record num %d in block_num:%d "
-                                            "was declined by SM, "
-                                            "new val(0x%llx) was kept\n",
-                                             __func__, i,
-                                            guid_rec->block_num,
-                                            be64_to_cpu(*(__be64 *) &
-                                                        rec->all_recs[i * GUID_REC_SIZE]));
+                       if (required_val &&
+                           sm_response != required_val) {
+                               /* Warn only on first retry */
+                               if (rec->guids_retry_schedule[i] == 0)
+                                       mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
+                                                    " admin guid after SysAdmin "
+                                                    "configuration. "
+                                                    "Record num %d in block_num:%d "
+                                                    "was declined by SM, "
+                                                    "new val(0x%llx) was kept, SM returned (0x%llx)\n",
+                                                     __func__, i,
+                                                    guid_rec->block_num,
+                                                    be64_to_cpu(required_val),
+                                                    be64_to_cpu(sm_response));
+                               goto entry_declined;
                        } else {
-                               memcpy(&rec->all_recs[i * GUID_REC_SIZE],
-                                      &guid_rec->guid_info_list[i * GUID_REC_SIZE],
-                                      GUID_REC_SIZE);
+                               *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
+                                       sm_response;
+                               if (required_val == 0)
+                                       mlx4_set_admin_guid(dev->dev,
+                                                           sm_response,
+                                                           (guid_rec->block_num
+                                                           * NUM_ALIAS_GUID_IN_REC) + i,
+                                                           cb_ctx->port);
+                               goto next_entry;
                        }
                }
+entry_declined:
+               declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               rec->guids_retry_schedule[i] =
+                       (rec->guids_retry_schedule[i] == 0) ?  1 :
+                       min((unsigned int)60,
+                           rec->guids_retry_schedule[i] * 2);
+               /* using the minimum value among all entries in that record */
+               resched_delay_sec = (resched_delay_sec == 0) ?
+                               rec->guids_retry_schedule[i] :
+                               min(resched_delay_sec,
+                                   rec->guids_retry_schedule[i]);
+               continue;
+
+next_entry:
+               rec->guids_retry_schedule[i] = 0;
        }
+
+       applied_guid_indexes =  cb_ctx->guid_indexes & ~declined_guid_indexes;
+       if (declined_guid_indexes ||
+           rec->guid_indexes & ~(applied_guid_indexes)) {
+               pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
+                        guid_rec->block_num,
+                        be64_to_cpu((__force __be64)rec->guid_indexes),
+                        be64_to_cpu((__force __be64)applied_guid_indexes),
+                        be64_to_cpu((__force __be64)declined_guid_indexes));
+               rec->time_to_run = ktime_get_real_ns() +
+                       resched_delay_sec * NSEC_PER_SEC;
+       } else {
+               rec->status = MLX4_GUID_INFO_STATUS_SET;
+       }
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
        /*
        The func is call here to close the cases when the
        sm doesn't send smp, so in the sa response the driver
@@ -297,10 +428,13 @@ static void aliasguid_query_handler(int status,
 out:
        spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
-       if (!dev->sriov.is_going_down)
+       if (!dev->sriov.is_going_down) {
+               get_low_record_time_index(dev, port_index, &resched_delay_sec);
                queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
                                   &dev->sriov.alias_guid.ports_guid[port_index].
-                                  alias_guid_work, 0);
+                                  alias_guid_work,
+                                  msecs_to_jiffies(resched_delay_sec * 1000));
+       }
        if (cb_ctx->sa_query) {
                list_del(&cb_ctx->list);
                kfree(cb_ctx);
@@ -317,9 +451,7 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
        ib_sa_comp_mask comp_mask = 0;
 
        dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
-               = MLX4_GUID_INFO_STATUS_IDLE;
-       dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
-               = MLX4_GUID_INFO_RECORD_SET;
+               = MLX4_GUID_INFO_STATUS_SET;
 
        /* calculate the comp_mask for that record.*/
        for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
@@ -333,19 +465,21 @@ static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
                need to assign GUIDs, then don't put it up for assignment.
                */
                if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
-                   (!index && !i) ||
-                   MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
-                   ports_guid[port - 1].all_rec_per_port[index].ownership)
+                   (!index && !i))
                        continue;
                comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
        }
        dev->sriov.alias_guid.ports_guid[port - 1].
-               all_rec_per_port[index].guid_indexes = comp_mask;
+               all_rec_per_port[index].guid_indexes |= comp_mask;
+       if (dev->sriov.alias_guid.ports_guid[port - 1].
+           all_rec_per_port[index].guid_indexes)
+               dev->sriov.alias_guid.ports_guid[port - 1].
+               all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
+
 }
 
 static int set_guid_rec(struct ib_device *ibdev,
-                       u8 port, int index,
-                       struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+                       struct mlx4_next_alias_guid_work *rec)
 {
        int err;
        struct mlx4_ib_dev *dev = to_mdev(ibdev);
@@ -354,6 +488,9 @@ static int set_guid_rec(struct ib_device *ibdev,
        struct ib_port_attr attr;
        struct mlx4_alias_guid_work_context *callback_context;
        unsigned long resched_delay, flags, flags1;
+       u8 port = rec->port + 1;
+       int index = rec->block_num;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec_det = &rec->rec_det;
        struct list_head *head =
                &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
 
@@ -380,6 +517,8 @@ static int set_guid_rec(struct ib_device *ibdev,
        callback_context->port = port;
        callback_context->dev = dev;
        callback_context->block_num = index;
+       callback_context->guid_indexes = rec_det->guid_indexes;
+       callback_context->method = rec->method;
 
        memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
 
@@ -399,7 +538,7 @@ static int set_guid_rec(struct ib_device *ibdev,
        callback_context->query_id =
                ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
                                          ibdev, port, &guid_info_rec,
-                                         comp_mask, rec_det->method, 1000,
+                                         comp_mask, rec->method, 1000,
                                          GFP_KERNEL, aliasguid_query_handler,
                                          callback_context,
                                          &callback_context->sa_query);
@@ -434,6 +573,30 @@ out:
        return err;
 }
 
+static void mlx4_ib_guid_port_init(struct mlx4_ib_dev *dev, int port)
+{
+       int j, k, entry;
+       __be64 guid;
+
+       /*Check if the SM doesn't need to assign the GUIDs*/
+       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+               for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
+                       entry = j * NUM_ALIAS_GUID_IN_REC + k;
+                       /* no request for the 0 entry (hw guid) */
+                       if (!entry || entry > dev->dev->persist->num_vfs ||
+                           !mlx4_is_slave_active(dev->dev, entry))
+                               continue;
+                       guid = mlx4_get_admin_guid(dev->dev, entry, port);
+                       *(__be64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
+                               all_rec_per_port[j].all_recs
+                               [GUID_REC_SIZE * k] = guid;
+                       pr_debug("guid was set, entry=%d, val=0x%llx, port=%d\n",
+                                entry,
+                                be64_to_cpu(guid),
+                                port);
+               }
+       }
+}
 void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
 {
        int i;
@@ -443,6 +606,13 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
 
        spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
+
+       if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
+               GUID_STATE_NEED_PORT_INIT) {
+               mlx4_ib_guid_port_init(dev, port);
+               dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
+                       (~GUID_STATE_NEED_PORT_INIT);
+       }
        for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
                invalidate_guid_record(dev, port, i);
 
@@ -462,60 +632,107 @@ void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
        spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
 }
 
-/* The function returns the next record that was
- * not configured (or failed to be configured) */
-static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
-                                    struct mlx4_next_alias_guid_work *rec)
+static void set_required_record(struct mlx4_ib_dev *dev, u8 port,
+                               struct mlx4_next_alias_guid_work *next_rec,
+                               int record_index)
 {
-       int j;
-       unsigned long flags;
+       int i;
+       int lowset_time_entry = -1;
+       int lowest_time = 0;
+       ib_sa_comp_mask delete_guid_indexes = 0;
+       ib_sa_comp_mask set_guid_indexes = 0;
+       struct mlx4_sriov_alias_guid_info_rec_det *rec =
+                       &dev->sriov.alias_guid.ports_guid[port].
+                       all_rec_per_port[record_index];
 
-       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
-               spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
-               if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
-                   MLX4_GUID_INFO_STATUS_IDLE) {
-                       memcpy(&rec->rec_det,
-                              &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
-                              sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
-                       rec->port = port;
-                       rec->block_num = j;
-                       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
-                               MLX4_GUID_INFO_STATUS_PENDING;
-                       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
-                       return 0;
+       for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
+               if (!(rec->guid_indexes &
+                       mlx4_ib_get_aguid_comp_mask_from_ix(i)))
+                       continue;
+
+               if (*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] ==
+                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL))
+                       delete_guid_indexes |=
+                               mlx4_ib_get_aguid_comp_mask_from_ix(i);
+               else
+                       set_guid_indexes |=
+                               mlx4_ib_get_aguid_comp_mask_from_ix(i);
+
+               if (lowset_time_entry == -1 || rec->guids_retry_schedule[i] <=
+                       lowest_time) {
+                       lowset_time_entry = i;
+                       lowest_time = rec->guids_retry_schedule[i];
                }
-               spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
        }
-       return -ENOENT;
+
+       memcpy(&next_rec->rec_det, rec, sizeof(*rec));
+       next_rec->port = port;
+       next_rec->block_num = record_index;
+
+       if (*(__be64 *)&rec->all_recs[lowset_time_entry * GUID_REC_SIZE] ==
+                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) {
+               next_rec->rec_det.guid_indexes = delete_guid_indexes;
+               next_rec->method = MLX4_GUID_INFO_RECORD_DELETE;
+       } else {
+               next_rec->rec_det.guid_indexes = set_guid_indexes;
+               next_rec->method = MLX4_GUID_INFO_RECORD_SET;
+       }
 }
 
-static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
-                                            int rec_index,
-                                            struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
+/* return index of record that should be updated based on lowest
+ * rescheduled time
+ */
+static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
+                                    int *resched_delay_sec)
 {
-       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
-               rec_det->guid_indexes;
-       memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
-              rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
-       dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
-               rec_det->status;
+       int record_index = -1;
+       u64 low_record_time = 0;
+       struct mlx4_sriov_alias_guid_info_rec_det rec;
+       int j;
+
+       for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
+               rec = dev->sriov.alias_guid.ports_guid[port].
+                       all_rec_per_port[j];
+               if (rec.status == MLX4_GUID_INFO_STATUS_IDLE &&
+                   rec.guid_indexes) {
+                       if (record_index == -1 ||
+                           rec.time_to_run < low_record_time) {
+                               record_index = j;
+                               low_record_time = rec.time_to_run;
+                       }
+               }
+       }
+       if (resched_delay_sec) {
+               u64 curr_time = ktime_get_real_ns();
+
+               *resched_delay_sec = (low_record_time < curr_time) ? 0 :
+                       div_u64((low_record_time - curr_time), NSEC_PER_SEC);
+       }
+
+       return record_index;
 }
 
-static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
+/* The function returns the next record that was
+ * not configured (or failed to be configured) */
+static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
+                                    struct mlx4_next_alias_guid_work *rec)
 {
-       int j;
-       struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
-
-       for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
-               memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
-               rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
-                       IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
-                       IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
-                       IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
-                       IB_SA_GUIDINFO_REC_GID7;
-               rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
-               set_administratively_guid_record(dev, port, j, &rec_det);
+       unsigned long flags;
+       int record_index;
+       int ret = 0;
+
+       spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
+       record_index = get_low_record_time_index(dev, port, NULL);
+
+       if (record_index < 0) {
+               ret = -ENOENT;
+               goto out;
        }
+
+       set_required_record(dev, port, rec, record_index);
+out:
+       spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
+       return ret;
 }
 
 static void alias_guid_work(struct work_struct *work)
@@ -545,9 +762,7 @@ static void alias_guid_work(struct work_struct *work)
                goto out;
        }
 
-       set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
-                    &rec->rec_det);
-
+       set_guid_rec(&dev->ib_dev, rec);
 out:
        kfree(rec);
 }
@@ -562,6 +777,12 @@ void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
        spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
        spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
        if (!dev->sriov.is_going_down) {
+               /* If there is pending one should cancell then run, otherwise
+                 * won't run till previous one is ended as same work
+                 * struct is used.
+                 */
+               cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
+                                   alias_guid_work);
                queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
                           &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
        }
@@ -609,7 +830,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
 {
        char alias_wq_name[15];
        int ret = 0;
-       int i, j, k;
+       int i, j;
        union ib_gid gid;
 
        if (!mlx4_is_master(dev->dev))
@@ -633,33 +854,25 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
        for (i = 0 ; i < dev->num_ports; i++) {
                memset(&dev->sriov.alias_guid.ports_guid[i], 0,
                       sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
-               /*Check if the SM doesn't need to assign the GUIDs*/
+               dev->sriov.alias_guid.ports_guid[i].state_flags |=
+                               GUID_STATE_NEED_PORT_INIT;
                for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
-                       if (mlx4_ib_sm_guid_assign) {
-                               dev->sriov.alias_guid.ports_guid[i].
-                                       all_rec_per_port[j].
-                                       ownership = MLX4_GUID_DRIVER_ASSIGN;
-                               continue;
-                       }
-                       dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
-                                       ownership = MLX4_GUID_NONE_ASSIGN;
-                       /*mark each val as it was deleted,
-                         till the sysAdmin will give it valid val*/
-                       for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
-                               *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
-                                       all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
-                                               cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
-                       }
+                       /* mark each val as it was deleted */
+                       memset(dev->sriov.alias_guid.ports_guid[i].
+                               all_rec_per_port[j].all_recs, 0xFF,
+                               sizeof(dev->sriov.alias_guid.ports_guid[i].
+                               all_rec_per_port[j].all_recs));
                }
                INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
                /*prepare the records, set them to be allocated by sm*/
+               if (mlx4_ib_sm_guid_assign)
+                       for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
+                               mlx4_set_admin_guid(dev->dev, 0, j, i + 1);
                for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
                        invalidate_guid_record(dev, i + 1, j);
 
                dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
                dev->sriov.alias_guid.ports_guid[i].port  = i;
-               if (mlx4_ib_sm_guid_assign)
-                       set_all_slaves_guids(dev, i);
 
                snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
                dev->sriov.alias_guid.ports_guid[i].wq =
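
One behavioral change worth noting from the alias_GUID.c hunks above: declined GUID entries are no longer retried immediately; each entry backs off individually and the record is rescheduled at the smallest delay among its declined entries. A small sketch of the assumed backoff rule (illustrative only, the helper name is not from the patch):

    /* Doubles the per-entry retry delay on every decline, starting at
     * 1 second and capped at 60 seconds, matching the
     * rec->guids_retry_schedule[] update in aliasguid_query_handler().
     */
    static unsigned int next_retry_delay_sec(unsigned int prev_sec)
    {
            if (prev_sec == 0)
                    return 1;
            return prev_sec * 2 > 60 ? 60 : prev_sec * 2;
    }
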
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 59040265e3614a23fc8508e88dd5b2138e7107e8..9cd2b002d7ae57fb4f33944cbaa14a3e2a27dedd 100644
@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                                        tun_qp->ring[i].addr,
                                                        rx_buf_size,
                                                        DMA_FROM_DEVICE);
+               if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
+                       kfree(tun_qp->ring[i].addr);
+                       goto err;
+               }
        }
 
        for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
                                          tun_qp->tx_ring[i].buf.addr,
                                          tx_buf_size,
                                          DMA_TO_DEVICE);
+               if (ib_dma_mapping_error(ctx->ib_dev,
+                                        tun_qp->tx_ring[i].buf.map)) {
+                       kfree(tun_qp->tx_ring[i].buf.addr);
+                       goto tx_err;
+               }
                tun_qp->tx_ring[i].ah = NULL;
        }
        spin_lock_init(&tun_qp->tx_lock);
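
The two hunks above add the usual map-then-verify pattern to the tunnel QP buffers. A hedged sketch of that pattern as a self-contained helper (assumed names, not part of the patch):

    #include <linux/slab.h>
    #include <rdma/ib_verbs.h>

    /* Allocate a buffer, DMA-map it, and verify the handle with
     * ib_dma_mapping_error() before it is ever posted to the HCA;
     * on failure the buffer is freed instead of being used with a
     * bogus DMA address.
     */
    static int alloc_and_map_buf(struct ib_device *ibdev, size_t size,
                                 void **buf, u64 *map)
    {
            *buf = kmalloc(size, GFP_KERNEL);
            if (!*buf)
                    return -ENOMEM;
            *map = ib_dma_map_single(ibdev, *buf, size, DMA_FROM_DEVICE);
            if (ib_dma_mapping_error(ibdev, *map)) {
                    kfree(*buf);
                    *buf = NULL;
                    return -ENOMEM;
            }
            return 0;
    }
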
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 976bea794b5f7726cd642cb08de4073abd5dcb58..57070c529dfb5ca038e2118212684dc54bb3d21b 100644
@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-int mlx4_ib_sm_guid_assign = 1;
+int mlx4_ib_sm_guid_assign = 0;
 module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
-MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
+MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
 
 static const char mlx4_ib_version[] =
        DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
        case MLX4_DEV_EVENT_SLAVE_INIT:
                /* here, p is the slave id */
                do_slave_init(ibdev, p, 1);
+               if (mlx4_is_master(dev)) {
+                       int i;
+
+                       for (i = 1; i <= ibdev->num_ports; i++) {
+                               if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                                       == IB_LINK_LAYER_INFINIBAND)
+                                       mlx4_ib_slave_alias_guid_event(ibdev,
+                                                                      p, i,
+                                                                      1);
+                       }
+               }
                return;
 
        case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
+               if (mlx4_is_master(dev)) {
+                       int i;
+
+                       for (i = 1; i <= ibdev->num_ports; i++) {
+                               if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
+                                       == IB_LINK_LAYER_INFINIBAND)
+                                       mlx4_ib_slave_alias_guid_event(ibdev,
+                                                                      p, i,
+                                                                      0);
+                       }
+               }
                /* here, p is the slave id */
                do_slave_init(ibdev, p, 0);
                return;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index f829fd935b7901b82649a9d3307b46add1aa2ed5..fce3934372a161680e4e4f2dd9716963e1178790 100644
@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
 enum mlx4_guid_alias_rec_status {
        MLX4_GUID_INFO_STATUS_IDLE,
        MLX4_GUID_INFO_STATUS_SET,
-       MLX4_GUID_INFO_STATUS_PENDING,
 };
 
-enum mlx4_guid_alias_rec_ownership {
-       MLX4_GUID_DRIVER_ASSIGN,
-       MLX4_GUID_SYSADMIN_ASSIGN,
-       MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
-};
+#define GUID_STATE_NEED_PORT_INIT 0x01
 
 enum mlx4_guid_alias_rec_method {
        MLX4_GUID_INFO_RECORD_SET       = IB_MGMT_METHOD_SET,
@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
        u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
        ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
        enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
-       u8 method; /*set or delete*/
-       enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
+       unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
+       u64 time_to_run;
 };
 
 struct mlx4_sriov_alias_guid_port_rec_det {
@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
        struct workqueue_struct *wq;
        struct delayed_work alias_guid_work;
        u8 port;
+       u32 state_flags;
        struct mlx4_sriov_alias_guid *parent;
        struct list_head cb_list;
 };
@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
 void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
                             struct attribute *attr);
 ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
+void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
+                                   int port, int slave_init);
 
 int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
 
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ed2bd6701f9b131c3dc3261cb2eae21a2d835524..02fc91c6802787e0f8f723417d9c7655b6123d3a 100644
@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
                        ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
                                          sizeof (struct mlx4_ib_proxy_sqp_hdr),
                                          DMA_FROM_DEVICE);
+               if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
+                       kfree(qp->sqp_proxy_rcv[i].addr);
+                       goto err;
+               }
        }
        return 0;
 
@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
 
        memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
 
-       *lso_hdr_sz  = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
-                                  wr->wr.ud.hlen);
+       *lso_hdr_sz  = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
        *lso_seg_len = halign;
        return 0;
 }
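
The build_lso_seg() change above encodes the full MSS in the upper 16 bits of the LSO control word rather than mss - hlen. A tiny user-space illustration of the fixed packing (example values are made up):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t mss = 4096, hlen = 58;
            /* upper 16 bits: full MSS; lower 16 bits: UD header length */
            uint32_t mss_hdr_size = (mss << 16) | hlen;

            printf("mss_hdr_size = 0x%08x\n", mss_hdr_size); /* 0x1000003a */
            return 0;
    }
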
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
index d10c2b8a5dadbe7eef1774c2eb29471ac317cb27..6797108ce8735b7aa11b08b4c09d8b62c29ec364 100644
 static ssize_t show_admin_alias_guid(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
-       int record_num;/*0-15*/
-       int guid_index_in_rec; /*0 - 7*/
        struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
                container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
        struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
        struct mlx4_ib_dev *mdev = port->dev;
+       __be64 sysadmin_ag_val;
 
-       record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
-       guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
+       sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
+                                             mlx4_ib_iov_dentry->entry_num,
+                                             port->num);
 
-       return sprintf(buf, "%llx\n",
-                      be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
-                                  ports_guid[port->num - 1].
-                                  all_rec_per_port[record_num].
-                                  all_recs[8 * guid_index_in_rec]));
+       return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
 }
 
 /* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
        struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
        struct mlx4_ib_dev *mdev = port->dev;
        u64 sysadmin_ag_val;
+       unsigned long flags;
 
        record_num = mlx4_ib_iov_dentry->entry_num / 8;
        guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
                pr_err("GUID 0 block 0 is RO\n");
                return count;
        }
+       spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
        sscanf(buf, "%llx", &sysadmin_ag_val);
        *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
                all_rec_per_port[record_num].
@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
        /* Change the state to be pending for update */
        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
                = MLX4_GUID_INFO_STATUS_IDLE ;
-
-       mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-               = MLX4_GUID_INFO_RECORD_SET;
-
-       switch (sysadmin_ag_val) {
-       case MLX4_GUID_FOR_DELETE_VAL:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
-                       = MLX4_GUID_INFO_RECORD_DELETE;
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_SYSADMIN_ASSIGN;
-               break;
-       /* The sysadmin requests the SM to re-assign */
-       case MLX4_NOT_SET_GUID:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_DRIVER_ASSIGN;
-               break;
-       /* The sysadmin requests a specific value.*/
-       default:
-               mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
-                       = MLX4_GUID_SYSADMIN_ASSIGN;
-               break;
-       }
+       mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
+                           mlx4_ib_iov_dentry->entry_num,
+                           port->num);
 
        /* set the record index */
        mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
-               = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
+               |= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
 
+       spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
        mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
 
        return count;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562beb542367faf1b93d7ba66e8ef879c73bf4..bd94b0a6e9e535f8d8b4a9e1fa1428e0696e2947 100644
@@ -87,7 +87,6 @@ enum {
        IPOIB_FLAG_ADMIN_UP       = 2,
        IPOIB_PKEY_ASSIGNED       = 3,
        IPOIB_FLAG_SUBINTERFACE   = 5,
-       IPOIB_MCAST_RUN           = 6,
        IPOIB_STOP_REAPER         = 7,
        IPOIB_FLAG_ADMIN_CM       = 9,
        IPOIB_FLAG_UMCAST         = 10,
@@ -98,9 +97,15 @@ enum {
 
        IPOIB_MCAST_FLAG_FOUND    = 0,  /* used in set_multicast_list */
        IPOIB_MCAST_FLAG_SENDONLY = 1,
-       IPOIB_MCAST_FLAG_BUSY     = 2,  /* joining or already joined */
+       /*
+        * For IPOIB_MCAST_FLAG_BUSY
+        * When set, in flight join and mcast->mc is unreliable
+        * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+        *   haven't started yet
+        * When clear and mcast->mc is valid pointer, join was successful
+        */
+       IPOIB_MCAST_FLAG_BUSY     = 2,
        IPOIB_MCAST_FLAG_ATTACHED = 3,
-       IPOIB_MCAST_JOIN_STARTED  = 4,
 
        MAX_SEND_CQE              = 16,
        IPOIB_CM_COPYBREAK        = 256,
@@ -148,6 +153,7 @@ struct ipoib_mcast {
 
        unsigned long created;
        unsigned long backoff;
+       unsigned long delay_until;
 
        unsigned long flags;
        unsigned char logcount;
@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
        struct completion               deleted;
 };
 
+struct ipoib_qp_state_validate {
+       struct work_struct work;
+       struct ipoib_dev_priv   *priv;
+};
+
 /*
  * Device private locking: network stack tx_lock protects members used
  * in TX fast path, lock protects everything else.  lock nests inside
@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
        struct list_head multicast_list;
        struct rb_root multicast_tree;
 
+       struct workqueue_struct *wq;
        struct delayed_work mcast_task;
        struct work_struct carrier_on_task;
        struct work_struct flush_light;
@@ -426,11 +438,6 @@ struct ipoib_neigh {
 #define IPOIB_UD_MTU(ib_mtu)           (ib_mtu - IPOIB_ENCAP_LEN)
 #define IPOIB_UD_BUF_SIZE(ib_mtu)      (ib_mtu + IB_GRH_BYTES)
 
-static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
-{
-       return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
-}
-
 void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
 static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
 {
@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efcea0d03f11b4da3967b8eedc137da21e08a..56959adb6c7da51ccbb6d20307247b7cb69ad55a 100644
@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
        }
 
        spin_lock_irq(&priv->lock);
-       queue_delayed_work(ipoib_workqueue,
+       queue_delayed_work(priv->wq,
                           &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
        /* Add this entry to passive ids list head, but do not re-add it
         * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                        spin_lock_irqsave(&priv->lock, flags);
                        list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
                        ipoib_cm_start_rx_drain(priv);
-                       queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                       queue_work(priv->wq, &priv->cm.rx_reap_task);
                        spin_unlock_irqrestore(&priv->lock, flags);
                } else
                        ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                                spin_lock_irqsave(&priv->lock, flags);
                                list_move(&p->list, &priv->cm.rx_reap_list);
                                spin_unlock_irqrestore(&priv->lock, flags);
-                               queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+                               queue_work(priv->wq, &priv->cm.rx_reap_task);
                        }
                        return;
                }
@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                }
 
                clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
 
                if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                        list_move(&tx->list, &priv->cm.reap_list);
-                       queue_work(ipoib_workqueue, &priv->cm.reap_task);
+                       queue_work(priv->wq, &priv->cm.reap_task);
                }
 
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
        tx->dev = dev;
        list_add(&tx->list, &priv->cm.start_list);
        set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-       queue_work(ipoib_workqueue, &priv->cm.start_task);
+       queue_work(priv->wq, &priv->cm.start_task);
        return tx;
 }
 
@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
        if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
                spin_lock_irqsave(&priv->lock, flags);
                list_move(&tx->list, &priv->cm.reap_list);
-               queue_work(ipoib_workqueue, &priv->cm.reap_task);
+               queue_work(priv->wq, &priv->cm.reap_task);
                ipoib_dbg(priv, "Reap connection for gid %pI6\n",
                          tx->neigh->daddr + 4);
                tx->neigh = NULL;
@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
 
        skb_queue_tail(&priv->cm.skb_queue, skb);
        if (e)
-               queue_work(ipoib_workqueue, &priv->cm.skb_task);
+               queue_work(priv->wq, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
        }
 
        if (!list_empty(&priv->cm.passive_ids))
-               queue_delayed_work(ipoib_workqueue,
+               queue_delayed_work(priv->wq,
                                   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
        spin_unlock_irq(&priv->lock);
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c3481749b962fe96b79722d7c8e9c99c585..63b92cbb29ad0ad1f0165a738a47efbe0f650e04 100644
@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
 static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
                                  u64 mapping[IPOIB_UD_RX_SG])
 {
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
-                                   DMA_FROM_DEVICE);
-               ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
-                                 DMA_FROM_DEVICE);
-       } else
-               ib_dma_unmap_single(priv->ca, mapping[0],
-                                   IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
-                                   DMA_FROM_DEVICE);
-}
-
-static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
-                                  struct sk_buff *skb,
-                                  unsigned int length)
-{
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
-               unsigned int size;
-               /*
-                * There is only two buffers needed for max_payload = 4K,
-                * first buf size is IPOIB_UD_HEAD_SIZE
-                */
-               skb->tail += IPOIB_UD_HEAD_SIZE;
-               skb->len  += length;
-
-               size = length - IPOIB_UD_HEAD_SIZE;
-
-               skb_frag_size_set(frag, size);
-               skb->data_len += size;
-               skb->truesize += PAGE_SIZE;
-       } else
-               skb_put(skb, length);
-
+       ib_dma_unmap_single(priv->ca, mapping[0],
+                           IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+                           DMA_FROM_DEVICE);
 }
 
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct sk_buff *skb;
        int buf_size;
-       int tailroom;
        u64 *mapping;
 
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               buf_size = IPOIB_UD_HEAD_SIZE;
-               tailroom = 128; /* reserve some tailroom for IP/TCP headers */
-       } else {
-               buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-               tailroom = 0;
-       }
+       buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
 
-       skb = dev_alloc_skb(buf_size + tailroom + 4);
+       skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
        if (unlikely(!skb))
                return NULL;
 
@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
        if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
                goto error;
 
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               struct page *page = alloc_page(GFP_ATOMIC);
-               if (!page)
-                       goto partial_error;
-               skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
-               mapping[1] =
-                       ib_dma_map_page(priv->ca, page,
-                                       0, PAGE_SIZE, DMA_FROM_DEVICE);
-               if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
-                       goto partial_error;
-       }
-
        priv->rx_ring[id].skb = skb;
        return skb;
-
-partial_error:
-       ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
 error:
        dev_kfree_skb_any(skb);
        return NULL;
@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
                       wc->byte_len, wc->slid);
 
        ipoib_ud_dma_unmap_rx(priv, mapping);
-       ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
+
+       skb_put(skb, wc->byte_len);
 
        /* First byte of dgid signals multicast when 0xff */
        dgid = &((struct ib_grh *)skb->data)->dgid;
@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
        skb_reset_mac_header(skb);
        skb_pull(skb, IPOIB_ENCAP_LEN);
 
+       skb->truesize = SKB_TRUESIZE(skb->len);
+
        ++dev->stats.rx_packets;
        dev->stats.rx_bytes += skb->len;
 
@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
        }
 }
 
+/*
+ * As the result of a completion error the QP can be transferred to the SQE
+ * state.  This function checks whether the (send) QP is in the SQE state and,
+ * if so, moves it back to the RTS state so that it is functional again.
+ */
+static void ipoib_qp_state_validate_work(struct work_struct *work)
+{
+       struct ipoib_qp_state_validate *qp_work =
+               container_of(work, struct ipoib_qp_state_validate, work);
+
+       struct ipoib_dev_priv *priv = qp_work->priv;
+       struct ib_qp_attr qp_attr;
+       struct ib_qp_init_attr query_init_attr;
+       int ret;
+
+       ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
+       if (ret) {
+               ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
+                          __func__, ret);
+               goto free_res;
+       }
+       pr_info("%s: QP: 0x%x is in state: %d\n",
+               __func__, priv->qp->qp_num, qp_attr.qp_state);
+
+       /* currently we only support the SQE->RTS transition */
+       if (qp_attr.qp_state == IB_QPS_SQE) {
+               qp_attr.qp_state = IB_QPS_RTS;
+
+               ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
+               if (ret) {
+                       pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
+                               ret, priv->qp->qp_num);
+                       goto free_res;
+               }
+               pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
+                       __func__, priv->qp->qp_num);
+       } else {
+               pr_warn("QP (%d) will stay in state: %d\n",
+                       priv->qp->qp_num, qp_attr.qp_state);
+       }
+
+free_res:
+       kfree(qp_work);
+}
+
 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
                netif_wake_queue(dev);
 
        if (wc->status != IB_WC_SUCCESS &&
-           wc->status != IB_WC_WR_FLUSH_ERR)
+           wc->status != IB_WC_WR_FLUSH_ERR) {
+               struct ipoib_qp_state_validate *qp_work;
                ipoib_warn(priv, "failed send event "
                           "(status=%d, wrid=%d vend_err %x)\n",
                           wc->status, wr_id, wc->vendor_err);
+               qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
+               if (!qp_work) {
+                       ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
+                                  __func__, priv->qp->qp_num);
+                       return;
+               }
+
+               INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
+               qp_work->priv = priv;
+               queue_work(priv->wq, &qp_work->work);
+       }
 }
 
 static int poll_tx(struct ipoib_dev_priv *priv)
@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
        __ipoib_reap_ah(dev);
 
        if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+               queue_delayed_work(priv->wq, &priv->ah_reap_task,
                                   round_jiffies_relative(HZ));
 }
 
+static void ipoib_flush_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       cancel_delayed_work(&priv->ah_reap_task);
+       flush_workqueue(priv->wq);
+       ipoib_reap_ah(&priv->ah_reap_task.work);
+}
+
+static void ipoib_stop_ah(struct net_device *dev)
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+       set_bit(IPOIB_STOP_REAPER, &priv->flags);
+       ipoib_flush_ah(dev);
+}
+
 static void ipoib_ib_tx_timer_func(unsigned long ctx)
 {
        drain_tx_cq((struct net_device *)ctx);
 }
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        int ret;
@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
        }
 
        clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+       queue_delayed_work(priv->wq, &priv->ah_reap_task,
                           round_jiffies_relative(HZ));
 
        if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
 dev_stop:
        if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
                napi_enable(&priv->napi);
-       ipoib_ib_dev_stop(dev, flush);
+       ipoib_ib_dev_stop(dev);
        return -1;
 }
 
@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
        return ipoib_mcast_start_thread(dev);
 }
 
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
        clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
        netif_carrier_off(dev);
 
-       ipoib_mcast_stop_thread(dev, flush);
+       ipoib_mcast_stop_thread(dev);
        ipoib_mcast_dev_flush(dev);
 
        ipoib_flush_paths(dev);
@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
        local_bh_enable();
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ib_qp_attr qp_attr;
@@ -877,24 +902,7 @@ timeout:
        if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
                ipoib_warn(priv, "Failed to modify QP to RESET state\n");
 
-       /* Wait for all AHs to be reaped */
-       set_bit(IPOIB_STOP_REAPER, &priv->flags);
-       cancel_delayed_work(&priv->ah_reap_task);
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
-
-       begin = jiffies;
-
-       while (!list_empty(&priv->dead_ahs)) {
-               __ipoib_reap_ah(dev);
-
-               if (time_after(jiffies, begin + HZ)) {
-                       ipoib_warn(priv, "timing out; will leak address handles\n");
-                       break;
-               }
-
-               msleep(1);
-       }
+       ipoib_flush_ah(dev);
 
        ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
 
@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
                    (unsigned long) dev);
 
        if (dev->flags & IFF_UP) {
-               if (ipoib_ib_dev_open(dev, 1)) {
+               if (ipoib_ib_dev_open(dev)) {
                        ipoib_transport_dev_cleanup(dev);
                        return -ENODEV;
                }
@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
        if (level == IPOIB_FLUSH_LIGHT) {
                ipoib_mark_paths_invalid(dev);
                ipoib_mcast_dev_flush(dev);
+               ipoib_flush_ah(dev);
        }
 
        if (level >= IPOIB_FLUSH_NORMAL)
-               ipoib_ib_dev_down(dev, 0);
+               ipoib_ib_dev_down(dev);
 
        if (level == IPOIB_FLUSH_HEAVY) {
                if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-                       ipoib_ib_dev_stop(dev, 0);
-               if (ipoib_ib_dev_open(dev, 0) != 0)
+                       ipoib_ib_dev_stop(dev);
+               if (ipoib_ib_dev_open(dev) != 0)
                        return;
                if (netif_queue_stopped(dev))
                        netif_start_queue(dev);
@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
         */
        ipoib_flush_paths(dev);
 
-       ipoib_mcast_stop_thread(dev, 1);
+       ipoib_mcast_stop_thread(dev);
        ipoib_mcast_dev_flush(dev);
 
+       /*
+        * Not all of our AH references are released until after
+        * ipoib_mcast_dev_flush() and ipoib_flush_paths() have run and
+        * the neighbor garbage collection has been stopped and reaped.
+        * That should all be done now, so make a final AH flush.
+        */
+       ipoib_stop_ah(dev);
+
        ipoib_transport_dev_cleanup(dev);
 }
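
The new ipoib_qp_state_validate_work() above defers the SQE->RTS recovery to a
work item instead of doing it in the completion handler, presumably because
ib_query_qp()/ib_modify_qp() may sleep while the send completion runs in a
context that must not.  A minimal, self-contained sketch of that
defer-and-recover pattern is shown below; my_qp_recover_work,
my_schedule_qp_recover and the surrounding structure are illustrative
stand-ins, not code from this patch:

    #include <linux/kernel.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>
    #include <rdma/ib_verbs.h>

    struct my_qp_recover_work {
            struct work_struct work;
            struct ib_qp *qp;
    };

    /* Runs in process context, where the verbs calls are allowed to sleep. */
    static void my_qp_recover_work_fn(struct work_struct *work)
    {
            struct my_qp_recover_work *w =
                    container_of(work, struct my_qp_recover_work, work);
            struct ib_qp_attr attr;
            struct ib_qp_init_attr init_attr;

            if (!ib_query_qp(w->qp, &attr, IB_QP_STATE, &init_attr) &&
                attr.qp_state == IB_QPS_SQE) {
                    attr.qp_state = IB_QPS_RTS;
                    ib_modify_qp(w->qp, &attr, IB_QP_STATE);
            }
            kfree(w);
    }

    /* Called from the completion handler; GFP_ATOMIC because we cannot sleep. */
    static void my_schedule_qp_recover(struct workqueue_struct *wq,
                                       struct ib_qp *qp)
    {
            struct my_qp_recover_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

            if (!w)
                    return;
            INIT_WORK(&w->work, my_qp_recover_work_fn);
            w->qp = qp;
            queue_work(wq, &w->work);
    }

If the atomic allocation fails the recovery is simply skipped, which matches
the behaviour of the hunk above.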
 
index 657b89b1d291c90c76c57f8d4b8183b87c61aaad..7cad4dd87469a887f1df18a23ef10be2a3aaae71 100644 (file)
@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
 
        set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-       if (ipoib_ib_dev_open(dev, 1)) {
+       if (ipoib_ib_dev_open(dev)) {
                if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
                        return 0;
                goto err_disable;
@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
        return 0;
 
 err_stop:
-       ipoib_ib_dev_stop(dev, 1);
+       ipoib_ib_dev_stop(dev);
 
 err_disable:
        clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
 
        netif_stop_queue(dev);
 
-       ipoib_ib_dev_down(dev, 1);
-       ipoib_ib_dev_stop(dev, 0);
+       ipoib_ib_dev_down(dev);
+       ipoib_ib_dev_stop(dev);
 
        if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
                struct ipoib_dev_priv *cpriv;
@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
 
                if (!path->query && path_rec_start(dev, path))
                        goto err_path;
-
-               __skb_queue_tail(&neigh->queue, skb);
+               if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
+                       __skb_queue_tail(&neigh->queue, skb);
+               else
+                       goto err_drop;
        }
 
        spin_unlock_irqrestore(&priv->lock, flags);
@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
                        new_path = 1;
                }
                if (path) {
-                       __skb_queue_tail(&path->queue, skb);
+                       if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
+                               __skb_queue_tail(&path->queue, skb);
+                       } else {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                       }
 
                        if (!path->query && path_rec_start(dev, path)) {
                                spin_unlock_irqrestore(&priv->lock, flags);
@@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
                return;
        }
 
-       queue_work(ipoib_workqueue, &priv->restart_task);
+       queue_work(priv->wq, &priv->restart_task);
 }
 
 static int ipoib_get_iflink(const struct net_device *dev)
@@ -961,7 +968,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
        __ipoib_reap_neigh(priv);
 
        if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+               queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                                   arp_tbl.gc_interval);
 }
 
@@ -1140,7 +1147,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
 
        /* start garbage collection */
        clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-       queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+       queue_delayed_work(priv->wq, &priv->neigh_reap_task,
                           arp_tbl.gc_interval);
 
        return 0;
@@ -1269,15 +1276,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       if (ipoib_neigh_hash_init(priv) < 0)
-               goto out;
        /* Allocate RX/TX "rings" to hold queued skbs */
        priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
                                GFP_KERNEL);
        if (!priv->rx_ring) {
                printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
                       ca->name, ipoib_recvq_size);
-               goto out_neigh_hash_cleanup;
+               goto out;
        }
 
        priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1292,16 +1297,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
        if (ipoib_ib_dev_init(dev, ca, port))
                goto out_tx_ring_cleanup;
 
+       /*
+        * Must be after ipoib_ib_dev_init() so that we can allocate the
+        * per-device wq there and use it here
+        */
+       if (ipoib_neigh_hash_init(priv) < 0)
+               goto out_dev_uninit;
+
        return 0;
 
+out_dev_uninit:
+       ipoib_ib_dev_cleanup(dev);
+
 out_tx_ring_cleanup:
        vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
        kfree(priv->rx_ring);
 
-out_neigh_hash_cleanup:
-       ipoib_neigh_hash_uninit(dev);
 out:
        return -ENOMEM;
 }
@@ -1324,6 +1337,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
        }
        unregister_netdevice_many(&head);
 
+       /*
+        * Must be before ipoib_ib_dev_cleanup() or we would delete an
+        * in-use workqueue
+        */
+       ipoib_neigh_hash_uninit(dev);
+
        ipoib_ib_dev_cleanup(dev);
 
        kfree(priv->rx_ring);
@@ -1331,8 +1350,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
 
        priv->rx_ring = NULL;
        priv->tx_ring = NULL;
-
-       ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
@@ -1641,10 +1658,11 @@ sysfs_failed:
 
 register_failed:
        ib_unregister_event_handler(&priv->event_handler);
+       flush_workqueue(ipoib_workqueue);
        /* Stop GC if started before flush */
        set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
        cancel_delayed_work(&priv->neigh_reap_task);
-       flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
 
 event_failed:
        ipoib_dev_cleanup(priv->dev);
@@ -1707,6 +1725,7 @@ static void ipoib_remove_one(struct ib_device *device)
 
        list_for_each_entry_safe(priv, tmp, dev_list, list) {
                ib_unregister_event_handler(&priv->event_handler);
+               flush_workqueue(ipoib_workqueue);
 
                rtnl_lock();
                dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
@@ -1715,7 +1734,7 @@ static void ipoib_remove_one(struct ib_device *device)
                /* Stop GC */
                set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
                cancel_delayed_work(&priv->neigh_reap_task);
-               flush_workqueue(ipoib_workqueue);
+               flush_workqueue(priv->wq);
 
                unregister_netdev(priv->dev);
                free_netdev(priv->dev);
@@ -1750,14 +1769,16 @@ static int __init ipoib_init_module(void)
                return ret;
 
        /*
-        * We create our own workqueue mainly because we want to be
-        * able to flush it when devices are being removed.  We can't
-        * use schedule_work()/flush_scheduled_work() because both
-        * unregister_netdev() and linkwatch_event take the rtnl lock,
-        * so flush_scheduled_work() can deadlock during device
-        * removal.
+        * We create a global workqueue here that is used for all flush
+        * operations.  However, if you attempt to flush a workqueue
+        * from a task on that same workqueue, it deadlocks the system.
+        * We want to be able to flush the tasks associated with a
+        * specific net device, so we also create a workqueue for each
+        * netdevice.  We queue up the tasks for that device only on
+        * its private workqueue, and we only queue up flush events
+        * on our global flush workqueue.  This avoids the deadlocks.
         */
-       ipoib_workqueue = create_singlethread_workqueue("ipoib");
+       ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
        if (!ipoib_workqueue) {
                ret = -ENOMEM;
                goto err_fs;
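
The comment added to ipoib_init_module() above explains the split between the
global flush workqueue and the per-device workqueues.  A rough, self-contained
sketch of that arrangement follows; my_flush_wq, struct my_dev and the
my_dev_* helpers are hypothetical illustrations, not the driver's actual code:

    #include <linux/errno.h>
    #include <linux/workqueue.h>

    static struct workqueue_struct *my_flush_wq;    /* global, flush work only */

    struct my_dev {
            struct workqueue_struct *wq;    /* per-device, normal tasks */
            struct work_struct normal_task;
            struct work_struct flush_task;  /* flushes dev->wq, so it must not run on it */
    };

    static int my_dev_init(struct my_dev *dev)
    {
            dev->wq = create_singlethread_workqueue("my_dev_wq");
            return dev->wq ? 0 : -ENOMEM;
    }

    static void my_dev_queue_normal(struct my_dev *dev)
    {
            /* Device-local tasks go on the device's own workqueue ... */
            queue_work(dev->wq, &dev->normal_task);
    }

    static void my_dev_queue_flush(struct my_dev *dev)
    {
            /*
             * ... while work that flushes dev->wq goes on the global
             * workqueue, so a task never tries to flush the queue it is
             * currently running on (which would deadlock).
             */
            queue_work(my_flush_wq, &dev->flush_task);
    }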
index ffb83b5f7e805e411f1506d66a53f8465b90c439..0d23e0568deb6fee19247ddbe43fbaabc477edf4 100644 (file)
@@ -55,8 +55,6 @@ MODULE_PARM_DESC(mcast_debug_level,
                 "Enable multicast debug tracing if > 0");
 #endif
 
-static DEFINE_MUTEX(mcast_mutex);
-
 struct ipoib_mcast_iter {
        struct net_device *dev;
        union ib_gid       mgid;
@@ -66,6 +64,48 @@ struct ipoib_mcast_iter {
        unsigned int       send_only;
 };
 
+/*
+ * This should be called with the priv->lock held
+ */
+static void __ipoib_mcast_schedule_join_thread(struct ipoib_dev_priv *priv,
+                                              struct ipoib_mcast *mcast,
+                                              bool delay)
+{
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               return;
+
+       /*
+        * We will be scheduling *something*, so cancel whatever is
+        * currently scheduled first
+        */
+       cancel_delayed_work(&priv->mcast_task);
+       if (mcast && delay) {
+               /*
+                * We had a failure and want to schedule a retry later
+                */
+               mcast->backoff *= 2;
+               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
+               mcast->delay_until = jiffies + (mcast->backoff * HZ);
+               /*
+                * Mark this mcast for its delay, but restart the
+                * task immediately.  The join task will make sure to
+                * process all entries that have no delay, and then
+                * schedule itself to run again when the earliest
+                * delay expires.
+                */
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+       } else if (delay) {
+               /*
+                * Special case of retrying after a failure to
+                * allocate the broadcast multicast group, wait
+                * 1 second and try again
+                */
+               queue_delayed_work(priv->wq, &priv->mcast_task, HZ);
+       } else
+               queue_delayed_work(priv->wq, &priv->mcast_task, 0);
+}
+
 static void ipoib_mcast_free(struct ipoib_mcast *mcast)
 {
        struct net_device *dev = mcast->dev;
@@ -103,6 +143,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 
        mcast->dev = dev;
        mcast->created = jiffies;
+       mcast->delay_until = jiffies;
        mcast->backoff = 1;
 
        INIT_LIST_HEAD(&mcast->list);
@@ -185,17 +226,27 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                        spin_unlock_irq(&priv->lock);
                        return -EAGAIN;
                }
-               priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+               /* update priv members according to the new mcast */
+               priv->broadcast->mcmember.qkey = mcmember->qkey;
+               priv->broadcast->mcmember.mtu = mcmember->mtu;
+               priv->broadcast->mcmember.traffic_class = mcmember->traffic_class;
+               priv->broadcast->mcmember.rate = mcmember->rate;
+               priv->broadcast->mcmember.sl = mcmember->sl;
+               priv->broadcast->mcmember.flow_label = mcmember->flow_label;
+               priv->broadcast->mcmember.hop_limit = mcmember->hop_limit;
+               /* assume that if the admin and mcast MTUs are equal, both can be changed */
+               if (priv->mcast_mtu == priv->admin_mtu)
+                       priv->admin_mtu =
+                       priv->mcast_mtu =
+                       IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+               else
+                       priv->mcast_mtu =
+                       IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
+
                priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
                spin_unlock_irq(&priv->lock);
                priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
                set_qkey = 1;
-
-               if (!ipoib_cm_admin_enabled(dev)) {
-                       rtnl_lock();
-                       dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-                       rtnl_unlock();
-               }
        }
 
        if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -270,107 +321,35 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
        return 0;
 }
 
-static int
-ipoib_mcast_sendonly_join_complete(int status,
-                                  struct ib_sa_multicast *multicast)
-{
-       struct ipoib_mcast *mcast = multicast->context;
-       struct net_device *dev = mcast->dev;
-
-       /* We trap for port events ourselves. */
-       if (status == -ENETRESET)
-               return 0;
-
-       if (!status)
-               status = ipoib_mcast_join_finish(mcast, &multicast->rec);
-
-       if (status) {
-               if (mcast->logcount++ < 20)
-                       ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
-
-               /* Flush out any queued packets */
-               netif_tx_lock_bh(dev);
-               while (!skb_queue_empty(&mcast->pkt_queue)) {
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
-               }
-               netif_tx_unlock_bh(dev);
-
-               /* Clear the busy flag so we try again */
-               status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
-                                           &mcast->flags);
-       }
-       return status;
-}
-
-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
-{
-       struct net_device *dev = mcast->dev;
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       struct ib_sa_mcmember_rec rec = {
-#if 0                          /* Some SMs don't support send-only yet */
-               .join_state = 4
-#else
-               .join_state = 1
-#endif
-       };
-       int ret = 0;
-
-       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-               ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
-               return -ENODEV;
-       }
-
-       if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-               ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
-               return -EBUSY;
-       }
-
-       rec.mgid     = mcast->mcmember.mgid;
-       rec.port_gid = priv->local_gid;
-       rec.pkey     = cpu_to_be16(priv->pkey);
-
-       mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
-                                        priv->port, &rec,
-                                        IB_SA_MCMEMBER_REC_MGID        |
-                                        IB_SA_MCMEMBER_REC_PORT_GID    |
-                                        IB_SA_MCMEMBER_REC_PKEY        |
-                                        IB_SA_MCMEMBER_REC_JOIN_STATE,
-                                        GFP_ATOMIC,
-                                        ipoib_mcast_sendonly_join_complete,
-                                        mcast);
-       if (IS_ERR(mcast->mc)) {
-               ret = PTR_ERR(mcast->mc);
-               clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-               ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
-                          ret);
-       } else {
-               ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
-                               mcast->mcmember.mgid.raw);
-       }
-
-       return ret;
-}
-
 void ipoib_mcast_carrier_on_task(struct work_struct *work)
 {
        struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
                                                   carrier_on_task);
        struct ib_port_attr attr;
 
-       /*
-        * Take rtnl_lock to avoid racing with ipoib_stop() and
-        * turning the carrier back on while a device is being
-        * removed.
-        */
        if (ib_query_port(priv->ca, priv->port, &attr) ||
            attr.state != IB_PORT_ACTIVE) {
                ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
                return;
        }
 
-       rtnl_lock();
+       /*
+        * Take rtnl_lock to avoid racing with ipoib_stop() and
+        * turning the carrier back on while a device is being
+        * removed.  However, ipoib_stop() will attempt to flush
+        * the workqueue while holding the rtnl lock, so loop
+        * on trylock until either we get the lock or we see
+        * FLAG_OPER_UP go away as that signals that we are bailing
+        * and can safely ignore the carrier on work.
+        */
+       while (!rtnl_trylock()) {
+               if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+                       return;
+               else
+                       msleep(20);
+       }
+       if (!ipoib_cm_admin_enabled(priv->dev))
+               dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
        netif_carrier_on(priv->dev);
        rtnl_unlock();
 }
@@ -382,7 +361,9 @@ static int ipoib_mcast_join_complete(int status,
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-       ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
+       ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
+                       test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
+                       "sendonly " : "",
                        mcast->mcmember.mgid.raw, status);
 
        /* We trap for port events ourselves. */
@@ -396,49 +377,74 @@ static int ipoib_mcast_join_complete(int status,
 
        if (!status) {
                mcast->backoff = 1;
-               mutex_lock(&mcast_mutex);
-               if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task, 0);
-               mutex_unlock(&mcast_mutex);
+               mcast->delay_until = jiffies;
 
                /*
-                * Defer carrier on work to ipoib_workqueue to avoid a
-                * deadlock on rtnl_lock here.
+                * Defer carrier on work to priv->wq to avoid a
+                * deadlock on rtnl_lock here.  Requeue our multicast
+                * work too, which will end up happening right after
+                * our carrier on task work and will allow us to
+                * send out all of the non-broadcast joins
                 */
-               if (mcast == priv->broadcast)
-                       queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
-               status = 0;
-               goto out;
-       }
+               if (mcast == priv->broadcast) {
+                       spin_lock_irq(&priv->lock);
+                       queue_work(priv->wq, &priv->carrier_on_task);
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+                       goto out_locked;
+               }
+       } else {
+               if (mcast->logcount++ < 20) {
+                       if (status == -ETIMEDOUT || status == -EAGAIN) {
+                               ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
+                                               test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+                                               mcast->mcmember.mgid.raw, status);
+                       } else {
+                               ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
+                                               test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
+                                          mcast->mcmember.mgid.raw, status);
+                       }
+               }
 
-       if (mcast->logcount++ < 20) {
-               if (status == -ETIMEDOUT || status == -EAGAIN) {
-                       ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-                                       mcast->mcmember.mgid.raw, status);
+               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
+                   mcast->backoff >= 2) {
+                       /*
+                        * We only retry sendonly joins once before we drop
+                        * the packet and quit trying to deal with the
+                        * group.  However, we leave the group in the
+                        * mcast list as an unjoined group.  If we want to
+                        * try joining again, we simply queue up a packet
+                        * and restart the join thread.  The empty queue
+                        * is why the join thread ignores this group.
+                        */
+                       mcast->backoff = 1;
+                       netif_tx_lock_bh(dev);
+                       while (!skb_queue_empty(&mcast->pkt_queue)) {
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
+                       }
+                       netif_tx_unlock_bh(dev);
                } else {
-                       ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-                                  mcast->mcmember.mgid.raw, status);
+                       spin_lock_irq(&priv->lock);
+                       /* Requeue this join task with a backoff delay */
+                       __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
+                       goto out_locked;
                }
        }
-
-       mcast->backoff *= 2;
-       if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-               mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-       /* Clear the busy flag so we try again */
-       status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
-       mutex_lock(&mcast_mutex);
+out:
        spin_lock_irq(&priv->lock);
-       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
-                                  mcast->backoff * HZ);
+out_locked:
+       /*
+        * Make sure to set mcast->mc before we clear the busy flag to avoid
+        * racing with code that checks for BUSY before checking mcast->mc
+        */
+       if (status)
+               mcast->mc = NULL;
+       else
+               mcast->mc = multicast;
+       clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
        spin_unlock_irq(&priv->lock);
-       mutex_unlock(&mcast_mutex);
-out:
        complete(&mcast->done);
+
        return status;
 }
 
@@ -446,6 +452,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                             int create)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ib_sa_multicast *multicast;
        struct ib_sa_mcmember_rec rec = {
                .join_state = 1
        };
@@ -487,29 +494,18 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
        }
 
-       set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-       init_completion(&mcast->done);
-       set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
-       mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
+       multicast = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
                                         &rec, comp_mask, GFP_KERNEL,
                                         ipoib_mcast_join_complete, mcast);
-       if (IS_ERR(mcast->mc)) {
+       if (IS_ERR(multicast)) {
+               ret = PTR_ERR(multicast);
+               ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
+               spin_lock_irq(&priv->lock);
+               /* Requeue this join task with a backoff delay */
+               __ipoib_mcast_schedule_join_thread(priv, mcast, 1);
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+               spin_unlock_irq(&priv->lock);
                complete(&mcast->done);
-               ret = PTR_ERR(mcast->mc);
-               ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
-
-               mcast->backoff *= 2;
-               if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-                       mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-               mutex_lock(&mcast_mutex);
-               if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                       queue_delayed_work(ipoib_workqueue,
-                                          &priv->mcast_task,
-                                          mcast->backoff * HZ);
-               mutex_unlock(&mcast_mutex);
        }
 }
 
@@ -519,8 +515,11 @@ void ipoib_mcast_join_task(struct work_struct *work)
                container_of(work, struct ipoib_dev_priv, mcast_task.work);
        struct net_device *dev = priv->dev;
        struct ib_port_attr port_attr;
+       unsigned long delay_until = 0;
+       struct ipoib_mcast *mcast = NULL;
+       int create = 1;
 
-       if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
                return;
 
        if (ib_query_port(priv->ca, priv->port, &port_attr) ||
@@ -536,93 +535,118 @@ void ipoib_mcast_join_task(struct work_struct *work)
        else
                memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
+       spin_lock_irq(&priv->lock);
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               goto out;
+
        if (!priv->broadcast) {
                struct ipoib_mcast *broadcast;
 
-               if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-                       return;
-
-               broadcast = ipoib_mcast_alloc(dev, 1);
+               broadcast = ipoib_mcast_alloc(dev, 0);
                if (!broadcast) {
                        ipoib_warn(priv, "failed to allocate broadcast group\n");
-                       mutex_lock(&mcast_mutex);
-                       if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-                               queue_delayed_work(ipoib_workqueue,
-                                                  &priv->mcast_task, HZ);
-                       mutex_unlock(&mcast_mutex);
-                       return;
+                       /*
+                        * Restart us after a 1 second delay to retry
+                        * creating our broadcast group and attaching to
+                        * it.  Until this succeeds, this ipoib dev is
+                        * completely stalled (multicast wise).
+                        */
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 1);
+                       goto out;
                }
 
-               spin_lock_irq(&priv->lock);
                memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
                       sizeof (union ib_gid));
                priv->broadcast = broadcast;
 
                __ipoib_mcast_add(dev, priv->broadcast);
-               spin_unlock_irq(&priv->lock);
        }
 
        if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
-                       ipoib_mcast_join(dev, priv->broadcast, 0);
-               return;
-       }
-
-       while (1) {
-               struct ipoib_mcast *mcast = NULL;
-
-               spin_lock_irq(&priv->lock);
-               list_for_each_entry(mcast, &priv->multicast_list, list) {
-                       if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
-                           && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
-                               /* Found the next unjoined group */
-                               break;
+               if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags)) {
+                       mcast = priv->broadcast;
+                       create = 0;
+                       if (mcast->backoff > 1 &&
+                           time_before(jiffies, mcast->delay_until)) {
+                               delay_until = mcast->delay_until;
+                               mcast = NULL;
                        }
                }
-               spin_unlock_irq(&priv->lock);
+               goto out;
+       }
 
-               if (&mcast->list == &priv->multicast_list) {
-                       /* All done */
-                       break;
+       /*
+        * We'll never get here until the broadcast group is both allocated
+        * and attached
+        */
+       list_for_each_entry(mcast, &priv->multicast_list, list) {
+               if (IS_ERR_OR_NULL(mcast->mc) &&
+                   !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+                   (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
+                    !skb_queue_empty(&mcast->pkt_queue))) {
+                       if (mcast->backoff == 1 ||
+                           time_after_eq(jiffies, mcast->delay_until)) {
+                               /* Found the next unjoined group */
+                               init_completion(&mcast->done);
+                               set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+                               if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+                                       create = 0;
+                               else
+                                       create = 1;
+                               spin_unlock_irq(&priv->lock);
+                               ipoib_mcast_join(dev, mcast, create);
+                               spin_lock_irq(&priv->lock);
+                       } else if (!delay_until ||
+                                time_before(mcast->delay_until, delay_until))
+                               delay_until = mcast->delay_until;
                }
-
-               ipoib_mcast_join(dev, mcast, 1);
-               return;
        }
 
-       ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
+       mcast = NULL;
+       ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
 
-       clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+out:
+       if (delay_until) {
+               cancel_delayed_work(&priv->mcast_task);
+               queue_delayed_work(priv->wq, &priv->mcast_task,
+                                  delay_until - jiffies);
+       }
+       if (mcast) {
+               init_completion(&mcast->done);
+               set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+       }
+       spin_unlock_irq(&priv->lock);
+       if (mcast)
+               ipoib_mcast_join(dev, mcast, create);
 }
 
 int ipoib_mcast_start_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags;
 
        ipoib_dbg_mcast(priv, "starting multicast thread\n");
 
-       mutex_lock(&mcast_mutex);
-       if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-               queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
-       mutex_unlock(&mcast_mutex);
+       spin_lock_irqsave(&priv->lock, flags);
+       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+       spin_unlock_irqrestore(&priv->lock, flags);
 
        return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       unsigned long flags;
 
        ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-       mutex_lock(&mcast_mutex);
-       clear_bit(IPOIB_MCAST_RUN, &priv->flags);
+       spin_lock_irqsave(&priv->lock, flags);
        cancel_delayed_work(&priv->mcast_task);
-       mutex_unlock(&mcast_mutex);
+       spin_unlock_irqrestore(&priv->lock, flags);
 
-       if (flush)
-               flush_workqueue(ipoib_workqueue);
+       flush_workqueue(priv->wq);
 
        return 0;
 }
@@ -633,6 +657,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
        int ret = 0;
 
        if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+               ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+       if (!IS_ERR_OR_NULL(mcast->mc))
                ib_sa_free_multicast(mcast->mc);
 
        if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -644,7 +671,9 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
                                      be16_to_cpu(mcast->mcmember.mlid));
                if (ret)
                        ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
-       }
+       } else if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+               ipoib_dbg(priv, "leaving with no mcmember but not a "
+                         "SENDONLY join\n");
 
        return 0;
 }
@@ -667,49 +696,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
        }
 
        mcast = __ipoib_mcast_find(dev, mgid);
-       if (!mcast) {
-               /* Let's create a new send only group now */
-               ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
-                               mgid);
-
-               mcast = ipoib_mcast_alloc(dev, 0);
+       if (!mcast || !mcast->ah) {
                if (!mcast) {
-                       ipoib_warn(priv, "unable to allocate memory for "
-                                  "multicast structure\n");
-                       ++dev->stats.tx_dropped;
-                       dev_kfree_skb_any(skb);
-                       goto out;
-               }
-
-               set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
-               memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
-               __ipoib_mcast_add(dev, mcast);
-               list_add_tail(&mcast->list, &priv->multicast_list);
-       }
+                       /* Let's create a new send only group now */
+                       ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
+                                       mgid);
+
+                       mcast = ipoib_mcast_alloc(dev, 0);
+                       if (!mcast) {
+                               ipoib_warn(priv, "unable to allocate memory "
+                                          "for multicast structure\n");
+                               ++dev->stats.tx_dropped;
+                               dev_kfree_skb_any(skb);
+                               goto unlock;
+                       }
 
-       if (!mcast->ah) {
+                       set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
+                       memcpy(mcast->mcmember.mgid.raw, mgid,
+                              sizeof (union ib_gid));
+                       __ipoib_mcast_add(dev, mcast);
+                       list_add_tail(&mcast->list, &priv->multicast_list);
+               }
                if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
                        skb_queue_tail(&mcast->pkt_queue, skb);
                else {
                        ++dev->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                }
-
-               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
-                       ipoib_dbg_mcast(priv, "no address vector, "
-                                       "but multicast join already started\n");
-               else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-                       ipoib_mcast_sendonly_join(mcast);
-
-               /*
-                * If lookup completes between here and out:, don't
-                * want to send packet twice.
-                */
-               mcast = NULL;
-       }
-
-out:
-       if (mcast && mcast->ah) {
+               if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+                       __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+               }
+       } else {
                struct ipoib_neigh *neigh;
 
                spin_unlock_irqrestore(&priv->lock, flags);
@@ -759,9 +776,12 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* seperate between the wait to the leave*/
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-               if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
                        wait_for_completion(&mcast->done);
 
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -792,9 +812,14 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        unsigned long flags;
        struct ib_sa_mcmember_rec rec;
 
-       ipoib_dbg_mcast(priv, "restarting multicast task\n");
+       if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
+               /*
+                * Shortcut: on shutdown, flush is called next; just
+                * let it do all the work.
+                */
+               return;
 
-       ipoib_mcast_stop_thread(dev, 0);
+       ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
        local_irq_save(flags);
        netif_addr_lock(dev);
@@ -880,14 +905,27 @@ void ipoib_mcast_restart_task(struct work_struct *work)
        netif_addr_unlock(dev);
        local_irq_restore(flags);
 
-       /* We have to cancel outside of the spinlock */
+       /*
+        * make sure the in-flight joins have finished before we attempt
+        * to leave
+        */
+       list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+               if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+                       wait_for_completion(&mcast->done);
+
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
                ipoib_mcast_leave(mcast->dev, mcast);
                ipoib_mcast_free(mcast);
        }
 
-       if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-               ipoib_mcast_start_thread(dev);
+       /*
+        * Double check that we are still up
+        */
+       if (test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
+               spin_lock_irqsave(&priv->lock, flags);
+               __ipoib_mcast_schedule_join_thread(priv, NULL, 0);
+               spin_unlock_irqrestore(&priv->lock, flags);
+       }
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
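
The scheduling helper __ipoib_mcast_schedule_join_thread() added above builds
on a capped exponential backoff tracked in jiffies: each failure doubles
mcast->backoff up to a maximum and records delay_until, and the join task
later skips entries that are still delayed while remembering the earliest
expiry so it can requeue itself for exactly that moment.  A stripped-down
sketch of just that arithmetic, with made-up names (MY_MAX_BACKOFF_SECONDS,
struct my_mcast), would be:

    #include <linux/jiffies.h>
    #include <linux/types.h>

    #define MY_MAX_BACKOFF_SECONDS 16

    struct my_mcast {
            unsigned int backoff;           /* seconds, starts at 1 */
            unsigned long delay_until;      /* jiffies */
    };

    /* On a join failure: double the backoff, cap it, record the retry time. */
    static void my_mcast_fail(struct my_mcast *mc)
    {
            mc->backoff *= 2;
            if (mc->backoff > MY_MAX_BACKOFF_SECONDS)
                    mc->backoff = MY_MAX_BACKOFF_SECONDS;
            mc->delay_until = jiffies + mc->backoff * HZ;
    }

    /*
     * The join task only attempts entries whose delay has expired; for the
     * rest it keeps the earliest delay_until and requeues itself for then.
     */
    static bool my_mcast_ready(const struct my_mcast *mc)
    {
            return mc->backoff == 1 || time_after_eq(jiffies, mc->delay_until);
    }

With a starting backoff of 1 second this gives retry delays of 2, 4, 8, then
16 seconds once the cap is reached.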
index c56d5d44c53b3f11725b6d6da220ea2c440fe496..e5cc43074196dbab1ae216cb43135f7e5f081c66 100644 (file)
@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                goto out_free_pd;
        }
 
+       /*
+        * The various IPoIB tasks assume they will never race against
+        * themselves, so always use a single-threaded workqueue
+        */
+       priv->wq = create_singlethread_workqueue("ipoib_wq");
+       if (!priv->wq) {
+               printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+               goto out_free_mr;
+       }
+
        size = ipoib_recvq_size + 1;
        ret = ipoib_cm_dev_init(dev);
        if (!ret) {
@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
                        size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
                else
                        size += ipoib_recvq_size * ipoib_max_conn_qp;
-       }
+       } else
+               goto out_free_wq;
 
        priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
        if (IS_ERR(priv->recv_cq)) {
                printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
-               goto out_free_mr;
+               goto out_cm_dev_cleanup;
        }
 
        priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
        priv->tx_wr.send_flags  = IB_SEND_SIGNALED;
 
        priv->rx_sge[0].lkey = priv->mr->lkey;
-       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
-               priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
-               priv->rx_sge[1].length = PAGE_SIZE;
-               priv->rx_sge[1].lkey = priv->mr->lkey;
-               priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
-       } else {
-               priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
-               priv->rx_wr.num_sge = 1;
-       }
+
+       priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+       priv->rx_wr.num_sge = 1;
+
        priv->rx_wr.next = NULL;
        priv->rx_wr.sg_list = priv->rx_sge;
 
@@ -236,12 +242,19 @@ out_free_send_cq:
 out_free_recv_cq:
        ib_destroy_cq(priv->recv_cq);
 
+out_cm_dev_cleanup:
+       ipoib_cm_dev_cleanup(dev);
+
+out_free_wq:
+       destroy_workqueue(priv->wq);
+       priv->wq = NULL;
+
 out_free_mr:
        ib_dereg_mr(priv->mr);
-       ipoib_cm_dev_cleanup(dev);
 
 out_free_pd:
        ib_dealloc_pd(priv->pd);
+
        return -ENODEV;
 }
 
@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
 
        ipoib_cm_dev_cleanup(dev);
 
+       if (priv->wq) {
+               flush_workqueue(priv->wq);
+               destroy_workqueue(priv->wq);
+               priv->wq = NULL;
+       }
+
        if (ib_dereg_mr(priv->mr))
                ipoib_warn(priv, "ib_dereg_mr failed\n");
 
        if (ib_dealloc_pd(priv->pd))
                ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
 }
 
 void ipoib_event(struct ib_event_handler *handler,
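
The reworked error path in ipoib_transport_dev_init() above follows the usual
kernel idiom of one goto label per acquired resource, unwinding in the reverse
order of acquisition (out_cm_dev_cleanup, out_free_wq, out_free_mr,
out_free_pd).  A generic sketch of that idiom with hypothetical resources, not
the driver's actual ones:

    #include <linux/errno.h>
    #include <linux/slab.h>
    #include <linux/workqueue.h>

    struct my_ctx {
            void *buf;
            struct workqueue_struct *wq;
            void *extra;
    };

    static int my_ctx_init(struct my_ctx *ctx)
    {
            ctx->buf = kzalloc(4096, GFP_KERNEL);
            if (!ctx->buf)
                    return -ENOMEM;

            ctx->wq = create_singlethread_workqueue("my_ctx_wq");
            if (!ctx->wq)
                    goto out_free_buf;

            ctx->extra = kzalloc(64, GFP_KERNEL);
            if (!ctx->extra)
                    goto out_destroy_wq;

            return 0;

            /* Unwind in the reverse order of allocation. */
    out_destroy_wq:
            destroy_workqueue(ctx->wq);
            ctx->wq = NULL;
    out_free_buf:
            kfree(ctx->buf);
            ctx->buf = NULL;
            return -ENOMEM;
    }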
index 0747c0595a9d42b2ff8c9cb231b38be939821ddb..918814cd0f806f5344e5f293e2bb059010237727 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/parser.h>
 #include <linux/random.h>
 #include <linux/jiffies.h>
+#include <rdma/ib_cache.h>
 
 #include <linux/atomic.h>
 
@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
        if (!attr)
                return -ENOMEM;
 
-       ret = ib_find_pkey(target->srp_host->srp_dev->dev,
-                          target->srp_host->port,
-                          be16_to_cpu(target->pkey),
-                          &attr->pkey_index);
+       ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
+                                 target->srp_host->port,
+                                 be16_to_cpu(target->pkey),
+                                 &attr->pkey_index);
        if (ret)
                goto out;
 
index 6e0a477681e90b0efe53330de1b9118000bcc85a..4b9b866e6b0d69ba783c6a9e97ef8d423fa0bc33 100644 (file)
@@ -207,7 +207,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
                }
                break;
        default:
-               printk(KERN_ERR "received unrecognized IB event %d\n",
+               pr_err("received unrecognized IB event %d\n",
                       event->event);
                break;
        }
@@ -218,7 +218,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
  */
 static void srpt_srq_event(struct ib_event *event, void *ctx)
 {
-       printk(KERN_INFO "SRQ event %d\n", event->event);
+       pr_info("SRQ event %d\n", event->event);
 }
 
 /**
@@ -242,8 +242,7 @@ static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
                                 ch->sess_name, srpt_get_ch_state(ch));
                break;
        default:
-               printk(KERN_ERR "received unrecognized IB QP event %d\n",
-                      event->event);
+               pr_err("received unrecognized IB QP event %d\n", event->event);
                break;
        }
 }
@@ -602,7 +601,7 @@ static void srpt_unregister_mad_agent(struct srpt_device *sdev)
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
-                       printk(KERN_ERR "disabling MAD processing failed.\n");
+                       pr_err("disabling MAD processing failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
@@ -810,7 +809,7 @@ static int srpt_post_send(struct srpt_rdma_ch *ch,
 
        ret = -ENOMEM;
        if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
-               printk(KERN_WARNING "IB send queue full (needed 1)\n");
+               pr_warn("IB send queue full (needed 1)\n");
                goto out;
        }
 
@@ -912,7 +911,7 @@ static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
 
                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
-                       printk(KERN_ERR "received unsupported SRP_CMD request"
+                       pr_err("received unsupported SRP_CMD request"
                               " type (%u out + %u in != %u / %zu)\n",
                               srp_cmd->data_out_desc_cnt,
                               srp_cmd->data_in_desc_cnt,
@@ -1432,7 +1431,7 @@ static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                srpt_unmap_sg_to_ib_sge(ch, ioctx);
                transport_generic_free_cmd(&ioctx->cmd, 0);
        } else {
-               printk(KERN_ERR "IB completion has been received too late for"
+               pr_err("IB completion has been received too late for"
                       " wr_id = %u.\n", ioctx->ioctx.index);
        }
 }
@@ -1457,7 +1456,7 @@ static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                                SRPT_STATE_DATA_IN))
                        target_execute_cmd(&ioctx->cmd);
                else
-                       printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
+                       pr_err("%s[%d]: wrong state = %d\n", __func__,
                               __LINE__, srpt_get_cmd_state(ioctx));
        } else if (opcode == SRPT_RDMA_ABORT) {
                ioctx->rdma_aborted = true;
@@ -1481,7 +1480,7 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
        switch (opcode) {
        case SRPT_RDMA_READ_LAST:
                if (ioctx->n_rdma <= 0) {
-                       printk(KERN_ERR "Received invalid RDMA read"
+                       pr_err("Received invalid RDMA read"
                               " error completion with idx %d\n",
                               ioctx->ioctx.index);
                        break;
@@ -1490,14 +1489,13 @@ static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
                if (state == SRPT_STATE_NEED_DATA)
                        srpt_abort_cmd(ioctx);
                else
-                       printk(KERN_ERR "%s[%d]: wrong state = %d\n",
+                       pr_err("%s[%d]: wrong state = %d\n",
                               __func__, __LINE__, state);
                break;
        case SRPT_RDMA_WRITE_LAST:
                break;
        default:
-               printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
-                      __LINE__, opcode);
+               pr_err("%s[%d]: opcode = %u\n", __func__, __LINE__, opcode);
                break;
        }
 }
@@ -1549,8 +1547,8 @@ static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
                max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
                if (sense_data_len > max_sense_len) {
-                       printk(KERN_WARNING "truncated sense data from %d to %d"
-                              " bytes\n", sense_data_len, max_sense_len);
+                       pr_warn("truncated sense data from %d to %d"
+                               " bytes\n", sense_data_len, max_sense_len);
                        sense_data_len = max_sense_len;
                }
 
@@ -1628,8 +1626,8 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
        int addressing_method;
 
        if (unlikely(len < 2)) {
-               printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or "
-                      "more", len);
+               pr_err("Illegal LUN length %d, expected 2 bytes or more\n",
+                      len);
                goto out;
        }
 
@@ -1663,7 +1661,7 @@ static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
 
        case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
        default:
-               printk(KERN_ERR "Unimplemented LUN addressing method %u",
+               pr_err("Unimplemented LUN addressing method %u\n",
                       addressing_method);
                break;
        }
@@ -1672,8 +1670,7 @@ out:
        return res;
 
 out_err:
-       printk(KERN_ERR "Support for multi-level LUNs has not yet been"
-              " implemented");
+       pr_err("Support for multi-level LUNs has not yet been implemented\n");
        goto out;
 }
 
@@ -1723,7 +1720,7 @@ static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
        }
 
        if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
-               printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n",
+               pr_err("0x%llx: parsing SRP descriptor table failed.\n",
                       srp_cmd->tag);
                ret = TCM_INVALID_CDB_FIELD;
                goto send_sense;
@@ -1912,7 +1909,7 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
                break;
        case SRP_I_LOGOUT:
-               printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n");
+               pr_err("Not yet implemented: SRP_I_LOGOUT\n");
                break;
        case SRP_CRED_RSP:
                pr_debug("received SRP_CRED_RSP\n");
@@ -1921,10 +1918,10 @@ static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                pr_debug("received SRP_AER_RSP\n");
                break;
        case SRP_RSP:
-               printk(KERN_ERR "Received SRP_RSP\n");
+               pr_err("Received SRP_RSP\n");
                break;
        default:
-               printk(KERN_ERR "received IU with unknown opcode 0x%x\n",
+               pr_err("received IU with unknown opcode 0x%x\n",
                       srp_cmd->opcode);
                break;
        }
@@ -1948,12 +1945,12 @@ static void srpt_process_rcv_completion(struct ib_cq *cq,
 
                req_lim = atomic_dec_return(&ch->req_lim);
                if (unlikely(req_lim < 0))
-                       printk(KERN_ERR "req_lim = %d < 0\n", req_lim);
+                       pr_err("req_lim = %d < 0\n", req_lim);
                ioctx = sdev->ioctx_ring[index];
                srpt_handle_new_iu(ch, ioctx, NULL);
        } else {
-               printk(KERN_INFO "receiving failed for idx %u with status %d\n",
-                      index, wc->status);
+               pr_info("receiving failed for idx %u with status %d\n",
+                       index, wc->status);
        }
 }
 
@@ -1993,12 +1990,12 @@ static void srpt_process_send_completion(struct ib_cq *cq,
                }
        } else {
                if (opcode == SRPT_SEND) {
-                       printk(KERN_INFO "sending response for idx %u failed"
-                              " with status %d\n", index, wc->status);
+                       pr_info("sending response for idx %u failed"
+                               " with status %d\n", index, wc->status);
                        srpt_handle_send_err_comp(ch, wc->wr_id);
                } else if (opcode != SRPT_RDMA_MID) {
-                       printk(KERN_INFO "RDMA t %d for idx %u failed with"
-                               " status %d", opcode, index, wc->status);
+                       pr_info("RDMA t %d for idx %u failed with"
+                               " status %d\n", opcode, index, wc->status);
                        srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
                }
        }
@@ -2062,15 +2059,15 @@ static int srpt_compl_thread(void *arg)
 
        ch = arg;
        BUG_ON(!ch);
-       printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n",
-              ch->sess_name, ch->thread->comm, current->pid);
+       pr_info("Session %s: kernel thread %s (PID %d) started\n",
+               ch->sess_name, ch->thread->comm, current->pid);
        while (!kthread_should_stop()) {
                wait_event_interruptible(ch->wait_queue,
                        (srpt_process_completion(ch->cq, ch),
                         kthread_should_stop()));
        }
-       printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n",
-              ch->sess_name, ch->thread->comm, current->pid);
+       pr_info("Session %s: kernel thread %s (PID %d) stopped\n",
+               ch->sess_name, ch->thread->comm, current->pid);
        return 0;
 }
 
@@ -2097,7 +2094,7 @@ retry:
                              ch->rq_size + srp_sq_size, 0);
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
-               printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n",
+               pr_err("failed to create CQ cqe= %d ret= %d\n",
                       ch->rq_size + srp_sq_size, ret);
                goto out;
        }
@@ -2123,7 +2120,7 @@ retry:
                                goto retry;
                        }
                }
-               printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
+               pr_err("failed to create_qp ret= %d\n", ret);
                goto err_destroy_cq;
        }
 
@@ -2143,7 +2140,7 @@ retry:
 
        ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
        if (IS_ERR(ch->thread)) {
-               printk(KERN_ERR "failed to create kernel thread %ld\n",
+               pr_err("failed to create kernel thread %ld\n",
                       PTR_ERR(ch->thread));
                ch->thread = NULL;
                goto err_destroy_qp;
@@ -2204,7 +2201,7 @@ static void __srpt_close_ch(struct srpt_rdma_ch *ch)
                /* fall through */
        case CH_LIVE:
                if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
-                       printk(KERN_ERR "sending CM DREQ failed.\n");
+                       pr_err("sending CM DREQ failed.\n");
                break;
        case CH_DISCONNECTING:
                break;
@@ -2291,7 +2288,7 @@ static void srpt_drain_channel(struct ib_cm_id *cm_id)
 
                ret = srpt_ch_qp_err(ch);
                if (ret < 0)
-                       printk(KERN_ERR "Setting queue pair in error state"
+                       pr_err("Setting queue pair in error state"
                               " failed: %d\n", ret);
        }
 }
@@ -2435,17 +2432,17 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 
        it_iu_len = be32_to_cpu(req->req_it_iu_len);
 
-       printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
-              " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
-              " (guid=0x%llx:0x%llx)\n",
-              be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
-              be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
-              be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
-              be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
-              it_iu_len,
-              param->port,
-              be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
-              be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
+       pr_info("Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
+               " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
+               " (guid=0x%llx:0x%llx)\n",
+               be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
+               be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
+               be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
+               be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
+               it_iu_len,
+               param->port,
+               be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
+               be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
 
        rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
        rej = kzalloc(sizeof *rej, GFP_KERNEL);
@@ -2460,7 +2457,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
                ret = -EINVAL;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because its"
+               pr_err("rejected SRP_LOGIN_REQ because its"
                       " length (%d bytes) is out of range (%d .. %d)\n",
                       it_iu_len, 64, srp_max_req_size);
                goto reject;
@@ -2470,7 +2467,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                rej->reason = __constant_cpu_to_be32(
                             SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
                ret = -EINVAL;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port"
+               pr_err("rejected SRP_LOGIN_REQ because the target port"
                       " has not yet been enabled\n");
                goto reject;
        }
@@ -2516,7 +2513,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
                ret = -ENOMEM;
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because it"
+               pr_err("rejected SRP_LOGIN_REQ because it"
                       " has an invalid target port identifier.\n");
                goto reject;
        }
@@ -2525,7 +2522,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
        if (!ch) {
                rej->reason = __constant_cpu_to_be32(
                                        SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n");
+               pr_err("rejected SRP_LOGIN_REQ because no memory.\n");
                ret = -ENOMEM;
                goto reject;
        }
@@ -2562,7 +2559,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
        if (ret) {
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating"
+               pr_err("rejected SRP_LOGIN_REQ because creating"
                       " a new RDMA channel failed.\n");
                goto free_ring;
        }
@@ -2571,7 +2568,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
        if (ret) {
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
-               printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling"
+               pr_err("rejected SRP_LOGIN_REQ because enabling"
                       " RTR failed (error code = %d)\n", ret);
                goto destroy_ib;
        }
@@ -2586,8 +2583,8 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 
        nacl = srpt_lookup_acl(sport, ch->i_port_id);
        if (!nacl) {
-               printk(KERN_INFO "Rejected login because no ACL has been"
-                      " configured yet for initiator %s.\n", ch->sess_name);
+               pr_info("Rejected login because no ACL has been"
+                       " configured yet for initiator %s.\n", ch->sess_name);
                rej->reason = __constant_cpu_to_be32(
                                SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
                goto destroy_ib;
@@ -2631,7 +2628,7 @@ static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
 
        ret = ib_send_cm_rep(cm_id, rep_param);
        if (ret) {
-               printk(KERN_ERR "sending SRP_LOGIN_REQ response failed"
+               pr_err("sending SRP_LOGIN_REQ response failed"
                       " (error code = %d)\n", ret);
                goto release_channel;
        }
@@ -2679,7 +2676,7 @@ out:
 
 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id);
+       pr_info("Received IB REJ for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
@@ -2714,13 +2711,13 @@ static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
 
 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id);
+       pr_info("Received IB TimeWait exit for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id);
+       pr_info("Received IB REP error for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
@@ -2755,9 +2752,9 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
 
        if (send_drep) {
                if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
-                       printk(KERN_ERR "Sending IB DREP failed.\n");
-               printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n",
-                      ch->sess_name);
+                       pr_err("Sending IB DREP failed.\n");
+               pr_info("Received DREQ and sent DREP for session %s.\n",
+                       ch->sess_name);
        }
 }
 
@@ -2766,8 +2763,7 @@ static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
  */
 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
 {
-       printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n",
-              cm_id);
+       pr_info("Received InfiniBand DREP message for cm_id %p.\n", cm_id);
        srpt_drain_channel(cm_id);
 }
 
@@ -2811,14 +2807,13 @@ static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
                srpt_cm_rep_error(cm_id);
                break;
        case IB_CM_DREQ_ERROR:
-               printk(KERN_INFO "Received IB DREQ ERROR event.\n");
+               pr_info("Received IB DREQ ERROR event.\n");
                break;
        case IB_CM_MRA_RECEIVED:
-               printk(KERN_INFO "Received IB MRA event\n");
+               pr_info("Received IB MRA event\n");
                break;
        default:
-               printk(KERN_ERR "received unrecognized IB CM event %d\n",
-                      event->event);
+               pr_err("received unrecognized IB CM event %d\n", event->event);
                break;
        }
 
@@ -2848,8 +2843,8 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                ret = -ENOMEM;
                sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
                if (sq_wr_avail < 0) {
-                       printk(KERN_WARNING "IB send queue full (needed %d)\n",
-                              n_rdma);
+                       pr_warn("IB send queue full (needed %d)\n",
+                               n_rdma);
                        goto out;
                }
        }
@@ -2889,7 +2884,7 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
        }
 
        if (ret)
-               printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d",
+               pr_err("%s[%d]: ib_post_send() returned %d for %d/%d\n",
                                 __func__, __LINE__, ret, i, n_rdma);
        if (ret && i > 0) {
                wr.num_sge = 0;
@@ -2897,12 +2892,12 @@ static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
                wr.send_flags = IB_SEND_SIGNALED;
                while (ch->state == CH_LIVE &&
                        ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
-                       printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
+                       pr_info("Trying to abort failed RDMA transfer [%d]\n",
                                ioctx->ioctx.index);
                        msleep(1000);
                }
                while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
-                       printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
+                       pr_info("Waiting until RDMA abort finished [%d]\n",
                                ioctx->ioctx.index);
                        msleep(1000);
                }
@@ -2923,17 +2918,17 @@ static int srpt_xfer_data(struct srpt_rdma_ch *ch,
 
        ret = srpt_map_sg_to_ib_sge(ch, ioctx);
        if (ret) {
-               printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
+               pr_err("%s[%d] ret=%d\n", __func__, __LINE__, ret);
                goto out;
        }
 
        ret = srpt_perform_rdmas(ch, ioctx);
        if (ret) {
                if (ret == -EAGAIN || ret == -ENOMEM)
-                       printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
-                                  __func__, __LINE__, ret);
+                       pr_info("%s[%d] queue full -- ret=%d\n",
+                               __func__, __LINE__, ret);
                else
-                       printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
+                       pr_err("%s[%d] fatal error -- ret=%d\n",
                               __func__, __LINE__, ret);
                goto out_unmap;
        }
@@ -3058,7 +3053,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
            !ioctx->queue_status_only) {
                ret = srpt_xfer_data(ch, ioctx);
                if (ret) {
-                       printk(KERN_ERR "xfer_data failed for tag %llu\n",
+                       pr_err("xfer_data failed for tag %llu\n",
                               ioctx->tag);
                        return;
                }
@@ -3075,7 +3070,7 @@ static void srpt_queue_response(struct se_cmd *cmd)
        }
        ret = srpt_post_send(ch, ioctx, resp_len);
        if (ret) {
-               printk(KERN_ERR "sending cmd response failed for tag %llu\n",
+               pr_err("sending cmd response failed for tag %llu\n",
                       ioctx->tag);
                srpt_unmap_sg_to_ib_sge(ch, ioctx);
                srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
@@ -3154,7 +3149,7 @@ static int srpt_release_sdev(struct srpt_device *sdev)
        res = wait_event_interruptible(sdev->ch_releaseQ,
                                       srpt_ch_list_empty(sdev));
        if (res)
-               printk(KERN_ERR "%s: interrupted.\n", __func__);
+               pr_err("%s: interrupted.\n", __func__);
 
        return 0;
 }
@@ -3293,7 +3288,7 @@ static void srpt_add_one(struct ib_device *device)
                spin_lock_init(&sport->port_acl_lock);
 
                if (srpt_refresh_port(sport)) {
-                       printk(KERN_ERR "MAD registration failed for %s-%d.\n",
+                       pr_err("MAD registration failed for %s-%d.\n",
                               srpt_sdev_name(sdev), i);
                        goto err_ring;
                }
@@ -3330,7 +3325,7 @@ free_dev:
        kfree(sdev);
 err:
        sdev = NULL;
-       printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
+       pr_info("%s(%s) failed.\n", __func__, device->name);
        goto out;
 }
 
@@ -3344,8 +3339,7 @@ static void srpt_remove_one(struct ib_device *device)
 
        sdev = ib_get_client_data(device, &srpt_client);
        if (!sdev) {
-               printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
-                      device->name);
+               pr_info("%s(%s): nothing to do.\n", __func__, device->name);
                return;
        }
 
@@ -3464,7 +3458,7 @@ static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
 
        nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
        if (!nacl) {
-               printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
+               pr_err("Unable to allocate struct srpt_node_acl\n");
                return NULL;
        }
 
@@ -3615,7 +3609,7 @@ static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
        u8 i_port_id[16];
 
        if (srpt_parse_i_port_id(i_port_id, name) < 0) {
-               printk(KERN_ERR "invalid initiator port ID %s\n", name);
+               pr_err("invalid initiator port ID %s\n", name);
                ret = -EINVAL;
                goto err;
        }
@@ -3816,12 +3810,12 @@ static ssize_t srpt_tpg_store_enable(
 
        ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
-               printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
+               pr_err("Unable to extract srpt_tpg_store_enable\n");
                return -EINVAL;
        }
 
        if ((tmp != 0) && (tmp != 1)) {
-               printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
+               pr_err("Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
                return -EINVAL;
        }
        if (tmp == 1)
@@ -3980,7 +3974,7 @@ static int __init srpt_init_module(void)
 
        ret = -EINVAL;
        if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
-               printk(KERN_ERR "invalid value %d for kernel module parameter"
+               pr_err("invalid value %d for kernel module parameter"
                       " srp_max_req_size -- must be at least %d.\n",
                       srp_max_req_size, MIN_MAX_REQ_SIZE);
                goto out;
@@ -3988,7 +3982,7 @@ static int __init srpt_init_module(void)
 
        if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
            || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
-               printk(KERN_ERR "invalid value %d for kernel module parameter"
+               pr_err("invalid value %d for kernel module parameter"
                       " srpt_srq_size -- must be in the range [%d..%d].\n",
                       srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
                goto out;
@@ -3996,7 +3990,7 @@ static int __init srpt_init_module(void)
 
        srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
        if (IS_ERR(srpt_target)) {
-               printk(KERN_ERR "couldn't register\n");
+               pr_err("couldn't register\n");
                ret = PTR_ERR(srpt_target);
                goto out;
        }
@@ -4018,13 +4012,13 @@ static int __init srpt_init_module(void)
 
        ret = target_fabric_configfs_register(srpt_target);
        if (ret < 0) {
-               printk(KERN_ERR "couldn't register\n");
+               pr_err("couldn't register\n");
                goto out_free_target;
        }
 
        ret = ib_register_client(&srpt_client);
        if (ret) {
-               printk(KERN_ERR "couldn't register IB client\n");
+               pr_err("couldn't register IB client\n");
                goto out_unregister_target;
        }
 
index f0fbb4ade85db9b3db5ce049eff76cd978d08353..4f7dc044601e2751ad625e4c011aa3a1c328e62f 100644 (file)
@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
                                return err;
                        }
                        if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
-                               /* compute slave's gid block */
-                               smp->attr_mod = cpu_to_be32(slave / 8);
-                               /* execute cmd */
-                               err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
-                                            vhcr->in_modifier, opcode_modifier,
-                                            vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
-                               if (!err) {
-                                       /* if needed, move slave gid to index 0 */
-                                       if (slave % 8)
-                                               memcpy(outsmp->data,
-                                                      outsmp->data + (slave % 8) * 8, 8);
-                                       /* delete all other gids */
-                                       memset(outsmp->data + 8, 0, 56);
+                               __be64 guid = mlx4_get_admin_guid(dev, slave,
+                                                                 port);
+
+                               /* set the PF admin guid to the FW/HW burned
+                                * GUID, if it wasn't yet set
+                                */
+                               if (slave == 0 && guid == 0) {
+                                       smp->attr_mod = 0;
+                                       err = mlx4_cmd_box(dev,
+                                                          inbox->dma,
+                                                          outbox->dma,
+                                                          vhcr->in_modifier,
+                                                          opcode_modifier,
+                                                          vhcr->op,
+                                                          MLX4_CMD_TIME_CLASS_C,
+                                                          MLX4_CMD_NATIVE);
+                                       if (err)
+                                               return err;
+                                       mlx4_set_admin_guid(dev,
+                                                           *(__be64 *)outsmp->
+                                                           data, slave, port);
+                               } else {
+                                       memcpy(outsmp->data, &guid, 8);
                                }
-                               return err;
+
+                               /* clean all other gids */
+                               memset(outsmp->data + 8, 0, 56);
+                               return 0;
                        }
                        if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
                                err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                                oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
                                vf_oper->vport[port].vlan_idx = NO_INDX;
                                vf_oper->vport[port].mac_idx = NO_INDX;
+                               mlx4_set_random_admin_guid(dev, i, port);
                        }
                        spin_lock_init(&s_state->lock);
                }
index 190fd624bdfebd6e7b5b9e83f8470b840f8d09cb..2619c9fbf42dfb952473e4779a2ee8d6ebfd2c65 100644 (file)
@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
                                priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
                        }
                        spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
+                       mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
+                                           flr_slave);
                        queue_work(priv->mfunc.master.comm_wq,
                                   &priv->mfunc.master.slave_flr_event_work);
                        break;
index acceb75e8c440c6aab8061cc1cdec7c0d420f4b1..ced5ecab5aa754ad44ae055464608bba66d6b137 100644 (file)
@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
 }
 EXPORT_SYMBOL_GPL(mlx4_counter_free);
 
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
+
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+
+       return priv->mfunc.master.vf_admin[entry].vport[port].guid;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
+
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
+{
+       struct mlx4_priv *priv = mlx4_priv(dev);
+       __be64 guid;
+
+       /* hw GUID */
+       if (entry == 0)
+               return;
+
+       get_random_bytes((char *)&guid, sizeof(guid));
+       guid &= ~(cpu_to_be64(1ULL << 56));
+       guid |= cpu_to_be64(1ULL << 57);
+       priv->mfunc.master.vf_admin[entry].vport[port].guid = guid;
+}
+
 static int mlx4_setup_hca(struct mlx4_dev *dev)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
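
mlx4_set_random_admin_guid() above leaves entry 0 (the PF) untouched so it keeps the firmware burned GUID, and gives each VF a randomly generated admin GUID. The two bit operations act on the first octet of the big-endian, EUI-64 style value: clearing cpu_to_be64(1ULL << 56) clears what corresponds to the group/multicast bit, and setting cpu_to_be64(1ULL << 57) sets what corresponds to the locally-administered bit, so the random value still reads as a unicast, locally assigned GUID. The GUID_INFO wrapper change earlier in this merge consumes the same store: slave 0 lazily adopts the GUID reported by firmware, while VF queries are answered straight from this admin table. A small hosted-C sketch of the bit arithmetic (the seed value is arbitrary):

    #include <endian.h>     /* htobe64(), glibc/BSD; stands in for cpu_to_be64() */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            /* Arbitrary stand-in for the random bytes the driver draws. */
            uint64_t be_guid = htobe64(0x0123456789abcdefULL);

            be_guid &= ~htobe64(1ULL << 56);    /* clear I/G (group) bit of octet 0 */
            be_guid |=  htobe64(1ULL << 57);    /* set U/L (locally administered) bit */

            unsigned char octet[8];
            memcpy(octet, &be_guid, sizeof(octet));
            printf("octet 0 = 0x%02x  (I/G=%u, U/L=%u)\n",
                   (unsigned)octet[0], octet[0] & 1u, (octet[0] >> 1) & 1u);
            return 0;
    }
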
index f30eeb730a8667d44bead81f19c606d6770d2bb9..502d3dd2c888528e71af1cbf1ed276b10d058c81 100644 (file)
@@ -499,6 +499,7 @@ struct mlx4_vport_state {
        bool spoofchk;
        u32 link_state;
        u8 qos_vport;
+       __be64 guid;
 };
 
 struct mlx4_vf_admin_state {
index df2238372ea73a0d71b39450cd816810bbcdd1ad..8a64542abc16127627374cf4cb065eea017d135f 100644 (file)
@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
        return 0;
 }
 
+#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
+
 static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 {
        struct fw_page *fwp;
        int n;
 
-       fwp = find_fw_page(dev, addr & PAGE_MASK);
+       fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
        if (!fwp) {
                mlx5_core_warn(dev, "page not found\n");
                return;
        }
 
-       n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+       n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
        fwp->free_count++;
        set_bit(n, &fwp->bitmask);
        if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
                rb_erase(&fwp->rb_node, &dev->priv.page_root);
                if (fwp->free_count != 1)
                        list_del(&fwp->list);
-               dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
-                              DMA_BIDIRECTIONAL);
+               dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
+                              PAGE_SIZE, DMA_BIDIRECTIONAL);
                __free_page(fwp->page);
                kfree(fwp);
        } else if (fwp->free_count == 1) {
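
The new MLX5_U64_4K_PAGE_MASK changes the operand width, not the alignment: it rounds to the same PAGE_SHIFT boundary as PAGE_MASK, but is built from a u64. PAGE_MASK itself is an unsigned long, so on a 32-bit kernel it is only 32 bits wide, and once converted for the addr & PAGE_MASK expression it zeroes the upper half of a 64-bit DMA address -- presumably the failure this hunk guards against. A hosted-C sketch of the effect (address value made up, PAGE_SHIFT assumed to be 12):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SHIFT 12

    int main(void)
    {
            uint64_t addr = 0x0000000123456abcULL;   /* DMA address above 4 GiB */
            /* What PAGE_MASK looks like on a 32-bit kernel ... */
            uint32_t mask32 = ~((uint32_t)(1u << SHIFT) - 1u);
            /* ... and the driver's u64-wide replacement. */
            uint64_t mask64 = (~(uint64_t)0u) << SHIFT;

            printf("32-bit mask: 0x%016" PRIx64 "\n", addr & mask32); /* high bits lost */
            printf("64-bit mask: 0x%016" PRIx64 "\n", addr & mask64); /* high bits kept */
            return 0;
    }
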
index f9ce34bec45b1d615bb1b191bffd867a23dd559a..83e80ab9450048d121b739bd23b85ccb14a39b36 100644 (file)
@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
 int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
 void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
 
+void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
+                        int port);
+__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
+void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
 int mlx4_flow_attach(struct mlx4_dev *dev,
                     struct mlx4_net_trans_rule *rule, u64 *reg_id);
 int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);