]> git.kernelconcepts.de Git - karo-tx-linux.git/blobdiff - drivers/infiniband/core/uverbs_main.c
IB/uverbs: Enable device removal when there are active user space applications
[karo-tx-linux.git] / drivers / infiniband / core / uverbs_main.c
index 082d9c75bf89579961782dc2e975bcc58774236f..c29a660c72fe3674cea593f9cdab483f133edd8f 100644 (file)
@@ -137,6 +137,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
        struct ib_uverbs_device *dev =
                container_of(kobj, struct ib_uverbs_device, kobj);
 
+       cleanup_srcu_struct(&dev->disassociate_srcu);
        kfree(dev);
 }
 
@@ -207,9 +208,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 {
        struct ib_uobject *uobj, *tmp;
 
-       if (!context)
-               return 0;
-
        context->closing = 1;
 
        list_for_each_entry_safe(uobj, tmp, &context->ah_list, list) {
@@ -318,8 +316,16 @@ static void ib_uverbs_release_file(struct kref *ref)
 {
        struct ib_uverbs_file *file =
                container_of(ref, struct ib_uverbs_file, ref);
+       struct ib_device *ib_dev;
+       int srcu_key;
+
+       srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+       ib_dev = srcu_dereference(file->device->ib_dev,
+                                 &file->device->disassociate_srcu);
+       if (ib_dev && !ib_dev->disassociate_ucontext)
+               module_put(ib_dev->owner);
+       srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 
-       module_put(file->device->ib_dev->owner);
        if (atomic_dec_and_test(&file->device->refcount))
                ib_uverbs_comp_dev(file->device);
 
@@ -343,9 +349,19 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
                        return -EAGAIN;
 
                if (wait_event_interruptible(file->poll_wait,
-                                            !list_empty(&file->event_list)))
+                                            (!list_empty(&file->event_list) ||
+                       /* The barriers built into wait_event_interruptible()
+                        * and wake_up() guarentee this will see the null set
+                        * without using RCU
+                        */
+                                            !file->uverbs_file->device->ib_dev)))
                        return -ERESTARTSYS;
 
+               /* If device was disassociated and no event exists set an error */
+               if (list_empty(&file->event_list) &&
+                   !file->uverbs_file->device->ib_dev)
+                       return -EIO;
+
                spin_lock_irq(&file->lock);
        }
 
@@ -408,8 +424,11 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
 {
        struct ib_uverbs_event_file *file = filp->private_data;
        struct ib_uverbs_event *entry, *tmp;
+       int closed_already = 0;
 
+       mutex_lock(&file->uverbs_file->device->lists_mutex);
        spin_lock_irq(&file->lock);
+       closed_already = file->is_closed;
        file->is_closed = 1;
        list_for_each_entry_safe(entry, tmp, &file->event_list, list) {
                if (entry->counter)
@@ -417,9 +436,14 @@ static int ib_uverbs_event_close(struct inode *inode, struct file *filp)
                kfree(entry);
        }
        spin_unlock_irq(&file->lock);
+       if (!closed_already) {
+               list_del(&file->list);
+               if (file->is_async)
+                       ib_unregister_event_handler(&file->uverbs_file->
+                               event_handler);
+       }
+       mutex_unlock(&file->uverbs_file->device->lists_mutex);
 
-       if (file->is_async)
-               ib_unregister_event_handler(&file->uverbs_file->event_handler);
        kref_put(&file->uverbs_file->ref, ib_uverbs_release_file);
        kref_put(&file->ref, ib_uverbs_release_event_file);
 
@@ -584,6 +608,11 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
        if (IS_ERR(filp))
                goto err_put_refs;
 
+       mutex_lock(&uverbs_file->device->lists_mutex);
+       list_add_tail(&ev_file->list,
+                     &uverbs_file->device->uverbs_events_file_list);
+       mutex_unlock(&uverbs_file->device->lists_mutex);
+
        if (is_async) {
                WARN_ON(uverbs_file->async_file);
                uverbs_file->async_file = ev_file;
@@ -646,12 +675,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                             size_t count, loff_t *pos)
 {
        struct ib_uverbs_file *file = filp->private_data;
-       struct ib_device *ib_dev = file->device->ib_dev;
+       struct ib_device *ib_dev;
        struct ib_uverbs_cmd_hdr hdr;
        __u32 flags;
-
-       if (!ib_dev)
-               return -ENODEV;
+       int srcu_key;
+       ssize_t ret;
 
        if (count < sizeof hdr)
                return -EINVAL;
@@ -659,6 +687,14 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (copy_from_user(&hdr, buf, sizeof hdr))
                return -EFAULT;
 
+       srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+       ib_dev = srcu_dereference(file->device->ib_dev,
+                                 &file->device->disassociate_srcu);
+       if (!ib_dev) {
+               ret = -EIO;
+               goto out;
+       }
+
        flags = (hdr.command &
                 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
 
@@ -666,26 +702,36 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                __u32 command;
 
                if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
-                                          IB_USER_VERBS_CMD_COMMAND_MASK))
-                       return -EINVAL;
+                                          IB_USER_VERBS_CMD_COMMAND_MASK)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
                command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
 
                if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
-                   !uverbs_cmd_table[command])
-                       return -EINVAL;
+                   !uverbs_cmd_table[command]) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
                if (!file->ucontext &&
-                   command != IB_USER_VERBS_CMD_GET_CONTEXT)
-                       return -EINVAL;
+                   command != IB_USER_VERBS_CMD_GET_CONTEXT) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
-               if (!(ib_dev->uverbs_cmd_mask & (1ull << command)))
-                       return -ENOSYS;
+               if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) {
+                       ret = -ENOSYS;
+                       goto out;
+               }
 
-               if (hdr.in_words * 4 != count)
-                       return -EINVAL;
+               if (hdr.in_words * 4 != count) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
-               return uverbs_cmd_table[command](file, ib_dev,
+               ret = uverbs_cmd_table[command](file, ib_dev,
                                                 buf + sizeof(hdr),
                                                 hdr.in_words * 4,
                                                 hdr.out_words * 4);
@@ -696,51 +742,72 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                struct ib_uverbs_ex_cmd_hdr ex_hdr;
                struct ib_udata ucore;
                struct ib_udata uhw;
-               int err;
                size_t written_count = count;
 
                if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
-                                          IB_USER_VERBS_CMD_COMMAND_MASK))
-                       return -EINVAL;
+                                          IB_USER_VERBS_CMD_COMMAND_MASK)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
                command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
 
                if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
-                   !uverbs_ex_cmd_table[command])
-                       return -ENOSYS;
+                   !uverbs_ex_cmd_table[command]) {
+                       ret = -ENOSYS;
+                       goto out;
+               }
 
-               if (!file->ucontext)
-                       return -EINVAL;
+               if (!file->ucontext) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
-               if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command)))
-                       return -ENOSYS;
+               if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) {
+                       ret = -ENOSYS;
+                       goto out;
+               }
 
-               if (count < (sizeof(hdr) + sizeof(ex_hdr)))
-                       return -EINVAL;
+               if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
-               if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr)))
-                       return -EFAULT;
+               if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
 
                count -= sizeof(hdr) + sizeof(ex_hdr);
                buf += sizeof(hdr) + sizeof(ex_hdr);
 
-               if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count)
-                       return -EINVAL;
+               if ((hdr.in_words + ex_hdr.provider_in_words) * 8 != count) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
-               if (ex_hdr.cmd_hdr_reserved)
-                       return -EINVAL;
+               if (ex_hdr.cmd_hdr_reserved) {
+                       ret = -EINVAL;
+                       goto out;
+               }
 
                if (ex_hdr.response) {
-                       if (!hdr.out_words && !ex_hdr.provider_out_words)
-                               return -EINVAL;
+                       if (!hdr.out_words && !ex_hdr.provider_out_words) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
 
                        if (!access_ok(VERIFY_WRITE,
                                       (void __user *) (unsigned long) ex_hdr.response,
-                                      (hdr.out_words + ex_hdr.provider_out_words) * 8))
-                               return -EFAULT;
+                                      (hdr.out_words + ex_hdr.provider_out_words) * 8)) {
+                               ret = -EFAULT;
+                               goto out;
+                       }
                } else {
-                       if (hdr.out_words || ex_hdr.provider_out_words)
-                               return -EINVAL;
+                       if (hdr.out_words || ex_hdr.provider_out_words) {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                }
 
                INIT_UDATA_BUF_OR_NULL(&ucore, buf, (unsigned long) ex_hdr.response,
@@ -752,29 +819,43 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                                       ex_hdr.provider_in_words * 8,
                                       ex_hdr.provider_out_words * 8);
 
-               err = uverbs_ex_cmd_table[command](file,
+               ret = uverbs_ex_cmd_table[command](file,
                                                   ib_dev,
                                                   &ucore,
                                                   &uhw);
-
-               if (err)
-                       return err;
-
-               return written_count;
+               if (!ret)
+                       ret = written_count;
+       } else {
+               ret = -ENOSYS;
        }
 
-       return -ENOSYS;
+out:
+       srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+       return ret;
 }
 
 static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 {
        struct ib_uverbs_file *file = filp->private_data;
-       struct ib_device *ib_dev = file->device->ib_dev;
+       struct ib_device *ib_dev;
+       int ret = 0;
+       int srcu_key;
 
-       if (!ib_dev || !file->ucontext)
-               return -ENODEV;
+       srcu_key = srcu_read_lock(&file->device->disassociate_srcu);
+       ib_dev = srcu_dereference(file->device->ib_dev,
+                                 &file->device->disassociate_srcu);
+       if (!ib_dev) {
+               ret = -EIO;
+               goto out;
+       }
+
+       if (!file->ucontext)
+               ret = -ENODEV;
        else
-               return ib_dev->mmap(file->ucontext, vma);
+               ret = ib_dev->mmap(file->ucontext, vma);
+out:
+       srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
+       return ret;
 }
 
 /*
@@ -791,21 +872,43 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 {
        struct ib_uverbs_device *dev;
        struct ib_uverbs_file *file;
+       struct ib_device *ib_dev;
        int ret;
+       int module_dependent;
+       int srcu_key;
 
        dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
        if (!atomic_inc_not_zero(&dev->refcount))
                return -ENXIO;
 
-       if (!try_module_get(dev->ib_dev->owner)) {
-               ret = -ENODEV;
+       srcu_key = srcu_read_lock(&dev->disassociate_srcu);
+       mutex_lock(&dev->lists_mutex);
+       ib_dev = srcu_dereference(dev->ib_dev,
+                                 &dev->disassociate_srcu);
+       if (!ib_dev) {
+               ret = -EIO;
                goto err;
        }
 
-       file = kmalloc(sizeof *file, GFP_KERNEL);
+       /* In case IB device supports disassociate ucontext, there is no hard
+        * dependency between uverbs device and its low level device.
+        */
+       module_dependent = !(ib_dev->disassociate_ucontext);
+
+       if (module_dependent) {
+               if (!try_module_get(ib_dev->owner)) {
+                       ret = -ENODEV;
+                       goto err;
+               }
+       }
+
+       file = kzalloc(sizeof(*file), GFP_KERNEL);
        if (!file) {
                ret = -ENOMEM;
-               goto err_module;
+               if (module_dependent)
+                       goto err_module;
+
+               goto err;
        }
 
        file->device     = dev;
@@ -816,13 +919,18 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 
        filp->private_data = file;
        kobject_get(&dev->kobj);
+       list_add_tail(&file->list, &dev->uverbs_file_list);
+       mutex_unlock(&dev->lists_mutex);
+       srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
 
        return nonseekable_open(inode, filp);
 
 err_module:
-       module_put(dev->ib_dev->owner);
+       module_put(ib_dev->owner);
 
 err:
+       mutex_unlock(&dev->lists_mutex);
+       srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
        if (atomic_dec_and_test(&dev->refcount))
                ib_uverbs_comp_dev(dev);
 
@@ -833,8 +941,18 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
        struct ib_uverbs_file *file = filp->private_data;
        struct ib_uverbs_device *dev = file->device;
-
-       ib_uverbs_cleanup_ucontext(file, file->ucontext);
+       struct ib_ucontext *ucontext = NULL;
+
+       mutex_lock(&file->device->lists_mutex);
+       ucontext = file->ucontext;
+       file->ucontext = NULL;
+       if (!file->is_closed) {
+               list_del(&file->list);
+               file->is_closed = 1;
+       }
+       mutex_unlock(&file->device->lists_mutex);
+       if (ucontext)
+               ib_uverbs_cleanup_ucontext(file, ucontext);
 
        if (file->async_file)
                kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
@@ -871,12 +989,21 @@ static struct ib_client uverbs_client = {
 static ssize_t show_ibdev(struct device *device, struct device_attribute *attr,
                          char *buf)
 {
+       int ret = -ENODEV;
+       int srcu_key;
        struct ib_uverbs_device *dev = dev_get_drvdata(device);
+       struct ib_device *ib_dev;
 
        if (!dev)
                return -ENODEV;
 
-       return sprintf(buf, "%s\n", dev->ib_dev->name);
+       srcu_key = srcu_read_lock(&dev->disassociate_srcu);
+       ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
+       if (ib_dev)
+               ret = sprintf(buf, "%s\n", ib_dev->name);
+       srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
+
+       return ret;
 }
 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
 
@@ -884,11 +1011,19 @@ static ssize_t show_dev_abi_version(struct device *device,
                                    struct device_attribute *attr, char *buf)
 {
        struct ib_uverbs_device *dev = dev_get_drvdata(device);
+       int ret = -ENODEV;
+       int srcu_key;
+       struct ib_device *ib_dev;
 
        if (!dev)
                return -ENODEV;
+       srcu_key = srcu_read_lock(&dev->disassociate_srcu);
+       ib_dev = srcu_dereference(dev->ib_dev, &dev->disassociate_srcu);
+       if (ib_dev)
+               ret = sprintf(buf, "%d\n", ib_dev->uverbs_abi_ver);
+       srcu_read_unlock(&dev->disassociate_srcu, srcu_key);
 
-       return sprintf(buf, "%d\n", dev->ib_dev->uverbs_abi_ver);
+       return ret;
 }
 static DEVICE_ATTR(abi_version, S_IRUGO, show_dev_abi_version, NULL);
 
@@ -928,6 +1063,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
        int devnum;
        dev_t base;
        struct ib_uverbs_device *uverbs_dev;
+       int ret;
 
        if (!device->alloc_ucontext)
                return;
@@ -936,11 +1072,20 @@ static void ib_uverbs_add_one(struct ib_device *device)
        if (!uverbs_dev)
                return;
 
+       ret = init_srcu_struct(&uverbs_dev->disassociate_srcu);
+       if (ret) {
+               kfree(uverbs_dev);
+               return;
+       }
+
        atomic_set(&uverbs_dev->refcount, 1);
        init_completion(&uverbs_dev->comp);
        uverbs_dev->xrcd_tree = RB_ROOT;
        mutex_init(&uverbs_dev->xrcd_tree_mutex);
        kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
+       mutex_init(&uverbs_dev->lists_mutex);
+       INIT_LIST_HEAD(&uverbs_dev->uverbs_file_list);
+       INIT_LIST_HEAD(&uverbs_dev->uverbs_events_file_list);
 
        spin_lock(&map_lock);
        devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -961,7 +1106,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
        }
        spin_unlock(&map_lock);
 
-       uverbs_dev->ib_dev           = device;
+       rcu_assign_pointer(uverbs_dev->ib_dev, device);
        uverbs_dev->num_comp_vectors = device->num_comp_vectors;
 
        cdev_init(&uverbs_dev->cdev, NULL);
@@ -1005,9 +1150,72 @@ err:
        return;
 }
 
+static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
+                                       struct ib_device *ib_dev)
+{
+       struct ib_uverbs_file *file;
+       struct ib_uverbs_event_file *event_file;
+       struct ib_event event;
+
+       /* Pending running commands to terminate */
+       synchronize_srcu(&uverbs_dev->disassociate_srcu);
+       event.event = IB_EVENT_DEVICE_FATAL;
+       event.element.port_num = 0;
+       event.device = ib_dev;
+
+       mutex_lock(&uverbs_dev->lists_mutex);
+       while (!list_empty(&uverbs_dev->uverbs_file_list)) {
+               struct ib_ucontext *ucontext;
+
+               file = list_first_entry(&uverbs_dev->uverbs_file_list,
+                                       struct ib_uverbs_file, list);
+               file->is_closed = 1;
+               ucontext = file->ucontext;
+               list_del(&file->list);
+               file->ucontext = NULL;
+               kref_get(&file->ref);
+               mutex_unlock(&uverbs_dev->lists_mutex);
+               /* We must release the mutex before going ahead and calling
+                * disassociate_ucontext. disassociate_ucontext might end up
+                * indirectly calling uverbs_close, for example due to freeing
+                * the resources (e.g mmput).
+                */
+               ib_uverbs_event_handler(&file->event_handler, &event);
+               if (ucontext) {
+                       ib_dev->disassociate_ucontext(ucontext);
+                       ib_uverbs_cleanup_ucontext(file, ucontext);
+               }
+
+               mutex_lock(&uverbs_dev->lists_mutex);
+               kref_put(&file->ref, ib_uverbs_release_file);
+       }
+
+       while (!list_empty(&uverbs_dev->uverbs_events_file_list)) {
+               event_file = list_first_entry(&uverbs_dev->
+                                             uverbs_events_file_list,
+                                             struct ib_uverbs_event_file,
+                                             list);
+               spin_lock_irq(&event_file->lock);
+               event_file->is_closed = 1;
+               spin_unlock_irq(&event_file->lock);
+
+               list_del(&event_file->list);
+               if (event_file->is_async) {
+                       ib_unregister_event_handler(&event_file->uverbs_file->
+                                                   event_handler);
+                       event_file->uverbs_file->event_handler.device = NULL;
+               }
+
+               wake_up_interruptible(&event_file->poll_wait);
+               kill_fasync(&event_file->async_queue, SIGIO, POLL_IN);
+       }
+       mutex_unlock(&uverbs_dev->lists_mutex);
+}
+
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 {
        struct ib_uverbs_device *uverbs_dev = client_data;
+       int wait_clients = 1;
 
        if (!uverbs_dev)
                return;
@@ -1021,9 +1229,27 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
        else
                clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
 
+       if (device->disassociate_ucontext) {
+               /* We disassociate HW resources and immediately return.
+                * Userspace will see a EIO errno for all future access.
+                * Upon returning, ib_device may be freed internally and is not
+                * valid any more.
+                * uverbs_device is still available until all clients close
+                * their files, then the uverbs device ref count will be zero
+                * and its resources will be freed.
+                * Note: At this point no more files can be opened since the
+                * cdev was deleted, however active clients can still issue
+                * commands and close their open files.
+                */
+               rcu_assign_pointer(uverbs_dev->ib_dev, NULL);
+               ib_uverbs_free_hw_resources(uverbs_dev, device);
+               wait_clients = 0;
+       }
+
        if (atomic_dec_and_test(&uverbs_dev->refcount))
                ib_uverbs_comp_dev(uverbs_dev);
-       wait_for_completion(&uverbs_dev->comp);
+       if (wait_clients)
+               wait_for_completion(&uverbs_dev->comp);
        kobject_put(&uverbs_dev->kobj);
 }