]> git.kernelconcepts.de Git - karo-tx-linux.git/blob - drivers/infiniband/core/rdma_core.c
88d1e596f910ff764d06b43fac66090a89da71fc
[karo-tx-linux.git] / drivers / infiniband / core / rdma_core.c
1 /*
2  * Copyright (c) 2016, Mellanox Technologies inc.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/file.h>
34 #include <linux/anon_inodes.h>
35 #include <rdma/ib_verbs.h>
36 #include <rdma/uverbs_types.h>
37 #include <linux/rcupdate.h>
38 #include "uverbs.h"
39 #include "core_priv.h"
40 #include "rdma_core.h"
41
42 void uverbs_uobject_get(struct ib_uobject *uobject)
43 {
44         kref_get(&uobject->ref);
45 }
46
47 static void uverbs_uobject_free(struct kref *ref)
48 {
49         struct ib_uobject *uobj =
50                 container_of(ref, struct ib_uobject, ref);
51
52         if (uobj->type->type_class->needs_kfree_rcu)
53                 kfree_rcu(uobj, rcu);
54         else
55                 kfree(uobj);
56 }
57
58 void uverbs_uobject_put(struct ib_uobject *uobject)
59 {
60         kref_put(&uobject->ref, uverbs_uobject_free);
61 }
62
63 static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
64 {
65         /*
66          * When a shared access is required, we use a positive counter. Each
67          * shared access request checks that the value != -1 and increment it.
68          * Exclusive access is required for operations like write or destroy.
69          * In exclusive access mode, we check that the counter is zero (nobody
70          * claimed this object) and we set it to -1. Releasing a shared access
71          * lock is done simply by decreasing the counter. As for exclusive
72          * access locks, since only a single one of them is is allowed
73          * concurrently, setting the counter to zero is enough for releasing
74          * this lock.
75          */
76         if (!exclusive)
77                 return __atomic_add_unless(&uobj->usecnt, 1, -1) == -1 ?
78                         -EBUSY : 0;
79
80         /* lock is either WRITE or DESTROY - should be exclusive */
81         return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
82 }
83
84 static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
85                                      const struct uverbs_obj_type *type)
86 {
87         struct ib_uobject *uobj = kmalloc(type->obj_size, GFP_KERNEL);
88
89         if (!uobj)
90                 return ERR_PTR(-ENOMEM);
91         /*
92          * user_handle should be filled by the handler,
93          * The object is added to the list in the commit stage.
94          */
95         uobj->context = context;
96         uobj->type = type;
97         atomic_set(&uobj->usecnt, 0);
98         kref_init(&uobj->ref);
99
100         return uobj;
101 }
102
103 static int idr_add_uobj(struct ib_uobject *uobj)
104 {
105         int ret;
106
107         idr_preload(GFP_KERNEL);
108         spin_lock(&uobj->context->ufile->idr_lock);
109
110         /*
111          * We start with allocating an idr pointing to NULL. This represents an
112          * object which isn't initialized yet. We'll replace it later on with
113          * the real object once we commit.
114          */
115         ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
116                         min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
117         if (ret >= 0)
118                 uobj->id = ret;
119
120         spin_unlock(&uobj->context->ufile->idr_lock);
121         idr_preload_end();
122
123         return ret < 0 ? ret : 0;
124 }
125
126 /*
127  * It only removes it from the uobjects list, uverbs_uobject_put() is still
128  * required.
129  */
130 static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
131 {
132         spin_lock(&uobj->context->ufile->idr_lock);
133         idr_remove(&uobj->context->ufile->idr, uobj->id);
134         spin_unlock(&uobj->context->ufile->idr_lock);
135 }
136
137 /* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
138 static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
139                                                  struct ib_ucontext *ucontext,
140                                                  int id, bool exclusive)
141 {
142         struct ib_uobject *uobj;
143
144         rcu_read_lock();
145         /* object won't be released as we're protected in rcu */
146         uobj = idr_find(&ucontext->ufile->idr, id);
147         if (!uobj) {
148                 uobj = ERR_PTR(-ENOENT);
149                 goto free;
150         }
151
152         uverbs_uobject_get(uobj);
153 free:
154         rcu_read_unlock();
155         return uobj;
156 }
157
158 static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
159                                                 struct ib_ucontext *ucontext,
160                                                 int id, bool exclusive)
161 {
162         struct file *f;
163         struct ib_uobject *uobject;
164         const struct uverbs_obj_fd_type *fd_type =
165                 container_of(type, struct uverbs_obj_fd_type, type);
166
167         if (exclusive)
168                 return ERR_PTR(-EOPNOTSUPP);
169
170         f = fget(id);
171         if (!f)
172                 return ERR_PTR(-EBADF);
173
174         uobject = f->private_data;
175         /*
176          * fget(id) ensures we are not currently running uverbs_close_fd,
177          * and the caller is expected to ensure that uverbs_close_fd is never
178          * done while a call top lookup is possible.
179          */
180         if (f->f_op != fd_type->fops) {
181                 fput(f);
182                 return ERR_PTR(-EBADF);
183         }
184
185         uverbs_uobject_get(uobject);
186         return uobject;
187 }
188
189 struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
190                                            struct ib_ucontext *ucontext,
191                                            int id, bool exclusive)
192 {
193         struct ib_uobject *uobj;
194         int ret;
195
196         uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
197         if (IS_ERR(uobj))
198                 return uobj;
199
200         if (uobj->type != type) {
201                 ret = -EINVAL;
202                 goto free;
203         }
204
205         ret = uverbs_try_lock_object(uobj, exclusive);
206         if (ret) {
207                 WARN(ucontext->cleanup_reason,
208                      "ib_uverbs: Trying to lookup_get while cleanup context\n");
209                 goto free;
210         }
211
212         return uobj;
213 free:
214         uobj->type->type_class->lookup_put(uobj, exclusive);
215         uverbs_uobject_put(uobj);
216         return ERR_PTR(ret);
217 }
218
219 static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
220                                                   struct ib_ucontext *ucontext)
221 {
222         int ret;
223         struct ib_uobject *uobj;
224
225         uobj = alloc_uobj(ucontext, type);
226         if (IS_ERR(uobj))
227                 return uobj;
228
229         ret = idr_add_uobj(uobj);
230         if (ret)
231                 goto uobj_put;
232
233         ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
234                                    RDMACG_RESOURCE_HCA_OBJECT);
235         if (ret)
236                 goto idr_remove;
237
238         return uobj;
239
240 idr_remove:
241         uverbs_idr_remove_uobj(uobj);
242 uobj_put:
243         uverbs_uobject_put(uobj);
244         return ERR_PTR(ret);
245 }
246
247 static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
248                                                  struct ib_ucontext *ucontext)
249 {
250         const struct uverbs_obj_fd_type *fd_type =
251                 container_of(type, struct uverbs_obj_fd_type, type);
252         int new_fd;
253         struct ib_uobject *uobj;
254         struct ib_uobject_file *uobj_file;
255         struct file *filp;
256
257         new_fd = get_unused_fd_flags(O_CLOEXEC);
258         if (new_fd < 0)
259                 return ERR_PTR(new_fd);
260
261         uobj = alloc_uobj(ucontext, type);
262         if (IS_ERR(uobj)) {
263                 put_unused_fd(new_fd);
264                 return uobj;
265         }
266
267         uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
268         filp = anon_inode_getfile(fd_type->name,
269                                   fd_type->fops,
270                                   uobj_file,
271                                   fd_type->flags);
272         if (IS_ERR(filp)) {
273                 put_unused_fd(new_fd);
274                 uverbs_uobject_put(uobj);
275                 return (void *)filp;
276         }
277
278         uobj_file->uobj.id = new_fd;
279         uobj_file->uobj.object = filp;
280         uobj_file->ufile = ucontext->ufile;
281         INIT_LIST_HEAD(&uobj->list);
282         kref_get(&uobj_file->ufile->ref);
283
284         return uobj;
285 }
286
287 struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
288                                             struct ib_ucontext *ucontext)
289 {
290         return type->type_class->alloc_begin(type, ucontext);
291 }
292
293 static void uverbs_uobject_add(struct ib_uobject *uobject)
294 {
295         mutex_lock(&uobject->context->uobjects_lock);
296         list_add(&uobject->list, &uobject->context->uobjects);
297         mutex_unlock(&uobject->context->uobjects_lock);
298 }
299
300 static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
301                                                   enum rdma_remove_reason why)
302 {
303         const struct uverbs_obj_idr_type *idr_type =
304                 container_of(uobj->type, struct uverbs_obj_idr_type,
305                              type);
306         int ret = idr_type->destroy_object(uobj, why);
307
308         /*
309          * We can only fail gracefully if the user requested to destroy the
310          * object. In the rest of the cases, just remove whatever you can.
311          */
312         if (why == RDMA_REMOVE_DESTROY && ret)
313                 return ret;
314
315         ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
316                            RDMACG_RESOURCE_HCA_OBJECT);
317         uverbs_idr_remove_uobj(uobj);
318
319         return ret;
320 }
321
322 static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
323 {
324         struct ib_uobject_file *uobj_file =
325                 container_of(uobj, struct ib_uobject_file, uobj);
326         struct file *filp = uobj->object;
327         int id = uobj_file->uobj.id;
328
329         /* Unsuccessful NEW */
330         fput(filp);
331         put_unused_fd(id);
332 }
333
334 static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
335                                                  enum rdma_remove_reason why)
336 {
337         const struct uverbs_obj_fd_type *fd_type =
338                 container_of(uobj->type, struct uverbs_obj_fd_type, type);
339         struct ib_uobject_file *uobj_file =
340                 container_of(uobj, struct ib_uobject_file, uobj);
341         int ret = fd_type->context_closed(uobj_file, why);
342
343         if (why == RDMA_REMOVE_DESTROY && ret)
344                 return ret;
345
346         if (why == RDMA_REMOVE_DURING_CLEANUP) {
347                 alloc_abort_fd_uobject(uobj);
348                 return ret;
349         }
350
351         uobj_file->uobj.context = NULL;
352         return ret;
353 }
354
355 static void lockdep_check(struct ib_uobject *uobj, bool exclusive)
356 {
357 #ifdef CONFIG_LOCKDEP
358         if (exclusive)
359                 WARN_ON(atomic_read(&uobj->usecnt) > 0);
360         else
361                 WARN_ON(atomic_read(&uobj->usecnt) == -1);
362 #endif
363 }
364
365 static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
366                                                     enum rdma_remove_reason why,
367                                                     bool lock)
368 {
369         int ret;
370         struct ib_ucontext *ucontext = uobj->context;
371
372         ret = uobj->type->type_class->remove_commit(uobj, why);
373         if (ret && why == RDMA_REMOVE_DESTROY) {
374                 /* We couldn't remove the object, so just unlock the uobject */
375                 atomic_set(&uobj->usecnt, 0);
376                 uobj->type->type_class->lookup_put(uobj, true);
377         } else {
378                 if (lock)
379                         mutex_lock(&ucontext->uobjects_lock);
380                 list_del(&uobj->list);
381                 if (lock)
382                         mutex_unlock(&ucontext->uobjects_lock);
383                 /* put the ref we took when we created the object */
384                 uverbs_uobject_put(uobj);
385         }
386
387         return ret;
388 }
389
390 /* This is called only for user requested DESTROY reasons */
391 int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
392 {
393         int ret;
394         struct ib_ucontext *ucontext = uobj->context;
395
396         /* put the ref count we took at lookup_get */
397         uverbs_uobject_put(uobj);
398         /* Cleanup is running. Calling this should have been impossible */
399         if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
400                 WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
401                 return 0;
402         }
403         lockdep_check(uobj, true);
404         ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY, true);
405
406         up_read(&ucontext->cleanup_rwsem);
407         return ret;
408 }
409
410 static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
411 {
412         uverbs_uobject_add(uobj);
413         spin_lock(&uobj->context->ufile->idr_lock);
414         /*
415          * We already allocated this IDR with a NULL object, so
416          * this shouldn't fail.
417          */
418         WARN_ON(idr_replace(&uobj->context->ufile->idr,
419                             uobj, uobj->id));
420         spin_unlock(&uobj->context->ufile->idr_lock);
421 }
422
423 static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
424 {
425         struct ib_uobject_file *uobj_file =
426                 container_of(uobj, struct ib_uobject_file, uobj);
427
428         uverbs_uobject_add(&uobj_file->uobj);
429         fd_install(uobj_file->uobj.id, uobj->object);
430         /* This shouldn't be used anymore. Use the file object instead */
431         uobj_file->uobj.id = 0;
432         /* Get another reference as we export this to the fops */
433         uverbs_uobject_get(&uobj_file->uobj);
434 }
435
436 int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
437 {
438         /* Cleanup is running. Calling this should have been impossible */
439         if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
440                 int ret;
441
442                 WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
443                 ret = uobj->type->type_class->remove_commit(uobj,
444                                                             RDMA_REMOVE_DURING_CLEANUP);
445                 if (ret)
446                         pr_warn("ib_uverbs: cleanup of idr object %d failed\n",
447                                 uobj->id);
448                 return ret;
449         }
450
451         uobj->type->type_class->alloc_commit(uobj);
452         up_read(&uobj->context->cleanup_rwsem);
453
454         return 0;
455 }
456
457 static void alloc_abort_idr_uobject(struct ib_uobject *uobj)
458 {
459         uverbs_idr_remove_uobj(uobj);
460         ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
461                            RDMACG_RESOURCE_HCA_OBJECT);
462         uverbs_uobject_put(uobj);
463 }
464
465 void rdma_alloc_abort_uobject(struct ib_uobject *uobj)
466 {
467         uobj->type->type_class->alloc_abort(uobj);
468 }
469
470 static void lookup_put_idr_uobject(struct ib_uobject *uobj, bool exclusive)
471 {
472 }
473
474 static void lookup_put_fd_uobject(struct ib_uobject *uobj, bool exclusive)
475 {
476         struct file *filp = uobj->object;
477
478         WARN_ON(exclusive);
479         /* This indirectly calls uverbs_close_fd and free the object */
480         fput(filp);
481 }
482
483 void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive)
484 {
485         lockdep_check(uobj, exclusive);
486         uobj->type->type_class->lookup_put(uobj, exclusive);
487         /*
488          * In order to unlock an object, either decrease its usecnt for
489          * read access or zero it in case of exclusive access. See
490          * uverbs_try_lock_object for locking schema information.
491          */
492         if (!exclusive)
493                 atomic_dec(&uobj->usecnt);
494         else
495                 atomic_set(&uobj->usecnt, 0);
496
497         uverbs_uobject_put(uobj);
498 }
499
500 const struct uverbs_obj_type_class uverbs_idr_class = {
501         .alloc_begin = alloc_begin_idr_uobject,
502         .lookup_get = lookup_get_idr_uobject,
503         .alloc_commit = alloc_commit_idr_uobject,
504         .alloc_abort = alloc_abort_idr_uobject,
505         .lookup_put = lookup_put_idr_uobject,
506         .remove_commit = remove_commit_idr_uobject,
507         /*
508          * When we destroy an object, we first just lock it for WRITE and
509          * actually DESTROY it in the finalize stage. So, the problematic
510          * scenario is when we just started the finalize stage of the
511          * destruction (nothing was executed yet). Now, the other thread
512          * fetched the object for READ access, but it didn't lock it yet.
513          * The DESTROY thread continues and starts destroying the object.
514          * When the other thread continue - without the RCU, it would
515          * access freed memory. However, the rcu_read_lock delays the free
516          * until the rcu_read_lock of the READ operation quits. Since the
517          * exclusive lock of the object is still taken by the DESTROY flow, the
518          * READ operation will get -EBUSY and it'll just bail out.
519          */
520         .needs_kfree_rcu = true,
521 };
522
523 static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
524 {
525         struct ib_ucontext *ucontext;
526         struct ib_uverbs_file *ufile = uobj_file->ufile;
527         int ret;
528
529         mutex_lock(&uobj_file->ufile->cleanup_mutex);
530
531         /* uobject was either already cleaned up or is cleaned up right now anyway */
532         if (!uobj_file->uobj.context ||
533             !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
534                 goto unlock;
535
536         ucontext = uobj_file->uobj.context;
537         ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE,
538                                           true);
539         up_read(&ucontext->cleanup_rwsem);
540         if (ret)
541                 pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
542 unlock:
543         mutex_unlock(&ufile->cleanup_mutex);
544 }
545
546 void uverbs_close_fd(struct file *f)
547 {
548         struct ib_uobject_file *uobj_file = f->private_data;
549         struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
550
551         _uverbs_close_fd(uobj_file);
552         uverbs_uobject_put(&uobj_file->uobj);
553         kref_put(uverbs_file_ref, ib_uverbs_release_file);
554 }
555
556 void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
557 {
558         enum rdma_remove_reason reason = device_removed ?
559                 RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
560         unsigned int cur_order = 0;
561
562         ucontext->cleanup_reason = reason;
563         /*
564          * Waits for all remove_commit and alloc_commit to finish. Logically, We
565          * want to hold this forever as the context is going to be destroyed,
566          * but we'll release it since it causes a "held lock freed" BUG message.
567          */
568         down_write(&ucontext->cleanup_rwsem);
569
570         while (!list_empty(&ucontext->uobjects)) {
571                 struct ib_uobject *obj, *next_obj;
572                 unsigned int next_order = UINT_MAX;
573
574                 /*
575                  * This shouldn't run while executing other commands on this
576                  * context. Thus, the only thing we should take care of is
577                  * releasing a FD while traversing this list. The FD could be
578                  * closed and released from the _release fop of this FD.
579                  * In order to mitigate this, we add a lock.
580                  * We take and release the lock per order traversal in order
581                  * to let other threads (which might still use the FDs) chance
582                  * to run.
583                  */
584                 mutex_lock(&ucontext->uobjects_lock);
585                 list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
586                                          list)
587                         if (obj->type->destroy_order == cur_order) {
588                                 int ret;
589
590                                 /*
591                                  * if we hit this WARN_ON, that means we are
592                                  * racing with a lookup_get.
593                                  */
594                                 WARN_ON(uverbs_try_lock_object(obj, true));
595                                 ret = _rdma_remove_commit_uobject(obj, reason,
596                                                                   false);
597                                 if (ret)
598                                         pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
599                                                 obj->id, cur_order);
600                         } else {
601                                 next_order = min(next_order,
602                                                  obj->type->destroy_order);
603                         }
604                 mutex_unlock(&ucontext->uobjects_lock);
605                 cur_order = next_order;
606         }
607         up_write(&ucontext->cleanup_rwsem);
608 }
609
610 void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
611 {
612         ucontext->cleanup_reason = 0;
613         mutex_init(&ucontext->uobjects_lock);
614         INIT_LIST_HEAD(&ucontext->uobjects);
615         init_rwsem(&ucontext->cleanup_rwsem);
616 }
617
618 const struct uverbs_obj_type_class uverbs_fd_class = {
619         .alloc_begin = alloc_begin_fd_uobject,
620         .lookup_get = lookup_get_fd_uobject,
621         .alloc_commit = alloc_commit_fd_uobject,
622         .alloc_abort = alloc_abort_fd_uobject,
623         .lookup_put = lookup_put_fd_uobject,
624         .remove_commit = remove_commit_fd_uobject,
625         .needs_kfree_rcu = false,
626 };
627