blk-mq: initialize mq kobjects in blk_mq_init_allocated_queue()

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9e6b064e533979446a936c45c18f500c6f87725b..ed4b55176cddaedbb08f0101ca7040fea4d18cfd 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -20,6 +20,8 @@
 #include <linux/cpu.h>
 #include <linux/cache.h>
 #include <linux/sched/sysctl.h>
+#include <linux/sched/topology.h>
+#include <linux/sched/signal.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
 #include <linux/prefetch.h>
@@ -75,10 +77,20 @@ void blk_mq_freeze_queue_start(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
-static void blk_mq_freeze_queue_wait(struct request_queue *q)
+void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
        wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
+
+int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
+                                    unsigned long timeout)
+{
+       return wait_event_timeout(q->mq_freeze_wq,
+                                       percpu_ref_is_zero(&q->q_usage_counter),
+                                       timeout);
+}
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
 
 /*
  * Guarantee no request is in use, so we can change any data structure of
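
blk_mq_freeze_queue_wait() is now exported, and a bounded variant is added on
top of wait_event_timeout(), which returns 0 on timeout and the remaining
jiffies otherwise. A minimal driver-side sketch of the new pair (the 5-second
budget and the pr_warn() are illustrative, not part of this patch):

	blk_mq_freeze_queue_start(q);
	if (blk_mq_freeze_queue_wait_timeout(q, 5 * HZ) == 0)
		pr_warn("queue did not freeze within 5s\n");
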
@@ -234,6 +246,7 @@ struct request *__blk_mq_alloc_request(struct blk_mq_alloc_data *data,
                        }
                        rq->tag = tag;
                        rq->internal_tag = -1;
+                       data->hctx->tags->rqs[rq->tag] = rq;
                }
 
                blk_mq_rq_ctx_init(data->q, data->ctx, rq, op);
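
With a scheduler attached, the request structure comes from the scheduler's
static_rqs pool, so the driver-tag map has to be pointed at the request
explicitly once a driver tag is assigned: completion code resolves a hardware
tag back to its request through tags->rqs[]. Roughly, as a hedged paraphrase
of the blk-mq-tag.c lookup in this era of the tree:

struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
	if (tag < tags->nr_tags)
		return tags->rqs[tag];	/* the pointer stored above */
	return NULL;
}
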
@@ -273,10 +286,9 @@ EXPORT_SYMBOL(blk_mq_alloc_request);
 struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
                unsigned int flags, unsigned int hctx_idx)
 {
-       struct blk_mq_hw_ctx *hctx;
-       struct blk_mq_ctx *ctx;
+       struct blk_mq_alloc_data alloc_data = { .flags = flags };
        struct request *rq;
-       struct blk_mq_alloc_data alloc_data;
+       unsigned int cpu;
        int ret;
 
        /*
@@ -299,25 +311,23 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, int rw,
         * Check if the hardware context is actually mapped to anything.
         * If not tell the caller that it should skip this queue.
         */
-       hctx = q->queue_hw_ctx[hctx_idx];
-       if (!blk_mq_hw_queue_mapped(hctx)) {
-               ret = -EXDEV;
-               goto out_queue_exit;
+       alloc_data.hctx = q->queue_hw_ctx[hctx_idx];
+       if (!blk_mq_hw_queue_mapped(alloc_data.hctx)) {
+               blk_queue_exit(q);
+               return ERR_PTR(-EXDEV);
        }
-       ctx = __blk_mq_get_ctx(q, cpumask_first(hctx->cpumask));
+       cpu = cpumask_first(alloc_data.hctx->cpumask);
+       alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
 
-       blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-       rq = __blk_mq_alloc_request(&alloc_data, rw);
-       if (!rq) {
-               ret = -EWOULDBLOCK;
-               goto out_queue_exit;
-       }
-
-       return rq;
+       rq = blk_mq_sched_get_request(q, NULL, rw, &alloc_data);
 
-out_queue_exit:
+       blk_mq_put_ctx(alloc_data.ctx);
        blk_queue_exit(q);
-       return ERR_PTR(ret);
+
+       if (!rq)
+               return ERR_PTR(-EWOULDBLOCK);
+
+       return rq;
 }
 EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
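
The rewrite fills a blk_mq_alloc_data on the stack up front and funnels the
allocation through blk_mq_sched_get_request(), keeping the ERR_PTR() contract
for callers. A hedged caller-side sketch (the op, flags, and hw queue index 0
are chosen for illustration; BLK_MQ_REQ_NOWAIT is required here, since a
sleeping tag allocation could migrate off the requested hardware context):

	struct request *rq;

	rq = blk_mq_alloc_request_hctx(q, REQ_OP_READ, BLK_MQ_REQ_NOWAIT, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);
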
 
@@ -852,6 +862,9 @@ done:
                return true;
        }
 
+       if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
+               data.flags |= BLK_MQ_REQ_RESERVED;
+
        rq->tag = blk_mq_get_tag(&data);
        if (rq->tag >= 0) {
                if (blk_mq_tag_busy(data.hctx)) {
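
A request whose scheduler tag came from the reserved pool must take its
driver tag from the reserved pool as well, otherwise it could block
indefinitely behind regular I/O. The predicate is a simple range check,
since reserved tags occupy the low tag values; roughly (hedged paraphrase of
the blk-mq-tag.h helper):

static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
					  unsigned int tag)
{
	return tag < tags->nr_reserved_tags;
}
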
@@ -865,12 +878,9 @@ done:
        return false;
 }
 
-static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
-                                 struct request *rq)
+static void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
+                                   struct request *rq)
 {
-       if (rq->tag == -1 || rq->internal_tag == -1)
-               return;
-
        blk_mq_put_tag(hctx, hctx->tags, rq->mq_ctx, rq->tag);
        rq->tag = -1;
 
@@ -880,6 +890,26 @@ static void blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
        }
 }
 
+static void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
+                                      struct request *rq)
+{
+       if (rq->tag == -1 || rq->internal_tag == -1)
+               return;
+
+       __blk_mq_put_driver_tag(hctx, rq);
+}
+
+static void blk_mq_put_driver_tag(struct request *rq)
+{
+       struct blk_mq_hw_ctx *hctx;
+
+       if (rq->tag == -1 || rq->internal_tag == -1)
+               return;
+
+       hctx = blk_mq_map_queue(rq->q, rq->mq_ctx->cpu);
+       __blk_mq_put_driver_tag(hctx, rq);
+}
+
 /*
  * If we fail getting a driver tag because all the driver tags are already
  * assigned and on the dispatch list, BUT the first entry does not have a
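
The actual release is factored out into __blk_mq_put_driver_tag(). The _hctx
wrapper serves callers that already hold the hardware context (the dispatch
loop below), while the request-only wrapper recovers it from the CPU of the
request's software queue. That recovery is a cheap table lookup, roughly
(hedged paraphrase of blk_mq_map_queue() from this era of block/blk-mq.h):

static inline struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q,
						     int cpu)
{
	return q->queue_hw_ctx[q->mq_map[cpu]];
}
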
@@ -989,7 +1019,19 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
 
                bd.rq = rq;
                bd.list = dptr;
-               bd.last = list_empty(list);
+
+               /*
+                * Flag last if we have no more requests, or if we have more
+                * but can't assign a driver tag to it.
+                */
+               if (list_empty(list))
+                       bd.last = true;
+               else {
+                       struct request *nxt;
+
+                       nxt = list_first_entry(list, struct request, queuelist);
+                       bd.last = !blk_mq_get_driver_tag(nxt, NULL, false);
+               }
 
                ret = q->mq_ops->queue_rq(hctx, &bd);
                switch (ret) {
@@ -997,7 +1039,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
                        queued++;
                        break;
                case BLK_MQ_RQ_QUEUE_BUSY:
-                       blk_mq_put_driver_tag(hctx, rq);
+                       blk_mq_put_driver_tag_hctx(hctx, rq);
                        list_add(&rq->queuelist, list);
                        __blk_mq_requeue_request(rq);
                        break;
@@ -1027,6 +1069,13 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list)
         * that is where we will continue on next queue run.
         */
        if (!list_empty(list)) {
+               /*
+                * If we got a driver tag for the next request already,
+                * free it again.
+                */
+               rq = list_first_entry(list, struct request, queuelist);
+               blk_mq_put_driver_tag(rq);
+
                spin_lock(&hctx->lock);
                list_splice_init(list, &hctx->dispatch);
                spin_unlock(&hctx->lock);
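
Since the loop above acquires the next request's driver tag purely to compute
bd.last (drivers use bd.last as the hint to kick the hardware), a batch that
terminates early leaves that tag held by a request which was never issued; it
has to be returned before the leftovers are spliced onto hctx->dispatch.
Condensed, the peek-ahead is equivalent to:

	bd.last = list_empty(list) ||
		  !blk_mq_get_driver_tag(list_first_entry(list, struct request,
							  queuelist),
					 NULL, false);
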
@@ -1713,16 +1762,20 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
                                        unsigned int reserved_tags)
 {
        struct blk_mq_tags *tags;
+       int node;
 
-       tags = blk_mq_init_tags(nr_tags, reserved_tags,
-                               set->numa_node,
+       node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+       if (node == NUMA_NO_NODE)
+               node = set->numa_node;
+
+       tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
                                BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
        if (!tags)
                return NULL;
 
        tags->rqs = kzalloc_node(nr_tags * sizeof(struct request *),
                                 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                                set->numa_node);
+                                node);
        if (!tags->rqs) {
                blk_mq_free_tags(tags);
                return NULL;
@@ -1730,7 +1783,7 @@ struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
 
        tags->static_rqs = kzalloc_node(nr_tags * sizeof(struct request *),
                                 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
-                                set->numa_node);
+                                node);
        if (!tags->static_rqs) {
                kfree(tags->rqs);
                blk_mq_free_tags(tags);
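
Tag maps, and further down the request pages themselves, are now allocated on
the NUMA node of the CPUs actually mapped to the hardware queue, with
set->numa_node only as a fallback for unmapped queues. The node lookup walks
the CPU-to-queue map, roughly (hedged paraphrase of the blk-mq-cpumap.c
helper):

int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
{
	int i;

	for_each_possible_cpu(i) {
		if (index == mq_map[i])
			return cpu_to_node(i);	/* first CPU serving this hctx */
	}

	return NUMA_NO_NODE;
}
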
@@ -1750,6 +1803,11 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
 {
        unsigned int i, j, entries_per_page, max_order = 4;
        size_t rq_size, left;
+       int node;
+
+       node = blk_mq_hw_queue_to_node(set->mq_map, hctx_idx);
+       if (node == NUMA_NO_NODE)
+               node = set->numa_node;
 
        INIT_LIST_HEAD(&tags->page_list);
 
@@ -1771,7 +1829,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                        this_order--;
 
                do {
-                       page = alloc_pages_node(set->numa_node,
+                       page = alloc_pages_node(node,
                                GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
                                this_order);
                        if (page)
@@ -1804,7 +1862,7 @@ int blk_mq_alloc_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
                        if (set->ops->init_request) {
                                if (set->ops->init_request(set->driver_data,
                                                rq, hctx_idx, i,
-                                               set->numa_node)) {
+                                               node)) {
                                        tags->static_rqs[i] = NULL;
                                        goto fail;
                                }
@@ -1987,7 +2045,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
                struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
                struct blk_mq_hw_ctx *hctx;
 
-               memset(__ctx, 0, sizeof(*__ctx));
                __ctx->cpu = i;
                spin_lock_init(&__ctx->lock);
                INIT_LIST_HEAD(&__ctx->rq_list);
@@ -2294,6 +2351,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
        if (!q->queue_ctx)
                goto err_exit;
 
+       /* init q->mq_kobj and sw queues' kobjects */
+       blk_mq_sysfs_init(q);
+
        q->queue_hw_ctx = kzalloc_node(nr_cpu_ids * sizeof(*(q->queue_hw_ctx)),
                                                GFP_KERNEL, set->numa_node);
        if (!q->queue_hw_ctx)
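
This hunk is the change named in the subject line: q->mq_kobj and the per-CPU
software-queue kobjects are initialized once at queue allocation rather than
at sysfs-register time. It is also why the memset() of each per-CPU ctx above
could be dropped: alloc_percpu() memory starts out zeroed, and a later
memset() would wipe an already-initialized kobject. The helper reads roughly
as follows (hedged paraphrase of blk_mq_sysfs_init() in block/blk-mq-sysfs.c):

void blk_mq_sysfs_init(struct request_queue *q)
{
	struct blk_mq_ctx *ctx;
	int cpu;

	kobject_init(&q->mq_kobj, &blk_mq_ktype);

	for_each_possible_cpu(cpu) {
		ctx = per_cpu_ptr(q->queue_ctx, cpu);
		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
	}
}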