X-Git-Url: https://git.kernelconcepts.de/?p=karo-tx-linux.git;a=blobdiff_plain;f=block%2Fblk-mq.c;h=7ddc7969fba43b6786607c9ffda5f396ed87af3b;hp=576e7112f8071179b0c8f44bef799dfa5bc75c33;hb=2849450ad39d;hpb=e2b3b80de5cda284c885721e873f9a6e90f68ef8

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 576e7112f807..7ddc7969fba4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -22,6 +22,7 @@
 #include <linux/sched/sysctl.h>
 #include <linux/delay.h>
 #include <linux/crash_dump.h>
+#include <linux/prefetch.h>
 
 #include <trace/events/block.h>
 
@@ -501,7 +502,7 @@ EXPORT_SYMBOL(blk_mq_requeue_request);
 static void blk_mq_requeue_work(struct work_struct *work)
 {
 	struct request_queue *q =
-		container_of(work, struct request_queue, requeue_work);
+		container_of(work, struct request_queue, requeue_work.work);
 	LIST_HEAD(rq_list);
 	struct request *rq, *next;
 	unsigned long flags;
@@ -556,16 +557,24 @@ EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
 void blk_mq_cancel_requeue_work(struct request_queue *q)
 {
-	cancel_work_sync(&q->requeue_work);
+	cancel_delayed_work_sync(&q->requeue_work);
 }
 EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
 
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
-	kblockd_schedule_work(&q->requeue_work);
+	kblockd_schedule_delayed_work(&q->requeue_work, 0);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_delay_kick_requeue_list(struct request_queue *q,
+				    unsigned long msecs)
+{
+	kblockd_schedule_delayed_work(&q->requeue_work,
+				      msecs_to_jiffies(msecs));
+}
+EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
+
 void blk_mq_abort_requeue_list(struct request_queue *q)
 {
 	unsigned long flags;
@@ -588,8 +597,10 @@ EXPORT_SYMBOL(blk_mq_abort_requeue_list);
 
 struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
 {
-	if (tag < tags->nr_tags)
+	if (tag < tags->nr_tags) {
+		prefetch(tags->rqs[tag]);
 		return tags->rqs[tag];
+	}
 
 	return NULL;
 }
@@ -672,7 +683,20 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	};
 	int i;
 
-	if (blk_queue_enter(q, true))
+	/* A deadlock might occur if a request is stuck requiring a
+	 * timeout at the same time a queue freeze is waiting
+	 * completion, since the timeout code would not be able to
+	 * acquire the queue reference here.
+	 *
+	 * That's why we don't use blk_queue_enter here; instead, we use
+	 * percpu_ref_tryget directly, because we need to be able to
+	 * obtain a reference even in the short window between the queue
+	 * starting to freeze, by dropping the first reference in
+	 * blk_mq_freeze_queue_start, and the moment the last request is
+	 * consumed, marked by the instant q_usage_counter reaches
+	 * zero.
+	 */
+	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return;
 
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
@@ -780,11 +804,12 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	struct list_head *dptr;
 	int queued;
 
-	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
 		return;
 
+	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+		cpu_online(hctx->next_cpu));
+
 	hctx->run++;
 
 	/*
@@ -922,8 +947,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 		put_cpu();
 	}
 
-	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
-			&hctx->run_work, 0);
+	kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
 }
 
 void blk_mq_run_hw_queues(struct request_queue *q, bool async)
@@ -944,7 +968,7 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
-	cancel_delayed_work(&hctx->run_work);
+	cancel_work(&hctx->run_work);
 	cancel_delayed_work(&hctx->delay_work);
 	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
 }
@@ -997,7 +1021,7 @@ static void blk_mq_run_work_fn(struct work_struct *work)
 {
 	struct blk_mq_hw_ctx *hctx;
 
-	hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
+	hctx = container_of(work, struct blk_mq_hw_ctx, run_work);
 
 	__blk_mq_run_hw_queue(hctx);
 }
@@ -1023,10 +1047,11 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
 static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
-					    struct blk_mq_ctx *ctx,
 					    struct request *rq,
 					    bool at_head)
 {
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
@@ -1040,20 +1065,16 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
-	__blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+	__blk_mq_insert_req_list(hctx, rq, at_head);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
 void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
-		bool async)
+			   bool async)
 {
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
-	current_ctx = blk_mq_get_ctx(q);
-	if (!cpu_online(ctx->cpu))
-		rq->mq_ctx = ctx = current_ctx;
 
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
@@ -1063,8 +1084,6 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 
 	if (run_queue)
 		blk_mq_run_hw_queue(hctx, async);
-
-	blk_mq_put_ctx(current_ctx);
 }
 
 static void blk_mq_insert_requests(struct request_queue *q,
@@ -1075,14 +1094,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
 
 {
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *current_ctx;
 
 	trace_block_unplug(q, depth, !from_schedule);
 
-	current_ctx = blk_mq_get_ctx(q);
-
-	if (!cpu_online(ctx->cpu))
-		ctx = current_ctx;
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
 	/*
@@ -1094,15 +1108,14 @@ static void blk_mq_insert_requests(struct request_queue *q,
 		struct request *rq;
 
 		rq = list_first_entry(list, struct request, queuelist);
+		BUG_ON(rq->mq_ctx != ctx);
 		list_del_init(&rq->queuelist);
-		rq->mq_ctx = ctx;
-		__blk_mq_insert_req_list(hctx, ctx, rq, false);
+		__blk_mq_insert_req_list(hctx, rq, false);
 	}
 	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 
 	blk_mq_run_hw_queue(hctx, from_schedule);
-	blk_mq_put_ctx(current_ctx);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -1221,7 +1234,7 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-	if (rw_is_sync(bio_op(bio), bio->bi_rw))
+	if (rw_is_sync(bio_op(bio), bio->bi_opf))
 		op_flags |= REQ_SYNC;
 
 	trace_block_getrq(q, bio, op);
@@ -1289,8 +1302,8 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
  */
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
-	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
-	const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
+	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_map_ctx data;
 	struct request *rq;
 	unsigned int request_count = 0;
@@ -1383,8 +1396,8 @@ done:
  */
 static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
-	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
-	const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
+	const int is_sync = rw_is_sync(bio_op(bio), bio->bi_opf);
+	const int is_flush_fua = bio->bi_opf & (REQ_PREFLUSH | REQ_FUA);
 	struct blk_plug *plug;
 	unsigned int request_count = 0;
 	struct blk_map_ctx data;
@@ -1617,16 +1630,17 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 	return 0;
 }
 
+/*
+ * 'cpu' is going away. splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
 static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 {
-	struct request_queue *q = hctx->queue;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
 
-	/*
-	 * Move ctx entries to new CPU, if this one is going away.
-	 */
-	ctx = __blk_mq_get_ctx(q, cpu);
+	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
 
 	spin_lock(&ctx->lock);
 	if (!list_empty(&ctx->rq_list)) {
@@ -1638,24 +1652,11 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
 	if (list_empty(&tmp))
 		return NOTIFY_OK;
 
-	ctx = blk_mq_get_ctx(q);
-	spin_lock(&ctx->lock);
-
-	while (!list_empty(&tmp)) {
-		struct request *rq;
-
-		rq = list_first_entry(&tmp, struct request, queuelist);
-		rq->mq_ctx = ctx;
-		list_move_tail(&rq->queuelist, &ctx->rq_list);
-	}
-
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-	blk_mq_hctx_mark_pending(hctx, ctx);
-
-	spin_unlock(&ctx->lock);
+	spin_lock(&hctx->lock);
+	list_splice_tail_init(&tmp, &hctx->dispatch);
+	spin_unlock(&hctx->lock);
 
 	blk_mq_run_hw_queue(hctx, true);
-	blk_mq_put_ctx(ctx);
 	return NOTIFY_OK;
 }
 
@@ -1731,7 +1732,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	if (node == NUMA_NO_NODE)
 		node = hctx->numa_node = set->numa_node;
 
-	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
+	INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
 	INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
@@ -2091,7 +2092,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	q->sg_reserved_size = INT_MAX;
 
-	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
+	INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
 	INIT_LIST_HEAD(&q->requeue_list);
 	spin_lock_init(&q->requeue_lock);
 
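The blk_mq_delay_kick_requeue_list() export added above lets a driver park requests on the requeue list and have the requeue work re-run after a delay instead of immediately (blk_mq_kick_requeue_list() becomes the zero-delay case now that requeue_work is a delayed work item). Below is a minimal caller sketch, assuming the one-argument blk_mq_requeue_request() present in this tree; the helper name example_requeue_later() and the delay value are illustrative only, not part of this patch.

#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/*
 * Hypothetical driver helper: put @rq back on its queue's requeue list
 * and re-run the requeue work after @delay_ms milliseconds, e.g. when a
 * backing resource is temporarily unavailable.
 */
static void example_requeue_later(struct request *rq, unsigned long delay_ms)
{
	/* Move the request onto q->requeue_list; this does not kick the list. */
	blk_mq_requeue_request(rq);

	/* Run the (now delayed) requeue work after delay_ms instead of at once. */
	blk_mq_delay_kick_requeue_list(rq->q, delay_ms);
}

Called as example_requeue_later(rq, 100), this would retry the request roughly 100 ms later rather than re-dispatching it immediately.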