git.kernelconcepts.de Git - karo-tx-linux.git/commitdiff
Merge remote-tracking branch 'jens/for-4.2/core' into dm-4.2
author Mike Snitzer <snitzer@redhat.com>
Fri, 29 May 2015 18:17:16 +0000 (14:17 -0400)
committer Mike Snitzer <snitzer@redhat.com>
Fri, 29 May 2015 18:17:16 +0000 (14:17 -0400)
block/blk-core.c
block/blk-mq.c
block/bounce.c
drivers/md/dm-table.c
drivers/md/dm.c
fs/btrfs/extent_io.c
fs/btrfs/volumes.c
include/linux/blk_types.h
include/linux/blkdev.h

diff --combined block/blk-core.c
index 03b5f8d77f37b4cbad3a12f3a98f9c3ea63a50e7,aa819a58ea24af8e258341cdb739d81a5027fce0..f6ab750060fe019f97d0ccfbca367b9e6cd3b426
@@@ -117,7 -117,7 +117,7 @@@ EXPORT_SYMBOL(blk_rq_init)
  static void req_bio_endio(struct request *rq, struct bio *bio,
                          unsigned int nbytes, int error)
  {
-       if (error)
+       if (error && !(rq->cmd_flags & REQ_CLONE))
                clear_bit(BIO_UPTODATE, &bio->bi_flags);
        else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = -EIO;
        bio_advance(bio, nbytes);
  
        /* don't actually finish bio if it's part of flush sequence */
-       if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+       if (bio->bi_iter.bi_size == 0 &&
+           !(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE)))
                bio_endio(bio, error);
  }
  
@@@ -285,6 -286,7 +286,7 @@@ inline void __blk_run_queue_uncond(stru
        q->request_fn(q);
        q->request_fn_active--;
  }
+ EXPORT_SYMBOL_GPL(__blk_run_queue_uncond);
  
  /**
   * __blk_run_queue - run a single device queue
@@@ -552,8 -554,6 +554,8 @@@ void blk_cleanup_queue(struct request_q
                q->queue_lock = &q->__queue_lock;
        spin_unlock_irq(lock);
  
 +      bdi_destroy(&q->backing_dev_info);
 +
        /* @q is and will stay empty, shutdown and put */
        blk_put_queue(q);
  }
@@@ -734,8 -734,6 +736,8 @@@ blk_init_queue_node(request_fn_proc *rf
  }
  EXPORT_SYMBOL(blk_init_queue_node);
  
 +static void blk_queue_bio(struct request_queue *q, struct bio *bio);
 +
  struct request_queue *
  blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
                         spinlock_t *lock)
@@@ -1525,7 -1523,8 +1527,8 @@@ bool bio_attempt_front_merge(struct req
   * Caller must ensure !blk_queue_nomerges(q) beforehand.
   */
  bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
-                           unsigned int *request_count)
+                           unsigned int *request_count,
+                           struct request **same_queue_rq)
  {
        struct blk_plug *plug;
        struct request *rq;
        list_for_each_entry_reverse(rq, plug_list, queuelist) {
                int el_ret;
  
-               if (rq->q == q)
+               if (rq->q == q) {
                        (*request_count)++;
+                       /*
+                        * Only the blk-mq case with multiple hardware queues
+                        * checks for a request on the same queue; there should
+                        * be at most one such request per queue.
+                        */
+                       if (same_queue_rq)
+                               *same_queue_rq = rq;
+               }
  
                if (rq->q != q || !blk_rq_merge_ok(rq, bio))
                        continue;
@@@ -1580,7 -1587,7 +1591,7 @@@ void init_request_from_bio(struct reque
        blk_rq_bio_prep(req->q, req, bio);
  }
  
 -void blk_queue_bio(struct request_queue *q, struct bio *bio)
 +static void blk_queue_bio(struct request_queue *q, struct bio *bio)
  {
        const bool sync = !!(bio->bi_rw & REQ_SYNC);
        struct blk_plug *plug;
         * any locks.
         */
        if (!blk_queue_nomerges(q) &&
-           blk_attempt_plug_merge(q, bio, &request_count))
+           blk_attempt_plug_merge(q, bio, &request_count, NULL))
                return;
  
        spin_lock_irq(q->queue_lock);
@@@ -1688,6 -1695,7 +1699,6 @@@ out_unlock
                spin_unlock_irq(q->queue_lock);
        }
  }
 -EXPORT_SYMBOL_GPL(blk_queue_bio);     /* for device mapper only */
  
  /*
   * If bio->bi_dev is a partition, remap the location
@@@ -1718,8 -1726,6 +1729,6 @@@ static void handle_bad_sector(struct bi
                        bio->bi_rw,
                        (unsigned long long)bio_end_sector(bio),
                        (long long)(i_size_read(bio->bi_bdev->bd_inode) >> 9));
-       set_bit(BIO_EOF, &bio->bi_flags);
  }
  
  #ifdef CONFIG_FAIL_MAKE_REQUEST
@@@ -2904,95 -2910,22 +2913,22 @@@ int blk_lld_busy(struct request_queue *
  }
  EXPORT_SYMBOL_GPL(blk_lld_busy);
  
- /**
-  * blk_rq_unprep_clone - Helper function to free all bios in a cloned request
-  * @rq: the clone request to be cleaned up
-  *
-  * Description:
-  *     Free all bios in @rq for a cloned request.
-  */
- void blk_rq_unprep_clone(struct request *rq)
- {
-       struct bio *bio;
-       while ((bio = rq->bio) != NULL) {
-               rq->bio = bio->bi_next;
-               bio_put(bio);
-       }
- }
- EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
- /*
-  * Copy attributes of the original request to the clone request.
-  * The actual data parts (e.g. ->cmd, ->sense) are not copied.
-  */
- static void __blk_rq_prep_clone(struct request *dst, struct request *src)
+ void blk_rq_prep_clone(struct request *dst, struct request *src)
  {
        dst->cpu = src->cpu;
-       dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
+       dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK);
+       dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE;
        dst->cmd_type = src->cmd_type;
        dst->__sector = blk_rq_pos(src);
        dst->__data_len = blk_rq_bytes(src);
        dst->nr_phys_segments = src->nr_phys_segments;
        dst->ioprio = src->ioprio;
        dst->extra_len = src->extra_len;
- }
- /**
-  * blk_rq_prep_clone - Helper function to setup clone request
-  * @rq: the request to be setup
-  * @rq_src: original request to be cloned
-  * @bs: bio_set that bios for clone are allocated from
-  * @gfp_mask: memory allocation mask for bio
-  * @bio_ctr: setup function to be called for each clone bio.
-  *           Returns %0 for success, non %0 for failure.
-  * @data: private data to be passed to @bio_ctr
-  *
-  * Description:
-  *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
-  *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
-  *     are not copied, and copying such parts is the caller's responsibility.
-  *     Also, pages which the original bios are pointing to are not copied
-  *     and the cloned bios just point same pages.
-  *     So cloned bios must be completed before original bios, which means
-  *     the caller must complete @rq before @rq_src.
-  */
- int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-                     struct bio_set *bs, gfp_t gfp_mask,
-                     int (*bio_ctr)(struct bio *, struct bio *, void *),
-                     void *data)
- {
-       struct bio *bio, *bio_src;
-       if (!bs)
-               bs = fs_bio_set;
-       __rq_for_each_bio(bio_src, rq_src) {
-               bio = bio_clone_fast(bio_src, gfp_mask, bs);
-               if (!bio)
-                       goto free_and_out;
-               if (bio_ctr && bio_ctr(bio, bio_src, data))
-                       goto free_and_out;
-               if (rq->bio) {
-                       rq->biotail->bi_next = bio;
-                       rq->biotail = bio;
-               } else
-                       rq->bio = rq->biotail = bio;
-       }
-       __blk_rq_prep_clone(rq, rq_src);
-       return 0;
- free_and_out:
-       if (bio)
-               bio_put(bio);
-       blk_rq_unprep_clone(rq);
-       return -ENOMEM;
+       dst->bio = src->bio;
+       dst->biotail = src->biotail;
+       dst->cmd = src->cmd;
+       dst->cmd_len = src->cmd_len;
+       dst->sense = src->sense;
  }
  EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
  
@@@ -3034,21 -2967,20 +2970,20 @@@ void blk_start_plug(struct blk_plug *pl
  {
        struct task_struct *tsk = current;
  
+       /*
+        * If this is a nested plug, don't actually assign it.
+        */
+       if (tsk->plug)
+               return;
        INIT_LIST_HEAD(&plug->list);
        INIT_LIST_HEAD(&plug->mq_list);
        INIT_LIST_HEAD(&plug->cb_list);
        /*
-        * If this is a nested plug, don't actually assign it. It will be
-        * flushed on its own.
+        * Store ordering should not be needed here, since a potential
+        * preempt will imply a full memory barrier
         */
-       if (!tsk->plug) {
-               /*
-                * Store ordering should not be needed here, since a potential
-                * preempt will imply a full memory barrier
-                */
-               tsk->plug = plug;
-       }
+       tsk->plug = plug;
  }
  EXPORT_SYMBOL(blk_start_plug);
  
@@@ -3195,10 -3127,11 +3130,11 @@@ void blk_flush_plug_list(struct blk_plu
  
  void blk_finish_plug(struct blk_plug *plug)
  {
+       if (plug != current->plug)
+               return;
        blk_flush_plug_list(plug, false);
  
-       if (plug == current->plug)
-               current->plug = NULL;
+       current->plug = NULL;
  }
  EXPORT_SYMBOL(blk_finish_plug);
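
With this rework, only the outermost blk_start_plug()/blk_finish_plug() pair owns current->plug; a nested blk_start_plug() is now a no-op, and the matching blk_finish_plug() neither flushes nor clears the plug. A minimal caller-side sketch of that contract (the submit_batch() helper is hypothetical, not part of this commit):

#include <linux/blkdev.h>

static void submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;
	int i;

	blk_start_plug(&plug);	/* no-op if current->plug is already set */
	for (i = 0; i < nr; i++)
		generic_make_request(bios[i]);
	blk_finish_plug(&plug);	/* flushes only if &plug == current->plug */
}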
  
diff --combined block/blk-mq.c
index e68b71b85a7eaf0e3097debe8bf4dc4078e7a038,c382a34fe5ac61a939f411e810d1a6b1849e146b..87e901501de229ebd439dfc1678ad9c4c6130bb8
@@@ -89,7 -89,8 +89,8 @@@ static int blk_mq_queue_enter(struct re
                        return -EBUSY;
  
                ret = wait_event_interruptible(q->mq_freeze_wq,
-                               !q->mq_freeze_depth || blk_queue_dying(q));
+                               !atomic_read(&q->mq_freeze_depth) ||
+                               blk_queue_dying(q));
                if (blk_queue_dying(q))
                        return -ENODEV;
                if (ret)
@@@ -112,13 -113,10 +113,10 @@@ static void blk_mq_usage_counter_releas
  
  void blk_mq_freeze_queue_start(struct request_queue *q)
  {
-       bool freeze;
+       int freeze_depth;
  
-       spin_lock_irq(q->queue_lock);
-       freeze = !q->mq_freeze_depth++;
-       spin_unlock_irq(q->queue_lock);
-       if (freeze) {
+       freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
+       if (freeze_depth == 1) {
                percpu_ref_kill(&q->mq_usage_counter);
                blk_mq_run_hw_queues(q, false);
        }
@@@ -143,13 -141,11 +141,11 @@@ EXPORT_SYMBOL_GPL(blk_mq_freeze_queue)
  
  void blk_mq_unfreeze_queue(struct request_queue *q)
  {
-       bool wake;
+       int freeze_depth;
  
-       spin_lock_irq(q->queue_lock);
-       wake = !--q->mq_freeze_depth;
-       WARN_ON_ONCE(q->mq_freeze_depth < 0);
-       spin_unlock_irq(q->queue_lock);
-       if (wake) {
+       freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
+       WARN_ON_ONCE(freeze_depth < 0);
+       if (!freeze_depth) {
                percpu_ref_reinit(&q->mq_usage_counter);
                wake_up_all(&q->mq_freeze_wq);
        }
@@@ -677,11 -673,8 +673,11 @@@ static void blk_mq_rq_timer(unsigned lo
                data.next = blk_rq_timeout(round_jiffies_up(data.next));
                mod_timer(&q->timeout, data.next);
        } else {
 -              queue_for_each_hw_ctx(q, hctx, i)
 -                      blk_mq_tag_idle(hctx);
 +              queue_for_each_hw_ctx(q, hctx, i) {
 +                      /* the hctx may be unmapped, so check it here */
 +                      if (blk_mq_hw_queue_mapped(hctx))
 +                              blk_mq_tag_idle(hctx);
 +              }
        }
  }
  
@@@ -858,16 -851,6 +854,16 @@@ static void __blk_mq_run_hw_queue(struc
                spin_lock(&hctx->lock);
                list_splice(&rq_list, &hctx->dispatch);
                spin_unlock(&hctx->lock);
 +              /*
 +               * The queue is expected to be stopped with BLK_MQ_RQ_QUEUE_BUSY,
 +               * but it's possible the queue was stopped and restarted again
 +               * before this point. A queue restart will dispatch requests, and
 +               * since the requests in rq_list aren't added to hctx->dispatch
 +               * yet, they might get lost.
 +               *
 +               * blk_mq_run_hw_queue() already checks the STOPPED bit.
 +               */
 +              blk_mq_run_hw_queue(hctx, true);
        }
  }
  
@@@ -1237,6 -1220,38 +1233,38 @@@ static struct request *blk_mq_map_reque
        return rq;
  }
  
+ static int blk_mq_direct_issue_request(struct request *rq)
+ {
+       int ret;
+       struct request_queue *q = rq->q;
+       struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
+                       rq->mq_ctx->cpu);
+       struct blk_mq_queue_data bd = {
+               .rq = rq,
+               .list = NULL,
+               .last = 1
+       };
+       /*
+        * For OK queue, we are done. For error, kill it. Any other
+        * error (busy), just add it to our list as we previously
+        * would have done
+        */
+       ret = q->mq_ops->queue_rq(hctx, &bd);
+       if (ret == BLK_MQ_RQ_QUEUE_OK)
+               return 0;
+       else {
+               __blk_mq_requeue_request(rq);
+               if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+                       rq->errors = -EIO;
+                       blk_mq_end_request(rq, rq->errors);
+                       return 0;
+               }
+               return -1;
+       }
+ }
  /*
   * Multiple hardware queue variant. This will not use per-process plugs,
   * but will attempt to bypass the hctx queueing if we can go straight to
@@@ -1248,6 -1263,9 +1276,9 @@@ static void blk_mq_make_request(struct 
        const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
        struct blk_map_ctx data;
        struct request *rq;
+       unsigned int request_count = 0;
+       struct blk_plug *plug;
+       struct request *same_queue_rq = NULL;
  
        blk_queue_bounce(q, &bio);
  
                return;
        }
  
+       if (!is_flush_fua && !blk_queue_nomerges(q) &&
+           blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
+               return;
        rq = blk_mq_map_request(q, bio, &data);
        if (unlikely(!rq))
                return;
                goto run_queue;
        }
  
+       plug = current->plug;
        /*
         * If the driver supports defer issued based on 'last', then
         * queue it up like normal since we can potentially save some
         * CPU this way.
         */
-       if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
-               struct blk_mq_queue_data bd = {
-                       .rq = rq,
-                       .list = NULL,
-                       .last = 1
-               };
-               int ret;
+       if (((plug && !blk_queue_nomerges(q)) || is_sync) &&
+           !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+               struct request *old_rq = NULL;
  
                blk_mq_bio_to_request(rq, bio);
  
                /*
-                * For OK queue, we are done. For error, kill it. Any other
-                * error (busy), just add it to our list as we previously
-                * would have done
+                * We do limited plugging: if the bio can be merged, do the
+                * merge. Otherwise the existing request in the plug list will
+                * be issued, so the plug list holds at most one request.
                 */
-               ret = q->mq_ops->queue_rq(data.hctx, &bd);
-               if (ret == BLK_MQ_RQ_QUEUE_OK)
-                       goto done;
-               else {
-                       __blk_mq_requeue_request(rq);
-                       if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
-                               rq->errors = -EIO;
-                               blk_mq_end_request(rq, rq->errors);
-                               goto done;
+               if (plug) {
+                       /*
+                        * The plug list might get flushed before this. If that
+                        * happens, same_queue_rq is invalid and the plug list
+                        * is empty.
+                        */
+                       if (same_queue_rq && !list_empty(&plug->mq_list)) {
+                               old_rq = same_queue_rq;
+                               list_del_init(&old_rq->queuelist);
                        }
-               }
+                       list_add_tail(&rq->queuelist, &plug->mq_list);
+               } else /* is_sync */
+                       old_rq = rq;
+               blk_mq_put_ctx(data.ctx);
+               if (!old_rq)
+                       return;
+               if (!blk_mq_direct_issue_request(old_rq))
+                       return;
+               blk_mq_insert_request(old_rq, false, true, true);
+               return;
        }
  
        if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
  run_queue:
                blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua);
        }
- done:
        blk_mq_put_ctx(data.ctx);
  }
  
@@@ -1322,16 -1347,11 +1360,11 @@@ static void blk_sq_make_request(struct 
  {
        const int is_sync = rw_is_sync(bio->bi_rw);
        const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
-       unsigned int use_plug, request_count = 0;
+       struct blk_plug *plug;
+       unsigned int request_count = 0;
        struct blk_map_ctx data;
        struct request *rq;
  
-       /*
-        * If we have multiple hardware queues, just go directly to
-        * one of those for sync IO.
-        */
-       use_plug = !is_flush_fua && !is_sync;
        blk_queue_bounce(q, &bio);
  
        if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
                return;
        }
  
-       if (use_plug && !blk_queue_nomerges(q) &&
-           blk_attempt_plug_merge(q, bio, &request_count))
+       if (!is_flush_fua && !blk_queue_nomerges(q) &&
+           blk_attempt_plug_merge(q, bio, &request_count, NULL))
                return;
  
        rq = blk_mq_map_request(q, bio, &data);
         * utilize that to temporarily store requests until the task is
         * either done or scheduled away.
         */
-       if (use_plug) {
-               struct blk_plug *plug = current->plug;
-               if (plug) {
-                       blk_mq_bio_to_request(rq, bio);
-                       if (list_empty(&plug->mq_list))
-                               trace_block_plug(q);
-                       else if (request_count >= BLK_MAX_REQUEST_COUNT) {
-                               blk_flush_plug_list(plug, false);
-                               trace_block_plug(q);
-                       }
-                       list_add_tail(&rq->queuelist, &plug->mq_list);
-                       blk_mq_put_ctx(data.ctx);
-                       return;
+       plug = current->plug;
+       if (plug) {
+               blk_mq_bio_to_request(rq, bio);
+               if (list_empty(&plug->mq_list))
+                       trace_block_plug(q);
+               else if (request_count >= BLK_MAX_REQUEST_COUNT) {
+                       blk_flush_plug_list(plug, false);
+                       trace_block_plug(q);
                }
+               list_add_tail(&rq->queuelist, &plug->mq_list);
+               blk_mq_put_ctx(data.ctx);
+               return;
        }
  
        if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@@ -1584,6 -1601,22 +1614,6 @@@ static int blk_mq_hctx_cpu_offline(stru
        return NOTIFY_OK;
  }
  
 -static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu)
 -{
 -      struct request_queue *q = hctx->queue;
 -      struct blk_mq_tag_set *set = q->tag_set;
 -
 -      if (set->tags[hctx->queue_num])
 -              return NOTIFY_OK;
 -
 -      set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num);
 -      if (!set->tags[hctx->queue_num])
 -              return NOTIFY_STOP;
 -
 -      hctx->tags = set->tags[hctx->queue_num];
 -      return NOTIFY_OK;
 -}
 -
  static int blk_mq_hctx_notify(void *data, unsigned long action,
                              unsigned int cpu)
  {
  
        if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
                return blk_mq_hctx_cpu_offline(hctx, cpu);
 -      else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
 -              return blk_mq_hctx_cpu_online(hctx, cpu);
 +
 +      /*
 +       * In case of CPU online, tags may be reallocated
 +       * in blk_mq_map_swqueue() after mapping is updated.
 +       */
  
        return NOTIFY_OK;
  }
@@@ -1775,7 -1805,6 +1805,7 @@@ static void blk_mq_map_swqueue(struct r
        unsigned int i;
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
 +      struct blk_mq_tag_set *set = q->tag_set;
  
        queue_for_each_hw_ctx(q, hctx, i) {
                cpumask_clear(hctx->cpumask);
                 * disable it and free the request entries.
                 */
                if (!hctx->nr_ctx) {
 -                      struct blk_mq_tag_set *set = q->tag_set;
 -
                        if (set->tags[i]) {
                                blk_mq_free_rq_map(set, set->tags[i], i);
                                set->tags[i] = NULL;
 -                              hctx->tags = NULL;
                        }
 +                      hctx->tags = NULL;
                        continue;
                }
  
 +              /* unmapped hw queue can be remapped after CPU topo changed */
 +              if (!set->tags[i])
 +                      set->tags[i] = blk_mq_init_rq_map(set, i);
 +              hctx->tags = set->tags[i];
 +              WARN_ON(!hctx->tags);
 +
                /*
                 * Set the map size to the number of mapped software queues.
                 * This is more accurate and more efficient than looping
@@@ -2052,7 -2077,7 +2082,7 @@@ void blk_mq_free_queue(struct request_q
  /* Basically redo blk_mq_init_queue with queue frozen */
  static void blk_mq_queue_reinit(struct request_queue *q)
  {
-       WARN_ON_ONCE(!q->mq_freeze_depth);
+       WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
  
        blk_mq_sysfs_unregister(q);
  
@@@ -2095,16 -2120,9 +2125,16 @@@ static int blk_mq_queue_reinit_notify(s
         */
        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_mq_freeze_queue_start(q);
 -      list_for_each_entry(q, &all_q_list, all_q_node)
 +      list_for_each_entry(q, &all_q_list, all_q_node) {
                blk_mq_freeze_queue_wait(q);
  
 +              /*
 +               * timeout handler can't touch hw queue during the
 +               * reinitialization
 +               */
 +              del_timer_sync(&q->timeout);
 +      }
 +
        list_for_each_entry(q, &all_q_list, all_q_node)
                blk_mq_queue_reinit(q);
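
The freeze/unfreeze rework above replaces the queue_lock-protected mq_freeze_depth counter with an atomic_t, so only the 0 -> 1 and 1 -> 0 transitions do the heavyweight work. The pattern, reduced to a minimal kernel-style sketch (the frozen flag is illustrative only):

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/types.h>

static atomic_t freeze_depth = ATOMIC_INIT(0);
static bool frozen;

static void freeze_start(void)
{
	if (atomic_inc_return(&freeze_depth) == 1)
		frozen = true;		/* first freezer does the real work */
}

static void unfreeze(void)
{
	int depth = atomic_dec_return(&freeze_depth);

	WARN_ON_ONCE(depth < 0);
	if (!depth)
		frozen = false;		/* last unfreezer wakes any waiters */
}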
  
diff --combined block/bounce.c
index ed9dd80671204bdebc4005544097fb05b6c90c62,4bac72579c1f01056bf23a2b7a64941806d6e412..3ab0bce1c947ef9be81f09139aa73d9bd4b76ff5
@@@ -128,9 -128,6 +128,6 @@@ static void bounce_end_io(struct bio *b
        struct bio_vec *bvec, *org_vec;
        int i;
  
-       if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
-               set_bit(BIO_EOPNOTSUPP, &bio_orig->bi_flags);
        /*
         * free up bounce indirect pages used
         */
@@@ -221,8 -218,8 +218,8 @@@ bounce
                if (page_to_pfn(page) <= queue_bounce_pfn(q) && !force)
                        continue;
  
 -              inc_zone_page_state(to->bv_page, NR_BOUNCE);
                to->bv_page = mempool_alloc(pool, q->bounce_gfp);
 +              inc_zone_page_state(to->bv_page, NR_BOUNCE);
  
                if (rw == WRITE) {
                        char *vto, *vfrom;
diff --combined drivers/md/dm-table.c
index 16ba55ad708992f7e942b2f6ce2048d12be5c1b6,3662b2e49b8dd8603fae86ef8e53912c642b27e1..a5f94125ad01f6b3a3a43fd7ae78e9bad0747995
@@@ -820,12 -820,6 +820,12 @@@ void dm_consume_args(struct dm_arg_set 
  }
  EXPORT_SYMBOL(dm_consume_args);
  
 +static bool __table_type_request_based(unsigned table_type)
 +{
 +      return (table_type == DM_TYPE_REQUEST_BASED ||
 +              table_type == DM_TYPE_MQ_REQUEST_BASED);
 +}
 +
  static int dm_table_set_type(struct dm_table *t)
  {
        unsigned i;
                 * Determine the type from the live device.
                 * Default to bio-based if device is new.
                 */
 -              if (live_md_type == DM_TYPE_REQUEST_BASED ||
 -                  live_md_type == DM_TYPE_MQ_REQUEST_BASED)
 +              if (__table_type_request_based(live_md_type))
                        request_based = 1;
                else
                        bio_based = 1;
                        }
                t->type = DM_TYPE_MQ_REQUEST_BASED;
  
 -      } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) {
 +      } else if (list_empty(devices) && __table_type_request_based(live_md_type)) {
                /* inherit live MD type */
                t->type = live_md_type;
  
@@@ -930,7 -925,10 +930,7 @@@ struct target_type *dm_table_get_immuta
  
  bool dm_table_request_based(struct dm_table *t)
  {
 -      unsigned table_type = dm_table_get_type(t);
 -
 -      return (table_type == DM_TYPE_REQUEST_BASED ||
 -              table_type == DM_TYPE_MQ_REQUEST_BASED);
 +      return __table_type_request_based(dm_table_get_type(t));
  }
  
  bool dm_table_mq_request_based(struct dm_table *t)
@@@ -942,21 -940,28 +942,28 @@@ static int dm_table_alloc_md_mempools(s
  {
        unsigned type = dm_table_get_type(t);
        unsigned per_bio_data_size = 0;
-       struct dm_target *tgt;
        unsigned i;
  
-       if (unlikely(type == DM_TYPE_NONE)) {
+       switch (type) {
+       case DM_TYPE_BIO_BASED:
+               for (i = 0; i < t->num_targets; i++) {
+                       struct dm_target *tgt = t->targets + i;
+                       per_bio_data_size = max(per_bio_data_size,
+                                               tgt->per_bio_data_size);
+               }
+               t->mempools = dm_alloc_bio_mempools(t->integrity_supported,
+                                                   per_bio_data_size);
+               break;
+       case DM_TYPE_REQUEST_BASED:
+       case DM_TYPE_MQ_REQUEST_BASED:
+               t->mempools = dm_alloc_rq_mempools(md, type);
+               break;
+       default:
                DMWARN("no table type is set, can't allocate mempools");
                return -EINVAL;
        }
  
-       if (type == DM_TYPE_BIO_BASED)
-               for (i = 0; i < t->num_targets; i++) {
-                       tgt = t->targets + i;
-                       per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
-               }
-       t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size);
        if (!t->mempools)
                return -ENOMEM;
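
After this split, mempool construction dispatches on the table type instead of funnelling everything through a single dm_alloc_md_mempools(): bio-based tables size the bioset front pad from the targets' per_bio_data_size, while request-based and blk-mq request-based tables only need the request/tio pools. A condensed sketch of that dispatch, assuming dm-table.c's internal declarations (the alloc_pools() wrapper is hypothetical):

static int alloc_pools(struct dm_table *t, struct mapped_device *md,
		       unsigned type, unsigned per_bio_data_size)
{
	if (type == DM_TYPE_BIO_BASED)
		t->mempools = dm_alloc_bio_mempools(t->integrity_supported,
						    per_bio_data_size);
	else	/* DM_TYPE_REQUEST_BASED or DM_TYPE_MQ_REQUEST_BASED */
		t->mempools = dm_alloc_rq_mempools(md, type);

	return t->mempools ? 0 : -ENOMEM;
}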
  
diff --combined drivers/md/dm.c
index 2caf492890d64b27a0a88f24f4f04d1778448d9a,38837f8ea3279c722cc0a929085e845357825149..4d6f089a0e9e2eca5b8fa58017a29e1da598c2a0
@@@ -990,57 -990,6 +990,6 @@@ static void clone_endio(struct bio *bio
        dec_pending(io, error);
  }
  
- /*
-  * Partial completion handling for request-based dm
-  */
- static void end_clone_bio(struct bio *clone, int error)
- {
-       struct dm_rq_clone_bio_info *info =
-               container_of(clone, struct dm_rq_clone_bio_info, clone);
-       struct dm_rq_target_io *tio = info->tio;
-       struct bio *bio = info->orig;
-       unsigned int nr_bytes = info->orig->bi_iter.bi_size;
-       bio_put(clone);
-       if (tio->error)
-               /*
-                * An error has already been detected on the request.
-                * Once error occurred, just let clone->end_io() handle
-                * the remainder.
-                */
-               return;
-       else if (error) {
-               /*
-                * Don't notice the error to the upper layer yet.
-                * The error handling decision is made by the target driver,
-                * when the request is completed.
-                */
-               tio->error = error;
-               return;
-       }
-       /*
-        * I/O for the bio successfully completed.
-        * Notice the data completion to the upper layer.
-        */
-       /*
-        * bios are processed from the head of the list.
-        * So the completing bio should always be rq->bio.
-        * If it's not, something wrong is happening.
-        */
-       if (tio->orig->bio != bio)
-               DMERR("bio completion is going in the middle of the request");
-       /*
-        * Update the original request.
-        * Do not use blk_end_request() here, because it may complete
-        * the original request before the clone, and break the ordering.
-        */
-       blk_update_request(tio->orig, 0, nr_bytes);
- }
  static struct dm_rq_target_io *tio_from_request(struct request *rq)
  {
        return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
@@@ -1082,13 -1031,13 +1031,11 @@@ static void rq_completed(struct mapped_
        dm_put(md);
  }
  
 -static void free_rq_clone(struct request *clone, bool must_be_mapped)
 +static void free_rq_clone(struct request *clone)
  {
        struct dm_rq_target_io *tio = clone->end_io_data;
        struct mapped_device *md = tio->md;
  
-       blk_rq_unprep_clone(clone);
 -      WARN_ON_ONCE(must_be_mapped && !clone->q);
--
        if (md->type == DM_TYPE_MQ_REQUEST_BASED)
                /* stacked on blk-mq queue(s) */
                tio->ti->type->release_clone_rq(clone);
@@@ -1130,7 -1079,7 +1077,7 @@@ static void dm_end_request(struct reque
                        rq->sense_len = clone->sense_len;
        }
  
 -      free_rq_clone(clone, true);
 +      free_rq_clone(clone);
        if (!rq->q->mq_ops)
                blk_end_request_all(rq, error);
        else
@@@ -1149,7 -1098,7 +1096,7 @@@ static void dm_unprep_request(struct re
        }
  
        if (clone)
 -              free_rq_clone(clone, false);
 +              free_rq_clone(clone);
  }
  
  /*
@@@ -1162,7 -1111,6 +1109,7 @@@ static void old_requeue_request(struct 
  
        spin_lock_irqsave(q->queue_lock, flags);
        blk_requeue_request(q, rq);
 +      blk_run_queue_async(q);
        spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
@@@ -1723,7 -1671,8 +1670,7 @@@ static int dm_merge_bvec(struct request
        struct mapped_device *md = q->queuedata;
        struct dm_table *map = dm_get_live_table_fast(md);
        struct dm_target *ti;
 -      sector_t max_sectors;
 -      int max_size = 0;
 +      sector_t max_sectors, max_size = 0;
  
        if (unlikely(!map))
                goto out;
        max_sectors = min(max_io_len(bvm->bi_sector, ti),
                          (sector_t) queue_max_sectors(q));
        max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
 -      if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */
 -              max_size = 0;
 +
 +      /*
 +       * FIXME: this stop-gap fix _must_ be cleaned up (by passing a sector_t
 +       * to the targets' merge function since it holds sectors not bytes).
 +       * Just doing this as an interim fix for stable@ because the more
 +       * comprehensive cleanup of switching to sector_t will impact every
 +       * DM target that implements a ->merge hook.
 +       */
 +      if (max_size > INT_MAX)
 +              max_size = INT_MAX;
  
        /*
         * merge_bvec_fn() returns number of bytes
         * max is precomputed maximal io size
         */
        if (max_size && ti->type->merge)
 -              max_size = ti->type->merge(ti, bvm, biovec, max_size);
 +              max_size = ti->type->merge(ti, bvm, biovec, (int) max_size);
        /*
         * If the target doesn't support merge method and some of the devices
         * provided their merge_bvec method (we know this by looking for the
@@@ -1827,39 -1768,13 +1774,13 @@@ static void dm_dispatch_clone_request(s
                dm_complete_request(rq, r);
  }
  
- static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
-                                void *data)
+ static void setup_clone(struct request *clone, struct request *rq,
+                       struct dm_rq_target_io *tio)
  {
-       struct dm_rq_target_io *tio = data;
-       struct dm_rq_clone_bio_info *info =
-               container_of(bio, struct dm_rq_clone_bio_info, clone);
-       info->orig = bio_orig;
-       info->tio = tio;
-       bio->bi_end_io = end_clone_bio;
-       return 0;
- }
- static int setup_clone(struct request *clone, struct request *rq,
-                      struct dm_rq_target_io *tio, gfp_t gfp_mask)
- {
-       int r;
-       r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
-                             dm_rq_bio_constructor, tio);
-       if (r)
-               return r;
-       clone->cmd = rq->cmd;
-       clone->cmd_len = rq->cmd_len;
-       clone->sense = rq->sense;
+       blk_rq_prep_clone(clone, rq);
        clone->end_io = end_clone_request;
        clone->end_io_data = tio;
        tio->clone = clone;
-       return 0;
  }
  
  static struct request *clone_rq(struct request *rq, struct mapped_device *md,
                clone = tio->clone;
  
        blk_rq_init(NULL, clone);
-       if (setup_clone(clone, rq, tio, gfp_mask)) {
-               /* -ENOMEM */
-               if (alloc_clone)
-                       free_clone_request(md, clone);
-               return NULL;
-       }
+       setup_clone(clone, rq, tio);
  
        return clone;
  }
@@@ -1977,13 -1887,9 +1893,9 @@@ static int map_request(struct dm_rq_tar
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
 -              if (IS_ERR(clone))
 -                      return DM_MAPIO_REQUEUE;
 +              if (r != DM_MAPIO_REMAPPED)
 +                      return r;
-               if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
-                       /* -ENOMEM */
-                       ti->type->release_clone_rq(clone);
-                       return DM_MAPIO_REQUEUE;
-               }
+               setup_clone(clone, rq, tio);
        }
  
        switch (r) {
@@@ -2437,8 -2343,6 +2349,6 @@@ static void __bind_mempools(struct mapp
                goto out;
        }
  
-       BUG_ON(!p || md->io_pool || md->rq_pool || md->bs);
        md->io_pool = p->io_pool;
        p->io_pool = NULL;
        md->rq_pool = p->rq_pool;
@@@ -2759,15 -2663,13 +2669,15 @@@ static int dm_mq_queue_rq(struct blk_mq
        if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
                /* clone request is allocated at the end of the pdu */
                tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
 -              if (!clone_rq(rq, md, tio, GFP_ATOMIC))
 -                      return BLK_MQ_RQ_QUEUE_BUSY;
 +              (void) clone_rq(rq, md, tio, GFP_ATOMIC);
                queue_kthread_work(&md->kworker, &tio->work);
        } else {
                /* Direct call is fine since .queue_rq allows allocations */
 -              if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
 -                      dm_requeue_unmapped_original_request(md, rq);
 +              if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
 +                      /* Undo dm_start_request() before requeuing */
 +                      rq_completed(md, rq_data_dir(rq), false);
 +                      return BLK_MQ_RQ_QUEUE_BUSY;
 +              }
        }
  
        return BLK_MQ_RQ_QUEUE_OK;
@@@ -3544,48 -3446,23 +3454,23 @@@ int dm_noflush_suspending(struct dm_tar
  }
  EXPORT_SYMBOL_GPL(dm_noflush_suspending);
  
- struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
-                                           unsigned integrity, unsigned per_bio_data_size)
+ struct dm_md_mempools *dm_alloc_bio_mempools(unsigned integrity,
+                                            unsigned per_bio_data_size)
  {
-       struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
-       struct kmem_cache *cachep = NULL;
-       unsigned int pool_size = 0;
+       struct dm_md_mempools *pools;
+       unsigned int pool_size = dm_get_reserved_bio_based_ios();
        unsigned int front_pad;
  
+       pools = kzalloc(sizeof(*pools), GFP_KERNEL);
        if (!pools)
                return NULL;
  
-       type = filter_md_type(type, md);
+       front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) +
+               offsetof(struct dm_target_io, clone);
  
-       switch (type) {
-       case DM_TYPE_BIO_BASED:
-               cachep = _io_cache;
-               pool_size = dm_get_reserved_bio_based_ios();
-               front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
-               break;
-       case DM_TYPE_REQUEST_BASED:
-               cachep = _rq_tio_cache;
-               pool_size = dm_get_reserved_rq_based_ios();
-               pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
-               if (!pools->rq_pool)
-                       goto out;
-               /* fall through to setup remaining rq-based pools */
-       case DM_TYPE_MQ_REQUEST_BASED:
-               if (!pool_size)
-                       pool_size = dm_get_reserved_rq_based_ios();
-               front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
-               /* per_bio_data_size is not used. See __bind_mempools(). */
-               WARN_ON(per_bio_data_size != 0);
-               break;
-       default:
-               BUG();
-       }
-       if (cachep) {
-               pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
-               if (!pools->io_pool)
-                       goto out;
-       }
+       pools->io_pool = mempool_create_slab_pool(pool_size, _io_cache);
+       if (!pools->io_pool)
+               goto out;
  
        pools->bs = bioset_create_nobvec(pool_size, front_pad);
        if (!pools->bs)
                goto out;
  
        return pools;
  out:
        dm_free_md_mempools(pools);
+       return NULL;
+ }
+ struct dm_md_mempools *dm_alloc_rq_mempools(struct mapped_device *md,
+                                           unsigned type)
+ {
+       unsigned int pool_size = dm_get_reserved_rq_based_ios();
+       struct dm_md_mempools *pools;
+       pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+       if (!pools)
+               return NULL;
+       if (filter_md_type(type, md) == DM_TYPE_REQUEST_BASED) {
+               pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
+               if (!pools->rq_pool)
+                       goto out;
+       }
  
+       pools->io_pool = mempool_create_slab_pool(pool_size, _rq_tio_cache);
+       if (!pools->io_pool)
+               goto out;
+       return pools;
+ out:
+       dm_free_md_mempools(pools);
        return NULL;
  }
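
With blk_rq_prep_clone() reduced to an attribute copy that shares the original request's bio list (see the blk-core.c hunk earlier), request-based dm no longer allocates, tracks, or frees per-clone bios. A condensed sketch of the prepare-and-dispatch step, mirroring setup_clone() and map_request() above and assuming dm.c's internal types (the prep_and_issue() wrapper is hypothetical):

static void prep_and_issue(struct request *clone, struct request *orig,
			   struct dm_rq_target_io *tio)
{
	blk_rq_prep_clone(clone, orig);		/* shares orig's bios, copies attrs */
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;
	tio->clone = clone;

	dm_dispatch_clone_request(clone, orig);	/* hand the clone to the lower queue */
}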
  
diff --combined fs/btrfs/extent_io.c
index c32d226bfeccbb28f25f2f417fa9e57b14411136,1e155299abc04e83675ff0d41a49034dd11ea2ae..c374e1e71e5f3e1b80713f39fc4338fb659a8c90
@@@ -2767,8 -2767,6 +2767,6 @@@ static int __must_check submit_one_bio(
        else
                btrfsic_submit_bio(rw, bio);
  
-       if (bio_flagged(bio, BIO_EOPNOTSUPP))
-               ret = -EOPNOTSUPP;
        bio_put(bio);
        return ret;
  }
@@@ -4772,25 -4770,6 +4770,25 @@@ struct extent_buffer *find_extent_buffe
                               start >> PAGE_CACHE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
 +              /*
 +               * Lock our eb's refs_lock to avoid races with
 +               * free_extent_buffer. When we get our eb it might be flagged
 +               * with EXTENT_BUFFER_STALE and another task running
 +               * free_extent_buffer might have seen that flag set,
 +               * eb->refs == 2, that the buffer isn't under IO (dirty and
 +               * writeback flags not set) and it's still in the tree (flag
 +               * EXTENT_BUFFER_TREE_REF set), therefore being in the process
 +               * of decrementing the extent buffer's reference count twice.
 +               * So here we could race and increment the eb's reference count,
 +               * clear its stale flag, mark it as dirty and drop our reference
 +               * before the other task finishes executing free_extent_buffer,
 +               * which would later result in an attempt to free an extent
 +               * buffer that is dirty.
 +               */
 +              if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
 +                      spin_lock(&eb->refs_lock);
 +                      spin_unlock(&eb->refs_lock);
 +              }
                mark_extent_buffer_accessed(eb, NULL);
                return eb;
        }
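
The back-to-back lock/unlock of eb->refs_lock above is a deliberate serialization point rather than a missing critical section: the acquisition cannot succeed until a concurrent free_extent_buffer(), which saw the stale flag and is working under refs_lock, has dropped the lock. The idiom in isolation (names here are illustrative, not btrfs API):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(obj_lock);

/* Returns only after any critical section currently holding obj_lock
 * has completed; nothing needs to be done while holding it. */
static void wait_for_obj_lock_holders(void)
{
	spin_lock(&obj_lock);
	spin_unlock(&obj_lock);
}
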
diff --combined fs/btrfs/volumes.c
index 174f5e1e00abfa533b1cb7483e44aae0f550e63a,dac77d42a9ab431ee6bbf8aa4a5bb910ef0ecc9d..53af23f2c087ad015e720af094fbfc53e1671317
@@@ -345,7 -345,7 +345,7 @@@ loop_lock
                    waitqueue_active(&fs_info->async_submit_wait))
                        wake_up(&fs_info->async_submit_wait);
  
-               BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+               BUG_ON(atomic_read(&cur->__bi_cnt) == 0);
  
                /*
                 * if we're doing the sync list, record that our
@@@ -4625,7 -4625,6 +4625,7 @@@ int btrfs_alloc_chunk(struct btrfs_tran
  {
        u64 chunk_offset;
  
 +      ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex));
        chunk_offset = find_next_chunk(extent_root->fs_info);
        return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type);
  }
@@@ -5586,10 -5585,10 +5586,10 @@@ int btrfs_rmap_block(struct btrfs_mappi
  
  static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err)
  {
-       if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED))
-               bio_endio_nodec(bio, err);
-       else
-               bio_endio(bio, err);
+       bio->bi_private = bbio->private;
+       bio->bi_end_io = bbio->end_io;
+       bio_endio(bio, err);
        btrfs_put_bbio(bbio);
  }
  
@@@ -5633,8 -5632,6 +5633,6 @@@ static void btrfs_end_bio(struct bio *b
                        bio = bbio->orig_bio;
                }
  
-               bio->bi_private = bbio->private;
-               bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                /* only send an error to the higher layers if it is
                 * beyond the tolerance of the btrfs bio
@@@ -5816,8 -5813,6 +5814,6 @@@ static void bbio_error(struct btrfs_bi
                /* Should be the original bio. */
                WARN_ON(bio != bbio->orig_bio);
  
-               bio->bi_private = bbio->private;
-               bio->bi_end_io = bbio->end_io;
                btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
                bio->bi_iter.bi_sector = logical >> 9;
  
@@@ -5898,10 -5893,8 +5894,8 @@@ int btrfs_map_bio(struct btrfs_root *ro
                if (dev_nr < total_devs - 1) {
                        bio = btrfs_bio_clone(first_bio, GFP_NOFS);
                        BUG_ON(!bio); /* -ENOMEM */
-               } else {
+               } else
                        bio = first_bio;
-                       bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED;
-               }
  
                submit_stripe_bio(root, bbio, bio,
                                  bbio->stripes[dev_nr].physical, dev_nr, rw,
diff --combined include/linux/blk_types.h
index b7299febc4b4adfee00cb8b05d6fbf6558f01547,45a6be89957c9f5cf1a9e15192aa6d88506e90cd..6ab9d12d1f17762190870ab153129bac56b82176
@@@ -65,7 -65,7 +65,7 @@@ struct bio 
        unsigned int            bi_seg_front_size;
        unsigned int            bi_seg_back_size;
  
-       atomic_t                bi_remaining;
+       atomic_t                __bi_remaining;
  
        bio_end_io_t            *bi_end_io;
  
@@@ -92,7 -92,7 +92,7 @@@
  
        unsigned short          bi_max_vecs;    /* max bvl_vecs we can hold */
  
-       atomic_t                bi_cnt;         /* pin count */
+       atomic_t                __bi_cnt;       /* pin count */
  
        struct bio_vec          *bi_io_vec;     /* the actual vec list */
  
   * bio flags
   */
  #define BIO_UPTODATE  0       /* ok after I/O completion */
- #define BIO_RW_BLOCK  1       /* RW_AHEAD set, and read/write would block */
- #define BIO_EOF               2       /* out-out-bounds error */
- #define BIO_SEG_VALID 3       /* bi_phys_segments valid */
- #define BIO_CLONED    4       /* doesn't own data */
- #define BIO_BOUNCED   5       /* bio is a bounce bio */
- #define BIO_USER_MAPPED 6     /* contains user pages */
- #define BIO_EOPNOTSUPP        7       /* not supported */
- #define BIO_NULL_MAPPED 8     /* contains invalid user pages */
- #define BIO_QUIET     9       /* Make BIO Quiet */
- #define BIO_SNAP_STABLE       10      /* bio data must be snapshotted during write */
+ #define BIO_SEG_VALID 1       /* bi_phys_segments valid */
+ #define BIO_CLONED    2       /* doesn't own data */
+ #define BIO_BOUNCED   3       /* bio is a bounce bio */
+ #define BIO_USER_MAPPED 4     /* contains user pages */
+ #define BIO_NULL_MAPPED 5     /* contains invalid user pages */
+ #define BIO_QUIET     6       /* Make BIO Quiet */
+ #define BIO_SNAP_STABLE       7       /* bio data must be snapshotted during write */
+ #define BIO_CHAIN     8       /* chained bio, ->bi_remaining in effect */
+ #define BIO_REFFED    9       /* bio has elevated ->bi_cnt */
  
  /*
   * Flags starting here get preserved by bio_reset() - this includes
@@@ -193,6 -192,7 +192,7 @@@ enum rq_flag_bits 
        __REQ_HASHED,           /* on IO scheduler merge hash */
        __REQ_MQ_INFLIGHT,      /* track inflight for MQ */
        __REQ_NO_TIMEOUT,       /* requests may never expire */
+       __REQ_CLONE,            /* cloned bios */
        __REQ_NR_BITS,          /* stops here */
  };
  
  
  /* This mask is used for both bio and request merge checking */
  #define REQ_NOMERGE_FLAGS \
 -      (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
 +      (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ)
  
  #define REQ_RAHEAD            (1ULL << __REQ_RAHEAD)
  #define REQ_THROTTLED         (1ULL << __REQ_THROTTLED)
  #define REQ_HASHED            (1ULL << __REQ_HASHED)
  #define REQ_MQ_INFLIGHT               (1ULL << __REQ_MQ_INFLIGHT)
  #define REQ_NO_TIMEOUT                (1ULL << __REQ_NO_TIMEOUT)
+ #define REQ_CLONE             (1ULL << __REQ_CLONE)
  
  #endif /* __LINUX_BLK_TYPES_H */
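
The new REQ_CLONE flag is what the req_bio_endio() change in the blk-core.c hunk keys off: bios attached to a cloned request are shared with the original request, so completion of the clone must neither mark them !uptodate nor end them. A sketch condensed from that hunk (error propagation elided):

#include <linux/bio.h>
#include <linux/blkdev.h>

static void complete_one_bio(struct request *rq, struct bio *bio,
			     unsigned int nbytes, int error)
{
	if (error && !(rq->cmd_flags & REQ_CLONE))
		clear_bit(BIO_UPTODATE, &bio->bi_flags);

	bio_advance(bio, nbytes);

	/* a shared (cloned) bio is completed via the original request only */
	if (!bio->bi_iter.bi_size &&
	    !(rq->cmd_flags & (REQ_FLUSH_SEQ | REQ_CLONE)))
		bio_endio(bio, error);
}
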
diff --combined include/linux/blkdev.h
index 5d93a6645e88676a7d90a1ac55b5d5d6792da667,9ded80da2c1658b868849940de88e0d34e217e5d..776d2ee43ba649749e470b4222169e6095bc6fc2
@@@ -30,7 -30,6 +30,6 @@@ struct scsi_ioctl_command
  
  struct request_queue;
  struct elevator_queue;
- struct request_pm_state;
  struct blk_trace;
  struct request;
  struct sg_io_hdr;
@@@ -75,18 -74,7 +74,7 @@@ struct request_list 
  enum rq_cmd_type_bits {
        REQ_TYPE_FS             = 1,    /* fs request */
        REQ_TYPE_BLOCK_PC,              /* scsi command */
-       REQ_TYPE_SENSE,                 /* sense request */
-       REQ_TYPE_PM_SUSPEND,            /* suspend request */
-       REQ_TYPE_PM_RESUME,             /* resume request */
-       REQ_TYPE_PM_SHUTDOWN,           /* shutdown request */
-       REQ_TYPE_SPECIAL,               /* driver defined type */
-       /*
-        * for ATA/ATAPI devices. this really doesn't belong here, ide should
-        * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver
-        * private REQ_LB opcodes to differentiate what type of request this is
-        */
-       REQ_TYPE_ATA_TASKFILE,
-       REQ_TYPE_ATA_PC,
+       REQ_TYPE_DRV_PRIV,              /* driver defined types from here */
  };
  
  #define BLK_MAX_CDB   16
@@@ -108,7 -96,7 +96,7 @@@ struct request 
        struct blk_mq_ctx *mq_ctx;
  
        u64 cmd_flags;
-       enum rq_cmd_type_bits cmd_type;
+       unsigned cmd_type;
        unsigned long atomic_flags;
  
        int cpu;
@@@ -216,19 -204,6 +204,6 @@@ static inline unsigned short req_get_io
        return req->ioprio;
  }
  
- /*
-  * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME
-  * requests. Some step values could eventually be made generic.
-  */
- struct request_pm_state
- {
-       /* PM state machine step value, currently driver specific */
-       int     pm_step;
-       /* requested PM state value (S1, S2, S3, S4, ...) */
-       u32     pm_state;
-       void*   data;           /* for driver use */
- };
  #include <linux/elevator.h>
  
  struct blk_queue_ctx;
@@@ -469,7 -444,7 +444,7 @@@ struct request_queue 
        struct mutex            sysfs_lock;
  
        int                     bypass_depth;
-       int                     mq_freeze_depth;
+       atomic_t                mq_freeze_depth;
  
  #if defined(CONFIG_BLK_DEV_BSG)
        bsg_job_fn              *bsg_job_fn;
@@@ -610,10 -585,6 +585,6 @@@ static inline void queue_flag_clear(uns
        (((rq)->cmd_flags & REQ_STARTED) && \
         ((rq)->cmd_type == REQ_TYPE_FS))
  
- #define blk_pm_request(rq)    \
-       ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
-        (rq)->cmd_type == REQ_TYPE_PM_RESUME)
  #define blk_rq_cpu_valid(rq)  ((rq)->cpu != -1)
  #define blk_bidi_rq(rq)               ((rq)->next_rq != NULL)
  /* rq->queuelist of dequeued request must be list_empty() */
@@@ -804,11 -775,7 +775,7 @@@ extern void blk_add_request_payload(str
                unsigned int len);
  extern int blk_rq_check_limits(struct request_queue *q, struct request *rq);
  extern int blk_lld_busy(struct request_queue *q);
- extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
-                            struct bio_set *bs, gfp_t gfp_mask,
-                            int (*bio_ctr)(struct bio *, struct bio *, void *),
-                            void *data);
- extern void blk_rq_unprep_clone(struct request *rq);
+ extern void blk_rq_prep_clone(struct request *rq, struct request *rq_src);
  extern int blk_insert_cloned_request(struct request_queue *q,
                                     struct request *rq);
  extern void blk_delay_queue(struct request_queue *, unsigned long);
@@@ -821,6 -788,8 +788,6 @@@ extern int scsi_cmd_ioctl(struct reques
  extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
                         struct scsi_ioctl_command __user *);
  
 -extern void blk_queue_bio(struct request_queue *q, struct bio *bio);
 -
  /*
   * A queue has just exitted congestion.  Note this in the global counter of
   * congested queues, and wake up anyone who was waiting for requests to be
@@@ -845,6 -814,7 +812,7 @@@ extern void blk_stop_queue(struct reque
  extern void blk_sync_queue(struct request_queue *q);
  extern void __blk_stop_queue(struct request_queue *q);
  extern void __blk_run_queue(struct request_queue *q);
+ extern void __blk_run_queue_uncond(struct request_queue *q);
  extern void blk_run_queue(struct request_queue *);
  extern void blk_run_queue_async(struct request_queue *q);
  extern int blk_rq_map_user(struct request_queue *, struct request *,