Merge branch 'for-4.8/core' of git://git.kernel.dk/linux-block
Author:    Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 26 Jul 2016 22:03:07 +0000 (15:03 -0700)
Committer: Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 26 Jul 2016 22:03:07 +0000 (15:03 -0700)
Pull core block updates from Jens Axboe:

   - the big change is the cleanup from Mike Christie, cleaning up our
     uses of command types and modifier flags (see the short sketch after
     this list).  This is what will throw some merge conflicts

   - regression fix for the above for btrfs, from Vincent

   - a follow-up to the above: better packing of struct request, from
     Christoph

   - a 2038 fix for blktrace from Arnd

   - a few trivial/spelling fixes from Bart Van Assche

   - a front merge check fix from Damien, which could cause issues on
     SMR drives

   - Atari partition fix from Gabriel

   - convert cfq to highres timers, since jiffies isn't granular enough
     for some devices these days.  From Jan and Jeff

   - CFQ priority boost fix for idle classes, from me

   - cleanup series from Ming, improving our bio/bvec iteration

   - a direct issue fix for blk-mq from Omar

   - fix so that plug merging consults the IO scheduler, as we already do
     for other types of merges.  From Tahsin

   - expose DAX type internally and through sysfs.  From Toshi and Yigal
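
A quick illustration of the pattern that cleanup introduces, since it is what
most of the hunks below boil down to.  This is a minimal, hypothetical
driver-style sketch (the example_* names are made up); it only uses helpers
that appear in this merge: bio_op(), req_op(), bio_set_op_attrs() and the
single-argument submit_bio():

        /* Old style: direction and command type were bits in bio->bi_rw,
         * and submit_bio() took a separate rw argument.
         * New style: the operation is set/read with dedicated accessors,
         * and modifier flags travel alongside it. */
        static void example_submit_sync_write(struct bio *bio)
        {
                /* was: bio->bi_rw |= REQ_WRITE; submit_bio(WRITE | REQ_SYNC, bio); */
                bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC);
                submit_bio(bio);
        }

        static bool example_is_discard(struct request *rq)
        {
                /* was: rq->cmd_flags & REQ_DISCARD */
                return req_op(rq) == REQ_OP_DISCARD;
        }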

* 'for-4.8/core' of git://git.kernel.dk/linux-block: (76 commits)
  block: Fix front merge check
  block: do not merge requests without consulting with io scheduler
  block: Fix spelling in a source code comment
  block: expose QUEUE_FLAG_DAX in sysfs
  block: add QUEUE_FLAG_DAX for devices to advertise their DAX support
  Btrfs: fix comparison in __btrfs_map_block()
  block: atari: Return early for unsupported sector size
  Doc: block: Fix a typo in queue-sysfs.txt
  cfq-iosched: Charge at least 1 jiffie instead of 1 ns
  cfq-iosched: Fix regression in bonnie++ rewrite performance
  cfq-iosched: Convert slice_resid from u64 to s64
  block: Convert fifo_time from ulong to u64
  blktrace: avoid using timespec
  block/blk-cgroup.c: Declare local symbols static
  block/bio-integrity.c: Add #include "blk.h"
  block/partition-generic.c: Remove a set-but-not-used variable
  block: bio: kill BIO_MAX_SIZE
  cfq-iosched: temporarily boost queue priority for idle classes
  block: drbd: avoid to use BIO_MAX_SIZE
  block: bio: remove BIO_MAX_SECTORS
  ...

199 files changed:
Documentation/block/queue-sysfs.txt
Documentation/block/writeback_cache_control.txt
Documentation/device-mapper/log-writes.txt
arch/um/drivers/ubd_kern.c
block/bio-integrity.c
block/bio.c
block/blk-cgroup.c
block/blk-core.c
block/blk-exec.c
block/blk-flush.c
block/blk-lib.c
block/blk-map.c
block/blk-merge.c
block/blk-mq.c
block/blk-sysfs.c
block/cfq-iosched.c
block/deadline-iosched.c
block/elevator.c
block/partition-generic.c
block/partitions/atari.c
drivers/ata/libata-scsi.c
drivers/block/brd.c
drivers/block/drbd/drbd_actlog.c
drivers/block/drbd/drbd_bitmap.c
drivers/block/drbd/drbd_int.h
drivers/block/drbd/drbd_main.c
drivers/block/drbd/drbd_protocol.h
drivers/block/drbd/drbd_receiver.c
drivers/block/drbd/drbd_req.c
drivers/block/drbd/drbd_worker.c
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/mtip32xx/mtip32xx.c
drivers/block/nbd.c
drivers/block/osdblk.c
drivers/block/pktcdvd.c
drivers/block/ps3disk.c
drivers/block/rbd.c
drivers/block/rsxx/dma.c
drivers/block/skd_main.c
drivers/block/umem.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/blkback.c
drivers/block/xen-blkfront.c
drivers/block/zram/zram_drv.c
drivers/ide/ide-cd_ioctl.c
drivers/ide/ide-disk.c
drivers/ide/ide-floppy.c
drivers/lightnvm/rrpc.c
drivers/md/bcache/btree.c
drivers/md/bcache/debug.c
drivers/md/bcache/io.c
drivers/md/bcache/journal.c
drivers/md/bcache/movinggc.c
drivers/md/bcache/request.c
drivers/md/bcache/super.c
drivers/md/bcache/writeback.c
drivers/md/bitmap.c
drivers/md/dm-bufio.c
drivers/md/dm-cache-target.c
drivers/md/dm-crypt.c
drivers/md/dm-era-target.c
drivers/md/dm-flakey.c
drivers/md/dm-io.c
drivers/md/dm-kcopyd.c
drivers/md/dm-log-writes.c
drivers/md/dm-log.c
drivers/md/dm-raid.c
drivers/md/dm-raid1.c
drivers/md/dm-region-hash.c
drivers/md/dm-snap-persistent.c
drivers/md/dm-snap.c
drivers/md/dm-stats.c
drivers/md/dm-stripe.c
drivers/md/dm-thin.c
drivers/md/dm.c
drivers/md/linear.c
drivers/md/md.c
drivers/md/md.h
drivers/md/multipath.c
drivers/md/raid0.c
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5-cache.c
drivers/md/raid5.c
drivers/mmc/card/block.c
drivers/mmc/card/queue.c
drivers/mmc/card/queue.h
drivers/mtd/mtd_blkdevs.c
drivers/nvdimm/pmem.c
drivers/nvme/host/core.c
drivers/nvme/host/nvme.h
drivers/s390/block/dcssblk.c
drivers/scsi/osd/osd_initiator.c
drivers/scsi/sd.c
drivers/target/target_core_iblock.c
drivers/target/target_core_pscsi.c
fs/block_dev.c
fs/btrfs/check-integrity.c
fs/btrfs/check-integrity.h
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/inode.c
fs/btrfs/raid56.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/buffer.c
fs/crypto/crypto.c
fs/direct-io.c
fs/exofs/ore.c
fs/ext4/balloc.c
fs/ext4/crypto.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/mmp.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/readpage.c
fs/ext4/super.c
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/gc.c
fs/f2fs/inline.c
fs/f2fs/node.c
fs/f2fs/segment.c
fs/f2fs/trace.c
fs/fat/misc.c
fs/gfs2/bmap.c
fs/gfs2/dir.c
fs/gfs2/log.c
fs/gfs2/lops.c
fs/gfs2/lops.h
fs/gfs2/meta_io.c
fs/gfs2/ops_fstype.c
fs/gfs2/quota.c
fs/hfsplus/hfsplus_fs.h
fs/hfsplus/part_tbl.c
fs/hfsplus/super.c
fs/hfsplus/wrapper.c
fs/isofs/compress.c
fs/jbd2/commit.c
fs/jbd2/journal.c
fs/jbd2/recovery.c
fs/jfs/jfs_logmgr.c
fs/jfs/jfs_metapage.c
fs/logfs/dev_bdev.c
fs/mpage.c
fs/nfs/blocklayout/blocklayout.c
fs/nilfs2/btnode.c
fs/nilfs2/btnode.h
fs/nilfs2/btree.c
fs/nilfs2/gcinode.c
fs/nilfs2/mdt.c
fs/nilfs2/segbuf.c
fs/ntfs/aops.c
fs/ntfs/compress.c
fs/ntfs/file.c
fs/ntfs/logfile.c
fs/ntfs/mft.c
fs/ocfs2/aops.c
fs/ocfs2/buffer_head_io.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/super.c
fs/reiserfs/inode.c
fs/reiserfs/journal.c
fs/reiserfs/stree.c
fs/reiserfs/super.c
fs/squashfs/block.c
fs/udf/dir.c
fs/udf/directory.c
fs/udf/inode.c
fs/ufs/balloc.c
fs/ufs/util.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_buf.c
include/linux/bio.h
include/linux/blk-cgroup.h
include/linux/blk_types.h
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/buffer_head.h
include/linux/bvec.h [new file with mode: 0644]
include/linux/dm-io.h
include/linux/elevator.h
include/linux/fs.h
include/trace/events/bcache.h
include/trace/events/block.h
include/trace/events/f2fs.h
kernel/power/swap.c
kernel/trace/blktrace.c
lib/iov_iter.c
mm/page_io.c

diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index dce25d8..d515d58 100644
@@ -53,7 +53,7 @@ disk.
 
 logical_block_size (RO)
 -----------------------
-This is the logcal block size of the device, in bytes.
+This is the logical block size of the device, in bytes.
 
 max_hw_sectors_kb (RO)
 ----------------------
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt
index 59e0516..8a6bdad 100644
@@ -20,11 +20,11 @@ a forced cache flush, and the Force Unit Access (FUA) flag for requests.
 Explicit cache flushes
 ----------------------
 
-The REQ_FLUSH flag can be OR ed into the r/w flags of a bio submitted from
+The REQ_PREFLUSH flag can be OR ed into the r/w flags of a bio submitted from
 the filesystem and will make sure the volatile cache of the storage device
 has been flushed before the actual I/O operation is started.  This explicitly
 guarantees that previously completed write requests are on non-volatile
-storage before the flagged bio starts. In addition the REQ_FLUSH flag can be
+storage before the flagged bio starts. In addition the REQ_PREFLUSH flag can be
 set on an otherwise empty bio structure, which causes only an explicit cache
 flush without any dependent I/O.  It is recommend to use
 the blkdev_issue_flush() helper for a pure cache flush.
@@ -41,21 +41,21 @@ signaled after the data has been committed to non-volatile storage.
 Implementation details for filesystems
 --------------------------------------
 
-Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to
+Filesystems can simply set the REQ_PREFLUSH and REQ_FUA bits and do not have to
 worry if the underlying devices need any explicit cache flushing and how
-the Forced Unit Access is implemented.  The REQ_FLUSH and REQ_FUA flags
+the Forced Unit Access is implemented.  The REQ_PREFLUSH and REQ_FUA flags
 may both be set on a single bio.
 
 
 Implementation details for make_request_fn based block drivers
 --------------------------------------------------------------
 
-These drivers will always see the REQ_FLUSH and REQ_FUA bits as they sit
+These drivers will always see the REQ_PREFLUSH and REQ_FUA bits as they sit
 directly below the submit_bio interface.  For remapping drivers the REQ_FUA
 bits need to be propagated to underlying devices, and a global flush needs
-to be implemented for bios with the REQ_FLUSH bit set.  For real device
-drivers that do not have a volatile cache the REQ_FLUSH and REQ_FUA bits
-on non-empty bios can simply be ignored, and REQ_FLUSH requests without
+to be implemented for bios with the REQ_PREFLUSH bit set.  For real device
+drivers that do not have a volatile cache the REQ_PREFLUSH and REQ_FUA bits
+on non-empty bios can simply be ignored, and REQ_PREFLUSH requests without
 data can be completed successfully without doing any work.  Drivers for
 devices with volatile caches need to implement the support for these
 flags themselves without any help from the block layer.
@@ -65,17 +65,17 @@ Implementation details for request_fn based block drivers
 --------------------------------------------------------------
 
 For devices that do not support volatile write caches there is no driver
-support required, the block layer completes empty REQ_FLUSH requests before
-entering the driver and strips off the REQ_FLUSH and REQ_FUA bits from
+support required, the block layer completes empty REQ_PREFLUSH requests before
+entering the driver and strips off the REQ_PREFLUSH and REQ_FUA bits from
 requests that have a payload.  For devices with volatile write caches the
 driver needs to tell the block layer that it supports flushing caches by
 doing:
 
        blk_queue_write_cache(sdkp->disk->queue, true, false);
 
-and handle empty REQ_FLUSH requests in its prep_fn/request_fn.  Note that
-REQ_FLUSH requests with a payload are automatically turned into a sequence
-of an empty REQ_FLUSH request followed by the actual write by the block
+and handle empty REQ_OP_FLUSH requests in its prep_fn/request_fn.  Note that
+REQ_PREFLUSH requests with a payload are automatically turned into a sequence
+of an empty REQ_OP_FLUSH request followed by the actual write by the block
 layer.  For devices that also support the FUA bit the block layer needs
 to be told to pass through the REQ_FUA bit using:
 
@@ -83,4 +83,4 @@ to be told to pass through the REQ_FUA bit using:
 
 and the driver must handle write requests that have the REQ_FUA bit set
 in prep_fn/request_fn.  If the FUA bit is not natively supported the block
-layer turns it into an empty REQ_FLUSH request after the actual write.
+layer turns it into an empty REQ_OP_FLUSH request after the actual write.
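
As a concrete companion to the text above, a minimal sketch of issuing a pure
cache flush under the new interface, mirroring what blkdev_issue_flush() does
after this series (the 'bdev' variable is assumed to be the target block
device; error handling trimmed):

        struct bio *bio;
        int ret;

        bio = bio_alloc(GFP_KERNEL, 0);         /* no data pages: flush only */
        bio->bi_bdev = bdev;
        bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
        ret = submit_bio_wait(bio);             /* returns once the cache flush completes */
        bio_put(bio);
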
diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt
index c10f30c..f4ebcba 100644
@@ -14,14 +14,14 @@ Log Ordering
 
 We log things in order of completion once we are sure the write is no longer in
 cache.  This means that normal WRITE requests are not actually logged until the
-next REQ_FLUSH request.  This is to make it easier for userspace to replay the
-log in a way that correlates to what is on disk and not what is in cache, to
-make it easier to detect improper waiting/flushing.
+next REQ_PREFLUSH request.  This is to make it easier for userspace to replay
+the log in a way that correlates to what is on disk and not what is in cache,
+to make it easier to detect improper waiting/flushing.
 
 This works by attaching all WRITE requests to a list once the write completes.
-Once we see a REQ_FLUSH request we splice this list onto the request and once
+Once we see a REQ_PREFLUSH request we splice this list onto the request and once
 the FLUSH request completes we log all of the WRITEs and then the FLUSH.  Only
-completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to
+completed WRITEs, at the time the REQ_PREFLUSH is issued, are added in order to
 simulate the worst case scenario with regard to power failures.  Consider the
 following example (W means write, C means complete):
 
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 17e96dc..ef6b4d9 100644
@@ -1286,7 +1286,7 @@ static void do_ubd_request(struct request_queue *q)
 
                req = dev->request;
 
-               if (req->cmd_flags & REQ_FLUSH) {
+               if (req_op(req) == REQ_OP_FLUSH) {
                        io_req = kmalloc(sizeof(struct io_thread_req),
                                         GFP_ATOMIC);
                        if (io_req == NULL) {
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 711e4d8..15d37b1 100644
@@ -26,6 +26,7 @@
 #include <linux/bio.h>
 #include <linux/workqueue.h>
 #include <linux/slab.h>
+#include "blk.h"
 
 #define BIP_INLINE_VECS        4
 
diff --git a/block/bio.c b/block/bio.c
index 0e4aa42..848cd35 100644
@@ -656,16 +656,15 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
        bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
        if (!bio)
                return NULL;
-
        bio->bi_bdev            = bio_src->bi_bdev;
        bio->bi_rw              = bio_src->bi_rw;
        bio->bi_iter.bi_sector  = bio_src->bi_iter.bi_sector;
        bio->bi_iter.bi_size    = bio_src->bi_iter.bi_size;
 
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                goto integrity_clone;
 
-       if (bio->bi_rw & REQ_WRITE_SAME) {
+       if (bio_op(bio) == REQ_OP_WRITE_SAME) {
                bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
                goto integrity_clone;
        }
@@ -854,21 +853,20 @@ static void submit_bio_wait_endio(struct bio *bio)
 
 /**
  * submit_bio_wait - submit a bio, and wait until it completes
- * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * Simple wrapper around submit_bio(). Returns 0 on success, or the error from
  * bio_endio() on failure.
  */
-int submit_bio_wait(int rw, struct bio *bio)
+int submit_bio_wait(struct bio *bio)
 {
        struct submit_bio_ret ret;
 
-       rw |= REQ_SYNC;
        init_completion(&ret.event);
        bio->bi_private = &ret;
        bio->bi_end_io = submit_bio_wait_endio;
-       submit_bio(rw, bio);
+       bio->bi_rw |= REQ_SYNC;
+       submit_bio(bio);
        wait_for_completion_io(&ret.event);
 
        return ret.error;
@@ -1167,7 +1165,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
                goto out_bmd;
 
        if (iter->type & WRITE)
-               bio->bi_rw |= REQ_WRITE;
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
        ret = 0;
 
@@ -1337,7 +1335,7 @@ struct bio *bio_map_user_iov(struct request_queue *q,
         * set data direction, and check if mapped pages need bouncing
         */
        if (iter->type & WRITE)
-               bio->bi_rw |= REQ_WRITE;
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
        bio_set_flag(bio, BIO_USER_MAPPED);
 
@@ -1530,7 +1528,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len,
                bio->bi_private = data;
        } else {
                bio->bi_end_io = bio_copy_kern_endio;
-               bio->bi_rw |= REQ_WRITE;
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
        }
 
        return bio;
@@ -1785,7 +1783,7 @@ struct bio *bio_split(struct bio *bio, int sectors,
         * Discards need a mutable bio_vec to accommodate the payload
         * required by the DSM TRIM and UNMAP commands.
         */
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                split = bio_clone_bioset(bio, gfp, bs);
        else
                split = bio_clone_fast(bio, gfp, bs);
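
With the rw argument gone from submit_bio_wait() above, callers now tag the
bio itself before submitting.  A hedged usage sketch (assuming 'bdev', 'page'
and 'sector' come from the caller), showing a synchronous single-page read:

        struct bio *bio = bio_alloc(GFP_KERNEL, 1);
        int err;

        bio->bi_bdev = bdev;
        bio->bi_iter.bi_sector = sector;
        bio_add_page(bio, page, PAGE_SIZE, 0);
        bio_set_op_attrs(bio, REQ_OP_READ, 0);  /* was: submit_bio_wait(READ, bio) */
        err = submit_bio_wait(bio);             /* REQ_SYNC is added internally now */
        bio_put(bio);
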
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 66e6f1a..dd38e5c 100644
@@ -905,7 +905,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
        return 0;
 }
 
-struct cftype blkcg_files[] = {
+static struct cftype blkcg_files[] = {
        {
                .name = "stat",
                .flags = CFTYPE_NOT_ON_ROOT,
@@ -914,7 +914,7 @@ struct cftype blkcg_files[] = {
        { }     /* terminate */
 };
 
-struct cftype blkcg_legacy_files[] = {
+static struct cftype blkcg_legacy_files[] = {
        {
                .name = "reset_stats",
                .write_u64 = blkcg_reset_stats,
diff --git a/block/blk-core.c b/block/blk-core.c
index 2475b1c..3cfd67d 100644
@@ -959,10 +959,10 @@ static void __freed_request(struct request_list *rl, int sync)
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  */
-static void freed_request(struct request_list *rl, unsigned int flags)
+static void freed_request(struct request_list *rl, int op, unsigned int flags)
 {
        struct request_queue *q = rl->q;
-       int sync = rw_is_sync(flags);
+       int sync = rw_is_sync(op, flags);
 
        q->nr_rqs[sync]--;
        rl->count[sync]--;
@@ -1029,7 +1029,7 @@ static bool blk_rq_should_init_elevator(struct bio *bio)
         * Flush requests do not use the elevator so skip initialization.
         * This allows a request to share the flush and elevator data.
         */
-       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
+       if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA))
                return false;
 
        return true;
@@ -1054,7 +1054,8 @@ static struct io_context *rq_ioc(struct bio *bio)
 /**
  * __get_request - get a free request
  * @rl: request list to allocate from
- * @rw_flags: RW and SYNC flags
+ * @op: REQ_OP_READ/REQ_OP_WRITE
+ * @op_flags: rq_flag_bits
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
@@ -1065,21 +1066,22 @@ static struct io_context *rq_ioc(struct bio *bio)
  * Returns ERR_PTR on failure, with @q->queue_lock held.
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
-static struct request *__get_request(struct request_list *rl, int rw_flags,
-                                    struct bio *bio, gfp_t gfp_mask)
+static struct request *__get_request(struct request_list *rl, int op,
+                                    int op_flags, struct bio *bio,
+                                    gfp_t gfp_mask)
 {
        struct request_queue *q = rl->q;
        struct request *rq;
        struct elevator_type *et = q->elevator->type;
        struct io_context *ioc = rq_ioc(bio);
        struct io_cq *icq = NULL;
-       const bool is_sync = rw_is_sync(rw_flags) != 0;
+       const bool is_sync = rw_is_sync(op, op_flags) != 0;
        int may_queue;
 
        if (unlikely(blk_queue_dying(q)))
                return ERR_PTR(-ENODEV);
 
-       may_queue = elv_may_queue(q, rw_flags);
+       may_queue = elv_may_queue(q, op, op_flags);
        if (may_queue == ELV_MQUEUE_NO)
                goto rq_starved;
 
@@ -1123,7 +1125,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
 
        /*
         * Decide whether the new request will be managed by elevator.  If
-        * so, mark @rw_flags and increment elvpriv.  Non-zero elvpriv will
+        * so, mark @op_flags and increment elvpriv.  Non-zero elvpriv will
         * prevent the current elevator from being destroyed until the new
         * request is freed.  This guarantees icq's won't be destroyed and
         * makes creating new ones safe.
@@ -1132,14 +1134,14 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
         * it will be created after releasing queue_lock.
         */
        if (blk_rq_should_init_elevator(bio) && !blk_queue_bypass(q)) {
-               rw_flags |= REQ_ELVPRIV;
+               op_flags |= REQ_ELVPRIV;
                q->nr_rqs_elvpriv++;
                if (et->icq_cache && ioc)
                        icq = ioc_lookup_icq(ioc, q);
        }
 
        if (blk_queue_io_stat(q))
-               rw_flags |= REQ_IO_STAT;
+               op_flags |= REQ_IO_STAT;
        spin_unlock_irq(q->queue_lock);
 
        /* allocate and init request */
@@ -1149,10 +1151,10 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
 
        blk_rq_init(q, rq);
        blk_rq_set_rl(rq, rl);
-       rq->cmd_flags = rw_flags | REQ_ALLOCED;
+       req_set_op_attrs(rq, op, op_flags | REQ_ALLOCED);
 
        /* init elvpriv */
-       if (rw_flags & REQ_ELVPRIV) {
+       if (op_flags & REQ_ELVPRIV) {
                if (unlikely(et->icq_cache && !icq)) {
                        if (ioc)
                                icq = ioc_create_icq(ioc, q, gfp_mask);
@@ -1178,7 +1180,7 @@ out:
        if (ioc_batching(q, ioc))
                ioc->nr_batch_requests--;
 
-       trace_block_getrq(q, bio, rw_flags & 1);
+       trace_block_getrq(q, bio, op);
        return rq;
 
 fail_elvpriv:
@@ -1208,7 +1210,7 @@ fail_alloc:
         * queue, but this is pretty rare.
         */
        spin_lock_irq(q->queue_lock);
-       freed_request(rl, rw_flags);
+       freed_request(rl, op, op_flags);
 
        /*
         * in the very unlikely event that allocation failed and no
@@ -1226,7 +1228,8 @@ rq_starved:
 /**
  * get_request - get a free request
  * @q: request_queue to allocate request from
- * @rw_flags: RW and SYNC flags
+ * @op: REQ_OP_READ/REQ_OP_WRITE
+ * @op_flags: rq_flag_bits
  * @bio: bio to allocate request for (can be %NULL)
  * @gfp_mask: allocation mask
  *
@@ -1237,17 +1240,18 @@ rq_starved:
  * Returns ERR_PTR on failure, with @q->queue_lock held.
  * Returns request pointer on success, with @q->queue_lock *not held*.
  */
-static struct request *get_request(struct request_queue *q, int rw_flags,
-                                  struct bio *bio, gfp_t gfp_mask)
+static struct request *get_request(struct request_queue *q, int op,
+                                  int op_flags, struct bio *bio,
+                                  gfp_t gfp_mask)
 {
-       const bool is_sync = rw_is_sync(rw_flags) != 0;
+       const bool is_sync = rw_is_sync(op, op_flags) != 0;
        DEFINE_WAIT(wait);
        struct request_list *rl;
        struct request *rq;
 
        rl = blk_get_rl(q, bio);        /* transferred to @rq on success */
 retry:
-       rq = __get_request(rl, rw_flags, bio, gfp_mask);
+       rq = __get_request(rl, op, op_flags, bio, gfp_mask);
        if (!IS_ERR(rq))
                return rq;
 
@@ -1260,7 +1264,7 @@ retry:
        prepare_to_wait_exclusive(&rl->wait[is_sync], &wait,
                                  TASK_UNINTERRUPTIBLE);
 
-       trace_block_sleeprq(q, bio, rw_flags & 1);
+       trace_block_sleeprq(q, bio, op);
 
        spin_unlock_irq(q->queue_lock);
        io_schedule();
@@ -1289,7 +1293,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
        create_io_context(gfp_mask, q->node);
 
        spin_lock_irq(q->queue_lock);
-       rq = get_request(q, rw, NULL, gfp_mask);
+       rq = get_request(q, rw, 0, NULL, gfp_mask);
        if (IS_ERR(rq))
                spin_unlock_irq(q->queue_lock);
        /* q->queue_lock is unlocked at this point */
@@ -1491,13 +1495,14 @@ void __blk_put_request(struct request_queue *q, struct request *req)
         */
        if (req->cmd_flags & REQ_ALLOCED) {
                unsigned int flags = req->cmd_flags;
+               int op = req_op(req);
                struct request_list *rl = blk_rq_rl(req);
 
                BUG_ON(!list_empty(&req->queuelist));
                BUG_ON(ELV_ON_HASH(req));
 
                blk_free_request(rl, req);
-               freed_request(rl, flags);
+               freed_request(rl, op, flags);
                blk_put_rl(rl);
        }
 }
@@ -1712,7 +1717,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
 {
        const bool sync = !!(bio->bi_rw & REQ_SYNC);
        struct blk_plug *plug;
-       int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT;
+       int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
        struct request *req;
        unsigned int request_count = 0;
 
@@ -1731,7 +1736,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
                return BLK_QC_T_NONE;
        }
 
-       if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
+       if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) {
                spin_lock_irq(q->queue_lock);
                where = ELEVATOR_INSERT_FLUSH;
                goto get_rq;
@@ -1772,15 +1777,19 @@ get_rq:
         * but we need to set it earlier to expose the sync flag to the
         * rq allocator and io schedulers.
         */
-       rw_flags = bio_data_dir(bio);
        if (sync)
                rw_flags |= REQ_SYNC;
 
        /*
+        * Add in META/PRIO flags, if set, before we get to the IO scheduler
+        */
+       rw_flags |= (bio->bi_rw & (REQ_META | REQ_PRIO));
+
+       /*
         * Grab a free request. This is might sleep but can not fail.
         * Returns with the queue unlocked.
         */
-       req = get_request(q, rw_flags, bio, GFP_NOIO);
+       req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
        if (IS_ERR(req)) {
                bio->bi_error = PTR_ERR(req);
                bio_endio(bio);
@@ -1849,7 +1858,7 @@ static void handle_bad_sector(struct bio *bio)
        char b[BDEVNAME_SIZE];
 
        printk(KERN_INFO "attempt to access beyond end of device\n");
-       printk(KERN_INFO "%s: rw=%ld, want=%Lu, limit=%Lu\n",
+       printk(KERN_INFO "%s: rw=%d, want=%Lu, limit=%Lu\n",
                        bdevname(bio->bi_bdev, b),
                        bio->bi_rw,
                        (unsigned long long)bio_end_sector(bio),
@@ -1964,23 +1973,23 @@ generic_make_request_checks(struct bio *bio)
         * drivers without flush support don't have to worry
         * about them.
         */
-       if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+       if ((bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) &&
            !test_bit(QUEUE_FLAG_WC, &q->queue_flags)) {
-               bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA);
+               bio->bi_rw &= ~(REQ_PREFLUSH | REQ_FUA);
                if (!nr_sectors) {
                        err = 0;
                        goto end_io;
                }
        }
 
-       if ((bio->bi_rw & REQ_DISCARD) &&
+       if ((bio_op(bio) == REQ_OP_DISCARD) &&
            (!blk_queue_discard(q) ||
             ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
                err = -EOPNOTSUPP;
                goto end_io;
        }
 
-       if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
+       if (bio_op(bio) == REQ_OP_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
                err = -EOPNOTSUPP;
                goto end_io;
        }
@@ -2094,7 +2103,6 @@ EXPORT_SYMBOL(generic_make_request);
 
 /**
  * submit_bio - submit a bio to the block device layer for I/O
- * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
  * @bio: The &struct bio which describes the I/O
  *
  * submit_bio() is very similar in purpose to generic_make_request(), and
@@ -2102,10 +2110,8 @@ EXPORT_SYMBOL(generic_make_request);
  * interfaces; @bio must be presetup and ready for I/O.
  *
  */
-blk_qc_t submit_bio(int rw, struct bio *bio)
+blk_qc_t submit_bio(struct bio *bio)
 {
-       bio->bi_rw |= rw;
-
        /*
         * If it's a regular read/write or a barrier with data attached,
         * go through the normal accounting stuff before submission.
@@ -2113,12 +2119,12 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
        if (bio_has_data(bio)) {
                unsigned int count;
 
-               if (unlikely(rw & REQ_WRITE_SAME))
+               if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
                        count = bdev_logical_block_size(bio->bi_bdev) >> 9;
                else
                        count = bio_sectors(bio);
 
-               if (rw & WRITE) {
+               if (op_is_write(bio_op(bio))) {
                        count_vm_events(PGPGOUT, count);
                } else {
                        task_io_account_read(bio->bi_iter.bi_size);
@@ -2129,7 +2135,7 @@ blk_qc_t submit_bio(int rw, struct bio *bio)
                        char b[BDEVNAME_SIZE];
                        printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
                        current->comm, task_pid_nr(current),
-                               (rw & WRITE) ? "WRITE" : "READ",
+                               op_is_write(bio_op(bio)) ? "WRITE" : "READ",
                                (unsigned long long)bio->bi_iter.bi_sector,
                                bdevname(bio->bi_bdev, b),
                                count);
@@ -2160,7 +2166,7 @@ EXPORT_SYMBOL(submit_bio);
 static int blk_cloned_rq_check_limits(struct request_queue *q,
                                      struct request *rq)
 {
-       if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
+       if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
                printk(KERN_ERR "%s: over max size limit.\n", __func__);
                return -EIO;
        }
@@ -2216,7 +2222,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
         */
        BUG_ON(blk_queued_rq(rq));
 
-       if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA))
+       if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
                where = ELEVATOR_INSERT_FLUSH;
 
        add_acct_request(q, rq, where);
@@ -2979,8 +2985,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request_err);
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
                     struct bio *bio)
 {
-       /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
-       rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
+       req_set_op(rq, bio_op(bio));
 
        if (bio_has_data(bio))
                rq->nr_phys_segments = bio_phys_segments(q, bio);
@@ -3065,7 +3070,8 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
        dst->cpu = src->cpu;
-       dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
+       req_set_op_attrs(dst, req_op(src),
+                        (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE);
        dst->cmd_type = src->cmd_type;
        dst->__sector = blk_rq_pos(src);
        dst->__data_len = blk_rq_bytes(src);
@@ -3310,7 +3316,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
                /*
                 * rq is already accounted, so use raw insert
                 */
-               if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA))
+               if (rq->cmd_flags & (REQ_PREFLUSH | REQ_FUA))
                        __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH);
                else
                        __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index 3fec8a2..7ea0432 100644
@@ -62,7 +62,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 
        /*
         * don't check dying flag for MQ because the request won't
-        * be resued after dying flag is set
+        * be reused after dying flag is set
         */
        if (q->mq_ops) {
                blk_mq_insert_request(rq, at_head, true, false);
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b1c91d2..d308def 100644
@@ -10,8 +10,8 @@
  * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
  * properties and hardware capability.
  *
- * If a request doesn't have data, only REQ_FLUSH makes sense, which
- * indicates a simple flush request.  If there is data, REQ_FLUSH indicates
+ * If a request doesn't have data, only REQ_PREFLUSH makes sense, which
+ * indicates a simple flush request.  If there is data, REQ_PREFLUSH indicates
  * that the device cache should be flushed before the data is executed, and
  * REQ_FUA means that the data must be on non-volatile media on request
  * completion.
  * difference.  The requests are either completed immediately if there's no
  * data or executed as normal requests otherwise.
  *
- * If the device has writeback cache and supports FUA, REQ_FLUSH is
+ * If the device has writeback cache and supports FUA, REQ_PREFLUSH is
  * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
  *
- * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is
- * translated to PREFLUSH and REQ_FUA to POSTFLUSH.
+ * If the device has writeback cache and doesn't support FUA, REQ_PREFLUSH
+ * is translated to PREFLUSH and REQ_FUA to POSTFLUSH.
  *
  * The actual execution of flush is double buffered.  Whenever a request
  * needs to execute PRE or POSTFLUSH, it queues at
  * fq->flush_queue[fq->flush_pending_idx].  Once certain criteria are met, a
- * flush is issued and the pending_idx is toggled.  When the flush
+ * REQ_OP_FLUSH is issued and the pending_idx is toggled.  When the flush
  * completes, all the requests which were pending are proceeded to the next
  * step.  This allows arbitrary merging of different types of FLUSH/FUA
  * requests.
@@ -103,7 +103,7 @@ static unsigned int blk_flush_policy(unsigned long fflags, struct request *rq)
                policy |= REQ_FSEQ_DATA;
 
        if (fflags & (1UL << QUEUE_FLAG_WC)) {
-               if (rq->cmd_flags & REQ_FLUSH)
+               if (rq->cmd_flags & REQ_PREFLUSH)
                        policy |= REQ_FSEQ_PREFLUSH;
                if (!(fflags & (1UL << QUEUE_FLAG_FUA)) &&
                    (rq->cmd_flags & REQ_FUA))
@@ -330,7 +330,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq)
        }
 
        flush_rq->cmd_type = REQ_TYPE_FS;
-       flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+       req_set_op_attrs(flush_rq, REQ_OP_FLUSH, WRITE_FLUSH | REQ_FLUSH_SEQ);
        flush_rq->rq_disk = first_rq->rq_disk;
        flush_rq->end_io = flush_end_io;
 
@@ -391,9 +391,9 @@ void blk_insert_flush(struct request *rq)
 
        /*
         * @policy now records what operations need to be done.  Adjust
-        * REQ_FLUSH and FUA for the driver.
+        * REQ_PREFLUSH and FUA for the driver.
         */
-       rq->cmd_flags &= ~REQ_FLUSH;
+       rq->cmd_flags &= ~REQ_PREFLUSH;
        if (!(fflags & (1UL << QUEUE_FLAG_FUA)))
                rq->cmd_flags &= ~REQ_FUA;
 
@@ -485,8 +485,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 
        bio = bio_alloc(gfp_mask, 0);
        bio->bi_bdev = bdev;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH);
 
-       ret = submit_bio_wait(WRITE_FLUSH, bio);
+       ret = submit_bio_wait(bio);
 
        /*
         * The driver must store the error location in ->bi_sector, if
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9e29dc3..9031d2a 100644
@@ -9,21 +9,22 @@
 
 #include "blk.h"
 
-static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages,
+static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
                gfp_t gfp)
 {
        struct bio *new = bio_alloc(gfp, nr_pages);
 
        if (bio) {
                bio_chain(bio, new);
-               submit_bio(rw, bio);
+               submit_bio(bio);
        }
 
        return new;
 }
 
 int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
-               sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop)
+               sector_t nr_sects, gfp_t gfp_mask, int op_flags,
+               struct bio **biop)
 {
        struct request_queue *q = bdev_get_queue(bdev);
        struct bio *bio = *biop;
@@ -34,7 +35,7 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                return -ENXIO;
        if (!blk_queue_discard(q))
                return -EOPNOTSUPP;
-       if ((type & REQ_SECURE) && !blk_queue_secdiscard(q))
+       if ((op_flags & REQ_SECURE) && !blk_queue_secdiscard(q))
                return -EOPNOTSUPP;
 
        /* Zero-sector (unknown) and one-sector granularities are the same.  */
@@ -62,9 +63,10 @@ int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                        req_sects = end_sect - sector;
                }
 
-               bio = next_bio(bio, type, 1, gfp_mask);
+               bio = next_bio(bio, 1, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio->bi_bdev = bdev;
+               bio_set_op_attrs(bio, REQ_OP_DISCARD, op_flags);
 
                bio->bi_iter.bi_size = req_sects << 9;
                nr_sects -= req_sects;
@@ -98,19 +100,19 @@ EXPORT_SYMBOL(__blkdev_issue_discard);
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
                sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
-       int type = REQ_WRITE | REQ_DISCARD;
+       int op_flags = 0;
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;
 
        if (flags & BLKDEV_DISCARD_SECURE)
-               type |= REQ_SECURE;
+               op_flags |= REQ_SECURE;
 
        blk_start_plug(&plug);
-       ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, type,
+       ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, op_flags,
                        &bio);
        if (!ret && bio) {
-               ret = submit_bio_wait(type, bio);
+               ret = submit_bio_wait(bio);
                if (ret == -EOPNOTSUPP)
                        ret = 0;
                bio_put(bio);
@@ -148,13 +150,14 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
        max_write_same_sectors = UINT_MAX >> 9;
 
        while (nr_sects) {
-               bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask);
+               bio = next_bio(bio, 1, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio->bi_bdev = bdev;
                bio->bi_vcnt = 1;
                bio->bi_io_vec->bv_page = page;
                bio->bi_io_vec->bv_offset = 0;
                bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
+               bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);
 
                if (nr_sects > max_write_same_sectors) {
                        bio->bi_iter.bi_size = max_write_same_sectors << 9;
@@ -167,7 +170,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
        }
 
        if (bio) {
-               ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio);
+               ret = submit_bio_wait(bio);
                bio_put(bio);
        }
        return ret != -EOPNOTSUPP ? ret : 0;
@@ -193,11 +196,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
        unsigned int sz;
 
        while (nr_sects != 0) {
-               bio = next_bio(bio, WRITE,
-                               min(nr_sects, (sector_t)BIO_MAX_PAGES),
+               bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
                                gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio->bi_bdev   = bdev;
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
                while (nr_sects != 0) {
                        sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -210,7 +213,7 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
        }
 
        if (bio) {
-               ret = submit_bio_wait(WRITE, bio);
+               ret = submit_bio_wait(bio);
                bio_put(bio);
                return ret;
        }
diff --git a/block/blk-map.c b/block/blk-map.c
index b9f88b7..61733a6 100644
@@ -224,7 +224,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
                return PTR_ERR(bio);
 
        if (!reading)
-               bio->bi_rw |= REQ_WRITE;
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
        if (do_copy)
                rq->cmd_flags |= REQ_COPY_USER;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 2613531..5e4d93e 100644
@@ -172,9 +172,9 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
        struct bio *split, *res;
        unsigned nsegs;
 
-       if ((*bio)->bi_rw & REQ_DISCARD)
+       if (bio_op(*bio) == REQ_OP_DISCARD)
                split = blk_bio_discard_split(q, *bio, bs, &nsegs);
-       else if ((*bio)->bi_rw & REQ_WRITE_SAME)
+       else if (bio_op(*bio) == REQ_OP_WRITE_SAME)
                split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
        else
                split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
@@ -213,10 +213,10 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
         * This should probably be returning 0, but blk_add_request_payload()
         * (Christoph!!!!)
         */
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                return 1;
 
-       if (bio->bi_rw & REQ_WRITE_SAME)
+       if (bio_op(bio) == REQ_OP_WRITE_SAME)
                return 1;
 
        fbio = bio;
@@ -385,7 +385,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
        nsegs = 0;
        cluster = blk_queue_cluster(q);
 
-       if (bio->bi_rw & REQ_DISCARD) {
+       if (bio_op(bio) == REQ_OP_DISCARD) {
                /*
                 * This is a hack - drivers should be neither modifying the
                 * biovec, nor relying on bi_vcnt - but because of
@@ -400,7 +400,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
                return 0;
        }
 
-       if (bio->bi_rw & REQ_WRITE_SAME) {
+       if (bio_op(bio) == REQ_OP_WRITE_SAME) {
 single_segment:
                *sg = sglist;
                bvec = bio_iovec(bio);
@@ -439,7 +439,7 @@ int blk_rq_map_sg(struct request_queue *q, struct request *rq,
        }
 
        if (q->dma_drain_size && q->dma_drain_needed(rq)) {
-               if (rq->cmd_flags & REQ_WRITE)
+               if (op_is_write(req_op(rq)))
                        memset(q->dma_drain_buffer, 0, q->dma_drain_size);
 
                sg_unmark_end(sg);
@@ -500,7 +500,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
            integrity_req_gap_back_merge(req, bio))
                return 0;
        if (blk_rq_sectors(req) + bio_sectors(bio) >
-           blk_rq_get_max_sectors(req)) {
+           blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
@@ -524,7 +524,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
            integrity_req_gap_front_merge(req, bio))
                return 0;
        if (blk_rq_sectors(req) + bio_sectors(bio) >
-           blk_rq_get_max_sectors(req)) {
+           blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
@@ -570,7 +570,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
         * Will it become too large?
         */
        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
-           blk_rq_get_max_sectors(req))
+           blk_rq_get_max_sectors(req, blk_rq_pos(req)))
                return 0;
 
        total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -649,7 +649,8 @@ static int attempt_merge(struct request_queue *q, struct request *req,
        if (!rq_mergeable(req) || !rq_mergeable(next))
                return 0;
 
-       if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
+       if (!blk_check_merge_flags(req->cmd_flags, req_op(req), next->cmd_flags,
+                                  req_op(next)))
                return 0;
 
        /*
@@ -663,7 +664,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
            || req_no_special_merge(next))
                return 0;
 
-       if (req->cmd_flags & REQ_WRITE_SAME &&
+       if (req_op(req) == REQ_OP_WRITE_SAME &&
            !blk_write_same_mergeable(req->bio, next->bio))
                return 0;
 
@@ -743,6 +744,12 @@ int attempt_front_merge(struct request_queue *q, struct request *rq)
 int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
                          struct request *next)
 {
+       struct elevator_queue *e = q->elevator;
+
+       if (e->type->ops.elevator_allow_rq_merge_fn)
+               if (!e->type->ops.elevator_allow_rq_merge_fn(q, rq, next))
+                       return 0;
+
        return attempt_merge(q, rq, next);
 }
 
@@ -751,7 +758,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
        if (!rq_mergeable(rq) || !bio_mergeable(bio))
                return false;
 
-       if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
+       if (!blk_check_merge_flags(rq->cmd_flags, req_op(rq), bio->bi_rw,
+                                  bio_op(bio)))
                return false;
 
        /* different data direction or already started, don't merge */
@@ -767,7 +775,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
                return false;
 
        /* must be using the same buffer */
-       if (rq->cmd_flags & REQ_WRITE_SAME &&
+       if (req_op(rq) == REQ_OP_WRITE_SAME &&
            !blk_write_same_mergeable(rq->bio, bio))
                return false;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f9b9049..2a1920c 100644
@@ -159,16 +159,17 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 EXPORT_SYMBOL(blk_mq_can_queue);
 
 static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
-                              struct request *rq, unsigned int rw_flags)
+                              struct request *rq, int op,
+                              unsigned int op_flags)
 {
        if (blk_queue_io_stat(q))
-               rw_flags |= REQ_IO_STAT;
+               op_flags |= REQ_IO_STAT;
 
        INIT_LIST_HEAD(&rq->queuelist);
        /* csd/requeue_work/fifo_time is initialized before use */
        rq->q = q;
        rq->mq_ctx = ctx;
-       rq->cmd_flags |= rw_flags;
+       req_set_op_attrs(rq, op, op_flags);
        /* do not touch atomic flags, it needs atomic ops against the timer */
        rq->cpu = -1;
        INIT_HLIST_NODE(&rq->hash);
@@ -203,11 +204,11 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
        rq->end_io_data = NULL;
        rq->next_rq = NULL;
 
-       ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
+       ctx->rq_dispatched[rw_is_sync(op, op_flags)]++;
 }
 
 static struct request *
-__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
+__blk_mq_alloc_request(struct blk_mq_alloc_data *data, int op, int op_flags)
 {
        struct request *rq;
        unsigned int tag;
@@ -222,7 +223,7 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw)
                }
 
                rq->tag = tag;
-               blk_mq_rq_ctx_init(data->q, data->ctx, rq, rw);
+               blk_mq_rq_ctx_init(data->q, data->ctx, rq, op, op_flags);
                return rq;
        }
 
@@ -246,7 +247,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
        blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
 
-       rq = __blk_mq_alloc_request(&alloc_data, rw);
+       rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
        if (!rq && !(flags & BLK_MQ_REQ_NOWAIT)) {
                __blk_mq_run_hw_queue(hctx);
                blk_mq_put_ctx(ctx);
@@ -254,7 +255,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
                ctx = blk_mq_get_ctx(q);
                hctx = q->mq_ops->map_queue(q, ctx->cpu);
                blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
-               rq =  __blk_mq_alloc_request(&alloc_data, rw);
+               rq =  __blk_mq_alloc_request(&alloc_data, rw, 0);
                ctx = alloc_data.ctx;
        }
        blk_mq_put_ctx(ctx);
@@ -784,7 +785,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
                switch (ret) {
                case BLK_MQ_RQ_QUEUE_OK:
                        queued++;
-                       continue;
+                       break;
                case BLK_MQ_RQ_QUEUE_BUSY:
                        list_add(&rq->queuelist, &rq_list);
                        __blk_mq_requeue_request(rq);
@@ -1169,28 +1170,29 @@ static struct request *blk_mq_map_request(struct request_queue *q,
        struct blk_mq_hw_ctx *hctx;
        struct blk_mq_ctx *ctx;
        struct request *rq;
-       int rw = bio_data_dir(bio);
+       int op = bio_data_dir(bio);
+       int op_flags = 0;
        struct blk_mq_alloc_data alloc_data;
 
        blk_queue_enter_live(q);
        ctx = blk_mq_get_ctx(q);
        hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-       if (rw_is_sync(bio->bi_rw))
-               rw |= REQ_SYNC;
+       if (rw_is_sync(bio_op(bio), bio->bi_rw))
+               op_flags |= REQ_SYNC;
 
-       trace_block_getrq(q, bio, rw);
+       trace_block_getrq(q, bio, op);
        blk_mq_set_alloc_data(&alloc_data, q, BLK_MQ_REQ_NOWAIT, ctx, hctx);
-       rq = __blk_mq_alloc_request(&alloc_data, rw);
+       rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
        if (unlikely(!rq)) {
                __blk_mq_run_hw_queue(hctx);
                blk_mq_put_ctx(ctx);
-               trace_block_sleeprq(q, bio, rw);
+               trace_block_sleeprq(q, bio, op);
 
                ctx = blk_mq_get_ctx(q);
                hctx = q->mq_ops->map_queue(q, ctx->cpu);
                blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
-               rq = __blk_mq_alloc_request(&alloc_data, rw);
+               rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
                ctx = alloc_data.ctx;
                hctx = alloc_data.hctx;
        }
@@ -1244,8 +1246,8 @@ static int blk_mq_direct_issue_request(struct request *rq, blk_qc_t *cookie)
  */
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
-       const int is_sync = rw_is_sync(bio->bi_rw);
-       const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+       const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
+       const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
        struct blk_map_ctx data;
        struct request *rq;
        unsigned int request_count = 0;
@@ -1338,8 +1340,8 @@ done:
  */
 static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
 {
-       const int is_sync = rw_is_sync(bio->bi_rw);
-       const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+       const int is_sync = rw_is_sync(bio_op(bio), bio->bi_rw);
+       const int is_flush_fua = bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
        struct blk_plug *plug;
        unsigned int request_count = 0;
        struct blk_map_ctx data;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9920596..f87a7e7 100644
@@ -379,6 +379,11 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
        return count;
 }
 
+static ssize_t queue_dax_show(struct request_queue *q, char *page)
+{
+       return queue_var_show(blk_queue_dax(q), page);
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
        .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
        .show = queue_requests_show,
@@ -516,6 +521,11 @@ static struct queue_sysfs_entry queue_wc_entry = {
        .store = queue_wc_store,
 };
 
+static struct queue_sysfs_entry queue_dax_entry = {
+       .attr = {.name = "dax", .mode = S_IRUGO },
+       .show = queue_dax_show,
+};
+
 static struct attribute *default_attrs[] = {
        &queue_requests_entry.attr,
        &queue_ra_entry.attr,
@@ -542,6 +552,7 @@ static struct attribute *default_attrs[] = {
        &queue_random_entry.attr,
        &queue_poll_entry.attr,
        &queue_wc_entry.attr,
+       &queue_dax_entry.attr,
        NULL,
 };
 
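
The new "dax" attribute above exports QUEUE_FLAG_DAX as
/sys/block/<dev>/queue/dax.  A driver-side sketch of advertising the flag
(hedged: 'q' is the driver's request_queue, and queue_flag_set_unlocked() is
the pre-registration helper assumed here, set before the disk goes live):

        queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);     /* advertise DAX support */

        /* consumers can test it with the new helper */
        if (blk_queue_dax(q))
                pr_debug("DAX-capable queue\n");
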
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4a34978..acabba1 100644
@@ -10,7 +10,7 @@
 #include <linux/slab.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
-#include <linux/jiffies.h>
+#include <linux/ktime.h>
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
 #include <linux/blktrace_api.h>
  */
 /* max queue in one round of service */
 static const int cfq_quantum = 8;
-static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 };
+static const u64 cfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
 /* maximum backwards seek, in KiB */
 static const int cfq_back_max = 16 * 1024;
 /* penalty of a backwards seek */
 static const int cfq_back_penalty = 2;
-static const int cfq_slice_sync = HZ / 10;
-static int cfq_slice_async = HZ / 25;
+static const u64 cfq_slice_sync = NSEC_PER_SEC / 10;
+static u64 cfq_slice_async = NSEC_PER_SEC / 25;
 static const int cfq_slice_async_rq = 2;
-static int cfq_slice_idle = HZ / 125;
-static int cfq_group_idle = HZ / 125;
-static const int cfq_target_latency = HZ * 3/10; /* 300 ms */
+static u64 cfq_slice_idle = NSEC_PER_SEC / 125;
+static u64 cfq_group_idle = NSEC_PER_SEC / 125;
+static const u64 cfq_target_latency = (u64)NSEC_PER_SEC * 3/10; /* 300 ms */
 static const int cfq_hist_divisor = 4;
 
 /*
  * offset from end of service tree
  */
-#define CFQ_IDLE_DELAY         (HZ / 5)
+#define CFQ_IDLE_DELAY         (NSEC_PER_SEC / 5)
 
 /*
  * below this threshold, we consider thinktime immediate
  */
-#define CFQ_MIN_TT             (2)
+#define CFQ_MIN_TT             (2 * NSEC_PER_SEC / HZ)
 
 #define CFQ_SLICE_SCALE                (5)
 #define CFQ_HW_QUEUE_MIN       (5)
@@ -73,11 +73,11 @@ static struct kmem_cache *cfq_pool;
 #define CFQ_WEIGHT_LEGACY_MAX  1000
 
 struct cfq_ttime {
-       unsigned long last_end_request;
+       u64 last_end_request;
 
-       unsigned long ttime_total;
+       u64 ttime_total;
+       u64 ttime_mean;
        unsigned long ttime_samples;
-       unsigned long ttime_mean;
 };
 
 /*
@@ -94,7 +94,7 @@ struct cfq_rb_root {
        struct cfq_ttime ttime;
 };
 #define CFQ_RB_ROOT    (struct cfq_rb_root) { .rb = RB_ROOT, \
-                       .ttime = {.last_end_request = jiffies,},}
+                       .ttime = {.last_end_request = ktime_get_ns(),},}
 
 /*
  * Per process-grouping structure
@@ -109,7 +109,7 @@ struct cfq_queue {
        /* service_tree member */
        struct rb_node rb_node;
        /* service_tree key */
-       unsigned long rb_key;
+       u64 rb_key;
        /* prio tree member */
        struct rb_node p_node;
        /* prio tree root we belong to, if any */
@@ -126,13 +126,13 @@ struct cfq_queue {
        struct list_head fifo;
 
        /* time when queue got scheduled in to dispatch first request. */
-       unsigned long dispatch_start;
-       unsigned int allocated_slice;
-       unsigned int slice_dispatch;
+       u64 dispatch_start;
+       u64 allocated_slice;
+       u64 slice_dispatch;
        /* time when first request from queue completed and slice started. */
-       unsigned long slice_start;
-       unsigned long slice_end;
-       long slice_resid;
+       u64 slice_start;
+       u64 slice_end;
+       s64 slice_resid;
 
        /* pending priority requests */
        int prio_pending;
@@ -141,7 +141,7 @@ struct cfq_queue {
 
        /* io prio of this group */
        unsigned short ioprio, org_ioprio;
-       unsigned short ioprio_class;
+       unsigned short ioprio_class, org_ioprio_class;
 
        pid_t pid;
 
@@ -290,7 +290,7 @@ struct cfq_group {
        struct cfq_rb_root service_trees[2][3];
        struct cfq_rb_root service_tree_idle;
 
-       unsigned long saved_wl_slice;
+       u64 saved_wl_slice;
        enum wl_type_t saved_wl_type;
        enum wl_class_t saved_wl_class;
 
@@ -329,7 +329,7 @@ struct cfq_data {
         */
        enum wl_class_t serving_wl_class;
        enum wl_type_t serving_wl_type;
-       unsigned long workload_expires;
+       u64 workload_expires;
        struct cfq_group *serving_group;
 
        /*
@@ -362,7 +362,7 @@ struct cfq_data {
        /*
         * idle window management
         */
-       struct timer_list idle_slice_timer;
+       struct hrtimer idle_slice_timer;
        struct work_struct unplug_work;
 
        struct cfq_queue *active_queue;
@@ -374,22 +374,22 @@ struct cfq_data {
         * tunables, see top of file
         */
        unsigned int cfq_quantum;
-       unsigned int cfq_fifo_expire[2];
        unsigned int cfq_back_penalty;
        unsigned int cfq_back_max;
-       unsigned int cfq_slice[2];
        unsigned int cfq_slice_async_rq;
-       unsigned int cfq_slice_idle;
-       unsigned int cfq_group_idle;
        unsigned int cfq_latency;
-       unsigned int cfq_target_latency;
+       u64 cfq_fifo_expire[2];
+       u64 cfq_slice[2];
+       u64 cfq_slice_idle;
+       u64 cfq_group_idle;
+       u64 cfq_target_latency;
 
        /*
         * Fallback dummy cfqq for extreme OOM conditions
         */
        struct cfq_queue oom_cfqq;
 
-       unsigned long last_delayed_sync;
+       u64 last_delayed_sync;
 };
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
@@ -667,15 +667,16 @@ static inline void cfqg_put(struct cfq_group *cfqg)
 } while (0)
 
 static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
-                                           struct cfq_group *curr_cfqg, int rw)
+                                           struct cfq_group *curr_cfqg, int op,
+                                           int op_flags)
 {
-       blkg_rwstat_add(&cfqg->stats.queued, rw, 1);
+       blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, 1);
        cfqg_stats_end_empty_time(&cfqg->stats);
        cfqg_stats_set_start_group_wait_time(cfqg, curr_cfqg);
 }
 
 static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
-                       unsigned long time, unsigned long unaccounted_time)
+                       uint64_t time, unsigned long unaccounted_time)
 {
        blkg_stat_add(&cfqg->stats.time, time);
 #ifdef CONFIG_DEBUG_BLK_CGROUP
@@ -683,26 +684,30 @@ static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
 #endif
 }
 
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw)
+static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
+                                              int op_flags)
 {
-       blkg_rwstat_add(&cfqg->stats.queued, rw, -1);
+       blkg_rwstat_add(&cfqg->stats.queued, op, op_flags, -1);
 }
 
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw)
+static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
+                                              int op_flags)
 {
-       blkg_rwstat_add(&cfqg->stats.merged, rw, 1);
+       blkg_rwstat_add(&cfqg->stats.merged, op, op_flags, 1);
 }
 
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-                       uint64_t start_time, uint64_t io_start_time, int rw)
+                       uint64_t start_time, uint64_t io_start_time, int op,
+                       int op_flags)
 {
        struct cfqg_stats *stats = &cfqg->stats;
        unsigned long long now = sched_clock();
 
        if (time_after64(now, io_start_time))
-               blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
+               blkg_rwstat_add(&stats->service_time, op, op_flags,
+                               now - io_start_time);
        if (time_after64(io_start_time, start_time))
-               blkg_rwstat_add(&stats->wait_time, rw,
+               blkg_rwstat_add(&stats->wait_time, op, op_flags,
                                io_start_time - start_time);
 }
 
@@ -781,13 +786,16 @@ static inline void cfqg_put(struct cfq_group *cfqg) { }
 #define cfq_log_cfqg(cfqd, cfqg, fmt, args...)         do {} while (0)
 
 static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg,
-                       struct cfq_group *curr_cfqg, int rw) { }
+                       struct cfq_group *curr_cfqg, int op, int op_flags) { }
 static inline void cfqg_stats_update_timeslice_used(struct cfq_group *cfqg,
-                       unsigned long time, unsigned long unaccounted_time) { }
-static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int rw) { }
-static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int rw) { }
+                       uint64_t time, unsigned long unaccounted_time) { }
+static inline void cfqg_stats_update_io_remove(struct cfq_group *cfqg, int op,
+                       int op_flags) { }
+static inline void cfqg_stats_update_io_merged(struct cfq_group *cfqg, int op,
+                       int op_flags) { }
 static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
-                       uint64_t start_time, uint64_t io_start_time, int rw) { }
+                       uint64_t start_time, uint64_t io_start_time, int op,
+                       int op_flags) { }
 
 #endif /* CONFIG_CFQ_GROUP_IOSCHED */
 
@@ -807,7 +815,7 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg,
 static inline bool cfq_io_thinktime_big(struct cfq_data *cfqd,
        struct cfq_ttime *ttime, bool group_idle)
 {
-       unsigned long slice;
+       u64 slice;
        if (!sample_valid(ttime->ttime_samples))
                return false;
        if (group_idle)
@@ -930,17 +938,18 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
  * if a queue is marked sync and has sync io queued. A sync queue with async
  * io only, should not get full sync slice length.
  */
-static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
+static inline u64 cfq_prio_slice(struct cfq_data *cfqd, bool sync,
                                 unsigned short prio)
 {
-       const int base_slice = cfqd->cfq_slice[sync];
+       u64 base_slice = cfqd->cfq_slice[sync];
+       u64 slice = div_u64(base_slice, CFQ_SLICE_SCALE);
 
        WARN_ON(prio >= IOPRIO_BE_NR);
 
-       return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - prio));
+       return base_slice + (slice * (4 - prio));
 }
 
-static inline int
+static inline u64
 cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio);
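
The open-coded division is replaced because the slice values are now u64. A minimal sketch of the constraint, using a hypothetical scale_slice() helper; on 32-bit kernels a 64-bit '/' would make the compiler emit a call to __udivdi3, which the kernel does not provide, so u64 division goes through the math64.h helpers:

	#include <linux/math64.h>

	/* Hypothetical helper; the point is the div_u64() call. */
	static u64 scale_slice(u64 base_slice, unsigned short prio)
	{
		u64 step = div_u64(base_slice, 5);	/* divisor fits in 32 bits */

		return base_slice + step * (4 - prio);
	}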
@@ -958,15 +967,14 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  *
  * The result is also in fixed point w/ CFQ_SERVICE_SHIFT.
  */
-static inline u64 cfqg_scale_charge(unsigned long charge,
+static inline u64 cfqg_scale_charge(u64 charge,
                                    unsigned int vfraction)
 {
        u64 c = charge << CFQ_SERVICE_SHIFT;    /* make it fixed point */
 
        /* charge / vfraction */
        c <<= CFQ_SERVICE_SHIFT;
-       do_div(c, vfraction);
-       return c;
+       return div_u64(c, vfraction);
 }
 
 static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime)
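
The do_div() call above becomes div_u64() because only the quotient is needed. A small sketch of the difference between the two helpers, with made-up values and a hypothetical function name:

	#include <linux/math64.h>

	/* do_div() divides its first argument in place and returns the
	 * remainder; div_u64() simply returns the quotient. */
	static u64 divide_both_ways(u64 charge, unsigned int vfraction)
	{
		u64 q1 = charge;
		u32 rem = do_div(q1, vfraction);	/* q1 = charge / vfraction, rem = charge % vfraction */
		u64 q2 = div_u64(charge, vfraction);	/* same quotient, remainder dropped */

		return q1 == q2 ? q2 : rem;		/* the quotients are always equal */
	}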
@@ -1019,16 +1027,16 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
        return cfqg->busy_queues_avg[rt];
 }
 
-static inline unsigned
+static inline u64
 cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
        return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT;
 }
 
-static inline unsigned
+static inline u64
 cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-       unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
+       u64 slice = cfq_prio_to_slice(cfqd, cfqq);
        if (cfqd->cfq_latency) {
                /*
                 * interested queues (we consider only the ones with the same
@@ -1036,20 +1044,22 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                 */
                unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
                                                cfq_class_rt(cfqq));
-               unsigned sync_slice = cfqd->cfq_slice[1];
-               unsigned expect_latency = sync_slice * iq;
-               unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
+               u64 sync_slice = cfqd->cfq_slice[1];
+               u64 expect_latency = sync_slice * iq;
+               u64 group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
 
                if (expect_latency > group_slice) {
-                       unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
+                       u64 base_low_slice = 2 * cfqd->cfq_slice_idle;
+                       u64 low_slice;
+
                        /* scale low_slice according to IO priority
                         * and sync vs async */
-                       unsigned low_slice =
-                               min(slice, base_low_slice * slice / sync_slice);
+                       low_slice = div64_u64(base_low_slice*slice, sync_slice);
+                       low_slice = min(slice, low_slice);
                        /* the adapted slice value is scaled to fit all iqs
                         * into the target latency */
-                       slice = max(slice * group_slice / expect_latency,
-                                   low_slice);
+                       slice = div64_u64(slice*group_slice, expect_latency);
+                       slice = max(slice, low_slice);
                }
        }
        return slice;
@@ -1058,12 +1068,13 @@ cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-       unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
+       u64 slice = cfq_scaled_cfqq_slice(cfqd, cfqq);
+       u64 now = ktime_get_ns();
 
-       cfqq->slice_start = jiffies;
-       cfqq->slice_end = jiffies + slice;
+       cfqq->slice_start = now;
+       cfqq->slice_end = now + slice;
        cfqq->allocated_slice = slice;
-       cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
+       cfq_log_cfqq(cfqd, cfqq, "set_slice=%llu", cfqq->slice_end - now);
 }
 
 /*
@@ -1075,7 +1086,7 @@ static inline bool cfq_slice_used(struct cfq_queue *cfqq)
 {
        if (cfq_cfqq_slice_new(cfqq))
                return false;
-       if (time_before(jiffies, cfqq->slice_end))
+       if (ktime_get_ns() < cfqq->slice_end)
                return false;
 
        return true;
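
Plain comparisons replace time_before()/time_after() throughout the conversion because the values compared are no longer wrapping jiffies. A brief sketch of why, with hypothetical helper names:

	#include <linux/jiffies.h>
	#include <linux/ktime.h>
	#include <linux/types.h>

	/* jiffies is an unsigned long that wraps, so ordering must go through
	 * the wrap-safe macros; ktime_get_ns() is a monotonic 64-bit counter
	 * that will not wrap in any realistic uptime, so '<' and '>' suffice. */
	static bool deadline_passed_jiffies(unsigned long deadline)
	{
		return time_after(jiffies, deadline);	/* wrap-safe */
	}

	static bool deadline_passed_ns(u64 deadline)
	{
		return ktime_get_ns() > deadline;	/* no wrap handling needed */
	}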
@@ -1241,8 +1252,8 @@ cfq_find_next_rq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        return cfq_choose_req(cfqd, next, prev, blk_rq_pos(last));
 }
 
-static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
-                                     struct cfq_queue *cfqq)
+static u64 cfq_slice_offset(struct cfq_data *cfqd,
+                           struct cfq_queue *cfqq)
 {
        /*
         * just an approximation, should be ok.
@@ -1435,31 +1446,32 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
        cfqg_stats_update_dequeue(cfqg);
 }
 
-static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
-                                               unsigned int *unaccounted_time)
+static inline u64 cfq_cfqq_slice_usage(struct cfq_queue *cfqq,
+                                      u64 *unaccounted_time)
 {
-       unsigned int slice_used;
+       u64 slice_used;
+       u64 now = ktime_get_ns();
 
        /*
         * Queue got expired before even a single request completed or
         * got expired immediately after first request completion.
         */
-       if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
+       if (!cfqq->slice_start || cfqq->slice_start == now) {
                /*
                 * Also charge the seek time incurred to the group, otherwise
                 * if there are multiple queues in the group, each can dispatch
                 * a single request on seeky media and cause lots of seek time
                 * and the group will never know it.
                 */
-               slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
-                                       1);
+               slice_used = max_t(u64, (now - cfqq->dispatch_start),
+                                       jiffies_to_nsecs(1));
        } else {
-               slice_used = jiffies - cfqq->slice_start;
+               slice_used = now - cfqq->slice_start;
                if (slice_used > cfqq->allocated_slice) {
                        *unaccounted_time = slice_used - cfqq->allocated_slice;
                        slice_used = cfqq->allocated_slice;
                }
-               if (time_after(cfqq->slice_start, cfqq->dispatch_start))
+               if (cfqq->slice_start > cfqq->dispatch_start)
                        *unaccounted_time += cfqq->slice_start -
                                        cfqq->dispatch_start;
        }
@@ -1471,10 +1483,11 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
                                struct cfq_queue *cfqq)
 {
        struct cfq_rb_root *st = &cfqd->grp_service_tree;
-       unsigned int used_sl, charge, unaccounted_sl = 0;
+       u64 used_sl, charge, unaccounted_sl = 0;
        int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg)
                        - cfqg->service_tree_idle.count;
        unsigned int vfr;
+       u64 now = ktime_get_ns();
 
        BUG_ON(nr_sync < 0);
        used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl);
@@ -1496,9 +1509,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
        cfq_group_service_tree_add(st, cfqg);
 
        /* This group is being expired. Save the context */
-       if (time_after(cfqd->workload_expires, jiffies)) {
-               cfqg->saved_wl_slice = cfqd->workload_expires
-                                               - jiffies;
+       if (cfqd->workload_expires > now) {
+               cfqg->saved_wl_slice = cfqd->workload_expires - now;
                cfqg->saved_wl_type = cfqd->serving_wl_type;
                cfqg->saved_wl_class = cfqd->serving_wl_class;
        } else
@@ -1507,7 +1519,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
        cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                        st->min_vdisktime);
        cfq_log_cfqq(cfqq->cfqd, cfqq,
-                    "sl_used=%u disp=%u charge=%u iops=%u sect=%lu",
+                    "sl_used=%llu disp=%llu charge=%llu iops=%u sect=%lu",
                     used_sl, cfqq->slice_dispatch, charge,
                     iops_mode(cfqd), cfqq->nr_sectors);
        cfqg_stats_update_timeslice_used(cfqg, used_sl, unaccounted_sl);
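
With the counters widened, the trace formats change from %u/%lu to %llu, and %lld for the signed residual. A tiny sketch, assuming a hypothetical log_slice() helper:

	#include <linux/printk.h>
	#include <linux/types.h>

	/* In the kernel, u64 is 'unsigned long long' on every architecture,
	 * so %llu/%lld can be used without casts. */
	static void log_slice(u64 used_sl, s64 resid)
	{
		pr_debug("sl_used=%llu resid=%lld\n", used_sl, resid);
	}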
@@ -1530,7 +1542,7 @@ static void cfq_init_cfqg_base(struct cfq_group *cfqg)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
 
-       cfqg->ttime.last_end_request = jiffies;
+       cfqg->ttime.last_end_request = ktime_get_ns();
 }
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -2213,10 +2225,11 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 {
        struct rb_node **p, *parent;
        struct cfq_queue *__cfqq;
-       unsigned long rb_key;
+       u64 rb_key;
        struct cfq_rb_root *st;
        int left;
        int new_cfqq = 1;
+       u64 now = ktime_get_ns();
 
        st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq));
        if (cfq_class_idle(cfqq)) {
@@ -2226,7 +2239,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                        __cfqq = rb_entry(parent, struct cfq_queue, rb_node);
                        rb_key += __cfqq->rb_key;
                } else
-                       rb_key += jiffies;
+                       rb_key += now;
        } else if (!add_front) {
                /*
                 * Get our rb key offset. Subtract any residual slice
@@ -2234,13 +2247,13 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 * count indicates slice overrun, and this should position
                 * the next service time further away in the tree.
                 */
-               rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
+               rb_key = cfq_slice_offset(cfqd, cfqq) + now;
                rb_key -= cfqq->slice_resid;
                cfqq->slice_resid = 0;
        } else {
-               rb_key = -HZ;
+               rb_key = -NSEC_PER_SEC;
                __cfqq = cfq_rb_first(st);
-               rb_key += __cfqq ? __cfqq->rb_key : jiffies;
+               rb_key += __cfqq ? __cfqq->rb_key : now;
        }
 
        if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
@@ -2266,7 +2279,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                /*
                 * sort by key, that represents service time.
                 */
-               if (time_before(rb_key, __cfqq->rb_key))
+               if (rb_key < __cfqq->rb_key)
                        p = &parent->rb_left;
                else {
                        p = &parent->rb_right;
@@ -2461,10 +2474,10 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
 {
        elv_rb_del(&cfqq->sort_list, rq);
        cfqq->queued[rq_is_sync(rq)]--;
-       cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
+       cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
        cfq_add_rq_rb(rq);
        cfqg_stats_update_io_add(RQ_CFQG(rq), cfqq->cfqd->serving_group,
-                                rq->cmd_flags);
+                                req_op(rq), rq->cmd_flags);
 }
 
 static struct request *
@@ -2517,7 +2530,7 @@ static void cfq_remove_request(struct request *rq)
        cfq_del_rq_rb(rq);
 
        cfqq->cfqd->rq_queued--;
-       cfqg_stats_update_io_remove(RQ_CFQG(rq), rq->cmd_flags);
+       cfqg_stats_update_io_remove(RQ_CFQG(rq), req_op(rq), rq->cmd_flags);
        if (rq->cmd_flags & REQ_PRIO) {
                WARN_ON(!cfqq->prio_pending);
                cfqq->prio_pending--;
@@ -2531,7 +2544,7 @@ static int cfq_merge(struct request_queue *q, struct request **req,
        struct request *__rq;
 
        __rq = cfq_find_rq_fmerge(cfqd, bio);
-       if (__rq && elv_rq_merge_ok(__rq, bio)) {
+       if (__rq && elv_bio_merge_ok(__rq, bio)) {
                *req = __rq;
                return ELEVATOR_FRONT_MERGE;
        }
@@ -2552,7 +2565,7 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
 static void cfq_bio_merged(struct request_queue *q, struct request *req,
                                struct bio *bio)
 {
-       cfqg_stats_update_io_merged(RQ_CFQG(req), bio->bi_rw);
+       cfqg_stats_update_io_merged(RQ_CFQG(req), bio_op(bio), bio->bi_rw);
 }
 
 static void
@@ -2566,7 +2579,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
         * reposition in fifo if next is older than rq
         */
        if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
-           time_before(next->fifo_time, rq->fifo_time) &&
+           next->fifo_time < rq->fifo_time &&
            cfqq == RQ_CFQQ(next)) {
                list_move(&rq->queuelist, &next->queuelist);
                rq->fifo_time = next->fifo_time;
@@ -2575,7 +2588,7 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
        if (cfqq->next_rq == next)
                cfqq->next_rq = rq;
        cfq_remove_request(next);
-       cfqg_stats_update_io_merged(RQ_CFQG(rq), next->cmd_flags);
+       cfqg_stats_update_io_merged(RQ_CFQG(rq), req_op(next), next->cmd_flags);
 
        cfqq = RQ_CFQQ(next);
        /*
@@ -2588,8 +2601,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
                cfq_del_cfqq_rr(cfqd, cfqq);
 }
 
-static int cfq_allow_merge(struct request_queue *q, struct request *rq,
-                          struct bio *bio)
+static int cfq_allow_bio_merge(struct request_queue *q, struct request *rq,
+                              struct bio *bio)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct cfq_io_cq *cic;
@@ -2613,9 +2626,15 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
        return cfqq == RQ_CFQQ(rq);
 }
 
+static int cfq_allow_rq_merge(struct request_queue *q, struct request *rq,
+                             struct request *next)
+{
+       return RQ_CFQQ(rq) == RQ_CFQQ(next);
+}
+
 static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
-       del_timer(&cfqd->idle_slice_timer);
+       hrtimer_try_to_cancel(&cfqd->idle_slice_timer);
        cfqg_stats_update_idle_time(cfqq->cfqg);
 }
 
@@ -2627,7 +2646,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
                                cfqd->serving_wl_class, cfqd->serving_wl_type);
                cfqg_stats_update_avg_queue_size(cfqq->cfqg);
                cfqq->slice_start = 0;
-               cfqq->dispatch_start = jiffies;
+               cfqq->dispatch_start = ktime_get_ns();
                cfqq->allocated_slice = 0;
                cfqq->slice_end = 0;
                cfqq->slice_dispatch = 0;
@@ -2676,8 +2695,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                if (cfq_cfqq_slice_new(cfqq))
                        cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq);
                else
-                       cfqq->slice_resid = cfqq->slice_end - jiffies;
-               cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
+                       cfqq->slice_resid = cfqq->slice_end - ktime_get_ns();
+               cfq_log_cfqq(cfqd, cfqq, "resid=%lld", cfqq->slice_resid);
        }
 
        cfq_group_served(cfqd, cfqq->cfqg, cfqq);
@@ -2911,7 +2930,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
        struct cfq_queue *cfqq = cfqd->active_queue;
        struct cfq_rb_root *st = cfqq->service_tree;
        struct cfq_io_cq *cic;
-       unsigned long sl, group_idle = 0;
+       u64 sl, group_idle = 0;
+       u64 now = ktime_get_ns();
 
        /*
         * SSD device without seek penalty, disable idling. But only do so
@@ -2954,8 +2974,8 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
         * time slice.
         */
        if (sample_valid(cic->ttime.ttime_samples) &&
-           (cfqq->slice_end - jiffies < cic->ttime.ttime_mean)) {
-               cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%lu",
+           (cfqq->slice_end - now < cic->ttime.ttime_mean)) {
+               cfq_log_cfqq(cfqd, cfqq, "Not idling. think_time:%llu",
                             cic->ttime.ttime_mean);
                return;
        }
@@ -2976,9 +2996,10 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
        else
                sl = cfqd->cfq_slice_idle;
 
-       mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+       hrtimer_start(&cfqd->idle_slice_timer, ns_to_ktime(sl),
+                     HRTIMER_MODE_REL);
        cfqg_stats_set_start_idle_time(cfqq->cfqg);
-       cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu group_idle: %d", sl,
+       cfq_log_cfqq(cfqd, cfqq, "arm_idle: %llu group_idle: %d", sl,
                        group_idle ? 1 : 0);
 }
 
@@ -3018,7 +3039,7 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
                return NULL;
 
        rq = rq_entry_fifo(cfqq->fifo.next);
-       if (time_before(jiffies, rq->fifo_time))
+       if (ktime_get_ns() < rq->fifo_time)
                rq = NULL;
 
        cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
@@ -3096,14 +3117,14 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
        struct cfq_queue *queue;
        int i;
        bool key_valid = false;
-       unsigned long lowest_key = 0;
+       u64 lowest_key = 0;
        enum wl_type_t cur_best = SYNC_NOIDLE_WORKLOAD;
 
        for (i = 0; i <= SYNC_WORKLOAD; ++i) {
                /* select the one with lowest rb_key */
                queue = cfq_rb_first(st_for(cfqg, wl_class, i));
                if (queue &&
-                   (!key_valid || time_before(queue->rb_key, lowest_key))) {
+                   (!key_valid || queue->rb_key < lowest_key)) {
                        lowest_key = queue->rb_key;
                        cur_best = i;
                        key_valid = true;
@@ -3116,11 +3137,12 @@ static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd,
 static void
 choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
 {
-       unsigned slice;
+       u64 slice;
        unsigned count;
        struct cfq_rb_root *st;
-       unsigned group_slice;
+       u64 group_slice;
        enum wl_class_t original_class = cfqd->serving_wl_class;
+       u64 now = ktime_get_ns();
 
        /* Choose next priority. RT > BE > IDLE */
        if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
@@ -3129,7 +3151,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
                cfqd->serving_wl_class = BE_WORKLOAD;
        else {
                cfqd->serving_wl_class = IDLE_WORKLOAD;
-               cfqd->workload_expires = jiffies + 1;
+               cfqd->workload_expires = now + jiffies_to_nsecs(1);
                return;
        }
 
@@ -3147,7 +3169,7 @@ choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg)
        /*
         * check workload expiration, and that we still have other queues ready
         */
-       if (count && !time_after(jiffies, cfqd->workload_expires))
+       if (count && !(now > cfqd->workload_expires))
                return;
 
 new_workload:
@@ -3164,13 +3186,13 @@ new_workload:
         */
        group_slice = cfq_group_slice(cfqd, cfqg);
 
-       slice = group_slice * count /
+       slice = div_u64(group_slice * count,
                max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class],
                      cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd,
-                                       cfqg));
+                                       cfqg)));
 
        if (cfqd->serving_wl_type == ASYNC_WORKLOAD) {
-               unsigned int tmp;
+               u64 tmp;
 
                /*
                 * Async queues are currently system wide. Just taking
@@ -3181,19 +3203,19 @@ new_workload:
                 */
                tmp = cfqd->cfq_target_latency *
                        cfqg_busy_async_queues(cfqd, cfqg);
-               tmp = tmp/cfqd->busy_queues;
-               slice = min_t(unsigned, slice, tmp);
+               tmp = div_u64(tmp, cfqd->busy_queues);
+               slice = min_t(u64, slice, tmp);
 
                /* async workload slice is scaled down according to
                 * the sync/async slice ratio. */
-               slice = slice * cfqd->cfq_slice[0] / cfqd->cfq_slice[1];
+               slice = div64_u64(slice*cfqd->cfq_slice[0], cfqd->cfq_slice[1]);
        } else
                /* sync workload slice is at least 2 * cfq_slice_idle */
                slice = max(slice, 2 * cfqd->cfq_slice_idle);
 
-       slice = max_t(unsigned, slice, CFQ_MIN_TT);
-       cfq_log(cfqd, "workload slice:%d", slice);
-       cfqd->workload_expires = jiffies + slice;
+       slice = max_t(u64, slice, CFQ_MIN_TT);
+       cfq_log(cfqd, "workload slice:%llu", slice);
+       cfqd->workload_expires = now + slice;
 }
 
 static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
@@ -3211,16 +3233,17 @@ static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd)
 static void cfq_choose_cfqg(struct cfq_data *cfqd)
 {
        struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
+       u64 now = ktime_get_ns();
 
        cfqd->serving_group = cfqg;
 
        /* Restore the workload type data */
        if (cfqg->saved_wl_slice) {
-               cfqd->workload_expires = jiffies + cfqg->saved_wl_slice;
+               cfqd->workload_expires = now + cfqg->saved_wl_slice;
                cfqd->serving_wl_type = cfqg->saved_wl_type;
                cfqd->serving_wl_class = cfqg->saved_wl_class;
        } else
-               cfqd->workload_expires = jiffies - 1;
+               cfqd->workload_expires = now - 1;
 
        choose_wl_class_and_type(cfqd, cfqg);
 }
@@ -3232,6 +3255,7 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
 static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 {
        struct cfq_queue *cfqq, *new_cfqq = NULL;
+       u64 now = ktime_get_ns();
 
        cfqq = cfqd->active_queue;
        if (!cfqq)
@@ -3292,7 +3316,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
         * flight or is idling for a new request, allow either of these
         * conditions to happen (or time out) before selecting a new queue.
         */
-       if (timer_pending(&cfqd->idle_slice_timer)) {
+       if (hrtimer_active(&cfqd->idle_slice_timer)) {
                cfqq = NULL;
                goto keep_queue;
        }
@@ -3303,7 +3327,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
         **/
        if (CFQQ_SEEKY(cfqq) && cfq_cfqq_idle_window(cfqq) &&
            (cfq_cfqq_slice_new(cfqq) ||
-           (cfqq->slice_end - jiffies > jiffies - cfqq->slice_start))) {
+           (cfqq->slice_end - now > now - cfqq->slice_start))) {
                cfq_clear_cfqq_deep(cfqq);
                cfq_clear_cfqq_idle_window(cfqq);
        }
@@ -3381,11 +3405,12 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 static inline bool cfq_slice_used_soon(struct cfq_data *cfqd,
        struct cfq_queue *cfqq)
 {
+       u64 now = ktime_get_ns();
+
        /* the queue hasn't finished any request, can't estimate */
        if (cfq_cfqq_slice_new(cfqq))
                return true;
-       if (time_after(jiffies + cfqd->cfq_slice_idle * cfqq->dispatched,
-               cfqq->slice_end))
+       if (now + cfqd->cfq_slice_idle * cfqq->dispatched > cfqq->slice_end)
                return true;
 
        return false;
@@ -3460,10 +3485,10 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * based on the last sync IO we serviced
         */
        if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
-               unsigned long last_sync = jiffies - cfqd->last_delayed_sync;
+               u64 last_sync = ktime_get_ns() - cfqd->last_delayed_sync;
                unsigned int depth;
 
-               depth = last_sync / cfqd->cfq_slice[1];
+               depth = div64_u64(last_sync, cfqd->cfq_slice[1]);
                if (!depth && !cfqq->dispatched)
                        depth = 1;
                if (depth < max_dispatch)
@@ -3546,7 +3571,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
        if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) &&
            cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
            cfq_class_idle(cfqq))) {
-               cfqq->slice_end = jiffies + 1;
+               cfqq->slice_end = ktime_get_ns() + 1;
                cfq_slice_expired(cfqd, 0);
        }
 
@@ -3624,7 +3649,7 @@ static void cfq_init_icq(struct io_cq *icq)
 {
        struct cfq_io_cq *cic = icq_to_cic(icq);
 
-       cic->ttime.last_end_request = jiffies;
+       cic->ttime.last_end_request = ktime_get_ns();
 }
 
 static void cfq_exit_icq(struct io_cq *icq)
@@ -3682,6 +3707,7 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct cfq_io_cq *cic)
         * elevate the priority of this queue
         */
        cfqq->org_ioprio = cfqq->ioprio;
+       cfqq->org_ioprio_class = cfqq->ioprio_class;
        cfq_clear_cfqq_prio_changed(cfqq);
 }
 
@@ -3845,14 +3871,15 @@ out:
 }
 
 static void
-__cfq_update_io_thinktime(struct cfq_ttime *ttime, unsigned long slice_idle)
+__cfq_update_io_thinktime(struct cfq_ttime *ttime, u64 slice_idle)
 {
-       unsigned long elapsed = jiffies - ttime->last_end_request;
+       u64 elapsed = ktime_get_ns() - ttime->last_end_request;
        elapsed = min(elapsed, 2UL * slice_idle);
 
        ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8;
-       ttime->ttime_total = (7*ttime->ttime_total + 256*elapsed) / 8;
-       ttime->ttime_mean = (ttime->ttime_total + 128) / ttime->ttime_samples;
+       ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8);
+       ttime->ttime_mean = div64_ul(ttime->ttime_total + 128,
+                                    ttime->ttime_samples);
 }
 
 static void
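
The think-time bookkeeping above is an exponentially decaying average: each update keeps 7/8 of the history and folds in the new sample, scaled by 256 to retain precision, and the mean is total/samples rounded with +128. A condensed sketch with a hypothetical update_thinktime() helper isolating the same arithmetic:

	#include <linux/math64.h>
	#include <linux/types.h>

	static void update_thinktime(u64 *total, unsigned long *samples,
				     u64 *mean, u64 elapsed)
	{
		*samples = (7 * *samples + 256) / 8;
		*total = div_u64(7 * *total + 256 * elapsed, 8);
		*mean = div64_ul(*total + 128, *samples);
	}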
@@ -4105,10 +4132,10 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
        cfq_log_cfqq(cfqd, cfqq, "insert_request");
        cfq_init_prio_data(cfqq, RQ_CIC(rq));
 
-       rq->fifo_time = jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
+       rq->fifo_time = ktime_get_ns() + cfqd->cfq_fifo_expire[rq_is_sync(rq)];
        list_add_tail(&rq->queuelist, &cfqq->fifo);
        cfq_add_rq_rb(rq);
-       cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group,
+       cfqg_stats_update_io_add(RQ_CFQG(rq), cfqd->serving_group, req_op(rq),
                                 rq->cmd_flags);
        cfq_rq_enqueued(cfqd, cfqq, rq);
 }
@@ -4153,6 +4180,7 @@ static void cfq_update_hw_tag(struct cfq_data *cfqd)
 static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        struct cfq_io_cq *cic = cfqd->active_cic;
+       u64 now = ktime_get_ns();
 
        /* If the queue already has requests, don't wait */
        if (!RB_EMPTY_ROOT(&cfqq->sort_list))
@@ -4171,7 +4199,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
        /* if slice left is less than think time, wait busy */
        if (cic && sample_valid(cic->ttime.ttime_samples)
-           && (cfqq->slice_end - jiffies < cic->ttime.ttime_mean))
+           && (cfqq->slice_end - now < cic->ttime.ttime_mean))
                return true;
 
        /*
@@ -4181,7 +4209,7 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         * case where think time is less than a jiffy, mark the queue wait
         * busy if only 1 jiffy is left in the slice.
         */
-       if (cfqq->slice_end - jiffies == 1)
+       if (cfqq->slice_end - now <= jiffies_to_nsecs(1))
                return true;
 
        return false;
@@ -4192,9 +4220,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
        struct cfq_data *cfqd = cfqq->cfqd;
        const int sync = rq_is_sync(rq);
-       unsigned long now;
+       u64 now = ktime_get_ns();
 
-       now = jiffies;
        cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d",
                     !!(rq->cmd_flags & REQ_NOIDLE));
 
@@ -4206,7 +4233,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        cfqq->dispatched--;
        (RQ_CFQG(rq))->dispatched--;
        cfqg_stats_update_completion(cfqq->cfqg, rq_start_time_ns(rq),
-                                    rq_io_start_time_ns(rq), rq->cmd_flags);
+                                    rq_io_start_time_ns(rq), req_op(rq),
+                                    rq->cmd_flags);
 
        cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
 
@@ -4222,7 +4250,16 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                                        cfqq_type(cfqq));
 
                st->ttime.last_end_request = now;
-               if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now))
+               /*
+                * We have to do this check in jiffies since start_time is in
+                * jiffies and it is not trivial to convert to ns. If
+                * cfq_fifo_expire[1] ever comes close to 1 jiffie, this test
+                * will become problematic but so far we are fine (the default
+                * is 128 ms).
+                */
+               if (!time_after(rq->start_time +
+                                 nsecs_to_jiffies(cfqd->cfq_fifo_expire[1]),
+                               jiffies))
                        cfqd->last_delayed_sync = now;
        }
 
@@ -4247,10 +4284,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                 * the queue.
                 */
                if (cfq_should_wait_busy(cfqd, cfqq)) {
-                       unsigned long extend_sl = cfqd->cfq_slice_idle;
+                       u64 extend_sl = cfqd->cfq_slice_idle;
                        if (!cfqd->cfq_slice_idle)
                                extend_sl = cfqd->cfq_group_idle;
-                       cfqq->slice_end = jiffies + extend_sl;
+                       cfqq->slice_end = now + extend_sl;
                        cfq_mark_cfqq_wait_busy(cfqq);
                        cfq_log_cfqq(cfqd, cfqq, "will busy wait");
                }
@@ -4275,6 +4312,24 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                cfq_schedule_dispatch(cfqd);
 }
 
+static void cfqq_boost_on_prio(struct cfq_queue *cfqq, int op_flags)
+{
+       /*
+        * If REQ_PRIO is set, boost class and prio level, if it's below
+        * BE/NORM. If prio is not set, restore the potentially boosted
+        * class/prio level.
+        */
+       if (!(op_flags & REQ_PRIO)) {
+               cfqq->ioprio_class = cfqq->org_ioprio_class;
+               cfqq->ioprio = cfqq->org_ioprio;
+       } else {
+               if (cfq_class_idle(cfqq))
+                       cfqq->ioprio_class = IOPRIO_CLASS_BE;
+               if (cfqq->ioprio > IOPRIO_NORM)
+                       cfqq->ioprio = IOPRIO_NORM;
+       }
+}
+
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
        if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
@@ -4285,7 +4340,7 @@ static inline int __cfq_may_queue(struct cfq_queue *cfqq)
        return ELV_MQUEUE_MAY;
 }
 
-static int cfq_may_queue(struct request_queue *q, int rw)
+static int cfq_may_queue(struct request_queue *q, int op, int op_flags)
 {
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct task_struct *tsk = current;
@@ -4302,9 +4357,10 @@ static int cfq_may_queue(struct request_queue *q, int rw)
        if (!cic)
                return ELV_MQUEUE_MAY;
 
-       cfqq = cic_to_cfqq(cic, rw_is_sync(rw));
+       cfqq = cic_to_cfqq(cic, rw_is_sync(op, op_flags));
        if (cfqq) {
                cfq_init_prio_data(cfqq, cic);
+               cfqq_boost_on_prio(cfqq, op_flags);
 
                return __cfq_may_queue(cfqq);
        }
@@ -4435,9 +4491,10 @@ static void cfq_kick_queue(struct work_struct *work)
 /*
  * Timer running if the active_queue is currently idling inside its time slice
  */
-static void cfq_idle_slice_timer(unsigned long data)
+static enum hrtimer_restart cfq_idle_slice_timer(struct hrtimer *timer)
 {
-       struct cfq_data *cfqd = (struct cfq_data *) data;
+       struct cfq_data *cfqd = container_of(timer, struct cfq_data,
+                                            idle_slice_timer);
        struct cfq_queue *cfqq;
        unsigned long flags;
        int timed_out = 1;
@@ -4486,11 +4543,12 @@ out_kick:
        cfq_schedule_dispatch(cfqd);
 out_cont:
        spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+       return HRTIMER_NORESTART;
 }
 
 static void cfq_shutdown_timer_wq(struct cfq_data *cfqd)
 {
-       del_timer_sync(&cfqd->idle_slice_timer);
+       hrtimer_cancel(&cfqd->idle_slice_timer);
        cancel_work_sync(&cfqd->unplug_work);
 }
 
@@ -4586,9 +4644,9 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
        cfqg_put(cfqd->root_group);
        spin_unlock_irq(q->queue_lock);
 
-       init_timer(&cfqd->idle_slice_timer);
+       hrtimer_init(&cfqd->idle_slice_timer, CLOCK_MONOTONIC,
+                    HRTIMER_MODE_REL);
        cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
-       cfqd->idle_slice_timer.data = (unsigned long) cfqd;
 
        INIT_WORK(&cfqd->unplug_work, cfq_kick_queue);
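
The timer_list is replaced by an hrtimer so idle slices can be shorter than a jiffy. A condensed sketch of the conversion pattern used here, with a hypothetical my_ctx structure standing in for cfq_data:

	#include <linux/hrtimer.h>
	#include <linux/kernel.h>
	#include <linux/ktime.h>
	#include <linux/types.h>

	/* The hrtimer is embedded in its owning structure, so the callback
	 * recovers the context with container_of() instead of the old
	 * 'data' cookie of struct timer_list. */
	struct my_ctx {
		struct hrtimer timer;
		bool fired;
	};

	static enum hrtimer_restart my_timer_fn(struct hrtimer *t)
	{
		struct my_ctx *ctx = container_of(t, struct my_ctx, timer);

		ctx->fired = true;		/* stand-in for the real work */
		return HRTIMER_NORESTART;	/* one-shot, like the idle slice timer */
	}

	static void my_ctx_init(struct my_ctx *ctx)
	{
		hrtimer_init(&ctx->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		ctx->timer.function = my_timer_fn;
	}

	static void my_ctx_arm(struct my_ctx *ctx, u64 delay_ns)
	{
		hrtimer_start(&ctx->timer, ns_to_ktime(delay_ns), HRTIMER_MODE_REL);
	}

	static void my_ctx_shutdown(struct my_ctx *ctx)
	{
		hrtimer_cancel(&ctx->timer);	/* waits for a running callback */
	}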
 
@@ -4609,7 +4667,7 @@ static int cfq_init_queue(struct request_queue *q, struct elevator_type *e)
         * we optimistically start assuming sync ops weren't delayed in last
         * second, in order to have larger depth for async operations.
         */
-       cfqd->last_delayed_sync = jiffies - HZ;
+       cfqd->last_delayed_sync = ktime_get_ns() - NSEC_PER_SEC;
        return 0;
 
 out_free:
@@ -4652,9 +4710,9 @@ cfq_var_store(unsigned int *var, const char *page, size_t count)
 static ssize_t __FUNC(struct elevator_queue *e, char *page)            \
 {                                                                      \
        struct cfq_data *cfqd = e->elevator_data;                       \
-       unsigned int __data = __VAR;                                    \
+       u64 __data = __VAR;                                             \
        if (__CONV)                                                     \
-               __data = jiffies_to_msecs(__data);                      \
+               __data = div_u64(__data, NSEC_PER_MSEC);                        \
        return cfq_var_show(__data, (page));                            \
 }
 SHOW_FUNCTION(cfq_quantum_show, cfqd->cfq_quantum, 0);
@@ -4671,6 +4729,21 @@ SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
 SHOW_FUNCTION(cfq_target_latency_show, cfqd->cfq_target_latency, 1);
 #undef SHOW_FUNCTION
 
+#define USEC_SHOW_FUNCTION(__FUNC, __VAR)                              \
+static ssize_t __FUNC(struct elevator_queue *e, char *page)            \
+{                                                                      \
+       struct cfq_data *cfqd = e->elevator_data;                       \
+       u64 __data = __VAR;                                             \
+       __data = div_u64(__data, NSEC_PER_USEC);                        \
+       return cfq_var_show(__data, (page));                            \
+}
+USEC_SHOW_FUNCTION(cfq_slice_idle_us_show, cfqd->cfq_slice_idle);
+USEC_SHOW_FUNCTION(cfq_group_idle_us_show, cfqd->cfq_group_idle);
+USEC_SHOW_FUNCTION(cfq_slice_sync_us_show, cfqd->cfq_slice[1]);
+USEC_SHOW_FUNCTION(cfq_slice_async_us_show, cfqd->cfq_slice[0]);
+USEC_SHOW_FUNCTION(cfq_target_latency_us_show, cfqd->cfq_target_latency);
+#undef USEC_SHOW_FUNCTION
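
For reference, this is roughly what one instantiation of the new show macro expands to; it relies on the cfq_var_show() helper already present in this file, and reports the internal nanosecond value in microseconds:

	static ssize_t cfq_slice_idle_us_show(struct elevator_queue *e, char *page)
	{
		struct cfq_data *cfqd = e->elevator_data;
		u64 __data = cfqd->cfq_slice_idle;

		__data = div_u64(__data, NSEC_PER_USEC);
		return cfq_var_show(__data, (page));
	}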
+
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)                        \
 static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)        \
 {                                                                      \
@@ -4682,7 +4755,7 @@ static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)
        else if (__data > (MAX))                                        \
                __data = (MAX);                                         \
        if (__CONV)                                                     \
-               *(__PTR) = msecs_to_jiffies(__data);                    \
+               *(__PTR) = (u64)__data * NSEC_PER_MSEC;                 \
        else                                                            \
                *(__PTR) = __data;                                      \
        return ret;                                                     \
@@ -4705,6 +4778,26 @@ STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
 STORE_FUNCTION(cfq_target_latency_store, &cfqd->cfq_target_latency, 1, UINT_MAX, 1);
 #undef STORE_FUNCTION
 
+#define USEC_STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)                   \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count)        \
+{                                                                      \
+       struct cfq_data *cfqd = e->elevator_data;                       \
+       unsigned int __data;                                            \
+       int ret = cfq_var_store(&__data, (page), count);                \
+       if (__data < (MIN))                                             \
+               __data = (MIN);                                         \
+       else if (__data > (MAX))                                        \
+               __data = (MAX);                                         \
+       *(__PTR) = (u64)__data * NSEC_PER_USEC;                         \
+       return ret;                                                     \
+}
+USEC_STORE_FUNCTION(cfq_slice_idle_us_store, &cfqd->cfq_slice_idle, 0, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_group_idle_us_store, &cfqd->cfq_group_idle, 0, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_slice_sync_us_store, &cfqd->cfq_slice[1], 1, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_slice_async_us_store, &cfqd->cfq_slice[0], 1, UINT_MAX);
+USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, UINT_MAX);
+#undef USEC_STORE_FUNCTION
+
 #define CFQ_ATTR(name) \
        __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
 
@@ -4715,12 +4808,17 @@ static struct elv_fs_entry cfq_attrs[] = {
        CFQ_ATTR(back_seek_max),
        CFQ_ATTR(back_seek_penalty),
        CFQ_ATTR(slice_sync),
+       CFQ_ATTR(slice_sync_us),
        CFQ_ATTR(slice_async),
+       CFQ_ATTR(slice_async_us),
        CFQ_ATTR(slice_async_rq),
        CFQ_ATTR(slice_idle),
+       CFQ_ATTR(slice_idle_us),
        CFQ_ATTR(group_idle),
+       CFQ_ATTR(group_idle_us),
        CFQ_ATTR(low_latency),
        CFQ_ATTR(target_latency),
+       CFQ_ATTR(target_latency_us),
        __ATTR_NULL
 };
 
@@ -4729,7 +4827,8 @@ static struct elevator_type iosched_cfq = {
                .elevator_merge_fn =            cfq_merge,
                .elevator_merged_fn =           cfq_merged_request,
                .elevator_merge_req_fn =        cfq_merged_requests,
-               .elevator_allow_merge_fn =      cfq_allow_merge,
+               .elevator_allow_bio_merge_fn =  cfq_allow_bio_merge,
+               .elevator_allow_rq_merge_fn =   cfq_allow_rq_merge,
                .elevator_bio_merged_fn =       cfq_bio_merged,
                .elevator_dispatch_fn =         cfq_dispatch_requests,
                .elevator_add_req_fn =          cfq_insert_request,
@@ -4776,18 +4875,7 @@ static int __init cfq_init(void)
 {
        int ret;
 
-       /*
-        * could be 0 on HZ < 1000 setups
-        */
-       if (!cfq_slice_async)
-               cfq_slice_async = 1;
-       if (!cfq_slice_idle)
-               cfq_slice_idle = 1;
-
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
-       if (!cfq_group_idle)
-               cfq_group_idle = 1;
-
        ret = blkcg_policy_register(&blkcg_policy_cfq);
        if (ret)
                return ret;
index d0dd788..55e0bb6 100644 (file)
@@ -137,7 +137,7 @@ deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
                if (__rq) {
                        BUG_ON(sector != blk_rq_pos(__rq));
 
-                       if (elv_rq_merge_ok(__rq, bio)) {
+                       if (elv_bio_merge_ok(__rq, bio)) {
                                ret = ELEVATOR_FRONT_MERGE;
                                goto out;
                        }
@@ -173,7 +173,8 @@ deadline_merged_requests(struct request_queue *q, struct request *req,
         * and move into next position (next will be deleted) in fifo
         */
        if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
-               if (time_before(next->fifo_time, req->fifo_time)) {
+               if (time_before((unsigned long)next->fifo_time,
+                               (unsigned long)req->fifo_time)) {
                        list_move(&req->queuelist, &next->queuelist);
                        req->fifo_time = next->fifo_time;
                }
@@ -227,7 +228,7 @@ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
        /*
         * rq is expired!
         */
-       if (time_after_eq(jiffies, rq->fifo_time))
+       if (time_after_eq(jiffies, (unsigned long)rq->fifo_time))
                return 1;
 
        return 0;
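
Deadline is the other user of rq->fifo_time, which is now a u64 so that CFQ can store nanoseconds in it; deadline keeps storing jiffies there, hence the casts back to unsigned long before the wrap-safe comparison macros. A minimal sketch with a hypothetical helper:

	#include <linux/jiffies.h>
	#include <linux/types.h>

	static int fifo_expired(u64 fifo_time)
	{
		return time_after_eq(jiffies, (unsigned long)fifo_time);
	}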
index c3555c9..7096c22 100644 (file)
@@ -53,13 +53,13 @@ static LIST_HEAD(elv_list);
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
  */
-static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
+static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
 {
        struct request_queue *q = rq->q;
        struct elevator_queue *e = q->elevator;
 
-       if (e->type->ops.elevator_allow_merge_fn)
-               return e->type->ops.elevator_allow_merge_fn(q, rq, bio);
+       if (e->type->ops.elevator_allow_bio_merge_fn)
+               return e->type->ops.elevator_allow_bio_merge_fn(q, rq, bio);
 
        return 1;
 }
@@ -67,17 +67,17 @@ static int elv_iosched_allow_merge(struct request *rq, struct bio *bio)
 /*
  * can we safely merge with this request?
  */
-bool elv_rq_merge_ok(struct request *rq, struct bio *bio)
+bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
 {
        if (!blk_rq_merge_ok(rq, bio))
-               return 0;
+               return false;
 
-       if (!elv_iosched_allow_merge(rq, bio))
-               return 0;
+       if (!elv_iosched_allow_bio_merge(rq, bio))
+               return false;
 
-       return 1;
+       return true;
 }
-EXPORT_SYMBOL(elv_rq_merge_ok);
+EXPORT_SYMBOL(elv_bio_merge_ok);
 
 static struct elevator_type *elevator_find(const char *name)
 {
@@ -366,8 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq)
        list_for_each_prev(entry, &q->queue_head) {
                struct request *pos = list_entry_rq(entry);
 
-               if ((rq->cmd_flags & REQ_DISCARD) !=
-                   (pos->cmd_flags & REQ_DISCARD))
+               if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD))
                        break;
                if (rq_data_dir(rq) != rq_data_dir(pos))
                        break;
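
The explicit flag tests give way to accessors that return the request's operation. A small sketch of the pattern with hypothetical wrapper names; only req_op(), op_is_write() and REQ_OP_DISCARD come from the block layer:

	#include <linux/blkdev.h>
	#include <linux/types.h>

	static bool rq_is_discard(struct request *rq)
	{
		return req_op(rq) == REQ_OP_DISCARD;
	}

	static bool rq_is_any_write(struct request *rq)
	{
		return op_is_write(req_op(rq));
	}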
@@ -426,7 +425,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
        /*
         * First try one-hit cache.
         */
-       if (q->last_merge && elv_rq_merge_ok(q->last_merge, bio)) {
+       if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
                ret = blk_try_merge(q->last_merge, bio);
                if (ret != ELEVATOR_NO_MERGE) {
                        *req = q->last_merge;
@@ -441,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
         * See if our hash lookup can find a potential backmerge.
         */
        __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
-       if (__rq && elv_rq_merge_ok(__rq, bio)) {
+       if (__rq && elv_bio_merge_ok(__rq, bio)) {
                *req = __rq;
                return ELEVATOR_BACK_MERGE;
        }
@@ -717,12 +716,12 @@ void elv_put_request(struct request_queue *q, struct request *rq)
                e->type->ops.elevator_put_req_fn(rq);
 }
 
-int elv_may_queue(struct request_queue *q, int rw)
+int elv_may_queue(struct request_queue *q, int op, int op_flags)
 {
        struct elevator_queue *e = q->elevator;
 
        if (e->type->ops.elevator_may_queue_fn)
-               return e->type->ops.elevator_may_queue_fn(q, rw);
+               return e->type->ops.elevator_may_queue_fn(q, op, op_flags);
 
        return ELV_MQUEUE_MAY;
 }
index d7eb77e..71d9ed9 100644 (file)
@@ -495,7 +495,6 @@ rescan:
        /* add partitions */
        for (p = 1; p < state->limit; p++) {
                sector_t size, from;
-               struct partition_meta_info *info = NULL;
 
                size = state->parts[p].size;
                if (!size)
@@ -530,8 +529,6 @@ rescan:
                        }
                }
 
-               if (state->parts[p].has_info)
-                       info = &state->parts[p].info;
                part = add_partition(disk, p, from, size,
                                     state->parts[p].flags,
                                     &state->parts[p].info);
index 9875b05..ff1fb93 100644 (file)
@@ -42,6 +42,13 @@ int atari_partition(struct parsed_partitions *state)
        int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */
 #endif
 
+       /*
+        * The Atari partition scheme supports 512-byte logical sectors only.
+        * If this is not the case, bail out early to avoid miscalculating
+        * hd_size.
+        */
+       if (bdev_logical_block_size(state->bdev) != 512)
+               return 0;
+
        rs = read_part_sector(state, 0, &sect);
        if (!rs)
                return -1;
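
The new guard simply compares the device's logical block size against the 512-byte units the partition table is written in. A trivial sketch of the check with a hypothetical predicate:

	#include <linux/blkdev.h>
	#include <linux/types.h>

	static bool sector_size_is_512(struct block_device *bdev)
	{
		return bdev_logical_block_size(bdev) == 512;
	}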
index 2bdb5da..e207b33 100644 (file)
@@ -1190,7 +1190,7 @@ static int atapi_drain_needed(struct request *rq)
        if (likely(rq->cmd_type != REQ_TYPE_BLOCK_PC))
                return 0;
 
-       if (!blk_rq_bytes(rq) || (rq->cmd_flags & REQ_WRITE))
+       if (!blk_rq_bytes(rq) || op_is_write(req_op(rq)))
                return 0;
 
        return atapi_cmd_type(rq->cmd[0]) == ATAPI_MISC;
index c04bd9b..dd96a93 100644 (file)
@@ -339,7 +339,7 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
        if (bio_end_sector(bio) > get_capacity(bdev->bd_disk))
                goto io_error;
 
-       if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+       if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
                if (sector & ((PAGE_SIZE >> SECTOR_SHIFT) - 1) ||
                    bio->bi_iter.bi_size & ~PAGE_MASK)
                        goto io_error;
@@ -509,7 +509,9 @@ static struct brd_device *brd_alloc(int i)
        blk_queue_max_discard_sectors(brd->brd_queue, UINT_MAX);
        brd->brd_queue->limits.discard_zeroes_data = 1;
        queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, brd->brd_queue);
-
+#ifdef CONFIG_BLK_DEV_RAM_DAX
+       queue_flag_set_unlocked(QUEUE_FLAG_DAX, brd->brd_queue);
+#endif
        disk = brd->brd_disk = alloc_disk(max_part);
        if (!disk)
                goto out_free_queue;
index 10459a1..d524973 100644 (file)
@@ -137,19 +137,19 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b
 
 static int _drbd_md_sync_page_io(struct drbd_device *device,
                                 struct drbd_backing_dev *bdev,
-                                sector_t sector, int rw)
+                                sector_t sector, int op)
 {
        struct bio *bio;
        /* we do all our meta data IO in aligned 4k blocks. */
        const int size = 4096;
-       int err;
+       int err, op_flags = 0;
 
        device->md_io.done = 0;
        device->md_io.error = -ENODEV;
 
-       if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
-               rw |= REQ_FUA | REQ_FLUSH;
-       rw |= REQ_SYNC | REQ_NOIDLE;
+       if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags))
+               op_flags |= REQ_FUA | REQ_PREFLUSH;
+       op_flags |= REQ_SYNC | REQ_NOIDLE;
 
        bio = bio_alloc_drbd(GFP_NOIO);
        bio->bi_bdev = bdev->md_bdev;
@@ -159,9 +159,9 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
                goto out;
        bio->bi_private = device;
        bio->bi_end_io = drbd_md_endio;
-       bio->bi_rw = rw;
+       bio_set_op_attrs(bio, op, op_flags);
 
-       if (!(rw & WRITE) && device->state.disk == D_DISKLESS && device->ldev == NULL)
+       if (op != REQ_OP_WRITE && device->state.disk == D_DISKLESS && device->ldev == NULL)
                /* special case, drbd_md_read() during drbd_adm_attach(): no get_ldev */
                ;
        else if (!get_ldev_if_state(device, D_ATTACHING)) {
@@ -174,10 +174,10 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
        bio_get(bio); /* one bio_put() is in the completion handler */
        atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
        device->md_io.submit_jif = jiffies;
-       if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
+       if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
                bio_io_error(bio);
        else
-               submit_bio(rw, bio);
+               submit_bio(bio);
        wait_until_done_or_force_detached(device, bdev, &device->md_io.done);
        if (!bio->bi_error)
                err = device->md_io.error;
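
submit_bio() loses its rw argument in this series; the operation and flags are set on the bio beforehand. A minimal sketch of the new submission pattern, assuming an already-allocated bio and omitting error handling:

	#include <linux/bio.h>
	#include <linux/types.h>

	static void submit_sync_write(struct bio *bio, struct block_device *bdev,
				      sector_t sector)
	{
		bio->bi_bdev = bdev;
		bio->bi_iter.bi_sector = sector;
		bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_FUA);
		submit_bio(bio);
	}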
@@ -188,7 +188,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
 }
 
 int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bdev,
-                        sector_t sector, int rw)
+                        sector_t sector, int op)
 {
        int err;
        D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
@@ -197,19 +197,21 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
 
        dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
             current->comm, current->pid, __func__,
-            (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
+            (unsigned long long)sector, (op == REQ_OP_WRITE) ? "WRITE" : "READ",
             (void*)_RET_IP_ );
 
        if (sector < drbd_md_first_sector(bdev) ||
            sector + 7 > drbd_md_last_sector(bdev))
                drbd_alert(device, "%s [%d]:%s(,%llus,%s) out of range md access!\n",
                     current->comm, current->pid, __func__,
-                    (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
+                    (unsigned long long)sector,
+                    (op == REQ_OP_WRITE) ? "WRITE" : "READ");
 
-       err = _drbd_md_sync_page_io(device, bdev, sector, rw);
+       err = _drbd_md_sync_page_io(device, bdev, sector, op);
        if (err) {
                drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
-                   (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
+                   (unsigned long long)sector,
+                   (op == REQ_OP_WRITE) ? "WRITE" : "READ", err);
        }
        return err;
 }
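
The hunks above show the shape of the whole series: the operation (REQ_OP_READ, REQ_OP_WRITE, ...) is passed on its own, the modifier flags (REQ_SYNC, REQ_FUA, REQ_PREFLUSH, ...) travel separately, bio_set_op_attrs() records both on the bio, and submit_bio() no longer takes an rw argument. A minimal sketch of the same pattern for a synchronous 4k metadata transfer; the function name and the simplified error handling are illustrative only, not part of the patch:

    /* sketch only: synchronous 4k metadata I/O with the split op/flags API */
    static int example_md_page_io(struct block_device *bdev, struct page *page,
                                  sector_t sector, int op, int op_flags)
    {
            struct bio *bio = bio_alloc(GFP_NOIO, 1);
            int err;

            bio->bi_bdev = bdev;
            bio->bi_iter.bi_sector = sector;
            bio_add_page(bio, page, 4096, 0);
            bio_set_op_attrs(bio, op, op_flags);    /* op and flags set together */
            err = submit_bio_wait(bio);             /* no rw argument any more */
            bio_put(bio);
            return err;
    }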
@@ -845,7 +847,7 @@ int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
        unsigned long count = 0;
        sector_t esector, nr_sectors;
 
-       /* This would be an empty REQ_FLUSH, be silent. */
+       /* This would be an empty REQ_PREFLUSH, be silent. */
        if ((mode == SET_OUT_OF_SYNC) && size == 0)
                return 0;
 
index 92d6fc0..e5d89f6 100644 (file)
@@ -980,7 +980,7 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
        struct drbd_bitmap *b = device->bitmap;
        struct page *page;
        unsigned int len;
-       unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE;
+       unsigned int op = (ctx->flags & BM_AIO_READ) ? REQ_OP_READ : REQ_OP_WRITE;
 
        sector_t on_disk_sector =
                device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1011,12 +1011,12 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
        bio_add_page(bio, page, len, 0);
        bio->bi_private = ctx;
        bio->bi_end_io = drbd_bm_endio;
+       bio_set_op_attrs(bio, op, 0);
 
-       if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
-               bio->bi_rw |= rw;
+       if (drbd_insert_fault(device, (op == REQ_OP_WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) {
                bio_io_error(bio);
        } else {
-               submit_bio(rw, bio);
+               submit_bio(bio);
                /* this should not count as user activity and cause the
                 * resync to throttle -- see drbd_rs_should_slow_down(). */
                atomic_add(len >> 9, &device->rs_sect_ev);
index 7a1cf7e..a64c645 100644 (file)
@@ -1327,14 +1327,14 @@ struct bm_extent {
 #endif
 #endif
 
-/* BIO_MAX_SIZE is 256 * PAGE_SIZE,
+/* Estimate max bio size as 256 * PAGE_SIZE,
  * so for typical PAGE_SIZE of 4k, that is (1<<20) Byte.
  * Since we may live in a mixed-platform cluster,
  * we limit us to a platform agnostic constant here for now.
  * A followup commit may allow even bigger BIO sizes,
  * once we thought that through. */
 #define DRBD_MAX_BIO_SIZE (1U << 20)
-#if DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
+#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT)
 #error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
 #endif
 #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12)       /* Works always = 4k */
@@ -1507,7 +1507,7 @@ extern int drbd_resync_finished(struct drbd_device *device);
 extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
 extern void drbd_md_put_buffer(struct drbd_device *device);
 extern int drbd_md_sync_page_io(struct drbd_device *device,
-               struct drbd_backing_dev *bdev, sector_t sector, int rw);
+               struct drbd_backing_dev *bdev, sector_t sector, int op);
 extern void drbd_ov_out_of_sync_found(struct drbd_device *, sector_t, int);
 extern void wait_until_done_or_force_detached(struct drbd_device *device,
                struct drbd_backing_dev *bdev, unsigned int *done);
@@ -1557,7 +1557,7 @@ extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector
                bool throttle_if_app_is_waiting);
 extern int drbd_submit_peer_request(struct drbd_device *,
                                    struct drbd_peer_request *, const unsigned,
-                                   const int);
+                                   const unsigned, const int);
 extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *);
 extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64,
                                                     sector_t, unsigned int,
index 2ba1494..2b37744 100644 (file)
@@ -1603,15 +1603,16 @@ static int _drbd_send_zc_ee(struct drbd_peer_device *peer_device,
        return 0;
 }
 
-static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long bi_rw)
+static u32 bio_flags_to_wire(struct drbd_connection *connection,
+                            struct bio *bio)
 {
        if (connection->agreed_pro_version >= 95)
-               return  (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
-                       (bi_rw & REQ_FUA ? DP_FUA : 0) |
-                       (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
-                       (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
+               return  (bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
+                       (bio->bi_rw & REQ_FUA ? DP_FUA : 0) |
+                       (bio->bi_rw & REQ_PREFLUSH ? DP_FLUSH : 0) |
+                       (bio_op(bio) == REQ_OP_DISCARD ? DP_DISCARD : 0);
        else
-               return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
+               return bio->bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
 }
 
 /* Used to send write or TRIM aka REQ_DISCARD requests
@@ -1636,7 +1637,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
        p->sector = cpu_to_be64(req->i.sector);
        p->block_id = (unsigned long)req;
        p->seq_num = cpu_to_be32(atomic_inc_return(&device->packet_seq));
-       dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio->bi_rw);
+       dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio);
        if (device->state.conn >= C_SYNC_SOURCE &&
            device->state.conn <= C_PAUSED_SYNC_T)
                dp_flags |= DP_MAY_SET_IN_SYNC;
@@ -3061,7 +3062,7 @@ void drbd_md_write(struct drbd_device *device, void *b)
        D_ASSERT(device, drbd_md_ss(device->ldev) == device->ldev->md.md_offset);
        sector = device->ldev->md.md_offset;
 
-       if (drbd_md_sync_page_io(device, device->ldev, sector, WRITE)) {
+       if (drbd_md_sync_page_io(device, device->ldev, sector, REQ_OP_WRITE)) {
                /* this was a try anyways ... */
                drbd_err(device, "meta data update failed!\n");
                drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
@@ -3263,7 +3264,8 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
         * Affects the paranoia out-of-range access check in drbd_md_sync_page_io(). */
        bdev->md.md_size_sect = 8;
 
-       if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset, READ)) {
+       if (drbd_md_sync_page_io(device, bdev, bdev->md.md_offset,
+                                REQ_OP_READ)) {
                /* NOTE: can't do normal error processing here as this is
                   called BEFORE disk is attached */
                drbd_err(device, "Error while reading metadata.\n");
index ef92453..129f8c7 100644 (file)
@@ -112,7 +112,7 @@ struct p_header100 {
 #define DP_MAY_SET_IN_SYNC    4
 #define DP_UNPLUG             8 /* not used anymore   */
 #define DP_FUA               16 /* equals REQ_FUA     */
-#define DP_FLUSH             32 /* equals REQ_FLUSH   */
+#define DP_FLUSH             32 /* equals REQ_PREFLUSH   */
 #define DP_DISCARD           64 /* equals REQ_DISCARD */
 #define DP_SEND_RECEIVE_ACK 128 /* This is a proto B write request */
 #define DP_SEND_WRITE_ACK   256 /* This is a proto C write request */
index 050aaa1..1ee0023 100644 (file)
@@ -1398,7 +1398,8 @@ void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backin
 /* TODO allocate from our own bio_set. */
 int drbd_submit_peer_request(struct drbd_device *device,
                             struct drbd_peer_request *peer_req,
-                            const unsigned rw, const int fault_type)
+                            const unsigned op, const unsigned op_flags,
+                            const int fault_type)
 {
        struct bio *bios = NULL;
        struct bio *bio;
@@ -1450,7 +1451,7 @@ next_bio:
        /* > peer_req->i.sector, unless this is the first bio */
        bio->bi_iter.bi_sector = sector;
        bio->bi_bdev = device->ldev->backing_bdev;
-       bio->bi_rw = rw;
+       bio_set_op_attrs(bio, op, op_flags);
        bio->bi_private = peer_req;
        bio->bi_end_io = drbd_peer_request_endio;
 
@@ -1458,7 +1459,7 @@ next_bio:
        bios = bio;
        ++n_bios;
 
-       if (rw & REQ_DISCARD) {
+       if (op == REQ_OP_DISCARD) {
                bio->bi_iter.bi_size = data_size;
                goto submit;
        }
@@ -1830,7 +1831,8 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
        spin_unlock_irq(&device->resource->req_lock);
 
        atomic_add(pi->size >> 9, &device->rs_sect_ev);
-       if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
+       if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
+                                    DRBD_FAULT_RS_WR) == 0)
                return 0;
 
        /* don't care for the reason here */
@@ -2152,12 +2154,19 @@ static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, co
 /* see also bio_flags_to_wire()
  * DRBD_REQ_*, because we need to semantically map the flags to data packet
  * flags and back. We may replicate to other kernel versions. */
-static unsigned long wire_flags_to_bio(u32 dpf)
+static unsigned long wire_flags_to_bio_flags(u32 dpf)
 {
        return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
                (dpf & DP_FUA ? REQ_FUA : 0) |
-               (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
-               (dpf & DP_DISCARD ? REQ_DISCARD : 0);
+               (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
+}
+
+static unsigned long wire_flags_to_bio_op(u32 dpf)
+{
+       if (dpf & DP_DISCARD)
+               return REQ_OP_DISCARD;
+       else
+               return REQ_OP_WRITE;
 }
 
 static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
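
The two helpers split what used to be one flag word: bio_flags_to_wire() on the sending side packs the op and flags into DP_* bits, and the receive path unpacks them into an op plus a flag set before resubmitting. Roughly, using the names from the hunks above and below:

    /* send side (drbd_main.c): bio -> wire flags */
    dp_flags = bio_flags_to_wire(peer_device->connection, req->master_bio);

    /* receive side (drbd_receiver.c): wire flags -> op + flags */
    op       = wire_flags_to_bio_op(dp_flags);      /* REQ_OP_WRITE or REQ_OP_DISCARD */
    op_flags = wire_flags_to_bio_flags(dp_flags);   /* REQ_SYNC | REQ_FUA | REQ_PREFLUSH */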
@@ -2303,7 +2312,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
        struct drbd_peer_request *peer_req;
        struct p_data *p = pi->data;
        u32 peer_seq = be32_to_cpu(p->seq_num);
-       int rw = WRITE;
+       int op, op_flags;
        u32 dp_flags;
        int err, tp;
 
@@ -2342,14 +2351,15 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
        peer_req->flags |= EE_APPLICATION;
 
        dp_flags = be32_to_cpu(p->dp_flags);
-       rw |= wire_flags_to_bio(dp_flags);
+       op = wire_flags_to_bio_op(dp_flags);
+       op_flags = wire_flags_to_bio_flags(dp_flags);
        if (pi->cmd == P_TRIM) {
                struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
                peer_req->flags |= EE_IS_TRIM;
                if (!blk_queue_discard(q))
                        peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
                D_ASSERT(peer_device, peer_req->i.size > 0);
-               D_ASSERT(peer_device, rw & REQ_DISCARD);
+               D_ASSERT(peer_device, op == REQ_OP_DISCARD);
                D_ASSERT(peer_device, peer_req->pages == NULL);
        } else if (peer_req->pages == NULL) {
                D_ASSERT(device, peer_req->i.size == 0);
@@ -2433,7 +2443,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
                peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
        }
 
-       err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
+       err = drbd_submit_peer_request(device, peer_req, op, op_flags,
+                                      DRBD_FAULT_DT_WR);
        if (!err)
                return 0;
 
@@ -2723,7 +2734,8 @@ submit_for_resync:
 submit:
        update_receiver_timing_details(connection, drbd_submit_peer_request);
        inc_unacked(device);
-       if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
+       if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
+                                    fault_type) == 0)
                return 0;
 
        /* don't care for the reason here */
index 2255dcf..eef6e95 100644 (file)
@@ -1132,7 +1132,7 @@ static int drbd_process_write_request(struct drbd_request *req)
         * replicating, in which case there is no point. */
        if (unlikely(req->i.size == 0)) {
                /* The only size==0 bios we expect are empty flushes. */
-               D_ASSERT(device, req->master_bio->bi_rw & REQ_FLUSH);
+               D_ASSERT(device, req->master_bio->bi_rw & REQ_PREFLUSH);
                if (remote)
                        _req_mod(req, QUEUE_AS_DRBD_BARRIER);
                return remote;
index 4d87499..51fab97 100644 (file)
@@ -174,7 +174,7 @@ void drbd_peer_request_endio(struct bio *bio)
        struct drbd_peer_request *peer_req = bio->bi_private;
        struct drbd_device *device = peer_req->peer_device->device;
        int is_write = bio_data_dir(bio) == WRITE;
-       int is_discard = !!(bio->bi_rw & REQ_DISCARD);
+       int is_discard = !!(bio_op(bio) == REQ_OP_DISCARD);
 
        if (bio->bi_error && __ratelimit(&drbd_ratelimit_state))
                drbd_warn(device, "%s: error=%d s=%llus\n",
@@ -248,7 +248,7 @@ void drbd_request_endio(struct bio *bio)
 
        /* to avoid recursion in __req_mod */
        if (unlikely(bio->bi_error)) {
-               if (bio->bi_rw & REQ_DISCARD)
+               if (bio_op(bio) == REQ_OP_DISCARD)
                        what = (bio->bi_error == -EOPNOTSUPP)
                                ? DISCARD_COMPLETED_NOTSUPP
                                : DISCARD_COMPLETED_WITH_ERROR;
@@ -397,7 +397,8 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
        spin_unlock_irq(&device->resource->req_lock);
 
        atomic_add(size >> 9, &device->rs_sect_ev);
-       if (drbd_submit_peer_request(device, peer_req, READ, DRBD_FAULT_RS_RD) == 0)
+       if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
+                                    DRBD_FAULT_RS_RD) == 0)
                return 0;
 
        /* If it failed because of ENOMEM, retry should help.  If it failed
index 84708a5..f9bfecd 100644 (file)
@@ -3822,8 +3822,9 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive)
        bio.bi_flags |= (1 << BIO_QUIET);
        bio.bi_private = &cbdata;
        bio.bi_end_io = floppy_rb0_cb;
+       bio_set_op_attrs(&bio, REQ_OP_READ, 0);
 
-       submit_bio(READ, &bio);
+       submit_bio(&bio);
        process_fd_request();
 
        init_completion(&cbdata.complete);
index 1fa8cc2..364d491 100644 (file)
@@ -447,7 +447,7 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
 
 static inline void handle_partial_read(struct loop_cmd *cmd, long bytes)
 {
-       if (bytes < 0 || (cmd->rq->cmd_flags & REQ_WRITE))
+       if (bytes < 0 || op_is_write(req_op(cmd->rq)))
                return;
 
        if (unlikely(bytes < blk_rq_bytes(cmd->rq))) {
@@ -541,10 +541,10 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
 
        pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
 
-       if (rq->cmd_flags & REQ_WRITE) {
-               if (rq->cmd_flags & REQ_FLUSH)
+       if (op_is_write(req_op(rq))) {
+               if (req_op(rq) == REQ_OP_FLUSH)
                        ret = lo_req_flush(lo, rq);
-               else if (rq->cmd_flags & REQ_DISCARD)
+               else if (req_op(rq) == REQ_OP_DISCARD)
                        ret = lo_discard(lo, rq, pos);
                else if (lo->transfer)
                        ret = lo_write_transfer(lo, rq, pos);
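
Request-based drivers get the same treatment: instead of testing REQ_* bits in rq->cmd_flags, they switch on req_op() and use op_is_write() for the data direction. A minimal sketch of that dispatch; the example_* handlers are hypothetical:

    static int example_handle_request(struct request *rq)
    {
            switch (req_op(rq)) {
            case REQ_OP_FLUSH:
                    return example_flush(rq);
            case REQ_OP_DISCARD:
                    return example_discard(rq);
            default:
                    return op_is_write(req_op(rq)) ?
                            example_write(rq) : example_read(rq);
            }
    }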
@@ -1659,8 +1659,8 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
        if (lo->lo_state != Lo_bound)
                return -EIO;
 
-       if (lo->use_dio && !(cmd->rq->cmd_flags & (REQ_FLUSH |
-                                       REQ_DISCARD)))
+       if (lo->use_dio && (req_op(cmd->rq) != REQ_OP_FLUSH &&
+           req_op(cmd->rq) != REQ_OP_DISCARD))
                cmd->use_aio = true;
        else
                cmd->use_aio = false;
@@ -1672,7 +1672,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
 {
-       const bool write = cmd->rq->cmd_flags & REQ_WRITE;
+       const bool write = op_is_write(req_op(cmd->rq));
        struct loop_device *lo = cmd->rq->q->queuedata;
        int ret = 0;
 
index 6053e46..8e3e708 100644 (file)
@@ -3765,7 +3765,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
                        return -ENODATA;
        }
 
-       if (rq->cmd_flags & REQ_DISCARD) {
+       if (req_op(rq) == REQ_OP_DISCARD) {
                int err;
 
                err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
index 6a48ed4..6f55b26 100644 (file)
@@ -282,9 +282,9 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 
        if (req->cmd_type == REQ_TYPE_DRV_PRIV)
                type = NBD_CMD_DISC;
-       else if (req->cmd_flags & REQ_DISCARD)
+       else if (req_op(req) == REQ_OP_DISCARD)
                type = NBD_CMD_TRIM;
-       else if (req->cmd_flags & REQ_FLUSH)
+       else if (req_op(req) == REQ_OP_FLUSH)
                type = NBD_CMD_FLUSH;
        else if (rq_data_dir(req) == WRITE)
                type = NBD_CMD_WRITE;
index c2854a2..92900f5 100644 (file)
@@ -321,7 +321,7 @@ static void osdblk_rq_fn(struct request_queue *q)
                 * driver-specific, etc.
                 */
 
-               do_flush = rq->cmd_flags & REQ_FLUSH;
+               do_flush = (req_op(rq) == REQ_OP_FLUSH);
                do_write = (rq_data_dir(rq) == WRITE);
 
                if (!do_flush) { /* osd_flush does not use a bio */
index d06c62e..9393bc7 100644 (file)
@@ -1074,7 +1074,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
                        BUG();
 
                atomic_inc(&pkt->io_wait);
-               bio->bi_rw = READ;
+               bio_set_op_attrs(bio, REQ_OP_READ, 0);
                pkt_queue_bio(pd, bio);
                frames_read++;
        }
@@ -1336,7 +1336,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
 
        /* Start the write request */
        atomic_set(&pkt->io_wait, 1);
-       pkt->w_bio->bi_rw = WRITE;
+       bio_set_op_attrs(pkt->w_bio, REQ_OP_WRITE, 0);
        pkt_queue_bio(pd, pkt->w_bio);
 }
 
index 4b7e405..acb4452 100644 (file)
@@ -196,7 +196,7 @@ static void ps3disk_do_request(struct ps3_storage_device *dev,
        dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__);
 
        while ((req = blk_fetch_request(q))) {
-               if (req->cmd_flags & REQ_FLUSH) {
+               if (req_op(req) == REQ_OP_FLUSH) {
                        if (ps3disk_submit_flush_request(dev, req))
                                break;
                } else if (req->cmd_type == REQ_TYPE_FS) {
@@ -256,7 +256,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data)
                return IRQ_HANDLED;
        }
 
-       if (req->cmd_flags & REQ_FLUSH) {
+       if (req_op(req) == REQ_OP_FLUSH) {
                read = 0;
                op = "flush";
        } else {
index 81666a5..4506620 100644 (file)
@@ -3286,9 +3286,9 @@ static void rbd_queue_workfn(struct work_struct *work)
                goto err;
        }
 
-       if (rq->cmd_flags & REQ_DISCARD)
+       if (req_op(rq) == REQ_OP_DISCARD)
                op_type = OBJ_OP_DISCARD;
-       else if (rq->cmd_flags & REQ_WRITE)
+       else if (req_op(rq) == REQ_OP_WRITE)
                op_type = OBJ_OP_WRITE;
        else
                op_type = OBJ_OP_READ;
index cf8cd29..5a20385 100644 (file)
@@ -705,7 +705,7 @@ int rsxx_dma_queue_bio(struct rsxx_cardinfo *card,
                dma_cnt[i] = 0;
        }
 
-       if (bio->bi_rw & REQ_DISCARD) {
+       if (bio_op(bio) == REQ_OP_DISCARD) {
                bv_len = bio->bi_iter.bi_size;
 
                while (bv_len > 0) {
index 910e065..5c07a23 100644 (file)
@@ -597,7 +597,7 @@ static void skd_request_fn(struct request_queue *q)
                data_dir = rq_data_dir(req);
                io_flags = req->cmd_flags;
 
-               if (io_flags & REQ_FLUSH)
+               if (req_op(req) == REQ_OP_FLUSH)
                        flush++;
 
                if (io_flags & REQ_FUA)
index 7939b9f..4b3ba74 100644 (file)
@@ -462,7 +462,7 @@ static void process_page(unsigned long data)
                                le32_to_cpu(desc->local_addr)>>9,
                                le32_to_cpu(desc->transfer_size));
                        dump_dmastat(card, control);
-               } else if ((bio->bi_rw & REQ_WRITE) &&
+               } else if (op_is_write(bio_op(bio)) &&
                           le32_to_cpu(desc->local_addr) >> 9 ==
                                card->init_size) {
                        card->init_size += le32_to_cpu(desc->transfer_size) >> 9;
index 42758b5..18e4069 100644 (file)
@@ -172,7 +172,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
        BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
 
        vbr->req = req;
-       if (req->cmd_flags & REQ_FLUSH) {
+       if (req_op(req) == REQ_OP_FLUSH) {
                vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_FLUSH);
                vbr->out_hdr.sector = 0;
                vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(vbr->req));
index 4809c15..4a80ee7 100644 (file)
@@ -501,7 +501,7 @@ static int xen_vbd_translate(struct phys_req *req, struct xen_blkif *blkif,
        struct xen_vbd *vbd = &blkif->vbd;
        int rc = -EACCES;
 
-       if ((operation != READ) && vbd->readonly)
+       if ((operation != REQ_OP_READ) && vbd->readonly)
                goto out;
 
        if (likely(req->nr_sects)) {
@@ -1014,7 +1014,7 @@ static int dispatch_discard_io(struct xen_blkif_ring *ring,
        preq.sector_number = req->u.discard.sector_number;
        preq.nr_sects      = req->u.discard.nr_sectors;
 
-       err = xen_vbd_translate(&preq, blkif, WRITE);
+       err = xen_vbd_translate(&preq, blkif, REQ_OP_WRITE);
        if (err) {
                pr_warn("access denied: DISCARD [%llu->%llu] on dev=%04x\n",
                        preq.sector_number,
@@ -1229,6 +1229,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
        struct bio **biolist = pending_req->biolist;
        int i, nbio = 0;
        int operation;
+       int operation_flags = 0;
        struct blk_plug plug;
        bool drain = false;
        struct grant_page **pages = pending_req->segments;
@@ -1247,17 +1248,19 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
        switch (req_operation) {
        case BLKIF_OP_READ:
                ring->st_rd_req++;
-               operation = READ;
+               operation = REQ_OP_READ;
                break;
        case BLKIF_OP_WRITE:
                ring->st_wr_req++;
-               operation = WRITE_ODIRECT;
+               operation = REQ_OP_WRITE;
+               operation_flags = WRITE_ODIRECT;
                break;
        case BLKIF_OP_WRITE_BARRIER:
                drain = true;
        case BLKIF_OP_FLUSH_DISKCACHE:
                ring->st_f_req++;
-               operation = WRITE_FLUSH;
+               operation = REQ_OP_WRITE;
+               operation_flags = WRITE_FLUSH;
                break;
        default:
                operation = 0; /* make gcc happy */
@@ -1269,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
        nseg = req->operation == BLKIF_OP_INDIRECT ?
               req->u.indirect.nr_segments : req->u.rw.nr_segments;
 
-       if (unlikely(nseg == 0 && operation != WRITE_FLUSH) ||
+       if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) ||
            unlikely((req->operation != BLKIF_OP_INDIRECT) &&
                     (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) ||
            unlikely((req->operation == BLKIF_OP_INDIRECT) &&
@@ -1310,7 +1313,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 
        if (xen_vbd_translate(&preq, ring->blkif, operation) != 0) {
                pr_debug("access denied: %s of [%llu,%llu] on dev=%04x\n",
-                        operation == READ ? "read" : "write",
+                        operation == REQ_OP_READ ? "read" : "write",
                         preq.sector_number,
                         preq.sector_number + preq.nr_sects,
                         ring->blkif->vbd.pdevice);
@@ -1369,6 +1372,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
                        bio->bi_private = pending_req;
                        bio->bi_end_io  = end_block_io_op;
                        bio->bi_iter.bi_sector  = preq.sector_number;
+                       bio_set_op_attrs(bio, operation, operation_flags);
                }
 
                preq.sector_number += seg[i].nsec;
@@ -1376,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
 
        /* This will be hit if the operation was a flush or discard. */
        if (!bio) {
-               BUG_ON(operation != WRITE_FLUSH);
+               BUG_ON(operation_flags != WRITE_FLUSH);
 
                bio = bio_alloc(GFP_KERNEL, 0);
                if (unlikely(bio == NULL))
@@ -1386,20 +1390,21 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
                bio->bi_bdev    = preq.bdev;
                bio->bi_private = pending_req;
                bio->bi_end_io  = end_block_io_op;
+               bio_set_op_attrs(bio, operation, operation_flags);
        }
 
        atomic_set(&pending_req->pendcnt, nbio);
        blk_start_plug(&plug);
 
        for (i = 0; i < nbio; i++)
-               submit_bio(operation, biolist[i]);
+               submit_bio(biolist[i]);
 
        /* Let the I/Os go.. */
        blk_finish_plug(&plug);
 
-       if (operation == READ)
+       if (operation == REQ_OP_READ)
                ring->st_rd_sect += preq.nr_sects;
-       else if (operation & WRITE)
+       else if (operation == REQ_OP_WRITE)
                ring->st_wr_sect += preq.nr_sects;
 
        return 0;
index fcc5b4e..da05d3f 100644 (file)
@@ -196,6 +196,7 @@ struct blkfront_info
        unsigned int nr_ring_pages;
        struct request_queue *rq;
        unsigned int feature_flush;
+       unsigned int feature_fua;
        unsigned int feature_discard:1;
        unsigned int feature_secdiscard:1;
        unsigned int discard_granularity;
@@ -746,7 +747,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
                 * The indirect operation can only be a BLKIF_OP_READ or
                 * BLKIF_OP_WRITE
                 */
-               BUG_ON(req->cmd_flags & (REQ_FLUSH | REQ_FUA));
+               BUG_ON(req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA);
                ring_req->operation = BLKIF_OP_INDIRECT;
                ring_req->u.indirect.indirect_op = rq_data_dir(req) ?
                        BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -758,7 +759,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
                ring_req->u.rw.handle = info->handle;
                ring_req->operation = rq_data_dir(req) ?
                        BLKIF_OP_WRITE : BLKIF_OP_READ;
-               if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+               if (req_op(req) == REQ_OP_FLUSH || req->cmd_flags & REQ_FUA) {
                        /*
                         * Ideally we can do an unordered flush-to-disk.
                         * In case the backend only supports barriers, use that.
@@ -766,19 +767,14 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri
                         * implement it the same way.  (It's also a FLUSH+FUA,
                         * since it is guaranteed ordered WRT previous writes.)
                         */
-                       switch (info->feature_flush &
-                               ((REQ_FLUSH|REQ_FUA))) {
-                       case REQ_FLUSH|REQ_FUA:
+                       if (info->feature_flush && info->feature_fua)
                                ring_req->operation =
                                        BLKIF_OP_WRITE_BARRIER;
-                               break;
-                       case REQ_FLUSH:
+                       else if (info->feature_flush)
                                ring_req->operation =
                                        BLKIF_OP_FLUSH_DISKCACHE;
-                               break;
-                       default:
+                       else
                                ring_req->operation = 0;
-                       }
                }
                ring_req->u.rw.nr_segments = num_grant;
                if (unlikely(require_extra_req)) {
@@ -847,7 +843,8 @@ static int blkif_queue_request(struct request *req, struct blkfront_ring_info *r
        if (unlikely(rinfo->dev_info->connected != BLKIF_STATE_CONNECTED))
                return 1;
 
-       if (unlikely(req->cmd_flags & (REQ_DISCARD | REQ_SECURE)))
+       if (unlikely(req_op(req) == REQ_OP_DISCARD ||
+                    req->cmd_flags & REQ_SECURE))
                return blkif_queue_discard_req(req, rinfo);
        else
                return blkif_queue_rw_req(req, rinfo);
@@ -867,10 +864,10 @@ static inline bool blkif_request_flush_invalid(struct request *req,
                                               struct blkfront_info *info)
 {
        return ((req->cmd_type != REQ_TYPE_FS) ||
-               ((req->cmd_flags & REQ_FLUSH) &&
-                !(info->feature_flush & REQ_FLUSH)) ||
+               ((req_op(req) == REQ_OP_FLUSH) &&
+                !info->feature_flush) ||
                ((req->cmd_flags & REQ_FUA) &&
-                !(info->feature_flush & REQ_FUA)));
+                !info->feature_fua));
 }
 
 static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -981,24 +978,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
        return 0;
 }
 
-static const char *flush_info(unsigned int feature_flush)
+static const char *flush_info(struct blkfront_info *info)
 {
-       switch (feature_flush & ((REQ_FLUSH | REQ_FUA))) {
-       case REQ_FLUSH|REQ_FUA:
+       if (info->feature_flush && info->feature_fua)
                return "barrier: enabled;";
-       case REQ_FLUSH:
+       else if (info->feature_flush)
                return "flush diskcache: enabled;";
-       default:
+       else
                return "barrier or flush: disabled;";
-       }
 }
 
 static void xlvbd_flush(struct blkfront_info *info)
 {
-       blk_queue_write_cache(info->rq, info->feature_flush & REQ_FLUSH,
-                               info->feature_flush & REQ_FUA);
+       blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
+                             info->feature_fua ? true : false);
        pr_info("blkfront: %s: %s %s %s %s %s\n",
-               info->gd->disk_name, flush_info(info->feature_flush),
+               info->gd->disk_name, flush_info(info),
                "persistent grants:", info->feature_persistent ?
                "enabled;" : "disabled;", "indirect descriptors:",
                info->max_indirect_segments ? "enabled;" : "disabled;");
@@ -1617,6 +1612,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
                        if (unlikely(error)) {
                                if (error == -EOPNOTSUPP)
                                        error = 0;
+                               info->feature_fua = 0;
                                info->feature_flush = 0;
                                xlvbd_flush(info);
                        }
@@ -2064,7 +2060,7 @@ static int blkif_recover(struct blkfront_info *info)
                                bio_trim(cloned_bio, offset, size);
                                cloned_bio->bi_private = split_bio;
                                cloned_bio->bi_end_io = split_bio_end;
-                               submit_bio(cloned_bio->bi_rw, cloned_bio);
+                               submit_bio(cloned_bio);
                        }
                        /*
                         * Now we have to wait for all those smaller bios to
@@ -2073,7 +2069,7 @@ static int blkif_recover(struct blkfront_info *info)
                        continue;
                }
                /* We don't need to split this bio */
-               submit_bio(bio->bi_rw, bio);
+               submit_bio(bio);
        }
 
        return 0;
@@ -2108,8 +2104,10 @@ static int blkfront_resume(struct xenbus_device *dev)
                        /*
                         * Get the bios in the request so we can re-queue them.
                         */
-                       if (shadow[j].request->cmd_flags &
-                                       (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
+                       if (req_op(shadow[j].request) == REQ_OP_FLUSH ||
+                           req_op(shadow[j].request) == REQ_OP_DISCARD ||
+                           shadow[j].request->cmd_flags & (REQ_FUA | REQ_SECURE)) {
+
                                /*
                                 * Flush operations don't contain bios, so
                                 * we need to requeue the whole request
@@ -2298,6 +2296,7 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
        unsigned int indirect_segments;
 
        info->feature_flush = 0;
+       info->feature_fua = 0;
 
        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                        "feature-barrier", "%d", &barrier,
@@ -2310,8 +2309,11 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
         *
         * If there are barriers, then we use flush.
         */
-       if (!err && barrier)
-               info->feature_flush = REQ_FLUSH | REQ_FUA;
+       if (!err && barrier) {
+               info->feature_flush = 1;
+               info->feature_fua = 1;
+       }
+
        /*
         * And if there is "feature-flush-cache" use that above
         * barriers.
@@ -2320,8 +2322,10 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
                        "feature-flush-cache", "%d", &flush,
                        NULL);
 
-       if (!err && flush)
-               info->feature_flush = REQ_FLUSH;
+       if (!err && flush) {
+               info->feature_flush = 1;
+               info->feature_fua = 0;
+       }
 
        err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
                        "feature-discard", "%d", &discard,
index 8fcad8b..e5e5d19 100644 (file)
@@ -874,7 +874,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
        offset = (bio->bi_iter.bi_sector &
                  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
 
-       if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+       if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
                zram_bio_discard(zram, index, offset, bio);
                bio_endio(bio);
                return;
index 474173e..5887a7a 100644 (file)
@@ -459,9 +459,6 @@ int ide_cdrom_packet(struct cdrom_device_info *cdi,
           layer. the packet must be complete, as we do not
           touch it at all. */
 
-       if (cgc->data_direction == CGC_DATA_WRITE)
-               flags |= REQ_WRITE;
-
        if (cgc->sense)
                memset(cgc->sense, 0, sizeof(struct request_sense));
 
index 05dbcce..e378ef7 100644 (file)
@@ -431,7 +431,7 @@ static int idedisk_prep_fn(struct request_queue *q, struct request *rq)
        ide_drive_t *drive = q->queuedata;
        struct ide_cmd *cmd;
 
-       if (!(rq->cmd_flags & REQ_FLUSH))
+       if (req_op(rq) != REQ_OP_FLUSH)
                return BLKPREP_OK;
 
        if (rq->special) {
index 2fb5350..f079d8d 100644 (file)
@@ -206,7 +206,7 @@ static void idefloppy_create_rw_cmd(ide_drive_t *drive,
        memcpy(rq->cmd, pc->c, 12);
 
        pc->rq = rq;
-       if (rq->cmd_flags & REQ_WRITE)
+       if (cmd == WRITE)
                pc->flags |= PC_FLAG_WRITING;
 
        pc->flags |= PC_FLAG_DMA_OK;
index 2103e97..de86d72 100644 (file)
@@ -342,7 +342,7 @@ try:
 
                /* Perform read to do GC */
                bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
-               bio->bi_rw = READ;
+               bio_set_op_attrs(bio,  REQ_OP_READ, 0);
                bio->bi_private = &wait;
                bio->bi_end_io = rrpc_end_sync_bio;
 
@@ -364,7 +364,7 @@ try:
                reinit_completion(&wait);
 
                bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
-               bio->bi_rw = WRITE;
+               bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
                bio->bi_private = &wait;
                bio->bi_end_io = rrpc_end_sync_bio;
 
@@ -908,7 +908,7 @@ static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
        struct nvm_rq *rqd;
        int err;
 
-       if (bio->bi_rw & REQ_DISCARD) {
+       if (bio_op(bio) == REQ_OP_DISCARD) {
                rrpc_discard(rrpc, bio);
                return BLK_QC_T_NONE;
        }
index eab505e..76f7534 100644 (file)
@@ -294,10 +294,10 @@ static void bch_btree_node_read(struct btree *b)
        closure_init_stack(&cl);
 
        bio = bch_bbio_alloc(b->c);
-       bio->bi_rw      = REQ_META|READ_SYNC;
        bio->bi_iter.bi_size = KEY_SIZE(&b->key) << 9;
        bio->bi_end_io  = btree_node_read_endio;
        bio->bi_private = &cl;
+       bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
 
        bch_bio_map(bio, b->keys.set[0].data);
 
@@ -396,8 +396,8 @@ static void do_btree_node_write(struct btree *b)
 
        b->bio->bi_end_io       = btree_node_write_endio;
        b->bio->bi_private      = cl;
-       b->bio->bi_rw           = REQ_META|WRITE_SYNC|REQ_FUA;
        b->bio->bi_iter.bi_size = roundup(set_bytes(i), block_bytes(b->c));
+       bio_set_op_attrs(b->bio, REQ_OP_WRITE, REQ_META|WRITE_SYNC|REQ_FUA);
        bch_bio_map(b->bio, i);
 
        /*
index 8b1f1d5..c28df16 100644 (file)
@@ -52,9 +52,10 @@ void bch_btree_verify(struct btree *b)
        bio->bi_bdev            = PTR_CACHE(b->c, &b->key, 0)->bdev;
        bio->bi_iter.bi_sector  = PTR_OFFSET(&b->key, 0);
        bio->bi_iter.bi_size    = KEY_SIZE(&v->key) << 9;
+       bio_set_op_attrs(bio, REQ_OP_READ, REQ_META|READ_SYNC);
        bch_bio_map(bio, sorted);
 
-       submit_bio_wait(REQ_META|READ_SYNC, bio);
+       submit_bio_wait(bio);
        bch_bbio_free(bio, b->c);
 
        memcpy(ondisk, sorted, KEY_SIZE(&v->key) << 9);
@@ -113,11 +114,12 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio)
        check = bio_clone(bio, GFP_NOIO);
        if (!check)
                return;
+       bio_set_op_attrs(check, REQ_OP_READ, READ_SYNC);
 
        if (bio_alloc_pages(check, GFP_NOIO))
                goto out_put;
 
-       submit_bio_wait(READ_SYNC, check);
+       submit_bio_wait(check);
 
        bio_for_each_segment(bv, bio, iter) {
                void *p1 = kmap_atomic(bv.bv_page);
index 86a0bb8..fd885cc 100644 (file)
@@ -111,7 +111,7 @@ void bch_bbio_count_io_errors(struct cache_set *c, struct bio *bio,
        struct bbio *b = container_of(bio, struct bbio, bio);
        struct cache *ca = PTR_CACHE(c, &b->key, 0);
 
-       unsigned threshold = bio->bi_rw & REQ_WRITE
+       unsigned threshold = op_is_write(bio_op(bio))
                ? c->congested_write_threshold_us
                : c->congested_read_threshold_us;
 
index 29eba72..6925023 100644 (file)
@@ -54,11 +54,11 @@ reread:             left = ca->sb.bucket_size - offset;
                bio_reset(bio);
                bio->bi_iter.bi_sector  = bucket + offset;
                bio->bi_bdev    = ca->bdev;
-               bio->bi_rw      = READ;
                bio->bi_iter.bi_size    = len << 9;
 
                bio->bi_end_io  = journal_read_endio;
                bio->bi_private = &cl;
+               bio_set_op_attrs(bio, REQ_OP_READ, 0);
                bch_bio_map(bio, data);
 
                closure_bio_submit(bio, &cl);
@@ -418,7 +418,7 @@ static void journal_discard_work(struct work_struct *work)
        struct journal_device *ja =
                container_of(work, struct journal_device, discard_work);
 
-       submit_bio(0, &ja->discard_bio);
+       submit_bio(&ja->discard_bio);
 }
 
 static void do_journal_discard(struct cache *ca)
@@ -449,10 +449,10 @@ static void do_journal_discard(struct cache *ca)
                atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT);
 
                bio_init(bio);
+               bio_set_op_attrs(bio, REQ_OP_DISCARD, 0);
                bio->bi_iter.bi_sector  = bucket_to_sector(ca->set,
                                                ca->sb.d[ja->discard_idx]);
                bio->bi_bdev            = ca->bdev;
-               bio->bi_rw              = REQ_WRITE|REQ_DISCARD;
                bio->bi_max_vecs        = 1;
                bio->bi_io_vec          = bio->bi_inline_vecs;
                bio->bi_iter.bi_size    = bucket_bytes(ca);
@@ -626,11 +626,12 @@ static void journal_write_unlocked(struct closure *cl)
                bio_reset(bio);
                bio->bi_iter.bi_sector  = PTR_OFFSET(k, i);
                bio->bi_bdev    = ca->bdev;
-               bio->bi_rw      = REQ_WRITE|REQ_SYNC|REQ_META|REQ_FLUSH|REQ_FUA;
                bio->bi_iter.bi_size = sectors << 9;
 
                bio->bi_end_io  = journal_write_endio;
                bio->bi_private = w;
+               bio_set_op_attrs(bio, REQ_OP_WRITE,
+                                REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA);
                bch_bio_map(bio, w->data);
 
                trace_bcache_journal_write(bio);
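
Ordered writes such as the journal write above keep their semantics, only spelled differently: the operation is REQ_OP_WRITE and the ordering lives in the flags, with REQ_PREFLUSH taking over from the old REQ_FLUSH. A minimal sketch, assuming a bio that is otherwise ready to submit:

    static void example_ordered_write(struct bio *bio)
    {
            /* flush preceding writes, then force this one to media (FUA) */
            bio_set_op_attrs(bio, REQ_OP_WRITE,
                             REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA);
            submit_bio(bio);
    }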
index b929fc9..1881319 100644 (file)
@@ -163,7 +163,7 @@ static void read_moving(struct cache_set *c)
                moving_init(io);
                bio = &io->bio.bio;
 
-               bio->bi_rw      = READ;
+               bio_set_op_attrs(bio, REQ_OP_READ, 0);
                bio->bi_end_io  = read_moving_endio;
 
                if (bio_alloc_pages(bio, GFP_KERNEL))
index 25fa844..69f16f4 100644 (file)
@@ -205,10 +205,10 @@ static void bch_data_insert_start(struct closure *cl)
                return bch_data_invalidate(cl);
 
        /*
-        * Journal writes are marked REQ_FLUSH; if the original write was a
+        * Journal writes are marked REQ_PREFLUSH; if the original write was a
         * flush, it'll wait on the journal write.
         */
-       bio->bi_rw &= ~(REQ_FLUSH|REQ_FUA);
+       bio->bi_rw &= ~(REQ_PREFLUSH|REQ_FUA);
 
        do {
                unsigned i;
@@ -253,7 +253,7 @@ static void bch_data_insert_start(struct closure *cl)
                trace_bcache_cache_insert(k);
                bch_keylist_push(&op->insert_keys);
 
-               n->bi_rw |= REQ_WRITE;
+               bio_set_op_attrs(n, REQ_OP_WRITE, 0);
                bch_submit_bbio(n, op->c, k, 0);
        } while (n != bio);
 
@@ -378,12 +378,12 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 
        if (test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags) ||
            c->gc_stats.in_use > CUTOFF_CACHE_ADD ||
-           (bio->bi_rw & REQ_DISCARD))
+           (bio_op(bio) == REQ_OP_DISCARD))
                goto skip;
 
        if (mode == CACHE_MODE_NONE ||
            (mode == CACHE_MODE_WRITEAROUND &&
-            (bio->bi_rw & REQ_WRITE)))
+            op_is_write(bio_op(bio))))
                goto skip;
 
        if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
@@ -404,7 +404,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
 
        if (!congested &&
            mode == CACHE_MODE_WRITEBACK &&
-           (bio->bi_rw & REQ_WRITE) &&
+           op_is_write(bio_op(bio)) &&
            (bio->bi_rw & REQ_SYNC))
                goto rescale;
 
@@ -657,7 +657,7 @@ static inline struct search *search_alloc(struct bio *bio,
        s->cache_miss           = NULL;
        s->d                    = d;
        s->recoverable          = 1;
-       s->write                = (bio->bi_rw & REQ_WRITE) != 0;
+       s->write                = op_is_write(bio_op(bio));
        s->read_dirty_data      = 0;
        s->start_time           = jiffies;
 
@@ -668,7 +668,7 @@ static inline struct search *search_alloc(struct bio *bio,
        s->iop.write_prio       = 0;
        s->iop.error            = 0;
        s->iop.flags            = 0;
-       s->iop.flush_journal    = (bio->bi_rw & (REQ_FLUSH|REQ_FUA)) != 0;
+       s->iop.flush_journal    = (bio->bi_rw & (REQ_PREFLUSH|REQ_FUA)) != 0;
        s->iop.wq               = bcache_wq;
 
        return s;
@@ -899,7 +899,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
         * But check_overlapping drops dirty keys for which io hasn't started,
         * so we still want to call it.
         */
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                s->iop.bypass = true;
 
        if (should_writeback(dc, s->orig_bio,
@@ -913,22 +913,22 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s)
                s->iop.bio = s->orig_bio;
                bio_get(s->iop.bio);
 
-               if (!(bio->bi_rw & REQ_DISCARD) ||
+               if ((bio_op(bio) != REQ_OP_DISCARD) ||
                    blk_queue_discard(bdev_get_queue(dc->bdev)))
                        closure_bio_submit(bio, cl);
        } else if (s->iop.writeback) {
                bch_writeback_add(dc);
                s->iop.bio = bio;
 
-               if (bio->bi_rw & REQ_FLUSH) {
+               if (bio->bi_rw & REQ_PREFLUSH) {
                        /* Also need to send a flush to the backing device */
                        struct bio *flush = bio_alloc_bioset(GFP_NOIO, 0,
                                                             dc->disk.bio_split);
 
-                       flush->bi_rw    = WRITE_FLUSH;
                        flush->bi_bdev  = bio->bi_bdev;
                        flush->bi_end_io = request_endio;
                        flush->bi_private = cl;
+                       bio_set_op_attrs(flush, REQ_OP_WRITE, WRITE_FLUSH);
 
                        closure_bio_submit(flush, cl);
                }
@@ -992,7 +992,7 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
                                cached_dev_read(dc, s);
                }
        } else {
-               if ((bio->bi_rw & REQ_DISCARD) &&
+               if ((bio_op(bio) == REQ_OP_DISCARD) &&
                    !blk_queue_discard(bdev_get_queue(dc->bdev)))
                        bio_endio(bio);
                else
@@ -1103,7 +1103,7 @@ static blk_qc_t flash_dev_make_request(struct request_queue *q,
                                        &KEY(d->id, bio->bi_iter.bi_sector, 0),
                                        &KEY(d->id, bio_end_sector(bio), 0));
 
-               s->iop.bypass           = (bio->bi_rw & REQ_DISCARD) != 0;
+               s->iop.bypass           = (bio_op(bio) == REQ_OP_DISCARD) != 0;
                s->iop.writeback        = true;
                s->iop.bio              = bio;
 
index f5dbb4e..c944daf 100644 (file)
@@ -212,8 +212,8 @@ static void __write_super(struct cache_sb *sb, struct bio *bio)
        unsigned i;
 
        bio->bi_iter.bi_sector  = SB_SECTOR;
-       bio->bi_rw              = REQ_SYNC|REQ_META;
        bio->bi_iter.bi_size    = SB_SIZE;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
        bch_bio_map(bio, NULL);
 
        out->offset             = cpu_to_le64(sb->offset);
@@ -238,7 +238,7 @@ static void __write_super(struct cache_sb *sb, struct bio *bio)
        pr_debug("ver %llu, flags %llu, seq %llu",
                 sb->version, sb->flags, sb->seq);
 
-       submit_bio(REQ_WRITE, bio);
+       submit_bio(bio);
 }
 
 static void bch_write_bdev_super_unlock(struct closure *cl)
@@ -333,7 +333,7 @@ static void uuid_io_unlock(struct closure *cl)
        up(&c->uuid_write_mutex);
 }
 
-static void uuid_io(struct cache_set *c, unsigned long rw,
+static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
                    struct bkey *k, struct closure *parent)
 {
        struct closure *cl = &c->uuid_write;
@@ -348,21 +348,22 @@ static void uuid_io(struct cache_set *c, unsigned long rw,
        for (i = 0; i < KEY_PTRS(k); i++) {
                struct bio *bio = bch_bbio_alloc(c);
 
-               bio->bi_rw      = REQ_SYNC|REQ_META|rw;
+               bio->bi_rw      = REQ_SYNC|REQ_META|op_flags;
                bio->bi_iter.bi_size = KEY_SIZE(k) << 9;
 
                bio->bi_end_io  = uuid_endio;
                bio->bi_private = cl;
+               bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
                bch_bio_map(bio, c->uuids);
 
                bch_submit_bbio(bio, c, k, i);
 
-               if (!(rw & WRITE))
+               if (op != REQ_OP_WRITE)
                        break;
        }
 
        bch_extent_to_text(buf, sizeof(buf), k);
-       pr_debug("%s UUIDs at %s", rw & REQ_WRITE ? "wrote" : "read", buf);
+       pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);
 
        for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
                if (!bch_is_zero(u->uuid, 16))
@@ -381,7 +382,7 @@ static char *uuid_read(struct cache_set *c, struct jset *j, struct closure *cl)
                return "bad uuid pointer";
 
        bkey_copy(&c->uuid_bucket, k);
-       uuid_io(c, READ_SYNC, k, cl);
+       uuid_io(c, REQ_OP_READ, READ_SYNC, k, cl);
 
        if (j->version < BCACHE_JSET_VERSION_UUIDv1) {
                struct uuid_entry_v0    *u0 = (void *) c->uuids;
@@ -426,7 +427,7 @@ static int __uuid_write(struct cache_set *c)
                return 1;
 
        SET_KEY_SIZE(&k.key, c->sb.bucket_size);
-       uuid_io(c, REQ_WRITE, &k.key, &cl);
+       uuid_io(c, REQ_OP_WRITE, 0, &k.key, &cl);
        closure_sync(&cl);
 
        bkey_copy(&c->uuid_bucket, &k.key);
@@ -498,7 +499,8 @@ static void prio_endio(struct bio *bio)
        closure_put(&ca->prio);
 }
 
-static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
+static void prio_io(struct cache *ca, uint64_t bucket, int op,
+                   unsigned long op_flags)
 {
        struct closure *cl = &ca->prio;
        struct bio *bio = bch_bbio_alloc(ca->set);
@@ -507,11 +509,11 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw)
 
        bio->bi_iter.bi_sector  = bucket * ca->sb.bucket_size;
        bio->bi_bdev            = ca->bdev;
-       bio->bi_rw              = REQ_SYNC|REQ_META|rw;
        bio->bi_iter.bi_size    = bucket_bytes(ca);
 
        bio->bi_end_io  = prio_endio;
        bio->bi_private = ca;
+       bio_set_op_attrs(bio, op, REQ_SYNC|REQ_META|op_flags);
        bch_bio_map(bio, ca->disk_buckets);
 
        closure_bio_submit(bio, &ca->prio);
@@ -557,7 +559,7 @@ void bch_prio_write(struct cache *ca)
                BUG_ON(bucket == -1);
 
                mutex_unlock(&ca->set->bucket_lock);
-               prio_io(ca, bucket, REQ_WRITE);
+               prio_io(ca, bucket, REQ_OP_WRITE, 0);
                mutex_lock(&ca->set->bucket_lock);
 
                ca->prio_buckets[i] = bucket;
@@ -599,7 +601,7 @@ static void prio_read(struct cache *ca, uint64_t bucket)
                        ca->prio_last_buckets[bucket_nr] = bucket;
                        bucket_nr++;
 
-                       prio_io(ca, bucket, READ_SYNC);
+                       prio_io(ca, bucket, REQ_OP_READ, READ_SYNC);
 
                        if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8))
                                pr_warn("bad csum reading priorities");
index 6012367..d9fd2a6 100644 (file)
@@ -182,7 +182,7 @@ static void write_dirty(struct closure *cl)
        struct keybuf_key *w = io->bio.bi_private;
 
        dirty_init(w);
-       io->bio.bi_rw           = WRITE;
+       bio_set_op_attrs(&io->bio, REQ_OP_WRITE, 0);
        io->bio.bi_iter.bi_sector = KEY_START(&w->key);
        io->bio.bi_bdev         = io->dc->bdev;
        io->bio.bi_end_io       = dirty_endio;
@@ -251,10 +251,10 @@ static void read_dirty(struct cached_dev *dc)
                io->dc          = dc;
 
                dirty_init(w);
+               bio_set_op_attrs(&io->bio, REQ_OP_READ, 0);
                io->bio.bi_iter.bi_sector = PTR_OFFSET(&w->key, 0);
                io->bio.bi_bdev         = PTR_CACHE(dc->disk.c,
                                                    &w->key, 0)->bdev;
-               io->bio.bi_rw           = READ;
                io->bio.bi_end_io       = read_dirty_endio;
 
                if (bio_alloc_pages(&io->bio, GFP_KERNEL))
index d8129ec..6fff794 100644 (file)
@@ -162,7 +162,7 @@ static int read_sb_page(struct mddev *mddev, loff_t offset,
 
                if (sync_page_io(rdev, target,
                                 roundup(size, bdev_logical_block_size(rdev->bdev)),
-                                page, READ, true)) {
+                                page, REQ_OP_READ, 0, true)) {
                        page->index = index;
                        return 0;
                }
@@ -297,7 +297,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
                        atomic_inc(&bitmap->pending_writes);
                        set_buffer_locked(bh);
                        set_buffer_mapped(bh);
-                       submit_bh(WRITE | REQ_SYNC, bh);
+                       submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);
                        bh = bh->b_this_page;
                }
 
@@ -392,7 +392,7 @@ static int read_page(struct file *file, unsigned long index,
                        atomic_inc(&bitmap->pending_writes);
                        set_buffer_locked(bh);
                        set_buffer_mapped(bh);
-                       submit_bh(READ, bh);
+                       submit_bh(REQ_OP_READ, 0, bh);
                }
                block++;
                bh = bh->b_this_page;
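
The buffer-head and MD helpers follow suit: submit_bh() and sync_page_io() now take the op and the flags as separate arguments. Restating the calls from the hunks above next to their old forms:

    /* old: submit_bh(WRITE | REQ_SYNC, bh); */
    submit_bh(REQ_OP_WRITE, REQ_SYNC, bh);

    /* old: sync_page_io(rdev, target, size, page, READ, true); */
    sync_page_io(rdev, target, size, page, REQ_OP_READ, 0, true);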
index cd77216..6571c81 100644 (file)
@@ -574,7 +574,8 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
 {
        int r;
        struct dm_io_request io_req = {
-               .bi_rw = rw,
+               .bi_op = rw,
+               .bi_op_flags = 0,
                .notify.fn = dmio_complete,
                .notify.context = b,
                .client = b->c->dm_io,
@@ -634,6 +635,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
         * the dm_buffer's inline bio is local to bufio.
         */
        b->bio.bi_private = end_io;
+       bio_set_op_attrs(&b->bio, rw, 0);
 
        /*
         * We assume that if len >= PAGE_SIZE ptr is page-aligned.
@@ -660,7 +662,7 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
                ptr += PAGE_SIZE;
        } while (len > 0);
 
-       submit_bio(rw, &b->bio);
+       submit_bio(&b->bio);
 }
 
 static void submit_io(struct dm_buffer *b, int rw, sector_t block,
@@ -1326,7 +1328,8 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
 int dm_bufio_issue_flush(struct dm_bufio_client *c)
 {
        struct dm_io_request io_req = {
-               .bi_rw = WRITE_FLUSH,
+               .bi_op = REQ_OP_WRITE,
+               .bi_op_flags = WRITE_FLUSH,
                .mem.type = DM_IO_KMEM,
                .mem.ptr.addr = NULL,
                .client = c->dm_io,
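
dm-io requests carry the same split: struct dm_io_request now has .bi_op and .bi_op_flags in place of the single .bi_rw. A sketch of a plain synchronous read request under the new layout; fields not shown here (such as .client) are filled in exactly as before, and the buffer pointer is illustrative:

    struct dm_io_request io_req = {
            .bi_op        = REQ_OP_READ,
            .bi_op_flags  = 0,
            .mem.type     = DM_IO_KMEM,
            .mem.ptr.addr = buffer,     /* illustrative buffer pointer */
            .notify.fn    = NULL,       /* NULL makes dm_io() synchronous */
    };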
index ee0510f..718744d 100644 (file)
@@ -788,7 +788,8 @@ static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio)
 
        spin_lock_irqsave(&cache->lock, flags);
        if (cache->need_tick_bio &&
-           !(bio->bi_rw & (REQ_FUA | REQ_FLUSH | REQ_DISCARD))) {
+           !(bio->bi_rw & (REQ_FUA | REQ_PREFLUSH)) &&
+           bio_op(bio) != REQ_OP_DISCARD) {
                pb->tick = true;
                cache->need_tick_bio = false;
        }
@@ -829,7 +830,7 @@ static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio)
 
 static int bio_triggers_commit(struct cache *cache, struct bio *bio)
 {
-       return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+       return bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
 }
 
 /*
@@ -851,7 +852,7 @@ static void inc_ds(struct cache *cache, struct bio *bio,
 static bool accountable_bio(struct cache *cache, struct bio *bio)
 {
        return ((bio->bi_bdev == cache->origin_dev->bdev) &&
-               !(bio->bi_rw & REQ_DISCARD));
+               bio_op(bio) != REQ_OP_DISCARD);
 }
 
 static void accounted_begin(struct cache *cache, struct bio *bio)
@@ -1067,7 +1068,8 @@ static void dec_io_migrations(struct cache *cache)
 
 static bool discard_or_flush(struct bio *bio)
 {
-       return bio->bi_rw & (REQ_FLUSH | REQ_FUA | REQ_DISCARD);
+       return bio_op(bio) == REQ_OP_DISCARD ||
+              bio->bi_rw & (REQ_PREFLUSH | REQ_FUA);
 }
 
 static void __cell_defer(struct cache *cache, struct dm_bio_prison_cell *cell)
@@ -1612,8 +1614,8 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
                remap_to_cache(cache, bio, 0);
 
        /*
-        * REQ_FLUSH is not directed at any particular block so we don't
-        * need to inc_ds().  REQ_FUA's are split into a write + REQ_FLUSH
+        * REQ_PREFLUSH is not directed at any particular block so we don't
+        * need to inc_ds().  REQ_FUA's are split into a write + REQ_PREFLUSH
         * by dm-core.
         */
        issue(cache, bio);
@@ -1978,9 +1980,9 @@ static void process_deferred_bios(struct cache *cache)
 
                bio = bio_list_pop(&bios);
 
-               if (bio->bi_rw & REQ_FLUSH)
+               if (bio->bi_rw & REQ_PREFLUSH)
                        process_flush_bio(cache, bio);
-               else if (bio->bi_rw & REQ_DISCARD)
+               else if (bio_op(bio) == REQ_OP_DISCARD)
                        process_discard_bio(cache, &structs, bio);
                else
                        process_bio(cache, &structs, bio);
index 4f3cb35..96dd5d7 100644
@@ -1136,7 +1136,7 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
        clone->bi_private = io;
        clone->bi_end_io  = crypt_endio;
        clone->bi_bdev    = cc->dev->bdev;
-       clone->bi_rw      = io->base_bio->bi_rw;
+       bio_set_op_attrs(clone, bio_op(io->base_bio), io->base_bio->bi_rw);
 }
 
 static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
@@ -1911,11 +1911,12 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
        struct crypt_config *cc = ti->private;
 
        /*
-        * If bio is REQ_FLUSH or REQ_DISCARD, just bypass crypt queues.
-        * - for REQ_FLUSH device-mapper core ensures that no IO is in-flight
-        * - for REQ_DISCARD caller must use flush if IO ordering matters
+        * If bio is REQ_PREFLUSH or REQ_OP_DISCARD, just bypass crypt queues.
+        * - for REQ_PREFLUSH device-mapper core ensures that no IO is in-flight
+        * - for REQ_OP_DISCARD caller must use flush if IO ordering matters
         */
-       if (unlikely(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) {
+       if (unlikely(bio->bi_rw & REQ_PREFLUSH ||
+           bio_op(bio) == REQ_OP_DISCARD)) {
                bio->bi_bdev = cc->dev->bdev;
                if (bio_sectors(bio))
                        bio->bi_iter.bi_sector = cc->start +
index 665bf32..2faf49d 100644
@@ -1540,9 +1540,9 @@ static int era_map(struct dm_target *ti, struct bio *bio)
        remap_to_origin(era, bio);
 
        /*
-        * REQ_FLUSH bios carry no data, so we're not interested in them.
+        * REQ_PREFLUSH bios carry no data, so we're not interested in them.
         */
-       if (!(bio->bi_rw & REQ_FLUSH) &&
+       if (!(bio->bi_rw & REQ_PREFLUSH) &&
            (bio_data_dir(bio) == WRITE) &&
            !metadata_current_marked(era->md, block)) {
                defer_bio(era, bio);
index b7341de..29b99fb 100644
@@ -266,7 +266,7 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
                data[fc->corrupt_bio_byte - 1] = fc->corrupt_bio_value;
 
                DMDEBUG("Corrupting data bio=%p by writing %u to byte %u "
-                       "(rw=%c bi_rw=%lu bi_sector=%llu cur_bytes=%u)\n",
+                       "(rw=%c bi_rw=%u bi_sector=%llu cur_bytes=%u)\n",
                        bio, fc->corrupt_bio_value, fc->corrupt_bio_byte,
                        (bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_rw,
                        (unsigned long long)bio->bi_iter.bi_sector, bio_bytes);
index 06d426e..0e225fd 100644
@@ -278,8 +278,9 @@ static void km_dp_init(struct dpages *dp, void *data)
 /*-----------------------------------------------------------------
  * IO routines that accept a list of pages.
  *---------------------------------------------------------------*/
-static void do_region(int rw, unsigned region, struct dm_io_region *where,
-                     struct dpages *dp, struct io *io)
+static void do_region(int op, int op_flags, unsigned region,
+                     struct dm_io_region *where, struct dpages *dp,
+                     struct io *io)
 {
        struct bio *bio;
        struct page *page;
@@ -295,24 +296,25 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
        /*
         * Reject unsupported discard and write same requests.
         */
-       if (rw & REQ_DISCARD)
+       if (op == REQ_OP_DISCARD)
                special_cmd_max_sectors = q->limits.max_discard_sectors;
-       else if (rw & REQ_WRITE_SAME)
+       else if (op == REQ_OP_WRITE_SAME)
                special_cmd_max_sectors = q->limits.max_write_same_sectors;
-       if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) {
+       if ((op == REQ_OP_DISCARD || op == REQ_OP_WRITE_SAME) &&
+           special_cmd_max_sectors == 0) {
                dec_count(io, region, -EOPNOTSUPP);
                return;
        }
 
        /*
-        * where->count may be zero if rw holds a flush and we need to
+        * where->count may be zero if op holds a flush and we need to
         * send a zero-sized flush.
         */
        do {
                /*
                 * Allocate a suitably sized-bio.
                 */
-               if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME))
+               if ((op == REQ_OP_DISCARD) || (op == REQ_OP_WRITE_SAME))
                        num_bvecs = 1;
                else
                        num_bvecs = min_t(int, BIO_MAX_PAGES,
@@ -322,13 +324,14 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
                bio->bi_iter.bi_sector = where->sector + (where->count - remaining);
                bio->bi_bdev = where->bdev;
                bio->bi_end_io = endio;
+               bio_set_op_attrs(bio, op, op_flags);
                store_io_and_region_in_bio(bio, io, region);
 
-               if (rw & REQ_DISCARD) {
+               if (op == REQ_OP_DISCARD) {
                        num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
                        bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
                        remaining -= num_sectors;
-               } else if (rw & REQ_WRITE_SAME) {
+               } else if (op == REQ_OP_WRITE_SAME) {
                        /*
                         * WRITE SAME only uses a single page.
                         */
@@ -355,11 +358,11 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
                }
 
                atomic_inc(&io->count);
-               submit_bio(rw, bio);
+               submit_bio(bio);
        } while (remaining);
 }
 
-static void dispatch_io(int rw, unsigned int num_regions,
+static void dispatch_io(int op, int op_flags, unsigned int num_regions,
                        struct dm_io_region *where, struct dpages *dp,
                        struct io *io, int sync)
 {
@@ -369,7 +372,7 @@ static void dispatch_io(int rw, unsigned int num_regions,
        BUG_ON(num_regions > DM_IO_MAX_REGIONS);
 
        if (sync)
-               rw |= REQ_SYNC;
+               op_flags |= REQ_SYNC;
 
        /*
         * For multiple regions we need to be careful to rewind
@@ -377,8 +380,8 @@ static void dispatch_io(int rw, unsigned int num_regions,
         */
        for (i = 0; i < num_regions; i++) {
                *dp = old_pages;
-               if (where[i].count || (rw & REQ_FLUSH))
-                       do_region(rw, i, where + i, dp, io);
+               if (where[i].count || (op_flags & REQ_PREFLUSH))
+                       do_region(op, op_flags, i, where + i, dp, io);
        }
 
        /*
@@ -402,13 +405,13 @@ static void sync_io_complete(unsigned long error, void *context)
 }
 
 static int sync_io(struct dm_io_client *client, unsigned int num_regions,
-                  struct dm_io_region *where, int rw, struct dpages *dp,
-                  unsigned long *error_bits)
+                  struct dm_io_region *where, int op, int op_flags,
+                  struct dpages *dp, unsigned long *error_bits)
 {
        struct io *io;
        struct sync_io sio;
 
-       if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
+       if (num_regions > 1 && !op_is_write(op)) {
                WARN_ON(1);
                return -EIO;
        }
@@ -425,7 +428,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
        io->vma_invalidate_address = dp->vma_invalidate_address;
        io->vma_invalidate_size = dp->vma_invalidate_size;
 
-       dispatch_io(rw, num_regions, where, dp, io, 1);
+       dispatch_io(op, op_flags, num_regions, where, dp, io, 1);
 
        wait_for_completion_io(&sio.wait);
 
@@ -436,12 +439,12 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
 }
 
 static int async_io(struct dm_io_client *client, unsigned int num_regions,
-                   struct dm_io_region *where, int rw, struct dpages *dp,
-                   io_notify_fn fn, void *context)
+                   struct dm_io_region *where, int op, int op_flags,
+                   struct dpages *dp, io_notify_fn fn, void *context)
 {
        struct io *io;
 
-       if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
+       if (num_regions > 1 && !op_is_write(op)) {
                WARN_ON(1);
                fn(1, context);
                return -EIO;
@@ -457,7 +460,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
        io->vma_invalidate_address = dp->vma_invalidate_address;
        io->vma_invalidate_size = dp->vma_invalidate_size;
 
-       dispatch_io(rw, num_regions, where, dp, io, 0);
+       dispatch_io(op, op_flags, num_regions, where, dp, io, 0);
        return 0;
 }
 
@@ -480,7 +483,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
 
        case DM_IO_VMA:
                flush_kernel_vmap_range(io_req->mem.ptr.vma, size);
-               if ((io_req->bi_rw & RW_MASK) == READ) {
+               if (io_req->bi_op == REQ_OP_READ) {
                        dp->vma_invalidate_address = io_req->mem.ptr.vma;
                        dp->vma_invalidate_size = size;
                }
@@ -518,10 +521,12 @@ int dm_io(struct dm_io_request *io_req, unsigned num_regions,
 
        if (!io_req->notify.fn)
                return sync_io(io_req->client, num_regions, where,
-                              io_req->bi_rw, &dp, sync_error_bits);
+                              io_req->bi_op, io_req->bi_op_flags, &dp,
+                              sync_error_bits);
 
-       return async_io(io_req->client, num_regions, where, io_req->bi_rw,
-                       &dp, io_req->notify.fn, io_req->notify.context);
+       return async_io(io_req->client, num_regions, where, io_req->bi_op,
+                       io_req->bi_op_flags, &dp, io_req->notify.fn,
+                       io_req->notify.context);
 }
 EXPORT_SYMBOL(dm_io);
 
index 1452ed9..9da1d54 100644
@@ -465,7 +465,7 @@ static void complete_io(unsigned long error, void *context)
        io_job_finish(kc->throttle);
 
        if (error) {
-               if (job->rw & WRITE)
+               if (op_is_write(job->rw))
                        job->write_err |= error;
                else
                        job->read_err = 1;
@@ -477,7 +477,7 @@ static void complete_io(unsigned long error, void *context)
                }
        }
 
-       if (job->rw & WRITE)
+       if (op_is_write(job->rw))
                push(&kc->complete_jobs, job);
 
        else {
@@ -496,7 +496,8 @@ static int run_io_job(struct kcopyd_job *job)
 {
        int r;
        struct dm_io_request io_req = {
-               .bi_rw = job->rw,
+               .bi_op = job->rw,
+               .bi_op_flags = 0,
                .mem.type = DM_IO_PAGE_LIST,
                .mem.ptr.pl = job->pages,
                .mem.offset = 0,
@@ -550,7 +551,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc,
 
                if (r < 0) {
                        /* error this rogue job */
-                       if (job->rw & WRITE)
+                       if (op_is_write(job->rw))
                                job->write_err = (unsigned long) -1L;
                        else
                                job->read_err = 1;
@@ -734,7 +735,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from,
                /*
                 * Use WRITE SAME to optimize zeroing if all dests support it.
                 */
-               job->rw = WRITE | REQ_WRITE_SAME;
+               job->rw = REQ_OP_WRITE_SAME;
                for (i = 0; i < job->num_dests; i++)
                        if (!bdev_write_same(job->dests[i].bdev)) {
                                job->rw = WRITE;
index 608302e..b5dbf7a 100644
@@ -205,6 +205,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
        bio->bi_bdev = lc->logdev->bdev;
        bio->bi_end_io = log_end_io;
        bio->bi_private = lc;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
        page = alloc_page(GFP_KERNEL);
        if (!page) {
@@ -226,7 +227,7 @@ static int write_metadata(struct log_writes_c *lc, void *entry,
                DMERR("Couldn't add page to the log block");
                goto error_bio;
        }
-       submit_bio(WRITE, bio);
+       submit_bio(bio);
        return 0;
 error_bio:
        bio_put(bio);
@@ -269,6 +270,7 @@ static int log_one_block(struct log_writes_c *lc,
        bio->bi_bdev = lc->logdev->bdev;
        bio->bi_end_io = log_end_io;
        bio->bi_private = lc;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
        for (i = 0; i < block->vec_cnt; i++) {
                /*
@@ -279,7 +281,7 @@ static int log_one_block(struct log_writes_c *lc,
                                   block->vecs[i].bv_len, 0);
                if (ret != block->vecs[i].bv_len) {
                        atomic_inc(&lc->io_blocks);
-                       submit_bio(WRITE, bio);
+                       submit_bio(bio);
                        bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i);
                        if (!bio) {
                                DMERR("Couldn't alloc log bio");
@@ -290,6 +292,7 @@ static int log_one_block(struct log_writes_c *lc,
                        bio->bi_bdev = lc->logdev->bdev;
                        bio->bi_end_io = log_end_io;
                        bio->bi_private = lc;
+                       bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 
                        ret = bio_add_page(bio, block->vecs[i].bv_page,
                                           block->vecs[i].bv_len, 0);
@@ -301,7 +304,7 @@ static int log_one_block(struct log_writes_c *lc,
                }
                sector += block->vecs[i].bv_len >> SECTOR_SHIFT;
        }
-       submit_bio(WRITE, bio);
+       submit_bio(bio);
 out:
        kfree(block->data);
        kfree(block);
@@ -552,9 +555,9 @@ static int log_writes_map(struct dm_target *ti, struct bio *bio)
        struct bio_vec bv;
        size_t alloc_size;
        int i = 0;
-       bool flush_bio = (bio->bi_rw & REQ_FLUSH);
+       bool flush_bio = (bio->bi_rw & REQ_PREFLUSH);
        bool fua_bio = (bio->bi_rw & REQ_FUA);
-       bool discard_bio = (bio->bi_rw & REQ_DISCARD);
+       bool discard_bio = (bio_op(bio) == REQ_OP_DISCARD);
 
        pb->block = NULL;
 
index 627d191..4ca2d1d 100644
@@ -293,7 +293,7 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis
 
 static int rw_header(struct log_c *lc, int rw)
 {
-       lc->io_req.bi_rw = rw;
+       lc->io_req.bi_op = rw;
 
        return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
 }
@@ -306,7 +306,8 @@ static int flush_header(struct log_c *lc)
                .count = 0,
        };
 
-       lc->io_req.bi_rw = WRITE_FLUSH;
+       lc->io_req.bi_op = REQ_OP_WRITE;
+       lc->io_req.bi_op_flags = WRITE_FLUSH;
 
        return dm_io(&lc->io_req, 1, &null_location, NULL);
 }
index 5253274..8cbac62 100644
@@ -792,7 +792,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
        if (rdev->sb_loaded)
                return 0;
 
-       if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, 1)) {
+       if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, 1)) {
                DMERR("Failed to read superblock of device at position %d",
                      rdev->raid_disk);
                md_error(rdev->mddev, rdev);
@@ -1651,7 +1651,8 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
        for (i = 0; i < rs->md.raid_disks; i++) {
                r = &rs->dev[i].rdev;
                if (test_bit(Faulty, &r->flags) && r->sb_page &&
-                   sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+                   sync_page_io(r, 0, r->sb_size, r->sb_page, REQ_OP_READ, 0,
+                                1)) {
                        DMINFO("Faulty %s device #%d has readable super block."
                               "  Attempting to revive it.",
                               rs->raid_type->name, i);
index b3ccf1e..9f5f460 100644
@@ -260,7 +260,8 @@ static int mirror_flush(struct dm_target *ti)
        struct dm_io_region io[ms->nr_mirrors];
        struct mirror *m;
        struct dm_io_request io_req = {
-               .bi_rw = WRITE_FLUSH,
+               .bi_op = REQ_OP_WRITE,
+               .bi_op_flags = WRITE_FLUSH,
                .mem.type = DM_IO_KMEM,
                .mem.ptr.addr = NULL,
                .client = ms->io_client,
@@ -541,7 +542,8 @@ static void read_async_bio(struct mirror *m, struct bio *bio)
 {
        struct dm_io_region io;
        struct dm_io_request io_req = {
-               .bi_rw = READ,
+               .bi_op = REQ_OP_READ,
+               .bi_op_flags = 0,
                .mem.type = DM_IO_BIO,
                .mem.ptr.bio = bio,
                .notify.fn = read_callback,
@@ -624,7 +626,7 @@ static void write_callback(unsigned long error, void *context)
         * If the bio is discard, return an error, but do not
         * degrade the array.
         */
-       if (bio->bi_rw & REQ_DISCARD) {
+       if (bio_op(bio) == REQ_OP_DISCARD) {
                bio->bi_error = -EOPNOTSUPP;
                bio_endio(bio);
                return;
@@ -654,7 +656,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
        struct dm_io_region io[ms->nr_mirrors], *dest = io;
        struct mirror *m;
        struct dm_io_request io_req = {
-               .bi_rw = WRITE | (bio->bi_rw & WRITE_FLUSH_FUA),
+               .bi_op = REQ_OP_WRITE,
+               .bi_op_flags = bio->bi_rw & WRITE_FLUSH_FUA,
                .mem.type = DM_IO_BIO,
                .mem.ptr.bio = bio,
                .notify.fn = write_callback,
@@ -662,8 +665,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
                .client = ms->io_client,
        };
 
-       if (bio->bi_rw & REQ_DISCARD) {
-               io_req.bi_rw |= REQ_DISCARD;
+       if (bio_op(bio) == REQ_OP_DISCARD) {
+               io_req.bi_op = REQ_OP_DISCARD;
                io_req.mem.type = DM_IO_KMEM;
                io_req.mem.ptr.addr = NULL;
        }
@@ -701,8 +704,8 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
        bio_list_init(&requeue);
 
        while ((bio = bio_list_pop(writes))) {
-               if ((bio->bi_rw & REQ_FLUSH) ||
-                   (bio->bi_rw & REQ_DISCARD)) {
+               if ((bio->bi_rw & REQ_PREFLUSH) ||
+                   (bio_op(bio) == REQ_OP_DISCARD)) {
                        bio_list_add(&sync, bio);
                        continue;
                }
@@ -1250,7 +1253,8 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
         * We need to dec pending if this was a write.
         */
        if (rw == WRITE) {
-               if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
+               if (!(bio->bi_rw & REQ_PREFLUSH) &&
+                   bio_op(bio) != REQ_OP_DISCARD)
                        dm_rh_dec(ms->rh, bio_record->write_region);
                return error;
        }
index 74cb7b9..b118134 100644
@@ -398,12 +398,12 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
        region_t region = dm_rh_bio_to_region(rh, bio);
        int recovering = 0;
 
-       if (bio->bi_rw & REQ_FLUSH) {
+       if (bio->bi_rw & REQ_PREFLUSH) {
                rh->flush_failure = 1;
                return;
        }
 
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                return;
 
        /* We must inform the log that the sync count has changed. */
@@ -526,7 +526,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
        struct bio *bio;
 
        for (bio = bios->head; bio; bio = bio->bi_next) {
-               if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
+               if (bio->bi_rw & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)
                        continue;
                rh_inc(rh, dm_rh_bio_to_region(rh, bio));
        }
index 4d39093..b8cf956 100644
@@ -226,8 +226,8 @@ static void do_metadata(struct work_struct *work)
 /*
  * Read or write a chunk aligned and sized block of data from a device.
  */
-static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
-                   int metadata)
+static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int op,
+                   int op_flags, int metadata)
 {
        struct dm_io_region where = {
                .bdev = dm_snap_cow(ps->store->snap)->bdev,
@@ -235,7 +235,8 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
                .count = ps->store->chunk_size,
        };
        struct dm_io_request io_req = {
-               .bi_rw = rw,
+               .bi_op = op,
+               .bi_op_flags = op_flags,
                .mem.type = DM_IO_VMA,
                .mem.ptr.vma = area,
                .client = ps->io_client,
@@ -281,14 +282,14 @@ static void skip_metadata(struct pstore *ps)
  * Read or write a metadata area.  Remembering to skip the first
  * chunk which holds the header.
  */
-static int area_io(struct pstore *ps, int rw)
+static int area_io(struct pstore *ps, int op, int op_flags)
 {
        int r;
        chunk_t chunk;
 
        chunk = area_location(ps, ps->current_area);
 
-       r = chunk_io(ps, ps->area, chunk, rw, 0);
+       r = chunk_io(ps, ps->area, chunk, op, op_flags, 0);
        if (r)
                return r;
 
@@ -302,7 +303,8 @@ static void zero_memory_area(struct pstore *ps)
 
 static int zero_disk_area(struct pstore *ps, chunk_t area)
 {
-       return chunk_io(ps, ps->zero_area, area_location(ps, area), WRITE, 0);
+       return chunk_io(ps, ps->zero_area, area_location(ps, area),
+                       REQ_OP_WRITE, 0, 0);
 }
 
 static int read_header(struct pstore *ps, int *new_snapshot)
@@ -334,7 +336,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
        if (r)
                return r;
 
-       r = chunk_io(ps, ps->header_area, 0, READ, 1);
+       r = chunk_io(ps, ps->header_area, 0, REQ_OP_READ, 0, 1);
        if (r)
                goto bad;
 
@@ -395,7 +397,7 @@ static int write_header(struct pstore *ps)
        dh->version = cpu_to_le32(ps->version);
        dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
 
-       return chunk_io(ps, ps->header_area, 0, WRITE, 1);
+       return chunk_io(ps, ps->header_area, 0, REQ_OP_WRITE, 0, 1);
 }
 
 /*
@@ -739,7 +741,7 @@ static void persistent_commit_exception(struct dm_exception_store *store,
        /*
         * Commit exceptions to disk.
         */
-       if (ps->valid && area_io(ps, WRITE_FLUSH_FUA))
+       if (ps->valid && area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA))
                ps->valid = 0;
 
        /*
@@ -779,7 +781,7 @@ static int persistent_prepare_merge(struct dm_exception_store *store,
                        return 0;
 
                ps->current_area--;
-               r = area_io(ps, READ);
+               r = area_io(ps, REQ_OP_READ, 0);
                if (r < 0)
                        return r;
                ps->current_committed = ps->exceptions_per_area;
@@ -816,7 +818,7 @@ static int persistent_commit_merge(struct dm_exception_store *store,
        for (i = 0; i < nr_merged; i++)
                clear_exception(ps, ps->current_committed - 1 - i);
 
-       r = area_io(ps, WRITE_FLUSH_FUA);
+       r = area_io(ps, REQ_OP_WRITE, WRITE_FLUSH_FUA);
        if (r < 0)
                return r;
 
index 70bb0e8..69ab1ff 100644
@@ -1680,7 +1680,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
 
        init_tracked_chunk(bio);
 
-       if (bio->bi_rw & REQ_FLUSH) {
+       if (bio->bi_rw & REQ_PREFLUSH) {
                bio->bi_bdev = s->cow->bdev;
                return DM_MAPIO_REMAPPED;
        }
@@ -1799,7 +1799,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 
        init_tracked_chunk(bio);
 
-       if (bio->bi_rw & REQ_FLUSH) {
+       if (bio->bi_rw & REQ_PREFLUSH) {
                if (!dm_bio_get_target_bio_nr(bio))
                        bio->bi_bdev = s->origin->bdev;
                else
@@ -2285,7 +2285,7 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
 
        bio->bi_bdev = o->dev->bdev;
 
-       if (unlikely(bio->bi_rw & REQ_FLUSH))
+       if (unlikely(bio->bi_rw & REQ_PREFLUSH))
                return DM_MAPIO_REMAPPED;
 
        if (bio_rw(bio) != WRITE)
index 8289804..4fba26c 100644
@@ -514,11 +514,10 @@ static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
 }
 
 static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
-                             unsigned long bi_rw, sector_t len,
+                             int idx, sector_t len,
                              struct dm_stats_aux *stats_aux, bool end,
                              unsigned long duration_jiffies)
 {
-       unsigned long idx = bi_rw & REQ_WRITE;
        struct dm_stat_shared *shared = &s->stat_shared[entry];
        struct dm_stat_percpu *p;
 
@@ -584,7 +583,7 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
 #endif
 }
 
-static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
+static void __dm_stat_bio(struct dm_stat *s, int bi_rw,
                          sector_t bi_sector, sector_t end_sector,
                          bool end, unsigned long duration_jiffies,
                          struct dm_stats_aux *stats_aux)
@@ -645,8 +644,8 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
                last = raw_cpu_ptr(stats->last);
                stats_aux->merged =
                        (bi_sector == (ACCESS_ONCE(last->last_sector) &&
-                                      ((bi_rw & (REQ_WRITE | REQ_DISCARD)) ==
-                                       (ACCESS_ONCE(last->last_rw) & (REQ_WRITE | REQ_DISCARD)))
+                                      ((bi_rw == WRITE) ==
+                                       (ACCESS_ONCE(last->last_rw) == WRITE))
                                       ));
                ACCESS_ONCE(last->last_sector) = end_sector;
                ACCESS_ONCE(last->last_rw) = bi_rw;
index 797ddb9..48f1c01 100644
@@ -286,14 +286,14 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
        uint32_t stripe;
        unsigned target_bio_nr;
 
-       if (bio->bi_rw & REQ_FLUSH) {
+       if (bio->bi_rw & REQ_PREFLUSH) {
                target_bio_nr = dm_bio_get_target_bio_nr(bio);
                BUG_ON(target_bio_nr >= sc->stripes);
                bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev;
                return DM_MAPIO_REMAPPED;
        }
-       if (unlikely(bio->bi_rw & REQ_DISCARD) ||
-           unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
+       if (unlikely(bio_op(bio) == REQ_OP_DISCARD) ||
+           unlikely(bio_op(bio) == REQ_OP_WRITE_SAME)) {
                target_bio_nr = dm_bio_get_target_bio_nr(bio);
                BUG_ON(target_bio_nr >= sc->stripes);
                return stripe_map_range(sc, bio, target_bio_nr);
index fc803d5..5f9e3d7 100644
@@ -360,7 +360,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
        sector_t len = block_to_sectors(tc->pool, data_e - data_b);
 
        return __blkdev_issue_discard(tc->pool_dev->bdev, s, len,
-                                     GFP_NOWAIT, REQ_WRITE | REQ_DISCARD, &op->bio);
+                                     GFP_NOWAIT, 0, &op->bio);
 }
 
 static void end_discard(struct discard_op *op, int r)
@@ -371,7 +371,8 @@ static void end_discard(struct discard_op *op, int r)
                 * need to wait for the chain to complete.
                 */
                bio_chain(op->bio, op->parent_bio);
-               submit_bio(REQ_WRITE | REQ_DISCARD, op->bio);
+               bio_set_op_attrs(op->bio, REQ_OP_DISCARD, 0);
+               submit_bio(op->bio);
        }
 
        blk_finish_plug(&op->plug);
@@ -696,7 +697,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
 
 static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
 {
-       return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+       return (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) &&
                dm_thin_changed_this_transaction(tc->td);
 }
 
@@ -704,7 +705,7 @@ static void inc_all_io_entry(struct pool *pool, struct bio *bio)
 {
        struct dm_thin_endio_hook *h;
 
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                return;
 
        h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -867,7 +868,8 @@ static void __inc_remap_and_issue_cell(void *context,
        struct bio *bio;
 
        while ((bio = bio_list_pop(&cell->bios))) {
-               if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))
+               if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) ||
+                   bio_op(bio) == REQ_OP_DISCARD)
                        bio_list_add(&info->defer_bios, bio);
                else {
                        inc_all_io_entry(info->tc->pool, bio);
@@ -1639,7 +1641,8 @@ static void __remap_and_issue_shared_cell(void *context,
 
        while ((bio = bio_list_pop(&cell->bios))) {
                if ((bio_data_dir(bio) == WRITE) ||
-                   (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)))
+                   (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) ||
+                    bio_op(bio) == REQ_OP_DISCARD))
                        bio_list_add(&info->defer_bios, bio);
                else {
                        struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -2028,7 +2031,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
                        break;
                }
 
-               if (bio->bi_rw & REQ_DISCARD)
+               if (bio_op(bio) == REQ_OP_DISCARD)
                        pool->process_discard(tc, bio);
                else
                        pool->process_bio(tc, bio);
@@ -2115,7 +2118,7 @@ static void process_thin_deferred_cells(struct thin_c *tc)
                                return;
                        }
 
-                       if (cell->holder->bi_rw & REQ_DISCARD)
+                       if (bio_op(cell->holder) == REQ_OP_DISCARD)
                                pool->process_discard_cell(tc, cell);
                        else
                                pool->process_cell(tc, cell);
@@ -2553,7 +2556,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
                return DM_MAPIO_SUBMITTED;
        }
 
-       if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
+       if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) ||
+           bio_op(bio) == REQ_OP_DISCARD) {
                thin_defer_bio_with_throttle(tc, bio);
                return DM_MAPIO_SUBMITTED;
        }
index 1b2f962..aba7ed9 100644
@@ -723,8 +723,9 @@ static void start_io_acct(struct dm_io *io)
                atomic_inc_return(&md->pending[rw]));
 
        if (unlikely(dm_stats_used(&md->stats)))
-               dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
-                                   bio_sectors(bio), false, 0, &io->stats_aux);
+               dm_stats_account_io(&md->stats, bio_data_dir(bio),
+                                   bio->bi_iter.bi_sector, bio_sectors(bio),
+                                   false, 0, &io->stats_aux);
 }
 
 static void end_io_acct(struct dm_io *io)
@@ -738,8 +739,9 @@ static void end_io_acct(struct dm_io *io)
        generic_end_io_acct(rw, &dm_disk(md)->part0, io->start_time);
 
        if (unlikely(dm_stats_used(&md->stats)))
-               dm_stats_account_io(&md->stats, bio->bi_rw, bio->bi_iter.bi_sector,
-                                   bio_sectors(bio), true, duration, &io->stats_aux);
+               dm_stats_account_io(&md->stats, bio_data_dir(bio),
+                                   bio->bi_iter.bi_sector, bio_sectors(bio),
+                                   true, duration, &io->stats_aux);
 
        /*
         * After this is decremented the bio must not be touched if it is
@@ -1001,12 +1003,12 @@ static void dec_pending(struct dm_io *io, int error)
                if (io_error == DM_ENDIO_REQUEUE)
                        return;
 
-               if ((bio->bi_rw & REQ_FLUSH) && bio->bi_iter.bi_size) {
+               if ((bio->bi_rw & REQ_PREFLUSH) && bio->bi_iter.bi_size) {
                        /*
                         * Preflush done for flush with data, reissue
-                        * without REQ_FLUSH.
+                        * without REQ_PREFLUSH.
                         */
-                       bio->bi_rw &= ~REQ_FLUSH;
+                       bio->bi_rw &= ~REQ_PREFLUSH;
                        queue_io(md, bio);
                } else {
                        /* done with normal IO or empty flush */
@@ -1051,7 +1053,7 @@ static void clone_endio(struct bio *bio)
                }
        }
 
-       if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) &&
+       if (unlikely(r == -EREMOTEIO && (bio_op(bio) == REQ_OP_WRITE_SAME) &&
                     !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors))
                disable_write_same(md);
 
@@ -1121,9 +1123,9 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig)
        if (unlikely(dm_stats_used(&md->stats))) {
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies - tio->duration_jiffies;
-               dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
-                                   tio->n_sectors, true, tio->duration_jiffies,
-                                   &tio->stats_aux);
+               dm_stats_account_io(&md->stats, rq_data_dir(orig),
+                                   blk_rq_pos(orig), tio->n_sectors, true,
+                                   tio->duration_jiffies, &tio->stats_aux);
        }
 }
 
@@ -1320,7 +1322,7 @@ static void dm_done(struct request *clone, int error, bool mapped)
                        r = rq_end_io(tio->ti, clone, error, &tio->info);
        }
 
-       if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) &&
+       if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
                     !clone->q->limits.max_write_same_sectors))
                disable_write_same(tio->md);
 
@@ -1475,7 +1477,7 @@ EXPORT_SYMBOL_GPL(dm_set_target_max_io_len);
 
 /*
  * A target may call dm_accept_partial_bio only from the map routine.  It is
- * allowed for all bio types except REQ_FLUSH.
+ * allowed for all bio types except REQ_PREFLUSH.
  *
  * dm_accept_partial_bio informs the dm that the target only wants to process
  * additional n_sectors sectors of the bio and the rest of the data should be
@@ -1505,7 +1507,7 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 {
        struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
        unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT;
-       BUG_ON(bio->bi_rw & REQ_FLUSH);
+       BUG_ON(bio->bi_rw & REQ_PREFLUSH);
        BUG_ON(bi_size > *tio->len_ptr);
        BUG_ON(n_sectors > bi_size);
        *tio->len_ptr -= bi_size - n_sectors;
@@ -1746,9 +1748,9 @@ static int __split_and_process_non_flush(struct clone_info *ci)
        unsigned len;
        int r;
 
-       if (unlikely(bio->bi_rw & REQ_DISCARD))
+       if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
                return __send_discard(ci);
-       else if (unlikely(bio->bi_rw & REQ_WRITE_SAME))
+       else if (unlikely(bio_op(bio) == REQ_OP_WRITE_SAME))
                return __send_write_same(ci);
 
        ti = dm_table_find_target(ci->map, ci->sector);
@@ -1793,7 +1795,7 @@ static void __split_and_process_bio(struct mapped_device *md,
 
        start_io_acct(ci.io);
 
-       if (bio->bi_rw & REQ_FLUSH) {
+       if (bio->bi_rw & REQ_PREFLUSH) {
                ci.bio = &ci.md->flush_bio;
                ci.sector_count = 0;
                error = __send_empty_flush(&ci);
@@ -2082,8 +2084,9 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
                struct dm_rq_target_io *tio = tio_from_request(orig);
                tio->duration_jiffies = jiffies;
                tio->n_sectors = blk_rq_sectors(orig);
-               dm_stats_account_io(&md->stats, orig->cmd_flags, blk_rq_pos(orig),
-                                   tio->n_sectors, false, 0, &tio->stats_aux);
+               dm_stats_account_io(&md->stats, rq_data_dir(orig),
+                                   blk_rq_pos(orig), tio->n_sectors, false, 0,
+                                   &tio->stats_aux);
        }
 
        /*
@@ -2168,7 +2171,7 @@ static void dm_request_fn(struct request_queue *q)
 
                /* always use block 0 to find the target for flushes for now */
                pos = 0;
-               if (!(rq->cmd_flags & REQ_FLUSH))
+               if (req_op(rq) != REQ_OP_FLUSH)
                        pos = blk_rq_pos(rq);
 
                if ((dm_request_peeked_before_merge_deadline(md) &&
@@ -2412,7 +2415,7 @@ static struct mapped_device *alloc_dev(int minor)
 
        bio_init(&md->flush_bio);
        md->flush_bio.bi_bdev = md->bdev;
-       md->flush_bio.bi_rw = WRITE_FLUSH;
+       bio_set_op_attrs(&md->flush_bio, REQ_OP_WRITE, WRITE_FLUSH);
 
        dm_stats_init(&md->stats);
 
index b7fe7e9..70ff888 100644
@@ -221,7 +221,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
        struct bio *split;
        sector_t start_sector, end_sector, data_offset;
 
-       if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+       if (unlikely(bio->bi_rw & REQ_PREFLUSH)) {
                md_flush_request(mddev, bio);
                return;
        }
@@ -252,7 +252,7 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
                split->bi_iter.bi_sector = split->bi_iter.bi_sector -
                        start_sector + data_offset;
 
-               if (unlikely((split->bi_rw & REQ_DISCARD) &&
+               if (unlikely((bio_op(split) == REQ_OP_DISCARD) &&
                         !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
                        /* Just ignore it */
                        bio_endio(split);
index 866825f..1f123f5 100644
@@ -394,8 +394,9 @@ static void submit_flushes(struct work_struct *ws)
                        bi->bi_end_io = md_end_flush;
                        bi->bi_private = rdev;
                        bi->bi_bdev = rdev->bdev;
+                       bio_set_op_attrs(bi, REQ_OP_WRITE, WRITE_FLUSH);
                        atomic_inc(&mddev->flush_pending);
-                       submit_bio(WRITE_FLUSH, bi);
+                       submit_bio(bi);
                        rcu_read_lock();
                        rdev_dec_pending(rdev, mddev);
                }
@@ -413,7 +414,7 @@ static void md_submit_flush_data(struct work_struct *ws)
                /* an empty barrier - all done */
                bio_endio(bio);
        else {
-               bio->bi_rw &= ~REQ_FLUSH;
+               bio->bi_rw &= ~REQ_PREFLUSH;
                mddev->pers->make_request(mddev, bio);
        }
 
@@ -742,9 +743,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
        bio_add_page(bio, page, size, 0);
        bio->bi_private = rdev;
        bio->bi_end_io = super_written;
+       bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_FLUSH_FUA);
 
        atomic_inc(&mddev->pending_writes);
-       submit_bio(WRITE_FLUSH_FUA, bio);
+       submit_bio(bio);
 }
 
 void md_super_wait(struct mddev *mddev)
@@ -754,13 +756,14 @@ void md_super_wait(struct mddev *mddev)
 }
 
 int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
-                struct page *page, int rw, bool metadata_op)
+                struct page *page, int op, int op_flags, bool metadata_op)
 {
        struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
        int ret;
 
        bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
                rdev->meta_bdev : rdev->bdev;
+       bio_set_op_attrs(bio, op, op_flags);
        if (metadata_op)
                bio->bi_iter.bi_sector = sector + rdev->sb_start;
        else if (rdev->mddev->reshape_position != MaxSector &&
@@ -770,7 +773,8 @@ int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
        else
                bio->bi_iter.bi_sector = sector + rdev->data_offset;
        bio_add_page(bio, page, size, 0);
-       submit_bio_wait(rw, bio);
+
+       submit_bio_wait(bio);
 
        ret = !bio->bi_error;
        bio_put(bio);
@@ -785,7 +789,7 @@ static int read_disk_sb(struct md_rdev *rdev, int size)
        if (rdev->sb_loaded)
                return 0;
 
-       if (!sync_page_io(rdev, 0, size, rdev->sb_page, READ, true))
+       if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true))
                goto fail;
        rdev->sb_loaded = 1;
        return 0;
@@ -1471,7 +1475,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
                        return -EINVAL;
                bb_sector = (long long)offset;
                if (!sync_page_io(rdev, bb_sector, sectors << 9,
-                                 rdev->bb_page, READ, true))
+                                 rdev->bb_page, REQ_OP_READ, 0, true))
                        return -EIO;
                bbp = (u64 *)page_address(rdev->bb_page);
                rdev->badblocks.shift = sb->bblog_shift;
index b5c4be7..b4f3352 100644
@@ -424,7 +424,7 @@ struct mddev {
 
        /* Generic flush handling.
         * The last to finish preflush schedules a worker to submit
-        * the rest of the request (without the REQ_FLUSH flag).
+        * the rest of the request (without the REQ_PREFLUSH flag).
         */
        struct bio *flush_bio;
        atomic_t flush_pending;
@@ -618,7 +618,8 @@ extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
                           sector_t sector, int size, struct page *page);
 extern void md_super_wait(struct mddev *mddev);
 extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
-                       struct page *page, int rw, bool metadata_op);
+                       struct page *page, int op, int op_flags,
+                       bool metadata_op);
 extern void md_do_sync(struct md_thread *thread);
 extern void md_new_event(struct mddev *mddev);
 extern int md_allow_write(struct mddev *mddev);
index dd483bb..72ea98e 100644
@@ -111,7 +111,7 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
        struct multipath_bh * mp_bh;
        struct multipath_info *multipath;
 
-       if (unlikely(bio->bi_rw & REQ_FLUSH)) {
+       if (unlikely(bio->bi_rw & REQ_PREFLUSH)) {
                md_flush_request(mddev, bio);
                return;
        }
index 34783a3..c3d4390 100644
--- a/