}
static DEVICE_ATTR_RW(provisioning_mode);
+static const char *zeroing_mode[] = {
+ [SD_ZERO_WRITE] = "write",
+ [SD_ZERO_WS] = "writesame",
+ [SD_ZERO_WS16_UNMAP] = "writesame_16_unmap",
+ [SD_ZERO_WS10_UNMAP] = "writesame_10_unmap",
+};
+
+static ssize_t
+zeroing_mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+ return snprintf(buf, 20, "%s\n", zeroing_mode[sdkp->zeroing_mode]);
+}
+
+static ssize_t
+zeroing_mode_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ if (!strncmp(buf, zeroing_mode[SD_ZERO_WRITE], 20))
+ sdkp->zeroing_mode = SD_ZERO_WRITE;
+ else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS], 20))
+ sdkp->zeroing_mode = SD_ZERO_WS;
+ else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS16_UNMAP], 20))
+ sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+ else if (!strncmp(buf, zeroing_mode[SD_ZERO_WS10_UNMAP], 20))
+ sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+ else
+ return -EINVAL;
+
+ return count;
+}
+static DEVICE_ATTR_RW(zeroing_mode);
+
static ssize_t
max_medium_access_timeouts_show(struct device *dev,
struct device_attribute *attr, char *buf)
&dev_attr_app_tag_own.attr,
&dev_attr_thin_provisioning.attr,
&dev_attr_provisioning_mode.attr,
+ &dev_attr_zeroing_mode.attr,
&dev_attr_max_write_same_blocks.attr,
&dev_attr_max_medium_access_timeouts.attr,
NULL,
unsigned int logical_block_size = sdkp->device->sector_size;
unsigned int max_blocks = 0;
- q->limits.discard_zeroes_data = 0;
-
- /*
- * When LBPRZ is reported, discard alignment and granularity
- * must be fixed to the logical block size. Otherwise the block
- * layer will drop misaligned portions of the request which can
- * lead to data corruption. If LBPRZ is not set, we honor the
- * device preference.
- */
- if (sdkp->lbprz) {
- q->limits.discard_alignment = 0;
- q->limits.discard_granularity = logical_block_size;
- } else {
- q->limits.discard_alignment = sdkp->unmap_alignment *
- logical_block_size;
- q->limits.discard_granularity =
- max(sdkp->physical_block_size,
- sdkp->unmap_granularity * logical_block_size);
- }
-
+ q->limits.discard_alignment =
+ sdkp->unmap_alignment * logical_block_size;
+ q->limits.discard_granularity =
+ max(sdkp->physical_block_size,
+ sdkp->unmap_granularity * logical_block_size);
sdkp->provisioning_mode = mode;
switch (mode) {
case SD_LBP_WS16:
max_blocks = min_not_zero(sdkp->max_ws_blocks,
(u32)SD_MAX_WS16_BLOCKS);
- q->limits.discard_zeroes_data = sdkp->lbprz;
break;
case SD_LBP_WS10:
max_blocks = min_not_zero(sdkp->max_ws_blocks,
(u32)SD_MAX_WS10_BLOCKS);
- q->limits.discard_zeroes_data = sdkp->lbprz;
break;
case SD_LBP_ZERO:
max_blocks = min_not_zero(sdkp->max_ws_blocks,
(u32)SD_MAX_WS10_BLOCKS);
- q->limits.discard_zeroes_data = 1;
break;
}
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
}
-/**
- * sd_setup_discard_cmnd - unmap blocks on thinly provisioned device
- * @cmd: command to prepare
- *
- * Will set up either UNMAP, WRITE SAME(16) or WRITE SAME(10) depending
- * on the provisioning mode of the target device.
- **/
-static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
+static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
{
- struct request *rq = cmd->request;
struct scsi_device *sdp = cmd->device;
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
- sector_t sector = blk_rq_pos(rq);
- unsigned int nr_sectors = blk_rq_sectors(rq);
- unsigned int len;
- int ret;
+ struct request *rq = cmd->request;
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+ unsigned int data_len = 24;
char *buf;
- struct page *page;
-
- sector >>= ilog2(sdp->sector_size) - 9;
- nr_sectors >>= ilog2(sdp->sector_size) - 9;
- page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
- if (!page)
+ rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!rq->special_vec.bv_page)
return BLKPREP_DEFER;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = data_len;
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
- switch (sdkp->provisioning_mode) {
- case SD_LBP_UNMAP:
- buf = page_address(page);
+ cmd->cmd_len = 10;
+ cmd->cmnd[0] = UNMAP;
+ cmd->cmnd[8] = 24;
- cmd->cmd_len = 10;
- cmd->cmnd[0] = UNMAP;
- cmd->cmnd[8] = 24;
+ buf = page_address(rq->special_vec.bv_page);
+ put_unaligned_be16(6 + 16, &buf[0]);
+ put_unaligned_be16(16, &buf[2]);
+ put_unaligned_be64(sector, &buf[8]);
+ put_unaligned_be32(nr_sectors, &buf[16]);
- put_unaligned_be16(6 + 16, &buf[0]);
- put_unaligned_be16(16, &buf[2]);
- put_unaligned_be64(sector, &buf[8]);
- put_unaligned_be32(nr_sectors, &buf[16]);
+ cmd->allowed = SD_MAX_RETRIES;
+ cmd->transfersize = data_len;
+ rq->timeout = SD_TIMEOUT;
+ scsi_req(rq)->resid_len = data_len;
- len = 24;
- break;
+ return scsi_init_io(cmd);
+}
- case SD_LBP_WS16:
- cmd->cmd_len = 16;
- cmd->cmnd[0] = WRITE_SAME_16;
+static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+ struct scsi_device *sdp = cmd->device;
+ struct request *rq = cmd->request;
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 data_len = sdp->sector_size;
+
+ rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!rq->special_vec.bv_page)
+ return BLKPREP_DEFER;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = data_len;
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+
+ cmd->cmd_len = 16;
+ cmd->cmnd[0] = WRITE_SAME_16;
+ if (unmap)
cmd->cmnd[1] = 0x8; /* UNMAP */
- put_unaligned_be64(sector, &cmd->cmnd[2]);
- put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
+ put_unaligned_be64(sector, &cmd->cmnd[2]);
+ put_unaligned_be32(nr_sectors, &cmd->cmnd[10]);
- len = sdkp->device->sector_size;
- break;
+ cmd->allowed = SD_MAX_RETRIES;
+ cmd->transfersize = data_len;
+ rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+ scsi_req(rq)->resid_len = data_len;
- case SD_LBP_WS10:
- case SD_LBP_ZERO:
- cmd->cmd_len = 10;
- cmd->cmnd[0] = WRITE_SAME;
- if (sdkp->provisioning_mode == SD_LBP_WS10)
- cmd->cmnd[1] = 0x8; /* UNMAP */
- put_unaligned_be32(sector, &cmd->cmnd[2]);
- put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
+ return scsi_init_io(cmd);
+}
- len = sdkp->device->sector_size;
- break;
+static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
+{
+ struct scsi_device *sdp = cmd->device;
+ struct request *rq = cmd->request;
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 data_len = sdp->sector_size;
- default:
- ret = BLKPREP_INVALID;
- goto out;
- }
+ rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
+ if (!rq->special_vec.bv_page)
+ return BLKPREP_DEFER;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = data_len;
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
- rq->timeout = SD_TIMEOUT;
+ cmd->cmd_len = 10;
+ cmd->cmnd[0] = WRITE_SAME;
+ if (unmap)
+ cmd->cmnd[1] = 0x8; /* UNMAP */
+ put_unaligned_be32(sector, &cmd->cmnd[2]);
+ put_unaligned_be16(nr_sectors, &cmd->cmnd[7]);
- cmd->transfersize = len;
cmd->allowed = SD_MAX_RETRIES;
+ cmd->transfersize = data_len;
+ rq->timeout = unmap ? SD_TIMEOUT : SD_WRITE_SAME_TIMEOUT;
+ scsi_req(rq)->resid_len = data_len;
- rq->special_vec.bv_page = page;
- rq->special_vec.bv_offset = 0;
- rq->special_vec.bv_len = len;
+ return scsi_init_io(cmd);
+}
- rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
- scsi_req(rq)->resid_len = len;
+static int sd_setup_write_zeroes_cmnd(struct scsi_cmnd *cmd)
+{
+ struct request *rq = cmd->request;
+ struct scsi_device *sdp = cmd->device;
+ struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
+ u64 sector = blk_rq_pos(rq) >> (ilog2(sdp->sector_size) - 9);
+ u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
+
+ if (!(rq->cmd_flags & REQ_NOUNMAP)) {
+ switch (sdkp->zeroing_mode) {
+ case SD_ZERO_WS16_UNMAP:
+ return sd_setup_write_same16_cmnd(cmd, true);
+ case SD_ZERO_WS10_UNMAP:
+ return sd_setup_write_same10_cmnd(cmd, true);
+ }
+ }
- ret = scsi_init_io(cmd);
-out:
- if (ret != BLKPREP_OK)
- __free_page(page);
- return ret;
+ if (sdp->no_write_same)
+ return BLKPREP_INVALID;
+ if (sdkp->ws16 || sector > 0xffffffff || nr_sectors > 0xffff)
+ return sd_setup_write_same16_cmnd(cmd, false);
+ return sd_setup_write_same10_cmnd(cmd, false);
}
static void sd_config_write_same(struct scsi_disk *sdkp)
sdkp->max_ws_blocks = 0;
}
+ if (sdkp->lbprz && sdkp->lbpws)
+ sdkp->zeroing_mode = SD_ZERO_WS16_UNMAP;
+ else if (sdkp->lbprz && sdkp->lbpws10)
+ sdkp->zeroing_mode = SD_ZERO_WS10_UNMAP;
+ else if (sdkp->max_ws_blocks)
+ sdkp->zeroing_mode = SD_ZERO_WS;
+ else
+ sdkp->zeroing_mode = SD_ZERO_WRITE;
+
out:
blk_queue_max_write_same_sectors(q, sdkp->max_ws_blocks *
(logical_block_size >> 9));
+ blk_queue_max_write_zeroes_sectors(q, sdkp->max_ws_blocks *
+ (logical_block_size >> 9));
}
/**
switch (req_op(rq)) {
case REQ_OP_DISCARD:
- return sd_setup_discard_cmnd(cmd);
+ switch (scsi_disk(rq->rq_disk)->provisioning_mode) {
+ case SD_LBP_UNMAP:
+ return sd_setup_unmap_cmnd(cmd);
+ case SD_LBP_WS16:
+ return sd_setup_write_same16_cmnd(cmd, true);
+ case SD_LBP_WS10:
+ return sd_setup_write_same10_cmnd(cmd, true);
+ case SD_LBP_ZERO:
+ return sd_setup_write_same10_cmnd(cmd, false);
+ default:
+ return BLKPREP_INVALID;
+ }
+ case REQ_OP_WRITE_ZEROES:
+ return sd_setup_write_zeroes_cmnd(cmd);
case REQ_OP_WRITE_SAME:
return sd_setup_write_same_cmnd(cmd);
case REQ_OP_FLUSH:
{
int result = SCpnt->result;
unsigned int good_bytes = result ? 0 : scsi_bufflen(SCpnt);
+ unsigned int sector_size = SCpnt->device->sector_size;
+ unsigned int resid;
struct scsi_sense_hdr sshdr;
struct scsi_disk *sdkp = scsi_disk(SCpnt->request->rq_disk);
struct request *req = SCpnt->request;
switch (req_op(req)) {
case REQ_OP_DISCARD:
+ case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE_SAME:
case REQ_OP_ZONE_RESET:
if (!result) {
scsi_set_resid(SCpnt, blk_rq_bytes(req));
}
break;
+ default:
+ /*
+ * In case of bogus fw or device, we could end up having
+ * an unaligned partial completion. Check this here and force
+ * alignment.
+ */
+ resid = scsi_get_resid(SCpnt);
+ if (resid & (sector_size - 1)) {
+ sd_printk(KERN_INFO, sdkp,
+ "Unaligned partial completion (resid=%u, sector_sz=%u)\n",
+ resid, sector_size);
+ resid = min(scsi_bufflen(SCpnt),
+ round_up(resid, sector_size));
+ scsi_set_resid(SCpnt, resid);
+ }
}
if (result) {
#define READ_CAPACITY_RETRIES_ON_RESET 10
+/*
+ * Ensure that we don't overflow sector_t when CONFIG_LBDAF is not set
+ * and the reported logical block size is bigger than 512 bytes. Note
+ * that last_sector is a u64 and therefore logical_to_sectors() is not
+ * applicable.
+ */
+static bool sd_addressable_capacity(u64 lba, unsigned int sector_size)
+{
+ u64 last_sector = (lba + 1ULL) << (ilog2(sector_size) - 9);
+
+ if (sizeof(sector_t) == 4 && last_sector > U32_MAX)
+ return false;
+
+ return true;
+}
+
static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp,
unsigned char *buffer)
{
return -ENODEV;
}
- if ((sizeof(sdkp->capacity) == 4) && (lba >= 0xffffffffULL)) {
+ if (!sd_addressable_capacity(lba, sector_size)) {
sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
"kernel compiled with support for large block "
"devices.\n");
return sector_size;
}
- if ((sizeof(sdkp->capacity) == 4) && (lba == 0xffffffff)) {
+ if (!sd_addressable_capacity(lba, sector_size)) {
sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use a "
"kernel compiled with support for large block "
"devices.\n");
sd_config_discard(sdkp, SD_LBP_WS16);
} else { /* LBP VPD page tells us what to use */
- if (sdkp->lbpu && sdkp->max_unmap_blocks && !sdkp->lbprz)
+ if (sdkp->lbpu && sdkp->max_unmap_blocks)
sd_config_discard(sdkp, SD_LBP_UNMAP);
else if (sdkp->lbpws)
sd_config_discard(sdkp, SD_LBP_WS16);
q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
- rw_max = BLK_DEF_MAX_SECTORS;
+ rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
+ (sector_t)BLK_DEF_MAX_SECTORS);
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));