Discussion:
[PATCH 1/5] block: set the nr of sectors a dev can compare and write atomically
m***@cs.wisc.edu
2014-10-16 05:37:11 UTC
Permalink
From: Mike Christie <***@cs.wisc.edu>

This adds block layer settings helpers to allow drivers to tell the block
layer how many sectors a device can process in a single
COMPARE_AND_WRITE/ATS request.

Signed-off-by: Mike Christie <***@cs.wisc.edu>
---
block/blk-settings.c | 20 ++++++++++++++++++++
block/blk-sysfs.c | 11 +++++++++++
include/linux/blkdev.h | 3 +++
3 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/block/blk-settings.c b/block/blk-settings.c
index f1a1795..b33cf1c 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -114,6 +114,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->chunk_sectors = 0;
+ lim->max_cmp_and_write_sectors = 0;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
@@ -147,6 +148,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_hw_sectors = UINT_MAX;
lim->max_segment_size = UINT_MAX;
lim->max_sectors = UINT_MAX;
+ lim->max_cmp_and_write_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -298,6 +300,22 @@ void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors
EXPORT_SYMBOL(blk_queue_chunk_sectors);

/**
+ * blk_queue_max_cmp_and_write_sectors - set max sectors for a single compare and write
+ * @q: the request queue for the device
+ * @max_cmp_and_write_sectors: maximum number of 512b sectors to cmp and write
+ *
+ * Description:
+ * This sets the total number of sectors that the device can process
+ * in a compare and write operation.
+ **/
+void blk_queue_max_cmp_and_write_sectors(struct request_queue *q,
+ unsigned int max_cmp_and_write_sectors)
+{
+ q->limits.max_cmp_and_write_sectors = max_cmp_and_write_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_cmp_and_write_sectors);
+
+/**
* blk_queue_max_discard_sectors - set max sectors for a single discard
* @q: the request queue for the device
* @max_discard_sectors: maximum number of sectors to discard
@@ -548,6 +566,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
+ t->max_cmp_and_write_sectors = min(t->max_cmp_and_write_sectors,
+ b->max_cmp_and_write_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);

t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 17f5c84..c546400 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -161,6 +161,11 @@ static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
(unsigned long long)q->limits.max_write_same_sectors << 9);
}

+static ssize_t queue_cmp_and_write_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_cmp_and_write_sectors << 9);
+}

static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
@@ -374,6 +379,11 @@ static struct queue_sysfs_entry queue_write_same_max_entry = {
.show = queue_write_same_max_show,
};

+static struct queue_sysfs_entry queue_cmp_and_write_max_entry = {
+ .attr = {.name = "cmp_and_write_max_bytes", .mode = S_IRUGO },
+ .show = queue_cmp_and_write_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -422,6 +432,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
+ &queue_cmp_and_write_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 518b465..6d03206 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -290,6 +290,7 @@ struct queue_limits {
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_write_same_sectors;
+ unsigned int max_cmp_and_write_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;

@@ -1009,6 +1010,8 @@ extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
+extern void blk_queue_max_cmp_and_write_sectors(struct request_queue *,
+ unsigned int max_cmp_and_write_sectors);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
--
1.7.1
m***@cs.wisc.edu
2014-10-16 05:37:13 UTC
Permalink
From: Mike Christie <***@cs.wisc.edu>

This patch adds support to detect if a device supports COMPARE_AND_WRITE
and execute REQ_COMPARE_AND_WRITE commands.

Signed-off-by: Mike Christie <***@cs.wisc.edu>
---
drivers/scsi/scsi_lib.c | 7 +++++
drivers/scsi/sd.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++
drivers/scsi/sd.h | 1 +
3 files changed, 71 insertions(+), 0 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d837dc1..9e7ba4f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1021,6 +1021,13 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
/* See SSC3rXX or current. */
action = ACTION_FAIL;
break;
+ case MISCOMPARE:
+ /* miscompare during verify */
+ if (sshdr.asc == 0x1d)
+ /* TODO: better error code to use ??? */
+ error = -ECANCELED;
+ action = ACTION_FAIL;
+ break;
default:
action = ACTION_FAIL;
break;
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 2c2041c..d1fa4ef 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -477,6 +477,16 @@ max_write_same_blocks_store(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RW(max_write_same_blocks);

+static ssize_t
+max_cmp_and_write_blocks_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct scsi_disk *sdkp = to_scsi_disk(dev);
+
+ return snprintf(buf, 20, "%u\n", sdkp->max_cmp_and_write_blocks);
+}
+static DEVICE_ATTR_RO(max_cmp_and_write_blocks);
+
static struct attribute *sd_disk_attrs[] = {
&dev_attr_cache_type.attr,
&dev_attr_FUA.attr,
@@ -488,6 +498,7 @@ static struct attribute *sd_disk_attrs[] = {
&dev_attr_thin_provisioning.attr,
&dev_attr_provisioning_mode.attr,
&dev_attr_max_write_same_blocks.attr,
+ &dev_attr_max_cmp_and_write_blocks.attr,
&dev_attr_max_medium_access_timeouts.attr,
NULL,
};
@@ -635,6 +646,54 @@ static void sd_prot_op(struct scsi_cmnd *scmd, unsigned int dif)
scsi_set_prot_type(scmd, dif);
}

+static void sd_config_cmp_and_write(struct scsi_disk *sdkp)
+{
+ if (sdkp->max_cmp_and_write_blocks > sdkp->max_xfer_blocks) {
+ /* Invalid settings returned. Do not try to support for now */
+ blk_queue_max_cmp_and_write_sectors(sdkp->disk->queue, 0);
+ return;
+ }
+
+ /*
+ * mult by 2, because the block layer wants the total number of
+ * sectors that will be put in bios and transferred.
+ */
+ blk_queue_max_cmp_and_write_sectors(sdkp->disk->queue,
+ 2 * sdkp->max_cmp_and_write_blocks *
+ (sdkp->device->sector_size >> 9));
+}
+
+/**
+ * sd_setup_cmp_and_write_cmnd - compare and write data
+ * @cmd: scsi_cmnd to prepare
+ **/
+static int sd_setup_cmp_and_write_cmnd(struct scsi_cmnd *cmd)
+{
+ struct request *rq = cmd->request;
+ struct scsi_device *sdp = cmd->device;
+ sector_t sector = blk_rq_pos(rq);
+ unsigned int nr_sectors = blk_rq_sectors(rq);
+
+ sector >>= ilog2(sdp->sector_size) - 9;
+ nr_sectors >>= ilog2(sdp->sector_size) - 9;
+
+ cmd->cmnd[0] = COMPARE_AND_WRITE;
+ put_unaligned_be64(sector, &cmd->cmnd[2]);
+ /*
+ * rq/bio contains total data to transfer, but the nr LBAs field
+ * is only the data to be compared/written in each step of the
+ * operation.
+ */
+ cmd->cmnd[13] = nr_sectors >> 1;
+ /* TODO - wrprotect and FUA and DPO flags */
+
+ cmd->transfersize = sdp->sector_size;
+ cmd->allowed = SD_MAX_RETRIES;
+ rq->timeout = SD_TIMEOUT;
+
+ return scsi_init_io(cmd, GFP_ATOMIC);
+}
+
static void sd_config_discard(struct scsi_disk *sdkp, unsigned int mode)
{
struct request_queue *q = sdkp->disk->queue;
@@ -1134,6 +1193,8 @@ static int sd_init_command(struct scsi_cmnd *cmd)
return sd_setup_write_same_cmnd(cmd);
else if (rq->cmd_flags & REQ_FLUSH)
return sd_setup_flush_cmnd(cmd);
+ else if (rq->cmd_flags & REQ_CMP_AND_WRITE)
+ return sd_setup_cmp_and_write_cmnd(cmd);
else
return sd_setup_read_write_cmnd(cmd);
}
@@ -2596,6 +2657,8 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
get_unaligned_be16(&buffer[6]) * sector_sz);
blk_queue_io_opt(sdkp->disk->queue,
get_unaligned_be32(&buffer[12]) * sector_sz);
+ sdkp->max_cmp_and_write_blocks = buffer[5];
+ sd_config_cmp_and_write(sdkp);

if (buffer[3] == 0x3c) {
unsigned int lba_count, desc_count;
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 4c3ab83..fe04ac8 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -68,6 +68,7 @@ struct scsi_disk {
sector_t capacity; /* size in 512-byte sectors */
u32 max_xfer_blocks;
u32 max_ws_blocks;
+ u32 max_cmp_and_write_blocks;
u32 max_unmap_blocks;
u32 unmap_granularity;
u32 unmap_alignment;
--
1.7.1

m***@cs.wisc.edu
2014-10-16 05:37:15 UTC
Permalink
From: Mike Christie <***@cs.wisc.edu>

This patch has the iblock backing store use blkdev_issue_cmp_and_write
if the backing store device/queue supports it.

Signed-off-by: Mike Christie <***@cs.wisc.edu>
---
drivers/target/target_core_iblock.c | 85 ++++++++++++++++++++++++++++-------
1 files changed, 68 insertions(+), 17 deletions(-)

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 7e6b857..cac928a 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -153,6 +153,11 @@ static int iblock_configure_device(struct se_device *dev)
*/
dev->dev_attrib.max_write_same_len = 0xFFFF;

+ /* convert from linux block layer 512 byte sector to our block size */
+ dev->dev_attrib.max_compare_and_write_len =
+ (q->limits.max_cmp_and_write_sectors << IBLOCK_LBA_SHIFT) /
+ dev->dev_attrib.hw_block_size;
+
if (blk_queue_nonrot(q))
dev->dev_attrib.is_nonrot = 1;

@@ -413,6 +418,67 @@ iblock_execute_sync_cache(struct se_cmd *cmd)
return 0;
}

+/*
+ * Convert the blocksize advertised to the initiator to the 512 byte
+ * units unconditionally used by the Linux block layer.
+ */
+static int iblock_get_lba_shift(struct se_cmd *cmd)
+{
+ struct se_device *dev = cmd->se_dev;
+
+ if (dev->dev_attrib.block_size == 4096)
+ return 3;
+ else if (dev->dev_attrib.block_size == 2048)
+ return 2;
+ else if (dev->dev_attrib.block_size == 1024)
+ return 1;
+ else
+ return 0;
+}
+
+static sense_reason_t
+iblock_execute_compare_and_write(struct se_cmd *cmd)
+{
+ struct block_device *bdev = IBLOCK_DEV(cmd->se_dev)->ibd_bd;
+ int ret, i = 0;
+ sector_t block_lba;
+ struct scatterlist *sg;
+ struct bio *bio;
+
+ block_lba = cmd->t_task_lba << iblock_get_lba_shift(cmd);
+
+ /* assumes SGLs are PAGE_SIZE */
+ bio = blkdev_setup_cmp_and_write(bdev, block_lba, GFP_KERNEL,
+ cmd->t_data_nents);
+ if (!bio) {
+ pr_err("blkdev_setup_cmp_and_write() failed\n");
+ return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+ }
+
+ for_each_sg(cmd->t_data_sg, sg, cmd->t_data_nents, i) {
+ ret = bio_add_pc_page(bdev_get_queue(bdev), bio, sg_page(sg),
+ sg->length, sg->offset);
+ if (ret != sg->length) {
+ bio_put(bio);
+ pr_err("bio_add_pc_page() failed\n");
+ return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+ }
+ }
+
+ ret = blkdev_issue_cmp_and_write(bio);
+ if (ret == -ECANCELED) {
+ pr_warn("Target/%s: Send MISCOMPARE check condition and sense\n",
+ cmd->se_dev->transport->name);
+ return TCM_MISCOMPARE_VERIFY;
+ } else if (ret < 0) {
+ pr_err("blkdev_issue_cmp_and_write() failed: %d\n", ret);
+ return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+ }
+
+ target_complete_cmd(cmd, GOOD);
+ return 0;
+}
+
static sense_reason_t
iblock_do_unmap(struct se_cmd *cmd, void *priv,
sector_t lba, sector_t nolb)
@@ -701,23 +767,7 @@ iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
rw = READ;
}

- /*
- * Convert the blocksize advertised to the initiator to the 512 byte
- * units unconditionally used by the Linux block layer.
- */
- if (dev->dev_attrib.block_size == 4096)
- block_lba = (cmd->t_task_lba << 3);
- else if (dev->dev_attrib.block_size == 2048)
- block_lba = (cmd->t_task_lba << 2);
- else if (dev->dev_attrib.block_size == 1024)
- block_lba = (cmd->t_task_lba << 1);
- else if (dev->dev_attrib.block_size == 512)
- block_lba = cmd->t_task_lba;
- else {
- pr_err("Unsupported SCSI -> BLOCK LBA conversion:"
- " %u\n", dev->dev_attrib.block_size);
- return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
- }
+ block_lba = cmd->t_task_lba << iblock_get_lba_shift(cmd);

ibr = kzalloc(sizeof(struct iblock_req), GFP_KERNEL);
if (!ibr)
@@ -841,6 +891,7 @@ static struct sbc_ops iblock_sbc_ops = {
.execute_write_same = iblock_execute_write_same,
.execute_write_same_unmap = iblock_execute_write_same_unmap,
.execute_unmap = iblock_execute_unmap,
+ .execute_compare_and_write = iblock_execute_compare_and_write,
};

static sense_reason_t
--
1.7.1

m***@cs.wisc.edu
2014-10-16 05:37:12 UTC
Permalink
From: Mike Christie <***@cs.wisc.edu>

This patch adds block layer helpers to send a compare and write request.
It differs from requests like discard and write same in that there is a
setup function and a separate issue function. I did this to allow callers
to add multiple pages with varying offsets and lengths.

For the miscompare failure case I am currently having drivers return
-ECANCELED, but I was not sure whether there is a better return code.
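
For reference, here is a minimal sketch of how a caller might drive these
helpers. It is illustrative only -- the example_* function and its page and
size arguments are assumptions, not part of this patch:

static int example_cmp_and_write_one_block(struct block_device *bdev,
					   sector_t sector,
					   struct page *cmp_page,
					   struct page *write_page,
					   unsigned int blk_size)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio;

	/* one page of verify data plus one page of write data */
	bio = blkdev_setup_cmp_and_write(bdev, sector, GFP_KERNEL, 2);
	if (!bio)
		return -ENOMEM;

	/* add the compare (verify) pages first, then the write pages */
	if (bio_add_pc_page(q, bio, cmp_page, blk_size, 0) != blk_size ||
	    bio_add_pc_page(q, bio, write_page, blk_size, 0) != blk_size) {
		bio_put(bio);
		return -ENOMEM;
	}

	/* waits for completion; returns -ECANCELED on a miscompare */
	return blkdev_issue_cmp_and_write(bio);
}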

Signed-off-by: Mike Christie <***@cs.wisc.edu>
---
block/blk-lib.c | 79 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/blk_types.h | 6 ++-
include/linux/blkdev.h | 6 +++
3 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 8411be3..fbb1a91 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -141,6 +141,85 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);

+struct bio_cmp_and_write_data {
+ int err;
+ struct completion *wait;
+};
+
+static void bio_cmp_and_write_end_io(struct bio *bio, int err)
+{
+ struct bio_cmp_and_write_data *data = bio->bi_private;
+
+ data->err = err;
+ complete(data->wait);
+ bio_put(bio);
+}
+
+/**
+ * blkdev_setup_cmp_and_write - setup a bio for a compare and write operation
+ * @bdev: blockdev to issue the compare and write for
+ * @sector: start sector
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @nr_pages: number of pages that contain the data to be written and compared
+ *
+ * This function should be called to allocate the bio used for
+ * blkdev_issue_cmp_and_write. The caller should add the pages to be compared
+ * followed by the write pages using bio_add_pc_page.
+ */
+struct bio *blkdev_setup_cmp_and_write(struct block_device *bdev,
+ sector_t sector, gfp_t gfp_mask,
+ int nr_pages)
+{
+ struct bio *bio;
+
+ bio = bio_alloc(gfp_mask, nr_pages);
+ if (!bio)
+ return NULL;
+
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_end_io = bio_cmp_and_write_end_io;
+ bio->bi_bdev = bdev;
+ return bio;
+}
+EXPORT_SYMBOL(blkdev_setup_cmp_and_write);
+
+/**
+ * blkdev_issue_cmp_and_write - queue a compare and write operation
+ * @bio: bio prepped with blkdev_setup_cmp_and_write.
+ * Description:
+ * Issue a compare and write bio for the sectors in question. For the
+ * execution of this request the handler should atomically read the
+ * targeted sectors, compare them with the verify data, and if they match
+ * write the write instance of the data to those sectors.
+ *
+ * If the compare fails and data is not written the handler should return
+ * -ECANCELED.
+ */
+int blkdev_issue_cmp_and_write(struct bio *bio)
+{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+ DECLARE_COMPLETION_ONSTACK(wait);
+ struct bio_cmp_and_write_data data;
+ unsigned int max_cmp_and_write_sectors;
+
+ max_cmp_and_write_sectors = q->limits.max_cmp_and_write_sectors;
+ if (max_cmp_and_write_sectors == 0)
+ return -EOPNOTSUPP;
+
+ if (max_cmp_and_write_sectors < bio->bi_iter.bi_size >> 9)
+ return -EINVAL;
+
+ data.err = 0;
+ data.wait = &wait;
+ bio->bi_private = &data;
+
+ submit_bio(REQ_WRITE | REQ_CMP_AND_WRITE, bio);
+ /* Wait for bio in-flight */
+ wait_for_completion_io(&wait);
+ return data.err;
+}
+EXPORT_SYMBOL(blkdev_issue_cmp_and_write);
+
/**
* blkdev_issue_write_same - queue a write same operation
* @bdev: target blockdev
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 66c2167..37d1eff 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -160,7 +160,7 @@ enum rq_flag_bits {
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_WRITE_SAME, /* write same block many times */
-
+ __REQ_CMP_AND_WRITE, /* compare data and write if matched */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_FUA, /* forced unit access */
__REQ_FLUSH, /* request for cache flush */
@@ -203,14 +203,16 @@ enum rq_flag_bits {
#define REQ_PRIO (1ULL << __REQ_PRIO)
#define REQ_DISCARD (1ULL << __REQ_DISCARD)
#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME)
+#define REQ_CMP_AND_WRITE (1ULL << __REQ_CMP_AND_WRITE)
#define REQ_NOIDLE (1ULL << __REQ_NOIDLE)

+
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
- REQ_SECURE)
+ REQ_SECURE | REQ_CMP_AND_WRITE)
#define REQ_CLONE_MASK REQ_COMMON_MASK

#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6d03206..746d5b4 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -696,6 +696,9 @@ static inline bool blk_check_merge_flags(unsigned int flags1,
if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
return false;

+ if (flags1 & REQ_CMP_AND_WRITE || flags2 & REQ_CMP_AND_WRITE)
+ return false;
+
return true;
}

@@ -1167,6 +1170,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,

#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */

+extern struct bio *blkdev_setup_cmp_and_write(struct block_device *, sector_t,
+ gfp_t, int);
+extern int blkdev_issue_cmp_and_write(struct bio *bio);
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
--
1.7.1

Christoph Hellwig
2014-10-17 09:55:55 UTC
Permalink
Post by m***@cs.wisc.edu
@@ -160,7 +160,7 @@ enum rq_flag_bits {
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_WRITE_SAME, /* write same block many times */
-
+ __REQ_CMP_AND_WRITE, /* compare data and write if matched */
I think it's time that we stop overloading the request flags with
request types.

We already have req->cmd_type which actually is a fairly good
description of what we get except for REQ_TYPE_FS, which is a horrible
overload using req->cmd_flags.

Given that yours is just one of many currently ongoing patches adding
more flags here, I think you need to bite the bullet and fix this up
by replacing REQ_TYPE_FS with:

REQ_TYPE_WRITE
REQ_TYPE_READ
REQ_TYPE_FLUSH
REQ_TYPE_DISCARD
REQ_TYPE_WRITE_SAME
REQ_TYPE_CMP_AND_WRITE

sd.c is a nice guide of what should be a flag and what a type since my
last refactoring of the command_init function.
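
To make that concrete, here is a rough sketch of how sd_init_command()
could then dispatch on the type. None of these REQ_TYPE_* values exist
yet, so treat it as purely illustrative:

static int sd_init_command(struct scsi_cmnd *cmd)
{
	struct request *rq = cmd->request;

	switch (rq->cmd_type) {
	case REQ_TYPE_DISCARD:
		return sd_setup_discard_cmnd(cmd);
	case REQ_TYPE_WRITE_SAME:
		return sd_setup_write_same_cmnd(cmd);
	case REQ_TYPE_FLUSH:
		return sd_setup_flush_cmnd(cmd);
	case REQ_TYPE_CMP_AND_WRITE:
		return sd_setup_cmp_and_write_cmnd(cmd);
	case REQ_TYPE_READ:
	case REQ_TYPE_WRITE:
	default:
		return sd_setup_read_write_cmnd(cmd);
	}
}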
Martin K. Petersen
2014-10-17 23:38:37 UTC
Permalink
Christoph> We already have req->cmd_type which actually is a fairly good
Christoph> description of what we get except for REQ_TYPE_FS, which is a
Christoph> horrible overload using req->cmd_flags.

Christoph> Given that yours is just one of many currently ongoing patches
Christoph> adding more flags here, I think you need to bite the bullet
Christoph> and fix this up by replacing REQ_TYPE_FS with:

Christoph> REQ_TYPE_WRITE REQ_TYPE_READ REQ_TYPE_FLUSH REQ_TYPE_DISCARD
Christoph> REQ_TYPE_WRITE_SAME REQ_TYPE_CMP_AND_WRITE

The problem with this is that, as it stands, a bio has no type. And it
would suck if we couldn't keep bio rw and request flags in sync.

I wonder if it would make more sense to move the remaining rq types to
cmd_flags after I'm done with the 64-bit conversion?
--
Martin K. Petersen Oracle Linux Engineering
Christoph Hellwig
2014-10-18 15:16:39 UTC
Permalink
Post by Martin K. Petersen
The problem with this is that, as it stands, a bio has no type. And it
would suck if we couldn't keep bio rw and request flags in sync.
I wonder if it would make more sense to move the remaining rq types to
cmd_flags after I'm done with the 64-bit conversion?
I'd prefer adding a cmd_type to the bio as well and avoid the 64-bit
flag conversion. While we'll probably grow more types of I/Os (e.g.
copy offload) I hope we can actually reduce the number of real flags,
and it's easier to read for sure if we can switch on the command type
in the driver.

m***@cs.wisc.edu
2014-10-16 05:37:14 UTC
Permalink
From: Mike Christie <***@cs.wisc.edu>

This has lio callout to the backing store module if it supports
using REQ_COMPARE_AND_WRITE. The backing store would set the
device attribute max_compare_and_write_len if it supports using
the new request type. If it is not supported then we do the
read/cmp/write emulation in the sbc layer like before.

Signed-off-by: Mike Christie <***@cs.wisc.edu>
---
drivers/target/target_core_sbc.c | 21 ++++++++++++++++-----
drivers/target/target_core_spc.c | 12 ++++++++++--
drivers/target/target_core_transport.c | 1 +
include/target/target_core_backend.h | 1 +
include/target/target_core_base.h | 1 +
5 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index ebe62af..fe0c16a 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -555,6 +555,21 @@ sbc_compare_and_write(struct se_cmd *cmd)
return TCM_NO_SENSE;
}

+static void sbc_setup_compare_and_write(struct se_cmd *cmd, struct sbc_ops *ops,
+ u32 sectors)
+{
+ cmd->t_task_nolb = sectors;
+ cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE;
+
+ if (cmd->se_dev->dev_attrib.max_compare_and_write_len) {
+ cmd->execute_cmd = ops->execute_compare_and_write;
+ } else {
+ cmd->execute_rw = ops->execute_rw;
+ cmd->execute_cmd = sbc_compare_and_write;
+ cmd->transport_complete_callback = compare_and_write_callback;
+ }
+}
+
static int
sbc_set_prot_op_checks(u8 protect, enum target_prot_type prot_type,
bool is_write, struct se_cmd *cmd)
@@ -842,11 +857,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
*/
size = 2 * sbc_get_size(cmd, sectors);
cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
- cmd->t_task_nolb = sectors;
- cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE;
- cmd->execute_rw = ops->execute_rw;
- cmd->execute_cmd = sbc_compare_and_write;
- cmd->transport_complete_callback = compare_and_write_callback;
+ sbc_setup_compare_and_write(cmd, ops, sectors);
break;
case READ_CAPACITY:
size = READ_CAP_LEN;
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 6cd7222..7db8c22 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -525,8 +525,16 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf)
/*
* Set MAXIMUM COMPARE AND WRITE LENGTH
*/
- if (dev->dev_attrib.emulate_caw)
- buf[5] = 0x01;
+ if (dev->dev_attrib.emulate_caw) {
+ if (!dev->dev_attrib.max_compare_and_write_len)
+ /*
+ * if backing device does not have special handling
+ * then sbc will support up to 1 block.
+ */
+ buf[5] = 0x01;
+ else
+ buf[5] = dev->dev_attrib.max_compare_and_write_len;
+ }

/*
* Set OPTIMAL TRANSFER LENGTH GRANULARITY
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 9ea0d5f..e7706f9 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -1637,6 +1637,7 @@ void transport_generic_request_failure(struct se_cmd *cmd,
case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED:
case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED:
case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED:
+ case TCM_MISCOMPARE_VERIFY:
break;
case TCM_OUT_OF_RESOURCES:
sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 9adc1bc..06924b1 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -53,6 +53,7 @@ struct sbc_ops {
sense_reason_t (*execute_write_same)(struct se_cmd *cmd);
sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd);
sense_reason_t (*execute_unmap)(struct se_cmd *cmd);
+ sense_reason_t (*execute_compare_and_write)(struct se_cmd *cmd);
};

int transport_subsystem_register(struct se_subsystem_api *);
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 23c518a..bc0a26e 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -697,6 +697,7 @@ struct se_dev_attrib {
u32 unmap_granularity;
u32 unmap_granularity_alignment;
u32 max_write_same_len;
+ u32 max_compare_and_write_len;
u32 max_bytes_per_io;
struct se_device *da_dev;
struct config_group da_group;
--
1.7.1
Douglas Gilbert
2014-10-16 10:39:24 UTC
Permalink
Post by m***@cs.wisc.edu
The following patches implement the SCSI command COMPARE_AND_WRITE as a new
bio/request type REQ_CMP_AND_WRITE. COMPARE_AND_WRITE is defined in the SBC
spec roughly as follows.
The COMPARE AND WRITE command requests that the device server perform the
following operations:
1) perform the following operations:
   A) read the specified logical blocks; and
   B) transfer the specified number of logical blocks from the Data-Out
      Buffer (i.e., the verify instance of the data is transferred from the
      Data-Out Buffer);
2) compare the data read from the specified logical blocks with the verify
   instance of the data; and
3) if the compared data matches, then perform the following operations:
   1) transfer the specified number of logical blocks from the Data-Out
      Buffer (i.e., the write instance of the data transferred from the
      Data-Out Buffer); and
   2) write those logical blocks.
The most common use of this command today is in VMware ESX where it is used
for locking. See
http://blogs.vmware.com/vsphere/2012/05/vmfs-locking-uncovered.html
[in ESX it is called ATS (atomic test and set)] for more VMware info.
Linux fits into this use, because its SCSI target layer (LIO) is commonly
used as storage for ESX VMs.
Currently, to support this command in LIO we emulate it by taking a lock,
doing a read, comparing it, then doing a write. The problem this patchset
tries to solve is that in many cases it is more efficient to pass the one
COMPARE_AND_WRITE request directly to the device, where it might have
optimized locking and will also require fewer requests to/from the target
and backing storage device.
I am also bugging the ceph-devel list, because I am working on LIO + ceph
support. I am interested in using ceph's rbd device for the backing
storage for LIO, and I was thinking this request could be implemented similar
to how REQ_DISCARD (unmap/trim) is going to be, and I wanted to get some early
feedback. I know the scsi layer better, so I have only added support in sd in
this patchset.
The following patches were made over the target-pending for-next branch but
also apply to Linus's tree.
As I found when I implemented this command in sg3_utils,
my library's support for handling and reporting the
MISCOMPARE sense key needed to be strengthened. [A sense
buffer with a MISCOMPARE sense key is what results when
the compare in step 2) is unequal.]

Since it was relatively rare prior to VMware's use of
the COMPARE AND WRITE command, MISCOMPARE is often forgotten
in sense key handling. Also it should not be considered
as an error and definitely should not lead to the command
being retried.

The COMPARE AND WRITE command may fail for other reasons
such as a transport problem or a Unit Attention, so the
SCSI eh logic may need to know about it.
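
As a rough sketch of the check I have in mind (field names follow the
struct scsi_sense_hdr already used in scsi_io_completion(); this is
illustrative, not a tested patch):

/*
 * MISCOMPARE with asc MISCOMPARE DURING VERIFY OPERATION (0x1d) is a
 * definitive answer from the device, not an I/O error, and must never
 * trigger a retry of the command.
 */
static bool caw_miscompare(const struct scsi_sense_hdr *sshdr)
{
	return sshdr->sense_key == MISCOMPARE && sshdr->asc == 0x1d;
}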

Doug Gilbert
Douglas Gilbert
2014-10-16 20:01:37 UTC
Permalink
Post by Douglas Gilbert
[COMPARE AND WRITE patchset description snipped]
As I found when I implemented this command in sg3_utils,
my library's support for handling and reporting the
MISCOMPARE sense key needed to be strengthened. [A sense
buffer with a MISCOMPARE sense key is what results when
the compare in step 2) is unequal.]
Since it was relatively rare prior to VMWare's use of
the COMPARE AND WRITE command, MISCOMPARE is often forgotten
in sense key handling. Also it should not be considered
as an error and definitely should not lead to the command
being retried.
The COMPARE AND WRITE command may fail for other reasons
such as a transport problem or a Unit Attention, so the
SCSI eh logic may need to know about it.
Elaborating ...

Hannes will enjoy this one: say a COMPARE AND WRITE (CAW) fails
due to a transport error or timeout. What should the EH do *** ?
Answer: read that LBA(s) to see whether the command succeeded
(i.e. wrote the new data)! If it did, do nothing; if it didn't,
repeat the CAW command. And naturally that second CAW may
yield a MISCOMPARE.


Mike proposes using ECANCELED for the errno corresponding to
MISCOMPARE. Not wild about that but can't see anything better,
and it is definitely much better than EIO.

Checked with FreeBSD and this issue has not come up there yet.
If ESX uses a Unix like kernel, it would be interesting to know
which errno (if any) they use.

Doug Gilbert

*** the EH has other options:
- send the transport error or timeout indication back so
the application is alerted to do a "read to check if done".
- if it retries the CAW blindly that might yield a MISCOMPARE
when it actually succeeded (due to the original CAW command
being acted on); but then the application needs to be aware
that ECANCELED may not mean miscompare.
Elliott, Robert (Server Storage)
2014-10-16 20:12:56 UTC
Permalink
Post by Douglas Gilbert
The COMPARE AND WRITE command may fail for other reasons
such as a transport problem or a Unit Attention, so the
SCSI eh logic may need to know about it.
Elaborating ...
Hannes will enjoy this one: say a COMPARE AND WRITE (CAW) fails
due to a transport error or timeout. What should the EH do *** ?
Answer: read that LBA(s) to see whether the command succeeded
(i.e. wrote the new data)! If it did, do nothing; if it didn't,
repeat the CAW command. And naturally that second CAW may
yield a MISCOMPARE.
I don't think that is safe... detecting the write data is in
place doesn't prove the COMPARE AND WRITE really worked as
a whole.

Example: if two applications do this:
* application 1: COMPARE AND WRITE: change 4 to 6
* application 2: COMPARE AND WRITE: change 5 to 6

and they both run into transport errors, then they'll
both read back 6 and think their commands worked when
really only one of them worked.

---
Rob Elliott HP Server Storage
Hannes Reinecke
2014-10-17 06:02:02 UTC
Permalink
Post by Douglas Gilbert
Post by Douglas Gilbert
[COMPARE AND WRITE patchset description and MISCOMPARE discussion snipped]
Elaborating ...
Hannes will enjoy this one: say a COMPARE AND WRITE (CAW) fails
due to a transport error or timeout. What should the EH do *** ?
Answer: read that LBA(s) to see whether the command succeeded
(i.e. wrote the new data)! If it did, do nothing; if it didn't,
repeat the CAW command. And naturally that second CAW may
yield a MISCOMPARE.
Hmm. Surely we should be getting a sense code telling us up to
which block the CAW failed? Reading the LBA(s) seems like a daft
idea to me ...
Post by Douglas Gilbert
Mike proposes using ECANCELED for the errno corresponding to
MISCOMPARE. Not wild about that but can't see anything better,
and it is definitely much better than EIO.
Yup. Please do.

Cheers,

Hannes
--
Dr. Hannes Reinecke zSeries & Storage
***@suse.de +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
Bart Van Assche
2014-10-18 08:11:47 UTC
Permalink
Post by m***@cs.wisc.edu
The following patches implement the SCSI command COMPARE_AND_WRITE as a new
bio/request type REQ_CMP_AND_WRITE. COMPARE_AND_WRITE is defined in the SBC
spec roughly as follows.
The COMPARE AND WRITE command requests that the device server perform the
following operations:
1) perform the following operations:
   A) read the specified logical blocks; and
   B) transfer the specified number of logical blocks from the Data-Out
      Buffer (i.e., the verify instance of the data is transferred from the
      Data-Out Buffer);
2) compare the data read from the specified logical blocks with the verify
   instance of the data; and
3) if the compared data matches, then perform the following operations:
   1) transfer the specified number of logical blocks from the Data-Out
      Buffer (i.e., the write instance of the data transferred from the
      Data-Out Buffer); and
   2) write those logical blocks.
Hello Mike,

Just below the above text one can find the following additional
requirement in SBC-4:
<quote>
4) if the compare operation does not indicate a match, then terminate
the command with CHECK CONDITION status with the sense key set to
MISCOMPARE and the additional sense code set to MISCOMPARE DURING
VERIFY OPERATION. In the sense data (see 4.18 and SPC-4) the offset
from the start of the Data-Out Buffer to the first byte of data that
was not equal shall be reported in the INFORMATION field.
</quote>

What I'm wondering now is how requirement (4) can be supported if
REQ_CMP_AND_WRITE doesn't return the offset of the first byte that
didn't match ? Additionally, shouldn't compare_and_write_callback() be
fixed such that it returns the miscompare offset to its caller ?

Thanks,

Bart.
Mike Christie
2014-10-18 20:32:12 UTC
Permalink
Post by Bart Van Assche
[COMPARE AND WRITE patchset description snipped]
Hello Mike,
Just below the above text one can find the following additional
requirement in SBC-4:
<quote>
4) if the compare operation does not indicate a match, then terminate
the command with CHECK CONDITION status with the sense key set to
MISCOMPARE and the additional sense code set to MISCOMPARE DURING
VERIFY OPERATION. In the sense data (see 4.18 and SPC-4) the offset
from the start of the Data-Out Buffer to the first byte of data that
was not equal shall be reported in the INFORMATION field.
</quote>
What I'm wondering now is how requirement (4) can be supported if
REQ_CMP_AND_WRITE doesn't return the offset of the first byte that
didn't match ? Additionally, shouldn't compare_and_write_callback() be
fixed such that it returns the miscompare offset to its caller ?
Yeah, Hannes pointed out that the original LIO code did not support
this. For the original code, I will fix that in another patchset to keep
this one smaller and focused. For what I need, I have been trying to
think of a nice way to pass additional info around. I think the options are:

1. Instead of making it bio/request based, make it a
request_queue->compare_and_write function. We then just stack them and
can add whatever arguments/return values are necessary and do not have
to mess with the bio/request structs and function chains.

2. Add another argument to the end io functions that can be used to pass
back bio/request type specific info.
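
Roughly, for (2) I was thinking of something along these lines. This is
just a sketch; none of it exists yet, and the names are made up:

/* request type specific completion info, e.g. for compare and write */
struct cmp_and_write_result {
	int error;		/* 0, -ECANCELED on miscompare, or other errno */
	u64 miscompare_offset;	/* byte offset of the first mismatch */
};

/* hypothetical end_io variant that passes the extra info back */
typedef void (bio_extra_end_io_t)(struct bio *bio, int error, void *extra);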

Sagi Grimberg
2014-10-20 07:18:24 UTC
Permalink
Post by Mike Christie
Post by Bart Van Assche
[COMPARE AND WRITE description and miscompare-offset question snipped]
Yeah, Hannes pointed out that the original LIO code did not support
this. For the original code, I will fix that in another patchset to keep
this one smaller and focused. For what I need, I have been trying to
think of a nice way to pass additional info around.
Thanks Bart for bringing this up,

Actually Mike, the LIO code does support providing the INFORMATION field
with the offset sector in the sense data. I added it for PI, but it can
be used for other errors such as miscompare (check out
transport_send_check_condition_and_sense for the
TCM_LOGICAL_BLOCK_[GUARD|APP_TAG|REF_TAG]_CHECK_FAILED error codes).
Post by Mike Christie
1. Instead of making it bio/request based, make it a
request_queue->compare_and_write function. We then just stack them and
can add whatever arguments/return values are necessary and do not have
to mess with the bio/request structs and function chains.
2. Add another argument to the end io functions that can be used to pass
back bio/request type specific info.
I'm for (2). Can we standardize the way to provide the extra info, so the
target iblock code can do the same thing for all?

Sagi.
