Commit 4363ac7c authored by Martin K. Petersen, committed by Jens Axboe

block: Implement support for WRITE SAME

The WRITE SAME command supported on some SCSI devices allows the same
block to be efficiently replicated throughout a block range. Only a
single logical block is transferred from the host and the storage device
writes the same data to all blocks described by the I/O.

This patch implements support for WRITE SAME in the block layer. The
blkdev_issue_write_same() function can be used by filesystems and block
drivers to replicate a buffer across a block range. This can be used to
efficiently initialize software RAID devices, etc.
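
For illustration, a caller could use the new helper roughly as follows. This is a minimal sketch, not part of the patch: the function name, the zero-fill pattern and the 8 MiB range are made up for the example; only blkdev_issue_write_same() itself comes from this commit.

#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/printk.h>

/*
 * Hypothetical caller (illustration only): replicate one logical block
 * of data across an 8 MiB region of a block device. The page must hold
 * the pattern at offset 0; blkdev_issue_write_same() splits the range
 * according to the queue's max_write_same_sectors.
 */
static int example_fill_region(struct block_device *bdev, sector_t start)
{
        struct page *page;
        int ret;

        page = alloc_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;

        /* Pattern to replicate: here, one logical block of zeroes. */
        clear_highpage(page);

        /* 8 MiB expressed in 512-byte sectors. */
        ret = blkdev_issue_write_same(bdev, start, 8 * 1024 * 2,
                                      GFP_KERNEL, page);
        if (ret == -EOPNOTSUPP)
                pr_info("device does not support WRITE SAME\n");

        __free_page(page);
        return ret;
}

The helper issues one bio per max_write_same_sectors chunk and returns -EOPNOTSUPP when the queue does not advertise the capability.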
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent f31dc1cd
...@@ -206,3 +206,17 @@ Description:
when a discarded area is read the discard_zeroes_data
parameter will be set to one. Otherwise it will be 0 and
the result of reading a discarded area is undefined.
What: /sys/block/<disk>/queue/write_same_max_bytes
Date: January 2012
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Some devices support a write same operation in which a
single data block can be written to a range of several
contiguous blocks on storage. This can be used to wipe
areas on disk or to initialize drives in a RAID
configuration. write_same_max_bytes indicates how many
bytes can be written in a single write same command. If
write_same_max_bytes is 0, write same is not supported
by the device.
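
The value exported here is the queue limit converted to bytes (max_write_same_sectors << 9, see the sysfs show routine further down in this patch). As a sketch of how that limit gets populated, a driver that knows its hardware handles WRITE SAME calls the new blk_queue_max_write_same_sectors() helper; the driver name and the 32 MiB figure below are hypothetical, not part of this patch.

#include <linux/blkdev.h>
#include <linux/types.h>

/*
 * Hypothetical driver fragment: advertise WRITE SAME support. Leaving
 * the limit at the default of 0 keeps write_same_max_bytes at 0 and
 * makes generic_make_request_checks() fail REQ_WRITE_SAME bios with
 * -EOPNOTSUPP.
 */
static void exampledrv_config_write_same(struct request_queue *q,
                                         bool hw_supports_write_same)
{
        if (hw_supports_write_same)
                /* e.g. allow up to 32 MiB (65536 sectors) per command */
                blk_queue_max_write_same_sectors(q, 65536);
        else
                blk_queue_max_write_same_sectors(q, 0);
}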
...@@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio)
goto end_io;
}
if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
err = -EOPNOTSUPP;
goto end_io;
}
/*
* Various block parts want %current->io_context and lazy ioc
* allocation ends up trading a lot of pain for a small amount of
...@@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request);
*/
void submit_bio(int rw, struct bio *bio)
{
int count = bio_sectors(bio);
bio->bi_rw |= rw;
/*
...@@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio)
* go through the normal accounting stuff before submission.
*/
if (bio_has_data(bio)) {
unsigned int count;
if (unlikely(rw & REQ_WRITE_SAME))
count = bdev_logical_block_size(bio->bi_bdev) >> 9;
else
count = bio_sectors(bio);
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
...
...@@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
/**
* blkdev_issue_write_same - queue a write same operation
* @bdev: target blockdev
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @page: page containing data to write
*
* Description:
* Issue a write same request for the sectors in question.
*/
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask,
struct page *page)
{
DECLARE_COMPLETION_ONSTACK(wait);
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_write_same_sectors;
struct bio_batch bb;
struct bio *bio;
int ret = 0;
if (!q)
return -ENXIO;
max_write_same_sectors = q->limits.max_write_same_sectors;
if (max_write_same_sectors == 0)
return -EOPNOTSUPP;
atomic_set(&bb.done, 1);
bb.flags = 1 << BIO_UPTODATE;
bb.wait = &wait;
while (nr_sects) {
bio = bio_alloc(gfp_mask, 1);
if (!bio) {
ret = -ENOMEM;
break;
}
bio->bi_sector = sector;
bio->bi_end_io = bio_batch_end_io;
bio->bi_bdev = bdev;
bio->bi_private = &bb;
bio->bi_vcnt = 1;
bio->bi_io_vec->bv_page = page;
bio->bi_io_vec->bv_offset = 0;
bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
if (nr_sects > max_write_same_sectors) {
bio->bi_size = max_write_same_sectors << 9;
nr_sects -= max_write_same_sectors;
sector += max_write_same_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
}
atomic_inc(&bb.done);
submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
}
/* Wait for bios in-flight */
if (!atomic_dec_and_test(&bb.done))
wait_for_completion(&wait);
if (!test_bit(BIO_UPTODATE, &bb.flags))
ret = -ENOTSUPP;
return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
/**
* blkdev_issue_zeroout - generate number of zero filed write bios
* @bdev: blockdev to issue
...
...@@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
|| next->special)
return 0;
if (req->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(req->bio, next->bio))
return 0;
/*
* If we are allowed to merge, then append bio list
* from next to rq and release next. merge_requests_fn
...@@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
if (bio_integrity(bio) != blk_integrity_rq(rq))
return false;
/* must be using the same buffer */
if (rq->cmd_flags & REQ_WRITE_SAME &&
!blk_write_same_mergeable(rq->bio, bio))
return false;
return true;
}
...
...@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0;
lim->discard_granularity = 0;
lim->discard_alignment = 0;
...@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_segments = USHRT_MAX;
lim->max_hw_sectors = UINT_MAX;
lim->max_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
...@@ -285,6 +287,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
}
EXPORT_SYMBOL(blk_queue_max_discard_sectors);
/**
* blk_queue_max_write_same_sectors - set max sectors for a single write same
* @q: the request queue for the device
* @max_write_same_sectors: maximum number of sectors to write per command
**/
void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors)
{
q->limits.max_write_same_sectors = max_write_same_sectors;
}
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
...@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
...
...@@ -180,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
return queue_var_show(queue_discard_zeroes_data(q), page);
}
static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)q->limits.max_write_same_sectors << 9);
}
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
{
...@@ -385,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
.attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
...@@ -432,6 +444,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
...
...@@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
...
...@@ -1487,9 +1487,12 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
bp->bv1 = bi->bi_io_vec[0];
bp->bv2 = bi->bi_io_vec[0];
if (bio_is_rw(bi)) {
bp->bv2.bv_offset += first_sectors << 9;
bp->bv2.bv_len -= first_sectors << 9;
bp->bv1.bv_len = first_sectors << 9;
}
bp->bio1.bi_io_vec = &bp->bv1;
bp->bio2.bi_io_vec = &bp->bv2;
...
...@@ -399,6 +399,9 @@ static inline bool bio_is_rw(struct bio *bio)
if (!bio_has_data(bio))
return false;
if (bio->bi_rw & REQ_WRITE_SAME)
return false;
return true;
}
...
...@@ -147,6 +147,7 @@ enum rq_flag_bits {
__REQ_PRIO, /* boost priority in cfq */
__REQ_DISCARD, /* request to discard sectors */
__REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
__REQ_WRITE_SAME, /* write same block many times */
__REQ_NOIDLE, /* don't anticipate more IO after this one */
__REQ_FUA, /* forced unit access */
...@@ -185,13 +186,15 @@ enum rq_flag_bits {
#define REQ_META (1 << __REQ_META)
#define REQ_PRIO (1 << __REQ_PRIO)
#define REQ_DISCARD (1 << __REQ_DISCARD)
#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
#define REQ_NOIDLE (1 << __REQ_NOIDLE)
#define REQ_FAILFAST_MASK \
(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
#define REQ_COMMON_MASK \
(REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE)
REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
REQ_SECURE)
#define REQ_CLONE_MASK REQ_COMMON_MASK
/* This mask is used for both bio and request merge checking */
...
...@@ -270,6 +270,7 @@ struct queue_limits {
unsigned int io_min;
unsigned int io_opt;
unsigned int max_discard_sectors;
unsigned int max_write_same_sectors;
unsigned int discard_granularity;
unsigned int discard_alignment;
...@@ -614,7 +615,18 @@ static inline bool blk_check_merge_flags(unsigned int flags1,
if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE))
return false;
if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
return false;
return true;
}
static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
if (bio_data(a) == bio_data(b))
return true;
return false;
}
/*
...@@ -818,6 +830,9 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
if (unlikely(cmd_flags & REQ_DISCARD))
return q->limits.max_discard_sectors;
if (unlikely(cmd_flags & REQ_WRITE_SAME))
return q->limits.max_write_same_sectors;
return q->limits.max_sectors;
}
...@@ -886,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_max_write_same_sectors(struct request_queue *q,
unsigned int max_write_same_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
...@@ -1016,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct page *page);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
static inline int sb_issue_discard(struct super_block *sb, sector_t block,
...@@ -1193,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
return queue_discard_zeroes_data(bdev_get_queue(bdev));
}
static inline unsigned int bdev_write_same(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
if (q)
return q->limits.max_write_same_sectors;
return 0;
}
static inline int queue_dma_alignment(struct request_queue *q)
{
return q ? q->dma_alignment : 511;
...