md/raid0: update queue parameter in a safer location.

commit 199dc6ed upstream. When a (e.g.) RAID5 array is reshaped to RAID0, the updating of queue parameters (e.g. max number of sectors per bio) is done in the wrong place. It should be part of ->run, but it is actually part of ->takeover. This means it happens before level_store() calls: blk_set_stacking_limits(&mddev->queue->limits); and so it ineffective. This can lead to errors from underlying devices. So move all the relevant settings out of create_stripe_zones() and into raid0_run(). As this can lead to a bug-on it is suitable for any -stable kernel which supports reshape to RAID0. So 2.6.35 or later. As the bug has been present for five years there is no urgency, so no need to rush into -stable. Fixes: 9af204cf ("md: Add support for Raid5->Raid0 and Raid10->Raid0 takeover") Reported-by: Yi Zhang <yizhan@redhat.com> Signed-off-by: NeilBrown <neilb@suse.com> [ luis: backported to 3.16: - raid0 isn't accessed from dm-raid so no conditional mddev->queue accesses (done with commit 753f2856 "md raid0: access mddev->queue (request queue member) conditionally because it is not set when accessed from dm-raid") - adjusted context ] Signed-off-by: Luis Henriques <luis.henriques@canonical.com>

md/raid0: update queue parameter in a safer location.
commit 199dc6ed upstream. When a (e.g.) RAID5 array is reshaped to RAID0, the updating of queue parameters (e.g. max number of sectors per bio) is done in the wrong place. It should be part of ->run, but it is actually part of ->takeover. This means it happens before level_store() calls: blk_set_stacking_limits(&mddev->queue->limits); and so it ineffective. This can lead to errors from underlying devices. So move all the relevant settings out of create_stripe_zones() and into raid0_run(). As this can lead to a bug-on it is suitable for any -stable kernel which supports reshape to RAID0. So 2.6.35 or later. As the bug has been present for five years there is no urgency, so no need to rush into -stable. Fixes: 9af204cf ("md: Add support for Raid5->Raid0 and Raid10->Raid0 takeover") Reported-by: Yi Zhang <yizhan@redhat.com> Signed-off-by: NeilBrown <neilb@suse.com> [ luis: backported to 3.16: - raid0 isn't accessed from dm-raid so no conditional mddev->queue accesses (done with commit 753f2856 "md raid0: access mddev->queue (request queue member) conditionally because it is not set when accessed from dm-raid") - adjusted context ] Signed-off-by: Luis Henriques <luis.henriques@canonical.com>
f2cbb1bf · NeilBrown · Luis Henriques · 753cf4f4 · f2cbb1bf
Commit f2cbb1bf authored Aug 03, 2015 by NeilBrown Committed by Luis Henriques Dec 13, 2015
Hide whitespace changes
Inline Side-by-side

Showing with 36 additions and 30 deletions

drivers/md/raid0.c drivers/md/raid0.c +36 -30

No files found.
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -88,7 +88,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	char b[BDEVNAME_SIZE];
 	char b2[BDEVNAME_SIZE];
 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-	bool discard_supported = false;
+	unsigned short blksize = 512;

 	if (!conf)
 		return -ENOMEM;
@@ -103,6 +103,9 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 		sector_div(sectors, mddev->chunk_sectors);
 		rdev1->sectors = sectors * mddev->chunk_sectors;

+		blksize = max(blksize, queue_logical_block_size(
+				      rdev1->bdev->bd_disk->queue));
+
 		rdev_for_each(rdev2, mddev) {
 			pr_debug("md/raid0:%s:   comparing %s(%llu)"
 				 " with %s(%llu)\n",
@@ -139,6 +142,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	}
 	pr_debug("md/raid0:%s: FINAL %d zones\n",
 		 mdname(mddev), conf->nr_strip_zones);
+	/*
+	 * now since we have the hard sector sizes, we can make sure
+	 * chunk size is a multiple of that sector size
+	 */
+	if ((mddev->chunk_sectors << 9) % blksize) {
+		printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
+		       mdname(mddev),
+		       mddev->chunk_sectors << 9, blksize);
+		err = -EINVAL;
+		goto abort;
+	}
+
 	err = -ENOMEM;
 	conf->strip_zone = kzalloc(sizeof(struct strip_zone)*
 				conf->nr_strip_zones, GFP_KERNEL);
@@ -193,18 +208,12 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 		}
 		dev[j] = rdev1;

-		disk_stack_limits(mddev->gendisk, rdev1->bdev,
-				  rdev1->data_offset << 9);
-
 		if (rdev1->bdev->bd_disk->queue->merge_bvec_fn)
 			conf->has_merge_bvec = 1;

 		if (!smallest || (rdev1->sectors < smallest->sectors))
 			smallest = rdev1;
 		cnt++;
-
-		if (blk_queue_discard(bdev_get_queue(rdev1->bdev)))
-			discard_supported = true;
 	}
 	if (cnt != mddev->raid_disks) {
 		printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
@@ -267,26 +276,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	mddev->queue->backing_dev_info.congested_fn = raid0_congested;
 	mddev->queue->backing_dev_info.congested_data = mddev;

-	/*
-	 * now since we have the hard sector sizes, we can make sure
-	 * chunk size is a multiple of that sector size
-	 */
-	if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) {
-		printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n",
-		       mdname(mddev),
-		       mddev->chunk_sectors << 9);
-		goto abort;
-	}
-
-	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
-	blk_queue_io_opt(mddev->queue,
-			 (mddev->chunk_sectors << 9) * mddev->raid_disks);
-
-	if (!discard_supported)
-		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
-	else
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
-
 	pr_debug("md/raid0:%s: done.\n", mdname(mddev));
 	*private_conf = conf;

@@ -429,6 +418,8 @@ static int raid0_run(struct mddev *mddev)
 {
 	struct r0conf *conf;
 	int ret;
+	struct md_rdev *rdev;
+	bool discard_supported = false;

 	if (mddev->chunk_sectors == 0) {
 		printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
@@ -437,9 +428,6 @@ static int raid0_run(struct mddev *mddev)
 	}
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
-	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
-	blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
-	blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);

 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
@@ -449,6 +437,24 @@ static int raid0_run(struct mddev *mddev)
 		mddev->private = conf;
 	}
 	conf = mddev->private;
+	rdev_for_each(rdev, mddev) {
+		disk_stack_limits(mddev->gendisk, rdev->bdev,
+				  rdev->data_offset << 9);
+		if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+			discard_supported = true;
+	}
+	blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
+	blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
+	blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
+
+	blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+	blk_queue_io_opt(mddev->queue,
+			 (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
+	if (!discard_supported)
+		queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
+	else
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);

 	/* calculate array device size */
 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));