xen/blkback: Use physical sector size for setup

Currently xen-blkback passes the logical sector size over xenbus and xen-blkfront sets up the paravirt disk with that logical block size. But newer drives usually have the logical sector size set to 512 for compatibility reasons and would show the actual sector size only in physical sector size. This results in the device being partitioned and accessed in dom0 with the correct sector size, but the guest thinks 512 bytes is the correct block size. And that results in poor performance. To fix this, blkback gets modified to pass also physical-sector-size over xenbus and blkfront to use both values to set up the paravirt disk. I did not just change the passed in sector-size because I am not sure having a bigger logical sector size than the physical one is valid (and that would happen if a newer dom0 kernel hits an older domU kernel). Also this way a domU set up before should still be accessible (just some tools might detect the unaligned setup). [v2: Make xenbus write failure non-fatal] [v3: Use xenbus_scanf instead of xenbus_gather] [v4: Rebased against segment changes] Signed-off-by: Stefan Bader <stefan.bader@canonical.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

xen/blkback: Use physical sector size for setup
Currently xen-blkback passes the logical sector size over xenbus and xen-blkfront sets up the paravirt disk with that logical block size. But newer drives usually have the logical sector size set to 512 for compatibility reasons and would show the actual sector size only in physical sector size. This results in the device being partitioned and accessed in dom0 with the correct sector size, but the guest thinks 512 bytes is the correct block size. And that results in poor performance. To fix this, blkback gets modified to pass also physical-sector-size over xenbus and blkfront to use both values to set up the paravirt disk. I did not just change the passed in sector-size because I am not sure having a bigger logical sector size than the physical one is valid (and that would happen if a newer dom0 kernel hits an older domU kernel). Also this way a domU set up before should still be accessible (just some tools might detect the unaligned setup). [v2: Make xenbus write failure non-fatal] [v3: Use xenbus_scanf instead of xenbus_gather] [v4: Rebased against segment changes] Signed-off-by: Stefan Bader <stefan.bader@canonical.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
7c4d7d71 · Stefan Bader · Konrad Rzeszutek Wilk · 1d199650 · 7c4d7d71 · 7c4d7d71
Commit 7c4d7d71 authored May 13, 2013 by Stefan Bader Committed by Konrad Rzeszutek Wilk Jun 07, 2013
Hide whitespace changes
Inline Side-by-side

Showing with 23 additions and 3 deletions

drivers/block/xen-blkback/xenbus.c drivers/block/xen-blkback/xenbus.c +5 -0

drivers/block/xen-blkfront.c drivers/block/xen-blkfront.c +18 -3

No files found.
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -782,6 +782,11 @@ static void connect(struct backend_info *be)
 				 dev->nodename);
 		goto abort;
 	}
+	err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
+			    bdev_physical_block_size(be->blkif->vbd.bdev));
+	if (err)
+		xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
+				 dev->nodename);

 	err = xenbus_transaction_end(xbt, 0);
 	if (err == -EAGAIN)

--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -609,6 +609,7 @@ static void do_blkif_request(struct request_queue *rq)
 }

 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
+				unsigned int physical_sector_size,
 				unsigned int segments)
 {
 	struct request_queue *rq;
@@ -631,6 +632,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,

 	/* Hard sector size and max sectors impersonate the equiv. hardware. */
 	blk_queue_logical_block_size(rq, sector_size);
+	blk_queue_physical_block_size(rq, physical_sector_size);
 	blk_queue_max_hw_sectors(rq, 512);

 	/* Each segment in a request is up to an aligned page in size. */
@@ -737,7 +739,8 @@ static char *encode_disk_name(char *ptr, unsigned int n)

 static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 			       struct blkfront_info *info,
-			       u16 vdisk_info, u16 sector_size)
+			       u16 vdisk_info, u16 sector_size,
+			       unsigned int physical_sector_size)
 {
 	struct gendisk *gd;
 	int nr_minors = 1;
@@ -804,7 +807,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
 	gd->driverfs_dev = &(info->xbdev->dev);
 	set_capacity(gd, capacity);

-	if (xlvbd_init_blk_queue(gd, sector_size,
+	if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size,
 				 info->max_indirect_segments ? :
 				 BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
 		del_gendisk(gd);
@@ -1698,6 +1701,7 @@ static void blkfront_connect(struct blkfront_info *info)
 {
 	unsigned long long sectors;
 	unsigned long sector_size;
+	unsigned int physical_sector_size;
 	unsigned int binfo;
 	int err;
 	int barrier, flush, discard, persistent;
@@ -1747,6 +1751,16 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}

+	/*
+	 * physcial-sector-size is a newer field, so old backends may not
+	 * provide this. Assume physical sector size to be the same as
+	 * sector_size in that case.
+	 */
+	err = xenbus_scanf(XBT_NIL, info->xbdev->otherend,
+			   "physical-sector-size", "%u", &physical_sector_size);
+	if (err != 1)
+		physical_sector_size = sector_size;
+
 	info->feature_flush = 0;
 	info->flush_op = 0;

@@ -1800,7 +1814,8 @@ static void blkfront_connect(struct blkfront_info *info)
 		return;
 	}

-	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
+	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size,
+				  physical_sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
 				 info->xbdev->otherend);