Commit e83d776f authored by Keith Busch's avatar Keith Busch Committed by Sagi Grimberg

nvme: only use power of two io boundaries

The kernel requires a power of two for boundaries because that's the
only way it can efficiently split commands that cross them. A
controller, however, may report a non-power of two boundary.

The driver had been rounding the controller's value to one the kernel
can use, but splitting on the wrong boundary provides no benefit on the
device side, and incurs additional submission overhead from non-optimal
splits.

Don't provide any boundary hint if the controller's value can't be used
and log a warning when first scanning a disk's unreported IO boundary.
Since the chunk sector logic has grown, move it to a separate function.

Cc: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: default avatarKeith Busch <kbusch@kernel.org>
Reviewed-by: default avatarMartin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: default avatarSagi Grimberg <sagi@grimberg.me>
parent 192f6c29
...@@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk, ...@@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue); blk_mq_unfreeze_queue(disk->queue);
} }
static inline bool nvme_first_scan(struct gendisk *disk)
{
/* nvme_alloc_ns() scans the disk prior to adding it */
return !(disk->flags & GENHD_FL_UP);
}
static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob;
if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
if (!iob)
return;
if (!is_power_of_2(iob)) {
if (nvme_first_scan(ns->disk))
pr_warn("%s: ignoring unaligned IO boundary:%u\n",
ns->disk->disk_name, iob);
return;
}
if (blk_queue_is_zoned(ns->disk->queue)) {
if (nvme_first_scan(ns->disk))
pr_warn("%s: ignoring zoned namespace IO boundary\n",
ns->disk->disk_name);
return;
}
blk_queue_chunk_sectors(ns->queue, iob);
}
static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{ {
unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK; unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
struct nvme_ns *ns = disk->private_data; struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_ctrl *ctrl = ns->ctrl;
int ret; int ret;
u32 iob;
/* /*
* If identify namespace failed, use default 512 byte block size so * If identify namespace failed, use default 512 byte block size so
...@@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ...@@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
return -ENODEV; return -ENODEV;
} }
if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
ns->features = 0; ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms); ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* the PI implementation requires metadata equal t10 pi tuple size */ /* the PI implementation requires metadata equal t10 pi tuple size */
...@@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ...@@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
} }
} }
if (iob && !blk_queue_is_zoned(ns->queue)) nvme_set_chunk_sectors(ns, id);
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id); nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH #ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) { if (ns->head->disk) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment