Commit b3b25b1d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-4.16/dm-fixes-2' of...

Merge tag 'for-4.16/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm

Pull device mapper fixes from Mike Snitzer:

 - Fix an uninitialized variable false warning in dm bufio

 - Fix DM's passthrough ioctl support to be race free against an
   underlying device being removed.

 - Fix corner-case of DM raid resync reporting if/when the raid becomes
   degraded during resync; otherwise automated raid repair will fail.

 - A few DM multipath fixes to make non-SCSI optimizations, that were
   introduced during the 4.16 merge, useful for all non-SCSI devices,
   rather than narrowly define this non-SCSI mode in terms of "nvme".

   This allows the removal of "queue_mode nvme" that really didn't need
   to be introduced. Instead DM core will internalize whether
   nvme-specific IO submission optimizations are doable and DM multipath
   will only do SCSI-specific device handler operations if SCSI is in
   use.

* tag 'for-4.16/dm-fixes-2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm:
  dm table: allow upgrade from bio-based to specialized bio-based variant
  dm mpath: remove unnecessary NVMe branching in favor of scsi_dh checks
  dm table: fix "nvme" test
  dm raid: fix incorrect sync_ratio when degraded
  dm: use blkdev_get rather than bdgrab when issuing pass-through ioctl
  dm bufio: avoid false-positive Wmaybe-uninitialized warning
parents 2f64e70c c934edad
......@@ -386,9 +386,6 @@ static void __cache_size_refresh(void)
static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
enum data_mode *data_mode)
{
unsigned noio_flag;
void *ptr;
if (c->block_size <= DM_BUFIO_BLOCK_SIZE_SLAB_LIMIT) {
*data_mode = DATA_MODE_SLAB;
return kmem_cache_alloc(DM_BUFIO_CACHE(c), gfp_mask);
......@@ -412,16 +409,15 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
* all allocations done by this process (including pagetables) are done
* as if GFP_NOIO was specified.
*/
if (gfp_mask & __GFP_NORETRY) {
unsigned noio_flag = memalloc_noio_save();
void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
if (gfp_mask & __GFP_NORETRY)
noio_flag = memalloc_noio_save();
ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
if (gfp_mask & __GFP_NORETRY)
memalloc_noio_restore(noio_flag);
return ptr;
}
return ptr;
return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
}
/*
......
......@@ -22,6 +22,7 @@
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>
#include <linux/blk-mq.h>
......@@ -211,25 +212,13 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
else
m->queue_mode = DM_TYPE_REQUEST_BASED;
} else if (m->queue_mode == DM_TYPE_BIO_BASED ||
m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
if (m->queue_mode == DM_TYPE_BIO_BASED) {
/*
* bio-based doesn't support any direct scsi_dh management;
* it just discovers if a scsi_dh is attached.
*/
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
}
}
if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
set_bit(MPATHF_QUEUE_IO, &m->flags);
atomic_set(&m->pg_init_in_progress, 0);
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
/*
* bio-based doesn't support any direct scsi_dh management;
* it just discovers if a scsi_dh is attached.
*/
set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags);
}
dm_table_set_type(ti->table, m->queue_mode);
......@@ -337,14 +326,12 @@ static void __switch_pg(struct multipath *m, struct priority_group *pg)
{
m->current_pg = pg;
if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
return;
/* Must we initialise the PG first, and queue I/O till it's ready? */
if (m->hw_handler_name) {
set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
set_bit(MPATHF_QUEUE_IO, &m->flags);
} else {
/* FIXME: not needed if no scsi_dh is attached */
clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags);
clear_bit(MPATHF_QUEUE_IO, &m->flags);
}
......@@ -385,8 +372,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
unsigned bypassed = 1;
if (!atomic_read(&m->nr_valid_paths)) {
if (m->queue_mode != DM_TYPE_NVME_BIO_BASED)
clear_bit(MPATHF_QUEUE_IO, &m->flags);
clear_bit(MPATHF_QUEUE_IO, &m->flags);
goto failed;
}
......@@ -599,7 +585,7 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
return pgpath;
}
static struct pgpath *__map_bio_nvme(struct multipath *m, struct bio *bio)
static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
unsigned long flags;
......@@ -634,8 +620,8 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio,
{
struct pgpath *pgpath;
if (m->queue_mode == DM_TYPE_NVME_BIO_BASED)
pgpath = __map_bio_nvme(m, bio);
if (!m->hw_handler_name)
pgpath = __map_bio_fast(m, bio);
else
pgpath = __map_bio(m, bio);
......@@ -675,8 +661,7 @@ static void process_queued_io_list(struct multipath *m)
{
if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
else if (m->queue_mode == DM_TYPE_BIO_BASED ||
m->queue_mode == DM_TYPE_NVME_BIO_BASED)
else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
}
......@@ -838,6 +823,16 @@ static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, char **
*/
kfree(m->hw_handler_name);
m->hw_handler_name = attached_handler_name;
/*
* Init fields that are only used when a scsi_dh is attached
*/
if (!test_and_set_bit(MPATHF_QUEUE_IO, &m->flags)) {
atomic_set(&m->pg_init_in_progress, 0);
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
}
}
}
......@@ -873,6 +868,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
int r;
struct pgpath *p;
struct multipath *m = ti->private;
struct scsi_device *sdev;
/* we need at least a path arg */
if (as->argc < 1) {
......@@ -891,7 +887,9 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
goto bad;
}
if (m->queue_mode != DM_TYPE_NVME_BIO_BASED) {
sdev = scsi_device_from_queue(bdev_get_queue(p->path.dev->bdev));
if (sdev) {
put_device(&sdev->sdev_gendev);
INIT_DELAYED_WORK(&p->activate_path, activate_path_work);
r = setup_scsi_dh(p->path.dev->bdev, m, &ti->error);
if (r) {
......@@ -1001,8 +999,7 @@ static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
if (!hw_argc)
return 0;
if (m->queue_mode == DM_TYPE_BIO_BASED ||
m->queue_mode == DM_TYPE_NVME_BIO_BASED) {
if (m->queue_mode == DM_TYPE_BIO_BASED) {
dm_consume_args(as, hw_argc);
DMERR("bio-based multipath doesn't allow hardware handler args");
return 0;
......@@ -1091,8 +1088,6 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m)
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "nvme"))
m->queue_mode = DM_TYPE_NVME_BIO_BASED;
else if (!strcasecmp(queue_mode_name, "rq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
else if (!strcasecmp(queue_mode_name, "mq"))
......@@ -1193,7 +1188,7 @@ static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv)
ti->num_discard_bios = 1;
ti->num_write_same_bios = 1;
ti->num_write_zeroes_bios = 1;
if (m->queue_mode == DM_TYPE_BIO_BASED || m->queue_mode == DM_TYPE_NVME_BIO_BASED)
if (m->queue_mode == DM_TYPE_BIO_BASED)
ti->per_io_data_size = multipath_per_bio_data_size();
else
ti->per_io_data_size = sizeof(struct dm_mpath_io);
......@@ -1730,9 +1725,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
case DM_TYPE_NVME_BIO_BASED:
DMEMIT("queue_mode nvme ");
break;
case DM_TYPE_MQ_REQUEST_BASED:
DMEMIT("queue_mode mq ");
break;
......
......@@ -3408,9 +3408,10 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery,
set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags);
} else {
if (test_bit(MD_RECOVERY_NEEDED, &recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
test_bit(MD_RECOVERY_RUNNING, &recovery))
if (!test_bit(MD_RECOVERY_INTR, &recovery) &&
(test_bit(MD_RECOVERY_NEEDED, &recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &recovery) ||
test_bit(MD_RECOVERY_RUNNING, &recovery)))
r = mddev->curr_resync_completed;
else
r = mddev->recovery_cp;
......
......@@ -942,17 +942,12 @@ static int dm_table_determine_type(struct dm_table *t)
if (t->type != DM_TYPE_NONE) {
/* target already set the table's type */
if (t->type == DM_TYPE_BIO_BASED)
return 0;
else if (t->type == DM_TYPE_NVME_BIO_BASED) {
if (!dm_table_does_not_support_partial_completion(t)) {
DMERR("nvme bio-based is only possible with devices"
" that don't support partial completion");
return -EINVAL;
}
/* Fallthru, also verify all devices are blk-mq */
if (t->type == DM_TYPE_BIO_BASED) {
/* possibly upgrade to a variant of bio-based */
goto verify_bio_based;
}
BUG_ON(t->type == DM_TYPE_DAX_BIO_BASED);
BUG_ON(t->type == DM_TYPE_NVME_BIO_BASED);
goto verify_rq_based;
}
......@@ -985,6 +980,7 @@ static int dm_table_determine_type(struct dm_table *t)
}
if (bio_based) {
verify_bio_based:
/* We must use this table as bio-based */
t->type = DM_TYPE_BIO_BASED;
if (dm_table_supports_dax(t) ||
......@@ -1755,7 +1751,7 @@ static int device_no_partial_completion(struct dm_target *ti, struct dm_dev *dev
char b[BDEVNAME_SIZE];
/* For now, NVMe devices are the only devices of this class */
return (strncmp(bdevname(dev->bdev, b), "nvme", 3) == 0);
return (strncmp(bdevname(dev->bdev, b), "nvme", 4) == 0);
}
static bool dm_table_does_not_support_partial_completion(struct dm_table *t)
......
......@@ -458,9 +458,11 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return dm_get_geometry(md, geo);
}
static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
struct block_device **bdev,
fmode_t *mode)
static char *_dm_claim_ptr = "I belong to device-mapper";
static int dm_get_bdev_for_ioctl(struct mapped_device *md,
struct block_device **bdev,
fmode_t *mode)
{
struct dm_target *tgt;
struct dm_table *map;
......@@ -490,6 +492,10 @@ static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
goto out;
bdgrab(*bdev);
r = blkdev_get(*bdev, *mode, _dm_claim_ptr);
if (r < 0)
goto out;
dm_put_live_table(md, srcu_idx);
return r;
......@@ -508,7 +514,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
struct mapped_device *md = bdev->bd_disk->private_data;
int r;
r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
......@@ -528,7 +534,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
r = __blkdev_driver_ioctl(bdev, mode, cmd, arg);
out:
bdput(bdev);
blkdev_put(bdev, mode);
return r;
}
......@@ -708,14 +714,13 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
static int open_table_device(struct table_device *td, dev_t dev,
struct mapped_device *md)
{
static char *_claim_ptr = "I belong to device-mapper";
struct block_device *bdev;
int r;
BUG_ON(td->dm_dev.bdev);
bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
bdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _dm_claim_ptr);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
......@@ -3011,7 +3016,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
fmode_t mode;
int r;
r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
......@@ -3021,7 +3026,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
else
r = -EOPNOTSUPP;
bdput(bdev);
blkdev_put(bdev, mode);
return r;
}
......@@ -3032,7 +3037,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
fmode_t mode;
int r;
r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
......@@ -3042,7 +3047,7 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
else
r = -EOPNOTSUPP;
bdput(bdev);
blkdev_put(bdev, mode);
return r;
}
......@@ -3054,7 +3059,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
fmode_t mode;
int r;
r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
......@@ -3064,7 +3069,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
else
r = -EOPNOTSUPP;
bdput(bdev);
blkdev_put(bdev, mode);
return r;
}
......@@ -3075,7 +3080,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
fmode_t mode;
int r;
r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
r = dm_get_bdev_for_ioctl(md, &bdev, &mode);
if (r < 0)
return r;
......@@ -3085,7 +3090,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
else
r = -EOPNOTSUPP;
bdput(bdev);
blkdev_put(bdev, mode);
return r;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment