Commit 86f1152b authored by Benjamin Marzinski, committed by Mike Snitzer

dm: allow active and inactive tables to share dm_devs

Until this change, when loading a new DM table, DM core would re-open
all of the devices in the DM table.  Now, DM core will avoid redundant
device opens (and closes when destroying the old table) if the old
table already has a device open using the same mode.  This is achieved
by managing reference counts on the table_devices that DM core now
stores in the mapped_device structure (rather than in the dm_table
structure).  So a mapped_device's active and inactive dm_tables' dm_dev
lists now just point to the dm_devs stored in the mapped_device's
table_devices list.

This improvement in DM core's device reference counting has the
side-effect of fixing a long-standing limitation of the multipath
target: a DM multipath table couldn't include any paths that were unusable
(failed).  For example: if all paths have failed and you add a new,
working, path to the table; you can't use it since the table load would
fail due to it still containing failed paths.  Now a re-load of a
multipath table can include failed devices and when those devices become
active again they can be used instantly.

The device list code in dm.c isn't a straight copy/paste from the code in
dm-table.c, but it's very close (aside from some variable renames).  One
subtle difference is that find_table_device for the table_devices list
will only match devices with the same name and mode.  This is because we
don't want to upgrade a device's mode in the active table when an
inactive table is loaded.

Access to the mapped_device structure's table_devices list requires a
mutex (table_devices_lock), so that tables cannot be created and
destroyed concurrently.
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
parent 1f271972
...@@ -1418,7 +1418,7 @@ static void retrieve_deps(struct dm_table *table, ...@@ -1418,7 +1418,7 @@ static void retrieve_deps(struct dm_table *table,
deps->count = count; deps->count = count;
count = 0; count = 0;
list_for_each_entry (dd, dm_table_get_devices(table), list) list_for_each_entry (dd, dm_table_get_devices(table), list)
deps->dev[count++] = huge_encode_dev(dd->dm_dev.bdev->bd_dev); deps->dev[count++] = huge_encode_dev(dd->dm_dev->bdev->bd_dev);
param->data_size = param->data_start + needed; param->data_size = param->data_start + needed;
} }
......
...@@ -210,15 +210,16 @@ int dm_table_create(struct dm_table **result, fmode_t mode, ...@@ -210,15 +210,16 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
return 0; return 0;
} }
static void free_devices(struct list_head *devices) static void free_devices(struct list_head *devices, struct mapped_device *md)
{ {
struct list_head *tmp, *next; struct list_head *tmp, *next;
list_for_each_safe(tmp, next, devices) { list_for_each_safe(tmp, next, devices) {
struct dm_dev_internal *dd = struct dm_dev_internal *dd =
list_entry(tmp, struct dm_dev_internal, list); list_entry(tmp, struct dm_dev_internal, list);
DMWARN("dm_table_destroy: dm_put_device call missing for %s", DMWARN("%s: dm_table_destroy: dm_put_device call missing for %s",
dd->dm_dev.name); dm_device_name(md), dd->dm_dev->name);
dm_put_table_device(md, dd->dm_dev);
kfree(dd); kfree(dd);
} }
} }
...@@ -247,7 +248,7 @@ void dm_table_destroy(struct dm_table *t) ...@@ -247,7 +248,7 @@ void dm_table_destroy(struct dm_table *t)
vfree(t->highs); vfree(t->highs);
/* free the device list */ /* free the device list */
free_devices(&t->devices); free_devices(&t->devices, t->md);
dm_free_md_mempools(t->mempools); dm_free_md_mempools(t->mempools);
...@@ -262,52 +263,12 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) ...@@ -262,52 +263,12 @@ static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev)
struct dm_dev_internal *dd; struct dm_dev_internal *dd;
list_for_each_entry (dd, l, list) list_for_each_entry (dd, l, list)
if (dd->dm_dev.bdev->bd_dev == dev) if (dd->dm_dev->bdev->bd_dev == dev)
return dd; return dd;
return NULL; return NULL;
} }
/*
 * Open the block device @dev so it can be used as a map destination.
 *
 * The device is opened with the mode already stored in d->dm_dev.mode,
 * with FMODE_EXCL added, and bd_link_disk_holder() is then called with
 * dm_disk(md).  Returns 0 and stores the block device in d->dm_dev.bdev
 * on success; otherwise returns the error from blkdev_get_by_dev() or
 * bd_link_disk_holder() and leaves d->dm_dev.bdev unset.
 */
static int open_dev(struct dm_dev_internal *d, dev_t dev,
		    struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *blkdev;
	int err;

	/* The entry handed in must not already hold an open device. */
	BUG_ON(d->dm_dev.bdev);

	blkdev = blkdev_get_by_dev(dev, d->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(blkdev))
		return PTR_ERR(blkdev);

	err = bd_link_disk_holder(blkdev, dm_disk(md));
	if (!err) {
		d->dm_dev.bdev = blkdev;
		return 0;
	}

	/* Linking failed: undo the blkdev_get_by_dev() above. */
	blkdev_put(blkdev, d->dm_dev.mode | FMODE_EXCL);
	return err;
}
/*
 * Close a device previously opened with open_dev().
 *
 * Does nothing if the entry holds no block device.  Otherwise the holder
 * link to dm_disk(md) is removed, the device is put with the same mode it
 * was opened with, and d->dm_dev.bdev is reset to NULL.
 */
static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
{
	struct block_device *blkdev = d->dm_dev.bdev;

	if (!blkdev)
		return;

	bd_unlink_disk_holder(blkdev, dm_disk(md));
	blkdev_put(blkdev, d->dm_dev.mode | FMODE_EXCL);
	d->dm_dev.bdev = NULL;
}
/* /*
* If possible, this checks an area of a destination device is invalid. * If possible, this checks an area of a destination device is invalid.
*/ */
...@@ -386,19 +347,17 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, ...@@ -386,19 +347,17 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
struct mapped_device *md) struct mapped_device *md)
{ {
int r; int r;
struct dm_dev_internal dd_new, dd_old; struct dm_dev *old_dev, *new_dev;
dd_new = dd_old = *dd; old_dev = dd->dm_dev;
dd_new.dm_dev.mode |= new_mode; r = dm_get_table_device(md, dd->dm_dev->bdev->bd_dev,
dd_new.dm_dev.bdev = NULL; dd->dm_dev->mode | new_mode, &new_dev);
r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
if (r) if (r)
return r; return r;
dd->dm_dev.mode |= new_mode; dd->dm_dev = new_dev;
close_dev(&dd_old, md); dm_put_table_device(md, old_dev);
return 0; return 0;
} }
...@@ -440,27 +399,22 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, ...@@ -440,27 +399,22 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
if (!dd) if (!dd)
return -ENOMEM; return -ENOMEM;
dd->dm_dev.mode = mode; if ((r = dm_get_table_device(t->md, dev, mode, &dd->dm_dev))) {
dd->dm_dev.bdev = NULL;
if ((r = open_dev(dd, dev, t->md))) {
kfree(dd); kfree(dd);
return r; return r;
} }
format_dev_t(dd->dm_dev.name, dev);
atomic_set(&dd->count, 0); atomic_set(&dd->count, 0);
list_add(&dd->list, &t->devices); list_add(&dd->list, &t->devices);
} else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { } else if (dd->dm_dev->mode != (mode | dd->dm_dev->mode)) {
r = upgrade_mode(dd, mode, t->md); r = upgrade_mode(dd, mode, t->md);
if (r) if (r)
return r; return r;
} }
atomic_inc(&dd->count); atomic_inc(&dd->count);
*result = &dd->dm_dev; *result = dd->dm_dev;
return 0; return 0;
} }
EXPORT_SYMBOL(dm_get_device); EXPORT_SYMBOL(dm_get_device);
...@@ -505,11 +459,23 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, ...@@ -505,11 +459,23 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
*/ */
void dm_put_device(struct dm_target *ti, struct dm_dev *d) void dm_put_device(struct dm_target *ti, struct dm_dev *d)
{ {
struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, int found = 0;
dm_dev); struct list_head *devices = &ti->table->devices;
struct dm_dev_internal *dd;
list_for_each_entry(dd, devices, list) {
if (dd->dm_dev == d) {
found = 1;
break;
}
}
if (!found) {
DMWARN("%s: device %s not in table devices list",
dm_device_name(ti->table->md), d->name);
return;
}
if (atomic_dec_and_test(&dd->count)) { if (atomic_dec_and_test(&dd->count)) {
close_dev(dd, ti->table->md); dm_put_table_device(ti->table->md, d);
list_del(&dd->list); list_del(&dd->list);
kfree(dd); kfree(dd);
} }
...@@ -906,7 +872,7 @@ static int dm_table_set_type(struct dm_table *t) ...@@ -906,7 +872,7 @@ static int dm_table_set_type(struct dm_table *t)
/* Non-request-stackable devices can't be used for request-based dm */ /* Non-request-stackable devices can't be used for request-based dm */
devices = dm_table_get_devices(t); devices = dm_table_get_devices(t);
list_for_each_entry(dd, devices, list) { list_for_each_entry(dd, devices, list) {
if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev.bdev))) { if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) {
DMWARN("table load rejected: including" DMWARN("table load rejected: including"
" non-request-stackable devices"); " non-request-stackable devices");
return -EINVAL; return -EINVAL;
...@@ -1043,7 +1009,7 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t, ...@@ -1043,7 +1009,7 @@ static struct gendisk * dm_table_get_integrity_disk(struct dm_table *t,
struct gendisk *prev_disk = NULL, *template_disk = NULL; struct gendisk *prev_disk = NULL, *template_disk = NULL;
list_for_each_entry(dd, devices, list) { list_for_each_entry(dd, devices, list) {
template_disk = dd->dm_dev.bdev->bd_disk; template_disk = dd->dm_dev->bdev->bd_disk;
if (!blk_get_integrity(template_disk)) if (!blk_get_integrity(template_disk))
goto no_integrity; goto no_integrity;
if (!match_all && !blk_integrity_is_initialized(template_disk)) if (!match_all && !blk_integrity_is_initialized(template_disk))
...@@ -1629,7 +1595,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) ...@@ -1629,7 +1595,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
int r = 0; int r = 0;
list_for_each_entry(dd, devices, list) { list_for_each_entry(dd, devices, list) {
struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev);
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
if (likely(q)) if (likely(q))
...@@ -1637,7 +1603,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits) ...@@ -1637,7 +1603,7 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
else else
DMWARN_LIMIT("%s: any_congested: nonexistent device %s", DMWARN_LIMIT("%s: any_congested: nonexistent device %s",
dm_device_name(t->md), dm_device_name(t->md),
bdevname(dd->dm_dev.bdev, b)); bdevname(dd->dm_dev->bdev, b));
} }
list_for_each_entry(cb, &t->target_callbacks, list) list_for_each_entry(cb, &t->target_callbacks, list)
......
...@@ -142,6 +142,9 @@ struct mapped_device { ...@@ -142,6 +142,9 @@ struct mapped_device {
*/ */
struct dm_table *map; struct dm_table *map;
struct list_head table_devices;
struct mutex table_devices_lock;
unsigned long flags; unsigned long flags;
struct request_queue *queue; struct request_queue *queue;
...@@ -212,6 +215,12 @@ struct dm_md_mempools { ...@@ -212,6 +215,12 @@ struct dm_md_mempools {
struct bio_set *bs; struct bio_set *bs;
}; };
/*
 * A device opened on behalf of a mapped_device.  Entries are kept on
 * mapped_device.table_devices; dm_get_table_device() hands out a pointer
 * to the embedded dm_dev, and dm_put_table_device() recovers the entry
 * from that pointer with container_of().
 */
struct table_device {
	struct list_head list;	/* link in mapped_device.table_devices */
	atomic_t count;		/* incremented by dm_get_table_device(), dropped by dm_put_table_device() */
	struct dm_dev dm_dev;	/* embedded handle returned to callers */
};
#define RESERVED_BIO_BASED_IOS 16 #define RESERVED_BIO_BASED_IOS 16
#define RESERVED_REQUEST_BASED_IOS 256 #define RESERVED_REQUEST_BASED_IOS 256
#define RESERVED_MAX_IOS 1024 #define RESERVED_MAX_IOS 1024
...@@ -669,6 +678,120 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU) ...@@ -669,6 +678,120 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
rcu_read_unlock(); rcu_read_unlock();
} }
/*
 * Open the block device @dev for table device @td so it can be used as a
 * map destination.
 *
 * The open uses the mode already stored in td->dm_dev.mode, with
 * FMODE_EXCL added, and bd_link_disk_holder() is then called with
 * dm_disk(md).  Returns 0 and stores the block device in td->dm_dev.bdev
 * on success; otherwise returns the error from blkdev_get_by_dev() or
 * bd_link_disk_holder() and leaves td->dm_dev.bdev unset.
 */
static int open_table_device(struct table_device *td, dev_t dev,
			     struct mapped_device *md)
{
	static char *_claim_ptr = "I belong to device-mapper";
	struct block_device *blkdev;
	int err;

	/* The table device handed in must not already be open. */
	BUG_ON(td->dm_dev.bdev);

	blkdev = blkdev_get_by_dev(dev, td->dm_dev.mode | FMODE_EXCL, _claim_ptr);
	if (IS_ERR(blkdev))
		return PTR_ERR(blkdev);

	err = bd_link_disk_holder(blkdev, dm_disk(md));
	if (!err) {
		td->dm_dev.bdev = blkdev;
		return 0;
	}

	/* Linking failed: undo the blkdev_get_by_dev() above. */
	blkdev_put(blkdev, td->dm_dev.mode | FMODE_EXCL);
	return err;
}
/*
 * Close a table device previously opened with open_table_device().
 *
 * Does nothing if @td holds no block device.  Otherwise the holder link
 * to dm_disk(md) is removed, the device is put with the same mode it was
 * opened with, and td->dm_dev.bdev is reset to NULL.
 */
static void close_table_device(struct table_device *td, struct mapped_device *md)
{
	struct block_device *blkdev = td->dm_dev.bdev;

	if (!blkdev)
		return;

	bd_unlink_disk_holder(blkdev, dm_disk(md));
	blkdev_put(blkdev, td->dm_dev.mode | FMODE_EXCL);
	td->dm_dev.bdev = NULL;
}
/*
 * Search list @l for a table device that is already open.
 *
 * An entry matches only when both its device number and its open mode
 * are equal to the requested ones.  Returns the matching entry, or NULL
 * if the list holds none.
 */
static struct table_device *find_table_device(struct list_head *l, dev_t dev,
					      fmode_t mode)
{
	struct table_device *entry;

	list_for_each_entry(entry, l, list) {
		if (entry->dm_dev.bdev->bd_dev != dev)
			continue;
		if (entry->dm_dev.mode != mode)
			continue;
		return entry;
	}

	return NULL;
}
/*
 * Take a reference on the table device for (@dev, @mode) in @md.
 *
 * If md->table_devices has no entry with exactly this device number and
 * mode, a new one is allocated, opened and added to the list.  The
 * lookup, insertion and reference count increment all happen under
 * md->table_devices_lock.
 *
 * Returns 0 and stores a pointer to the entry's embedded dm_dev in
 * *@result on success, -ENOMEM if the entry cannot be allocated, or the
 * error returned by open_table_device().  *@result is not written on
 * failure.
 */
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
			struct dm_dev **result)
{
	int r;
	struct table_device *td;

	mutex_lock(&md->table_devices_lock);

	td = find_table_device(&md->table_devices, dev, mode);
	if (!td) {
		td = kmalloc(sizeof(*td), GFP_KERNEL);
		if (!td) {
			r = -ENOMEM;
			goto fail;
		}

		td->dm_dev.mode = mode;
		td->dm_dev.bdev = NULL;

		r = open_table_device(td, dev, md);
		if (r)
			goto fail;

		format_dev_t(td->dm_dev.name, dev);

		/* Starts at zero; the increment below takes the first reference. */
		atomic_set(&td->count, 0);
		list_add(&td->list, &md->table_devices);
	}

	atomic_inc(&td->count);
	mutex_unlock(&md->table_devices_lock);

	*result = &td->dm_dev;
	return 0;

fail:
	mutex_unlock(&md->table_devices_lock);
	/* td is NULL on the allocation-failure path; kfree(NULL) is a no-op. */
	kfree(td);
	return r;
}
EXPORT_SYMBOL_GPL(dm_get_table_device);
/*
 * Drop a reference obtained from dm_get_table_device().
 *
 * @d must be the dm_dev embedded in a table_device; the enclosing entry
 * is recovered with container_of().  When the reference count reaches
 * zero the device is closed, unlinked from its list and freed.  The
 * whole operation runs under md->table_devices_lock.
 */
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d)
{
	struct table_device *td = container_of(d, struct table_device, dm_dev);

	mutex_lock(&md->table_devices_lock);

	/* Other references remain: nothing to tear down. */
	if (!atomic_dec_and_test(&td->count))
		goto out;

	close_table_device(td, md);
	list_del(&td->list);
	kfree(td);

out:
	mutex_unlock(&md->table_devices_lock);
}
EXPORT_SYMBOL(dm_put_table_device);
/*
 * Free every table device still present on @devices.
 *
 * Each remaining entry is reported with a warning that includes its name
 * and current reference count, then freed.  Entries are freed as-is:
 * they are neither unlinked from the list nor closed here.
 */
static void free_table_devices(struct list_head *devices)
{
	struct list_head *pos, *n;

	/* The _safe iterator is required because each entry is freed in the loop. */
	list_for_each_safe(pos, n, devices) {
		struct table_device *td;

		td = list_entry(pos, struct table_device, list);

		DMWARN("dm_destroy: %s still exists with %d references",
		       td->dm_dev.name, atomic_read(&td->count));
		kfree(td);
	}
}
/* /*
* Get the geometry associated with a dm device * Get the geometry associated with a dm device
*/ */
...@@ -1944,12 +2067,14 @@ static struct mapped_device *alloc_dev(int minor) ...@@ -1944,12 +2067,14 @@ static struct mapped_device *alloc_dev(int minor)
md->type = DM_TYPE_NONE; md->type = DM_TYPE_NONE;
mutex_init(&md->suspend_lock); mutex_init(&md->suspend_lock);
mutex_init(&md->type_lock); mutex_init(&md->type_lock);
mutex_init(&md->table_devices_lock);
spin_lock_init(&md->deferred_lock); spin_lock_init(&md->deferred_lock);
atomic_set(&md->holders, 1); atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0); atomic_set(&md->open_count, 0);
atomic_set(&md->event_nr, 0); atomic_set(&md->event_nr, 0);
atomic_set(&md->uevent_seq, 0); atomic_set(&md->uevent_seq, 0);
INIT_LIST_HEAD(&md->uevent_list); INIT_LIST_HEAD(&md->uevent_list);
INIT_LIST_HEAD(&md->table_devices);
spin_lock_init(&md->uevent_lock); spin_lock_init(&md->uevent_lock);
md->queue = blk_alloc_queue(GFP_KERNEL); md->queue = blk_alloc_queue(GFP_KERNEL);
...@@ -2035,6 +2160,7 @@ static void free_dev(struct mapped_device *md) ...@@ -2035,6 +2160,7 @@ static void free_dev(struct mapped_device *md)
blk_integrity_unregister(md->disk); blk_integrity_unregister(md->disk);
del_gendisk(md->disk); del_gendisk(md->disk);
cleanup_srcu_struct(&md->io_barrier); cleanup_srcu_struct(&md->io_barrier);
free_table_devices(&md->table_devices);
free_minor(minor); free_minor(minor);
spin_lock(&_minor_lock); spin_lock(&_minor_lock);
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
struct dm_dev_internal { struct dm_dev_internal {
struct list_head list; struct list_head list;
atomic_t count; atomic_t count;
struct dm_dev dm_dev; struct dm_dev *dm_dev;
}; };
struct dm_table; struct dm_table;
...@@ -188,6 +188,9 @@ int dm_cancel_deferred_remove(struct mapped_device *md); ...@@ -188,6 +188,9 @@ int dm_cancel_deferred_remove(struct mapped_device *md);
int dm_request_based(struct mapped_device *md); int dm_request_based(struct mapped_device *md);
sector_t dm_get_size(struct mapped_device *md); sector_t dm_get_size(struct mapped_device *md);
struct request_queue *dm_get_md_queue(struct mapped_device *md); struct request_queue *dm_get_md_queue(struct mapped_device *md);
int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
struct dm_dev **result);
void dm_put_table_device(struct mapped_device *md, struct dm_dev *d);
struct dm_stats *dm_get_stats(struct mapped_device *md); struct dm_stats *dm_get_stats(struct mapped_device *md);
int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action,
......
...@@ -267,9 +267,9 @@ enum { ...@@ -267,9 +267,9 @@ enum {
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
#define DM_VERSION_MAJOR 4 #define DM_VERSION_MAJOR 4
#define DM_VERSION_MINOR 27 #define DM_VERSION_MINOR 28
#define DM_VERSION_PATCHLEVEL 0 #define DM_VERSION_PATCHLEVEL 0
#define DM_VERSION_EXTRA "-ioctl (2013-10-30)" #define DM_VERSION_EXTRA "-ioctl (2014-09-17)"
/* Status bits */ /* Status bits */
#define DM_READONLY_FLAG (1 << 0) /* In/Out */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment