Commit f6bcfd94 authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (32 commits)
  dm: raid456 basic support
  dm: per target unplug callback support
  dm: introduce target callbacks and congestion callback
  dm mpath: delay activate_path retry on SCSI_DH_RETRY
  dm: remove superfluous irq disablement in dm_request_fn
  dm log: use PTR_ERR value instead of ENOMEM
  dm snapshot: avoid storing private suspended state
  dm snapshot: persistent make metadata_wq multithreaded
  dm: use non reentrant workqueues if equivalent
  dm: convert workqueues to alloc_ordered
  dm stripe: switch from local workqueue to system_wq
  dm: dont use flush_scheduled_work
  dm snapshot: remove unused dm_snapshot queued_bios_work
  dm ioctl: suppress needless warning messages
  dm crypt: add loop aes iv generator
  dm crypt: add multi key capability
  dm crypt: add post iv call to iv generator
  dm crypt: use io thread for reads only if mempool exhausted
  dm crypt: scale to multiple cpus
  dm crypt: simplify compatible table output
  ...
parents 509e4aef 9d09e663
@@ -8,7 +8,7 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
<cipher>
Encryption cipher and an optional IV generation mode.
-(In format cipher-chainmode-ivopts:ivmode).
+(In format cipher[:keycount]-chainmode-ivopts:ivmode).
Examples:
   des
   aes-cbc-essiv:sha256
@@ -20,6 +20,11 @@ Parameters: <cipher> <key> <iv_offset> <device path> <offset>
Key used for encryption. It is encoded as a hexadecimal number.
You can only use key sizes that are valid for the selected cipher.
<keycount>
Multi-key compatibility mode. You can define <keycount> keys and
then sectors are encrypted according to their offsets (sector 0 uses key0;
sector 1 uses key1 etc.). <keycount> must be a power of two.
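An editor's illustration of the extended cipher syntax (not part of the patch): a
two-key variant of the aes-cbc-essiv example above would use

    aes:2-cbc-essiv:sha256

so that, per the description above, sector 0 is encrypted with key0, sector 1
with key1, sector 2 with key0 again, and so on; the <key> parameter is then
assumed to carry both keys concatenated into a single hexadecimal string.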
<iv_offset>
The IV offset is a sector count that is added to the sector number
before creating the IV.
......
Device-mapper RAID (dm-raid) is a bridge from DM to MD. It
provides a way to use device-mapper interfaces to access the MD RAID
drivers.
As with all device-mapper targets, the nominal public interfaces are the
constructor (CTR) tables and the status outputs (both STATUSTYPE_INFO
and STATUSTYPE_TABLE). The CTR table looks like the following:
1: <s> <l> raid \
2: <raid_type> <#raid_params> <raid_params> \
3: <#raid_devs> <meta_dev1> <dev1> .. <meta_devN> <devN>
Line 1 contains the standard first three arguments to any device-mapper
target - the start, length, and target type fields. The target type in
this case is "raid".
Line 2 contains the arguments that define the particular raid
type/personality/level, the required arguments for that raid type, and
any optional arguments. Possible raid types include: raid4, raid5_la,
raid5_ls, raid5_rs, raid6_zr, raid6_nr, and raid6_nc. (raid1 is
planned for the future.) The list of required and optional parameters
is the same for all the current raid types. The required parameters are
positional, while the optional parameters are given as key/value pairs.
The possible parameters are as follows:
<chunk_size> Chunk size in sectors.
[[no]sync] Force/Prevent RAID initialization
[rebuild <idx>] Rebuild the drive indicated by the index
[daemon_sleep <ms>] Time between bitmap daemon work to clear bits
[min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
[max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
[max_write_behind <sectors>] See '-write-behind=' (man mdadm)
[stripe_cache <sectors>] Stripe cache size for higher RAIDs
Line 3 contains the list of devices that compose the array in
metadata/data device pairs. If the metadata is stored separately, a '-'
is given for the metadata device position. If a drive has failed or is
missing at creation time, a '-' can be given for both the metadata and
data drives for a given position.
NB. Currently all metadata devices must be specified as '-'.
Examples:
# RAID4 - 4 data drives, 1 parity
# No metadata devices specified to hold superblock/bitmap info
# Chunk size of 1MiB
# (Lines separated for easy reading)
0 1960893648 raid \
raid4 1 2048 \
5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
# RAID4 - 4 data drives, 1 parity (no metadata devices)
# Chunk size of 1MiB, force RAID initialization,
# min recovery rate at 20 kB/sec/disk
0 1960893648 raid \
raid4 4 2048 min_recovery_rate 20 sync \
5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
Performing a 'dmsetup table' should display the CTR table used to
construct the mapping (with possible reordering of optional
parameters).
Performing a 'dmsetup status' will yield information on the state and
health of the array. The output is as follows:
1: <s> <l> raid \
2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
Line 1 is standard DM output. Line 2 is best shown by example:
0 1960893648 raid raid4 5 AAAAA 2/490221568
Here we can see the RAID type is raid4, there are 5 devices - all of
which are 'A'live - and recovery of the array is 2/490221568 complete.
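As a rough usage sketch (editor's addition, with the example's continuation
lines joined back into a single table line and the same placeholder device
numbers), such a table is normally fed to dmsetup on stdin and the status
line described above queried afterwards:

    echo "0 1960893648 raid raid4 1 2048 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81" | \
        dmsetup create my_raid4
    dmsetup status my_raid4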
@@ -240,6 +240,30 @@ config DM_MIRROR
Allow volume managers to mirror logical volumes, also
needed for live data migration tools such as 'pvmove'.
config DM_RAID
tristate "RAID 4/5/6 target (EXPERIMENTAL)"
depends on BLK_DEV_DM && EXPERIMENTAL
select MD_RAID456
select BLK_DEV_MD
---help---
A dm target that supports RAID4, RAID5 and RAID6 mappings.
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
of a single drive. For a given sector (row) number, (N - 1) drives
contain data sectors, and one drive contains the parity protection.
For a RAID-4 set, the parity blocks are present on a single drive,
while a RAID-5 set distributes the parity across the drives in one
of the available parity distribution methods.
A RAID-6 set of N drives with a capacity of C MB per drive
provides the capacity of C * (N - 2) MB, and protects
against a failure of any two drives. For a given sector
(row) number, (N - 2) drives contain data sectors, and two
drives contain two independent redundancy syndromes. Like
RAID-5, RAID-6 distributes the syndromes across the drives
in one of the available parity distribution methods.
config DM_LOG_USERSPACE
tristate "Mirror userspace logging (EXPERIMENTAL)"
depends on DM_MIRROR && EXPERIMENTAL && NET
......
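Editor's worked example of the capacity arithmetic quoted in the DM_RAID help
text above: with N = 5 members of C = 1000 MB each, a RAID-5 set provides
(5 - 1) * 1000 = 4000 MB and survives one drive failure, while a RAID-6 set
provides (5 - 2) * 1000 = 3000 MB and survives any two.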
@@ -36,6 +36,7 @@ obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o dm-region-hash.o
obj-$(CONFIG_DM_LOG_USERSPACE) += dm-log-userspace.o
obj-$(CONFIG_DM_ZERO) += dm-zero.o
obj-$(CONFIG_DM_RAID) += dm-raid.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
......
This diff is collapsed.
@@ -352,7 +352,7 @@ static int __init dm_delay_init(void)
{
	int r = -ENOMEM;

-	kdelayd_wq = create_workqueue("kdelayd");
+	kdelayd_wq = alloc_workqueue("kdelayd", WQ_MEM_RECLAIM, 0);
	if (!kdelayd_wq) {
		DMERR("Couldn't start kdelayd");
		goto bad_queue;
......
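Editor's sketch of the workqueue conversion repeated throughout this merge (the
example_* names are placeholders, not code from the diff): dedicated queues that
sit in the I/O path keep WQ_MEM_RECLAIM, and queues that relied on
create_singlethread_workqueue() for strict ordering use the ordered variant:

	#include <linux/workqueue.h>

	static struct workqueue_struct *example_wq;

	static int __init example_init(void)
	{
		/* was: example_wq = create_workqueue("example"); */
		example_wq = alloc_workqueue("example", WQ_MEM_RECLAIM, 0);

		/* or, when strict ordering of work items is still required: */
		/* example_wq = alloc_ordered_workqueue("example", WQ_MEM_RECLAIM); */

		if (!example_wq)
			return -ENOMEM;
		return 0;
	}

Where only non-reentrancy (not ordering) is needed, the diff passes
WQ_NON_REENTRANT instead, as in the kcopyd and kmirrord hunks below.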
...@@ -295,19 +295,55 @@ static void dm_hash_remove_all(int keep_open_devices) ...@@ -295,19 +295,55 @@ static void dm_hash_remove_all(int keep_open_devices)
DMWARN("remove_all left %d open device(s)", dev_skipped); DMWARN("remove_all left %d open device(s)", dev_skipped);
} }
/*
* Set the uuid of a hash_cell that isn't already set.
*/
static void __set_cell_uuid(struct hash_cell *hc, char *new_uuid)
{
mutex_lock(&dm_hash_cells_mutex);
hc->uuid = new_uuid;
mutex_unlock(&dm_hash_cells_mutex);
list_add(&hc->uuid_list, _uuid_buckets + hash_str(new_uuid));
}
/*
* Changes the name of a hash_cell and returns the old name for
* the caller to free.
*/
static char *__change_cell_name(struct hash_cell *hc, char *new_name)
{
char *old_name;
/*
* Rename and move the name cell.
*/
list_del(&hc->name_list);
old_name = hc->name;
mutex_lock(&dm_hash_cells_mutex);
hc->name = new_name;
mutex_unlock(&dm_hash_cells_mutex);
list_add(&hc->name_list, _name_buckets + hash_str(new_name));
return old_name;
}
static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
const char *new) const char *new)
{ {
char *new_name, *old_name; char *new_data, *old_name = NULL;
struct hash_cell *hc; struct hash_cell *hc;
struct dm_table *table; struct dm_table *table;
struct mapped_device *md; struct mapped_device *md;
unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
/* /*
* duplicate new. * duplicate new.
*/ */
new_name = kstrdup(new, GFP_KERNEL); new_data = kstrdup(new, GFP_KERNEL);
if (!new_name) if (!new_data)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
down_write(&_hash_lock); down_write(&_hash_lock);
...@@ -315,13 +351,19 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, ...@@ -315,13 +351,19 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
/* /*
* Is new free ? * Is new free ?
*/ */
hc = __get_name_cell(new); if (change_uuid)
hc = __get_uuid_cell(new);
else
hc = __get_name_cell(new);
if (hc) { if (hc) {
DMWARN("asked to rename to an already-existing name %s -> %s", DMWARN("Unable to change %s on mapped device %s to one that "
"already exists: %s",
change_uuid ? "uuid" : "name",
param->name, new); param->name, new);
dm_put(hc->md); dm_put(hc->md);
up_write(&_hash_lock); up_write(&_hash_lock);
kfree(new_name); kfree(new_data);
return ERR_PTR(-EBUSY); return ERR_PTR(-EBUSY);
} }
...@@ -330,22 +372,30 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param, ...@@ -330,22 +372,30 @@ static struct mapped_device *dm_hash_rename(struct dm_ioctl *param,
*/ */
hc = __get_name_cell(param->name); hc = __get_name_cell(param->name);
if (!hc) { if (!hc) {
DMWARN("asked to rename a non-existent device %s -> %s", DMWARN("Unable to rename non-existent device, %s to %s%s",
param->name, new); param->name, change_uuid ? "uuid " : "", new);
up_write(&_hash_lock); up_write(&_hash_lock);
kfree(new_name); kfree(new_data);
return ERR_PTR(-ENXIO); return ERR_PTR(-ENXIO);
} }
/* /*
* rename and move the name cell. * Does this device already have a uuid?
*/ */
list_del(&hc->name_list); if (change_uuid && hc->uuid) {
old_name = hc->name; DMWARN("Unable to change uuid of mapped device %s to %s "
mutex_lock(&dm_hash_cells_mutex); "because uuid is already set to %s",
hc->name = new_name; param->name, new, hc->uuid);
mutex_unlock(&dm_hash_cells_mutex); dm_put(hc->md);
list_add(&hc->name_list, _name_buckets + hash_str(new_name)); up_write(&_hash_lock);
kfree(new_data);
return ERR_PTR(-EINVAL);
}
if (change_uuid)
__set_cell_uuid(hc, new_data);
else
old_name = __change_cell_name(hc, new_data);
/* /*
* Wake up any dm event waiters. * Wake up any dm event waiters.
...@@ -729,7 +779,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) ...@@ -729,7 +779,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
hc = __find_device_hash_cell(param); hc = __find_device_hash_cell(param);
if (!hc) { if (!hc) {
DMWARN("device doesn't appear to be in the dev hash table."); DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table.");
up_write(&_hash_lock); up_write(&_hash_lock);
return -ENXIO; return -ENXIO;
} }
...@@ -741,7 +791,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) ...@@ -741,7 +791,7 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
*/ */
r = dm_lock_for_deletion(md); r = dm_lock_for_deletion(md);
if (r) { if (r) {
DMWARN("unable to remove open device %s", hc->name); DMDEBUG_LIMIT("unable to remove open device %s", hc->name);
up_write(&_hash_lock); up_write(&_hash_lock);
dm_put(md); dm_put(md);
return r; return r;
...@@ -774,21 +824,24 @@ static int invalid_str(char *str, void *end) ...@@ -774,21 +824,24 @@ static int invalid_str(char *str, void *end)
static int dev_rename(struct dm_ioctl *param, size_t param_size) static int dev_rename(struct dm_ioctl *param, size_t param_size)
{ {
int r; int r;
char *new_name = (char *) param + param->data_start; char *new_data = (char *) param + param->data_start;
struct mapped_device *md; struct mapped_device *md;
unsigned change_uuid = (param->flags & DM_UUID_FLAG) ? 1 : 0;
if (new_name < param->data || if (new_data < param->data ||
invalid_str(new_name, (void *) param + param_size) || invalid_str(new_data, (void *) param + param_size) ||
strlen(new_name) > DM_NAME_LEN - 1) { strlen(new_data) > (change_uuid ? DM_UUID_LEN - 1 : DM_NAME_LEN - 1)) {
DMWARN("Invalid new logical volume name supplied."); DMWARN("Invalid new mapped device name or uuid string supplied.");
return -EINVAL; return -EINVAL;
} }
r = check_name(new_name); if (!change_uuid) {
if (r) r = check_name(new_data);
return r; if (r)
return r;
}
md = dm_hash_rename(param, new_name); md = dm_hash_rename(param, new_data);
if (IS_ERR(md)) if (IS_ERR(md))
return PTR_ERR(md); return PTR_ERR(md);
...@@ -885,7 +938,7 @@ static int do_resume(struct dm_ioctl *param) ...@@ -885,7 +938,7 @@ static int do_resume(struct dm_ioctl *param)
hc = __find_device_hash_cell(param); hc = __find_device_hash_cell(param);
if (!hc) { if (!hc) {
DMWARN("device doesn't appear to be in the dev hash table."); DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table.");
up_write(&_hash_lock); up_write(&_hash_lock);
return -ENXIO; return -ENXIO;
} }
...@@ -1212,7 +1265,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size) ...@@ -1212,7 +1265,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
hc = __find_device_hash_cell(param); hc = __find_device_hash_cell(param);
if (!hc) { if (!hc) {
DMWARN("device doesn't appear to be in the dev hash table."); DMDEBUG_LIMIT("device doesn't appear to be in the dev hash table.");
up_write(&_hash_lock); up_write(&_hash_lock);
return -ENXIO; return -ENXIO;
} }
......
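Editor's aside on the uuid half of the dm-ioctl changes above (the userspace
commands are an assumption, not part of this diff): with DM_UUID_FLAG set,
DM_DEV_RENAME now sets a uuid on a device that was created without one, which
a sufficiently new dmsetup exposes roughly as

    dmsetup create vol --table "0 8192 linear 8:16 0"    # placeholder table, no uuid
    dmsetup rename vol --setuuid "MY-UUID-0001"          # allowed once

The kernel-side rules are visible in the hunks above: the new uuid must not
collide with an existing one (-EBUSY), and a uuid that is already set cannot
be changed afterwards (-EINVAL).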
...@@ -37,6 +37,13 @@ struct dm_kcopyd_client { ...@@ -37,6 +37,13 @@ struct dm_kcopyd_client {
unsigned int nr_pages; unsigned int nr_pages;
unsigned int nr_free_pages; unsigned int nr_free_pages;
/*
* Block devices to unplug.
* Non-NULL pointer means that a block device has some pending requests
* and needs to be unplugged.
*/
struct block_device *unplug[2];
struct dm_io_client *io_client; struct dm_io_client *io_client;
wait_queue_head_t destroyq; wait_queue_head_t destroyq;
...@@ -308,6 +315,31 @@ static int run_complete_job(struct kcopyd_job *job) ...@@ -308,6 +315,31 @@ static int run_complete_job(struct kcopyd_job *job)
return 0; return 0;
} }
/*
* Unplug the block device at the specified index.
*/
static void unplug(struct dm_kcopyd_client *kc, int rw)
{
if (kc->unplug[rw] != NULL) {
blk_unplug(bdev_get_queue(kc->unplug[rw]));
kc->unplug[rw] = NULL;
}
}
/*
* Prepare block device unplug. If there's another device
* to be unplugged at the same array index, we unplug that
* device first.
*/
static void prepare_unplug(struct dm_kcopyd_client *kc, int rw,
struct block_device *bdev)
{
if (likely(kc->unplug[rw] == bdev))
return;
unplug(kc, rw);
kc->unplug[rw] = bdev;
}
static void complete_io(unsigned long error, void *context) static void complete_io(unsigned long error, void *context)
{ {
struct kcopyd_job *job = (struct kcopyd_job *) context; struct kcopyd_job *job = (struct kcopyd_job *) context;
...@@ -345,7 +377,7 @@ static int run_io_job(struct kcopyd_job *job) ...@@ -345,7 +377,7 @@ static int run_io_job(struct kcopyd_job *job)
{ {
int r; int r;
struct dm_io_request io_req = { struct dm_io_request io_req = {
.bi_rw = job->rw | REQ_SYNC | REQ_UNPLUG, .bi_rw = job->rw,
.mem.type = DM_IO_PAGE_LIST, .mem.type = DM_IO_PAGE_LIST,
.mem.ptr.pl = job->pages, .mem.ptr.pl = job->pages,
.mem.offset = job->offset, .mem.offset = job->offset,
...@@ -354,10 +386,16 @@ static int run_io_job(struct kcopyd_job *job) ...@@ -354,10 +386,16 @@ static int run_io_job(struct kcopyd_job *job)
.client = job->kc->io_client, .client = job->kc->io_client,
}; };
if (job->rw == READ) if (job->rw == READ) {
r = dm_io(&io_req, 1, &job->source, NULL); r = dm_io(&io_req, 1, &job->source, NULL);
else prepare_unplug(job->kc, READ, job->source.bdev);
} else {
if (job->num_dests > 1)
io_req.bi_rw |= REQ_UNPLUG;
r = dm_io(&io_req, job->num_dests, job->dests, NULL); r = dm_io(&io_req, job->num_dests, job->dests, NULL);
if (!(io_req.bi_rw & REQ_UNPLUG))
prepare_unplug(job->kc, WRITE, job->dests[0].bdev);
}
return r; return r;
} }
...@@ -435,10 +473,18 @@ static void do_work(struct work_struct *work) ...@@ -435,10 +473,18 @@ static void do_work(struct work_struct *work)
* Pages jobs when successful will jump onto the io jobs * Pages jobs when successful will jump onto the io jobs
* list. io jobs call wake when they complete and it all * list. io jobs call wake when they complete and it all
* starts again. * starts again.
*
* Note that io_jobs add block devices to the unplug array,
* this array is cleared with "unplug" calls. It is thus
* forbidden to run complete_jobs after io_jobs and before
* unplug because the block device could be destroyed in
* job completion callback.
*/ */
process_jobs(&kc->complete_jobs, kc, run_complete_job); process_jobs(&kc->complete_jobs, kc, run_complete_job);
process_jobs(&kc->pages_jobs, kc, run_pages_job); process_jobs(&kc->pages_jobs, kc, run_pages_job);
process_jobs(&kc->io_jobs, kc, run_io_job); process_jobs(&kc->io_jobs, kc, run_io_job);
unplug(kc, READ);
unplug(kc, WRITE);
} }
/* /*
...@@ -619,12 +665,15 @@ int dm_kcopyd_client_create(unsigned int nr_pages, ...@@ -619,12 +665,15 @@ int dm_kcopyd_client_create(unsigned int nr_pages,
INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->io_jobs);
INIT_LIST_HEAD(&kc->pages_jobs); INIT_LIST_HEAD(&kc->pages_jobs);
memset(kc->unplug, 0, sizeof(kc->unplug));
kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache);
if (!kc->job_pool) if (!kc->job_pool)
goto bad_slab; goto bad_slab;
INIT_WORK(&kc->kcopyd_work, do_work); INIT_WORK(&kc->kcopyd_work, do_work);
kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); kc->kcopyd_wq = alloc_workqueue("kcopyd",
WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
if (!kc->kcopyd_wq) if (!kc->kcopyd_wq)
goto bad_workqueue; goto bad_workqueue;
......
...@@ -12,12 +12,22 @@ ...@@ -12,12 +12,22 @@
#include "dm-log-userspace-transfer.h" #include "dm-log-userspace-transfer.h"
#define DM_LOG_USERSPACE_VSN "1.1.0"
struct flush_entry { struct flush_entry {
int type; int type;
region_t region; region_t region;
struct list_head list; struct list_head list;
}; };
/*
* This limit on the number of mark and clear request is, to a degree,
* arbitrary. However, there is some basis for the choice in the limits
* imposed on the size of data payload by dm-log-userspace-transfer.c:
* dm_consult_userspace().
*/
#define MAX_FLUSH_GROUP_COUNT 32
struct log_c { struct log_c {
struct dm_target *ti; struct dm_target *ti;
uint32_t region_size; uint32_t region_size;
...@@ -37,8 +47,15 @@ struct log_c { ...@@ -37,8 +47,15 @@ struct log_c {
*/ */
uint64_t in_sync_hint; uint64_t in_sync_hint;
/*
* Mark and clear requests are held until a flush is issued
* so that we can group, and thereby limit, the amount of
* network traffic between kernel and userspace. The 'flush_lock'
* is used to protect these lists.
*/
spinlock_t flush_lock; spinlock_t flush_lock;
struct list_head flush_list; /* only for clear and mark requests */ struct list_head mark_list;
struct list_head clear_list;
}; };
static mempool_t *flush_entry_pool; static mempool_t *flush_entry_pool;
...@@ -169,7 +186,8 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, ...@@ -169,7 +186,8 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
strncpy(lc->uuid, argv[0], DM_UUID_LEN); strncpy(lc->uuid, argv[0], DM_UUID_LEN);
spin_lock_init(&lc->flush_lock); spin_lock_init(&lc->flush_lock);
INIT_LIST_HEAD(&lc->flush_list); INIT_LIST_HEAD(&lc->mark_list);
INIT_LIST_HEAD(&lc->clear_list);
str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str); str_size = build_constructor_string(ti, argc - 1, argv + 1, &ctr_str);
if (str_size < 0) { if (str_size < 0) {
...@@ -181,8 +199,11 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, ...@@ -181,8 +199,11 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR, r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_CTR,
ctr_str, str_size, NULL, NULL); ctr_str, str_size, NULL, NULL);
if (r == -ESRCH) { if (r < 0) {
DMERR("Userspace log server not found"); if (r == -ESRCH)
DMERR("Userspace log server not found");
else
DMERR("Userspace log server failed to create log");
goto out; goto out;
} }
...@@ -214,10 +235,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, ...@@ -214,10 +235,9 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti,
static void userspace_dtr(struct dm_dirty_log *log) static void userspace_dtr(struct dm_dirty_log *log)
{ {
int r;
struct log_c *lc = log->context; struct log_c *lc = log->context;
r = dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR, (void) dm_consult_userspace(lc->uuid, lc->luid, DM_ULOG_DTR,
NULL, 0, NULL, 0,
NULL, NULL); NULL, NULL);
...@@ -338,6 +358,71 @@ static int userspace_in_sync(struct dm_dirty_log *log, region_t region, ...@@ -338,6 +358,71 @@ static int userspace_in_sync(struct dm_dirty_log *log, region_t region,
return (r) ? 0 : (int)in_sync; return (r) ? 0 : (int)in_sync;
} }
static int flush_one_by_one(struct log_c *lc, struct list_head *flush_list)
{
int r = 0;
struct flush_entry *fe;
list_for_each_entry(fe, flush_list, list) {
r = userspace_do_request(lc, lc->uuid, fe->type,
(char *)&fe->region,
sizeof(fe->region),
NULL, NULL);
if (r)
break;
}
return r;
}
static int flush_by_group(struct log_c *lc, struct list_head *flush_list)
{
int r = 0;
int count;
uint32_t type = 0;
struct flush_entry *fe, *tmp_fe;
LIST_HEAD(tmp_list);
uint64_t group[MAX_FLUSH_GROUP_COUNT];
/*
* Group process the requests
*/
while (!list_empty(flush_list)) {
count = 0;
list_for_each_entry_safe(fe, tmp_fe, flush_list, list) {
group[count] = fe->region;
count++;
list_del(&fe->list);
list_add(&fe->list, &tmp_list);
type = fe->type;
if (count >= MAX_FLUSH_GROUP_COUNT)
break;
}
r = userspace_do_request(lc, lc->uuid, type,
(char *)(group),
count * sizeof(uint64_t),
NULL, NULL);
if (r) {
/* Group send failed. Attempt one-by-one. */
list_splice_init(&tmp_list, flush_list);
r = flush_one_by_one(lc, flush_list);
break;
}
}
/*
* Must collect flush_entrys that were successfully processed
* as a group so that they will be free'd by the caller.
*/
list_splice_init(&tmp_list, flush_list);
return r;
}
/* /*
* userspace_flush * userspace_flush
* *
...@@ -360,31 +445,25 @@ static int userspace_flush(struct dm_dirty_log *log) ...@@ -360,31 +445,25 @@ static int userspace_flush(struct dm_dirty_log *log)
int r = 0; int r = 0;
unsigned long flags; unsigned long flags;
struct log_c *lc = log->context; struct log_c *lc = log->context;
LIST_HEAD(flush_list); LIST_HEAD(mark_list);
LIST_HEAD(clear_list);
struct flush_entry *fe, *tmp_fe; struct flush_entry *fe, *tmp_fe;
spin_lock_irqsave(&lc->flush_lock, flags); spin_lock_irqsave(&lc->flush_lock, flags);
list_splice_init(&lc->flush_list, &flush_list); list_splice_init(&lc->mark_list, &mark_list);
list_splice_init(&lc->clear_list, &clear_list);
spin_unlock_irqrestore(&lc->flush_lock, flags); spin_unlock_irqrestore(&lc->flush_lock, flags);
if (list_empty(&flush_list)) if (list_empty(&mark_list) && list_empty(&clear_list))
return 0; return 0;
/* r = flush_by_group(lc, &mark_list);
* FIXME: Count up requests, group request types, if (r)
* allocate memory to stick all requests in and goto fail;
* send to server in one go. Failing the allocation,
* do it one by one.
*/
list_for_each_entry(fe, &flush_list, list) { r = flush_by_group(lc, &clear_list);
r = userspace_do_request(lc, lc->uuid, fe->type, if (r)
(char *)&fe->region, goto fail;
sizeof(fe->region),
NULL, NULL);
if (r)
goto fail;
}
r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH, r = userspace_do_request(lc, lc->uuid, DM_ULOG_FLUSH,
NULL, 0, NULL, NULL); NULL, 0, NULL, NULL);
...@@ -395,7 +474,11 @@ static int userspace_flush(struct dm_dirty_log *log) ...@@ -395,7 +474,11 @@ static int userspace_flush(struct dm_dirty_log *log)
* Calling code will receive an error and will know that * Calling code will receive an error and will know that
* the log facility has failed. * the log facility has failed.
*/ */
list_for_each_entry_safe(fe, tmp_fe, &flush_list, list) { list_for_each_entry_safe(fe, tmp_fe, &mark_list, list) {
list_del(&fe->list);
mempool_free(fe, flush_entry_pool);
}
list_for_each_entry_safe(fe, tmp_fe, &clear_list, list) {
list_del(&fe->list); list_del(&fe->list);
mempool_free(fe, flush_entry_pool); mempool_free(fe, flush_entry_pool);
} }
...@@ -425,7 +508,7 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region) ...@@ -425,7 +508,7 @@ static void userspace_mark_region(struct dm_dirty_log *log, region_t region)
spin_lock_irqsave(&lc->flush_lock, flags); spin_lock_irqsave(&lc->flush_lock, flags);
fe->type = DM_ULOG_MARK_REGION; fe->type = DM_ULOG_MARK_REGION;
fe->region = region; fe->region = region;
list_add(&fe->list, &lc->flush_list); list_add(&fe->list, &lc->mark_list);
spin_unlock_irqrestore(&lc->flush_lock, flags); spin_unlock_irqrestore(&lc->flush_lock, flags);
return; return;
...@@ -462,7 +545,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region) ...@@ -462,7 +545,7 @@ static void userspace_clear_region(struct dm_dirty_log *log, region_t region)
spin_lock_irqsave(&lc->flush_lock, flags); spin_lock_irqsave(&lc->flush_lock, flags);
fe->type = DM_ULOG_CLEAR_REGION; fe->type = DM_ULOG_CLEAR_REGION;
fe->region = region; fe->region = region;
list_add(&fe->list, &lc->flush_list); list_add(&fe->list, &lc->clear_list);
spin_unlock_irqrestore(&lc->flush_lock, flags); spin_unlock_irqrestore(&lc->flush_lock, flags);
return; return;
...@@ -684,7 +767,7 @@ static int __init userspace_dirty_log_init(void) ...@@ -684,7 +767,7 @@ static int __init userspace_dirty_log_init(void)
return r; return r;
} }
DMINFO("version 1.0.0 loaded"); DMINFO("version " DM_LOG_USERSPACE_VSN " loaded");
return 0; return 0;
} }
...@@ -694,7 +777,7 @@ static void __exit userspace_dirty_log_exit(void) ...@@ -694,7 +777,7 @@ static void __exit userspace_dirty_log_exit(void)
dm_ulog_tfr_exit(); dm_ulog_tfr_exit();
mempool_destroy(flush_entry_pool); mempool_destroy(flush_entry_pool);
DMINFO("version 1.0.0 unloaded"); DMINFO("version " DM_LOG_USERSPACE_VSN " unloaded");
return; return;
} }
......
@@ -198,6 +198,7 @@ int dm_consult_userspace(const char *uuid, uint64_t luid, int request_type,
	memset(tfr, 0, DM_ULOG_PREALLOCED_SIZE - sizeof(struct cn_msg));
	memcpy(tfr->uuid, uuid, DM_UUID_LEN);
+	tfr->version = DM_ULOG_REQUEST_VERSION;
	tfr->luid = luid;
	tfr->seq = dm_ulog_seq++;
@@ -455,7 +455,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti,
		r = PTR_ERR(lc->io_req.client);
		DMWARN("couldn't allocate disk io client");
		kfree(lc);
-		return -ENOMEM;
+		return r;
	}

	lc->disk_header = vmalloc(buf_size);
...@@ -23,6 +23,8 @@ ...@@ -23,6 +23,8 @@
#define DM_MSG_PREFIX "multipath" #define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x) #define MESG_STR(x) x, sizeof(x)
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
/* Path properties */ /* Path properties */
struct pgpath { struct pgpath {
...@@ -33,8 +35,7 @@ struct pgpath { ...@@ -33,8 +35,7 @@ struct pgpath {
unsigned fail_count; /* Cumulative failure count */ unsigned fail_count; /* Cumulative failure count */
struct dm_path path; struct dm_path path;
struct work_struct deactivate_path; struct delayed_work activate_path;
struct work_struct activate_path;
}; };
#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)
...@@ -65,11 +66,15 @@ struct multipath { ...@@ -65,11 +66,15 @@ struct multipath {
const char *hw_handler_name; const char *hw_handler_name;
char *hw_handler_params; char *hw_handler_params;
unsigned nr_priority_groups; unsigned nr_priority_groups;
struct list_head priority_groups; struct list_head priority_groups;
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
unsigned pg_init_required; /* pg_init needs calling? */ unsigned pg_init_required; /* pg_init needs calling? */
unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ unsigned pg_init_in_progress; /* Only one pg_init allowed at once */
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ unsigned pg_init_delay_retry; /* Delay pg_init retry? */
unsigned nr_valid_paths; /* Total number of usable paths */ unsigned nr_valid_paths; /* Total number of usable paths */
struct pgpath *current_pgpath; struct pgpath *current_pgpath;
...@@ -82,6 +87,7 @@ struct multipath { ...@@ -82,6 +87,7 @@ struct multipath {
unsigned saved_queue_if_no_path;/* Saved state during suspension */ unsigned saved_queue_if_no_path;/* Saved state during suspension */
unsigned pg_init_retries; /* Number of times to retry pg_init */ unsigned pg_init_retries; /* Number of times to retry pg_init */
unsigned pg_init_count; /* Number of times pg_init called */ unsigned pg_init_count; /* Number of times pg_init called */
unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */
struct work_struct process_queued_ios; struct work_struct process_queued_ios;
struct list_head queued_ios; struct list_head queued_ios;
...@@ -116,7 +122,6 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd; ...@@ -116,7 +122,6 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work); static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work); static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work); static void activate_path(struct work_struct *work);
static void deactivate_path(struct work_struct *work);
/*----------------------------------------------- /*-----------------------------------------------
...@@ -129,8 +134,7 @@ static struct pgpath *alloc_pgpath(void) ...@@ -129,8 +134,7 @@ static struct pgpath *alloc_pgpath(void)
if (pgpath) { if (pgpath) {
pgpath->is_active = 1; pgpath->is_active = 1;
INIT_WORK(&pgpath->deactivate_path, deactivate_path); INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
INIT_WORK(&pgpath->activate_path, activate_path);
} }
return pgpath; return pgpath;
...@@ -141,14 +145,6 @@ static void free_pgpath(struct pgpath *pgpath) ...@@ -141,14 +145,6 @@ static void free_pgpath(struct pgpath *pgpath)
kfree(pgpath); kfree(pgpath);
} }
static void deactivate_path(struct work_struct *work)
{
struct pgpath *pgpath =
container_of(work, struct pgpath, deactivate_path);
blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
}
static struct priority_group *alloc_priority_group(void) static struct priority_group *alloc_priority_group(void)
{ {
struct priority_group *pg; struct priority_group *pg;
...@@ -199,6 +195,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) ...@@ -199,6 +195,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti)
INIT_LIST_HEAD(&m->queued_ios); INIT_LIST_HEAD(&m->queued_ios);
spin_lock_init(&m->lock); spin_lock_init(&m->lock);
m->queue_io = 1; m->queue_io = 1;
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
INIT_WORK(&m->process_queued_ios, process_queued_ios); INIT_WORK(&m->process_queued_ios, process_queued_ios);
INIT_WORK(&m->trigger_event, trigger_event); INIT_WORK(&m->trigger_event, trigger_event);
init_waitqueue_head(&m->pg_init_wait); init_waitqueue_head(&m->pg_init_wait);
...@@ -238,14 +235,19 @@ static void free_multipath(struct multipath *m) ...@@ -238,14 +235,19 @@ static void free_multipath(struct multipath *m)
static void __pg_init_all_paths(struct multipath *m) static void __pg_init_all_paths(struct multipath *m)
{ {
struct pgpath *pgpath; struct pgpath *pgpath;
unsigned long pg_init_delay = 0;
m->pg_init_count++; m->pg_init_count++;
m->pg_init_required = 0; m->pg_init_required = 0;
if (m->pg_init_delay_retry)
pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
/* Skip failed paths */ /* Skip failed paths */
if (!pgpath->is_active) if (!pgpath->is_active)
continue; continue;
if (queue_work(kmpath_handlerd, &pgpath->activate_path)) if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
pg_init_delay))
m->pg_init_in_progress++; m->pg_init_in_progress++;
} }
} }
...@@ -793,8 +795,9 @@ static int parse_features(struct arg_set *as, struct multipath *m) ...@@ -793,8 +795,9 @@ static int parse_features(struct arg_set *as, struct multipath *m)
const char *param_name; const char *param_name;
static struct param _params[] = { static struct param _params[] = {
{0, 3, "invalid number of feature args"}, {0, 5, "invalid number of feature args"},
{1, 50, "pg_init_retries must be between 1 and 50"}, {1, 50, "pg_init_retries must be between 1 and 50"},
{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
}; };
r = read_param(_params, shift(as), &argc, &ti->error); r = read_param(_params, shift(as), &argc, &ti->error);
...@@ -821,6 +824,14 @@ static int parse_features(struct arg_set *as, struct multipath *m) ...@@ -821,6 +824,14 @@ static int parse_features(struct arg_set *as, struct multipath *m)
continue; continue;
} }
if (!strnicmp(param_name, MESG_STR("pg_init_delay_msecs")) &&
(argc >= 1)) {
r = read_param(_params + 2, shift(as),
&m->pg_init_delay_msecs, &ti->error);
argc--;
continue;
}
ti->error = "Unrecognised multipath feature request"; ti->error = "Unrecognised multipath feature request";
r = -EINVAL; r = -EINVAL;
} while (argc && !r); } while (argc && !r);
...@@ -931,7 +942,7 @@ static void flush_multipath_work(struct multipath *m) ...@@ -931,7 +942,7 @@ static void flush_multipath_work(struct multipath *m)
flush_workqueue(kmpath_handlerd); flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m); multipath_wait_for_pg_init_completion(m);
flush_workqueue(kmultipathd); flush_workqueue(kmultipathd);
flush_scheduled_work(); flush_work_sync(&m->trigger_event);
} }
static void multipath_dtr(struct dm_target *ti) static void multipath_dtr(struct dm_target *ti)
...@@ -995,7 +1006,6 @@ static int fail_path(struct pgpath *pgpath) ...@@ -995,7 +1006,6 @@ static int fail_path(struct pgpath *pgpath)
pgpath->path.dev->name, m->nr_valid_paths); pgpath->path.dev->name, m->nr_valid_paths);
schedule_work(&m->trigger_event); schedule_work(&m->trigger_event);
queue_work(kmultipathd, &pgpath->deactivate_path);
out: out:
spin_unlock_irqrestore(&m->lock, flags); spin_unlock_irqrestore(&m->lock, flags);
...@@ -1034,7 +1044,7 @@ static int reinstate_path(struct pgpath *pgpath) ...@@ -1034,7 +1044,7 @@ static int reinstate_path(struct pgpath *pgpath)
m->current_pgpath = NULL; m->current_pgpath = NULL;
queue_work(kmultipathd, &m->process_queued_ios); queue_work(kmultipathd, &m->process_queued_ios);
} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
if (queue_work(kmpath_handlerd, &pgpath->activate_path)) if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
m->pg_init_in_progress++; m->pg_init_in_progress++;
} }
...@@ -1169,6 +1179,7 @@ static void pg_init_done(void *data, int errors) ...@@ -1169,6 +1179,7 @@ static void pg_init_done(void *data, int errors)
struct priority_group *pg = pgpath->pg; struct priority_group *pg = pgpath->pg;
struct multipath *m = pg->m; struct multipath *m = pg->m;
unsigned long flags; unsigned long flags;
unsigned delay_retry = 0;
/* device or driver problems */ /* device or driver problems */
switch (errors) { switch (errors) {
...@@ -1193,8 +1204,9 @@ static void pg_init_done(void *data, int errors) ...@@ -1193,8 +1204,9 @@ static void pg_init_done(void *data, int errors)
*/ */
bypass_pg(m, pg, 1); bypass_pg(m, pg, 1);
break; break;
/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
case SCSI_DH_RETRY: case SCSI_DH_RETRY:
/* Wait before retrying. */
delay_retry = 1;
case SCSI_DH_IMM_RETRY: case SCSI_DH_IMM_RETRY:
case SCSI_DH_RES_TEMP_UNAVAIL: case SCSI_DH_RES_TEMP_UNAVAIL:
if (pg_init_limit_reached(m, pgpath)) if (pg_init_limit_reached(m, pgpath))
...@@ -1227,6 +1239,7 @@ static void pg_init_done(void *data, int errors) ...@@ -1227,6 +1239,7 @@ static void pg_init_done(void *data, int errors)
if (!m->pg_init_required) if (!m->pg_init_required)
m->queue_io = 0; m->queue_io = 0;
m->pg_init_delay_retry = delay_retry;
queue_work(kmultipathd, &m->process_queued_ios); queue_work(kmultipathd, &m->process_queued_ios);
/* /*
...@@ -1241,7 +1254,7 @@ static void pg_init_done(void *data, int errors) ...@@ -1241,7 +1254,7 @@ static void pg_init_done(void *data, int errors)
static void activate_path(struct work_struct *work) static void activate_path(struct work_struct *work)
{ {
struct pgpath *pgpath = struct pgpath *pgpath =
container_of(work, struct pgpath, activate_path); container_of(work, struct pgpath, activate_path.work);
scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
pg_init_done, pgpath); pg_init_done, pgpath);
...@@ -1382,11 +1395,14 @@ static int multipath_status(struct dm_target *ti, status_type_t type, ...@@ -1382,11 +1395,14 @@ static int multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count); DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
else { else {
DMEMIT("%u ", m->queue_if_no_path + DMEMIT("%u ", m->queue_if_no_path +
(m->pg_init_retries > 0) * 2); (m->pg_init_retries > 0) * 2 +
(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2);
if (m->queue_if_no_path) if (m->queue_if_no_path)
DMEMIT("queue_if_no_path "); DMEMIT("queue_if_no_path ");
if (m->pg_init_retries) if (m->pg_init_retries)
DMEMIT("pg_init_retries %u ", m->pg_init_retries); DMEMIT("pg_init_retries %u ", m->pg_init_retries);
if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
} }
if (!m->hw_handler_name || type == STATUSTYPE_INFO) if (!m->hw_handler_name || type == STATUSTYPE_INFO)
...@@ -1655,7 +1671,7 @@ static int multipath_busy(struct dm_target *ti) ...@@ -1655,7 +1671,7 @@ static int multipath_busy(struct dm_target *ti)
*---------------------------------------------------------------*/ *---------------------------------------------------------------*/
static struct target_type multipath_target = { static struct target_type multipath_target = {
.name = "multipath", .name = "multipath",
.version = {1, 1, 1}, .version = {1, 2, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = multipath_ctr, .ctr = multipath_ctr,
.dtr = multipath_dtr, .dtr = multipath_dtr,
...@@ -1687,7 +1703,7 @@ static int __init dm_multipath_init(void) ...@@ -1687,7 +1703,7 @@ static int __init dm_multipath_init(void)
return -EINVAL; return -EINVAL;
} }
kmultipathd = create_workqueue("kmpathd"); kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
if (!kmultipathd) { if (!kmultipathd) {
DMERR("failed to create workqueue kmpathd"); DMERR("failed to create workqueue kmpathd");
dm_unregister_target(&multipath_target); dm_unregister_target(&multipath_target);
...@@ -1701,7 +1717,8 @@ static int __init dm_multipath_init(void) ...@@ -1701,7 +1717,8 @@ static int __init dm_multipath_init(void)
* old workqueue would also create a bottleneck in the * old workqueue would also create a bottleneck in the
* path of the storage hardware device activation. * path of the storage hardware device activation.
*/ */
kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd"); kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
WQ_MEM_RECLAIM);
if (!kmpath_handlerd) { if (!kmpath_handlerd) {
DMERR("failed to create workqueue kmpath_handlerd"); DMERR("failed to create workqueue kmpath_handlerd");
destroy_workqueue(kmultipathd); destroy_workqueue(kmultipathd);
......
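Editor's illustration of the new multipath feature argument (a made-up
single-path table, not from the patch; pg_init_delay_msecs only matters when a
hardware handler is configured):

    0 2097152 multipath 3 queue_if_no_path pg_init_delay_msecs 2000 \
        0 1 1 round-robin 0 1 1 8:16 1000

i.e. three feature arguments (queue_if_no_path plus the key/value pair),
followed by the hardware handler, priority group and path sections. When the
value is not given, delayed retries (set up by pg_init_done() on SCSI_DH_RETRY)
fall back to the built-in DM_PG_INIT_DELAY_MSECS of 2000 ms, and the parameter
is echoed back by 'dmsetup table'/'dmsetup status' only if it was explicitly
set.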
This diff is collapsed.
@@ -261,7 +261,7 @@ static int mirror_flush(struct dm_target *ti)
	struct dm_io_request io_req = {
		.bi_rw = WRITE_FLUSH,
		.mem.type = DM_IO_KMEM,
-		.mem.ptr.bvec = NULL,
+		.mem.ptr.addr = NULL,
		.client = ms->io_client,
	};

@@ -637,6 +637,12 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
		.client = ms->io_client,
	};

+	if (bio->bi_rw & REQ_DISCARD) {
+		io_req.bi_rw |= REQ_DISCARD;
+		io_req.mem.type = DM_IO_KMEM;
+		io_req.mem.ptr.addr = NULL;
+	}
+
	for (i = 0, m = ms->mirror; i < ms->nr_mirrors; i++, m++)
		map_region(dest++, m, bio);

@@ -670,7 +676,8 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
	bio_list_init(&requeue);

	while ((bio = bio_list_pop(writes))) {
-		if (bio->bi_rw & REQ_FLUSH) {
+		if ((bio->bi_rw & REQ_FLUSH) ||
+		    (bio->bi_rw & REQ_DISCARD)) {
			bio_list_add(&sync, bio);
			continue;
		}

@@ -1076,8 +1083,10 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
	ti->private = ms;
	ti->split_io = dm_rh_get_region_size(ms->rh);
	ti->num_flush_requests = 1;
+	ti->num_discard_requests = 1;

-	ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
+	ms->kmirrord_wq = alloc_workqueue("kmirrord",
+					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
	if (!ms->kmirrord_wq) {
		DMERR("couldn't start kmirrord");
		r = -ENOMEM;

@@ -1130,7 +1139,7 @@ static void mirror_dtr(struct dm_target *ti)
	del_timer_sync(&ms->timer);
	flush_workqueue(ms->kmirrord_wq);
-	flush_scheduled_work();
+	flush_work_sync(&ms->trigger_event);
	dm_kcopyd_client_destroy(ms->kcopyd_client);
	destroy_workqueue(ms->kmirrord_wq);
	free_context(ms, ti, ms->nr_mirrors);

@@ -1406,7 +1415,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
static struct target_type mirror_target = {
	.name = "mirror",
-	.version = {1, 12, 0},
+	.version = {1, 12, 1},
	.module = THIS_MODULE,
	.ctr = mirror_ctr,
	.dtr = mirror_dtr,
......
@@ -256,7 +256,7 @@ static int chunk_io(struct pstore *ps, void *area, chunk_t chunk, int rw,
	 */
	INIT_WORK_ONSTACK(&req.work, do_metadata);
	queue_work(ps->metadata_wq, &req.work);
-	flush_workqueue(ps->metadata_wq);
+	flush_work(&req.work);

	return req.result;
}

@@ -818,7 +818,7 @@ static int persistent_ctr(struct dm_exception_store *store,
	atomic_set(&ps->pending_count, 0);
	ps->callbacks = NULL;

-	ps->metadata_wq = create_singlethread_workqueue("ksnaphd");
+	ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
	if (!ps->metadata_wq) {
		kfree(ps);
		DMERR("couldn't start header metadata update thread");
......
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/log2.h> #include <linux/log2.h>
#include <linux/dm-kcopyd.h> #include <linux/dm-kcopyd.h>
#include <linux/workqueue.h>
#include "dm-exception-store.h" #include "dm-exception-store.h"
...@@ -80,9 +79,6 @@ struct dm_snapshot { ...@@ -80,9 +79,6 @@ struct dm_snapshot {
/* Origin writes don't trigger exceptions until this is set */ /* Origin writes don't trigger exceptions until this is set */
int active; int active;
/* Whether or not owning mapped_device is suspended */
int suspended;
atomic_t pending_exceptions_count; atomic_t pending_exceptions_count;
mempool_t *pending_pool; mempool_t *pending_pool;
...@@ -106,10 +102,6 @@ struct dm_snapshot { ...@@ -106,10 +102,6 @@ struct dm_snapshot {
struct dm_kcopyd_client *kcopyd_client; struct dm_kcopyd_client *kcopyd_client;
/* Queue of snapshot writes for ksnapd to flush */
struct bio_list queued_bios;
struct work_struct queued_bios_work;
/* Wait for events based on state_bits */ /* Wait for events based on state_bits */
unsigned long state_bits; unsigned long state_bits;
...@@ -160,9 +152,6 @@ struct dm_dev *dm_snap_cow(struct dm_snapshot *s) ...@@ -160,9 +152,6 @@ struct dm_dev *dm_snap_cow(struct dm_snapshot *s)
} }
EXPORT_SYMBOL(dm_snap_cow); EXPORT_SYMBOL(dm_snap_cow);
static struct workqueue_struct *ksnapd;
static void flush_queued_bios(struct work_struct *work);
static sector_t chunk_to_sector(struct dm_exception_store *store, static sector_t chunk_to_sector(struct dm_exception_store *store,
chunk_t chunk) chunk_t chunk)
{ {
...@@ -1110,7 +1099,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1110,7 +1099,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->ti = ti; s->ti = ti;
s->valid = 1; s->valid = 1;
s->active = 0; s->active = 0;
s->suspended = 0;
atomic_set(&s->pending_exceptions_count, 0); atomic_set(&s->pending_exceptions_count, 0);
init_rwsem(&s->lock); init_rwsem(&s->lock);
INIT_LIST_HEAD(&s->list); INIT_LIST_HEAD(&s->list);
...@@ -1153,9 +1141,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ...@@ -1153,9 +1141,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
spin_lock_init(&s->tracked_chunk_lock); spin_lock_init(&s->tracked_chunk_lock);
bio_list_init(&s->queued_bios);
INIT_WORK(&s->queued_bios_work, flush_queued_bios);
ti->private = s; ti->private = s;
ti->num_flush_requests = num_flush_requests; ti->num_flush_requests = num_flush_requests;
...@@ -1279,8 +1264,6 @@ static void snapshot_dtr(struct dm_target *ti) ...@@ -1279,8 +1264,6 @@ static void snapshot_dtr(struct dm_target *ti)
struct dm_snapshot *s = ti->private; struct dm_snapshot *s = ti->private;
struct dm_snapshot *snap_src = NULL, *snap_dest = NULL; struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
flush_workqueue(ksnapd);
down_read(&_origins_lock); down_read(&_origins_lock);
/* Check whether exception handover must be cancelled */ /* Check whether exception handover must be cancelled */
(void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL); (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
...@@ -1342,20 +1325,6 @@ static void flush_bios(struct bio *bio) ...@@ -1342,20 +1325,6 @@ static void flush_bios(struct bio *bio)
} }
} }
static void flush_queued_bios(struct work_struct *work)
{
struct dm_snapshot *s =
container_of(work, struct dm_snapshot, queued_bios_work);
struct bio *queued_bios;
unsigned long flags;
spin_lock_irqsave(&s->pe_lock, flags);
queued_bios = bio_list_get(&s->queued_bios);
spin_unlock_irqrestore(&s->pe_lock, flags);
flush_bios(queued_bios);
}
static int do_origin(struct dm_dev *origin, struct bio *bio); static int do_origin(struct dm_dev *origin, struct bio *bio);
/* /*
...@@ -1760,15 +1729,6 @@ static void snapshot_merge_presuspend(struct dm_target *ti) ...@@ -1760,15 +1729,6 @@ static void snapshot_merge_presuspend(struct dm_target *ti)
stop_merge(s); stop_merge(s);
} }
static void snapshot_postsuspend(struct dm_target *ti)
{
struct dm_snapshot *s = ti->private;
down_write(&s->lock);
s->suspended = 1;
up_write(&s->lock);
}
static int snapshot_preresume(struct dm_target *ti) static int snapshot_preresume(struct dm_target *ti)
{ {
int r = 0; int r = 0;
...@@ -1783,7 +1743,7 @@ static int snapshot_preresume(struct dm_target *ti) ...@@ -1783,7 +1743,7 @@ static int snapshot_preresume(struct dm_target *ti)
DMERR("Unable to resume snapshot source until " DMERR("Unable to resume snapshot source until "
"handover completes."); "handover completes.");
r = -EINVAL; r = -EINVAL;
} else if (!snap_src->suspended) { } else if (!dm_suspended(snap_src->ti)) {
DMERR("Unable to perform snapshot handover until " DMERR("Unable to perform snapshot handover until "
"source is suspended."); "source is suspended.");
r = -EINVAL; r = -EINVAL;
...@@ -1816,7 +1776,6 @@ static void snapshot_resume(struct dm_target *ti) ...@@ -1816,7 +1776,6 @@ static void snapshot_resume(struct dm_target *ti)
down_write(&s->lock); down_write(&s->lock);
s->active = 1; s->active = 1;
s->suspended = 0;
up_write(&s->lock); up_write(&s->lock);
} }
...@@ -2194,7 +2153,7 @@ static int origin_iterate_devices(struct dm_target *ti, ...@@ -2194,7 +2153,7 @@ static int origin_iterate_devices(struct dm_target *ti,
static struct target_type origin_target = { static struct target_type origin_target = {
.name = "snapshot-origin", .name = "snapshot-origin",
.version = {1, 7, 0}, .version = {1, 7, 1},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = origin_ctr, .ctr = origin_ctr,
.dtr = origin_dtr, .dtr = origin_dtr,
...@@ -2207,13 +2166,12 @@ static struct target_type origin_target = { ...@@ -2207,13 +2166,12 @@ static struct target_type origin_target = {
static struct target_type snapshot_target = { static struct target_type snapshot_target = {
.name = "snapshot", .name = "snapshot",
.version = {1, 9, 0}, .version = {1, 10, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = snapshot_ctr, .ctr = snapshot_ctr,
.dtr = snapshot_dtr, .dtr = snapshot_dtr,
.map = snapshot_map, .map = snapshot_map,
.end_io = snapshot_end_io, .end_io = snapshot_end_io,
.postsuspend = snapshot_postsuspend,
.preresume = snapshot_preresume, .preresume = snapshot_preresume,
.resume = snapshot_resume, .resume = snapshot_resume,
.status = snapshot_status, .status = snapshot_status,
...@@ -2222,14 +2180,13 @@ static struct target_type snapshot_target = { ...@@ -2222,14 +2180,13 @@ static struct target_type snapshot_target = {
static struct target_type merge_target = { static struct target_type merge_target = {
.name = dm_snapshot_merge_target_name, .name = dm_snapshot_merge_target_name,
.version = {1, 0, 0}, .version = {1, 1, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = snapshot_ctr, .ctr = snapshot_ctr,
.dtr = snapshot_dtr, .dtr = snapshot_dtr,
.map = snapshot_merge_map, .map = snapshot_merge_map,
.end_io = snapshot_end_io, .end_io = snapshot_end_io,
.presuspend = snapshot_merge_presuspend, .presuspend = snapshot_merge_presuspend,
.postsuspend = snapshot_postsuspend,
.preresume = snapshot_preresume, .preresume = snapshot_preresume,
.resume = snapshot_merge_resume, .resume = snapshot_merge_resume,
.status = snapshot_status, .status = snapshot_status,
...@@ -2291,17 +2248,8 @@ static int __init dm_snapshot_init(void) ...@@ -2291,17 +2248,8 @@ static int __init dm_snapshot_init(void)
goto bad_tracked_chunk_cache; goto bad_tracked_chunk_cache;
} }
ksnapd = create_singlethread_workqueue("ksnapd");
if (!ksnapd) {
DMERR("Failed to create ksnapd workqueue.");
r = -ENOMEM;
goto bad_pending_pool;
}
return 0; return 0;
bad_pending_pool:
kmem_cache_destroy(tracked_chunk_cache);
bad_tracked_chunk_cache: bad_tracked_chunk_cache:
kmem_cache_destroy(pending_cache); kmem_cache_destroy(pending_cache);
bad_pending_cache: bad_pending_cache:
...@@ -2322,8 +2270,6 @@ static int __init dm_snapshot_init(void) ...@@ -2322,8 +2270,6 @@ static int __init dm_snapshot_init(void)
static void __exit dm_snapshot_exit(void) static void __exit dm_snapshot_exit(void)
{ {
destroy_workqueue(ksnapd);
dm_unregister_target(&snapshot_target); dm_unregister_target(&snapshot_target);
dm_unregister_target(&origin_target); dm_unregister_target(&origin_target);
dm_unregister_target(&merge_target); dm_unregister_target(&merge_target);
......
@@ -39,23 +39,20 @@ struct stripe_c {
         struct dm_target *ti;
 
         /* Work struct used for triggering events*/
-        struct work_struct kstriped_ws;
+        struct work_struct trigger_event;
 
         struct stripe stripe[0];
 };
 
-static struct workqueue_struct *kstriped;
-
 /*
  * An event is triggered whenever a drive
  * drops out of a stripe volume.
  */
 static void trigger_event(struct work_struct *work)
 {
-        struct stripe_c *sc = container_of(work, struct stripe_c, kstriped_ws);
-
+        struct stripe_c *sc = container_of(work, struct stripe_c,
+                                           trigger_event);
         dm_table_event(sc->ti->table);
 }
 
 static inline struct stripe_c *alloc_context(unsigned int stripes)
@@ -160,7 +157,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                 return -ENOMEM;
         }
 
-        INIT_WORK(&sc->kstriped_ws, trigger_event);
+        INIT_WORK(&sc->trigger_event, trigger_event);
 
         /* Set pointer to dm target; used in trigger_event */
         sc->ti = ti;
@@ -211,7 +208,7 @@ static void stripe_dtr(struct dm_target *ti)
         for (i = 0; i < sc->stripes; i++)
                 dm_put_device(ti, sc->stripe[i].dev);
 
-        flush_workqueue(kstriped);
+        flush_work_sync(&sc->trigger_event);
         kfree(sc);
 }
@@ -367,7 +364,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
                         atomic_inc(&(sc->stripe[i].error_count));
                         if (atomic_read(&(sc->stripe[i].error_count)) <
                             DM_IO_ERROR_THRESHOLD)
-                                queue_work(kstriped, &sc->kstriped_ws);
+                                schedule_work(&sc->trigger_event);
                 }
 
         return error;
@@ -401,7 +398,7 @@ static void stripe_io_hints(struct dm_target *ti,
 static struct target_type stripe_target = {
         .name = "striped",
-        .version = {1, 3, 0},
+        .version = {1, 3, 1},
         .module = THIS_MODULE,
         .ctr = stripe_ctr,
         .dtr = stripe_dtr,
@@ -422,20 +419,10 @@ int __init dm_stripe_init(void)
                 return r;
         }
 
-        kstriped = create_singlethread_workqueue("kstriped");
-        if (!kstriped) {
-                DMERR("failed to create workqueue kstriped");
-                dm_unregister_target(&stripe_target);
-                return -ENOMEM;
-        }
-
         return r;
 }
 
 void dm_stripe_exit(void)
 {
         dm_unregister_target(&stripe_target);
-        destroy_workqueue(kstriped);
-
-        return;
 }
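The stripe hunks above replace the module-wide kstriped workqueue with a per-context work item pushed onto the shared system workqueue. A rough sketch of the same pattern under hypothetical names is shown below; it assumes the flush_work_sync() of this kernel generation, while later kernels would use plain flush_work().

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical per-device context carrying its own work item. */
struct ex_ctx {
        struct work_struct trigger_event;
        int id;
};

static void ex_trigger_event(struct work_struct *work)
{
        struct ex_ctx *c = container_of(work, struct ex_ctx, trigger_event);

        pr_info("event on ctx %d\n", c->id);
}

static struct ex_ctx *ex_alloc(int id)
{
        struct ex_ctx *c = kzalloc(sizeof(*c), GFP_KERNEL);

        if (c) {
                c->id = id;
                INIT_WORK(&c->trigger_event, ex_trigger_event);
        }
        return c;
}

static void ex_event(struct ex_ctx *c)
{
        schedule_work(&c->trigger_event);       /* system workqueue, no private wq */
}

static void ex_free(struct ex_ctx *c)
{
        /* Wait for a pending or running handler before freeing the context. */
        flush_work_sync(&c->trigger_event);
        kfree(c);
}

Because each context owns its work item, the destructor only has to wait for that one item instead of flushing a whole dedicated queue.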
@@ -71,6 +71,8 @@ struct dm_table {
         void *event_context;
 
         struct dm_md_mempools *mempools;
+
+        struct list_head target_callbacks;
 };
 
 /*
@@ -204,6 +206,7 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
                 return -ENOMEM;
 
         INIT_LIST_HEAD(&t->devices);
+        INIT_LIST_HEAD(&t->target_callbacks);
         atomic_set(&t->holders, 0);
         t->discards_supported = 1;
@@ -1225,10 +1228,17 @@ int dm_table_resume_targets(struct dm_table *t)
         return 0;
 }
 
+void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb)
+{
+        list_add(&cb->list, &t->target_callbacks);
+}
+EXPORT_SYMBOL_GPL(dm_table_add_target_callbacks);
+
 int dm_table_any_congested(struct dm_table *t, int bdi_bits)
 {
         struct dm_dev_internal *dd;
         struct list_head *devices = dm_table_get_devices(t);
+        struct dm_target_callbacks *cb;
         int r = 0;
 
         list_for_each_entry(dd, devices, list) {
@@ -1243,6 +1253,10 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits)
                              bdevname(dd->dm_dev.bdev, b));
         }
 
+        list_for_each_entry(cb, &t->target_callbacks, list)
+                if (cb->congested_fn)
+                        r |= cb->congested_fn(cb, bdi_bits);
+
         return r;
 }
@@ -1264,6 +1278,7 @@ void dm_table_unplug_all(struct dm_table *t)
 {
         struct dm_dev_internal *dd;
         struct list_head *devices = dm_table_get_devices(t);
+        struct dm_target_callbacks *cb;
 
         list_for_each_entry(dd, devices, list) {
                 struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev);
@@ -1276,6 +1291,10 @@ void dm_table_unplug_all(struct dm_table *t)
                              dm_device_name(t->md),
                              bdevname(dd->dm_dev.bdev, b));
         }
+
+        list_for_each_entry(cb, &t->target_callbacks, list)
+                if (cb->unplug_fn)
+                        cb->unplug_fn(cb);
 }
 
 struct mapped_device *dm_table_get_md(struct dm_table *t)
......
@@ -32,7 +32,6 @@
 #define DM_COOKIE_ENV_VAR_NAME "DM_COOKIE"
 #define DM_COOKIE_LENGTH 24
 
-static DEFINE_MUTEX(dm_mutex);
 static const char *_name = DM_NAME;
 
 static unsigned int major = 0;
@@ -328,7 +327,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 {
         struct mapped_device *md;
 
-        mutex_lock(&dm_mutex);
         spin_lock(&_minor_lock);
 
         md = bdev->bd_disk->private_data;
@@ -346,7 +344,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 out:
         spin_unlock(&_minor_lock);
-        mutex_unlock(&dm_mutex);
 
         return md ? 0 : -ENXIO;
 }
@@ -355,10 +352,12 @@ static int dm_blk_close(struct gendisk *disk, fmode_t mode)
 {
         struct mapped_device *md = disk->private_data;
 
-        mutex_lock(&dm_mutex);
+        spin_lock(&_minor_lock);
+
         atomic_dec(&md->open_count);
         dm_put(md);
-        mutex_unlock(&dm_mutex);
+
+        spin_unlock(&_minor_lock);
 
         return 0;
 }
@@ -1638,13 +1637,15 @@ static void dm_request_fn(struct request_queue *q)
                 if (map_request(ti, clone, md))
                         goto requeued;
 
-                spin_lock_irq(q->queue_lock);
+                BUG_ON(!irqs_disabled());
+                spin_lock(q->queue_lock);
         }
 
         goto out;
 
 requeued:
-        spin_lock_irq(q->queue_lock);
+        BUG_ON(!irqs_disabled());
+        spin_lock(q->queue_lock);
 
 plug_and_out:
         if (!elv_queue_empty(q))
@@ -1884,7 +1885,8 @@ static struct mapped_device *alloc_dev(int minor)
         add_disk(md->disk);
         format_dev_t(md->name, MKDEV(_major, minor));
 
-        md->wq = create_singlethread_workqueue("kdmflush");
+        md->wq = alloc_workqueue("kdmflush",
+                                 WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
         if (!md->wq)
                 goto bad_thread;
@@ -1992,13 +1994,14 @@ static void event_callback(void *context)
         wake_up(&md->eventq);
 }
 
+/*
+ * Protected by md->suspend_lock obtained by dm_swap_table().
+ */
 static void __set_size(struct mapped_device *md, sector_t size)
 {
         set_capacity(md->disk, size);
 
-        mutex_lock(&md->bdev->bd_inode->i_mutex);
         i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
-        mutex_unlock(&md->bdev->bd_inode->i_mutex);
 }
 
 /*
......
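The kdmflush hunk above swaps create_singlethread_workqueue() for alloc_workqueue() so that the properties the queue actually relies on are stated explicitly. A minimal sketch of that conversion with invented names follows; note that WQ_NON_REENTRANT was still a distinct flag on kernels of this vintage and later became the default behaviour.

#include <linux/workqueue.h>

/*
 * Hypothetical conversion sketch: a dedicated kernel thread is not needed,
 * but the queue must still make forward progress under memory pressure
 * (WQ_MEM_RECLAIM) and must not run the same work item concurrently
 * (WQ_NON_REENTRANT on kernels of this era).
 */
static struct workqueue_struct *ex_wq;

static int ex_setup(void)
{
        /* before: ex_wq = create_singlethread_workqueue("ex_flush"); */
        ex_wq = alloc_workqueue("ex_flush",
                                WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
        return ex_wq ? 0 : -ENOMEM;
}

static void ex_teardown(void)
{
        destroy_workqueue(ex_wq);
}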
@@ -193,6 +193,13 @@ struct dm_target {
         char *error;
 };
 
+/* Each target can link one of these into the table */
+struct dm_target_callbacks {
+        struct list_head list;
+        int (*congested_fn) (struct dm_target_callbacks *, int);
+        void (*unplug_fn)(struct dm_target_callbacks *);
+};
+
 int dm_register_target(struct target_type *t);
 void dm_unregister_target(struct target_type *t);
@@ -268,6 +275,11 @@ int dm_table_create(struct dm_table **result, fmode_t mode,
 int dm_table_add_target(struct dm_table *t, const char *type,
                         sector_t start, sector_t len, char *params);
 
+/*
+ * Target_ctr should call this if it needs to add any callbacks.
+ */
+void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callbacks *cb);
+
 /*
  * Finally call this to make the table ready for use.
  */
......
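To illustrate the interface declared above, here is a hypothetical target context that wires a congestion callback and an unplug callback into its table from the constructor. None of these names come from an in-tree target; they only exercise the new API.

#include <linux/device-mapper.h>

/* Hypothetical target context embedding the callback structure. */
struct ex_target_ctx {
        struct dm_target_callbacks callbacks;
        int busy;       /* stand-in for real congestion state */
};

static int ex_congested(struct dm_target_callbacks *cb, int bdi_bits)
{
        struct ex_target_ctx *ctx = container_of(cb, struct ex_target_ctx,
                                                 callbacks);

        /* Report the queried bdi_bits while the target is backed up. */
        return ctx->busy ? bdi_bits : 0;
}

static void ex_unplug(struct dm_target_callbacks *cb)
{
        /* Kick any I/O the target is holding back; nothing to do here. */
}

/* Called from a hypothetical target constructor. */
static void ex_register_callbacks(struct dm_target *ti,
                                  struct ex_target_ctx *ctx)
{
        ctx->callbacks.congested_fn = ex_congested;
        ctx->callbacks.unplug_fn = ex_unplug;
        dm_table_add_target_callbacks(ti->table, &ctx->callbacks);
}

dm_table_any_congested() and dm_table_unplug_all() then invoke these callbacks in addition to walking the table's underlying devices, as the table-layer hunks above show.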
@@ -44,7 +44,7 @@
  * Remove a device, destroy any tables.
  *
  * DM_DEV_RENAME:
- * Rename a device.
+ * Rename a device or set its uuid if none was previously supplied.
  *
  * DM_SUSPEND:
  * This performs both suspend and resume, depending which flag is
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY    _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR       4
-#define DM_VERSION_MINOR       18
-#define DM_VERSION_PATCHLEVEL  0
-#define DM_VERSION_EXTRA       "-ioctl (2010-06-29)"
+#define DM_VERSION_MINOR       19
+#define DM_VERSION_PATCHLEVEL  1
+#define DM_VERSION_EXTRA       "-ioctl (2011-01-07)"
 
 /* Status bits */
 #define DM_READONLY_FLAG       (1 << 0) /* In/Out */
@@ -322,4 +322,10 @@ enum {
  */
 #define DM_UEVENT_GENERATED_FLAG       (1 << 13) /* Out */
 
+/*
+ * If set, rename changes the uuid not the name.  Only permitted
+ * if no uuid was previously supplied: an existing uuid cannot be changed.
+ */
+#define DM_UUID_FLAG   (1 << 14) /* In */
+
 #endif /* _LINUX_DM_IOCTL_H */
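For completeness, a hedged userspace sketch of how DM_UUID_FLAG might be used: DM_DEV_RENAME carries the existing device name in the ioctl header and the new string in the data area, and the flag tells the kernel to treat that string as a uuid rather than a new name. In practice the libdevmapper/dmsetup tooling wraps this; the buffer handling below is illustrative only.

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/dm-ioctl.h>

static int set_dm_uuid(const char *dev_name, const char *uuid)
{
        struct {
                struct dm_ioctl io;
                char new_uuid[DM_UUID_LEN];     /* payload follows the header */
        } buf;
        int fd, r;

        memset(&buf, 0, sizeof(buf));
        buf.io.version[0] = DM_VERSION_MAJOR;
        buf.io.version[1] = DM_VERSION_MINOR;
        buf.io.version[2] = DM_VERSION_PATCHLEVEL;
        buf.io.data_size = sizeof(buf);
        buf.io.data_start = sizeof(struct dm_ioctl);
        buf.io.flags = DM_UUID_FLAG;            /* interpret the payload as a uuid */
        strncpy(buf.io.name, dev_name, sizeof(buf.io.name) - 1);
        strncpy(buf.new_uuid, uuid, sizeof(buf.new_uuid) - 1);

        fd = open("/dev/mapper/control", O_RDWR);
        if (fd < 0)
                return -1;
        r = ioctl(fd, DM_DEV_RENAME, &buf.io);
        close(fd);
        return r;
}

As the header comment says, the call is only permitted when the device has no uuid yet; an existing uuid cannot be changed.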
@@ -370,6 +370,16 @@
 #define DM_ULOG_REQUEST_TYPE(request_type) \
         (DM_ULOG_REQUEST_MASK & (request_type))
 
+/*
+ * DM_ULOG_REQUEST_VERSION is incremented when there is a
+ * change to the way information is passed between kernel
+ * and userspace.  This could be a structure change of
+ * dm_ulog_request or a change in the way requests are
+ * issued/handled.  Changes are outlined here:
+ *      version 1:  Initial implementation
+ */
+#define DM_ULOG_REQUEST_VERSION 1
+
 struct dm_ulog_request {
         /*
          * The local unique identifier (luid) and the universally unique
@@ -383,8 +393,9 @@ struct dm_ulog_request {
          */
         uint64_t luid;
         char uuid[DM_UUID_LEN];
-        char padding[7];        /* Padding because DM_UUID_LEN = 129 */
+        char padding[3];        /* Padding because DM_UUID_LEN = 129 */
 
+        uint32_t version;       /* See DM_ULOG_REQUEST_VERSION */
         int32_t error;          /* Used to report back processing errors */
 
         uint32_t seq;           /* Sequence number for request */
......
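A userspace log server can use the new version field to reject requests it does not understand. The following is a small, hypothetical handler sketch; only the version check, the error field, and the request-type macros reflect the header above, everything else is invented for illustration.

#include <errno.h>
#include <linux/dm-log-userspace.h>

static int handle_request(struct dm_ulog_request *rq)
{
        if (rq->version != DM_ULOG_REQUEST_VERSION) {
                rq->error = -EINVAL;    /* reported back to the kernel */
                rq->data_size = 0;
                return -EINVAL;
        }

        switch (DM_ULOG_REQUEST_TYPE(rq->request_type)) {
        case DM_ULOG_CTR:
                /* parse constructor arguments from rq->data here */
                rq->error = 0;
                break;
        default:
                rq->error = -ENOTSUP;
                break;
        }
        return 0;
}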