Commit d9b9be02 authored by Linus Torvalds's avatar Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm

* git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-2.6-dm: (36 commits)
  dm: set queue ordered mode
  dm: move wait queue declaration
  dm: merge pushback and deferred bio lists
  dm: allow uninterruptible wait for pending io
  dm: merge __flush_deferred_io into caller
  dm: move bio_io_error into __split_and_process_bio
  dm: rename __split_bio
  dm: remove unnecessary struct dm_wq_req
  dm: remove unnecessary work queue context field
  dm: remove unnecessary work queue type field
  dm: bio list add bio_list_add_head
  dm snapshot: persistent fix dtr cleanup
  dm snapshot: move status to exception store
  dm snapshot: move ctr parsing to exception store
  dm snapshot: use DMEMIT macro for status
  dm snapshot: remove dm_snap header
  dm snapshot: remove dm_snap header use
  dm exception store: move cow pointer
  dm exception store: move chunk_fields
  dm exception store: move dm_target pointer
  ...
parents 9b59f031 99360b4c
......@@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
bl->tail = bio;
}
static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = bl->head;
bl->head = bio;
if (!bl->tail)
bl->tail = bio;
}
static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
if (!bl2->head)
......
......@@ -16,30 +16,56 @@
* functions in this file help the target record and restore the
* original bio state.
*/
struct dm_bio_vec_details {
#if PAGE_SIZE < 65536
__u16 bv_len;
__u16 bv_offset;
#else
unsigned bv_len;
unsigned bv_offset;
#endif
};
struct dm_bio_details {
sector_t bi_sector;
struct block_device *bi_bdev;
unsigned int bi_size;
unsigned short bi_idx;
unsigned long bi_flags;
struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES];
};
static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio)
{
unsigned i;
bd->bi_sector = bio->bi_sector;
bd->bi_bdev = bio->bi_bdev;
bd->bi_size = bio->bi_size;
bd->bi_idx = bio->bi_idx;
bd->bi_flags = bio->bi_flags;
for (i = 0; i < bio->bi_vcnt; i++) {
bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len;
bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset;
}
}
static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio)
{
unsigned i;
bio->bi_sector = bd->bi_sector;
bio->bi_bdev = bd->bi_bdev;
bio->bi_size = bd->bi_size;
bio->bi_idx = bd->bi_idx;
bio->bi_flags = bd->bi_flags;
for (i = 0; i < bio->bi_vcnt; i++) {
bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len;
bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset;
}
}
#endif
......@@ -1156,8 +1156,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
crypto_free_ablkcipher(tfm);
bad_cipher:
/* Must zero key material before freeing */
memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
kzfree(cc);
return -EINVAL;
}
......@@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti)
dm_put_device(ti, cc->dev);
/* Must zero key material before freeing */
memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8));
kfree(cc);
kzfree(cc);
}
static int crypt_map(struct dm_target *ti, struct bio *bio,
......
......@@ -7,6 +7,7 @@
#include "dm-exception-store.h"
#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
......@@ -14,6 +15,257 @@
#define DM_MSG_PREFIX "snapshot exception stores"
static LIST_HEAD(_exception_store_types);
static DEFINE_SPINLOCK(_lock);
static struct dm_exception_store_type *__find_exception_store_type(const char *name)
{
struct dm_exception_store_type *type;
list_for_each_entry(type, &_exception_store_types, list)
if (!strcmp(name, type->name))
return type;
return NULL;
}
static struct dm_exception_store_type *_get_exception_store_type(const char *name)
{
struct dm_exception_store_type *type;
spin_lock(&_lock);
type = __find_exception_store_type(name);
if (type && !try_module_get(type->module))
type = NULL;
spin_unlock(&_lock);
return type;
}
/*
* get_type
* @type_name
*
* Attempt to retrieve the dm_exception_store_type by name. If not already
* available, attempt to load the appropriate module.
*
* Exstore modules are named "dm-exstore-" followed by the 'type_name'.
* Modules may contain multiple types.
* This function will first try the module "dm-exstore-<type_name>",
* then truncate 'type_name' on the last '-' and try again.
*
* For example, if type_name was "clustered-shared", it would search
* 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'.
*
* 'dm-exception-store-<type_name>' is too long of a name in my
* opinion, which is why I've chosen to have the files
* containing exception store implementations be 'dm-exstore-<type_name>'.
* If you want your module to be autoloaded, you will follow this
* naming convention.
*
* Returns: dm_exception_store_type* on success, NULL on failure
*/
static struct dm_exception_store_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
struct dm_exception_store_type *type;
type = _get_exception_store_type(type_name);
if (type)
return type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
DMERR("No memory left to attempt load for \"%s\"", type_name);
return NULL;
}
while (request_module("dm-exstore-%s", type_name_dup) ||
!(type = _get_exception_store_type(type_name))) {
p = strrchr(type_name_dup, '-');
if (!p)
break;
p[0] = '\0';
}
if (!type)
DMWARN("Module for exstore type \"%s\" not found.", type_name);
kfree(type_name_dup);
return type;
}
static void put_type(struct dm_exception_store_type *type)
{
spin_lock(&_lock);
module_put(type->module);
spin_unlock(&_lock);
}
int dm_exception_store_type_register(struct dm_exception_store_type *type)
{
int r = 0;
spin_lock(&_lock);
if (!__find_exception_store_type(type->name))
list_add(&type->list, &_exception_store_types);
else
r = -EEXIST;
spin_unlock(&_lock);
return r;
}
EXPORT_SYMBOL(dm_exception_store_type_register);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type)
{
spin_lock(&_lock);
if (!__find_exception_store_type(type->name)) {
spin_unlock(&_lock);
return -EINVAL;
}
list_del(&type->list);
spin_unlock(&_lock);
return 0;
}
EXPORT_SYMBOL(dm_exception_store_type_unregister);
/*
* Round a number up to the nearest 'size' boundary. size must
* be a power of 2.
*/
static ulong round_up(ulong n, ulong size)
{
size--;
return (n + size) & ~size;
}
static int set_chunk_size(struct dm_exception_store *store,
const char *chunk_size_arg, char **error)
{
unsigned long chunk_size_ulong;
char *value;
chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10);
if (*chunk_size_arg == '\0' || *value != '\0') {
*error = "Invalid chunk size";
return -EINVAL;
}
if (!chunk_size_ulong) {
store->chunk_size = store->chunk_mask = store->chunk_shift = 0;
return 0;
}
/*
* Chunk size must be multiple of page size. Silently
* round up if it's not.
*/
chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9);
/* Check chunk_size is a power of 2 */
if (!is_power_of_2(chunk_size_ulong)) {
*error = "Chunk size is not a power of 2";
return -EINVAL;
}
/* Validate the chunk size against the device block size */
if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) {
*error = "Chunk size is not a multiple of device blocksize";
return -EINVAL;
}
store->chunk_size = chunk_size_ulong;
store->chunk_mask = chunk_size_ulong - 1;
store->chunk_shift = ffs(chunk_size_ulong) - 1;
return 0;
}
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store)
{
int r = 0;
struct dm_exception_store_type *type;
struct dm_exception_store *tmp_store;
char persistent;
if (argc < 3) {
ti->error = "Insufficient exception store arguments";
return -EINVAL;
}
tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
if (!tmp_store) {
ti->error = "Exception store allocation failed";
return -ENOMEM;
}
persistent = toupper(*argv[1]);
if (persistent != 'P' && persistent != 'N') {
ti->error = "Persistent flag is not P or N";
return -EINVAL;
}
type = get_type(argv[1]);
if (!type) {
ti->error = "Exception store type not recognised";
r = -EINVAL;
goto bad_type;
}
tmp_store->type = type;
tmp_store->ti = ti;
r = dm_get_device(ti, argv[0], 0, 0,
FMODE_READ | FMODE_WRITE, &tmp_store->cow);
if (r) {
ti->error = "Cannot get COW device";
goto bad_cow;
}
r = set_chunk_size(tmp_store, argv[2], &ti->error);
if (r)
goto bad_cow;
r = type->ctr(tmp_store, 0, NULL);
if (r) {
ti->error = "Exception store type constructor failed";
goto bad_ctr;
}
*args_used = 3;
*store = tmp_store;
return 0;
bad_ctr:
dm_put_device(ti, tmp_store->cow);
bad_cow:
put_type(type);
bad_type:
kfree(tmp_store);
return r;
}
EXPORT_SYMBOL(dm_exception_store_create);
void dm_exception_store_destroy(struct dm_exception_store *store)
{
store->type->dtr(store);
dm_put_device(store->ti, store->cow);
put_type(store->type);
kfree(store);
}
EXPORT_SYMBOL(dm_exception_store_destroy);
int dm_exception_store_init(void)
{
int r;
......
......@@ -37,11 +37,18 @@ struct dm_snap_exception {
* Abstraction to handle the meta/layout of exception stores (the
* COW device).
*/
struct dm_exception_store {
struct dm_exception_store;
struct dm_exception_store_type {
const char *name;
struct module *module;
int (*ctr) (struct dm_exception_store *store,
unsigned argc, char **argv);
/*
* Destroys this object when you've finished with it.
*/
void (*destroy) (struct dm_exception_store *store);
void (*dtr) (struct dm_exception_store *store);
/*
* The target shouldn't read the COW device until this is
......@@ -72,8 +79,9 @@ struct dm_exception_store {
*/
void (*drop_snapshot) (struct dm_exception_store *store);
int (*status) (struct dm_exception_store *store, status_type_t status,
char *result, unsigned int maxlen);
unsigned (*status) (struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen);
/*
* Return how full the snapshot is.
......@@ -82,7 +90,21 @@ struct dm_exception_store {
sector_t *numerator,
sector_t *denominator);
struct dm_snapshot *snap;
/* For internal device-mapper use only. */
struct list_head list;
};
struct dm_exception_store {
struct dm_exception_store_type *type;
struct dm_target *ti;
struct dm_dev *cow;
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
chunk_t chunk_mask;
chunk_t chunk_shift;
void *context;
};
......@@ -129,6 +151,28 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e)
# endif
/*
* Return the number of sectors in the device.
*/
static inline sector_t get_dev_size(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> SECTOR_SHIFT;
}
static inline chunk_t sector_to_chunk(struct dm_exception_store *store,
sector_t sector)
{
return (sector & ~store->chunk_mask) >> store->chunk_shift;
}
int dm_exception_store_type_register(struct dm_exception_store_type *type);
int dm_exception_store_type_unregister(struct dm_exception_store_type *type);
int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
unsigned *args_used,
struct dm_exception_store **store);
void dm_exception_store_destroy(struct dm_exception_store *store);
int dm_exception_store_init(void);
void dm_exception_store_exit(void);
......@@ -141,8 +185,4 @@ void dm_persistent_snapshot_exit(void);
int dm_transient_snapshot_init(void);
void dm_transient_snapshot_exit(void);
int dm_create_persistent(struct dm_exception_store *store);
int dm_create_transient(struct dm_exception_store *store);
#endif /* _LINUX_DM_EXCEPTION_STORE */
......@@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions,
while (1) {
set_current_state(TASK_UNINTERRUPTIBLE);
if (!atomic_read(&io.count) || signal_pending(current))
if (!atomic_read(&io.count))
break;
io_schedule();
}
set_current_state(TASK_RUNNING);
if (atomic_read(&io.count))
return -EINTR;
if (error_bits)
*error_bits = io.error_bits;
......
......@@ -16,40 +16,29 @@
#define DM_MSG_PREFIX "dirty region log"
struct dm_dirty_log_internal {
struct dm_dirty_log_type *type;
struct list_head list;
long use;
};
static LIST_HEAD(_log_types);
static DEFINE_SPINLOCK(_lock);
static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name)
static struct dm_dirty_log_type *__find_dirty_log_type(const char *name)
{
struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
list_for_each_entry(log_type, &_log_types, list)
if (!strcmp(name, log_type->type->name))
if (!strcmp(name, log_type->name))
return log_type;
return NULL;
}
static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *_get_dirty_log_type(const char *name)
{
struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
spin_lock(&_lock);
log_type = __find_dirty_log_type(name);
if (log_type) {
if (!log_type->use && !try_module_get(log_type->type->module))
log_type = NULL;
else
log_type->use++;
}
if (log_type && !try_module_get(log_type->module))
log_type = NULL;
spin_unlock(&_lock);
......@@ -76,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name)
static struct dm_dirty_log_type *get_type(const char *type_name)
{
char *p, *type_name_dup;
struct dm_dirty_log_internal *log_type;
struct dm_dirty_log_type *log_type;
if (!type_name)
return NULL;
log_type = _get_dirty_log_type(type_name);
if (log_type)
return log_type->type;
return log_type;
type_name_dup = kstrdup(type_name, GFP_KERNEL);
if (!type_name_dup) {
......@@ -105,56 +94,33 @@ static struct dm_dirty_log_type *get_type(const char *type_name)
kfree(type_name_dup);
return log_type ? log_type->type : NULL;
return log_type;
}
static void put_type(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type;
if (!type)
return;
spin_lock(&_lock);
log_type = __find_dirty_log_type(type->name);
if (!log_type)
if (!__find_dirty_log_type(type->name))
goto out;
if (!--log_type->use)
module_put(type->module);
BUG_ON(log_type->use < 0);
module_put(type->module);
out:
spin_unlock(&_lock);
}
static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type),
GFP_KERNEL);
if (log_type)
log_type->type = type;
return log_type;
}
int dm_dirty_log_type_register(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type);
int r = 0;
if (!log_type)
return -ENOMEM;
spin_lock(&_lock);
if (!__find_dirty_log_type(type->name))
list_add(&log_type->list, &_log_types);
else {
kfree(log_type);
list_add(&type->list, &_log_types);
else
r = -EEXIST;
}
spin_unlock(&_lock);
return r;
......@@ -163,25 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register);
int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type)
{
struct dm_dirty_log_internal *log_type;
spin_lock(&_lock);
log_type = __find_dirty_log_type(type->name);
if (!log_type) {
if (!__find_dirty_log_type(type->name)) {
spin_unlock(&_lock);
return -EINVAL;
}
if (log_type->use) {
spin_unlock(&_lock);
return -ETXTBSY;
}
list_del(&log_type->list);
list_del(&type->list);
spin_unlock(&_lock);
kfree(log_type);
return 0;
}
......
......@@ -17,9 +17,7 @@
struct ps_internal {
struct path_selector_type pst;
struct list_head list;
long use;
};
#define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst)
......@@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name)
down_read(&_ps_lock);
psi = __find_path_selector_type(name);
if (psi) {
if ((psi->use == 0) && !try_module_get(psi->pst.module))
psi = NULL;
else
psi->use++;
}
if (psi && !try_module_get(psi->pst.module))
psi = NULL;
up_read(&_ps_lock);
return psi;
......@@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst)
if (!psi)
goto out;
if (--psi->use == 0)
module_put(psi->pst.module);
BUG_ON(psi->use < 0);
module_put(psi->pst.module);
out:
up_read(&_ps_lock);
}
......@@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst)
return -EINVAL;
}
if (psi->use) {
up_write(&_ps_lock);
return -ETXTBSY;
}
list_del(&psi->list);
up_write(&_ps_lock);
......
......@@ -145,6 +145,8 @@ struct dm_raid1_read_record {
struct dm_bio_details details;
};
static struct kmem_cache *_dm_raid1_read_record_cache;
/*
* Every mirror should look like this one.
*/
......@@ -586,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
int state;
struct bio *bio;
struct bio_list sync, nosync, recover, *this_list = NULL;
struct bio_list requeue;
struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh);
region_t region;
if (!writes->head)
return;
......@@ -596,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_init(&sync);
bio_list_init(&nosync);
bio_list_init(&recover);
bio_list_init(&requeue);
while ((bio = bio_list_pop(writes))) {
state = dm_rh_get_state(ms->rh,
dm_rh_bio_to_region(ms->rh, bio), 1);
region = dm_rh_bio_to_region(ms->rh, bio);
if (log->type->is_remote_recovering &&
log->type->is_remote_recovering(log, region)) {
bio_list_add(&requeue, bio);
continue;
}
state = dm_rh_get_state(ms->rh, region, 1);
switch (state) {
case DM_RH_CLEAN:
case DM_RH_DIRTY:
......@@ -618,6 +631,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
bio_list_add(this_list, bio);
}
/*
* Add bios that are delayed due to remote recovery
* back on to the write queue
*/
if (unlikely(requeue.head)) {
spin_lock_irq(&ms->lock);
bio_list_merge(&ms->writes, &requeue);
spin_unlock_irq(&ms->lock);
}
/*
* Increment the pending counts for any regions that will
* be written to (writes to recover regions are going to
......@@ -764,9 +787,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
atomic_set(&ms->suspend, 0);
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
len = sizeof(struct dm_raid1_read_record);
ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS,
len);
ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS,
_dm_raid1_read_record_cache);
if (!ms->read_record_pool) {
ti->error = "Error creating mirror read_record_pool";
kfree(ms);
......@@ -1279,16 +1302,31 @@ static int __init dm_mirror_init(void)
{
int r;
_dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0);
if (!_dm_raid1_read_record_cache) {
DMERR("Can't allocate dm_raid1_read_record cache");
r = -ENOMEM;
goto bad_cache;
}
r = dm_register_target(&mirror_target);
if (r < 0)
if (r < 0) {
DMERR("Failed to register mirror target");
goto bad_target;
}
return 0;
bad_target:
kmem_cache_destroy(_dm_raid1_read_record_cache);
bad_cache:
return r;
}
static void __exit dm_mirror_exit(void)
{
dm_unregister_target(&mirror_target);
kmem_cache_destroy(_dm_raid1_read_record_cache);
}
/* Module hooks */
......
......@@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
......@@ -89,7 +88,7 @@ struct commit_callback {
* The top level structure for a persistent exception store.
*/
struct pstore {
struct dm_snapshot *snap; /* up pointer to my snapshot */
struct dm_exception_store *store;
int version;
int valid;
uint32_t exceptions_per_area;
......@@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps)
int r = -ENOMEM;
size_t len;
len = ps->snap->chunk_size << SECTOR_SHIFT;
len = ps->store->chunk_size << SECTOR_SHIFT;
/*
* Allocate the chunk_size block of memory that will hold
......@@ -163,9 +162,12 @@ static int alloc_area(struct pstore *ps)
static void free_area(struct pstore *ps)
{
vfree(ps->area);
if (ps->area)
vfree(ps->area);
ps->area = NULL;
vfree(ps->zero_area);
if (ps->zero_area)
vfree(ps->zero_area);
ps->zero_area = NULL;
}
......@@ -189,9 +191,9 @@ static void do_metadata(struct work_struct *work)
static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * chunk,
.count = ps->snap->chunk_size,
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * chunk,
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = rw,
......@@ -247,15 +249,15 @@ static int area_io(struct pstore *ps, int rw)
static void zero_memory_area(struct pstore *ps)
{
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
}
static int zero_disk_area(struct pstore *ps, chunk_t area)
{
struct dm_io_region where = {
.bdev = ps->snap->cow->bdev,
.sector = ps->snap->chunk_size * area_location(ps, area),
.count = ps->snap->chunk_size,
.bdev = ps->store->cow->bdev,
.sector = ps->store->chunk_size * area_location(ps, area),
.count = ps->store->chunk_size,
};
struct dm_io_request io_req = {
.bi_rw = WRITE,
......@@ -278,15 +280,15 @@ static int read_header(struct pstore *ps, int *new_snapshot)
/*
* Use default chunk size (or hardsect_size, if larger) if none supplied
*/
if (!ps->snap->chunk_size) {
ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->snap->cow->bdev) >> 9);
ps->snap->chunk_mask = ps->snap->chunk_size - 1;
ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1;
if (!ps->store->chunk_size) {
ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS,
bdev_hardsect_size(ps->store->cow->bdev) >> 9);
ps->store->chunk_mask = ps->store->chunk_size - 1;
ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1;
chunk_size_supplied = 0;
}
ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
ps->io_client = dm_io_client_create(sectors_to_pages(ps->store->
chunk_size));
if (IS_ERR(ps->io_client))
return PTR_ERR(ps->io_client);
......@@ -317,22 +319,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
ps->version = le32_to_cpu(dh->version);
chunk_size = le32_to_cpu(dh->chunk_size);
if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size)
if (!chunk_size_supplied || ps->store->chunk_size == chunk_size)
return 0;
DMWARN("chunk size %llu in device metadata overrides "
"table chunk size of %llu.",
(unsigned long long)chunk_size,
(unsigned long long)ps->snap->chunk_size);
(unsigned long long)ps->store->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
free_area(ps);
ps->snap->chunk_size = chunk_size;
ps->snap->chunk_mask = chunk_size - 1;
ps->snap->chunk_shift = ffs(chunk_size) - 1;
ps->store->chunk_size = chunk_size;
ps->store->chunk_mask = chunk_size - 1;
ps->store->chunk_shift = ffs(chunk_size) - 1;
r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size),
ps->io_client);
if (r)
return r;
......@@ -349,13 +351,13 @@ static int write_header(struct pstore *ps)
{
struct disk_header *dh;
memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT);
dh = (struct disk_header *) ps->area;
dh->magic = cpu_to_le32(SNAP_MAGIC);
dh->valid = cpu_to_le32(ps->valid);
dh->version = cpu_to_le32(ps->version);
dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
dh->chunk_size = cpu_to_le32(ps->store->chunk_size);
return chunk_io(ps, 0, WRITE, 1);
}
......@@ -474,18 +476,25 @@ static struct pstore *get_info(struct dm_exception_store *store)
static void persistent_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = get_info(store)->next_free * store->snap->chunk_size;
*denominator = get_dev_size(store->snap->cow->bdev);
*numerator = get_info(store)->next_free * store->chunk_size;
*denominator = get_dev_size(store->cow->bdev);
}
static void persistent_destroy(struct dm_exception_store *store)
static void persistent_dtr(struct dm_exception_store *store)
{
struct pstore *ps = get_info(store);
destroy_workqueue(ps->metadata_wq);
dm_io_client_destroy(ps->io_client);
vfree(ps->callbacks);
/* Created in read_header */
if (ps->io_client)
dm_io_client_destroy(ps->io_client);
free_area(ps);
/* Allocated in persistent_read_metadata */
if (ps->callbacks)
vfree(ps->callbacks);
kfree(ps);
}
......@@ -507,7 +516,7 @@ static int persistent_read_metadata(struct dm_exception_store *store,
/*
* Now we know correct chunk_size, complete the initialisation.
*/
ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) /
sizeof(struct disk_exception);
ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
sizeof(*ps->callbacks));
......@@ -564,10 +573,10 @@ static int persistent_prepare_exception(struct dm_exception_store *store,
struct pstore *ps = get_info(store);
uint32_t stride;
chunk_t next_free;
sector_t size = get_dev_size(store->snap->cow->bdev);
sector_t size = get_dev_size(store->cow->bdev);
/* Is there enough room ? */
if (size < ((ps->next_free + 1) * store->snap->chunk_size))
if (size < ((ps->next_free + 1) * store->chunk_size))
return -ENOSPC;
e->new_chunk = ps->next_free;
......@@ -656,16 +665,17 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
DMWARN("write header failed");
}
int dm_create_persistent(struct dm_exception_store *store)
static int persistent_ctr(struct dm_exception_store *store,
unsigned argc, char **argv)
{
struct pstore *ps;
/* allocate the pstore */
ps = kmalloc(sizeof(*ps), GFP_KERNEL);
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
if (!ps)
return -ENOMEM;
ps->snap = store->snap;
ps->store = store;
ps->valid = 1;
ps->version = SNAPSHOT_DISK_VERSION;
ps->area = NULL;
......@@ -683,22 +693,77 @@ int dm_create_persistent(struct dm_exception_store *store)
return -ENOMEM;
}
store->destroy = persistent_destroy;
store->read_metadata = persistent_read_metadata;
store->prepare_exception = persistent_prepare_exception;
store->commit_exception = persistent_commit_exception;
store->drop_snapshot = persistent_drop_snapshot;
store->fraction_full = persistent_fraction_full;
store->context = ps;
return 0;
}
static unsigned persistent_status(struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen)
{
unsigned sz = 0;
switch (status) {
case STATUSTYPE_INFO:
break;
case STATUSTYPE_TABLE:
DMEMIT(" %s P %llu", store->cow->name,
(unsigned long long)store->chunk_size);
}
return sz;
}
static struct dm_exception_store_type _persistent_type = {
.name = "persistent",
.module = THIS_MODULE,
.ctr = persistent_ctr,
.dtr = persistent_dtr,
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
.drop_snapshot = persistent_drop_snapshot,
.fraction_full = persistent_fraction_full,
.status = persistent_status,
};
static struct dm_exception_store_type _persistent_compat_type = {
.name = "P",
.module = THIS_MODULE,
.ctr = persistent_ctr,
.dtr = persistent_dtr,
.read_metadata = persistent_read_metadata,
.prepare_exception = persistent_prepare_exception,
.commit_exception = persistent_commit_exception,
.drop_snapshot = persistent_drop_snapshot,
.fraction_full = persistent_fraction_full,
.status = persistent_status,
};
int dm_persistent_snapshot_init(void)
{
return 0;
int r;
r = dm_exception_store_type_register(&_persistent_type);
if (r) {
DMERR("Unable to register persistent exception store type");
return r;
}
r = dm_exception_store_type_register(&_persistent_compat_type);
if (r) {
DMERR("Unable to register old-style persistent exception "
"store type");
dm_exception_store_type_unregister(&_persistent_type);
return r;
}
return r;
}
void dm_persistent_snapshot_exit(void)
{
dm_exception_store_type_unregister(&_persistent_type);
dm_exception_store_type_unregister(&_persistent_compat_type);
}
......@@ -6,7 +6,6 @@
*/
#include "dm-exception-store.h"
#include "dm-snap.h"
#include <linux/mm.h>
#include <linux/pagemap.h>
......@@ -23,7 +22,7 @@ struct transient_c {
sector_t next_free;
};
static void transient_destroy(struct dm_exception_store *store)
static void transient_dtr(struct dm_exception_store *store)
{
kfree(store->context);
}
......@@ -39,14 +38,14 @@ static int transient_read_metadata(struct dm_exception_store *store,
static int transient_prepare_exception(struct dm_exception_store *store,
struct dm_snap_exception *e)
{
struct transient_c *tc = (struct transient_c *) store->context;
sector_t size = get_dev_size(store->snap->cow->bdev);
struct transient_c *tc = store->context;
sector_t size = get_dev_size(store->cow->bdev);
if (size < (tc->next_free + store->snap->chunk_size))
if (size < (tc->next_free + store->chunk_size))
return -1;
e->new_chunk = sector_to_chunk(store->snap, tc->next_free);
tc->next_free += store->snap->chunk_size;
e->new_chunk = sector_to_chunk(store, tc->next_free);
tc->next_free += store->chunk_size;
return 0;
}
......@@ -64,20 +63,14 @@ static void transient_fraction_full(struct dm_exception_store *store,
sector_t *numerator, sector_t *denominator)
{
*numerator = ((struct transient_c *) store->context)->next_free;
*denominator = get_dev_size(store->snap->cow->bdev);
*denominator = get_dev_size(store->cow->bdev);
}
int dm_create_transient(struct dm_exception_store *store)
static int transient_ctr(struct dm_exception_store *store,
unsigned argc, char **argv)
{
struct transient_c *tc;
store->destroy = transient_destroy;
store->read_metadata = transient_read_metadata;
store->prepare_exception = transient_prepare_exception;
store->commit_exception = transient_commit_exception;
store->drop_snapshot = NULL;
store->fraction_full = transient_fraction_full;
tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL);
if (!tc)
return -ENOMEM;
......@@ -88,11 +81,70 @@ int dm_create_transient(struct dm_exception_store *store)
return 0;
}
static unsigned transient_status(struct dm_exception_store *store,
status_type_t status, char *result,
unsigned maxlen)
{
unsigned sz = 0;
switch (status) {
case STATUSTYPE_INFO:
break;
case STATUSTYPE_TABLE:
DMEMIT(" %s N %llu", store->cow->name,
(unsigned long long)store->chunk_size);
}
return sz;
}
static struct dm_exception_store_type _transient_type = {
.name = "transient",
.module = THIS_MODULE,
.ctr = transient_ctr,
.dtr = transient_dtr,
.read_metadata = transient_read_metadata,
.prepare_exception = transient_prepare_exception,
.commit_exception = transient_commit_exception,
.fraction_full = transient_fraction_full,
.status = transient_status,
};
static struct dm_exception_store_type _transient_compat_type = {
.name = "N",
.module = THIS_MODULE,
.ctr = transient_ctr,
.dtr = transient_dtr,
.read_metadata = transient_read_metadata,
.prepare_exception = transient_prepare_exception,
.commit_exception = transient_commit_exception,
.fraction_full = transient_fraction_full,
.status = transient_status,
};
int dm_transient_snapshot_init(void)
{
return 0;
int r;
r = dm_exception_store_type_register(&_transient_type);
if (r) {
DMWARN("Unable to register transient exception store type");
return r;
}
r = dm_exception_store_type_register(&_transient_compat_type);
if (r) {
DMWARN("Unable to register old-style transient "
"exception store type");
dm_exception_store_type_unregister(&_transient_type);
return r;
}
return r;
}
void dm_transient_snapshot_exit(void)
{
dm_exception_store_type_unregister(&_transient_type);
dm_exception_store_type_unregister(&_transient_compat_type);
}
This diff is collapsed.
/*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
*
* This file is released under the GPL.
*/
#ifndef DM_SNAPSHOT_H
#define DM_SNAPSHOT_H
#include <linux/device-mapper.h>
#include "dm-exception-store.h"
#include "dm-bio-list.h"
#include <linux/blkdev.h>
#include <linux/workqueue.h>
struct exception_table {
uint32_t hash_mask;
unsigned hash_shift;
struct list_head *table;
};
#define DM_TRACKED_CHUNK_HASH_SIZE 16
#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \
(DM_TRACKED_CHUNK_HASH_SIZE - 1))
struct dm_snapshot {
struct rw_semaphore lock;
struct dm_target *ti;
struct dm_dev *origin;
struct dm_dev *cow;
/* List of snapshots per Origin */
struct list_head list;
/* Size of data blocks saved - must be a power of 2 */
chunk_t chunk_size;
chunk_t chunk_mask;
chunk_t chunk_shift;
/* You can't use a snapshot if this is 0 (e.g. if full) */
int valid;
/* Origin writes don't trigger exceptions until this is set */
int active;
/* Used for display of table */
char type;
mempool_t *pending_pool;
atomic_t pending_exceptions_count;
struct exception_table pending;
struct exception_table complete;
/*
* pe_lock protects all pending_exception operations and access
* as well as the snapshot_bios list.
*/
spinlock_t pe_lock;
/* The on disk metadata handler */
struct dm_exception_store store;
struct dm_kcopyd_client *kcopyd_client;
/* Queue of snapshot writes for ksnapd to flush */
struct bio_list queued_bios;
struct work_struct queued_bios_work;
/* Chunks with outstanding reads */
mempool_t *tracked_chunk_pool;
spinlock_t tracked_chunk_lock;
struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE];
};
/*
* Return the number of sectors in the device.
*/
static inline sector_t get_dev_size(struct block_device *bdev)
{
return bdev->bd_inode->i_size >> SECTOR_SHIFT;
}
static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector)
{
return (sector & ~s->chunk_mask) >> s->chunk_shift;
}
static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk)
{
return chunk << s->chunk_shift;
}
static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs)
{
/*
* There is only ever one instance of a particular block
* device so we can compare pointers safely.
*/
return lhs == rhs;
}
#endif
......@@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start,
}
/*
* This upgrades the mode on an already open dm_dev. Being
* This upgrades the mode on an already open dm_dev, being
* careful to leave things as they were if we fail to reopen the
* device.
* device and not to touch the existing bdev field in case
* it is accessed concurrently inside dm_table_any_congested().
*/
static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode,
struct mapped_device *md)
{
int r;
struct dm_dev_internal dd_copy;
dev_t dev = dd->dm_dev.bdev->bd_dev;
struct dm_dev_internal dd_new, dd_old;
dd_copy = *dd;
dd_new = dd_old = *dd;
dd_new.dm_dev.mode |= new_mode;
dd_new.dm_dev.bdev = NULL;
r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md);
if (r)
return r;
dd->dm_dev.mode |= new_mode;
dd->dm_dev.bdev = NULL;
r = open_dev(dd, dev, md);
if (!r)
close_dev(&dd_copy, md);
else
*dd = dd_copy;
close_dev(&dd_old, md);
return r;
return 0;
}
/*
......
......@@ -14,45 +14,34 @@
#define DM_MSG_PREFIX "target"
struct tt_internal {
struct target_type tt;
struct list_head list;
long use;
};
static LIST_HEAD(_targets);
static DECLARE_RWSEM(_lock);
#define DM_MOD_NAME_SIZE 32
static inline struct tt_internal *__find_target_type(const char *name)
static inline struct target_type *__find_target_type(const char *name)
{
struct tt_internal *ti;
struct target_type *tt;
list_for_each_entry (ti, &_targets, list)
if (!strcmp(name, ti->tt.name))
return ti;
list_for_each_entry(tt, &_targets, list)
if (!strcmp(name, tt->name))
return tt;
return NULL;
}
static struct tt_internal *get_target_type(const char *name)
static struct target_type *get_target_type(const char *name)
{
struct tt_internal *ti;
struct target_type *tt;
down_read(&_lock);
ti = __find_target_type(name);
if (ti) {
if ((ti->use == 0) && !try_module_get(ti->tt.module))
ti = NULL;
else
ti->use++;
}
tt = __find_target_type(name);
if (tt && !try_module_get(tt->module))
tt = NULL;
up_read(&_lock);
return ti;
return tt;
}
static void load_module(const char *name)
......@@ -62,92 +51,59 @@ static void load_module(const char *name)
struct target_type *dm_get_target_type(const char *name)
{
struct tt_internal *ti = get_target_type(name);
struct target_type *tt = get_target_type(name);
if (!ti) {
if (!tt) {
load_module(name);
ti = get_target_type(name);
tt = get_target_type(name);
}
return ti ? &ti->tt : NULL;
return tt;
}
void dm_put_target_type(struct target_type *t)
void dm_put_target_type(struct target_type *tt)
{
struct tt_internal *ti = (struct tt_internal *) t;
down_read(&_lock);
if (--ti->use == 0)
module_put(ti->tt.module);
BUG_ON(ti->use < 0);
module_put(tt->module);
up_read(&_lock);
return;
}
static struct tt_internal *alloc_target(struct target_type *t)
{
struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL);
if (ti)
ti->tt = *t;
return ti;
}
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param)
{
struct tt_internal *ti;
struct target_type *tt;
down_read(&_lock);
list_for_each_entry (ti, &_targets, list)
iter_func(&ti->tt, param);
list_for_each_entry(tt, &_targets, list)
iter_func(tt, param);
up_read(&_lock);
return 0;
}
int dm_register_target(struct target_type *t)
int dm_register_target(struct target_type *tt)
{
int rv = 0;
struct tt_internal *ti = alloc_target(t);
if (!ti)
return -ENOMEM;
down_write(&_lock);
if (__find_target_type(t->name))
if (__find_target_type(tt->name))
rv = -EEXIST;
else
list_add(&ti->list, &_targets);
list_add(&tt->list, &_targets);
up_write(&_lock);
if (rv)
kfree(ti);
return rv;
}
void dm_unregister_target(struct target_type *t)
void dm_unregister_target(struct target_type *tt)
{
struct tt_internal *ti;
down_write(&_lock);
if (!(ti = __find_target_type(t->name))) {
DMCRIT("Unregistering unrecognised target: %s", t->name);
BUG();
}
if (ti->use) {
DMCRIT("Attempt to unregister target still in use: %s",
t->name);
if (!__find_target_type(tt->name)) {
DMCRIT("Unregistering unrecognised target: %s", tt->name);
BUG();
}
list_del(&ti->list);
kfree(ti);
list_del(&tt->list);
up_write(&_lock);
}
......@@ -156,17 +112,17 @@ void dm_unregister_target(struct target_type *t)
* io-err: always fails an io, useful for bringing
* up LVs that have holes in them.
*/
static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args)
static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args)
{
return 0;
}
static void io_err_dtr(struct dm_target *ti)
static void io_err_dtr(struct dm_target *tt)
{
/* empty */
}
static int io_err_map(struct dm_target *ti, struct bio *bio,
static int io_err_map(struct dm_target *tt, struct bio *bio,
union map_info *map_context)
{
return -EIO;
......
......@@ -99,19 +99,9 @@ union map_info *dm_get_mapinfo(struct bio *bio)
/*
* Work processed by per-device workqueue.
*/
struct dm_wq_req {
enum {
DM_WQ_FLUSH_DEFERRED,
} type;
struct work_struct work;
struct mapped_device *md;
void *context;
};
struct mapped_device {
struct rw_semaphore io_lock;
struct mutex suspend_lock;
spinlock_t pushback_lock;
rwlock_t map_lock;
atomic_t holders;
atomic_t open_count;
......@@ -129,8 +119,9 @@ struct mapped_device {
*/
atomic_t pending;
wait_queue_head_t wait;
struct work_struct work;
struct bio_list deferred;
struct bio_list pushback;
spinlock_t deferred_lock;
/*
* Processing queue (flush/barriers)
......@@ -453,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio)
return 1;
}
spin_lock_irq(&md->deferred_lock);
bio_list_add(&md->deferred, bio);
spin_unlock_irq(&md->deferred_lock);
up_write(&md->io_lock);
return 0; /* deferred successfully */
......@@ -537,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error)
if (io->error == DM_ENDIO_REQUEUE) {
/*
* Target requested pushing back the I/O.
* This must be handled before the sleeper on
* suspend queue merges the pushback list.
*/
spin_lock_irqsave(&md->pushback_lock, flags);
spin_lock_irqsave(&md->deferred_lock, flags);
if (__noflush_suspending(md))
bio_list_add(&md->pushback, io->bio);
bio_list_add(&md->deferred, io->bio);
else
/* noflush suspend was interrupted. */
io->error = -EIO;
spin_unlock_irqrestore(&md->pushback_lock, flags);
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
end_io_acct(io);
......@@ -834,20 +825,22 @@ static int __clone_and_map(struct clone_info *ci)
}
/*
* Split the bio into several clones.
* Split the bio into several clones and submit it to targets.
*/
static int __split_bio(struct mapped_device *md, struct bio *bio)
static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
{
struct clone_info ci;
int error = 0;
ci.map = dm_get_table(md);
if (unlikely(!ci.map))
return -EIO;
if (unlikely(!ci.map)) {
bio_io_error(bio);
return;
}
if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) {
dm_table_put(ci.map);
bio_endio(bio, -EOPNOTSUPP);
return 0;
return;
}
ci.md = md;
ci.bio = bio;
......@@ -867,8 +860,6 @@ static int __split_bio(struct mapped_device *md, struct bio *bio)
/* drop the extra reference count */
dec_pending(ci.io, error);
dm_table_put(ci.map);
return 0;
}
/*-----------------------------------------------------------------
* CRUD END
......@@ -959,8 +950,9 @@ static int dm_request(struct request_queue *q, struct bio *bio)
down_read(&md->io_lock);
}
r = __split_bio(md, bio);
__split_and_process_bio(md, bio);
up_read(&md->io_lock);
return 0;
out_req:
if (r < 0)
......@@ -1074,6 +1066,8 @@ static int next_free_minor(int *minor)
static struct block_device_operations dm_blk_dops;
static void dm_wq_work(struct work_struct *work);
/*
* Allocate and initialise a blank device with a given minor.
*/
......@@ -1101,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor)
init_rwsem(&md->io_lock);
mutex_init(&md->suspend_lock);
spin_lock_init(&md->pushback_lock);
spin_lock_init(&md->deferred_lock);
rwlock_init(&md->map_lock);
atomic_set(&md->holders, 1);
atomic_set(&md->open_count, 0);
......@@ -1118,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor)
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
blk_queue_make_request(md->queue, dm_request);
blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
md->queue->unplug_fn = dm_unplug_all;
blk_queue_merge_bvec(md->queue, dm_merge_bvec);
......@@ -1140,6 +1135,7 @@ static struct mapped_device *alloc_dev(int minor)
atomic_set(&md->pending, 0);
init_waitqueue_head(&md->wait);
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
md->disk->major = _major;
......@@ -1379,18 +1375,24 @@ void dm_put(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_put);
static int dm_wait_for_completion(struct mapped_device *md)
static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
{
int r = 0;
DECLARE_WAITQUEUE(wait, current);
dm_unplug_all(md->queue);
add_wait_queue(&md->wait, &wait);
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
set_current_state(interruptible);
smp_mb();
if (!atomic_read(&md->pending))
break;
if (signal_pending(current)) {
if (interruptible == TASK_INTERRUPTIBLE &&
signal_pending(current)) {
r = -EINTR;
break;
}
......@@ -1399,67 +1401,40 @@ static int dm_wait_for_completion(struct mapped_device *md)
}
set_current_state(TASK_RUNNING);
remove_wait_queue(&md->wait, &wait);
return r;
}
/*
* Process the deferred bios
*/
static void __flush_deferred_io(struct mapped_device *md)
static void dm_wq_work(struct work_struct *work)
{
struct mapped_device *md = container_of(work, struct mapped_device,
work);
struct bio *c;
while ((c = bio_list_pop(&md->deferred))) {
if (__split_bio(md, c))
bio_io_error(c);
}
clear_bit(DMF_BLOCK_IO, &md->flags);
}
down_write(&md->io_lock);
static void __merge_pushback_list(struct mapped_device *md)
{
unsigned long flags;
next_bio:
spin_lock_irq(&md->deferred_lock);
c = bio_list_pop(&md->deferred);
spin_unlock_irq(&md->deferred_lock);
spin_lock_irqsave(&md->pushback_lock, flags);
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
bio_list_merge_head(&md->deferred, &md->pushback);
bio_list_init(&md->pushback);
spin_unlock_irqrestore(&md->pushback_lock, flags);
}
if (c) {
__split_and_process_bio(md, c);
goto next_bio;
}
static void dm_wq_work(struct work_struct *work)
{
struct dm_wq_req *req = container_of(work, struct dm_wq_req, work);
struct mapped_device *md = req->md;
clear_bit(DMF_BLOCK_IO, &md->flags);
down_write(&md->io_lock);
switch (req->type) {
case DM_WQ_FLUSH_DEFERRED:
__flush_deferred_io(md);
break;
default:
DMERR("dm_wq_work: unrecognised work type %d", req->type);
BUG();
}
up_write(&md->io_lock);
}
static void dm_wq_queue(struct mapped_device *md, int type, void *context,
struct dm_wq_req *req)
{
req->type = type;
req->md = md;
req->context = context;
INIT_WORK(&req->work, dm_wq_work);
queue_work(md->wq, &req->work);
}
static void dm_queue_flush(struct mapped_device *md, int type, void *context)
static void dm_queue_flush(struct mapped_device *md)
{
struct dm_wq_req req;
dm_wq_queue(md, type, context, &req);
queue_work(md->wq, &md->work);
flush_workqueue(md->wq);
}
......@@ -1534,7 +1509,6 @@ static void unlock_fs(struct mapped_device *md)
int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
{
struct dm_table *map = NULL;
DECLARE_WAITQUEUE(wait, current);
int r = 0;
int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0;
int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0;
......@@ -1584,28 +1558,22 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags)
down_write(&md->io_lock);
set_bit(DMF_BLOCK_IO, &md->flags);
add_wait_queue(&md->wait, &wait);
up_write(&md->io_lock);
/* unplug */
if (map)
dm_table_unplug_all(map);
/*
* Wait for the already-mapped ios to complete.
*/
r = dm_wait_for_completion(md);
r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE);
down_write(&md->io_lock);
remove_wait_queue(&md->wait, &wait);
if (noflush)
__merge_pushback_list(md);
clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
up_write(&md->io_lock);
/* were we interrupted ? */
if (r < 0) {
dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
dm_queue_flush(md);
unlock_fs(md);
goto out; /* pushback list is already flushed, so skip flush */
......@@ -1645,7 +1613,7 @@ int dm_resume(struct mapped_device *md)
if (r)
goto out;
dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL);
dm_queue_flush(md);
unlock_fs(md);
......
......@@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t);
int dm_target_init(void);
void dm_target_exit(void);
struct target_type *dm_get_target_type(const char *name);
void dm_put_target_type(struct target_type *t);
void dm_put_target_type(struct target_type *tt);
int dm_target_iterate(void (*iter_func)(struct target_type *tt,
void *param), void *param);
......
......@@ -139,6 +139,9 @@ struct target_type {
dm_ioctl_fn ioctl;
dm_merge_fn merge;
dm_busy_fn busy;
/* For internal device-mapper use. */
struct list_head list;
};
struct io_restrictions {
......
......@@ -28,6 +28,9 @@ struct dm_dirty_log_type {
const char *name;
struct module *module;
/* For internal device-mapper use */
struct list_head list;
int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti,
unsigned argc, char **argv);
void (*dtr)(struct dm_dirty_log *log);
......@@ -113,6 +116,16 @@ struct dm_dirty_log_type {
*/
int (*status)(struct dm_dirty_log *log, status_type_t status_type,
char *result, unsigned maxlen);
/*
* is_remote_recovering is necessary for cluster mirroring. It provides
* a way to detect recovery on another node, so we aren't writing
* concurrently. This function is likely to block (when a cluster log
* is used).
*
* Returns: 0, 1
*/
int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region);
};
int dm_dirty_log_type_register(struct dm_dirty_log_type *type);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment