Commit bc0eea33 authored by Neil Brown's avatar Neil Brown Committed by Linus Torvalds

[PATCH] md 17 of 22 - Strengthen the locking of mddev.

Strengthen the locking of mddev.

mddev is only ever locked in md.c, so we move {,un}lock_mddev
out of the header and into md.c, and rename to mddev_{,un}lock
for consistancy with mddev_{get,put,find}.

When building arrays (typically at boot time) we now lock, and unlock
as it is the "right" thing to do.  The lock should never fail.

When generating /proc/mdstat, we lock each array before inspecting it.

In md_ioctl, we lock the mddev early and unlock at the end, rather than
locking in two different places.

In md_open we make sure we can get a lock before completing the open.  This
ensures that we sync with do_md_stop properly.

In md_do_recovery, we lock each mddev before checking it's status.

md_do_recovery must unlock while recovery happens, and a do_md_stop at this
point will deadlock when md_do_recovery tries to regain the lock.  This will be
fixed in a later patch.
parent ee3208bf
...@@ -185,6 +185,21 @@ static mddev_t * mddev_find(int unit) ...@@ -185,6 +185,21 @@ static mddev_t * mddev_find(int unit)
return mddev; return mddev;
} }
static inline int mddev_lock(mddev_t * mddev)
{
return down_interruptible(&mddev->reconfig_sem);
}
static inline int mddev_trylock(mddev_t * mddev)
{
return down_trylock(&mddev->reconfig_sem);
}
static inline void mddev_unlock(mddev_t * mddev)
{
up(&mddev->reconfig_sem);
}
mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
{ {
mdk_rdev_t * rdev; mdk_rdev_t * rdev;
...@@ -1838,24 +1853,29 @@ static void autorun_devices(void) ...@@ -1838,24 +1853,29 @@ static void autorun_devices(void)
mddev = mddev_find(rdev0->sb->md_minor); mddev = mddev_find(rdev0->sb->md_minor);
if (!mddev) { if (!mddev) {
printk(KERN_ERR "md: cannot allocate memory for md drive.\n"); printk(KERN_ERR "md: cannot allocate memory for md drive.\n");
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
export_rdev(rdev);
break; break;
} }
if (mddev->sb || !list_empty(&mddev->disks)) { if (mddev_lock(mddev))
printk(KERN_WARNING "md: md%d locked, cannot run\n",
mdidx(mddev));
else if (mddev->sb || !list_empty(&mddev->disks)) {
printk(KERN_WARNING "md: md%d already running, cannot run %s\n", printk(KERN_WARNING "md: md%d already running, cannot run %s\n",
mdidx(mddev), partition_name(rdev0->dev)); mdidx(mddev), partition_name(rdev0->dev));
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) mddev_unlock(mddev);
export_rdev(rdev); } else {
mddev_put(mddev); printk(KERN_INFO "md: created md%d\n", mdidx(mddev));
continue; ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
} bind_rdev_to_array(rdev, mddev);
printk(KERN_INFO "md: created md%d\n", mdidx(mddev)); list_del_init(&rdev->pending);
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { }
bind_rdev_to_array(rdev, mddev); autorun_array(mddev);
list_del_init(&rdev->pending); mddev_unlock(mddev);
} }
autorun_array(mddev); /* on success, candidates will be empty, on error
* it wont...
*/
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
export_rdev(rdev);
mddev_put(mddev); mddev_put(mddev);
} }
printk(KERN_INFO "md: ... autorun DONE.\n"); printk(KERN_INFO "md: ... autorun DONE.\n");
...@@ -2453,7 +2473,7 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2453,7 +2473,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
case PRINT_RAID_DEBUG: case PRINT_RAID_DEBUG:
err = 0; err = 0;
md_print_devices(); md_print_devices();
goto done_unlock; goto done;
#ifndef MODULE #ifndef MODULE
case RAID_AUTORUN: case RAID_AUTORUN:
...@@ -2497,19 +2517,17 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2497,19 +2517,17 @@ static int md_ioctl(struct inode *inode, struct file *file,
goto abort; goto abort;
} }
err = mddev_lock(mddev);
if (err) {
printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",
err, cmd);
goto abort;
}
switch (cmd) switch (cmd)
{ {
case SET_ARRAY_INFO: case SET_ARRAY_INFO:
/*
* alloc_mddev() should possibly self-lock.
*/
err = lock_mddev(mddev);
if (err) {
printk(KERN_WARNING "md: ioctl, reason %d, cmd %d\n",
err, cmd);
goto abort;
}
if (!list_empty(&mddev->disks)) { if (!list_empty(&mddev->disks)) {
printk(KERN_WARNING "md: array md%d already has disks!\n", printk(KERN_WARNING "md: array md%d already has disks!\n",
mdidx(mddev)); mdidx(mddev));
...@@ -2544,9 +2562,9 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2544,9 +2562,9 @@ static int md_ioctl(struct inode *inode, struct file *file,
if (err) { if (err) {
printk(KERN_WARNING "md: autostart %s failed!\n", printk(KERN_WARNING "md: autostart %s failed!\n",
partition_name(val_to_kdev(arg))); partition_name(val_to_kdev(arg)));
goto abort; goto abort_unlock;
} }
goto done; goto done_unlock;
default:; default:;
} }
...@@ -2554,12 +2572,6 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2554,12 +2572,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
/* /*
* Commands querying/configuring an existing array: * Commands querying/configuring an existing array:
*/ */
err = lock_mddev(mddev);
if (err) {
printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",err, cmd);
goto abort;
}
/* if we don't have a superblock yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */ /* if we don't have a superblock yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */
if (!mddev->sb && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { if (!mddev->sb && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) {
err = -ENODEV; err = -ENODEV;
...@@ -2678,7 +2690,7 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2678,7 +2690,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
done_unlock: done_unlock:
abort_unlock: abort_unlock:
unlock_mddev(mddev); mddev_unlock(mddev);
return err; return err;
done: done:
...@@ -2694,12 +2706,21 @@ static int md_open(struct inode *inode, struct file *file) ...@@ -2694,12 +2706,21 @@ static int md_open(struct inode *inode, struct file *file)
* Succeed if we can find or allocate a mddev structure. * Succeed if we can find or allocate a mddev structure.
*/ */
mddev_t *mddev = mddev_find(minor(inode->i_rdev)); mddev_t *mddev = mddev_find(minor(inode->i_rdev));
int err = -ENOMEM;
if (mddev) { if (!mddev)
inode->i_bdev->bd_inode->u.generic_ip = mddev; goto out;
return 0; /* and we "own" a reference */
} else if ((err = mddev_lock(mddev)))
return -ENOMEM; goto put;
err = 0;
mddev_unlock(mddev);
inode->i_bdev->bd_inode->u.generic_ip = mddev_get(mddev);
put:
mddev_put(mddev);
out:
return err;
} }
static int md_release(struct inode *inode, struct file * file) static int md_release(struct inode *inode, struct file * file)
...@@ -2987,7 +3008,7 @@ static int md_status_read_proc(char *page, char **start, off_t off, ...@@ -2987,7 +3008,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
sz += sprintf(page+sz, "\n"); sz += sprintf(page+sz, "\n");
ITERATE_MDDEV(mddev,tmp) { ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev), sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev),
mddev->pers ? "" : "in"); mddev->pers ? "" : "in");
if (mddev->pers) { if (mddev->pers) {
...@@ -3017,6 +3038,7 @@ static int md_status_read_proc(char *page, char **start, off_t off, ...@@ -3017,6 +3038,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
if (!mddev->pers) { if (!mddev->pers) {
sz += sprintf(page+sz, "\n"); sz += sprintf(page+sz, "\n");
mddev_unlock(mddev);
continue; continue;
} }
...@@ -3030,6 +3052,7 @@ static int md_status_read_proc(char *page, char **start, off_t off, ...@@ -3030,6 +3052,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
sz += sprintf(page + sz, " resync=DELAYED"); sz += sprintf(page + sz, " resync=DELAYED");
} }
sz += sprintf(page + sz, "\n"); sz += sprintf(page + sz, "\n");
mddev_unlock(mddev);
} }
sz += status_unused(page + sz); sz += status_unused(page + sz);
...@@ -3306,36 +3329,38 @@ void md_do_recovery(void *data) ...@@ -3306,36 +3329,38 @@ void md_do_recovery(void *data)
mdp_disk_t *spare; mdp_disk_t *spare;
struct list_head *tmp; struct list_head *tmp;
printk(KERN_INFO "md: recovery thread got woken up ...\n"); dprintk(KERN_INFO "md: recovery thread got woken up ...\n");
restart:
ITERATE_MDDEV(mddev,tmp) { ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
sb = mddev->sb; sb = mddev->sb;
if (!sb) if (!sb)
continue; goto unlock;
if (mddev->recovery_running) if (mddev->recovery_running)
continue; goto unlock;
if (sb->active_disks == sb->raid_disks) if (sb->active_disks == sb->raid_disks)
continue; goto unlock;
if (!sb->spare_disks) { if (!sb->spare_disks) {
printk(KERN_ERR "md%d: no spare disk to reconstruct array! " printk(KERN_ERR "md%d: no spare disk to reconstruct array! "
"-- continuing in degraded mode\n", mdidx(mddev)); "-- continuing in degraded mode\n", mdidx(mddev));
continue; goto unlock;
} }
/* /*
* now here we get the spare and resync it. * now here we get the spare and resync it.
*/ */
spare = get_spare(mddev); spare = get_spare(mddev);
if (!spare) if (!spare)
continue; goto unlock;
printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n", printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n",
mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor))); mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor)));
if (!mddev->pers->diskop) if (!mddev->pers->diskop)
continue; goto unlock;
if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE)) if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE))
continue; goto unlock;
down(&mddev->recovery_sem); down(&mddev->recovery_sem);
mddev->recovery_running = 1; mddev->recovery_running = 1;
mddev_unlock(mddev);
err = md_do_sync(mddev, spare); err = md_do_sync(mddev, spare);
mddev_lock(mddev); /* FIXME this can fail or deadlock with do_md_close */
if (err == -EIO) { if (err == -EIO) {
printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n", printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n",
mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor))); mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor)));
...@@ -3361,7 +3386,7 @@ void md_do_recovery(void *data) ...@@ -3361,7 +3386,7 @@ void md_do_recovery(void *data)
DISKOP_SPARE_INACTIVE); DISKOP_SPARE_INACTIVE);
up(&mddev->recovery_sem); up(&mddev->recovery_sem);
mddev->recovery_running = 0; mddev->recovery_running = 0;
continue; goto unlock;
} else { } else {
mddev->recovery_running = 0; mddev->recovery_running = 0;
up(&mddev->recovery_sem); up(&mddev->recovery_sem);
...@@ -3379,9 +3404,10 @@ void md_do_recovery(void *data) ...@@ -3379,9 +3404,10 @@ void md_do_recovery(void *data)
} }
mddev->sb_dirty = 1; mddev->sb_dirty = 1;
md_update_sb(mddev); md_update_sb(mddev);
goto restart; unlock:
mddev_unlock(mddev);
} }
printk(KERN_INFO "md: recovery thread finished ...\n"); dprintk(KERN_INFO "md: recovery thread finished ...\n");
} }
...@@ -3397,7 +3423,8 @@ int md_notify_reboot(struct notifier_block *this, ...@@ -3397,7 +3423,8 @@ int md_notify_reboot(struct notifier_block *this,
return NOTIFY_DONE; return NOTIFY_DONE;
ITERATE_MDDEV(mddev,tmp) ITERATE_MDDEV(mddev,tmp)
do_md_stop (mddev, 1); if (mddev_trylock(mddev)==0)
do_md_stop (mddev, 1);
/* /*
* certain more exotic SCSI devices are known to be * certain more exotic SCSI devices are known to be
* volatile wrt too early system reboots. While the * volatile wrt too early system reboots. While the
...@@ -3693,10 +3720,19 @@ void __init md_setup_drive(void) ...@@ -3693,10 +3720,19 @@ void __init md_setup_drive(void)
printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor); printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor);
continue; continue;
} }
if (mddev_lock(mddev)) {
printk(KERN_WARNING
"md: Ignoring md=%d, cannot lock!\n",
minor);
mddev_put(mddev);
continue;
}
if (mddev->sb || !list_empty(&mddev->disks)) { if (mddev->sb || !list_empty(&mddev->disks)) {
printk(KERN_WARNING printk(KERN_WARNING
"md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
minor); minor);
mddev_unlock(mddev);
mddev_put(mddev); mddev_put(mddev);
continue; continue;
} }
...@@ -3751,6 +3787,7 @@ void __init md_setup_drive(void) ...@@ -3751,6 +3787,7 @@ void __init md_setup_drive(void)
do_md_stop(mddev, 0); do_md_stop(mddev, 0);
printk(KERN_WARNING "md: starting md%d failed\n", minor); printk(KERN_WARNING "md: starting md%d failed\n", minor);
} }
mddev_unlock(mddev);
mddev_put(mddev); mddev_put(mddev);
} }
} }
......
...@@ -283,16 +283,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev); ...@@ -283,16 +283,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev);
tmp = tmp->next, tmp->prev != &all_mddevs \ tmp = tmp->next, tmp->prev != &all_mddevs \
; ) ; )
static inline int lock_mddev (mddev_t * mddev)
{
return down_interruptible(&mddev->reconfig_sem);
}
static inline void unlock_mddev (mddev_t * mddev)
{
up(&mddev->reconfig_sem);
}
#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \ #define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
x = y; y = __tmp; } while (0) x = y; y = __tmp; } while (0)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment