Commit 03f7b57a authored by Jens Axboe's avatar Jens Axboe

Merge tag 'md-next-20230927' of...

Merge tag 'md-next-20230927' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.7/block

Pull MD updates from Song:

"1. Make rdev add/remove independent from daemon thread, by Yu Kuai;
 2. Refactor code around quiesce() and mddev_suspend(), by Yu Kuai."

* tag 'md-next-20230927' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: replace deprecated strncpy with memcpy
  md/md-linear: Annotate struct linear_conf with __counted_by
  md: don't check 'mddev->pers' and 'pers->quiesce' from suspend_lo_store()
  md: don't check 'mddev->pers' from suspend_hi_store()
  md-bitmap: suspend array earlier in location_store()
  md-bitmap: remove the checking of 'pers->quiesce' from location_store()
  md: don't rely on 'mddev->pers' to be set in mddev_suspend()
  md: initialize 'writes_pending' while allocating mddev
  md: initialize 'active_io' while allocating mddev
  md: delay remove_and_add_spares() for read only array to md_start_sync()
  md: factor out a helper rdev_addable() from remove_and_add_spares()
  md: factor out a helper rdev_is_spare() from remove_and_add_spares()
  md: factor out a helper rdev_removeable() from remove_and_add_spares()
  md: delay choosing sync action to md_start_sync()
  md: factor out a helper to choose sync action from md_check_recovery()
  md: use separate work_struct for md_start_sync()
parents aa511ff8 ceb04163
......@@ -749,7 +749,11 @@ static struct raid_set *raid_set_alloc(struct dm_target *ti, struct raid_type *r
return ERR_PTR(-ENOMEM);
}
mddev_init(&rs->md);
if (mddev_init(&rs->md)) {
kfree(rs);
ti->error = "Cannot initialize raid context";
return ERR_PTR(-ENOMEM);
}
rs->raid_disks = raid_devs;
rs->delta_disks = 0;
......@@ -798,6 +802,7 @@ static void raid_set_free(struct raid_set *rs)
dm_put_device(rs->ti, rs->dev[i].data_dev);
}
mddev_destroy(&rs->md);
kfree(rs);
}
......
......@@ -2351,11 +2351,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
rv = mddev_lock(mddev);
if (rv)
return rv;
mddev_suspend(mddev);
if (mddev->pers) {
if (!mddev->pers->quiesce) {
rv = -EBUSY;
goto out;
}
if (mddev->recovery || mddev->sync_thread) {
rv = -EBUSY;
goto out;
......@@ -2369,11 +2367,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
rv = -EBUSY;
goto out;
}
if (mddev->pers) {
mddev_suspend(mddev);
md_bitmap_destroy(mddev);
mddev_resume(mddev);
}
mddev->bitmap_info.offset = 0;
if (mddev->bitmap_info.file) {
struct file *f = mddev->bitmap_info.file;
......@@ -2383,6 +2378,8 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
} else {
/* No bitmap, OK to set a location */
long long offset;
struct bitmap *bitmap;
if (strncmp(buf, "none", 4) == 0)
/* nothing to be done */;
else if (strncmp(buf, "file:", 5) == 0) {
......@@ -2406,26 +2403,21 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
rv = -EINVAL;
goto out;
}
mddev->bitmap_info.offset = offset;
if (mddev->pers) {
struct bitmap *bitmap;
bitmap = md_bitmap_create(mddev, -1);
mddev_suspend(mddev);
if (IS_ERR(bitmap))
if (IS_ERR(bitmap)) {
rv = PTR_ERR(bitmap);
else {
goto out;
}
mddev->bitmap = bitmap;
rv = md_bitmap_load(mddev);
if (rv)
mddev->bitmap_info.offset = 0;
}
if (rv) {
mddev->bitmap_info.offset = 0;
md_bitmap_destroy(mddev);
mddev_resume(mddev);
goto out;
}
mddev_resume(mddev);
}
}
}
if (!mddev->external) {
......@@ -2437,6 +2429,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
}
rv = 0;
out:
mddev_resume(mddev);
mddev_unlock(mddev);
if (rv)
return rv;
......
......@@ -69,6 +69,19 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
if (!conf)
return NULL;
/*
* conf->raid_disks is copy of mddev->raid_disks. The reason to
* keep a copy of mddev->raid_disks in struct linear_conf is,
* mddev->raid_disks may not be consistent with pointers number of
* conf->disks[] when it is updated in linear_add() and used to
* iterate old conf->disks[] earray in linear_congested().
* Here conf->raid_disks is always consitent with number of
* pointers in conf->disks[] array, and mddev->private is updated
* with rcu_assign_pointer() in linear_addr(), such race can be
* avoided.
*/
conf->raid_disks = raid_disks;
cnt = 0;
conf->array_sectors = 0;
......@@ -112,19 +125,6 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
conf->disks[i-1].end_sector +
conf->disks[i].rdev->sectors;
/*
* conf->raid_disks is copy of mddev->raid_disks. The reason to
* keep a copy of mddev->raid_disks in struct linear_conf is,
* mddev->raid_disks may not be consistent with pointers number of
* conf->disks[] when it is updated in linear_add() and used to
* iterate old conf->disks[] earray in linear_congested().
* Here conf->raid_disks is always consitent with number of
* pointers in conf->disks[] array, and mddev->private is updated
* with rcu_assign_pointer() in linear_addr(), such race can be
* avoided.
*/
conf->raid_disks = raid_disks;
return conf;
out:
......
......@@ -12,6 +12,6 @@ struct linear_conf
struct rcu_head rcu;
sector_t array_sectors;
int raid_disks; /* a copy of mddev->raid_disks */
struct dev_info disks[];
struct dev_info disks[] __counted_by(raid_disks);
};
#endif
......@@ -449,7 +449,7 @@ void mddev_suspend(struct mddev *mddev)
set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
percpu_ref_kill(&mddev->active_io);
if (mddev->pers->prepare_suspend)
if (mddev->pers && mddev->pers->prepare_suspend)
mddev->pers->prepare_suspend(mddev);
wait_event(mddev->sb_wait, percpu_ref_is_zero(&mddev->active_io));
......@@ -631,16 +631,39 @@ void mddev_put(struct mddev *mddev)
* flush_workqueue() after mddev_find will succeed in waiting
* for the work to be done.
*/
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
queue_work(md_misc_wq, &mddev->del_work);
}
spin_unlock(&all_mddevs_lock);
}
static void md_safemode_timeout(struct timer_list *t);
static void md_start_sync(struct work_struct *ws);
void mddev_init(struct mddev *mddev)
static void active_io_release(struct percpu_ref *ref)
{
struct mddev *mddev = container_of(ref, struct mddev, active_io);
wake_up(&mddev->sb_wait);
}
static void no_op(struct percpu_ref *r) {}
int mddev_init(struct mddev *mddev)
{
if (percpu_ref_init(&mddev->active_io, active_io_release,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
return -ENOMEM;
if (percpu_ref_init(&mddev->writes_pending, no_op,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
percpu_ref_exit(&mddev->active_io);
return -ENOMEM;
}
/* We want to start with the refcount at zero */
percpu_ref_put(&mddev->writes_pending);
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
mutex_init(&mddev->sync_mutex);
......@@ -662,9 +685,21 @@ void mddev_init(struct mddev *mddev)
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
INIT_WORK(&mddev->sync_work, md_start_sync);
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
return 0;
}
EXPORT_SYMBOL_GPL(mddev_init);
void mddev_destroy(struct mddev *mddev)
{
percpu_ref_exit(&mddev->active_io);
percpu_ref_exit(&mddev->writes_pending);
}
EXPORT_SYMBOL_GPL(mddev_destroy);
static struct mddev *mddev_find_locked(dev_t unit)
{
struct mddev *mddev;
......@@ -708,13 +743,16 @@ static struct mddev *mddev_alloc(dev_t unit)
new = kzalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return ERR_PTR(-ENOMEM);
mddev_init(new);
error = mddev_init(new);
if (error)
goto out_free_new;
spin_lock(&all_mddevs_lock);
if (unit) {
error = -EEXIST;
if (mddev_find_locked(unit))
goto out_free_new;
goto out_destroy_new;
new->unit = unit;
if (MAJOR(unit) == MD_MAJOR)
new->md_minor = MINOR(unit);
......@@ -725,7 +763,7 @@ static struct mddev *mddev_alloc(dev_t unit)
error = -ENODEV;
new->unit = mddev_alloc_unit();
if (!new->unit)
goto out_free_new;
goto out_destroy_new;
new->md_minor = MINOR(new->unit);
new->hold_active = UNTIL_STOP;
}
......@@ -733,8 +771,11 @@ static struct mddev *mddev_alloc(dev_t unit)
list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock);
return new;
out_free_new:
out_destroy_new:
spin_unlock(&all_mddevs_lock);
mddev_destroy(new);
out_free_new:
kfree(new);
return ERR_PTR(error);
}
......@@ -745,6 +786,7 @@ static void mddev_free(struct mddev *mddev)
list_del(&mddev->all_mddevs);
spin_unlock(&all_mddevs_lock);
mddev_destroy(mddev);
kfree(mddev);
}
......@@ -3879,7 +3921,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
return rv;
if (mddev->pers == NULL) {
strncpy(mddev->clevel, buf, slen);
memcpy(mddev->clevel, buf, slen);
if (mddev->clevel[slen-1] == '\n')
slen--;
mddev->clevel[slen] = 0;
......@@ -3912,7 +3954,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
}
/* Now find the new personality */
strncpy(clevel, buf, slen);
memcpy(clevel, buf, slen);
if (clevel[slen-1] == '\n')
slen--;
clevel[slen] = 0;
......@@ -4698,7 +4740,7 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
size_t namelen = len-9;
if (namelen >= sizeof(mddev->metadata_type))
namelen = sizeof(mddev->metadata_type)-1;
strncpy(mddev->metadata_type, buf+9, namelen);
memcpy(mddev->metadata_type, buf+9, namelen);
mddev->metadata_type[namelen] = 0;
if (namelen && mddev->metadata_type[namelen-1] == '\n')
mddev->metadata_type[--namelen] = 0;
......@@ -4872,6 +4914,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
/* A write to sync_action is enough to justify
* canceling read-auto mode
*/
flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread);
}
......@@ -5146,18 +5189,13 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
err = mddev_lock(mddev);
if (err)
return err;
err = -EINVAL;
if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL)
goto unlock;
mddev_suspend(mddev);
mddev->suspend_lo = new;
mddev_resume(mddev);
err = 0;
unlock:
mddev_unlock(mddev);
return err ?: len;
return len;
}
static struct md_sysfs_entry md_suspend_lo =
__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
......@@ -5183,18 +5221,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
err = mddev_lock(mddev);
if (err)
return err;
err = -EINVAL;
if (mddev->pers == NULL)
goto unlock;
mddev_suspend(mddev);
mddev->suspend_hi = new;
mddev_resume(mddev);
err = 0;
unlock:
mddev_unlock(mddev);
return err ?: len;
return len;
}
static struct md_sysfs_entry md_suspend_hi =
__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
......@@ -5597,21 +5630,6 @@ static void mddev_delayed_delete(struct work_struct *ws)
kobject_put(&mddev->kobj);
}
static void no_op(struct percpu_ref *r) {}
int mddev_init_writes_pending(struct mddev *mddev)
{
if (mddev->writes_pending.percpu_count_ptr)
return 0;
if (percpu_ref_init(&mddev->writes_pending, no_op,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0)
return -ENOMEM;
/* We want to start with the refcount at zero */
percpu_ref_put(&mddev->writes_pending);
return 0;
}
EXPORT_SYMBOL_GPL(mddev_init_writes_pending);
struct mddev *md_alloc(dev_t dev, char *name)
{
/*
......@@ -5783,12 +5801,6 @@ static void md_safemode_timeout(struct timer_list *t)
}
static int start_dirty_degraded;
static void active_io_release(struct percpu_ref *ref)
{
struct mddev *mddev = container_of(ref, struct mddev, active_io);
wake_up(&mddev->sb_wait);
}
int md_run(struct mddev *mddev)
{
......@@ -5869,15 +5881,10 @@ int md_run(struct mddev *mddev)
nowait = nowait && bdev_nowait(rdev->bdev);
}
err = percpu_ref_init(&mddev->active_io, active_io_release,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
if (err)
return err;
if (!bioset_initialized(&mddev->bio_set)) {
err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (err)
goto exit_active_io;
return err;
}
if (!bioset_initialized(&mddev->sync_set)) {
err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
......@@ -6074,8 +6081,6 @@ int md_run(struct mddev *mddev)
bioset_exit(&mddev->sync_set);
exit_bio_set:
bioset_exit(&mddev->bio_set);
exit_active_io:
percpu_ref_exit(&mddev->active_io);
return err;
}
EXPORT_SYMBOL_GPL(md_run);
......@@ -6291,7 +6296,6 @@ static void __md_stop(struct mddev *mddev)
module_put(pers->owner);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
bioset_exit(&mddev->io_clone_set);
......@@ -6306,7 +6310,6 @@ void md_stop(struct mddev *mddev)
*/
__md_stop_writes(mddev);
__md_stop(mddev);
percpu_ref_exit(&mddev->writes_pending);
}
EXPORT_SYMBOL_GPL(md_stop);
......@@ -7646,6 +7649,10 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
mutex_unlock(&mddev->open_mutex);
sync_blockdev(bdev);
}
if (!md_is_rdwr(mddev))
flush_work(&mddev->sync_work);
err = mddev_lock(mddev);
if (err) {
pr_debug("md: ioctl lock interrupted, reason %d, cmd %d\n",
......@@ -7886,7 +7893,6 @@ static void md_free_disk(struct gendisk *disk)
{
struct mddev *mddev = disk->private_data;
percpu_ref_exit(&mddev->writes_pending);
mddev_free(mddev);
}
......@@ -8570,6 +8576,7 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
BUG_ON(mddev->ro == MD_RDONLY);
if (mddev->ro == MD_AUTO_READ) {
/* need to switch to read/write */
flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
......@@ -9161,6 +9168,85 @@ void md_do_sync(struct md_thread *thread)
}
EXPORT_SYMBOL_GPL(md_do_sync);
static bool rdev_removeable(struct md_rdev *rdev)
{
/* rdev is not used. */
if (rdev->raid_disk < 0)
return false;
/* There are still inflight io, don't remove this rdev. */
if (atomic_read(&rdev->nr_pending))
return false;
/*
* An error occurred but has not yet been acknowledged by the metadata
* handler, don't remove this rdev.
*/
if (test_bit(Blocked, &rdev->flags))
return false;
/* Fautly rdev is not used, it's safe to remove it. */
if (test_bit(Faulty, &rdev->flags))
return true;
/* Journal disk can only be removed if it's faulty. */
if (test_bit(Journal, &rdev->flags))
return false;
/*
* 'In_sync' is cleared while 'raid_disk' is valid, which means
* replacement has just become active from pers->spare_active(), and
* then pers->hot_remove_disk() will replace this rdev with replacement.
*/
if (!test_bit(In_sync, &rdev->flags))
return true;
return false;
}
static bool rdev_is_spare(struct md_rdev *rdev)
{
return !test_bit(Candidate, &rdev->flags) && rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Journal, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags);
}
static bool rdev_addable(struct md_rdev *rdev)
{
/* rdev is already used, don't add it again. */
if (test_bit(Candidate, &rdev->flags) || rdev->raid_disk >= 0 ||
test_bit(Faulty, &rdev->flags))
return false;
/* Allow to add journal disk. */
if (test_bit(Journal, &rdev->flags))
return true;
/* Allow to add if array is read-write. */
if (md_is_rdwr(rdev->mddev))
return true;
/*
* For read-only array, only allow to readd a rdev. And if bitmap is
* used, don't allow to readd a rdev that is too old.
*/
if (rdev->saved_raid_disk >= 0 && !test_bit(Bitmap_sync, &rdev->flags))
return true;
return false;
}
static bool md_spares_need_change(struct mddev *mddev)
{
struct md_rdev *rdev;
rdev_for_each(rdev, mddev)
if (rdev_removeable(rdev) || rdev_addable(rdev))
return true;
return false;
}
static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this)
{
......@@ -9193,12 +9279,8 @@ static int remove_and_add_spares(struct mddev *mddev,
synchronize_rcu();
rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) &&
rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
((test_bit(RemoveSynchronized, &rdev->flags) ||
(!test_bit(In_sync, &rdev->flags) &&
!test_bit(Journal, &rdev->flags))) &&
atomic_read(&rdev->nr_pending)==0)) {
(test_bit(RemoveSynchronized, &rdev->flags) ||
rdev_removeable(rdev))) {
if (mddev->pers->hot_remove_disk(
mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
......@@ -9220,25 +9302,12 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev_for_each(rdev, mddev) {
if (this && this != rdev)
continue;
if (test_bit(Candidate, &rdev->flags))
continue;
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Journal, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags))
if (rdev_is_spare(rdev))
spares++;
if (rdev->raid_disk >= 0)
continue;
if (test_bit(Faulty, &rdev->flags))
if (!rdev_addable(rdev))
continue;
if (!test_bit(Journal, &rdev->flags)) {
if (!md_is_rdwr(mddev) &&
!(rdev->saved_raid_disk >= 0 &&
!test_bit(Bitmap_sync, &rdev->flags)))
continue;
if (!test_bit(Journal, &rdev->flags))
rdev->recovery_offset = 0;
}
if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
/* failure here is OK */
sysfs_link_rdev(mddev, rdev);
......@@ -9254,9 +9323,81 @@ static int remove_and_add_spares(struct mddev *mddev,
return spares;
}
static bool md_choose_sync_action(struct mddev *mddev, int *spares)
{
/* Check if reshape is in progress first. */
if (mddev->reshape_position != MaxSector) {
if (mddev->pers->check_reshape == NULL ||
mddev->pers->check_reshape(mddev) != 0)
return false;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
return true;
}
/*
* Remove any failed drives, then add spares if possible. Spares are
* also removed and re-added, to allow the personality to fail the
* re-add.
*/
*spares = remove_and_add_spares(mddev, NULL);
if (*spares) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
/* Start new recovery. */
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
return true;
}
/* Check if recovery is in progress. */
if (mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
return true;
}
/* Delay to choose resync/check/repair in md_do_sync(). */
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
return true;
/* Nothing to be done */
return false;
}
static void md_start_sync(struct work_struct *ws)
{
struct mddev *mddev = container_of(ws, struct mddev, del_work);
struct mddev *mddev = container_of(ws, struct mddev, sync_work);
int spares = 0;
mddev_lock_nointr(mddev);
if (!md_is_rdwr(mddev)) {
/*
* On a read-only array we can:
* - remove failed devices
* - add already-in_sync devices if the array itself is in-sync.
* As we only add devices that are already in-sync, we can
* activate the spares immediately.
*/
remove_and_add_spares(mddev, NULL);
goto not_running;
}
if (!md_choose_sync_action(mddev, &spares))
goto not_running;
if (!mddev->pers->sync_request)
goto not_running;
/*
* We are adding a device or devices to an array which has the bitmap
* stored on all devices. So make sure all bitmap pages get written.
*/
if (spares)
md_bitmap_write_all(mddev->bitmap);
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "resync"));
......@@ -9264,20 +9405,27 @@ static void md_start_sync(struct work_struct *ws)
pr_warn("%s: could not start resync thread...\n",
mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */
goto not_running;
}
mddev_unlock(mddev);
md_wakeup_thread(mddev->sync_thread);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event();
return;
not_running:
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev_unlock(mddev);
wake_up(&resync_wait);
if (test_and_clear_bit(MD_RECOVERY_RECOVER,
&mddev->recovery))
if (mddev->sysfs_action)
sysfs_notify_dirent_safe(mddev->sysfs_action);
} else
md_wakeup_thread(mddev->sync_thread);
if (test_and_clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery) &&
mddev->sysfs_action)
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event();
}
/*
......@@ -9345,7 +9493,6 @@ void md_check_recovery(struct mddev *mddev)
return;
if (mddev_trylock(mddev)) {
int spares = 0;
bool try_set_sync = mddev->safemode != 0;
if (!mddev->external && mddev->safemode == 1)
......@@ -9353,30 +9500,43 @@ void md_check_recovery(struct mddev *mddev)
if (!md_is_rdwr(mddev)) {
struct md_rdev *rdev;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
/* sync_work already queued. */
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}
if (!mddev->external && mddev->in_sync)
/* 'Blocked' flag not needed as failed devices
/*
* 'Blocked' flag not needed as failed devices
* will be recorded if array switched to read/write.
* Leaving it set will prevent the device
* from being removed.
*/
rdev_for_each(rdev, mddev)
clear_bit(Blocked, &rdev->flags);
/* On a read-only array we can:
* - remove failed devices
* - add already-in_sync devices if the array itself
* is in-sync.
* As we only add devices that are already in-sync,
* we can activate the spares immediately.
*/
remove_and_add_spares(mddev, NULL);
/* There is no thread, but we need to call
/*
* There is no thread, but we need to call
* ->spare_active and clear saved_raid_disk
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
/*
* Let md_start_sync() to remove and add rdevs to the
* array.
*/
if (md_spares_need_change(mddev)) {
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
queue_work(md_misc_wq, &mddev->sync_work);
}
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
goto unlock;
}
......@@ -9432,56 +9592,14 @@ void md_check_recovery(struct mddev *mddev)
clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
goto not_running;
/* no recovery is running.
* remove any failed drives, then
* add spares if possible.
* Spares are also removed and re-added, to allow
* the personality to fail the re-add.
*/
if (mddev->reshape_position != MaxSector) {
if (mddev->pers->check_reshape == NULL ||
mddev->pers->check_reshape(mddev) != 0)
/* Cannot proceed */
goto not_running;
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if ((spares = remove_and_add_spares(mddev, NULL))) {
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if (mddev->recovery_cp < MaxSector) {
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
} else if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
/* nothing to be done ... */
goto not_running;
if (mddev->pers->sync_request) {
if (spares) {
/* We are adding a device or devices to an array
* which has the bitmap stored on all devices.
* So make sure all bitmap pages get written
*/
md_bitmap_write_all(mddev->bitmap);
}
INIT_WORK(&mddev->del_work, md_start_sync);
queue_work(md_misc_wq, &mddev->del_work);
goto unlock;
}
not_running:
if (!mddev->sync_thread) {
if (test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
queue_work(md_misc_wq, &mddev->sync_work);
} else {
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
wake_up(&resync_wait);
if (test_and_clear_bit(MD_RECOVERY_RECOVER,
&mddev->recovery))
if (mddev->sysfs_action)
sysfs_notify_dirent_safe(mddev->sysfs_action);
}
unlock:
wake_up(&mddev->sb_wait);
mddev_unlock(mddev);
......
......@@ -453,7 +453,10 @@ struct mddev {
struct kernfs_node *sysfs_degraded; /*handle for 'degraded' */
struct kernfs_node *sysfs_level; /*handle for 'level' */
struct work_struct del_work; /* used for delayed sysfs removal */
/* used for delayed sysfs removal */
struct work_struct del_work;
/* used for register new sync thread */
struct work_struct sync_work;
/* "lock" protects:
* flush_bio transition from NULL to !NULL
......@@ -768,7 +771,6 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
......@@ -795,7 +797,8 @@ extern int md_integrity_register(struct mddev *mddev);
extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev);
extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
extern void mddev_init(struct mddev *mddev);
extern int mddev_init(struct mddev *mddev);
extern void mddev_destroy(struct mddev *mddev);
struct mddev *md_alloc(dev_t dev, char *name);
void mddev_put(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
......
......@@ -3122,8 +3122,7 @@ static int raid1_run(struct mddev *mddev)
mdname(mddev));
return -EIO;
}
if (mddev_init_writes_pending(mddev) < 0)
return -ENOMEM;
/*
* copy the already verified devices into our private RAID1
* bookkeeping area. [whatever we allocate in run(),
......
......@@ -4154,9 +4154,6 @@ static int raid10_run(struct mddev *mddev)
sector_t min_offset_diff = 0;
int first = 1;
if (mddev_init_writes_pending(mddev) < 0)
return -ENOMEM;
if (mddev->private == NULL) {
conf = setup_conf(mddev);
if (IS_ERR(conf))
......
......@@ -7778,9 +7778,6 @@ static int raid5_run(struct mddev *mddev)
long long min_offset_diff = 0;
int first = 1;
if (mddev_init_writes_pending(mddev) < 0)
return -ENOMEM;
if (mddev->recovery_cp != MaxSector)
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
mdname(mddev));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment