Commit 60701311 authored by Jens Axboe's avatar Jens Axboe

Merge tag 'md-next-20230613' of...

Merge tag 'md-next-20230613' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.5/block

Pull MD updates from Song:

"The major changes are:

 1. Protect md_thread with rcu, by Yu Kuai;
 2. Various non-urgent raid5 and raid1/10 fixes, by Yu Kuai;
 3. Non-urgent raid10 fixes, by Li Nan."

* tag 'md-next-20230613' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md: (29 commits)
  md/raid1-10: limit the number of plugged bio
  md/raid1-10: don't handle pluged bio by daemon thread
  md/md-bitmap: add a new helper to unplug bitmap asynchrously
  md/raid1-10: submit write io directly if bitmap is not enabled
  md/raid1-10: factor out a helper to submit normal write
  md/raid1-10: factor out a helper to add bio to plug
  md/raid10: prevent soft lockup while flush writes
  md/raid10: fix io loss while replacement replace rdev
  md/raid10: Do not add spare disk when recovery fails
  md/raid10: clean up md_add_new_disk()
  md/raid10: prioritize adding disk to 'removed' mirror
  md/raid10: improve code of mrdev in raid10_sync_request
  md/raid10: fix null-ptr-deref of mreplace in raid10_sync_request
  md/raid5: don't start reshape when recovery or replace is in progress
  md: protect md_thread with rcu
  md/bitmap: factor out a helper to set timeout
  md/bitmap: always wake up md_thread in timeout_store
  dm-raid: remove useless checking in raid_message()
  md: factor out a helper to wake up md_thread directly
  md: fix duplicate filename for rdev
  ...
parents d44c4042 460af1f9
......@@ -3750,11 +3750,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
* canceling read-auto mode
*/
mddev->ro = 0;
if (!mddev->suspended && mddev->sync_thread)
if (!mddev->suspended)
md_wakeup_thread(mddev->sync_thread);
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (!mddev->suspended && mddev->thread)
if (!mddev->suspended)
md_wakeup_thread(mddev->thread);
return 0;
......
......@@ -54,14 +54,7 @@ __acquires(bitmap->lock)
{
unsigned char *mappage;
if (page >= bitmap->pages) {
/* This can happen if bitmap_start_sync goes beyond
* End-of-device while looking for a whole page.
* It is harmless.
*/
return -EINVAL;
}
WARN_ON_ONCE(page >= bitmap->pages);
if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */
return 0;
......@@ -1023,7 +1016,6 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
return set;
}
/* this gets called when the md device is ready to unplug its underlying
* (slave) device queues -- before we let any writes go down, we need to
* sync the dirty pages of the bitmap file to disk */
......@@ -1033,8 +1025,7 @@ void md_bitmap_unplug(struct bitmap *bitmap)
int dirty, need_write;
int writing = 0;
if (!bitmap || !bitmap->storage.filemap ||
test_bit(BITMAP_STALE, &bitmap->flags))
if (!md_bitmap_enabled(bitmap))
return;
/* look at each page to see if there are any set bits that need to be
......@@ -1063,6 +1054,35 @@ void md_bitmap_unplug(struct bitmap *bitmap)
}
EXPORT_SYMBOL(md_bitmap_unplug);
struct bitmap_unplug_work {
struct work_struct work;
struct bitmap *bitmap;
struct completion *done;
};
static void md_bitmap_unplug_fn(struct work_struct *work)
{
struct bitmap_unplug_work *unplug_work =
container_of(work, struct bitmap_unplug_work, work);
md_bitmap_unplug(unplug_work->bitmap);
complete(unplug_work->done);
}
void md_bitmap_unplug_async(struct bitmap *bitmap)
{
DECLARE_COMPLETION_ONSTACK(done);
struct bitmap_unplug_work unplug_work;
INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn);
unplug_work.bitmap = bitmap;
unplug_work.done = &done;
queue_work(md_bitmap_wq, &unplug_work.work);
wait_for_completion(&done);
}
EXPORT_SYMBOL(md_bitmap_unplug_async);
static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed);
/* * bitmap_init_from_disk -- called at bitmap_create time to initialize
* the in-memory bitmap from the on-disk bitmap -- also, sets up the
......@@ -1241,11 +1261,28 @@ static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap,
sector_t offset, sector_t *blocks,
int create);
static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout,
bool force)
{
struct md_thread *thread;
rcu_read_lock();
thread = rcu_dereference(mddev->thread);
if (!thread)
goto out;
if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT)
thread->timeout = timeout;
out:
rcu_read_unlock();
}
/*
* bitmap daemon -- periodically wakes up to clean bits and flush pages
* out to disk
*/
void md_bitmap_daemon_work(struct mddev *mddev)
{
struct bitmap *bitmap;
......@@ -1269,7 +1306,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
bitmap->daemon_lastrun = jiffies;
if (bitmap->allclean) {
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
goto done;
}
bitmap->allclean = 1;
......@@ -1366,8 +1403,7 @@ void md_bitmap_daemon_work(struct mddev *mddev)
done:
if (bitmap->allclean == 0)
mddev->thread->timeout =
mddev->bitmap_info.daemon_sleep;
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
mutex_unlock(&mddev->bitmap_info.mutex);
}
......@@ -1387,6 +1423,14 @@ __acquires(bitmap->lock)
sector_t csize;
int err;
if (page >= bitmap->pages) {
/*
* This can happen if bitmap_start_sync goes beyond
* End-of-device while looking for a whole page or
* user set a huge number to sysfs bitmap_set_bits.
*/
return NULL;
}
err = md_bitmap_checkpage(bitmap, page, create, 0);
if (bitmap->bp[page].hijacked ||
......@@ -1820,8 +1864,7 @@ void md_bitmap_destroy(struct mddev *mddev)
mddev->bitmap = NULL; /* disconnect from the md device */
spin_unlock(&mddev->lock);
mutex_unlock(&mddev->bitmap_info.mutex);
if (mddev->thread)
mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true);
md_bitmap_free(bitmap);
}
......@@ -1964,7 +2007,7 @@ int md_bitmap_load(struct mddev *mddev)
/* Kick recovery in case any bits were set */
set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery);
mddev->thread->timeout = mddev->bitmap_info.daemon_sleep;
mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true);
md_wakeup_thread(mddev->thread);
md_bitmap_update_sb(bitmap);
......@@ -2469,17 +2512,11 @@ timeout_store(struct mddev *mddev, const char *buf, size_t len)
timeout = MAX_SCHEDULE_TIMEOUT-1;
if (timeout < 1)
timeout = 1;
mddev->bitmap_info.daemon_sleep = timeout;
if (mddev->thread) {
/* if thread->timeout is MAX_SCHEDULE_TIMEOUT, then
* the bitmap is all clean and we don't need to
* adjust the timeout right now
*/
if (mddev->thread->timeout < MAX_SCHEDULE_TIMEOUT) {
mddev->thread->timeout = timeout;
mddev_set_timeout(mddev, timeout, false);
md_wakeup_thread(mddev->thread);
}
}
return len;
}
......
......@@ -264,6 +264,7 @@ void md_bitmap_sync_with_cluster(struct mddev *mddev,
sector_t new_lo, sector_t new_hi);
void md_bitmap_unplug(struct bitmap *bitmap);
void md_bitmap_unplug_async(struct bitmap *bitmap);
void md_bitmap_daemon_work(struct mddev *mddev);
int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks,
......@@ -273,6 +274,13 @@ int md_bitmap_copy_from_slot(struct mddev *mddev, int slot,
sector_t *lo, sector_t *hi, bool clear_bits);
void md_bitmap_free(struct bitmap *bitmap);
void md_bitmap_wait_behind_writes(struct mddev *mddev);
static inline bool md_bitmap_enabled(struct bitmap *bitmap)
{
return bitmap && bitmap->storage.filemap &&
!test_bit(BITMAP_STALE, &bitmap->flags);
}
#endif
#endif
......@@ -75,14 +75,14 @@ struct md_cluster_info {
sector_t suspend_hi;
int suspend_from; /* the slot which broadcast suspend_lo/hi */
struct md_thread *recovery_thread;
struct md_thread __rcu *recovery_thread;
unsigned long recovery_map;
/* communication loc resources */
struct dlm_lock_resource *ack_lockres;
struct dlm_lock_resource *message_lockres;
struct dlm_lock_resource *token_lockres;
struct dlm_lock_resource *no_new_dev_lockres;
struct md_thread *recv_thread;
struct md_thread __rcu *recv_thread;
struct completion newdisk_completion;
wait_queue_head_t wait;
unsigned long state;
......@@ -362,8 +362,8 @@ static void __recover_slot(struct mddev *mddev, int slot)
set_bit(slot, &cinfo->recovery_map);
if (!cinfo->recovery_thread) {
cinfo->recovery_thread = md_register_thread(recover_bitmaps,
mddev, "recover");
rcu_assign_pointer(cinfo->recovery_thread,
md_register_thread(recover_bitmaps, mddev, "recover"));
if (!cinfo->recovery_thread) {
pr_warn("md-cluster: Could not create recovery thread\n");
return;
......@@ -526,11 +526,15 @@ static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
{
int got_lock = 0;
struct md_thread *thread;
struct md_cluster_info *cinfo = mddev->cluster_info;
mddev->good_device_nr = le32_to_cpu(msg->raid_slot);
dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
wait_event(mddev->thread->wqueue,
/* daemaon thread must exist */
thread = rcu_dereference_protected(mddev->thread, true);
wait_event(thread->wqueue,
(got_lock = mddev_trylock(mddev)) ||
test_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state));
md_reload_sb(mddev, mddev->good_device_nr);
......@@ -889,7 +893,8 @@ static int join(struct mddev *mddev, int nodes)
}
/* Initiate the communication resources */
ret = -ENOMEM;
cinfo->recv_thread = md_register_thread(recv_daemon, mddev, "cluster_recv");
rcu_assign_pointer(cinfo->recv_thread,
md_register_thread(recv_daemon, mddev, "cluster_recv"));
if (!cinfo->recv_thread) {
pr_err("md-cluster: cannot allocate memory for recv_thread!\n");
goto err;
......
......@@ -400,8 +400,8 @@ static int multipath_run (struct mddev *mddev)
if (ret)
goto out_free_conf;
mddev->thread = md_register_thread(multipathd, mddev,
"multipath");
rcu_assign_pointer(mddev->thread,
md_register_thread(multipathd, mddev, "multipath"));
if (!mddev->thread)
goto out_free_conf;
......
This diff is collapsed.
......@@ -122,8 +122,6 @@ struct md_rdev {
struct serial_in_rdev *serial; /* used for raid1 io serialization */
struct work_struct del_work; /* used for delayed sysfs removal */
struct kernfs_node *sysfs_state; /* handle for 'state'
* sysfs entry */
/* handle for 'unacknowledged_bad_blocks' sysfs dentry */
......@@ -367,8 +365,8 @@ struct mddev {
int new_chunk_sectors;
int reshape_backwards;
struct md_thread *thread; /* management thread */
struct md_thread *sync_thread; /* doing resync or reconstruct */
struct md_thread __rcu *thread; /* management thread */
struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */
/* 'last_sync_action' is initialized to "none". It is set when a
* sync operation (i.e "data-check", "requested-resync", "resync",
......@@ -531,6 +529,14 @@ struct mddev {
unsigned int good_device_nr; /* good device num within cluster raid */
unsigned int noio_flag; /* for memalloc scope API */
/*
* Temporarily store rdev that will be finally removed when
* reconfig_mutex is unlocked.
*/
struct list_head deleting;
/* Protect the deleting list */
struct mutex delete_mutex;
bool has_superblocks:1;
bool fail_last_dev:1;
bool serialize_policy:1;
......@@ -555,6 +561,23 @@ enum recovery_flags {
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
};
enum md_ro_state {
MD_RDWR,
MD_RDONLY,
MD_AUTO_READ,
MD_MAX_STATE
};
static inline bool md_is_rdwr(struct mddev *mddev)
{
return (mddev->ro == MD_RDWR);
}
static inline bool is_md_suspended(struct mddev *mddev)
{
return percpu_ref_is_dying(&mddev->active_io);
}
static inline int __must_check mddev_lock(struct mddev *mddev)
{
return mutex_lock_interruptible(&mddev->reconfig_mutex);
......@@ -614,6 +637,7 @@ struct md_personality
int (*start_reshape) (struct mddev *mddev);
void (*finish_reshape) (struct mddev *mddev);
void (*update_reshape_pos) (struct mddev *mddev);
void (*prepare_suspend) (struct mddev *mddev);
/* quiesce suspends or resumes internal processing.
* 1 - stop new actions and wait for action io to complete
* 0 - return to normal behaviour
......@@ -734,8 +758,8 @@ extern struct md_thread *md_register_thread(
void (*run)(struct md_thread *thread),
struct mddev *mddev,
const char *name);
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_unregister_thread(struct md_thread __rcu **threadp);
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
......@@ -828,6 +852,7 @@ struct mdu_array_info_s;
struct mdu_disk_info_s;
extern int mdp_major;
extern struct workqueue_struct *md_bitmap_wq;
void md_autostart_arrays(int part);
int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info);
int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info);
......
......@@ -21,6 +21,7 @@
#define IO_MADE_GOOD ((struct bio *)2)
#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2)
#define MAX_PLUG_BIO 32
/* for managing resync I/O pages */
struct resync_pages {
......@@ -31,6 +32,7 @@ struct resync_pages {
struct raid1_plug_cb {
struct blk_plug_cb cb;
struct bio_list pending;
unsigned int count;
};
static void rbio_pool_free(void *rbio, void *data)
......@@ -110,3 +112,64 @@ static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
size -= len;
} while (idx++ < RESYNC_PAGES && size > 0);
}
static inline void raid1_submit_write(struct bio *bio)
{
struct md_rdev *rdev = (struct md_rdev *)bio->bi_bdev;
bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags))
bio_io_error(bio);
else if (unlikely(bio_op(bio) == REQ_OP_DISCARD &&
!bdev_max_discard_sectors(bio->bi_bdev)))
/* Just ignore it */
bio_endio(bio);
else
submit_bio_noacct(bio);
}
static inline bool raid1_add_bio_to_plug(struct mddev *mddev, struct bio *bio,
blk_plug_cb_fn unplug, int copies)
{
struct raid1_plug_cb *plug = NULL;
struct blk_plug_cb *cb;
/*
* If bitmap is not enabled, it's safe to submit the io directly, and
* this can get optimal performance.
*/
if (!md_bitmap_enabled(mddev->bitmap)) {
raid1_submit_write(bio);
return true;
}
cb = blk_check_plugged(unplug, mddev, sizeof(*plug));
if (!cb)
return false;
plug = container_of(cb, struct raid1_plug_cb, cb);
bio_list_add(&plug->pending, bio);
if (++plug->count / MAX_PLUG_BIO >= copies) {
list_del(&cb->list);
cb->callback(cb, false);
}
return true;
}
/*
* current->bio_list will be set under submit_bio() context, in this case bitmap
* io will be added to the list and wait for current io submission to finish,
* while current io submission must wait for bitmap io to be done. In order to
* avoid such deadlock, submit bitmap io asynchronously.
*/
static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
{
if (current->bio_list)
md_bitmap_unplug_async(bitmap);
else
md_bitmap_unplug(bitmap);
}
......@@ -794,22 +794,13 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
static void flush_bio_list(struct r1conf *conf, struct bio *bio)
{
/* flush any pending bitmap writes to disk before proceeding w/ I/O */
md_bitmap_unplug(conf->mddev->bitmap);
raid1_prepare_flush_writes(conf->mddev->bitmap);
wake_up(&conf->wait_barrier);
while (bio) { /* submit pending writes */
struct bio *next = bio->bi_next;
struct md_rdev *rdev = (void *)bio->bi_bdev;
bio->bi_next = NULL;
bio_set_dev(bio, rdev->bdev);
if (test_bit(Faulty, &rdev->flags)) {
bio_io_error(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!bdev_max_discard_sectors(bio->bi_bdev)))
/* Just ignore it */
bio_endio(bio);
else
submit_bio_noacct(bio);
raid1_submit_write(bio);
bio = next;
cond_resched();
}
......@@ -1178,7 +1169,7 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
struct r1conf *conf = mddev->private;
struct bio *bio;
if (from_schedule || current->bio_list) {
if (from_schedule) {
spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->pending_bio_list, &plug->pending);
spin_unlock_irq(&conf->device_lock);
......@@ -1346,8 +1337,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
struct bitmap *bitmap = mddev->bitmap;
unsigned long flags;
struct md_rdev *blocked_rdev;
struct blk_plug_cb *cb;
struct raid1_plug_cb *plug = NULL;
int first_clone;
int max_sectors;
bool write_behind = false;
......@@ -1576,15 +1565,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
r1_bio->sector);
/* flush_pending_writes() needs access to the rdev so...*/
mbio->bi_bdev = (void *)rdev;
cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
if (cb)
plug = container_of(cb, struct raid1_plug_cb, cb);
else
plug = NULL;
if (plug) {
bio_list_add(&plug->pending, mbio);
} else {
if (!raid1_add_bio_to_plug(mddev, mbio, raid1_unplug, disks)) {
spin_lock_irqsave(&conf->device_lock, flags);
bio_list_add(&conf->pending_bio_list, mbio);
spin_unlock_irqrestore(&conf->device_lock, flags);
......@@ -3087,7 +3068,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
}
err = -ENOMEM;
conf->thread = md_register_thread(raid1d, mddev, "raid1");
rcu_assign_pointer(conf->thread,
md_register_thread(raid1d, mddev, "raid1"));
if (!conf->thread)
goto abort;
......@@ -3180,8 +3162,8 @@ static int raid1_run(struct mddev *mddev)
/*
* Ok, everything is just fine now
*/
mddev->thread = conf->thread;
conf->thread = NULL;
rcu_assign_pointer(mddev->thread, conf->thread);
rcu_assign_pointer(conf->thread, NULL);
mddev->private = conf;
set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
......
......@@ -130,7 +130,7 @@ struct r1conf {
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct md_thread *thread;
struct md_thread __rcu *thread;
/* Keep track of cluster resync window to send to other
* nodes.
......
This diff is collapsed.
......@@ -100,7 +100,7 @@ struct r10conf {
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct md_thread *thread;
struct md_thread __rcu *thread;
/*
* Keep track of cluster resync window to send to other nodes.
......
......@@ -120,7 +120,7 @@ struct r5l_log {
struct bio_set bs;
mempool_t meta_pool;
struct md_thread *reclaim_thread;
struct md_thread __rcu *reclaim_thread;
unsigned long reclaim_target; /* number of space that need to be
* reclaimed. if it's 0, reclaim spaces
* used by io_units which are in
......@@ -1576,17 +1576,18 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
void r5l_quiesce(struct r5l_log *log, int quiesce)
{
struct mddev *mddev;
struct mddev *mddev = log->rdev->mddev;
struct md_thread *thread = rcu_dereference_protected(
log->reclaim_thread, lockdep_is_held(&mddev->reconfig_mutex));
if (quiesce) {
/* make sure r5l_write_super_and_discard_space exits */
mddev = log->rdev->mddev;
wake_up(&mddev->sb_wait);
kthread_park(log->reclaim_thread->tsk);
kthread_park(thread->tsk);
r5l_wake_reclaim(log, MaxSector);
r5l_do_reclaim(log);
} else
kthread_unpark(log->reclaim_thread->tsk);
kthread_unpark(thread->tsk);
}
bool r5l_log_disk_error(struct r5conf *conf)
......@@ -3063,6 +3064,7 @@ void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
{
struct r5l_log *log;
struct md_thread *thread;
int ret;
pr_debug("md/raid:%s: using device %pg as journal\n",
......@@ -3121,11 +3123,13 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
spin_lock_init(&log->tree_lock);
INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
log->rdev->mddev, "reclaim");
if (!log->reclaim_thread)
thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev,
"reclaim");
if (!thread)
goto reclaim_thread;
log->reclaim_thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;
thread->timeout = R5C_RECLAIM_WAKEUP_INTERVAL;
rcu_assign_pointer(log->reclaim_thread, thread);
init_waitqueue_head(&log->iounit_wait);
......
......@@ -5966,6 +5966,19 @@ static int add_all_stripe_bios(struct r5conf *conf,
return ret;
}
static bool reshape_inprogress(struct mddev *mddev)
{
return test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery) &&
!test_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
static bool reshape_disabled(struct mddev *mddev)
{
return is_md_suspended(mddev) || !md_is_rdwr(mddev);
}
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi)
......@@ -5997,7 +6010,8 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
if (ahead_of_reshape(mddev, logical_sector,
conf->reshape_safe)) {
spin_unlock_irq(&conf->device_lock);
return STRIPE_SCHEDULE_AND_RETRY;
ret = STRIPE_SCHEDULE_AND_RETRY;
goto out;
}
}
spin_unlock_irq(&conf->device_lock);
......@@ -6076,6 +6090,15 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
out_release:
raid5_release_stripe(sh);
out:
if (ret == STRIPE_SCHEDULE_AND_RETRY && !reshape_inprogress(mddev) &&
reshape_disabled(mddev)) {
bi->bi_status = BLK_STS_IOERR;
ret = STRIPE_FAIL;
pr_err("md/raid456:%s: io failed across reshape position while reshape can't make progress.\n",
mdname(mddev));
}
return ret;
}
......@@ -7708,7 +7731,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
}
sprintf(pers_name, "raid%d", mddev->new_level);
conf->thread = md_register_thread(raid5d, mddev, pers_name);
rcu_assign_pointer(conf->thread,
md_register_thread(raid5d, mddev, pers_name));
if (!conf->thread) {
pr_warn("md/raid:%s: couldn't allocate thread.\n",
mdname(mddev));
......@@ -7931,8 +7955,8 @@ static int raid5_run(struct mddev *mddev)
}
conf->min_offset_diff = min_offset_diff;
mddev->thread = conf->thread;
conf->thread = NULL;
rcu_assign_pointer(mddev->thread, conf->thread);
rcu_assign_pointer(conf->thread, NULL);
mddev->private = conf;
for (i = 0; i < conf->raid_disks && conf->previous_raid_disks;
......@@ -8029,8 +8053,8 @@ static int raid5_run(struct mddev *mddev)
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
"reshape");
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
if (!mddev->sync_thread)
goto abort;
}
......@@ -8377,6 +8401,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
p = conf->disks + disk;
tmp = rdev_mdlock_deref(mddev, p->rdev);
if (test_bit(WantReplacement, &tmp->flags) &&
mddev->reshape_position == MaxSector &&
p->replacement == NULL) {
clear_bit(In_sync, &rdev->flags);
set_bit(Replacement, &rdev->flags);
......@@ -8500,6 +8525,7 @@ static int raid5_start_reshape(struct mddev *mddev)
struct r5conf *conf = mddev->private;
struct md_rdev *rdev;
int spares = 0;
int i;
unsigned long flags;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
......@@ -8511,6 +8537,13 @@ static int raid5_start_reshape(struct mddev *mddev)
if (has_failed(conf))
return -EINVAL;
/* raid5 can't handle concurrent reshape and recovery */
if (mddev->recovery_cp < MaxSector)
return -EBUSY;
for (i = 0; i < conf->raid_disks; i++)
if (rdev_mdlock_deref(mddev, conf->disks[i].replacement))
return -EBUSY;
rdev_for_each(rdev, mddev) {
if (!test_bit(In_sync, &rdev->flags)
&& !test_bit(Faulty, &rdev->flags))
......@@ -8607,8 +8640,8 @@ static int raid5_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
mddev->sync_thread = md_register_thread(md_do_sync, mddev,
"reshape");
rcu_assign_pointer(mddev->sync_thread,
md_register_thread(md_do_sync, mddev, "reshape"));
if (!mddev->sync_thread) {
mddev->recovery = 0;
spin_lock_irq(&conf->device_lock);
......@@ -9043,6 +9076,22 @@ static int raid5_start(struct mddev *mddev)
return r5l_start(conf->log);
}
static void raid5_prepare_suspend(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
wait_event(mddev->sb_wait, !reshape_inprogress(mddev) ||
percpu_ref_is_zero(&mddev->active_io));
if (percpu_ref_is_zero(&mddev->active_io))
return;
/*
* Reshape is not in progress, and array is suspended, io that is
* waiting for reshpape can never be done.
*/
wake_up(&conf->wait_for_overlap);
}
static struct md_personality raid6_personality =
{
.name = "raid6",
......@@ -9063,6 +9112,7 @@ static struct md_personality raid6_personality =
.check_reshape = raid6_check_reshape,
.start_reshape = raid5_start_reshape,
.finish_reshape = raid5_finish_reshape,
.prepare_suspend = raid5_prepare_suspend,
.quiesce = raid5_quiesce,
.takeover = raid6_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
......@@ -9087,6 +9137,7 @@ static struct md_personality raid5_personality =
.check_reshape = raid5_check_reshape,
.start_reshape = raid5_start_reshape,
.finish_reshape = raid5_finish_reshape,
.prepare_suspend = raid5_prepare_suspend,
.quiesce = raid5_quiesce,
.takeover = raid5_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
......@@ -9112,6 +9163,7 @@ static struct md_personality raid4_personality =
.check_reshape = raid5_check_reshape,
.start_reshape = raid5_start_reshape,
.finish_reshape = raid5_finish_reshape,
.prepare_suspend = raid5_prepare_suspend,
.quiesce = raid5_quiesce,
.takeover = raid4_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
......
......@@ -679,7 +679,7 @@ struct r5conf {
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct md_thread *thread;
struct md_thread __rcu *thread;
struct list_head temp_inactive_list[NR_STRIPE_HASH_LOCKS];
struct r5worker_group *worker_groups;
int group_cnt;
......
// SPDX-License-Identifier: GPL-2.0-only
void raid6_neon1_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon1_xor_syndrome_real(int disks, int start, int stop,
unsigned long bytes, void **ptrs);
void raid6_neon2_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon2_xor_syndrome_real(int disks, int start, int stop,
unsigned long bytes, void **ptrs);
void raid6_neon4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon4_xor_syndrome_real(int disks, int start, int stop,
unsigned long bytes, void **ptrs);
void raid6_neon8_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs);
void raid6_neon8_xor_syndrome_real(int disks, int start, int stop,
unsigned long bytes, void **ptrs);
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
uint8_t *dq, const uint8_t *pbmul,
const uint8_t *qmul);
void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
const uint8_t *qmul);
......@@ -25,6 +25,7 @@
*/
#include <arm_neon.h>
#include "neon.h"
typedef uint8x16_t unative_t;
......
......@@ -8,6 +8,7 @@
#ifdef __KERNEL__
#include <asm/neon.h>
#include "neon.h"
#else
#define kernel_neon_begin()
#define kernel_neon_end()
......@@ -19,13 +20,6 @@ static int raid6_has_neon(void)
return cpu_has_neon();
}
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
uint8_t *dq, const uint8_t *pbmul,
const uint8_t *qmul);
void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
const uint8_t *qmul);
static void raid6_2data_recov_neon(int disks, size_t bytes, int faila,
int failb, void **ptrs)
{
......
......@@ -5,6 +5,7 @@
*/
#include <arm_neon.h>
#include "neon.h"
#ifdef CONFIG_ARM
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment