Commit c2670cf7 authored by Jens Axboe's avatar Jens Axboe

Merge tag 'md-6.11-20240612' of...

Merge tag 'md-6.11-20240612' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.11/block

Pull MD updates from Song:

"The major changes in this PR are:

 - sync_action fix and refactoring, by Yu Kuai;
 - Various small fixes by Christoph Hellwig, Li Nan, and Ofir Gal."

* tag 'md-6.11-20240612' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid5: avoid BUG_ON() while continue reshape after reassembling
  md: pass in max_sectors for pers->sync_request()
  md: factor out helpers for different sync_action in md_do_sync()
  md: replace last_sync_action with new enum type
  md: use new helpers in md_do_sync()
  md: don't fail action_store() if sync_thread is not registered
  md: remove parameter check_seq for stop_sync_thread()
  md: replace sysfs api sync_action with new helpers
  md: factor out helper to start reshape from action_store()
  md: add new helpers for sync_action
  md: add a new enum type sync_action
  md: rearrange recovery_flags
  md/md-bitmap: fix writing non bitmap pages
  md/raid1: don't free conf on raid0_run failure
  md/raid0: don't free conf on raid0_run failure
  md: make md_flush_request() more readable
  md: fix deadlock between mddev_suspend and flush bio
  md: change the return value type of md_write_start to void
  md: do not delete safemode_timer in mddev_suspend
parents 83a7eefe 305a5170
......@@ -3542,7 +3542,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
recovery = rs->md.recovery;
state = decipher_sync_action(mddev, recovery);
progress = rs_get_progress(rs, recovery, state, resync_max_sectors);
resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ?
resync_mismatches = mddev->last_sync_action == ACTION_CHECK ?
atomic64_read(&mddev->resync_mismatches) : 0;
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
......
......@@ -227,6 +227,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
PAGE_SHIFT;
loff_t sboff, offset = mddev->bitmap_info.offset;
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
unsigned int size = PAGE_SIZE;
......@@ -269,11 +271,9 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
if (size == 0)
/* bitmap runs in to data */
return -EINVAL;
} else {
/* DATA METADATA BITMAP - no problems */
}
md_super_write(mddev, rdev, sboff + ps, (int) size, page);
md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page);
return 0;
}
......
This diff is collapsed.
......@@ -34,6 +34,61 @@
*/
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
/* Status of sync thread. */
enum sync_action {
/*
* Represent by MD_RECOVERY_SYNC, start when:
* 1) after assemble, sync data from first rdev to other copies, this
* must be done first before other sync actions and will only execute
* once;
* 2) resize the array(notice that this is not reshape), sync data for
* the new range;
*/
ACTION_RESYNC,
/*
* Represent by MD_RECOVERY_RECOVER, start when:
* 1) for new replacement, sync data based on the replace rdev or
* available copies from other rdev;
* 2) for new member disk while the array is degraded, sync data from
* other rdev;
* 3) reassemble after power failure or re-add a hot removed rdev, sync
* data from first rdev to other copies based on bitmap;
*/
ACTION_RECOVER,
/*
* Represent by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED |
* MD_RECOVERY_CHECK, start when user echo "check" to sysfs api
* sync_action, used to check if data copies from differenct rdev are
* the same. The number of mismatch sectors will be exported to user
* by sysfs api mismatch_cnt;
*/
ACTION_CHECK,
/*
* Represent by MD_RECOVERY_SYNC | MD_RECOVERY_REQUESTED, start when
* user echo "repair" to sysfs api sync_action, usually paired with
* ACTION_CHECK, used to force syncing data once user found that there
* are inconsistent data,
*/
ACTION_REPAIR,
/*
* Represent by MD_RECOVERY_RESHAPE, start when new member disk is added
* to the conf, notice that this is different from spares or
* replacement;
*/
ACTION_RESHAPE,
/*
* Represent by MD_RECOVERY_FROZEN, can be set by sysfs api sync_action
* or internal usage like setting the array read-only, will forbid above
* actions.
*/
ACTION_FROZEN,
/*
* All above actions don't match.
*/
ACTION_IDLE,
NR_SYNC_ACTIONS,
};
/*
* The struct embedded in rdev is used to serialize IO.
*/
......@@ -371,13 +426,12 @@ struct mddev {
struct md_thread __rcu *thread; /* management thread */
struct md_thread __rcu *sync_thread; /* doing resync or reconstruct */
/* 'last_sync_action' is initialized to "none". It is set when a
* sync operation (i.e "data-check", "requested-resync", "resync",
* "recovery", or "reshape") is started. It holds this value even
/*
* Set when a sync operation is started. It holds this value even
* when the sync thread is "frozen" (interrupted) or "idle" (stopped
* or finished). It is overwritten when a new sync operation is begun.
* or finished). It is overwritten when a new sync operation is begun.
*/
char *last_sync_action;
enum sync_action last_sync_action;
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
......@@ -540,8 +594,6 @@ struct mddev {
*/
struct list_head deleting;
/* Used to synchronize idle and frozen for action_store() */
struct mutex sync_mutex;
/* The sequence number for sync thread */
atomic_t sync_seq;
......@@ -551,22 +603,46 @@ struct mddev {
};
enum recovery_flags {
/* flags for sync thread running status */
/*
* set when one of sync action is set and new sync thread need to be
* registered, or just add/remove spares from conf.
*/
MD_RECOVERY_NEEDED,
/* sync thread is running, or about to be started */
MD_RECOVERY_RUNNING,
/* sync thread needs to be aborted for some reason */
MD_RECOVERY_INTR,
/* sync thread is done and is waiting to be unregistered */
MD_RECOVERY_DONE,
/* running sync thread must abort immediately, and not restart */
MD_RECOVERY_FROZEN,
/* waiting for pers->start() to finish */
MD_RECOVERY_WAIT,
/* interrupted because io-error */
MD_RECOVERY_ERROR,
/* flags determines sync action, see details in enum sync_action */
/* if just this flag is set, action is resync. */
MD_RECOVERY_SYNC,
/*
* paired with MD_RECOVERY_SYNC, if MD_RECOVERY_CHECK is not set,
* action is repair, means user requested resync.
*/
MD_RECOVERY_REQUESTED,
/*
* If neither SYNC or RESHAPE are set, then it is a recovery.
* paired with MD_RECOVERY_SYNC and MD_RECOVERY_REQUESTED, action is
* check.
*/
MD_RECOVERY_RUNNING, /* a thread is running, or about to be started */
MD_RECOVERY_SYNC, /* actually doing a resync, not a recovery */
MD_RECOVERY_RECOVER, /* doing recovery, or need to try it. */
MD_RECOVERY_INTR, /* resync needs to be aborted for some reason */
MD_RECOVERY_DONE, /* thread is done and is waiting to be reaped */
MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */
MD_RECOVERY_REQUESTED, /* user-space has requested a sync (used with SYNC) */
MD_RECOVERY_CHECK, /* user-space request for check-only, no repair */
MD_RECOVERY_RESHAPE, /* A reshape is happening */
MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
MD_RECOVERY_CHECK,
/* recovery, or need to try it */
MD_RECOVERY_RECOVER,
/* reshape */
MD_RECOVERY_RESHAPE,
/* remote node is running resync thread */
MD_RESYNCING_REMOTE,
};
enum md_ro_state {
......@@ -653,7 +729,8 @@ struct md_personality
int (*hot_add_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*hot_remove_disk) (struct mddev *mddev, struct md_rdev *rdev);
int (*spare_active) (struct mddev *mddev);
sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr, int *skipped);
sector_t (*sync_request)(struct mddev *mddev, sector_t sector_nr,
sector_t max_sector, int *skipped);
int (*resize) (struct mddev *mddev, sector_t sectors);
sector_t (*size) (struct mddev *mddev, sector_t sectors, int raid_disks);
int (*check_reshape) (struct mddev *mddev);
......@@ -785,7 +862,10 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
extern void md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern enum sync_action md_sync_action(struct mddev *mddev);
extern enum sync_action md_sync_action_by_name(const char *page);
extern const char *md_sync_action_name(enum sync_action action);
extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
......
......@@ -365,18 +365,13 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors;
}
static void free_conf(struct mddev *mddev, struct r0conf *conf)
{
kfree(conf->strip_zone);
kfree(conf->devlist);
kfree(conf);
}
static void raid0_free(struct mddev *mddev, void *priv)
{
struct r0conf *conf = priv;
free_conf(mddev, conf);
kfree(conf->strip_zone);
kfree(conf->devlist);
kfree(conf);
}
static int raid0_set_limits(struct mddev *mddev)
......@@ -415,7 +410,7 @@ static int raid0_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid0_set_limits(mddev);
if (ret)
goto out_free_conf;
return ret;
}
/* calculate array device size */
......@@ -427,13 +422,7 @@ static int raid0_run(struct mddev *mddev)
dump_zones(mddev);
ret = md_integrity_register(mddev);
if (ret)
goto out_free_conf;
return 0;
out_free_conf:
free_conf(mddev, conf);
return ret;
return md_integrity_register(mddev);
}
/*
......
......@@ -1687,8 +1687,7 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
if (bio_data_dir(bio) == READ)
raid1_read_request(mddev, bio, sectors, NULL);
else {
if (!md_write_start(mddev,bio))
return false;
md_write_start(mddev,bio);
raid1_write_request(mddev, bio, sectors);
}
return true;
......@@ -2757,12 +2756,12 @@ static struct r1bio *raid1_alloc_init_r1buf(struct r1conf *conf)
*/
static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
int *skipped)
sector_t max_sector, int *skipped)
{
struct r1conf *conf = mddev->private;
struct r1bio *r1_bio;
struct bio *bio;
sector_t max_sector, nr_sectors;
sector_t nr_sectors;
int disk = -1;
int i;
int wonly = -1;
......@@ -2778,7 +2777,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
if (init_resync(conf))
return 0;
max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
/* If we aborted, we need to abort the
* sync on the 'current' bitmap chunk (there will
......@@ -3204,7 +3202,6 @@ static int raid1_set_limits(struct mddev *mddev)
return queue_limits_set(mddev->gendisk->queue, &lim);
}
static void raid1_free(struct mddev *mddev, void *priv);
static int raid1_run(struct mddev *mddev)
{
struct r1conf *conf;
......@@ -3238,7 +3235,7 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
if (ret)
goto abort;
return ret;
}
mddev->degraded = 0;
......@@ -3252,8 +3249,7 @@ static int raid1_run(struct mddev *mddev)
*/
if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(mddev, &conf->thread);
ret = -EINVAL;
goto abort;
return -EINVAL;
}
if (conf->raid_disks - mddev->degraded == 1)
......@@ -3277,14 +3273,8 @@ static int raid1_run(struct mddev *mddev)
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
ret = md_integrity_register(mddev);
if (ret) {
if (ret)
md_unregister_thread(mddev, &mddev->thread);
goto abort;
}
return 0;
abort:
raid1_free(mddev, conf);
return ret;
}
......
......@@ -1836,8 +1836,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
&& md_flush_request(mddev, bio))
return true;
if (!md_write_start(mddev, bio))
return false;
md_write_start(mddev, bio);
if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
if (!raid10_handle_discard(mddev, bio))
......@@ -3140,12 +3139,12 @@ static void raid10_set_cluster_sync_high(struct r10conf *conf)
*/
static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
int *skipped)
sector_t max_sector, int *skipped)
{
struct r10conf *conf = mddev->private;
struct r10bio *r10_bio;
struct bio *biolist = NULL, *bio;
sector_t max_sector, nr_sectors;
sector_t nr_sectors;
int i;
int max_sync;
sector_t sync_blocks;
......@@ -3175,10 +3174,6 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
return 0;
skipped:
max_sector = mddev->dev_sectors;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
conf->cluster_sync_low = 0;
conf->cluster_sync_high = 0;
......
......@@ -6078,8 +6078,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
}
if (!md_write_start(mddev, bi))
return false;
md_write_start(mddev, bi);
/*
* If array is degraded, better not do chunk aligned read because
* later we might have to read it again in order to reconstruct
......@@ -6255,7 +6254,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->reshape_backwards) {
BUG_ON(writepos < reshape_sectors);
if (WARN_ON(writepos < reshape_sectors))
return MaxSector;
writepos -= reshape_sectors;
readpos += reshape_sectors;
safepos += reshape_sectors;
......@@ -6273,14 +6274,18 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
* to set 'stripe_addr' which is where we will write to.
*/
if (mddev->reshape_backwards) {
BUG_ON(conf->reshape_progress == 0);
if (WARN_ON(conf->reshape_progress == 0))
return MaxSector;
stripe_addr = writepos;
BUG_ON((mddev->dev_sectors &
~((sector_t)reshape_sectors - 1))
- reshape_sectors - stripe_addr
!= sector_nr);
if (WARN_ON((mddev->dev_sectors &
~((sector_t)reshape_sectors - 1)) -
reshape_sectors - stripe_addr != sector_nr))
return MaxSector;
} else {
BUG_ON(writepos != sector_nr + reshape_sectors);
if (WARN_ON(writepos != sector_nr + reshape_sectors))
return MaxSector;
stripe_addr = sector_nr;
}
......@@ -6458,11 +6463,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
}
static inline sector_t raid5_sync_request(struct mddev *mddev, sector_t sector_nr,
int *skipped)
sector_t max_sector, int *skipped)
{
struct r5conf *conf = mddev->private;
struct stripe_head *sh;
sector_t max_sector = mddev->dev_sectors;
sector_t sync_blocks;
int still_degraded = 0;
int i;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment