Commit ef67744e authored by Jens Axboe

Merge branch 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.9/drivers

Pull MD for 5.9 from Song.

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid10: avoid deadlock on recovery.
  raid: md_p.h: drop duplicated word in a comment
  md-cluster: fix rmmod issue when md_cluster convert bitmap to none
  md-cluster: fix safemode_delay value when converting to clustered bitmap
  md/raid5: support config stripe_size by sysfs entry
  md/raid5: set default stripe_size as 4096
  md/raid456: convert macro STRIPE_* to RAID5_STRIPE_*
  raid5: remove the meaningless check in raid5_make_request
  raid5: put the comment of clear_batch_ready to the right place
  raid5: call clear_batch_ready before set STRIPE_ACTIVE
  md: raid10: Fix compilation warning
  md: raid5: Fix compilation warning
  md: raid5-cache: Remove set but unused variable
  md: Fix compilation warning
parents 659bf827 fe630de0
@@ -101,6 +101,8 @@ static void mddev_detach(struct mddev *mddev);
  * count by 2 for every hour elapsed between read errors.
  */
 #define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
+/* Default safemode delay: 200 msec */
+#define DEFAULT_SAFEMODE_DELAY ((200 * HZ)/1000 +1)
 /*
  * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
  * is 1000 KB/sec, so the extra system load does not show up that much.
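The new DEFAULT_SAFEMODE_DELAY macro is just the old inline expression given a name: 200 msec converted to jiffies, with +1 so the result never rounds down to zero ticks. A minimal user-space sketch of the same arithmetic for a few HZ values (the HZ list is only illustrative; in the kernel HZ is a build-time constant):

    #include <stdio.h>

    /* Same arithmetic as DEFAULT_SAFEMODE_DELAY, evaluated for a few HZ values. */
    int main(void)
    {
        const int hz_values[] = { 100, 250, 300, 1000 };

        for (unsigned int i = 0; i < sizeof(hz_values) / sizeof(hz_values[0]); i++) {
            int hz = hz_values[i];
            long delay = (200 * hz) / 1000 + 1;

            /* e.g. HZ=1000 -> 201 jiffies (~201 ms), HZ=250 -> 51 jiffies (~204 ms) */
            printf("HZ=%4d: safemode_delay = %ld jiffies (~%ld ms)\n",
                   hz, delay, delay * 1000 / hz);
        }
        return 0;
    }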
@@ -2469,8 +2471,8 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 		goto fail;

 	ko = &part_to_dev(rdev->bdev->bd_part)->kobj;
-	if (sysfs_create_link(&rdev->kobj, ko, "block"))
-		/* failure here is OK */;
+	/* failure here is OK */
+	err = sysfs_create_link(&rdev->kobj, ko, "block");
 	rdev->sysfs_state = sysfs_get_dirent_safe(rdev->kobj.sd, "state");
 	rdev->sysfs_unack_badblocks =
 		sysfs_get_dirent_safe(rdev->kobj.sd, "unacknowledged_bad_blocks");
@@ -3238,8 +3240,8 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 				return err;
 		} else
 			sysfs_notify_dirent_safe(rdev->sysfs_state);
-		if (sysfs_link_rdev(rdev->mddev, rdev))
-			/* failure here is OK */;
+		/* failure here is OK */;
+		sysfs_link_rdev(rdev->mddev, rdev);
 		/* don't wakeup anyone, leave that to userspace. */
 	} else {
 		if (slot >= rdev->mddev->raid_disks &&
@@ -6034,7 +6036,7 @@ int md_run(struct mddev *mddev)
 	if (mddev_is_clustered(mddev))
 		mddev->safemode_delay = 0;
 	else
-		mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
+		mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
 	mddev->in_sync = 1;
 	smp_wmb();
 	spin_lock(&mddev->lock);
@@ -7413,6 +7415,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)

 			mddev->bitmap_info.nodes = 0;
 			md_cluster_ops->leave(mddev);
+			module_put(md_cluster_mod);
+			mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
 		}
 		mddev_suspend(mddev);
 		md_bitmap_destroy(mddev);
@@ -8408,6 +8412,7 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);

 int md_setup_cluster(struct mddev *mddev, int nodes)
 {
+	int ret;
 	if (!md_cluster_ops)
 		request_module("md-cluster");
 	spin_lock(&pers_lock);
@@ -8419,7 +8424,10 @@ int md_setup_cluster(struct mddev *mddev, int nodes)
 	}
 	spin_unlock(&pers_lock);

-	return md_cluster_ops->join(mddev, nodes);
+	ret = md_cluster_ops->join(mddev, nodes);
+	if (!ret)
+		mddev->safemode_delay = 0;
+	return ret;
 }

 void md_cluster_stop(struct mddev *mddev)
@@ -9113,8 +9121,8 @@ static int remove_and_add_spares(struct mddev *mddev,
 				rdev->recovery_offset = 0;
 			}
 			if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
-				if (sysfs_link_rdev(mddev, rdev))
-					/* failure here is OK */;
+				/* failure here is OK */
+				sysfs_link_rdev(mddev, rdev);
 				if (!test_bit(Journal, &rdev->flags))
 					spares++;
 				md_new_event(mddev);
@@ -980,6 +980,7 @@ static void wait_barrier(struct r10conf *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
 	if (conf->barrier) {
+		struct bio_list *bio_list = current->bio_list;
 		conf->nr_waiting++;
 		/* Wait for the barrier to drop.
 		 * However if there are already pending
@@ -994,9 +995,16 @@ static void wait_barrier(struct r10conf *conf)
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
 				    (atomic_read(&conf->nr_pending) &&
-				     current->bio_list &&
-				     (!bio_list_empty(&current->bio_list[0]) ||
-				      !bio_list_empty(&current->bio_list[1]))),
+				     bio_list &&
+				     (!bio_list_empty(&bio_list[0]) ||
+				      !bio_list_empty(&bio_list[1]))) ||
+				    /* move on if recovery thread is
+				     * blocked by us
+				     */
+				    (conf->mddev->thread->tsk == current &&
+				     test_bit(MD_RECOVERY_RUNNING,
+					      &conf->mddev->recovery) &&
+				     conf->nr_queued > 0),
 				    conf->resync_lock);
 		conf->nr_waiting--;
 		if (!conf->nr_waiting)
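The extra clause in the wait condition above is the actual deadlock fix from "md/raid10: avoid deadlock on recovery.": recovery raises the barrier and then needs raid10d to drain the requests queued for it, while raid10d itself can end up sleeping in wait_barrier(), so neither side makes progress. Restated as a standalone predicate purely for readability (an illustrative sketch, not code from the patch; the helper name is hypothetical and the drivers/md/raid10.c context with struct r10conf is assumed):

    /* Hypothetical restatement of the new wait_barrier() exit condition. */
    static bool r10_wait_barrier_done(struct r10conf *conf, struct bio_list *bio_list)
    {
        /* normal case: the barrier has been dropped */
        if (!conf->barrier)
            return true;

        /* we hold plugged bios that whoever raised the barrier may be waiting on */
        if (atomic_read(&conf->nr_pending) && bio_list &&
            (!bio_list_empty(&bio_list[0]) || !bio_list_empty(&bio_list[1])))
            return true;

        /* new case: the raid10d thread must not sleep here while recovery is
         * running and requests are still queued for it to handle, otherwise
         * recovery and raid10d end up waiting on each other
         */
        if (conf->mddev->thread->tsk == current &&
            test_bit(MD_RECOVERY_RUNNING, &conf->mddev->recovery) &&
            conf->nr_queued > 0)
            return true;

        return false;
    }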
@@ -4307,8 +4315,8 @@ static int raid10_start_reshape(struct mddev *mddev)
 			else
 				rdev->recovery_offset = 0;

-			if (sysfs_link_rdev(mddev, rdev))
-				/* Failure here is OK */;
+			/* Failure here is OK */
+			sysfs_link_rdev(mddev, rdev);
 		}
 	} else if (rdev->raid_disk >= conf->prev.raid_disks
 		   && !test_bit(Faulty, &rdev->flags)) {
@@ -195,9 +195,7 @@ struct r5l_log {
 static inline sector_t r5c_tree_index(struct r5conf *conf,
 				      sector_t sect)
 {
-	sector_t offset;
-
-	offset = sector_div(sect, conf->chunk_sectors);
+	sector_div(sect, conf->chunk_sectors);
 	return sect;
 }
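The r5c_tree_index() change only silences a set-but-unused warning: the kernel's sector_div() divides its first argument in place and returns the remainder, and the removed 'offset' variable merely captured that unused remainder, so the quotient left in 'sect' is unchanged. A user-space stand-in showing the same behaviour (the sector_div_demo() helper and the numbers are purely illustrative):

    #include <stdio.h>

    /* User-space stand-in for the kernel's sector_div(): divide *sect in place
     * and return the remainder, which r5c_tree_index() never used. */
    static unsigned long sector_div_demo(unsigned long long *sect, unsigned int div)
    {
        unsigned long rem = (unsigned long)(*sect % div);

        *sect /= div;
        return rem;
    }

    int main(void)
    {
        unsigned long long sect = 1000000;      /* arbitrary sector number */
        unsigned int chunk_sectors = 1024;      /* example chunk size in sectors */

        sector_div_demo(&sect, chunk_sectors);  /* remainder deliberately ignored */
        printf("tree index = %llu\n", sect);    /* 1000000 / 1024 = 976 */
        return 0;
    }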
@@ -298,8 +296,8 @@ r5c_return_dev_pending_writes(struct r5conf *conf, struct r5dev *dev)
 	wbi = dev->written;
 	dev->written = NULL;
 	while (wbi && wbi->bi_iter.bi_sector <
-	       dev->sector + STRIPE_SECTORS) {
-		wbi2 = r5_next_bio(wbi, dev->sector);
+	       dev->sector + RAID5_STRIPE_SECTORS(conf)) {
+		wbi2 = r5_next_bio(conf, wbi, dev->sector);
 		md_write_end(conf->mddev);
 		bio_endio(wbi);
 		wbi = wbi2;
@@ -316,7 +314,7 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
 			set_bit(R5_UPTODATE, &sh->dev[i].flags);
 			r5c_return_dev_pending_writes(conf, &sh->dev[i]);
 			md_bitmap_endwrite(conf->mddev->bitmap, sh->sector,
-					   STRIPE_SECTORS,
+					   RAID5_STRIPE_SECTORS(conf),
 					   !test_bit(STRIPE_DEGRADED, &sh->state),
 					   0);
 		}
@@ -364,7 +362,7 @@ void r5c_check_cached_full_stripe(struct r5conf *conf)
 	 */
 	if (atomic_read(&conf->r5c_cached_full_stripes) >=
 	    min(R5C_FULL_STRIPE_FLUSH_BATCH(conf),
-		conf->chunk_sectors >> STRIPE_SHIFT))
+		conf->chunk_sectors >> RAID5_STRIPE_SHIFT(conf)))
 		r5l_wake_reclaim(conf->log, 0);
 }
@@ -324,7 +324,7 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
 		 * be just after the last logged stripe and write to the same
 		 * disks. Use bit shift and logarithm to avoid 64-bit division.
 		 */
-		if ((sh->sector == sh_last->sector + STRIPE_SECTORS) &&
+		if ((sh->sector == sh_last->sector + RAID5_STRIPE_SECTORS(conf)) &&
 		    (data_sector >> ilog2(conf->chunk_sectors) ==
 		     data_sector_last >> ilog2(conf->chunk_sectors)) &&
 		    ((data_sector - data_sector_last) * data_disks ==
@@ -844,9 +844,9 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,

 		/* if start and end is 4k aligned, use a 4k block */
 		if (block_size == 512 &&
-		    (r_sector_first & (STRIPE_SECTORS - 1)) == 0 &&
-		    (r_sector_last & (STRIPE_SECTORS - 1)) == 0)
-			block_size = STRIPE_SIZE;
+		    (r_sector_first & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0 &&
+		    (r_sector_last & (RAID5_STRIPE_SECTORS(conf) - 1)) == 0)
+			block_size = RAID5_STRIPE_SIZE(conf);

 		/* iterate through blocks in strip */
 		for (i = 0; i < strip_sectors; i += (block_size >> 9)) {
@@ -1274,7 +1274,8 @@ static int ppl_validate_rdev(struct md_rdev *rdev)
 	ppl_data_sectors = rdev->ppl.size - (PPL_HEADER_SIZE >> 9);

 	if (ppl_data_sectors > 0)
-		ppl_data_sectors = rounddown(ppl_data_sectors, STRIPE_SECTORS);
+		ppl_data_sectors = rounddown(ppl_data_sectors,
+				RAID5_STRIPE_SECTORS((struct r5conf *)rdev->mddev->private));

 	if (ppl_data_sectors <= 0) {
 		pr_warn("md/raid:%s: PPL space too small on %s\n",
[One file's diff is collapsed in this view.]
@@ -472,32 +472,20 @@ struct disk_info {
  */

 #define NR_STRIPES		256
+#define DEFAULT_STRIPE_SIZE	4096
+
+#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
 #define STRIPE_SIZE		PAGE_SIZE
 #define STRIPE_SHIFT		(PAGE_SHIFT - 9)
 #define STRIPE_SECTORS		(STRIPE_SIZE>>9)
+#endif
+
 #define	IO_THRESHOLD		1
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
 #define MAX_STRIPE_BATCH	8

-/* bio's attached to a stripe+device for I/O are linked together in bi_sector
- * order without overlap.  There may be several bio's per stripe+device, and
- * a bio could span several devices.
- * When walking this list for a particular stripe+device, we must never proceed
- * beyond a bio that extends past this device, as the next bio might no longer
- * be valid.
- * This function is used to determine the 'next' bio in the list, given the
- * sector of the current stripe+device
- */
-static inline struct bio *r5_next_bio(struct bio *bio, sector_t sector)
-{
-	if (bio_end_sector(bio) < sector + STRIPE_SECTORS)
-		return bio->bi_next;
-	else
-		return NULL;
-}
-
 /* NOTE NR_STRIPE_HASH_LOCKS must remain below 64.
  * This is because we sometimes take all the spinlocks
  * and creating that much locking depth can cause
@@ -574,6 +562,11 @@ struct r5conf {
 	int			raid_disks;
 	int			max_nr_stripes;
 	int			min_nr_stripes;
+#if PAGE_SIZE != DEFAULT_STRIPE_SIZE
+	unsigned long	stripe_size;
+	unsigned int	stripe_shift;
+	unsigned long	stripe_sectors;
+#endif

 	/* reshape_progress is the leading edge of a 'reshape'
 	 * It has value MaxSector when no reshape is happening
@@ -690,6 +683,32 @@ struct r5conf {
 	struct r5pending_data	*next_pending_data;
 };

+#if PAGE_SIZE == DEFAULT_STRIPE_SIZE
+#define RAID5_STRIPE_SIZE(conf)		STRIPE_SIZE
+#define RAID5_STRIPE_SHIFT(conf)	STRIPE_SHIFT
+#define RAID5_STRIPE_SECTORS(conf)	STRIPE_SECTORS
+#else
+#define RAID5_STRIPE_SIZE(conf)		((conf)->stripe_size)
+#define RAID5_STRIPE_SHIFT(conf)	((conf)->stripe_shift)
+#define RAID5_STRIPE_SECTORS(conf)	((conf)->stripe_sectors)
+#endif
+
+/* bio's attached to a stripe+device for I/O are linked together in bi_sector
+ * order without overlap.  There may be several bio's per stripe+device, and
+ * a bio could span several devices.
+ * When walking this list for a particular stripe+device, we must never proceed
+ * beyond a bio that extends past this device, as the next bio might no longer
+ * be valid.
+ * This function is used to determine the 'next' bio in the list, given the
+ * sector of the current stripe+device
+ */
+static inline struct bio *r5_next_bio(struct r5conf *conf, struct bio *bio, sector_t sector)
+{
+	if (bio_end_sector(bio) < sector + RAID5_STRIPE_SECTORS(conf))
+		return bio->bi_next;
+	else
+		return NULL;
+}
+
 /*
  * Our supported algorithms
  */
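With the block above, RAID5_STRIPE_SIZE()/SHIFT()/SECTORS() compile down to the old constants on systems where PAGE_SIZE equals the 4096-byte default, and read the new per-array r5conf fields everywhere else. How the three fields relate for a given stripe size in bytes, as a hedged sketch (the helper name is hypothetical and not part of this series, but the relationships mirror the macros and the new DEFAULT_STRIPE_SIZE of 4096; the raid5.h definitions above are assumed to be in scope):

    #include <linux/log2.h>

    /* Hypothetical helper: keep the r5conf stripe geometry fields consistent for
     * a stripe size in bytes (a power of two between 4096 and PAGE_SIZE). */
    static void r5conf_set_stripe_geometry(struct r5conf *conf, unsigned long size)
    {
    #if PAGE_SIZE != DEFAULT_STRIPE_SIZE
        conf->stripe_size = size;               /* stripe unit in bytes */
        conf->stripe_shift = ilog2(size) - 9;   /* sector count >> stripe_shift gives a stripe count */
        conf->stripe_sectors = size >> 9;       /* stripe unit in 512-byte sectors */
    #endif
    }

The value itself becomes tunable through the new sysfs entry added by "md/raid5: support config stripe_size by sysfs entry", with DEFAULT_STRIPE_SIZE as the fallback.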
@@ -123,7 +123,7 @@ typedef struct mdp_device_descriptor_s {

 /*
  * Notes:
- * - if an array is being reshaped (restriped) in order to change the
+ * - if an array is being reshaped (restriped) in order to change
  *   the number of active devices in the array, 'raid_disks' will be
  *   the larger of the old and new numbers.  'delta_disks' will
  *   be the "new - old".  So if +ve, raid_disks is the new value, and