Commit 4b382d06 authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: allow resync_start to be set while an array is active.
  md/raid10:  reformat some loops with less indenting.
  md/raid10: remove unused variable.
  md/raid10: make more use of 'slot' in raid10d.
  md/raid10: some tidying up in fix_read_error
  md/raid1: improve handling of pages allocated for write-behind.
  md/raid1: try fix_sync_read_error before process_checks.
  md/raid1: tidy up new functions: process_checks and fix_sync_read_error.
  md/raid1: split out two sub-functions from sync_request_write
  md: make error_handler functions more uniform and correct.
  md/multipath: discard ->working_disks in favour of ->degraded
  md/raid1: clean up read_balance.
  md: simplify raid10 read_balance
  md/bitmap: fix saving of events_cleared and other state.
  md: reject a re-add request that cannot be honoured.
  md: Fix race when creating a new md device.
parents bdfbe804 b098636c
drivers/md/bitmap.c
@@ -493,11 +493,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
         spin_unlock_irqrestore(&bitmap->lock, flags);
         sb = kmap_atomic(bitmap->sb_page, KM_USER0);
         sb->events = cpu_to_le64(bitmap->mddev->events);
-        if (bitmap->mddev->events < bitmap->events_cleared) {
+        if (bitmap->mddev->events < bitmap->events_cleared)
                 /* rocking back to read-only */
                 bitmap->events_cleared = bitmap->mddev->events;
-                sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
-        }
+        sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
+        sb->state = cpu_to_le32(bitmap->flags);
         /* Just in case these have been changed via sysfs: */
         sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ);
         sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind);
@@ -618,7 +618,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
         if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
                 bitmap->flags |= BITMAP_HOSTENDIAN;
         bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
-        if (sb->state & cpu_to_le32(BITMAP_STALE))
+        if (bitmap->flags & BITMAP_STALE)
                 bitmap->events_cleared = bitmap->mddev->events;
         err = 0;
 out:
@@ -652,9 +652,11 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
         switch (op) {
         case MASK_SET:
                 sb->state |= cpu_to_le32(bits);
+                bitmap->flags |= bits;
                 break;
         case MASK_UNSET:
                 sb->state &= cpu_to_le32(~bits);
+                bitmap->flags &= ~bits;
                 break;
         default:
                 BUG();
drivers/md/md.c
@@ -3324,7 +3324,7 @@ resync_start_store(mddev_t *mddev, const char *buf, size_t len)
         char *e;
         unsigned long long n = simple_strtoull(buf, &e, 10);
 
-        if (mddev->pers)
+        if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
                 return -EBUSY;
         if (cmd_match(buf, "none"))
                 n = MaxSector;
@@ -4347,13 +4347,19 @@ static int md_alloc(dev_t dev, char *name)
         disk->fops = &md_fops;
         disk->private_data = mddev;
         disk->queue = mddev->queue;
+        blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
         /* Allow extended partitions. This makes the
          * 'mdp' device redundant, but we can't really
          * remove it now.
          */
         disk->flags |= GENHD_FL_EXT_DEVT;
-        add_disk(disk);
         mddev->gendisk = disk;
+        /* As soon as we call add_disk(), another thread could get
+         * through to md_open, so make sure it doesn't get too far
+         */
+        mutex_lock(&mddev->open_mutex);
+        add_disk(disk);
+
         error = kobject_init_and_add(&mddev->kobj, &md_ktype,
                                      &disk_to_dev(disk)->kobj, "%s", "md");
         if (error) {
@@ -4367,8 +4373,7 @@ static int md_alloc(dev_t dev, char *name)
         if (mddev->kobj.sd &&
             sysfs_create_group(&mddev->kobj, &md_bitmap_group))
                 printk(KERN_DEBUG "pointless warning\n");
-
-        blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
+        mutex_unlock(&mddev->open_mutex);
 abort:
         mutex_unlock(&disks_mutex);
         if (!error && mddev->kobj.sd) {
@@ -5211,6 +5216,16 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
                 } else
                         super_types[mddev->major_version].
                                 validate_super(mddev, rdev);
+                if ((info->state & (1<<MD_DISK_SYNC)) &&
+                     (!test_bit(In_sync, &rdev->flags) ||
+                      rdev->raid_disk != info->raid_disk)) {
+                        /* This was a hot-add request, but events doesn't
+                         * match, so reject it.
+                         */
+                        export_rdev(rdev);
+                        return -EINVAL;
+                }
+
                 if (test_bit(In_sync, &rdev->flags))
                         rdev->saved_raid_disk = rdev->raid_disk;
                 else
drivers/md/multipath.c
@@ -146,7 +146,7 @@ static void multipath_status (struct seq_file *seq, mddev_t *mddev)
         int i;
 
         seq_printf (seq, " [%d/%d] [", conf->raid_disks,
-                    conf->working_disks);
+                    conf->raid_disks - mddev->degraded);
         for (i = 0; i < conf->raid_disks; i++)
                 seq_printf (seq, "%s",
                             conf->multipaths[i].rdev &&
@@ -186,35 +186,36 @@ static int multipath_congested(void *data, int bits)
 static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
 {
         multipath_conf_t *conf = mddev->private;
+        char b[BDEVNAME_SIZE];
 
-        if (conf->working_disks <= 1) {
+        if (conf->raid_disks - mddev->degraded <= 1) {
                 /*
                  * Uh oh, we can do nothing if this is our last path, but
                  * first check if this is a queued request for a device
                  * which has just failed.
                  */
                 printk(KERN_ALERT
                        "multipath: only one IO path left and IO error.\n");
                 /* leave it active... it's all we have */
-        } else {
-                /*
-                 * Mark disk as unusable
-                 */
-                if (!test_bit(Faulty, &rdev->flags)) {
-                        char b[BDEVNAME_SIZE];
-                        clear_bit(In_sync, &rdev->flags);
-                        set_bit(Faulty, &rdev->flags);
-                        set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                        conf->working_disks--;
-                        mddev->degraded++;
-                        printk(KERN_ALERT "multipath: IO failure on %s,"
-                               " disabling IO path.\n"
-                               "multipath: Operation continuing"
-                               " on %d IO paths.\n",
-                               bdevname (rdev->bdev,b),
-                               conf->working_disks);
-                }
+                return;
         }
+        /*
+         * Mark disk as unusable
+         */
+        if (test_and_clear_bit(In_sync, &rdev->flags)) {
+                unsigned long flags;
+                spin_lock_irqsave(&conf->device_lock, flags);
+                mddev->degraded++;
+                spin_unlock_irqrestore(&conf->device_lock, flags);
+        }
+        set_bit(Faulty, &rdev->flags);
+        set_bit(MD_CHANGE_DEVS, &mddev->flags);
+        printk(KERN_ALERT "multipath: IO failure on %s,"
+               " disabling IO path.\n"
+               "multipath: Operation continuing"
+               " on %d IO paths.\n",
+               bdevname(rdev->bdev, b),
+               conf->raid_disks - mddev->degraded);
 }
 
 static void print_multipath_conf (multipath_conf_t *conf)
@@ -227,7 +228,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
                 printk("(conf==NULL)\n");
                 return;
         }
-        printk(" --- wd:%d rd:%d\n", conf->working_disks,
+        printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
                conf->raid_disks);
 
         for (i = 0; i < conf->raid_disks; i++) {
@@ -274,10 +275,11 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                                                    PAGE_CACHE_SIZE - 1);
                 }
 
-                conf->working_disks++;
+                spin_lock_irq(&conf->device_lock);
                 mddev->degraded--;
                 rdev->raid_disk = path;
                 set_bit(In_sync, &rdev->flags);
+                spin_unlock_irq(&conf->device_lock);
                 rcu_assign_pointer(p->rdev, rdev);
                 err = 0;
                 md_integrity_add_rdev(rdev, mddev);
@@ -391,6 +393,7 @@ static int multipath_run (mddev_t *mddev)
         int disk_idx;
         struct multipath_info *disk;
         mdk_rdev_t *rdev;
+        int working_disks;
 
         if (md_check_no_bitmap(mddev))
                 return -EINVAL;
@@ -424,7 +427,7 @@ static int multipath_run (mddev_t *mddev)
                 goto out_free_conf;
         }
 
-        conf->working_disks = 0;
+        working_disks = 0;
         list_for_each_entry(rdev, &mddev->disks, same_set) {
                 disk_idx = rdev->raid_disk;
                 if (disk_idx < 0 ||
@@ -446,7 +449,7 @@ static int multipath_run (mddev_t *mddev)
                 }
 
                 if (!test_bit(Faulty, &rdev->flags))
-                        conf->working_disks++;
+                        working_disks++;
         }
 
         conf->raid_disks = mddev->raid_disks;
@@ -454,12 +457,12 @@ static int multipath_run (mddev_t *mddev)
         spin_lock_init(&conf->device_lock);
         INIT_LIST_HEAD(&conf->retry_list);
 
-        if (!conf->working_disks) {
+        if (!working_disks) {
                 printk(KERN_ERR "multipath: no operational IO paths for %s\n",
                        mdname(mddev));
                 goto out_free_conf;
         }
-        mddev->degraded = conf->raid_disks - conf->working_disks;
+        mddev->degraded = conf->raid_disks - working_disks;
 
         conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
                                                  sizeof(struct multipath_bh));
@@ -481,7 +484,8 @@ static int multipath_run (mddev_t *mddev)
         printk(KERN_INFO
                "multipath: array %s active with %d out of %d IO paths\n",
-               mdname(mddev), conf->working_disks, mddev->raid_disks);
+               mdname(mddev), conf->raid_disks - mddev->degraded,
+               mddev->raid_disks);
 
         /*
          * Ok, everything is just fine now
          */
drivers/md/multipath.h
@@ -9,7 +9,6 @@ struct multipath_private_data {
         mddev_t *mddev;
         struct multipath_info *multipaths;
         int raid_disks;
-        int working_disks;
         spinlock_t device_lock;
         struct list_head retry_list;
[diff collapsed in this view: drivers/md/raid1.c]
drivers/md/raid1.h
@@ -94,7 +94,9 @@ struct r1bio_s {
         int read_disk;
 
         struct list_head retry_list;
-        struct bitmap_update *bitmap_update;
+        /* Next two are only valid when R1BIO_BehindIO is set */
+        struct page **behind_pages;
+        int behind_page_count;
         /*
          * if the IO is in WRITE direction, then multiple bios are used.
          * We choose the number when they are allocated.
[diff collapsed in this view: drivers/md/raid10.c]
drivers/md/raid5.c
@@ -1700,27 +1700,25 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
         raid5_conf_t *conf = mddev->private;
         pr_debug("raid456: error called\n");
 
-        if (!test_bit(Faulty, &rdev->flags)) {
-                set_bit(MD_CHANGE_DEVS, &mddev->flags);
-                if (test_and_clear_bit(In_sync, &rdev->flags)) {
-                        unsigned long flags;
-                        spin_lock_irqsave(&conf->device_lock, flags);
-                        mddev->degraded++;
-                        spin_unlock_irqrestore(&conf->device_lock, flags);
-                        /*
-                         * if recovery was running, make sure it aborts.
-                         */
-                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
-                }
-                set_bit(Faulty, &rdev->flags);
-                printk(KERN_ALERT
-                       "md/raid:%s: Disk failure on %s, disabling device.\n"
-                       "md/raid:%s: Operation continuing on %d devices.\n",
-                       mdname(mddev),
-                       bdevname(rdev->bdev, b),
-                       mdname(mddev),
-                       conf->raid_disks - mddev->degraded);
+        if (test_and_clear_bit(In_sync, &rdev->flags)) {
+                unsigned long flags;
+                spin_lock_irqsave(&conf->device_lock, flags);
+                mddev->degraded++;
+                spin_unlock_irqrestore(&conf->device_lock, flags);
+                /*
+                 * if recovery was running, make sure it aborts.
+                 */
+                set_bit(MD_RECOVERY_INTR, &mddev->recovery);
         }
+        set_bit(Faulty, &rdev->flags);
+        set_bit(MD_CHANGE_DEVS, &mddev->flags);
+        printk(KERN_ALERT
+               "md/raid:%s: Disk failure on %s, disabling device.\n"
+               "md/raid:%s: Operation continuing on %d devices.\n",
+               mdname(mddev),
+               bdevname(rdev->bdev, b),
+               mdname(mddev),
+               conf->raid_disks - mddev->degraded);
 }
 
 /*
@@ -5391,7 +5389,8 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
                 return -EINVAL;
         set_capacity(mddev->gendisk, mddev->array_sectors);
         revalidate_disk(mddev->gendisk);
-        if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
+        if (sectors > mddev->dev_sectors &&
+            mddev->recovery_cp > mddev->dev_sectors) {
                 mddev->recovery_cp = mddev->dev_sectors;
                 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
         }