Commit 544ae5f9 authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: use interruptible wait when duration is controlled by userspace.
  md/raid5: suspend shouldn't affect read requests.
  md: tidy up error paths in md_alloc
  md: fix error path when duplicate name is found on md device creation.
  md: avoid dereferencing NULL pointer when accessing suspend_* sysfs attributes.
  md: Use new topology calls to indicate alignment and I/O sizes
parents 7b85425f e62e58a5
...@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) ...@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
rdev->sectors = sectors * mddev->chunk_sectors; rdev->sectors = sectors * mddev->chunk_sectors;
} }
blk_queue_stack_limits(mddev->queue, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->bdev->bd_disk->queue); rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation. * a one page request is never in violation.
......
...@@ -3573,7 +3573,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) ...@@ -3573,7 +3573,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
char *e; char *e;
unsigned long long new = simple_strtoull(buf, &e, 10); unsigned long long new = simple_strtoull(buf, &e, 10);
if (mddev->pers->quiesce == NULL) if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL)
return -EINVAL; return -EINVAL;
if (buf == e || (*e && *e != '\n')) if (buf == e || (*e && *e != '\n'))
return -EINVAL; return -EINVAL;
...@@ -3601,7 +3602,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) ...@@ -3601,7 +3602,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
char *e; char *e;
unsigned long long new = simple_strtoull(buf, &e, 10); unsigned long long new = simple_strtoull(buf, &e, 10);
if (mddev->pers->quiesce == NULL) if (mddev->pers == NULL ||
mddev->pers->quiesce == NULL)
return -EINVAL; return -EINVAL;
if (buf == e || (*e && *e != '\n')) if (buf == e || (*e && *e != '\n'))
return -EINVAL; return -EINVAL;
...@@ -3844,11 +3846,9 @@ static int md_alloc(dev_t dev, char *name) ...@@ -3844,11 +3846,9 @@ static int md_alloc(dev_t dev, char *name)
flush_scheduled_work(); flush_scheduled_work();
mutex_lock(&disks_mutex); mutex_lock(&disks_mutex);
if (mddev->gendisk) { error = -EEXIST;
mutex_unlock(&disks_mutex); if (mddev->gendisk)
mddev_put(mddev); goto abort;
return -EEXIST;
}
if (name) { if (name) {
/* Need to ensure that 'name' is not a duplicate. /* Need to ensure that 'name' is not a duplicate.
...@@ -3860,17 +3860,15 @@ static int md_alloc(dev_t dev, char *name) ...@@ -3860,17 +3860,15 @@ static int md_alloc(dev_t dev, char *name)
if (mddev2->gendisk && if (mddev2->gendisk &&
strcmp(mddev2->gendisk->disk_name, name) == 0) { strcmp(mddev2->gendisk->disk_name, name) == 0) {
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
return -EEXIST; goto abort;
} }
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
} }
error = -ENOMEM;
mddev->queue = blk_alloc_queue(GFP_KERNEL); mddev->queue = blk_alloc_queue(GFP_KERNEL);
if (!mddev->queue) { if (!mddev->queue)
mutex_unlock(&disks_mutex); goto abort;
mddev_put(mddev);
return -ENOMEM;
}
mddev->queue->queuedata = mddev; mddev->queue->queuedata = mddev;
/* Can be unlocked because the queue is new: no concurrency */ /* Can be unlocked because the queue is new: no concurrency */
...@@ -3880,11 +3878,9 @@ static int md_alloc(dev_t dev, char *name) ...@@ -3880,11 +3878,9 @@ static int md_alloc(dev_t dev, char *name)
disk = alloc_disk(1 << shift); disk = alloc_disk(1 << shift);
if (!disk) { if (!disk) {
mutex_unlock(&disks_mutex);
blk_cleanup_queue(mddev->queue); blk_cleanup_queue(mddev->queue);
mddev->queue = NULL; mddev->queue = NULL;
mddev_put(mddev); goto abort;
return -ENOMEM;
} }
disk->major = MAJOR(mddev->unit); disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift; disk->first_minor = unit << shift;
...@@ -3906,16 +3902,22 @@ static int md_alloc(dev_t dev, char *name) ...@@ -3906,16 +3902,22 @@ static int md_alloc(dev_t dev, char *name)
mddev->gendisk = disk; mddev->gendisk = disk;
error = kobject_init_and_add(&mddev->kobj, &md_ktype, error = kobject_init_and_add(&mddev->kobj, &md_ktype,
&disk_to_dev(disk)->kobj, "%s", "md"); &disk_to_dev(disk)->kobj, "%s", "md");
mutex_unlock(&disks_mutex); if (error) {
if (error) /* This isn't possible, but as kobject_init_and_add is marked
* __must_check, we must do something with the result
*/
printk(KERN_WARNING "md: cannot register %s/md - name in use\n", printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
disk->disk_name); disk->disk_name);
else { error = 0;
}
abort:
mutex_unlock(&disks_mutex);
if (!error) {
kobject_uevent(&mddev->kobj, KOBJ_ADD); kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
} }
mddev_put(mddev); mddev_put(mddev);
return 0; return error;
} }
static struct kobject *md_probe(dev_t dev, int *part, void *data) static struct kobject *md_probe(dev_t dev, int *part, void *data)
...@@ -6334,10 +6336,16 @@ void md_do_sync(mddev_t *mddev) ...@@ -6334,10 +6336,16 @@ void md_do_sync(mddev_t *mddev)
sysfs_notify(&mddev->kobj, NULL, "sync_completed"); sysfs_notify(&mddev->kobj, NULL, "sync_completed");
} }
if (j >= mddev->resync_max) while (j >= mddev->resync_max && !kthread_should_stop()) {
wait_event(mddev->recovery_wait, /* As this condition is controlled by user-space,
mddev->resync_max > j * we can block indefinitely, so use '_interruptible'
|| kthread_should_stop()); * to avoid triggering warnings.
*/
flush_signals(current); /* just in case */
wait_event_interruptible(mddev->recovery_wait,
mddev->resync_max > j
|| kthread_should_stop());
}
if (kthread_should_stop()) if (kthread_should_stop())
goto interrupted; goto interrupted;
......
...@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
for (path = first; path <= last; path++) for (path = first; path <= last; path++)
if ((p=conf->multipaths+path)->rdev == NULL) { if ((p=conf->multipaths+path)->rdev == NULL) {
q = rdev->bdev->bd_disk->queue; q = rdev->bdev->bd_disk->queue;
blk_queue_stack_limits(mddev->queue, q); disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
...@@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev) ...@@ -463,9 +464,9 @@ static int multipath_run (mddev_t *mddev)
disk = conf->multipaths + disk_idx; disk = conf->multipaths + disk_idx;
disk->rdev = rdev; disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
blk_queue_stack_limits(mddev->queue,
rdev->bdev->bd_disk->queue);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, not that we ever expect a device with * violating it, not that we ever expect a device with
* a merge_bvec_fn to be involved in multipath */ * a merge_bvec_fn to be involved in multipath */
......
...@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev) ...@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev)
} }
dev[j] = rdev1; dev[j] = rdev1;
blk_queue_stack_limits(mddev->queue, disk_stack_limits(mddev->gendisk, rdev1->bdev,
rdev1->bdev->bd_disk->queue); rdev1->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation. * a one page request is never in violation.
...@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev) ...@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev)
mddev->chunk_sectors << 9); mddev->chunk_sectors << 9);
goto abort; goto abort;
} }
blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
blk_queue_io_opt(mddev->queue,
(mddev->chunk_sectors << 9) * mddev->raid_disks);
printk(KERN_INFO "raid0: done.\n"); printk(KERN_INFO "raid0: done.\n");
mddev->private = conf; mddev->private = conf;
return 0; return 0;
......
...@@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
for (mirror = first; mirror <= last; mirror++) for (mirror = first; mirror <= last; mirror++)
if ( !(p=conf->mirrors+mirror)->rdev) { if ( !(p=conf->mirrors+mirror)->rdev) {
blk_queue_stack_limits(mddev->queue, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->bdev->bd_disk->queue); rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation. * a one page request is never in violation.
...@@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev) ...@@ -1988,9 +1988,8 @@ static int run(mddev_t *mddev)
disk = conf->mirrors + disk_idx; disk = conf->mirrors + disk_idx;
disk->rdev = rdev; disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev,
blk_queue_stack_limits(mddev->queue, rdev->data_offset << 9);
rdev->bdev->bd_disk->queue);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation. * a one page request is never in violation.
......
...@@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
for ( ; mirror <= last ; mirror++) for ( ; mirror <= last ; mirror++)
if ( !(p=conf->mirrors+mirror)->rdev) { if ( !(p=conf->mirrors+mirror)->rdev) {
blk_queue_stack_limits(mddev->queue, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->bdev->bd_disk->queue); rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation. * a one page request is never in violation.
...@@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) ...@@ -2044,7 +2044,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
static int run(mddev_t *mddev) static int run(mddev_t *mddev)
{ {
conf_t *conf; conf_t *conf;
int i, disk_idx; int i, disk_idx, chunk_size;
mirror_info_t *disk; mirror_info_t *disk;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int nc, fc, fo; int nc, fc, fo;
...@@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev) ...@@ -2130,6 +2130,14 @@ static int run(mddev_t *mddev)
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
mddev->queue->queue_lock = &conf->device_lock; mddev->queue->queue_lock = &conf->device_lock;
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
if (conf->raid_disks % conf->near_copies)
blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks);
else
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks / conf->near_copies));
list_for_each_entry(rdev, &mddev->disks, same_set) { list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_idx = rdev->raid_disk; disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks if (disk_idx >= mddev->raid_disks
...@@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev) ...@@ -2138,9 +2146,8 @@ static int run(mddev_t *mddev)
disk = conf->mirrors + disk_idx; disk = conf->mirrors + disk_idx;
disk->rdev = rdev; disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev,
blk_queue_stack_limits(mddev->queue, rdev->data_offset << 9);
rdev->bdev->bd_disk->queue);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation. * a one page request is never in violation.
......
...@@ -3699,13 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi) ...@@ -3699,13 +3699,21 @@ static int make_request(struct request_queue *q, struct bio * bi)
goto retry; goto retry;
} }
} }
/* FIXME what if we get a false positive because these
* are being updated. if (bio_data_dir(bi) == WRITE &&
*/ logical_sector >= mddev->suspend_lo &&
if (logical_sector >= mddev->suspend_lo &&
logical_sector < mddev->suspend_hi) { logical_sector < mddev->suspend_hi) {
release_stripe(sh); release_stripe(sh);
schedule(); /* As the suspend_* range is controlled by
* userspace, we want an interruptible
* wait.
*/
flush_signals(current);
prepare_to_wait(&conf->wait_for_overlap,
&w, TASK_INTERRUPTIBLE);
if (logical_sector >= mddev->suspend_lo &&
logical_sector < mddev->suspend_hi)
schedule();
goto retry; goto retry;
} }
...@@ -4452,7 +4460,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) ...@@ -4452,7 +4460,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
static int run(mddev_t *mddev) static int run(mddev_t *mddev)
{ {
raid5_conf_t *conf; raid5_conf_t *conf;
int working_disks = 0; int working_disks = 0, chunk_size;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
if (mddev->recovery_cp != MaxSector) if (mddev->recovery_cp != MaxSector)
...@@ -4607,6 +4615,14 @@ static int run(mddev_t *mddev) ...@@ -4607,6 +4615,14 @@ static int run(mddev_t *mddev)
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
chunk_size = mddev->chunk_sectors << 9;
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks - conf->max_degraded));
list_for_each_entry(rdev, &mddev->disks, same_set)
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
return 0; return 0;
abort: abort:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment