Commit 6cc8e743, authored by Pavel Tatashin, committed by Jens Axboe

loop: scale loop device by introducing per device lock

Currently, the loop driver has only one global lock: loop_ctl_mutex.

This becomes hot in scenarios where many loop devices are used.

Scale it by introducing a per-device lock, lo_mutex, that protects
modifications of all fields in struct loop_device.

Keep loop_ctl_mutex to protect global data: loop_index_idr, loop_lookup,
loop_add.

The new lock ordering requirement is that loop_ctl_mutex must be taken
before lo_mutex.
Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Tyler Hicks <tyhicks@linux.microsoft.com>
Reviewed-by: Petr Vorel <pvorel@suse.cz>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 767630c6
...@@ -704,7 +704,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, ...@@ -704,7 +704,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
int error; int error;
bool partscan; bool partscan;
error = mutex_lock_killable(&loop_ctl_mutex); error = mutex_lock_killable(&lo->lo_mutex);
if (error) if (error)
return error; return error;
error = -ENXIO; error = -ENXIO;
...@@ -743,9 +743,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, ...@@ -743,9 +743,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
loop_update_dio(lo); loop_update_dio(lo);
blk_mq_unfreeze_queue(lo->lo_queue); blk_mq_unfreeze_queue(lo->lo_queue);
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
/* /*
* We must drop file reference outside of loop_ctl_mutex as dropping * We must drop file reference outside of lo_mutex as dropping
* the file ref can take bd_mutex which creates circular locking * the file ref can take bd_mutex which creates circular locking
* dependency. * dependency.
*/ */
...@@ -755,7 +755,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, ...@@ -755,7 +755,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
return 0; return 0;
out_err: out_err:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
if (file) if (file)
fput(file); fput(file);
return error; return error;
...@@ -1092,7 +1092,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1092,7 +1092,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
goto out_putf; goto out_putf;
} }
error = mutex_lock_killable(&loop_ctl_mutex); error = mutex_lock_killable(&lo->lo_mutex);
if (error) if (error)
goto out_bdev; goto out_bdev;
...@@ -1171,7 +1171,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1171,7 +1171,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
*/ */
bdgrab(bdev); bdgrab(bdev);
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
if (partscan) if (partscan)
loop_reread_partitions(lo, bdev); loop_reread_partitions(lo, bdev);
if (!(mode & FMODE_EXCL)) if (!(mode & FMODE_EXCL))
...@@ -1179,7 +1179,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, ...@@ -1179,7 +1179,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
return 0; return 0;
out_unlock: out_unlock:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
out_bdev: out_bdev:
if (!(mode & FMODE_EXCL)) if (!(mode & FMODE_EXCL))
bd_abort_claiming(bdev, loop_configure); bd_abort_claiming(bdev, loop_configure);
...@@ -1200,7 +1200,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) ...@@ -1200,7 +1200,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
bool partscan = false; bool partscan = false;
int lo_number; int lo_number;
mutex_lock(&loop_ctl_mutex); mutex_lock(&lo->lo_mutex);
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
err = -ENXIO; err = -ENXIO;
goto out_unlock; goto out_unlock;
...@@ -1253,7 +1253,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) ...@@ -1253,7 +1253,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
lo_number = lo->lo_number; lo_number = lo->lo_number;
loop_unprepare_queue(lo); loop_unprepare_queue(lo);
out_unlock: out_unlock:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
if (partscan) { if (partscan) {
/* /*
* bd_mutex has been held already in release path, so don't * bd_mutex has been held already in release path, so don't
...@@ -1284,18 +1284,17 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) ...@@ -1284,18 +1284,17 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
* protects us from all the other places trying to change the 'lo' * protects us from all the other places trying to change the 'lo'
* device. * device.
*/ */
mutex_lock(&loop_ctl_mutex); mutex_lock(&lo->lo_mutex);
lo->lo_flags = 0; lo->lo_flags = 0;
if (!part_shift) if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
lo->lo_state = Lo_unbound; lo->lo_state = Lo_unbound;
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
/* /*
* Need not hold loop_ctl_mutex to fput backing file. * Need not hold lo_mutex to fput backing file. Calling fput holding
* Calling fput holding loop_ctl_mutex triggers a circular * lo_mutex triggers a circular lock dependency possibility warning as
* lock dependency possibility warning as fput can take * fput can take bd_mutex which is usually taken before lo_mutex.
* bd_mutex which is usually taken before loop_ctl_mutex.
*/ */
if (filp) if (filp)
fput(filp); fput(filp);
...@@ -1306,11 +1305,11 @@ static int loop_clr_fd(struct loop_device *lo) ...@@ -1306,11 +1305,11 @@ static int loop_clr_fd(struct loop_device *lo)
{ {
int err; int err;
err = mutex_lock_killable(&loop_ctl_mutex); err = mutex_lock_killable(&lo->lo_mutex);
if (err) if (err)
return err; return err;
if (lo->lo_state != Lo_bound) { if (lo->lo_state != Lo_bound) {
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
return -ENXIO; return -ENXIO;
} }
/* /*
...@@ -1325,11 +1324,11 @@ static int loop_clr_fd(struct loop_device *lo) ...@@ -1325,11 +1324,11 @@ static int loop_clr_fd(struct loop_device *lo)
*/ */
if (atomic_read(&lo->lo_refcnt) > 1) { if (atomic_read(&lo->lo_refcnt) > 1) {
lo->lo_flags |= LO_FLAGS_AUTOCLEAR; lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
return 0; return 0;
} }
lo->lo_state = Lo_rundown; lo->lo_state = Lo_rundown;
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
return __loop_clr_fd(lo, false); return __loop_clr_fd(lo, false);
} }
...@@ -1344,7 +1343,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) ...@@ -1344,7 +1343,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
bool partscan = false; bool partscan = false;
bool size_changed = false; bool size_changed = false;
err = mutex_lock_killable(&loop_ctl_mutex); err = mutex_lock_killable(&lo->lo_mutex);
if (err) if (err)
return err; return err;
if (lo->lo_encrypt_key_size && if (lo->lo_encrypt_key_size &&
...@@ -1411,7 +1410,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) ...@@ -1411,7 +1410,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
partscan = true; partscan = true;
} }
out_unlock: out_unlock:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
if (partscan) if (partscan)
loop_reread_partitions(lo, bdev); loop_reread_partitions(lo, bdev);
...@@ -1425,11 +1424,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) ...@@ -1425,11 +1424,11 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
struct kstat stat; struct kstat stat;
int ret; int ret;
ret = mutex_lock_killable(&loop_ctl_mutex); ret = mutex_lock_killable(&lo->lo_mutex);
if (ret) if (ret)
return ret; return ret;
if (lo->lo_state != Lo_bound) { if (lo->lo_state != Lo_bound) {
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
return -ENXIO; return -ENXIO;
} }
...@@ -1448,10 +1447,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) ...@@ -1448,10 +1447,10 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info)
lo->lo_encrypt_key_size); lo->lo_encrypt_key_size);
} }
/* Drop loop_ctl_mutex while we call into the filesystem. */ /* Drop lo_mutex while we call into the filesystem. */
path = lo->lo_backing_file->f_path; path = lo->lo_backing_file->f_path;
path_get(&path); path_get(&path);
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); ret = vfs_getattr(&path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT);
if (!ret) { if (!ret) {
info->lo_device = huge_encode_dev(stat.dev); info->lo_device = huge_encode_dev(stat.dev);
...@@ -1637,7 +1636,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, ...@@ -1637,7 +1636,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
{ {
int err; int err;
err = mutex_lock_killable(&loop_ctl_mutex); err = mutex_lock_killable(&lo->lo_mutex);
if (err) if (err)
return err; return err;
switch (cmd) { switch (cmd) {
...@@ -1653,7 +1652,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, ...@@ -1653,7 +1652,7 @@ static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd,
default: default:
err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL; err = lo->ioctl ? lo->ioctl(lo, cmd, arg) : -EINVAL;
} }
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
return err; return err;
} }
...@@ -1879,27 +1878,33 @@ static int lo_open(struct block_device *bdev, fmode_t mode) ...@@ -1879,27 +1878,33 @@ static int lo_open(struct block_device *bdev, fmode_t mode)
struct loop_device *lo; struct loop_device *lo;
int err; int err;
/*
* take loop_ctl_mutex to protect lo pointer from race with
* loop_control_ioctl(LOOP_CTL_REMOVE), however, to reduce contention
* release it prior to updating lo->lo_refcnt.
*/
err = mutex_lock_killable(&loop_ctl_mutex); err = mutex_lock_killable(&loop_ctl_mutex);
if (err) if (err)
return err; return err;
lo = bdev->bd_disk->private_data; lo = bdev->bd_disk->private_data;
if (!lo) { if (!lo) {
err = -ENXIO; mutex_unlock(&loop_ctl_mutex);
goto out; return -ENXIO;
} }
err = mutex_lock_killable(&lo->lo_mutex);
atomic_inc(&lo->lo_refcnt);
out:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&loop_ctl_mutex);
return err; if (err)
return err;
atomic_inc(&lo->lo_refcnt);
mutex_unlock(&lo->lo_mutex);
return 0;
} }
static void lo_release(struct gendisk *disk, fmode_t mode) static void lo_release(struct gendisk *disk, fmode_t mode)
{ {
struct loop_device *lo; struct loop_device *lo = disk->private_data;
mutex_lock(&loop_ctl_mutex); mutex_lock(&lo->lo_mutex);
lo = disk->private_data;
if (atomic_dec_return(&lo->lo_refcnt)) if (atomic_dec_return(&lo->lo_refcnt))
goto out_unlock; goto out_unlock;
...@@ -1907,7 +1912,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) ...@@ -1907,7 +1912,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
if (lo->lo_state != Lo_bound) if (lo->lo_state != Lo_bound)
goto out_unlock; goto out_unlock;
lo->lo_state = Lo_rundown; lo->lo_state = Lo_rundown;
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
/* /*
* In autoclear mode, stop the loop thread * In autoclear mode, stop the loop thread
* and remove configuration after last close. * and remove configuration after last close.
...@@ -1924,7 +1929,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) ...@@ -1924,7 +1929,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode)
} }
out_unlock: out_unlock:
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
} }
static const struct block_device_operations lo_fops = { static const struct block_device_operations lo_fops = {
...@@ -1963,10 +1968,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data) ...@@ -1963,10 +1968,10 @@ static int unregister_transfer_cb(int id, void *ptr, void *data)
struct loop_device *lo = ptr; struct loop_device *lo = ptr;
struct loop_func_table *xfer = data; struct loop_func_table *xfer = data;
mutex_lock(&loop_ctl_mutex); mutex_lock(&lo->lo_mutex);
if (lo->lo_encryption == xfer) if (lo->lo_encryption == xfer)
loop_release_xfer(lo); loop_release_xfer(lo);
mutex_unlock(&loop_ctl_mutex); mutex_unlock(&lo->lo_mutex);
return 0; return 0;
} }
...@@ -2152,6 +2157,7 @@ static int loop_add(struct loop_device **l, int i) ...@@ -2152,6 +2157,7 @@ static int loop_add(struct loop_device **l, int i)
disk->flags |= GENHD_FL_NO_PART_SCAN; disk->flags |= GENHD_FL_NO_PART_SCAN;
disk->flags |= GENHD_FL_EXT_DEVT; disk->flags |= GENHD_FL_EXT_DEVT;
atomic_set(&lo->lo_refcnt, 0); atomic_set(&lo->lo_refcnt, 0);
mutex_init(&lo->lo_mutex);
lo->lo_number = i; lo->lo_number = i;
spin_lock_init(&lo->lo_lock); spin_lock_init(&lo->lo_lock);
disk->major = LOOP_MAJOR; disk->major = LOOP_MAJOR;
...@@ -2182,6 +2188,7 @@ static void loop_remove(struct loop_device *lo) ...@@ -2182,6 +2188,7 @@ static void loop_remove(struct loop_device *lo)
blk_cleanup_queue(lo->lo_queue); blk_cleanup_queue(lo->lo_queue);
blk_mq_free_tag_set(&lo->tag_set); blk_mq_free_tag_set(&lo->tag_set);
put_disk(lo->lo_disk); put_disk(lo->lo_disk);
mutex_destroy(&lo->lo_mutex);
kfree(lo); kfree(lo);
} }
...@@ -2261,15 +2268,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd, ...@@ -2261,15 +2268,21 @@ static long loop_control_ioctl(struct file *file, unsigned int cmd,
ret = loop_lookup(&lo, parm); ret = loop_lookup(&lo, parm);
if (ret < 0) if (ret < 0)
break; break;
ret = mutex_lock_killable(&lo->lo_mutex);
if (ret)
break;
if (lo->lo_state != Lo_unbound) { if (lo->lo_state != Lo_unbound) {
ret = -EBUSY; ret = -EBUSY;
mutex_unlock(&lo->lo_mutex);
break; break;
} }
if (atomic_read(&lo->lo_refcnt) > 0) { if (atomic_read(&lo->lo_refcnt) > 0) {
ret = -EBUSY; ret = -EBUSY;
mutex_unlock(&lo->lo_mutex);
break; break;
} }
lo->lo_disk->private_data = NULL; lo->lo_disk->private_data = NULL;
mutex_unlock(&lo->lo_mutex);
idr_remove(&loop_index_idr, lo->lo_number); idr_remove(&loop_index_idr, lo->lo_number);
loop_remove(lo); loop_remove(lo);
break; break;
......
...@@ -62,6 +62,7 @@ struct loop_device { ...@@ -62,6 +62,7 @@ struct loop_device {
struct request_queue *lo_queue; struct request_queue *lo_queue;
struct blk_mq_tag_set tag_set; struct blk_mq_tag_set tag_set;
struct gendisk *lo_disk; struct gendisk *lo_disk;
struct mutex lo_mutex;
}; };
struct loop_cmd { struct loop_cmd {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment