Commit aca740ce authored by Jan Kara's avatar Jan Kara Committed by Christian Brauner

fs: open block device after superblock creation

Currently get_tree_bdev and mount_bdev open the block device before
committing to allocating a super block. That creates problems for
restricting the number of writers to a device, and also leads to an
unusual and not very helpful holder (the fs_type).

Reorganize the super block code to first look whether the superblock for
a particular device does already exist and open the block device only if
it doesn't.

[hch: port to before the bdev_handle changes,
      duplicate the bdev read-only check from blkdev_get_by_path,
      extend the fsfree_mutex coverage to protect against freezes,
      fix an open bdev leak when the bdev is frozen,
      use the bdev local variable more,
      rename the s variable to sb to be more descriptive]
[brauner: remove references to mounts as they're mostly irrelevant]
[brauner & hch: fold fixes for romfs and cramfs for
                syzbot+2faac0423fdc9692822b@syzkaller.appspotmail.com]
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Message-Id: <20230724175145.201318-1-hch@lst.de>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent a4f64a30
...@@ -485,12 +485,16 @@ static void cramfs_kill_sb(struct super_block *sb) ...@@ -485,12 +485,16 @@ static void cramfs_kill_sb(struct super_block *sb)
{ {
struct cramfs_sb_info *sbi = CRAMFS_SB(sb); struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
generic_shutdown_super(sb);
if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) { if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
if (sbi && sbi->mtd_point_size) if (sbi && sbi->mtd_point_size)
mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
kill_mtd_super(sb); put_mtd_device(sb->s_mtd);
sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
kill_block_super(sb); sync_blockdev(sb->s_bdev);
blkdev_put(sb->s_bdev, sb->s_type);
} }
kfree(sbi); kfree(sbi);
} }
......
...@@ -583,16 +583,18 @@ static int romfs_init_fs_context(struct fs_context *fc) ...@@ -583,16 +583,18 @@ static int romfs_init_fs_context(struct fs_context *fc)
*/ */
static void romfs_kill_sb(struct super_block *sb) static void romfs_kill_sb(struct super_block *sb)
{ {
generic_shutdown_super(sb);
#ifdef CONFIG_ROMFS_ON_MTD #ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd) { if (sb->s_mtd) {
kill_mtd_super(sb); put_mtd_device(sb->s_mtd);
return; sb->s_mtd = NULL;
} }
#endif #endif
#ifdef CONFIG_ROMFS_ON_BLOCK #ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) { if (sb->s_bdev) {
kill_block_super(sb); sync_blockdev(sb->s_bdev);
return; blkdev_put(sb->s_bdev, sb->s_type);
} }
#endif #endif
} }
......
...@@ -1228,12 +1228,7 @@ static const struct blk_holder_ops fs_holder_ops = { ...@@ -1228,12 +1228,7 @@ static const struct blk_holder_ops fs_holder_ops = {
static int set_bdev_super(struct super_block *s, void *data) static int set_bdev_super(struct super_block *s, void *data)
{ {
s->s_bdev = data; s->s_dev = *(dev_t *)data;
s->s_dev = s->s_bdev->bd_dev;
s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
if (bdev_stable_writes(s->s_bdev))
s->s_iflags |= SB_I_STABLE_WRITES;
return 0; return 0;
} }
...@@ -1244,7 +1239,61 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc) ...@@ -1244,7 +1239,61 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc) static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
{ {
return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key; return !(s->s_iflags & SB_I_RETIRED) &&
s->s_dev == *(dev_t *)fc->sget_key;
}
static int setup_bdev_super(struct super_block *sb, int sb_flags,
struct fs_context *fc)
{
blk_mode_t mode = sb_open_mode(sb_flags);
struct block_device *bdev;
bdev = blkdev_get_by_dev(sb->s_dev, mode, sb->s_type, &fs_holder_ops);
if (IS_ERR(bdev)) {
if (fc)
errorf(fc, "%s: Can't open blockdev", fc->source);
return PTR_ERR(bdev);
}
/*
* This really should be in blkdev_get_by_dev, but right now can't due
* to legacy issues that require us to allow opening a block device node
* writable from userspace even for a read-only block device.
*/
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
blkdev_put(bdev, sb->s_type);
return -EACCES;
}
/*
* Until SB_BORN flag is set, there can be no active superblock
* references and thus no filesystem freezing. get_active_super() will
* just loop waiting for SB_BORN so even freeze_bdev() cannot proceed.
*
* It is enough to check bdev was not frozen before we set s_bdev.
*/
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
blkdev_put(bdev, sb->s_type);
return -EBUSY;
}
spin_lock(&sb_lock);
sb->s_bdev = bdev;
sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
if (bdev_stable_writes(bdev))
sb->s_iflags |= SB_I_STABLE_WRITES;
spin_unlock(&sb_lock);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
sb->s_id);
sb_set_blocksize(sb, block_size(bdev));
return 0;
} }
/** /**
...@@ -1256,71 +1305,48 @@ int get_tree_bdev(struct fs_context *fc, ...@@ -1256,71 +1305,48 @@ int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *, int (*fill_super)(struct super_block *,
struct fs_context *)) struct fs_context *))
{ {
struct block_device *bdev;
struct super_block *s; struct super_block *s;
int error = 0; int error = 0;
dev_t dev;
if (!fc->source) if (!fc->source)
return invalf(fc, "No source specified"); return invalf(fc, "No source specified");
bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags), error = lookup_bdev(fc->source, &dev);
fc->fs_type, &fs_holder_ops); if (error) {
if (IS_ERR(bdev)) { errorf(fc, "%s: Can't lookup blockdev", fc->source);
errorf(fc, "%s: Can't open blockdev", fc->source); return error;
return PTR_ERR(bdev);
}
/* Once the superblock is inserted into the list by sget_fc(), s_umount
* will protect the lockfs code from trying to start a snapshot while
* we are mounting
*/
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
blkdev_put(bdev, fc->fs_type);
return -EBUSY;
} }
fc->sb_flags |= SB_NOSEC; fc->sb_flags |= SB_NOSEC;
fc->sget_key = bdev; fc->sget_key = &dev;
s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s))
if (IS_ERR(s)) {
blkdev_put(bdev, fc->fs_type);
return PTR_ERR(s); return PTR_ERR(s);
}
if (s->s_root) { if (s->s_root) {
/* Don't summarily change the RO/RW state. */ /* Don't summarily change the RO/RW state. */
if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
warnf(fc, "%pg: Can't mount, would change RO state", bdev); warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
deactivate_locked_super(s); deactivate_locked_super(s);
blkdev_put(bdev, fc->fs_type);
return -EBUSY; return -EBUSY;
} }
} else {
/* /*
* s_umount nests inside open_mutex during * We drop s_umount here because we need to open the bdev and
* __invalidate_device(). blkdev_put() acquires * bdev->open_mutex ranks above s_umount (blkdev_put() ->
* open_mutex and can't be called under s_umount. Drop * __invalidate_device()). It is safe because we have active sb
* s_umount temporarily. This is safe as we're * reference and SB_BORN is not set yet.
* holding an active reference.
*/ */
up_write(&s->s_umount); up_write(&s->s_umount);
blkdev_put(bdev, fc->fs_type); error = setup_bdev_super(s, fc->sb_flags, fc);
down_write(&s->s_umount); down_write(&s->s_umount);
} else { if (!error)
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); error = fill_super(s, fc);
shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
fc->fs_type->name, s->s_id);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, fc);
if (error) { if (error) {
deactivate_locked_super(s); deactivate_locked_super(s);
return error; return error;
} }
s->s_flags |= SB_ACTIVE; s->s_flags |= SB_ACTIVE;
} }
...@@ -1332,78 +1358,52 @@ EXPORT_SYMBOL(get_tree_bdev); ...@@ -1332,78 +1358,52 @@ EXPORT_SYMBOL(get_tree_bdev);
static int test_bdev_super(struct super_block *s, void *data) static int test_bdev_super(struct super_block *s, void *data)
{ {
return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data; return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
} }
struct dentry *mount_bdev(struct file_system_type *fs_type, struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int)) int (*fill_super)(struct super_block *, void *, int))
{ {
struct block_device *bdev;
struct super_block *s; struct super_block *s;
int error = 0; int error;
dev_t dev;
bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type, error = lookup_bdev(dev_name, &dev);
&fs_holder_ops); if (error)
if (IS_ERR(bdev)) return ERR_PTR(error);
return ERR_CAST(bdev);
/* flags |= SB_NOSEC;
* once the super is inserted into the list by sget, s_umount s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
* will protect the lockfs code from trying to start a snapshot
* while we are mounting
*/
mutex_lock(&bdev->bd_fsfreeze_mutex);
if (bdev->bd_fsfreeze_count > 0) {
mutex_unlock(&bdev->bd_fsfreeze_mutex);
error = -EBUSY;
goto error_bdev;
}
s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
bdev);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
if (IS_ERR(s)) if (IS_ERR(s))
goto error_s; return ERR_CAST(s);
if (s->s_root) { if (s->s_root) {
if ((flags ^ s->s_flags) & SB_RDONLY) { if ((flags ^ s->s_flags) & SB_RDONLY) {
deactivate_locked_super(s); deactivate_locked_super(s);
error = -EBUSY; return ERR_PTR(-EBUSY);
goto error_bdev;
} }
} else {
/* /*
* s_umount nests inside open_mutex during * We drop s_umount here because we need to open the bdev and
* __invalidate_device(). blkdev_put() acquires * bdev->open_mutex ranks above s_umount (blkdev_put() ->
* open_mutex and can't be called under s_umount. Drop * __invalidate_device()). It is safe because we have active sb
* s_umount temporarily. This is safe as we're * reference and SB_BORN is not set yet.
* holding an active reference.
*/ */
up_write(&s->s_umount); up_write(&s->s_umount);
blkdev_put(bdev, fs_type); error = setup_bdev_super(s, flags, NULL);
down_write(&s->s_umount); down_write(&s->s_umount);
} else { if (!error)
snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
fs_type->name, s->s_id);
sb_set_blocksize(s, block_size(bdev));
error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
if (error) { if (error) {
deactivate_locked_super(s); deactivate_locked_super(s);
goto error; return ERR_PTR(error);
} }
s->s_flags |= SB_ACTIVE; s->s_flags |= SB_ACTIVE;
} }
return dget(s->s_root); return dget(s->s_root);
error_s:
error = PTR_ERR(s);
error_bdev:
blkdev_put(bdev, fs_type);
error:
return ERR_PTR(error);
} }
EXPORT_SYMBOL(mount_bdev); EXPORT_SYMBOL(mount_bdev);
...@@ -1412,8 +1412,10 @@ void kill_block_super(struct super_block *sb) ...@@ -1412,8 +1412,10 @@ void kill_block_super(struct super_block *sb)
struct block_device *bdev = sb->s_bdev; struct block_device *bdev = sb->s_bdev;
generic_shutdown_super(sb); generic_shutdown_super(sb);
sync_blockdev(bdev); if (bdev) {
blkdev_put(bdev, sb->s_type); sync_blockdev(bdev);
blkdev_put(bdev, sb->s_type);
}
} }
EXPORT_SYMBOL(kill_block_super); EXPORT_SYMBOL(kill_block_super);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment