Commit f3a60882 authored by Christian Brauner

bdev: open block device as files

Add two new helpers to allow opening block devices as files.
This is not the final infrastructure. This still opens the block device
before opening a struct file. Until we have removed all references to
struct bdev_handle we can't switch the order:

* Introduce blk_to_file_flags() to translate from block specific to
  flags usable to open a new file.
* Introduce bdev_file_open_by_{dev,path}().
* Introduce temporary sb_bdev_handle() helper to retrieve a struct
  bdev_handle from a block device file and update places that directly
  reference struct bdev_handle to rely on it.
* Don't count block device opens against the number of open files. A
  bdev_file_open_by_{dev,path}() file is never installed into any
  file descriptor table.

One idea that came to mind was to use kernel_tmpfile_open() which
would require us to pass a path and it would then call do_dentry_open()
going through the regular fops->open::blkdev_open() path. But then we're
back to the problem of routing block specific flags such as
BLK_OPEN_RESTRICT_WRITES through the open path and would have to waste
FMODE_* flags every time we add a new one. With this we can avoid using
a flag bit and we have more leeway in how we open block devices from
bdev_open_by_{dev,path}().

Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-1-adbd023e19cc@kernel.org
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent bac0a9e5
...@@ -49,6 +49,13 @@ struct block_device *I_BDEV(struct inode *inode) ...@@ -49,6 +49,13 @@ struct block_device *I_BDEV(struct inode *inode)
} }
EXPORT_SYMBOL(I_BDEV); EXPORT_SYMBOL(I_BDEV);
struct block_device *file_bdev(struct file *bdev_file)
{
struct bdev_handle *handle = bdev_file->private_data;
return handle->bdev;
}
EXPORT_SYMBOL(file_bdev);
static void bdev_write_inode(struct block_device *bdev) static void bdev_write_inode(struct block_device *bdev)
{ {
struct inode *inode = bdev->bd_inode; struct inode *inode = bdev->bd_inode;
...@@ -368,12 +375,12 @@ static struct file_system_type bd_type = { ...@@ -368,12 +375,12 @@ static struct file_system_type bd_type = {
}; };
struct super_block *blockdev_superblock __ro_after_init; struct super_block *blockdev_superblock __ro_after_init;
struct vfsmount *blockdev_mnt __ro_after_init;
EXPORT_SYMBOL_GPL(blockdev_superblock); EXPORT_SYMBOL_GPL(blockdev_superblock);
void __init bdev_cache_init(void) void __init bdev_cache_init(void)
{ {
int err; int err;
static struct vfsmount *bd_mnt __ro_after_init;
bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode), bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
...@@ -382,10 +389,10 @@ void __init bdev_cache_init(void) ...@@ -382,10 +389,10 @@ void __init bdev_cache_init(void)
err = register_filesystem(&bd_type); err = register_filesystem(&bd_type);
if (err) if (err)
panic("Cannot register bdev pseudo-fs"); panic("Cannot register bdev pseudo-fs");
bd_mnt = kern_mount(&bd_type); blockdev_mnt = kern_mount(&bd_type);
if (IS_ERR(bd_mnt)) if (IS_ERR(blockdev_mnt))
panic("Cannot create bdev pseudo-fs"); panic("Cannot create bdev pseudo-fs");
blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ blockdev_superblock = blockdev_mnt->mnt_sb; /* For writeback */
} }
struct block_device *bdev_alloc(struct gendisk *disk, u8 partno) struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
...@@ -911,6 +918,92 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, ...@@ -911,6 +918,92 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
} }
EXPORT_SYMBOL(bdev_open_by_dev); EXPORT_SYMBOL(bdev_open_by_dev);
/*
 * Translate block layer open flags (BLK_OPEN_*) into the O_* flags used to
 * open a file.
 *
 * Access mode, checked in this order:
 *   BLK_OPEN_READ and BLK_OPEN_WRITE both set -> O_RDWR
 *   BLK_OPEN_WRITE_IOCTL                      -> O_RDWR | O_WRONLY
 *   BLK_OPEN_WRITE                            -> O_WRONLY
 *   BLK_OPEN_READ                             -> O_RDONLY
 *   none of the above                         -> O_RDONLY, warns once
 *
 * BLK_OPEN_WRITE_IOCTL is a historical quirk associated with the floppy
 * driver, which has allowed ioctls if the file was opened for writing but
 * does not allow reads or writes. The same flag combination can also result
 * from a block device being opened as O_RDWR | O_WRONLY.
 *
 * BLK_OPEN_NDELAY is additionally carried over as O_NDELAY.
 */
static unsigned blk_to_file_flags(blk_mode_t mode)
{
	const blk_mode_t read_write = BLK_OPEN_READ | BLK_OPEN_WRITE;
	unsigned int f_flags;

	if ((mode & read_write) == read_write) {
		f_flags = O_RDWR;
	} else if (mode & BLK_OPEN_WRITE_IOCTL) {
		f_flags = O_RDWR | O_WRONLY;
	} else if (mode & BLK_OPEN_WRITE) {
		f_flags = O_WRONLY;
	} else {
		/* O_RDONLY is 0, so this only documents the intent. */
		f_flags = O_RDONLY;
		/* A mode without any access bit is a caller bug. */
		WARN_ON_ONCE(!(mode & BLK_OPEN_READ));
	}

	if (mode & BLK_OPEN_NDELAY)
		f_flags |= O_NDELAY;

	return f_flags;
}
/**
 * bdev_file_open_by_dev - open a block device by device number as a file
 * @dev: device number passed through to bdev_open_by_dev()
 * @mode: BLK_OPEN_* flags; also translated into the file's O_* flags
 * @holder: passed through to bdev_open_by_dev()
 * @hops: passed through to bdev_open_by_dev()
 *
 * The block device is opened first via bdev_open_by_dev() and a file on
 * blockdev_mnt using def_blk_fops is allocated afterwards. The resulting
 * struct bdev_handle is stored in the file's ->private_data.
 *
 * Return: the new file, or an ERR_PTR() if either opening the block device
 * or allocating the file failed. In the latter case the block device handle
 * is released again before returning.
 */
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
				   const struct blk_holder_ops *hops)
{
	struct bdev_handle *handle = bdev_open_by_dev(dev, mode, holder, hops);
	struct block_device *bdev;
	struct inode *inode;
	struct file *bdev_file;

	if (IS_ERR(handle))
		return ERR_CAST(handle);

	bdev = handle->bdev;
	inode = bdev->bd_inode;

	bdev_file = alloc_file_pseudo_noaccount(inode, blockdev_mnt, "",
						blk_to_file_flags(mode) | O_LARGEFILE,
						&def_blk_fops);
	if (IS_ERR(bdev_file)) {
		/* No file was created, so drop the device handle by hand. */
		bdev_release(handle);
		return bdev_file;
	}

	/*
	 * NOTE(review): presumably this is the inode reference that the new
	 * file owns from here on - confirm against the file release path.
	 */
	ihold(inode);

	bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
	if (bdev_nowait(bdev))
		bdev_file->f_mode |= FMODE_NOWAIT;

	bdev_file->f_mapping = inode->i_mapping;
	bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);

	/* file_bdev() and sb_bdev_handle() read the handle back from here. */
	bdev_file->private_data = handle;

	return bdev_file;
}
EXPORT_SYMBOL(bdev_file_open_by_dev);
/**
 * bdev_file_open_by_path - open a block device by path as a file
 * @path: path resolved to a device number with lookup_bdev()
 * @mode: BLK_OPEN_* flags passed through to bdev_file_open_by_dev()
 * @holder: passed through to bdev_file_open_by_dev()
 * @hops: passed through to bdev_file_open_by_dev()
 *
 * Return: the opened file, or an ERR_PTR() carrying the lookup_bdev() or
 * bdev_file_open_by_dev() error. If BLK_OPEN_WRITE was requested but the
 * block device is read-only, the file is put again and ERR_PTR(-EACCES) is
 * returned.
 */
struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
				    void *holder,
				    const struct blk_holder_ops *hops)
{
	struct bdev_handle *handle;
	struct file *bdev_file;
	dev_t dev;
	int error;

	error = lookup_bdev(path, &dev);
	if (error)
		return ERR_PTR(error);

	bdev_file = bdev_file_open_by_dev(dev, mode, holder, hops);
	if (IS_ERR(bdev_file))
		return bdev_file;

	/* Only writers need the read-only check below. */
	if (!(mode & BLK_OPEN_WRITE))
		return bdev_file;

	handle = bdev_file->private_data;
	if (!bdev_read_only(handle->bdev))
		return bdev_file;

	/* Write access was requested on a read-only device: undo the open. */
	fput(bdev_file);
	return ERR_PTR(-EACCES);
}
EXPORT_SYMBOL(bdev_file_open_by_path);
/** /**
* bdev_open_by_path - open a block device by name * bdev_open_by_path - open a block device by name
* @path: path to the block device to open * @path: path to the block device to open
......
...@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb) ...@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
sb->s_mtd = NULL; sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
sync_blockdev(sb->s_bdev); sync_blockdev(sb->s_bdev);
bdev_release(sb->s_bdev_handle); fput(sb->s_bdev_file);
} }
kfree(sbi); kfree(sbi);
} }
......
...@@ -4247,7 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) ...@@ -4247,7 +4247,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
for (i = 0; i < max_devices; i++) { for (i = 0; i < max_devices; i++) {
if (i == 0) if (i == 0)
FDEV(0).bdev_handle = sbi->sb->s_bdev_handle; FDEV(0).bdev_handle = sb_bdev_handle(sbi->sb);
else if (!RDEV(i).path[0]) else if (!RDEV(i).path[0])
break; break;
......
...@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb) ...@@ -1162,7 +1162,7 @@ static int open_inline_log(struct super_block *sb)
init_waitqueue_head(&log->syncwait); init_waitqueue_head(&log->syncwait);
set_bit(log_INLINELOG, &log->flag); set_bit(log_INLINELOG, &log->flag);
log->bdev_handle = sb->s_bdev_handle; log->bdev_handle = sb_bdev_handle(sb);
log->base = addressPXD(&JFS_SBI(sb)->logpxd); log->base = addressPXD(&JFS_SBI(sb)->logpxd);
log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
(L2LOGPSIZE - sb->s_blocksize_bits); (L2LOGPSIZE - sb->s_blocksize_bits);
......
...@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb) ...@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
#ifdef CONFIG_ROMFS_ON_BLOCK #ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) { if (sb->s_bdev) {
sync_blockdev(sb->s_bdev); sync_blockdev(sb->s_bdev);
bdev_release(sb->s_bdev_handle); fput(sb->s_bdev_file);
} }
#endif #endif
} }
......
...@@ -1532,16 +1532,16 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, ...@@ -1532,16 +1532,16 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
struct fs_context *fc) struct fs_context *fc)
{ {
blk_mode_t mode = sb_open_mode(sb_flags); blk_mode_t mode = sb_open_mode(sb_flags);
struct bdev_handle *bdev_handle; struct file *bdev_file;
struct block_device *bdev; struct block_device *bdev;
bdev_handle = bdev_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops); bdev_file = bdev_file_open_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
if (IS_ERR(bdev_handle)) { if (IS_ERR(bdev_file)) {
if (fc) if (fc)
errorf(fc, "%s: Can't open blockdev", fc->source); errorf(fc, "%s: Can't open blockdev", fc->source);
return PTR_ERR(bdev_handle); return PTR_ERR(bdev_file);
} }
bdev = bdev_handle->bdev; bdev = file_bdev(bdev_file);
/* /*
* This really should be in blkdev_get_by_dev, but right now can't due * This really should be in blkdev_get_by_dev, but right now can't due
...@@ -1549,7 +1549,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, ...@@ -1549,7 +1549,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
* writable from userspace even for a read-only block device. * writable from userspace even for a read-only block device.
*/ */
if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) { if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
bdev_release(bdev_handle); fput(bdev_file);
return -EACCES; return -EACCES;
} }
...@@ -1560,11 +1560,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags, ...@@ -1560,11 +1560,11 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
if (atomic_read(&bdev->bd_fsfreeze_count) > 0) { if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
if (fc) if (fc)
warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev); warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
bdev_release(bdev_handle); fput(bdev_file);
return -EBUSY; return -EBUSY;
} }
spin_lock(&sb_lock); spin_lock(&sb_lock);
sb->s_bdev_handle = bdev_handle; sb->s_bdev_file = bdev_file;
sb->s_bdev = bdev; sb->s_bdev = bdev;
sb->s_bdi = bdi_get(bdev->bd_disk->bdi); sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
if (bdev_stable_writes(bdev)) if (bdev_stable_writes(bdev))
...@@ -1680,7 +1680,7 @@ void kill_block_super(struct super_block *sb) ...@@ -1680,7 +1680,7 @@ void kill_block_super(struct super_block *sb)
generic_shutdown_super(sb); generic_shutdown_super(sb);
if (bdev) { if (bdev) {
sync_blockdev(bdev); sync_blockdev(bdev);
bdev_release(sb->s_bdev_handle); fput(sb->s_bdev_file);
} }
} }
......
...@@ -467,7 +467,7 @@ xfs_open_devices( ...@@ -467,7 +467,7 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers * Setup xfs_mount buffer target pointers
*/ */
error = -ENOMEM; error = -ENOMEM;
mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_handle); mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb_bdev_handle(sb));
if (!mp->m_ddev_targp) if (!mp->m_ddev_targp)
goto out_close_rtdev; goto out_close_rtdev;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <linux/sbitmap.h> #include <linux/sbitmap.h>
#include <linux/uuid.h> #include <linux/uuid.h>
#include <linux/xarray.h> #include <linux/xarray.h>
#include <linux/file.h>
struct module; struct module;
struct request_queue; struct request_queue;
...@@ -1474,6 +1475,7 @@ extern const struct blk_holder_ops fs_holder_ops; ...@@ -1474,6 +1475,7 @@ extern const struct blk_holder_ops fs_holder_ops;
(BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \ (BLK_OPEN_READ | BLK_OPEN_RESTRICT_WRITES | \
(((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE)) (((flags) & SB_RDONLY) ? 0 : BLK_OPEN_WRITE))
/* @bdev_handle will be removed soon. */
struct bdev_handle { struct bdev_handle {
struct block_device *bdev; struct block_device *bdev;
void *holder; void *holder;
...@@ -1484,6 +1486,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, ...@@ -1484,6 +1486,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops); const struct blk_holder_ops *hops);
struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode, struct bdev_handle *bdev_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops); void *holder, const struct blk_holder_ops *hops);
struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops);
struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
void *holder, const struct blk_holder_ops *hops);
int bd_prepare_to_claim(struct block_device *bdev, void *holder, int bd_prepare_to_claim(struct block_device *bdev, void *holder,
const struct blk_holder_ops *hops); const struct blk_holder_ops *hops);
void bd_abort_claiming(struct block_device *bdev, void *holder); void bd_abort_claiming(struct block_device *bdev, void *holder);
...@@ -1494,6 +1500,7 @@ struct block_device *blkdev_get_no_open(dev_t dev); ...@@ -1494,6 +1500,7 @@ struct block_device *blkdev_get_no_open(dev_t dev);
void blkdev_put_no_open(struct block_device *bdev); void blkdev_put_no_open(struct block_device *bdev);
struct block_device *I_BDEV(struct inode *inode); struct block_device *I_BDEV(struct inode *inode);
struct block_device *file_bdev(struct file *bdev_file);
#ifdef CONFIG_BLOCK #ifdef CONFIG_BLOCK
void invalidate_bdev(struct block_device *bdev); void invalidate_bdev(struct block_device *bdev);
......
...@@ -1228,8 +1228,8 @@ struct super_block { ...@@ -1228,8 +1228,8 @@ struct super_block {
#endif #endif
struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct hlist_bl_head s_roots; /* alternate root dentries for NFS */
struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */
struct block_device *s_bdev; struct block_device *s_bdev; /* can go away once we use an accessor for @s_bdev_file */
struct bdev_handle *s_bdev_handle; struct file *s_bdev_file;
struct backing_dev_info *s_bdi; struct backing_dev_info *s_bdi;
struct mtd_info *s_mtd; struct mtd_info *s_mtd;
struct hlist_node s_instances; struct hlist_node s_instances;
...@@ -1327,6 +1327,12 @@ struct super_block { ...@@ -1327,6 +1327,12 @@ struct super_block {
struct list_head s_inodes_wb; /* writeback inodes */ struct list_head s_inodes_wb; /* writeback inodes */
} __randomize_layout; } __randomize_layout;
/* Temporary helper that will go away. */
static inline struct bdev_handle *sb_bdev_handle(struct super_block *sb)
{
return sb->s_bdev_file->private_data;
}
static inline struct user_namespace *i_user_ns(const struct inode *inode) static inline struct user_namespace *i_user_ns(const struct inode *inode)
{ {
return inode->i_sb->s_user_ns; return inode->i_sb->s_user_ns;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment