Commit ed5cc702, authored by Jan Kara, committed by Christian Brauner

block: Add config option to not allow writing to mounted devices

Writing to mounted devices is dangerous and can lead to filesystem
corruption as well as crashes. Furthermore syzbot comes with more and
more involved examples how to corrupt block device under a mounted
filesystem leading to kernel crashes and reports we can do nothing
about. Add tracking of writers to each block device and a kernel cmdline
argument which controls whether other writeable opens of block devices
opened with the BLK_OPEN_RESTRICT_WRITES flag are allowed. We will make
filesystems use this flag for the devices they use.

Note that this effectively only prevents modification of the particular
block device's page cache by other writers. The actual device content
can still be modified by other means - e.g. by issuing direct scsi
commands, by doing writes through devices lower in the storage stack
(e.g. in case loop devices, DM, or MD are involved) etc. But blocking
direct modifications of the block device page cache is enough to give
filesystems a chance to perform data validation when loading data from
the underlying storage and thus prevent kernel crashes.

Syzbot can use this cmdline argument option to avoid uninteresting
crashes. Also users whose userspace setup does not need writing to
mounted block devices can set this option for hardening.

Link: https://lore.kernel.org/all/60788e5d-5c7c-1142-e554-c21d709acfd9@linaro.org
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20231101174325.10596-3-jack@suse.cz
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Christian Brauner <brauner@kernel.org>
parent cd34758c
...@@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10 ...@@ -78,6 +78,26 @@ config BLK_DEV_INTEGRITY_T10
select CRC_T10DIF select CRC_T10DIF
select CRC64_ROCKSOFT select CRC64_ROCKSOFT
config BLK_DEV_WRITE_MOUNTED
bool "Allow writing to mounted block devices"
default y
help
When a block device is mounted, writing to its buffer cache is very
likely going to cause filesystem corruption. It is also rather easy to
crash the kernel in this way since the filesystem has no practical way
of detecting these writes to buffer cache and verifying its metadata
integrity. However there are some setups that need this capability
like running fsck on read-only mounted root device, modifying some
features on mounted ext4 filesystem, and similar. If you say N, the
kernel will prevent processes from writing to block devices that are
mounted by filesystems, which provides some more protection from runaway
privileged processes and generally makes it much harder to crash
filesystem drivers. Note however that this does not prevent
underlying device(s) from being modified by other means, e.g. by
directly submitting SCSI commands or through access to lower layers of
storage stack. If in doubt, say Y. The configuration can be overridden
with the bdev_allow_write_mounted boot option.
config BLK_DEV_ZONED config BLK_DEV_ZONED
bool "Zoned block device support" bool "Zoned block device support"
select MQ_IOSCHED_DEADLINE select MQ_IOSCHED_DEADLINE
......
...@@ -30,6 +30,9 @@ ...@@ -30,6 +30,9 @@
#include "../fs/internal.h" #include "../fs/internal.h"
#include "blk.h" #include "blk.h"
/*
 * Should we allow writing to mounted block devices? Defaults to the
 * CONFIG_BLK_DEV_WRITE_MOUNTED setting and can be overridden at boot with
 * the "bdev_allow_write_mounted=" kernel command line option.
 */
static bool bdev_allow_write_mounted = IS_ENABLED(CONFIG_BLK_DEV_WRITE_MOUNTED);
struct bdev_inode { struct bdev_inode {
struct block_device bdev; struct block_device bdev;
struct inode vfs_inode; struct inode vfs_inode;
...@@ -731,6 +734,57 @@ void blkdev_put_no_open(struct block_device *bdev) ...@@ -731,6 +734,57 @@ void blkdev_put_no_open(struct block_device *bdev)
put_device(&bdev->bd_device); put_device(&bdev->bd_device);
} }
/*
 * bd_writers value marking a device whose writeable opens are blocked.
 * Non-negative values count the openers that claimed write access through
 * bdev_claim_write_access().
 */
enum { BDEV_WRITERS_BLOCKED = -1 };

/* Return true when @bdev carries the "writes blocked" marker. */
static bool bdev_writes_blocked(struct block_device *bdev)
{
	return bdev->bd_writers == BDEV_WRITERS_BLOCKED;
}

/* Put the "writes blocked" marker on @bdev; bdev_may_open() checks it. */
static void bdev_block_writes(struct block_device *bdev)
{
	bdev->bd_writers = BDEV_WRITERS_BLOCKED;
}

/* Drop the "writes blocked" marker by resetting the writer count to zero. */
static void bdev_unblock_writes(struct block_device *bdev)
{
	bdev->bd_writers = 0;
}
/*
 * Decide whether an open of @bdev with @mode is compatible with the write
 * restrictions currently in force on the device.
 *
 * Always true when writing to mounted devices is allowed globally.
 * Otherwise the open is refused when:
 *  - it asks for BLK_OPEN_WRITE while writes to @bdev are blocked, or
 *  - it asks for BLK_OPEN_RESTRICT_WRITES while @bdev already has writers.
 */
static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
{
	if (!bdev_allow_write_mounted) {
		/* A new writer cannot join a device with blocked writes. */
		if ((mode & BLK_OPEN_WRITE) && bdev_writes_blocked(bdev))
			return false;

		/* Writes cannot be restricted while writers are present. */
		if ((mode & BLK_OPEN_RESTRICT_WRITES) && bdev->bd_writers > 0)
			return false;
	}

	return true;
}
/*
 * Record the write access that an opener with @mode obtains on @bdev.
 *
 * Does nothing when writing to mounted devices is allowed globally.
 * A BLK_OPEN_RESTRICT_WRITES opener blocks writes on the device and takes
 * precedence over BLK_OPEN_WRITE; a plain BLK_OPEN_WRITE opener bumps the
 * writer count.
 */
static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return;

	/* Exclusive write access. */
	if (mode & BLK_OPEN_RESTRICT_WRITES) {
		bdev_block_writes(bdev);
		return;
	}

	/* Shared write access. */
	if (mode & BLK_OPEN_WRITE)
		bdev->bd_writers++;
}
/*
 * Give back the write access that an opener with @mode held on @bdev.
 *
 * Does nothing when writing to mounted devices is allowed globally.
 * A BLK_OPEN_RESTRICT_WRITES opener unblocks writes on the device and takes
 * precedence over BLK_OPEN_WRITE; a plain BLK_OPEN_WRITE opener drops the
 * writer count by one.
 */
static void bdev_yield_write_access(struct block_device *bdev, blk_mode_t mode)
{
	if (bdev_allow_write_mounted)
		return;

	/* Exclusive write access. */
	if (mode & BLK_OPEN_RESTRICT_WRITES) {
		bdev_unblock_writes(bdev);
		return;
	}

	/* Shared write access. */
	if (mode & BLK_OPEN_WRITE)
		bdev->bd_writers--;
}
/** /**
* bdev_open_by_dev - open a block device by device number * bdev_open_by_dev - open a block device by device number
* @dev: device number of block device to open * @dev: device number of block device to open
...@@ -773,6 +827,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, ...@@ -773,6 +827,10 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
if (ret) if (ret)
goto free_handle; goto free_handle;
/* Blocking writes requires exclusive opener */
if (mode & BLK_OPEN_RESTRICT_WRITES && !holder)
return ERR_PTR(-EINVAL);
bdev = blkdev_get_no_open(dev); bdev = blkdev_get_no_open(dev);
if (!bdev) { if (!bdev) {
ret = -ENXIO; ret = -ENXIO;
...@@ -800,12 +858,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder, ...@@ -800,12 +858,16 @@ struct bdev_handle *bdev_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
goto abort_claiming; goto abort_claiming;
if (!try_module_get(disk->fops->owner)) if (!try_module_get(disk->fops->owner))
goto abort_claiming; goto abort_claiming;
ret = -EBUSY;
if (!bdev_may_open(bdev, mode))
goto abort_claiming;
if (bdev_is_partition(bdev)) if (bdev_is_partition(bdev))
ret = blkdev_get_part(bdev, mode); ret = blkdev_get_part(bdev, mode);
else else
ret = blkdev_get_whole(bdev, mode); ret = blkdev_get_whole(bdev, mode);
if (ret) if (ret)
goto put_module; goto put_module;
bdev_claim_write_access(bdev, mode);
if (holder) { if (holder) {
bd_finish_claiming(bdev, holder, hops); bd_finish_claiming(bdev, holder, hops);
...@@ -901,6 +963,8 @@ void bdev_release(struct bdev_handle *handle) ...@@ -901,6 +963,8 @@ void bdev_release(struct bdev_handle *handle)
sync_blockdev(bdev); sync_blockdev(bdev);
mutex_lock(&disk->open_mutex); mutex_lock(&disk->open_mutex);
bdev_yield_write_access(bdev, handle->mode);
if (handle->holder) if (handle->holder)
bd_end_claim(bdev, handle->holder); bd_end_claim(bdev, handle->holder);
...@@ -1069,3 +1133,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat) ...@@ -1069,3 +1133,12 @@ void bdev_statx_dioalign(struct inode *inode, struct kstat *stat)
blkdev_put_no_open(bdev); blkdev_put_no_open(bdev);
} }
/*
 * Handle the "bdev_allow_write_mounted=" kernel command line option.
 *
 * @str is parsed as a boolean into bdev_allow_write_mounted. A string that
 * does not parse is reported with a warning; 1 is returned either way.
 */
static int __init setup_bdev_allow_write_mounted(char *str)
{
	int err = kstrtobool(str, &bdev_allow_write_mounted);

	if (err)
		pr_warn("Invalid option string for bdev_allow_write_mounted: '%s'\n",
			str);

	return 1;
}
__setup("bdev_allow_write_mounted=", setup_bdev_allow_write_mounted);
...@@ -66,6 +66,7 @@ struct block_device { ...@@ -66,6 +66,7 @@ struct block_device {
#ifdef CONFIG_FAIL_MAKE_REQUEST #ifdef CONFIG_FAIL_MAKE_REQUEST
bool bd_make_it_fail; bool bd_make_it_fail;
#endif #endif
int bd_writers;
/* /*
* keep this out-of-line as it's both big and not needed in the fast * keep this out-of-line as it's both big and not needed in the fast
* path * path
......
...@@ -124,6 +124,8 @@ typedef unsigned int __bitwise blk_mode_t; ...@@ -124,6 +124,8 @@ typedef unsigned int __bitwise blk_mode_t;
#define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3)) #define BLK_OPEN_NDELAY ((__force blk_mode_t)(1 << 3))
/* open for "writes" only for ioctls (specialy hack for floppy.c) */ /* open for "writes" only for ioctls (specialy hack for floppy.c) */
#define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4)) #define BLK_OPEN_WRITE_IOCTL ((__force blk_mode_t)(1 << 4))
/* open is exclusive wrt all other BLK_OPEN_WRITE opens to the device */
#define BLK_OPEN_RESTRICT_WRITES ((__force blk_mode_t)(1 << 5))
struct gendisk { struct gendisk {
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment