Commit df981d03 authored by Theodore Ts'o

ext4: add max_dir_size_kb mount option

Very large directories can cause significant performance problems, or
perhaps even invoke the OOM killer, if the process is running in a
highly constrained memory environment (whether it is VMs with a small
amount of memory or in a small memory cgroup).

So it is useful, in cloud server/data center environments, to be able
to set a filesystem-wide cap on the maximum size of a directory, to
ensure that directories never get larger than a sane size.  We do this
via a new mount option, max_dir_size_kb.  If there is an attempt to
grow the directory larger than max_dir_size_kb, the system call will
return ENOSPC instead.

Google-Bug-Id: 6863013
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>


parent 01fc48e8
...@@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified ...@@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified
Because of the restrictions this options comprises Because of the restrictions this options comprises
it is off by default (e.g. dioread_lock). it is off by default (e.g. dioread_lock).
max_dir_size_kb=n This limits the size of directories so that any
attempt to expand them beyond the specified
limit in kilobytes will cause an ENOSPC error.
This is useful in memory constrained
environments, where a very large directory can
cause severe performance problems or even
provoke the Out Of Memory killer. (For example,
if there is only 512mb memory available, a 176mb
directory may seriously cramp the system's style.)
i_version Enable 64-bit inode version support. This option is i_version Enable 64-bit inode version support. This option is
off by default. off by default.
......
...@@ -1243,6 +1243,7 @@ struct ext4_sb_info { ...@@ -1243,6 +1243,7 @@ struct ext4_sb_info {
unsigned int s_mb_order2_reqs; unsigned int s_mb_order2_reqs;
unsigned int s_mb_group_prealloc; unsigned int s_mb_group_prealloc;
unsigned int s_max_writeback_mb_bump; unsigned int s_max_writeback_mb_bump;
unsigned int s_max_dir_size_kb;
/* where last allocation was done - for stream allocation */ /* where last allocation was done - for stream allocation */
unsigned long s_mb_last_group; unsigned long s_mb_last_group;
unsigned long s_mb_last_start; unsigned long s_mb_last_start;
......
...@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle, ...@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle,
{ {
struct buffer_head *bh; struct buffer_head *bh;
if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
((inode->i_size >> 10) >=
EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
*err = -ENOSPC;
return NULL;
}
*block = inode->i_size >> inode->i_sb->s_blocksize_bits; *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
bh = ext4_bread(handle, inode, *block, 1, err); bh = ext4_bread(handle, inode, *block, 1, err);
......
...@@ -1230,6 +1230,7 @@ enum { ...@@ -1230,6 +1230,7 @@ enum {
Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock, Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
Opt_max_dir_size_kb,
}; };
static const match_table_t tokens = { static const match_table_t tokens = {
...@@ -1303,6 +1304,7 @@ static const match_table_t tokens = { ...@@ -1303,6 +1304,7 @@ static const match_table_t tokens = {
{Opt_init_itable, "init_itable=%u"}, {Opt_init_itable, "init_itable=%u"},
{Opt_init_itable, "init_itable"}, {Opt_init_itable, "init_itable"},
{Opt_noinit_itable, "noinit_itable"}, {Opt_noinit_itable, "noinit_itable"},
{Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
{Opt_removed, "check=none"}, /* mount option from ext2/3 */ {Opt_removed, "check=none"}, /* mount option from ext2/3 */
{Opt_removed, "nocheck"}, /* mount option from ext2/3 */ {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
{Opt_removed, "reservation"}, /* mount option from ext2/3 */ {Opt_removed, "reservation"}, /* mount option from ext2/3 */
...@@ -1483,6 +1485,7 @@ static const struct mount_opts { ...@@ -1483,6 +1485,7 @@ static const struct mount_opts {
{Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
{Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
{Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
{Opt_max_dir_size_kb, 0, MOPT_GTE0},
{Opt_err, 0, 0} {Opt_err, 0, 0}
}; };
...@@ -1598,6 +1601,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -1598,6 +1601,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
if (!args->from) if (!args->from)
arg = EXT4_DEF_LI_WAIT_MULT; arg = EXT4_DEF_LI_WAIT_MULT;
sbi->s_li_wait_mult = arg; sbi->s_li_wait_mult = arg;
} else if (token == Opt_max_dir_size_kb) {
sbi->s_max_dir_size_kb = arg;
} else if (token == Opt_stripe) { } else if (token == Opt_stripe) {
sbi->s_stripe = arg; sbi->s_stripe = arg;
} else if (m->flags & MOPT_DATAJ) { } else if (m->flags & MOPT_DATAJ) {
...@@ -1829,6 +1834,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, ...@@ -1829,6 +1834,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
(sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
if (nodefs || sbi->s_max_dir_size_kb)
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
ext4_show_quota_options(seq, sb); ext4_show_quota_options(seq, sb);
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment