Commit c6425702 authored by Darrick J. Wong

xfs: ratelimit inode flush on buffered write ENOSPC

A customer reported RCU stalls and softlockup warnings on a computer
with many CPU cores and many, many more IO threads trying to write to a
filesystem that is totally out of space.  Subsequent analysis pointed to
the many, many IO threads calling xfs_flush_inodes -> sync_inodes_sb,
which causes a lot of wb_writeback_work to be queued.  The writeback
worker spends so much time trying to wake the many, many threads waiting
for writeback completion that it trips the softlockup detector, and (in
this case) the system automatically reboots.

In addition, they complained that the lengthy xfs_flush_inodes scan traps
all of those threads in uninterruptible sleep, which hampers their
ability to kill the program or do anything else to escape the situation.

If there are thousands of threads trying to write to files on a full
filesystem, each of those threads will start a separate copy of the
inode flush scan.  This is pointless since we only need one scan, so
rate limit the inode flush.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
parent d4bc4c5f
@@ -167,6 +167,7 @@ typedef struct xfs_mount {
 	struct xfs_kobj		m_error_meta_kobj;
 	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
 	struct xstats		m_stats;	/* per-fs stats */
+	struct ratelimit_state	m_flush_inodes_ratelimit;
 	struct workqueue_struct	*m_buf_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
...
@@ -528,6 +528,9 @@ xfs_flush_inodes(
 {
 	struct super_block	*sb = mp->m_super;
 
+	if (!__ratelimit(&mp->m_flush_inodes_ratelimit))
+		return;
+
 	if (down_read_trylock(&sb->s_umount)) {
 		sync_inodes_sb(sb);
 		up_read(&sb->s_umount);
@@ -1366,6 +1369,17 @@ xfs_fc_fill_super(
 	if (error)
 		goto out_free_names;
 
+	/*
+	 * Cap the number of invocations of xfs_flush_inodes to 16 for every
+	 * quarter of a second.  The magic numbers here were determined by
+	 * observation neither to cause stalls in writeback when there are a
+	 * lot of IO threads and the fs is near ENOSPC, nor cause any fstest
+	 * regressions.  YMMV.
+	 */
+	ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16);
+	ratelimit_set_flags(&mp->m_flush_inodes_ratelimit,
+			RATELIMIT_MSG_ON_RELEASE);
+
 	error = xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
...
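
For context, the kernel ratelimit API used here (ratelimit_state_init,
ratelimit_set_flags, __ratelimit) enforces a per-window budget: up to
`burst` events are admitted per `interval`, and further events are
suppressed until a new interval begins.  With HZ / 4 and 16, at most 16
xfs_flush_inodes scans run per quarter second; over-budget callers return
immediately instead of piling onto sync_inodes_sb.  The sketch below is a
simplified userspace model of those semantics only; it is not the kernel's
lib/ratelimit.c, and the flush_ratelimit / flush_ratelimit_ok names are
made up for illustration.

#define _POSIX_C_SOURCE 200809L	/* for clock_gettime() */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/*
 * Hypothetical userspace model of the rate limit in the patch:
 * allow at most `burst` calls per `interval` seconds, reject the rest.
 */
struct flush_ratelimit {
	double	interval;	/* length of one window, e.g. 0.25s (HZ / 4) */
	int	burst;		/* calls allowed per window, e.g. 16 */
	double	window_start;	/* start time of the current window */
	int	seen;		/* calls already admitted in this window */
};

static double now_seconds(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec + ts.tv_nsec / 1e9;
}

/* Returns true if the caller may run the flush, false if it should bail. */
static bool flush_ratelimit_ok(struct flush_ratelimit *rl)
{
	double t = now_seconds();

	if (t - rl->window_start >= rl->interval) {
		/* Window expired: start a fresh one with a full budget. */
		rl->window_start = t;
		rl->seen = 0;
	}
	if (rl->seen >= rl->burst)
		return false;	/* over budget: suppress this call */
	rl->seen++;
	return true;
}

int main(void)
{
	/* Mirror the patch's parameters: 16 calls per quarter second. */
	struct flush_ratelimit rl = {
		.interval	= 0.25,
		.burst		= 16,
		.window_start	= now_seconds(),
		.seen		= 0,
	};
	int allowed = 0;

	/* Simulate thousands of writers hitting ENOSPC back to back. */
	for (int i = 0; i < 10000; i++)
		if (flush_ratelimit_ok(&rl))
			allowed++;

	printf("%d of 10000 calls allowed\n", allowed);
	return 0;
}

Run in a tight loop like this, only the first 16 calls get through and the
rest return immediately, which is the behaviour the patch wants when
thousands of writers contend on a full filesystem.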