Commit e599b325 authored by Carlos Maiolino's avatar Carlos Maiolino Committed by Ben Myers

xfs: fix race while discarding buffers [V4]

While xfs_buftarg_shrink() is freeing buffers from the dispose list (filled with
buffers from lru list), there is a possibility to have xfs_buf_stale() racing
with it, and removing buffers from dispose list before xfs_buftarg_shrink() does
it.

This happens because xfs_buftarg_shrink() handle the dispose list without
locking and the test condition in xfs_buf_stale() checks for the buffer being in
*any* list:

if (!list_empty(&bp->b_lru))

If the buffer happens to be on dispose list, this causes the buffer counter of
lru list (btp->bt_lru_nr) to be decremented twice (once in xfs_buftarg_shrink()
and another in xfs_buf_stale()) causing a wrong account usage of the lru list.

This may cause xfs_buftarg_shrink() to return a wrong value to the memory
shrinker shrink_slab(), and such account error may also cause an underflowed
value to be returned; since the counter is lower than the current number of
items in the lru list, a decrement may happen when the counter is 0, causing
an underflow on the counter.

The fix uses a new flag field (and a new buffer flag) to serialize buffer
handling during the shrink process. The new flag field has been designed to use
btp->bt_lru_lock/unlock instead of xfs_buf_lock/unlock mechanism.

dchinner, sandeen, aquini and aris also deserve credits for this.
Signed-off-by: default avatarCarlos Maiolino <cmaiolino@redhat.com>
Reviewed-by: default avatarBen Myers <bpm@sgi.com>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarBen Myers <bpm@sgi.com>
parent 643bfc06
...@@ -96,6 +96,7 @@ xfs_buf_lru_add( ...@@ -96,6 +96,7 @@ xfs_buf_lru_add(
atomic_inc(&bp->b_hold); atomic_inc(&bp->b_hold);
list_add_tail(&bp->b_lru, &btp->bt_lru); list_add_tail(&bp->b_lru, &btp->bt_lru);
btp->bt_lru_nr++; btp->bt_lru_nr++;
bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
} }
spin_unlock(&btp->bt_lru_lock); spin_unlock(&btp->bt_lru_lock);
} }
...@@ -154,7 +155,8 @@ xfs_buf_stale( ...@@ -154,7 +155,8 @@ xfs_buf_stale(
struct xfs_buftarg *btp = bp->b_target; struct xfs_buftarg *btp = bp->b_target;
spin_lock(&btp->bt_lru_lock); spin_lock(&btp->bt_lru_lock);
if (!list_empty(&bp->b_lru)) { if (!list_empty(&bp->b_lru) &&
!(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
list_del_init(&bp->b_lru); list_del_init(&bp->b_lru);
btp->bt_lru_nr--; btp->bt_lru_nr--;
atomic_dec(&bp->b_hold); atomic_dec(&bp->b_hold);
...@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink( ...@@ -1501,6 +1503,7 @@ xfs_buftarg_shrink(
*/ */
list_move(&bp->b_lru, &dispose); list_move(&bp->b_lru, &dispose);
btp->bt_lru_nr--; btp->bt_lru_nr--;
bp->b_lru_flags |= _XBF_LRU_DISPOSE;
} }
spin_unlock(&btp->bt_lru_lock); spin_unlock(&btp->bt_lru_lock);
......
...@@ -38,27 +38,28 @@ typedef enum { ...@@ -38,27 +38,28 @@ typedef enum {
XBRW_ZERO = 3, /* Zero target memory */ XBRW_ZERO = 3, /* Zero target memory */
} xfs_buf_rw_t; } xfs_buf_rw_t;
#define XBF_READ (1 << 0) /* buffer intended for reading from device */ #define XBF_READ (1 << 0) /* buffer intended for reading from device */
#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ #define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ #define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ #define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ #define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
/* I/O hints for the BIO layer */ /* I/O hints for the BIO layer */
#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ #define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
#define XBF_FUA (1 << 11)/* force cache write through mode */ #define XBF_FUA (1 << 11)/* force cache write through mode */
#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ #define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
/* flags used only as arguments to access routines */ /* flags used only as arguments to access routines */
#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
/* flags used only internally */ /* flags used only internally */
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
#define _XBF_COMPOUND (1 << 23)/* compound buffer */ #define _XBF_COMPOUND (1 << 23)/* compound buffer */
#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
typedef unsigned int xfs_buf_flags_t; typedef unsigned int xfs_buf_flags_t;
...@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t; ...@@ -72,12 +73,13 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_SYNCIO, "SYNCIO" }, \ { XBF_SYNCIO, "SYNCIO" }, \
{ XBF_FUA, "FUA" }, \ { XBF_FUA, "FUA" }, \
{ XBF_FLUSH, "FLUSH" }, \ { XBF_FLUSH, "FLUSH" }, \
{ XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
{ XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \ { _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \ { _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \
{ _XBF_COMPOUND, "COMPOUND" } { _XBF_COMPOUND, "COMPOUND" }, \
{ _XBF_LRU_DISPOSE, "LRU_DISPOSE" }
typedef struct xfs_buftarg { typedef struct xfs_buftarg {
dev_t bt_dev; dev_t bt_dev;
...@@ -124,7 +126,12 @@ typedef struct xfs_buf { ...@@ -124,7 +126,12 @@ typedef struct xfs_buf {
xfs_buf_flags_t b_flags; /* status flags */ xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */ struct semaphore b_sema; /* semaphore for lockables */
/*
* concurrent access to b_lru and b_lru_flags are protected by
* bt_lru_lock and not by b_sema
*/
struct list_head b_lru; /* lru list */ struct list_head b_lru; /* lru list */
xfs_buf_flags_t b_lru_flags; /* internal lru status flags */
wait_queue_head_t b_waiters; /* unpin waiters */ wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list; struct list_head b_list;
struct xfs_perag *b_pag; /* contains rbtree root */ struct xfs_perag *b_pag; /* contains rbtree root */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment