Commit 87f3eb7a authored by Nathan Scott's avatar Nathan Scott Committed by Stephen Lord

[XFS] Fix a race condition in async pagebuf IO completion,

by moving blk queue manipulation down into pagebuf. 
Fix some busted comments in page_buf.h, use a more
descriptive name for __pagebuf_iorequest.

SGI Modid: 2.5.x-xfs:slinx:155788a
parent af60eee1
......@@ -1114,10 +1114,10 @@ _pagebuf_wait_unpin(
add_wait_queue(&pb->pb_waiters, &wait);
for (;;) {
current->state = TASK_UNINTERRUPTIBLE;
if (atomic_read(&pb->pb_pin_count) == 0) {
if (atomic_read(&pb->pb_pin_count) == 0)
break;
}
pagebuf_run_queues(pb);
if (atomic_read(&pb->pb_io_remaining))
blk_run_queues();
schedule();
}
remove_wait_queue(&pb->pb_waiters, &wait);
......@@ -1224,26 +1224,27 @@ pagebuf_iostart( /* start I/O on a buffer */
return status;
}
pb->pb_flags &=
~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI|PBF_READ_AHEAD);
pb->pb_flags |= flags &
(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_SYNC|PBF_READ_AHEAD);
pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | \
PBF_DELWRI | PBF_READ_AHEAD | PBF_RUN_QUEUES);
pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
PBF_SYNC | PBF_READ_AHEAD | PBF_RUN_QUEUES);
BUG_ON(pb->pb_bn == PAGE_BUF_DADDR_NULL);
/* For writes call internal function which checks for
* filesystem specific callout function and execute it.
/* For writes allow an alternate strategy routine to precede
* the actual I/O request (which may not be issued at all in
* a shutdown situation, for example).
*/
if (flags & PBF_WRITE) {
status = __pagebuf_iorequest(pb);
} else {
status = pagebuf_iorequest(pb);
}
status = (flags & PBF_WRITE) ?
pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
/* Wait for I/O if we are not an async request */
if ((status == 0) && (flags & PBF_ASYNC) == 0) {
/* Wait for I/O if we are not an async request.
* Note: async I/O request completion will release the buffer,
* and that can already be done by this point. So using the
* buffer pointer from here on, after async I/O, is invalid.
*/
if (!status && !(flags & PBF_ASYNC))
status = pagebuf_iowait(pb);
}
return status;
}
......@@ -1381,8 +1382,6 @@ _pagebuf_ioapply(
nr_pages = total_nr_pages;
bio = bio_alloc(GFP_NOIO, nr_pages);
BUG_ON(bio == NULL);
bio->bi_bdev = pb->pb_target->pbr_bdev;
bio->bi_sector = sector;
bio->bi_end_io = bio_end_io_pagebuf;
......@@ -1418,6 +1417,12 @@ _pagebuf_ioapply(
} else {
pagebuf_ioerror(pb, EIO);
}
if (pb->pb_flags & PBF_RUN_QUEUES) {
pb->pb_flags &= ~PBF_RUN_QUEUES;
if (atomic_read(&pb->pb_io_remaining) > 1)
blk_run_queues();
}
}
/*
......@@ -1453,6 +1458,8 @@ pagebuf_iorequest( /* start real I/O */
_pagebuf_wait_unpin(pb);
}
pagebuf_hold(pb);
/* Set the count to 1 initially, this will stop an I/O
* completion callout which happens before we have started
* all the I/O from calling pagebuf_iodone too early.
......@@ -1460,6 +1467,8 @@ pagebuf_iorequest( /* start real I/O */
atomic_set(&pb->pb_io_remaining, 1);
_pagebuf_ioapply(pb);
_pagebuf_iodone(pb, 0);
pagebuf_rele(pb);
return 0;
}
......@@ -1475,7 +1484,8 @@ pagebuf_iowait(
page_buf_t *pb)
{
PB_TRACE(pb, PB_TRACE_REC(iowait), 0);
pagebuf_run_queues(pb);
if (atomic_read(&pb->pb_io_remaining))
blk_run_queues();
down(&pb->pb_iodonesema);
PB_TRACE(pb, PB_TRACE_REC(iowaited), (int)pb->pb_error);
return pb->pb_error;
......@@ -1554,6 +1564,7 @@ pagebuf_iomove(
}
}
/*
* Pagebuf delayed write buffer handling
*/
......@@ -1683,13 +1694,13 @@ pagebuf_daemon(
pb->pb_flags &= ~PBF_DELWRI;
pb->pb_flags |= PBF_WRITE;
__pagebuf_iorequest(pb);
pagebuf_iostrategy(pb);
}
if (as_list_len > 0)
purge_addresses();
if (count)
pagebuf_run_queues(NULL);
blk_run_queues();
force_flush = 0;
} while (pbd_active == 1);
......@@ -1756,9 +1767,9 @@ pagebuf_delwri_flush(
pb->pb_flags &= ~PBF_DELWRI;
pb->pb_flags |= PBF_WRITE;
__pagebuf_iorequest(pb);
pagebuf_iostrategy(pb);
if (++flush_cnt > 32) {
pagebuf_run_queues(NULL);
blk_run_queues();
flush_cnt = 0;
}
......@@ -1767,7 +1778,7 @@ pagebuf_delwri_flush(
spin_unlock(&pbd_delwrite_lock);
pagebuf_run_queues(NULL);
blk_run_queues();
if (pinptr)
*pinptr = pincount;
......
......@@ -128,6 +128,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
PBF_FORCEIO = (1 << 21),
PBF_FLUSH = (1 << 22), /* flush disk write cache */
PBF_READ_AHEAD = (1 << 23),
PBF_RUN_QUEUES = (1 << 24), /* run block device task queue */
} page_buf_flags_t;
......@@ -239,10 +240,6 @@ typedef struct page_buf_s {
} page_buf_t;
/*
* page_buf module entry points
*/
/* Finding and Reading Buffers */
extern page_buf_t *pagebuf_find( /* find buffer for block if */
......@@ -276,12 +273,11 @@ extern page_buf_t *pagebuf_get_no_daddr(/* allocate pagebuf struct */
size_t len,
struct pb_target *); /* mount point "fake" inode */
extern int pagebuf_associate_memory(
extern int pagebuf_associate_memory(
page_buf_t *,
void *,
size_t);
extern void pagebuf_hold( /* increment reference count */
page_buf_t *); /* buffer to hold */
......@@ -291,7 +287,7 @@ extern void pagebuf_readahead( /* read ahead into cache */
size_t, /* length of range */
page_buf_flags_t); /* additional read flags */
/* Writing and Releasing Buffers */
/* Releasing Buffers */
extern void pagebuf_free( /* deallocate a buffer */
page_buf_t *); /* buffer to deallocate */
......@@ -314,11 +310,7 @@ extern int pagebuf_lock( /* lock buffer */
extern void pagebuf_unlock( /* unlock buffer */
page_buf_t *); /* buffer to unlock */
/* Buffer Utility Routines */
/* Return the error code recorded on a buffer; a NULL buffer reports ENOMEM. */
static inline int pagebuf_geterror(page_buf_t *pb)
{
	if (!pb)
		return ENOMEM;
	return pb->pb_error;
}
/* Buffer Read and Write Routines */
extern void pagebuf_iodone( /* mark buffer I/O complete */
page_buf_t *, /* buffer to mark */
......@@ -339,21 +331,9 @@ extern int pagebuf_iostart( /* start I/O on a buffer */
extern int pagebuf_iorequest( /* start real I/O */
page_buf_t *); /* buffer to convey to device */
/*
* pagebuf_iorequest is the core I/O request routine.
* It assumes that the buffer is well-formed and
* mapped and ready for physical I/O, unlike
* pagebuf_iostart() and pagebuf_iophysio(). Those
* routines call the inode pagebuf_ioinitiate routine to start I/O,
* if it is present, or else call pagebuf_iorequest()
* directly if the inode pagebuf_ioinitiate routine is not present.
*/
extern int pagebuf_iowait( /* wait for buffer I/O done */
page_buf_t *); /* buffer to wait on */
extern caddr_t pagebuf_offset(page_buf_t *, size_t);
extern void pagebuf_iomove( /* move data in/out of pagebuf */
page_buf_t *, /* buffer to manipulate */
size_t, /* starting buffer offset */
......@@ -361,6 +341,22 @@ extern void pagebuf_iomove( /* move data in/out of pagebuf */
caddr_t, /* data pointer */
page_buf_rw_t); /* direction */
/*
 * Issue I/O on a buffer: use the filesystem-supplied strategy callout
 * when one is set, otherwise fall back to the generic request path.
 */
static inline int pagebuf_iostrategy(page_buf_t *pb)
{
	if (pb->pb_strat)
		return pb->pb_strat(pb);
	return pagebuf_iorequest(pb);
}
/* Fetch a buffer's recorded error code, treating a NULL buffer as ENOMEM. */
static inline int pagebuf_geterror(page_buf_t *pb)
{
	return (pb == NULL) ? ENOMEM : pb->pb_error;
}
/* Buffer Utility Routines */
extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
page_buf_t *, /* buffer to offset into */
size_t); /* offset */
/* Pinning Buffer Storage in Memory */
extern void pagebuf_pin( /* pin buffer in memory */
......@@ -369,33 +365,24 @@ extern void pagebuf_pin( /* pin buffer in memory */
extern void pagebuf_unpin( /* unpin buffered data */
page_buf_t *); /* buffer to unpin */
extern int pagebuf_ispin( page_buf_t *); /* check if pagebuf is pinned */
/* Reading and writing pages */
extern int pagebuf_ispin( /* check if buffer is pinned */
page_buf_t *); /* buffer to check */
extern void pagebuf_delwri_dequeue(page_buf_t *);
/* Delayed Write Buffer Routines */
#define PBDF_WAIT 0x01
#define PBDF_TRYLOCK 0x02
extern void pagebuf_delwri_flush(
struct pb_target *,
pb_target_t *,
unsigned long,
int *);
extern int pagebuf_init(void);
extern void pagebuf_terminate(void);
extern void pagebuf_delwri_dequeue(
page_buf_t *);
/*
 * Dispatch I/O for a buffer: prefer the buffer's private strategy
 * callout when present, else issue the request directly.
 */
static __inline__ int __pagebuf_iorequest(page_buf_t *pb)
{
	return pb->pb_strat ? pb->pb_strat(pb) : pagebuf_iorequest(pb);
}
/* Buffer Daemon Setup Routines */
/*
 * Kick the block device queues.  Called with no buffer we always run
 * them; given a buffer we only do so while it still has I/O in flight.
 */
static __inline__ void pagebuf_run_queues(page_buf_t *pb)
{
	if (pb != NULL && atomic_read(&pb->pb_io_remaining) == 0)
		return;
	blk_run_queues();
}
extern int pagebuf_init(void);
extern void pagebuf_terminate(void);
#endif /* __PAGE_BUF_H__ */
......@@ -113,7 +113,8 @@ pagebuf_lock(
ASSERT(pb->pb_flags & _PBF_LOCKABLE);
PB_TRACE(pb, PB_TRACE_REC(lock), 0);
pagebuf_run_queues(pb);
if (atomic_read(&pb->pb_io_remaining))
blk_run_queues();
down(&pb->pb_sema);
PB_SET_OWNER(pb);
PB_TRACE(pb, PB_TRACE_REC(locked), 0);
......
......@@ -215,21 +215,16 @@ extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset)
/*
 * Start an asynchronous write of a buffer, routing it through the
 * xfs_bdstrat_cb strategy callout.
 *
 * NOTE(review): this span appears to contain interleaved old and new
 * lines from a diff -- the final return statement is unreachable and
 * duplicates the pagebuf_iostart() call above it.  Reconcile against
 * the committed version of the file before relying on this body.
 */
static inline int xfs_bawrite(void *mp, page_buf_t *bp)
{
int ret;
bp->pb_fspriv3 = mp;
bp->pb_strat = xfs_bdstrat_cb;
xfs_buf_undelay(bp);
if ((ret = pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC)) == 0)
pagebuf_run_queues(bp);
return ret;
/* unreachable: looks like the replacement (new) form of the call above */
return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | PBF_RUN_QUEUES);
}
/*
 * Drop a reference to a buffer, releasing the buffer lock first when
 * the buffer is lockable and has no private release callout.
 */
static inline void xfs_buf_relse(page_buf_t *bp)
{
	int lockable = (bp->pb_flags & _PBF_LOCKABLE) != 0;

	if (lockable && bp->pb_relse == NULL)
		pagebuf_unlock(bp);
	pagebuf_rele(bp);
}
......@@ -263,23 +258,19 @@ static inline void xfs_buf_relse(page_buf_t *bp)
/*
 * Buffer write entry point: synchronous unless PBF_ASYNC is set, in
 * which case the write is fired off without waiting.
 *
 * NOTE(review): this span looks like interleaved old and new lines from
 * a diff -- `error` is declared twice, `sync` and `iowait` compute the
 * same condition, both __pagebuf_iorequest() and pagebuf_iostrategy()
 * are called, and the braces do not balance.  Reconcile against the
 * committed version of the file before relying on this body.
 */
static inline int XFS_bwrite(page_buf_t *pb)
{
int sync = (pb->pb_flags & PBF_ASYNC) == 0;	/* presumably the old-version flag */
int error;
int iowait = (pb->pb_flags & PBF_ASYNC) == 0;	/* presumably the new-version flag */
int error = 0;
pb->pb_flags |= PBF_SYNC;
if (!iowait)
pb->pb_flags |= PBF_RUN_QUEUES;
xfs_buf_undelay(pb);
__pagebuf_iorequest(pb);
if (sync) {
pagebuf_iostrategy(pb);
if (iowait) {
error = pagebuf_iowait(pb);
xfs_buf_relse(pb);
} else {
pagebuf_run_queues(pb);
error = 0;
}
return error;
}
......@@ -320,4 +311,4 @@ static inline int xfs_bdwrite(void *mp, page_buf_t *bp)
#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
#define xfs_buf_free(bp) pagebuf_free(bp)
#endif
#endif /* __XFS_BUF_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment