Commit 87f3eb7a authored by Nathan Scott's avatar Nathan Scott Committed by Stephen Lord

[XFS] Fix a race condition in async pagebuf I/O completion

Fix the race by moving blk queue manipulation down into pagebuf.
Also fix some busted comments in page_buf.h, and give
__pagebuf_iorequest a more descriptive name (pagebuf_iostrategy).

SGI Modid: 2.5.x-xfs:slinx:155788a
parent af60eee1
...@@ -1114,10 +1114,10 @@ _pagebuf_wait_unpin( ...@@ -1114,10 +1114,10 @@ _pagebuf_wait_unpin(
add_wait_queue(&pb->pb_waiters, &wait); add_wait_queue(&pb->pb_waiters, &wait);
for (;;) { for (;;) {
current->state = TASK_UNINTERRUPTIBLE; current->state = TASK_UNINTERRUPTIBLE;
if (atomic_read(&pb->pb_pin_count) == 0) { if (atomic_read(&pb->pb_pin_count) == 0)
break; break;
} if (atomic_read(&pb->pb_io_remaining))
pagebuf_run_queues(pb); blk_run_queues();
schedule(); schedule();
} }
remove_wait_queue(&pb->pb_waiters, &wait); remove_wait_queue(&pb->pb_waiters, &wait);
...@@ -1224,26 +1224,27 @@ pagebuf_iostart( /* start I/O on a buffer */ ...@@ -1224,26 +1224,27 @@ pagebuf_iostart( /* start I/O on a buffer */
return status; return status;
} }
pb->pb_flags &= pb->pb_flags &= ~(PBF_READ | PBF_WRITE | PBF_ASYNC | \
~(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_DELWRI|PBF_READ_AHEAD); PBF_DELWRI | PBF_READ_AHEAD | PBF_RUN_QUEUES);
pb->pb_flags |= flags & pb->pb_flags |= flags & (PBF_READ | PBF_WRITE | PBF_ASYNC | \
(PBF_READ|PBF_WRITE|PBF_ASYNC|PBF_SYNC|PBF_READ_AHEAD); PBF_SYNC | PBF_READ_AHEAD | PBF_RUN_QUEUES);
BUG_ON(pb->pb_bn == PAGE_BUF_DADDR_NULL); BUG_ON(pb->pb_bn == PAGE_BUF_DADDR_NULL);
/* For writes call internal function which checks for /* For writes allow an alternate strategy routine to precede
* filesystem specific callout function and execute it. * the actual I/O request (which may not be issued at all in
* a shutdown situation, for example).
*/ */
if (flags & PBF_WRITE) { status = (flags & PBF_WRITE) ?
status = __pagebuf_iorequest(pb); pagebuf_iostrategy(pb) : pagebuf_iorequest(pb);
} else {
status = pagebuf_iorequest(pb);
}
/* Wait for I/O if we are not an async request */ /* Wait for I/O if we are not an async request.
if ((status == 0) && (flags & PBF_ASYNC) == 0) { * Note: async I/O request completion will release the buffer,
* and that can already be done by this point. So using the
* buffer pointer from here on, after async I/O, is invalid.
*/
if (!status && !(flags & PBF_ASYNC))
status = pagebuf_iowait(pb); status = pagebuf_iowait(pb);
}
return status; return status;
} }
...@@ -1381,8 +1382,6 @@ _pagebuf_ioapply( ...@@ -1381,8 +1382,6 @@ _pagebuf_ioapply(
nr_pages = total_nr_pages; nr_pages = total_nr_pages;
bio = bio_alloc(GFP_NOIO, nr_pages); bio = bio_alloc(GFP_NOIO, nr_pages);
BUG_ON(bio == NULL);
bio->bi_bdev = pb->pb_target->pbr_bdev; bio->bi_bdev = pb->pb_target->pbr_bdev;
bio->bi_sector = sector; bio->bi_sector = sector;
bio->bi_end_io = bio_end_io_pagebuf; bio->bi_end_io = bio_end_io_pagebuf;
...@@ -1418,6 +1417,12 @@ _pagebuf_ioapply( ...@@ -1418,6 +1417,12 @@ _pagebuf_ioapply(
} else { } else {
pagebuf_ioerror(pb, EIO); pagebuf_ioerror(pb, EIO);
} }
if (pb->pb_flags & PBF_RUN_QUEUES) {
pb->pb_flags &= ~PBF_RUN_QUEUES;
if (atomic_read(&pb->pb_io_remaining) > 1)
blk_run_queues();
}
} }
/* /*
...@@ -1453,6 +1458,8 @@ pagebuf_iorequest( /* start real I/O */ ...@@ -1453,6 +1458,8 @@ pagebuf_iorequest( /* start real I/O */
_pagebuf_wait_unpin(pb); _pagebuf_wait_unpin(pb);
} }
pagebuf_hold(pb);
/* Set the count to 1 initially, this will stop an I/O /* Set the count to 1 initially, this will stop an I/O
* completion callout which happens before we have started * completion callout which happens before we have started
* all the I/O from calling pagebuf_iodone too early. * all the I/O from calling pagebuf_iodone too early.
...@@ -1460,6 +1467,8 @@ pagebuf_iorequest( /* start real I/O */ ...@@ -1460,6 +1467,8 @@ pagebuf_iorequest( /* start real I/O */
atomic_set(&pb->pb_io_remaining, 1); atomic_set(&pb->pb_io_remaining, 1);
_pagebuf_ioapply(pb); _pagebuf_ioapply(pb);
_pagebuf_iodone(pb, 0); _pagebuf_iodone(pb, 0);
pagebuf_rele(pb);
return 0; return 0;
} }
...@@ -1475,7 +1484,8 @@ pagebuf_iowait( ...@@ -1475,7 +1484,8 @@ pagebuf_iowait(
page_buf_t *pb) page_buf_t *pb)
{ {
PB_TRACE(pb, PB_TRACE_REC(iowait), 0); PB_TRACE(pb, PB_TRACE_REC(iowait), 0);
pagebuf_run_queues(pb); if (atomic_read(&pb->pb_io_remaining))
blk_run_queues();
down(&pb->pb_iodonesema); down(&pb->pb_iodonesema);
PB_TRACE(pb, PB_TRACE_REC(iowaited), (int)pb->pb_error); PB_TRACE(pb, PB_TRACE_REC(iowaited), (int)pb->pb_error);
return pb->pb_error; return pb->pb_error;
...@@ -1554,6 +1564,7 @@ pagebuf_iomove( ...@@ -1554,6 +1564,7 @@ pagebuf_iomove(
} }
} }
/* /*
* Pagebuf delayed write buffer handling * Pagebuf delayed write buffer handling
*/ */
...@@ -1683,13 +1694,13 @@ pagebuf_daemon( ...@@ -1683,13 +1694,13 @@ pagebuf_daemon(
pb->pb_flags &= ~PBF_DELWRI; pb->pb_flags &= ~PBF_DELWRI;
pb->pb_flags |= PBF_WRITE; pb->pb_flags |= PBF_WRITE;
__pagebuf_iorequest(pb); pagebuf_iostrategy(pb);
} }
if (as_list_len > 0) if (as_list_len > 0)
purge_addresses(); purge_addresses();
if (count) if (count)
pagebuf_run_queues(NULL); blk_run_queues();
force_flush = 0; force_flush = 0;
} while (pbd_active == 1); } while (pbd_active == 1);
...@@ -1756,9 +1767,9 @@ pagebuf_delwri_flush( ...@@ -1756,9 +1767,9 @@ pagebuf_delwri_flush(
pb->pb_flags &= ~PBF_DELWRI; pb->pb_flags &= ~PBF_DELWRI;
pb->pb_flags |= PBF_WRITE; pb->pb_flags |= PBF_WRITE;
__pagebuf_iorequest(pb); pagebuf_iostrategy(pb);
if (++flush_cnt > 32) { if (++flush_cnt > 32) {
pagebuf_run_queues(NULL); blk_run_queues();
flush_cnt = 0; flush_cnt = 0;
} }
...@@ -1767,7 +1778,7 @@ pagebuf_delwri_flush( ...@@ -1767,7 +1778,7 @@ pagebuf_delwri_flush(
spin_unlock(&pbd_delwrite_lock); spin_unlock(&pbd_delwrite_lock);
pagebuf_run_queues(NULL); blk_run_queues();
if (pinptr) if (pinptr)
*pinptr = pincount; *pinptr = pincount;
......
...@@ -128,6 +128,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */ ...@@ -128,6 +128,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
PBF_FORCEIO = (1 << 21), PBF_FORCEIO = (1 << 21),
PBF_FLUSH = (1 << 22), /* flush disk write cache */ PBF_FLUSH = (1 << 22), /* flush disk write cache */
PBF_READ_AHEAD = (1 << 23), PBF_READ_AHEAD = (1 << 23),
PBF_RUN_QUEUES = (1 << 24), /* run block device task queue */
} page_buf_flags_t; } page_buf_flags_t;
...@@ -239,10 +240,6 @@ typedef struct page_buf_s { ...@@ -239,10 +240,6 @@ typedef struct page_buf_s {
} page_buf_t; } page_buf_t;
/*
* page_buf module entry points
*/
/* Finding and Reading Buffers */ /* Finding and Reading Buffers */
extern page_buf_t *pagebuf_find( /* find buffer for block if */ extern page_buf_t *pagebuf_find( /* find buffer for block if */
...@@ -281,7 +278,6 @@ extern int pagebuf_associate_memory( ...@@ -281,7 +278,6 @@ extern int pagebuf_associate_memory(
void *, void *,
size_t); size_t);
extern void pagebuf_hold( /* increment reference count */ extern void pagebuf_hold( /* increment reference count */
page_buf_t *); /* buffer to hold */ page_buf_t *); /* buffer to hold */
...@@ -291,7 +287,7 @@ extern void pagebuf_readahead( /* read ahead into cache */ ...@@ -291,7 +287,7 @@ extern void pagebuf_readahead( /* read ahead into cache */
size_t, /* length of range */ size_t, /* length of range */
page_buf_flags_t); /* additional read flags */ page_buf_flags_t); /* additional read flags */
/* Writing and Releasing Buffers */ /* Releasing Buffers */
extern void pagebuf_free( /* deallocate a buffer */ extern void pagebuf_free( /* deallocate a buffer */
page_buf_t *); /* buffer to deallocate */ page_buf_t *); /* buffer to deallocate */
...@@ -314,11 +310,7 @@ extern int pagebuf_lock( /* lock buffer */ ...@@ -314,11 +310,7 @@ extern int pagebuf_lock( /* lock buffer */
extern void pagebuf_unlock( /* unlock buffer */ extern void pagebuf_unlock( /* unlock buffer */
page_buf_t *); /* buffer to unlock */ page_buf_t *); /* buffer to unlock */
/* Buffer Utility Routines */ /* Buffer Read and Write Routines */
static inline int pagebuf_geterror(page_buf_t *pb)
{
return (pb ? pb->pb_error : ENOMEM);
}
extern void pagebuf_iodone( /* mark buffer I/O complete */ extern void pagebuf_iodone( /* mark buffer I/O complete */
page_buf_t *, /* buffer to mark */ page_buf_t *, /* buffer to mark */
...@@ -339,21 +331,9 @@ extern int pagebuf_iostart( /* start I/O on a buffer */ ...@@ -339,21 +331,9 @@ extern int pagebuf_iostart( /* start I/O on a buffer */
extern int pagebuf_iorequest( /* start real I/O */ extern int pagebuf_iorequest( /* start real I/O */
page_buf_t *); /* buffer to convey to device */ page_buf_t *); /* buffer to convey to device */
/*
* pagebuf_iorequest is the core I/O request routine.
* It assumes that the buffer is well-formed and
* mapped and ready for physical I/O, unlike
* pagebuf_iostart() and pagebuf_iophysio(). Those
* routines call the inode pagebuf_ioinitiate routine to start I/O,
* if it is present, or else call pagebuf_iorequest()
* directly if the inode pagebuf_ioinitiate routine is not present.
*/
extern int pagebuf_iowait( /* wait for buffer I/O done */ extern int pagebuf_iowait( /* wait for buffer I/O done */
page_buf_t *); /* buffer to wait on */ page_buf_t *); /* buffer to wait on */
extern caddr_t pagebuf_offset(page_buf_t *, size_t);
extern void pagebuf_iomove( /* move data in/out of pagebuf */ extern void pagebuf_iomove( /* move data in/out of pagebuf */
page_buf_t *, /* buffer to manipulate */ page_buf_t *, /* buffer to manipulate */
size_t, /* starting buffer offset */ size_t, /* starting buffer offset */
...@@ -361,6 +341,22 @@ extern void pagebuf_iomove( /* move data in/out of pagebuf */ ...@@ -361,6 +341,22 @@ extern void pagebuf_iomove( /* move data in/out of pagebuf */
caddr_t, /* data pointer */ caddr_t, /* data pointer */
page_buf_rw_t); /* direction */ page_buf_rw_t); /* direction */
/*
 * Issue I/O on a buffer via its strategy routine, when one is set.
 *
 * If pb->pb_strat is non-NULL it is invoked with the buffer and its
 * return value is passed back to the caller; otherwise the buffer is
 * handed directly to pagebuf_iorequest().
 */
static inline int pagebuf_iostrategy(page_buf_t *pb)
{
	if (pb->pb_strat)
		return pb->pb_strat(pb);

	return pagebuf_iorequest(pb);
}
/*
 * Fetch the error recorded on a buffer.
 *
 * Returns pb->pb_error, or ENOMEM when no buffer was supplied
 * (pb is NULL).
 */
static inline int pagebuf_geterror(page_buf_t *pb)
{
	if (!pb)
		return ENOMEM;

	return pb->pb_error;
}
/* Buffer Utility Routines */
extern caddr_t pagebuf_offset( /* pointer at offset in buffer */
page_buf_t *, /* buffer to offset into */
size_t); /* offset */
/* Pinning Buffer Storage in Memory */ /* Pinning Buffer Storage in Memory */
extern void pagebuf_pin( /* pin buffer in memory */ extern void pagebuf_pin( /* pin buffer in memory */
...@@ -369,33 +365,24 @@ extern void pagebuf_pin( /* pin buffer in memory */ ...@@ -369,33 +365,24 @@ extern void pagebuf_pin( /* pin buffer in memory */
extern void pagebuf_unpin( /* unpin buffered data */ extern void pagebuf_unpin( /* unpin buffered data */
page_buf_t *); /* buffer to unpin */ page_buf_t *); /* buffer to unpin */
extern int pagebuf_ispin( page_buf_t *); /* check if pagebuf is pinned */ extern int pagebuf_ispin( /* check if buffer is pinned */
page_buf_t *); /* buffer to check */
/* Reading and writing pages */
extern void pagebuf_delwri_dequeue(page_buf_t *); /* Delayed Write Buffer Routines */
#define PBDF_WAIT 0x01 #define PBDF_WAIT 0x01
#define PBDF_TRYLOCK 0x02 #define PBDF_TRYLOCK 0x02
extern void pagebuf_delwri_flush( extern void pagebuf_delwri_flush(
struct pb_target *, pb_target_t *,
unsigned long, unsigned long,
int *); int *);
extern int pagebuf_init(void); extern void pagebuf_delwri_dequeue(
extern void pagebuf_terminate(void); page_buf_t *);
static __inline__ int __pagebuf_iorequest(page_buf_t *pb) /* Buffer Daemon Setup Routines */
{
if (pb->pb_strat)
return pb->pb_strat(pb);
return pagebuf_iorequest(pb);
}
static __inline__ void pagebuf_run_queues(page_buf_t *pb) extern int pagebuf_init(void);
{ extern void pagebuf_terminate(void);
if (!pb || atomic_read(&pb->pb_io_remaining))
blk_run_queues();
}
#endif /* __PAGE_BUF_H__ */ #endif /* __PAGE_BUF_H__ */
...@@ -113,7 +113,8 @@ pagebuf_lock( ...@@ -113,7 +113,8 @@ pagebuf_lock(
ASSERT(pb->pb_flags & _PBF_LOCKABLE); ASSERT(pb->pb_flags & _PBF_LOCKABLE);
PB_TRACE(pb, PB_TRACE_REC(lock), 0); PB_TRACE(pb, PB_TRACE_REC(lock), 0);
pagebuf_run_queues(pb); if (atomic_read(&pb->pb_io_remaining))
blk_run_queues();
down(&pb->pb_sema); down(&pb->pb_sema);
PB_SET_OWNER(pb); PB_SET_OWNER(pb);
PB_TRACE(pb, PB_TRACE_REC(locked), 0); PB_TRACE(pb, PB_TRACE_REC(locked), 0);
......
...@@ -215,21 +215,16 @@ extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset) ...@@ -215,21 +215,16 @@ extern inline xfs_caddr_t xfs_buf_offset(page_buf_t *bp, size_t offset)
static inline int xfs_bawrite(void *mp, page_buf_t *bp) static inline int xfs_bawrite(void *mp, page_buf_t *bp)
{ {
int ret;
bp->pb_fspriv3 = mp; bp->pb_fspriv3 = mp;
bp->pb_strat = xfs_bdstrat_cb; bp->pb_strat = xfs_bdstrat_cb;
xfs_buf_undelay(bp); xfs_buf_undelay(bp);
if ((ret = pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC)) == 0) return pagebuf_iostart(bp, PBF_WRITE | PBF_ASYNC | PBF_RUN_QUEUES);
pagebuf_run_queues(bp);
return ret;
} }
static inline void xfs_buf_relse(page_buf_t *bp) static inline void xfs_buf_relse(page_buf_t *bp)
{ {
if ((bp->pb_flags & _PBF_LOCKABLE) && !bp->pb_relse) if ((bp->pb_flags & _PBF_LOCKABLE) && !bp->pb_relse)
pagebuf_unlock(bp); pagebuf_unlock(bp);
pagebuf_rele(bp); pagebuf_rele(bp);
} }
...@@ -263,23 +258,19 @@ static inline void xfs_buf_relse(page_buf_t *bp) ...@@ -263,23 +258,19 @@ static inline void xfs_buf_relse(page_buf_t *bp)
static inline int XFS_bwrite(page_buf_t *pb) static inline int XFS_bwrite(page_buf_t *pb)
{ {
int sync = (pb->pb_flags & PBF_ASYNC) == 0; int iowait = (pb->pb_flags & PBF_ASYNC) == 0;
int error; int error = 0;
pb->pb_flags |= PBF_SYNC; pb->pb_flags |= PBF_SYNC;
if (!iowait)
pb->pb_flags |= PBF_RUN_QUEUES;
xfs_buf_undelay(pb); xfs_buf_undelay(pb);
pagebuf_iostrategy(pb);
__pagebuf_iorequest(pb); if (iowait) {
if (sync) {
error = pagebuf_iowait(pb); error = pagebuf_iowait(pb);
xfs_buf_relse(pb); xfs_buf_relse(pb);
} else {
pagebuf_run_queues(pb);
error = 0;
} }
return error; return error;
} }
...@@ -320,4 +311,4 @@ static inline int xfs_bdwrite(void *mp, page_buf_t *bp) ...@@ -320,4 +311,4 @@ static inline int xfs_bdwrite(void *mp, page_buf_t *bp)
#define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target)) #define xfs_buf_get_noaddr(len, target) pagebuf_get_no_daddr((len), (target))
#define xfs_buf_free(bp) pagebuf_free(bp) #define xfs_buf_free(bp) pagebuf_free(bp)
#endif #endif /* __XFS_BUF_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment