Commit 8f20dd0f, authored by Nathan Scott, committed by Christoph Hellwig

[XFS] Fix delayed write buffer handling to use the correct list interfaces, add validity checks, remove unused code, fix comments.

SGI Modid: xfs-linux:xfs-kern:169043a
parent c582fee9
...@@ -31,14 +31,10 @@ ...@@ -31,14 +31,10 @@
*/ */
/* /*
* page_buf.c * The xfs_buf.c code provides an abstract buffer cache model on top
* * of the Linux page cache. Cached metadata blocks for a file system
* The page_buf module provides an abstract buffer cache model on top of * are hashed to the inode for the block device. xfs_buf.c assembles
* the Linux page cache. Cached metadata blocks for a file system are * buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
* hashed to the inode for the block device. The page_buf module
* assembles buffer (xfs_buf_t) objects on demand to aggregate such
* cached pages for I/O.
*
* *
* Written by Steve Lord, Jim Mostek, Russell Cattelan * Written by Steve Lord, Jim Mostek, Russell Cattelan
* and Rajagopal Ananthanarayanan ("ananth") at SGI. * and Rajagopal Ananthanarayanan ("ananth") at SGI.
...@@ -251,7 +247,7 @@ _pagebuf_initialize( ...@@ -251,7 +247,7 @@ _pagebuf_initialize(
pb->pb_file_offset = range_base; pb->pb_file_offset = range_base;
/* /*
* Set buffer_length and count_desired to the same value initially. * Set buffer_length and count_desired to the same value initially.
* IO routines should use count_desired, which will be the same in * I/O routines should use count_desired, which will be the same in
* most cases but may be reset (e.g. XFS recovery). * most cases but may be reset (e.g. XFS recovery).
*/ */
pb->pb_buffer_length = pb->pb_count_desired = range_length; pb->pb_buffer_length = pb->pb_count_desired = range_length;
...@@ -514,8 +510,7 @@ _pagebuf_find( /* find buffer for block */ ...@@ -514,8 +510,7 @@ _pagebuf_find( /* find buffer for block */
size_t range_length; size_t range_length;
int hval; int hval;
pb_hash_t *h; pb_hash_t *h;
struct list_head *p; xfs_buf_t *pb, *n;
xfs_buf_t *pb;
int not_locked; int not_locked;
range_base = (ioff << BBSHIFT); range_base = (ioff << BBSHIFT);
...@@ -531,9 +526,7 @@ _pagebuf_find( /* find buffer for block */ ...@@ -531,9 +526,7 @@ _pagebuf_find( /* find buffer for block */
h = &pbhash[hval]; h = &pbhash[hval];
spin_lock(&h->pb_hash_lock); spin_lock(&h->pb_hash_lock);
list_for_each(p, &h->pb_hash) { list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
pb = list_entry(p, xfs_buf_t, pb_hash_list);
if (pb->pb_target == target && if (pb->pb_target == target &&
pb->pb_file_offset == range_base && pb->pb_file_offset == range_base &&
pb->pb_buffer_length == range_length) { pb->pb_buffer_length == range_length) {
...@@ -1116,18 +1109,12 @@ void ...@@ -1116,18 +1109,12 @@ void
pagebuf_iodone_work( pagebuf_iodone_work(
void *v) void *v)
{ {
xfs_buf_t *pb = (xfs_buf_t *)v; xfs_buf_t *bp = (xfs_buf_t *)v;
if (pb->pb_iodone) { if (bp->pb_iodone)
(*(pb->pb_iodone)) (pb); (*(bp->pb_iodone))(bp);
return; else if (bp->pb_flags & PBF_ASYNC)
} xfs_buf_relse(bp);
if (pb->pb_flags & PBF_ASYNC) {
if (!pb->pb_relse)
pagebuf_unlock(pb);
pagebuf_rele(pb);
}
} }
void void
...@@ -1397,22 +1384,7 @@ _pagebuf_ioapply( ...@@ -1397,22 +1384,7 @@ _pagebuf_ioapply(
} }
/* /*
* pagebuf_iorequest * pagebuf_iorequest -- the core I/O request routine.
*
* pagebuf_iorequest is the core I/O request routine.
* It assumes that the buffer is well-formed and
* mapped and ready for physical I/O, unlike
* pagebuf_iostart() and pagebuf_iophysio(). Those
* routines call the pagebuf_ioinitiate routine to start I/O,
* if it is present, or else call pagebuf_iorequest()
* directly if the pagebuf_ioinitiate routine is not present.
*
* This function will be responsible for ensuring access to the
* pages is restricted whilst I/O is in progress - for locking
* pagebufs the pagebuf lock is the mediator, for non-locking
* pagebufs the pages will be locked. In the locking case we
* need to use the pagebuf lock as multiple meta-data buffers
* will reference the same page.
*/ */
int int
pagebuf_iorequest( /* start real I/O */ pagebuf_iorequest( /* start real I/O */
...@@ -1549,6 +1521,8 @@ pagebuf_delwri_queue( ...@@ -1549,6 +1521,8 @@ pagebuf_delwri_queue(
int unlock) int unlock)
{ {
PB_TRACE(pb, "delwri_q", (long)unlock); PB_TRACE(pb, "delwri_q", (long)unlock);
ASSERT(pb->pb_flags & PBF_DELWRI);
spin_lock(&pbd_delwrite_lock); spin_lock(&pbd_delwrite_lock);
/* If already in the queue, dequeue and place at tail */ /* If already in the queue, dequeue and place at tail */
if (!list_empty(&pb->pb_list)) { if (!list_empty(&pb->pb_list)) {
...@@ -1602,8 +1576,8 @@ STATIC int ...@@ -1602,8 +1576,8 @@ STATIC int
pagebuf_daemon( pagebuf_daemon(
void *data) void *data)
{ {
xfs_buf_t *pb; struct list_head tmp;
struct list_head *curr, *next, tmp; xfs_buf_t *pb, *n;
/* Set up the thread */ /* Set up the thread */
daemonize("xfsbufd"); daemonize("xfsbufd");
...@@ -1623,14 +1597,11 @@ pagebuf_daemon( ...@@ -1623,14 +1597,11 @@ pagebuf_daemon(
schedule_timeout(xfs_flush_interval); schedule_timeout(xfs_flush_interval);
spin_lock(&pbd_delwrite_lock); spin_lock(&pbd_delwrite_lock);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, xfs_buf_t, pb_list);
PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb)); PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
ASSERT(pb->pb_flags & PBF_DELWRI);
if ((pb->pb_flags & PBF_DELWRI) && if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
if (!force_flush && if (!force_flush &&
time_before(jiffies, pb->pb_flushtime)) { time_before(jiffies, pb->pb_flushtime)) {
pagebuf_unlock(pb); pagebuf_unlock(pb);
...@@ -1642,12 +1613,11 @@ pagebuf_daemon( ...@@ -1642,12 +1613,11 @@ pagebuf_daemon(
list_move(&pb->pb_list, &tmp); list_move(&pb->pb_list, &tmp);
} }
} }
spin_unlock(&pbd_delwrite_lock); spin_unlock(&pbd_delwrite_lock);
while (!list_empty(&tmp)) { while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list); pb = list_entry(tmp.next, xfs_buf_t, pb_list);
list_del_init(&pb->pb_list); list_del_init(&pb->pb_list);
pagebuf_iostrategy(pb); pagebuf_iostrategy(pb);
blk_run_address_space(pb->pb_target->pbr_mapping); blk_run_address_space(pb->pb_target->pbr_mapping);
} }
...@@ -1664,31 +1634,24 @@ pagebuf_daemon( ...@@ -1664,31 +1634,24 @@ pagebuf_daemon(
void void
pagebuf_delwri_flush( pagebuf_delwri_flush(
xfs_buftarg_t *target, xfs_buftarg_t *target,
u_long flags, int wait,
int *pinptr) int *pinptr)
{ {
xfs_buf_t *pb; struct list_head tmp;
struct list_head *curr, *next, tmp; xfs_buf_t *pb, *n;
int pincount = 0; int pincount = 0;
pagebuf_runall_queues(pagebuf_dataio_workqueue); pagebuf_runall_queues(pagebuf_dataio_workqueue);
pagebuf_runall_queues(pagebuf_logio_workqueue); pagebuf_runall_queues(pagebuf_logio_workqueue);
spin_lock(&pbd_delwrite_lock);
INIT_LIST_HEAD(&tmp); INIT_LIST_HEAD(&tmp);
spin_lock(&pbd_delwrite_lock);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
list_for_each_safe(curr, next, &pbd_delwrite_queue) { if (pb->pb_target != target)
pb = list_entry(curr, xfs_buf_t, pb_list);
/*
* Skip other targets, markers and in progress buffers
*/
if ((pb->pb_flags == 0) || (pb->pb_target != target) ||
!(pb->pb_flags & PBF_DELWRI)) {
continue; continue;
}
ASSERT(pb->pb_flags & PBF_DELWRI);
PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb)); PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
if (pagebuf_ispin(pb)) { if (pagebuf_ispin(pb)) {
pincount++; pincount++;
...@@ -1699,33 +1662,33 @@ pagebuf_delwri_flush( ...@@ -1699,33 +1662,33 @@ pagebuf_delwri_flush(
pb->pb_flags |= PBF_WRITE; pb->pb_flags |= PBF_WRITE;
list_move(&pb->pb_list, &tmp); list_move(&pb->pb_list, &tmp);
} }
/* ok found all the items that can be worked on
* drop the lock and process the private list */
spin_unlock(&pbd_delwrite_lock); spin_unlock(&pbd_delwrite_lock);
list_for_each_safe(curr, next, &tmp) { /*
pb = list_entry(curr, xfs_buf_t, pb_list); * Dropped the delayed write list lock, now walk the temporary list
*/
if (flags & PBDF_WAIT) list_for_each_entry_safe(pb, n, &tmp, pb_list) {
if (wait)
pb->pb_flags &= ~PBF_ASYNC; pb->pb_flags &= ~PBF_ASYNC;
else else
list_del_init(curr); list_del_init(&pb->pb_list);
pagebuf_lock(pb); pagebuf_lock(pb);
pagebuf_iostrategy(pb); pagebuf_iostrategy(pb);
} }
/*
* Remaining list items must be flushed before returning
*/
while (!list_empty(&tmp)) { while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list); pb = list_entry(tmp.next, xfs_buf_t, pb_list);
list_del_init(&pb->pb_list); list_del_init(&pb->pb_list);
pagebuf_iowait(pb); xfs_iowait(pb);
if (!pb->pb_relse) xfs_buf_relse(pb);
pagebuf_unlock(pb);
pagebuf_rele(pb);
} }
if (flags & PBDF_WAIT) if (wait)
blk_run_address_space(target->pbr_mapping); blk_run_address_space(target->pbr_mapping);
if (pinptr) if (pinptr)
......
...@@ -300,14 +300,10 @@ extern int pagebuf_ispin( /* check if buffer is pinned */ ...@@ -300,14 +300,10 @@ extern int pagebuf_ispin( /* check if buffer is pinned */
/* Delayed Write Buffer Routines */ /* Delayed Write Buffer Routines */
#define PBDF_WAIT 0x01 extern void xfs_buf_delwri_flush(xfs_buftarg_t *, int, int *);
extern void pagebuf_delwri_flush( extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
xfs_buftarg_t *, xfs_buftarg_t *,
unsigned long, unsigned long,
int *);
extern void pagebuf_delwri_dequeue(
xfs_buf_t *);
/* Buffer Daemon Setup Routines */ /* Buffer Daemon Setup Routines */
......
/* /*
* Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved. * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
* *
* This program is free software; you can redistribute it and/or modify it * This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as * under the terms of version 2 of the GNU General Public License as
...@@ -153,8 +153,7 @@ xfs_set_inodeops( ...@@ -153,8 +153,7 @@ xfs_set_inodeops(
inode->i_mapping->a_ops = &linvfs_aops; inode->i_mapping->a_ops = &linvfs_aops;
} else { } else {
inode->i_op = &linvfs_file_inode_operations; inode->i_op = &linvfs_file_inode_operations;
init_special_inode(inode, inode->i_mode, init_special_inode(inode, inode->i_mode, inode->i_rdev);
inode->i_rdev);
} }
} }
...@@ -287,7 +286,7 @@ void ...@@ -287,7 +286,7 @@ void
xfs_flush_buftarg( xfs_flush_buftarg(
xfs_buftarg_t *btp) xfs_buftarg_t *btp)
{ {
pagebuf_delwri_flush(btp, PBDF_WAIT, NULL); pagebuf_delwri_flush(btp, 1, NULL);
} }
void void
...@@ -448,7 +447,8 @@ linvfs_clear_inode( ...@@ -448,7 +447,8 @@ linvfs_clear_inode(
#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR) #define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
STATIC int STATIC int
syncd(void *arg) xfssyncd(
void *arg)
{ {
vfs_t *vfsp = (vfs_t *) arg; vfs_t *vfsp = (vfs_t *) arg;
int error; int error;
...@@ -480,11 +480,12 @@ syncd(void *arg) ...@@ -480,11 +480,12 @@ syncd(void *arg)
} }
STATIC int STATIC int
linvfs_start_syncd(vfs_t *vfsp) linvfs_start_syncd(
vfs_t *vfsp)
{ {
int pid; int pid;
pid = kernel_thread(syncd, (void *) vfsp, pid = kernel_thread(xfssyncd, (void *) vfsp,
CLONE_VM | CLONE_FS | CLONE_FILES); CLONE_VM | CLONE_FS | CLONE_FILES);
if (pid < 0) if (pid < 0)
return pid; return pid;
...@@ -493,7 +494,8 @@ linvfs_start_syncd(vfs_t *vfsp) ...@@ -493,7 +494,8 @@ linvfs_start_syncd(vfs_t *vfsp)
} }
STATIC void STATIC void
linvfs_stop_syncd(vfs_t *vfsp) linvfs_stop_syncd(
vfs_t *vfsp)
{ {
vfsp->vfs_flag |= VFS_UMOUNT; vfsp->vfs_flag |= VFS_UMOUNT;
wmb(); wmb();
......
...@@ -213,9 +213,9 @@ xfs_cleanup(void) ...@@ -213,9 +213,9 @@ xfs_cleanup(void)
*/ */
STATIC int STATIC int
xfs_start_flags( xfs_start_flags(
struct vfs *vfs,
struct xfs_mount_args *ap, struct xfs_mount_args *ap,
struct xfs_mount *mp, struct xfs_mount *mp)
int ronly)
{ {
/* Values are in BBs */ /* Values are in BBs */
if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
...@@ -305,7 +305,7 @@ xfs_start_flags( ...@@ -305,7 +305,7 @@ xfs_start_flags(
* no recovery flag requires a read-only mount * no recovery flag requires a read-only mount
*/ */
if (ap->flags & XFSMNT_NORECOVERY) { if (ap->flags & XFSMNT_NORECOVERY) {
if (!ronly) { if (!(vfs->vfs_flag & VFS_RDONLY)) {
cmn_err(CE_WARN, cmn_err(CE_WARN,
"XFS: tried to mount a FS read-write without recovery!"); "XFS: tried to mount a FS read-write without recovery!");
return XFS_ERROR(EINVAL); return XFS_ERROR(EINVAL);
...@@ -327,10 +327,12 @@ xfs_start_flags( ...@@ -327,10 +327,12 @@ xfs_start_flags(
*/ */
STATIC int STATIC int
xfs_finish_flags( xfs_finish_flags(
struct vfs *vfs,
struct xfs_mount_args *ap, struct xfs_mount_args *ap,
struct xfs_mount *mp, struct xfs_mount *mp)
int ronly)
{ {
int ronly = (vfs->vfs_flag & VFS_RDONLY);
/* Fail a mount where the logbuf is smaller then the log stripe */ /* Fail a mount where the logbuf is smaller then the log stripe */
if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) { if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
if ((ap->logbufsize == -1) && if ((ap->logbufsize == -1) &&
...@@ -420,7 +422,6 @@ xfs_mount( ...@@ -420,7 +422,6 @@ xfs_mount(
struct bhv_desc *p; struct bhv_desc *p;
struct xfs_mount *mp = XFS_BHVTOM(bhvp); struct xfs_mount *mp = XFS_BHVTOM(bhvp);
struct block_device *ddev, *logdev, *rtdev; struct block_device *ddev, *logdev, *rtdev;
int ronly = (vfsp->vfs_flag & VFS_RDONLY);
int flags = 0, error; int flags = 0, error;
ddev = vfsp->vfs_super->s_bdev; ddev = vfsp->vfs_super->s_bdev;
...@@ -472,13 +473,13 @@ xfs_mount( ...@@ -472,13 +473,13 @@ xfs_mount(
/* /*
* Setup flags based on mount(2) options and then the superblock * Setup flags based on mount(2) options and then the superblock
*/ */
error = xfs_start_flags(args, mp, ronly); error = xfs_start_flags(vfsp, args, mp);
if (error) if (error)
goto error; goto error;
error = xfs_readsb(mp); error = xfs_readsb(mp);
if (error) if (error)
goto error; goto error;
error = xfs_finish_flags(args, mp, ronly); error = xfs_finish_flags(vfsp, args, mp);
if (error) { if (error) {
xfs_freesb(mp); xfs_freesb(mp);
goto error; goto error;
...@@ -636,8 +637,7 @@ xfs_mntupdate( ...@@ -636,8 +637,7 @@ xfs_mntupdate(
*/ */
do { do {
VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error); VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error);
pagebuf_delwri_flush(mp->m_ddev_targp, PBDF_WAIT, pagebuf_delwri_flush(mp->m_ddev_targp, 1, &pincount);
&pincount);
if(0 == pincount) { delay(50); count++; } if(0 == pincount) { delay(50); count++; }
} while (count < 2); } while (count < 2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment