Commit 8f20dd0f authored by Nathan Scott, committed by Christoph Hellwig

[XFS] Fix delayed write buffer handling to use the correct list

interfaces, add validity checks, remove unused code, fix comments.

SGI Modid: xfs-linux:xfs-kern:169043a
parent c582fee9
......@@ -31,14 +31,10 @@
*/
/*
* page_buf.c
*
* The page_buf module provides an abstract buffer cache model on top of
* the Linux page cache. Cached metadata blocks for a file system are
* hashed to the inode for the block device. The page_buf module
* assembles buffer (xfs_buf_t) objects on demand to aggregate such
* cached pages for I/O.
*
* The xfs_buf.c code provides an abstract buffer cache model on top
* of the Linux page cache. Cached metadata blocks for a file system
* are hashed to the inode for the block device. xfs_buf.c assembles
* buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
*
* Written by Steve Lord, Jim Mostek, Russell Cattelan
* and Rajagopal Ananthanarayanan ("ananth") at SGI.
......@@ -251,7 +247,7 @@ _pagebuf_initialize(
pb->pb_file_offset = range_base;
/*
* Set buffer_length and count_desired to the same value initially.
* IO routines should use count_desired, which will be the same in
* I/O routines should use count_desired, which will be the same in
* most cases but may be reset (e.g. XFS recovery).
*/
pb->pb_buffer_length = pb->pb_count_desired = range_length;
......@@ -514,8 +510,7 @@ _pagebuf_find( /* find buffer for block */
size_t range_length;
int hval;
pb_hash_t *h;
struct list_head *p;
xfs_buf_t *pb;
xfs_buf_t *pb, *n;
int not_locked;
range_base = (ioff << BBSHIFT);
......@@ -531,9 +526,7 @@ _pagebuf_find( /* find buffer for block */
h = &pbhash[hval];
spin_lock(&h->pb_hash_lock);
list_for_each(p, &h->pb_hash) {
pb = list_entry(p, xfs_buf_t, pb_hash_list);
list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
if (pb->pb_target == target &&
pb->pb_file_offset == range_base &&
pb->pb_buffer_length == range_length) {
......@@ -1116,18 +1109,12 @@ void
pagebuf_iodone_work(
void *v)
{
xfs_buf_t *pb = (xfs_buf_t *)v;
xfs_buf_t *bp = (xfs_buf_t *)v;
if (pb->pb_iodone) {
(*(pb->pb_iodone)) (pb);
return;
}
if (pb->pb_flags & PBF_ASYNC) {
if (!pb->pb_relse)
pagebuf_unlock(pb);
pagebuf_rele(pb);
}
if (bp->pb_iodone)
(*(bp->pb_iodone))(bp);
else if (bp->pb_flags & PBF_ASYNC)
xfs_buf_relse(bp);
}
void
......@@ -1397,22 +1384,7 @@ _pagebuf_ioapply(
}
/*
* pagebuf_iorequest
*
* pagebuf_iorequest is the core I/O request routine.
* It assumes that the buffer is well-formed and
* mapped and ready for physical I/O, unlike
* pagebuf_iostart() and pagebuf_iophysio(). Those
* routines call the pagebuf_ioinitiate routine to start I/O,
* if it is present, or else call pagebuf_iorequest()
* directly if the pagebuf_ioinitiate routine is not present.
*
* This function will be responsible for ensuring access to the
* pages is restricted whilst I/O is in progress - for locking
* pagebufs the pagebuf lock is the mediator, for non-locking
* pagebufs the pages will be locked. In the locking case we
* need to use the pagebuf lock as multiple meta-data buffers
* will reference the same page.
* pagebuf_iorequest -- the core I/O request routine.
*/
int
pagebuf_iorequest( /* start real I/O */
......@@ -1549,6 +1521,8 @@ pagebuf_delwri_queue(
int unlock)
{
PB_TRACE(pb, "delwri_q", (long)unlock);
ASSERT(pb->pb_flags & PBF_DELWRI);
spin_lock(&pbd_delwrite_lock);
/* If already in the queue, dequeue and place at tail */
if (!list_empty(&pb->pb_list)) {
......@@ -1602,8 +1576,8 @@ STATIC int
pagebuf_daemon(
void *data)
{
xfs_buf_t *pb;
struct list_head *curr, *next, tmp;
struct list_head tmp;
xfs_buf_t *pb, *n;
/* Set up the thread */
daemonize("xfsbufd");
......@@ -1623,14 +1597,11 @@ pagebuf_daemon(
schedule_timeout(xfs_flush_interval);
spin_lock(&pbd_delwrite_lock);
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, xfs_buf_t, pb_list);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
ASSERT(pb->pb_flags & PBF_DELWRI);
if ((pb->pb_flags & PBF_DELWRI) &&
!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
if (!force_flush &&
time_before(jiffies, pb->pb_flushtime)) {
pagebuf_unlock(pb);
......@@ -1642,12 +1613,11 @@ pagebuf_daemon(
list_move(&pb->pb_list, &tmp);
}
}
spin_unlock(&pbd_delwrite_lock);
while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list);
list_del_init(&pb->pb_list);
pagebuf_iostrategy(pb);
blk_run_address_space(pb->pb_target->pbr_mapping);
}
......@@ -1664,31 +1634,24 @@ pagebuf_daemon(
void
pagebuf_delwri_flush(
xfs_buftarg_t *target,
u_long flags,
int wait,
int *pinptr)
{
xfs_buf_t *pb;
struct list_head *curr, *next, tmp;
struct list_head tmp;
xfs_buf_t *pb, *n;
int pincount = 0;
pagebuf_runall_queues(pagebuf_dataio_workqueue);
pagebuf_runall_queues(pagebuf_logio_workqueue);
spin_lock(&pbd_delwrite_lock);
INIT_LIST_HEAD(&tmp);
spin_lock(&pbd_delwrite_lock);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, xfs_buf_t, pb_list);
/*
* Skip other targets, markers and in progress buffers
*/
if ((pb->pb_flags == 0) || (pb->pb_target != target) ||
!(pb->pb_flags & PBF_DELWRI)) {
if (pb->pb_target != target)
continue;
}
ASSERT(pb->pb_flags & PBF_DELWRI);
PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
if (pagebuf_ispin(pb)) {
pincount++;
......@@ -1699,33 +1662,33 @@ pagebuf_delwri_flush(
pb->pb_flags |= PBF_WRITE;
list_move(&pb->pb_list, &tmp);
}
/* ok found all the items that can be worked on
* drop the lock and process the private list */
spin_unlock(&pbd_delwrite_lock);
list_for_each_safe(curr, next, &tmp) {
pb = list_entry(curr, xfs_buf_t, pb_list);
if (flags & PBDF_WAIT)
/*
* Dropped the delayed write list lock, now walk the temporary list
*/
list_for_each_entry_safe(pb, n, &tmp, pb_list) {
if (wait)
pb->pb_flags &= ~PBF_ASYNC;
else
list_del_init(curr);
list_del_init(&pb->pb_list);
pagebuf_lock(pb);
pagebuf_iostrategy(pb);
}
/*
* Remaining list items must be flushed before returning
*/
while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list);
list_del_init(&pb->pb_list);
pagebuf_iowait(pb);
if (!pb->pb_relse)
pagebuf_unlock(pb);
pagebuf_rele(pb);
xfs_iowait(pb);
xfs_buf_relse(pb);
}
if (flags & PBDF_WAIT)
if (wait)
blk_run_address_space(target->pbr_mapping);
if (pinptr)
......
......@@ -300,14 +300,10 @@ extern int pagebuf_ispin( /* check if buffer is pinned */
/* Delayed Write Buffer Routines */
#define PBDF_WAIT 0x01
extern void pagebuf_delwri_flush(
extern void xfs_buf_delwri_flush(xfs_buftarg_t *, int, int *);
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
xfs_buftarg_t *,
unsigned long,
int *);
extern void pagebuf_delwri_dequeue(
xfs_buf_t *);
/* Buffer Daemon Setup Routines */
......
/*
* Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
......@@ -153,8 +153,7 @@ xfs_set_inodeops(
inode->i_mapping->a_ops = &linvfs_aops;
} else {
inode->i_op = &linvfs_file_inode_operations;
init_special_inode(inode, inode->i_mode,
inode->i_rdev);
init_special_inode(inode, inode->i_mode, inode->i_rdev);
}
}
......@@ -287,7 +286,7 @@ void
xfs_flush_buftarg(
xfs_buftarg_t *btp)
{
pagebuf_delwri_flush(btp, PBDF_WAIT, NULL);
pagebuf_delwri_flush(btp, 1, NULL);
}
void
......@@ -448,7 +447,8 @@ linvfs_clear_inode(
#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
STATIC int
syncd(void *arg)
xfssyncd(
void *arg)
{
vfs_t *vfsp = (vfs_t *) arg;
int error;
......@@ -480,11 +480,12 @@ syncd(void *arg)
}
STATIC int
linvfs_start_syncd(vfs_t *vfsp)
linvfs_start_syncd(
vfs_t *vfsp)
{
int pid;
int pid;
pid = kernel_thread(syncd, (void *) vfsp,
pid = kernel_thread(xfssyncd, (void *) vfsp,
CLONE_VM | CLONE_FS | CLONE_FILES);
if (pid < 0)
return pid;
......@@ -493,7 +494,8 @@ linvfs_start_syncd(vfs_t *vfsp)
}
STATIC void
linvfs_stop_syncd(vfs_t *vfsp)
linvfs_stop_syncd(
vfs_t *vfsp)
{
vfsp->vfs_flag |= VFS_UMOUNT;
wmb();
......
......@@ -213,9 +213,9 @@ xfs_cleanup(void)
*/
STATIC int
xfs_start_flags(
struct vfs *vfs,
struct xfs_mount_args *ap,
struct xfs_mount *mp,
int ronly)
struct xfs_mount *mp)
{
/* Values are in BBs */
if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
......@@ -305,7 +305,7 @@ xfs_start_flags(
* no recovery flag requires a read-only mount
*/
if (ap->flags & XFSMNT_NORECOVERY) {
if (!ronly) {
if (!(vfs->vfs_flag & VFS_RDONLY)) {
cmn_err(CE_WARN,
"XFS: tried to mount a FS read-write without recovery!");
return XFS_ERROR(EINVAL);
......@@ -327,10 +327,12 @@ xfs_start_flags(
*/
STATIC int
xfs_finish_flags(
struct vfs *vfs,
struct xfs_mount_args *ap,
struct xfs_mount *mp,
int ronly)
struct xfs_mount *mp)
{
int ronly = (vfs->vfs_flag & VFS_RDONLY);
/* Fail a mount where the logbuf is smaller then the log stripe */
if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
if ((ap->logbufsize == -1) &&
......@@ -420,7 +422,6 @@ xfs_mount(
struct bhv_desc *p;
struct xfs_mount *mp = XFS_BHVTOM(bhvp);
struct block_device *ddev, *logdev, *rtdev;
int ronly = (vfsp->vfs_flag & VFS_RDONLY);
int flags = 0, error;
ddev = vfsp->vfs_super->s_bdev;
......@@ -472,13 +473,13 @@ xfs_mount(
/*
* Setup flags based on mount(2) options and then the superblock
*/
error = xfs_start_flags(args, mp, ronly);
error = xfs_start_flags(vfsp, args, mp);
if (error)
goto error;
error = xfs_readsb(mp);
if (error)
goto error;
error = xfs_finish_flags(args, mp, ronly);
error = xfs_finish_flags(vfsp, args, mp);
if (error) {
xfs_freesb(mp);
goto error;
......@@ -636,8 +637,7 @@ xfs_mntupdate(
*/
do {
VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error);
pagebuf_delwri_flush(mp->m_ddev_targp, PBDF_WAIT,
&pincount);
pagebuf_delwri_flush(mp->m_ddev_targp, 1, &pincount);
if(0 == pincount) { delay(50); count++; }
} while (count < 2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment