Commit 8f20dd0f authored by Nathan Scott, committed by Christoph Hellwig

[XFS] Fix delayed write buffer handling to use the correct list

interfaces, add validity checks, remove unused code, fix comments.

SGI Modid: xfs-linux:xfs-kern:169043a
parent c582fee9
......@@ -31,14 +31,10 @@
*/
/*
* page_buf.c
*
* The page_buf module provides an abstract buffer cache model on top of
* the Linux page cache. Cached metadata blocks for a file system are
* hashed to the inode for the block device. The page_buf module
* assembles buffer (xfs_buf_t) objects on demand to aggregate such
* cached pages for I/O.
*
* The xfs_buf.c code provides an abstract buffer cache model on top
* of the Linux page cache. Cached metadata blocks for a file system
* are hashed to the inode for the block device. xfs_buf.c assembles
* buffers (xfs_buf_t) on demand to aggregate such cached pages for I/O.
*
* Written by Steve Lord, Jim Mostek, Russell Cattelan
* and Rajagopal Ananthanarayanan ("ananth") at SGI.
......@@ -251,7 +247,7 @@ _pagebuf_initialize(
pb->pb_file_offset = range_base;
/*
* Set buffer_length and count_desired to the same value initially.
* IO routines should use count_desired, which will be the same in
* I/O routines should use count_desired, which will be the same in
* most cases but may be reset (e.g. XFS recovery).
*/
pb->pb_buffer_length = pb->pb_count_desired = range_length;
......@@ -514,8 +510,7 @@ _pagebuf_find( /* find buffer for block */
size_t range_length;
int hval;
pb_hash_t *h;
struct list_head *p;
xfs_buf_t *pb;
xfs_buf_t *pb, *n;
int not_locked;
range_base = (ioff << BBSHIFT);
......@@ -531,9 +526,7 @@ _pagebuf_find( /* find buffer for block */
h = &pbhash[hval];
spin_lock(&h->pb_hash_lock);
list_for_each(p, &h->pb_hash) {
pb = list_entry(p, xfs_buf_t, pb_hash_list);
list_for_each_entry_safe(pb, n, &h->pb_hash, pb_hash_list) {
if (pb->pb_target == target &&
pb->pb_file_offset == range_base &&
pb->pb_buffer_length == range_length) {
......@@ -1116,18 +1109,12 @@ void
pagebuf_iodone_work(
void *v)
{
xfs_buf_t *pb = (xfs_buf_t *)v;
xfs_buf_t *bp = (xfs_buf_t *)v;
if (pb->pb_iodone) {
(*(pb->pb_iodone)) (pb);
return;
}
if (pb->pb_flags & PBF_ASYNC) {
if (!pb->pb_relse)
pagebuf_unlock(pb);
pagebuf_rele(pb);
}
if (bp->pb_iodone)
(*(bp->pb_iodone))(bp);
else if (bp->pb_flags & PBF_ASYNC)
xfs_buf_relse(bp);
}
void
......@@ -1397,22 +1384,7 @@ _pagebuf_ioapply(
}
/*
* pagebuf_iorequest
*
* pagebuf_iorequest is the core I/O request routine.
* It assumes that the buffer is well-formed and
* mapped and ready for physical I/O, unlike
* pagebuf_iostart() and pagebuf_iophysio(). Those
* routines call the pagebuf_ioinitiate routine to start I/O,
* if it is present, or else call pagebuf_iorequest()
* directly if the pagebuf_ioinitiate routine is not present.
*
* This function will be responsible for ensuring access to the
* pages is restricted whilst I/O is in progress - for locking
* pagebufs the pagebuf lock is the mediator, for non-locking
* pagebufs the pages will be locked. In the locking case we
* need to use the pagebuf lock as multiple meta-data buffers
* will reference the same page.
* pagebuf_iorequest -- the core I/O request routine.
*/
int
pagebuf_iorequest( /* start real I/O */
......@@ -1549,6 +1521,8 @@ pagebuf_delwri_queue(
int unlock)
{
PB_TRACE(pb, "delwri_q", (long)unlock);
ASSERT(pb->pb_flags & PBF_DELWRI);
spin_lock(&pbd_delwrite_lock);
/* If already in the queue, dequeue and place at tail */
if (!list_empty(&pb->pb_list)) {
......@@ -1602,8 +1576,8 @@ STATIC int
pagebuf_daemon(
void *data)
{
xfs_buf_t *pb;
struct list_head *curr, *next, tmp;
struct list_head tmp;
xfs_buf_t *pb, *n;
/* Set up the thread */
daemonize("xfsbufd");
......@@ -1623,14 +1597,11 @@ pagebuf_daemon(
schedule_timeout(xfs_flush_interval);
spin_lock(&pbd_delwrite_lock);
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, xfs_buf_t, pb_list);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
PB_TRACE(pb, "walkq1", (long)pagebuf_ispin(pb));
ASSERT(pb->pb_flags & PBF_DELWRI);
if ((pb->pb_flags & PBF_DELWRI) &&
!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
if (!pagebuf_ispin(pb) && !pagebuf_cond_lock(pb)) {
if (!force_flush &&
time_before(jiffies, pb->pb_flushtime)) {
pagebuf_unlock(pb);
......@@ -1642,12 +1613,11 @@ pagebuf_daemon(
list_move(&pb->pb_list, &tmp);
}
}
spin_unlock(&pbd_delwrite_lock);
while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list);
list_del_init(&pb->pb_list);
pagebuf_iostrategy(pb);
blk_run_address_space(pb->pb_target->pbr_mapping);
}
......@@ -1664,31 +1634,24 @@ pagebuf_daemon(
void
pagebuf_delwri_flush(
xfs_buftarg_t *target,
u_long flags,
int wait,
int *pinptr)
{
xfs_buf_t *pb;
struct list_head *curr, *next, tmp;
struct list_head tmp;
xfs_buf_t *pb, *n;
int pincount = 0;
pagebuf_runall_queues(pagebuf_dataio_workqueue);
pagebuf_runall_queues(pagebuf_logio_workqueue);
spin_lock(&pbd_delwrite_lock);
INIT_LIST_HEAD(&tmp);
spin_lock(&pbd_delwrite_lock);
list_for_each_entry_safe(pb, n, &pbd_delwrite_queue, pb_list) {
list_for_each_safe(curr, next, &pbd_delwrite_queue) {
pb = list_entry(curr, xfs_buf_t, pb_list);
/*
* Skip other targets, markers and in progress buffers
*/
if ((pb->pb_flags == 0) || (pb->pb_target != target) ||
!(pb->pb_flags & PBF_DELWRI)) {
if (pb->pb_target != target)
continue;
}
ASSERT(pb->pb_flags & PBF_DELWRI);
PB_TRACE(pb, "walkq2", (long)pagebuf_ispin(pb));
if (pagebuf_ispin(pb)) {
pincount++;
......@@ -1699,33 +1662,33 @@ pagebuf_delwri_flush(
pb->pb_flags |= PBF_WRITE;
list_move(&pb->pb_list, &tmp);
}
/* ok found all the items that can be worked on
* drop the lock and process the private list */
spin_unlock(&pbd_delwrite_lock);
list_for_each_safe(curr, next, &tmp) {
pb = list_entry(curr, xfs_buf_t, pb_list);
if (flags & PBDF_WAIT)
/*
* Dropped the delayed write list lock, now walk the temporary list
*/
list_for_each_entry_safe(pb, n, &tmp, pb_list) {
if (wait)
pb->pb_flags &= ~PBF_ASYNC;
else
list_del_init(curr);
list_del_init(&pb->pb_list);
pagebuf_lock(pb);
pagebuf_iostrategy(pb);
}
/*
* Remaining list items must be flushed before returning
*/
while (!list_empty(&tmp)) {
pb = list_entry(tmp.next, xfs_buf_t, pb_list);
list_del_init(&pb->pb_list);
pagebuf_iowait(pb);
if (!pb->pb_relse)
pagebuf_unlock(pb);
pagebuf_rele(pb);
xfs_iowait(pb);
xfs_buf_relse(pb);
}
if (flags & PBDF_WAIT)
if (wait)
blk_run_address_space(target->pbr_mapping);
if (pinptr)
......
......@@ -300,14 +300,10 @@ extern int pagebuf_ispin( /* check if buffer is pinned */
/* Delayed Write Buffer Routines */
#define PBDF_WAIT 0x01
extern void pagebuf_delwri_flush(
extern void xfs_buf_delwri_flush(xfs_buftarg_t *, int, int *);
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
xfs_buftarg_t *,
unsigned long,
int *);
extern void pagebuf_delwri_dequeue(
xfs_buf_t *);
/* Buffer Daemon Setup Routines */
......
/*
* Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
* Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License as
......@@ -153,8 +153,7 @@ xfs_set_inodeops(
inode->i_mapping->a_ops = &linvfs_aops;
} else {
inode->i_op = &linvfs_file_inode_operations;
init_special_inode(inode, inode->i_mode,
inode->i_rdev);
init_special_inode(inode, inode->i_mode, inode->i_rdev);
}
}
......@@ -287,7 +286,7 @@ void
xfs_flush_buftarg(
xfs_buftarg_t *btp)
{
pagebuf_delwri_flush(btp, PBDF_WAIT, NULL);
pagebuf_delwri_flush(btp, 1, NULL);
}
void
......@@ -448,7 +447,8 @@ linvfs_clear_inode(
#define SYNCD_FLAGS (SYNC_FSDATA|SYNC_BDFLUSH|SYNC_ATTR)
STATIC int
syncd(void *arg)
xfssyncd(
void *arg)
{
vfs_t *vfsp = (vfs_t *) arg;
int error;
......@@ -480,11 +480,12 @@ syncd(void *arg)
}
STATIC int
linvfs_start_syncd(vfs_t *vfsp)
linvfs_start_syncd(
vfs_t *vfsp)
{
int pid;
int pid;
pid = kernel_thread(syncd, (void *) vfsp,
pid = kernel_thread(xfssyncd, (void *) vfsp,
CLONE_VM | CLONE_FS | CLONE_FILES);
if (pid < 0)
return pid;
......@@ -493,7 +494,8 @@ linvfs_start_syncd(vfs_t *vfsp)
}
STATIC void
linvfs_stop_syncd(vfs_t *vfsp)
linvfs_stop_syncd(
vfs_t *vfsp)
{
vfsp->vfs_flag |= VFS_UMOUNT;
wmb();
......
......@@ -213,9 +213,9 @@ xfs_cleanup(void)
*/
STATIC int
xfs_start_flags(
struct vfs *vfs,
struct xfs_mount_args *ap,
struct xfs_mount *mp,
int ronly)
struct xfs_mount *mp)
{
/* Values are in BBs */
if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
......@@ -305,7 +305,7 @@ xfs_start_flags(
* no recovery flag requires a read-only mount
*/
if (ap->flags & XFSMNT_NORECOVERY) {
if (!ronly) {
if (!(vfs->vfs_flag & VFS_RDONLY)) {
cmn_err(CE_WARN,
"XFS: tried to mount a FS read-write without recovery!");
return XFS_ERROR(EINVAL);
......@@ -327,10 +327,12 @@ xfs_start_flags(
*/
STATIC int
xfs_finish_flags(
struct vfs *vfs,
struct xfs_mount_args *ap,
struct xfs_mount *mp,
int ronly)
struct xfs_mount *mp)
{
int ronly = (vfs->vfs_flag & VFS_RDONLY);
/* Fail a mount where the logbuf is smaller then the log stripe */
if (XFS_SB_VERSION_HASLOGV2(&mp->m_sb)) {
if ((ap->logbufsize == -1) &&
......@@ -420,7 +422,6 @@ xfs_mount(
struct bhv_desc *p;
struct xfs_mount *mp = XFS_BHVTOM(bhvp);
struct block_device *ddev, *logdev, *rtdev;
int ronly = (vfsp->vfs_flag & VFS_RDONLY);
int flags = 0, error;
ddev = vfsp->vfs_super->s_bdev;
......@@ -472,13 +473,13 @@ xfs_mount(
/*
* Setup flags based on mount(2) options and then the superblock
*/
error = xfs_start_flags(args, mp, ronly);
error = xfs_start_flags(vfsp, args, mp);
if (error)
goto error;
error = xfs_readsb(mp);
if (error)
goto error;
error = xfs_finish_flags(args, mp, ronly);
error = xfs_finish_flags(vfsp, args, mp);
if (error) {
xfs_freesb(mp);
goto error;
......@@ -636,8 +637,7 @@ xfs_mntupdate(
*/
do {
VFS_SYNC(vfsp, REMOUNT_READONLY_FLAGS, NULL, error);
pagebuf_delwri_flush(mp->m_ddev_targp, PBDF_WAIT,
&pincount);
pagebuf_delwri_flush(mp->m_ddev_targp, 1, &pincount);
if(0 == pincount) { delay(50); count++; }
} while (count < 2);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment