Merge kleikamp.austin.ibm.com:/home/shaggy/bk/jfs-2.5

into kleikamp.austin.ibm.com:/home/shaggy/bk/resize-2.5

Merge kleikamp.austin.ibm.com:/home/shaggy/bk/jfs-2.5
into kleikamp.austin.ibm.com:/home/shaggy/bk/resize-2.5
fde6699c · Dave Kleikamp · 30c4273f · 1c706086 · fde6699c · fde6699c
Commit fde6699c authored Aug 04, 2002 by Dave Kleikamp
11 changed files
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -38,9 +38,7 @@ int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 		return rc;

-	IWRITE_LOCK(inode);
 	rc |= jfs_commit_inode(inode, 1);
-	IWRITE_UNLOCK(inode);

 	return rc ? -EIO : 0;
 }
@@ -64,10 +62,19 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
 	do {
 		tid = txBegin(ip->i_sb, 0);

+		/*
+		 * The commit_sem cannot be taken before txBegin.
+		 * txBegin may block and there is a chance the inode
+		 * could be marked dirty and need to be committed
+		 * before txBegin unblocks
+		 */
+		down(&JFS_IP(ip)->commit_sem);
+
 		newsize = xtTruncate(tid, ip, length,
 				     COMMIT_TRUNCATE | COMMIT_PWMAP);
 		if (newsize < 0) {
 			txEnd(tid);
+			up(&JFS_IP(ip)->commit_sem);
 			break;
 		}

@@ -76,6 +83,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)

 		txCommit(tid, 1, &ip, 0);
 		txEnd(tid);
+		up(&JFS_IP(ip)->commit_sem);
 	} while (newsize > length);	/* Truncate isn't always atomic */
 }


--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -107,8 +107,10 @@ int jfs_commit_inode(struct inode *inode, int wait)
 	}

 	tid = txBegin(inode->i_sb, COMMIT_INODE);
+	down(&JFS_IP(inode)->commit_sem);
 	rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
 	txEnd(tid);
+	up(&JFS_IP(inode)->commit_sem);
 	return -rc;
 }

@@ -123,25 +125,19 @@ void jfs_write_inode(struct inode *inode, int wait)
 	    !test_cflag(COMMIT_Dirty, inode))
 		return;

-	IWRITE_LOCK(inode);
-
 	if (jfs_commit_inode(inode, wait)) {
 		jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n"));
 	}
-
-	IWRITE_UNLOCK(inode);
 }

 void jfs_delete_inode(struct inode *inode)
 {
 	jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode));

-	IWRITE_LOCK(inode);
 	if (test_cflag(COMMIT_Freewmap, inode))
 		freeZeroLink(inode);

 	diFree(inode);
-	IWRITE_UNLOCK(inode);

 	clear_inode(inode);
 }
@@ -203,8 +199,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,

 	if ((no_size_check ||
 	     ((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) &&
-	    (xtLookup
-	     (ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
+	    (xtLookup(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
 	     == 0) && xlen) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
@@ -241,8 +236,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
 	 * Allocate a new block
 	 */
 #ifdef _JFS_4K
-	if ((rc =
-	     extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
+	if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
 		goto unlock;
 	rc = extAlloc(ip, 1, lblock64, &xad, FALSE);
 	if (rc)

--- a/fs/jfs/jfs_extent.c
+++ b/fs/jfs/jfs_extent.c
@@ -96,6 +96,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	/* This blocks if we are low on resources */
 	txBeginAnon(ip->i_sb);

+	/* Avoid race with jfs_commit_inode() */
+	down(&JFS_IP(ip)->commit_sem);
+
 	/* validate extent length */
 	if (xlen > MAXXLEN)
 		xlen = MAXXLEN;
@@ -138,8 +141,8 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	 * is smaller than the number of blocks per page.
 	 */
 	nxlen = xlen;
-	if ((rc =
-	     extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
+	if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
+		up(&JFS_IP(ip)->commit_sem);
 		return (rc);
 	}

@@ -160,6 +163,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
 	 */
 	if (rc) {
 		dbFree(ip, nxaddr, nxlen);
+		up(&JFS_IP(ip)->commit_sem);
 		return (rc);
 	}

@@ -174,6 +178,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)

 	mark_inode_dirty(ip);

+	up(&JFS_IP(ip)->commit_sem);
 	/*
 	 * COMMIT_SyncList flags an anonymous tlock on page that is on
 	 * sync list.
@@ -217,6 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	/* This blocks if we are low on resources */
 	txBeginAnon(ip->i_sb);

+	down(&JFS_IP(ip)->commit_sem);
 	/* validate extent length */
 	if (nxlen > MAXXLEN)
 		nxlen = MAXXLEN;
@@ -235,7 +241,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
 		xp->flag = 0;
 		if ((rc = xtUpdate(0, ip, xp)))
-			return (rc);
+			goto exit;
 	}

 	/* try to allocated the request number of blocks for the
@@ -247,7 +253,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	 * space as to satisfy the extend page.
 	 */
 	if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
-		return (rc);
+		goto exit;

 	delta = nxlen - xlen;

@@ -284,7 +290,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 		/* extend the extent */
 		if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
 			dbFree(ip, xaddr + xlen, delta);
-			return (rc);
+			goto exit;
 		}
 	} else {
 		/*
@@ -294,7 +300,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 		 */
 		if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
 			dbFree(ip, nxaddr, nxlen);
-			return (rc);
+			goto exit;
 		}
 	}

@@ -325,8 +331,9 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
 	xp->flag = xflag;

 	mark_inode_dirty(ip);
-
-	return (0);
+exit:
+	up(&JFS_IP(ip)->commit_sem);
+	return (rc);
 }


@@ -423,19 +430,13 @@ int extRecord(struct inode *ip, xad_t * xp)

 	txBeginAnon(ip->i_sb);

-	/* update the extent */
-	if ((rc = xtUpdate(0, ip, xp)))
-		return (rc);
-
-#ifdef _STILL_TO_PORT
-	/* no longer abnr */
-	cp->cm_abnr = FALSE;
+	down(&JFS_IP(ip)->commit_sem);

-	/* mark the cbuf as modified */
-	cp->cm_modified = TRUE;
-#endif				/*  _STILL_TO_PORT */
+	/* update the extent */
+	rc = xtUpdate(0, ip, xp);

-	return (0);
+	up(&JFS_IP(ip)->commit_sem);
+	return (rc);
 }



--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -19,6 +19,7 @@
 #ifndef _H_JFS_INCORE
 #define _H_JFS_INCORE

+#include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <asm/bitops.h>
 #include "jfs_types.h"
@@ -30,14 +31,6 @@
 */
 #define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */

-/*
- * Due to header ordering problems this can't be in jfs_lock.h
- */
-typedef struct	jfs_rwlock {
-	struct rw_semaphore rw_sem;
-	atomic_t in_use;	/* for hacked implementation of trylock */
-} jfs_rwlock_t;
-
 /*
 * JFS-private inode information
 */
@@ -62,7 +55,19 @@ struct jfs_inode_info {
 	lid_t	atltail;	/* anonymous tlock list tail	*/
 	struct list_head anon_inode_list; /* inodes having anonymous txns */
 	struct list_head mp_list; /* metapages in inode's address space */
-	jfs_rwlock_t rdwrlock;	/* read/write lock	*/
+	/*
+	 * rdwrlock serializes xtree between reads & writes and synchronizes
+	 * changes to special inodes.  Its use would be redundant on
+	 * directories since the i_sem taken in the VFS is sufficient.
+	 */
+	struct rw_semaphore rdwrlock;
+	/*
+	 * commit_sem serializes transaction processing on an inode.
+	 * It must be taken after beginning a transaction (txBegin), since
+	 * dirty inodes may be committed while a new transaction on the
+	 * inode is blocked in txBegin or txBeginAnon.
+	 */
+	struct semaphore commit_sem;
 	lid_t	xtlid;		/* lid of xtree lock on directory */
 	union {
 		struct {
@@ -87,6 +92,12 @@ struct jfs_inode_info {
 #define i_dtroot u.dir._dtroot
 #define i_inline u.link._inline

+
+#define IREAD_LOCK(ip)		down_read(&JFS_IP(ip)->rdwrlock)
+#define IREAD_UNLOCK(ip)	up_read(&JFS_IP(ip)->rdwrlock)
+#define IWRITE_LOCK(ip)		down_write(&JFS_IP(ip)->rdwrlock)
+#define IWRITE_UNLOCK(ip)	up_write(&JFS_IP(ip)->rdwrlock)
+
 /*
 * cflag
 */

--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -91,40 +91,3 @@ struct inode *ialloc(struct inode *parent, umode_t mode)

 	return inode;
 }
-
-/*
- * NAME:	iwritelocklist()
- *
- * FUNCTION:	Lock multiple inodes in sorted order to avoid deadlock
- *
- */
-void iwritelocklist(int n, ...)
-{
-	va_list ilist;
-	struct inode *sort[4];
-	struct inode *ip;
-	int k, m;
-
-	va_start(ilist, n);
-	for (k = 0; k < n; k++)
-		sort[k] = va_arg(ilist, struct inode *);
-	va_end(ilist);
-
-	/* Bubble sort in descending order */
-	do {
-		m = 0;
-		for (k = 0; k < n; k++)
-			if ((k + 1) < n
-			    && sort[k + 1]->i_ino > sort[k]->i_ino) {
-				ip = sort[k];
-				sort[k] = sort[k + 1];
-				sort[k + 1] = ip;
-				m++;
-			}
-	} while (m);
-
-	/* Lock them */
-	for (k = 0; k < n; k++) {
-		IWRITE_LOCK(sort[k]);
-	}
-}
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -24,63 +24,7 @@

 /*
 *	jfs_lock.h
- *
- * JFS lock definition for globally referenced locks
- */
-
-/* readers/writer lock: thread-thread */
-
-/*
- * RW semaphores do not currently have a trylock function.  Since the
- * implementation varies by platform, I have implemented a platform-independent
- * wrapper around the rw_semaphore routines.  If this turns out to be the best
- * way of avoiding our locking problems, I will push to get a trylock
- * implemented in the kernel, but I'd rather find a way to avoid having to
- * use it.
 */
-#define RDWRLOCK_T jfs_rwlock_t
-static inline void RDWRLOCK_INIT(jfs_rwlock_t * Lock)
-{
-	init_rwsem(&Lock->rw_sem);
-	atomic_set(&Lock->in_use, 0);
-}
-static inline void READ_LOCK(jfs_rwlock_t * Lock)
-{
-	atomic_inc(&Lock->in_use);
-	down_read(&Lock->rw_sem);
-}
-static inline void READ_UNLOCK(jfs_rwlock_t * Lock)
-{
-	up_read(&Lock->rw_sem);
-	atomic_dec(&Lock->in_use);
-}
-static inline void WRITE_LOCK(jfs_rwlock_t * Lock)
-{
-	atomic_inc(&Lock->in_use);
-	down_write(&Lock->rw_sem);
-}
-
-static inline int WRITE_TRYLOCK(jfs_rwlock_t * Lock)
-{
-	if (atomic_read(&Lock->in_use))
-		return 0;
-	WRITE_LOCK(Lock);
-	return 1;
-}
-static inline void WRITE_UNLOCK(jfs_rwlock_t * Lock)
-{
-	up_write(&Lock->rw_sem);
-	atomic_dec(&Lock->in_use);
-}
-
-#define IREAD_LOCK(ip)		READ_LOCK(&JFS_IP(ip)->rdwrlock)
-#define IREAD_UNLOCK(ip)	READ_UNLOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_LOCK(ip)		WRITE_LOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_TRYLOCK(ip)	WRITE_TRYLOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_UNLOCK(ip)	WRITE_UNLOCK(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_LOCK_LIST	iwritelocklist
-
-extern void iwritelocklist(int, ...);

 /*
 * Conditional sleep where condition is protected by spinlock

--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1524,8 +1524,6 @@ static int lmLogShutdown(log_t * log)
 *
 * RETURN:	0	- success
 *		errors returned by vms_iowait().
- *			
- * serialization: IWRITE_LOCK(log inode) held on entry/exit
 */
 static int lmLogFileSystem(log_t * log, char *uuid, int activate)
 {

--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -20,6 +20,7 @@
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
+#include <linux/mempool.h>
 #include "jfs_incore.h"
 #include "jfs_filsys.h"
 #include "jfs_metapage.h"
@@ -27,11 +28,6 @@
 #include "jfs_debug.h"

 extern struct task_struct *jfsCommitTask;
-static unsigned int metapages = 1024;	/* ??? Need a better number */
-static unsigned int free_metapages;
-static metapage_t *metapage_buf;
-static unsigned long meta_order;
-static metapage_t *meta_free_list = NULL;
 static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
 static wait_queue_head_t meta_wait;

@@ -93,12 +89,51 @@ static inline void lock_metapage(struct metapage *mp)
 		__lock_metapage(mp);
 }

-int __init metapage_init(void)
+#define METAPOOL_MIN_PAGES 32
+static kmem_cache_t *metapage_cache;
+static mempool_t *metapage_mempool;
+
+static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
-	int i;
-	metapage_t *last = NULL;
-	metapage_t *mp;
+	metapage_t *mp = (metapage_t *)foo;

+	if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+	    SLAB_CTOR_CONSTRUCTOR) {
+		mp->lid = 0;
+		mp->lsn = 0;
+		mp->flag = 0;
+		mp->data = NULL;
+		mp->clsn = 0;
+		mp->log = NULL;
+		set_bit(META_free, &mp->flag);
+		init_waitqueue_head(&mp->wait);
+	}
+}
+
+static inline metapage_t *alloc_metapage(int no_wait)
+{
+	return mempool_alloc(metapage_mempool, no_wait ? GFP_ATOMIC : GFP_NOFS);
+}
+
+static inline void free_metapage(metapage_t *mp)
+{
+	mp->flag = 0;
+	set_bit(META_free, &mp->flag);
+
+	mempool_free(mp, metapage_mempool);
+}
+
+static void *mp_mempool_alloc(int gfp_mask, void *pool_data)
+{
+	return kmem_cache_alloc(metapage_cache, gfp_mask);
+}
+static void mp_mempool_free(void *element, void *pool_data)
+{
+	kmem_cache_free(metapage_cache, element);	/* void function: plain call, no value to return */
+}
+
+int __init metapage_init(void)
+{
 	/*
 	 * Initialize wait queue
 	 */
@@ -107,30 +142,18 @@ int __init metapage_init(void)
 	/*
 	 * Allocate the metapage structures
 	 */
-	for (meta_order = 0;
-	     ((PAGE_SIZE << meta_order) / sizeof(metapage_t)) < metapages;
-	     meta_order++);
-	metapages = (PAGE_SIZE << meta_order) / sizeof(metapage_t);
-
-	jFYI(1, ("metapage_init: metapage size = %Zd, metapages = %d\n",
-		 sizeof(metapage_t), metapages));
+	metapage_cache = kmem_cache_create("jfs_mp", sizeof(metapage_t), 0, 0,
+					   init_once, NULL);
+	if (metapage_cache == NULL)
+		return -ENOMEM;

-	metapage_buf =
-	    (metapage_t *) __get_free_pages(GFP_KERNEL, meta_order);
-	assert(metapage_buf);
-	memset(metapage_buf, 0, PAGE_SIZE << meta_order);
+	metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mp_mempool_alloc,
+					  mp_mempool_free, NULL);

-	mp = metapage_buf;
-	for (i = 0; i < metapages; i++, mp++) {
-		mp->flag = 0;
-		set_bit(META_free, &mp->flag);
-		init_waitqueue_head(&mp->wait);
-		mp->hash_next = last;
-		last = mp;
+	if (metapage_mempool == NULL) {
+		kmem_cache_destroy(metapage_cache);
+		return -ENOMEM;
 	}
-	meta_free_list = last;
-	free_metapages = metapages;
-
 	/*
 	 * Now the hash list
 	 */
@@ -147,64 +170,8 @@ int __init metapage_init(void)

 void metapage_exit(void)
 {
-	free_pages((unsigned long) metapage_buf, meta_order);
-	free_pages((unsigned long) hash_table, hash_order);
-	metapage_buf = 0;	/* This is a signal to the jfsIOwait thread */
-}
-
-/*
- * Get metapage structure from freelist
- * 
- * Caller holds meta_lock
- */
-static metapage_t *alloc_metapage(int *dropped_lock)
-{
-	metapage_t *new;
-
-	*dropped_lock = FALSE;
-
-	/*
-	 * Reserve two metapages for the lazy commit thread.  Otherwise
-	 * we may deadlock with holders of metapages waiting for tlocks
-	 * that lazy thread should be freeing.
-	 */
-	if ((free_metapages < 3) && (current != jfsCommitTask)) {
-		INCREMENT(mpStat.allocwait);
-		*dropped_lock = TRUE;
-		__SLEEP_COND(meta_wait, (free_metapages > 2),
-			     spin_lock(&meta_lock), spin_unlock(&meta_lock));
-	}
-
-	assert(meta_free_list);
-
-	new = meta_free_list;
-	meta_free_list = new->hash_next;
-	free_metapages--;
-
-	return new;
-}
-
-/*
- * Put metapage on freelist (holding meta_lock)
- */
-static inline void __free_metapage(metapage_t * mp)
-{
-	mp->flag = 0;
-	set_bit(META_free, &mp->flag);
-	mp->hash_next = meta_free_list;
-	meta_free_list = mp;
-	free_metapages++;
-	wake_up(&meta_wait);
-}
-
-/*
- * Put metapage on freelist (not holding meta_lock)
- */
-static inline void free_metapage(metapage_t * mp)
-{
-	spin_lock(&meta_lock);
-	__free_metapage(mp);
-	spin_unlock(&meta_lock);
+	mempool_destroy(metapage_mempool);
+	kmem_cache_destroy(metapage_cache);
 }

 /*
@@ -307,7 +274,6 @@ metapage_t *__get_metapage(struct inode *inode,
 			   unsigned long lblock, unsigned int size,
 			   int absolute, unsigned long new)
 {
-	int dropped_lock;
 	metapage_t **hash_ptr;
 	int l2BlocksPerPage;
 	int l2bsize;
@@ -354,16 +320,42 @@ metapage_t *__get_metapage(struct inode *inode,
 			return NULL;
 		}
 		
-		mp = alloc_metapage(&dropped_lock);
-		if (dropped_lock) {
-			/* alloc_metapage blocked, we need to search the hash
-			 * again.  (The goto is ugly, maybe we'll clean this
-			 * up in the future.)
+		/*
+		 * Locks held on aggregate inode pages are usually
+		 * not held long, and they are taken in critical code
+		 * paths (committing dirty inodes, txCommit thread) 
+		 * 
+		 * Attempt to get metapage without blocking, tapping into
+		 * reserves if necessary.
+		 */
+		mp = NULL;
+		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
+			mp =  mempool_alloc(metapage_mempool, GFP_ATOMIC);
+			if (!mp) {
+				/*
+				 * mempool is supposed to protect us from
+				 * failing here.  We will try a blocking
+				 * call, but a deadlock is possible here
 				 */
+				printk(KERN_WARNING
+				       "__get_metapage: atomic call to mempool_alloc failed.\n");
+				printk(KERN_WARNING
+				       "Will attempt blocking call\n");
+			}
+		}
+		if (!mp) {
 			metapage_t *mp2;
+
+			spin_unlock(&meta_lock);
+			mp =  mempool_alloc(metapage_mempool, GFP_NOFS);
+			spin_lock(&meta_lock);
+
+			/* we dropped the meta_lock, we need to search the
+			 * hash again.
+			 */
 			mp2 = search_hash(hash_ptr, mapping, lblock);
 			if (mp2) {
-				__free_metapage(mp);
+				free_metapage(mp);
 				mp = mp2;
 				goto page_found;
 			}
@@ -416,7 +408,7 @@ metapage_t *__get_metapage(struct inode *inode,
 	remove_from_hash(mp, hash_ptr);
 	if (!absolute)
 		list_del(&mp->inode_list);
-	__free_metapage(mp);
+	free_metapage(mp);
 	spin_unlock(&meta_lock);
 	return NULL;
 }
@@ -631,12 +623,10 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
 	len += sprintf(buffer,
 		       "JFS Metapage statistics\n"
 		       "=======================\n"
-		       "metapages in use = %d\n"
 		       "page allocations = %d\n"
 		       "page frees = %d\n"
 		       "lock waits = %d\n"
 		       "allocation waits = %d\n",
-		       metapages - free_metapages,
 		       mpStat.pagealloc,
 		       mpStat.pagefree,
 		       mpStat.lockwait,

--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -2898,6 +2898,8 @@ int jfs_sync(void)
 {
 	struct inode *ip;
 	struct jfs_inode_info *jfs_ip;
+	int rc;
+	tid_t tid;

 	lock_kernel();

@@ -2927,17 +2929,19 @@ int jfs_sync(void)
 			ip = &jfs_ip->vfs_inode;

 			/*
-			 * We must release the TXN_LOCK since our
-			 * IWRITE_TRYLOCK implementation may still block
+			 * down_trylock returns 0 on success.  This is
+			 * inconsistent with spin_trylock.
 			 */
-			TXN_UNLOCK();
-			if (IWRITE_TRYLOCK(ip)) {
+			if (! down_trylock(&jfs_ip->commit_sem)) {
 				/*
 				 * inode will be removed from anonymous list
 				 * when it is committed
 				 */
-				jfs_commit_inode(ip, 0);
-				IWRITE_UNLOCK(ip);
+				TXN_UNLOCK();
+				tid = txBegin(ip->i_sb, COMMIT_INODE);
+				rc = txCommit(tid, 1, &ip, 0);
+				txEnd(tid);
+				up(&jfs_ip->commit_sem);
 				/*
 				 * Just to be safe.  I don't know how
 				 * long we can run without blocking
@@ -2945,17 +2949,11 @@ int jfs_sync(void)
 				cond_resched();
 				TXN_LOCK();
 			} else {
-				/* We can't get the write lock.  It may
+				/* We can't get the commit semaphore.  It may
 				 * be held by a thread waiting for tlock's
 				 * so let's not block here.  Save it to
 				 * put back on the anon_list.
 				 */
-
-				/*
-				 * We released TXN_LOCK, let's make sure
-				 * this inode is still there
-				 */
-				TXN_LOCK();
 				if (TxAnchor.anon_list.next !=
 				    &jfs_ip->anon_inode_list)
 					continue;

--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -387,7 +387,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
 	    SLAB_CTOR_CONSTRUCTOR) {
 		INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
 		INIT_LIST_HEAD(&jfs_ip->mp_list);
-		RDWRLOCK_INIT(&jfs_ip->rdwrlock);
+		init_rwsem(&jfs_ip->rdwrlock);
+		init_MUTEX(&jfs_ip->commit_sem);
 		inode_init_once(&jfs_ip->vfs_inode);
 	}
 }