Commit fde6699c authored by Dave Kleikamp

Merge kleikamp.austin.ibm.com:/home/shaggy/bk/jfs-2.5

into kleikamp.austin.ibm.com:/home/shaggy/bk/resize-2.5
parents 30c4273f 1c706086
...@@ -38,9 +38,7 @@ int jfs_fsync(struct file *file, struct dentry *dentry, int datasync) ...@@ -38,9 +38,7 @@ int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
return rc; return rc;
IWRITE_LOCK(inode);
rc |= jfs_commit_inode(inode, 1); rc |= jfs_commit_inode(inode, 1);
IWRITE_UNLOCK(inode);
return rc ? -EIO : 0; return rc ? -EIO : 0;
} }
...@@ -64,10 +62,19 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) ...@@ -64,10 +62,19 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
do { do {
tid = txBegin(ip->i_sb, 0); tid = txBegin(ip->i_sb, 0);
/*
* The commit_sem cannot be taken before txBegin.
* txBegin may block and there is a chance the inode
* could be marked dirty and need to be committed
* before txBegin unblocks
*/
down(&JFS_IP(ip)->commit_sem);
newsize = xtTruncate(tid, ip, length, newsize = xtTruncate(tid, ip, length,
COMMIT_TRUNCATE | COMMIT_PWMAP); COMMIT_TRUNCATE | COMMIT_PWMAP);
if (newsize < 0) { if (newsize < 0) {
txEnd(tid); txEnd(tid);
up(&JFS_IP(ip)->commit_sem);
break; break;
} }
...@@ -76,6 +83,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length) ...@@ -76,6 +83,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
txCommit(tid, 1, &ip, 0); txCommit(tid, 1, &ip, 0);
txEnd(tid); txEnd(tid);
up(&JFS_IP(ip)->commit_sem);
} while (newsize > length); /* Truncate isn't always atomic */ } while (newsize > length); /* Truncate isn't always atomic */
} }
......
...@@ -107,8 +107,10 @@ int jfs_commit_inode(struct inode *inode, int wait) ...@@ -107,8 +107,10 @@ int jfs_commit_inode(struct inode *inode, int wait)
} }
tid = txBegin(inode->i_sb, COMMIT_INODE); tid = txBegin(inode->i_sb, COMMIT_INODE);
down(&JFS_IP(inode)->commit_sem);
rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0); rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
txEnd(tid); txEnd(tid);
up(&JFS_IP(inode)->commit_sem);
return -rc; return -rc;
} }
...@@ -123,25 +125,19 @@ void jfs_write_inode(struct inode *inode, int wait) ...@@ -123,25 +125,19 @@ void jfs_write_inode(struct inode *inode, int wait)
!test_cflag(COMMIT_Dirty, inode)) !test_cflag(COMMIT_Dirty, inode))
return; return;
IWRITE_LOCK(inode);
if (jfs_commit_inode(inode, wait)) { if (jfs_commit_inode(inode, wait)) {
jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n")); jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n"));
} }
IWRITE_UNLOCK(inode);
} }
void jfs_delete_inode(struct inode *inode) void jfs_delete_inode(struct inode *inode)
{ {
jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode)); jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode));
IWRITE_LOCK(inode);
if (test_cflag(COMMIT_Freewmap, inode)) if (test_cflag(COMMIT_Freewmap, inode))
freeZeroLink(inode); freeZeroLink(inode);
diFree(inode); diFree(inode);
IWRITE_UNLOCK(inode);
clear_inode(inode); clear_inode(inode);
} }
...@@ -203,8 +199,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock, ...@@ -203,8 +199,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
if ((no_size_check || if ((no_size_check ||
((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) && ((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) &&
(xtLookup (xtLookup(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
== 0) && xlen) { == 0) && xlen) {
if (xflag & XAD_NOTRECORDED) { if (xflag & XAD_NOTRECORDED) {
if (!create) if (!create)
...@@ -241,8 +236,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock, ...@@ -241,8 +236,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
* Allocate a new block * Allocate a new block
*/ */
#ifdef _JFS_4K #ifdef _JFS_4K
if ((rc = if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
goto unlock; goto unlock;
rc = extAlloc(ip, 1, lblock64, &xad, FALSE); rc = extAlloc(ip, 1, lblock64, &xad, FALSE);
if (rc) if (rc)
......
...@@ -96,6 +96,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) ...@@ -96,6 +96,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
/* This blocks if we are low on resources */ /* This blocks if we are low on resources */
txBeginAnon(ip->i_sb); txBeginAnon(ip->i_sb);
/* Avoid race with jfs_commit_inode() */
down(&JFS_IP(ip)->commit_sem);
/* validate extent length */ /* validate extent length */
if (xlen > MAXXLEN) if (xlen > MAXXLEN)
xlen = MAXXLEN; xlen = MAXXLEN;
...@@ -138,8 +141,8 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) ...@@ -138,8 +141,8 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
* is smaller than the number of blocks per page. * is smaller than the number of blocks per page.
*/ */
nxlen = xlen; nxlen = xlen;
if ((rc = if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) { up(&JFS_IP(ip)->commit_sem);
return (rc); return (rc);
} }
...@@ -160,6 +163,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) ...@@ -160,6 +163,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
*/ */
if (rc) { if (rc) {
dbFree(ip, nxaddr, nxlen); dbFree(ip, nxaddr, nxlen);
up(&JFS_IP(ip)->commit_sem);
return (rc); return (rc);
} }
...@@ -174,6 +178,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr) ...@@ -174,6 +178,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
mark_inode_dirty(ip); mark_inode_dirty(ip);
up(&JFS_IP(ip)->commit_sem);
/* /*
* COMMIT_SyncList flags an anonymous tlock on page that is on * COMMIT_SyncList flags an anonymous tlock on page that is on
* sync list. * sync list.
...@@ -217,6 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) ...@@ -217,6 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
/* This blocks if we are low on resources */ /* This blocks if we are low on resources */
txBeginAnon(ip->i_sb); txBeginAnon(ip->i_sb);
down(&JFS_IP(ip)->commit_sem);
/* validate extent length */ /* validate extent length */
if (nxlen > MAXXLEN) if (nxlen > MAXXLEN)
nxlen = MAXXLEN; nxlen = MAXXLEN;
...@@ -235,7 +241,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) ...@@ -235,7 +241,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
if ((xp->flag & XAD_NOTRECORDED) && !abnr) { if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
xp->flag = 0; xp->flag = 0;
if ((rc = xtUpdate(0, ip, xp))) if ((rc = xtUpdate(0, ip, xp)))
return (rc); goto exit;
} }
/* try to allocate the requested number of blocks for the /* try to allocate the requested number of blocks for the
...@@ -247,7 +253,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) ...@@ -247,7 +253,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
* space as to satisfy the extend page. * space as to satisfy the extend page.
*/ */
if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr))) if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
return (rc); goto exit;
delta = nxlen - xlen; delta = nxlen - xlen;
...@@ -284,7 +290,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) ...@@ -284,7 +290,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
/* extend the extent */ /* extend the extent */
if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) { if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
dbFree(ip, xaddr + xlen, delta); dbFree(ip, xaddr + xlen, delta);
return (rc); goto exit;
} }
} else { } else {
/* /*
...@@ -294,7 +300,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) ...@@ -294,7 +300,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
*/ */
if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) { if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
dbFree(ip, nxaddr, nxlen); dbFree(ip, nxaddr, nxlen);
return (rc); goto exit;
} }
} }
...@@ -325,8 +331,9 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr) ...@@ -325,8 +331,9 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
xp->flag = xflag; xp->flag = xflag;
mark_inode_dirty(ip); mark_inode_dirty(ip);
exit:
return (0); up(&JFS_IP(ip)->commit_sem);
return (rc);
} }
...@@ -423,19 +430,13 @@ int extRecord(struct inode *ip, xad_t * xp) ...@@ -423,19 +430,13 @@ int extRecord(struct inode *ip, xad_t * xp)
txBeginAnon(ip->i_sb); txBeginAnon(ip->i_sb);
/* update the extent */ down(&JFS_IP(ip)->commit_sem);
if ((rc = xtUpdate(0, ip, xp)))
return (rc);
#ifdef _STILL_TO_PORT
/* no longer abnr */
cp->cm_abnr = FALSE;
/* mark the cbuf as modified */ /* update the extent */
cp->cm_modified = TRUE; rc = xtUpdate(0, ip, xp);
#endif /* _STILL_TO_PORT */
return (0); up(&JFS_IP(ip)->commit_sem);
return (rc);
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#ifndef _H_JFS_INCORE #ifndef _H_JFS_INCORE
#define _H_JFS_INCORE #define _H_JFS_INCORE
#include <linux/rwsem.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <asm/bitops.h> #include <asm/bitops.h>
#include "jfs_types.h" #include "jfs_types.h"
...@@ -30,14 +31,6 @@ ...@@ -30,14 +31,6 @@
*/ */
#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */ #define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */
/*
 * Due to header ordering problems this can't be in jfs_lock.h
 *
 * Read/write lock wrapper: an rw_semaphore paired with a counter of
 * threads that hold or are in the middle of acquiring it.
 */
typedef struct jfs_rwlock {
struct rw_semaphore rw_sem;	/* the underlying read/write semaphore */
atomic_t in_use; /* for hacked implementation of trylock */
} jfs_rwlock_t;
/* /*
* JFS-private inode information * JFS-private inode information
*/ */
...@@ -62,7 +55,19 @@ struct jfs_inode_info { ...@@ -62,7 +55,19 @@ struct jfs_inode_info {
lid_t atltail; /* anonymous tlock list tail */ lid_t atltail; /* anonymous tlock list tail */
struct list_head anon_inode_list; /* inodes having anonymous txns */ struct list_head anon_inode_list; /* inodes having anonymous txns */
struct list_head mp_list; /* metapages in inode's address space */ struct list_head mp_list; /* metapages in inode's address space */
jfs_rwlock_t rdwrlock; /* read/write lock */ /*
* rdwrlock serializes xtree between reads & writes and synchronizes
* changes to special inodes. Its use would be redundant on
* directories since the i_sem taken in the VFS is sufficient.
*/
struct rw_semaphore rdwrlock;
/*
* commit_sem serializes transaction processing on an inode.
* It must be taken after beginning a transaction (txBegin), since
* dirty inodes may be committed while a new transaction on the
* inode is blocked in txBegin or txBeginAnon
*/
struct semaphore commit_sem;
lid_t xtlid; /* lid of xtree lock on directory */ lid_t xtlid; /* lid of xtree lock on directory */
union { union {
struct { struct {
...@@ -87,6 +92,12 @@ struct jfs_inode_info { ...@@ -87,6 +92,12 @@ struct jfs_inode_info {
#define i_dtroot u.dir._dtroot #define i_dtroot u.dir._dtroot
#define i_inline u.link._inline #define i_inline u.link._inline
/*
 * Per-inode rdwrlock helpers: each one applies the matching
 * rw_semaphore operation to the rdwrlock of the inode's JFS_IP() data.
 */
#define IREAD_LOCK(ip) down_read(&JFS_IP(ip)->rdwrlock)
#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
#define IWRITE_LOCK(ip) down_write(&JFS_IP(ip)->rdwrlock)
#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock)
/* /*
* cflag * cflag
*/ */
......
...@@ -91,40 +91,3 @@ struct inode *ialloc(struct inode *parent, umode_t mode) ...@@ -91,40 +91,3 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
return inode; return inode;
} }
/*
 * NAME:	iwritelocklist()
 *
 * FUNCTION:	Write-lock the n inodes passed as variadic arguments.
 *		The locks are taken in descending i_ino order so that
 *		every caller acquires them in the same sequence, which
 *		avoids deadlock.
 *
 * PARAMETERS:	n	- number of struct inode pointers that follow;
 *			  the local array has room for at most 4
 */
void iwritelocklist(int n, ...)
{
	va_list args;
	struct inode *inodes[4];
	int i, swapped;

	/* Collect the inode pointers from the argument list */
	va_start(args, n);
	for (i = 0; i < n; i++)
		inodes[i] = va_arg(args, struct inode *);
	va_end(args);

	/* Adjacent-swap passes (descending i_ino) until a pass is clean */
	do {
		swapped = 0;
		for (i = 0; i + 1 < n; i++) {
			struct inode *tmp;

			if (inodes[i + 1]->i_ino <= inodes[i]->i_ino)
				continue;
			tmp = inodes[i];
			inodes[i] = inodes[i + 1];
			inodes[i + 1] = tmp;
			swapped = 1;
		}
	} while (swapped);

	/* Take the write locks in sorted order */
	for (i = 0; i < n; i++)
		IWRITE_LOCK(inodes[i]);
}
...@@ -24,63 +24,7 @@ ...@@ -24,63 +24,7 @@
/* /*
* jfs_lock.h * jfs_lock.h
*
* JFS lock definition for globally referenced locks
*/
/* readers/writer lock: thread-thread */
/*
* RW semaphores do not currently have a trylock function. Since the
* implementation varies by platform, I have implemented a platform-independent
* wrapper around the rw_semaphore routines. If this turns out to be the best
* way of avoiding our locking problems, I will push to get a trylock
* implemented in the kernel, but I'd rather find a way to avoid having to
* use it.
*/ */
#define RDWRLOCK_T jfs_rwlock_t
/*
 * Initialize a jfs_rwlock_t: set up the underlying rw_semaphore and
 * zero the in_use counter that WRITE_TRYLOCK() consults.
 */
static inline void RDWRLOCK_INIT(jfs_rwlock_t * Lock)
{
init_rwsem(&Lock->rw_sem);
atomic_set(&Lock->in_use, 0);
}
/*
 * Acquire the lock for reading.  in_use is bumped before down_read()
 * is called, so a thread still waiting for the semaphore is already
 * counted.
 */
static inline void READ_LOCK(jfs_rwlock_t * Lock)
{
atomic_inc(&Lock->in_use);
down_read(&Lock->rw_sem);
}
/*
 * Release a read hold: drop the semaphore first, then decrement the
 * in_use counter incremented by READ_LOCK().
 */
static inline void READ_UNLOCK(jfs_rwlock_t * Lock)
{
up_read(&Lock->rw_sem);
atomic_dec(&Lock->in_use);
}
/*
 * Acquire the lock for writing.  As in READ_LOCK(), in_use is bumped
 * before the semaphore call, so it counts waiters as well as holders.
 */
static inline void WRITE_LOCK(jfs_rwlock_t * Lock)
{
atomic_inc(&Lock->in_use);
down_write(&Lock->rw_sem);
}
/*
 * Try to take the write lock.
 *
 * Returns 0 without locking when in_use is non-zero; otherwise calls
 * WRITE_LOCK() and returns 1.
 *
 * NOTE: the in_use test and the WRITE_LOCK() call are separate steps.
 * Another thread can take the lock in between, in which case this
 * "trylock" still waits in down_write().
 */
static inline int WRITE_TRYLOCK(jfs_rwlock_t * Lock)
{
if (atomic_read(&Lock->in_use))
return 0;
WRITE_LOCK(Lock);
return 1;
}
/*
 * Release the write hold: drop the semaphore first, then decrement the
 * in_use counter incremented by WRITE_LOCK().
 */
static inline void WRITE_UNLOCK(jfs_rwlock_t * Lock)
{
up_write(&Lock->rw_sem);
atomic_dec(&Lock->in_use);
}
/*
 * Per-inode convenience macros: apply the jfs_rwlock_t wrappers to the
 * rdwrlock of the inode's JFS_IP() data.
 */
#define IREAD_LOCK(ip) READ_LOCK(&JFS_IP(ip)->rdwrlock)
#define IREAD_UNLOCK(ip) READ_UNLOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_LOCK(ip) WRITE_LOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_TRYLOCK(ip) WRITE_TRYLOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_UNLOCK(ip) WRITE_UNLOCK(&JFS_IP(ip)->rdwrlock)
/* Write-lock several inodes at once; first argument is the inode count */
#define IWRITE_LOCK_LIST iwritelocklist
extern void iwritelocklist(int, ...);
/* /*
* Conditional sleep where condition is protected by spinlock * Conditional sleep where condition is protected by spinlock
......
...@@ -1524,8 +1524,6 @@ static int lmLogShutdown(log_t * log) ...@@ -1524,8 +1524,6 @@ static int lmLogShutdown(log_t * log)
* *
* RETURN: 0 - success * RETURN: 0 - success
* errors returned by vms_iowait(). * errors returned by vms_iowait().
*
* serialization: IWRITE_LOCK(log inode) held on entry/exit
*/ */
static int lmLogFileSystem(log_t * log, char *uuid, int activate) static int lmLogFileSystem(log_t * log, char *uuid, int activate)
{ {
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <linux/fs.h> #include <linux/fs.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h" #include "jfs_incore.h"
#include "jfs_filsys.h" #include "jfs_filsys.h"
#include "jfs_metapage.h" #include "jfs_metapage.h"
...@@ -27,11 +28,6 @@ ...@@ -27,11 +28,6 @@
#include "jfs_debug.h" #include "jfs_debug.h"
extern struct task_struct *jfsCommitTask; extern struct task_struct *jfsCommitTask;
static unsigned int metapages = 1024; /* ??? Need a better number */
static unsigned int free_metapages;
static metapage_t *metapage_buf;
static unsigned long meta_order;
static metapage_t *meta_free_list = NULL;
static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED; static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
static wait_queue_head_t meta_wait; static wait_queue_head_t meta_wait;
...@@ -93,12 +89,51 @@ static inline void lock_metapage(struct metapage *mp) ...@@ -93,12 +89,51 @@ static inline void lock_metapage(struct metapage *mp)
__lock_metapage(mp); __lock_metapage(mp);
} }
int __init metapage_init(void) #define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;
static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{ {
int i; metapage_t *mp = (metapage_t *)foo;
metapage_t *last = NULL;
metapage_t *mp; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
mp->lid = 0;
mp->lsn = 0;
mp->flag = 0;
mp->data = NULL;
mp->clsn = 0;
mp->log = NULL;
set_bit(META_free, &mp->flag);
init_waitqueue_head(&mp->wait);
}
}
/*
 * Allocate a metapage_t from metapage_mempool.
 *
 * no_wait: non-zero requests the allocation with GFP_ATOMIC,
 *          zero requests it with GFP_NOFS.
 *
 * Returns whatever mempool_alloc() returns.
 */
static inline metapage_t *alloc_metapage(int no_wait)
{
	int gfp_mask;

	if (no_wait)
		gfp_mask = GFP_ATOMIC;
	else
		gfp_mask = GFP_NOFS;

	return mempool_alloc(metapage_mempool, gfp_mask);
}
/*
 * Return a metapage to metapage_mempool.  The flag word is cleared and
 * META_free is set before the structure is handed back to the pool.
 */
static inline void free_metapage(metapage_t *mp)
{
mp->flag = 0;
set_bit(META_free, &mp->flag);
mempool_free(mp, metapage_mempool);
}
/*
 * Allocation callback passed to mempool_create() in metapage_init():
 * obtains one element from metapage_cache using the given gfp_mask.
 * pool_data is not used.
 */
static void *mp_mempool_alloc(int gfp_mask, void *pool_data)
{
return kmem_cache_alloc(metapage_cache, gfp_mask);
}
/*
 * Free callback passed to mempool_create() in metapage_init(): hands
 * one element back to metapage_cache.  pool_data is not used.
 */
static void mp_mempool_free(void *element, void *pool_data)
{
	/*
	 * This function returns void, so the call is made for its effect
	 * only; "return <expression>;" is not valid ISO C here.
	 */
	kmem_cache_free(metapage_cache, element);
}
int __init metapage_init(void)
{
/* /*
* Initialize wait queue * Initialize wait queue
*/ */
...@@ -107,30 +142,18 @@ int __init metapage_init(void) ...@@ -107,30 +142,18 @@ int __init metapage_init(void)
/* /*
* Allocate the metapage structures * Allocate the metapage structures
*/ */
for (meta_order = 0; metapage_cache = kmem_cache_create("jfs_mp", sizeof(metapage_t), 0, 0,
((PAGE_SIZE << meta_order) / sizeof(metapage_t)) < metapages; init_once, NULL);
meta_order++); if (metapage_cache == NULL)
metapages = (PAGE_SIZE << meta_order) / sizeof(metapage_t); return -ENOMEM;
jFYI(1, ("metapage_init: metapage size = %Zd, metapages = %d\n",
sizeof(metapage_t), metapages));
metapage_buf = metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mp_mempool_alloc,
(metapage_t *) __get_free_pages(GFP_KERNEL, meta_order); mp_mempool_free, NULL);
assert(metapage_buf);
memset(metapage_buf, 0, PAGE_SIZE << meta_order);
mp = metapage_buf; if (metapage_mempool == NULL) {
for (i = 0; i < metapages; i++, mp++) { kmem_cache_destroy(metapage_cache);
mp->flag = 0; return -ENOMEM;
set_bit(META_free, &mp->flag);
init_waitqueue_head(&mp->wait);
mp->hash_next = last;
last = mp;
} }
meta_free_list = last;
free_metapages = metapages;
/* /*
* Now the hash list * Now the hash list
*/ */
...@@ -147,64 +170,8 @@ int __init metapage_init(void) ...@@ -147,64 +170,8 @@ int __init metapage_init(void)
void metapage_exit(void) void metapage_exit(void)
{ {
free_pages((unsigned long) metapage_buf, meta_order); mempool_destroy(metapage_mempool);
free_pages((unsigned long) hash_table, hash_order); kmem_cache_destroy(metapage_cache);
metapage_buf = 0; /* This is a signal to the jfsIOwait thread */
}
/*
 * Get metapage structure from freelist
 *
 * Caller holds meta_lock
 *
 * dropped_lock: set to TRUE when the low-freelist wait below was
 * entered, FALSE otherwise.  The wait is handed spin_lock/spin_unlock
 * of meta_lock as its lock actions, so meta_lock is presumably
 * released while sleeping (confirm against __SLEEP_COND).
 *
 * Returns the metapage taken from the head of meta_free_list.
 */
static metapage_t *alloc_metapage(int *dropped_lock)
{
metapage_t *new;
*dropped_lock = FALSE;
/*
* Reserve two metapages for the lazy commit thread. Otherwise
* we may deadlock with holders of metapages waiting for tlocks
* that lazy thread should be freeing.
*/
if ((free_metapages < 3) && (current != jfsCommitTask)) {
INCREMENT(mpStat.allocwait);
*dropped_lock = TRUE;
/* wait until more than two metapages are free */
__SLEEP_COND(meta_wait, (free_metapages > 2),
spin_lock(&meta_lock), spin_unlock(&meta_lock));
}
assert(meta_free_list);
/* unlink the head of the freelist; entries are chained via hash_next */
new = meta_free_list;
meta_free_list = new->hash_next;
free_metapages--;
return new;
}
/*
 * Put metapage on freelist (holding meta_lock)
 *
 * Resets the flag word so that only META_free is set, pushes the
 * metapage onto the head of meta_free_list (chained via hash_next),
 * bumps free_metapages and wakes anyone sleeping on meta_wait.
 */
static inline void __free_metapage(metapage_t * mp)
{
mp->flag = 0;
set_bit(META_free, &mp->flag);
mp->hash_next = meta_free_list;
meta_free_list = mp;
free_metapages++;
/* alloc_metapage() sleeps on meta_wait when the freelist runs low */
wake_up(&meta_wait);
}
/*
* Put metapage on freelist (not holding meta_lock)
*/
static inline void free_metapage(metapage_t * mp)
{
spin_lock(&meta_lock);
__free_metapage(mp);
spin_unlock(&meta_lock);
} }
/* /*
...@@ -307,7 +274,6 @@ metapage_t *__get_metapage(struct inode *inode, ...@@ -307,7 +274,6 @@ metapage_t *__get_metapage(struct inode *inode,
unsigned long lblock, unsigned int size, unsigned long lblock, unsigned int size,
int absolute, unsigned long new) int absolute, unsigned long new)
{ {
int dropped_lock;
metapage_t **hash_ptr; metapage_t **hash_ptr;
int l2BlocksPerPage; int l2BlocksPerPage;
int l2bsize; int l2bsize;
...@@ -353,17 +319,43 @@ metapage_t *__get_metapage(struct inode *inode, ...@@ -353,17 +319,43 @@ metapage_t *__get_metapage(struct inode *inode,
jERROR(1, ("MetaData crosses page boundary!!\n")); jERROR(1, ("MetaData crosses page boundary!!\n"));
return NULL; return NULL;
} }
/*
* Locks held on aggregate inode pages are usually
* not held long, and they are taken in critical code
* paths (committing dirty inodes, txCommit thread)
*
* Attempt to get metapage without blocking, tapping into
* reserves if necessary.
*/
mp = NULL;
if (JFS_IP(inode)->fileset == AGGREGATE_I) {
mp = mempool_alloc(metapage_mempool, GFP_ATOMIC);
if (!mp) {
/*
* mempool is supposed to protect us from
* failing here. We will try a blocking
* call, but a deadlock is possible here
*/
printk(KERN_WARNING
"__get_metapage: atomic call to mempool_alloc failed.\n");
printk(KERN_WARNING
"Will attempt blocking call\n");
}
}
if (!mp) {
metapage_t *mp2;
mp = alloc_metapage(&dropped_lock); spin_unlock(&meta_lock);
if (dropped_lock) { mp = mempool_alloc(metapage_mempool, GFP_NOFS);
/* alloc_metapage blocked, we need to search the hash spin_lock(&meta_lock);
* again. (The goto is ugly, maybe we'll clean this
* up in the future.) /* we dropped the meta_lock, we need to search the
* hash again.
*/ */
metapage_t *mp2;
mp2 = search_hash(hash_ptr, mapping, lblock); mp2 = search_hash(hash_ptr, mapping, lblock);
if (mp2) { if (mp2) {
__free_metapage(mp); free_metapage(mp);
mp = mp2; mp = mp2;
goto page_found; goto page_found;
} }
...@@ -416,7 +408,7 @@ metapage_t *__get_metapage(struct inode *inode, ...@@ -416,7 +408,7 @@ metapage_t *__get_metapage(struct inode *inode,
remove_from_hash(mp, hash_ptr); remove_from_hash(mp, hash_ptr);
if (!absolute) if (!absolute)
list_del(&mp->inode_list); list_del(&mp->inode_list);
__free_metapage(mp); free_metapage(mp);
spin_unlock(&meta_lock); spin_unlock(&meta_lock);
return NULL; return NULL;
} }
...@@ -631,12 +623,10 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length, ...@@ -631,12 +623,10 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
len += sprintf(buffer, len += sprintf(buffer,
"JFS Metapage statistics\n" "JFS Metapage statistics\n"
"=======================\n" "=======================\n"
"metapages in use = %d\n"
"page allocations = %d\n" "page allocations = %d\n"
"page frees = %d\n" "page frees = %d\n"
"lock waits = %d\n" "lock waits = %d\n"
"allocation waits = %d\n", "allocation waits = %d\n",
metapages - free_metapages,
mpStat.pagealloc, mpStat.pagealloc,
mpStat.pagefree, mpStat.pagefree,
mpStat.lockwait, mpStat.lockwait,
......
...@@ -2898,6 +2898,8 @@ int jfs_sync(void) ...@@ -2898,6 +2898,8 @@ int jfs_sync(void)
{ {
struct inode *ip; struct inode *ip;
struct jfs_inode_info *jfs_ip; struct jfs_inode_info *jfs_ip;
int rc;
tid_t tid;
lock_kernel(); lock_kernel();
...@@ -2927,17 +2929,19 @@ int jfs_sync(void) ...@@ -2927,17 +2929,19 @@ int jfs_sync(void)
ip = &jfs_ip->vfs_inode; ip = &jfs_ip->vfs_inode;
/* /*
* We must release the TXN_LOCK since our * down_trylock returns 0 on success. This is
* IWRITE_TRYLOCK implementation may still block * inconsistent with spin_trylock.
*/ */
TXN_UNLOCK(); if (! down_trylock(&jfs_ip->commit_sem)) {
if (IWRITE_TRYLOCK(ip)) {
/* /*
* inode will be removed from anonymous list * inode will be removed from anonymous list
* when it is committed * when it is committed
*/ */
jfs_commit_inode(ip, 0); TXN_UNLOCK();
IWRITE_UNLOCK(ip); tid = txBegin(ip->i_sb, COMMIT_INODE);
rc = txCommit(tid, 1, &ip, 0);
txEnd(tid);
up(&jfs_ip->commit_sem);
/* /*
* Just to be safe. I don't know how * Just to be safe. I don't know how
* long we can run without blocking * long we can run without blocking
...@@ -2945,17 +2949,11 @@ int jfs_sync(void) ...@@ -2945,17 +2949,11 @@ int jfs_sync(void)
cond_resched(); cond_resched();
TXN_LOCK(); TXN_LOCK();
} else { } else {
/* We can't get the write lock. It may /* We can't get the commit semaphore. It may
* be held by a thread waiting for tlock's * be held by a thread waiting for tlock's
* so let's not block here. Save it to * so let's not block here. Save it to
* put back on the anon_list. * put back on the anon_list.
*/ */
/*
* We released TXN_LOCK, let's make sure
* this inode is still there
*/
TXN_LOCK();
if (TxAnchor.anon_list.next != if (TxAnchor.anon_list.next !=
&jfs_ip->anon_inode_list) &jfs_ip->anon_inode_list)
continue; continue;
......
This diff is collapsed.
...@@ -387,7 +387,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) ...@@ -387,7 +387,8 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
SLAB_CTOR_CONSTRUCTOR) { SLAB_CTOR_CONSTRUCTOR) {
INIT_LIST_HEAD(&jfs_ip->anon_inode_list); INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
INIT_LIST_HEAD(&jfs_ip->mp_list); INIT_LIST_HEAD(&jfs_ip->mp_list);
RDWRLOCK_INIT(&jfs_ip->rdwrlock); init_rwsem(&jfs_ip->rdwrlock);
init_MUTEX(&jfs_ip->commit_sem);
inode_init_once(&jfs_ip->vfs_inode); inode_init_once(&jfs_ip->vfs_inode);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment