Commit f951b5f5 authored by Linus Torvalds's avatar Linus Torvalds

Merge bk://jfs.bkbits.net/linux-2.5

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 8834e16c 8c8da5ae
......@@ -17,11 +17,16 @@ iocharset=name Character set to use for converting from Unicode to
translations. This requires CONFIG_NLS_UTF8 to be set
in the kernel .config file.
resize=value Resize the volume to <value> blocks. JFS only supports
growing a volume, not shrinking it. This option is only
valid during a remount, when the volume is mounted
read-write. The resize keyword with no value will grow
the volume to the full size of the partition.
JFS TODO list:
Plans for our near term development items
- implement online resize for extending JFS volumes
- enhance support for logfile on dedicated partition
- get access control list functionality operational
- get extended attributes functionality operational
......
......@@ -8,7 +8,7 @@ jfs-objs := super.o file.o inode.o namei.o jfs_mount.o jfs_umount.o \
jfs_xtree.o jfs_imap.o jfs_debug.o jfs_dmap.o \
jfs_unicode.o jfs_dtree.o jfs_inode.o \
jfs_extent.o symlink.o jfs_metapage.o \
jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o
jfs_logmgr.o jfs_txnmgr.o jfs_uniupr.o resize.o
EXTRA_CFLAGS += -D_JFS_4K
......
......@@ -38,9 +38,7 @@ int jfs_fsync(struct file *file, struct dentry *dentry, int datasync)
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
return rc;
IWRITE_LOCK(inode);
rc |= jfs_commit_inode(inode, 1);
IWRITE_UNLOCK(inode);
return rc ? -EIO : 0;
}
......@@ -64,10 +62,19 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
do {
tid = txBegin(ip->i_sb, 0);
/*
* The commit_sem cannot be taken before txBegin.
* txBegin may block and there is a chance the inode
* could be marked dirty and need to be committed
* before txBegin unblocks
*/
down(&JFS_IP(ip)->commit_sem);
newsize = xtTruncate(tid, ip, length,
COMMIT_TRUNCATE | COMMIT_PWMAP);
if (newsize < 0) {
txEnd(tid);
up(&JFS_IP(ip)->commit_sem);
break;
}
......@@ -76,6 +83,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
txCommit(tid, 1, &ip, 0);
txEnd(tid);
up(&JFS_IP(ip)->commit_sem);
} while (newsize > length); /* Truncate isn't always atomic */
}
......
......@@ -107,8 +107,10 @@ int jfs_commit_inode(struct inode *inode, int wait)
}
tid = txBegin(inode->i_sb, COMMIT_INODE);
down(&JFS_IP(inode)->commit_sem);
rc = txCommit(tid, 1, &inode, wait ? COMMIT_SYNC : 0);
txEnd(tid);
up(&JFS_IP(inode)->commit_sem);
return -rc;
}
......@@ -123,25 +125,19 @@ void jfs_write_inode(struct inode *inode, int wait)
!test_cflag(COMMIT_Dirty, inode))
return;
IWRITE_LOCK(inode);
if (jfs_commit_inode(inode, wait)) {
jERROR(1, ("jfs_write_inode: jfs_commit_inode failed!\n"));
}
IWRITE_UNLOCK(inode);
}
void jfs_delete_inode(struct inode *inode)
{
jFYI(1, ("In jfs_delete_inode, inode = 0x%p\n", inode));
IWRITE_LOCK(inode);
if (test_cflag(COMMIT_Freewmap, inode))
freeZeroLink(inode);
diFree(inode);
IWRITE_UNLOCK(inode);
clear_inode(inode);
}
......@@ -203,8 +199,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
if ((no_size_check ||
((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size)) &&
(xtLookup
(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
(xtLookup(ip, lblock64, 1, &xflag, &xaddr, &xlen, no_size_check)
== 0) && xlen) {
if (xflag & XAD_NOTRECORDED) {
if (!create)
......@@ -241,8 +236,7 @@ static int jfs_get_block(struct inode *ip, sector_t lblock,
* Allocate a new block
*/
#ifdef _JFS_4K
if ((rc =
extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
goto unlock;
rc = extAlloc(ip, 1, lblock64, &xad, FALSE);
if (rc)
......
......@@ -330,7 +330,7 @@ int dbSync(struct inode *ipbmap)
filemap_fdatawait(ipbmap->i_mapping);
ipbmap->i_state |= I_DIRTY;
diWriteSpecial(ipbmap);
diWriteSpecial(ipbmap, 0);
return (0);
}
......@@ -3175,7 +3175,7 @@ static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
>> wbitno);
word += 1;
word++;
} else {
/* one or more dmap words are fully contained
* within the block range. determine how many
......@@ -3187,6 +3187,7 @@ static int dbAllocDmapBU(bmap_t * bmp, dmap_t * dp, s64 blkno, int nblocks)
/* determine how many bits */
nb = nwords << L2DBWORD;
word += nwords;
}
}
......
......@@ -96,6 +96,9 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
/* This blocks if we are low on resources */
txBeginAnon(ip->i_sb);
/* Avoid race with jfs_commit_inode() */
down(&JFS_IP(ip)->commit_sem);
/* validate extent length */
if (xlen > MAXXLEN)
xlen = MAXXLEN;
......@@ -138,8 +141,8 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
* is smaller than the number of blocks per page.
*/
nxlen = xlen;
if ((rc =
extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
up(&JFS_IP(ip)->commit_sem);
return (rc);
}
......@@ -160,6 +163,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
*/
if (rc) {
dbFree(ip, nxaddr, nxlen);
up(&JFS_IP(ip)->commit_sem);
return (rc);
}
......@@ -174,6 +178,7 @@ extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, boolean_t abnr)
mark_inode_dirty(ip);
up(&JFS_IP(ip)->commit_sem);
/*
* COMMIT_SyncList flags an anonymous tlock on page that is on
* sync list.
......@@ -217,6 +222,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
/* This blocks if we are low on resources */
txBeginAnon(ip->i_sb);
down(&JFS_IP(ip)->commit_sem);
/* validate extent length */
if (nxlen > MAXXLEN)
nxlen = MAXXLEN;
......@@ -235,7 +241,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
xp->flag = 0;
if ((rc = xtUpdate(0, ip, xp)))
return (rc);
goto exit;
}
/* try to allocated the request number of blocks for the
......@@ -247,7 +253,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
* space as to satisfy the extend page.
*/
if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
return (rc);
goto exit;
delta = nxlen - xlen;
......@@ -284,7 +290,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
/* extend the extent */
if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
dbFree(ip, xaddr + xlen, delta);
return (rc);
goto exit;
}
} else {
/*
......@@ -294,7 +300,7 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
*/
if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
dbFree(ip, nxaddr, nxlen);
return (rc);
goto exit;
}
}
......@@ -325,8 +331,9 @@ int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, boolean_t abnr)
xp->flag = xflag;
mark_inode_dirty(ip);
return (0);
exit:
up(&JFS_IP(ip)->commit_sem);
return (rc);
}
......@@ -423,19 +430,13 @@ int extRecord(struct inode *ip, xad_t * xp)
txBeginAnon(ip->i_sb);
/* update the extent */
if ((rc = xtUpdate(0, ip, xp)))
return (rc);
#ifdef _STILL_TO_PORT
/* no longer abnr */
cp->cm_abnr = FALSE;
down(&JFS_IP(ip)->commit_sem);
/* mark the cbuf as modified */
cp->cm_modified = TRUE;
#endif /* _STILL_TO_PORT */
/* update the extent */
rc = xtUpdate(0, ip, xp);
return (0);
up(&JFS_IP(ip)->commit_sem);
return (rc);
}
......
......@@ -285,7 +285,7 @@ int diSync(struct inode *ipimap)
filemap_fdatawrite(ipimap->i_mapping);
filemap_fdatawait(ipimap->i_mapping);
diWriteSpecial(ipimap);
diWriteSpecial(ipimap, 0);
return (0);
}
......@@ -450,12 +450,13 @@ int diRead(struct inode *ip)
* PARAMETERS:
* sb - filesystem superblock
* inum - aggregate inode number
* secondary - 1 if secondary aggregate inode table
*
* RETURN VALUES:
* new inode - success
* NULL - i/o error.
*/
struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
uint address;
......@@ -470,21 +471,16 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
return ip;
}
/*
* If ip->i_number >= 32 (INOSPEREXT), then read from secondary
* aggregate inode table.
*/
if (inum >= INOSPEREXT) {
address =
addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
inum -= INOSPEREXT;
ASSERT(inum < INOSPEREXT);
if (secondary) {
address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
JFS_IP(ip)->ipimap = sbi->ipaimap2;
} else {
address = AITBL_OFF >> L2PSIZE;
JFS_IP(ip)->ipimap = sbi->ipaimap;
}
ASSERT(inum < INOSPEREXT);
ip->i_ino = inum;
address += inum >> 3; /* 8 inodes per 4K page */
......@@ -538,11 +534,12 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum)
*
* PARAMETERS:
* ip - special inode
* secondary - 1 if secondary aggregate inode table
*
* RETURN VALUES: none
*/
void diWriteSpecial(struct inode *ip)
void diWriteSpecial(struct inode *ip, int secondary)
{
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
uint address;
......@@ -550,24 +547,14 @@ void diWriteSpecial(struct inode *ip)
ino_t inum = ip->i_ino;
metapage_t *mp;
/*
* If ip->i_number >= 32 (INOSPEREXT), then write to secondary
* aggregate inode table.
*/
if (!(ip->i_state & I_DIRTY))
return;
ip->i_state &= ~I_DIRTY;
if (inum >= INOSPEREXT) {
address =
addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
inum -= INOSPEREXT;
ASSERT(inum < INOSPEREXT);
} else {
if (secondary)
address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
else
address = AITBL_OFF >> L2PSIZE;
}
ASSERT(inum < INOSPEREXT);
address += inum >> 3; /* 8 inodes per 4K page */
......@@ -2996,7 +2983,7 @@ duplicateIXtree(struct super_block *sb, s64 blkno, int xlen, s64 * xaddr)
/* if AIT2 ipmap2 is bad, do not try to update it */
if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT) /* s_flag */
return;
ip = diReadSpecial(sb, FILESYSTEM_I + INOSPEREXT);
ip = diReadSpecial(sb, FILESYSTEM_I, 1);
if (ip == 0) {
JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
if ((rc = readSuper(sb, &mpsuper)))
......
......@@ -146,15 +146,12 @@ extern int diSync(struct inode *);
/* external references */
extern int diUpdatePMap(struct inode *ipimap, unsigned long inum,
boolean_t is_free, tblock_t * tblk);
#ifdef _STILL_TO_PORT
extern int diExtendFS(inode_t * ipimap, inode_t * ipbmap);
#endif /* _STILL_TO_PORT */
extern int diExtendFS(struct inode *ipimap, struct inode *ipbmap);
extern int diMount(struct inode *);
extern int diUnmount(struct inode *, int);
extern int diRead(struct inode *);
extern struct inode *diReadSpecial(struct super_block *, ino_t);
extern void diWriteSpecial(struct inode *);
extern struct inode *diReadSpecial(struct super_block *, ino_t, int);
extern void diWriteSpecial(struct inode *, int);
extern void diFreeSpecial(struct inode *);
extern int diWrite(tid_t tid, struct inode *);
#endif /* _H_JFS_IMAP */
......@@ -19,6 +19,7 @@
#ifndef _H_JFS_INCORE
#define _H_JFS_INCORE
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <asm/bitops.h>
#include "jfs_types.h"
......@@ -30,14 +31,6 @@
*/
#define JFS_SUPER_MAGIC 0x3153464a /* "JFS1" */
/*
* Due to header ordering problems this can't be in jfs_lock.h
*/
typedef struct jfs_rwlock {
struct rw_semaphore rw_sem;
atomic_t in_use; /* for hacked implementation of trylock */
} jfs_rwlock_t;
/*
* JFS-private inode information
*/
......@@ -62,7 +55,19 @@ struct jfs_inode_info {
lid_t atltail; /* anonymous tlock list tail */
struct list_head anon_inode_list; /* inodes having anonymous txns */
struct list_head mp_list; /* metapages in inode's address space */
jfs_rwlock_t rdwrlock; /* read/write lock */
/*
* rdwrlock serializes xtree between reads & writes and synchronizes
* changes to special inodes. It's use would be redundant on
* directories since the i_sem taken in the VFS is sufficient.
*/
struct rw_semaphore rdwrlock;
/*
* commit_sem serializes transaction processing on an inode.
* It must be taken after beginning a transaction (txBegin), since
* dirty inodes may be committed while a new transaction on the
* inode is blocked in txBegin or TxBeginAnon
*/
struct semaphore commit_sem;
lid_t xtlid; /* lid of xtree lock on directory */
union {
struct {
......@@ -87,6 +92,12 @@ struct jfs_inode_info {
#define i_dtroot u.dir._dtroot
#define i_inline u.link._inline
#define IREAD_LOCK(ip) down_read(&JFS_IP(ip)->rdwrlock)
#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
#define IWRITE_LOCK(ip) down_write(&JFS_IP(ip)->rdwrlock)
#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock)
/*
* cflag
*/
......@@ -125,6 +136,7 @@ struct jfs_sb_info {
u32 logdev; /* 2: external log device */
uint aggregate; /* volume identifier in log record */
pxd_t logpxd; /* 8: pxd describing log */
pxd_t fsckpxd; /* 8: pxd describing fsck wkspc */
pxd_t ait2; /* 8: pxd describing AIT copy */
char uuid[16]; /* 16: 128-bit uuid for volume */
char loguuid[16]; /* 16: 128-bit uuid for log */
......
......@@ -91,40 +91,3 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
return inode;
}
/*
* NAME: iwritelocklist()
*
* FUNCTION: Lock multiple inodes in sorted order to avoid deadlock
*
*/
void iwritelocklist(int n, ...)
{
va_list ilist;
struct inode *sort[4];
struct inode *ip;
int k, m;
va_start(ilist, n);
for (k = 0; k < n; k++)
sort[k] = va_arg(ilist, struct inode *);
va_end(ilist);
/* Bubble sort in descending order */
do {
m = 0;
for (k = 0; k < n; k++)
if ((k + 1) < n
&& sort[k + 1]->i_ino > sort[k]->i_ino) {
ip = sort[k];
sort[k] = sort[k + 1];
sort[k + 1] = ip;
m++;
}
} while (m);
/* Lock them */
for (k = 0; k < n; k++) {
IWRITE_LOCK(sort[k]);
}
}
......@@ -24,63 +24,7 @@
/*
* jfs_lock.h
*
* JFS lock definition for globally referenced locks
*/
/* readers/writer lock: thread-thread */
/*
* RW semaphores do not currently have a trylock function. Since the
* implementation varies by platform, I have implemented a platform-independent
* wrapper around the rw_semaphore routines. If this turns out to be the best
* way of avoiding our locking problems, I will push to get a trylock
* implemented in the kernel, but I'd rather find a way to avoid having to
* use it.
*/
#define RDWRLOCK_T jfs_rwlock_t
static inline void RDWRLOCK_INIT(jfs_rwlock_t * Lock)
{
init_rwsem(&Lock->rw_sem);
atomic_set(&Lock->in_use, 0);
}
static inline void READ_LOCK(jfs_rwlock_t * Lock)
{
atomic_inc(&Lock->in_use);
down_read(&Lock->rw_sem);
}
static inline void READ_UNLOCK(jfs_rwlock_t * Lock)
{
up_read(&Lock->rw_sem);
atomic_dec(&Lock->in_use);
}
static inline void WRITE_LOCK(jfs_rwlock_t * Lock)
{
atomic_inc(&Lock->in_use);
down_write(&Lock->rw_sem);
}
static inline int WRITE_TRYLOCK(jfs_rwlock_t * Lock)
{
if (atomic_read(&Lock->in_use))
return 0;
WRITE_LOCK(Lock);
return 1;
}
static inline void WRITE_UNLOCK(jfs_rwlock_t * Lock)
{
up_write(&Lock->rw_sem);
atomic_dec(&Lock->in_use);
}
#define IREAD_LOCK(ip) READ_LOCK(&JFS_IP(ip)->rdwrlock)
#define IREAD_UNLOCK(ip) READ_UNLOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_LOCK(ip) WRITE_LOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_TRYLOCK(ip) WRITE_TRYLOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_UNLOCK(ip) WRITE_UNLOCK(&JFS_IP(ip)->rdwrlock)
#define IWRITE_LOCK_LIST iwritelocklist
extern void iwritelocklist(int, ...);
/*
* Conditional sleep where condition is protected by spinlock
......
......@@ -172,8 +172,6 @@ static int lmWriteRecord(log_t * log, tblock_t * tblk, lrd_t * lrd,
static int lmNextPage(log_t * log);
static int lmLogFileSystem(log_t * log, char *uuid, int activate);
static int lmLogInit(log_t * log);
static int lmLogShutdown(log_t * log);
static int lbmLogInit(log_t * log);
static void lbmLogShutdown(log_t * log);
......@@ -1037,7 +1035,7 @@ int lmLogSync(log_t * log, int nosyncwait)
* by setting syncbarrier flag.
*/
if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) {
log->syncbarrier = 1;
set_bit(log_SYNCBARRIER, &log->flag);
jFYI(1, ("log barrier on: lsn=0x%x syncpt=0x%x\n", lsn,
log->syncpt));
}
......@@ -1068,6 +1066,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
if (!(log = kmalloc(sizeof(log_t), GFP_KERNEL)))
return ENOMEM;
memset(log, 0, sizeof(log_t));
init_waitqueue_head(&log->syncwait);
log->sb = sb; /* This should be a list */
......@@ -1080,7 +1079,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
* file system to log have 1-to-1 relationship;
*/
log->flag = JFS_INLINELOG;
set_bit(log_INLINELOG, &log->flag);
log->bdev = sb->s_bdev;
log->base = addressPXD(&JFS_SBI(sb)->logpxd);
log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
......@@ -1175,7 +1174,7 @@ int lmLogOpen(struct super_block *sb, log_t ** logptr)
*
* serialization: single first open thread
*/
static int lmLogInit(log_t * log)
int lmLogInit(log_t * log)
{
int rc = 0;
lrd_t lrd;
......@@ -1203,7 +1202,7 @@ static int lmLogInit(log_t * log)
*/
if (!(log->flag & JFS_INLINELOG))
if (!test_bit(log_INLINELOG, &log->flag))
log->l2bsize = 12; /* XXX kludge alert XXX */
if ((rc = lbmRead(log, 1, &bpsuper)))
goto errout10;
......@@ -1224,7 +1223,7 @@ static int lmLogInit(log_t * log)
}
/* initialize log inode from log superblock */
if (log->flag & JFS_INLINELOG) {
if (test_bit(log_INLINELOG,&log->flag)) {
if (log->size != le32_to_cpu(logsuper->size)) {
rc = EINVAL;
goto errout20;
......@@ -1244,10 +1243,6 @@ static int lmLogInit(log_t * log)
log, (unsigned long long) log->base, log->size));
}
log->flag |= JFS_GROUPCOMMIT;
/*
log->flag |= JFS_LAZYCOMMIT;
*/
log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
......@@ -1309,7 +1304,6 @@ static int lmLogInit(log_t * log)
log->syncpt = lsn;
log->sync = log->syncpt;
log->nextsync = LOGSYNC_DELTA(log->logsize);
init_waitqueue_head(&log->syncwait);
jFYI(1, ("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x\n",
log->lsn, log->syncpt, log->sync));
......@@ -1377,7 +1371,7 @@ int lmLogClose(struct super_block *sb, log_t * log)
jFYI(1, ("lmLogClose: log:0x%p\n", log));
if (!(log->flag & JFS_INLINELOG))
if (!test_bit(log_INLINELOG, &log->flag))
goto externalLog;
/*
......@@ -1445,7 +1439,7 @@ void lmLogWait(log_t *log)
*
* serialization: single last close thread
*/
static int lmLogShutdown(log_t * log)
int lmLogShutdown(log_t * log)
{
int rc;
lrd_t lrd;
......@@ -1524,8 +1518,6 @@ static int lmLogShutdown(log_t * log)
*
* RETURN: 0 - success
* errors returned by vms_iowait().
*
* serialization: IWRITE_LOCK(log inode) held on entry/exit
*/
static int lmLogFileSystem(log_t * log, char *uuid, int activate)
{
......@@ -1578,37 +1570,6 @@ static int lmLogFileSystem(log_t * log, char *uuid, int activate)
return rc;
}
/*
* lmLogQuiesce()
*/
int lmLogQuiesce(log_t * log)
{
int rc;
rc = lmLogShutdown(log);
return rc;
}
/*
* lmLogResume()
*/
int lmLogResume(log_t * log, struct super_block *sb)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
int rc;
log->base = addressPXD(&sbi->logpxd);
log->size =
(lengthPXD(&sbi->logpxd) << sb->s_blocksize_bits) >> L2LOGPSIZE;
rc = lmLogInit(log);
return rc;
}
/*
* log buffer manager (lbm)
* ------------------------
......@@ -2192,42 +2153,40 @@ int jfsIOWait(void *arg)
return 0;
}
#ifdef _STILL_TO_PORT
/*
* NAME: lmLogFormat()/jfs_logform()
*
* FUNCTION: format file system log (ref. jfs_logform()).
* FUNCTION: format file system log
*
* PARAMETERS:
* log - log inode (with common mount inode base);
* logAddress - start address of log space in FS block;
* log - volume log
* logAddress - start address of log space in FS block
* logSize - length of log space in FS block;
*
* RETURN: 0 - success
* -1 - i/o error
* RETURN: 0 - success
* -EIO - i/o error
*
* XXX: We're synchronously writing one page at a time. This needs to
* be improved by writing multiple pages at once.
*/
int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
int lmLogFormat(log_t *log, s64 logAddress, int logSize)
{
int rc = 0;
cbuf_t *bp;
int rc = -EIO;
struct jfs_sb_info *sbi = JFS_SBI(log->sb);
logsuper_t *logsuper;
logpage_t *lp;
int lspn; /* log sequence page number */
struct lrd *lrd_ptr;
int npbperpage, npages;
int npages = 0;
lbuf_t *bp;
jFYI(0, ("lmLogFormat: logAddress:%Ld logSize:%d\n",
logAddress, logSize));
(long long)logAddress, logSize));
/* allocate a JFS buffer */
bp = rawAllocate();
/* allocate a log buffer */
bp = lbmAllocate(log, 1);
/* map the logical block address to physical block address */
bp->cm_blkno = logAddress << ipmnt->i_l2bfactor;
npbperpage = LOGPSIZE >> ipmnt->i_l2pbsize;
npages = logSize / (LOGPSIZE >> ipmnt->i_l2bsize);
npages = logSize >> sbi->l2nbperpage;
/*
* log space:
......@@ -2241,20 +2200,22 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
/*
* init log superblock: log page 1
*/
logsuper = (logsuper_t *) bp->cm_cdata;
logsuper = (logsuper_t *) bp->l_ldata;
logsuper->magic = cpu_to_le32(LOGMAGIC);
logsuper->version = cpu_to_le32(LOGVERSION);
logsuper->state = cpu_to_le32(LOGREDONE);
logsuper->flag = cpu_to_le32(ipmnt->i_mntflag); /* ? */
logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */
logsuper->size = cpu_to_le32(npages);
logsuper->bsize = cpu_to_le32(ipmnt->i_bsize);
logsuper->l2bsize = cpu_to_le32(ipmnt->i_l2bsize);
logsuper->end =
cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
logsuper->bsize = cpu_to_le32(sbi->bsize);
logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);
bp->cm_blkno += npbperpage;
rawWrite(ipmnt, bp, 0);
bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
bp->l_blkno = logAddress + sbi->nbperpage;
lbmStartIO(bp);
if ((rc = lbmIOWait(bp, 0)))
goto exit;
/*
* init pages 2 to npages-1 as log data pages:
......@@ -2270,7 +2231,6 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
* a circular file for the log records;
* lpsn grows by 1 monotonically as each log page is written
* to the circular file of the log;
* Since the AIX DUMMY log record is dropped for this XJFS,
* and setLogpage() will not reset the page number even if
* the eor is equal to LOGPHDRSIZE. In order for binary search
* still work in find log end process, we have to simulate the
......@@ -2279,8 +2239,7 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
* the succeeding log pages will have ascending order of
* the lspn starting from 0, ... (N-2)
*/
lp = (logpage_t *) bp->cm_cdata;
lp = (logpage_t *) bp->l_ldata;
/*
* initialize 1st log page to be written: lpsn = N - 1,
* write a SYNCPT log record is written to this page
......@@ -2295,8 +2254,11 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
lrd_ptr->length = 0;
lrd_ptr->log.syncpt.sync = 0;
bp->cm_blkno += npbperpage;
rawWrite(ipmnt, bp, 0);
bp->l_blkno += sbi->nbperpage;
bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
lbmStartIO(bp);
if ((rc = lbmIOWait(bp, 0)))
goto exit;
/*
* initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
......@@ -2305,20 +2267,23 @@ int lmLogFormat(inode_t * ipmnt, s64 logAddress, int logSize)
lp->h.page = lp->t.page = cpu_to_le32(lspn);
lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);
bp->cm_blkno += npbperpage;
rawWrite(ipmnt, bp, 0);
bp->l_blkno += sbi->nbperpage;
bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
lbmStartIO(bp);
if ((rc = lbmIOWait(bp, 0)))
goto exit;
}
rc = 0;
exit:
/*
* finalize log
*/
/* release the buffer */
rawRelease(bp);
lbmFree(bp);
return rc;
}
#endif /* _STILL_TO_PORT */
#ifdef CONFIG_JFS_STATISTICS
int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length,
......
......@@ -379,8 +379,7 @@ typedef struct jfs_log {
int size; /* 4: log size in log page (in page) */
int l2bsize; /* 4: log2 of bsize */
uint flag; /* 4: flag */
uint state; /* 4: state */
long flag; /* 4: flag */
struct lbuf *lbuf_free; /* 4: free lbufs */
wait_queue_head_t free_wait; /* 4: */
......@@ -396,7 +395,6 @@ typedef struct jfs_log {
/* syncpt */
int nextsync; /* 4: bytes to write before next syncpt */
int active; /* 4: */
int syncbarrier; /* 4: */
wait_queue_head_t syncwait; /* 4: */
/* commit */
......@@ -420,6 +418,13 @@ typedef struct jfs_log {
char uuid[16]; /* 16: 128-bit uuid of log device */
} log_t;
/*
* Log flag
*/
#define log_INLINELOG 1
#define log_SYNCBARRIER 2
#define log_QUIESCE 3
/*
* group commit flag
*/
......@@ -499,8 +504,8 @@ extern int lmLogOpen(struct super_block *sb, log_t ** log);
extern void lmLogWait(log_t * log);
extern int lmLogClose(struct super_block *sb, log_t * log);
extern int lmLogSync(log_t * log, int nosyncwait);
extern int lmLogQuiesce(log_t * log);
extern int lmLogResume(log_t * log, struct super_block *sb);
extern int lmLogFormat(struct super_block *sb, s64 logAddress, int logSize);
extern int lmLogShutdown(log_t * log);
extern int lmLogInit(log_t * log);
extern int lmLogFormat(log_t *log, s64 logAddress, int logSize);
#endif /* _H_JFS_LOGMGR */
......@@ -20,6 +20,7 @@
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
......@@ -27,11 +28,6 @@
#include "jfs_debug.h"
extern struct task_struct *jfsCommitTask;
static unsigned int metapages = 1024; /* ??? Need a better number */
static unsigned int free_metapages;
static metapage_t *metapage_buf;
static unsigned long meta_order;
static metapage_t *meta_free_list = NULL;
static spinlock_t meta_lock = SPIN_LOCK_UNLOCKED;
static wait_queue_head_t meta_wait;
......@@ -93,12 +89,51 @@ static inline void lock_metapage(struct metapage *mp)
__lock_metapage(mp);
}
int __init metapage_init(void)
#define METAPOOL_MIN_PAGES 32
static kmem_cache_t *metapage_cache;
static mempool_t *metapage_mempool;
static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
{
int i;
metapage_t *last = NULL;
metapage_t *mp;
metapage_t *mp = (metapage_t *)foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
mp->lid = 0;
mp->lsn = 0;
mp->flag = 0;
mp->data = NULL;
mp->clsn = 0;
mp->log = NULL;
set_bit(META_free, &mp->flag);
init_waitqueue_head(&mp->wait);
}
}
static inline metapage_t *alloc_metapage(int no_wait)
{
return mempool_alloc(metapage_mempool, no_wait ? GFP_ATOMIC : GFP_NOFS);
}
static inline void free_metapage(metapage_t *mp)
{
mp->flag = 0;
set_bit(META_free, &mp->flag);
mempool_free(mp, metapage_mempool);
}
static void *mp_mempool_alloc(int gfp_mask, void *pool_data)
{
return kmem_cache_alloc(metapage_cache, gfp_mask);
}
static void mp_mempool_free(void *element, void *pool_data)
{
return kmem_cache_free(metapage_cache, element);
}
int __init metapage_init(void)
{
/*
* Initialize wait queue
*/
......@@ -107,30 +142,18 @@ int __init metapage_init(void)
/*
* Allocate the metapage structures
*/
for (meta_order = 0;
((PAGE_SIZE << meta_order) / sizeof(metapage_t)) < metapages;
meta_order++);
metapages = (PAGE_SIZE << meta_order) / sizeof(metapage_t);
jFYI(1, ("metapage_init: metapage size = %Zd, metapages = %d\n",
sizeof(metapage_t), metapages));
metapage_cache = kmem_cache_create("jfs_mp", sizeof(metapage_t), 0, 0,
init_once, NULL);
if (metapage_cache == NULL)
return -ENOMEM;
metapage_buf =
(metapage_t *) __get_free_pages(GFP_KERNEL, meta_order);
assert(metapage_buf);
memset(metapage_buf, 0, PAGE_SIZE << meta_order);
metapage_mempool = mempool_create(METAPOOL_MIN_PAGES, mp_mempool_alloc,
mp_mempool_free, NULL);
mp = metapage_buf;
for (i = 0; i < metapages; i++, mp++) {
mp->flag = 0;
set_bit(META_free, &mp->flag);
init_waitqueue_head(&mp->wait);
mp->hash_next = last;
last = mp;
if (metapage_mempool == NULL) {
kmem_cache_destroy(metapage_cache);
return -ENOMEM;
}
meta_free_list = last;
free_metapages = metapages;
/*
* Now the hash list
*/
......@@ -147,64 +170,8 @@ int __init metapage_init(void)
void metapage_exit(void)
{
free_pages((unsigned long) metapage_buf, meta_order);
free_pages((unsigned long) hash_table, hash_order);
metapage_buf = 0; /* This is a signal to the jfsIOwait thread */
}
/*
* Get metapage structure from freelist
*
* Caller holds meta_lock
*/
static metapage_t *alloc_metapage(int *dropped_lock)
{
metapage_t *new;
*dropped_lock = FALSE;
/*
* Reserve two metapages for the lazy commit thread. Otherwise
* we may deadlock with holders of metapages waiting for tlocks
* that lazy thread should be freeing.
*/
if ((free_metapages < 3) && (current != jfsCommitTask)) {
INCREMENT(mpStat.allocwait);
*dropped_lock = TRUE;
__SLEEP_COND(meta_wait, (free_metapages > 2),
spin_lock(&meta_lock), spin_unlock(&meta_lock));
}
assert(meta_free_list);
new = meta_free_list;
meta_free_list = new->hash_next;
free_metapages--;
return new;
}
/*
* Put metapage on freelist (holding meta_lock)
*/
static inline void __free_metapage(metapage_t * mp)
{
mp->flag = 0;
set_bit(META_free, &mp->flag);
mp->hash_next = meta_free_list;
meta_free_list = mp;
free_metapages++;
wake_up(&meta_wait);
}
/*
* Put metapage on freelist (not holding meta_lock)
*/
static inline void free_metapage(metapage_t * mp)
{
spin_lock(&meta_lock);
__free_metapage(mp);
spin_unlock(&meta_lock);
mempool_destroy(metapage_mempool);
kmem_cache_destroy(metapage_cache);
}
/*
......@@ -295,19 +262,18 @@ static int direct_bmap(struct address_space *mapping, long block)
}
struct address_space_operations direct_aops = {
readpage: direct_readpage,
writepage: direct_writepage,
sync_page: block_sync_page,
prepare_write: direct_prepare_write,
commit_write: generic_commit_write,
bmap: direct_bmap,
.readpage = direct_readpage,
.writepage = direct_writepage,
.sync_page = block_sync_page,
.prepare_write = direct_prepare_write,
.commit_write = generic_commit_write,
.bmap = direct_bmap,
};
metapage_t *__get_metapage(struct inode *inode,
unsigned long lblock, unsigned int size,
int absolute, unsigned long new)
{
int dropped_lock;
metapage_t **hash_ptr;
int l2BlocksPerPage;
int l2bsize;
......@@ -353,17 +319,43 @@ metapage_t *__get_metapage(struct inode *inode,
jERROR(1, ("MetaData crosses page boundary!!\n"));
return NULL;
}
/*
* Locks held on aggregate inode pages are usually
* not held long, and they are taken in critical code
* paths (committing dirty inodes, txCommit thread)
*
* Attempt to get metapage without blocking, tapping into
* reserves if necessary.
*/
mp = NULL;
if (JFS_IP(inode)->fileset == AGGREGATE_I) {
mp = mempool_alloc(metapage_mempool, GFP_ATOMIC);
if (!mp) {
/*
* mempool is supposed to protect us from
* failing here. We will try a blocking
* call, but a deadlock is possible here
*/
printk(KERN_WARNING
"__get_metapage: atomic call to mempool_alloc failed.\n");
printk(KERN_WARNING
"Will attempt blocking call\n");
}
}
if (!mp) {
metapage_t *mp2;
mp = alloc_metapage(&dropped_lock);
if (dropped_lock) {
/* alloc_metapage blocked, we need to search the hash
* again. (The goto is ugly, maybe we'll clean this
* up in the future.)
spin_unlock(&meta_lock);
mp = mempool_alloc(metapage_mempool, GFP_NOFS);
spin_lock(&meta_lock);
/* we dropped the meta_lock, we need to search the
* hash again.
*/
metapage_t *mp2;
mp2 = search_hash(hash_ptr, mapping, lblock);
if (mp2) {
__free_metapage(mp);
free_metapage(mp);
mp = mp2;
goto page_found;
}
......@@ -416,7 +408,7 @@ metapage_t *__get_metapage(struct inode *inode,
remove_from_hash(mp, hash_ptr);
if (!absolute)
list_del(&mp->inode_list);
__free_metapage(mp);
free_metapage(mp);
spin_unlock(&meta_lock);
return NULL;
}
......@@ -631,12 +623,10 @@ int jfs_mpstat_read(char *buffer, char **start, off_t offset, int length,
len += sprintf(buffer,
"JFS Metapage statistics\n"
"=======================\n"
"metapages in use = %d\n"
"page allocations = %d\n"
"page frees = %d\n"
"lock waits = %d\n"
"allocation waits = %d\n",
metapages - free_metapages,
mpStat.pagealloc,
mpStat.pagefree,
mpStat.lockwait,
......
......@@ -95,7 +95,7 @@ int jfs_mount(struct super_block *sb)
goto errout20;
}
ipaimap = diReadSpecial(sb, AGGREGATE_I);
ipaimap = diReadSpecial(sb, AGGREGATE_I, 0);
if (ipaimap == NULL) {
jERROR(1, ("jfs_mount: Faild to read AGGREGATE_I\n"));
rc = EIO;
......@@ -118,7 +118,7 @@ int jfs_mount(struct super_block *sb)
/*
* open aggregate block allocation map
*/
ipbmap = diReadSpecial(sb, BMAP_I);
ipbmap = diReadSpecial(sb, BMAP_I, 0);
if (ipbmap == NULL) {
rc = EIO;
goto errout22;
......@@ -148,7 +148,7 @@ int jfs_mount(struct super_block *sb)
* table.
*/
if ((sbi->mntflag & JFS_BAD_SAIT) == 0) {
ipaimap2 = diReadSpecial(sb, AGGREGATE_I + INOSPEREXT);
ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1);
if (ipaimap2 == 0) {
jERROR(1,
("jfs_mount: Faild to read AGGREGATE_I\n"));
......@@ -178,7 +178,7 @@ int jfs_mount(struct super_block *sb)
/*
* open fileset inode allocation map (aka fileset inode)
*/
ipimap = diReadSpecial(sb, FILESYSTEM_I);
ipimap = diReadSpecial(sb, FILESYSTEM_I, 0);
if (ipimap == NULL) {
jERROR(1, ("jfs_mount: Failed to read FILESYSTEM_I\n"));
/* open fileset secondary inode allocation map */
......@@ -410,6 +410,7 @@ static int chkSuper(struct super_block *sb)
memcpy(sbi->uuid, j_sb->s_uuid, sizeof(sbi->uuid));
memcpy(sbi->loguuid, j_sb->s_loguuid, sizeof(sbi->uuid));
}
sbi->fsckpxd = j_sb->s_fsckpxd;
sbi->ait2 = j_sb->s_ait2;
out:
......
......@@ -318,11 +318,12 @@ tid_t txBegin(struct super_block *sb, int flag)
TXN_LOCK();
retry:
if (flag != COMMIT_FORCE) {
if (!(flag & COMMIT_FORCE)) {
/*
* synchronize with logsync barrier
*/
if (log->syncbarrier) {
if (test_bit(log_SYNCBARRIER, &log->flag) ||
test_bit(log_QUIESCE, &log->flag)) {
TXN_SLEEP(&log->syncwait);
goto retry;
}
......@@ -330,8 +331,8 @@ tid_t txBegin(struct super_block *sb, int flag)
if (flag == 0) {
/*
* Don't begin transaction if we're getting starved for tlocks
* unless COMMIT_FORCE (imap changes) or COMMIT_INODE (which
* may ultimately free tlocks)
* unless COMMIT_FORCE or COMMIT_INODE (which may ultimately
* free tlocks)
*/
if (TlocksLow) {
TXN_SLEEP(&TxAnchor.lowlockwait);
......@@ -411,7 +412,8 @@ void txBeginAnon(struct super_block *sb)
/*
* synchronize with logsync barrier
*/
if (log->syncbarrier) {
if (test_bit(log_SYNCBARRIER, &log->flag) ||
test_bit(log_QUIESCE, &log->flag)) {
TXN_SLEEP(&log->syncwait);
goto retry;
}
......@@ -490,14 +492,14 @@ void txEnd(tid_t tid)
/*
* synchronize with logsync barrier
*/
if (log->syncbarrier && log->active == 0) {
if (test_bit(log_SYNCBARRIER, &log->flag) && log->active == 0) {
/* forward log syncpt */
/* lmSync(log); */
jFYI(1, (" log barrier off: 0x%x\n", log->lsn));
/* enable new transactions start */
log->syncbarrier = 0;
clear_bit(log_SYNCBARRIER, &log->flag);
/* wakeup all waitors for logsync barrier */
TXN_WAKEUP(&log->syncwait);
......@@ -823,36 +825,21 @@ static void txRelease(tblock_t * tblk)
*
* FUNCTION: Initiates pageout of pages modified by tid in journalled
* objects and frees their lockwords.
*
* PARAMETER:
* flag -
*
* RETURN: Errors from subroutines.
*/
static void txUnlock(tblock_t * tblk, int flag)
static void txUnlock(tblock_t * tblk)
{
tlock_t *tlck;
linelock_t *linelock;
lid_t lid, next, llid, k;
metapage_t *mp;
log_t *log;
int force;
int difft, diffp;
jFYI(1, ("txUnlock: tblk = 0x%p\n", tblk));
log = (log_t *) JFS_SBI(tblk->sb)->log;
force = flag & COMMIT_FLUSH;
if (log->syncbarrier)
force |= COMMIT_FORCE;
/*
* mark page under tlock homeok (its log has been written):
* if caller has specified FORCE (e.g., iRecycle()), or
* if syncwait for the log is set (i.e., the log sync point
* has fallen behind), or
* if syncpt is set for the page, or
* if the page is new, initiate pageout;
* otherwise, leave the page in memory.
*/
for (lid = tblk->next; lid; lid = next) {
tlck = lid_to_tlock(lid);
......@@ -1268,7 +1255,7 @@ int txCommit(tid_t tid, /* transaction identifier */
txRelease(tblk);
if ((tblk->flag & tblkGC_LAZY) == 0)
txUnlock(tblk, flag);
txUnlock(tblk);
/*
......@@ -2753,7 +2740,7 @@ void txLazyCommit(tblock_t * tblk)
spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK
if (tblk->flag & tblkGC_LAZY) {
txUnlock(tblk, 0);
txUnlock(tblk);
tblk->flag &= ~tblkGC_LAZY;
txEnd(tblk - TxBlock); /* Convert back to tid */
}
......@@ -2887,6 +2874,77 @@ static void LogSyncRelease(metapage_t * mp)
release_metapage(mp);
}
/*
* txQuiesce
*
* Block all new transactions and push anonymous transactions to
* completion
*
* This does almost the same thing as jfs_sync below. We don't
* worry about deadlocking when TlocksLow is set, since we would
* expect jfs_sync to get us out of that jam.
*/
void txQuiesce(struct super_block *sb)
{
struct inode *ip;
struct jfs_inode_info *jfs_ip;
log_t *log = JFS_SBI(sb)->log;
int rc;
tid_t tid;
set_bit(log_QUIESCE, &log->flag);
TXN_LOCK();
restart:
while (!list_empty(&TxAnchor.anon_list)) {
jfs_ip = list_entry(TxAnchor.anon_list.next,
struct jfs_inode_info,
anon_inode_list);
ip = &jfs_ip->vfs_inode;
/*
* inode will be removed from anonymous list
* when it is committed
*/
TXN_UNLOCK();
tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE);
down(&jfs_ip->commit_sem);
rc = txCommit(tid, 1, &ip, 0);
txEnd(tid);
up(&jfs_ip->commit_sem);
/*
* Just to be safe. I don't know how
* long we can run without blocking
*/
cond_resched();
TXN_LOCK();
}
/*
* If jfs_sync is running in parallel, there could be some inodes
* on anon_list2. Let's check.
*/
if (!list_empty(&TxAnchor.anon_list2)) {
list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
INIT_LIST_HEAD(&TxAnchor.anon_list2);
goto restart;
}
TXN_UNLOCK();
}
/*
* txResume()
*
* Allows transactions to start again following txQuiesce
*/
void txResume(struct super_block *sb)
{
log_t *log = JFS_SBI(sb)->log;
clear_bit(log_QUIESCE, &log->flag);
TXN_WAKEUP(&log->syncwait);
}
/*
* jfs_sync(void)
*
......@@ -2898,6 +2956,8 @@ int jfs_sync(void)
{
struct inode *ip;
struct jfs_inode_info *jfs_ip;
int rc;
tid_t tid;
lock_kernel();
......@@ -2927,17 +2987,20 @@ int jfs_sync(void)
ip = &jfs_ip->vfs_inode;
/*
* We must release the TXN_LOCK since our
* IWRITE_TRYLOCK implementation may still block
* down_trylock returns 0 on success. This is
* inconsistent with spin_trylock.
*/
TXN_UNLOCK();
if (IWRITE_TRYLOCK(ip)) {
if (! down_trylock(&jfs_ip->commit_sem)) {
/*
* inode will be removed from anonymous list
* when it is committed
*/
jfs_commit_inode(ip, 0);
IWRITE_UNLOCK(ip);
TXN_UNLOCK();
tid = txBegin(ip->i_sb,
COMMIT_INODE | COMMIT_FORCE);
rc = txCommit(tid, 1, &ip, 0);
txEnd(tid);
up(&jfs_ip->commit_sem);
/*
* Just to be safe. I don't know how
* long we can run without blocking
......@@ -2945,21 +3008,12 @@ int jfs_sync(void)
cond_resched();
TXN_LOCK();
} else {
/* We can't get the write lock. It may
/* We can't get the commit semaphore. It may
* be held by a thread waiting for tlock's
* so let's not block here. Save it to
* put back on the anon_list.
*/
/*
* We released TXN_LOCK, let's make sure
* this inode is still there
*/
TXN_LOCK();
if (TxAnchor.anon_list.next !=
&jfs_ip->anon_inode_list)
continue;
/* Take off anon_list */
list_del(&jfs_ip->anon_inode_list);
......
......@@ -310,4 +310,7 @@ extern void txFreelock(struct inode *ip);
extern int lmLog(log_t * log, tblock_t * tblk, lrd_t * lrd, tlock_t * tlck);
extern void txQuiesce(struct super_block *sb);
extern void txResume(struct super_block *sb);
#endif /* _H_JFS_TXNMGR */
......@@ -2373,7 +2373,6 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
}
#ifdef _STILL_TO_PORT
/*
* xtAppend()
*
......@@ -2392,7 +2391,7 @@ printf("xtUpdate.updateLeft.split p:0x%p\n", p);
* return:
*/
int xtAppend(tid_t tid, /* transaction id */
struct inode *ip, int xflag, s64 xoff, s32 maxblocks, /* @GD1 */
struct inode *ip, int xflag, s64 xoff, s32 maxblocks,
s32 * xlenp, /* (in/out) */
s64 * xaddrp, /* (in/out) */
int flag)
......@@ -2460,7 +2459,7 @@ int xtAppend(tid_t tid, /* transaction id */
pxdlist.maxnpxd = pxdlist.npxd = 0;
pxd = &pxdlist.pxd[0];
nblocks = JFS_SBI(ip->i_sb)->nbperpage;
for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) { /* @GD1 */
for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {
if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
PXDaddress(pxd, xaddr);
PXDlength(pxd, nblocks);
......@@ -2475,7 +2474,7 @@ int xtAppend(tid_t tid, /* transaction id */
goto out;
}
xlen = min(xlen, maxblocks); /* @GD1 */
xlen = min(xlen, maxblocks);
/*
* allocate data extent requested
......@@ -2528,7 +2527,7 @@ int xtAppend(tid_t tid, /* transaction id */
cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
xtlck->lwm.offset =
(xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
(xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
xtlck->lwm.offset;
......@@ -2541,7 +2540,7 @@ int xtAppend(tid_t tid, /* transaction id */
return rc;
}
#ifdef _STILL_TO_PORT
/* - TBD for defragmentaion/reorganization -
*
......
......@@ -69,8 +69,6 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
jFYI(1, ("jfs_create: dip:0x%p name:%s\n", dip, dentry->d_name.name));
IWRITE_LOCK(dip);
/*
* search parent directory for entry/freespace
* (dtSearch() returns parent directory page pinned)
......@@ -91,12 +89,12 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
tid = txBegin(dip->i_sb, 0);
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
jERROR(1, ("jfs_create: dtSearch returned %d\n", rc));
ip->i_nlink = 0;
iput(ip);
txEnd(tid);
goto out2;
goto out3;
}
tblk = tid_to_tblock(tid);
......@@ -118,16 +116,11 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
ino = ip->i_ino;
if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
jERROR(1, ("jfs_create: dtInsert returned %d\n", rc));
/* discard new inode */
ip->i_nlink = 0;
iput(ip);
if (rc == EIO)
txAbort(tid, 1); /* Marks Filesystem dirty */
else
txAbort(tid, 0); /* Filesystem full */
txEnd(tid);
goto out2;
goto out3;
}
ip->i_op = &jfs_file_inode_operations;
......@@ -143,14 +136,21 @@ int jfs_create(struct inode *dip, struct dentry *dentry, int mode)
mark_inode_dirty(dip);
rc = txCommit(tid, 2, &iplist[0], 0);
out3:
txEnd(tid);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
ip->i_nlink = 0;
iput(ip);
}
out2:
free_UCSname(&dname);
out1:
IWRITE_UNLOCK(dip);
jFYI(1, ("jfs_create: rc:%d\n", -rc));
return -rc;
}
......@@ -184,8 +184,6 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
jFYI(1, ("jfs_mkdir: dip:0x%p name:%s\n", dip, dentry->d_name.name));
IWRITE_LOCK(dip);
/* link count overflow on parent directory ? */
if (dip->i_nlink == JFS_LINK_MAX) {
rc = EMLINK;
......@@ -212,12 +210,12 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
tid = txBegin(dip->i_sb, 0);
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE))) {
jERROR(1, ("jfs_mkdir: dtSearch returned %d\n", rc));
ip->i_nlink = 0;
iput(ip);
txEnd(tid);
goto out2;
goto out3;
}
tblk = tid_to_tblock(tid);
......@@ -239,16 +237,12 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
ino = ip->i_ino;
if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
jERROR(1, ("jfs_mkdir: dtInsert returned %d\n", rc));
/* discard new directory inode */
ip->i_nlink = 0;
iput(ip);
if (rc == EIO)
txAbort(tid, 1); /* Marks Filesystem dirty */
else
txAbort(tid, 0); /* Filesystem full */
txEnd(tid);
goto out2;
goto out3;
}
ip->i_nlink = 2; /* for '.' */
......@@ -267,15 +261,21 @@ int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
mark_inode_dirty(dip);
rc = txCommit(tid, 2, &iplist[0], 0);
out3:
txEnd(tid);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
ip->i_nlink = 0;
iput(ip);
}
out2:
free_UCSname(&dname);
out1:
IWRITE_UNLOCK(dip);
jFYI(1, ("jfs_mkdir: rc:%d\n", -rc));
return -rc;
}
......@@ -311,24 +311,21 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
jFYI(1, ("jfs_rmdir: dip:0x%p name:%s\n", dip, dentry->d_name.name));
IWRITE_LOCK_LIST(2, dip, ip);
/* directory must be empty to be removed */
if (!dtEmpty(ip)) {
IWRITE_UNLOCK(ip);
IWRITE_UNLOCK(dip);
rc = ENOTEMPTY;
goto out;
}
if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab))) {
IWRITE_UNLOCK(ip);
IWRITE_UNLOCK(dip);
goto out;
}
tid = txBegin(dip->i_sb, 0);
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
iplist[0] = dip;
iplist[1] = ip;
......@@ -345,9 +342,8 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
if (rc == EIO)
txAbort(tid, 1);
txEnd(tid);
IWRITE_UNLOCK(ip);
IWRITE_UNLOCK(dip);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
goto out2;
}
......@@ -384,7 +380,8 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
txEnd(tid);
IWRITE_UNLOCK(ip);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
/*
* Truncating the directory index table is not guaranteed. It
......@@ -397,8 +394,6 @@ int jfs_rmdir(struct inode *dip, struct dentry *dentry)
clear_cflag(COMMIT_Stale, dip);
}
IWRITE_UNLOCK(dip);
out2:
free_UCSname(&dname);
......@@ -444,10 +439,13 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
goto out;
IWRITE_LOCK_LIST(2, ip, dip);
IWRITE_LOCK(ip);
tid = txBegin(dip->i_sb, 0);
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
iplist[0] = dip;
iplist[1] = ip;
......@@ -460,8 +458,9 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
if (rc == EIO)
txAbort(tid, 1); /* Marks FS Dirty */
txEnd(tid);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
IWRITE_UNLOCK(ip);
IWRITE_UNLOCK(dip);
goto out1;
}
......@@ -483,8 +482,9 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
if ((new_size = commitZeroLink(tid, ip)) < 0) {
txAbort(tid, 1); /* Marks FS Dirty */
txEnd(tid);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
IWRITE_UNLOCK(ip);
IWRITE_UNLOCK(dip);
rc = -new_size; /* We return -rc */
goto out1;
}
......@@ -511,8 +511,13 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
txEnd(tid);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
while (new_size && (rc == 0)) {
tid = txBegin(dip->i_sb, 0);
down(&JFS_IP(ip)->commit_sem);
new_size = xtTruncate_pmap(tid, ip, new_size);
if (new_size < 0) {
txAbort(tid, 1); /* Marks FS Dirty */
......@@ -520,6 +525,7 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
} else
rc = txCommit(tid, 2, &iplist[0], COMMIT_SYNC);
txEnd(tid);
up(&JFS_IP(ip)->commit_sem);
}
if (ip->i_nlink == 0)
......@@ -539,8 +545,6 @@ int jfs_unlink(struct inode *dip, struct dentry *dentry)
clear_cflag(COMMIT_Stale, dip);
}
IWRITE_UNLOCK(dip);
out1:
free_UCSname(&dname);
out:
......@@ -764,10 +768,11 @@ int jfs_link(struct dentry *old_dentry,
("jfs_link: %s %s\n", old_dentry->d_name.name,
dentry->d_name.name));
IWRITE_LOCK_LIST(2, dir, ip);
tid = txBegin(ip->i_sb, 0);
down(&JFS_IP(dir)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
if (ip->i_nlink == JFS_LINK_MAX) {
rc = EMLINK;
goto out;
......@@ -801,11 +806,11 @@ int jfs_link(struct dentry *old_dentry,
rc = txCommit(tid, 2, &iplist[0], 0);
out:
IWRITE_UNLOCK(dir);
IWRITE_UNLOCK(ip);
txEnd(tid);
up(&JFS_IP(dir)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
jFYI(1, ("jfs_link: rc:%d\n", rc));
return -rc;
}
......@@ -849,12 +854,8 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
jFYI(1, ("jfs_symlink: dip:0x%p name:%s\n", dip, name));
IWRITE_LOCK(dip);
ssize = strlen(name) + 1;
tid = txBegin(dip->i_sb, 0);
/*
* search parent directory for entry/freespace
* (dtSearch() returns parent directory page pinned)
......@@ -863,23 +864,24 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
goto out1;
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE)))
goto out2;
/*
* allocate on-disk/in-memory inode for symbolic link:
* (iAlloc() returns new, locked inode)
*/
ip = ialloc(dip, S_IFLNK | 0777);
if (ip == NULL) {
BT_PUTSEARCH(&btstack);
rc = ENOSPC;
goto out2;
}
tid = txBegin(dip->i_sb, 0);
down(&JFS_IP(dip)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
if ((rc = dtSearch(dip, &dname, &ino, &btstack, JFS_CREATE)))
goto out3;
tblk = tid_to_tblock(tid);
tblk->xflag |= COMMIT_CREATE;
tblk->ip = ip;
......@@ -895,9 +897,7 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
if ((rc = dtInsert(tid, dip, &dname, &ino, &btstack))) {
jERROR(1, ("jfs_symlink: dtInsert returned %d\n", rc));
/* discard ne inode */
ip->i_nlink = 0;
iput(ip);
goto out2;
goto out3;
}
......@@ -955,10 +955,8 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
if (mp == NULL) {
dtDelete(tid, dip, &dname, &ino,
JFS_REMOVE);
ip->i_nlink = 0;
iput(ip);
rc = EIO;
goto out2;
goto out3;
}
memcpy(mp->data, name, copy_size);
flush_metapage(mp);
......@@ -977,10 +975,8 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
ip->i_blocks = LBLK2PBLK(sb, xlen);
} else {
dtDelete(tid, dip, &dname, &ino, JFS_REMOVE);
ip->i_nlink = 0;
iput(ip);
rc = ENOSPC;
goto out2;
goto out3;
}
}
......@@ -1008,14 +1004,19 @@ int jfs_symlink(struct inode *dip, struct dentry *dentry, const char *name)
} else
rc = txCommit(tid, 1, &iplist[0], 0);
out2:
out3:
txEnd(tid);
up(&JFS_IP(dip)->commit_sem);
up(&JFS_IP(ip)->commit_sem);
if (rc) {
ip->i_nlink = 0;
iput(ip);
}
out2:
free_UCSname(&dname);
out1:
IWRITE_UNLOCK(dip);
txEnd(tid);
out1:
jFYI(1, ("jfs_symlink: rc:%d\n", -rc));
return -rc;
}
......@@ -1054,19 +1055,6 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
old_ip = old_dentry->d_inode;
new_ip = new_dentry->d_inode;
if (old_dir == new_dir) {
if (new_ip)
IWRITE_LOCK_LIST(3, old_dir, old_ip, new_ip);
else
IWRITE_LOCK_LIST(2, old_dir, old_ip);
} else {
if (new_ip)
IWRITE_LOCK_LIST(4, old_dir, new_dir, old_ip,
new_ip);
else
IWRITE_LOCK_LIST(3, old_dir, new_dir, old_ip);
}
if ((rc = get_UCSname(&old_dname, old_dentry,
JFS_SBI(old_dir->i_sb)->nls_tab)))
goto out1;
......@@ -1112,14 +1100,21 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
rc = EMLINK;
goto out3;
}
}
} else if (new_ip)
IWRITE_LOCK(new_ip);
/*
* The real work starts here
*/
tid = txBegin(new_dir->i_sb, 0);
down(&JFS_IP(new_dir)->commit_sem);
down(&JFS_IP(old_ip)->commit_sem);
if (old_dir != new_dir)
down(&JFS_IP(old_dir)->commit_sem);
if (new_ip) {
down(&JFS_IP(new_ip)->commit_sem);
/*
* Change existing directory entry to new inode number
*/
......@@ -1247,14 +1242,24 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* Don't unlock new_ip if COMMIT_HOLDLOCK is set
*/
if (new_ip && test_cflag(COMMIT_Holdlock, new_ip))
if (new_ip && test_cflag(COMMIT_Holdlock, new_ip)) {
up(&JFS_IP(new_ip)->commit_sem);
new_ip = 0;
}
out4:
txEnd(tid);
up(&JFS_IP(new_dir)->commit_sem);
up(&JFS_IP(old_ip)->commit_sem);
if (old_dir != new_dir)
up(&JFS_IP(old_dir)->commit_sem);
if (new_ip)
up(&JFS_IP(new_ip)->commit_sem);
while (new_size && (rc == 0)) {
tid = txBegin(new_ip->i_sb, 0);
down(&JFS_IP(new_ip)->commit_sem);
new_size = xtTruncate_pmap(tid, new_ip, new_size);
if (new_size < 0) {
txAbort(tid, 1);
......@@ -1262,6 +1267,7 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
} else
rc = txCommit(tid, 1, &new_ip, COMMIT_SYNC);
txEnd(tid);
up(&JFS_IP(new_ip)->commit_sem);
}
if (new_ip && (new_ip->i_nlink == 0))
set_cflag(COMMIT_Nolink, new_ip);
......@@ -1270,12 +1276,8 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
out2:
free_UCSname(&old_dname);
out1:
IWRITE_UNLOCK(old_ip);
if (old_dir != new_dir)
IWRITE_UNLOCK(new_dir);
if (new_ip)
if (new_ip && !S_ISDIR(new_ip->i_mode))
IWRITE_UNLOCK(new_ip);
/*
* Truncating the directory index table is not guaranteed. It
* may need to be done iteratively
......@@ -1287,8 +1289,6 @@ int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
clear_cflag(COMMIT_Stale, old_dir);
}
IWRITE_UNLOCK(old_dir);
jFYI(1, ("jfs_rename: returning %d\n", rc));
return -rc;
}
......@@ -1315,8 +1315,6 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
if ((rc = get_UCSname(&dname, dentry, JFS_SBI(dir->i_sb)->nls_tab)))
goto out;
IWRITE_LOCK(dir);
ip = ialloc(dir, mode);
if (ip == NULL) {
rc = ENOSPC;
......@@ -1325,24 +1323,19 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
tid = txBegin(dir->i_sb, 0);
if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE))) {
ip->i_nlink = 0;
iput(ip);
txEnd(tid);
goto out1;
}
down(&JFS_IP(dir)->commit_sem);
down(&JFS_IP(ip)->commit_sem);
if ((rc = dtSearch(dir, &dname, &ino, &btstack, JFS_CREATE)))
goto out3;
tblk = tid_to_tblock(tid);
tblk->xflag |= COMMIT_CREATE;
tblk->ip = ip;
ino = ip->i_ino;
if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack))) {
ip->i_nlink = 0;
iput(ip);
txEnd(tid);
goto out1;
}
if ((rc = dtInsert(tid, dir, &dname, &ino, &btstack)))
goto out3;
init_special_inode(ip, ip->i_mode, rdev);
......@@ -1357,10 +1350,17 @@ int jfs_mknod(struct inode *dir, struct dentry *dentry, int mode, int rdev)
iplist[0] = dir;
iplist[1] = ip;
rc = txCommit(tid, 2, iplist, 0);
out3:
txEnd(tid);
up(&JFS_IP(ip)->commit_sem);
up(&JFS_IP(dir)->commit_sem);
if (rc) {
ip->i_nlink = 0;
iput(ip);
}
out1:
IWRITE_UNLOCK(dir);
free_UCSname(&dname);
out:
......@@ -1389,9 +1389,7 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry)
if ((rc =
get_UCSname(&key, dentry, JFS_SBI(dip->i_sb)->nls_tab)))
return ERR_PTR(-rc);
IREAD_LOCK(dip);
rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP);
IREAD_UNLOCK(dip);
free_UCSname(&key);
if (rc == ENOENT) {
d_add(dentry, NULL);
......
/*
* Copyright (c) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_dinode.h"
#include "jfs_imap.h"
#include "jfs_dmap.h"
#include "jfs_superblock.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"
#define BITSPERPAGE (PSIZE << 3)
#define L2MEGABYTE 20
#define MEGABYTE (1 << L2MEGABYTE)
#define MEGABYTE32 (MEGABYTE << 5)
/* convert block number to bmap file page number */
#define BLKTODMAPN(b)\
(((b) >> 13) + ((b) >> 23) + ((b) >> 33) + 3 + 1)
/*
* jfs_extendfs()
*
* function: extend file system;
*
* |-------------------------------|----------|----------|
* file system space fsck inline log
* workspace space
*
* input:
* new LVSize: in LV blocks (required)
* new LogSize: in LV blocks (optional)
* new FSSize: in LV blocks (optional)
*
* new configuration:
* 1. set new LogSize as specified or default from new LVSize;
* 2. compute new FSCKSize from new LVSize;
* 3. set new FSSize as MIN(FSSize, LVSize-(LogSize+FSCKSize)) where
* assert(new FSSize >= old FSSize),
* i.e., file system must not be shrinked;
*/
int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
{
int rc = 0;
struct jfs_sb_info *sbi = JFS_SBI(sb);
struct inode *ipbmap = sbi->ipbmap;
struct inode *ipbmap2;
struct inode *ipimap = sbi->ipimap;
log_t *log = sbi->log;
bmap_t *bmp = sbi->bmap;
s64 newLogAddress, newFSCKAddress;
int newFSCKSize;
s64 newMapSize = 0, mapSize;
s64 XAddress, XSize, nblocks, xoff, xaddr, t64;
s64 oldLVSize;
s64 newFSSize;
s64 VolumeSize;
int newNpages = 0, nPages, newPage, xlen, t32;
int tid;
int log_formatted = 0;
struct inode *iplist[1];
struct jfs_superblock *j_sb, *j_sb2;
metapage_t *sbp, *sb2p;
uint old_agsize;
struct buffer_head *bh;
/* If the volume hasn't grown, get out now */
if (sbi->mntflag & JFS_INLINELOG)
oldLVSize = addressPXD(&sbi->logpxd) + lengthPXD(&sbi->logpxd);
else
oldLVSize = addressPXD(&sbi->fsckpxd) +
lengthPXD(&sbi->fsckpxd);
if (oldLVSize >= newLVSize) {
printk(KERN_WARNING
"jfs_extendfs: volume hasn't grown, returning\n");
goto out;
}
VolumeSize = sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits;
if (VolumeSize) {
if (newLVSize > VolumeSize) {
printk(KERN_WARNING "jfs_extendfs: invalid size\n");
rc = -EINVAL;
goto out;
}
} else {
/* check the device */
bh = sb_bread(sb, newLVSize - 1);
if (!bh) {
printk(KERN_WARNING "jfs_extendfs: invalid size\n");
rc = -EINVAL;
goto out;
}
bforget(bh);
}
/* Can't extend write-protected drive */
if (isReadOnly(ipbmap)) {
printk(KERN_WARNING "jfs_extendfs: read-only file system\n");
rc = -EROFS;
goto out;
}
/*
* reconfigure LV spaces
* ---------------------
*
* validate new size, or, if not specified, determine new size
*/
/*
* reconfigure inline log space:
*/
if ((sbi->mntflag & JFS_INLINELOG)) {
if (newLogSize == 0) {
/*
* no size specified: default to 1/256 of aggregate
* size; rounded up to a megabyte boundary;
*/
newLogSize = newLVSize >> 8;
t32 = (1 << (20 - sbi->l2bsize)) - 1;
newLogSize = (newLogSize + t32) & ~t32;
newLogSize =
min(newLogSize, MEGABYTE32 >> sbi->l2bsize);
} else {
/*
* convert the newLogSize to fs blocks.
*
* Since this is given in megabytes, it will always be
* an even number of pages.
*/
newLogSize = (newLogSize * MEGABYTE) >> sbi->l2bsize;
}
} else
newLogSize = 0;
newLogAddress = newLVSize - newLogSize;
/*
* reconfigure fsck work space:
*
* configure it to the end of the logical volume regardless of
* whether file system extends to the end of the aggregate;
* Need enough 4k pages to cover:
* - 1 bit per block in aggregate rounded up to BPERDMAP boundary
* - 1 extra page to handle control page and intermediate level pages
* - 50 extra pages for the chkdsk service log
*/
t64 = ((newLVSize - newLogSize + BPERDMAP - 1) >> L2BPERDMAP)
<< L2BPERDMAP;
t32 = ((t64 + (BITSPERPAGE - 1)) / BITSPERPAGE) + 1 + 50;
newFSCKSize = t32 << sbi->l2nbperpage;
newFSCKAddress = newLogAddress - newFSCKSize;
/*
* compute new file system space;
*/
newFSSize = newLVSize - newLogSize - newFSCKSize;
/* file system cannot be shrinked */
if (newFSSize < bmp->db_mapsize) {
rc = EINVAL;
goto out;
}
/*
* If we're expanding enough that the inline log does not overlap
* the old one, we can format the new log before we quiesce the
* filesystem.
*/
if ((sbi->mntflag & JFS_INLINELOG) && (newLogAddress > oldLVSize)) {
if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
goto out;
log_formatted = 1;
}
/*
* quiesce file system
*
* (prepare to move the inline log and to prevent map update)
*
* block any new transactions and wait for completion of
* all wip transactions and flush modified pages s.t.
* on-disk file system is in consistent state and
* log is not required for recovery.
*/
txQuiesce(sb);
if (sbi->mntflag & JFS_INLINELOG) {
/*
* deactivate old inline log
*/
lmLogShutdown(log);
/*
* mark on-disk super block for fs in transition;
*
* update on-disk superblock for the new space configuration
* of inline log space and fsck work space descriptors:
* N.B. FS descriptor is NOT updated;
*
* crash recovery:
* logredo(): if FM_EXTENDFS, return to fsck() for cleanup;
* fsck(): if FM_EXTENDFS, reformat inline log and fsck
* workspace from superblock inline log descriptor and fsck
* workspace descriptor;
*/
/* read in superblock */
if ((rc = readSuper(sb, &sbp)))
goto error_out;
j_sb = (struct jfs_superblock *) (sbp->data);
/* mark extendfs() in progress */
j_sb->s_state |= cpu_to_le32(FM_EXTENDFS);
j_sb->s_xsize = cpu_to_le64(newFSSize);
PXDaddress(&j_sb->s_xfsckpxd, newFSCKAddress);
PXDlength(&j_sb->s_xfsckpxd, newFSCKSize);
PXDaddress(&j_sb->s_xlogpxd, newLogAddress);
PXDlength(&j_sb->s_xlogpxd, newLogSize);
/* synchronously update superblock */
flush_metapage(sbp);
/*
* format new inline log synchronously;
*
* crash recovery: if log move in progress,
* reformat log and exit success;
*/
if (!log_formatted)
if ((rc = lmLogFormat(log, newLogAddress, newLogSize)))
goto error_out;
/*
* activate new log
*/
log->base = newLogAddress;
log->size = newLogSize >> (L2LOGPSIZE - sb->s_blocksize_bits);
if ((rc = lmLogInit(log)))
goto error_out;
}
/*
* extend block allocation map
* ---------------------------
*
* extendfs() for new extension, retry after crash recovery;
*
* note: both logredo() and fsck() rebuild map from
* the bitmap and configuration parameter from superblock
* (disregarding all other control information in the map);
*
* superblock:
* s_size: aggregate size in physical blocks;
*/
/*
* compute the new block allocation map configuration
*
* map dinode:
* di_size: map file size in byte;
* di_nblocks: number of blocks allocated for map file;
* di_mapsize: number of blocks in aggregate (covered by map);
* map control page:
* db_mapsize: number of blocks in aggregate (covered by map);
*/
newMapSize = newFSSize;
/* number of data pages of new bmap file:
* roundup new size to full dmap page boundary and
* add 1 extra dmap page for next extendfs()
*/
t64 = (newMapSize - 1) + BPERDMAP;
newNpages = BLKTODMAPN(t64) + 1;
/*
* extend map from current map (WITHOUT growing mapfile)
*
* map new extension with unmapped part of the last partial
* dmap page, if applicable, and extra page(s) allocated
* at end of bmap by mkfs() or previous extendfs();
*/
extendBmap:
/* compute number of blocks requested to extend */
mapSize = bmp->db_mapsize;
XAddress = mapSize; /* eXtension Address */
XSize = newMapSize - mapSize; /* eXtension Size */
old_agsize = bmp->db_agsize; /* We need to know if this changes */
/* compute number of blocks that can be extended by current mapfile */
t64 = dbMapFileSizeToMapSize(ipbmap);
if (mapSize > t64) {
printk(KERN_ERR
"jfs_extendfs: mapSize (0x%llx) > t64 (0x%llx)\n",
mapSize, t64);
rc = EIO;
goto error_out;
}
nblocks = min(t64 - mapSize, XSize);
/*
* update map pages for new extension:
*
* update/init dmap and bubble up the control hierarchy
* incrementally fold up dmaps into upper levels;
* update bmap control page;
*/
if ((rc = dbExtendFS(ipbmap, XAddress, nblocks)))
goto error_out;
/*
* the map now has extended to cover additional nblocks:
* dn_mapsize = oldMapsize + nblocks;
*/
/* ipbmap->i_mapsize += nblocks; */
XSize -= nblocks;
/*
* grow map file to cover remaining extension
* and/or one extra dmap page for next extendfs();
*
* allocate new map pages and its backing blocks, and
* update map file xtree
*/
/* compute number of data pages of current bmap file */
nPages = ipbmap->i_size >> L2PSIZE;
/* need to grow map file ? */
if (nPages == newNpages)
goto updateImap;
/*
* grow bmap file for the new map pages required:
*
* allocate growth at the start of newly extended region;
* bmap file only grows sequentially, i.e., both data pages
* and possibly xtree index pages may grow in append mode,
* s.t. logredo() can reconstruct pre-extension state
* by washing away bmap file of pages outside s_size boundary;
*/
/*
* journal map file growth as if a regular file growth:
* (note: bmap is created with di_mode = IFJOURNAL|IFREG);
*
* journaling of bmap file growth is not required since
* logredo() do/can not use log records of bmap file growth
* but it provides careful write semantics, pmap update, etc.;
*/
/* synchronous write of data pages: bmap data pages are
* cached in meta-data cache, and not written out
* by txCommit();
*/
filemap_fdatawait(ipbmap->i_mapping);
filemap_fdatawrite(ipbmap->i_mapping);
filemap_fdatawait(ipbmap->i_mapping);
diWriteSpecial(ipbmap, 0);
newPage = nPages; /* first new page number */
xoff = newPage << sbi->l2nbperpage;
xlen = (newNpages - nPages) << sbi->l2nbperpage;
xlen = min(xlen, (int) nblocks) & ~(sbi->nbperpage - 1);
xaddr = XAddress;
tid = txBegin(sb, COMMIT_FORCE);
if ((rc = xtAppend(tid, ipbmap, 0, xoff, nblocks, &xlen, &xaddr, 0))) {
txEnd(tid);
goto error_out;
}
/* update bmap file size */
ipbmap->i_size += xlen << sbi->l2bsize;
ipbmap->i_blocks += LBLK2PBLK(sb, xlen);
iplist[0] = ipbmap;
rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
txEnd(tid);
if (rc)
goto error_out;
/*
* map file has been grown now to cover extension to further out;
* di_size = new map file size;
*
* if huge extension, the previous extension based on previous
* map file size may not have been sufficient to cover whole extension
* (it could have been used up for new map pages),
* but the newly grown map file now covers lot bigger new free space
* available for further extension of map;
*/
/* any more blocks to extend ? */
if (XSize)
goto extendBmap;
/* finalize bmap */
dbFinalizeBmap(ipbmap);
/*
* update inode allocation map
* ---------------------------
*
* move iag lists from old to new iag;
* agstart field is not updated for logredo() to reconstruct
* iag lists if system crash occurs.
* (computation of ag number from agstart based on agsize
* will correctly identify the new ag);
*/
updateImap:
/* if new AG size the same as old AG size, done! */
if (bmp->db_agsize != old_agsize) {
if ((rc = diExtendFS(ipimap, ipbmap)))
goto error_out;
/* finalize imap */
if ((rc = diSync(ipimap)))
goto error_out;
}
/*
* finalize
* --------
*
* extension is committed when on-disk super block is
* updated with new descriptors: logredo will recover
* crash before it to pre-extension state;
*/
/* sync log to skip log replay of bmap file growth transaction; */
/* lmLogSync(log, 1); */
/*
* synchronous write bmap global control page;
* for crash before completion of write
* logredo() will recover to pre-extendfs state;
* for crash after completion of write,
* logredo() will recover post-extendfs state;
*/
if ((rc = dbSync(ipbmap)))
goto error_out;
/*
* copy primary bmap inode to secondary bmap inode
*/
ipbmap2 = diReadSpecial(sb, BMAP_I, 1);
if (ipbmap2 == NULL) {
printk(KERN_ERR "jfs_extendfs: diReadSpecial(bmap) failed\n");
goto error_out;
}
memcpy(&JFS_IP(ipbmap2)->i_xtroot, &JFS_IP(ipbmap)->i_xtroot, 288);
ipbmap2->i_size = ipbmap->i_size;
ipbmap2->i_blocks = ipbmap->i_blocks;
diWriteSpecial(ipbmap2, 1);
diFreeSpecial(ipbmap2);
/*
* update superblock
*/
if ((rc = readSuper(sb, &sbp)))
goto error_out;
j_sb = (struct jfs_superblock *) (sbp->data);
/* mark extendfs() completion */
j_sb->s_state &= cpu_to_le32(~FM_EXTENDFS);
j_sb->s_size = cpu_to_le64(bmp->db_mapsize) <<
le16_to_cpu(j_sb->s_l2bfactor);
j_sb->s_agsize = cpu_to_le32(bmp->db_agsize);
/* update inline log space descriptor */
if (sbi->mntflag & JFS_INLINELOG) {
PXDaddress(&(j_sb->s_logpxd), newLogAddress);
PXDlength(&(j_sb->s_logpxd), newLogSize);
}
/* record log's mount serial number */
j_sb->s_logserial = cpu_to_le32(log->serial);
/* update fsck work space descriptor */
PXDaddress(&(j_sb->s_fsckpxd), newFSCKAddress);
PXDlength(&(j_sb->s_fsckpxd), newFSCKSize);
j_sb->s_fscklog = 1;
/* sb->s_fsckloglen remains the same */
/* Update secondary superblock */
sb2p = read_metapage(sbi->direct_inode,
SUPER2_OFF >> sb->s_blocksize_bits, PSIZE, 1);
if (sb2p) {
j_sb2 = (struct jfs_superblock *) (sb2p->data);
memcpy(j_sb2, j_sb, sizeof (struct jfs_superblock));
flush_metapage(sb2p);
}
/* write primary superblock */
flush_metapage(sbp);
goto resume;
error_out:
updateSuper(sb, FM_DIRTY);
resume:
/*
* resume file system transactions
*/
txResume(sb);
out:
return rc;
}
......@@ -71,8 +71,9 @@ extern void jfs_delete_inode(struct inode *inode);
extern void jfs_write_inode(struct inode *inode, int wait);
extern struct dentry *jfs_get_parent(struct dentry *dentry);
extern int jfs_extendfs(struct super_block *, s64, int);
#ifdef PROC_FS_JFS /* see jfs_debug.h */
#ifdef PROC_FS_JFS /* see jfs_debug.h */
extern void jfs_proc_init(void);
extern void jfs_proc_clean(void);
#endif
......@@ -119,7 +120,7 @@ static int jfs_statfs(struct super_block *sb, struct statfs *buf)
*/
maxinodes = min((s64) atomic_read(&imap->im_numinos) +
((sbi->bmap->db_nfree >> imap->im_l2nbperiext)
<< L2INOSPEREXT), (s64)0xffffffffLL);
<< L2INOSPEREXT), (s64) 0xffffffffLL);
buf->f_files = maxinodes;
buf->f_ffree = maxinodes - (atomic_read(&imap->im_numinos) -
atomic_read(&imap->im_numfree));
......@@ -156,20 +157,23 @@ static void jfs_put_super(struct super_block *sb)
kfree(sbi);
}
static int parse_options (char * options, struct jfs_sb_info *sbi)
static int parse_options(char *options, struct super_block *sb, s64 *newLVSize)
{
void *nls_map = NULL;
char * this_char;
char * value;
char *this_char;
char *value;
struct jfs_sb_info *sbi = JFS_SBI(sb);
*newLVSize = 0;
if (!options)
return 1;
while ((this_char = strsep (&options, ",")) != NULL) {
while ((this_char = strsep(&options, ",")) != NULL) {
if (!*this_char)
continue;
if ((value = strchr (this_char, '=')) != NULL)
if ((value = strchr(this_char, '=')) != NULL)
*value++ = 0;
if (!strcmp (this_char, "iocharset")) {
if (!strcmp(this_char, "iocharset")) {
if (!value || !*value)
goto needs_arg;
if (nls_map) /* specified iocharset twice! */
......@@ -179,14 +183,25 @@ static int parse_options (char * options, struct jfs_sb_info *sbi)
printk(KERN_ERR "JFS: charset not found\n");
goto cleanup;
}
/* Silently ignore the quota options */
} else if (!strcmp (this_char, "grpquota")
|| !strcmp (this_char, "noquota")
|| !strcmp (this_char, "quota")
|| !strcmp (this_char, "usrquota"))
} else if (!strcmp(this_char, "resize")) {
if (!value || !*value) {
*newLVSize = sb->s_bdev->bd_inode->i_size >>
sb->s_blocksize_bits;
if (*newLVSize == 0)
printk(KERN_ERR
"JFS: Cannot determine volume size\n");
} else
*newLVSize = simple_strtoull(value, &value, 0);
/* Silently ignore the quota options */
} else if (!strcmp(this_char, "grpquota")
|| !strcmp(this_char, "noquota")
|| !strcmp(this_char, "quota")
|| !strcmp(this_char, "usrquota"))
/* Don't do anything ;-) */ ;
else {
printk ("jfs: Unrecognized mount option %s\n", this_char);
printk("jfs: Unrecognized mount option %s\n",
this_char);
goto cleanup;
}
}
......@@ -208,10 +223,22 @@ static int parse_options (char * options, struct jfs_sb_info *sbi)
int jfs_remount(struct super_block *sb, int *flags, char *data)
{
struct jfs_sb_info *sbi = JFS_SBI(sb);
s64 newLVSize = 0;
int rc = 0;
if (!parse_options(data, sbi)) {
if (!parse_options(data, sb, &newLVSize)) {
return -EINVAL;
}
if (newLVSize) {
if (sb->s_flags & MS_RDONLY) {
printk(KERN_ERR
"JFS: resize requires volume to be mounted read-write\n");
return -EROFS;
}
rc = jfs_extendfs(sb, newLVSize, 0);
if (rc)
return rc;
}
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
/*
......@@ -232,20 +259,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
struct jfs_sb_info *sbi;
struct inode *inode;
int rc;
s64 newLVSize = 0;
jFYI(1, ("In jfs_read_super: s_flags=0x%lx\n", sb->s_flags));
sbi = kmalloc(sizeof(struct jfs_sb_info), GFP_KERNEL);
sbi = kmalloc(sizeof (struct jfs_sb_info), GFP_KERNEL);
if (!sbi)
return -ENOSPC;
memset(sbi, 0, sizeof(struct jfs_sb_info));
memset(sbi, 0, sizeof (struct jfs_sb_info));
sb->u.generic_sbp = sbi;
if (!parse_options((char *)data, sbi)) {
if (!parse_options((char *) data, sb, &newLVSize)) {
kfree(sbi);
return -EINVAL;
}
if (newLVSize) {
printk(KERN_ERR "resize option for remount only\n");
return -EINVAL;
}
/*
* Initialize blocksize to 4K.
*/
......@@ -276,8 +309,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
if (rc) {
if (!silent) {
jERROR(1,
("jfs_mount failed w/return code = %d\n",
rc));
("jfs_mount failed w/return code = %d\n", rc));
}
goto out_mount_failed;
}
......@@ -314,7 +346,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
* Page cache is indexed by long.
* I would use MAX_LFS_FILESIZE, but it's only half as big
*/
sb->s_maxbytes = min(((u64)PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
#endif
return 0;
......@@ -379,15 +411,17 @@ extern int txInit(void);
extern void txExit(void);
extern void metapage_exit(void);
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
{
struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
INIT_LIST_HEAD(&jfs_ip->anon_inode_list);
INIT_LIST_HEAD(&jfs_ip->mp_list);
RDWRLOCK_INIT(&jfs_ip->rdwrlock);
init_rwsem(&jfs_ip->rdwrlock);
init_MUTEX(&jfs_ip->commit_sem);
jfs_ip->atlhead = 0;
inode_init_once(&jfs_ip->vfs_inode);
}
}
......@@ -397,9 +431,8 @@ static int __init init_jfs_fs(void)
int rc;
jfs_inode_cachep =
kmem_cache_create("jfs_ip",
sizeof(struct jfs_inode_info),
0, 0, init_once, NULL);
kmem_cache_create("jfs_ip", sizeof(struct jfs_inode_info), 0, 0,
init_once, NULL);
if (jfs_inode_cachep == NULL)
return -ENOMEM;
......@@ -425,37 +458,32 @@ static int __init init_jfs_fs(void)
* I/O completion thread (endio)
*/
jfsIOthread = kernel_thread(jfsIOWait, 0,
CLONE_FS | CLONE_FILES |
CLONE_SIGHAND);
CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
if (jfsIOthread < 0) {
jERROR(1,
("init_jfs_fs: fork failed w/rc = %d\n",
jfsIOthread));
("init_jfs_fs: fork failed w/rc = %d\n", jfsIOthread));
goto end_txmngr;
}
wait_for_completion(&jfsIOwait); /* Wait until IO thread starts */
wait_for_completion(&jfsIOwait); /* Wait until thread starts */
jfsCommitThread = kernel_thread(jfs_lazycommit, 0,
CLONE_FS | CLONE_FILES |
CLONE_SIGHAND);
CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
if (jfsCommitThread < 0) {
jERROR(1,
("init_jfs_fs: fork failed w/rc = %d\n",
jfsCommitThread));
goto kill_iotask;
}
wait_for_completion(&jfsIOwait); /* Wait until IO thread starts */
wait_for_completion(&jfsIOwait); /* Wait until thread starts */
jfsSyncThread = kernel_thread(jfs_sync, 0,
CLONE_FS | CLONE_FILES |
CLONE_SIGHAND);
CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
if (jfsSyncThread < 0) {
jERROR(1,
("init_jfs_fs: fork failed w/rc = %d\n",
jfsSyncThread));
("init_jfs_fs: fork failed w/rc = %d\n", jfsSyncThread));
goto kill_committask;
}
wait_for_completion(&jfsIOwait); /* Wait until IO thread starts */
wait_for_completion(&jfsIOwait); /* Wait until thread starts */
#ifdef PROC_FS_JFS
jfs_proc_init();
......@@ -463,15 +491,14 @@ static int __init init_jfs_fs(void)
return register_filesystem(&jfs_fs_type);
kill_committask:
jfs_stop_threads = 1;
wake_up(&jfs_commit_thread_wait);
wait_for_completion(&jfsIOwait); /* Wait until Commit thread exits */
wait_for_completion(&jfsIOwait); /* Wait for thread exit */
kill_iotask:
jfs_stop_threads = 1;
wake_up(&jfs_IO_thread_wait);
wait_for_completion(&jfsIOwait); /* Wait until IO thread exits */
wait_for_completion(&jfsIOwait); /* Wait for thread exit */
end_txmngr:
txExit();
free_metapage:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment