Commit 985dcc74 authored by Linus Torvalds's avatar Linus Torvalds

Merge bk://jfs.bkbits.net/linux-2.5

into penguin.transmeta.com:/home/penguin/torvalds/repositories/kernel/linux
parents 06acfb97 28db47b2
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/fs.h> #include <linux/fs.h>
#include "jfs_incore.h" #include "jfs_incore.h"
#include "jfs_dmap.h"
#include "jfs_txnmgr.h" #include "jfs_txnmgr.h"
#include "jfs_xattr.h" #include "jfs_xattr.h"
#include "jfs_debug.h" #include "jfs_debug.h"
...@@ -94,6 +95,47 @@ static void jfs_truncate(struct inode *ip) ...@@ -94,6 +95,47 @@ static void jfs_truncate(struct inode *ip)
IWRITE_UNLOCK(ip); IWRITE_UNLOCK(ip);
} }
static int jfs_open(struct inode *inode, struct file *file)
{
int rc;
if ((rc = generic_file_open(inode, file)))
return rc;
/*
* We attempt to allow only one "active" file open per aggregate
* group. Otherwise, appending to files in parallel can cause
* fragmentation within the files.
*
* If the file is empty, it was probably just created and going
* to be written to. If it has a size, we'll hold off until the
* file is actually grown.
*/
if (S_ISREG(inode->i_mode) && file->f_mode & FMODE_WRITE &&
(inode->i_size == 0)) {
struct jfs_inode_info *ji = JFS_IP(inode);
if (ji->active_ag == -1) {
ji->active_ag = ji->agno;
atomic_inc(
&JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
}
}
return 0;
}
/*
 * jfs_release - drop the inode's claim on its active allocation group
 *
 * If the inode is currently counted as an active allocator in some
 * allocation group (active_ag != -1), decrement that group's db_active
 * counter and reset active_ag to -1.  Otherwise there is nothing to do.
 *
 * Always returns 0.
 */
static int jfs_release(struct inode *inode, struct file *file)
{
	struct jfs_inode_info *jfs_ip = JFS_IP(inode);
	struct bmap *bmp;

	/* Not registered as active in any ag. */
	if (jfs_ip->active_ag == -1)
		return 0;

	bmp = JFS_SBI(inode->i_sb)->bmap;
	atomic_dec(&bmp->db_active[jfs_ip->active_ag]);
	jfs_ip->active_ag = -1;

	return 0;
}
struct inode_operations jfs_file_inode_operations = { struct inode_operations jfs_file_inode_operations = {
.truncate = jfs_truncate, .truncate = jfs_truncate,
.setxattr = jfs_setxattr, .setxattr = jfs_setxattr,
...@@ -103,7 +145,7 @@ struct inode_operations jfs_file_inode_operations = { ...@@ -103,7 +145,7 @@ struct inode_operations jfs_file_inode_operations = {
}; };
struct file_operations jfs_file_operations = { struct file_operations jfs_file_operations = {
.open = generic_file_open, .open = jfs_open,
.llseek = generic_file_llseek, .llseek = generic_file_llseek,
.write = generic_file_write, .write = generic_file_write,
.read = generic_file_read, .read = generic_file_read,
...@@ -112,4 +154,5 @@ struct file_operations jfs_file_operations = { ...@@ -112,4 +154,5 @@ struct file_operations jfs_file_operations = {
.writev = generic_file_writev, .writev = generic_file_writev,
.sendfile = generic_file_sendfile, .sendfile = generic_file_sendfile,
.fsync = jfs_fsync, .fsync = jfs_fsync,
.release = jfs_release,
}; };
...@@ -241,6 +241,7 @@ int dbMount(struct inode *ipbmap) ...@@ -241,6 +241,7 @@ int dbMount(struct inode *ipbmap)
bmp->db_ipbmap = ipbmap; bmp->db_ipbmap = ipbmap;
JFS_SBI(ipbmap->i_sb)->bmap = bmp; JFS_SBI(ipbmap->i_sb)->bmap = bmp;
memset(bmp->db_active, 0, sizeof(bmp->db_active));
DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap); DBINITMAP(bmp->db_mapsize, ipbmap, &bmp->db_DBmap);
/* /*
...@@ -271,6 +272,7 @@ int dbMount(struct inode *ipbmap) ...@@ -271,6 +272,7 @@ int dbMount(struct inode *ipbmap)
int dbUnmount(struct inode *ipbmap, int mounterror) int dbUnmount(struct inode *ipbmap, int mounterror)
{ {
struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
int i;
if (!(mounterror || isReadOnly(ipbmap))) if (!(mounterror || isReadOnly(ipbmap)))
dbSync(ipbmap); dbSync(ipbmap);
...@@ -280,6 +282,14 @@ int dbUnmount(struct inode *ipbmap, int mounterror) ...@@ -280,6 +282,14 @@ int dbUnmount(struct inode *ipbmap, int mounterror)
*/ */
truncate_inode_pages(ipbmap->i_mapping, 0); truncate_inode_pages(ipbmap->i_mapping, 0);
/*
* Sanity Check
*/
for (i = 0; i < bmp->db_numag; i++)
if (atomic_read(&bmp->db_active[i]))
printk(KERN_ERR "dbUnmount: db_active[%d] = %d\n",
i, atomic_read(&bmp->db_active[i]));
/* free the memory for the in-memory bmap. */ /* free the memory for the in-memory bmap. */
kfree(bmp); kfree(bmp);
...@@ -598,102 +608,77 @@ dbUpdatePMap(struct inode *ipbmap, ...@@ -598,102 +608,77 @@ dbUpdatePMap(struct inode *ipbmap,
* *
* FUNCTION: find the preferred allocation group for new allocations. * FUNCTION: find the preferred allocation group for new allocations.
* *
* we try to keep the trailing (rightmost) allocation groups * Within the allocation groups, we maintain a preferred
* free for large allocations. we try to do this by targeting
* new inode allocations towards the leftmost or 'active'
* allocation groups while keeping the rightmost or 'inactive'
* allocation groups free. once the active allocation groups
* have dropped to a certain percentage of free space, we add
* the leftmost inactive allocation group to the active set.
*
* within the active allocation groups, we maintain a preferred
* allocation group which consists of a group with at least * allocation group which consists of a group with at least
* average free space over the active set. it is the preferred * average free space. It is the preferred group that we target
* group that we target new inode allocation towards. the * new inode allocation towards. The tie-in between inode
* tie-in between inode allocation and block allocation occurs * allocation and block allocation occurs as we allocate the
* as we allocate the first (data) block of an inode and specify * first (data) block of an inode and specify the inode (block)
* the inode (block) as the allocation hint for this block. * as the allocation hint for this block.
*
* We try to avoid having more than one open file growing in
* an allocation group, as this will lead to fragmentation.
* This differs from the old OS/2 method of trying to keep
* empty ags around for large allocations.
* *
* PARAMETERS: * PARAMETERS:
* ipbmap - pointer to in-core inode for the block map. * ipbmap - pointer to in-core inode for the block map.
* *
* RETURN VALUES: * RETURN VALUES:
* the preferred allocation group number. * the preferred allocation group number.
*
* note: only called by dbAlloc();
*/ */
int dbNextAG(struct inode *ipbmap) int dbNextAG(struct inode *ipbmap)
{ {
s64 avgfree, inactfree, actfree, rem; s64 avgfree;
int actags, inactags, l2agsize; int agpref;
s64 hwm = 0;
int i;
int next_best = -1;
struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap; struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
BMAP_LOCK(bmp); BMAP_LOCK(bmp);
/* determine the number of active allocation groups (i.e. /* determine the average number of free blocks within the ags. */
* the number of allocation groups up to and including avgfree = (u32)bmp->db_nfree / bmp->db_numag;
* the rightmost allocation group with blocks allocated
* in it.
*/
actags = bmp->db_maxag + 1;
assert(actags <= bmp->db_numag);
/* get the number of inactive allocation groups (i.e. the /*
* number of allocation group following the rightmost group * if the current preferred ag does not have an active allocator
* with allocation in it. * and has at least average freespace, return it
*/ */
inactags = bmp->db_numag - actags; agpref = bmp->db_agpref;
if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
(bmp->db_agfree[agpref] >= avgfree))
goto found;
/* determine how many blocks are in the inactive allocation /* From the last preferred ag, find the next one with at least
* groups. in doing this, we must account for the fact that * average free space.
* the rightmost group might be a partial group (i.e. file
* system size is not a multiple of the group size).
*/ */
l2agsize = bmp->db_agl2size; for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
rem = bmp->db_mapsize & (bmp->db_agsize - 1); if (agpref == bmp->db_numag)
inactfree = (inactags agpref = 0;
&& rem) ? ((inactags - 1) << l2agsize) +
rem : inactags << l2agsize;
/* now determine how many free blocks are in the active if (atomic_read(&bmp->db_active[agpref]))
* allocation groups plus the average number of free blocks /* open file is currently growing in this ag */
* within the active ags. continue;
*/ if (bmp->db_agfree[agpref] >= avgfree)
actfree = bmp->db_nfree - inactfree; goto found;
avgfree = (u32) actfree / (u32) actags; else if (bmp->db_agfree[agpref] > hwm) {
hwm = bmp->db_agfree[agpref];
/* check if not all of the allocation groups are active. next_best = agpref;
*/
if (actags < bmp->db_numag) {
/* not all of the allocation groups are active. determine
* if we should extend the active set by 1 (i.e. add the
* group following the current active set). we do so if
* the number of free blocks within the active set is less
* than the allocation group set and average free within
* the active set is less than 60%. we activate a new group
* by setting the allocation group preference to the new
* group.
*/
if (actfree < bmp->db_agsize &&
((avgfree * 100) >> l2agsize) < 60)
bmp->db_agpref = actags;
} else {
/* all allocation groups are in the active set. check if
* the preferred allocation group has average free space.
* if not, re-establish the preferred group as the leftmost
* group with average free space.
*/
if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
for (bmp->db_agpref = 0; bmp->db_agpref < actags;
bmp->db_agpref++) {
if (bmp->db_agfree[bmp->db_agpref] <=
avgfree)
break;
}
assert(bmp->db_agpref < bmp->db_numag);
} }
} }
/*
* If no inactive ag was found with average freespace, use the
* next best
*/
if (next_best != -1)
agpref = next_best;
/* else agpref should be back to its original value */
found:
bmp->db_agpref = agpref;
BMAP_UNLOCK(bmp); BMAP_UNLOCK(bmp);
/* return the preferred group. /* return the preferred group.
...@@ -701,7 +686,6 @@ int dbNextAG(struct inode *ipbmap) ...@@ -701,7 +686,6 @@ int dbNextAG(struct inode *ipbmap)
return (bmp->db_agpref); return (bmp->db_agpref);
} }
/* /*
* NAME: dbAlloc() * NAME: dbAlloc()
* *
...@@ -750,6 +734,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -750,6 +734,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
struct dmap *dp; struct dmap *dp;
int l2nb; int l2nb;
s64 mapSize; s64 mapSize;
int writers;
/* assert that nblocks is valid */ /* assert that nblocks is valid */
assert(nblocks > 0); assert(nblocks > 0);
...@@ -774,11 +759,10 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -774,11 +759,10 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
/* the hint should be within the map */ /* the hint should be within the map */
assert(hint < mapSize); assert(hint < mapSize);
/* if no hint was specified or the number of blocks to be /* if the number of blocks to be allocated is greater than the
* allocated is greater than the allocation group size, try * allocation group size, try to allocate anywhere.
* to allocate anywhere.
*/ */
if (hint == 0 || l2nb > bmp->db_agl2size) { if (l2nb > bmp->db_agl2size) {
IWRITE_LOCK(ipbmap); IWRITE_LOCK(ipbmap);
rc = dbAllocAny(bmp, nblocks, l2nb, results); rc = dbAllocAny(bmp, nblocks, l2nb, results);
...@@ -790,39 +774,34 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -790,39 +774,34 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
goto write_unlock; goto write_unlock;
} }
/*
* If no hint, let dbNextAG recommend an allocation group
*/
if (hint == 0)
goto pref_ag;
/* we would like to allocate close to the hint. adjust the /* we would like to allocate close to the hint. adjust the
* hint to the block following the hint since the allocators * hint to the block following the hint since the allocators
* will start looking for free space starting at this point. * will start looking for free space starting at this point.
* if the hint was the last block of the file system, try to
* allocate in the same allocation group as the hint.
*/ */
blkno = hint + 1; blkno = hint + 1;
if (blkno >= bmp->db_mapsize) {
blkno--; if (blkno >= bmp->db_mapsize)
goto tryag; goto pref_ag;
}
agno = blkno >> bmp->db_agl2size;
/* check if blkno crosses over into a new allocation group. /* check if blkno crosses over into a new allocation group.
* if so, check if we should allow allocations within this * if so, check if we should allow allocations within this
* allocation group. we try to keep the trailing (rightmost) * allocation group.
* allocation groups of the file system free for large
* allocations and may want to prevent this allocation from
* spilling over into this space.
*/
if ((blkno & (bmp->db_agsize - 1)) == 0) {
/* check if the AG is beyond the rightmost AG with
* allocations in it. if so, call dbNextAG() to
* determine if the allocation should be allowed
* to proceed within this AG or should be targeted
* to another AG.
*/ */
agno = blkno >> bmp->db_agl2size; if ((blkno & (bmp->db_agsize - 1)) == 0)
if (agno > bmp->db_maxag) { /* check if the AG is currently being written to.
agno = dbNextAG(ipbmap); * if so, call dbNextAG() to find a non-busy
blkno = (s64) agno << bmp->db_agl2size; * AG with sufficient free space.
goto tryag; */
} if (atomic_read(&bmp->db_active[agno]))
} goto pref_ag;
/* check if the allocation request size can be satisfied from a /* check if the allocation request size can be satisfied from a
* single dmap. if so, try to allocate from the dmap containing * single dmap. if so, try to allocate from the dmap containing
...@@ -844,9 +823,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -844,9 +823,8 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
/* first, try to satisfy the allocation request with the /* first, try to satisfy the allocation request with the
* blocks beginning at the hint. * blocks beginning at the hint.
*/ */
if ((rc = if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
dbAllocNext(bmp, dp, blkno, != ENOSPC) {
(int) nblocks)) != ENOSPC) {
if (rc == 0) { if (rc == 0) {
*results = blkno; *results = blkno;
DBALLOC(bmp->db_DBmap, bmp->db_mapsize, DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
...@@ -858,12 +836,23 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -858,12 +836,23 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
goto read_unlock; goto read_unlock;
} }
writers = atomic_read(&bmp->db_active[agno]);
if ((writers > 1) ||
((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
/*
* Someone else is writing in this allocation
* group. To avoid fragmenting, try another ag
*/
release_metapage(mp);
IREAD_UNLOCK(ipbmap);
goto pref_ag;
}
/* next, try to satisfy the allocation request with blocks /* next, try to satisfy the allocation request with blocks
* near the hint. * near the hint.
*/ */
if ((rc = if ((rc =
dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
results))
!= ENOSPC) { != ENOSPC) {
if (rc == 0) { if (rc == 0) {
DBALLOC(bmp->db_DBmap, bmp->db_mapsize, DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
...@@ -876,10 +865,9 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -876,10 +865,9 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
} }
/* try to satisfy the allocation request with blocks within /* try to satisfy the allocation request with blocks within
* the same allocation group as the hint. * the same dmap as the hint.
*/ */
if ((rc = if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
!= ENOSPC) { != ENOSPC) {
if (rc == 0) { if (rc == 0) {
DBALLOC(bmp->db_DBmap, bmp->db_mapsize, DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
...@@ -895,14 +883,30 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results) ...@@ -895,14 +883,30 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
IREAD_UNLOCK(ipbmap); IREAD_UNLOCK(ipbmap);
} }
tryag: /* try to satisfy the allocation request with blocks within
* the same allocation group as the hint.
*/
IWRITE_LOCK(ipbmap);
if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results))
!= ENOSPC) {
if (rc == 0)
DBALLOC(bmp->db_DBmap, bmp->db_mapsize,
*results, nblocks);
goto write_unlock;
}
IWRITE_UNLOCK(ipbmap);
pref_ag:
/*
* Let dbNextAG recommend a preferred allocation group
*/
agno = dbNextAG(ipbmap);
IWRITE_LOCK(ipbmap); IWRITE_LOCK(ipbmap);
/* determine the allocation group number of the hint and try to /* Try to allocate within this allocation group. if that fails, try to
* allocate within this allocation group. if that fails, try to
* allocate anywhere in the map. * allocate anywhere in the map.
*/ */
agno = blkno >> bmp->db_agl2size;
if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == ENOSPC) if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == ENOSPC)
rc = dbAllocAny(bmp, nblocks, l2nb, results); rc = dbAllocAny(bmp, nblocks, l2nb, results);
if (rc == 0) { if (rc == 0) {
...@@ -2314,11 +2318,9 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno, ...@@ -2314,11 +2318,9 @@ static void dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* if so, establish the new maximum allocation group number by * if so, establish the new maximum allocation group number by
* searching left for the first allocation group with allocation. * searching left for the first allocation group with allocation.
*/ */
if ((bmp->db_agfree[agno] == bmp->db_agsize if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
&& agno == bmp->db_maxag) || (agno == bmp->db_numag - 1 (agno == bmp->db_numag - 1 &&
&& bmp->db_agfree[agno] == bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
(bmp-> db_mapsize &
(BPERDMAP - 1)))) {
while (bmp->db_maxag > 0) { while (bmp->db_maxag > 0) {
bmp->db_maxag -= 1; bmp->db_maxag -= 1;
if (bmp->db_agfree[bmp->db_maxag] != if (bmp->db_agfree[bmp->db_maxag] !=
......
...@@ -227,6 +227,7 @@ struct bmap { ...@@ -227,6 +227,7 @@ struct bmap {
struct dbmap db_bmap; /* on-disk aggregate map descriptor */ struct dbmap db_bmap; /* on-disk aggregate map descriptor */
struct inode *db_ipbmap; /* ptr to aggregate map incore inode */ struct inode *db_ipbmap; /* ptr to aggregate map incore inode */
struct semaphore db_bmaplock; /* aggregate map lock */ struct semaphore db_bmaplock; /* aggregate map lock */
atomic_t db_active[MAXAG]; /* count of active, open files in AG */
u32 *db_DBmap; u32 *db_DBmap;
}; };
......
...@@ -514,9 +514,12 @@ int extFill(struct inode *ip, xad_t * xp) ...@@ -514,9 +514,12 @@ int extFill(struct inode *ip, xad_t * xp)
static int static int
extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
{ {
struct jfs_inode_info *ji = JFS_IP(ip);
struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
s64 nb, nblks, daddr, max; s64 nb, nblks, daddr, max;
int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage; int rc, nbperpage = sbi->nbperpage;
struct bmap *mp = JFS_SBI(ip->i_sb)->bmap; struct bmap *bmp = sbi->bmap;
int ag;
/* get the number of blocks to initially attempt to allocate. /* get the number of blocks to initially attempt to allocate.
* we'll first try the number of blocks requested unless this * we'll first try the number of blocks requested unless this
...@@ -524,7 +527,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) ...@@ -524,7 +527,7 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
* blocks in the map. in that case, we'll start off with the * blocks in the map. in that case, we'll start off with the
* maximum free. * maximum free.
*/ */
max = (s64) 1 << mp->db_maxfreebud; max = (s64) 1 << bmp->db_maxfreebud;
if (*nblocks >= max && *nblocks > nbperpage) if (*nblocks >= max && *nblocks > nbperpage)
nb = nblks = (max > nbperpage) ? max : nbperpage; nb = nblks = (max > nbperpage) ? max : nbperpage;
else else
...@@ -549,6 +552,18 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno) ...@@ -549,6 +552,18 @@ extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
*nblocks = nb; *nblocks = nb;
*blkno = daddr; *blkno = daddr;
if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
ag = BLKTOAG(daddr, sbi);
if (ji->active_ag == -1) {
atomic_inc(&bmp->db_active[ag]);
ji->active_ag = ag;
} else if (ji->active_ag != ag) {
atomic_dec(&bmp->db_active[ji->active_ag]);
atomic_inc(&bmp->db_active[ag]);
ji->active_ag = ag;
}
}
return (0); return (0);
} }
......
...@@ -429,6 +429,7 @@ int diRead(struct inode *ip) ...@@ -429,6 +429,7 @@ int diRead(struct inode *ip)
/* set the ag for the inode */ /* set the ag for the inode */
JFS_IP(ip)->agno = BLKTOAG(agstart, sbi); JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
JFS_IP(ip)->active_ag = -1;
return (rc); return (rc);
} }
...@@ -1358,6 +1359,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp) ...@@ -1358,6 +1359,7 @@ diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino); DBG_DIALLOC(JFS_IP(ipimap)->i_imap, ip->i_ino);
jfs_ip->ixpxd = iagp->inoext[extno]; jfs_ip->ixpxd = iagp->inoext[extno];
jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi); jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
jfs_ip->active_ag = -1;
} }
...@@ -1413,6 +1415,21 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) ...@@ -1413,6 +1415,21 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
* moving backward on the disk.) compute the hint within the * moving backward on the disk.) compute the hint within the
* file system and the iag. * file system and the iag.
*/ */
/* get the ag number of this iag */
agno = JFS_IP(pip)->agno;
if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
/*
* There is an open file actively growing. We want to
* allocate new inodes from a different ag to avoid
* fragmentation problems.
*/
agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
AG_LOCK(imap, agno);
goto tryag;
}
inum = pip->i_ino + 1; inum = pip->i_ino + 1;
ino = inum & (INOSPERIAG - 1); ino = inum & (INOSPERIAG - 1);
...@@ -1420,9 +1437,6 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip) ...@@ -1420,9 +1437,6 @@ int diAlloc(struct inode *pip, boolean_t dir, struct inode *ip)
if (ino == 0) if (ino == 0)
inum = pip->i_ino; inum = pip->i_ino;
/* get the ag number of this iag */
agno = JFS_IP(pip)->agno;
/* lock the AG inode map information */ /* lock the AG inode map information */
AG_LOCK(imap, agno); AG_LOCK(imap, agno);
......
...@@ -49,7 +49,7 @@ struct jfs_inode_info { ...@@ -49,7 +49,7 @@ struct jfs_inode_info {
long cflag; /* commit flags */ long cflag; /* commit flags */
u16 bxflag; /* xflag of pseudo buffer? */ u16 bxflag; /* xflag of pseudo buffer? */
unchar agno; /* ag number */ unchar agno; /* ag number */
unchar pad; /* pad */ signed char active_ag; /* ag currently allocating from */
lid_t blid; /* lid of pseudo buffer? */ lid_t blid; /* lid of pseudo buffer? */
lid_t atlhead; /* anonymous tlock list head */ lid_t atlhead; /* anonymous tlock list head */
lid_t atltail; /* anonymous tlock list tail */ lid_t atltail; /* anonymous tlock list tail */
......
...@@ -66,6 +66,7 @@ ...@@ -66,6 +66,7 @@
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/buffer_head.h> /* for sync_blockdev() */ #include <linux/buffer_head.h> /* for sync_blockdev() */
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/suspend.h>
#include "jfs_incore.h" #include "jfs_incore.h"
#include "jfs_filsys.h" #include "jfs_filsys.h"
#include "jfs_metapage.h" #include "jfs_metapage.h"
...@@ -2146,12 +2147,17 @@ int jfsIOWait(void *arg) ...@@ -2146,12 +2147,17 @@ int jfsIOWait(void *arg)
lbmStartIO(bp); lbmStartIO(bp);
spin_lock_irq(&log_redrive_lock); spin_lock_irq(&log_redrive_lock);
} }
if (current->flags & PF_FREEZE) {
spin_unlock_irq(&log_redrive_lock);
refrigerator(PF_IOTHREAD);
} else {
add_wait_queue(&jfs_IO_thread_wait, &wq); add_wait_queue(&jfs_IO_thread_wait, &wq);
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_irq(&log_redrive_lock); spin_unlock_irq(&log_redrive_lock);
schedule(); schedule();
current->state = TASK_RUNNING; current->state = TASK_RUNNING;
remove_wait_queue(&jfs_IO_thread_wait, &wq); remove_wait_queue(&jfs_IO_thread_wait, &wq);
}
} while (!jfs_stop_threads); } while (!jfs_stop_threads);
jFYI(1,("jfsIOWait being killed!\n")); jFYI(1,("jfsIOWait being killed!\n"));
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <linux/suspend.h>
#include "jfs_incore.h" #include "jfs_incore.h"
#include "jfs_filsys.h" #include "jfs_filsys.h"
#include "jfs_metapage.h" #include "jfs_metapage.h"
...@@ -2789,8 +2790,6 @@ int jfs_lazycommit(void *arg) ...@@ -2789,8 +2790,6 @@ int jfs_lazycommit(void *arg)
complete(&jfsIOwait); complete(&jfsIOwait);
do { do {
DECLARE_WAITQUEUE(wq, current);
LAZY_LOCK(flags); LAZY_LOCK(flags);
restart: restart:
WorkDone = 0; WorkDone = 0;
...@@ -2825,12 +2824,19 @@ int jfs_lazycommit(void *arg) ...@@ -2825,12 +2824,19 @@ int jfs_lazycommit(void *arg)
if (WorkDone) if (WorkDone)
goto restart; goto restart;
if (current->flags & PF_FREEZE) {
LAZY_UNLOCK(flags);
refrigerator(PF_IOTHREAD);
} else {
DECLARE_WAITQUEUE(wq, current);
add_wait_queue(&jfs_commit_thread_wait, &wq); add_wait_queue(&jfs_commit_thread_wait, &wq);
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
LAZY_UNLOCK(flags); LAZY_UNLOCK(flags);
schedule(); schedule();
current->state = TASK_RUNNING; current->state = TASK_RUNNING;
remove_wait_queue(&jfs_commit_thread_wait, &wq); remove_wait_queue(&jfs_commit_thread_wait, &wq);
}
} while (!jfs_stop_threads); } while (!jfs_stop_threads);
if (TxAnchor.unlock_queue) if (TxAnchor.unlock_queue)
...@@ -2981,7 +2987,6 @@ int jfs_sync(void *arg) ...@@ -2981,7 +2987,6 @@ int jfs_sync(void *arg)
complete(&jfsIOwait); complete(&jfsIOwait);
do { do {
DECLARE_WAITQUEUE(wq, current);
/* /*
* write each inode on the anonymous inode list * write each inode on the anonymous inode list
*/ */
...@@ -3030,12 +3035,20 @@ int jfs_sync(void *arg) ...@@ -3030,12 +3035,20 @@ int jfs_sync(void *arg)
} }
/* Add anon_list2 back to anon_list */ /* Add anon_list2 back to anon_list */
list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
if (current->flags & PF_FREEZE) {
TXN_UNLOCK();
refrigerator(PF_IOTHREAD);
} else {
DECLARE_WAITQUEUE(wq, current);
add_wait_queue(&jfs_sync_thread_wait, &wq); add_wait_queue(&jfs_sync_thread_wait, &wq);
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
TXN_UNLOCK(); TXN_UNLOCK();
schedule(); schedule();
current->state = TASK_RUNNING; current->state = TASK_RUNNING;
remove_wait_queue(&jfs_sync_thread_wait, &wq); remove_wait_queue(&jfs_sync_thread_wait, &wq);
}
} while (!jfs_stop_threads); } while (!jfs_stop_threads);
jFYI(1, ("jfs_sync being killed\n")); jFYI(1, ("jfs_sync being killed\n"));
......
...@@ -406,6 +406,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) ...@@ -406,6 +406,7 @@ static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags)
init_rwsem(&jfs_ip->rdwrlock); init_rwsem(&jfs_ip->rdwrlock);
init_MUTEX(&jfs_ip->commit_sem); init_MUTEX(&jfs_ip->commit_sem);
jfs_ip->atlhead = 0; jfs_ip->atlhead = 0;
jfs_ip->active_ag = -1;
inode_init_once(&jfs_ip->vfs_inode); inode_init_once(&jfs_ip->vfs_inode);
} }
} }
......
...@@ -78,6 +78,68 @@ struct ea_buffer { ...@@ -78,6 +78,68 @@ struct ea_buffer {
#define EA_NEW 0x0004 #define EA_NEW 0x0004
#define EA_MALLOC 0x0008 #define EA_MALLOC 0x0008
/*
 * Namespaces
 *
 * Each extended attribute namespace is identified by a name prefix that
 * ends in '.'.  Every *_PREFIX_LEN is the length of the corresponding
 * prefix string without its terminating NUL (hence the "sizeof - 1"),
 * which makes it suitable as the length argument of strncmp()/memcpy().
 */
#define XATTR_SYSTEM_PREFIX "system."
#define XATTR_SYSTEM_PREFIX_LEN (sizeof (XATTR_SYSTEM_PREFIX) - 1)
#define XATTR_USER_PREFIX "user."
#define XATTR_USER_PREFIX_LEN (sizeof (XATTR_USER_PREFIX) - 1)
/* Prefix prepended to on-disk names that are in no recognized namespace */
#define XATTR_OS2_PREFIX "os2."
#define XATTR_OS2_PREFIX_LEN (sizeof (XATTR_OS2_PREFIX) - 1)
/*
* These three routines are used to recognize on-disk extended attributes
* that are in a recognized namespace. If the attribute is not recognized,
* "os2." is prepended to the name
*/
/*
 * is_os2_xattr - does this on-disk attribute lack a recognized namespace?
 *
 * Returns FALSE when the stored name starts with "system." or "user.",
 * and TRUE otherwise, in which case the name is assumed to belong to
 * OS/2's flat namespace.  The name length is checked before each
 * comparison so that strncmp() is never asked to look at more bytes than
 * the name is recorded as having.
 */
static inline int is_os2_xattr(struct jfs_ea *ea)
{
	/*
	 * Recognized namespaces: "system." first, then "user.".
	 * Add any other valid namespace prefixes to this condition.
	 */
	if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN &&
	     strncmp(ea->name, XATTR_SYSTEM_PREFIX,
		     XATTR_SYSTEM_PREFIX_LEN) == 0) ||
	    (ea->namelen >= XATTR_USER_PREFIX_LEN &&
	     strncmp(ea->name, XATTR_USER_PREFIX,
		     XATTR_USER_PREFIX_LEN) == 0))
		return FALSE;

	/* Nothing matched: treat it as an OS/2 flat-namespace attribute. */
	return TRUE;
}
/*
 * name_size - length of an attribute name as reported to the caller
 *
 * This is the on-disk name length, plus the length of the "os2." prefix
 * when the attribute is not in a recognized namespace.  The terminating
 * NUL is not counted.
 */
static inline int name_size(struct jfs_ea *ea)
{
	int size = ea->namelen;

	if (is_os2_xattr(ea))
		size += XATTR_OS2_PREFIX_LEN;

	return size;
}
/*
 * copy_name - write an attribute's externally visible name into buffer
 *
 * Copies the on-disk name into buffer, preceded by "os2." when the
 * attribute is not in a recognized namespace, and NUL-terminates the
 * result.  The caller must supply room for the returned length plus one
 * byte for the terminator.
 *
 * Returns the number of name bytes written, not counting the NUL.
 */
static inline int copy_name(char *buffer, struct jfs_ea *ea)
{
	char *dst = buffer;
	int prefix_len = 0;

	if (is_os2_xattr(ea)) {
		memcpy(dst, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN);
		dst += XATTR_OS2_PREFIX_LEN;
		prefix_len = XATTR_OS2_PREFIX_LEN;
	}

	/* dst now points just past the optional prefix. */
	memcpy(dst, ea->name, ea->namelen);
	dst[ea->namelen] = 0;

	return prefix_len + ea->namelen;
}
/* Forward references */ /* Forward references */
static void ea_release(struct inode *inode, struct ea_buffer *ea_buf); static void ea_release(struct inode *inode, struct ea_buffer *ea_buf);
...@@ -577,7 +639,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size) ...@@ -577,7 +639,8 @@ static int ea_put(struct inode *inode, struct ea_buffer *ea_buf, int new_size)
return rc; return rc;
} }
static int can_set_xattr(struct inode *inode, const char *name) static int can_set_xattr(struct inode *inode, const char *name,
void *value, size_t value_len)
{ {
if (IS_RDONLY(inode)) if (IS_RDONLY(inode))
return -EROFS; return -EROFS;
...@@ -585,6 +648,10 @@ static int can_set_xattr(struct inode *inode, const char *name) ...@@ -585,6 +648,10 @@ static int can_set_xattr(struct inode *inode, const char *name)
if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode))
return -EPERM; return -EPERM;
if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) &&
(strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) != 0))
return -EOPNOTSUPP;
if (!S_ISREG(inode->i_mode) && if (!S_ISREG(inode->i_mode) &&
(!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX)) (!S_ISDIR(inode->i_mode) || inode->i_mode &S_ISVTX))
return -EPERM; return -EPERM;
...@@ -602,13 +669,24 @@ int __jfs_setxattr(struct inode *inode, const char *name, void *value, ...@@ -602,13 +669,24 @@ int __jfs_setxattr(struct inode *inode, const char *name, void *value,
int xattr_size; int xattr_size;
int new_size; int new_size;
int namelen = strlen(name); int namelen = strlen(name);
char *os2name = NULL;
int found = 0; int found = 0;
int rc; int rc;
int length; int length;
if ((rc = can_set_xattr(inode, name))) if ((rc = can_set_xattr(inode, name, value, value_len)))
return rc; return rc;
if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
GFP_KERNEL);
if (!os2name)
return -ENOMEM;
strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
name = os2name;
namelen -= XATTR_OS2_PREFIX_LEN;
}
xattr_size = ea_get(inode, &ea_buf, 0); xattr_size = ea_get(inode, &ea_buf, 0);
if (xattr_size < 0) { if (xattr_size < 0) {
rc = xattr_size; rc = xattr_size;
...@@ -714,6 +792,9 @@ int __jfs_setxattr(struct inode *inode, const char *name, void *value, ...@@ -714,6 +792,9 @@ int __jfs_setxattr(struct inode *inode, const char *name, void *value,
release: release:
ea_release(inode, &ea_buf); ea_release(inode, &ea_buf);
out: out:
if (os2name)
kfree(os2name);
return rc; return rc;
} }
...@@ -728,7 +809,7 @@ int jfs_setxattr(struct dentry *dentry, const char *name, void *value, ...@@ -728,7 +809,7 @@ int jfs_setxattr(struct dentry *dentry, const char *name, void *value,
return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags); return __jfs_setxattr(dentry->d_inode, name, value, value_len, flags);
} }
static int can_get_xattr(struct inode *inode, const char *name) static inline int can_get_xattr(struct inode *inode, const char *name)
{ {
return permission(inode, MAY_READ); return permission(inode, MAY_READ);
} }
...@@ -742,12 +823,23 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, ...@@ -742,12 +823,23 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
int xattr_size; int xattr_size;
ssize_t size; ssize_t size;
int namelen = strlen(name); int namelen = strlen(name);
char *os2name = NULL;
int rc; int rc;
char *value; char *value;
if ((rc = can_get_xattr(inode, name))) if ((rc = can_get_xattr(inode, name)))
return rc; return rc;
if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
GFP_KERNEL);
if (!os2name)
return -ENOMEM;
strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
name = os2name;
namelen -= XATTR_OS2_PREFIX_LEN;
}
xattr_size = ea_get(inode, &ea_buf, 0); xattr_size = ea_get(inode, &ea_buf, 0);
if (xattr_size < 0) { if (xattr_size < 0) {
size = xattr_size; size = xattr_size;
...@@ -780,6 +872,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, ...@@ -780,6 +872,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
release: release:
ea_release(inode, &ea_buf); ea_release(inode, &ea_buf);
out: out:
if (os2name)
kfree(os2name);
return size; return size;
} }
...@@ -813,7 +907,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) ...@@ -813,7 +907,7 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
/* compute required size of list */ /* compute required size of list */
for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea))
size += ea->namelen + 1; size += name_size(ea) + 1;
if (!data) if (!data)
goto release; goto release;
...@@ -826,9 +920,8 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size) ...@@ -826,9 +920,8 @@ ssize_t jfs_listxattr(struct dentry * dentry, char *data, size_t buf_size)
/* Copy attribute names to buffer */ /* Copy attribute names to buffer */
buffer = data; buffer = data;
for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) { for (ea = FIRST_EA(ealist); ea < END_EALIST(ealist); ea = NEXT_EA(ea)) {
memcpy(buffer, ea->name, ea->namelen); int namelen = copy_name(buffer, ea);
buffer[ea->namelen] = 0; buffer += namelen + 1;
buffer += ea->namelen + 1;
} }
release: release:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment