Commit f97cd2d9 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

[PATCH] shmem: no sbinfo for shm mount

SGI investigations have shown a dramatic contrast in scalability between
anonymous memory and shmem objects.  Processes building distinct shmem objects
in parallel hit heavy contention on shmem superblock stat_lock.  Across 256
cpus an intensive test runs 300 times slower than anonymous.

Jack Steiner has observed that all the shmem superblock free_blocks and
free_inodes accounting is redundant in the case of the internal mount used for
SysV shared memory and for shared writable /dev/zero objects (the cases which
most concern them): it specifically declines to limit.

Based upon Brent Casavant's SHMEM_NOSBINFO patch, this instead just removes
the shmem_sb_info structure from the internal kernel mount, testing where
necessary for null sbinfo pointer.  shmem_set_size moved within CONFIG_TMPFS,
its arg named "sbinfo" as elsewhere.

This brings shmem object scalability up to that of anonymous memory, in the
case where distinct processes are building (faulting to allocate) distinct
objects.  It significantly improves parallel building of a shared shmem object
(that test runs 14 times faster across 256 cpus), but other issues remain in
that case: to be addressed in later patches.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 77cdadab
......@@ -185,10 +185,12 @@ static spinlock_t shmem_ilock = SPIN_LOCK_UNLOCKED;
/*
 * Return one page's worth of block accounting to the superblock when a
 * shmem page is released.
 *
 * After this patch the internal kernel mount (SysV shm, shared writable
 * /dev/zero) carries no shmem_sb_info at all, so sbinfo may be NULL here:
 * that mount imposes no free_blocks limit, and skipping the update avoids
 * contention on stat_lock entirely for that case.
 */
static void shmem_free_block(struct inode *inode)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks++;
inode->i_blocks -= BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
}
}
/*
......@@ -213,11 +215,13 @@ static void shmem_recalc_inode(struct inode *inode)
if (freed > 0) {
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
info->alloced -= freed;
shmem_unacct_blocks(info->flags, freed);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
sbinfo->free_blocks += freed;
inode->i_blocks -= freed*BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
shmem_unacct_blocks(info->flags, freed);
}
}
}
......@@ -350,6 +354,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
* page (and perhaps indirect index pages) yet to allocate:
* a waste to allocate index if we cannot allocate data.
*/
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks <= 1) {
spin_unlock(&sbinfo->stat_lock);
......@@ -358,6 +363,7 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
}
spin_unlock(&info->lock);
page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
......@@ -605,10 +611,12 @@ static void shmem_delete_inode(struct inode *inode)
inode->i_size = 0;
shmem_truncate(inode);
}
if (sbinfo) {
BUG_ON(inode->i_blocks);
spin_lock(&sbinfo->stat_lock);
sbinfo->free_inodes++;
spin_unlock(&sbinfo->stat_lock);
}
clear_inode(inode);
}
......@@ -1001,8 +1009,10 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
} else {
shmem_swp_unmap(entry);
sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks == 0 || shmem_acct_block(info->flags)) {
if (sbinfo->free_blocks == 0 ||
shmem_acct_block(info->flags)) {
spin_unlock(&sbinfo->stat_lock);
spin_unlock(&info->lock);
error = -ENOSPC;
......@@ -1011,6 +1021,11 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
spin_unlock(&sbinfo->stat_lock);
} else if (shmem_acct_block(info->flags)) {
spin_unlock(&info->lock);
error = -ENOSPC;
goto failed;
}
if (!filepage) {
spin_unlock(&info->lock);
......@@ -1187,6 +1202,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
if (sbinfo) {
spin_lock(&sbinfo->stat_lock);
if (!sbinfo->free_inodes) {
spin_unlock(&sbinfo->stat_lock);
......@@ -1194,6 +1210,7 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
}
sbinfo->free_inodes--;
spin_unlock(&sbinfo->stat_lock);
}
inode = new_inode(sb);
if (inode) {
......@@ -1234,32 +1251,32 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev)
return inode;
}
static int shmem_set_size(struct shmem_sb_info *info,
#ifdef CONFIG_TMPFS
static int shmem_set_size(struct shmem_sb_info *sbinfo,
unsigned long max_blocks, unsigned long max_inodes)
{
int error;
unsigned long blocks, inodes;
spin_lock(&info->stat_lock);
blocks = info->max_blocks - info->free_blocks;
inodes = info->max_inodes - info->free_inodes;
spin_lock(&sbinfo->stat_lock);
blocks = sbinfo->max_blocks - sbinfo->free_blocks;
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
error = -EINVAL;
if (max_blocks < blocks)
goto out;
if (max_inodes < inodes)
goto out;
error = 0;
info->max_blocks = max_blocks;
info->free_blocks = max_blocks - blocks;
info->max_inodes = max_inodes;
info->free_inodes = max_inodes - inodes;
sbinfo->max_blocks = max_blocks;
sbinfo->free_blocks = max_blocks - blocks;
sbinfo->max_inodes = max_inodes;
sbinfo->free_inodes = max_inodes - inodes;
out:
spin_unlock(&info->stat_lock);
spin_unlock(&sbinfo->stat_lock);
return error;
}
#ifdef CONFIG_TMPFS
static struct inode_operations shmem_symlink_inode_operations;
static struct inode_operations shmem_symlink_inline_operations;
......@@ -1819,47 +1836,51 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data)
}
#endif
/*
 * Free the per-superblock shmem_sb_info (allocated in shmem_fill_super;
 * NULL for the internal mount, which kfree handles as a no-op) and clear
 * the pointer so nothing can reuse it after unmount.
 */
static void shmem_put_super(struct super_block *sb)
{
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
static int shmem_fill_super(struct super_block *sb,
void *data, int silent)
{
struct inode *inode;
struct dentry *root;
unsigned long blocks, inodes;
int mode = S_IRWXUGO | S_ISVTX;
uid_t uid = current->fsuid;
gid_t gid = current->fsgid;
struct shmem_sb_info *sbinfo;
int err = -ENOMEM;
sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
memset(sbinfo, 0, sizeof(struct shmem_sb_info));
#ifdef CONFIG_TMPFS
/*
* Per default we only allow half of the physical ram per
* tmpfs instance, limiting inodes to one per page of lowmem.
* tmpfs instance, limiting inodes to one per page of lowmem;
* but the internal instance is left unlimited.
*/
blocks = totalram_pages / 2;
inodes = totalram_pages - totalhigh_pages;
if (!(sb->s_flags & MS_NOUSER)) {
struct shmem_sb_info *sbinfo;
unsigned long blocks = totalram_pages / 2;
unsigned long inodes = totalram_pages - totalhigh_pages;
if (inodes > blocks)
inodes = blocks;
#ifdef CONFIG_TMPFS
if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes)) {
err = -EINVAL;
goto failed;
}
#else
sb->s_flags |= MS_NOUSER;
#endif
if (shmem_parse_options(data, &mode,
&uid, &gid, &blocks, &inodes))
return -EINVAL;
sbinfo = kmalloc(sizeof(struct shmem_sb_info), GFP_KERNEL);
if (!sbinfo)
return -ENOMEM;
sb->s_fs_info = sbinfo;
spin_lock_init(&sbinfo->stat_lock);
sbinfo->max_blocks = blocks;
sbinfo->free_blocks = blocks;
sbinfo->max_inodes = inodes;
sbinfo->free_inodes = inodes;
}
#endif
sb->s_maxbytes = SHMEM_MAX_BYTES;
sb->s_blocksize = PAGE_CACHE_SIZE;
sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
......@@ -1879,17 +1900,10 @@ static int shmem_fill_super(struct super_block *sb,
failed_iput:
iput(inode);
failed:
kfree(sbinfo);
sb->s_fs_info = NULL;
shmem_put_super(sb);
return err;
}
/*
 * Release the superblock's shmem_sb_info and clear s_fs_info.
 * (This definition is being moved earlier in the file by this patch so
 * that shmem_fill_super's failure path can call it.)
 */
static void shmem_put_super(struct super_block *sb)
{
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
static kmem_cache_t *shmem_inode_cachep;
static struct inode *shmem_alloc_inode(struct super_block *sb)
......@@ -2023,15 +2037,13 @@ static int __init init_tmpfs(void)
#ifdef CONFIG_TMPFS
devfs_mk_dir("shm");
#endif
shm_mnt = kern_mount(&tmpfs_fs_type);
shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER,
tmpfs_fs_type.name, NULL);
if (IS_ERR(shm_mnt)) {
error = PTR_ERR(shm_mnt);
printk(KERN_ERR "Could not kern_mount tmpfs\n");
goto out1;
}
/* The internal instance should not do size checking */
shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
return 0;
out1:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment