Commit 4beda7c1 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] remove buffer unused_list

Removes the buffer_head unused list.  Use a mempool instead.

The reduced lock contention provided about a 10% boost on Anton's 12-way.
parent 090da372
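
For context on the mempool API this patch switches to: a mempool wraps an underlying allocator (here the buffer_head slab cache) and keeps a minimum number of pre-allocated elements in reserve, so the reserve plays the role the hand-maintained unused list used to play. The sketch below is a minimal illustration of that pattern, modelled on the new code added to buffer.c further down; the foo_* names and FOO_POOL_MIN value are invented for the example and are not part of the patch.

    #include <linux/init.h>
    #include <linux/slab.h>
    #include <linux/mempool.h>

    struct foo { int payload; };            /* stand-in object type, illustrative only */
    #define FOO_POOL_MIN 16                 /* illustrative reserve size */

    static kmem_cache_t *foo_cachep;
    static mempool_t *foo_pool;

    /* mempool calls back into the slab cache while memory is plentiful */
    static void *foo_pool_alloc(int gfp_mask, void *pool_data)
    {
            return kmem_cache_alloc(foo_cachep, gfp_mask);
    }

    static void foo_pool_free(void *element, void *pool_data)
    {
            kmem_cache_free(foo_cachep, element);
    }

    void __init foo_init(void)
    {
            foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
                            SLAB_HWCACHE_ALIGN, NULL, NULL);
            /* keep FOO_POOL_MIN elements in reserve for when the slab allocation fails */
            foo_pool = mempool_create(FOO_POOL_MIN, foo_pool_alloc,
                            foo_pool_free, NULL);
    }

    struct foo *foo_alloc(void)
    {
            /* GFP_NOFS, as in buffer.c: the allocation must not re-enter the FS */
            return mempool_alloc(foo_pool, GFP_NOFS);
    }

    void foo_free(struct foo *f)
    {
            mempool_free(f, foo_pool);
    }

Because the mempool itself handles falling back to the reserve and waiting for freed elements, the old unused_list, its spinlock, and the buffer_wait queue all become unnecessary, which is presumably where the reduced lock contention comes from.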
@@ -30,28 +30,13 @@
 #include <linux/iobuf.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
+#include <linux/mempool.h>
 #include <asm/bitops.h>
 
 #define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
-#define NR_RESERVED (10*MAX_BUF_PER_PAGE)
-#define MAX_UNUSED_BUFFERS NR_RESERVED+20 /* don't ever have more than this
-                                             number of unused buffer heads */
-
-/* Anti-deadlock ordering:
- *      i_bufferlist_lock > unused_list_lock
- */
-
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_inode_buffers)
 
-/*
- * A local cache of buffer_heads is maintained at unused_list.
- * Free buffers are chained through their b_private field.
- */
-static struct buffer_head *unused_list;
-static int nr_unused_buffer_heads;
-static spinlock_t unused_list_lock = SPIN_LOCK_UNLOCKED;
-static DECLARE_WAIT_QUEUE_HEAD(buffer_wait);
-
 /* This is used by some architectures to estimate available memory. */
 atomic_t buffermem_pages = ATOMIC_INIT(0);

@@ -723,33 +708,6 @@ void invalidate_inode_buffers(struct inode *inode)
        spin_unlock(&inode->i_bufferlist_lock);
 }
 
-static void __put_unused_buffer_head(struct buffer_head * bh)
-{
-       if (bh->b_inode)
-               BUG();
-       if (nr_unused_buffer_heads >= MAX_UNUSED_BUFFERS) {
-               kmem_cache_free(bh_cachep, bh);
-       } else {
-               bh->b_bdev = NULL;
-               bh->b_blocknr = -1;
-               bh->b_this_page = NULL;
-               nr_unused_buffer_heads++;
-               bh->b_private = unused_list;
-               unused_list = bh;
-               if (waitqueue_active(&buffer_wait))
-                       wake_up(&buffer_wait);
-       }
-}
-
-void put_unused_buffer_head(struct buffer_head *bh)
-{
-       spin_lock(&unused_list_lock);
-       __put_unused_buffer_head(bh);
-       spin_unlock(&unused_list_lock);
-}
-EXPORT_SYMBOL(put_unused_buffer_head);
-
 /*
  * Create the appropriate buffers when given a page for data area and
  * the size of each buffer.. Use the bh->b_this_page linked list to

@@ -769,7 +727,7 @@ create_buffers(struct page * page, unsigned long size, int async)
        head = NULL;
        offset = PAGE_SIZE;
        while ((offset -= size) >= 0) {
-               bh = get_unused_buffer_head(async);
+               bh = alloc_buffer_head(async);
                if (!bh)
                        goto no_grow;

@@ -792,13 +750,11 @@ create_buffers(struct page * page, unsigned long size, int async)
         */
 no_grow:
        if (head) {
-               spin_lock(&unused_list_lock);
                do {
                        bh = head;
                        head = head->b_this_page;
-                       __put_unused_buffer_head(bh);
+                       free_buffer_head(bh);
                } while (head);
-               spin_unlock(&unused_list_lock);
        }
 
        /*

@@ -1087,54 +1043,6 @@ struct buffer_head * __bread(struct block_device *bdev, int block, int size)
        return NULL;
 }
 
-/*
- * Reserve NR_RESERVED buffer heads for async IO requests to avoid
- * no-buffer-head deadlock. Return NULL on failure; waiting for
- * buffer heads is now handled in create_buffers().
- */
-struct buffer_head * get_unused_buffer_head(int async)
-{
-       struct buffer_head * bh;
-
-       spin_lock(&unused_list_lock);
-       if (nr_unused_buffer_heads > NR_RESERVED) {
-               bh = unused_list;
-               unused_list = bh->b_private;
-               nr_unused_buffer_heads--;
-               spin_unlock(&unused_list_lock);
-               return bh;
-       }
-       spin_unlock(&unused_list_lock);
-
-       /* This is critical.  We can't call out to the FS
-        * to get more buffer heads, because the FS may need
-        * more buffer-heads itself.  Thus SLAB_NOFS.
-        */
-       if((bh = kmem_cache_alloc(bh_cachep, SLAB_NOFS)) != NULL) {
-               bh->b_blocknr = -1;
-               bh->b_this_page = NULL;
-               return bh;
-       }
-
-       /*
-        * If we need an async buffer, use the reserved buffer heads.
-        */
-       if (async) {
-               spin_lock(&unused_list_lock);
-               if (unused_list) {
-                       bh = unused_list;
-                       unused_list = bh->b_private;
-                       nr_unused_buffer_heads--;
-                       spin_unlock(&unused_list_lock);
-                       return bh;
-               }
-               spin_unlock(&unused_list_lock);
-       }
-       return NULL;
-}
-EXPORT_SYMBOL(get_unused_buffer_head);
-
 void set_bh_page(struct buffer_head *bh,
                struct page *page, unsigned long offset)
 {

@@ -2285,15 +2193,13 @@ static /*inline*/ int drop_buffers(struct page *page)
        if (!was_uptodate && Page_Uptodate(page))
                buffer_error();
 
-       spin_lock(&unused_list_lock);
        do {
                struct buffer_head *next = bh->b_this_page;
                __remove_inode_queue(bh);
-               __put_unused_buffer_head(bh);
+               free_buffer_head(bh);
                bh = next;
        } while (bh != head);
-       spin_unlock(&unused_list_lock);
        __clear_page_buffers(page);
        return 1;
 failed:

@@ -2351,3 +2257,57 @@ void wakeup_bdflush(void)
 {
        pdflush_flush(0);
 }
+
+/*
+ * Buffer-head allocation
+ */
+static kmem_cache_t *bh_cachep;
+static mempool_t *bh_mempool;
+
+struct buffer_head *alloc_buffer_head(int async)
+{
+       return mempool_alloc(bh_mempool, GFP_NOFS);
+}
+EXPORT_SYMBOL(alloc_buffer_head);
+
+void free_buffer_head(struct buffer_head *bh)
+{
+       if (bh->b_inode)
+               BUG();
+       mempool_free(bh, bh_mempool);
+}
+EXPORT_SYMBOL(free_buffer_head);
+
+static void init_buffer_head(void *data, kmem_cache_t *cachep, unsigned long flags)
+{
+       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+                           SLAB_CTOR_CONSTRUCTOR) {
+               struct buffer_head * bh = (struct buffer_head *)data;
+
+               memset(bh, 0, sizeof(*bh));
+               bh->b_blocknr = -1;
+               init_waitqueue_head(&bh->b_wait);
+       }
+}
+
+static void *bh_mempool_alloc(int gfp_mask, void *pool_data)
+{
+       return kmem_cache_alloc(bh_cachep, gfp_mask);
+}
+
+static void bh_mempool_free(void *element, void *pool_data)
+{
+       return kmem_cache_free(bh_cachep, element);
+}
+
+#define NR_RESERVED (10*MAX_BUF_PER_PAGE)
+#define MAX_UNUSED_BUFFERS NR_RESERVED+20
+void __init buffer_init(void)
+{
+       bh_cachep = kmem_cache_create("buffer_head",
+                       sizeof(struct buffer_head), 0,
+                       SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
+       bh_mempool = mempool_create(MAX_UNUSED_BUFFERS, bh_mempool_alloc,
+                       bh_mempool_free, NULL);
+}
@@ -1386,18 +1386,6 @@ static void __init dcache_init(unsigned long mempages)
        } while (i);
 }
 
-static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
-{
-       if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
-           SLAB_CTOR_CONSTRUCTOR)
-       {
-               struct buffer_head * bh = (struct buffer_head *) foo;
-
-               memset(bh, 0, sizeof(*bh));
-               init_waitqueue_head(&bh->b_wait);
-       }
-}
-
 /* SLAB cache for __getname() consumers */
 kmem_cache_t *names_cachep;

@@ -1407,9 +1395,6 @@ kmem_cache_t *filp_cachep;
 /* SLAB cache for dquot structures */
 kmem_cache_t *dquot_cachep;
 
-/* SLAB cache for buffer_head structures */
-kmem_cache_t *bh_cachep;
-EXPORT_SYMBOL(bh_cachep);
 EXPORT_SYMBOL(d_genocide);
 
 extern void bdev_cache_init(void);

@@ -1417,12 +1402,6 @@ extern void cdev_cache_init(void);
 
 void __init vfs_caches_init(unsigned long mempages)
 {
-       bh_cachep = kmem_cache_create("buffer_head",
-                       sizeof(struct buffer_head), 0,
-                       SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
-       if(!bh_cachep)
-               panic("Cannot create buffer head SLAB cache");
-
        names_cachep = kmem_cache_create("names_cache",
                        PATH_MAX, 0,
                        SLAB_HWCACHE_ALIGN, NULL, NULL);
...
@@ -510,7 +510,7 @@ void journal_commit_transaction(journal_t *journal)
                journal_unlock_journal_head(jh);
                __brelse(bh);
                J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
-               put_unused_buffer_head(bh);
+               free_buffer_head(bh);
 
                /* We also have to unlock and free the corresponding
                   shadowed buffer */
...
@@ -454,10 +454,10 @@ int journal_write_metadata_buffer(transaction_t *transaction,
         * Right, time to make up the new buffer_head.
         */
        do {
-               new_bh = get_unused_buffer_head(0);
+               new_bh = alloc_buffer_head(0);
                if (!new_bh) {
                        printk (KERN_NOTICE __FUNCTION__
-                               ": ENOMEM at get_unused_buffer_head, "
+                               ": ENOMEM at alloc_buffer_head, "
                                "trying again.\n");
                        yield();
                }
...
@@ -206,7 +206,7 @@ extern int leases_enable, dir_notify_enable, lease_break_time;
 extern void update_atime (struct inode *);
 #define UPDATE_ATIME(inode) update_atime (inode)
 
-extern void buffer_init(unsigned long);
+extern void buffer_init(void);
 extern void inode_init(unsigned long);
 extern void mnt_init(unsigned long);
 extern void files_init(unsigned long);

@@ -1536,8 +1536,8 @@ static inline void map_bh(struct buffer_head *bh, struct super_block *sb, int bl
 }
 
 extern void wakeup_bdflush(void);
-extern void put_unused_buffer_head(struct buffer_head * bh);
-extern struct buffer_head * get_unused_buffer_head(int async);
+extern struct buffer_head *alloc_buffer_head(int async);
+extern void free_buffer_head(struct buffer_head * bh);
 extern int brw_page(int, struct page *, struct block_device *, sector_t [], int);
...
@@ -70,7 +70,6 @@ extern kmem_cache_t *names_cachep;
 extern kmem_cache_t *files_cachep;
 extern kmem_cache_t *filp_cachep;
 extern kmem_cache_t *dquot_cachep;
-extern kmem_cache_t *bh_cachep;
 extern kmem_cache_t *fs_cachep;
 extern kmem_cache_t *sigact_cachep;
 extern kmem_cache_t *bio_cachep;
...
@@ -389,6 +389,7 @@ asmlinkage void __init start_kernel(void)
        fork_init(mempages);
        proc_caches_init();
+       buffer_init();
        vfs_caches_init(mempages);
        radix_tree_init();
 #if defined(CONFIG_ARCH_S390)
...