Commit 78f1f626 authored by Linus Torvalds

Merge home.transmeta.com:/home/torvalds/v2.5/akpm

into home.transmeta.com:/home/torvalds/v2.5/linux
parents 6fe152cf f1dfe022
......@@ -3,11 +3,13 @@ Changes since 2.5.0:
---
[recommended]
New helpers: sb_bread(), sb_getblk(), sb_get_hash_table(), set_bh(),
New helpers: sb_bread(), sb_getblk(), sb_find_get_block(), set_bh(),
sb_set_blocksize() and sb_min_blocksize().
Use them.
(sb_find_get_block() replaces 2.4's get_hash_table())
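A rough conversion sketch (not part of this patch; the 2.4-style calls are
quoted from memory and "block" is a placeholder):

	bh = bread(sb->s_dev, block, sb->s_blocksize);		/* 2.4 */
	bh = sb_bread(sb, block);				/* 2.5: blocksize taken from sb */

	bh = get_hash_table(sb->s_dev, block, sb->s_blocksize);	/* 2.4 */
	bh = sb_find_get_block(sb, block);			/* 2.5 */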
---
[recommended]
......
......@@ -56,12 +56,16 @@ int __verify_write(const void * addr, unsigned long size)
for (;;) {
survive:
{
int fault = handle_mm_fault(current->mm, vma, start, 1);
if (!fault)
switch (handle_mm_fault(current->mm, vma, start, 1)) {
case VM_FAULT_SIGBUS:
goto bad_area;
if (fault < 0)
case VM_FAULT_OOM:
goto out_of_memory;
case VM_FAULT_MINOR:
case VM_FAULT_MAJOR:
break;
default:
BUG();
}
if (!size)
break;
......@@ -239,16 +243,18 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code)
* the fault.
*/
switch (handle_mm_fault(mm, vma, address, write)) {
case 1:
tsk->min_flt++;
break;
case 2:
tsk->maj_flt++;
break;
case 0:
goto do_sigbus;
default:
goto out_of_memory;
case VM_FAULT_MINOR:
tsk->min_flt++;
break;
case VM_FAULT_MAJOR:
tsk->maj_flt++;
break;
case VM_FAULT_SIGBUS:
goto do_sigbus;
case VM_FAULT_OOM:
goto out_of_memory;
default:
BUG();
}
/*
......
......@@ -964,8 +964,7 @@ void blk_run_queues(void)
return;
}
list_splice(&blk_plug_list, &local_plug_list);
INIT_LIST_HEAD(&blk_plug_list);
list_splice_init(&blk_plug_list, &local_plug_list);
spin_unlock_irq(&blk_plug_lock);
while (!list_empty(&local_plug_list)) {
......
......@@ -740,8 +740,7 @@ void abort_requests(struct hpsb_host *host)
host->ops->devctl(host, CANCEL_REQUESTS, 0);
spin_lock_irqsave(&host->pending_pkt_lock, flags);
list_splice(&host->pending_packets, &llist);
INIT_LIST_HEAD(&host->pending_packets);
list_splice_init(&host->pending_packets, &llist);
spin_unlock_irqrestore(&host->pending_pkt_lock, flags);
list_for_each(lh, &llist) {
......
......@@ -174,6 +174,10 @@
- Add `global_options' as default for options[]. Ditto global_enable_wol,
global_full_duplex.
LK1.1.18 01Jul02 akpm
- Fix for undocumented transceiver power-up bit on some 3c566B's
(Donald Becker, Rahul Karnik)
- See http://www.zip.com.au/~akpm/linux/#3c59x-2.3 for more details.
- Also see Documentation/networking/vortex.txt
*/
......@@ -189,8 +193,8 @@
#define DRV_NAME "3c59x"
#define DRV_VERSION "LK1.1.17"
#define DRV_RELDATE "18 Dec 2001"
#define DRV_VERSION "LK1.1.18"
#define DRV_RELDATE "1 Jul 2002"
......@@ -414,7 +418,7 @@ enum { IS_VORTEX=1, IS_BOOMERANG=2, IS_CYCLONE=4, IS_TORNADO=8,
EEPROM_8BIT=0x10, /* AKPM: Uses 0x230 as the base bitmaps for EEPROM reads */
HAS_PWR_CTRL=0x20, HAS_MII=0x40, HAS_NWAY=0x80, HAS_CB_FNS=0x100,
INVERT_MII_PWR=0x200, INVERT_LED_PWR=0x400, MAX_COLLISION_RESET=0x800,
EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000 };
EEPROM_OFFSET=0x1000, HAS_HWCKSM=0x2000, WNO_XCVR_PWR=0x4000 };
enum vortex_chips {
CH_3C590 = 0,
......@@ -522,7 +526,7 @@ static struct vortex_chip_info {
HAS_HWCKSM, 128, },
{"3c556B Laptop Hurricane",
PCI_USES_IO|PCI_USES_MASTER, IS_TORNADO|HAS_NWAY|EEPROM_OFFSET|HAS_CB_FNS|INVERT_MII_PWR|
HAS_HWCKSM, 128, },
WNO_XCVR_PWR|HAS_HWCKSM, 128, },
{"3c575 [Megahertz] 10/100 LAN CardBus",
PCI_USES_IO|PCI_USES_MASTER, IS_BOOMERANG|HAS_MII|EEPROM_8BIT, 128, },
......@@ -1222,6 +1226,10 @@ static int __devinit vortex_probe1(struct pci_dev *pdev,
if (vp->drv_flags & INVERT_MII_PWR)
n |= 0x4000;
outw(n, ioaddr + Wn2_ResetOptions);
if (vp->drv_flags & WNO_XCVR_PWR) {
EL3WINDOW(0);
outw(0x0800, ioaddr);
}
}
/* Extract our information from the EEPROM data. */
......
......@@ -1031,14 +1031,6 @@ extern unsigned char e100_selftest(struct e100_private *bdp, u32 *st_timeout,
extern unsigned char e100_get_link_state(struct e100_private *bdp);
extern unsigned char e100_wait_scb(struct e100_private *bdp);
#ifndef yield
#define yield() \
do { \
current->policy |= SCHED_YIELD; \
schedule(); \
} while (0)
#endif
extern void e100_deisolate_driver(struct e100_private *bdp,
u8 recover, u8 full_reset);
extern unsigned char e100_hw_reset_recover(struct e100_private *bdp,
......
......@@ -303,15 +303,15 @@ pci_pool_free (struct pci_pool *pool, void *vaddr, dma_addr_t dma)
#ifdef CONFIG_DEBUG_SLAB
if (((dma - page->dma) + (void *)page->vaddr) != vaddr) {
printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%lx\n",
printk (KERN_ERR "pci_pool_free %s/%s, %p (bad vaddr)/%Lx\n",
pool->dev ? pool->dev->slot_name : NULL,
pool->name, vaddr, (unsigned long) dma);
pool->name, vaddr, (unsigned long long) dma);
return;
}
if (page->bitmap [map] & (1UL << block)) {
printk (KERN_ERR "pci_pool_free %s/%s, dma %x already free\n",
printk (KERN_ERR "pci_pool_free %s/%s, dma %Lx already free\n",
pool->dev ? pool->dev->slot_name : NULL,
pool->name, dma);
pool->name, (unsigned long long)dma);
return;
}
memset (vaddr, POOL_POISON_BYTE, pool->size);
......
......@@ -2467,7 +2467,9 @@ struct scatterlist *scsi_alloc_sgtable(Scsi_Cmnd *SCpnt, int gfp_mask)
sgp = scsi_sg_pools + SCpnt->sglist_len;
current->flags |= PF_NOWARN;
sgl = mempool_alloc(sgp->pool, gfp_mask);
current->flags &= ~PF_NOWARN;
if (sgl) {
memset(sgl, 0, sgp->size);
return sgl;
......
......@@ -74,8 +74,10 @@ int scsi_init_io(Scsi_Cmnd *SCpnt)
SCpnt->use_sg = count;
gfp_mask = GFP_NOIO;
if (in_interrupt())
if (in_interrupt()) {
gfp_mask &= ~__GFP_WAIT;
gfp_mask |= __GFP_HIGH;
}
/*
* if sg table allocation fails, requeue request later.
......
......@@ -135,21 +135,26 @@ inline void bio_init(struct bio *bio)
**/
struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
{
struct bio *bio = mempool_alloc(bio_pool, gfp_mask);
struct bio *bio;
struct bio_vec *bvl = NULL;
current->flags |= PF_NOWARN;
bio = mempool_alloc(bio_pool, gfp_mask);
if (unlikely(!bio))
return NULL;
goto out;
if (!nr_iovecs || (bvl = bvec_alloc(gfp_mask,nr_iovecs,&bio->bi_max))) {
bio_init(bio);
bio->bi_destructor = bio_destructor;
bio->bi_io_vec = bvl;
return bio;
goto out;
}
mempool_free(bio, bio_pool);
return NULL;
bio = NULL;
out:
current->flags &= ~PF_NOWARN;
return bio;
}
/**
......
......@@ -23,8 +23,6 @@
#include <asm/uaccess.h>
#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512)
static unsigned long max_block(struct block_device *bdev)
{
unsigned int retval = ~0U;
......
......@@ -36,6 +36,8 @@
#include <linux/buffer_head.h>
#include <asm/bitops.h>
static void invalidate_bh_lrus(void);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
/*
......@@ -389,7 +391,7 @@ asmlinkage long sys_fdatasync(unsigned int fd)
* private_lock is contended then so is mapping->page_lock).
*/
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, int unused)
__find_get_block_slow(struct block_device *bdev, sector_t block, int unused)
{
struct inode *bd_inode = bdev->bd_inode;
struct address_space *bd_mapping = bd_inode->i_mapping;
......@@ -459,12 +461,15 @@ __find_get_block(struct block_device *bdev, sector_t block, int unused)
pass does the actual I/O. */
void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
{
invalidate_bh_lrus();
/*
* FIXME: what about destroy_dirty_buffers?
* We really want to use invalidate_inode_pages2() for
* that, but not until that's cleaned up.
*/
current->flags |= PF_INVALIDATE;
invalidate_inode_pages(bdev->bd_inode);
current->flags &= ~PF_INVALIDATE;
}
void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers)
......@@ -489,7 +494,6 @@ static void free_more_memory(void)
wakeup_bdflush();
try_to_free_pages(zone, GFP_NOFS, 0);
blk_run_queues();
__set_current_state(TASK_RUNNING);
yield();
}
......@@ -961,7 +965,9 @@ create_buffers(struct page * page, unsigned long size, int retry)
head = NULL;
offset = PAGE_SIZE;
while ((offset -= size) >= 0) {
current->flags |= PF_NOWARN;
bh = alloc_buffer_head();
current->flags &= ~PF_NOWARN;
if (!bh)
goto no_grow;
......@@ -1159,7 +1165,7 @@ grow_buffers(struct block_device *bdev, unsigned long block, int size)
* attempt is failing. FIXME, perhaps?
*/
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, int size)
__getblk_slow(struct block_device *bdev, sector_t block, int size)
{
for (;;) {
struct buffer_head * bh;
......@@ -1259,7 +1265,8 @@ void __bforget(struct buffer_head *bh)
* Reads a specified block, and returns buffer head that contains it.
* It returns NULL if the block was unreadable.
*/
struct buffer_head * __bread(struct block_device *bdev, int block, int size)
struct buffer_head *
__bread_slow(struct block_device *bdev, sector_t block, int size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
......@@ -1283,6 +1290,165 @@ struct buffer_head * __bread(struct block_device *bdev, int block, int size)
return NULL;
}
/*
* Per-cpu buffer LRU implementation. To reduce the cost of __find_get_block().
* The bhs[] array is sorted - newest buffer is at bhs[0]. Buffers have their
* refcount elevated by one when they're in an LRU. A buffer can only appear
* once in a particular CPU's LRU. A single buffer can be present in multiple
* CPU's LRUs at the same time.
*
* This is a transparent caching front-end to sb_bread(), sb_getblk() and
* sb_find_get_block().
*/
#define BH_LRU_SIZE 7
static struct bh_lru {
spinlock_t lock;
struct buffer_head *bhs[BH_LRU_SIZE];
} ____cacheline_aligned_in_smp bh_lrus[NR_CPUS];
/*
* The LRU management algorithm is dopey-but-simple. Sorry.
*/
static void bh_lru_install(struct buffer_head *bh)
{
struct buffer_head *evictee = NULL;
struct bh_lru *lru;
if (bh == NULL)
return;
lru = &bh_lrus[get_cpu()];
spin_lock(&lru->lock);
if (lru->bhs[0] != bh) {
struct buffer_head *bhs[BH_LRU_SIZE];
int in;
int out = 0;
get_bh(bh);
bhs[out++] = bh;
for (in = 0; in < BH_LRU_SIZE; in++) {
struct buffer_head *bh2 = lru->bhs[in];
if (bh2 == bh) {
__brelse(bh2);
} else {
if (out >= BH_LRU_SIZE) {
BUG_ON(evictee != NULL);
evictee = bh2;
} else {
bhs[out++] = bh2;
}
}
}
while (out < BH_LRU_SIZE)
bhs[out++] = NULL;
memcpy(lru->bhs, bhs, sizeof(bhs));
}
spin_unlock(&lru->lock);
put_cpu();
if (evictee) {
touch_buffer(evictee);
__brelse(evictee);
}
}
static inline struct buffer_head *
lookup_bh(struct block_device *bdev, sector_t block, int size)
{
struct buffer_head *ret = NULL;
struct bh_lru *lru;
int i;
lru = &bh_lrus[get_cpu()];
spin_lock(&lru->lock);
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = lru->bhs[i];
if (bh && bh->b_bdev == bdev &&
bh->b_blocknr == block && bh->b_size == size) {
if (i) {
while (i) {
lru->bhs[i] = lru->bhs[i - 1];
i--;
}
lru->bhs[0] = bh;
}
get_bh(bh);
ret = bh;
break;
}
}
spin_unlock(&lru->lock);
put_cpu();
return ret;
}
struct buffer_head *
__find_get_block(struct block_device *bdev, sector_t block, int size)
{
struct buffer_head *bh = lookup_bh(bdev, block, size);
if (bh == NULL) {
bh = __find_get_block_slow(bdev, block, size);
bh_lru_install(bh);
}
return bh;
}
EXPORT_SYMBOL(__find_get_block);
struct buffer_head *
__getblk(struct block_device *bdev, sector_t block, int size)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
if (bh == NULL) {
bh = __getblk_slow(bdev, block, size);
bh_lru_install(bh);
}
return bh;
}
EXPORT_SYMBOL(__getblk);
struct buffer_head *
__bread(struct block_device *bdev, sector_t block, int size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (bh) {
if (buffer_uptodate(bh))
return bh;
__brelse(bh);
}
bh = __bread_slow(bdev, block, size);
bh_lru_install(bh);
return bh;
}
EXPORT_SYMBOL(__bread);
/*
* This is called rarely - at unmount.
*/
static void invalidate_bh_lrus(void)
{
int cpu_idx;
for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
spin_lock(&bh_lrus[cpu_idx].lock);
for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++) {
int i;
for (i = 0; i < BH_LRU_SIZE; i++) {
brelse(bh_lrus[cpu_idx].bhs[i]);
bh_lrus[cpu_idx].bhs[i] = NULL;
}
}
for (cpu_idx = 0; cpu_idx < NR_CPUS; cpu_idx++)
spin_unlock(&bh_lrus[cpu_idx].lock);
}
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
......@@ -2306,7 +2472,8 @@ static inline int buffer_busy(struct buffer_head *bh)
(bh->b_state & ((1 << BH_Dirty) | (1 << BH_Lock)));
}
static /*inline*/ int drop_buffers(struct page *page)
static inline int
drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
{
struct buffer_head *head = page_buffers(page);
struct buffer_head *bh;
......@@ -2330,9 +2497,9 @@ static /*inline*/ int drop_buffers(struct page *page)
if (!list_empty(&bh->b_assoc_buffers))
__remove_assoc_queue(bh);
free_buffer_head(bh);
bh = next;
} while (bh != head);
*buffers_to_free = head;
__clear_page_buffers(page);
return 1;
failed:
......@@ -2342,17 +2509,20 @@ static /*inline*/ int drop_buffers(struct page *page)
int try_to_free_buffers(struct page *page)
{
struct address_space * const mapping = page->mapping;
struct buffer_head *buffers_to_free = NULL;
int ret = 0;
BUG_ON(!PageLocked(page));
if (PageWriteback(page))
return 0;
if (mapping == NULL) /* swapped-in anon page */
return drop_buffers(page);
if (mapping == NULL) { /* swapped-in anon page */
ret = drop_buffers(page, &buffers_to_free);
goto out;
}
spin_lock(&mapping->private_lock);
ret = drop_buffers(page);
ret = drop_buffers(page, &buffers_to_free);
if (ret && !PageSwapCache(page)) {
/*
* If the filesystem writes its buffers by hand (eg ext3)
......@@ -2365,6 +2535,16 @@ int try_to_free_buffers(struct page *page)
ClearPageDirty(page);
}
spin_unlock(&mapping->private_lock);
out:
if (buffers_to_free) {
struct buffer_head *bh = buffers_to_free;
do {
struct buffer_head *next = bh->b_this_page;
free_buffer_head(bh);
bh = next;
} while (bh != buffers_to_free);
}
return ret;
}
EXPORT_SYMBOL(try_to_free_buffers);
......@@ -2435,6 +2615,9 @@ void __init buffer_init(void)
{
int i;
for (i = 0; i < NR_CPUS; i++)
spin_lock_init(&bh_lrus[i].lock);
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
......
......@@ -30,8 +30,7 @@
*
* The file system contains group descriptors which are located after the
* super block. Each descriptor contains the number of the bitmap block and
* the free blocks count in the block. The descriptors are loaded in memory
* when a file system is mounted (see ext2_read_super).
* the free blocks count in the block.
*/
......@@ -41,8 +40,8 @@
*
* Return buffer_head of bitmap on success or NULL.
*/
static struct buffer_head *read_inode_bitmap (struct super_block * sb,
unsigned long block_group)
static struct buffer_head *
read_inode_bitmap(struct super_block * sb, unsigned long block_group)
{
struct ext2_group_desc *desc;
struct buffer_head *bh = NULL;
......@@ -53,7 +52,7 @@ static struct buffer_head *read_inode_bitmap (struct super_block * sb,
bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
if (!bh)
ext2_error (sb, "read_inode_bitmap",
ext2_error(sb, "read_inode_bitmap",
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %lu",
block_group, (unsigned long) desc->bg_inode_bitmap);
......@@ -61,75 +60,6 @@ static struct buffer_head *read_inode_bitmap (struct super_block * sb,
return bh;
}
/*
* load_inode_bitmap loads the inode bitmap for a blocks group
*
* It maintains a cache for the last bitmaps loaded. This cache is managed
* with a LRU algorithm.
*
* Notes:
* 1/ There is one cache per mounted file system.
* 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups,
* this function reads the bitmap without maintaining a LRU cache.
*
* Return the buffer_head of the bitmap or the ERR_PTR(error)
*/
static struct buffer_head *load_inode_bitmap (struct super_block * sb,
unsigned int block_group)
{
int i, slot = 0;
struct ext2_sb_info *sbi = EXT2_SB(sb);
struct buffer_head *bh = sbi->s_inode_bitmap[0];
if (block_group >= sbi->s_groups_count)
ext2_panic (sb, "load_inode_bitmap",
"block_group >= groups_count - "
"block_group = %d, groups_count = %lu",
block_group, sbi->s_groups_count);
if (sbi->s_loaded_inode_bitmaps > 0 &&
sbi->s_inode_bitmap_number[0] == block_group && bh)
goto found;
if (sbi->s_groups_count <= EXT2_MAX_GROUP_LOADED) {
slot = block_group;
bh = sbi->s_inode_bitmap[slot];
if (!bh)
goto read_it;
if (sbi->s_inode_bitmap_number[slot] == slot)
goto found;
ext2_panic (sb, "load_inode_bitmap",
"block_group != inode_bitmap_number");
}
bh = NULL;
for (i = 0; i < sbi->s_loaded_inode_bitmaps &&
sbi->s_inode_bitmap_number[i] != block_group;
i++)
;
if (i < sbi->s_loaded_inode_bitmaps)
bh = sbi->s_inode_bitmap[i];
else if (sbi->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED)
sbi->s_loaded_inode_bitmaps++;
else
brelse (sbi->s_inode_bitmap[--i]);
while (i--) {
sbi->s_inode_bitmap_number[i+1] = sbi->s_inode_bitmap_number[i];
sbi->s_inode_bitmap[i+1] = sbi->s_inode_bitmap[i];
}
read_it:
if (!bh)
bh = read_inode_bitmap (sb, block_group);
sbi->s_inode_bitmap_number[slot] = block_group;
sbi->s_inode_bitmap[slot] = bh;
if (!bh)
return ERR_PTR(-EIO);
found:
return bh;
}
/*
* NOTE! When we get the inode, we're the only people
* that have access to it, and as such there are no
......@@ -151,8 +81,8 @@ void ext2_free_inode (struct inode * inode)
struct super_block * sb = inode->i_sb;
int is_directory;
unsigned long ino;
struct buffer_head * bh;
struct buffer_head * bh2;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
unsigned long block_group;
unsigned long bit;
struct ext2_group_desc * desc;
......@@ -186,12 +116,13 @@ void ext2_free_inode (struct inode * inode)
}
block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
bh = load_inode_bitmap (sb, block_group);
if (IS_ERR(bh))
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, block_group);
if (!bitmap_bh)
goto error_return;
/* Ok, now we can actually update the inode bitmaps.. */
if (!ext2_clear_bit (bit, bh->b_data))
if (!ext2_clear_bit(bit, bitmap_bh->b_data))
ext2_error (sb, "ext2_free_inode",
"bit already cleared for inode %lu", ino);
else {
......@@ -208,13 +139,14 @@ void ext2_free_inode (struct inode * inode)
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
}
mark_buffer_dirty(bh);
mark_buffer_dirty(bitmap_bh);
if (sb->s_flags & MS_SYNCHRONOUS) {
ll_rw_block (WRITE, 1, &bh);
wait_on_buffer (bh);
ll_rw_block(WRITE, 1, &bitmap_bh);
wait_on_buffer(bitmap_bh);
}
sb->s_dirt = 1;
error_return:
brelse(bitmap_bh);
unlock_super (sb);
}
......@@ -351,9 +283,9 @@ static int find_group_other(struct super_block *sb, int parent_group)
struct inode * ext2_new_inode(struct inode * dir, int mode)
{
struct super_block * sb;
struct buffer_head * bh;
struct buffer_head * bh2;
struct super_block *sb;
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2;
int group, i;
ino_t ino;
struct inode * inode;
......@@ -361,6 +293,7 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
struct ext2_super_block * es;
struct ext2_inode_info *ei;
int err;
struct inode *ret;
sb = dir->i_sb;
inode = new_inode(sb);
......@@ -381,20 +314,21 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
goto fail;
err = -EIO;
bh = load_inode_bitmap (sb, group);
if (IS_ERR(bh))
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh)
goto fail2;
i = ext2_find_first_zero_bit ((unsigned long *) bh->b_data,
i = ext2_find_first_zero_bit((unsigned long *)bitmap_bh->b_data,
EXT2_INODES_PER_GROUP(sb));
if (i >= EXT2_INODES_PER_GROUP(sb))
goto bad_count;
ext2_set_bit (i, bh->b_data);
ext2_set_bit(i, bitmap_bh->b_data);
mark_buffer_dirty(bh);
mark_buffer_dirty(bitmap_bh);
if (sb->s_flags & MS_SYNCHRONOUS) {
ll_rw_block (WRITE, 1, &bh);
wait_on_buffer (bh);
ll_rw_block(WRITE, 1, &bitmap_bh);
wait_on_buffer(bitmap_bh);
}
ino = group * EXT2_INODES_PER_GROUP(sb) + i + 1;
......@@ -452,17 +386,19 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
insert_inode_hash(inode);
mark_inode_dirty(inode);
unlock_super (sb);
unlock_super(sb);
ret = inode;
if(DQUOT_ALLOC_INODE(inode)) {
DQUOT_DROP(inode);
inode->i_flags |= S_NOQUOTA;
inode->i_nlink = 0;
iput(inode);
return ERR_PTR(-EDQUOT);
ret = ERR_PTR(-EDQUOT);
} else {
ext2_debug("allocating inode %lu\n", inode->i_ino);
ext2_preread_inode(inode);
}
ext2_debug ("allocating inode %lu\n", inode->i_ino);
ext2_preread_inode(inode);
return inode;
goto out;
fail2:
desc = ext2_get_group_desc (sb, group, &bh2);
......@@ -476,7 +412,8 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
unlock_super(sb);
make_bad_inode(inode);
iput(inode);
return ERR_PTR(err);
ret = ERR_PTR(err);
goto out;
bad_count:
ext2_error (sb, "ext2_new_inode",
......@@ -491,6 +428,9 @@ struct inode * ext2_new_inode(struct inode * dir, int mode)
desc->bg_free_inodes_count = 0;
mark_buffer_dirty(bh2);
goto repeat;
out:
brelse(bitmap_bh);
return ret;
}
unsigned long ext2_count_free_inodes (struct super_block * sb)
......@@ -498,30 +438,33 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
#ifdef EXT2FS_DEBUG
struct ext2_super_block * es;
unsigned long desc_count = 0, bitmap_count = 0;
struct buffer_head *bitmap_bh = NULL;
int i;
lock_super (sb);
es = EXT2_SB(sb)->s_es;
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
struct ext2_group_desc *desc = ext2_get_group_desc (sb, i, NULL);
struct buffer_head *bh;
struct ext2_group_desc *desc;
unsigned x;
desc = ext2_get_group_desc (sb, i, NULL);
if (!desc)
continue;
desc_count += le16_to_cpu(desc->bg_free_inodes_count);
bh = load_inode_bitmap (sb, i);
if (IS_ERR(bh))
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, i);
if (!bitmap_bh)
continue;
x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8);
x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
printk ("group %d: stored = %d, counted = %lu\n",
i, le16_to_cpu(desc->bg_free_inodes_count), x);
bitmap_count += x;
}
brelse(bitmap_bh);
printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
unlock_super (sb);
unlock_super(sb);
return desc_count;
#else
return le32_to_cpu(EXT2_SB(sb)->s_es->s_free_inodes_count);
......@@ -534,21 +477,23 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
{
struct ext2_super_block * es = EXT2_SB(sb)->s_es;
unsigned long desc_count = 0, bitmap_count = 0;
struct buffer_head *bitmap_bh = NULL;
int i;
for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
struct ext2_group_desc *desc = ext2_get_group_desc(sb, i, NULL);
struct buffer_head *bh;
struct ext2_group_desc *desc;
unsigned x;
desc = ext2_get_group_desc(sb, i, NULL);
if (!desc)
continue;
desc_count += le16_to_cpu(desc->bg_free_inodes_count);
bh = load_inode_bitmap (sb, i);
if (IS_ERR(bh))
brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, i);
if (!bitmap_bh)
continue;
x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8);
x = ext2_count_free(bitmap_bh, EXT2_INODES_PER_GROUP(sb) / 8);
if (le16_to_cpu(desc->bg_free_inodes_count) != x)
ext2_error (sb, "ext2_check_inodes_bitmap",
"Wrong free inodes count in group %d, "
......@@ -556,8 +501,9 @@ void ext2_check_inodes_bitmap (struct super_block * sb)
le16_to_cpu(desc->bg_free_inodes_count), x);
bitmap_count += x;
}
brelse(bitmap_bh);
if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count)
ext2_error (sb, "ext2_check_inodes_bitmap",
ext2_error(sb, "ext2_check_inodes_bitmap",
"Wrong free inodes count in super block, "
"stored = %lu, counted = %lu",
(unsigned long)le32_to_cpu(es->s_free_inodes_count),
......
......@@ -142,12 +142,6 @@ static void ext2_put_super (struct super_block * sb)
if (sbi->s_group_desc[i])
brelse (sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
if (sbi->s_inode_bitmap[i])
brelse (sbi->s_inode_bitmap[i]);
for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++)
if (sbi->s_block_bitmap[i])
brelse (sbi->s_block_bitmap[i]);
brelse (sbi->s_sbh);
sb->u.generic_sbp = NULL;
kfree(sbi);
......@@ -686,14 +680,6 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
db_count = i;
goto failed_mount2;
}
for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) {
sbi->s_inode_bitmap_number[i] = 0;
sbi->s_inode_bitmap[i] = NULL;
sbi->s_block_bitmap_number[i] = 0;
sbi->s_block_bitmap[i] = NULL;
}
sbi->s_loaded_inode_bitmaps = 0;
sbi->s_loaded_block_bitmaps = 0;
sbi->s_gdb_count = db_count;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
/*
......
......@@ -1632,8 +1632,10 @@ ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh,
}
ext3_mark_inode_dirty(handle, inode);
ext3_journal_test_restart(handle, inode);
BUFFER_TRACE(bh, "get_write_access");
ext3_journal_get_write_access(handle, bh);
if (bh) {
BUFFER_TRACE(bh, "retaking write access");
ext3_journal_get_write_access(handle, bh);
}
}
/*
......
......@@ -417,10 +417,6 @@ void ext3_put_super (struct super_block * sb)
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++)
brelse(sbi->s_inode_bitmap[i]);
for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++)
brelse(sbi->s_block_bitmap[i]);
brelse(sbi->s_sbh);
/* Debugging code just in case the in-memory inode orphan list
......@@ -1150,14 +1146,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
printk (KERN_ERR "EXT3-fs: group descriptors corrupted !\n");
goto failed_mount2;
}
for (i = 0; i < EXT3_MAX_GROUP_LOADED; i++) {
sbi->s_inode_bitmap_number[i] = 0;
sbi->s_inode_bitmap[i] = NULL;
sbi->s_block_bitmap_number[i] = 0;
sbi->s_block_bitmap[i] = NULL;
}
sbi->s_loaded_inode_bitmaps = 0;
sbi->s_loaded_block_bitmaps = 0;
sbi->s_gdb_count = db_count;
/*
* set up enough so that it can read an inode
......
......@@ -245,6 +245,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
}
if (arg & O_DIRECT) {
if (inode->i_mapping && inode->i_mapping->a_ops) {
if (!inode->i_mapping->a_ops->direct_IO)
return -EINVAL;
}
/*
* alloc_kiovec() can sleep and we are only serialized by
* the big kernel lock here, so abuse the i_sem to serialize
......
......@@ -220,8 +220,7 @@ static void sync_sb_inodes(struct super_block *sb, int sync_mode,
struct list_head *head;
const unsigned long start = jiffies; /* livelock avoidance */
list_splice(&sb->s_dirty, &sb->s_io);
INIT_LIST_HEAD(&sb->s_dirty);
list_splice_init(&sb->s_dirty, &sb->s_io);
head = &sb->s_io;
while ((tmp = head->prev) != head) {
struct inode *inode = list_entry(tmp, struct inode, i_list);
......@@ -262,13 +261,10 @@ static void sync_sb_inodes(struct super_block *sb, int sync_mode,
break;
}
out:
if (!list_empty(&sb->s_io)) {
/*
* Put the rest back, in the correct order.
*/
list_splice(&sb->s_io, sb->s_dirty.prev);
INIT_LIST_HEAD(&sb->s_io);
}
/*
* Put the rest back, in the correct order.
*/
list_splice_init(&sb->s_io, sb->s_dirty.prev);
return;
}
......@@ -287,8 +283,9 @@ static void sync_sb_inodes(struct super_block *sb, int sync_mode,
*
* This is a "memory cleansing" operation, not a "data integrity" operation.
*/
void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
unsigned long *older_than_this)
void writeback_unlocked_inodes(int *nr_to_write,
enum writeback_sync_modes sync_mode,
unsigned long *older_than_this)
{
struct super_block *sb;
......
......@@ -592,7 +592,8 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
J_ASSERT (transaction->t_log_list == NULL);
J_ASSERT (transaction->t_checkpoint_list == NULL);
J_ASSERT (transaction->t_updates == 0);
J_ASSERT (list_empty(&transaction->t_jcb));
J_ASSERT (transaction->t_journal->j_committing_transaction !=
transaction);
......
......@@ -471,7 +471,7 @@ void journal_commit_transaction(journal_t *journal)
transaction's t_log_list queue, and metadata buffers are on
the t_iobuf_list queue.
Wait for the transactions in reverse order. That way we are
Wait for the buffers in reverse order. That way we are
less likely to be woken up until all IOs have completed, and
so we incur less scheduling load.
*/
......@@ -563,8 +563,10 @@ void journal_commit_transaction(journal_t *journal)
jbd_debug(3, "JBD: commit phase 6\n");
if (is_journal_aborted(journal))
if (is_journal_aborted(journal)) {
unlock_journal(journal);
goto skip_commit;
}
/* Done it all: now write the commit record. We should have
* cleaned up our previous buffers by now, so if we are in abort
......@@ -574,9 +576,10 @@ void journal_commit_transaction(journal_t *journal)
descriptor = journal_get_descriptor_buffer(journal);
if (!descriptor) {
__journal_abort_hard(journal);
unlock_journal(journal);
goto skip_commit;
}
/* AKPM: buglet - add `i' to tmp! */
for (i = 0; i < jh2bh(descriptor)->b_size; i += 512) {
journal_header_t *tmp =
......@@ -596,14 +599,32 @@ void journal_commit_transaction(journal_t *journal)
__brelse(bh); /* One for getblk() */
journal_unlock_journal_head(descriptor);
}
lock_journal(journal);
/* End of a transaction! Finally, we can do checkpoint
processing: any buffers committed as a result of this
transaction can be removed from any checkpoint list it was on
before. */
skip_commit:
skip_commit: /* The journal should be unlocked by now. */
/* Call any callbacks that had been registered for handles in this
* transaction. It is up to the callback to free any allocated
* memory.
*/
if (!list_empty(&commit_transaction->t_jcb)) {
struct list_head *p, *n;
int error = is_journal_aborted(journal);
list_for_each_safe(p, n, &commit_transaction->t_jcb) {
struct journal_callback *jcb;
jcb = list_entry(p, struct journal_callback, jcb_list);
list_del(p);
jcb->jcb_func(jcb, error);
}
}
lock_journal(journal);
jbd_debug(3, "JBD: commit phase 7\n");
......
......@@ -58,6 +58,7 @@ EXPORT_SYMBOL(journal_sync_buffer);
#endif
EXPORT_SYMBOL(journal_flush);
EXPORT_SYMBOL(journal_revoke);
EXPORT_SYMBOL(journal_callback_set);
EXPORT_SYMBOL(journal_init_dev);
EXPORT_SYMBOL(journal_init_inode);
......
......@@ -57,6 +57,7 @@ static transaction_t * get_transaction (journal_t * journal, int is_try)
transaction->t_state = T_RUNNING;
transaction->t_tid = journal->j_transaction_sequence++;
transaction->t_expires = jiffies + journal->j_commit_interval;
INIT_LIST_HEAD(&transaction->t_jcb);
/* Set up the commit timer for the new transaction. */
J_ASSERT (!journal->j_commit_timer_active);
......@@ -90,7 +91,14 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
transaction_t *transaction;
int needed;
int nblocks = handle->h_buffer_credits;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
current->comm, nblocks,
journal->j_max_transaction_buffers);
return -ENOSPC;
}
jbd_debug(3, "New handle %p going live.\n", handle);
repeat:
......@@ -200,6 +208,20 @@ static int start_this_handle(journal_t *journal, handle_t *handle)
return 0;
}
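With the new credit check, a caller that asks for more buffer credits than a
single transaction can hold gets -ENOSPC back through ERR_PTR().  A minimal
caller sketch (illustrative only, not from this patch):
	handle_t *handle = journal_start(journal, nblocks);

	if (IS_ERR(handle))
		return PTR_ERR(handle);	/* -ENOSPC if nblocks is too big, -ENOMEM, ... */
	/* ... journalled metadata updates ... */
	return journal_stop(handle);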
/* Allocate a new handle. This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
handle_t *handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
if (!handle)
return NULL;
memset(handle, 0, sizeof (handle_t));
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
INIT_LIST_HEAD(&handle->h_jcb);
return handle;
}
/*
* Obtain a new handle.
*
......@@ -226,14 +248,11 @@ handle_t *journal_start(journal_t *journal, int nblocks)
handle->h_ref++;
return handle;
}
handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
handle = new_handle(nblocks);
if (!handle)
return ERR_PTR(-ENOMEM);
memset (handle, 0, sizeof (handle_t));
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
current->journal_info = handle;
err = start_this_handle(journal, handle);
......@@ -332,14 +351,11 @@ handle_t *journal_try_start(journal_t *journal, int nblocks)
if (is_journal_aborted(journal))
return ERR_PTR(-EIO);
handle = jbd_kmalloc(sizeof (handle_t), GFP_NOFS);
handle = new_handle(nblocks);
if (!handle)
return ERR_PTR(-ENOMEM);
memset (handle, 0, sizeof (handle_t));
handle->h_buffer_credits = nblocks;
handle->h_ref = 1;
current->journal_info = handle;
err = try_start_this_handle(journal, handle);
......@@ -1347,6 +1363,28 @@ void journal_sync_buffer(struct buffer_head *bh)
}
#endif
/*
* Register a callback function for this handle. The function will be
* called when the transaction that this handle is part of has been
* committed to disk with the original callback data struct and the
* error status of the journal as parameters. There is no guarantee of
* ordering between handles within a single transaction, nor between
* callbacks registered on the same handle.
*
* The caller is responsible for allocating the journal_callback struct.
* This is to allow the caller to add as much extra data to the callback
* as needed, but reduce the overhead of multiple allocations. The caller
* allocated struct must start with a struct journal_callback at offset 0,
* and has the caller-specific data afterwards.
*/
void journal_callback_set(handle_t *handle,
void (*func)(struct journal_callback *jcb, int error),
struct journal_callback *jcb)
{
list_add_tail(&jcb->jcb_list, &handle->h_jcb);
jcb->jcb_func = func;
}
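An illustrative caller (not part of this patch; the struct and function names
are made up), showing the "journal_callback at offset 0" layout which the
comment above requires:
	struct my_txn_cb {
		struct journal_callback jcb;	/* must be the first member */
		struct inode *inode;		/* caller-private data follows */
	};

	static void my_txn_done(struct journal_callback *jcb, int error)
	{
		struct my_txn_cb *cb = (struct my_txn_cb *)jcb;

		/* The transaction holding the handle has committed; error is
		 * the journal's abort status.  The caller owns the memory. */
		kfree(cb);
	}

	/* somewhere with an open handle: */
	struct my_txn_cb *cb = kmalloc(sizeof(*cb), GFP_NOFS);
	if (cb) {
		cb->inode = inode;
		journal_callback_set(handle, my_txn_done, &cb->jcb);
	}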
/*
* All done for a particular handle.
*
......@@ -1411,7 +1449,10 @@ int journal_stop(handle_t *handle)
wake_up(&journal->j_wait_transaction_locked);
}
/*
/* Move callbacks from the handle to the transaction. */
list_splice(&handle->h_jcb, &transaction->t_jcb);
/*
* If the handle is marked SYNC, we need to set another commit
* going! We also want to force a commit if the current
* transaction is occupying too much of the log, or if the
......
......@@ -2975,10 +2975,7 @@ int jfs_sync(void)
}
}
/* Add anon_list2 back to anon_list */
if (!list_empty(&TxAnchor.anon_list2)) {
list_splice(&TxAnchor.anon_list2, &TxAnchor.anon_list);
INIT_LIST_HEAD(&TxAnchor.anon_list2);
}
list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list);
add_wait_queue(&jfs_sync_thread_wait, &wq);
set_current_state(TASK_INTERRUPTIBLE);
TXN_UNLOCK();
......
......@@ -431,7 +431,7 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE;
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
nr_bvecs, GFP_NOFS);
nr_bvecs, GFP_NOFS|__GFP_HIGH);
if (bio == NULL)
goto confused;
}
......@@ -475,9 +475,44 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
return bio;
}
/*
* This is a cut-n-paste of generic_writepages(). We _could_
* generalise that function. It'd get a bit messy. We'll see.
/**
* mpage_writepages - walk the list of dirty pages of the given
* address space and writepage() all of them.
*
* @mapping: address space structure to write
* @nr_to_write: subtract the number of written pages from *@nr_to_write
* @get_block: the filesystem's block mapper function.
* If this is NULL then use a_ops->writepage. Otherwise, go
* direct-to-BIO.
*
* This is a library function, which implements the writepages()
* address_space_operation.
*
* (The next two paragraphs refer to code which isn't here yet, but they
* explain the presence of address_space.io_pages)
*
* Pages can be moved from clean_pages or locked_pages onto dirty_pages
* at any time - it's not possible to lock against that. So pages which
* have already been added to a BIO may magically reappear on the dirty_pages
* list. And generic_writepages() will again try to lock those pages.
* But I/O has not yet been started against the page. Thus deadlock.
*
* To avoid this, the entire contents of the dirty_pages list are moved
* onto io_pages up-front. We then walk io_pages, locking the
* pages and submitting them for I/O, moving them to locked_pages.
*
* This has the added benefit of preventing a livelock which would otherwise
* occur if pages are being dirtied faster than we can write them out.
*
* If a page is already under I/O, generic_writepages() skips it, even
* if it's dirty. This is desirable behaviour for memory-cleaning writeback,
* but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
* and msync() need to guarantee that all the data which was dirty at the time
* the call was made get new I/O started against them. The way to do this is
* to run filemap_fdatawait() before calling filemap_fdatawrite().
*
* It's fairly rare for PageWriteback pages to be on ->dirty_pages. It
* means that someone redirtied the page while it was under I/O.
*/
int
mpage_writepages(struct address_space *mapping,
......@@ -487,11 +522,15 @@ mpage_writepages(struct address_space *mapping,
sector_t last_block_in_bio = 0;
int ret = 0;
int done = 0;
int (*writepage)(struct page *);
writepage = NULL;
if (get_block == NULL)
writepage = mapping->a_ops->writepage;
write_lock(&mapping->page_lock);
list_splice(&mapping->dirty_pages, &mapping->io_pages);
INIT_LIST_HEAD(&mapping->dirty_pages);
list_splice_init(&mapping->dirty_pages, &mapping->io_pages);
while (!list_empty(&mapping->io_pages) && !done) {
struct page *page = list_entry(mapping->io_pages.prev,
......@@ -516,8 +555,8 @@ mpage_writepages(struct address_space *mapping,
lock_page(page);
if (page->mapping && TestClearPageDirty(page) &&
!PageWriteback(page)) {
if (page->mapping && !PageWriteback(page) &&
TestClearPageDirty(page)) {
/* FIXME: batch this up */
if (!PageActive(page) && PageLRU(page)) {
spin_lock(&pagemap_lru_lock);
......@@ -527,8 +566,13 @@ mpage_writepages(struct address_space *mapping,
}
spin_unlock(&pagemap_lru_lock);
}
bio = mpage_writepage(bio, page, get_block,
&last_block_in_bio, &ret);
if (writepage) {
ret = (*writepage)(page);
} else {
bio = mpage_writepage(bio, page, get_block,
&last_block_in_bio, &ret);
}
if (ret || (nr_to_write && --(*nr_to_write) <= 0))
done = 1;
} else {
......@@ -538,13 +582,10 @@ mpage_writepages(struct address_space *mapping,
page_cache_release(page);
write_lock(&mapping->page_lock);
}
if (!list_empty(&mapping->io_pages)) {
/*
* Put the rest back, in the correct order.
*/
list_splice(&mapping->io_pages, mapping->dirty_pages.prev);
INIT_LIST_HEAD(&mapping->io_pages);
}
/*
* Put the rest back, in the correct order.
*/
list_splice_init(&mapping->io_pages, mapping->dirty_pages.prev);
write_unlock(&mapping->page_lock);
if (bio)
mpage_bio_submit(WRITE, bio);
......
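A sketch of how a filesystem wires the mpage_writepages() documented above
into its writepages() operation (illustrative; assumes the current
three-argument form and an ext2-style get_block):
	static int ext2_writepages(struct address_space *mapping, int *nr_to_write)
	{
		/* go direct-to-BIO via the filesystem's block mapper */
		return mpage_writepages(mapping, nr_to_write, ext2_get_block);
	}
This is then pointed to by the address_space_operations of the filesystem's
data mappings; passing a NULL get_block instead makes mpage_writepages() fall
back to a_ops->writepage, as the new code above shows.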
......@@ -1110,8 +1110,7 @@ nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
/* Set up the RPC argument and reply structs
* NB: take care not to mess about with data->commit et al. */
list_splice(head, &data->pages);
INIT_LIST_HEAD(head);
list_splice_init(head, &data->pages);
first = nfs_list_entry(data->pages.next);
last = nfs_list_entry(data->pages.prev);
inode = first->wb_inode;
......
......@@ -665,6 +665,14 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
}
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
/* NB: we're sure to have correct a_ops only after f_op->open */
if (f->f_flags & O_DIRECT) {
error = -EINVAL;
if (inode->i_mapping && inode->i_mapping->a_ops)
if (!inode->i_mapping->a_ops->direct_IO)
goto cleanup_all;
}
return f;
cleanup_all:
......
......@@ -164,7 +164,7 @@ struct buffer_head *__find_get_block(struct block_device *, sector_t, int);
struct buffer_head * __getblk(struct block_device *, sector_t, int);
void __brelse(struct buffer_head *);
void __bforget(struct buffer_head *);
struct buffer_head * __bread(struct block_device *, int, int);
struct buffer_head *__bread(struct block_device *, sector_t block, int size);
void wakeup_bdflush(void);
struct buffer_head *alloc_buffer_head(void);
void free_buffer_head(struct buffer_head * bh);
......@@ -201,9 +201,9 @@ int generic_osync_inode(struct inode *, int);
* inline definitions
*/
static inline void get_bh(struct buffer_head * bh)
static inline void get_bh(struct buffer_head *bh)
{
atomic_inc(&(bh)->b_count);
atomic_inc(&bh->b_count);
}
static inline void put_bh(struct buffer_head *bh)
......@@ -212,68 +212,49 @@ static inline void put_bh(struct buffer_head *bh)
atomic_dec(&bh->b_count);
}
/*
* If an error happens during the make_request, this function
* has to be recalled. It marks the buffer as clean and not
uptodate, and it notifies the upper layer about the end
* of the I/O.
*/
static inline void buffer_IO_error(struct buffer_head * bh)
{
clear_buffer_dirty(bh);
/*
* b_end_io has to clear the BH_Uptodate bitflag in the read error
* case, however buffer contents are not necessarily bad if a
* write fails
*/
bh->b_end_io(bh, buffer_uptodate(bh));
}
static inline void brelse(struct buffer_head *buf)
static inline void brelse(struct buffer_head *bh)
{
if (buf)
__brelse(buf);
if (bh)
__brelse(bh);
}
static inline void bforget(struct buffer_head *buf)
static inline void bforget(struct buffer_head *bh)
{
if (buf)
__bforget(buf);
if (bh)
__bforget(bh);
}
static inline struct buffer_head * sb_bread(struct super_block *sb, int block)
static inline struct buffer_head *sb_bread(struct super_block *sb, sector_t block)
{
return __bread(sb->s_bdev, block, sb->s_blocksize);
}
static inline struct buffer_head * sb_getblk(struct super_block *sb, int block)
static inline struct buffer_head *sb_getblk(struct super_block *sb, sector_t block)
{
return __getblk(sb->s_bdev, block, sb->s_blocksize);
}
static inline struct buffer_head *
sb_find_get_block(struct super_block *sb, int block)
sb_find_get_block(struct super_block *sb, sector_t block)
{
return __find_get_block(sb->s_bdev, block, sb->s_blocksize);
}
static inline void
map_bh(struct buffer_head *bh, struct super_block *sb, int block)
map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block)
{
set_buffer_mapped(bh);
bh->b_bdev = sb->s_bdev;
bh->b_blocknr = block;
}
static inline void wait_on_buffer(struct buffer_head * bh)
static inline void wait_on_buffer(struct buffer_head *bh)
{
if (buffer_locked(bh))
__wait_on_buffer(bh);
}
static inline void lock_buffer(struct buffer_head * bh)
static inline void lock_buffer(struct buffer_head *bh)
{
while (test_set_buffer_locked(bh))
__wait_on_buffer(bh);
......
......@@ -16,14 +16,6 @@
#ifndef _LINUX_EXT2_FS_SB
#define _LINUX_EXT2_FS_SB
/*
* The following is not needed anymore since the descriptors buffer
* heads are now dynamically allocated
*/
/* #define EXT2_MAX_GROUP_DESC 8 */
#define EXT2_MAX_GROUP_LOADED 8
/*
* second extended-fs super-block data in memory
*/
......@@ -41,12 +33,6 @@ struct ext2_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext2_super_block * s_es; /* Pointer to the super block in the buffer */
struct buffer_head ** s_group_desc;
unsigned short s_loaded_inode_bitmaps;
unsigned short s_loaded_block_bitmaps;
unsigned long s_inode_bitmap_number[EXT2_MAX_GROUP_LOADED];
struct buffer_head * s_inode_bitmap[EXT2_MAX_GROUP_LOADED];
unsigned long s_block_bitmap_number[EXT2_MAX_GROUP_LOADED];
struct buffer_head * s_block_bitmap[EXT2_MAX_GROUP_LOADED];
unsigned long s_mount_opt;
uid_t s_resuid;
gid_t s_resgid;
......
......@@ -21,14 +21,6 @@
#include <linux/wait.h>
#endif
/*
* The following is not needed anymore since the descriptors buffer
* heads are now dynamically allocated
*/
/* #define EXT3_MAX_GROUP_DESC 8 */
#define EXT3_MAX_GROUP_LOADED 8
/*
* third extended-fs super-block data in memory
*/
......@@ -46,12 +38,6 @@ struct ext3_sb_info {
struct buffer_head * s_sbh; /* Buffer containing the super block */
struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */
struct buffer_head ** s_group_desc;
unsigned short s_loaded_inode_bitmaps;
unsigned short s_loaded_block_bitmaps;
unsigned long s_inode_bitmap_number[EXT3_MAX_GROUP_LOADED];
struct buffer_head * s_inode_bitmap[EXT3_MAX_GROUP_LOADED];
unsigned long s_block_bitmap_number[EXT3_MAX_GROUP_LOADED];
struct buffer_head * s_block_bitmap[EXT3_MAX_GROUP_LOADED];
unsigned long s_mount_opt;
uid_t s_resuid;
gid_t s_resgid;
......
......@@ -18,14 +18,14 @@
#define __GFP_HIGHIO 0x80 /* Can start high mem physical IO? */
#define __GFP_FS 0x100 /* Can call down to low-level FS? */
#define GFP_NOHIGHIO (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
#define GFP_NOIO (__GFP_HIGH | __GFP_WAIT)
#define GFP_NOFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
#define GFP_NOHIGHIO ( __GFP_WAIT | __GFP_IO)
#define GFP_NOIO ( __GFP_WAIT)
#define GFP_NOFS ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_USER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_HIGHUSER ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS | __GFP_HIGHMEM)
#define GFP_KERNEL (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_KERNEL ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_NFS ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
#define GFP_KSWAPD ( __GFP_WAIT | __GFP_IO | __GFP_HIGHIO | __GFP_FS)
/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some
......
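GFP_NOIO, GFP_NOFS, GFP_NOHIGHIO and GFP_KERNEL no longer imply __GFP_HIGH, so
callers which really need the emergency pools must now say so explicitly.  The
scsi_lib.c and scsi_merge.c hunks earlier in this merge are the pattern,
pulled together here as an illustrative fragment (sgp/sgl as in those hunks):

	int gfp_mask = GFP_NOIO;		/* no longer includes __GFP_HIGH */

	if (in_interrupt()) {
		gfp_mask &= ~__GFP_WAIT;	/* cannot sleep here */
		gfp_mask |= __GFP_HIGH;		/* may dip into emergency reserves */
	}
	sgl = mempool_alloc(sgp->pool, gfp_mask);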
......@@ -250,6 +250,13 @@ static inline struct journal_head *bh2jh(struct buffer_head *bh)
return bh->b_private;
}
#define HAVE_JOURNAL_CALLBACK_STATUS
struct journal_callback {
struct list_head jcb_list;
void (*jcb_func)(struct journal_callback *jcb, int error);
/* user data goes here */
};
struct jbd_revoke_table_s;
/* The handle_t type represents a single atomic update being performed
......@@ -280,6 +287,12 @@ struct handle_s
operations */
int h_err;
/* List of application registered callbacks for this handle.
* The function(s) will be called after the transaction that
* this handle is part of has been committed to disk.
*/
struct list_head h_jcb;
/* Flags */
unsigned int h_sync: 1; /* sync-on-close */
unsigned int h_jdata: 1; /* force data journaling */
......@@ -399,6 +412,10 @@ struct transaction_s
/* How many handles used this transaction? */
int t_handle_count;
/* List of registered callback functions for this transaction.
* Called when the transaction is committed. */
struct list_head t_jcb;
};
......@@ -647,6 +664,9 @@ extern int journal_invalidatepage(journal_t *,
extern int journal_try_to_free_buffers(journal_t *, struct page *, int);
extern int journal_stop(handle_t *);
extern int journal_flush (journal_t *);
extern void journal_callback_set(handle_t *handle,
void (*fn)(struct journal_callback *,int),
struct journal_callback *jcb);
extern void journal_lock_updates (journal_t *);
extern void journal_unlock_updates (journal_t *);
......
......@@ -136,6 +136,19 @@ static inline int list_empty(list_t *head)
return head->next == head;
}
static inline void __list_splice(list_t *list, list_t *head)
{
list_t *first = list->next;
list_t *last = list->prev;
list_t *at = head->next;
first->prev = head;
head->next = first;
last->next = at;
at->prev = last;
}
/**
* list_splice - join two lists
* @list: the new list to add.
......@@ -145,15 +158,22 @@ static inline void list_splice(list_t *list, list_t *head)
{
list_t *first = list->next;
if (first != list) {
list_t *last = list->prev;
list_t *at = head->next;
first->prev = head;
head->next = first;
if (first != list)
__list_splice(list, head);
}
last->next = at;
at->prev = last;
/**
* list_splice_init - join two lists and reinitialise the emptied list.
* @list: the new list to add.
* @head: the place to add it in the first list.
*
* The list at @list is reinitialised
*/
static inline void list_splice_init(list_t *list, list_t *head)
{
if (!list_empty(list)) {
__list_splice(list, head);
INIT_LIST_HEAD(list);
}
}
......
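The new helper replaces an open-coded idiom that is converted throughout this
merge (src and dst are placeholders):

	/* before */
	list_splice(&src, &dst);
	INIT_LIST_HEAD(&src);

	/* after: splice and leave src empty but still valid */
	list_splice_init(&src, &dst);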
......@@ -305,6 +305,16 @@ static inline void set_page_zone(struct page *page, unsigned long zone_num)
#define NOPAGE_SIGBUS (NULL)
#define NOPAGE_OOM ((struct page *) (-1))
/*
* Different kinds of faults, as returned by handle_mm_fault().
* Used to decide whether a process gets delivered SIGBUS or
* just gets major/minor fault counters bumped up.
*/
#define VM_FAULT_OOM (-1)
#define VM_FAULT_SIGBUS 0
#define VM_FAULT_MINOR 1
#define VM_FAULT_MAJOR 2
/* The array of struct pages */
extern struct page *mem_map;
......
......@@ -385,12 +385,12 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */
#define PF_FREE_PAGES 0x00002000 /* per process page freeing */
#define PF_FLUSHER 0x00004000 /* responsible for disk writeback */
#define PF_RADIX_TREE 0x00008000 /* debug: performing radix tree alloc */
#define PF_NOWARN 0x00008000 /* debug: don't warn if alloc fails */
#define PF_FREEZE 0x00010000 /* this task should be frozen for suspend */
#define PF_IOTHREAD 0x00020000 /* this thread is needed for doing I/O to swap */
#define PF_FROZEN 0x00040000 /* frozen for system suspend */
#define PF_INVALIDATE 0x00080000 /* debug: unmounting an fs. killme. */
/*
* Ptrace flags
*/
......@@ -417,8 +417,7 @@ extern int task_prio(task_t *p);
extern int task_nice(task_t *p);
extern int idle_cpu(int cpu);
asmlinkage long sys_sched_yield(void);
#define yield() sys_sched_yield()
void yield(void);
/*
* The default (Linux) execution domain.
......@@ -836,10 +835,11 @@ static inline int need_resched(void)
return unlikely(test_thread_flag(TIF_NEED_RESCHED));
}
extern void __cond_resched(void);
static inline void cond_resched(void)
{
if (need_resched())
schedule();
__cond_resched();
}
/* Reevaluate whether the task has signals pending delivery.
......
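cond_resched() now calls through __cond_resched(), which sets TASK_RUNNING
before scheduling, but it still only fires when a reschedule is pending, so it
stays cheap in hot loops.  An illustrative use (struct item, process_one() and
the array are made up):

	static void scan_items(struct item *items, int nr)
	{
		int i;

		for (i = 0; i < nr; i++) {
			process_one(&items[i]);	/* potentially long per-item work */
			cond_resched();		/* yield only if a resched is pending */
		}
	}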
......@@ -30,7 +30,10 @@
struct file;
#define CTL_MAXNAME 10
#define CTL_MAXNAME 10 /* how many path components do we allow in a
call to sysctl? In other words, what is
the largest acceptable value for the nlen
member of a struct __sysctl_args to have? */
struct __sysctl_args {
int *name;
......@@ -145,6 +148,7 @@ enum
VM_DIRTY_SYNC=13, /* dirty_sync_ratio */
VM_DIRTY_WB_CS=14, /* dirty_writeback_centisecs */
VM_DIRTY_EXPIRE_CS=15, /* dirty_expire_centisecs */
VM_NR_PDFLUSH_THREADS=16, /* nr_pdflush_threads */
};
......
......@@ -24,18 +24,22 @@ static inline int current_is_pdflush(void)
/*
* fs/fs-writeback.c
*/
#define WB_SYNC_NONE 0 /* Don't wait on anything */
#define WB_SYNC_LAST 1 /* Wait on the last-written mapping */
#define WB_SYNC_ALL 2 /* Wait on every mapping */
#define WB_SYNC_HOLD 3 /* Hold the inode on sb_dirty for sys_sync() */
enum writeback_sync_modes {
WB_SYNC_NONE = 0, /* Don't wait on anything */
WB_SYNC_LAST = 1, /* Wait on the last-written mapping */
WB_SYNC_ALL = 2, /* Wait on every mapping */
WB_SYNC_HOLD = 3, /* Hold the inode on sb_dirty for sys_sync() */
};
void writeback_unlocked_inodes(int *nr_to_write, int sync_mode,
unsigned long *older_than_this);
void writeback_unlocked_inodes(int *nr_to_write,
enum writeback_sync_modes sync_mode,
unsigned long *older_than_this);
void wake_up_inode(struct inode *inode);
void __wait_on_inode(struct inode * inode);
void sync_inodes_sb(struct super_block *, int wait);
void sync_inodes(int wait);
/* writeback.h requires fs.h; it, too, is not included from here. */
static inline void wait_on_inode(struct inode *inode)
{
if (inode->i_state & I_LOCK)
......@@ -45,15 +49,22 @@ static inline void wait_on_inode(struct inode *inode)
/*
* mm/page-writeback.c
*/
/* These 5 are exported to sysctl. */
extern int dirty_background_ratio;
extern int dirty_async_ratio;
extern int dirty_sync_ratio;
extern int dirty_writeback_centisecs;
extern int dirty_expire_centisecs;
void balance_dirty_pages(struct address_space *mapping);
void balance_dirty_pages_ratelimited(struct address_space *mapping);
int pdflush_operation(void (*fn)(unsigned long), unsigned long arg0);
int do_writepages(struct address_space *mapping, int *nr_to_write);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
read-only. */
#endif /* WRITEBACK_H */
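With sync_mode now a proper enum, a memory-cleansing caller looks roughly like
this (illustrative; passing NULL for older_than_this is assumed to mean "no
age cutoff"):

	int nr_to_write = 32;

	writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, NULL);
	/* nr_to_write is decremented as pages are written */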
......@@ -196,14 +196,12 @@ EXPORT_SYMBOL(notify_change);
EXPORT_SYMBOL(set_blocksize);
EXPORT_SYMBOL(sb_set_blocksize);
EXPORT_SYMBOL(sb_min_blocksize);
EXPORT_SYMBOL(__getblk);
EXPORT_SYMBOL(cdget);
EXPORT_SYMBOL(cdput);
EXPORT_SYMBOL(bdget);
EXPORT_SYMBOL(bdput);
EXPORT_SYMBOL(bd_claim);
EXPORT_SYMBOL(bd_release);
EXPORT_SYMBOL(__bread);
EXPORT_SYMBOL(__brelse);
EXPORT_SYMBOL(__bforget);
EXPORT_SYMBOL(ll_rw_block);
......@@ -475,7 +473,8 @@ EXPORT_SYMBOL(schedule);
EXPORT_SYMBOL(preempt_schedule);
#endif
EXPORT_SYMBOL(schedule_timeout);
EXPORT_SYMBOL(sys_sched_yield);
EXPORT_SYMBOL(yield);
EXPORT_SYMBOL(__cond_resched);
EXPORT_SYMBOL(set_user_nice);
EXPORT_SYMBOL(task_nice);
EXPORT_SYMBOL_GPL(idle_cpu);
......@@ -550,7 +549,6 @@ EXPORT_SYMBOL(file_fsync);
EXPORT_SYMBOL(fsync_buffers_list);
EXPORT_SYMBOL(clear_inode);
EXPORT_SYMBOL(init_special_inode);
EXPORT_SYMBOL(__find_get_block);
EXPORT_SYMBOL(new_inode);
EXPORT_SYMBOL(__insert_inode_hash);
EXPORT_SYMBOL(remove_inode_hash);
......
......@@ -1447,6 +1447,18 @@ asmlinkage long sys_sched_yield(void)
return 0;
}
void __cond_resched(void)
{
set_current_state(TASK_RUNNING);
schedule();
}
void yield(void)
{
set_current_state(TASK_RUNNING);
sys_sched_yield();
}
asmlinkage long sys_sched_get_priority_max(int policy)
{
int ret = -EINVAL;
......
......@@ -262,7 +262,6 @@ void tasklet_kill(struct tasklet_struct *t)
printk("Attempt to kill tasklet from interrupt\n");
while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
current->state = TASK_RUNNING;
do
yield();
while (test_bit(TASKLET_STATE_SCHED, &t->state));
......
......@@ -237,8 +237,7 @@ int freeze_processes(void)
todo++;
}
read_unlock(&tasklist_lock);
sys_sched_yield();
schedule();
yield();
if (time_after(jiffies, start_time + TIMEOUT)) {
PRINTK( "\n" );
printk(KERN_ERR " stopping tasks failed (%d tasks remaining)\n", todo );
......
......@@ -258,6 +258,13 @@ static ctl_table kern_table[] = {
{0}
};
/* Constants for minimum and maximum testing in vm_table.
We use these as one-element integer vectors. */
static int zero = 0;
static int one = 1;
static int one_hundred = 100;
static ctl_table vm_table[] = {
{VM_OVERCOMMIT_MEMORY, "overcommit_memory", &sysctl_overcommit_memory,
sizeof(sysctl_overcommit_memory), 0644, NULL, &proc_dointvec},
......@@ -266,18 +273,37 @@ static ctl_table vm_table[] = {
{VM_PAGE_CLUSTER, "page-cluster",
&page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
{VM_DIRTY_BACKGROUND, "dirty_background_ratio",
&dirty_background_ratio, sizeof(dirty_background_ratio),
0644, NULL, &proc_dointvec},
&dirty_background_ratio, sizeof(dirty_background_ratio),
0644, NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL,
&zero, &one_hundred },
{VM_DIRTY_ASYNC, "dirty_async_ratio", &dirty_async_ratio,
sizeof(dirty_async_ratio), 0644, NULL, &proc_dointvec},
sizeof(dirty_async_ratio), 0644, NULL, &proc_dointvec_minmax,
&sysctl_intvec, NULL, &zero, &one_hundred },
{VM_DIRTY_SYNC, "dirty_sync_ratio", &dirty_sync_ratio,
sizeof(dirty_sync_ratio), 0644, NULL, &proc_dointvec},
sizeof(dirty_sync_ratio), 0644, NULL, &proc_dointvec_minmax,
&sysctl_intvec, NULL, &zero, &one_hundred },
{VM_DIRTY_WB_CS, "dirty_writeback_centisecs",
&dirty_writeback_centisecs, sizeof(dirty_writeback_centisecs), 0644,
NULL, &proc_dointvec},
&dirty_writeback_centisecs, sizeof(dirty_writeback_centisecs), 0644,
NULL, &proc_dointvec_minmax, &sysctl_intvec, NULL,
/* Here, we define the range of possible values for
dirty_writeback_centisecs.
The default value is 5 seconds (500 centisec). We will use 1
centisec, the smallest possible value that could make any sort of
sense. If we allowed the user to set the interval to 0 seconds
(which would presumably mean to chew up all of the CPU looking for
dirty pages and writing them out, without taking a break), the
interval would effectively become 1 second (100 centisecs), due to
some nicely documented throttling code in wb_kupdate().
There is no maximum legal value for dirty_writeback. */
&one , NULL},
{VM_DIRTY_EXPIRE_CS, "dirty_expire_centisecs",
&dirty_expire_centisecs, sizeof(dirty_expire_centisecs), 0644,
NULL, &proc_dointvec},
&dirty_expire_centisecs, sizeof(dirty_expire_centisecs), 0644,
NULL, &proc_dointvec},
{ VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads",
&nr_pdflush_threads, sizeof nr_pdflush_threads,
0444 /* read-only*/, NULL, &proc_dointvec},
{0}
};
......
......@@ -177,8 +177,13 @@ static inline void truncate_partial_page(struct page *page, unsigned partial)
static void truncate_complete_page(struct page *page)
{
/* Leave it on the LRU if it gets converted into anonymous buffers */
if (!PagePrivate(page) || do_invalidatepage(page, 0))
if (!PagePrivate(page) || do_invalidatepage(page, 0)) {
lru_cache_del(page);
} else {
if (current->flags & PF_INVALIDATE)
printk("%s: buffer heads were leaked\n",
current->comm);
}
ClearPageDirty(page);
ClearPageUptodate(page);
remove_inode_page(page);
......@@ -362,16 +367,18 @@ static int invalidate_list_pages2(struct address_space * mapping,
while (curr != head) {
page = list_entry(curr, struct page, list);
if (PageWriteback(page)) {
write_unlock(&mapping->page_lock);
wait_on_page_writeback(page);
unlocked = 1;
write_lock(&mapping->page_lock);
goto restart;
}
if (!TestSetPageLocked(page)) {
int __unlocked;
if (PageWriteback(page)) {
write_unlock(&mapping->page_lock);
wait_on_page_writeback(page);
unlocked = 1;
write_lock(&mapping->page_lock);
unlock_page(page);
goto restart;
}
__unlocked = invalidate_this_page2(mapping, page, curr, head);
unlock_page(page);
unlocked |= __unlocked;
......@@ -510,24 +517,32 @@ int filemap_fdatawait(struct address_space * mapping)
}
/*
* This adds a page to the page cache, starting out as locked,
* owned by us, but unreferenced, not uptodate and with no errors.
* The caller must hold a write_lock on the mapping->page_lock.
* This adds a page to the page cache, starting out as locked, unreferenced,
* not uptodate and with no errors.
*
* The caller must hold a write_lock on mapping->page_lock.
*
* This function is used for two things: adding newly allocated pagecache
* pages and for moving existing anon pages into swapcache.
*
* In the case of pagecache pages, the page is new, so we can just run
* SetPageLocked() against it. The other page state flags were set by
* rmqueue()
*
* In the case of swapcache, try_to_swap_out() has already locked the page, so
* SetPageLocked() is ugly-but-OK there too. The required page state has been
* set up by swap_out_add_to_swap_cache().
*/
static int __add_to_page_cache(struct page *page,
struct address_space *mapping, unsigned long offset)
{
page_cache_get(page);
if (radix_tree_insert(&mapping->page_tree, offset, page) < 0)
goto nomem;
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 | 1 << PG_checked);
SetPageLocked(page);
ClearPageDirty(page);
___add_to_page_cache(page, mapping, offset);
return 0;
nomem:
page_cache_release(page);
if (radix_tree_insert(&mapping->page_tree, offset, page) == 0) {
SetPageLocked(page);
ClearPageDirty(page);
___add_to_page_cache(page, mapping, offset);
page_cache_get(page);
return 0;
}
return -ENOMEM;
}
......@@ -1116,8 +1131,6 @@ static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, si
retval = -EINVAL;
if ((offset & blocksize_mask) || (count & blocksize_mask))
goto out_free;
if (!mapping->a_ops->direct_IO)
goto out_free;
/*
* Flush to disk exclusively the _data_, metadata must remain
......
......@@ -503,18 +503,18 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long
while (!(map = follow_page(mm, start, write))) {
spin_unlock(&mm->page_table_lock);
switch (handle_mm_fault(mm, vma, start, write)) {
case 1:
case VM_FAULT_MINOR:
tsk->min_flt++;
break;
case 2:
case VM_FAULT_MAJOR:
tsk->maj_flt++;
break;
case 0:
if (i) return i;
return -EFAULT;
case VM_FAULT_SIGBUS:
return i ? i : -EFAULT;
case VM_FAULT_OOM:
return i ? i : -ENOMEM;
default:
if (i) return i;
return -ENOMEM;
BUG();
}
spin_lock(&mm->page_table_lock);
}
......@@ -612,7 +612,7 @@ void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
page = iobuf->maplist[index];
if (!PageReserved(page))
SetPageDirty(page);
set_page_dirty(page);
remaining -= (PAGE_SIZE - offset);
offset = 0;
......@@ -968,7 +968,7 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return 1; /* Minor fault */
return VM_FAULT_MINOR;
}
}
pte_unmap(page_table);
......@@ -1002,16 +1002,21 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
spin_unlock(&mm->page_table_lock);
page_cache_release(new_page);
page_cache_release(old_page);
return 1; /* Minor fault */
return VM_FAULT_MINOR;
bad_wp_page:
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n", address);
return -1;
/*
* This should really halt the system so it can be debugged or
* at least the kernel stops what it's doing before it corrupts
* data, but for the moment just pretend this is OOM.
*/
return VM_FAULT_OOM;
no_mem:
page_cache_release(old_page);
return -1;
return VM_FAULT_OOM;
}
static void vmtruncate_list(list_t *head, unsigned long pgoff)
......@@ -1135,7 +1140,7 @@ static int do_swap_page(struct mm_struct * mm,
struct page *page;
swp_entry_t entry = pte_to_swp_entry(orig_pte);
pte_t pte;
int ret = 1;
int ret = VM_FAULT_MINOR;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
......@@ -1148,17 +1153,19 @@ static int do_swap_page(struct mm_struct * mm,
* Back out if somebody else faulted in this pte while
* we released the page table lock.
*/
int retval;
spin_lock(&mm->page_table_lock);
page_table = pte_offset_map(pmd, address);
retval = pte_same(*page_table, orig_pte) ? -1 : 1;
if (pte_same(*page_table, orig_pte))
ret = VM_FAULT_OOM;
else
ret = VM_FAULT_MINOR;
pte_unmap(page_table);
spin_unlock(&mm->page_table_lock);
return retval;
return ret;
}
/* Had to read the page from swap area: Major fault */
ret = 2;
ret = VM_FAULT_MAJOR;
}
lock_page(page);
......@@ -1174,7 +1181,7 @@ static int do_swap_page(struct mm_struct * mm,
spin_unlock(&mm->page_table_lock);
unlock_page(page);
page_cache_release(page);
return 1;
return VM_FAULT_MINOR;
}
/* The page isn't present yet, go ahead with the fault. */
......@@ -1232,7 +1239,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
pte_unmap(page_table);
page_cache_release(page);
spin_unlock(&mm->page_table_lock);
return 1;
return VM_FAULT_MINOR;
}
mm->rss++;
flush_page_to_ram(page);
......@@ -1246,10 +1253,10 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, entry);
spin_unlock(&mm->page_table_lock);
return 1; /* Minor fault */
return VM_FAULT_MINOR;
no_mem:
return -1;
return VM_FAULT_OOM;
}
/*
......@@ -1277,10 +1284,11 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, 0);
if (new_page == NULL) /* no page was available -- SIGBUS */
return 0;
/* no page was available -- either SIGBUS or OOM */
if (new_page == NOPAGE_SIGBUS)
return VM_FAULT_SIGBUS;
if (new_page == NOPAGE_OOM)
return -1;
return VM_FAULT_OOM;
/*
* Should we do an early C-O-W break?
......@@ -1289,7 +1297,7 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
struct page * page = alloc_page(GFP_HIGHUSER);
if (!page) {
page_cache_release(new_page);
return -1;
return VM_FAULT_OOM;
}
copy_user_highpage(page, new_page, address);
page_cache_release(new_page);
......@@ -1325,13 +1333,13 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
pte_unmap(page_table);
page_cache_release(new_page);
spin_unlock(&mm->page_table_lock);
return 1;
return VM_FAULT_MINOR;
}
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
spin_unlock(&mm->page_table_lock);
return 2; /* Major fault */
return VM_FAULT_MAJOR;
}
/*
......@@ -1383,7 +1391,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
establish_pte(vma, address, pte, entry);
pte_unmap(pte);
spin_unlock(&mm->page_table_lock);
return 1;
return VM_FAULT_MINOR;
}
/*
......@@ -1411,7 +1419,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
}
spin_unlock(&mm->page_table_lock);
return -1;
return VM_FAULT_OOM;
}
/*
......
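[editor's note, not part of this diff] The mm/memory.c changes above replace handle_mm_fault()'s old numeric returns with symbolic codes one-to-one (1 -> VM_FAULT_MINOR, 2 -> VM_FAULT_MAJOR, 0 -> VM_FAULT_SIGBUS, -1 -> VM_FAULT_OOM). The header defining them is not shown in this hunk, but from those substitutions the definitions presumably look something like:

/* Assumed definitions, inferred from the substitutions in this diff;
 * the actual header (likely include/linux/mm.h) is not shown here. */
#define VM_FAULT_OOM	(-1)
#define VM_FAULT_SIGBUS	0
#define VM_FAULT_MINOR	1
#define VM_FAULT_MAJOR	2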
......@@ -19,8 +19,9 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/init.h>
#include <linux/sysrq.h>
//#include <linux/sysrq.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
/*
* The maximum number of pages to writeout in a single bdflush/kupdate
......@@ -47,6 +48,8 @@
#define SYNC_WRITEBACK_PAGES 1500
/* The following parameters are exported via /proc/sys/vm */
/*
* Dirty memory thresholds, in percentages
*/
......@@ -67,15 +70,18 @@ int dirty_async_ratio = 50;
int dirty_sync_ratio = 60;
/*
* The interval between `kupdate'-style writebacks.
* The interval between `kupdate'-style writebacks, in centiseconds
* (hundredths of a second)
*/
int dirty_writeback_centisecs = 5 * 100;
/*
* The largest amount of time for which data is allowed to remain dirty
* The longest amount of time for which data is allowed to remain dirty
*/
int dirty_expire_centisecs = 30 * 100;
/* End of sysctl-exported parameters */
static void background_writeout(unsigned long _min_pages);
......@@ -233,7 +239,8 @@ static void wb_kupdate(unsigned long arg)
static void wb_timer_fn(unsigned long unused)
{
if (pdflush_operation(wb_kupdate, 0) < 0)
mod_timer(&wb_timer, jiffies + HZ);
mod_timer(&wb_timer, jiffies + HZ); /* delay 1 second */
}
static int __init wb_timer_init(void)
......@@ -307,108 +314,9 @@ int generic_vm_writeback(struct page *page, int *nr_to_write)
}
EXPORT_SYMBOL(generic_vm_writeback);
/**
* generic_writepages - walk the list of dirty pages of the given
* address space and writepage() all of them.
*
* @mapping: address space structure to write
* @nr_to_write: subtract the number of written pages from *@nr_to_write
*
* This is a library function, which implements the writepages()
* address_space_operation.
*
* (The next two paragraphs refer to code which isn't here yet, but they
* explain the presence of address_space.io_pages)
*
* Pages can be moved from clean_pages or locked_pages onto dirty_pages
* at any time - it's not possible to lock against that. So pages which
* have already been added to a BIO may magically reappear on the dirty_pages
* list. And generic_writepages() will again try to lock those pages.
* But I/O has not yet been started against the page. Thus deadlock.
*
* To avoid this, the entire contents of the dirty_pages list are moved
* onto io_pages up-front. We then walk io_pages, locking the
* pages and submitting them for I/O, moving them to locked_pages.
*
* This has the added benefit of preventing a livelock which would otherwise
* occur if pages are being dirtied faster than we can write them out.
*
* If a page is already under I/O, generic_writepages() skips it, even
* if it's dirty. This is desirable behaviour for memory-cleaning writeback,
* but it is INCORRECT for data-integrity system calls such as fsync(). fsync()
* and msync() need to guarantee that all the data which was dirty at the time

* the call was made get new I/O started against them. The way to do this is
* to run filemap_fdatawait() before calling filemap_fdatawrite().
*
* It's fairly rare for PageWriteback pages to be on ->dirty_pages. It
* means that someone redirtied the page while it was under I/O.
*/
int generic_writepages(struct address_space *mapping, int *nr_to_write)
{
int (*writepage)(struct page *) = mapping->a_ops->writepage;
int ret = 0;
int done = 0;
int err;
write_lock(&mapping->page_lock);
list_splice(&mapping->dirty_pages, &mapping->io_pages);
INIT_LIST_HEAD(&mapping->dirty_pages);
while (!list_empty(&mapping->io_pages) && !done) {
struct page *page = list_entry(mapping->io_pages.prev,
struct page, list);
list_del(&page->list);
if (PageWriteback(page)) {
if (PageDirty(page)) {
list_add(&page->list, &mapping->dirty_pages);
continue;
}
list_add(&page->list, &mapping->locked_pages);
continue;
}
if (!PageDirty(page)) {
list_add(&page->list, &mapping->clean_pages);
continue;
}
list_add(&page->list, &mapping->locked_pages);
page_cache_get(page);
write_unlock(&mapping->page_lock);
lock_page(page);
/* It may have been removed from swapcache: check ->mapping */
if (page->mapping && TestClearPageDirty(page) &&
!PageWriteback(page)) {
/* FIXME: batch this up */
if (!PageActive(page) && PageLRU(page)) {
spin_lock(&pagemap_lru_lock);
if (!PageActive(page) && PageLRU(page)) {
list_del(&page->lru);
list_add(&page->lru, &inactive_list);
}
spin_unlock(&pagemap_lru_lock);
}
err = writepage(page);
if (!ret)
ret = err;
if (nr_to_write && --(*nr_to_write) <= 0)
done = 1;
} else {
unlock_page(page);
}
page_cache_release(page);
write_lock(&mapping->page_lock);
}
if (!list_empty(&mapping->io_pages)) {
/*
* Put the rest back, in the correct order.
*/
list_splice(&mapping->io_pages, mapping->dirty_pages.prev);
INIT_LIST_HEAD(&mapping->io_pages);
}
write_unlock(&mapping->page_lock);
return ret;
return mpage_writepages(mapping, nr_to_write, NULL);
}
EXPORT_SYMBOL(generic_writepages);
......
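[editor's note, not part of this diff] As the removed comment says, generic_writepages() implements the writepages() address_space_operation, and after this patch it is just a thin wrapper around mpage_writepages(). A hypothetical wiring for a filesystem with no special writeback needs, using the old-style initializers seen elsewhere in this tree ("foofs" is made up):

/* Hypothetical filesystem glue, assuming foofs has no writeback quirks:
 * point ->writepages at the library helper. */
static struct address_space_operations foofs_aops = {
	writepages:	generic_writepages,	/* forwards to mpage_writepages() */
};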
......@@ -86,24 +86,24 @@ static void __free_pages_ok (struct page *page, unsigned int order)
struct page *base;
zone_t *zone;
if (PagePrivate(page))
BUG();
if (page->mapping)
BUG();
if (PageLocked(page))
BUG();
if (PageLRU(page))
BUG();
if (PageActive(page))
BUG();
if (PageWriteback(page))
BUG();
ClearPageDirty(page);
page->flags &= ~(1<<PG_referenced);
if (current->flags & PF_FREE_PAGES)
goto local_freelist;
back_local_freelist:
BUG_ON(PagePrivate(page));
BUG_ON(page->mapping != NULL);
BUG_ON(PageLocked(page));
BUG_ON(PageLRU(page));
BUG_ON(PageActive(page));
BUG_ON(PageWriteback(page));
if (PageDirty(page))
ClearPageDirty(page);
BUG_ON(page_count(page) != 0);
if (unlikely(current->flags & PF_FREE_PAGES)) {
if (!current->nr_local_pages && !in_interrupt()) {
list_add(&page->list, &current->local_pages);
page->index = order;
current->nr_local_pages++;
goto out;
}
}
zone = page_zone(page);
......@@ -113,18 +113,14 @@ static void __free_pages_ok (struct page *page, unsigned int order)
if (page_idx & ~mask)
BUG();
index = page_idx >> (1 + order);
area = zone->free_area + order;
spin_lock_irqsave(&zone->lock, flags);
zone->free_pages -= mask;
while (mask + (1 << (MAX_ORDER-1))) {
struct page *buddy1, *buddy2;
if (area >= zone->free_area + MAX_ORDER)
BUG();
BUG_ON(area >= zone->free_area + MAX_ORDER);
if (!__test_and_change_bit(index, area->map))
/*
* the buddy page is still allocated.
......@@ -137,11 +133,8 @@ static void __free_pages_ok (struct page *page, unsigned int order)
*/
buddy1 = base + (page_idx ^ -mask);
buddy2 = base + page_idx;
if (bad_range(zone, buddy1))
BUG();
if (bad_range(zone, buddy2))
BUG();
BUG_ON(bad_range(zone, buddy1));
BUG_ON(bad_range(zone, buddy2));
list_del(&buddy1->list);
mask <<= 1;
area++;
......@@ -149,19 +142,9 @@ static void __free_pages_ok (struct page *page, unsigned int order)
page_idx &= mask;
}
list_add(&(base + page_idx)->list, &area->free_list);
spin_unlock_irqrestore(&zone->lock, flags);
out:
return;
local_freelist:
if (current->nr_local_pages)
goto back_local_freelist;
if (in_interrupt())
goto back_local_freelist;
list_add(&page->list, &current->local_pages);
page->index = order;
current->nr_local_pages++;
}
#define MARK_USED(index, order, area) \
......@@ -173,8 +156,7 @@ static inline struct page * expand (zone_t *zone, struct page *page,
unsigned long size = 1 << high;
while (high > low) {
if (bad_range(zone, page))
BUG();
BUG_ON(bad_range(zone, page));
area--;
high--;
size >>= 1;
......@@ -183,11 +165,28 @@ static inline struct page * expand (zone_t *zone, struct page *page,
index += size;
page += size;
}
if (bad_range(zone, page))
BUG();
BUG_ON(bad_range(zone, page));
return page;
}
/*
* This page is about to be returned from the page allocator
*/
static inline void prep_new_page(struct page *page)
{
BUG_ON(page->mapping);
BUG_ON(PagePrivate(page));
BUG_ON(PageLocked(page));
BUG_ON(PageLRU(page));
BUG_ON(PageActive(page));
BUG_ON(PageDirty(page));
BUG_ON(PageWriteback(page));
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked);
set_page_count(page, 1);
}
static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
static struct page * rmqueue(zone_t *zone, unsigned int order)
{
......@@ -206,8 +205,7 @@ static struct page * rmqueue(zone_t *zone, unsigned int order)
unsigned int index;
page = list_entry(curr, struct page, list);
if (bad_range(zone, page))
BUG();
BUG_ON(bad_range(zone, page));
list_del(curr);
index = page - zone->zone_mem_map;
if (curr_order != MAX_ORDER-1)
......@@ -217,13 +215,9 @@ static struct page * rmqueue(zone_t *zone, unsigned int order)
page = expand(zone, page, index, order, curr_order, area);
spin_unlock_irqrestore(&zone->lock, flags);
set_page_count(page, 1);
if (bad_range(zone, page))
BUG();
if (PageLRU(page))
BUG();
if (PageActive(page))
BUG();
prep_new_page(page);
return page;
}
curr_order++;
......@@ -266,16 +260,14 @@ struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
}
#endif
static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
static /* inline */ struct page *
balance_classzone(zone_t * classzone, unsigned int gfp_mask,
unsigned int order, int * freed)
{
struct page * page = NULL;
int __freed = 0;
if (!(gfp_mask & __GFP_WAIT))
goto out;
if (in_interrupt())
BUG();
BUG_ON(in_interrupt());
current->allocation_order = order;
current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
......@@ -298,25 +290,9 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask
tmp = list_entry(entry, struct page, list);
if (tmp->index == order && memclass(page_zone(tmp), classzone)) {
list_del(entry);
current->nr_local_pages--;
set_page_count(tmp, 1);
page = tmp;
if (PagePrivate(page))
BUG();
if (page->mapping)
BUG();
if (PageLocked(page))
BUG();
if (PageLRU(page))
BUG();
if (PageActive(page))
BUG();
if (PageDirty(page))
BUG();
if (PageWriteback(page))
BUG();
current->nr_local_pages--;
prep_new_page(page);
break;
}
} while ((entry = entry->next) != local_pages);
......@@ -333,7 +309,6 @@ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask
}
current->nr_local_pages = 0;
}
out:
*freed = __freed;
return page;
}
......@@ -380,7 +355,7 @@ struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_
break;
local_min = z->pages_min;
if (!(gfp_mask & __GFP_WAIT))
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
min += local_min;
if (z->free_pages > min) {
......@@ -405,7 +380,7 @@ struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_
return page;
}
nopage:
if (!(current->flags & PF_RADIX_TREE)) {
if (!(current->flags & PF_NOWARN)) {
printk("%s: page allocation failure."
" order:%d, mode:0x%x\n",
current->comm, order, gfp_mask);
......@@ -441,7 +416,6 @@ struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_
goto nopage;
/* Yield for kswapd, and try again */
__set_current_state(TASK_RUNNING);
yield();
goto rebalance;
}
......@@ -800,8 +774,7 @@ void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
unsigned long totalpages, offset, realtotalpages;
const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1);
if (zone_start_paddr & ~PAGE_MASK)
BUG();
BUG_ON(zone_start_paddr & ~PAGE_MASK);
totalpages = 0;
for (i = 0; i < MAX_NR_ZONES; i++) {
......
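[editor's note, not part of this diff] Much of the page_alloc.c churn above is mechanical: open-coded sanity checks collapse to BUG_ON(), which behaves identically. For example:

	/* Before: */
	if (PageLocked(page))
		BUG();
	/* After: */
	BUG_ON(PageLocked(page));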
......@@ -15,37 +15,26 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/buffer_head.h>
#include <asm/pgtable.h>
#include <linux/swapops.h>
static int
swap_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
struct swap_info_struct *sis;
swp_entry_t entry;
entry.val = iblock;
sis = get_swap_info_struct(swp_type(entry));
bh_result->b_bdev = sis->bdev;
bh_result->b_blocknr = map_swap_page(sis, swp_offset(entry));
bh_result->b_size = PAGE_SIZE;
set_buffer_mapped(bh_result);
return 0;
}
#include <linux/buffer_head.h> /* for block_sync_page() */
#include <asm/pgtable.h>
static struct bio *
get_swap_bio(int gfp_flags, struct page *page, bio_end_io_t end_io)
{
struct bio *bio;
struct buffer_head bh;
bio = bio_alloc(gfp_flags, 1);
if (bio) {
swap_get_block(NULL, page->index, &bh, 1);
bio->bi_sector = bh.b_blocknr * (PAGE_SIZE >> 9);
bio->bi_bdev = bh.b_bdev;
struct swap_info_struct *sis;
swp_entry_t entry;
entry.val = page->index;
sis = get_swap_info_struct(swp_type(entry));
bio->bi_sector = map_swap_page(sis, swp_offset(entry)) *
(PAGE_SIZE >> 9);
bio->bi_bdev = sis->bdev;
bio->bi_io_vec[0].bv_page = page;
bio->bi_io_vec[0].bv_len = PAGE_SIZE;
bio->bi_io_vec[0].bv_offset = 0;
......@@ -98,6 +87,7 @@ int swap_writepage(struct page *page)
}
bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
if (bio == NULL) {
set_page_dirty(page);
ret = -ENOMEM;
goto out;
}
......@@ -129,7 +119,7 @@ int swap_readpage(struct file *file, struct page *page)
* swapper_space doesn't have a real inode, so it gets a special vm_writeback()
* so we don't need swap special cases in generic_vm_writeback().
*
* Swap pages are PageLocked and PageWriteback while under writeout so that
* Swap pages are !PageLocked and PageWriteback while under writeout so that
* memory allocators will throttle against them.
*/
static int swap_vm_writeback(struct page *page, int *nr_to_write)
......
......@@ -15,6 +15,9 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/sched.h> // Needed by writeback.h
#include <linux/fs.h> // Needed by writeback.h
#include <linux/writeback.h> // Prototypes pdflush_operation()
/*
......@@ -44,8 +47,11 @@ static spinlock_t pdflush_lock = SPIN_LOCK_UNLOCKED;
/*
* The count of currently-running pdflush threads. Protected
* by pdflush_lock.
*
* Readable by sysctl, but not writable. Published to userspace at
* /proc/sys/vm/nr_pdflush_threads.
*/
static int nr_pdflush_threads = 0;
int nr_pdflush_threads = 0;
/*
* The time at which the pdflush thread pool last went empty
......
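[editor's note, not part of this diff] With nr_pdflush_threads now global and published read-only (mode 0444) at /proc/sys/vm/nr_pdflush_threads via the vm_table entry earlier in this commit, the value can be inspected from userspace; a minimal sketch:

/* Userspace-side illustration only. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/vm/nr_pdflush_threads", "r");
	int n;

	if (f && fscanf(f, "%d", &n) == 1)
		printf("pdflush threads currently running: %d\n", n);
	if (f)
		fclose(f);
	return 0;
}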
......@@ -426,22 +426,15 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
swap_free(entry);
ptr[offset] = (swp_entry_t) {0};
while (inode && (PageWriteback(page) ||
move_from_swap_cache(page, idx, inode->i_mapping))) {
while (inode && move_from_swap_cache(page, idx, inode->i_mapping)) {
/*
* Yield for kswapd, and try again - but we're still
* holding the page lock - ugh! fix this up later on.
* Beware of inode being unlinked or truncated: just
* leave try_to_unuse to delete_from_swap_cache if so.
*
* AKPM: We now wait on writeback too. Note that it's
* the page lock which prevents new writeback from starting.
*/
spin_unlock(&info->lock);
if (PageWriteback(page))
wait_on_page_writeback(page);
else
yield();
yield();
spin_lock(&info->lock);
ptr = shmem_swp_entry(info, idx, 0);
if (IS_ERR(ptr))
......@@ -607,6 +600,7 @@ static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct
spin_unlock(&info->lock);
wait_on_page_writeback(page);
unlock_page(page);
page_cache_release(page);
goto repeat;
}
error = move_from_swap_cache(page, idx, mapping);
......
......@@ -1153,12 +1153,12 @@ static int kmem_cache_grow (kmem_cache_t * cachep, int flags)
* in kmem_cache_alloc(). If a caller is seriously mis-behaving they
* will eventually be caught here (where it matters).
*/
if (in_interrupt() && (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC)
if (in_interrupt() && (flags & __GFP_WAIT))
BUG();
ctor_flags = SLAB_CTOR_CONSTRUCTOR;
local_flags = (flags & SLAB_LEVEL_MASK);
if (local_flags == SLAB_ATOMIC)
if (!(local_flags & __GFP_WAIT))
/*
* Not allowed to sleep. Need to tell a constructor about
* this - it might need to know...
......
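[editor's note, not part of this diff] The slab change above replaces the SLAB_ATOMIC special case with a direct test of __GFP_WAIT, so any sleeping allocation attempted from interrupt context is eventually caught in kmem_cache_grow() when the cache needs to grow. Illustrative calls, assuming an existing cachep:

	/* GFP_ATOMIC does not include __GFP_WAIT: legal from interrupt context. */
	obj = kmem_cache_alloc(cachep, GFP_ATOMIC);

	/* GFP_KERNEL includes __GFP_WAIT: with this patch, doing this while
	 * in_interrupt() eventually hits the BUG() in kmem_cache_grow(). */
	obj = kmem_cache_alloc(cachep, GFP_KERNEL);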
......@@ -21,16 +21,9 @@
/*
* swapper_inode doesn't do anything much. It is really only here to
* avoid some special-casing in other parts of the kernel.
*
* We set i_size to "infinity" to keep the page I/O functions happy. The swap
* block allocator makes sure that allocations are in-range. A strange
* number is chosen to prevent various arith overflows elsewhere. For example,
* `lblock' in block_read_full_page().
*/
static struct inode swapper_inode = {
i_mapping: &swapper_space,
i_size: PAGE_SIZE * 0xffffffffLL,
i_blkbits: PAGE_SHIFT,
};
extern struct address_space_operations swap_aops;
......@@ -160,9 +153,13 @@ int move_to_swap_cache(struct page *page, swp_entry_t entry)
/* Add it to the swap cache */
*pslot = page;
page->flags &= ~(1 << PG_uptodate | 1 << PG_error
| 1 << PG_referenced | 1 << PG_arch_1
| 1 << PG_checked);
/*
* This code used to clear PG_uptodate, PG_error, PG_arch1,
* PG_referenced and PG_checked. What _should_ it clear?
*/
ClearPageUptodate(page);
ClearPageReferenced(page);
SetPageLocked(page);
ClearPageDirty(page);
___add_to_page_cache(page, &swapper_space, entry.val);
......@@ -205,9 +202,14 @@ int move_from_swap_cache(struct page *page, unsigned long index,
__delete_from_swap_cache(page);
*pslot = page;
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked);
/*
* This code used to clear PG_uptodate, PG_error, PG_referenced,
* PG_arch_1 and PG_checked. It's not really clear why.
*/
ClearPageUptodate(page);
ClearPageReferenced(page);
/*
* ___add_to_page_cache puts the page on ->clean_pages,
* but it's dirty. If it's on ->clean_pages, it will basically
......
......@@ -687,11 +687,10 @@ static int try_to_unuse(unsigned int type)
if ((*swap_map > 1) && PageDirty(page) && PageSwapCache(page)) {
swap_writepage(page);
lock_page(page);
}
if (PageSwapCache(page)) {
wait_on_page_writeback(page);
delete_from_swap_cache(page);
}
if (PageSwapCache(page))
delete_from_swap_cache(page);
/*
* So we could skip searching mms once swap count went
......
......@@ -52,6 +52,9 @@ static inline int is_page_cache_freeable(struct page * page)
* So PF_MEMALLOC is dropped here. This causes the slab allocations to fail
* earlier, so radix-tree nodes will then be allocated from the mempool
* reserves.
*
* We're still using __GFP_HIGH for radix-tree node allocations, so some of
* the emergency pools are available - just not all of them.
*/
static inline int
swap_out_add_to_swap_cache(struct page *page, swp_entry_t entry)
......@@ -60,7 +63,9 @@ swap_out_add_to_swap_cache(struct page *page, swp_entry_t entry)
int ret;
current->flags &= ~PF_MEMALLOC;
current->flags |= PF_RADIX_TREE;
current->flags |= PF_NOWARN;
ClearPageUptodate(page); /* why? */
ClearPageReferenced(page); /* why? */
ret = add_to_swap_cache(page, entry);
current->flags = flags;
return ret;
......