Commit fb796d31 authored by Linus Torvalds

Merge http://linux-sound.bkbits.net/linux-sound

into home.transmeta.com:/home/torvalds/v2.5/linux
parents d9c28b28 3fddff46
......@@ -6,6 +6,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include "cpu.h"
......@@ -13,6 +14,11 @@ static int disable_x86_serial_nr __initdata = 1;
static int disable_P4_HT __initdata = 0;
extern int trap_init_f00f_bug(void);
#ifdef INTEL_MOVSL
struct movsl_mask movsl_mask; /* alignment at which movsl is preferred for
bulk memory copies */
#endif
/*
* Early probe support logic for ppro memory erratum #50
*
......@@ -348,6 +354,25 @@ static void __init init_intel(struct cpuinfo_x86 *c)
/* Work around errata */
Intel_errata_workarounds(c);
#ifdef INTEL_MOVSL
/*
* Set up the preferred alignment for movsl bulk memory moves
*/
switch (c->x86) {
case 4: /* 486: untested */
break;
case 5: /* Old Pentia: untested */
break;
case 6: /* PII/PIII only like movsl with 8-byte alignment */
movsl_mask.mask = 7;
break;
case 15: /* P4 is OK down to 8-byte alignment */
movsl_mask.mask = 7;
break;
}
#endif
}
......
......@@ -21,8 +21,13 @@ struct dmi_header
u16 handle;
};
#undef DMI_DEBUG
#ifdef DMI_DEBUG
#define dmi_printk(x) printk x
#else
#define dmi_printk(x)
//#define dmi_printk(x) printk x
#endif
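An illustrative call site for the macro above (base and num are hypothetical variables): the doubled parentheses pass an entire argument list through one macro parameter, so the whole printk call compiles away when DMI_DEBUG is undefined. This idiom predates C99 variadic macros.

/* Hypothetical call site, showing the double-parenthesis idiom */
dmi_printk(("DMI table at 0x%08x, %d structures\n", base, num));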
static char * __init dmi_string(struct dmi_header *dm, u8 s)
{
......@@ -832,7 +837,9 @@ static __init void dmi_check_blacklist(void)
static void __init dmi_decode(struct dmi_header *dm)
{
#ifdef DMI_DEBUG
u8 *data = (u8 *)dm;
#endif
switch(dm->type)
{
......
......@@ -116,8 +116,10 @@ EXPORT_SYMBOL(strncpy_from_user);
EXPORT_SYMBOL(__strncpy_from_user);
EXPORT_SYMBOL(clear_user);
EXPORT_SYMBOL(__clear_user);
EXPORT_SYMBOL(__generic_copy_from_user);
EXPORT_SYMBOL(__generic_copy_to_user);
EXPORT_SYMBOL(copy_from_user);
EXPORT_SYMBOL(__copy_from_user);
EXPORT_SYMBOL(copy_to_user);
EXPORT_SYMBOL(__copy_to_user);
EXPORT_SYMBOL(strnlen_user);
EXPORT_SYMBOL(pci_alloc_consistent);
......
......@@ -328,20 +328,21 @@ asmlinkage unsigned int do_IRQ(struct pt_regs regs)
irq_desc_t *desc = irq_desc + irq;
struct irqaction * action;
unsigned int status;
long esp;
irq_enter();
#ifdef CONFIG_DEBUG_STACKOVERFLOW
/* Debugging check for stack overflow: is there less than 1KB free? */
__asm__ __volatile__("andl %%esp,%0" : "=r" (esp) : "0" (8191));
if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
extern void show_stack(unsigned long *);
printk("do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct task_struct));
__asm__ __volatile__("movl %%esp,%0" : "=r" (esp));
show_stack((void *)esp);
{
long esp;
__asm__ __volatile__("andl %%esp,%0" :
"=r" (esp) : "0" (8191));
if (unlikely(esp < (sizeof(struct task_struct) + 1024))) {
printk("do_IRQ: stack overflow: %ld\n",
esp - sizeof(struct task_struct));
dump_stack();
}
}
#endif
kstat.irqs[cpu][irq]++;
......
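The overflow check above relies on the i386 layout of this era: each task's 8 KiB kernel stack is aligned to its size, with the task_struct at the bottom, so masking %esp with 8191 yields the offset above the stack base. A minimal userspace sketch of the same arithmetic (TASK_STRUCT_SIZE is a made-up placeholder, not the real sizeof):

#include <stdio.h>

#define STACK_SIZE        8192	/* 8 KiB per-task kernel stack, size-aligned */
#define TASK_STRUCT_SIZE  1408	/* hypothetical sizeof(struct task_struct) */

static int stack_overflowing(unsigned long esp)
{
	/* same effect as "andl %%esp,%0" with %0 preloaded to 8191 */
	unsigned long offset = esp & (STACK_SIZE - 1);

	/* warn if less than 1 KiB remains above the task_struct */
	return offset < TASK_STRUCT_SIZE + 1024;
}

int main(void)
{
	printf("%d\n", stack_overflowing(0xc0341f80));	/* plenty of room: 0 */
	printf("%d\n", stack_overflowing(0xc0340400));	/* nearly gone: 1 */
	return 0;
}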
......@@ -9,58 +9,20 @@
#include <asm/uaccess.h>
#include <asm/mmx.h>
#ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
unsigned long
__generic_copy_to_user(void *to, const void *from, unsigned long n)
#ifdef INTEL_MOVSL
static inline int movsl_is_ok(const void *a1, const void *a2, unsigned long n)
{
if (access_ok(VERIFY_WRITE, to, n))
{
if(n<512)
__copy_user(to,from,n);
else
mmx_copy_user(to,from,n);
}
return n;
}
unsigned long
__generic_copy_from_user(void *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
{
if(n<512)
__copy_user_zeroing(to,from,n);
else
mmx_copy_user_zeroing(to, from, n);
}
else
memset(to, 0, n);
return n;
if (n < 64)
return 1;
if ((((const long)a1 ^ (const long)a2) & movsl_mask.mask) == 0)
return 1;
return 0;
}
#else
unsigned long
__generic_copy_to_user(void *to, const void *from, unsigned long n)
static inline int movsl_is_ok(const void *a1, const void *a2, unsigned long n)
{
prefetch(from);
if (access_ok(VERIFY_WRITE, to, n))
__copy_user(to,from,n);
return n;
}
unsigned long
__generic_copy_from_user(void *to, const void *from, unsigned long n)
{
prefetchw(to);
if (access_ok(VERIFY_READ, from, n))
__copy_user_zeroing(to,from,n);
else
memset(to, 0, n);
return n;
return 1;
}
#endif
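A small userspace sketch of the test in movsl_is_ok() above: XORing the two addresses and masking keeps only the low bits in which they differ, so a zero result means source and destination share the same alignment modulo mask+1 and the plain rep;movsl path will perform well. The mask value 7 mirrors what init_intel() selects for PII/PIII/P4.

#include <stdio.h>

static int movsl_ok(unsigned long a1, unsigned long a2,
		    unsigned long n, unsigned long mask)
{
	if (n < 64)
		return 1;	/* short copy: alignment overhead dominates */
	return ((a1 ^ a2) & mask) == 0;
}

int main(void)
{
	printf("%d\n", movsl_ok(0x1008, 0x2010, 256, 7));  /* co-aligned: 1 */
	printf("%d\n", movsl_ok(0x1009, 0x2010, 256, 7));  /* mismatched: 0 */
	return 0;
}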
/*
......@@ -188,3 +150,313 @@ long strnlen_user(const char *s, long n)
:"cc");
return res & mask;
}
#ifdef INTEL_MOVSL
static unsigned long
__copy_user_intel(void *to, const void *from,unsigned long size)
{
int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
"0: movl 32(%4), %%eax\n"
" cmpl $67, %0\n"
" jbe 1f\n"
" movl 64(%4), %%eax\n"
" .align 2,0x90\n"
"1: movl 0(%4), %%eax\n"
" movl 4(%4), %%edx\n"
"2: movl %%eax, 0(%3)\n"
"21: movl %%edx, 4(%3)\n"
" movl 8(%4), %%eax\n"
" movl 12(%4),%%edx\n"
"3: movl %%eax, 8(%3)\n"
"31: movl %%edx, 12(%3)\n"
" movl 16(%4), %%eax\n"
" movl 20(%4), %%edx\n"
"4: movl %%eax, 16(%3)\n"
"41: movl %%edx, 20(%3)\n"
" movl 24(%4), %%eax\n"
" movl 28(%4), %%edx\n"
"10: movl %%eax, 24(%3)\n"
"51: movl %%edx, 28(%3)\n"
" movl 32(%4), %%eax\n"
" movl 36(%4), %%edx\n"
"11: movl %%eax, 32(%3)\n"
"61: movl %%edx, 36(%3)\n"
" movl 40(%4), %%eax\n"
" movl 44(%4), %%edx\n"
"12: movl %%eax, 40(%3)\n"
"71: movl %%edx, 44(%3)\n"
" movl 48(%4), %%eax\n"
" movl 52(%4), %%edx\n"
"13: movl %%eax, 48(%3)\n"
"81: movl %%edx, 52(%3)\n"
" movl 56(%4), %%eax\n"
" movl 60(%4), %%edx\n"
"14: movl %%eax, 56(%3)\n"
"91: movl %%edx, 60(%3)\n"
" addl $-64, %0\n"
" addl $64, %4\n"
" addl $64, %3\n"
" cmpl $63, %0\n"
" ja 0b\n"
"5: movl %0, %%eax\n"
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
"6: rep; movsl\n"
" movl %%eax, %0\n"
"7: rep; movsb\n"
"8:\n"
".section .fixup,\"ax\"\n"
"9: lea 0(%%eax,%0,4),%0\n"
" jmp 8b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 2b,8b\n"
" .long 21b,8b\n"
" .long 3b,8b\n"
" .long 31b,8b\n"
" .long 4b,8b\n"
" .long 41b,8b\n"
" .long 10b,8b\n"
" .long 51b,8b\n"
" .long 11b,8b\n"
" .long 61b,8b\n"
" .long 12b,8b\n"
" .long 71b,8b\n"
" .long 13b,8b\n"
" .long 81b,8b\n"
" .long 14b,8b\n"
" .long 91b,8b\n"
" .long 6b,9b\n"
" .long 7b,8b\n"
".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
return size;
}
static unsigned long
__copy_user_zeroing_intel(void *to, const void *from, unsigned long size)
{
int d0, d1;
__asm__ __volatile__(
" .align 2,0x90\n"
"0: movl 32(%4), %%eax\n"
" cmpl $67, %0\n"
" jbe 2f\n"
"1: movl 64(%4), %%eax\n"
" .align 2,0x90\n"
"2: movl 0(%4), %%eax\n"
"21: movl 4(%4), %%edx\n"
" movl %%eax, 0(%3)\n"
" movl %%edx, 4(%3)\n"
"3: movl 8(%4), %%eax\n"
"31: movl 12(%4),%%edx\n"
" movl %%eax, 8(%3)\n"
" movl %%edx, 12(%3)\n"
"4: movl 16(%4), %%eax\n"
"41: movl 20(%4), %%edx\n"
" movl %%eax, 16(%3)\n"
" movl %%edx, 20(%3)\n"
"10: movl 24(%4), %%eax\n"
"51: movl 28(%4), %%edx\n"
" movl %%eax, 24(%3)\n"
" movl %%edx, 28(%3)\n"
"11: movl 32(%4), %%eax\n"
"61: movl 36(%4), %%edx\n"
" movl %%eax, 32(%3)\n"
" movl %%edx, 36(%3)\n"
"12: movl 40(%4), %%eax\n"
"71: movl 44(%4), %%edx\n"
" movl %%eax, 40(%3)\n"
" movl %%edx, 44(%3)\n"
"13: movl 48(%4), %%eax\n"
"81: movl 52(%4), %%edx\n"
" movl %%eax, 48(%3)\n"
" movl %%edx, 52(%3)\n"
"14: movl 56(%4), %%eax\n"
"91: movl 60(%4), %%edx\n"
" movl %%eax, 56(%3)\n"
" movl %%edx, 60(%3)\n"
" addl $-64, %0\n"
" addl $64, %4\n"
" addl $64, %3\n"
" cmpl $63, %0\n"
" ja 0b\n"
"5: movl %0, %%eax\n"
" shrl $2, %0\n"
" andl $3, %%eax\n"
" cld\n"
"6: rep; movsl\n"
" movl %%eax,%0\n"
"7: rep; movsb\n"
"8:\n"
".section .fixup,\"ax\"\n"
"9: lea 0(%%eax,%0,4),%0\n"
"16: pushl %0\n"
" pushl %%eax\n"
" xorl %%eax,%%eax\n"
" rep; stosb\n"
" popl %%eax\n"
" popl %0\n"
" jmp 8b\n"
".previous\n"
".section __ex_table,\"a\"\n"
" .align 4\n"
" .long 0b,16b\n"
" .long 1b,16b\n"
" .long 2b,16b\n"
" .long 21b,16b\n"
" .long 3b,16b\n"
" .long 31b,16b\n"
" .long 4b,16b\n"
" .long 41b,16b\n"
" .long 10b,16b\n"
" .long 51b,16b\n"
" .long 11b,16b\n"
" .long 61b,16b\n"
" .long 12b,16b\n"
" .long 71b,16b\n"
" .long 13b,16b\n"
" .long 81b,16b\n"
" .long 14b,16b\n"
" .long 91b,16b\n"
" .long 6b,9b\n"
" .long 7b,16b\n"
".previous"
: "=&c"(size), "=&D" (d0), "=&S" (d1)
: "1"(to), "2"(from), "0"(size)
: "eax", "edx", "memory");
return size;
}
#else /* INTEL_MOVSL */
/*
* Leave these declared but undefined. There should be no references to
* them.
*/
unsigned long
__copy_user_zeroing_intel(void *to, const void *from, unsigned long size);
unsigned long
__copy_user_intel(void *to, const void *from,unsigned long size);
#endif /* INTEL_MOVSL */
/* Generic arbitrary sized copy. */
#define __copy_user(to,from,size) \
do { \
int __d0, __d1, __d2; \
__asm__ __volatile__( \
" cmp $7,%0\n" \
" jbe 1f\n" \
" movl %1,%0\n" \
" negl %0\n" \
" andl $7,%0\n" \
" subl %0,%3\n" \
"4: rep; movsb\n" \
" movl %3,%0\n" \
" shrl $2,%0\n" \
" andl $3,%3\n" \
" .align 2,0x90\n" \
"0: rep; movsl\n" \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"5: addl %3,%0\n" \
" jmp 2b\n" \
"3: lea 0(%3,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 4b,5b\n" \
" .long 0b,3b\n" \
" .long 1b,2b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
} while (0)
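A sketch of the prologue arithmetic in __copy_user() above, in plain C: negating the destination address and masking with 7 gives the byte count needed to reach 8-byte alignment, after which the remainder splits into dwords for rep;movsl and tail bytes for rep;movsb.

#include <stdio.h>

int main(void)
{
	unsigned long to = 0x100d;	/* arbitrary misaligned destination */
	unsigned long size = 100;

	unsigned long head = (0UL - to) & 7;	/* "negl %0; andl $7,%0" */
	size -= head;				/* "subl %0,%3" */
	unsigned long dwords = size >> 2;	/* "shrl $2": rep movsl count */
	unsigned long tail = size & 3;		/* "andl $3": rep movsb count */

	/* head=3 dwords=24 tail=1: 3 + 24*4 + 1 == 100 bytes */
	printf("head=%lu dwords=%lu tail=%lu\n", head, dwords, tail);
	return 0;
}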
#define __copy_user_zeroing(to,from,size) \
do { \
int __d0, __d1, __d2; \
__asm__ __volatile__( \
" cmp $7,%0\n" \
" jbe 1f\n" \
" movl %1,%0\n" \
" negl %0\n" \
" andl $7,%0\n" \
" subl %0,%3\n" \
"4: rep; movsb\n" \
" movl %3,%0\n" \
" shrl $2,%0\n" \
" andl $3,%3\n" \
" .align 2,0x90\n" \
"0: rep; movsl\n" \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"5: addl %3,%0\n" \
" jmp 6f\n" \
"3: lea 0(%3,%0,4),%0\n" \
"6: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosb\n" \
" popl %%eax\n" \
" popl %0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 4b,5b\n" \
" .long 0b,3b\n" \
" .long 1b,6b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1), "=r"(__d2) \
: "3"(size), "0"(size), "1"(to), "2"(from) \
: "memory"); \
} while (0)
unsigned long __copy_to_user(void *to, const void *from, unsigned long n)
{
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
n = __copy_user_intel(to, from, n);
return n;
}
unsigned long __copy_from_user(void *to, const void *from, unsigned long n)
{
if (movsl_is_ok(to, from, n))
__copy_user_zeroing(to, from, n);
else
n = __copy_user_zeroing_intel(to, from, n);
return n;
}
unsigned long copy_to_user(void *to, const void *from, unsigned long n)
{
prefetch(from);
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
}
unsigned long copy_from_user(void *to, const void *from, unsigned long n)
{
prefetchw(to);
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
return n;
}
......@@ -19,10 +19,12 @@ void kunmap(struct page *page)
}
/*
* The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
* gives a more generic (and caching) interface. But kmap_atomic can
* be used in IRQ contexts, so in some (very limited) cases we need
* it.
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
* no global lock is needed and because the kmap code must perform a global TLB
* invalidation when the kmap pool wraps.
*
* However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
void *kmap_atomic(struct page *page, enum km_type type)
{
......
......@@ -118,7 +118,6 @@ deadline_find_hash(struct deadline_data *dd, sector_t offset)
while ((entry = next) != hash_list) {
next = entry->next;
prefetch(next);
drq = list_entry_hash(entry);
......@@ -193,8 +192,6 @@ deadline_merge(request_queue_t *q, struct list_head **insert, struct bio *bio)
while ((entry = entry->prev) != sort_list) {
__rq = list_entry_rq(entry);
prefetch(entry->prev);
BUG_ON(__rq->flags & REQ_STARTED);
if (!(__rq->flags & REQ_CMD))
......@@ -302,8 +299,6 @@ static void deadline_move_requests(struct deadline_data *dd, struct request *rq)
struct list_head *nxt = rq->queuelist.next;
int this_rq_cost;
prefetch(nxt);
/*
* take it off the sort and fifo list, move
* to dispatch queue
......
......@@ -37,6 +37,13 @@
#include <asm/uaccess.h>
/* Command group 3 is reserved and should never be used. */
const unsigned char scsi_command_size[8] =
{
6, 10, 10, 12,
16, 12, 10, 10
};
#define BLK_DEFAULT_TIMEOUT (60 * HZ)
int blk_do_rq(request_queue_t *q, struct block_device *bdev, struct request *rq)
......@@ -468,3 +475,4 @@ int scsi_cmd_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long ar
}
EXPORT_SYMBOL(scsi_cmd_ioctl);
EXPORT_SYMBOL(scsi_command_size);
......@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,41)
#include <linux/tqueue.h>
#else
......
......@@ -1803,11 +1803,11 @@ static void vortex_tx_timeout(struct net_device *dev)
dev->name, inb(ioaddr + TxStatus),
inw(ioaddr + EL3_STATUS));
EL3WINDOW(4);
printk(KERN_ERR " diagnostics: net %04x media %04x dma %08lx fifo %04x\n",
inw(ioaddr + Wn4_NetDiag),
inw(ioaddr + Wn4_Media),
inl(ioaddr + PktStatus),
inw(ioaddr + Wn4_FIFODiag));
printk(KERN_ERR " diagnostics: net %04x media %04x dma %08x fifo %04x\n",
(unsigned)inw(ioaddr + Wn4_NetDiag),
(unsigned)inw(ioaddr + Wn4_Media),
(unsigned)inl(ioaddr + PktStatus),
(unsigned)inw(ioaddr + Wn4_FIFODiag));
/* Slight code bloat to be user friendly. */
if ((inb(ioaddr + TxStatus) & 0x88) == 0x88)
printk(KERN_ERR "%s: Transmitter encountered 16 collisions --"
......@@ -2643,8 +2643,8 @@ dump_tx_ring(struct net_device *dev)
vp->full_bus_master_tx,
vp->dirty_tx, vp->dirty_tx % TX_RING_SIZE,
vp->cur_tx, vp->cur_tx % TX_RING_SIZE);
printk(KERN_ERR " Transmit list %8.8lx vs. %p.\n",
inl(ioaddr + DownListPtr),
printk(KERN_ERR " Transmit list %8.8x vs. %p.\n",
(unsigned)inl(ioaddr + DownListPtr),
&vp->tx_ring[vp->dirty_tx % TX_RING_SIZE]);
issue_and_wait(dev, DownStall);
for (i = 0; i < TX_RING_SIZE; i++) {
......
......@@ -123,12 +123,6 @@ struct scsi_host_sg_pool scsi_sg_pools[SG_MEMPOOL_NR] = {
*/
unsigned long scsi_pid;
Scsi_Cmnd *last_cmnd;
/* Command group 3 is reserved and should never be used. */
const unsigned char scsi_command_size[8] =
{
6, 10, 10, 12,
16, 12, 10, 10
};
static unsigned long serial_number;
struct softscsi_data {
......
......@@ -751,15 +751,9 @@ static int scsi_init_io(Scsi_Cmnd *SCpnt)
int count, gfp_mask;
/*
* non-sg block request. FIXME: check bouncing for isa hosts!
* if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer
*/
if ((req->flags & REQ_BLOCK_PC) && !req->bio) {
/*
* FIXME: isa bouncing
*/
if (SCpnt->host->unchecked_isa_dma)
goto fail;
SCpnt->request_bufflen = req->data_len;
SCpnt->request_buffer = req->data;
req->buffer = req->data;
......@@ -816,7 +810,6 @@ static int scsi_init_io(Scsi_Cmnd *SCpnt)
/*
* kill it. there should be no leftover blocks in this request
*/
fail:
SCpnt = scsi_end_request(SCpnt, 0, req->nr_sectors);
BUG_ON(SCpnt);
out:
......
......@@ -39,7 +39,6 @@ EXPORT_SYMBOL(scsi_partsize);
EXPORT_SYMBOL(scsi_bios_ptable);
EXPORT_SYMBOL(scsi_allocate_device);
EXPORT_SYMBOL(scsi_do_cmd);
EXPORT_SYMBOL(scsi_command_size);
EXPORT_SYMBOL(scsi_ioctl);
EXPORT_SYMBOL(print_command);
EXPORT_SYMBOL(print_sense);
......
......@@ -608,7 +608,7 @@ void kick_iocb(struct kiocb *iocb)
}
if (!kiocbTryKick(iocb)) {
long flags;
unsigned long flags;
spin_lock_irqsave(&ctx->ctx_lock, flags);
list_add_tail(&iocb->ki_run_list, &ctx->run_list);
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
......
......@@ -120,7 +120,7 @@ blkdev_direct_IO(int rw, struct file *file, const struct iovec *iov,
{
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov, offset,
return generic_direct_IO(rw, inode, inode->i_bdev, iov, offset,
nr_segs, blkdev_get_blocks);
}
......@@ -308,6 +308,7 @@ struct block_device *bdget(dev_t dev)
new_bdev->bd_dev = dev;
new_bdev->bd_contains = NULL;
new_bdev->bd_inode = inode;
new_bdev->bd_block_size = (1 << inode->i_blkbits);
new_bdev->bd_part_count = 0;
new_bdev->bd_invalidated = 0;
inode->i_mode = S_IFBLK;
......
......@@ -29,30 +29,67 @@
*/
#define DIO_PAGES 64
/*
* This code generally works in units of "dio_blocks". A dio_block is
* somewhere between the hard sector size and the filesystem block size. It
* is determined on a per-invocation basis. When talking to the filesystem
* we need to convert dio_blocks to fs_blocks by scaling the dio_block quantity
* down by dio->blkfactor. Similarly, fs-blocksize quantities are converted
* to dio_block quantities by shifting left by blkfactor.
*
* If blkfactor is zero then the user's request was aligned to the filesystem's
* blocksize.
*/
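A minimal sketch of the conversions just described, assuming 512-byte dio blocks inside a 4096-byte filesystem block (so blkfactor = 3):

#include <stdio.h>

int main(void)
{
	unsigned blkbits = 9;			/* dio blocks: 512 bytes */
	unsigned fs_blkbits = 12;		/* fs blocks: 4096 bytes */
	unsigned blkfactor = fs_blkbits - blkbits;	/* 3 */

	unsigned long long block_in_file = 21;	/* current offset, dio blocks */
	unsigned long long fs_block = block_in_file >> blkfactor;	/* 2 */
	unsigned long long first_dio = fs_block << blkfactor;		/* 16 */

	printf("fs_block=%llu first_dio_block=%llu sub_block_offset=%llu\n",
	       fs_block, first_dio,
	       block_in_file & ((1ULL << blkfactor) - 1));	/* 2 16 5 */
	return 0;
}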
struct dio {
/* BIO submission state */
struct bio *bio; /* bio under assembly */
struct inode *inode;
int rw;
unsigned blkbits; /* doesn't change */
sector_t block_in_file; /* changes */
unsigned blkfactor; /* When we're using an alignment which
is finer than the filesystem's soft
blocksize, this specifies how much
finer. blkfactor=2 means 1/4-block
alignment. Does not change */
unsigned start_zero_done; /* flag: sub-blocksize zeroing has
been performed at the start of a
write */
int pages_in_io; /* approximate total IO pages */
sector_t block_in_file; /* Current offset into the underlying
file in dio_block units. */
unsigned blocks_available; /* At block_in_file. changes */
sector_t final_block_in_request;/* doesn't change */
unsigned first_block_in_page; /* doesn't change, Used only once */
int boundary; /* prev block is at a boundary */
int reap_counter; /* rate limit reaping */
get_blocks_t *get_blocks; /* block mapping function */
sector_t last_block_in_bio; /* current final block in bio */
sector_t next_block_in_bio; /* next block to be added to bio */
sector_t final_block_in_bio; /* current final block in bio + 1 */
sector_t next_block_for_io; /* next block to be put under IO,
in dio_blocks units */
struct buffer_head map_bh; /* last get_blocks() result */
/* Page fetching state */
/*
* Deferred addition of a page to the dio. These variables are
* private to dio_send_cur_page(), submit_page_section() and
* dio_bio_add_page().
*/
struct page *cur_page; /* The page */
unsigned cur_page_offset; /* Offset into it, in bytes */
unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
sector_t cur_page_block; /* Where it starts */
/*
* Page fetching state. These variables belong to dio_refill_pages().
*/
int curr_page; /* changes */
int total_pages; /* doesn't change */
int pages_left; /* approximate total IO pages */
unsigned long curr_user_address;/* changes */
/* Page queue */
/*
* Page queue. These variables belong to dio_refill_pages() and
* dio_get_page().
*/
struct page *pages[DIO_PAGES]; /* page buffer */
unsigned head; /* next page to process */
unsigned tail; /* last valid page + 1 */
......@@ -318,73 +355,40 @@ static int dio_bio_reap(struct dio *dio)
*
* In the case of filesystem holes: the fs may return an arbitrarily-large
* hole by returning an appropriate value in b_size and by clearing
* buffer_mapped(). This code _should_ handle that case correctly, but it has
* only been tested against single-block holes (b_size == blocksize).
* buffer_mapped(). However the direct-io code will only process holes one
* block at a time - it will repeatedly call get_blocks() as it walks the hole.
*/
static int get_more_blocks(struct dio *dio)
{
int ret;
struct buffer_head *map_bh = &dio->map_bh;
if (dio->blocks_available)
return 0;
sector_t fs_startblk; /* Into file, in filesystem-sized blocks */
unsigned long fs_count; /* Number of filesystem-sized blocks */
unsigned long dio_count;/* Number of dio_block-sized blocks */
unsigned long blkmask;
/*
* If there was a memory error and we've overwritten all the
* mapped blocks then we can now return that memory error
*/
if (dio->page_errors) {
ret = dio->page_errors;
goto out;
}
map_bh->b_state = 0;
map_bh->b_size = 0;
BUG_ON(dio->block_in_file >= dio->final_block_in_request);
ret = (*dio->get_blocks)(dio->inode, dio->block_in_file,
dio->final_block_in_request - dio->block_in_file,
map_bh, dio->rw == WRITE);
if (ret)
goto out;
if (buffer_mapped(map_bh)) {
BUG_ON(map_bh->b_size == 0);
BUG_ON((map_bh->b_size & ((1 << dio->blkbits) - 1)) != 0);
dio->blocks_available = map_bh->b_size >> dio->blkbits;
/* blockdevs do not set buffer_new */
if (buffer_new(map_bh)) {
sector_t block = map_bh->b_blocknr;
unsigned i;
for (i = 0; i < dio->blocks_available; i++)
unmap_underlying_metadata(map_bh->b_bdev,
block++);
}
} else {
BUG_ON(dio->rw != READ);
if (dio->bio)
dio_bio_submit(dio);
ret = dio->page_errors;
if (ret == 0) {
map_bh->b_state = 0;
map_bh->b_size = 0;
BUG_ON(dio->block_in_file >= dio->final_block_in_request);
fs_startblk = dio->block_in_file >> dio->blkfactor;
dio_count = dio->final_block_in_request - dio->block_in_file;
fs_count = dio_count >> dio->blkfactor;
blkmask = (1 << dio->blkfactor) - 1;
if (dio_count & blkmask)
fs_count++;
ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
map_bh, dio->rw == WRITE);
}
dio->next_block_in_bio = map_bh->b_blocknr;
out:
return ret;
}
/*
* Check to see if we can continue to grow the BIO. If not, then send it.
*/
static void dio_prep_bio(struct dio *dio)
{
if (dio->bio == NULL)
return;
if (dio->boundary ||
dio->last_block_in_bio != dio->next_block_in_bio - 1)
dio_bio_submit(dio);
}
/*
* There is no bio. Make one now.
*/
......@@ -397,7 +401,7 @@ static int dio_new_bio(struct dio *dio, sector_t blkno)
if (ret)
goto out;
sector = blkno << (dio->blkbits - 9);
nr_pages = min(dio->pages_left, bio_get_nr_vecs(dio->map_bh.b_bdev));
nr_pages = min(dio->pages_in_io, bio_get_nr_vecs(dio->map_bh.b_bdev));
BUG_ON(nr_pages <= 0);
ret = dio_bio_alloc(dio, dio->map_bh.b_bdev, sector, nr_pages);
dio->boundary = 0;
......@@ -405,37 +409,199 @@ static int dio_new_bio(struct dio *dio, sector_t blkno)
return ret;
}
/*
* Attempt to put the current chunk of 'cur_page' into the current BIO. If
* that was successful then update final_block_in_bio and take a ref against
* the just-added page.
*/
static int dio_bio_add_page(struct dio *dio)
{
int ret;
static int
dio_bio_add_page(struct dio *dio, struct page *page,
unsigned int bv_len, unsigned int bv_offset, sector_t blkno)
ret = bio_add_page(dio->bio, dio->cur_page,
dio->cur_page_len, dio->cur_page_offset);
if (ret == dio->cur_page_len) {
dio->pages_in_io--;
page_cache_get(dio->cur_page);
dio->final_block_in_bio = dio->cur_page_block +
(dio->cur_page_len >> dio->blkbits);
ret = 0;
}
return ret;
}
/*
* Put cur_page under IO. The section of cur_page which is described by
* cur_page_offset,cur_page_len is put into a BIO. The section of cur_page
* starts on-disk at cur_page_block.
*
* We take a ref against the page here (on behalf of its presence in the bio).
*
* The caller of this function is responsible for removing cur_page from the
* dio, and for dropping the refcount which came from that presence.
*/
static int dio_send_cur_page(struct dio *dio)
{
int ret = 0;
if (bv_len == 0)
goto out;
if (dio->bio) {
/*
* See whether this new request is contiguous with the old
*/
if (dio->final_block_in_bio != dio->cur_page_block)
dio_bio_submit(dio);
/*
* Submit now if the underlying fs is about to perform a
* metadata read
*/
if (dio->boundary)
dio_bio_submit(dio);
}
/* Take a ref against the page each time it is placed into a BIO */
page_cache_get(page);
if (bio_add_page(dio->bio, page, bv_len, bv_offset) < bv_len) {
if (dio->bio == NULL) {
ret = dio_new_bio(dio, dio->cur_page_block);
if (ret)
goto out;
}
if (dio_bio_add_page(dio) != 0) {
dio_bio_submit(dio);
ret = dio_new_bio(dio, blkno);
ret = dio_new_bio(dio, dio->cur_page_block);
if (ret == 0) {
ret = bio_add_page(dio->bio, page, bv_len, bv_offset);
BUG_ON(ret < bv_len);
} else {
/* The page didn't make it into a BIO */
page_cache_release(page);
ret = dio_bio_add_page(dio);
BUG_ON(ret != 0);
}
}
out:
return ret;
}
/*
* An autonomous function to put a chunk of a page under deferred IO.
*
* The caller doesn't actually know (or care) whether this piece of page is in
* a BIO, or is under IO or whatever. We just take care of all possible
* situations here. The separation between the logic of do_direct_IO() and
* that of submit_page_section() is important for clarity. Please don't break it.
*
* The chunk of page starts on-disk at blocknr.
*
* We perform deferred IO, by recording the last-submitted page inside our
* private part of the dio structure. If possible, we just expand the IO
* across that page here.
*
* If that doesn't work out then we put the old page into the bio and add this
* page to the dio instead.
*/
static int
submit_page_section(struct dio *dio, struct page *page,
unsigned offset, unsigned len, sector_t blocknr)
{
int ret = 0;
/*
* Can we just grow the current page's presence in the dio?
*/
if ( (dio->cur_page == page) &&
(dio->cur_page_offset + dio->cur_page_len == offset) &&
(dio->cur_page_block +
(dio->cur_page_len >> dio->blkbits) == blocknr)) {
dio->cur_page_len += len;
/*
* If dio->boundary then we want to schedule the IO now to
* avoid metadata seeks.
*/
if (dio->boundary) {
ret = dio_send_cur_page(dio);
page_cache_release(dio->cur_page);
dio->cur_page = NULL;
}
goto out;
}
/*
* If there's a deferred page already there then send it.
*/
if (dio->cur_page) {
ret = dio_send_cur_page(dio);
page_cache_release(dio->cur_page);
dio->cur_page = NULL;
if (ret)
goto out;
}
dio->pages_left--;
page_cache_get(page); /* It is in dio */
dio->cur_page = page;
dio->cur_page_offset = offset;
dio->cur_page_len = len;
dio->cur_page_block = blocknr;
out:
return ret;
}
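A compact sketch of the coalescing test at the top of submit_page_section(): a new (page, offset, len, block) chunk may extend the deferred one only when it is byte-contiguous within the page and block-contiguous on disk.

#include <assert.h>

struct cur {				/* mirrors the dio->cur_page_* fields */
	const void *page;
	unsigned offset, len;
	unsigned long long block;
};

static int can_grow(const struct cur *c, const void *page, unsigned offset,
		    unsigned long long block, unsigned blkbits)
{
	return c->page == page &&
	       c->offset + c->len == offset &&
	       c->block + (c->len >> blkbits) == block;
}

int main(void)
{
	struct cur c = { (void *)0x1000, 0, 1024, 80 };	/* two 512-byte blocks */
	assert(can_grow(&c, (void *)0x1000, 1024, 82, 9));   /* contiguous */
	assert(!can_grow(&c, (void *)0x1000, 1024, 90, 9));  /* gap on disk */
	return 0;
}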
/*
* Clean any dirty buffers in the blockdev mapping which alias newly-created
* file blocks. Only called for S_ISREG files - blockdevs do not set
* buffer_new
*/
static void clean_blockdev_aliases(struct dio *dio)
{
unsigned i;
for (i = 0; i < dio->blocks_available; i++) {
unmap_underlying_metadata(dio->map_bh.b_bdev,
dio->map_bh.b_blocknr + i);
}
}
/*
* If we are not writing the entire block and get_block() allocated
* the block for us, we need to fill in the unused portion of the
* block with zeros. This happens only if user-buffer, fileoffset or
* io length is not filesystem block-size multiple.
*
* `end' is zero if we're doing the start of the IO, 1 at the end of the
* IO.
*/
static void dio_zero_block(struct dio *dio, int end)
{
unsigned dio_blocks_per_fs_block;
unsigned this_chunk_blocks; /* In dio_blocks */
unsigned this_chunk_bytes;
struct page *page;
dio->start_zero_done = 1;
if (!dio->blkfactor || !buffer_new(&dio->map_bh))
return;
dio_blocks_per_fs_block = 1 << dio->blkfactor;
this_chunk_blocks = dio->block_in_file & (dio_blocks_per_fs_block - 1);
if (!this_chunk_blocks)
return;
/*
* We need to zero out part of an fs block. It is either at the
* beginning or the end of the fs block.
*/
if (end)
this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks;
this_chunk_bytes = this_chunk_blocks << dio->blkbits;
page = ZERO_PAGE(dio->cur_user_address);
if (submit_page_section(dio, page, 0, this_chunk_bytes,
dio->next_block_for_io))
return;
dio->next_block_for_io += this_chunk_blocks;
}
/*
* Walk the user pages, and the file, mapping blocks to disk and emitting BIOs.
* Walk the user pages, and the file, mapping blocks to disk and generating
* a sequence of (page,offset,len,block) mappings. These mappings are injected
* into submit_page_section(), which takes care of the next stage of submission
*
* Direct IO against a blockdev differs from direct IO against a file, because
* we can happily perform page-sized but 512-byte aligned IOs. It is important that
......@@ -448,73 +614,101 @@ dio_bio_add_page(struct dio *dio, struct page *page,
* it should set b_size to PAGE_SIZE or more inside get_blocks(). This gives
* fine alignment but still allows this function to work in PAGE_SIZE units.
*/
int do_direct_IO(struct dio *dio)
static int do_direct_IO(struct dio *dio)
{
const unsigned blkbits = dio->blkbits;
const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
struct page *page;
unsigned block_in_page;
struct buffer_head *map_bh = &dio->map_bh;
int ret = 0;
/* The I/O can start at any block offset within the first page */
block_in_page = dio->first_block_in_page;
while (dio->block_in_file < dio->final_block_in_request) {
int new_page; /* Need to insert this page into the BIO? */
unsigned int bv_offset;
unsigned int bv_len;
sector_t curr_blkno;
page = dio_get_page(dio);
if (IS_ERR(page)) {
ret = PTR_ERR(page);
goto out;
}
new_page = 1;
bv_offset = 0;
bv_len = 0;
curr_blkno = 0;
while (block_in_page < blocks_per_page) {
unsigned offset_in_page = block_in_page << blkbits;
unsigned this_chunk_bytes; /* # of bytes mapped */
unsigned this_chunk_blocks; /* # of blocks */
unsigned u;
ret = get_more_blocks(dio);
if (ret)
goto fail_release;
if (dio->blocks_available == 0) {
/*
* Need to go and map some more disk
*/
unsigned long blkmask;
unsigned long dio_remainder;
ret = get_more_blocks(dio);
if (ret) {
page_cache_release(page);
goto out;
}
if (!buffer_mapped(map_bh))
goto do_holes;
dio->blocks_available =
map_bh->b_size >> dio->blkbits;
dio->next_block_for_io =
map_bh->b_blocknr << dio->blkfactor;
if (buffer_new(map_bh))
clean_blockdev_aliases(dio);
if (!dio->blkfactor)
goto do_holes;
blkmask = (1 << dio->blkfactor) - 1;
dio_remainder = (dio->block_in_file & blkmask);
/*
* If we are at the start of IO and that IO
* starts partway into a fs-block,
* dio_remainder will be non-zero. If the IO
* is a read then we can simply advance the IO
* cursor to the first block which is to be
* read. But if the IO is a write and the
* block was newly allocated we cannot do that;
* the start of the fs block must be zeroed out
* on-disk
*/
if (!buffer_new(map_bh))
dio->next_block_for_io += dio_remainder;
dio->blocks_available -= dio_remainder;
}
do_holes:
/* Handle holes */
if (!buffer_mapped(&dio->map_bh)) {
if (!buffer_mapped(map_bh)) {
char *kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr + (block_in_page << blkbits),
0, 1 << blkbits);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
dio->block_in_file++;
dio->next_block_in_bio++;
block_in_page++;
goto next_block;
}
dio_prep_bio(dio);
if (dio->bio == NULL) {
ret = dio_new_bio(dio, dio->next_block_in_bio);
if (ret)
goto fail_release;
new_page = 1;
}
if (new_page) {
bv_len = 0;
bv_offset = block_in_page << blkbits;
curr_blkno = dio->next_block_in_bio;
new_page = 0;
}
/* Work out how much disk we can add to this page */
/*
* If we're performing IO which has an alignment which
* is finer than the underlying fs, go check to see if
* we must zero out the start of this block.
*/
if (unlikely(dio->blkfactor && !dio->start_zero_done))
dio_zero_block(dio, 0);
/*
* Work out, in this_chunk_blocks, how much disk we
* can add to this page
*/
this_chunk_blocks = dio->blocks_available;
u = (PAGE_SIZE - (bv_len + bv_offset)) >> blkbits;
u = (PAGE_SIZE - offset_in_page) >> blkbits;
if (this_chunk_blocks > u)
this_chunk_blocks = u;
u = dio->final_block_in_request - dio->block_in_file;
......@@ -523,10 +717,15 @@ int do_direct_IO(struct dio *dio)
this_chunk_bytes = this_chunk_blocks << blkbits;
BUG_ON(this_chunk_bytes == 0);
bv_len += this_chunk_bytes;
dio->next_block_in_bio += this_chunk_blocks;
dio->last_block_in_bio = dio->next_block_in_bio - 1;
dio->boundary = buffer_boundary(&dio->map_bh);
dio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, page, offset_in_page,
this_chunk_bytes, dio->next_block_for_io);
if (ret) {
page_cache_release(page);
goto out;
}
dio->next_block_for_io += this_chunk_blocks;
dio->block_in_file += this_chunk_blocks;
block_in_page += this_chunk_blocks;
dio->blocks_available -= this_chunk_blocks;
......@@ -536,27 +735,20 @@ int do_direct_IO(struct dio *dio)
if (dio->block_in_file == dio->final_block_in_request)
break;
}
ret = dio_bio_add_page(dio, page, bv_len,
bv_offset, curr_blkno);
if (ret)
goto fail_release;
/* Drop the ref which was taken in get_user_pages() */
page_cache_release(page);
block_in_page = 0;
}
goto out;
fail_release:
page_cache_release(page);
out:
return ret;
}
int
static int
direct_io_worker(int rw, struct inode *inode, const struct iovec *iov,
loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks)
loff_t offset, unsigned long nr_segs, unsigned blkbits,
get_blocks_t get_blocks)
{
const unsigned blkbits = inode->i_blkbits;
unsigned long user_addr;
int seg, ret2, ret = 0;
struct dio dio;
......@@ -566,14 +758,18 @@ direct_io_worker(int rw, struct inode *inode, const struct iovec *iov,
dio.inode = inode;
dio.rw = rw;
dio.blkbits = blkbits;
dio.blkfactor = inode->i_blkbits - blkbits;
dio.start_zero_done = 0;
dio.block_in_file = offset >> blkbits;
dio.blocks_available = 0;
dio.cur_page = NULL;
dio.boundary = 0;
dio.reap_counter = 0;
dio.get_blocks = get_blocks;
dio.last_block_in_bio = -1;
dio.next_block_in_bio = -1;
dio.final_block_in_bio = -1;
dio.next_block_for_io = -1;
dio.page_errors = 0;
......@@ -582,10 +778,10 @@ direct_io_worker(int rw, struct inode *inode, const struct iovec *iov,
spin_lock_init(&dio.bio_list_lock);
dio.bio_list = NULL;
dio.waiter = NULL;
dio.pages_left = 0;
dio.pages_in_io = 0;
for (seg = 0; seg < nr_segs; seg++)
dio.pages_left += (iov[seg].iov_len / PAGE_SIZE) + 2;
dio.pages_in_io += (iov[seg].iov_len >> blkbits) + 2;
for (seg = 0; seg < nr_segs; seg++) {
user_addr = (unsigned long)iov[seg].iov_base;
......@@ -619,6 +815,18 @@ direct_io_worker(int rw, struct inode *inode, const struct iovec *iov,
} /* end iovec loop */
/*
* There may be some unwritten disk at the end of a part-written
* fs-block-sized block. Go zero that now.
*/
dio_zero_block(&dio, 1);
if (dio.cur_page) {
ret2 = dio_send_cur_page(&dio);
page_cache_release(dio.cur_page);
if (ret == 0)
ret = ret2;
}
ret2 = dio_await_completion(&dio);
if (ret == 0)
ret = ret2;
......@@ -634,27 +842,44 @@ direct_io_worker(int rw, struct inode *inode, const struct iovec *iov,
* This is a library function for use by filesystem drivers.
*/
int
generic_direct_IO(int rw, struct inode *inode, const struct iovec *iov,
loff_t offset, unsigned long nr_segs, get_blocks_t get_blocks)
generic_direct_IO(int rw, struct inode *inode, struct block_device *bdev,
const struct iovec *iov, loff_t offset, unsigned long nr_segs,
get_blocks_t get_blocks)
{
int seg;
size_t size;
unsigned long addr;
unsigned blocksize_mask = (1 << inode->i_blkbits) - 1;
unsigned blkbits = inode->i_blkbits;
unsigned bdev_blkbits = 0;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
if (offset & blocksize_mask)
goto out;
if (bdev)
bdev_blkbits = blksize_bits(bdev_hardsect_size(bdev));
if (offset & blocksize_mask) {
if (bdev)
blkbits = bdev_blkbits;
blocksize_mask = (1 << blkbits) - 1;
if (offset & blocksize_mask)
goto out;
}
/* Check the memory alignment. Blocks cannot straddle pages */
for (seg = 0; seg < nr_segs; seg++) {
addr = (unsigned long)iov[seg].iov_base;
size = iov[seg].iov_len;
if ((addr & blocksize_mask) || (size & blocksize_mask))
goto out;
if ((addr & blocksize_mask) || (size & blocksize_mask)) {
if (bdev)
blkbits = bdev_blkbits;
blocksize_mask = (1 << blkbits) - 1;
if ((addr & blocksize_mask) || (size & blocksize_mask))
goto out;
}
}
retval = direct_io_worker(rw, inode, iov, offset, nr_segs, get_blocks);
retval = direct_io_worker(rw, inode, iov, offset,
nr_segs, blkbits, get_blocks);
out:
return retval;
}
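A userspace sketch of the relaxed alignment policy above: an offset (or iovec address/length) that fails the filesystem-blocksize test gets a second chance against the device's hard sector size, so 512-byte-aligned IO to a blockdev still qualifies when the fs blocksize is 4096.

#include <stdio.h>

static int aligned_ok(unsigned long long offset, unsigned fs_blkbits,
		      int have_bdev, unsigned hardsect_bits)
{
	unsigned long long mask = (1ULL << fs_blkbits) - 1;

	if (offset & mask) {
		if (!have_bdev)
			return 0;
		mask = (1ULL << hardsect_bits) - 1;	/* retry vs. hard sector */
		if (offset & mask)
			return 0;
	}
	return 1;
}

int main(void)
{
	printf("%d\n", aligned_ok(512, 12, 1, 9));	/* blockdev: passes */
	printf("%d\n", aligned_ok(512, 12, 0, 9));	/* plain fs: rejected */
	return 0;
}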
......@@ -675,7 +900,7 @@ generic_file_direct_IO(int rw, struct file *file, const struct iovec *iov,
}
retval = mapping->a_ops->direct_IO(rw, file, iov, offset, nr_segs);
if (mapping->nrpages)
if (rw == WRITE && mapping->nrpages)
invalidate_inode_pages2(mapping);
out:
return retval;
......
......@@ -624,7 +624,7 @@ ext2_direct_IO(int rw, struct file *file, const struct iovec *iov,
{
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov,
return generic_direct_IO(rw, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, ext2_get_blocks);
}
......
......@@ -1431,7 +1431,7 @@ static int ext3_direct_IO(int rw, struct file *file,
}
}
ret = generic_direct_IO(rw, inode, iov, offset,
ret = generic_direct_IO(rw, inode, inode->i_sb->s_bdev, iov, offset,
nr_segs, ext3_direct_io_get_blocks);
out_stop:
......
......@@ -986,6 +986,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
goto out_fail;
blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
if (!blocksize) {
printk(KERN_ERR "EXT3-fs: unable to set blocksize\n");
goto out_fail;
}
/*
* The ext3 superblock will not be buffer aligned for other than 1kB
......
......@@ -226,7 +226,7 @@ __writeback_single_inode(struct inode *inode, int sync,
* The inodes to be written are parked on sb->s_io. They are moved back onto
* sb->s_dirty as they are selected for writing. This way, none can be missed
* on the writer throttling path, and we get decent balancing between many
* throlttled threads: we don't want them all piling up on __wait_on_inode.
* throttled threads: we don't want them all piling up on __wait_on_inode.
*/
static void
sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
......
......@@ -315,7 +315,7 @@ static int jfs_direct_IO(int rw, struct file *file, const struct iovec *iov,
{
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov,
return generic_direct_IO(rw, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, jfs_get_blocks);
}
......
......@@ -260,7 +260,8 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
return 0;
}
int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry)
int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
int they_are_dirs = S_ISDIR(old_dentry->d_inode->i_mode);
......@@ -277,3 +278,48 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode
}
return 0;
}
int simple_readpage(struct file *file, struct page *page)
{
void *kaddr;
if (PageUptodate(page))
goto out;
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, PAGE_CACHE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(page);
SetPageUptodate(page);
out:
unlock_page(page);
return 0;
}
int simple_prepare_write(struct file *file, struct page *page,
unsigned from, unsigned to)
{
if (!PageUptodate(page)) {
if (to - from != PAGE_CACHE_SIZE) {
void *kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, from);
memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
}
SetPageUptodate(page);
}
return 0;
}
int simple_commit_write(struct file *file, struct page *page,
unsigned offset, unsigned to)
{
struct inode *inode = page->mapping->host;
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
if (pos > inode->i_size)
inode->i_size = pos;
set_page_dirty(page);
return 0;
}
......@@ -643,7 +643,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
goto cleanup_file;
}
f->f_ra.ra_pages = inode->i_mapping->backing_dev_info->ra_pages;
file_ra_state_init(&f->f_ra, inode->i_mapping);
f->f_dentry = dentry;
f->f_vfsmnt = mnt;
f->f_pos = 0;
......
......@@ -39,6 +39,7 @@
#include <linux/seq_file.h>
#include <linux/times.h>
#include <linux/profile.h>
#include <linux/blkdev.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
......@@ -404,10 +405,14 @@ static int kstat_read_proc(char *page, char **start, off_t off,
len += sprintf(page + len,
"\nctxt %lu\n"
"btime %lu\n"
"processes %lu\n",
"processes %lu\n"
"procs_running %lu\n"
"procs_blocked %u\n",
nr_context_switches(),
xtime.tv_sec - jif / HZ,
total_forks);
total_forks,
nr_running(),
atomic_read(&nr_iowait_tasks));
return proc_calc_metrics(page, start, off, count, eof, len);
}
......
......@@ -47,48 +47,6 @@ static struct backing_dev_info ramfs_backing_dev_info = {
.memory_backed = 1, /* Does not contribute to dirty memory */
};
/*
* Read a page. Again trivial. If it didn't already exist
* in the page cache, it is zero-filled.
*/
static int ramfs_readpage(struct file *file, struct page * page)
{
if (!PageUptodate(page)) {
char *kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, PAGE_CACHE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
flush_dcache_page(page);
SetPageUptodate(page);
}
unlock_page(page);
return 0;
}
static int ramfs_prepare_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
if (!PageUptodate(page)) {
char *kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, PAGE_CACHE_SIZE);
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_USER0);
SetPageUptodate(page);
}
set_page_dirty(page);
return 0;
}
static int ramfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{
struct inode *inode = page->mapping->host;
loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
if (pos > inode->i_size)
inode->i_size = pos;
return 0;
}
struct inode *ramfs_get_inode(struct super_block *sb, int mode, int dev)
{
struct inode * inode = new_inode(sb);
......@@ -175,10 +133,10 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
}
static struct address_space_operations ramfs_aops = {
.readpage = ramfs_readpage,
.readpage = simple_readpage,
.writepage = fail_writepage,
.prepare_write = ramfs_prepare_write,
.commit_write = ramfs_commit_write
.prepare_write = simple_prepare_write,
.commit_write = simple_commit_write
};
static struct file_operations ramfs_file_operations = {
......
......@@ -607,8 +607,8 @@ linvfs_direct_IO(
{
struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
return generic_direct_IO(rw, inode, iov, offset, nr_segs,
linvfs_get_blocks_direct);
return generic_direct_IO(rw, inode, NULL,
iov, offset, nr_segs, linvfs_get_blocks_direct);
}
......
......@@ -33,7 +33,21 @@
#define segment_eq(a,b) ((a).seg == (b).seg)
extern int __verify_write(const void *, unsigned long);
/*
* movsl can be slow when source and dest are not both 8-byte aligned
*/
#if defined(CONFIG_M586MMX) || defined(CONFIG_M686) || \
defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUM4)
#define INTEL_MOVSL
#endif
#ifdef INTEL_MOVSL
extern struct movsl_mask {
int mask;
} ____cacheline_aligned_in_smp movsl_mask;
#endif
int __verify_write(const void *, unsigned long);
#define __addr_ok(addr) ((unsigned long)(addr) < (current_thread_info()->addr_limit.seg))
......@@ -248,354 +262,10 @@ do { \
: "m"(__m(addr)), "i"(-EFAULT), "0"(err))
/*
* Copy To/From Userspace
*/
/* Generic arbitrary sized copy. */
#define __copy_user(to,from,size) \
do { \
int __d0, __d1; \
__asm__ __volatile__( \
"0: rep; movsl\n" \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: lea 0(%3,%0,4),%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,2b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
: "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
: "memory"); \
} while (0)
#define __copy_user_zeroing(to,from,size) \
do { \
int __d0, __d1; \
__asm__ __volatile__( \
"0: rep; movsl\n" \
" movl %3,%0\n" \
"1: rep; movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: lea 0(%3,%0,4),%0\n" \
"4: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosb\n" \
" popl %%eax\n" \
" popl %0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=&c"(size), "=&D" (__d0), "=&S" (__d1) \
: "r"(size & 3), "0"(size / 4), "1"(to), "2"(from) \
: "memory"); \
} while (0)
/* We let the __ versions of copy_from/to_user inline, because they're often
* used in fast paths and have only a small space overhead.
*/
static inline unsigned long
__generic_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
{
__copy_user_zeroing(to,from,n);
return n;
}
static inline unsigned long
__generic_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
{
__copy_user(to,from,n);
return n;
}
/* Optimize just a little bit when we know the size of the move. */
#define __constant_copy_user(to, from, size) \
do { \
int __d0, __d1; \
switch (size & 3) { \
default: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1:\n" \
".section .fixup,\"ax\"\n" \
"2: shl $2,%0\n" \
" jmp 1b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,2b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
case 1: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: shl $2,%0\n" \
"4: incl %0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
case 2: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsw\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: shl $2,%0\n" \
"4: addl $2,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
case 3: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsw\n" \
"2: movsb\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: shl $2,%0\n" \
"5: addl $2,%0\n" \
"6: incl %0\n" \
" jmp 3b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,4b\n" \
" .long 1b,5b\n" \
" .long 2b,6b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
} \
} while (0)
/* Optimize just a little bit when we know the size of the move. */
#define __constant_copy_user_zeroing(to, from, size) \
do { \
int __d0, __d1; \
switch (size & 3) { \
default: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1:\n" \
".section .fixup,\"ax\"\n" \
"2: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosl\n" \
" popl %%eax\n" \
" popl %0\n" \
" shl $2,%0\n" \
" jmp 1b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,2b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
case 1: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsb\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosl\n" \
" stosb\n" \
" popl %%eax\n" \
" popl %0\n" \
" shl $2,%0\n" \
" incl %0\n" \
" jmp 2b\n" \
"4: pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" stosb\n" \
" popl %%eax\n" \
" incl %0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
case 2: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsw\n" \
"2:\n" \
".section .fixup,\"ax\"\n" \
"3: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosl\n" \
" stosw\n" \
" popl %%eax\n" \
" popl %0\n" \
" shl $2,%0\n" \
" addl $2,%0\n" \
" jmp 2b\n" \
"4: pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" stosw\n" \
" popl %%eax\n" \
" addl $2,%0\n" \
" jmp 2b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,3b\n" \
" .long 1b,4b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
case 3: \
__asm__ __volatile__( \
"0: rep; movsl\n" \
"1: movsw\n" \
"2: movsb\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: pushl %0\n" \
" pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" rep; stosl\n" \
" stosw\n" \
" stosb\n" \
" popl %%eax\n" \
" popl %0\n" \
" shl $2,%0\n" \
" addl $3,%0\n" \
" jmp 2b\n" \
"5: pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" stosw\n" \
" stosb\n" \
" popl %%eax\n" \
" addl $3,%0\n" \
" jmp 2b\n" \
"6: pushl %%eax\n" \
" xorl %%eax,%%eax\n" \
" stosb\n" \
" popl %%eax\n" \
" incl %0\n" \
" jmp 3b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
" .align 4\n" \
" .long 0b,4b\n" \
" .long 1b,5b\n" \
" .long 2b,6b\n" \
".previous" \
: "=c"(size), "=&S" (__d0), "=&D" (__d1)\
: "1"(from), "2"(to), "0"(size/4) \
: "memory"); \
break; \
} \
} while (0)
unsigned long __generic_copy_to_user(void *, const void *, unsigned long);
unsigned long __generic_copy_from_user(void *, const void *, unsigned long);
static inline unsigned long
__constant_copy_to_user(void *to, const void *from, unsigned long n)
{
prefetch(from);
if (access_ok(VERIFY_WRITE, to, n))
__constant_copy_user(to,from,n);
return n;
}
static inline unsigned long
__constant_copy_from_user(void *to, const void *from, unsigned long n)
{
if (access_ok(VERIFY_READ, from, n))
__constant_copy_user_zeroing(to,from,n);
else
memset(to, 0, n);
return n;
}
static inline unsigned long
__constant_copy_to_user_nocheck(void *to, const void *from, unsigned long n)
{
__constant_copy_user(to,from,n);
return n;
}
static inline unsigned long
__constant_copy_from_user_nocheck(void *to, const void *from, unsigned long n)
{
__constant_copy_user_zeroing(to,from,n);
return n;
}
#define copy_to_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_to_user((to),(from),(n)) : \
__generic_copy_to_user((to),(from),(n)))
#define copy_from_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_from_user((to),(from),(n)) : \
__generic_copy_from_user((to),(from),(n)))
#define __copy_to_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_to_user_nocheck((to),(from),(n)) : \
__generic_copy_to_user_nocheck((to),(from),(n)))
#define __copy_from_user(to,from,n) \
(__builtin_constant_p(n) ? \
__constant_copy_from_user_nocheck((to),(from),(n)) : \
__generic_copy_from_user_nocheck((to),(from),(n)))
unsigned long copy_to_user(void *to, const void *from, unsigned long n);
unsigned long copy_from_user(void *to, const void *from, unsigned long n);
unsigned long __copy_to_user(void *to, const void *from, unsigned long n);
unsigned long __copy_from_user(void *to, const void *from, unsigned long n);
long strncpy_from_user(char *dst, const char *src, long count);
long __strncpy_from_user(char *dst, const char *src, long count);
......
......@@ -258,7 +258,7 @@ static inline void clear_in_cr4 (unsigned long mask)
#define TASK_UNMAPPED_32 0x40000000
#define TASK_UNMAPPED_64 (TASK_SIZE/3)
#define TASK_UNMAPPED_BASE \
(test_thread_flags(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
(test_thread_flag(TIF_IA32) ? TASK_UNMAPPED_32 : TASK_UNMAPPED_64)
/*
* Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
......
......@@ -22,30 +22,35 @@ struct request_list {
wait_queue_head_t wait;
};
/*
* try to put the fields that are referenced together in the same cacheline
*/
struct request {
struct list_head queuelist; /* looking for ->queue? you must _not_
* access it directly, use
* blkdev_dequeue_request! */
int ref_count;
void *elevator_private;
unsigned long flags; /* see REQ_ bits below */
unsigned char cmd[16];
kdev_t rq_dev;
sector_t sector;
unsigned long nr_sectors;
unsigned int current_nr_sectors;
unsigned long flags; /* see REQ_ bits below */
void *elevator_private;
int rq_status; /* should split this into a few status bits */
kdev_t rq_dev;
struct gendisk *rq_disk;
int errors;
sector_t sector;
unsigned long start_time;
unsigned long nr_sectors;
sector_t hard_sector; /* the hard_* are block layer
* internals, no driver should
* touch them
*/
unsigned long hard_nr_sectors;
unsigned int hard_cur_sectors;
struct bio *bio;
struct bio *biotail;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
......@@ -59,13 +64,21 @@ struct request {
*/
unsigned short nr_hw_segments;
unsigned int current_nr_sectors;
unsigned int hard_cur_sectors;
int tag;
void *special;
char *buffer;
/* For packet commands */
int ref_count;
request_queue_t *q;
struct request_list *rl;
struct completion *waiting;
void *special;
/*
* when request is used as a packet command carrier
*/
unsigned char cmd[16];
unsigned int data_len;
void *data;
......@@ -73,10 +86,6 @@ struct request {
void *sense;
unsigned int timeout;
struct completion *waiting;
struct bio *bio, *biotail;
request_queue_t *q;
struct request_list *rl;
};
/*
......
......@@ -1250,10 +1250,12 @@ ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov,
extern ssize_t generic_file_sendfile(struct file *, struct file *, loff_t *, size_t);
extern void do_generic_mapping_read(struct address_space *, struct file_ra_state *, struct file *,
loff_t *, read_descriptor_t *, read_actor_t);
extern void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
extern ssize_t generic_file_direct_IO(int rw, struct file *file,
const struct iovec *iov, loff_t offset, unsigned long nr_segs);
extern int generic_direct_IO(int rw, struct inode *inode, const struct iovec
*iov, loff_t offset, unsigned long nr_segs, get_blocks_t *get_blocks);
extern int generic_direct_IO(int rw, struct inode *inode, struct block_device *bdev,
const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_blocks_t *get_blocks);
extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos);
ssize_t generic_file_writev(struct file *filp, const struct iovec *iov,
......@@ -1311,6 +1313,12 @@ extern int simple_rmdir(struct inode *, struct dentry *);
extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
extern int simple_sync_file(struct file *, struct dentry *, int);
extern int simple_empty(struct dentry *);
extern int simple_readpage(struct file *file, struct page *page);
extern int simple_prepare_write(struct file *file, struct page *page,
unsigned offset, unsigned to);
extern int simple_commit_write(struct file *file, struct page *page,
unsigned offset, unsigned to);
extern struct dentry *simple_lookup(struct inode *, struct dentry *);
extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *);
extern struct file_operations simple_dir_operations;
......
......@@ -863,6 +863,14 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);
/*
* Check for pending SIGKILL! The new thread should not be allowed
* to slip out of an OOM kill (or a normal SIGKILL).
*/
if (sigismember(&current->pending.signal, SIGKILL)) {
write_unlock_irq(&tasklist_lock);
goto bad_fork_cleanup_namespace;
}
/* CLONE_PARENT re-uses the old parent */
if (clone_flags & CLONE_PARENT)
......
......@@ -228,6 +228,7 @@ EXPORT_SYMBOL(generic_block_bmap);
EXPORT_SYMBOL(generic_file_read);
EXPORT_SYMBOL(generic_file_sendfile);
EXPORT_SYMBOL(do_generic_mapping_read);
EXPORT_SYMBOL(file_ra_state_init);
EXPORT_SYMBOL(generic_file_write);
EXPORT_SYMBOL(generic_file_write_nolock);
EXPORT_SYMBOL(generic_file_mmap);
......@@ -306,6 +307,9 @@ EXPORT_SYMBOL(simple_unlink);
EXPORT_SYMBOL(simple_rmdir);
EXPORT_SYMBOL(simple_rename);
EXPORT_SYMBOL(simple_sync_file);
EXPORT_SYMBOL(simple_readpage);
EXPORT_SYMBOL(simple_prepare_write);
EXPORT_SYMBOL(simple_commit_write);
EXPORT_SYMBOL(simple_empty);
EXPORT_SYMBOL(fd_install);
EXPORT_SYMBOL(put_unused_fd);
......
......@@ -282,17 +282,9 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
break;
nr_found = __lookup(root, results + ret, cur_index,
max_items - ret, &next_index, max_index);
if (nr_found == 0) {
if (!(cur_index & RADIX_TREE_MAP_MASK))
break;
/*
* It could be that there simply were no items to the
* right of `cur_index' in the leaf node. So we still
* need to search for additional nodes to the right of
* this one.
*/
}
ret += nr_found;
if (next_index == max_index)
break;
cur_index = next_index;
}
out:
......
......@@ -61,6 +61,10 @@
* ->mapping->page_lock
* ->inode_lock
* ->sb_lock (fs/fs-writeback.c)
* ->page_table_lock
* ->swap_device_lock (try_to_unmap_one)
* ->private_lock (try_to_unmap_one)
* ->page_lock (try_to_unmap_one)
*/
/*
......
......@@ -399,6 +399,10 @@ static int vma_merge(struct mm_struct * mm, struct vm_area_struct * prev,
return 0;
}
/*
* The caller must hold down_write(current->mm->mmap_sem).
*/
unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
unsigned long len, unsigned long prot,
unsigned long flags, unsigned long pgoff)
......
......@@ -175,9 +175,13 @@ static void oom_kill(void)
if (p == NULL)
panic("Out of memory and no killable processes...\n");
/* kill all processes that share the ->mm (i.e. all threads) */
oom_kill_task(p);
/*
* kill all processes that share the ->mm (i.e. all threads),
* but are in a different thread group
*/
do_each_thread(g, q)
if (q->mm == p->mm)
if (q->mm == p->mm && q->tgid != p->tgid)
oom_kill_task(q);
while_each_thread(g, q);
......
......@@ -59,6 +59,14 @@ static inline int bad_range(struct zone *zone, struct page *page)
return 0;
}
static void bad_page(const char *function, struct page *page)
{
printk("Bad page state at %s\n", function);
printk("flags:0x%08lx mapping:%p mapped:%d count:%d\n",
page->flags, page->mapping,
page_mapped(page), page_count(page));
}
/*
* Freeing function for a buddy system allocator.
*
......@@ -91,16 +99,19 @@ void __free_pages_ok (struct page *page, unsigned int order)
mod_page_state(pgfree, 1<<order);
BUG_ON(PageLRU(page));
BUG_ON(PagePrivate(page));
BUG_ON(page->mapping != NULL);
BUG_ON(PageLocked(page));
BUG_ON(PageActive(page));
BUG_ON(PageWriteback(page));
BUG_ON(page->pte.direct != 0);
if ( page_mapped(page) ||
page->mapping != NULL ||
page_count(page) != 0 ||
(page->flags & (
1 << PG_lru |
1 << PG_private |
1 << PG_locked |
1 << PG_active |
1 << PG_writeback )))
bad_page(__FUNCTION__, page);
if (PageDirty(page))
ClearPageDirty(page);
BUG_ON(page_count(page) != 0);
if (unlikely(current->flags & PF_FREE_PAGES)) {
if (!current->nr_local_pages && !in_interrupt()) {
......@@ -181,14 +192,17 @@ expand(struct zone *zone, struct page *page,
*/
static inline void prep_new_page(struct page *page)
{
BUG_ON(page->mapping);
BUG_ON(PagePrivate(page));
BUG_ON(PageLocked(page));
BUG_ON(PageLRU(page));
BUG_ON(PageActive(page));
BUG_ON(PageDirty(page));
BUG_ON(PageWriteback(page));
BUG_ON(page->pte.direct != 0);
if ( page->mapping ||
page_mapped(page) ||
(page->flags & (
1 << PG_private |
1 << PG_locked |
1 << PG_lru |
1 << PG_active |
1 << PG_dirty |
1 << PG_writeback )))
bad_page(__FUNCTION__, page);
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked);
......
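The page_alloc refactor above replaces a row of BUG_ON() calls with a single mask test; a sketch of the idiom, using hypothetical bit positions (the real PG_* values live in the kernel's page-flags header):

#include <stdio.h>

/* hypothetical bit positions for illustration only */
enum { PG_lru, PG_private, PG_locked, PG_active, PG_dirty, PG_writeback };

int main(void)
{
	unsigned long flags = (1UL << PG_locked) | (1UL << PG_dirty);
	unsigned long bad = 1UL << PG_lru | 1UL << PG_private |
			    1UL << PG_locked | 1UL << PG_active |
			    1UL << PG_dirty | 1UL << PG_writeback;

	if (flags & bad)	/* one AND instead of six BUG_ON()s */
		printf("bad page state, offending flags 0x%lx\n", flags & bad);
	return 0;
}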
......@@ -19,6 +19,16 @@ struct backing_dev_info default_backing_dev_info = {
.state = 0,
};
/*
* Initialise a struct file's readahead state
*/
void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping)
{
memset(ra, 0, sizeof(*ra));
ra->ra_pages = mapping->backing_dev_info->ra_pages;
}
/*
* Return max readahead size for this inode in number-of-pages.
*/
......
......@@ -53,7 +53,34 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
clear_page_dirty(page);
ClearPageUptodate(page);
remove_from_page_cache(page);
page_cache_release(page);
page_cache_release(page); /* pagecache ref */
}
/*
* This is for invalidate_inode_pages(). That function can be called at
* any time, and is not supposed to throw away dirty pages. But pages can
* be marked dirty at any time too. So we re-check the dirtiness inside
* ->page_lock. That provides exclusion against the __set_page_dirty
* functions.
*/
static void
invalidate_complete_page(struct address_space *mapping, struct page *page)
{
if (page->mapping != mapping)
return;
if (PagePrivate(page) && !try_to_release_page(page, 0))
return;
write_lock(&mapping->page_lock);
if (PageDirty(page)) {
write_unlock(&mapping->page_lock);
} else {
__remove_from_page_cache(page);
write_unlock(&mapping->page_lock);
ClearPageUptodate(page);
page_cache_release(page); /* pagecache ref */
}
}
/**
......@@ -172,11 +199,9 @@ void invalidate_inode_pages(struct address_space *mapping)
next++;
if (PageDirty(page) || PageWriteback(page))
goto unlock;
if (PagePrivate(page) && !try_to_release_page(page, 0))
goto unlock;
if (page_mapped(page))
goto unlock;
truncate_complete_page(mapping, page);
invalidate_complete_page(mapping, page);
unlock:
unlock_page(page);
}
......@@ -213,7 +238,7 @@ void invalidate_inode_pages2(struct address_space *mapping)
if (page_mapped(page))
clear_page_dirty(page);
else
truncate_complete_page(mapping, page);
invalidate_complete_page(mapping, page);
}
unlock_page(page);
}
......
......@@ -31,6 +31,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/topology.h>
#include <asm/div64.h>
#include <linux/swapops.h>
......@@ -85,7 +86,7 @@ struct shrinker {
shrinker_t shrinker;
struct list_head list;
int seeks; /* seeks to recreate an obj */
int nr; /* objs pending delete */
long nr; /* objs pending delete */
};
static LIST_HEAD(shrinker_list);
......@@ -121,7 +122,7 @@ void remove_shrinker(struct shrinker *shrinker)
kfree(shrinker);
}
#define SHRINK_BATCH 32
#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
*
......@@ -134,29 +135,27 @@ void remove_shrinker(struct shrinker *shrinker)
* slab to avoid swapping.
*
* FIXME: do not do for zone highmem
*
* We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
*/
static int shrink_slab(int scanned, unsigned int gfp_mask)
static int shrink_slab(long scanned, unsigned int gfp_mask)
{
struct list_head *lh;
int pages;
struct shrinker *shrinker;
long pages;
if (down_trylock(&shrinker_sem))
return 0;
pages = nr_used_zone_pages();
list_for_each(lh, &shrinker_list) {
struct shrinker *shrinker;
int entries;
unsigned long delta;
shrinker = list_entry(lh, struct shrinker, list);
entries = (*shrinker->shrinker)(0, gfp_mask);
if (!entries)
continue;
delta = scanned * shrinker->seeks * entries;
shrinker->nr += delta / (pages + 1);
list_for_each_entry(shrinker, &shrinker_list, list) {
long long delta;
delta = scanned * shrinker->seeks;
delta *= (*shrinker->shrinker)(0, gfp_mask);
do_div(delta, pages + 1);
shrinker->nr += delta;
if (shrinker->nr > SHRINK_BATCH) {
int nr = shrinker->nr;
long nr = shrinker->nr;
shrinker->nr = 0;
(*shrinker->shrinker)(nr, gfp_mask);
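A sketch of the overflow-safe bookkeeping above: the product scanned * seeks * entries can exceed 32 bits, so it is accumulated in a 64-bit delta and divided by the page count (do_div() in the kernel) before topping up the shrinker's pending count. Sample numbers:

#include <stdio.h>

int main(void)
{
	long scanned = 100000, seeks = 2, entries = 500000;
	long pages = 4000000;

	long long delta = (long long)scanned * seeks;	/* widen first... */
	delta *= entries;	/* ...since 1e11 would overflow 32 bits */
	delta /= pages + 1;	/* the kernel uses do_div() here */

	printf("delta=%lld objects to scan\n", delta);	/* ~25000 */
	return 0;
}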
......@@ -824,7 +823,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
int i;
for (priority = DEF_PRIORITY; priority; priority--) {
int success = 1;
int all_zones_ok = 1;
for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;
......@@ -832,20 +831,24 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
int max_scan;
int to_reclaim;
to_reclaim = zone->pages_high - zone->free_pages;
if (nr_pages && to_free > 0)
if (nr_pages && to_free > 0) { /* Software suspend */
to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8);
if (to_reclaim <= 0)
continue;
success = 0;
} else { /* Zone balancing */
to_reclaim = zone->pages_high-zone->free_pages;
if (to_reclaim <= 0)
continue;
}
all_zones_ok = 0;
max_scan = zone->nr_inactive >> priority;
if (max_scan < to_reclaim * 2)
max_scan = to_reclaim * 2;
if (max_scan < SWAP_CLUSTER_MAX)
max_scan = SWAP_CLUSTER_MAX;
to_free -= shrink_zone(zone, max_scan, GFP_KSWAPD,
to_reclaim, &nr_mapped, ps, priority);
shrink_slab(max_scan + nr_mapped, GFP_KSWAPD);
}
if (success)
if (all_zones_ok)
break;
blk_congestion_wait(WRITE, HZ/4);
}
......