Commit db168263 authored by Linus Torvalds

Merge branch 'hwpoison' of git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6

* 'hwpoison' of git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-mce-2.6: (21 commits)
  HWPOISON: Enable error_remove_page on btrfs
  HWPOISON: Add simple debugfs interface to inject hwpoison on arbitary PFNs
  HWPOISON: Add madvise() based injector for hardware poisoned pages v4
  HWPOISON: Enable error_remove_page for NFS
  HWPOISON: Enable .remove_error_page for migration aware file systems
  HWPOISON: The high level memory error handler in the VM v7
  HWPOISON: Add PR_MCE_KILL prctl to control early kill behaviour per process
  HWPOISON: shmem: call set_page_dirty() with locked page
  HWPOISON: Define a new error_remove_page address space op for async truncation
  HWPOISON: Add invalidate_inode_page
  HWPOISON: Refactor truncate to allow direct truncating of page v2
  HWPOISON: check and isolate corrupted free pages v2
  HWPOISON: Handle hardware poisoned pages in try_to_unmap
  HWPOISON: Use bitmask/action code for try_to_unmap behaviour
  HWPOISON: x86: Add VM_FAULT_HWPOISON handling to x86 page fault handler v2
  HWPOISON: Add poison check to page fault handling
  HWPOISON: Add basic support for poisoned pages in fault handler v3
  HWPOISON: Add new SIGBUS error codes for hardware poison signals
  HWPOISON: Add support for poison swap entries v2
  HWPOISON: Export some rmap vma locking to outside world
  ...
parents cd604513 465fdd97
...@@ -536,6 +536,7 @@ struct address_space_operations {
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
int (*launder_page) (struct page *);
int (*error_remove_page) (struct address_space *mapping, struct page *page);
};
writepage: called by the VM to write a dirty page to backing store.
...@@ -694,6 +695,12 @@ struct address_space_operations {
prevent redirtying the page, it is kept locked during the whole
operation.
error_remove_page: normally set to generic_error_remove_page if truncation
is OK for this address space. Used for memory failure handling.
Setting this implies you deal with pages going away from under you,
unless you have them locked or their reference counts raised.
The File Object
===============
......
...@@ -32,6 +32,8 @@ Currently, these files are in /proc/sys/vm:
- legacy_va_layout
- lowmem_reserve_ratio
- max_map_count
- memory_failure_early_kill
- memory_failure_recovery
- min_free_kbytes
- min_slab_ratio
- min_unmapped_ratio
...@@ -53,7 +55,6 @@ Currently, these files are in /proc/sys/vm:
- vfs_cache_pressure
- zone_reclaim_mode
==============================================================
block_dump
...@@ -275,6 +276,44 @@ e.g., up to one or two maps per allocation.
The default value is 65536.
=============================================================
memory_failure_early_kill:
Control how to kill processes when an uncorrected memory error (typically
a 2-bit error in a memory module) that cannot be handled by the kernel
is detected in the background by hardware. In some cases (like the page
still having a valid copy on disk) the kernel will handle the failure
transparently without affecting any applications. But if there is
no other up-to-date copy of the data, it will kill the affected processes
to prevent data corruption from propagating.

1: Kill all processes that have the corrupted, not-reloadable page mapped
as soon as the corruption is detected. Note this is not supported
for a few types of pages, such as kernel-internal allocations or
the swap cache, but it works for the majority of user pages.

0: Only unmap the corrupted page from all processes and kill only a process
that tries to access it.
The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
handle this if they want to.
This is only active on architectures/platforms with advanced machine
check handling and depends on the hardware capabilities.
Applications can override this setting individually with the PR_MCE_KILL prctl.
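
As an illustration of that interface (not part of the patch itself), a process
could opt in to early kill for itself roughly as follows; PR_MCE_KILL and its
argument convention are taken from the prctl changes further down in this
merge, everything else is a hypothetical test program:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MCE_KILL
#define PR_MCE_KILL 33	/* value added by this series */
#endif

int main(void)
{
	/*
	 * arg2 == 1: use a process-private machine check kill policy
	 * arg3 == 1: kill early (catchable SIGBUS with BUS_MCEERR_AO
	 *            as soon as a mapped page is found corrupted)
	 * arg4/arg5 must be 0 or the kernel returns -EINVAL.
	 */
	if (prctl(PR_MCE_KILL, 1, 1, 0, 0) < 0)
		perror("prctl(PR_MCE_KILL)");
	return 0;
}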
==============================================================
memory_failure_recovery:
Enable memory failure recovery (when supported by the platform).
1: Attempt recovery.
0: Always panic on a memory failure.
==============================================================
min_free_kbytes:
......
...@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, ...@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
info.si_errno = 0; info.si_errno = 0;
info.si_code = si_code; info.si_code = si_code;
info.si_addr = (void __user *)address; info.si_addr = (void __user *)address;
info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
force_sig_info(si_signo, &info, tsk); force_sig_info(si_signo, &info, tsk);
} }
...@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code, ...@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
} }
static void static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
unsigned int fault)
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm; struct mm_struct *mm = tsk->mm;
int code = BUS_ADRERR;
up_read(&mm->mmap_sem); up_read(&mm->mmap_sem);
...@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) ...@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
tsk->thread.error_code = error_code; tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14; tsk->thread.trap_no = 14;
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); #ifdef CONFIG_MEMORY_FAILURE
if (fault & VM_FAULT_HWPOISON) {
printk(KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
tsk->comm, tsk->pid, address);
code = BUS_MCEERR_AR;
}
#endif
force_sig_info_fault(SIGBUS, code, address, tsk);
} }
static noinline void static noinline void
...@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, ...@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if (fault & VM_FAULT_OOM) { if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address); out_of_memory(regs, error_code, address);
} else { } else {
if (fault & VM_FAULT_SIGBUS) if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
do_sigbus(regs, error_code, address); do_sigbus(regs, error_code, address, fault);
else else
BUG(); BUG();
} }
......
...@@ -5269,6 +5269,7 @@ static const struct address_space_operations btrfs_aops = { ...@@ -5269,6 +5269,7 @@ static const struct address_space_operations btrfs_aops = {
.invalidatepage = btrfs_invalidatepage, .invalidatepage = btrfs_invalidatepage,
.releasepage = btrfs_releasepage, .releasepage = btrfs_releasepage,
.set_page_dirty = btrfs_set_page_dirty, .set_page_dirty = btrfs_set_page_dirty,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations btrfs_symlink_aops = { static const struct address_space_operations btrfs_symlink_aops = {
......
...@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = { ...@@ -819,6 +819,7 @@ const struct address_space_operations ext2_aops = {
.writepages = ext2_writepages, .writepages = ext2_writepages,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
const struct address_space_operations ext2_aops_xip = { const struct address_space_operations ext2_aops_xip = {
...@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = { ...@@ -837,6 +838,7 @@ const struct address_space_operations ext2_nobh_aops = {
.direct_IO = ext2_direct_IO, .direct_IO = ext2_direct_IO,
.writepages = ext2_writepages, .writepages = ext2_writepages,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.error_remove_page = generic_error_remove_page,
}; };
/* /*
......
...@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = { ...@@ -1830,6 +1830,7 @@ static const struct address_space_operations ext3_ordered_aops = {
.direct_IO = ext3_direct_IO, .direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations ext3_writeback_aops = { static const struct address_space_operations ext3_writeback_aops = {
...@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = { ...@@ -1845,6 +1846,7 @@ static const struct address_space_operations ext3_writeback_aops = {
.direct_IO = ext3_direct_IO, .direct_IO = ext3_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations ext3_journalled_aops = { static const struct address_space_operations ext3_journalled_aops = {
...@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = { ...@@ -1859,6 +1861,7 @@ static const struct address_space_operations ext3_journalled_aops = {
.invalidatepage = ext3_invalidatepage, .invalidatepage = ext3_invalidatepage,
.releasepage = ext3_releasepage, .releasepage = ext3_releasepage,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
void ext3_set_aops(struct inode *inode) void ext3_set_aops(struct inode *inode)
......
...@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = { ...@@ -3386,6 +3386,7 @@ static const struct address_space_operations ext4_ordered_aops = {
.direct_IO = ext4_direct_IO, .direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations ext4_writeback_aops = { static const struct address_space_operations ext4_writeback_aops = {
...@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = { ...@@ -3401,6 +3402,7 @@ static const struct address_space_operations ext4_writeback_aops = {
.direct_IO = ext4_direct_IO, .direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations ext4_journalled_aops = { static const struct address_space_operations ext4_journalled_aops = {
...@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = { ...@@ -3415,6 +3417,7 @@ static const struct address_space_operations ext4_journalled_aops = {
.invalidatepage = ext4_invalidatepage, .invalidatepage = ext4_invalidatepage,
.releasepage = ext4_releasepage, .releasepage = ext4_releasepage,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations ext4_da_aops = { static const struct address_space_operations ext4_da_aops = {
...@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = { ...@@ -3431,6 +3434,7 @@ static const struct address_space_operations ext4_da_aops = {
.direct_IO = ext4_direct_IO, .direct_IO = ext4_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
void ext4_set_aops(struct inode *inode) void ext4_set_aops(struct inode *inode)
......
...@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = { ...@@ -1135,6 +1135,7 @@ static const struct address_space_operations gfs2_writeback_aops = {
.direct_IO = gfs2_direct_IO, .direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations gfs2_ordered_aops = { static const struct address_space_operations gfs2_ordered_aops = {
...@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = { ...@@ -1151,6 +1152,7 @@ static const struct address_space_operations gfs2_ordered_aops = {
.direct_IO = gfs2_direct_IO, .direct_IO = gfs2_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
static const struct address_space_operations gfs2_jdata_aops = { static const struct address_space_operations gfs2_jdata_aops = {
...@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = { ...@@ -1166,6 +1168,7 @@ static const struct address_space_operations gfs2_jdata_aops = {
.invalidatepage = gfs2_invalidatepage, .invalidatepage = gfs2_invalidatepage,
.releasepage = gfs2_releasepage, .releasepage = gfs2_releasepage,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
void gfs2_set_aops(struct inode *inode) void gfs2_set_aops(struct inode *inode)
......
...@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = { ...@@ -525,6 +525,7 @@ const struct address_space_operations nfs_file_aops = {
.direct_IO = nfs_direct_IO, .direct_IO = nfs_direct_IO,
.migratepage = nfs_migrate_page, .migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page, .launder_page = nfs_launder_page,
.error_remove_page = generic_error_remove_page,
}; };
/* /*
......
...@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = { ...@@ -1550,6 +1550,7 @@ const struct address_space_operations ntfs_aops = {
.migratepage = buffer_migrate_page, /* Move a page cache page from .migratepage = buffer_migrate_page, /* Move a page cache page from
one physical page to an one physical page to an
other. */ other. */
.error_remove_page = generic_error_remove_page,
}; };
/** /**
...@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = { ...@@ -1569,6 +1570,7 @@ const struct address_space_operations ntfs_mst_aops = {
.migratepage = buffer_migrate_page, /* Move a page cache page from .migratepage = buffer_migrate_page, /* Move a page cache page from
one physical page to an one physical page to an
other. */ other. */
.error_remove_page = generic_error_remove_page,
}; };
#ifdef NTFS_RW #ifdef NTFS_RW
......
...@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = { ...@@ -2022,4 +2022,5 @@ const struct address_space_operations ocfs2_aops = {
.releasepage = ocfs2_releasepage, .releasepage = ocfs2_releasepage,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
...@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v) ...@@ -97,7 +97,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
"Committed_AS: %8lu kB\n" "Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n" "VmallocTotal: %8lu kB\n"
"VmallocUsed: %8lu kB\n" "VmallocUsed: %8lu kB\n"
"VmallocChunk: %8lu kB\n", "VmallocChunk: %8lu kB\n"
#ifdef CONFIG_MEMORY_FAILURE
"HardwareCorrupted: %8lu kB\n"
#endif
,
K(i.totalram), K(i.totalram),
K(i.freeram), K(i.freeram),
K(i.bufferram), K(i.bufferram),
...@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v) ...@@ -144,6 +148,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
(unsigned long)VMALLOC_TOTAL >> 10, (unsigned long)VMALLOC_TOTAL >> 10,
vmi.used >> 10, vmi.used >> 10,
vmi.largest_chunk >> 10 vmi.largest_chunk >> 10
#ifdef CONFIG_MEMORY_FAILURE
,atomic_long_read(&mce_bad_pages) << (PAGE_SHIFT - 10)
#endif
); );
hugetlb_report_meminfo(m); hugetlb_report_meminfo(m);
......
...@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = { ...@@ -1635,4 +1635,5 @@ const struct address_space_operations xfs_address_space_operations = {
.direct_IO = xfs_vm_direct_IO, .direct_IO = xfs_vm_direct_IO,
.migratepage = buffer_migrate_page, .migratepage = buffer_migrate_page,
.is_partially_uptodate = block_is_partially_uptodate, .is_partially_uptodate = block_is_partially_uptodate,
.error_remove_page = generic_error_remove_page,
}; };
...@@ -34,6 +34,7 @@ ...@@ -34,6 +34,7 @@
#define MADV_REMOVE 9 /* remove these pages & resources */ #define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */ #define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */ #define MADV_DOFORK 11 /* do inherit across fork */
#define MADV_HWPOISON 100 /* poison a page for testing */
#define MADV_MERGEABLE 12 /* KSM may merge identical pages */ #define MADV_MERGEABLE 12 /* KSM may merge identical pages */
#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */ #define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
......
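For reference, the MADV_HWPOISON value added above is consumed by the
madvise() based injector this series adds in mm/madvise.c (further down).
A minimal sketch of driving it from user space, assuming CAP_SYS_ADMIN and a
kernel built with CONFIG_MEMORY_FAILURE; the fallback define mirrors the
value above:

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>

#ifndef MADV_HWPOISON
#define MADV_HWPOISON 100	/* poison a page for testing */
#endif

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	memset(p, 0, pagesize);		/* make sure the page is populated */

	/* Ask the kernel to treat this page as hardware poisoned. */
	if (madvise(p, pagesize, MADV_HWPOISON) < 0)
		perror("madvise(MADV_HWPOISON)");
	return 0;
}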
...@@ -82,6 +82,7 @@ typedef struct siginfo { ...@@ -82,6 +82,7 @@ typedef struct siginfo {
#ifdef __ARCH_SI_TRAPNO #ifdef __ARCH_SI_TRAPNO
int _trapno; /* TRAP # which caused the signal */ int _trapno; /* TRAP # which caused the signal */
#endif #endif
short _addr_lsb; /* LSB of the reported address */
} _sigfault; } _sigfault;
/* SIGPOLL */ /* SIGPOLL */
...@@ -112,6 +113,7 @@ typedef struct siginfo { ...@@ -112,6 +113,7 @@ typedef struct siginfo {
#ifdef __ARCH_SI_TRAPNO #ifdef __ARCH_SI_TRAPNO
#define si_trapno _sifields._sigfault._trapno #define si_trapno _sifields._sigfault._trapno
#endif #endif
#define si_addr_lsb _sifields._sigfault._addr_lsb
#define si_band _sifields._sigpoll._band #define si_band _sifields._sigpoll._band
#define si_fd _sifields._sigpoll._fd #define si_fd _sifields._sigpoll._fd
...@@ -192,7 +194,11 @@ typedef struct siginfo { ...@@ -192,7 +194,11 @@ typedef struct siginfo {
#define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */ #define BUS_ADRALN (__SI_FAULT|1) /* invalid address alignment */
#define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */ #define BUS_ADRERR (__SI_FAULT|2) /* non-existant physical address */
#define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */ #define BUS_OBJERR (__SI_FAULT|3) /* object specific hardware error */
#define NSIGBUS 3 /* hardware memory error consumed on a machine check: action required */
#define BUS_MCEERR_AR (__SI_FAULT|4)
/* hardware memory error detected in process but not consumed: action optional*/
#define BUS_MCEERR_AO (__SI_FAULT|5)
#define NSIGBUS 5
/* /*
* SIGTRAP si_codes * SIGTRAP si_codes
......
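To illustrate what the new codes mean to user space, here is a hedged sketch
of a SIGBUS handler that recognizes them. The fallback values 4 and 5
correspond to BUS_MCEERR_AR and BUS_MCEERR_AO once __SI_FAULT (0 for user
space) is stripped; using si_addr_lsb requires a libc whose siginfo_t already
carries the new field:

#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#ifndef BUS_MCEERR_AR
#define BUS_MCEERR_AR 4		/* action required */
#define BUS_MCEERR_AO 5		/* action optional */
#endif

static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
{
	if (si->si_code == BUS_MCEERR_AR || si->si_code == BUS_MCEERR_AO) {
		/*
		 * si_addr_lsb is the log2 of the corrupted granule,
		 * normally PAGE_SHIFT for a single poisoned page.
		 * (fprintf is not async-signal-safe; demo only.)
		 */
		fprintf(stderr, "memory error at %p, %lu bytes, %s\n",
			si->si_addr, 1UL << si->si_addr_lsb,
			si->si_code == BUS_MCEERR_AR ?
			"action required" : "action optional");
	}
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = sigbus_handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGBUS, &sa, NULL);
	pause();	/* wait for a poison event (if one ever comes) */
	return 0;
}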
...@@ -595,6 +595,7 @@ struct address_space_operations { ...@@ -595,6 +595,7 @@ struct address_space_operations {
int (*launder_page) (struct page *); int (*launder_page) (struct page *);
int (*is_partially_uptodate) (struct page *, read_descriptor_t *, int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
unsigned long); unsigned long);
int (*error_remove_page)(struct address_space *, struct page *);
}; };
/* /*
......
...@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page) ...@@ -695,11 +695,12 @@ static inline int page_mapped(struct page *page)
#define VM_FAULT_SIGBUS 0x0002 #define VM_FAULT_SIGBUS 0x0002
#define VM_FAULT_MAJOR 0x0004 #define VM_FAULT_MAJOR 0x0004
#define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */ #define VM_FAULT_WRITE 0x0008 /* Special case for get_user_pages */
#define VM_FAULT_HWPOISON 0x0010 /* Hit poisoned page */
#define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */ #define VM_FAULT_NOPAGE 0x0100 /* ->fault installed the pte, not return page */
#define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */ #define VM_FAULT_LOCKED 0x0200 /* ->fault locked the returned page */
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS) #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON)
/* /*
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM. * Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
...@@ -794,6 +795,11 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping, ...@@ -794,6 +795,11 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
extern int vmtruncate(struct inode * inode, loff_t offset); extern int vmtruncate(struct inode * inode, loff_t offset);
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end); extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
int truncate_inode_page(struct address_space *mapping, struct page *page);
int generic_error_remove_page(struct address_space *mapping, struct page *page);
int invalidate_inode_page(struct page *page);
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, extern int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags); unsigned long address, unsigned int flags);
...@@ -1308,5 +1314,12 @@ void vmemmap_populate_print_last(void); ...@@ -1308,5 +1314,12 @@ void vmemmap_populate_print_last(void);
extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim, extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
size_t size); size_t size);
extern void refund_locked_memory(struct mm_struct *mm, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size);
extern void memory_failure(unsigned long pfn, int trapno);
extern int __memory_failure(unsigned long pfn, int trapno, int ref);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern atomic_long_t mce_bad_pages;
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */ #endif /* _LINUX_MM_H */
...@@ -51,6 +51,9 @@ ...@@ -51,6 +51,9 @@
* PG_buddy is set to indicate that the page is free and in the buddy system * PG_buddy is set to indicate that the page is free and in the buddy system
* (see mm/page_alloc.c). * (see mm/page_alloc.c).
* *
* PG_hwpoison indicates that a page got corrupted in hardware and contains
* data with incorrect ECC bits that triggered a machine check. Accessing it
* is not safe since it may cause another machine check. Don't touch!
*/ */
/* /*
...@@ -101,6 +104,9 @@ enum pageflags { ...@@ -101,6 +104,9 @@ enum pageflags {
#endif #endif
#ifdef CONFIG_ARCH_USES_PG_UNCACHED #ifdef CONFIG_ARCH_USES_PG_UNCACHED
PG_uncached, /* Page has been mapped as uncached */ PG_uncached, /* Page has been mapped as uncached */
#endif
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif #endif
__NR_PAGEFLAGS, __NR_PAGEFLAGS,
...@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached) ...@@ -269,6 +275,15 @@ PAGEFLAG(Uncached, uncached)
PAGEFLAG_FALSE(Uncached) PAGEFLAG_FALSE(Uncached)
#endif #endif
#ifdef CONFIG_MEMORY_FAILURE
PAGEFLAG(HWPoison, hwpoison)
TESTSETFLAG(HWPoison, hwpoison)
#define __PG_HWPOISON (1UL << PG_hwpoison)
#else
PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
#endif
static inline int PageUptodate(struct page *page) static inline int PageUptodate(struct page *page)
{ {
int ret = test_bit(PG_uptodate, &(page)->flags); int ret = test_bit(PG_uptodate, &(page)->flags);
...@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page) ...@@ -393,7 +408,7 @@ static inline void __ClearPageTail(struct page *page)
1 << PG_private | 1 << PG_private_2 | \ 1 << PG_private | 1 << PG_private_2 | \
1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \ 1 << PG_buddy | 1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \ 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
1 << PG_unevictable | __PG_MLOCKED) 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON)
/* /*
* Flags checked when a page is prepped for return by the page allocator. * Flags checked when a page is prepped for return by the page allocator.
......
...@@ -88,4 +88,6 @@ ...@@ -88,4 +88,6 @@
#define PR_TASK_PERF_EVENTS_DISABLE 31 #define PR_TASK_PERF_EVENTS_DISABLE 31
#define PR_TASK_PERF_EVENTS_ENABLE 32 #define PR_TASK_PERF_EVENTS_ENABLE 32
#define PR_MCE_KILL 33
#endif /* _LINUX_PRCTL_H */ #endif /* _LINUX_PRCTL_H */
...@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page) ...@@ -81,7 +81,19 @@ static inline void page_dup_rmap(struct page *page)
*/ */
int page_referenced(struct page *, int is_locked, int page_referenced(struct page *, int is_locked,
struct mem_cgroup *cnt, unsigned long *vm_flags); struct mem_cgroup *cnt, unsigned long *vm_flags);
int try_to_unmap(struct page *, int ignore_refs); enum ttu_flags {
TTU_UNMAP = 0, /* unmap mode */
TTU_MIGRATION = 1, /* migration mode */
TTU_MUNLOCK = 2, /* munlock mode */
TTU_ACTION_MASK = 0xff,
TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */
TTU_IGNORE_ACCESS = (1 << 9), /* don't age */
TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */
};
#define TTU_ACTION(x) ((x) & TTU_ACTION_MASK)
int try_to_unmap(struct page *, enum ttu_flags flags);
/* /*
* Called from mm/filemap_xip.c to unmap empty zero page * Called from mm/filemap_xip.c to unmap empty zero page
...@@ -108,6 +120,13 @@ int page_mkclean(struct page *); ...@@ -108,6 +120,13 @@ int page_mkclean(struct page *);
*/ */
int try_to_munlock(struct page *); int try_to_munlock(struct page *);
/*
* Called by memory-failure.c to kill processes.
*/
struct anon_vma *page_lock_anon_vma(struct page *page);
void page_unlock_anon_vma(struct anon_vma *anon_vma);
int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma);
#else /* !CONFIG_MMU */ #else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0) #define anon_vma_init() do {} while (0)
......
...@@ -1734,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p); ...@@ -1734,6 +1734,7 @@ extern cputime_t task_gtime(struct task_struct *p);
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
#define PF_DUMPCORE 0x00000200 /* dumped core */ #define PF_DUMPCORE 0x00000200 /* dumped core */
#define PF_SIGNALED 0x00000400 /* killed by a signal */ #define PF_SIGNALED 0x00000400 /* killed by a signal */
...@@ -1753,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p); ...@@ -1753,6 +1754,7 @@ extern cputime_t task_gtime(struct task_struct *p);
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */
......
...@@ -34,15 +34,37 @@ static inline int current_is_kswapd(void) ...@@ -34,15 +34,37 @@ static inline int current_is_kswapd(void)
* the type/offset into the pte as 5/27 as well. * the type/offset into the pte as 5/27 as well.
*/ */
#define MAX_SWAPFILES_SHIFT 5 #define MAX_SWAPFILES_SHIFT 5
#ifndef CONFIG_MIGRATION
#define MAX_SWAPFILES (1 << MAX_SWAPFILES_SHIFT) /*
* Use some of the swap files numbers for other purposes. This
* is a convenient way to hook into the VM to trigger special
* actions on faults.
*/
/*
* NUMA node memory migration support
*/
#ifdef CONFIG_MIGRATION
#define SWP_MIGRATION_NUM 2
#define SWP_MIGRATION_READ (MAX_SWAPFILES + SWP_HWPOISON_NUM)
#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
#else #else
/* Use last two entries for page migration swap entries */ #define SWP_MIGRATION_NUM 0
#define MAX_SWAPFILES ((1 << MAX_SWAPFILES_SHIFT)-2)
#define SWP_MIGRATION_READ MAX_SWAPFILES
#define SWP_MIGRATION_WRITE (MAX_SWAPFILES + 1)
#endif #endif
/*
* Handling of hardware poisoned pages with memory corruption.
*/
#ifdef CONFIG_MEMORY_FAILURE
#define SWP_HWPOISON_NUM 1
#define SWP_HWPOISON MAX_SWAPFILES
#else
#define SWP_HWPOISON_NUM 0
#endif
#define MAX_SWAPFILES \
((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
/* /*
* Magic header for a swap area. The first part of the union is * Magic header for a swap area. The first part of the union is
* what the swap magic looks like for the old (limited to 128MB) * what the swap magic looks like for the old (limited to 128MB)
......
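To make the reservation arithmetic above concrete: with MAX_SWAPFILES_SHIFT = 5
there are 32 encodable swap types. A kernel with both CONFIG_MIGRATION and
CONFIG_MEMORY_FAILURE enabled reserves SWP_MIGRATION_NUM = 2 plus
SWP_HWPOISON_NUM = 1 of them, so MAX_SWAPFILES becomes 32 - 2 - 1 = 29; with
neither option set, all 32 remain available for real swap files.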
...@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry) ...@@ -131,3 +131,41 @@ static inline int is_write_migration_entry(swp_entry_t entry)
#endif #endif
#ifdef CONFIG_MEMORY_FAILURE
/*
* Support for hardware poisoned pages
*/
static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
BUG_ON(!PageLocked(page));
return swp_entry(SWP_HWPOISON, page_to_pfn(page));
}
static inline int is_hwpoison_entry(swp_entry_t entry)
{
return swp_type(entry) == SWP_HWPOISON;
}
#else
static inline swp_entry_t make_hwpoison_entry(struct page *page)
{
return swp_entry(0, 0);
}
static inline int is_hwpoison_entry(swp_entry_t swp)
{
return 0;
}
#endif
#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
static inline int non_swap_entry(swp_entry_t entry)
{
return swp_type(entry) >= MAX_SWAPFILES;
}
#else
static inline int non_swap_entry(swp_entry_t entry)
{
return 0;
}
#endif
...@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, ...@@ -1542,6 +1542,28 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
current->timer_slack_ns = arg2; current->timer_slack_ns = arg2;
error = 0; error = 0;
break; break;
case PR_MCE_KILL:
if (arg4 | arg5)
return -EINVAL;
switch (arg2) {
case 0:
if (arg3 != 0)
return -EINVAL;
current->flags &= ~PF_MCE_PROCESS;
break;
case 1:
current->flags |= PF_MCE_PROCESS;
if (arg3 != 0)
current->flags |= PF_MCE_EARLY;
else
current->flags &= ~PF_MCE_EARLY;
break;
default:
return -EINVAL;
}
error = 0;
break;
default: default:
error = -EINVAL; error = -EINVAL;
break; break;
......
...@@ -1398,6 +1398,31 @@ static struct ctl_table vm_table[] = { ...@@ -1398,6 +1398,31 @@ static struct ctl_table vm_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &scan_unevictable_handler, .proc_handler = &scan_unevictable_handler,
}, },
#ifdef CONFIG_MEMORY_FAILURE
{
.ctl_name = CTL_UNNUMBERED,
.procname = "memory_failure_early_kill",
.data = &sysctl_memory_failure_early_kill,
.maxlen = sizeof(sysctl_memory_failure_early_kill),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
.extra2 = &one,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "memory_failure_recovery",
.data = &sysctl_memory_failure_recovery,
.maxlen = sizeof(sysctl_memory_failure_recovery),
.mode = 0644,
.proc_handler = &proc_dointvec_minmax,
.strategy = &sysctl_intvec,
.extra1 = &zero,
.extra2 = &one,
},
#endif
/* /*
* NOTE: do not add new entries to this table unless you have read * NOTE: do not add new entries to this table unless you have read
* Documentation/sysctl/ctl_unnumbered.txt * Documentation/sysctl/ctl_unnumbered.txt
......
...@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR ...@@ -245,6 +245,20 @@ config DEFAULT_MMAP_MIN_ADDR
/proc/sys/vm/mmap_min_addr tunable. /proc/sys/vm/mmap_min_addr tunable.
config MEMORY_FAILURE
depends on MMU
depends on X86_MCE
bool "Enable recovery from hardware memory errors"
help
Enables code to recover from some memory failures on systems
with MCA recovery. This allows a system to continue running
even when some of its memory has uncorrected errors. This requires
special hardware support and typically ECC memory.
config HWPOISON_INJECT
tristate "Poison pages injector"
depends on MEMORY_FAILURE && DEBUG_KERNEL
config NOMMU_INITIAL_TRIM_EXCESS config NOMMU_INITIAL_TRIM_EXCESS
int "Turn on mmap() excess space trimming before booting" int "Turn on mmap() excess space trimming before booting"
depends on !MMU depends on !MMU
......
...@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o ...@@ -41,5 +41,7 @@ obj-$(CONFIG_SMP) += allocpercpu.o
endif endif
obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o
obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
...@@ -104,6 +104,10 @@ ...@@ -104,6 +104,10 @@
* *
* ->task->proc_lock * ->task->proc_lock
* ->dcache_lock (proc_pid_lookup) * ->dcache_lock (proc_pid_lookup)
*
* (code doesn't rely on that order, so you could switch it around)
* ->tasklist_lock (memory_failure, collect_procs_ao)
* ->i_mmap_lock
*/ */
/* /*
......
/* Inject a hwpoison memory failure on an arbitrary pfn */
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/mm.h>
static struct dentry *hwpoison_dir, *corrupt_pfn;
static int hwpoison_inject(void *data, u64 val)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
printk(KERN_INFO "Injecting memory failure at pfn %Lx\n", val);
return __memory_failure(val, 18, 0);
}
DEFINE_SIMPLE_ATTRIBUTE(hwpoison_fops, NULL, hwpoison_inject, "%lli\n");
static void pfn_inject_exit(void)
{
if (hwpoison_dir)
debugfs_remove_recursive(hwpoison_dir);
}
static int pfn_inject_init(void)
{
hwpoison_dir = debugfs_create_dir("hwpoison", NULL);
if (hwpoison_dir == NULL)
return -ENOMEM;
corrupt_pfn = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
NULL, &hwpoison_fops);
if (corrupt_pfn == NULL) {
pfn_inject_exit();
return -ENOMEM;
}
return 0;
}
module_init(pfn_inject_init);
module_exit(pfn_inject_exit);
MODULE_LICENSE("GPL");
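
A quick way to exercise this injector (a sketch, assuming debugfs is mounted
at /sys/kernel/debug, the module above is loaded, and the PFN 0x12345 is
purely hypothetical) is to write a physical page frame number to the new
corrupt-pfn file:

#include <stdio.h>

int main(void)
{
	/* Requires CAP_SYS_ADMIN; takes a physical page frame number. */
	FILE *f = fopen("/sys/kernel/debug/hwpoison/corrupt-pfn", "w");

	if (!f) {
		perror("corrupt-pfn");
		return 1;
	}
	fprintf(f, "0x12345\n");
	return fclose(f) ? 1 : 0;
}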
...@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma, ...@@ -218,6 +218,32 @@ static long madvise_remove(struct vm_area_struct *vma,
return error; return error;
} }
#ifdef CONFIG_MEMORY_FAILURE
/*
* Error injection support for memory error handling.
*/
static int madvise_hwpoison(unsigned long start, unsigned long end)
{
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
for (; start < end; start += PAGE_SIZE) {
struct page *p;
int ret = get_user_pages(current, current->mm, start, 1,
0, 0, &p, NULL);
if (ret != 1)
return ret;
printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
page_to_pfn(p), start);
/* Ignore return value for now */
__memory_failure(page_to_pfn(p), 0, 1);
put_page(p);
}
return ret;
}
#endif
static long static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior) unsigned long start, unsigned long end, int behavior)
...@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) ...@@ -308,6 +334,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
int write; int write;
size_t len; size_t len;
#ifdef CONFIG_MEMORY_FAILURE
if (behavior == MADV_HWPOISON)
return madvise_hwpoison(start, start+len_in);
#endif
if (!madvise_behavior_valid(behavior)) if (!madvise_behavior_valid(behavior))
return error; return error;
......
This diff is collapsed. (The large new file mm/memory-failure.c, the high
level memory error handler itself, is not shown here.)
...@@ -1325,7 +1325,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, ...@@ -1325,7 +1325,8 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
if (ret & VM_FAULT_ERROR) { if (ret & VM_FAULT_ERROR) {
if (ret & VM_FAULT_OOM) if (ret & VM_FAULT_OOM)
return i ? i : -ENOMEM; return i ? i : -ENOMEM;
else if (ret & VM_FAULT_SIGBUS) if (ret &
(VM_FAULT_HWPOISON|VM_FAULT_SIGBUS))
return i ? i : -EFAULT; return i ? i : -EFAULT;
BUG(); BUG();
} }
...@@ -2559,8 +2560,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2559,8 +2560,15 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
goto out; goto out;
entry = pte_to_swp_entry(orig_pte); entry = pte_to_swp_entry(orig_pte);
if (is_migration_entry(entry)) { if (unlikely(non_swap_entry(entry))) {
migration_entry_wait(mm, pmd, address); if (is_migration_entry(entry)) {
migration_entry_wait(mm, pmd, address);
} else if (is_hwpoison_entry(entry)) {
ret = VM_FAULT_HWPOISON;
} else {
print_bad_pte(vma, address, orig_pte, NULL);
ret = VM_FAULT_OOM;
}
goto out; goto out;
} }
delayacct_set_flag(DELAYACCT_PF_SWAPIN); delayacct_set_flag(DELAYACCT_PF_SWAPIN);
...@@ -2584,6 +2592,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2584,6 +2592,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Had to read the page from swap area: Major fault */ /* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR; ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT); count_vm_event(PGMAJFAULT);
} else if (PageHWPoison(page)) {
ret = VM_FAULT_HWPOISON;
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
goto out;
} }
lock_page(page); lock_page(page);
...@@ -2760,6 +2772,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -2760,6 +2772,12 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
return ret; return ret;
if (unlikely(PageHWPoison(vmf.page))) {
if (ret & VM_FAULT_LOCKED)
unlock_page(vmf.page);
return VM_FAULT_HWPOISON;
}
/* /*
* For consistency in subsequent calls, make the faulted page always * For consistency in subsequent calls, make the faulted page always
* locked. * locked.
......
...@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private, ...@@ -675,7 +675,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
} }
/* Establish migration ptes or remove ptes */ /* Establish migration ptes or remove ptes */
try_to_unmap(page, 1); try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
skip_unmap: skip_unmap:
if (!page_mapped(page)) if (!page_mapped(page))
......
...@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) ...@@ -1149,6 +1149,13 @@ int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page)
EXPORT_SYMBOL(redirty_page_for_writepage); EXPORT_SYMBOL(redirty_page_for_writepage);
/* /*
* Dirty a page.
*
* For pages with a mapping this should be done under the page lock
* for the benefit of asynchronous memory errors, which prefer a consistent
* dirty state. This rule can be broken in some special cases,
* but it is better not to.
*
* If the mapping doesn't provide a set_page_dirty a_op, then * If the mapping doesn't provide a set_page_dirty a_op, then
* just fall through and assume that it wants buffer_heads. * just fall through and assume that it wants buffer_heads.
*/ */
......
...@@ -234,6 +234,12 @@ static void bad_page(struct page *page) ...@@ -234,6 +234,12 @@ static void bad_page(struct page *page)
static unsigned long nr_shown; static unsigned long nr_shown;
static unsigned long nr_unshown; static unsigned long nr_unshown;
/* Don't complain about poisoned pages */
if (PageHWPoison(page)) {
__ClearPageBuddy(page);
return;
}
/* /*
* Allow a burst of 60 reports, then keep quiet for that minute; * Allow a burst of 60 reports, then keep quiet for that minute;
* or allow a steady drip of one report per second. * or allow a steady drip of one report per second.
...@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page, ...@@ -666,7 +672,7 @@ static inline void expand(struct zone *zone, struct page *page,
/* /*
* This page is about to be returned from the page allocator * This page is about to be returned from the page allocator
*/ */
static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) static inline int check_new_page(struct page *page)
{ {
if (unlikely(page_mapcount(page) | if (unlikely(page_mapcount(page) |
(page->mapping != NULL) | (page->mapping != NULL) |
...@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags) ...@@ -675,6 +681,18 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
bad_page(page); bad_page(page);
return 1; return 1;
} }
return 0;
}
static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
int i;
for (i = 0; i < (1 << order); i++) {
struct page *p = page + i;
if (unlikely(check_new_page(p)))
return 1;
}
set_page_private(page, 0); set_page_private(page, 0);
set_page_refcounted(page); set_page_refcounted(page);
......
...@@ -36,6 +36,11 @@ ...@@ -36,6 +36,11 @@
* mapping->tree_lock (widely used, in set_page_dirty, * mapping->tree_lock (widely used, in set_page_dirty,
* in arch-dependent flush_dcache_mmap_lock, * in arch-dependent flush_dcache_mmap_lock,
* within inode_lock in __sync_single_inode) * within inode_lock in __sync_single_inode)
*
* (code doesn't rely on that order so it could be switched around)
* ->tasklist_lock
* anon_vma->lock (memory_failure, collect_procs_anon)
* pte map lock
*/ */
#include <linux/mm.h> #include <linux/mm.h>
...@@ -191,7 +196,7 @@ void __init anon_vma_init(void) ...@@ -191,7 +196,7 @@ void __init anon_vma_init(void)
* Getting a lock on a stable anon_vma from a page off the LRU is * Getting a lock on a stable anon_vma from a page off the LRU is
* tricky: page_lock_anon_vma rely on RCU to guard against the races. * tricky: page_lock_anon_vma rely on RCU to guard against the races.
*/ */
static struct anon_vma *page_lock_anon_vma(struct page *page) struct anon_vma *page_lock_anon_vma(struct page *page)
{ {
struct anon_vma *anon_vma; struct anon_vma *anon_vma;
unsigned long anon_mapping; unsigned long anon_mapping;
...@@ -211,7 +216,7 @@ static struct anon_vma *page_lock_anon_vma(struct page *page) ...@@ -211,7 +216,7 @@ static struct anon_vma *page_lock_anon_vma(struct page *page)
return NULL; return NULL;
} }
static void page_unlock_anon_vma(struct anon_vma *anon_vma) void page_unlock_anon_vma(struct anon_vma *anon_vma)
{ {
spin_unlock(&anon_vma->lock); spin_unlock(&anon_vma->lock);
rcu_read_unlock(); rcu_read_unlock();
...@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm, ...@@ -311,7 +316,7 @@ pte_t *page_check_address(struct page *page, struct mm_struct *mm,
* if the page is not mapped into the page tables of this VMA. Only * if the page is not mapped into the page tables of this VMA. Only
* valid for normal file or anonymous VMAs. * valid for normal file or anonymous VMAs.
*/ */
static int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma) int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
{ {
unsigned long address; unsigned long address;
pte_t *pte; pte_t *pte;
...@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page) ...@@ -756,7 +761,7 @@ void page_remove_rmap(struct page *page)
* repeatedly from either try_to_unmap_anon or try_to_unmap_file. * repeatedly from either try_to_unmap_anon or try_to_unmap_file.
*/ */
static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
int migration) enum ttu_flags flags)
{ {
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
unsigned long address; unsigned long address;
...@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -778,11 +783,13 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* If it's recently referenced (perhaps page_referenced * If it's recently referenced (perhaps page_referenced
* skipped over this mm) then we should reactivate it. * skipped over this mm) then we should reactivate it.
*/ */
if (!migration) { if (!(flags & TTU_IGNORE_MLOCK)) {
if (vma->vm_flags & VM_LOCKED) { if (vma->vm_flags & VM_LOCKED) {
ret = SWAP_MLOCK; ret = SWAP_MLOCK;
goto out_unmap; goto out_unmap;
} }
}
if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address, pte)) { if (ptep_clear_flush_young_notify(vma, address, pte)) {
ret = SWAP_FAIL; ret = SWAP_FAIL;
goto out_unmap; goto out_unmap;
...@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -800,7 +807,14 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* Update high watermark before we lower rss */ /* Update high watermark before we lower rss */
update_hiwater_rss(mm); update_hiwater_rss(mm);
if (PageAnon(page)) { if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) {
if (PageAnon(page))
dec_mm_counter(mm, anon_rss);
else
dec_mm_counter(mm, file_rss);
set_pte_at(mm, address, pte,
swp_entry_to_pte(make_hwpoison_entry(page)));
} else if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) }; swp_entry_t entry = { .val = page_private(page) };
if (PageSwapCache(page)) { if (PageSwapCache(page)) {
...@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -822,12 +836,12 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
* pte. do_swap_page() will wait until the migration * pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling. * pte is removed and then restart fault handling.
*/ */
BUG_ON(!migration); BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
entry = make_migration_entry(page, pte_write(pteval)); entry = make_migration_entry(page, pte_write(pteval));
} }
set_pte_at(mm, address, pte, swp_entry_to_pte(entry)); set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
BUG_ON(pte_file(*pte)); BUG_ON(pte_file(*pte));
} else if (PAGE_MIGRATION && migration) { } else if (PAGE_MIGRATION && (TTU_ACTION(flags) == TTU_MIGRATION)) {
/* Establish migration entry for a file page */ /* Establish migration entry for a file page */
swp_entry_t entry; swp_entry_t entry;
entry = make_migration_entry(page, pte_write(pteval)); entry = make_migration_entry(page, pte_write(pteval));
...@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma) ...@@ -996,12 +1010,13 @@ static int try_to_mlock_page(struct page *page, struct vm_area_struct *vma)
* vm_flags for that VMA. That should be OK, because that vma shouldn't be * vm_flags for that VMA. That should be OK, because that vma shouldn't be
* 'LOCKED. * 'LOCKED.
*/ */
static int try_to_unmap_anon(struct page *page, int unlock, int migration) static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{ {
struct anon_vma *anon_vma; struct anon_vma *anon_vma;
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned int mlocked = 0; unsigned int mlocked = 0;
int ret = SWAP_AGAIN; int ret = SWAP_AGAIN;
int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
if (MLOCK_PAGES && unlikely(unlock)) if (MLOCK_PAGES && unlikely(unlock))
ret = SWAP_SUCCESS; /* default for try_to_munlock() */ ret = SWAP_SUCCESS; /* default for try_to_munlock() */
...@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) ...@@ -1017,7 +1032,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
continue; /* must visit all unlocked vmas */ continue; /* must visit all unlocked vmas */
ret = SWAP_MLOCK; /* saw at least one mlocked vma */ ret = SWAP_MLOCK; /* saw at least one mlocked vma */
} else { } else {
ret = try_to_unmap_one(page, vma, migration); ret = try_to_unmap_one(page, vma, flags);
if (ret == SWAP_FAIL || !page_mapped(page)) if (ret == SWAP_FAIL || !page_mapped(page))
break; break;
} }
...@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) ...@@ -1041,8 +1056,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
/** /**
* try_to_unmap_file - unmap/unlock file page using the object-based rmap method * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
* @page: the page to unmap/unlock * @page: the page to unmap/unlock
* @unlock: request for unlock rather than unmap [unlikely] * @flags: action and flags
* @migration: unmapping for migration - ignored if @unlock
* *
* Find all the mappings of a page using the mapping pointer and the vma chains * Find all the mappings of a page using the mapping pointer and the vma chains
* contained in the address_space struct it points to. * contained in the address_space struct it points to.
...@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration) ...@@ -1054,7 +1068,7 @@ static int try_to_unmap_anon(struct page *page, int unlock, int migration)
* vm_flags for that VMA. That should be OK, because that vma shouldn't be * vm_flags for that VMA. That should be OK, because that vma shouldn't be
* 'LOCKED. * 'LOCKED.
*/ */
static int try_to_unmap_file(struct page *page, int unlock, int migration) static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
{ {
struct address_space *mapping = page->mapping; struct address_space *mapping = page->mapping;
pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
...@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ...@@ -1066,6 +1080,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
unsigned long max_nl_size = 0; unsigned long max_nl_size = 0;
unsigned int mapcount; unsigned int mapcount;
unsigned int mlocked = 0; unsigned int mlocked = 0;
int unlock = TTU_ACTION(flags) == TTU_MUNLOCK;
if (MLOCK_PAGES && unlikely(unlock)) if (MLOCK_PAGES && unlikely(unlock))
ret = SWAP_SUCCESS; /* default for try_to_munlock() */ ret = SWAP_SUCCESS; /* default for try_to_munlock() */
...@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ...@@ -1078,7 +1093,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
continue; /* must visit all vmas */ continue; /* must visit all vmas */
ret = SWAP_MLOCK; ret = SWAP_MLOCK;
} else { } else {
ret = try_to_unmap_one(page, vma, migration); ret = try_to_unmap_one(page, vma, flags);
if (ret == SWAP_FAIL || !page_mapped(page)) if (ret == SWAP_FAIL || !page_mapped(page))
goto out; goto out;
} }
...@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ...@@ -1103,7 +1118,8 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
ret = SWAP_MLOCK; /* leave mlocked == 0 */ ret = SWAP_MLOCK; /* leave mlocked == 0 */
goto out; /* no need to look further */ goto out; /* no need to look further */
} }
if (!MLOCK_PAGES && !migration && (vma->vm_flags & VM_LOCKED)) if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED))
continue; continue;
cursor = (unsigned long) vma->vm_private_data; cursor = (unsigned long) vma->vm_private_data;
if (cursor > max_nl_cursor) if (cursor > max_nl_cursor)
...@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ...@@ -1137,7 +1153,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
do { do {
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
shared.vm_set.list) { shared.vm_set.list) {
if (!MLOCK_PAGES && !migration && if (!MLOCK_PAGES && !(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED)) (vma->vm_flags & VM_LOCKED))
continue; continue;
cursor = (unsigned long) vma->vm_private_data; cursor = (unsigned long) vma->vm_private_data;
...@@ -1177,7 +1193,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ...@@ -1177,7 +1193,7 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
/** /**
* try_to_unmap - try to remove all page table mappings to a page * try_to_unmap - try to remove all page table mappings to a page
* @page: the page to get unmapped * @page: the page to get unmapped
* @migration: migration flag * @flags: action and flags
* *
* Tries to remove all the page table entries which are mapping this * Tries to remove all the page table entries which are mapping this
* page, used in the pageout path. Caller must hold the page lock. * page, used in the pageout path. Caller must hold the page lock.
...@@ -1188,16 +1204,16 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration) ...@@ -1188,16 +1204,16 @@ static int try_to_unmap_file(struct page *page, int unlock, int migration)
* SWAP_FAIL - the page is unswappable * SWAP_FAIL - the page is unswappable
* SWAP_MLOCK - page is mlocked. * SWAP_MLOCK - page is mlocked.
*/ */
int try_to_unmap(struct page *page, int migration) int try_to_unmap(struct page *page, enum ttu_flags flags)
{ {
int ret; int ret;
BUG_ON(!PageLocked(page)); BUG_ON(!PageLocked(page));
if (PageAnon(page)) if (PageAnon(page))
ret = try_to_unmap_anon(page, 0, migration); ret = try_to_unmap_anon(page, flags);
else else
ret = try_to_unmap_file(page, 0, migration); ret = try_to_unmap_file(page, flags);
if (ret != SWAP_MLOCK && !page_mapped(page)) if (ret != SWAP_MLOCK && !page_mapped(page))
ret = SWAP_SUCCESS; ret = SWAP_SUCCESS;
return ret; return ret;
...@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page) ...@@ -1222,8 +1238,8 @@ int try_to_munlock(struct page *page)
VM_BUG_ON(!PageLocked(page) || PageLRU(page)); VM_BUG_ON(!PageLocked(page) || PageLRU(page));
if (PageAnon(page)) if (PageAnon(page))
return try_to_unmap_anon(page, 1, 0); return try_to_unmap_anon(page, TTU_MUNLOCK);
else else
return try_to_unmap_file(page, 1, 0); return try_to_unmap_file(page, TTU_MUNLOCK);
} }
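With the old "int migration" argument replaced by the ttu_flags enum, callers pick an action code (TTU_UNMAP, TTU_MUNLOCK, ...) and test the SWAP_* return codes exactly as before. A minimal sketch of a caller, assuming the flag and return-code names visible in this hunk; example_unmap_for_reclaim() is hypothetical and only illustrates the calling convention:

/*
 * Hypothetical caller sketch (not part of this patch).  Assumes the
 * TTU_UNMAP action code and the SWAP_* return values documented above.
 */
static int example_unmap_for_reclaim(struct page *page)
{
	BUG_ON(!PageLocked(page));

	switch (try_to_unmap(page, TTU_UNMAP)) {
	case SWAP_SUCCESS:
		return 0;		/* all ptes removed, page can go */
	case SWAP_AGAIN:
		return -EAGAIN;		/* transient failure, retry later */
	case SWAP_MLOCK:
		return -EBUSY;		/* page is mlocked, leave it alone */
	default:			/* SWAP_FAIL */
		return -EIO;
	}
}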
...@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping, ...@@ -1633,8 +1633,8 @@ shmem_write_end(struct file *file, struct address_space *mapping,
if (pos + copied > inode->i_size) if (pos + copied > inode->i_size)
i_size_write(inode, pos + copied); i_size_write(inode, pos + copied);
unlock_page(page);
set_page_dirty(page); set_page_dirty(page);
unlock_page(page);
page_cache_release(page); page_cache_release(page);
return copied; return copied;
...@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s ...@@ -1971,13 +1971,13 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s
iput(inode); iput(inode);
return error; return error;
} }
unlock_page(page);
inode->i_mapping->a_ops = &shmem_aops; inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations; inode->i_op = &shmem_symlink_inode_operations;
kaddr = kmap_atomic(page, KM_USER0); kaddr = kmap_atomic(page, KM_USER0);
memcpy(kaddr, symname, len); memcpy(kaddr, symname, len);
kunmap_atomic(kaddr, KM_USER0); kunmap_atomic(kaddr, KM_USER0);
set_page_dirty(page); set_page_dirty(page);
unlock_page(page);
page_cache_release(page); page_cache_release(page);
} }
if (dir->i_mode & S_ISGID) if (dir->i_mode & S_ISGID)
...@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = { ...@@ -2420,6 +2420,7 @@ static const struct address_space_operations shmem_aops = {
.write_end = shmem_write_end, .write_end = shmem_write_end,
#endif #endif
.migratepage = migrate_page, .migratepage = migrate_page,
.error_remove_page = generic_error_remove_page,
}; };
static const struct file_operations shmem_file_operations = { static const struct file_operations shmem_file_operations = {
......
...@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry) ...@@ -699,7 +699,7 @@ int free_swap_and_cache(swp_entry_t entry)
struct swap_info_struct *p; struct swap_info_struct *p;
struct page *page = NULL; struct page *page = NULL;
if (is_migration_entry(entry)) if (non_swap_entry(entry))
return 1; return 1;
p = swap_info_get(entry); p = swap_info_get(entry);
...@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache) ...@@ -2085,7 +2085,7 @@ static int __swap_duplicate(swp_entry_t entry, bool cache)
int count; int count;
bool has_cache; bool has_cache;
if (is_migration_entry(entry)) if (non_swap_entry(entry))
return -EINVAL; return -EINVAL;
type = swp_type(entry); type = swp_type(entry);
......
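Both call sites above now reject every non-swap entry rather than only migration entries, since this series also introduces hardware-poison swap entries. A rough sketch of what non_swap_entry() tests, assuming the usual swapops.h convention that special entry types are allocated past MAX_SWAPFILES; the authoritative definition lives in include/linux/swapops.h:

static inline int non_swap_entry(swp_entry_t entry)
{
	/* migration and hwpoison entry types sit above MAX_SWAPFILES */
	return swp_type(entry) >= MAX_SWAPFILES;
}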
...@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page); ...@@ -93,11 +93,11 @@ EXPORT_SYMBOL(cancel_dirty_page);
* its lock, b) when a concurrent invalidate_mapping_pages got there first and * its lock, b) when a concurrent invalidate_mapping_pages got there first and
* c) when tmpfs swizzles a page between a tmpfs inode and swapper_space. * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
*/ */
static void static int
truncate_complete_page(struct address_space *mapping, struct page *page) truncate_complete_page(struct address_space *mapping, struct page *page)
{ {
if (page->mapping != mapping) if (page->mapping != mapping)
return; return -EIO;
if (page_has_private(page)) if (page_has_private(page))
do_invalidatepage(page, 0); do_invalidatepage(page, 0);
...@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page) ...@@ -108,6 +108,7 @@ truncate_complete_page(struct address_space *mapping, struct page *page)
remove_from_page_cache(page); remove_from_page_cache(page);
ClearPageMappedToDisk(page); ClearPageMappedToDisk(page);
page_cache_release(page); /* pagecache ref */ page_cache_release(page); /* pagecache ref */
return 0;
} }
/* /*
...@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) ...@@ -135,6 +136,51 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
return ret; return ret;
} }
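/*
 * New helper shared by truncation and (later) memory failure handling:
 * zap any remaining user mappings of the page, then detach it from the
 * pagecache.  Returns 0 on success or -EIO when the page no longer
 * belongs to @mapping.
 */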
int truncate_inode_page(struct address_space *mapping, struct page *page)
{
if (page_mapped(page)) {
unmap_mapping_range(mapping,
(loff_t)page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE, 0);
}
return truncate_complete_page(mapping, page);
}
/*
* Used to get rid of pages on hardware memory corruption.
*/
int generic_error_remove_page(struct address_space *mapping, struct page *page)
{
if (!mapping)
return -EINVAL;
/*
* Only punch for normal data pages for now.
* Handling other types like directories would need more auditing.
*/
if (!S_ISREG(mapping->host->i_mode))
return -EIO;
return truncate_inode_page(mapping, page);
}
EXPORT_SYMBOL(generic_error_remove_page);
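Filesystems opt in to this behaviour by wiring up the new address_space operation, exactly as shmem does further down in this diff. A minimal sketch, with the hypothetical examplefs_aops standing in for a real filesystem's operations table:

static const struct address_space_operations examplefs_aops = {
	/* ... the filesystem's usual read/write/migrate methods ... */
	.error_remove_page = generic_error_remove_page,
};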
/*
* Safely invalidate one page from its pagecache mapping.
* It only drops clean, unused pages. The page must be locked.
*
* Returns 1 if the page is successfully invalidated, otherwise 0.
*/
int invalidate_inode_page(struct page *page)
{
struct address_space *mapping = page_mapping(page);
if (!mapping)
return 0;
if (PageDirty(page) || PageWriteback(page))
return 0;
if (page_mapped(page))
return 0;
return invalidate_complete_page(mapping, page);
}
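The new helper lets a caller attempt a gentle drop before resorting to truncation. A hedged sketch of such a caller; example_soft_drop() is hypothetical and simply combines the two helpers introduced above:

static int example_soft_drop(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	int ret;

	if (!mapping)
		return -EINVAL;

	lock_page(page);
	if (invalidate_inode_page(page))
		ret = 0;		/* clean and unused: quietly dropped */
	else
		ret = truncate_inode_page(mapping, page);
	unlock_page(page);
	return ret;
}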
/** /**
* truncate_inode_pages - truncate range of pages specified by start & end byte offsets * truncate_inode_pages - truncate range of pages specified by start & end byte offsets
* @mapping: mapping to truncate * @mapping: mapping to truncate
...@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -196,12 +242,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
unlock_page(page); unlock_page(page);
continue; continue;
} }
if (page_mapped(page)) { truncate_inode_page(mapping, page);
unmap_mapping_range(mapping,
(loff_t)page_index<<PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE, 0);
}
truncate_complete_page(mapping, page);
unlock_page(page); unlock_page(page);
} }
pagevec_release(&pvec); pagevec_release(&pvec);
...@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping, ...@@ -238,15 +279,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
break; break;
lock_page(page); lock_page(page);
wait_on_page_writeback(page); wait_on_page_writeback(page);
if (page_mapped(page)) { truncate_inode_page(mapping, page);
unmap_mapping_range(mapping,
(loff_t)page->index<<PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE, 0);
}
if (page->index > next) if (page->index > next)
next = page->index; next = page->index;
next++; next++;
truncate_complete_page(mapping, page);
unlock_page(page); unlock_page(page);
} }
pagevec_release(&pvec); pagevec_release(&pvec);
...@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping, ...@@ -311,12 +347,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
if (lock_failed) if (lock_failed)
continue; continue;
if (PageDirty(page) || PageWriteback(page)) ret += invalidate_inode_page(page);
goto unlock;
if (page_mapped(page))
goto unlock;
ret += invalidate_complete_page(mapping, page);
unlock:
unlock_page(page); unlock_page(page);
if (next > end) if (next > end)
break; break;
......
...@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list, ...@@ -663,7 +663,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
* processes. Try to unmap it here. * processes. Try to unmap it here.
*/ */
if (page_mapped(page) && mapping) { if (page_mapped(page) && mapping) {
switch (try_to_unmap(page, 0)) { switch (try_to_unmap(page, TTU_UNMAP)) {
case SWAP_FAIL: case SWAP_FAIL:
goto activate_locked; goto activate_locked;
case SWAP_AGAIN: case SWAP_AGAIN:
......