Commit 8664b90b authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "21 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  writeback: fix initial dirty limit
  mm/memory-failure: set PageHWPoison before migrate_pages()
  mm: check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_*
  mm/memory-failure: give up error handling for non-tail-refcounted thp
  mm/memory-failure: fix race in counting num_poisoned_pages
  mm/memory-failure: unlock_page before put_page
  ipc: use private shmem or hugetlbfs inodes for shm segments.
  mm: initialize hotplugged pages as reserved
  ocfs2: fix shift left overflow
  kthread: export kthread functions
  fsnotify: fix oops in fsnotify_clear_marks_by_group_flags()
  lib/iommu-common.c: do not use 0xffffffffffffffffl for computing align_mask
  mm/slub: allow merging when SLAB_DEBUG_FREE is set
  signalfd: fix information leak in signalfd_copyinfo
  signal: fix information leak in copy_siginfo_to_user
  signal: fix information leak in copy_siginfo_from_user32
  ocfs2: fix BUG in ocfs2_downconvert_thread_do_work()
  fs, file table: reinit files_stat.max_files after deferred memory initialisation
  mm, meminit: replace rwsem with completion
  mm, meminit: allow early_pfn_to_nid to be used during runtime
  ...
parents a58997e1 a50fcb51
@@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 	 * Other callers might not initialize the si_lsb field,
 	 * so check explicitely for the right codes here.
 	 */
-	if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
+	if (from->si_signo == SIGBUS &&
+	    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
 		err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
 #endif
 		break;
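The si_signo test added above matters because si_code values are small integers whose meaning depends on the signal: the BUS_MCEERR_AR/AO values are shared with unrelated codes of other fault signals, so testing si_code alone can copy out an si_addr_lsb field that was never initialised. A minimal user-space sketch of the corrected predicate (illustrative only, not kernel code; BUS_MCEERR_* assumed visible with _GNU_SOURCE):

	#define _GNU_SOURCE
	#include <signal.h>
	#include <stdbool.h>

	/* Only SIGBUS with a machine-check si_code carries a valid si_addr_lsb. */
	static bool siginfo_has_addr_lsb(const siginfo_t *si)
	{
		return si->si_signo == SIGBUS &&
		       (si->si_code == BUS_MCEERR_AR || si->si_code == BUS_MCEERR_AO);
	}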
@@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 {
-	memset(to, 0, sizeof *to);
-
 	if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) ||
 	    copy_from_user(to->_sifields._pad,
 			   from->_sifields._pad, SI_PAD_SIZE))
...
@@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
 int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 {
-	memset(to, 0, sizeof *to);
-
 	if (copy_from_user(to, from, 3*sizeof(int)) ||
 	    copy_from_user(to->_sifields._pad,
 			   from->_sifields._pad, SI_PAD_SIZE32))
...
@@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
 int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 {
-	memset(to, 0, sizeof *to);
-
 	if (copy_from_user(to, from, 3*sizeof(int)) ||
 	    copy_from_user(to->_sifields._pad,
 			   from->_sifields._pad, SI_PAD_SIZE32))
...
@@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
 	if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
 		return -EFAULT;
 
-	memset(to, 0, sizeof(*to));
-
 	err = __get_user(to->si_signo, &from->si_signo);
 	err |= __get_user(to->si_errno, &from->si_errno);
 	err |= __get_user(to->si_code, &from->si_code);
...
@@ -3442,22 +3442,15 @@ void __init vfs_caches_init_early(void)
 	inode_init_early();
 }
 
-void __init vfs_caches_init(unsigned long mempages)
+void __init vfs_caches_init(void)
 {
-	unsigned long reserve;
-
-	/* Base hash sizes on available memory, with a reserve equal to
-	   150% of current kernel size */
-
-	reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
-	mempages -= reserve;
-
 	names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
 
 	dcache_init();
 	inode_init();
-	files_init(mempages);
+	files_init();
+	files_maxfiles_init();
 	mnt_init();
 	bdev_cache_init();
 	chrdev_init();
...
@@ -25,6 +25,7 @@
 #include <linux/hardirq.h>
 #include <linux/task_work.h>
 #include <linux/ima.h>
+#include <linux/swap.h>
 
 #include <linux/atomic.h>
@@ -308,19 +309,24 @@ void put_filp(struct file *file)
 	}
 }
 
-void __init files_init(unsigned long mempages)
+void __init files_init(void)
 {
-	unsigned long n;
-
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
+}
 
-	/*
-	 * One file with associated inode and dcache is very roughly 1K.
-	 * Per default don't use more than 10% of our memory for files.
-	 */
+/*
+ * One file with associated inode and dcache is very roughly 1K. Per default
+ * do not use more than 10% of our memory for files.
+ */
+void __init files_maxfiles_init(void)
+{
+	unsigned long n;
+	unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
 
-	n = (mempages * (PAGE_SIZE / 1024)) / 10;
+	memreserve = min(memreserve, totalram_pages - 1);
+	n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
+
 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
-	percpu_counter_init(&nr_files, 0, GFP_KERNEL);
 }
@@ -1010,6 +1010,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
 	inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
 	if (!inode)
 		goto out_dentry;
+	if (creat_flags == HUGETLB_SHMFS_INODE)
+		inode->i_flags |= S_PRIVATE;
 
 	file = ERR_PTR(-ENOMEM);
 	if (hugetlb_reserve_pages(inode, 0,
...
@@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
 					 unsigned int flags)
 {
 	struct fsnotify_mark *lmark, *mark;
+	LIST_HEAD(to_free);
 
+	/*
+	 * We have to be really careful here. Anytime we drop mark_mutex, e.g.
+	 * fsnotify_clear_marks_by_inode() can come and free marks. Even in our
+	 * to_free list so we have to use mark_mutex even when accessing that
+	 * list. And freeing mark requires us to drop mark_mutex. So we can
+	 * reliably free only the first mark in the list. That's why we first
+	 * move marks to free to to_free list in one go and then free marks in
+	 * to_free list one by one.
+	 */
 	mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
 	list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
-		if (mark->flags & flags) {
-			fsnotify_get_mark(mark);
-			fsnotify_destroy_mark_locked(mark, group);
-			fsnotify_put_mark(mark);
-		}
+		if (mark->flags & flags)
+			list_move(&mark->g_list, &to_free);
 	}
 	mutex_unlock(&group->mark_mutex);
+
+	while (1) {
+		mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
+		if (list_empty(&to_free)) {
+			mutex_unlock(&group->mark_mutex);
+			break;
+		}
+		mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
+		fsnotify_get_mark(mark);
+		fsnotify_destroy_mark_locked(mark, group);
+		mutex_unlock(&group->mark_mutex);
+		fsnotify_put_mark(mark);
+	}
 }
 
 /*
...
@@ -685,7 +685,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
 	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
 		u64 s = i_size_read(inode);
-		sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
+		sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) +
 			(do_div(s, osb->s_clustersize) >> 9);
 
 		ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
@@ -910,7 +910,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
 	BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
 
 	ret = blkdev_issue_zeroout(osb->sb->s_bdev,
-			p_cpos << (osb->s_clustersize_bits - 9),
+			(u64)p_cpos << (osb->s_clustersize_bits - 9),
 			zero_len_head >> 9, GFP_NOFS, false);
 	if (ret < 0)
 		mlog_errno(ret);
...
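Both hunks above add a (u64) cast because p_cpos is a 32-bit cluster offset: shifting it left by the cluster-to-sector conversion is otherwise evaluated in 32-bit arithmetic and can wrap before the result is widened to a sector number. A stand-alone illustration with made-up values:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t p_cpos = 0x01000000;            /* cluster number */
		unsigned int shift = 11;                 /* e.g. 1 MB clusters: 20 - 9 */

		uint64_t wrong = p_cpos << shift;        /* shift done in 32 bits: wraps to 0 */
		uint64_t right = (uint64_t)p_cpos << shift;

		printf("wrong = 0x%llx\n", (unsigned long long)wrong);  /* 0x0         */
		printf("right = 0x%llx\n", (unsigned long long)right);  /* 0x800000000 */
		return 0;
	}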
@@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
 	osb->dc_work_sequence = osb->dc_wake_sequence;
 
 	processed = osb->blocked_lock_count;
-	while (processed) {
-		BUG_ON(list_empty(&osb->blocked_lock_list));
-
+	/*
+	 * blocked lock processing in this loop might call iput which can
+	 * remove items off osb->blocked_lock_list. Downconvert up to
+	 * 'processed' number of locks, but stop short if we had some
+	 * removed in ocfs2_mark_lockres_freeing when downconverting.
+	 */
+	while (processed && !list_empty(&osb->blocked_lock_list)) {
 		lockres = list_entry(osb->blocked_lock_list.next,
 				     struct ocfs2_lock_res, l_blocked_list);
 		list_del_init(&lockres->l_blocked_list);
...
@@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
 		 * Other callers might not initialize the si_lsb field,
 		 * so check explicitly for the right codes here.
 		 */
-		if (kinfo->si_code == BUS_MCEERR_AR ||
-		    kinfo->si_code == BUS_MCEERR_AO)
+		if (kinfo->si_signo == SIGBUS &&
+		    (kinfo->si_code == BUS_MCEERR_AR ||
+		     kinfo->si_code == BUS_MCEERR_AO))
 			err |= __put_user((short) kinfo->si_addr_lsb,
 					  &uinfo->ssi_addr_lsb);
 #endif
...
@@ -55,7 +55,8 @@ struct vm_fault;
 extern void __init inode_init(void);
 extern void __init inode_init_early(void);
-extern void __init files_init(unsigned long);
+extern void __init files_init(void);
+extern void __init files_maxfiles_init(void);
 
 extern struct files_stat_struct files_stat;
 extern unsigned long get_max_files(void);
@@ -2245,7 +2246,7 @@ extern int ioctl_preallocate(struct file *filp, void __user *argp);
 /* fs/dcache.c */
 extern void __init vfs_caches_init_early(void);
-extern void __init vfs_caches_init(unsigned long);
+extern void __init vfs_caches_init(void);
 extern struct kmem_cache *names_cachep;
...
@@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
 	 1 << PG_private | 1 << PG_private_2 | \
 	 1 << PG_writeback | 1 << PG_reserved | \
 	 1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
-	 1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
+	 1 << PG_unevictable | __PG_MLOCKED | \
 	 __PG_COMPOUND_LOCK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have any flags set.  It they are set,
+ * Pages being prepped should not have these flags set.  It they are set,
  * there has been a kernel bug or struct page corruption.
+ *
+ * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
+ * alloc-free cycle to prevent from reusing the page.
  */
-#define PAGE_FLAGS_CHECK_AT_PREP	((1 << NR_PAGEFLAGS) - 1)
+#define PAGE_FLAGS_CHECK_AT_PREP \
+	(((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
 
 #define PAGE_FLAGS_PRIVATE				\
 	(1 << PG_private | 1 << PG_private_2)
...
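The new definition keeps __PG_HWPOISON out of the prep-time check while every other page flag is still covered. The same bit arithmetic with stand-in values (an illustrative flag layout, not the kernel's):

	#include <stdio.h>

	#define NR_FLAGS      6
	#define FLAG_HWPOISON (1u << 4)                         /* stand-in for __PG_HWPOISON */
	#define CHECK_AT_PREP (((1u << NR_FLAGS) - 1) & ~FLAG_HWPOISON)

	int main(void)
	{
		printf("all flags     = 0x%02x\n", (1u << NR_FLAGS) - 1);  /* 0x3f */
		printf("check at prep = 0x%02x\n", CHECK_AT_PREP);         /* 0x2f */
		return 0;
	}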
@@ -656,7 +656,7 @@ asmlinkage __visible void __init start_kernel(void)
 	key_init();
 	security_init();
 	dbg_late_init();
-	vfs_caches_init(totalram_pages);
+	vfs_caches_init();
 	signals_init();
 	/* rootfs populating might need page-writeback */
 	page_writeback_init();
...
@@ -142,7 +142,6 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
 		if (!leaf)
 			return -ENOMEM;
 		INIT_LIST_HEAD(&leaf->msg_list);
-		info->qsize += sizeof(*leaf);
 	}
 	leaf->priority = msg->m_type;
 	rb_link_node(&leaf->rb_node, parent, p);
@@ -187,7 +186,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
 				"lazy leaf delete!\n");
 		rb_erase(&leaf->rb_node, &info->msg_tree);
 		if (info->node_cache) {
-			info->qsize -= sizeof(*leaf);
 			kfree(leaf);
 		} else {
 			info->node_cache = leaf;
@@ -200,7 +198,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
 		if (list_empty(&leaf->msg_list)) {
 			rb_erase(&leaf->rb_node, &info->msg_tree);
 			if (info->node_cache) {
-				info->qsize -= sizeof(*leaf);
 				kfree(leaf);
 			} else {
 				info->node_cache = leaf;
@@ -1034,7 +1031,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
 			/* Save our speculative allocation into the cache */
 			INIT_LIST_HEAD(&new_leaf->msg_list);
 			info->node_cache = new_leaf;
-			info->qsize += sizeof(*new_leaf);
 			new_leaf = NULL;
 		} else {
 			kfree(new_leaf);
@@ -1142,7 +1138,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
 			/* Save our speculative allocation into the cache */
 			INIT_LIST_HEAD(&new_leaf->msg_list);
 			info->node_cache = new_leaf;
-			info->qsize += sizeof(*new_leaf);
 		} else {
 			kfree(new_leaf);
 		}
...
@@ -545,7 +545,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 		if ((shmflg & SHM_NORESERVE) &&
 				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
 			acctflag = VM_NORESERVE;
-		file = shmem_file_setup(name, size, acctflag);
+		file = shmem_kernel_file_setup(name, size, acctflag);
 	}
 	error = PTR_ERR(file);
 	if (IS_ERR(file))
...
@@ -97,6 +97,7 @@ bool kthread_should_park(void)
 {
 	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
 }
+EXPORT_SYMBOL_GPL(kthread_should_park);
 
 /**
  * kthread_freezable_should_stop - should this freezable kthread return now?
@@ -171,6 +172,7 @@ void kthread_parkme(void)
 {
 	__kthread_parkme(to_kthread(current));
 }
+EXPORT_SYMBOL_GPL(kthread_parkme);
 
 static int kthread(void *_create)
 {
@@ -411,6 +413,7 @@ void kthread_unpark(struct task_struct *k)
 	if (kthread)
 		__kthread_unpark(k, kthread);
 }
+EXPORT_SYMBOL_GPL(kthread_unpark);
 
 /**
  * kthread_park - park a thread created by kthread_create().
@@ -441,6 +444,7 @@ int kthread_park(struct task_struct *k)
 	}
 	return ret;
 }
+EXPORT_SYMBOL_GPL(kthread_park);
 
 /**
  * kthread_stop - stop a thread created by kthread_create().
...
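The four EXPORT_SYMBOL_GPL lines above exist so that modular code can take part in the park/unpark protocol. A sketch of a module worker thread that honours parking (an illustrative skeleton assuming the standard kthread API, not code from this merge):

	#include <linux/module.h>
	#include <linux/kthread.h>
	#include <linux/delay.h>
	#include <linux/err.h>

	static struct task_struct *worker;

	static int worker_fn(void *data)
	{
		while (!kthread_should_stop()) {
			if (kthread_should_park())
				kthread_parkme();       /* sleep here while parked */
			/* ... do one unit of work ... */
			msleep(100);
		}
		return 0;
	}

	static int __init demo_init(void)
	{
		worker = kthread_run(worker_fn, NULL, "demo_worker");
		return PTR_ERR_OR_ZERO(worker);
	}

	static void __exit demo_exit(void)
	{
		kthread_stop(worker);
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");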
@@ -2748,12 +2748,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
 		 * Other callers might not initialize the si_lsb field,
 		 * so check explicitly for the right codes here.
 		 */
-		if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
+		if (from->si_signo == SIGBUS &&
+		    (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
 			err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
 #endif
 #ifdef SEGV_BNDERR
-		err |= __put_user(from->si_lower, &to->si_lower);
-		err |= __put_user(from->si_upper, &to->si_upper);
+		if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
+			err |= __put_user(from->si_lower, &to->si_lower);
+			err |= __put_user(from->si_upper, &to->si_upper);
+		}
 #endif
 		break;
 	case __SI_CHLD:
@@ -3017,7 +3020,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
 			int, sig,
 			struct compat_siginfo __user *, uinfo)
 {
-	siginfo_t info;
+	siginfo_t info = {};
 	int ret = copy_siginfo_from_user32(&info, uinfo);
 	if (unlikely(ret))
 		return ret;
@@ -3061,7 +3064,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
 			int, sig,
 			struct compat_siginfo __user *, uinfo)
 {
-	siginfo_t info;
+	siginfo_t info = {};
 
 	if (copy_siginfo_from_user32(&info, uinfo))
 		return -EFAULT;
...
@@ -119,7 +119,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev,
 	unsigned long align_mask = 0;
 
 	if (align_order > 0)
-		align_mask = 0xffffffffffffffffl >> (64 - align_order);
+		align_mask = ~0ul >> (BITS_PER_LONG - align_order);
 
 	/* Sanity check */
 	if (unlikely(npages == 0)) {
...
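The replacement derives the all-ones mask from the native word width instead of hard-coding a 64-bit literal and the constant 64. A stand-alone sketch of the idiom (assuming 0 < align_order <= the word size):

	#include <limits.h>
	#include <stdio.h>

	#define BITS_PER_ULONG (sizeof(unsigned long) * CHAR_BIT)

	static unsigned long align_mask(unsigned int align_order)
	{
		/* low 'align_order' bits set, sized to the native long */
		return ~0ul >> (BITS_PER_ULONG - align_order);
	}

	int main(void)
	{
		printf("order 3  -> mask 0x%lx\n", align_mask(3));   /* 0x7   */
		printf("order 12 -> mask 0x%lx\n", align_mask(12));  /* 0xfff */
		return 0;
	}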
@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page,
 	/* after clearing PageTail the gup refcount can be released */
 	smp_mb__after_atomic();
 
-	/*
-	 * retain hwpoison flag of the poisoned tail page:
-	 *   fix for the unsuitable process killed on Guest Machine(KVM)
-	 *   by the memory-failure.
-	 */
-	page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
+	page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
 	page_tail->flags |= (page->flags &
 			     ((1L << PG_referenced) |
 			      (1L << PG_swapbacked) |
...
@@ -909,6 +909,18 @@ int get_hwpoison_page(struct page *page)
 	 * directly for tail pages.
 	 */
 	if (PageTransHuge(head)) {
+		/*
+		 * Non anonymous thp exists only in allocation/free time. We
+		 * can't handle such a case correctly, so let's give it up.
+		 * This should be better than triggering BUG_ON when kernel
+		 * tries to touch the "partially handled" page.
+		 */
+		if (!PageAnon(head)) {
+			pr_err("MCE: %#lx: non anonymous thp\n",
+				page_to_pfn(page));
+			return 0;
+		}
+
 		if (get_page_unless_zero(head)) {
 			if (PageTail(page))
 				get_page(page);
@@ -1134,15 +1146,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 	if (!PageHuge(p) && PageTransHuge(hpage)) {
-		if (!PageAnon(hpage)) {
-			pr_err("MCE: %#lx: non anonymous thp\n", pfn);
-			if (TestClearPageHWPoison(p))
-				atomic_long_sub(nr_pages, &num_poisoned_pages);
-			put_page(p);
-			if (p != hpage)
-				put_page(hpage);
-			return -EBUSY;
-		}
 		if (unlikely(split_huge_page(hpage))) {
 			pr_err("MCE: %#lx: thp split failed\n", pfn);
 			if (TestClearPageHWPoison(p))
@@ -1209,9 +1212,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	if (!PageHWPoison(p)) {
 		printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
 		atomic_long_sub(nr_pages, &num_poisoned_pages);
+		unlock_page(hpage);
 		put_page(hpage);
-		res = 0;
-		goto out;
+		return 0;
 	}
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
@@ -1656,6 +1659,8 @@ static int __soft_offline_page(struct page *page, int flags)
 		inc_zone_page_state(page, NR_ISOLATED_ANON +
 					page_is_file_cache(page));
 		list_add(&page->lru, &pagelist);
+		if (!TestSetPageHWPoison(page))
+			atomic_long_inc(&num_poisoned_pages);
 		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
 					MIGRATE_SYNC, MR_MEMORY_FAILURE);
 		if (ret) {
@@ -1670,9 +1675,8 @@ static int __soft_offline_page(struct page *page, int flags)
 				pfn, ret, page->flags);
 			if (ret > 0)
 				ret = -EIO;
-		} else {
-			SetPageHWPoison(page);
-			atomic_long_inc(&num_poisoned_pages);
+			if (TestClearPageHWPoison(page))
+				atomic_long_dec(&num_poisoned_pages);
 		}
 	} else {
 		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
...
@@ -446,7 +446,7 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	int nr_pages = PAGES_PER_SECTION;
 	int nid = pgdat->node_id;
 	int zone_type;
-	unsigned long flags;
+	unsigned long flags, pfn;
 	int ret;
 
 	zone_type = zone - pgdat->node_zones;
@@ -461,6 +461,14 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
 	pgdat_resize_unlock(zone->zone_pgdat, &flags);
 	memmap_init_zone(nr_pages, nid, zone_type,
 			 phys_start_pfn, MEMMAP_HOTPLUG);
+
+	/* online_page_range is called later and expects pages reserved */
+	for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
+		if (!pfn_valid(pfn))
+			continue;
+
+		SetPageReserved(pfn_to_page(pfn));
+	}
+
 	return 0;
 }
...
@@ -880,7 +880,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 	/* Establish migration ptes or remove ptes */
 	if (page_mapped(page)) {
 		try_to_unmap(page,
-			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
+			TTU_IGNORE_HWPOISON);
 		page_was_mapped = 1;
 	}
@@ -950,7 +951,10 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 		list_del(&page->lru);
 		dec_zone_page_state(page, NR_ISOLATED_ANON +
 				page_is_file_cache(page));
-		if (reason != MR_MEMORY_FAILURE)
+		/* Soft-offlined page shouldn't go through lru cache list */
+		if (reason == MR_MEMORY_FAILURE)
+			put_page(page);
+		else
 			putback_lru_page(page);
 	}
...
@@ -2063,10 +2063,10 @@ static struct notifier_block ratelimit_nb = {
  */
 void __init page_writeback_init(void)
 {
+	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
+
 	writeback_set_ratelimit();
 	register_cpu_notifier(&ratelimit_nb);
-
-	BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
 }
 
 /**
...
@@ -18,7 +18,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/interrupt.h>
-#include <linux/rwsem.h>
 #include <linux/pagemap.h>
 #include <linux/jiffies.h>
 #include <linux/bootmem.h>
@@ -981,21 +980,21 @@ static void __init __free_pages_boot_core(struct page *page,
 #if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
 	defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
-/* Only safe to use early in boot when initialisation is single-threaded */
 static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
 
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
+	static DEFINE_SPINLOCK(early_pfn_lock);
 	int nid;
 
-	/* The system will behave unpredictably otherwise */
-	BUG_ON(system_state != SYSTEM_BOOTING);
-
+	spin_lock(&early_pfn_lock);
 	nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
-	if (nid >= 0)
-		return nid;
-	/* just returns 0 */
-	return 0;
+	if (nid < 0)
+		nid = 0;
+	spin_unlock(&early_pfn_lock);
+
+	return nid;
 }
 #endif
@@ -1060,7 +1059,15 @@ static void __init deferred_free_range(struct page *page,
 		__free_pages_boot_core(page, pfn, 0);
 }
 
-static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+/* Completion tracking for deferred_init_memmap() threads */
+static atomic_t pgdat_init_n_undone __initdata;
+static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
+
+static inline void __init pgdat_init_report_one_done(void)
+{
+	if (atomic_dec_and_test(&pgdat_init_n_undone))
+		complete(&pgdat_init_all_done_comp);
+}
 
 /* Initialise remaining memory on a node */
 static int __init deferred_init_memmap(void *data)
@@ -1077,7 +1084,7 @@ static int __init deferred_init_memmap(void *data)
 	const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
 
 	if (first_init_pfn == ULONG_MAX) {
-		up_read(&pgdat_init_rwsem);
+		pgdat_init_report_one_done();
 		return 0;
 	}
@@ -1177,7 +1184,8 @@ static int __init deferred_init_memmap(void *data)
 	pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
 					jiffies_to_msecs(jiffies - start));
-	up_read(&pgdat_init_rwsem);
+
+	pgdat_init_report_one_done();
 	return 0;
 }
@@ -1185,14 +1193,17 @@ void __init page_alloc_init_late(void)
 {
 	int nid;
 
+	/* There will be num_node_state(N_MEMORY) threads */
+	atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
 	for_each_node_state(nid, N_MEMORY) {
-		down_read(&pgdat_init_rwsem);
 		kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
 	}
 
 	/* Block until all are initialised */
-	down_write(&pgdat_init_rwsem);
-	up_write(&pgdat_init_rwsem);
+	wait_for_completion(&pgdat_init_all_done_comp);
+
+	/* Reinit limits that are based on free pages after the kernel is up */
+	files_maxfiles_init();
 }
 
 #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
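The rwsem juggling is replaced here by a counter plus a completion: every pgdatinit thread calls pgdat_init_report_one_done() when it finishes, and the boot path blocks in wait_for_completion() until the last one reports. A user-space analog of the same rendezvous, with a pthread mutex and condition variable standing in for the kernel's atomic_t and struct completion (illustrative only):

	#include <pthread.h>
	#include <stdio.h>

	#define NWORKERS 4

	static int n_undone = NWORKERS;                  /* protected by m */
	static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t all_done = PTHREAD_COND_INITIALIZER;

	static void report_one_done(void)
	{
		pthread_mutex_lock(&m);
		if (--n_undone == 0)
			pthread_cond_signal(&all_done);
		pthread_mutex_unlock(&m);
	}

	static void *worker(void *arg)
	{
		/* ... per-node initialisation work would go here ... */
		report_one_done();
		return NULL;
	}

	int main(void)
	{
		pthread_t tid[NWORKERS];

		for (int i = 0; i < NWORKERS; i++)
			pthread_create(&tid[i], NULL, worker, NULL);

		pthread_mutex_lock(&m);
		while (n_undone > 0)                     /* wait_for_completion() analog */
			pthread_cond_wait(&all_done, &m);
		pthread_mutex_unlock(&m);

		puts("all workers done");
		for (int i = 0; i < NWORKERS; i++)
			pthread_join(tid[i], NULL);
		return 0;
	}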
@@ -1285,6 +1296,10 @@ static inline int check_new_page(struct page *page)
 		bad_reason = "non-NULL mapping";
 	if (unlikely(atomic_read(&page->_count) != 0))
 		bad_reason = "nonzero _count";
+	if (unlikely(page->flags & __PG_HWPOISON)) {
+		bad_reason = "HWPoisoned (hardware-corrupted)";
+		bad_flags = __PG_HWPOISON;
+	}
 	if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
 		bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
 		bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
...
@@ -3363,8 +3363,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
  * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
  *	kernel internal.  There will be NO LSM permission checks against the
  *	underlying inode.  So users of this interface must do LSM checks at a
- *	higher layer.  The one user is the big_key implementation.  LSM checks
- *	are provided at the key level rather than the inode level.
+ *	higher layer.  The users are the big_key and shm implementations.  LSM
+ *	checks are provided at the key or shm level rather than the inode.
  * @name: name for dentry (to be seen in /proc/<pid>/maps
  * @size: size to be set for the file
  * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
...
@@ -37,8 +37,7 @@ struct kmem_cache *kmem_cache;
 		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
 		SLAB_FAILSLAB)
 
-#define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
-		SLAB_CACHE_DMA | SLAB_NOTRACK)
+#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
...