Commit 8664b90b authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "21 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  writeback: fix initial dirty limit
  mm/memory-failure: set PageHWPoison before migrate_pages()
  mm: check __PG_HWPOISON separately from PAGE_FLAGS_CHECK_AT_*
  mm/memory-failure: give up error handling for non-tail-refcounted thp
  mm/memory-failure: fix race in counting num_poisoned_pages
  mm/memory-failure: unlock_page before put_page
  ipc: use private shmem or hugetlbfs inodes for shm segments.
  mm: initialize hotplugged pages as reserved
  ocfs2: fix shift left overflow
  kthread: export kthread functions
  fsnotify: fix oops in fsnotify_clear_marks_by_group_flags()
  lib/iommu-common.c: do not use 0xffffffffffffffffl for computing align_mask
  mm/slub: allow merging when SLAB_DEBUG_FREE is set
  signalfd: fix information leak in signalfd_copyinfo
  signal: fix information leak in copy_siginfo_to_user
  signal: fix information leak in copy_siginfo_from_user32
  ocfs2: fix BUG in ocfs2_downconvert_thread_do_work()
  fs, file table: reinit files_stat.max_files after deferred memory initialisation
  mm, meminit: replace rwsem with completion
  mm, meminit: allow early_pfn_to_nid to be used during runtime
  ...
parents a58997e1 a50fcb51
......@@ -168,7 +168,8 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
if (from->si_signo == SIGBUS &&
(from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
#endif
break;
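
The hunk above, together with the matching signalfd and kernel/signal.c hunks further down, adds an si_signo test because the BUS_MCEERR_* codes are small integers that other signals reuse for their own si_code values; without the SIGBUS check, the si_addr_lsb copy could run for an unrelated signal and forward uninitialized kernel stack to userspace. A minimal userspace sketch of the guarded-copy idea, assuming the numeric values from asm-generic/siginfo.h; the structs and the copy_info() helper are illustrative stand-ins, not kernel types:

```c
/* Sketch only: why the SIGBUS check matters. The constants mirror
 * include/uapi/asm-generic/siginfo.h; the structs are simplified
 * stand-ins, not the kernel's siginfo_t/compat_siginfo_t. */
#include <stdio.h>
#include <string.h>

#define SIGBUS_NR     7
#define SIGTRAP_NR    5
#define BUS_MCEERR_AR 4    /* action required MCE */
#define BUS_MCEERR_AO 5    /* action optional MCE */
#define TRAP_HWBKPT   4    /* same numeric value as BUS_MCEERR_AR */

struct src { int si_signo, si_code; short si_addr_lsb; };
struct dst { int signo, code; short addr_lsb; };

static void copy_info(struct dst *d, const struct src *s)
{
    memset(d, 0, sizeof(*d));            /* never pass stale bytes on */
    d->signo = s->si_signo;
    d->code  = s->si_code;
    /* Without the si_signo test, a SIGTRAP/TRAP_HWBKPT record would
     * match BUS_MCEERR_AR and copy a field that was never set. */
    if (s->si_signo == SIGBUS_NR &&
        (s->si_code == BUS_MCEERR_AR || s->si_code == BUS_MCEERR_AO))
        d->addr_lsb = s->si_addr_lsb;
}

int main(void)
{
    struct src trap = { SIGTRAP_NR, TRAP_HWBKPT, 0x7f };
    struct dst d;

    copy_info(&d, &trap);
    printf("addr_lsb forwarded: %#x (0 means correctly skipped)\n",
           (unsigned)d.addr_lsb);
    return 0;
}
```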
......@@ -201,8 +202,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
memset(to, 0, sizeof *to);
if (copy_from_user(to, from, __ARCH_SI_PREAMBLE_SIZE) ||
copy_from_user(to->_sifields._pad,
from->_sifields._pad, SI_PAD_SIZE))
......
......@@ -409,8 +409,6 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
{
memset(to, 0, sizeof *to);
if (copy_from_user(to, from, 3*sizeof(int)) ||
copy_from_user(to->_sifields._pad,
from->_sifields._pad, SI_PAD_SIZE32))
......
......@@ -966,8 +966,6 @@ int copy_siginfo_to_user32(struct compat_siginfo __user *d, const siginfo_t *s)
int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
{
memset(to, 0, sizeof *to);
if (copy_from_user(to, from, 3*sizeof(int)) ||
copy_from_user(to->_sifields._pad,
from->_sifields._pad, SI_PAD_SIZE32))
......
......@@ -113,8 +113,6 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from)
if (!access_ok(VERIFY_READ, from, sizeof(struct compat_siginfo)))
return -EFAULT;
memset(to, 0, sizeof(*to));
err = __get_user(to->si_signo, &from->si_signo);
err |= __get_user(to->si_errno, &from->si_errno);
err |= __get_user(to->si_code, &from->si_code);
......
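
All four copy_siginfo_from_user32() hunks above add a memset() before the structure is partially populated from userspace, and the kernel/signal.c hunk near the end switches the on-stack siginfo to "= {}" for the same reason: only a handful of fields are ever written, so any bytes left untouched would otherwise carry whatever happened to be on the kernel stack. A small userspace sketch of the pattern, using a 0xAA sentinel to stand in for stale stack contents; the struct and its fields are invented for illustration:

```c
/* Sketch only: why partially-filled structs must be zeroed first.
 * The sentinel 0xAA stands in for whatever was already on the stack. */
#include <stdio.h>
#include <string.h>

struct info {
    int  signo;
    int  errnum;
    int  code;
    char pad[16];      /* fields the partial "copy in" below never touches */
};

static void fill_prefix(struct info *to, int zero_first)
{
    memset(to, 0xAA, sizeof(*to));       /* simulate stale stack data */
    if (zero_first)
        memset(to, 0, sizeof(*to));      /* the added memset() / "= {}" */
    to->signo  = 11;                     /* only the 3-int preamble is set */
    to->errnum = 0;
    to->code   = 1;
}

int main(void)
{
    struct info i;

    fill_prefix(&i, 0);
    printf("without memset, pad[0] = %#x (leaked)\n",
           (unsigned)(unsigned char)i.pad[0]);
    fill_prefix(&i, 1);
    printf("with memset,    pad[0] = %#x\n",
           (unsigned)(unsigned char)i.pad[0]);
    return 0;
}
```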
......@@ -3442,22 +3442,15 @@ void __init vfs_caches_init_early(void)
inode_init_early();
}
void __init vfs_caches_init(unsigned long mempages)
void __init vfs_caches_init(void)
{
unsigned long reserve;
/* Base hash sizes on available memory, with a reserve equal to
150% of current kernel size */
reserve = min((mempages - nr_free_pages()) * 3/2, mempages - 1);
mempages -= reserve;
names_cachep = kmem_cache_create("names_cache", PATH_MAX, 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
dcache_init();
inode_init();
files_init(mempages);
files_init();
files_maxfiles_init();
mnt_init();
bdev_cache_init();
chrdev_init();
......
......@@ -25,6 +25,7 @@
#include <linux/hardirq.h>
#include <linux/task_work.h>
#include <linux/ima.h>
#include <linux/swap.h>
#include <linux/atomic.h>
......@@ -308,19 +309,24 @@ void put_filp(struct file *file)
}
}
void __init files_init(unsigned long mempages)
void __init files_init(void)
{
unsigned long n;
filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
/*
* One file with associated inode and dcache is very roughly 1K.
* Per default don't use more than 10% of our memory for files.
/*
* One file with associated inode and dcache is very roughly 1K. Per default
* do not use more than 10% of our memory for files.
*/
void __init files_maxfiles_init(void)
{
unsigned long n;
unsigned long memreserve = (totalram_pages - nr_free_pages()) * 3/2;
memreserve = min(memreserve, totalram_pages - 1);
n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;
n = (mempages * (PAGE_SIZE / 1024)) / 10;
files_stat.max_files = max_t(unsigned long, n, NR_FILE);
percpu_counter_init(&nr_files, 0, GFP_KERNEL);
}
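
files_maxfiles_init() is split out of files_init() so that it can be re-run from page_alloc_init_late() (see the deferred-meminit hunk near the end), once nr_free_pages() finally reflects all of memory rather than the small amount initialised early. The sizing rule is the one the comment states: roughly 1 KB per open file, reserve 150% of the memory the kernel is already using, and cap max_files at 10% of what remains. A standalone arithmetic sketch with invented page counts; NR_FILE mirrors the kernel's 8192 floor:

```c
/* Sketch only: the max_files sizing rule, with invented inputs. */
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define NR_FILE   8192UL               /* floor used by the kernel */

static unsigned long max_files(unsigned long totalram_pages,
                               unsigned long nr_free_pages)
{
    /* reserve 150% of the pages the kernel has already consumed ... */
    unsigned long memreserve = (totalram_pages - nr_free_pages) * 3 / 2;

    /* ... but never reserve everything */
    if (memreserve > totalram_pages - 1)
        memreserve = totalram_pages - 1;

    /* ~1K per file, and use at most 10% of the remaining memory */
    unsigned long n = ((totalram_pages - memreserve) * (PAGE_SIZE / 1024)) / 10;

    return n > NR_FILE ? n : NR_FILE;
}

int main(void)
{
    /* e.g. 4 GiB of RAM (1048576 pages), 3.5 GiB of it still free */
    unsigned long total      = 1048576UL;
    unsigned long free_pages = 917504UL;

    printf("files_stat.max_files ~= %lu\n", max_files(total, free_pages));
    return 0;
}
```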
......@@ -1010,6 +1010,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
inode = hugetlbfs_get_inode(sb, NULL, S_IFREG | S_IRWXUGO, 0);
if (!inode)
goto out_dentry;
if (creat_flags == HUGETLB_SHMFS_INODE)
inode->i_flags |= S_PRIVATE;
file = ERR_PTR(-ENOMEM);
if (hugetlb_reserve_pages(inode, 0,
......
......@@ -412,16 +412,36 @@ void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group,
unsigned int flags)
{
struct fsnotify_mark *lmark, *mark;
LIST_HEAD(to_free);
/*
* We have to be really careful here. Anytime we drop mark_mutex, e.g.
* fsnotify_clear_marks_by_inode() can come and free marks. Even in our
* to_free list so we have to use mark_mutex even when accessing that
* list. And freeing mark requires us to drop mark_mutex. So we can
* reliably free only the first mark in the list. That's why we first
* move marks to free to to_free list in one go and then free marks in
* to_free list one by one.
*/
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
list_for_each_entry_safe(mark, lmark, &group->marks_list, g_list) {
if (mark->flags & flags) {
if (mark->flags & flags)
list_move(&mark->g_list, &to_free);
}
mutex_unlock(&group->mark_mutex);
while (1) {
mutex_lock_nested(&group->mark_mutex, SINGLE_DEPTH_NESTING);
if (list_empty(&to_free)) {
mutex_unlock(&group->mark_mutex);
break;
}
mark = list_first_entry(&to_free, struct fsnotify_mark, g_list);
fsnotify_get_mark(mark);
fsnotify_destroy_mark_locked(mark, group);
mutex_unlock(&group->mark_mutex);
fsnotify_put_mark(mark);
}
}
mutex_unlock(&group->mark_mutex);
}
/*
......
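
The comment in the hunk above spells out the constraint: marks may be freed by other paths whenever mark_mutex is dropped, and destroying a mark itself requires dropping the mutex, so the function first moves everything it owns onto a private to_free list and then frees exactly one entry per lock/unlock cycle. A userspace analogue of that two-phase drain, using a plain singly linked list and a pthread mutex; the concurrent freers are omitted, so this only illustrates the loop structure, not the full fsnotify locking model:

```c
/* Sketch only: "move to a private list, then free one item per lock
 * cycle". Concurrent mark freers are left out for brevity. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct mark {
    int flags;
    struct mark *next;
};

static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct mark *marks_list;          /* group->marks_list analogue */

static void clear_marks_by_flags(int flags)
{
    struct mark *to_free = NULL, *m, **pp;

    /* Phase 1: under the lock, move matching marks to a private list. */
    pthread_mutex_lock(&mark_mutex);
    pp = &marks_list;
    while ((m = *pp) != NULL) {
        if (m->flags & flags) {
            *pp = m->next;
            m->next = to_free;
            to_free = m;
        } else {
            pp = &m->next;
        }
    }
    pthread_mutex_unlock(&mark_mutex);

    /* Phase 2: free one entry per lock/unlock cycle, since destroying
     * an entry may itself require the lock to be dropped. */
    for (;;) {
        pthread_mutex_lock(&mark_mutex);
        if (!to_free) {
            pthread_mutex_unlock(&mark_mutex);
            break;
        }
        m = to_free;
        to_free = m->next;
        pthread_mutex_unlock(&mark_mutex);
        printf("destroying mark with flags %#x\n", m->flags);
        free(m);
    }
}

int main(void)
{
    for (int i = 0; i < 4; i++) {
        struct mark *m = malloc(sizeof(*m));
        m->flags = i & 1;
        m->next = marks_list;
        marks_list = m;
    }
    clear_marks_by_flags(0x1);
    return 0;
}
```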
......@@ -685,7 +685,7 @@ static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
u64 s = i_size_read(inode);
sector_t sector = (p_cpos << (osb->s_clustersize_bits - 9)) +
sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) +
(do_div(s, osb->s_clustersize) >> 9);
ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
......@@ -910,7 +910,7 @@ static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
ret = blkdev_issue_zeroout(osb->sb->s_bdev,
p_cpos << (osb->s_clustersize_bits - 9),
(u64)p_cpos << (osb->s_clustersize_bits - 9),
zero_len_head >> 9, GFP_NOFS, false);
if (ret < 0)
mlog_errno(ret);
......
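
Both ocfs2 hunks above exist because p_cpos holds a 32-bit cluster number: without widening it first, the left shift by (s_clustersize_bits - 9) is performed in 32-bit arithmetic and large offsets silently lose their high bits before the result reaches the 64-bit sector value. A standalone sketch with invented numbers (1 MB clusters, so a shift of 11):

```c
/* Sketch only: why the (u64) cast matters before the left shift.
 * 1 MB clusters => s_clustersize_bits = 20, shift = 20 - 9 = 11. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t p_cpos = 0x00400000;          /* ~4M clusters, i.e. a ~4 TB offset */
    int      shift  = 20 - 9;

    uint64_t truncated = p_cpos << shift;           /* 32-bit shift, wraps */
    uint64_t correct   = (uint64_t)p_cpos << shift; /* widened first */

    printf("without cast: %#llx\n", (unsigned long long)truncated);
    printf("with cast:    %#llx\n", (unsigned long long)correct);
    return 0;
}
```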
......@@ -4025,9 +4025,13 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb)
osb->dc_work_sequence = osb->dc_wake_sequence;
processed = osb->blocked_lock_count;
while (processed) {
BUG_ON(list_empty(&osb->blocked_lock_list));
/*
* blocked lock processing in this loop might call iput which can
* remove items off osb->blocked_lock_list. Downconvert up to
* 'processed' number of locks, but stop short if we had some
* removed in ocfs2_mark_lockres_freeing when downconverting.
*/
while (processed && !list_empty(&osb->blocked_lock_list)) {
lockres = list_entry(osb->blocked_lock_list.next,
struct ocfs2_lock_res, l_blocked_list);
list_del_init(&lockres->l_blocked_list);
......
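
The new loop guard reflects the comment above it: iput() during downconvert can remove entries from blocked_lock_list behind the loop's back, so the loop must be bounded both by the snapshotted count and by the list actually still having entries, instead of asserting non-emptiness with BUG_ON(). A tiny standalone sketch of that double guard, with an invented queue standing in for the lock list:

```c
/* Sketch only: bound the loop by a snapshotted count *and* by the list
 * really having entries, since processing an item may drop others. */
#include <stdio.h>

int main(void)
{
    int queue[8]  = { 1, 2, 3, 4, 5 };
    int head = 0, tail = 5;
    int processed = tail;   /* snapshot, like osb->blocked_lock_count */

    while (processed && head != tail) {   /* not BUG_ON(list_empty()) */
        int item = queue[head++];
        printf("downconverting %d\n", item);
        /* side effects may consume extra items, shrinking the list */
        if (item == 2)
            tail--;
        processed--;
    }
    return 0;
}
```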
......@@ -121,8 +121,9 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
if (kinfo->si_code == BUS_MCEERR_AR ||
kinfo->si_code == BUS_MCEERR_AO)
if (kinfo->si_signo == SIGBUS &&
(kinfo->si_code == BUS_MCEERR_AR ||
kinfo->si_code == BUS_MCEERR_AO))
err |= __put_user((short) kinfo->si_addr_lsb,
&uinfo->ssi_addr_lsb);
#endif
......
......@@ -55,7 +55,8 @@ struct vm_fault;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
extern void __init files_init(unsigned long);
extern void __init files_init(void);
extern void __init files_maxfiles_init(void);
extern struct files_stat_struct files_stat;
extern unsigned long get_max_files(void);
......@@ -2245,7 +2246,7 @@ extern int ioctl_preallocate(struct file *filp, void __user *argp);
/* fs/dcache.c */
extern void __init vfs_caches_init_early(void);
extern void __init vfs_caches_init(unsigned long);
extern void __init vfs_caches_init(void);
extern struct kmem_cache *names_cachep;
......
......@@ -631,15 +631,19 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
1 << PG_private | 1 << PG_private_2 | \
1 << PG_writeback | 1 << PG_reserved | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
1 << PG_unevictable | __PG_MLOCKED | \
__PG_COMPOUND_LOCK)
/*
* Flags checked when a page is prepped for return by the page allocator.
* Pages being prepped should not have any flags set. If they are set,
* Pages being prepped should not have these flags set. If they are set,
* there has been a kernel bug or struct page corruption.
*
* __PG_HWPOISON is exceptional because it needs to be kept beyond page's
* alloc-free cycle to prevent from reusing the page.
*/
#define PAGE_FLAGS_CHECK_AT_PREP ((1 << NR_PAGEFLAGS) - 1)
#define PAGE_FLAGS_CHECK_AT_PREP \
(((1 << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
#define PAGE_FLAGS_PRIVATE \
(1 << PG_private | 1 << PG_private_2)
......
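
The page-flags hunk above carves __PG_HWPOISON out of the "all flags" prep mask so a poisoned page keeps its flag across a free/alloc cycle, while the page_alloc hunk near the end adds an explicit check_new_page() test for it instead. The macro change itself is plain bit masking; a tiny sketch with invented flag positions:

```c
/* Sketch only: excluding one bit from an "all flags" mask.
 * NR_FLAGS and the poison bit position are invented for illustration. */
#include <stdio.h>

#define NR_FLAGS        8
#define PG_HWPOISON_BIT 5
#define PG_HWPOISON     (1UL << PG_HWPOISON_BIT)

/* every flag bit ...                       ... except the poison bit */
#define FLAGS_CHECK_AT_PREP (((1UL << NR_FLAGS) - 1) & ~PG_HWPOISON)

int main(void)
{
    unsigned long poisoned_page_flags = PG_HWPOISON;

    printf("prep mask: %#lx\n", FLAGS_CHECK_AT_PREP);
    printf("poisoned page trips prep check: %s\n",
           (poisoned_page_flags & FLAGS_CHECK_AT_PREP) ? "yes" : "no");
    return 0;
}
```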
......@@ -656,7 +656,7 @@ asmlinkage __visible void __init start_kernel(void)
key_init();
security_init();
dbg_late_init();
vfs_caches_init(totalram_pages);
vfs_caches_init();
signals_init();
/* rootfs populating might need page-writeback */
page_writeback_init();
......
......@@ -142,7 +142,6 @@ static int msg_insert(struct msg_msg *msg, struct mqueue_inode_info *info)
if (!leaf)
return -ENOMEM;
INIT_LIST_HEAD(&leaf->msg_list);
info->qsize += sizeof(*leaf);
}
leaf->priority = msg->m_type;
rb_link_node(&leaf->rb_node, parent, p);
......@@ -187,7 +186,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
"lazy leaf delete!\n");
rb_erase(&leaf->rb_node, &info->msg_tree);
if (info->node_cache) {
info->qsize -= sizeof(*leaf);
kfree(leaf);
} else {
info->node_cache = leaf;
......@@ -200,7 +198,6 @@ static inline struct msg_msg *msg_get(struct mqueue_inode_info *info)
if (list_empty(&leaf->msg_list)) {
rb_erase(&leaf->rb_node, &info->msg_tree);
if (info->node_cache) {
info->qsize -= sizeof(*leaf);
kfree(leaf);
} else {
info->node_cache = leaf;
......@@ -1034,7 +1031,6 @@ SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr,
/* Save our speculative allocation into the cache */
INIT_LIST_HEAD(&new_leaf->msg_list);
info->node_cache = new_leaf;
info->qsize += sizeof(*new_leaf);
new_leaf = NULL;
} else {
kfree(new_leaf);
......@@ -1142,7 +1138,6 @@ SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr,
/* Save our speculative allocation into the cache */
INIT_LIST_HEAD(&new_leaf->msg_list);
info->node_cache = new_leaf;
info->qsize += sizeof(*new_leaf);
} else {
kfree(new_leaf);
}
......
......@@ -545,7 +545,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
if ((shmflg & SHM_NORESERVE) &&
sysctl_overcommit_memory != OVERCOMMIT_NEVER)
acctflag = VM_NORESERVE;
file = shmem_file_setup(name, size, acctflag);
file = shmem_kernel_file_setup(name, size, acctflag);
}
error = PTR_ERR(file);
if (IS_ERR(file))
......
......@@ -97,6 +97,7 @@ bool kthread_should_park(void)
{
return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(current)->flags);
}
EXPORT_SYMBOL_GPL(kthread_should_park);
/**
* kthread_freezable_should_stop - should this freezable kthread return now?
......@@ -171,6 +172,7 @@ void kthread_parkme(void)
{
__kthread_parkme(to_kthread(current));
}
EXPORT_SYMBOL_GPL(kthread_parkme);
static int kthread(void *_create)
{
......@@ -411,6 +413,7 @@ void kthread_unpark(struct task_struct *k)
if (kthread)
__kthread_unpark(k, kthread);
}
EXPORT_SYMBOL_GPL(kthread_unpark);
/**
* kthread_park - park a thread created by kthread_create().
......@@ -441,6 +444,7 @@ int kthread_park(struct task_struct *k)
}
return ret;
}
EXPORT_SYMBOL_GPL(kthread_park);
/**
* kthread_stop - stop a thread created by kthread_create().
......
......@@ -2748,12 +2748,15 @@ int copy_siginfo_to_user(siginfo_t __user *to, const siginfo_t *from)
* Other callers might not initialize the si_lsb field,
* so check explicitly for the right codes here.
*/
if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
if (from->si_signo == SIGBUS &&
(from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO))
err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
#endif
#ifdef SEGV_BNDERR
if (from->si_signo == SIGSEGV && from->si_code == SEGV_BNDERR) {
err |= __put_user(from->si_lower, &to->si_lower);
err |= __put_user(from->si_upper, &to->si_upper);
}
#endif
break;
case __SI_CHLD:
......@@ -3017,7 +3020,7 @@ COMPAT_SYSCALL_DEFINE3(rt_sigqueueinfo,
int, sig,
struct compat_siginfo __user *, uinfo)
{
siginfo_t info;
siginfo_t info = {};
int ret = copy_siginfo_from_user32(&info, uinfo);
if (unlikely(ret))
return ret;
......@@ -3061,7 +3064,7 @@ COMPAT_SYSCALL_DEFINE4(rt_tgsigqueueinfo,
int, sig,
struct compat_siginfo __user *, uinfo)
{
siginfo_t info;
siginfo_t info = {};
if (copy_siginfo_from_user32(&info, uinfo))
return -EFAULT;
......
......@@ -119,7 +119,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev,
unsigned long align_mask = 0;
if (align_order > 0)
align_mask = 0xffffffffffffffffl >> (64 - align_order);
align_mask = ~0ul >> (BITS_PER_LONG - align_order);
/* Sanity check */
if (unlikely(npages == 0)) {
......
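
The iommu-common fix replaces a hard-coded 64-bit constant with a mask computed in the native word size: ~0ul >> (BITS_PER_LONG - align_order) yields the low align_order bits on both 32-bit and 64-bit builds, without relying on a 64-bit literal being shifted by (64 - align_order) and then truncated to unsigned long. A small sketch of the corrected expression; BITS_PER_LONG is derived here from limits.h rather than the kernel header:

```c
/* Sketch only: building a mask of the low align_order bits in the
 * machine word size, as the fixed code does. */
#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

static unsigned long align_mask(unsigned int align_order)
{
    if (align_order == 0)        /* mirrors the "if (align_order > 0)" guard */
        return 0;
    return ~0UL >> (BITS_PER_LONG - align_order);
}

int main(void)
{
    for (unsigned int order = 0; order <= 4; order++)
        printf("align_order %u -> mask %#lx\n", order, align_mask(order));
    return 0;
}
```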
......@@ -1676,12 +1676,7 @@ static void __split_huge_page_refcount(struct page *page,
/* after clearing PageTail the gup refcount can be released */
smp_mb__after_atomic();
/*
* retain hwpoison flag of the poisoned tail page:
* fix for the unsuitable process killed on Guest Machine(KVM)
* by the memory-failure.
*/
page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
page_tail->flags |= (page->flags &
((1L << PG_referenced) |
(1L << PG_swapbacked) |
......
......@@ -909,6 +909,18 @@ int get_hwpoison_page(struct page *page)
* directly for tail pages.
*/
if (PageTransHuge(head)) {
/*
* Non anonymous thp exists only in allocation/free time. We
* can't handle such a case correctly, so let's give it up.
* This should be better than triggering BUG_ON when kernel
* tries to touch the "partially handled" page.
*/
if (!PageAnon(head)) {
pr_err("MCE: %#lx: non anonymous thp\n",
page_to_pfn(page));
return 0;
}
if (get_page_unless_zero(head)) {
if (PageTail(page))
get_page(page);
......@@ -1134,15 +1146,6 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
}
if (!PageHuge(p) && PageTransHuge(hpage)) {
if (!PageAnon(hpage)) {
pr_err("MCE: %#lx: non anonymous thp\n", pfn);
if (TestClearPageHWPoison(p))
atomic_long_sub(nr_pages, &num_poisoned_pages);
put_page(p);
if (p != hpage)
put_page(hpage);
return -EBUSY;
}
if (unlikely(split_huge_page(hpage))) {
pr_err("MCE: %#lx: thp split failed\n", pfn);
if (TestClearPageHWPoison(p))
......@@ -1209,9 +1212,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
if (!PageHWPoison(p)) {
printk(KERN_ERR "MCE %#lx: just unpoisoned\n", pfn);
atomic_long_sub(nr_pages, &num_poisoned_pages);
unlock_page(hpage);
put_page(hpage);
res = 0;
goto out;
return 0;
}
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
......@@ -1656,6 +1659,8 @@ static int __soft_offline_page(struct page *page, int flags)
inc_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
list_add(&page->lru, &pagelist);
if (!TestSetPageHWPoison(page))
atomic_long_inc(&num_poisoned_pages);
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
......@@ -1670,9 +1675,8 @@ static int __soft_offline_page(struct page *page, int flags)
pfn, ret, page->flags);
if (ret > 0)
ret = -EIO;
} else {
SetPageHWPoison(page);
atomic_long_inc(&num_poisoned_pages);
if (TestClearPageHWPoison(page))
atomic_long_dec(&num_poisoned_pages);
}
} else {
pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
......
......@@ -446,7 +446,7 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
int nr_pages = PAGES_PER_SECTION;
int nid = pgdat->node_id;
int zone_type;
unsigned long flags;
unsigned long flags, pfn;
int ret;
zone_type = zone - pgdat->node_zones;
......@@ -461,6 +461,14 @@ static int __meminit __add_zone(struct zone *zone, unsigned long phys_start_pfn)
pgdat_resize_unlock(zone->zone_pgdat, &flags);
memmap_init_zone(nr_pages, nid, zone_type,
phys_start_pfn, MEMMAP_HOTPLUG);
/* online_page_range is called later and expects pages reserved */
for (pfn = phys_start_pfn; pfn < phys_start_pfn + nr_pages; pfn++) {
if (!pfn_valid(pfn))
continue;
SetPageReserved(pfn_to_page(pfn));
}
return 0;
}
......
......@@ -880,7 +880,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
/* Establish migration ptes or remove ptes */
if (page_mapped(page)) {
try_to_unmap(page,
TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS|
TTU_IGNORE_HWPOISON);
page_was_mapped = 1;
}
......@@ -950,7 +951,10 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
if (reason != MR_MEMORY_FAILURE)
/* Soft-offlined page shouldn't go through lru cache list */
if (reason == MR_MEMORY_FAILURE)
put_page(page);
else
putback_lru_page(page);
}
......
......@@ -2063,10 +2063,10 @@ static struct notifier_block ratelimit_nb = {
*/
void __init page_writeback_init(void)
{
BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
writeback_set_ratelimit();
register_cpu_notifier(&ratelimit_nb);
BUG_ON(wb_domain_init(&global_wb_domain, GFP_KERNEL));
}
/**
......
......@@ -18,7 +18,6 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/bootmem.h>
......@@ -981,21 +980,21 @@ static void __init __free_pages_boot_core(struct page *page,
#if defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) || \
defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)
/* Only safe to use early in boot when initialisation is single-threaded */
static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
int __meminit early_pfn_to_nid(unsigned long pfn)
{
static DEFINE_SPINLOCK(early_pfn_lock);
int nid;
/* The system will behave unpredictably otherwise */
BUG_ON(system_state != SYSTEM_BOOTING);
spin_lock(&early_pfn_lock);
nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
if (nid >= 0)
if (nid < 0)
nid = 0;
spin_unlock(&early_pfn_lock);
return nid;
/* just returns 0 */
return 0;
}
#endif
......@@ -1060,7 +1059,15 @@ static void __init deferred_free_range(struct page *page,
__free_pages_boot_core(page, pfn, 0);
}
static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
/* Completion tracking for deferred_init_memmap() threads */
static atomic_t pgdat_init_n_undone __initdata;
static __initdata DECLARE_COMPLETION(pgdat_init_all_done_comp);
static inline void __init pgdat_init_report_one_done(void)
{
if (atomic_dec_and_test(&pgdat_init_n_undone))
complete(&pgdat_init_all_done_comp);
}
/* Initialise remaining memory on a node */
static int __init deferred_init_memmap(void *data)
......@@ -1077,7 +1084,7 @@ static int __init deferred_init_memmap(void *data)
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
if (first_init_pfn == ULONG_MAX) {
up_read(&pgdat_init_rwsem);
pgdat_init_report_one_done();
return 0;
}
......@@ -1177,7 +1184,8 @@ static int __init deferred_init_memmap(void *data)
pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
jiffies_to_msecs(jiffies - start));
up_read(&pgdat_init_rwsem);
pgdat_init_report_one_done();
return 0;
}
......@@ -1185,14 +1193,17 @@ void __init page_alloc_init_late(void)
{
int nid;
/* There will be num_node_state(N_MEMORY) threads */
atomic_set(&pgdat_init_n_undone, num_node_state(N_MEMORY));
for_each_node_state(nid, N_MEMORY) {
down_read(&pgdat_init_rwsem);
kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
}
/* Block until all are initialised */
down_write(&pgdat_init_rwsem);
up_write(&pgdat_init_rwsem);
wait_for_completion(&pgdat_init_all_done_comp);
/* Reinit limits that are based on free pages after the kernel is up */
files_maxfiles_init();
}
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
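
The rwsem trick removed here (the caller takes pgdat_init_rwsem for read once per node thread, each thread releases it when finished, and a final down_write blocks until all readers are gone) is replaced by a counted completion: set an atomic counter to the number of node threads and let the last one to decrement it wake the waiter. A userspace pthread sketch of that countdown pattern; the node count and names are invented:

```c
/* Sketch only: "last worker signals the waiter", the pattern the
 * pgdat_init_n_undone / pgdat_init_all_done_comp pair implements. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_NODES 4

static atomic_int n_undone = NR_NODES;
static pthread_mutex_t done_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  all_done  = PTHREAD_COND_INITIALIZER;
static int done;

static void report_one_done(void)        /* pgdat_init_report_one_done() */
{
    if (atomic_fetch_sub(&n_undone, 1) == 1) {
        pthread_mutex_lock(&done_lock);
        done = 1;
        pthread_cond_signal(&all_done);  /* complete() analogue */
        pthread_mutex_unlock(&done_lock);
    }
}

static void *init_node(void *arg)        /* deferred_init_memmap() stand-in */
{
    printf("node %ld initialised\n", (long)arg);
    report_one_done();
    return NULL;
}

int main(void)
{
    pthread_t tid[NR_NODES];

    for (long nid = 0; nid < NR_NODES; nid++)
        pthread_create(&tid[nid], NULL, init_node, (void *)nid);

    /* wait_for_completion() analogue */
    pthread_mutex_lock(&done_lock);
    while (!done)
        pthread_cond_wait(&all_done, &done_lock);
    pthread_mutex_unlock(&done_lock);

    printf("all nodes initialised, safe to re-run files_maxfiles_init()\n");

    for (int i = 0; i < NR_NODES; i++)
        pthread_join(tid[i], NULL);
    return 0;
}
```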
......@@ -1285,6 +1296,10 @@ static inline int check_new_page(struct page *page)
bad_reason = "non-NULL mapping";
if (unlikely(atomic_read(&page->_count) != 0))
bad_reason = "nonzero _count";
if (unlikely(page->flags & __PG_HWPOISON)) {
bad_reason = "HWPoisoned (hardware-corrupted)";
bad_flags = __PG_HWPOISON;
}
if (unlikely(page->flags & PAGE_FLAGS_CHECK_AT_PREP)) {
bad_reason = "PAGE_FLAGS_CHECK_AT_PREP flag set";
bad_flags = PAGE_FLAGS_CHECK_AT_PREP;
......
......@@ -3363,8 +3363,8 @@ static struct file *__shmem_file_setup(const char *name, loff_t size,
* shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be
* kernel internal. There will be NO LSM permission checks against the
* underlying inode. So users of this interface must do LSM checks at a
* higher layer. The one user is the big_key implementation. LSM checks
* are provided at the key level rather than the inode level.
* higher layer. The users are the big_key and shm implementations. LSM
* checks are provided at the key or shm level rather than the inode.
* @name: name for dentry (to be seen in /proc/<pid>/maps
* @size: size to be set for the file
* @flags: VM_NORESERVE suppresses pre-accounting of the entire object size
......
......@@ -37,8 +37,7 @@ struct kmem_cache *kmem_cache;
SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
SLAB_FAILSLAB)
#define SLAB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
SLAB_CACHE_DMA | SLAB_NOTRACK)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)
/*
* Merge control. If this is set then no merging of slab caches will occur.
......