Commit a7ca10f2 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (incoming from Andrew Morton)

Merge misc fixes from Andrew Morton:
 "21 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (21 commits)
  mm/balloon_compaction: fix deflation when compaction is disabled
  sh: fix sh770x SCIF memory regions
  zram: avoid NULL pointer access in concurrent situation
  mm/slab_common: don't check for duplicate cache names
  ocfs2: fix d_splice_alias() return code checking
  mm: rmap: split out page_remove_file_rmap()
  mm: memcontrol: fix missed end-writeback page accounting
  mm: page-writeback: inline account_page_dirtied() into single caller
  lib/bitmap.c: fix undefined shift in __bitmap_shift_{left|right}()
  drivers/rtc/rtc-bq32k.c: fix register value
  memory-hotplug: clear pgdat which is allocated by bootmem in try_offline_node()
  drivers/rtc/rtc-s3c.c: fix initialization failure without rtc source clock
  kernel/kmod: fix use-after-free of the sub_info structure
  drivers/rtc/rtc-pm8xxx.c: rework to support pm8941 rtc
  mm, thp: fix collapsing of hugepages on madvise
  drivers: of: add return value to of_reserved_mem_device_init()
  mm: free compound page with correct order
  gcov: add ARM64 to GCOV_PROFILE_ALL
  fsnotify: next_i is freed during fsnotify_unmount_inodes.
  mm/compaction.c: avoid premature range skip in isolate_migratepages_range
  ...
parents d506aa68 4d88e6f7
......@@ -118,7 +118,7 @@ static struct plat_sci_port scif0_platform_data = {
};
static struct resource scif0_resources[] = {
DEFINE_RES_MEM(0xfffffe80, 0x100),
DEFINE_RES_MEM(0xfffffe80, 0x10),
DEFINE_RES_IRQ(evt2irq(0x4e0)),
};
......@@ -143,7 +143,7 @@ static struct plat_sci_port scif1_platform_data = {
};
static struct resource scif1_resources[] = {
DEFINE_RES_MEM(0xa4000150, 0x100),
DEFINE_RES_MEM(0xa4000150, 0x10),
DEFINE_RES_IRQ(evt2irq(0x900)),
};
......@@ -169,7 +169,7 @@ static struct plat_sci_port scif2_platform_data = {
};
static struct resource scif2_resources[] = {
DEFINE_RES_MEM(0xa4000140, 0x100),
DEFINE_RES_MEM(0xa4000140, 0x10),
DEFINE_RES_IRQ(evt2irq(0x880)),
};
......
......@@ -223,9 +223,10 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
#undef pr_fmt
#define pr_fmt(fmt) fmt
static void rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
{
dev_set_cma_area(dev, rmem->priv);
return 0;
}
static void rmem_cma_device_release(struct reserved_mem *rmem,
......
......@@ -99,11 +99,12 @@ static ssize_t mem_used_total_show(struct device *dev,
{
u64 val = 0;
struct zram *zram = dev_to_zram(dev);
struct zram_meta *meta = zram->meta;
down_read(&zram->init_lock);
if (init_done(zram))
if (init_done(zram)) {
struct zram_meta *meta = zram->meta;
val = zs_get_total_pages(meta->mem_pool);
}
up_read(&zram->init_lock);
return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
......@@ -173,16 +174,17 @@ static ssize_t mem_used_max_store(struct device *dev,
int err;
unsigned long val;
struct zram *zram = dev_to_zram(dev);
struct zram_meta *meta = zram->meta;
err = kstrtoul(buf, 10, &val);
if (err || val != 0)
return -EINVAL;
down_read(&zram->init_lock);
if (init_done(zram))
if (init_done(zram)) {
struct zram_meta *meta = zram->meta;
atomic_long_set(&zram->stats.max_used_pages,
zs_get_total_pages(meta->mem_pool));
}
up_read(&zram->init_lock);
return len;
......
......@@ -243,23 +243,27 @@ static inline struct reserved_mem *__find_rmem(struct device_node *node)
* This function assign memory region pointed by "memory-region" device tree
* property to the given device.
*/
void of_reserved_mem_device_init(struct device *dev)
int of_reserved_mem_device_init(struct device *dev)
{
struct reserved_mem *rmem;
struct device_node *np;
int ret;
np = of_parse_phandle(dev->of_node, "memory-region", 0);
if (!np)
return;
return -ENODEV;
rmem = __find_rmem(np);
of_node_put(np);
if (!rmem || !rmem->ops || !rmem->ops->device_init)
return;
return -EINVAL;
ret = rmem->ops->device_init(rmem, dev);
if (ret == 0)
dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
rmem->ops->device_init(rmem, dev);
dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
return ret;
}
/**
......
......@@ -1320,7 +1320,7 @@ config RTC_DRV_LPC32XX
config RTC_DRV_PM8XXX
tristate "Qualcomm PMIC8XXX RTC"
depends on MFD_PM8XXX
depends on MFD_PM8XXX || MFD_SPMI_PMIC
help
If you say yes here you get support for the
Qualcomm PMIC8XXX RTC.
......
......@@ -160,7 +160,7 @@ static int trickle_charger_of_init(struct device *dev, struct device_node *node)
dev_err(dev, "bq32k: diode and resistor mismatch\n");
return -EINVAL;
}
reg = 0x25;
reg = 0x45;
break;
default:
......
This diff is collapsed.
......@@ -535,13 +535,15 @@ static int s3c_rtc_probe(struct platform_device *pdev)
}
clk_prepare_enable(info->rtc_clk);
info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
if (IS_ERR(info->rtc_src_clk)) {
dev_err(&pdev->dev, "failed to find rtc source clock\n");
return PTR_ERR(info->rtc_src_clk);
if (info->data->needs_src_clk) {
info->rtc_src_clk = devm_clk_get(&pdev->dev, "rtc_src");
if (IS_ERR(info->rtc_src_clk)) {
dev_err(&pdev->dev,
"failed to find rtc source clock\n");
return PTR_ERR(info->rtc_src_clk);
}
clk_prepare_enable(info->rtc_src_clk);
}
clk_prepare_enable(info->rtc_src_clk);
/* check to see if everything is setup correctly */
if (info->data->enable)
......
......@@ -288,20 +288,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
spin_unlock(&inode->i_lock);
/* In case the dropping of a reference would nuke next_i. */
if ((&next_i->i_sb_list != list) &&
atomic_read(&next_i->i_count)) {
while (&next_i->i_sb_list != list) {
spin_lock(&next_i->i_lock);
if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
atomic_read(&next_i->i_count)) {
__iget(next_i);
need_iput = next_i;
spin_unlock(&next_i->i_lock);
break;
}
spin_unlock(&next_i->i_lock);
next_i = list_entry(next_i->i_sb_list.next,
struct inode, i_sb_list);
}
/*
* We can safely drop inode_sb_list_lock here because we hold
* references on both inode and next_i. Also no new inodes
* will be added since the umount has begun.
* We can safely drop inode_sb_list_lock here because either
* we actually hold references on both inode and next_i or
* end of list. Also no new inodes will be added since the
* umount has begun.
*/
spin_unlock(&inode_sb_list_lock);
......
......@@ -158,7 +158,7 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
* NOTE: This dentry already has ->d_op set from
* ocfs2_get_parent() and ocfs2_get_dentry()
*/
if (ret)
if (!IS_ERR_OR_NULL(ret))
dentry = ret;
status = ocfs2_dentry_attach_lock(dentry, inode,
......
......@@ -6,7 +6,8 @@
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern int __khugepaged_enter(struct mm_struct *mm);
extern void __khugepaged_exit(struct mm_struct *mm);
extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma);
extern int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long vm_flags);
#define khugepaged_enabled() \
(transparent_hugepage_flags & \
......@@ -35,13 +36,13 @@ static inline void khugepaged_exit(struct mm_struct *mm)
__khugepaged_exit(mm);
}
static inline int khugepaged_enter(struct vm_area_struct *vma)
static inline int khugepaged_enter(struct vm_area_struct *vma,
unsigned long vm_flags)
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
if ((khugepaged_always() ||
(khugepaged_req_madv() &&
vma->vm_flags & VM_HUGEPAGE)) &&
!(vma->vm_flags & VM_NOHUGEPAGE))
(khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
!(vm_flags & VM_NOHUGEPAGE))
if (__khugepaged_enter(vma->vm_mm))
return -ENOMEM;
return 0;
......@@ -54,11 +55,13 @@ static inline int khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
static inline void khugepaged_exit(struct mm_struct *mm)
{
}
static inline int khugepaged_enter(struct vm_area_struct *vma)
static inline int khugepaged_enter(struct vm_area_struct *vma,
unsigned long vm_flags)
{
return 0;
}
static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
static inline int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long vm_flags)
{
return 0;
}
......
......@@ -139,48 +139,23 @@ static inline bool mem_cgroup_disabled(void)
return false;
}
void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked,
unsigned long *flags);
extern atomic_t memcg_moving;
static inline void mem_cgroup_begin_update_page_stat(struct page *page,
bool *locked, unsigned long *flags)
{
if (mem_cgroup_disabled())
return;
rcu_read_lock();
*locked = false;
if (atomic_read(&memcg_moving))
__mem_cgroup_begin_update_page_stat(page, locked, flags);
}
void __mem_cgroup_end_update_page_stat(struct page *page,
unsigned long *flags);
static inline void mem_cgroup_end_update_page_stat(struct page *page,
bool *locked, unsigned long *flags)
{
if (mem_cgroup_disabled())
return;
if (*locked)
__mem_cgroup_end_update_page_stat(page, flags);
rcu_read_unlock();
}
void mem_cgroup_update_page_stat(struct page *page,
enum mem_cgroup_stat_index idx,
int val);
static inline void mem_cgroup_inc_page_stat(struct page *page,
struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page, bool *locked,
unsigned long *flags);
void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
unsigned long flags);
void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val);
static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
mem_cgroup_update_page_stat(page, idx, 1);
mem_cgroup_update_page_stat(memcg, idx, 1);
}
static inline void mem_cgroup_dec_page_stat(struct page *page,
static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
mem_cgroup_update_page_stat(page, idx, -1);
mem_cgroup_update_page_stat(memcg, idx, -1);
}
unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
......@@ -315,13 +290,14 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
{
}
static inline void mem_cgroup_begin_update_page_stat(struct page *page,
static inline struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
bool *locked, unsigned long *flags)
{
return NULL;
}
static inline void mem_cgroup_end_update_page_stat(struct page *page,
bool *locked, unsigned long *flags)
static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg,
bool locked, unsigned long flags)
{
}
......@@ -343,12 +319,12 @@ static inline bool mem_cgroup_oom_synchronize(bool wait)
return false;
}
static inline void mem_cgroup_inc_page_stat(struct page *page,
static inline void mem_cgroup_inc_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
}
static inline void mem_cgroup_dec_page_stat(struct page *page,
static inline void mem_cgroup_dec_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx)
{
}
......
......@@ -1235,7 +1235,6 @@ int __set_page_dirty_no_writeback(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
struct page *page);
void account_page_dirtied(struct page *page, struct address_space *mapping);
void account_page_writeback(struct page *page);
int set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);
......
......@@ -16,7 +16,7 @@ struct reserved_mem {
};
struct reserved_mem_ops {
void (*device_init)(struct reserved_mem *rmem,
int (*device_init)(struct reserved_mem *rmem,
struct device *dev);
void (*device_release)(struct reserved_mem *rmem,
struct device *dev);
......@@ -28,14 +28,17 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
_OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
#ifdef CONFIG_OF_RESERVED_MEM
void of_reserved_mem_device_init(struct device *dev);
int of_reserved_mem_device_init(struct device *dev);
void of_reserved_mem_device_release(struct device *dev);
void fdt_init_reserved_mem(void);
void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
phys_addr_t base, phys_addr_t size);
#else
static inline void of_reserved_mem_device_init(struct device *dev) { }
static inline int of_reserved_mem_device_init(struct device *dev)
{
return -ENOSYS;
}
static inline void of_reserved_mem_device_release(struct device *pdev) { }
static inline void fdt_init_reserved_mem(void) { }
......
......@@ -35,7 +35,7 @@ config GCOV_KERNEL
config GCOV_PROFILE_ALL
bool "Profile entire Kernel"
depends on GCOV_KERNEL
depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM
depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM || ARM64
default n
---help---
This options activates profiling for the entire kernel.
......
......@@ -196,12 +196,34 @@ int __request_module(bool wait, const char *fmt, ...)
EXPORT_SYMBOL(__request_module);
#endif /* CONFIG_MODULES */
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
(*info->cleanup)(info);
kfree(info);
}
static void umh_complete(struct subprocess_info *sub_info)
{
struct completion *comp = xchg(&sub_info->complete, NULL);
/*
* See call_usermodehelper_exec(). If xchg() returns NULL
* we own sub_info, the UMH_KILLABLE caller has gone away
* or the caller used UMH_NO_WAIT.
*/
if (comp)
complete(comp);
else
call_usermodehelper_freeinfo(sub_info);
}
/*
* This is the task which runs the usermode application
*/
static int ____call_usermodehelper(void *data)
{
struct subprocess_info *sub_info = data;
int wait = sub_info->wait & ~UMH_KILLABLE;
struct cred *new;
int retval;
......@@ -221,7 +243,7 @@ static int ____call_usermodehelper(void *data)
retval = -ENOMEM;
new = prepare_kernel_cred(current);
if (!new)
goto fail;
goto out;
spin_lock(&umh_sysctl_lock);
new->cap_bset = cap_intersect(usermodehelper_bset, new->cap_bset);
......@@ -233,7 +255,7 @@ static int ____call_usermodehelper(void *data)
retval = sub_info->init(sub_info, new);
if (retval) {
abort_creds(new);
goto fail;
goto out;
}
}
......@@ -242,12 +264,13 @@ static int ____call_usermodehelper(void *data)
retval = do_execve(getname_kernel(sub_info->path),
(const char __user *const __user *)sub_info->argv,
(const char __user *const __user *)sub_info->envp);
out:
sub_info->retval = retval;
/* wait_for_helper() will call umh_complete if UHM_WAIT_PROC. */
if (wait != UMH_WAIT_PROC)
umh_complete(sub_info);
if (!retval)
return 0;
/* Exec failed? */
fail:
sub_info->retval = retval;
do_exit(0);
}
......@@ -258,26 +281,6 @@ static int call_helper(void *data)
return ____call_usermodehelper(data);
}
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
{
if (info->cleanup)
(*info->cleanup)(info);
kfree(info);
}
static void umh_complete(struct subprocess_info *sub_info)
{
struct completion *comp = xchg(&sub_info->complete, NULL);
/*
* See call_usermodehelper_exec(). If xchg() returns NULL
* we own sub_info, the UMH_KILLABLE caller has gone away.
*/
if (comp)
complete(comp);
else
call_usermodehelper_freeinfo(sub_info);
}
/* Keventd can't block, but this (a child) can. */
static int wait_for_helper(void *data)
{
......@@ -336,18 +339,8 @@ static void __call_usermodehelper(struct work_struct *work)
kmod_thread_locker = NULL;
}
switch (wait) {
case UMH_NO_WAIT:
call_usermodehelper_freeinfo(sub_info);
break;
case UMH_WAIT_PROC:
if (pid > 0)
break;
/* FALLTHROUGH */
case UMH_WAIT_EXEC:
if (pid < 0)
sub_info->retval = pid;
if (pid < 0) {
sub_info->retval = pid;
umh_complete(sub_info);
}
}
......@@ -588,7 +581,12 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
goto out;
}
sub_info->complete = &done;
/*
* Set the completion pointer only if there is a waiter.
* This makes it possible to use umh_complete to free
* the data structure in case of UMH_NO_WAIT.
*/
sub_info->complete = (wait == UMH_NO_WAIT) ? NULL : &done;
sub_info->wait = wait;
queue_work(khelper_wq, &sub_info->work);
......
......@@ -131,7 +131,9 @@ void __bitmap_shift_right(unsigned long *dst,
lower = src[off + k];
if (left && off + k == lim - 1)
lower &= mask;
dst[k] = upper << (BITS_PER_LONG - rem) | lower >> rem;
dst[k] = lower >> rem;
if (rem)
dst[k] |= upper << (BITS_PER_LONG - rem);
if (left && k == lim - 1)
dst[k] &= mask;
}
......@@ -172,7 +174,9 @@ void __bitmap_shift_left(unsigned long *dst,
upper = src[k];
if (left && k == lim - 1)
upper &= (1UL << left) - 1;
dst[k + off] = lower >> (BITS_PER_LONG - rem) | upper << rem;
dst[k + off] = upper << rem;
if (rem)
dst[k + off] |= lower >> (BITS_PER_LONG - rem);
if (left && k + off == lim - 1)
dst[k + off] &= (1UL << left) - 1;
}
......
......@@ -68,11 +68,13 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
* to be released by the balloon driver.
*/
if (trylock_page(page)) {
#ifdef CONFIG_BALLOON_COMPACTION
if (!PagePrivate(page)) {
/* raced with isolation */
unlock_page(page);
continue;
}
#endif
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_delete(page);
__count_vm_event(BALLOON_DEFLATE);
......
......@@ -784,6 +784,9 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
cc->nr_migratepages = 0;
break;
}
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
break;
}
acct_isolated(cc->zone, cc);
......
......@@ -200,7 +200,7 @@ static struct page *get_huge_zero_page(void)
preempt_disable();
if (cmpxchg(&huge_zero_page, NULL, zero_page)) {
preempt_enable();
__free_page(zero_page);
__free_pages(zero_page, compound_order(zero_page));
goto retry;
}
......@@ -232,7 +232,7 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
struct page *zero_page = xchg(&huge_zero_page, NULL);
BUG_ON(zero_page == NULL);
__free_page(zero_page);
__free_pages(zero_page, compound_order(zero_page));
return HPAGE_PMD_NR;
}
......@@ -803,7 +803,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_FALLBACK;
if (unlikely(anon_vma_prepare(vma)))
return VM_FAULT_OOM;
if (unlikely(khugepaged_enter(vma)))
if (unlikely(khugepaged_enter(vma, vma->vm_flags)))
return VM_FAULT_OOM;
if (!(flags & FAULT_FLAG_WRITE) &&
transparent_hugepage_use_zero_page()) {
......@@ -1970,7 +1970,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
* register it here without waiting a page fault that
* may not happen any time soon.
*/
if (unlikely(khugepaged_enter_vma_merge(vma)))
if (unlikely(khugepaged_enter_vma_merge(vma, *vm_flags)))
return -ENOMEM;
break;
case MADV_NOHUGEPAGE:
......@@ -2071,7 +2071,8 @@ int __khugepaged_enter(struct mm_struct *mm)
return 0;
}
int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
unsigned long vm_flags)
{
unsigned long hstart, hend;
if (!vma->anon_vma)
......@@ -2083,11 +2084,11 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
if (vma->vm_ops)
/* khugepaged not yet working on file or special mappings */
return 0;
VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (hstart < hend)
return khugepaged_enter(vma);
return khugepaged_enter(vma, vm_flags);
return 0;
}
......
......@@ -1536,12 +1536,8 @@ int mem_cgroup_swappiness(struct mem_cgroup *memcg)
* start move here.
*/
/* for quick checking without looking up memcg */
atomic_t memcg_moving __read_mostly;
static void mem_cgroup_start_move(struct mem_cgroup *memcg)
{
atomic_inc(&memcg_moving);
atomic_inc(&memcg->moving_account);
synchronize_rcu();
}
......@@ -1552,10 +1548,8 @@ static void mem_cgroup_end_move(struct mem_cgroup *memcg)
* Now, mem_cgroup_clear_mc() may call this function with NULL.
* We check NULL in callee rather than caller.
*/
if (memcg) {
atomic_dec(&memcg_moving);
if (memcg)
atomic_dec(&memcg->moving_account);
}
}
/*
......@@ -2204,41 +2198,52 @@ bool mem_cgroup_oom_synchronize(bool handle)
return true;
}
/*
* Used to update mapped file or writeback or other statistics.
/**
* mem_cgroup_begin_page_stat - begin a page state statistics transaction
* @page: page that is going to change accounted state
* @locked: &memcg->move_lock slowpath was taken
* @flags: IRQ-state flags for &memcg->move_lock
*
* Notes: Race condition
* This function must mark the beginning of an accounted page state
* change to prevent double accounting when the page is concurrently
* being moved to another memcg:
*
* Charging occurs during page instantiation, while the page is
* unmapped and locked in page migration, or while the page table is
* locked in THP migration. No race is possible.
* memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
* if (TestClearPageState(page))
* mem_cgroup_update_page_stat(memcg, state, -1);
* mem_cgroup_end_page_stat(memcg, locked, flags);
*
* Uncharge happens to pages with zero references, no race possible.
* The RCU lock is held throughout the transaction. The fast path can
* get away without acquiring the memcg->move_lock (@locked is false)
* because page moving starts with an RCU grace period.
*
* Charge moving between groups is protected by checking mm->moving
* account and taking the move_lock in the slowpath.
* The RCU lock also protects the memcg from being freed when the page
* state that is going to change is the only thing preventing the page
* from being uncharged. E.g. end-writeback clearing PageWriteback(),
* which allows migration to go ahead and uncharge the page before the
* account transaction might be complete.
*/
void __mem_cgroup_begin_update_page_stat(struct page *page,
bool *locked, unsigned long *flags)
struct mem_cgroup *mem_cgroup_begin_page_stat(struct page *page,
bool *locked,
unsigned long *flags)
{
struct mem_cgroup *memcg;
struct page_cgroup *pc;
rcu_read_lock();
if (mem_cgroup_disabled())
return NULL;
pc = lookup_page_cgroup(page);
again:
memcg = pc->mem_cgroup;
if (unlikely(!memcg || !PageCgroupUsed(pc)))
return;
/*
* If this memory cgroup is not under account moving, we don't
* need to take move_lock_mem_cgroup(). Because we already hold
* rcu_read_lock(), any calls to move_account will be delayed until
* rcu_read_unlock().
*/
VM_BUG_ON(!rcu_read_lock_held());
return NULL;
*locked = false;
if (atomic_read(&memcg->moving_account) <= 0)
return;
return memcg;
move_lock_mem_cgroup(memcg, flags);
if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
......@@ -2246,36 +2251,40 @@ void __mem_cgroup_begin_update_page_stat(struct page *page,
goto again;
}
*locked = true;
return memcg;
}
void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
/**
* mem_cgroup_end_page_stat - finish a page state statistics transaction
* @memcg: the memcg that was accounted against
* @locked: value received from mem_cgroup_begin_page_stat()
* @flags: value received from mem_cgroup_begin_page_stat()
*/
void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
unsigned long flags)
{
struct page_cgroup *pc = lookup_page_cgroup(page);
if (memcg && locked)
move_unlock_mem_cgroup(memcg, &flags);
/*
* It's guaranteed that pc->mem_cgroup never changes while
* lock is held because a routine modifies pc->mem_cgroup
* should take move_lock_mem_cgroup().
*/
move_unlock_mem_cgroup(pc->mem_cgroup, flags);
rcu_read_unlock();
}
void mem_cgroup_update_page_stat(struct page *page,
/**
* mem_cgroup_update_page_stat - update page state statistics
* @memcg: memcg to account against
* @idx: page state item to account
* @val: number of pages (positive or negative)
*
* See mem_cgroup_begin_page_stat() for locking requirements.
*/
void mem_cgroup_update_page_stat(struct mem_cgroup *memcg,
enum mem_cgroup_stat_index idx, int val)
{
struct mem_cgroup *memcg;
struct page_cgroup *pc = lookup_page_cgroup(page);
unsigned long uninitialized_var(flags);
if (mem_cgroup_disabled())
return;
VM_BUG_ON(!rcu_read_lock_held());
memcg = pc->mem_cgroup;
if (unlikely(!memcg || !PageCgroupUsed(pc)))
return;
this_cpu_add(memcg->stat->count[idx], val);
if (memcg)
this_cpu_add(memcg->stat->count[idx], val);
}
/*
......
......@@ -1912,7 +1912,6 @@ void try_offline_node(int nid)
unsigned long start_pfn = pgdat->node_start_pfn;
unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages;
unsigned long pfn;
struct page *pgdat_page = virt_to_page(pgdat);
int i;
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
......@@ -1941,10 +1940,6 @@ void try_offline_node(int nid)
node_set_offline(nid);
unregister_one_node(nid);
if (!PageSlab(pgdat_page) && !PageCompound(pgdat_page))
/* node data is allocated from boot memory */
return;
/* free waittable in each zone */
for (i = 0; i < MAX_NR_ZONES; i++) {
struct zone *zone = pgdat->node_zones + i;
......
......@@ -1080,7 +1080,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
end, prev->vm_pgoff, NULL);
if (err)
return NULL;
khugepaged_enter_vma_merge(prev);
khugepaged_enter_vma_merge(prev, vm_flags);
return prev;
}
......@@ -1099,7 +1099,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
next->vm_pgoff - pglen, NULL);
if (err)
return NULL;
khugepaged_enter_vma_merge(area);
khugepaged_enter_vma_merge(area, vm_flags);
return area;
}
......@@ -2208,7 +2208,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
}
vma_unlock_anon_vma(vma);
khugepaged_enter_vma_merge(vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(vma->vm_mm);
return error;
}
......@@ -2277,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
}
}
vma_unlock_anon_vma(vma);
khugepaged_enter_vma_merge(vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(vma->vm_mm);
return error;
}
......
......@@ -2115,23 +2115,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
}
EXPORT_SYMBOL(account_page_dirtied);
/*
* Helper function for set_page_writeback family.
*
* The caller must hold mem_cgroup_begin/end_update_page_stat() lock
* while calling this function.
* See test_set_page_writeback for example.
*
* NOTE: Unlike account_page_dirtied this does not rely on being atomic
* wrt interrupts.
*/
void account_page_writeback(struct page *page)
{
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK);
}
EXPORT_SYMBOL(account_page_writeback);
/*
* For address_spaces which do not use buffers. Just tag the page as dirty in
* its radix tree.
......@@ -2344,11 +2327,12 @@ EXPORT_SYMBOL(clear_page_dirty_for_io);
int test_clear_page_writeback(struct page *page)
{
struct address_space *mapping = page_mapping(page);
int ret;
bool locked;
unsigned long memcg_flags;
struct mem_cgroup *memcg;
bool locked;
int ret;
mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
......@@ -2369,22 +2353,23 @@ int test_clear_page_writeback(struct page *page)
ret = TestClearPageWriteback(page);
}
if (ret) {
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
dec_zone_page_state(page, NR_WRITEBACK);
inc_zone_page_state(page, NR_WRITTEN);
}
mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
return ret;
}
int __test_set_page_writeback(struct page *page, bool keep_write)
{
struct address_space *mapping = page_mapping(page);
int ret;
bool locked;
unsigned long memcg_flags;
struct mem_cgroup *memcg;
bool locked;
int ret;
mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
memcg = mem_cgroup_begin_page_stat(page, &locked, &memcg_flags);
if (mapping) {
struct backing_dev_info *bdi = mapping->backing_dev_info;
unsigned long flags;
......@@ -2410,9 +2395,11 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
} else {
ret = TestSetPageWriteback(page);
}
if (!ret)
account_page_writeback(page);
mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
if (!ret) {
mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_WRITEBACK);
inc_zone_page_state(page, NR_WRITEBACK);
}
mem_cgroup_end_page_stat(memcg, locked, memcg_flags);
return ret;
}
......
......@@ -171,6 +171,7 @@ static void free_page_cgroup(void *addr)
sizeof(struct page_cgroup) * PAGES_PER_SECTION;
BUG_ON(PageReserved(page));
kmemleak_free(addr);
free_pages_exact(addr, table_size);
}
}
......
......@@ -1042,15 +1042,46 @@ void page_add_new_anon_rmap(struct page *page,
*/
void page_add_file_rmap(struct page *page)
{
bool locked;
struct mem_cgroup *memcg;
unsigned long flags;
bool locked;
mem_cgroup_begin_update_page_stat(page, &locked, &flags);
memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
if (atomic_inc_and_test(&page->_mapcount)) {
__inc_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
mem_cgroup_inc_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
}
mem_cgroup_end_update_page_stat(page, &locked, &flags);
mem_cgroup_end_page_stat(memcg, locked, flags);
}
static void page_remove_file_rmap(struct page *page)
{
struct mem_cgroup *memcg;
unsigned long flags;
bool locked;
memcg = mem_cgroup_begin_page_stat(page, &locked, &flags);
/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
goto out;
/* Hugepages are not counted in NR_FILE_MAPPED for now. */
if (unlikely(PageHuge(page)))
goto out;
/*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
__dec_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_dec_page_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED);
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
out:
mem_cgroup_end_page_stat(memcg, locked, flags);
}
/**
......@@ -1061,46 +1092,33 @@ void page_add_file_rmap(struct page *page)
*/
void page_remove_rmap(struct page *page)
{
bool anon = PageAnon(page);
bool locked;
unsigned long flags;
/*
* The anon case has no mem_cgroup page_stat to update; but may
* uncharge_page() below, where the lock ordering can deadlock if
* we hold the lock against page_stat move: so avoid it on anon.
*/
if (!anon)
mem_cgroup_begin_update_page_stat(page, &locked, &flags);
if (!PageAnon(page)) {
page_remove_file_rmap(page);
return;
}
/* page still mapped by someone else? */
if (!atomic_add_negative(-1, &page->_mapcount))
goto out;
return;
/* Hugepages are not counted in NR_ANON_PAGES for now. */
if (unlikely(PageHuge(page)))
return;
/*
* Hugepages are not counted in NR_ANON_PAGES nor NR_FILE_MAPPED
* and not charged by memcg for now.
*
* We use the irq-unsafe __{inc|mod}_zone_page_stat because
* these counters are not modified in interrupt context, and
* these counters are not modified in interrupt context, and
* pte lock(a spinlock) is held, which implies preemption disabled.
*/
if (unlikely(PageHuge(page)))
goto out;
if (anon) {
if (PageTransHuge(page))
__dec_zone_page_state(page,
NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-hpage_nr_pages(page));
} else {
__dec_zone_page_state(page, NR_FILE_MAPPED);
mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
mem_cgroup_end_update_page_stat(page, &locked, &flags);
}
if (PageTransHuge(page))
__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
-hpage_nr_pages(page));
if (unlikely(PageMlocked(page)))
clear_page_mlock(page);
/*
* It would be tidy to reset the PageAnon mapping here,
* but that might overwrite a racing page_add_anon_rmap
......@@ -1110,10 +1128,6 @@ void page_remove_rmap(struct page *page)
* Leaving it set also helps swapoff to reinstate ptes
* faster for those pages still in swapcache.
*/
return;
out:
if (!anon)
mem_cgroup_end_update_page_stat(page, &locked, &flags);
}
/*
......
......@@ -93,16 +93,6 @@ static int kmem_cache_sanity_check(const char *name, size_t size)
s->object_size);
continue;
}
#if !defined(CONFIG_SLUB)
if (!strcmp(s->name, name)) {
pr_err("%s (%s): Cache name already exists.\n",
__func__, name);
dump_stack();
s = NULL;
return -EINVAL;
}
#endif
}
WARN_ON(strchr(name, ' ')); /* It confuses parsers */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment