Commit bde17b90 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "12 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  dmapool: fix overflow condition in pool_find_page()
  thermal: avoid division by zero in power allocator
  memcg: remove pcp_counter_lock
  kprobes: use _do_fork() in samples to make them work again
  drivers/input/joystick/Kconfig: zhenhua.c needs BITREVERSE
  memcg: make mem_cgroup_read_stat() unsigned
  memcg: fix dirty page migration
  dax: fix NULL pointer in __dax_pmd_fault()
  mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault
  mm/slab: fix unexpected index mapping result of kmalloc_size(INDEX_NODE+1)
  userfaultfd: remove kernel header include from uapi header
  arch/x86/include/asm/efi.h: fix build failure
parents 1bca1000 676bd991
@@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...);
 extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
                                         u32 type, u64 attribute);
 
+#ifdef CONFIG_KASAN
 /*
  * CONFIG_KASAN may redefine memset to __memset. __memset function is present
  * only in kernel binary. Since the EFI stub linked into a separate binary it
@@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 #undef memcpy
 #undef memset
 #undef memmove
+#endif
 
 #endif /* CONFIG_X86_32 */
......
@@ -196,6 +196,7 @@ config JOYSTICK_TWIDJOY
 config JOYSTICK_ZHENHUA
         tristate "5-byte Zhenhua RC transmitter"
         select SERIO
+        select BITREVERSE
         help
           Say Y here if you have a Zhen Hua PPM-4CH transmitter which is
           supplied with a ready to fly micro electric indoor helicopters
......
@@ -144,6 +144,16 @@ static void estimate_pid_constants(struct thermal_zone_device *tz,
                 switch_on_temp = 0;
 
         temperature_threshold = control_temp - switch_on_temp;
+        /*
+         * estimate_pid_constants() tries to find appropriate default
+         * values for thermal zones that don't provide them. If a
+         * system integrator has configured a thermal zone with two
+         * passive trip points at the same temperature, that person
+         * hasn't put any effort to set up the thermal zone properly
+         * so just give up.
+         */
+        if (!temperature_threshold)
+                return;
 
         if (!tz->tzp->k_po || force)
                 tz->tzp->k_po = int_to_frac(sustainable_power) /
......
@@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         if (!buffer_size_valid(&bh) || bh.b_size < PMD_SIZE)
                 goto fallback;
 
+        sector = bh.b_blocknr << (blkbits - 9);
+
         if (buffer_unwritten(&bh) || buffer_new(&bh)) {
                 int i;
+
+                length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
+                                                bh.b_size);
+                if (length < 0) {
+                        result = VM_FAULT_SIGBUS;
+                        goto out;
+                }
+                if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+                        goto fallback;
+
                 for (i = 0; i < PTRS_PER_PMD; i++)
                         clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
                 wmb_pmem();
@@ -623,7 +635,6 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
                 result = VM_FAULT_NOPAGE;
                 spin_unlock(ptl);
         } else {
-                sector = bh.b_blocknr << (blkbits - 9);
                 length = bdev_direct_access(bh.b_bdev, sector, &kaddr, &pfn,
                                                 bh.b_size);
                 if (length < 0) {
......
@@ -242,7 +242,6 @@ struct mem_cgroup {
          * percpu counter.
          */
         struct mem_cgroup_stat_cpu __percpu *stat;
-        spinlock_t pcp_counter_lock;
 
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
         struct cg_proto tcp_mem;
......
@@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
 #endif
 }
 
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+        return page->mem_cgroup;
+}
+
+static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
+{
+        page->mem_cgroup = memcg;
+}
+#else
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+        return NULL;
+}
+
+static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
+{
+}
+#endif
+
 /*
  * Some inline functions in vmstat.h depend on page_zone()
  */
......
@@ -11,8 +11,6 @@
 #include <linux/types.h>
 
-#include <linux/compiler.h>
-
 #define UFFD_API ((__u64)0xAA)
 
 /*
  * After implementing the respective features it will become:
......
@@ -394,7 +394,7 @@ static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
         list_for_each_entry(page, &pool->page_list, page_list) {
                 if (dma < page->dma)
                         continue;
-                if (dma < (page->dma + pool->allocation))
+                if ((dma - page->dma) < pool->allocation)
                         return page;
         }
         return NULL;
......
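The one-line dmapool change above is easier to see with concrete numbers. Below is a minimal userspace sketch, not kernel code: the values and the 32-bit dma_addr_t are hypothetical, chosen only to show why rewriting the bounds check as a subtraction avoids the wrap-around.

/* overflow_demo.c - hypothetical illustration, not kernel code */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* imagine a 32-bit dma_addr_t and a pool page mapped near the top */
        uint32_t page_dma   = 0xfffff000u;      /* page->dma */
        uint32_t allocation = 0x00002000u;      /* pool->allocation */
        uint32_t dma        = 0xfffff800u;      /* address inside that page */

        /* old check: page_dma + allocation wraps to 0x1000, so a valid
         * address is rejected and pool_find_page() would return NULL */
        printf("old check: %d\n", dma < page_dma + allocation);

        /* new check: dma >= page_dma is already known at this point, so the
         * subtraction cannot wrap and the page is found */
        printf("new check: %d\n", (dma - page_dma) < allocation);
        return 0;
}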
@@ -3201,6 +3201,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
                 if (iter_vma == vma)
                         continue;
 
+                /*
+                 * Shared VMAs have their own reserves and do not affect
+                 * MAP_PRIVATE accounting but it is possible that a shared
+                 * VMA is using the same page so check and skip such VMAs.
+                 */
+                if (iter_vma->vm_flags & VM_MAYSHARE)
+                        continue;
+
                 /*
                  * Unmap the page from other VMAs without their own reserves.
                  * They get marked to be SIGKILLed if they fault in these
......
@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 }
 
 /*
+ * Return page count for single (non recursive) @memcg.
+ *
  * Implementation Note: reading percpu statistics for memcg.
  *
  * Both of vmstat[] and percpu_counter has threshold and do periodic
  * synchronization to implement "quick" read. There are trade-off between
  * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronizion of counter in memcg's counter.
+ * a periodic synchronization of counter in memcg's counter.
  *
  * But this _read() function is used for user interface now. The user accounts
  * memory usage by memory cgroup and he _always_ requires exact value because
@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
  *
  * If there are kernel internal actions which can make use of some not-exact
  * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threashold and synchonization as vmstat[] should be
+ * common workload, threshold and synchronization as vmstat[] should be
  * implemented.
  */
-static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
-                                 enum mem_cgroup_stat_index idx)
+static unsigned long
+mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
 {
         long val = 0;
         int cpu;
 
+        /* Per-cpu values can be negative, use a signed accumulator */
         for_each_possible_cpu(cpu)
                 val += per_cpu(memcg->stat->count[idx], cpu);
+        /*
+         * Summing races with updates, so val may be negative.  Avoid exposing
+         * transient negative values.
+         */
+        if (val < 0)
+                val = 0;
         return val;
 }
 
@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
         for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
                 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                         continue;
-                pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
+                pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
                         K(mem_cgroup_read_stat(iter, i)));
         }
 
@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
                                enum mem_cgroup_stat_index idx)
 {
         struct mem_cgroup *iter;
-        long val = 0;
+        unsigned long val = 0;
 
-        /* Per-cpu values can be negative, use a signed accumulator */
         for_each_mem_cgroup_tree(iter, memcg)
                 val += mem_cgroup_read_stat(iter, idx);
-        if (val < 0) /* race ? */
-                val = 0;
         return val;
 }
 
@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
         for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
                 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                         continue;
-                seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
+                seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
                            mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
         }
 
@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
                            (u64)memsw * PAGE_SIZE);
 
         for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-                long long val = 0;
+                unsigned long long val = 0;
 
                 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                         continue;
                 for_each_mem_cgroup_tree(mi, memcg)
                         val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
-                seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
+                seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
         }
 
         for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
@@ -4179,7 +4185,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
         if (memcg_wb_domain_init(memcg, GFP_KERNEL))
                 goto out_free_stat;
 
-        spin_lock_init(&memcg->pcp_counter_lock);
         return memcg;
 
 out_free_stat:
......
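The mem_cgroup_read_stat() change above keeps a signed accumulator internally but clamps the sum before returning it through an unsigned type. Below is a minimal userspace sketch of the transient-negative case that the clamp guards against; the per-cpu snapshot values are hypothetical and this is not kernel code.

/* readstat_demo.c - hypothetical illustration, not kernel code */
#include <stdio.h>

/* a racing reader can observe per-cpu deltas that sum to a negative value */
static long percpu_count[2] = { 5, -7 };

static unsigned long read_stat(void)
{
        long val = 0;   /* per-cpu values can be negative: signed accumulator */
        int cpu;

        for (cpu = 0; cpu < 2; cpu++)
                val += percpu_count[cpu];
        if (val < 0)    /* transient negative due to the race: hide it */
                val = 0;
        return val;
}

int main(void)
{
        /* without the clamp, -2 pushed through unsigned long would be
         * reported as a huge bogus statistic */
        printf("%lu\n", read_stat());   /* prints 0 */
        return 0;
}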
@@ -740,6 +740,15 @@ static int move_to_new_page(struct page *newpage, struct page *page,
         if (PageSwapBacked(page))
                 SetPageSwapBacked(newpage);
 
+        /*
+         * Indirectly called below, migrate_page_copy() copies PG_dirty and thus
+         * needs newpage's memcg set to transfer memcg dirty page accounting.
+         * So perform memcg migration in two steps:
+         * 1. set newpage->mem_cgroup (here)
+         * 2. clear page->mem_cgroup (below)
+         */
+        set_page_memcg(newpage, page_memcg(page));
+
         mapping = page_mapping(page);
         if (!mapping)
                 rc = migrate_page(mapping, newpage, page, mode);
@@ -756,9 +765,10 @@ static int move_to_new_page(struct page *newpage, struct page *page,
                 rc = fallback_migrate_page(mapping, newpage, page, mode);
 
         if (rc != MIGRATEPAGE_SUCCESS) {
+                set_page_memcg(newpage, NULL);
                 newpage->mapping = NULL;
         } else {
-                mem_cgroup_migrate(page, newpage, false);
+                set_page_memcg(page, NULL);
                 if (page_was_mapped)
                         remove_migration_ptes(page, newpage);
                 page->mapping = NULL;
......
@@ -2190,9 +2190,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                         size += BYTES_PER_WORD;
         }
 #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
-        if (size >= kmalloc_size(INDEX_NODE + 1)
-            && cachep->object_size > cache_line_size()
-            && ALIGN(size, cachep->align) < PAGE_SIZE) {
+        /*
+         * To activate debug pagealloc, off-slab management is necessary
+         * requirement. In early phase of initialization, small sized slab
+         * doesn't get initialized so it would not be possible. So, we need
+         * to check size >= 256. It guarantees that all necessary small
+         * sized slab is initialized in current slab initialization sequence.
+         */
+        if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+                size >= 256 && cachep->object_size > cache_line_size() &&
+                ALIGN(size, cachep->align) < PAGE_SIZE) {
                 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
                 size = PAGE_SIZE;
         }
......
 /*
  * Here's a sample kernel module showing the use of jprobes to dump
- * the arguments of do_fork().
+ * the arguments of _do_fork().
  *
  * For more information on theory of operation of jprobes, see
  * Documentation/kprobes.txt
  *
  * Build and insert the kernel module as done in the kprobe example.
  * You will see the trace data in /var/log/messages and on the
- * console whenever do_fork() is invoked to create a new process.
+ * console whenever _do_fork() is invoked to create a new process.
  * (Some messages may be suppressed if syslogd is configured to
  * eliminate duplicate messages.)
  */
@@ -17,13 +17,13 @@
 #include <linux/kprobes.h>
 
 /*
- * Jumper probe for do_fork.
+ * Jumper probe for _do_fork.
  * Mirror principle enables access to arguments of the probed routine
  * from the probe handler.
  */
-/* Proxy routine having the same arguments as actual do_fork() routine */
-static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
+/* Proxy routine having the same arguments as actual _do_fork() routine */
+static long j_do_fork(unsigned long clone_flags, unsigned long stack_start,
               unsigned long stack_size, int __user *parent_tidptr,
               int __user *child_tidptr)
 {
@@ -36,9 +36,9 @@ static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
 }
 
 static struct jprobe my_jprobe = {
-        .entry                  = jdo_fork,
+        .entry                  = j_do_fork,
         .kp = {
-                .symbol_name    = "do_fork",
+                .symbol_name    = "_do_fork",
         },
 };
......
 /*
  * NOTE: This example is works on x86 and powerpc.
  * Here's a sample kernel module showing the use of kprobes to dump a
- * stack trace and selected registers when do_fork() is called.
+ * stack trace and selected registers when _do_fork() is called.
  *
  * For more information on theory of operation of kprobes, see
  * Documentation/kprobes.txt
  *
  * You will see the trace data in /var/log/messages and on the console
- * whenever do_fork() is invoked to create a new process.
+ * whenever _do_fork() is invoked to create a new process.
  */
 
 #include <linux/kernel.h>
@@ -16,7 +16,7 @@
 /* For each probe you need to allocate a kprobe structure */
 static struct kprobe kp = {
-        .symbol_name    = "do_fork",
+        .symbol_name    = "_do_fork",
 };
 
 /* kprobe pre_handler: called just before the probed instruction is executed */
......
@@ -7,7 +7,7 @@
  *
  * usage: insmod kretprobe_example.ko func=<func_name>
  *
- * If no func_name is specified, do_fork is instrumented
+ * If no func_name is specified, _do_fork is instrumented
  *
  * For more information on theory of operation of kretprobes, see
  * Documentation/kprobes.txt
@@ -25,7 +25,7 @@
 #include <linux/limits.h>
 #include <linux/sched.h>
 
-static char func_name[NAME_MAX] = "do_fork";
+static char func_name[NAME_MAX] = "_do_fork";
 module_param_string(func, func_name, NAME_MAX, S_IRUGO);
 MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
                 " function's execution time");
......