Commit 06d97c58 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "18 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, swap: use page-cluster as max window of VMA based swap readahead
  mm: page_vma_mapped: ensure pmd is loaded with READ_ONCE outside of lock
  kmemleak: clear stale pointers from task stacks
  fs/binfmt_misc.c: node could be NULL when evicting inode
  fs/mpage.c: fix mpage_writepage() for pages with buffers
  linux/kernel.h: add/correct kernel-doc notation
  tty: fall back to N_NULL if switching to N_TTY fails during hangup
  Revert "vmalloc: back off when the current task is killed"
  mm/cma.c: take __GFP_NOWARN into account in cma_alloc()
  scripts/kallsyms.c: ignore symbol type 'n'
  userfaultfd: selftest: exercise -EEXIST only in background transfer
  mm: only display online cpus of the numa node
  mm: remove unnecessary WARN_ONCE in page_vma_mapped_walk().
  mm/mempolicy: fix NUMA_INTERLEAVE_HIT counter
  include/linux/of.h: provide of_n_{addr,size}_cells wrappers for !CONFIG_OF
  mm/madvise.c: add description for MADV_WIPEONFORK and MADV_KEEPONFORK
  lib/Kconfig.debug: kernel hacking menu: runtime testing: keep tests together
  mm/migrate: fix indexing bug (off by one) and avoid out of bound access
parents e837d913 61b63972
......@@ -14,13 +14,3 @@ Description: Enable/disable VMA based swap readahead.
still used for tmpfs etc. other users. If set to
false, the global swap readahead algorithm will be
used for all swappable pages.
What: /sys/kernel/mm/swap/vma_ra_max_order
Date: August 2017
Contact: Linux memory management mailing list <linux-mm@kvack.org>
Description: The max readahead size in order for VMA based swap readahead
VMA based swap readahead algorithm will readahead at
most 1 << max_order pages for each readahead. The
real readahead size for each readahead will be scaled
according to the estimation algorithm.
......@@ -27,13 +27,21 @@ static struct bus_type node_subsys = {
static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
{
ssize_t n;
cpumask_var_t mask;
struct node *node_dev = to_node(dev);
const struct cpumask *mask = cpumask_of_node(node_dev->dev.id);
/* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
return cpumap_print_to_pagebuf(list, buf, mask);
if (!alloc_cpumask_var(&mask, GFP_KERNEL))
return 0;
cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
n = cpumap_print_to_pagebuf(list, buf, mask);
free_cpumask_var(mask);
return n;
}
static inline ssize_t node_read_cpumask(struct device *dev,
......
......@@ -694,10 +694,8 @@ int tty_ldisc_reinit(struct tty_struct *tty, int disc)
tty_set_termios_ldisc(tty, disc);
retval = tty_ldisc_open(tty, tty->ldisc);
if (retval) {
if (!WARN_ON(disc == N_TTY)) {
tty_ldisc_put(tty->ldisc);
tty->ldisc = NULL;
}
tty_ldisc_put(tty->ldisc);
tty->ldisc = NULL;
}
return retval;
}
......@@ -752,8 +750,9 @@ void tty_ldisc_hangup(struct tty_struct *tty, bool reinit)
if (tty->ldisc) {
if (reinit) {
if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0)
tty_ldisc_reinit(tty, N_TTY);
if (tty_ldisc_reinit(tty, tty->termios.c_line) < 0 &&
tty_ldisc_reinit(tty, N_TTY) < 0)
WARN_ON(tty_ldisc_reinit(tty, N_NULL) < 0);
} else
tty_ldisc_kill(tty);
}
......
......@@ -596,7 +596,7 @@ static void bm_evict_inode(struct inode *inode)
{
Node *e = inode->i_private;
if (e->flags & MISC_FMT_OPEN_FILE)
if (e && e->flags & MISC_FMT_OPEN_FILE)
filp_close(e->interp_file, NULL);
clear_inode(inode);
......
......@@ -716,10 +716,12 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
set_page_writeback(page);
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
if (result)
if (result) {
end_page_writeback(page);
else
} else {
clean_page_buffers(page);
unlock_page(page);
}
blk_queue_exit(bdev->bd_queue);
return result;
}
......
......@@ -468,6 +468,16 @@ static void clean_buffers(struct page *page, unsigned first_unmapped)
try_to_free_buffers(page);
}
/*
* For situations where we want to clean all buffers attached to a page.
* We don't need to calculate how many buffers are attached to the page,
* we just need to specify a number larger than the maximum number of buffers.
*/
void clean_page_buffers(struct page *page)
{
clean_buffers(page, ~0U);
}
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
......@@ -605,10 +615,8 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
if (bio == NULL) {
if (first_unmapped == blocks_per_page) {
if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
page, wbc)) {
clean_buffers(page, first_unmapped);
page, wbc))
goto out;
}
}
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
......
......@@ -232,6 +232,7 @@ int generic_write_end(struct file *, struct address_space *,
loff_t, unsigned, unsigned,
struct page *, void *);
void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
void clean_page_buffers(struct page *page);
int cont_write_begin(struct file *, struct address_space *, loff_t,
unsigned, unsigned, struct page **, void **,
get_block_t *, loff_t *);
......
......@@ -44,6 +44,12 @@
#define STACK_MAGIC 0xdeadbeef
/**
* REPEAT_BYTE - repeat the value @x multiple times as an unsigned long value
* @x: value to repeat
*
* NOTE: @x is not checked for > 0xff; larger values produce odd results.
*/
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
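/*
 * Editorial example, not part of this patch: on a 64-bit build,
 * ~0ul / 0xff is 0x0101010101010101, so REPEAT_BYTE(0x2a) evaluates to
 * 0x2a2a2a2a2a2a2a2a. A value above 0xff spills into the neighbouring
 * byte lanes, which is the "odd results" case the note above warns about.
 */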
/* @a is a power of 2 value */
......@@ -57,6 +63,10 @@
#define READ 0
#define WRITE 1
/**
* ARRAY_SIZE - get the number of elements in array @arr
* @arr: array to be sized
*/
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
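/*
 * Editorial example, not part of this patch: given "int a[10];",
 * ARRAY_SIZE(a) is 10, while ARRAY_SIZE(p) on a plain pointer fails to
 * build thanks to __must_be_array(), instead of silently returning a
 * wrong value.
 */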
#define u64_to_user_ptr(x) ( \
......@@ -76,7 +86,15 @@
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
#define round_down(x, y) ((x) & ~__round_mask(x, y))
/**
* FIELD_SIZEOF - get the size of a struct's field
* @t: the target struct
* @f: the target struct's field
* Return: the size of @f in the struct definition without having a
* declared instance of @t.
*/
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP
#define DIV_ROUND_DOWN_ULL(ll, d) \
......@@ -107,7 +125,7 @@
/*
* Divide positive or negative dividend by positive or negative divisor
* and round to closest integer. Result is undefined for negative
* divisors if he dividend variable type is unsigned and for negative
* divisors if the dividend variable type is unsigned and for negative
* dividends if the divisor variable type is unsigned.
*/
#define DIV_ROUND_CLOSEST(x, divisor)( \
......@@ -247,13 +265,13 @@ extern int _cond_resched(void);
* @ep_ro: right open interval endpoint
*
* Perform a "reciprocal multiplication" in order to "scale" a value into
* range [0, ep_ro), where the upper interval endpoint is right-open.
* range [0, @ep_ro), where the upper interval endpoint is right-open.
* This is useful, e.g. for accessing a index of an array containing
* ep_ro elements, for example. Think of it as sort of modulus, only that
* @ep_ro elements, for example. Think of it as sort of modulus, only that
* the result isn't that of modulo. ;) Note that if initial input is a
* small value, then result will return 0.
*
* Return: a result based on val in interval [0, ep_ro).
* Return: a result based on @val in interval [0, @ep_ro).
*/
static inline u32 reciprocal_scale(u32 val, u32 ep_ro)
{
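/*
 * Editorial example, not part of this patch: with the usual
 * ((u64)val * ep_ro) >> 32 implementation, reciprocal_scale(0x80000000, 100)
 * returns 50 - a val halfway through the u32 range lands halfway
 * through [0, @ep_ro).
 */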
......@@ -618,8 +636,8 @@ do { \
* trace_printk - printf formatting in the ftrace buffer
* @fmt: the printf format for printing
*
* Note: __trace_printk is an internal function for trace_printk and
* the @ip is passed in via the trace_printk macro.
* Note: __trace_printk is an internal function for trace_printk() and
* the @ip is passed in via the trace_printk() macro.
*
* This function allows a kernel developer to debug fast path sections
* that printk is not appropriate for. By scattering in various
......@@ -629,7 +647,7 @@ do { \
* This is intended as a debugging tool for the developer only.
* Please refrain from leaving trace_printks scattered around in
* your code. (Extra memory is used for special buffers that are
* allocated when trace_printk() is used)
* allocated when trace_printk() is used.)
*
* A little optimization trick is done here. If there's only one
* argument, there's no need to scan the string for printf formats.
......@@ -681,7 +699,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
* the @ip is passed in via the trace_puts macro.
*
* This is similar to trace_printk() but is made for those really fast
* paths that a developer wants the least amount of "Heisenbug" affects,
* paths that a developer wants the least amount of "Heisenbug" effects,
* where the processing of the print format is still too much.
*
* This function allows a kernel developer to debug fast path sections
......@@ -692,7 +710,7 @@ int __trace_printk(unsigned long ip, const char *fmt, ...);
* This is intended as a debugging tool for the developer only.
* Please refrain from leaving trace_puts scattered around in
* your code. (Extra memory is used for special buffers that are
* allocated when trace_puts() is used)
* allocated when trace_puts() is used.)
*
* Returns: 0 if nothing was written, positive # if string was.
* (1 when __trace_bputs is used, strlen(str) when __trace_puts is used)
......@@ -771,6 +789,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
t2 min2 = (y); \
(void) (&min1 == &min2); \
min1 < min2 ? min1 : min2; })
/**
* min - return minimum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define min(x, y) \
__min(typeof(x), typeof(y), \
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
......@@ -781,12 +805,31 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
t2 max2 = (y); \
(void) (&max1 == &max2); \
max1 > max2 ? max1 : max2; })
/**
* max - return maximum of two values of the same or compatible types
* @x: first value
* @y: second value
*/
#define max(x, y) \
__max(typeof(x), typeof(y), \
__UNIQUE_ID(max1_), __UNIQUE_ID(max2_), \
x, y)
/**
* min3 - return minimum of three values
* @x: first value
* @y: second value
* @z: third value
*/
#define min3(x, y, z) min((typeof(x))min(x, y), z)
/**
* max3 - return maximum of three values
* @x: first value
* @y: second value
* @z: third value
*/
#define max3(x, y, z) max((typeof(x))max(x, y), z)
/**
......@@ -805,8 +848,8 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @lo: lowest allowable value
* @hi: highest allowable value
*
* This macro does strict typechecking of lo/hi to make sure they are of the
* same type as val. See the unnecessary pointer comparisons.
* This macro does strict typechecking of @lo/@hi to make sure they are of the
* same type as @val. See the unnecessary pointer comparisons.
*/
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
......@@ -816,11 +859,24 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
*
* Or not use min/max/clamp at all, of course.
*/
/**
* min_t - return minimum of two values, using the specified type
* @type: data type to use
* @x: first value
* @y: second value
*/
#define min_t(type, x, y) \
__min(type, type, \
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
x, y)
/**
* max_t - return maximum of two values, using the specified type
* @type: data type to use
* @x: first value
* @y: second value
*/
#define max_t(type, x, y) \
__max(type, type, \
__UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
......@@ -834,7 +890,7 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of type
* 'type' to make all the comparisons.
* @type to make all the comparisons.
*/
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
......@@ -845,15 +901,17 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
* @hi: maximum allowable value
*
* This macro does no typechecking and uses temporary variables of whatever
* type the input argument 'val' is. This is useful when val is an unsigned
* type and min and max are literals that will otherwise be assigned a signed
* type the input argument @val is. This is useful when @val is an unsigned
* type and @lo and @hi are literals that will otherwise be assigned a signed
* integer type.
*/
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
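/*
 * Editorial example, not part of this patch: with "u8 duty;",
 * clamp(duty, 8, 200) trips clamp()'s pointer-comparison type check
 * (the literals are int), whereas clamp_val(duty, 8, 200) does every
 * comparison as u8 and compiles without the warning.
 */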
/*
* swap - swap value of @a and @b
/**
* swap - swap values of @a and @b
* @a: first value
* @b: second value
*/
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
......
......@@ -734,6 +734,16 @@ static inline struct device_node *of_get_cpu_node(int cpu,
return NULL;
}
static inline int of_n_addr_cells(struct device_node *np)
{
return 0;
}
static inline int of_n_size_cells(struct device_node *np)
{
return 0;
}
static inline int of_property_read_u64(const struct device_node *np,
const char *propname, u64 *out_value)
{
......
......@@ -42,7 +42,7 @@ enum {
#define THREAD_ALIGN THREAD_SIZE
#endif
#ifdef CONFIG_DEBUG_STACK_USAGE
#if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK)
# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \
__GFP_ZERO)
#else
......
......@@ -215,6 +215,10 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
if (!s)
continue;
#ifdef CONFIG_DEBUG_KMEMLEAK
/* Clear stale pointers from reused stack. */
memset(s->addr, 0, THREAD_SIZE);
#endif
tsk->stack_vm_area = s;
return s->addr;
}
......
......@@ -1590,6 +1590,54 @@ config LATENCYTOP
source kernel/trace/Kconfig
config PROVIDE_OHCI1394_DMA_INIT
bool "Remote debugging over FireWire early on boot"
depends on PCI && X86
help
If you want to debug problems which hang or crash the kernel early
on boot and the crashing machine has a FireWire port, you can use
this feature to remotely access the memory of the crashed machine
over FireWire. This employs remote DMA as part of the OHCI1394
specification which is now the standard for FireWire controllers.
With remote DMA, you can monitor the printk buffer remotely using
firescope and access all memory below 4GB using fireproxy from gdb.
Even controlling a kernel debugger is possible using remote DMA.
Usage:
If ohci1394_dma=early is used as boot parameter, it will initialize
all OHCI1394 controllers which are found in the PCI config space.
As all changes to the FireWire bus such as enabling and disabling
devices cause a bus reset and thereby disable remote DMA for all
devices, be sure to have the cable plugged and FireWire enabled on
the debugging host before booting the debug target for debugging.
This code (~1k) is freed after boot. By then, the firewire stack
in charge of the OHCI-1394 controllers should be used instead.
See Documentation/debugging-via-ohci1394.txt for more information.
config DMA_API_DEBUG
bool "Enable debugging of DMA-API usage"
depends on HAVE_DMA_API_DEBUG
help
Enable this option to debug the use of the DMA API by device drivers.
With this option you will be able to detect common bugs in device
drivers like double-freeing of DMA mappings or freeing mappings that
were never allocated.
This also attempts to catch cases where a page owned by DMA is
accessed by the cpu in a way that could cause data corruption. For
example, this enables cow_user_page() to check that the source page is
not undergoing DMA.
This option causes a performance degradation. Use only if you want to
debug device drivers and dma interactions.
If unsure, say N.
menu "Runtime Testing"
config LKDTM
......@@ -1749,56 +1797,6 @@ config TEST_PARMAN
If unsure, say N.
endmenu # runtime tests
config PROVIDE_OHCI1394_DMA_INIT
bool "Remote debugging over FireWire early on boot"
depends on PCI && X86
help
If you want to debug problems which hang or crash the kernel early
on boot and the crashing machine has a FireWire port, you can use
this feature to remotely access the memory of the crashed machine
over FireWire. This employs remote DMA as part of the OHCI1394
specification which is now the standard for FireWire controllers.
With remote DMA, you can monitor the printk buffer remotely using
firescope and access all memory below 4GB using fireproxy from gdb.
Even controlling a kernel debugger is possible using remote DMA.
Usage:
If ohci1394_dma=early is used as boot parameter, it will initialize
all OHCI1394 controllers which are found in the PCI config space.
As all changes to the FireWire bus such as enabling and disabling
devices cause a bus reset and thereby disable remote DMA for all
devices, be sure to have the cable plugged and FireWire enabled on
the debugging host before booting the debug target for debugging.
This code (~1k) is freed after boot. By then, the firewire stack
in charge of the OHCI-1394 controllers should be used instead.
See Documentation/debugging-via-ohci1394.txt for more information.
config DMA_API_DEBUG
bool "Enable debugging of DMA-API usage"
depends on HAVE_DMA_API_DEBUG
help
Enable this option to debug the use of the DMA API by device drivers.
With this option you will be able to detect common bugs in device
drivers like double-freeing of DMA mappings or freeing mappings that
were never allocated.
This also attempts to catch cases where a page owned by DMA is
accessed by the cpu in a way that could cause data corruption. For
example, this enables cow_user_page() to check that the source page is
not undergoing DMA.
This option causes a performance degradation. Use only if you want to
debug device drivers and dma interactions.
If unsure, say N.
config TEST_LKM
tristate "Test module loading with 'hello world' module"
default n
......@@ -1873,18 +1871,6 @@ config TEST_UDELAY
If unsure, say N.
config MEMTEST
bool "Memtest"
depends on HAVE_MEMBLOCK
---help---
This option adds a kernel parameter 'memtest', which allows memtest
to be set.
memtest=0, mean disabled; -- default
memtest=1, mean do 1 test pattern;
...
memtest=17, mean do 17 test patterns.
If you are unsure how to answer this question, answer N.
config TEST_STATIC_KEYS
tristate "Test static keys"
default n
......@@ -1894,16 +1880,6 @@ config TEST_STATIC_KEYS
If unsure, say N.
config BUG_ON_DATA_CORRUPTION
bool "Trigger a BUG when data corruption is detected"
select DEBUG_LIST
help
Select this option if the kernel should BUG when it encounters
data corruption in kernel memory structures when they get checked
for validity.
If unsure, say N.
config TEST_KMOD
tristate "kmod stress tester"
default n
......@@ -1941,6 +1917,29 @@ config TEST_DEBUG_VIRTUAL
If unsure, say N.
endmenu # runtime tests
config MEMTEST
bool "Memtest"
depends on HAVE_MEMBLOCK
---help---
This option adds a kernel parameter 'memtest', which allows memtest
to be set.
memtest=0, mean disabled; -- default
memtest=1, mean do 1 test pattern;
...
memtest=17, mean do 17 test patterns.
If you are unsure how to answer this question, answer N.
config BUG_ON_DATA_CORRUPTION
bool "Trigger a BUG when data corruption is detected"
select DEBUG_LIST
help
Select this option if the kernel should BUG when it encounters
data corruption in kernel memory structures when they get checked
for validity.
If unsure, say N.
source "samples/Kconfig"
......
......@@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
trace_cma_alloc(pfn, page, count, align);
if (ret) {
if (ret && !(gfp_mask & __GFP_NOWARN)) {
pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
__func__, count, ret);
cma_debug_show_areas(cma);
......
......@@ -757,6 +757,9 @@ madvise_behavior_valid(int behavior)
* MADV_DONTFORK - omit this area from child's address space when forking:
* typically, to avoid COWing pages pinned by get_user_pages().
* MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
* MADV_WIPEONFORK - present the child process with zero-filled memory in this
* range after a fork.
* MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
* MADV_HWPOISON - trigger memory error handler as if the given memory range
* were corrupted by unrecoverable hardware memory failure.
* MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
......@@ -777,7 +780,9 @@ madvise_behavior_valid(int behavior)
* zero - success
* -EINVAL - start + len < 0, start is not page-aligned,
* "behavior" is not a valid value, or application
* is attempting to release locked or shared pages.
* is attempting to release locked or shared pages,
* or the specified address range includes file, Huge TLB,
* MAP_SHARED or VMPFNMAP range.
* -ENOMEM - addresses in the specified range are not currently
* mapped, or are outside the AS of the process.
* -EIO - an I/O error occurred while paging in data.
......
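A minimal userspace sketch of the newly documented flags (editorial; it assumes libc headers that already define MADV_WIPEONFORK, and the variable names are illustrative): memory marked MADV_WIPEONFORK reads back zero-filled in the child after fork(), while the parent's copy is untouched; MADV_KEEPONFORK would undo the marking.

#include <sys/mman.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	p[0] = 'x';
	madvise(p, 4096, MADV_WIPEONFORK);	/* children get zero-filled memory here */
	if (fork() == 0) {
		printf("child sees %d\n", p[0]);	/* prints 0, not 'x' */
		_exit(0);
	}
	wait(NULL);
	printf("parent sees %c\n", p[0]);	/* still 'x' */
	return 0;
}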
......@@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
struct page *page;
page = __alloc_pages(gfp, order, nid);
if (page && page_to_nid(page) == nid)
inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
if (page && page_to_nid(page) == nid) {
preempt_disable();
__inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
preempt_enable();
}
return page;
}
......
......@@ -2146,8 +2146,9 @@ static int migrate_vma_collect_hole(unsigned long start,
unsigned long addr;
for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE;
migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
migrate->npages++;
migrate->cpages++;
}
......
......@@ -6,17 +6,6 @@
#include "internal.h"
static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
{
pmd_t pmde;
/*
* Make sure we don't re-load pmd between present and !trans_huge check.
* We need a consistent view.
*/
pmde = READ_ONCE(*pvmw->pmd);
return pmd_present(pmde) && !pmd_trans_huge(pmde);
}
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
page_vma_mapped_walk_done(pvmw);
......@@ -116,6 +105,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t pmde;
/* The only possible pmd mapping has been handled on last iteration */
if (pvmw->pmd && !pvmw->pte)
......@@ -148,7 +138,13 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
if (!pud_present(*pud))
return false;
pvmw->pmd = pmd_offset(pud, pvmw->address);
if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) {
/*
* Make sure the pmd value isn't cached in a register by the
* compiler and used as a stale value after we've observed a
* subsequent update.
*/
pmde = READ_ONCE(*pvmw->pmd);
if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
pvmw->ptl = pmd_lock(mm, pvmw->pmd);
if (likely(pmd_trans_huge(*pvmw->pmd))) {
if (pvmw->flags & PVMW_MIGRATION)
......@@ -167,17 +163,15 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
return not_found(pvmw);
return true;
}
} else
WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
}
return not_found(pvmw);
} else {
/* THP pmd was split under us: handle on pte level */
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
}
} else {
if (!check_pmd(pvmw))
return false;
} else if (!pmd_present(pmde)) {
return false;
}
if (!map_pte(pvmw))
goto next_pte;
......
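A condensed illustration of the pattern this hunk fixes (editorial sketch, not the kernel code itself): without a single snapshot the compiler is free to load the shared word once per test, so the two checks can observe different values; the patch takes one READ_ONCE() copy of the pmd and bases every later decision on that copy.

/* p points at a word another CPU may update at any time. */
static inline int checks_two_loads(const unsigned long *p)
{
	return (*p & 1) && !(*p & 2);	/* two loads: the tests may disagree */
}

static inline int checks_one_snapshot(const unsigned long *p)
{
	unsigned long v = *(const volatile unsigned long *)p;	/* READ_ONCE analogue */

	return (v & 1) && !(v & 2);	/* one load: a consistent view */
}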
......@@ -39,10 +39,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
bool swap_vma_readahead = true;
#define SWAP_RA_MAX_ORDER_DEFAULT 3
static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK
......@@ -664,6 +660,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
pte_t *tpte;
#endif
max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
SWAP_RA_ORDER_CEILING);
if (max_win == 1) {
swap_ra->win = 1;
return NULL;
}
faddr = vmf->address;
entry = pte_to_swp_entry(vmf->orig_pte);
if ((unlikely(non_swap_entry(entry))))
......@@ -672,12 +675,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
if (page)
return page;
max_win = 1 << READ_ONCE(swap_ra_max_order);
if (max_win == 1) {
swap_ra->win = 1;
return NULL;
}
fpfn = PFN_DOWN(faddr);
swap_ra_info = GET_SWAP_RA_VAL(vma);
pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
......@@ -786,32 +783,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
vma_ra_enabled_store);
static ssize_t vma_ra_max_order_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", swap_ra_max_order);
}
static ssize_t vma_ra_max_order_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int err, v;
err = kstrtoint(buf, 10, &v);
if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
return -EINVAL;
swap_ra_max_order = v;
return count;
}
static struct kobj_attribute vma_ra_max_order_attr =
__ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
vma_ra_max_order_store);
static struct attribute *swap_attrs[] = {
&vma_ra_enabled_attr.attr,
&vma_ra_max_order_attr.attr,
NULL,
};
......
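Worked example of the new window limit (editorial; the numbers assume the usual /proc/sys/vm/page-cluster default of 3, which this hunk does not show): the VMA-based readahead window is now capped by page-cluster rather than by the removed vma_ra_max_order knob.

max_win = 1 << min(page_cluster, SWAP_RA_ORDER_CEILING)
page_cluster = 3  ->  max_win = 1 << 3 = 8 pages per readahead
page_cluster = 0  ->  max_win = 1, so swap_ra->win = 1 and VMA readahead is skipped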
......@@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
if (fatal_signal_pending(current)) {
area->nr_pages = i;
goto fail_no_warn;
}
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask|highmem_mask);
else
......@@ -1723,7 +1718,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
(area->nr_pages*PAGE_SIZE), area->size);
fail_no_warn:
vfree(area->addr);
return NULL;
}
......
......@@ -158,7 +158,7 @@ static int read_symbol(FILE *in, struct sym_entry *s)
else if (str[0] == '$')
return -1;
/* exclude debugging symbols */
else if (stype == 'N')
else if (stype == 'N' || stype == 'n')
return -1;
/* include the type field in the symbol name, so that it gets
......
......@@ -397,7 +397,7 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
}
}
static int copy_page(int ufd, unsigned long offset)
static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
......@@ -418,7 +418,7 @@ static int copy_page(int ufd, unsigned long offset)
fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
uffdio_copy.copy), exit(1);
} else {
if (test_uffdio_copy_eexist) {
if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
retry_copy_page(ufd, &uffdio_copy, offset);
}
......@@ -427,6 +427,16 @@ static int copy_page(int ufd, unsigned long offset)
return 0;
}
static int copy_page_retry(int ufd, unsigned long offset)
{
return __copy_page(ufd, offset, true);
}
static int copy_page(int ufd, unsigned long offset)
{
return __copy_page(ufd, offset, false);
}
static void *uffd_poll_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
......@@ -544,7 +554,7 @@ static void *background_thread(void *arg)
for (page_nr = cpu * nr_pages_per_cpu;
page_nr < (cpu+1) * nr_pages_per_cpu;
page_nr++)
copy_page(uffd, page_nr * page_size);
copy_page_retry(uffd, page_nr * page_size);
return NULL;
}
......@@ -779,7 +789,7 @@ static void retry_uffdio_zeropage(int ufd,
}
}
static int uffdio_zeropage(int ufd, unsigned long offset)
static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
{
struct uffdio_zeropage uffdio_zeropage;
int ret;
......@@ -814,7 +824,7 @@ static int uffdio_zeropage(int ufd, unsigned long offset)
fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
uffdio_zeropage.zeropage), exit(1);
} else {
if (test_uffdio_zeropage_eexist) {
if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
retry_uffdio_zeropage(ufd, &uffdio_zeropage,
offset);
......@@ -830,6 +840,11 @@ static int uffdio_zeropage(int ufd, unsigned long offset)
return 0;
}
static int uffdio_zeropage(int ufd, unsigned long offset)
{
return __uffdio_zeropage(ufd, offset, false);
}
/* exercise UFFDIO_ZEROPAGE */
static int userfaultfd_zeropage_test(void)
{
......