Commit 51306806 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "18 patches.

  Subsystems affected by this patch series: mm (pagealloc, memcg, kasan,
  memory-failure, and highmem), ubsan, proc, and MAINTAINERS"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  MAINTAINERS: add a couple more files to the Clang/LLVM section
  proc_sysctl: fix oops caused by incorrect command parameters
  powerpc/mm/highmem: use __set_pte_at() for kmap_local()
  mips/mm/highmem: use set_pte() for kmap_local()
  mm/highmem: prepare for overriding set_pte_at()
  sparc/mm/highmem: flush cache and TLB
  mm: fix page reference leak in soft_offline_page()
  ubsan: disable unsigned-overflow check for i386
  kasan, mm: fix resetting page_alloc tags for HW_TAGS
  kasan, mm: fix conflicts with init_on_alloc/free
  kasan: fix HW_TAGS boot parameters
  kasan: fix incorrect arguments passing in kasan_add_zero_shadow
  kasan: fix unaligned address is unhandled in kasan_remove_zero_shadow
  mm: fix numa stats for thp migration
  mm: memcg: fix memcg file_dirty numa stat
  mm: memcg/slab: optimize objcg stock draining
  mm: fix initialization of struct page for holes in memory layout
  x86/setup: don't remove E820_TYPE_RAM for pfn 0
parents fdbc80bd e82d891a
@@ -160,29 +160,14 @@ intended for use in production as a security mitigation. Therefore it supports
 boot parameters that allow to disable KASAN competely or otherwise control
 particular KASAN features.
 
-The things that can be controlled are:
-
-1. Whether KASAN is enabled at all.
-2. Whether KASAN collects and saves alloc/free stacks.
-3. Whether KASAN panics on a detected bug or not.
-
-The ``kasan.mode`` boot parameter allows to choose one of three main modes:
-
-- ``kasan.mode=off`` - KASAN is disabled, no tag checks are performed
-- ``kasan.mode=prod`` - only essential production features are enabled
-- ``kasan.mode=full`` - all KASAN features are enabled
-
-The chosen mode provides default control values for the features mentioned
-above. However it's also possible to override the default values by providing:
-
-- ``kasan.stacktrace=off`` or ``=on`` - enable alloc/free stack collection
-  (default: ``on`` for ``mode=full``, otherwise ``off``)
-- ``kasan.fault=report`` or ``=panic`` - only print KASAN report or also panic
-  (default: ``report``)
-
-If ``kasan.mode`` parameter is not provided, it defaults to ``full`` when
-``CONFIG_DEBUG_KERNEL`` is enabled, and to ``prod`` otherwise.
+- ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``).
+
+- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
+  traces collection (default: ``on`` for ``CONFIG_DEBUG_KERNEL=y``, otherwise
+  ``off``).
+
+- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
+  report or also panic the kernel (default: ``report``).
 
 For developers
 ~~~~~~~~~~~~~~
...
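For illustration only (an example of mine, not part of the diff): with the reworked parameters, a debug boot that wants stack traces off but a panic on the first report would pass something like

    kasan=on kasan.stacktrace=off kasan.fault=panic

on the kernel command line; omitting all three keeps KASAN enabled with the documented defaults.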
@@ -4311,7 +4311,9 @@ W: https://clangbuiltlinux.github.io/
 B: https://github.com/ClangBuiltLinux/linux/issues
 C: irc://chat.freenode.net/clangbuiltlinux
 F: Documentation/kbuild/llvm.rst
+F: include/linux/compiler-clang.h
 F: scripts/clang-tools/
+F: scripts/clang-version.sh
 F: scripts/lld-version.sh
 K: \b(?i:clang|llvm)\b
...
@@ -51,6 +51,7 @@ extern void kmap_flush_tlb(unsigned long addr);
 
 #define flush_cache_kmaps()	BUG_ON(cpu_has_dc_aliases)
 
+#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev)	set_pte(ptep, ptev)
 #define arch_kmap_local_post_map(vaddr, pteval)	local_flush_tlb_one(vaddr)
 #define arch_kmap_local_post_unmap(vaddr)	local_flush_tlb_one(vaddr)
...
@@ -58,6 +58,8 @@ extern pte_t *pkmap_page_table;
 
 #define flush_cache_kmaps()	flush_cache_all()
 
+#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev)	\
+	__set_pte_at(mm, vaddr, ptep, ptev, 1)
 #define arch_kmap_local_post_map(vaddr, pteval)	\
 	local_flush_tlb_page(NULL, vaddr)
 #define arch_kmap_local_post_unmap(vaddr)	\
...
@@ -50,10 +50,11 @@ extern pte_t *pkmap_page_table;
 
 #define flush_cache_kmaps()	flush_cache_all()
 
-/* FIXME: Use __flush_tlb_one(vaddr) instead of flush_cache_all() -- Anton */
-#define arch_kmap_local_post_map(vaddr, pteval)	flush_cache_all()
-#define arch_kmap_local_post_unmap(vaddr)	flush_cache_all()
+/* FIXME: Use __flush_*_one(vaddr) instead of flush_*_all() -- Anton */
+#define arch_kmap_local_pre_map(vaddr, pteval)	flush_cache_all()
+#define arch_kmap_local_pre_unmap(vaddr)	flush_cache_all()
+#define arch_kmap_local_post_map(vaddr, pteval)	flush_tlb_all()
+#define arch_kmap_local_post_unmap(vaddr)	flush_tlb_all()
 
 #endif /* __KERNEL__ */
...
@@ -660,17 +660,6 @@ static void __init trim_platform_memory_ranges(void)
 
 static void __init trim_bios_range(void)
 {
-	/*
-	 * A special case is the first 4Kb of memory;
-	 * This is a BIOS owned area, not kernel ram, but generally
-	 * not listed as such in the E820 table.
-	 *
-	 * This typically reserves additional memory (64KiB by default)
-	 * since some BIOSes are known to corrupt low memory. See the
-	 * Kconfig help text for X86_RESERVE_LOW.
-	 */
-	e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
-
 	/*
 	 * special case: Some BIOSes report the PC BIOS
 	 * area (640Kb -> 1Mb) as RAM even though it is not.
@@ -728,6 +717,15 @@ early_param("reservelow", parse_reservelow);
 
 static void __init trim_low_memory_range(void)
 {
+	/*
+	 * A special case is the first 4Kb of memory;
+	 * This is a BIOS owned area, not kernel ram, but generally
+	 * not listed as such in the E820 table.
+	 *
+	 * This typically reserves additional memory (64KiB by default)
+	 * since some BIOSes are known to corrupt low memory. See the
+	 * Kconfig help text for X86_RESERVE_LOW.
+	 */
 	memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
 }
...
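A note on the effect (illustrative numbers of mine, not from the patch): pfn 0 now stays typed as E820_TYPE_RAM, and the first page is kept away from the allocator purely through the memblock reservation in trim_low_memory_range(). With the default X86_RESERVE_LOW of 64 KiB and 4 KiB pages, memblock_reserve(0, 64 KiB) covers pfns 0-15, so page 0 remains reserved without being retyped in the E820 table.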
@@ -1770,6 +1770,12 @@ static int process_sysctl_arg(char *param, char *val,
 		return 0;
 	}
 
+	if (!val)
+		return -EINVAL;
+	len = strlen(val);
+	if (len == 0)
+		return -EINVAL;
+
 	/*
 	 * To set sysctl options, we use a temporary mount of proc, look up the
 	 * respective sys/ file and write to it. To avoid mounting it when no
@@ -1811,7 +1817,6 @@ static int process_sysctl_arg(char *param, char *val,
 			file, param, val);
 		goto out;
 	}
-	len = strlen(val);
 	wret = kernel_write(file, val, len, &pos);
 	if (wret < 0) {
 		err = wret;
...
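Illustrative example (a hypothetical parameter of mine, not from the patch): a command-line entry such as ``sysctl.vm.swappiness`` written with no ``=value`` reaches process_sysctl_arg() with ``val == NULL``, which the old code eventually handed to strlen() and oopsed on. With the added checks, a missing value is rejected early with -EINVAL, and an empty value (``sysctl.vm.swappiness=``) is rejected the same way instead of attempting a zero-length write.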
@@ -123,6 +123,7 @@ config UBSAN_SIGNED_OVERFLOW
 config UBSAN_UNSIGNED_OVERFLOW
 	bool "Perform checking for unsigned arithmetic overflow"
 	depends on $(cc-option,-fsanitize=unsigned-integer-overflow)
+	depends on !X86_32 # avoid excessive stack usage on x86-32/clang
 	help
 	  This option enables -fsanitize=unsigned-integer-overflow which checks
 	  for overflow of any arithmetic operations with unsigned integers. This
...
@@ -473,6 +473,11 @@ static inline void *arch_kmap_local_high_get(struct page *page)
 }
 #endif
 
+#ifndef arch_kmap_local_set_pte
+#define arch_kmap_local_set_pte(mm, vaddr, ptep, ptev)	\
+	set_pte_at(mm, vaddr, ptep, ptev)
+#endif
+
 /* Unmap a local mapping which was obtained by kmap_high_get() */
 static inline bool kmap_high_unmap_local(unsigned long vaddr)
 {
@@ -515,7 +520,7 @@ void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot)
 	vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
 	BUG_ON(!pte_none(*(kmap_pte - idx)));
 	pteval = pfn_pte(pfn, prot);
-	set_pte_at(&init_mm, vaddr, kmap_pte - idx, pteval);
+	arch_kmap_local_set_pte(&init_mm, vaddr, kmap_pte - idx, pteval);
 	arch_kmap_local_post_map(vaddr, pteval);
 	current->kmap_ctrl.pteval[kmap_local_idx()] = pteval;
 	preempt_enable();
...
@@ -19,11 +19,10 @@
 
 #include "kasan.h"
 
-enum kasan_arg_mode {
-	KASAN_ARG_MODE_DEFAULT,
-	KASAN_ARG_MODE_OFF,
-	KASAN_ARG_MODE_PROD,
-	KASAN_ARG_MODE_FULL,
+enum kasan_arg {
+	KASAN_ARG_DEFAULT,
+	KASAN_ARG_OFF,
+	KASAN_ARG_ON,
 };
 
 enum kasan_arg_stacktrace {
@@ -38,7 +37,7 @@ enum kasan_arg_fault {
 	KASAN_ARG_FAULT_PANIC,
 };
 
-static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
+static enum kasan_arg kasan_arg __ro_after_init;
 static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
 static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
@@ -52,26 +51,24 @@ DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
 /* Whether panic or disable tag checking on fault. */
 bool kasan_flag_panic __ro_after_init;
 
-/* kasan.mode=off/prod/full */
-static int __init early_kasan_mode(char *arg)
+/* kasan=off/on */
+static int __init early_kasan_flag(char *arg)
 {
 	if (!arg)
 		return -EINVAL;
 
 	if (!strcmp(arg, "off"))
-		kasan_arg_mode = KASAN_ARG_MODE_OFF;
-	else if (!strcmp(arg, "prod"))
-		kasan_arg_mode = KASAN_ARG_MODE_PROD;
-	else if (!strcmp(arg, "full"))
-		kasan_arg_mode = KASAN_ARG_MODE_FULL;
+		kasan_arg = KASAN_ARG_OFF;
+	else if (!strcmp(arg, "on"))
+		kasan_arg = KASAN_ARG_ON;
 	else
 		return -EINVAL;
 
 	return 0;
 }
-early_param("kasan.mode", early_kasan_mode);
+early_param("kasan", early_kasan_flag);
 
-/* kasan.stack=off/on */
+/* kasan.stacktrace=off/on */
 static int __init early_kasan_flag_stacktrace(char *arg)
 {
 	if (!arg)
@@ -113,8 +110,8 @@ void kasan_init_hw_tags_cpu(void)
 	 * as this function is only called for MTE-capable hardware.
 	 */
 
-	/* If KASAN is disabled, do nothing. */
-	if (kasan_arg_mode == KASAN_ARG_MODE_OFF)
+	/* If KASAN is disabled via command line, don't initialize it. */
+	if (kasan_arg == KASAN_ARG_OFF)
 		return;
 
 	hw_init_tags(KASAN_TAG_MAX);
@@ -124,43 +121,28 @@ void kasan_init_hw_tags_cpu(void)
 /* kasan_init_hw_tags() is called once on boot CPU. */
 void __init kasan_init_hw_tags(void)
 {
-	/* If hardware doesn't support MTE, do nothing. */
+	/* If hardware doesn't support MTE, don't initialize KASAN. */
 	if (!system_supports_mte())
 		return;
 
-	/* Choose KASAN mode if kasan boot parameter is not provided. */
-	if (kasan_arg_mode == KASAN_ARG_MODE_DEFAULT) {
-		if (IS_ENABLED(CONFIG_DEBUG_KERNEL))
-			kasan_arg_mode = KASAN_ARG_MODE_FULL;
-		else
-			kasan_arg_mode = KASAN_ARG_MODE_PROD;
-	}
-
-	/* Preset parameter values based on the mode. */
-	switch (kasan_arg_mode) {
-	case KASAN_ARG_MODE_DEFAULT:
-		/* Shouldn't happen as per the check above. */
-		WARN_ON(1);
-		return;
-	case KASAN_ARG_MODE_OFF:
-		/* If KASAN is disabled, do nothing. */
+	/* If KASAN is disabled via command line, don't initialize it. */
+	if (kasan_arg == KASAN_ARG_OFF)
 		return;
-	case KASAN_ARG_MODE_PROD:
-		static_branch_enable(&kasan_flag_enabled);
-		break;
-	case KASAN_ARG_MODE_FULL:
-		static_branch_enable(&kasan_flag_enabled);
-		static_branch_enable(&kasan_flag_stacktrace);
-		break;
-	}
 
-	/* Now, optionally override the presets. */
+	/* Enable KASAN. */
+	static_branch_enable(&kasan_flag_enabled);
+
 	switch (kasan_arg_stacktrace) {
 	case KASAN_ARG_STACKTRACE_DEFAULT:
+		/*
+		 * Default to enabling stack trace collection for
+		 * debug kernels.
+		 */
+		if (IS_ENABLED(CONFIG_DEBUG_KERNEL))
+			static_branch_enable(&kasan_flag_stacktrace);
 		break;
 	case KASAN_ARG_STACKTRACE_OFF:
-		static_branch_disable(&kasan_flag_stacktrace);
+		/* Do nothing, kasan_flag_stacktrace keeps its default value. */
 		break;
 	case KASAN_ARG_STACKTRACE_ON:
 		static_branch_enable(&kasan_flag_stacktrace);
@@ -169,11 +151,16 @@ void __init kasan_init_hw_tags(void)
 	switch (kasan_arg_fault) {
 	case KASAN_ARG_FAULT_DEFAULT:
+		/*
+		 * Default to no panic on report.
+		 * Do nothing, kasan_flag_panic keeps its default value.
+		 */
 		break;
 	case KASAN_ARG_FAULT_REPORT:
-		kasan_flag_panic = false;
+		/* Do nothing, kasan_flag_panic keeps its default value. */
 		break;
 	case KASAN_ARG_FAULT_PANIC:
+		/* Enable panic on report. */
 		kasan_flag_panic = true;
 		break;
 	}
...
@@ -373,9 +373,10 @@ static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr,
 		if (kasan_pte_table(*pmd)) {
 			if (IS_ALIGNED(addr, PMD_SIZE) &&
-			    IS_ALIGNED(next, PMD_SIZE))
+			    IS_ALIGNED(next, PMD_SIZE)) {
 				pmd_clear(pmd);
 				continue;
+			}
 		}
 		pte = pte_offset_kernel(pmd, addr);
 		kasan_remove_pte_table(pte, addr, next);
@@ -398,9 +399,10 @@ static void kasan_remove_pud_table(pud_t *pud, unsigned long addr,
 		if (kasan_pmd_table(*pud)) {
 			if (IS_ALIGNED(addr, PUD_SIZE) &&
-			    IS_ALIGNED(next, PUD_SIZE))
+			    IS_ALIGNED(next, PUD_SIZE)) {
 				pud_clear(pud);
 				continue;
+			}
 		}
 		pmd = pmd_offset(pud, addr);
 		pmd_base = pmd_offset(pud, 0);
@@ -424,9 +426,10 @@ static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr,
 		if (kasan_pud_table(*p4d)) {
 			if (IS_ALIGNED(addr, P4D_SIZE) &&
-			    IS_ALIGNED(next, P4D_SIZE))
+			    IS_ALIGNED(next, P4D_SIZE)) {
 				p4d_clear(p4d);
 				continue;
+			}
 		}
 		pud = pud_offset(p4d, addr);
 		kasan_remove_pud_table(pud, addr, next);
@@ -457,9 +460,10 @@ void kasan_remove_zero_shadow(void *start, unsigned long size)
 		if (kasan_p4d_table(*pgd)) {
 			if (IS_ALIGNED(addr, PGDIR_SIZE) &&
-			    IS_ALIGNED(next, PGDIR_SIZE))
+			    IS_ALIGNED(next, PGDIR_SIZE)) {
 				pgd_clear(pgd);
 				continue;
+			}
 		}
 		p4d = p4d_offset(pgd, addr);
@@ -482,7 +486,6 @@ int kasan_add_zero_shadow(void *start, unsigned long size)
 
 	ret = kasan_populate_early_shadow(shadow_start, shadow_end);
 	if (ret)
-		kasan_remove_zero_shadow(shadow_start,
-					size >> KASAN_SHADOW_SCALE_SHIFT);
+		kasan_remove_zero_shadow(start, size);
 
 	return ret;
 }
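Context for the last hunk (a reference sketch of mine, not part of the change): kasan_remove_zero_shadow() takes the original, unshadowed address range and derives the shadow addresses itself, roughly via the usual generic/software-tags mapping

	shadow = (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;

so the old error path, which passed shadow_start and a shadow-scaled size, applied that translation twice. Passing the caller's start and size restores the intended cleanup range.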
@@ -3115,9 +3115,7 @@ void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		page_counter_uncharge(&memcg->kmem, nr_pages);
 
-	page_counter_uncharge(&memcg->memory, nr_pages);
-	if (do_memsw_account())
-		page_counter_uncharge(&memcg->memsw, nr_pages);
+	refill_stock(memcg, nr_pages);
 }
 
 /**
...
@@ -1885,6 +1885,12 @@ static int soft_offline_free_page(struct page *page)
 	return rc;
 }
 
+static void put_ref_page(struct page *page)
+{
+	if (page)
+		put_page(page);
+}
+
 /**
  * soft_offline_page - Soft offline a page.
  * @pfn: pfn to soft-offline
@@ -1910,20 +1916,26 @@ static int soft_offline_free_page(struct page *page)
 int soft_offline_page(unsigned long pfn, int flags)
 {
 	int ret;
-	struct page *page;
 	bool try_again = true;
+	struct page *page, *ref_page = NULL;
+
+	WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED));
 
 	if (!pfn_valid(pfn))
 		return -ENXIO;
+	if (flags & MF_COUNT_INCREASED)
+		ref_page = pfn_to_page(pfn);
+
 	/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
 	page = pfn_to_online_page(pfn);
-	if (!page)
+	if (!page) {
+		put_ref_page(ref_page);
 		return -EIO;
+	}
 
 	if (PageHWPoison(page)) {
 		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
-		if (flags & MF_COUNT_INCREASED)
-			put_page(page);
+		put_ref_page(ref_page);
 		return 0;
 	}
...
@@ -402,6 +402,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	struct zone *oldzone, *newzone;
 	int dirty;
 	int expected_count = expected_page_refs(mapping, page) + extra_count;
+	int nr = thp_nr_pages(page);
 
 	if (!mapping) {
 		/* Anonymous page without mapping */
@@ -437,7 +438,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	newpage->index = page->index;
 	newpage->mapping = page->mapping;
-	page_ref_add(newpage, thp_nr_pages(page)); /* add cache reference */
+	page_ref_add(newpage, nr); /* add cache reference */
 	if (PageSwapBacked(page)) {
 		__SetPageSwapBacked(newpage);
 		if (PageSwapCache(page)) {
@@ -459,7 +460,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	if (PageTransHuge(page)) {
 		int i;
 
-		for (i = 1; i < HPAGE_PMD_NR; i++) {
+		for (i = 1; i < nr; i++) {
 			xas_next(&xas);
 			xas_store(&xas, newpage);
 		}
@@ -470,7 +471,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 * to one less reference.
 	 * We know this isn't the last reference.
 	 */
-	page_ref_unfreeze(page, expected_count - thp_nr_pages(page));
+	page_ref_unfreeze(page, expected_count - nr);
 
 	xas_unlock(&xas);
 	/* Leave irq disabled to prevent preemption while updating stats */
@@ -493,17 +494,17 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		old_lruvec = mem_cgroup_lruvec(memcg, oldzone->zone_pgdat);
 		new_lruvec = mem_cgroup_lruvec(memcg, newzone->zone_pgdat);
 
-		__dec_lruvec_state(old_lruvec, NR_FILE_PAGES);
-		__inc_lruvec_state(new_lruvec, NR_FILE_PAGES);
+		__mod_lruvec_state(old_lruvec, NR_FILE_PAGES, -nr);
+		__mod_lruvec_state(new_lruvec, NR_FILE_PAGES, nr);
 		if (PageSwapBacked(page) && !PageSwapCache(page)) {
-			__dec_lruvec_state(old_lruvec, NR_SHMEM);
-			__inc_lruvec_state(new_lruvec, NR_SHMEM);
+			__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
+			__mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
 		}
 		if (dirty && mapping_can_writeback(mapping)) {
-			__dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
-			__dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
-			__inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
-			__inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
+			__mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
+			__mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
+			__mod_lruvec_state(new_lruvec, NR_FILE_DIRTY, nr);
+			__mod_zone_page_state(newzone, NR_ZONE_WRITE_PENDING, nr);
 		}
 	}
 	local_irq_enable();
...
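Worth spelling out for the stats hunk (illustrative arithmetic of mine, not from the patch): for a PMD-sized THP with 4 KiB base pages, nr = thp_nr_pages(page) = 512, so NR_FILE_PAGES, NR_SHMEM, NR_FILE_DIRTY and NR_ZONE_WRITE_PENDING now move by 512 pages per migrated THP, whereas the old __inc/__dec helpers shifted each counter by a single page and left the per-node stats skewed.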
@@ -1207,8 +1207,10 @@ static void kernel_init_free_pages(struct page *page, int numpages)
 	/* s390's use of memset() could override KASAN redzones. */
 	kasan_disable_current();
 	for (i = 0; i < numpages; i++) {
+		u8 tag = page_kasan_tag(page + i);
 		page_kasan_tag_reset(page + i);
 		clear_highpage(page + i);
+		page_kasan_tag_set(page + i, tag);
 	}
 	kasan_enable_current();
 }
@@ -7078,23 +7080,26 @@ void __init free_area_init_memoryless_node(int nid)
  * Initialize all valid struct pages in the range [spfn, epfn) and mark them
  * PageReserved(). Return the number of struct pages that were initialized.
  */
-static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
+static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn,
+					 int zone, int nid)
 {
-	unsigned long pfn;
+	unsigned long pfn, zone_spfn, zone_epfn;
 	u64 pgcnt = 0;
 
+	zone_spfn = arch_zone_lowest_possible_pfn[zone];
+	zone_epfn = arch_zone_highest_possible_pfn[zone];
+
+	spfn = clamp(spfn, zone_spfn, zone_epfn);
+	epfn = clamp(epfn, zone_spfn, zone_epfn);
+
 	for (pfn = spfn; pfn < epfn; pfn++) {
 		if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
 			pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
 				+ pageblock_nr_pages - 1;
 			continue;
 		}
-		/*
-		 * Use a fake node/zone (0) for now. Some of these pages
-		 * (in memblock.reserved but not in memblock.memory) will
-		 * get re-initialized via reserve_bootmem_region() later.
-		 */
-		__init_single_page(pfn_to_page(pfn), pfn, 0, 0);
+
+		__init_single_page(pfn_to_page(pfn), pfn, zone, nid);
 		__SetPageReserved(pfn_to_page(pfn));
 		pgcnt++;
 	}
@@ -7103,51 +7108,64 @@ static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
 }
 
 /*
- * Only struct pages that are backed by physical memory are zeroed and
- * initialized by going through __init_single_page(). But, there are some
- * struct pages which are reserved in memblock allocator and their fields
- * may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly initialize those struct pages.
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - phyiscal memory bank size is not necessarily the exact multiple of the
+ *   arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ *   nicely with memmap sections
  *
- * This function also addresses a similar issue where struct pages are left
- * uninitialized because the physical address range is not covered by
- * memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=, or when the highest physical
- * address (max_pfn) does not end on a section boundary.
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone link is set accorging to the architecture constrains
+ * - node is set to node id of the next populated region except for the
+ *   trailing hole where last node id is used
  */
-static void __init init_unavailable_mem(void)
+static void __init init_zone_unavailable_mem(int zone)
 {
-	phys_addr_t start, end;
-	u64 i, pgcnt;
-	phys_addr_t next = 0;
+	unsigned long start, end;
+	int i, nid;
+	u64 pgcnt;
+	unsigned long next = 0;
 
 	/*
-	 * Loop through unavailable ranges not covered by memblock.memory.
+	 * Loop through holes in memblock.memory and initialize struct
+	 * pages corresponding to these holes
 	 */
 	pgcnt = 0;
-	for_each_mem_range(i, &start, &end) {
+	for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
 		if (next < start)
-			pgcnt += init_unavailable_range(PFN_DOWN(next),
-							PFN_UP(start));
+			pgcnt += init_unavailable_range(next, start, zone, nid);
 		next = end;
 	}
 
 	/*
-	 * Early sections always have a fully populated memmap for the whole
-	 * section - see pfn_valid(). If the last section has holes at the
-	 * end and that section is marked "online", the memmap will be
-	 * considered initialized. Make sure that memmap has a well defined
-	 * state.
+	 * Last section may surpass the actual end of memory (e.g. we can
+	 * have 1Gb section and 512Mb of RAM pouplated).
+	 * Make sure that memmap has a well defined state in this case.
	 */
-	pgcnt += init_unavailable_range(PFN_DOWN(next),
-					round_up(max_pfn, PAGES_PER_SECTION));
+	end = round_up(max_pfn, PAGES_PER_SECTION);
+	pgcnt += init_unavailable_range(next, end, zone, nid);
 
 	/*
	 * Struct pages that do not have backing memory. This could be because
	 * firmware is using some of this memory, or for some other reasons.
	 */
 	if (pgcnt)
-		pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
+		pr_info("Zone %s: zeroed struct page in unavailable ranges: %lld pages", zone_names[zone], pgcnt);
+}
+
+static void __init init_unavailable_mem(void)
+{
+	int zone;
+
+	for (zone = 0; zone < ZONE_MOVABLE; zone++)
+		init_zone_unavailable_mem(zone);
 }
 #else
 static inline void __init init_unavailable_mem(void)
...
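Illustrative effect of the last hunk (the example comes from the new comment itself): with 1 GiB sections and only 512 MiB of populated RAM, the struct pages for the unpopulated tail of the last section are now initialized per zone, with the pfn range clamped to [arch_zone_lowest_possible_pfn[zone], arch_zone_highest_possible_pfn[zone]] and the node id taken from the neighbouring populated range, instead of the previous blanket zone 0 / node 0 initialization.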
@@ -2791,7 +2791,8 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
 						   void *obj)
 {
 	if (unlikely(slab_want_init_on_free(s)) && obj)
-		memset((void *)((char *)obj + s->offset), 0, sizeof(void *));
+		memset((void *)((char *)kasan_reset_tag(obj) + s->offset),
+			0, sizeof(void *));
 }
 
 /*
@@ -2883,7 +2884,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 		stat(s, ALLOC_FASTPATH);
 	}
 
-	maybe_wipe_obj_freeptr(s, kasan_reset_tag(object));
+	maybe_wipe_obj_freeptr(s, object);
 
 	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
 		memset(kasan_reset_tag(object), 0, s->object_size);
@@ -3329,7 +3330,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		int j;
 
 		for (j = 0; j < i; j++)
-			memset(p[j], 0, s->object_size);
+			memset(kasan_reset_tag(p[j]), 0, s->object_size);
 	}
 
 	/* memcg and kmem_cache debug support */
...