Commit 1e0d27fc authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "18 patches.

  Subsystems affected by this patch series: mm (hugetlb, compaction,
  vmalloc, shmem, memblock, pagecache, kasan, and hugetlb), mailmap,
  gcov, ubsan, and MAINTAINERS"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  MAINTAINERS/.mailmap: use my @kernel.org address
  mm: hugetlb: fix missing put_page in gather_surplus_pages()
  ubsan: implement __ubsan_handle_alignment_assumption
  kasan: make addr_has_metadata() return true for valid addresses
  kasan: add explicit preconditions to kasan_report()
  mm/filemap: add missing mem_cgroup_uncharge() to __add_to_page_cache_locked()
  mailmap: add entries for Manivannan Sadhasivam
  mailmap: fix name/email for Viresh Kumar
  memblock: do not start bottom-up allocations with kernel_end
  mm: thp: fix MADV_REMOVE deadlock on shmem THP
  init/gcov: allow CONFIG_CONSTRUCTORS on UML to fix module gcov
  mm/vmalloc: separate put pages and flush VM flags
  mm, compaction: move high_pfn to the for loop scope
  mm: migrate: do not migrate HugeTLB page whose refcount is one
  mm: hugetlb: remove VM_BUG_ON_PAGE from page_huge_active
  mm: hugetlb: fix a race between isolating and freeing page
  mm: hugetlb: fix a race between freeing and dissolving the page
  mm: hugetlbfs: fix cannot migrate the fallocated HugeTLB page
parents 17fbcdf9 654eb3f2
...@@ -199,6 +199,8 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com> ...@@ -199,6 +199,8 @@ Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org> Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com> Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com> Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com> Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com> Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
Mark Brown <broonie@sirena.org.uk> Mark Brown <broonie@sirena.org.uk>
...@@ -244,6 +246,7 @@ Morten Welinder <welinder@anemone.rentec.com> ...@@ -244,6 +246,7 @@ Morten Welinder <welinder@anemone.rentec.com>
Morten Welinder <welinder@darter.rentec.com> Morten Welinder <welinder@darter.rentec.com>
Morten Welinder <welinder@troll.com> Morten Welinder <welinder@troll.com>
Mythri P K <mythripk@ti.com> Mythri P K <mythripk@ti.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
Nguyen Anh Quynh <aquynh@gmail.com> Nguyen Anh Quynh <aquynh@gmail.com>
Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com> Nicolas Ferre <nicolas.ferre@microchip.com> <nicolas.ferre@atmel.com>
Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org> Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org>
...@@ -334,6 +337,8 @@ Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org> ...@@ -334,6 +337,8 @@ Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com> Viresh Kumar <vireshk@kernel.org> <viresh.kumar2@arm.com>
Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com> Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com> Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com> Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com> Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com> Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
......
...@@ -4304,7 +4304,7 @@ S: Maintained ...@@ -4304,7 +4304,7 @@ S: Maintained
F: .clang-format F: .clang-format
CLANG/LLVM BUILD SUPPORT CLANG/LLVM BUILD SUPPORT
M: Nathan Chancellor <natechancellor@gmail.com> M: Nathan Chancellor <nathan@kernel.org>
M: Nick Desaulniers <ndesaulniers@google.com> M: Nick Desaulniers <ndesaulniers@google.com>
L: clang-built-linux@googlegroups.com L: clang-built-linux@googlegroups.com
S: Supported S: Supported
......
...@@ -735,9 +735,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, ...@@ -735,9 +735,10 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_unlock(&hugetlb_fault_mutex_table[hash]); mutex_unlock(&hugetlb_fault_mutex_table[hash]);
set_page_huge_active(page);
/* /*
* unlock_page because locked by add_to_page_cache() * unlock_page because locked by add_to_page_cache()
* page_put due to reference from alloc_huge_page() * put_page() due to reference from alloc_huge_page()
*/ */
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
......
...@@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, ...@@ -770,6 +770,8 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
} }
#endif #endif
void set_page_huge_active(struct page *page);
#else /* CONFIG_HUGETLB_PAGE */ #else /* CONFIG_HUGETLB_PAGE */
struct hstate {}; struct hstate {};
......
...@@ -333,6 +333,13 @@ static inline void *kasan_reset_tag(const void *addr) ...@@ -333,6 +333,13 @@ static inline void *kasan_reset_tag(const void *addr)
return (void *)arch_kasan_reset_tag(addr); return (void *)arch_kasan_reset_tag(addr);
} }
/**
* kasan_report - print a report about a bad memory access detected by KASAN
* @addr: address of the bad access
* @size: size of the bad access
* @is_write: whether the bad access is a write or a read
* @ip: instruction pointer for the accessibility check or the bad access itself
*/
bool kasan_report(unsigned long addr, size_t size, bool kasan_report(unsigned long addr, size_t size,
bool is_write, unsigned long ip); bool is_write, unsigned long ip);
......
...@@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h */ ...@@ -24,7 +24,8 @@ struct notifier_block; /* in notifier.h */
#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */
#define VM_NO_GUARD 0x00000040 /* don't add guard page */ #define VM_NO_GUARD 0x00000040 /* don't add guard page */
#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */
#define VM_MAP_PUT_PAGES 0x00000100 /* put pages and free array in vfree */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */
#define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */
/* /*
* VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC.
...@@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h */ ...@@ -37,12 +38,6 @@ struct notifier_block; /* in notifier.h */
* determine which allocations need the module shadow freed. * determine which allocations need the module shadow freed.
*/ */
/*
* Memory with VM_FLUSH_RESET_PERMS cannot be freed in an interrupt or with
* vfree_atomic().
*/
#define VM_FLUSH_RESET_PERMS 0x00000100 /* Reset direct map and flush TLB on unmap */
/* bits [20..32] reserved for arch specific ioremap internals */ /* bits [20..32] reserved for arch specific ioremap internals */
/* /*
......
...@@ -76,7 +76,6 @@ config CC_HAS_ASM_INLINE ...@@ -76,7 +76,6 @@ config CC_HAS_ASM_INLINE
config CONSTRUCTORS config CONSTRUCTORS
bool bool
depends on !UML
config IRQ_WORK config IRQ_WORK
bool bool
......
...@@ -1066,7 +1066,13 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) ...@@ -1066,7 +1066,13 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
/* Call all constructor functions linked into the kernel. */ /* Call all constructor functions linked into the kernel. */
static void __init do_ctors(void) static void __init do_ctors(void)
{ {
#ifdef CONFIG_CONSTRUCTORS /*
* For UML, the constructors have already been called by the
* normal setup code as it's just a normal ELF binary, so we
* cannot do it again - but we do need CONFIG_CONSTRUCTORS
* even on UML for modules.
*/
#if defined(CONFIG_CONSTRUCTORS) && !defined(CONFIG_UML)
ctor_fn_t *fn = (ctor_fn_t *) __ctors_start; ctor_fn_t *fn = (ctor_fn_t *) __ctors_start;
for (; fn < (ctor_fn_t *) __ctors_end; fn++) for (; fn < (ctor_fn_t *) __ctors_end; fn++)
......
...@@ -4,7 +4,7 @@ menu "GCOV-based kernel profiling" ...@@ -4,7 +4,7 @@ menu "GCOV-based kernel profiling"
config GCOV_KERNEL config GCOV_KERNEL
bool "Enable gcov-based kernel profiling" bool "Enable gcov-based kernel profiling"
depends on DEBUG_FS depends on DEBUG_FS
select CONSTRUCTORS if !UML select CONSTRUCTORS
default n default n
help help
This option enables gcov-based code profiling (e.g. for code coverage This option enables gcov-based code profiling (e.g. for code coverage
......
...@@ -427,3 +427,34 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) ...@@ -427,3 +427,34 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val)
ubsan_epilogue(); ubsan_epilogue();
} }
EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
/*
 * Runtime handler invoked by the compiler when a __builtin_assume_aligned()
 * (or attribute-based) alignment assumption turns out to be false.
 *
 * @_data:  pointer to a struct alignment_assumption_data emitted by the
 *          compiler (source location plus the pointee's type descriptor)
 * @ptr:    the pointer value that violated the assumption
 * @align:  the alignment, in bytes, that was assumed
 * @offset: the offset passed to __builtin_assume_aligned(), if any
 */
void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
					 unsigned long align,
					 unsigned long offset);
void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
					 unsigned long align,
					 unsigned long offset)
{
	struct alignment_assumption_data *d = _data;
	/* The address the assumption actually constrains is ptr - offset. */
	unsigned long base = ptr - offset;

	if (suppress_report(&d->location))
		return;

	ubsan_prologue(&d->location, "alignment-assumption");

	if (!offset)
		pr_err("assumption of %lu byte alignment for pointer of type %s failed",
		       align, d->type->type_name);
	else
		pr_err("assumption of %lu byte alignment (with offset of %lu byte) for pointer of type %s failed",
		       align, offset, d->type->type_name);

	/*
	 * Report the largest power-of-two alignment the base address does
	 * satisfy (lowest set bit), and how far it is from the assumed one.
	 */
	pr_err("%saddress is %lu aligned, misalignment offset is %lu bytes",
	       offset ? "offset " : "", BIT(base ? __ffs(base) : 0),
	       base & (align - 1));

	ubsan_epilogue();
}
EXPORT_SYMBOL(__ubsan_handle_alignment_assumption);
...@@ -78,6 +78,12 @@ struct invalid_value_data { ...@@ -78,6 +78,12 @@ struct invalid_value_data {
struct type_descriptor *type; struct type_descriptor *type;
}; };
/*
 * Compiler-emitted descriptor for an alignment-assumption check,
 * passed as the first argument to __ubsan_handle_alignment_assumption().
 * Layout must match what Clang/GCC generate — do not reorder fields.
 */
struct alignment_assumption_data {
	struct source_location location;		/* where the failing access is */
	struct source_location assumption_location;	/* where the assumption was made */
	struct type_descriptor *type;			/* type of the pointed-to object */
};
#if defined(CONFIG_ARCH_SUPPORTS_INT128) #if defined(CONFIG_ARCH_SUPPORTS_INT128)
typedef __int128 s_max; typedef __int128 s_max;
typedef unsigned __int128 u_max; typedef unsigned __int128 u_max;
......
...@@ -1342,7 +1342,7 @@ fast_isolate_freepages(struct compact_control *cc) ...@@ -1342,7 +1342,7 @@ fast_isolate_freepages(struct compact_control *cc)
{ {
unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1); unsigned int limit = min(1U, freelist_scan_limit(cc) >> 1);
unsigned int nr_scanned = 0; unsigned int nr_scanned = 0;
unsigned long low_pfn, min_pfn, high_pfn = 0, highest = 0; unsigned long low_pfn, min_pfn, highest = 0;
unsigned long nr_isolated = 0; unsigned long nr_isolated = 0;
unsigned long distance; unsigned long distance;
struct page *page = NULL; struct page *page = NULL;
...@@ -1387,6 +1387,7 @@ fast_isolate_freepages(struct compact_control *cc) ...@@ -1387,6 +1387,7 @@ fast_isolate_freepages(struct compact_control *cc)
struct page *freepage; struct page *freepage;
unsigned long flags; unsigned long flags;
unsigned int order_scanned = 0; unsigned int order_scanned = 0;
unsigned long high_pfn = 0;
if (!area->nr_free) if (!area->nr_free)
continue; continue;
......
...@@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked(struct page *page, ...@@ -835,6 +835,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
XA_STATE(xas, &mapping->i_pages, offset); XA_STATE(xas, &mapping->i_pages, offset);
int huge = PageHuge(page); int huge = PageHuge(page);
int error; int error;
bool charged = false;
VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(PageSwapBacked(page), page); VM_BUG_ON_PAGE(PageSwapBacked(page), page);
...@@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked(struct page *page, ...@@ -848,6 +849,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
error = mem_cgroup_charge(page, current->mm, gfp); error = mem_cgroup_charge(page, current->mm, gfp);
if (error) if (error)
goto error; goto error;
charged = true;
} }
gfp &= GFP_RECLAIM_MASK; gfp &= GFP_RECLAIM_MASK;
...@@ -896,6 +898,8 @@ noinline int __add_to_page_cache_locked(struct page *page, ...@@ -896,6 +898,8 @@ noinline int __add_to_page_cache_locked(struct page *page,
if (xas_error(&xas)) { if (xas_error(&xas)) {
error = xas_error(&xas); error = xas_error(&xas);
if (charged)
mem_cgroup_uncharge(page);
goto error; goto error;
} }
......
...@@ -2202,7 +2202,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2202,7 +2202,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
{ {
spinlock_t *ptl; spinlock_t *ptl;
struct mmu_notifier_range range; struct mmu_notifier_range range;
bool was_locked = false; bool do_unlock_page = false;
pmd_t _pmd; pmd_t _pmd;
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
...@@ -2218,7 +2218,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2218,7 +2218,6 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
VM_BUG_ON(freeze && !page); VM_BUG_ON(freeze && !page);
if (page) { if (page) {
VM_WARN_ON_ONCE(!PageLocked(page)); VM_WARN_ON_ONCE(!PageLocked(page));
was_locked = true;
if (page != pmd_page(*pmd)) if (page != pmd_page(*pmd))
goto out; goto out;
} }
...@@ -2227,19 +2226,29 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2227,19 +2226,29 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
if (pmd_trans_huge(*pmd)) { if (pmd_trans_huge(*pmd)) {
if (!page) { if (!page) {
page = pmd_page(*pmd); page = pmd_page(*pmd);
if (unlikely(!trylock_page(page))) { /*
get_page(page); * An anonymous page must be locked, to ensure that a
_pmd = *pmd; * concurrent reuse_swap_page() sees stable mapcount;
spin_unlock(ptl); * but reuse_swap_page() is not used on shmem or file,
lock_page(page); * and page lock must not be taken when zap_pmd_range()
spin_lock(ptl); * calls __split_huge_pmd() while i_mmap_lock is held.
if (unlikely(!pmd_same(*pmd, _pmd))) { */
unlock_page(page); if (PageAnon(page)) {
if (unlikely(!trylock_page(page))) {
get_page(page);
_pmd = *pmd;
spin_unlock(ptl);
lock_page(page);
spin_lock(ptl);
if (unlikely(!pmd_same(*pmd, _pmd))) {
unlock_page(page);
put_page(page);
page = NULL;
goto repeat;
}
put_page(page); put_page(page);
page = NULL;
goto repeat;
} }
put_page(page); do_unlock_page = true;
} }
} }
if (PageMlocked(page)) if (PageMlocked(page))
...@@ -2249,7 +2258,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, ...@@ -2249,7 +2258,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
__split_huge_pmd_locked(vma, pmd, range.start, freeze); __split_huge_pmd_locked(vma, pmd, range.start, freeze);
out: out:
spin_unlock(ptl); spin_unlock(ptl);
if (!was_locked && page) if (do_unlock_page)
unlock_page(page); unlock_page(page);
/* /*
* No need to double call mmu_notifier->invalidate_range() callback. * No need to double call mmu_notifier->invalidate_range() callback.
......
...@@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock); ...@@ -79,6 +79,21 @@ DEFINE_SPINLOCK(hugetlb_lock);
static int num_fault_mutexes; static int num_fault_mutexes;
struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp; struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
/*
 * Track whether a HugeTLB page is currently on a hugetlb free list.
 * The flag lives in the page_private field of the 5th subpage
 * (head + 4), using -1UL as the "freed" sentinel; 0 means not freed.
 * NOTE(review): presumably index 4 is chosen because earlier tail
 * pages already carry other hugetlb metadata — confirm against the
 * rest of mm/hugetlb.c before relying on this.
 */
static inline bool PageHugeFreed(struct page *head)
{
	return page_private(head + 4) == -1UL;
}
/* Mark @head as sitting on a hugetlb free list. */
static inline void SetPageHugeFreed(struct page *head)
{
	set_page_private(head + 4, -1UL);
}
/* Clear the freed marker when @head leaves the free list. */
static inline void ClearPageHugeFreed(struct page *head)
{
	set_page_private(head + 4, 0);
}
/* Forward declaration */ /* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta); static int hugetlb_acct_memory(struct hstate *h, long delta);
...@@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page) ...@@ -1028,6 +1043,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
list_move(&page->lru, &h->hugepage_freelists[nid]); list_move(&page->lru, &h->hugepage_freelists[nid]);
h->free_huge_pages++; h->free_huge_pages++;
h->free_huge_pages_node[nid]++; h->free_huge_pages_node[nid]++;
SetPageHugeFreed(page);
} }
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
...@@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) ...@@ -1044,6 +1060,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
list_move(&page->lru, &h->hugepage_activelist); list_move(&page->lru, &h->hugepage_activelist);
set_page_refcounted(page); set_page_refcounted(page);
ClearPageHugeFreed(page);
h->free_huge_pages--; h->free_huge_pages--;
h->free_huge_pages_node[nid]--; h->free_huge_pages_node[nid]--;
return page; return page;
...@@ -1344,12 +1361,11 @@ struct hstate *size_to_hstate(unsigned long size) ...@@ -1344,12 +1361,11 @@ struct hstate *size_to_hstate(unsigned long size)
*/ */
bool page_huge_active(struct page *page) bool page_huge_active(struct page *page)
{ {
VM_BUG_ON_PAGE(!PageHuge(page), page); return PageHeadHuge(page) && PagePrivate(&page[1]);
return PageHead(page) && PagePrivate(&page[1]);
} }
/* never called for tail page */ /* never called for tail page */
static void set_page_huge_active(struct page *page) void set_page_huge_active(struct page *page)
{ {
VM_BUG_ON_PAGE(!PageHeadHuge(page), page); VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
SetPagePrivate(&page[1]); SetPagePrivate(&page[1]);
...@@ -1505,6 +1521,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid) ...@@ -1505,6 +1521,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
h->nr_huge_pages++; h->nr_huge_pages++;
h->nr_huge_pages_node[nid]++; h->nr_huge_pages_node[nid]++;
ClearPageHugeFreed(page);
spin_unlock(&hugetlb_lock); spin_unlock(&hugetlb_lock);
} }
...@@ -1755,6 +1772,7 @@ int dissolve_free_huge_page(struct page *page) ...@@ -1755,6 +1772,7 @@ int dissolve_free_huge_page(struct page *page)
{ {
int rc = -EBUSY; int rc = -EBUSY;
retry:
/* Not to disrupt normal path by vainly holding hugetlb_lock */ /* Not to disrupt normal path by vainly holding hugetlb_lock */
if (!PageHuge(page)) if (!PageHuge(page))
return 0; return 0;
...@@ -1771,6 +1789,26 @@ int dissolve_free_huge_page(struct page *page) ...@@ -1771,6 +1789,26 @@ int dissolve_free_huge_page(struct page *page)
int nid = page_to_nid(head); int nid = page_to_nid(head);
if (h->free_huge_pages - h->resv_huge_pages == 0) if (h->free_huge_pages - h->resv_huge_pages == 0)
goto out; goto out;
/*
* We should make sure that the page is already on the free list
* when it is dissolved.
*/
if (unlikely(!PageHugeFreed(head))) {
spin_unlock(&hugetlb_lock);
cond_resched();
/*
* Theoretically, we should return -EBUSY when we
* encounter this race. In fact, we have a chance
* to successfully dissolve the page if we do a
* retry. Because the race window is quite small.
* If we seize this opportunity, it is an optimization
* for increasing the success rate of dissolving page.
*/
goto retry;
}
/* /*
* Move PageHWPoison flag from head page to the raw error page, * Move PageHWPoison flag from head page to the raw error page,
* which makes any subpages rather than the error page reusable. * which makes any subpages rather than the error page reusable.
...@@ -2009,13 +2047,16 @@ static int gather_surplus_pages(struct hstate *h, long delta) ...@@ -2009,13 +2047,16 @@ static int gather_surplus_pages(struct hstate *h, long delta)
/* Free the needed pages to the hugetlb pool */ /* Free the needed pages to the hugetlb pool */
list_for_each_entry_safe(page, tmp, &surplus_list, lru) { list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
int zeroed;
if ((--needed) < 0) if ((--needed) < 0)
break; break;
/* /*
* This page is now managed by the hugetlb allocator and has * This page is now managed by the hugetlb allocator and has
* no users -- drop the buddy allocator's reference. * no users -- drop the buddy allocator's reference.
*/ */
VM_BUG_ON_PAGE(!put_page_testzero(page), page); zeroed = put_page_testzero(page);
VM_BUG_ON_PAGE(!zeroed, page);
enqueue_huge_page(h, page); enqueue_huge_page(h, page);
} }
free: free:
...@@ -5555,9 +5596,9 @@ bool isolate_huge_page(struct page *page, struct list_head *list) ...@@ -5555,9 +5596,9 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
{ {
bool ret = true; bool ret = true;
VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock); spin_lock(&hugetlb_lock);
if (!page_huge_active(page) || !get_page_unless_zero(page)) { if (!PageHeadHuge(page) || !page_huge_active(page) ||
!get_page_unless_zero(page)) {
ret = false; ret = false;
goto unlock; goto unlock;
} }
......
...@@ -209,7 +209,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, ...@@ -209,7 +209,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
static inline bool addr_has_metadata(const void *addr) static inline bool addr_has_metadata(const void *addr)
{ {
return true; return (is_vmalloc_addr(addr) || virt_addr_valid(addr));
} }
#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
......
...@@ -275,14 +275,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end, ...@@ -275,14 +275,6 @@ __memblock_find_range_top_down(phys_addr_t start, phys_addr_t end,
* *
* Find @size free area aligned to @align in the specified range and node. * Find @size free area aligned to @align in the specified range and node.
* *
* When allocation direction is bottom-up, the @start should be greater
* than the end of the kernel image. Otherwise, it will be trimmed. The
* reason is that we want the bottom-up allocation just near the kernel
* image so it is highly likely that the allocated memory and the kernel
* will reside in the same node.
*
* If bottom-up allocation failed, will try to allocate memory top-down.
*
* Return: * Return:
* Found address on success, 0 on failure. * Found address on success, 0 on failure.
*/ */
...@@ -291,8 +283,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, ...@@ -291,8 +283,6 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
phys_addr_t end, int nid, phys_addr_t end, int nid,
enum memblock_flags flags) enum memblock_flags flags)
{ {
phys_addr_t kernel_end, ret;
/* pump up @end */ /* pump up @end */
if (end == MEMBLOCK_ALLOC_ACCESSIBLE || if (end == MEMBLOCK_ALLOC_ACCESSIBLE ||
end == MEMBLOCK_ALLOC_KASAN) end == MEMBLOCK_ALLOC_KASAN)
...@@ -301,40 +291,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, ...@@ -301,40 +291,13 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size,
/* avoid allocating the first page */ /* avoid allocating the first page */
start = max_t(phys_addr_t, start, PAGE_SIZE); start = max_t(phys_addr_t, start, PAGE_SIZE);
end = max(start, end); end = max(start, end);
kernel_end = __pa_symbol(_end);
/*
* try bottom-up allocation only when bottom-up mode
* is set and @end is above the kernel image.
*/
if (memblock_bottom_up() && end > kernel_end) {
phys_addr_t bottom_up_start;
/* make sure we will allocate above the kernel */
bottom_up_start = max(start, kernel_end);
/* ok, try bottom-up allocation first */ if (memblock_bottom_up())
ret = __memblock_find_range_bottom_up(bottom_up_start, end, return __memblock_find_range_bottom_up(start, end, size, align,
size, align, nid, flags); nid, flags);
if (ret) else
return ret; return __memblock_find_range_top_down(start, end, size, align,
nid, flags);
/*
* we always limit bottom-up allocation above the kernel,
* but top-down allocation doesn't have the limit, so
* retrying top-down allocation may succeed when bottom-up
* allocation failed.
*
* bottom-up allocation is expected to be fail very rarely,
* so we use WARN_ONCE() here to see the stack trace if
* fail happens.
*/
WARN_ONCE(IS_ENABLED(CONFIG_MEMORY_HOTREMOVE),
"memblock: bottom-up allocation failed, memory hotremove may be affected\n");
}
return __memblock_find_range_top_down(start, end, size, align, nid,
flags);
} }
/** /**
......
...@@ -1280,6 +1280,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, ...@@ -1280,6 +1280,12 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return -ENOSYS; return -ENOSYS;
} }
if (page_count(hpage) == 1) {
/* page was freed from under us. So we are done. */
putback_active_hugepage(hpage);
return MIGRATEPAGE_SUCCESS;
}
new_hpage = get_new_page(hpage, private); new_hpage = get_new_page(hpage, private);
if (!new_hpage) if (!new_hpage)
return -ENOMEM; return -ENOMEM;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment