Commit ed3bad2e authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "10 patches.

  Subsystems affected by this patch series: MAINTAINERS and mm (shmem,
  pagealloc, tracing, memcg, memory-failure, vmscan, kfence, and
  hugetlb)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  hugetlb: don't pass page cache pages to restore_reserve_on_error
  kfence: fix is_kfence_address() for addresses below KFENCE_POOL_SIZE
  mm: vmscan: fix missing psi annotation for node_reclaim()
  mm/hwpoison: retry with shake_page() for unhandlable pages
  mm: memcontrol: fix occasional OOMs due to proportional memory.low reclaim
  MAINTAINERS: update ClangBuiltLinux IRC chat
  mmflags.h: add missing __GFP_ZEROTAGS and __GFP_SKIP_KASAN_POISON names
  mm/page_alloc: don't corrupt pcppage_migratetype
  Revert "mm: swap: check if swap backing device is congested or not"
  Revert "mm/shmem: fix shmem_swapin() race with swapoff"
parents 8ba9fbe1 c7b1850d
......@@ -4508,7 +4508,7 @@ L: clang-built-linux@googlegroups.com
S: Supported
W: https://clangbuiltlinux.github.io/
B: https://github.com/ClangBuiltLinux/linux/issues
C: irc://chat.freenode.net/clangbuiltlinux
C: irc://irc.libera.chat/clangbuiltlinux
F: Documentation/kbuild/llvm.rst
F: include/linux/compiler-clang.h
F: scripts/clang-tools/
......
......@@ -51,10 +51,11 @@ extern atomic_t kfence_allocation_gate;
static __always_inline bool is_kfence_address(const void *addr)
{
/*
* The non-NULL check is required in case the __kfence_pool pointer was
* never initialized; keep it in the slow-path after the range-check.
* The __kfence_pool != NULL check is required to deal with the case
* where __kfence_pool == NULL && addr < KFENCE_POOL_SIZE. Keep it in
* the slow-path after the range-check!
*/
return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool);
}
/**
......
......@@ -612,12 +612,15 @@ static inline bool mem_cgroup_disabled(void)
return !cgroup_subsys_enabled(memory_cgrp_subsys);
}
static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
static inline void mem_cgroup_protection(struct mem_cgroup *root,
struct mem_cgroup *memcg,
bool in_low_reclaim)
unsigned long *min,
unsigned long *low)
{
*min = *low = 0;
if (mem_cgroup_disabled())
return 0;
return;
/*
* There is no reclaim protection applied to a targeted reclaim.
......@@ -653,13 +656,10 @@ static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
*
*/
if (root == memcg)
return 0;
if (in_low_reclaim)
return READ_ONCE(memcg->memory.emin);
return;
return max(READ_ONCE(memcg->memory.emin),
READ_ONCE(memcg->memory.elow));
*min = READ_ONCE(memcg->memory.emin);
*low = READ_ONCE(memcg->memory.elow);
}
void mem_cgroup_calculate_protection(struct mem_cgroup *root,
......@@ -1147,11 +1147,12 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
{
}
static inline unsigned long mem_cgroup_protection(struct mem_cgroup *root,
static inline void mem_cgroup_protection(struct mem_cgroup *root,
struct mem_cgroup *memcg,
bool in_low_reclaim)
unsigned long *min,
unsigned long *low)
{
return 0;
*min = *low = 0;
}
static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
......
......@@ -48,7 +48,9 @@
{(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \
{(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \
{(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\
{(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\
{(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"}, \
{(unsigned long)__GFP_SKIP_KASAN_POISON,"__GFP_SKIP_KASAN_POISON"}\
#define show_gfp_flags(flags) \
(flags) ? __print_flags(flags, "|", \
......
......@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
if (!rc) {
/*
* This indicates there is an entry in the reserve map
* added by alloc_huge_page. We know it was added
* not added by alloc_huge_page. We know it was added
* before the alloc_huge_page call, otherwise
* HPageRestoreReserve would be set on the page.
* Remove the entry so that a subsequent allocation
......@@ -4660,6 +4660,8 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(&range);
out_release_all:
/* No restore in case of successful pagetable update (Break COW) */
if (new_page != old_page)
restore_reserve_on_error(h, vma, haddr, new_page);
put_page(new_page);
out_release_old:
......@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
pte_t new_pte;
spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h);
bool new_page = false;
bool new_page, new_pagecache_page = false;
/*
* Currently, we are forced to kill the process in the event the
......@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto out;
retry:
new_page = false;
page = find_lock_page(mapping, idx);
if (!page) {
/* Check for page in userfault range */
......@@ -4842,6 +4845,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto retry;
goto out;
}
new_pagecache_page = true;
} else {
lock_page(page);
if (unlikely(anon_vma_prepare(vma))) {
......@@ -4926,6 +4930,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spin_unlock(ptl);
backout_unlocked:
unlock_page(page);
/* restore reserve for newly allocated pages not in page cache */
if (new_page && !new_pagecache_page)
restore_reserve_on_error(h, vma, haddr, page);
put_page(page);
goto out;
......@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
int ret = -ENOMEM;
struct page *page;
int writable;
bool new_pagecache_page = false;
if (is_continue) {
ret = -EFAULT;
......@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ret = huge_add_to_page_cache(page, mapping, idx);
if (ret)
goto out_release_nounlock;
new_pagecache_page = true;
}
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
......@@ -5291,6 +5299,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
if (!new_pagecache_page)
restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page);
goto out;
......
......@@ -1146,7 +1146,7 @@ static int __get_hwpoison_page(struct page *page)
* unexpected races caused by taking a page refcount.
*/
if (!HWPoisonHandlable(head))
return 0;
return -EBUSY;
if (PageTransHuge(head)) {
/*
......@@ -1199,9 +1199,15 @@ static int get_any_page(struct page *p, unsigned long flags)
}
goto out;
} else if (ret == -EBUSY) {
/* We raced with freeing huge page to buddy, retry. */
if (pass++ < 3)
/*
* We raced with (possibly temporary) unhandlable
* page, retry.
*/
if (pass++ < 3) {
shake_page(p, 1);
goto try_again;
}
ret = -EIO;
goto out;
}
}
......
......@@ -3453,21 +3453,12 @@ void free_unref_page_list(struct list_head *list)
* comment in free_unref_page.
*/
migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES)) {
if (unlikely(is_migrate_isolate(migratetype))) {
list_del(&page->lru);
free_one_page(page_zone(page), page, pfn, 0,
migratetype, FPI_NONE);
free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
continue;
}
/*
* Non-isolated types over MIGRATE_PCPTYPES get added
* to the MIGRATE_MOVABLE pcp list.
*/
set_pcppage_migratetype(page, MIGRATE_MOVABLE);
}
set_page_private(page, pfn);
}
......@@ -3475,7 +3466,15 @@ void free_unref_page_list(struct list_head *list)
list_for_each_entry_safe(page, next, list, lru) {
pfn = page_private(page);
set_page_private(page, 0);
/*
* Non-isolated types over MIGRATE_PCPTYPES get added
* to the MIGRATE_MOVABLE pcp list.
*/
migratetype = get_pcppage_migratetype(page);
if (unlikely(migratetype >= MIGRATE_PCPTYPES))
migratetype = MIGRATE_MOVABLE;
trace_mm_page_free_batched(page);
free_unref_page_commit(page, pfn, migratetype, 0);
......
......@@ -1696,8 +1696,7 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
struct swap_info_struct *si;
struct page *page = NULL;
struct page *page;
swp_entry_t swap;
int error;
......@@ -1705,12 +1704,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
swap = radix_to_swp_entry(*pagep);
*pagep = NULL;
/* Prevent swapoff from happening to us. */
si = get_swap_device(swap);
if (!si) {
error = EINVAL;
goto failed;
}
/* Look it up and read it in.. */
page = lookup_swap_cache(swap, NULL, 0);
if (!page) {
......@@ -1772,8 +1765,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
swap_free(swap);
*pagep = page;
if (si)
put_swap_device(si);
return 0;
failed:
if (!shmem_confirm_swap(mapping, index, swap))
......@@ -1784,9 +1775,6 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
put_page(page);
}
if (si)
put_swap_device(si);
return error;
}
......
......@@ -628,13 +628,6 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
if (!mask)
goto skip;
/* Test swap type to make sure the dereference is safe */
if (likely(si->flags & (SWP_BLKDEV | SWP_FS_OPS))) {
struct inode *inode = si->swap_file->f_mapping->host;
if (inode_read_congested(inode))
goto skip;
}
do_poll = false;
/* Read a page_cluster sized and aligned cluster around offset. */
start_offset = offset & ~mask;
......
......@@ -100,9 +100,12 @@ struct scan_control {
unsigned int may_swap:1;
/*
* Cgroups are not reclaimed below their configured memory.low,
* unless we threaten to OOM. If any cgroups are skipped due to
* memory.low and nothing was reclaimed, go back for memory.low.
* Cgroup memory below memory.low is protected as long as we
* don't threaten to OOM. If any cgroup is reclaimed at
* reduced force or passed over entirely due to its memory.low
* setting (memcg_low_skipped), and nothing is reclaimed as a
* result, then go back for one more cycle that reclaims the protected
* memory (memcg_low_reclaim) to avert OOM.
*/
unsigned int memcg_low_reclaim:1;
unsigned int memcg_low_skipped:1;
......@@ -2537,15 +2540,14 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
for_each_evictable_lru(lru) {
int file = is_file_lru(lru);
unsigned long lruvec_size;
unsigned long low, min;
unsigned long scan;
unsigned long protection;
lruvec_size = lruvec_lru_size(lruvec, lru, sc->reclaim_idx);
protection = mem_cgroup_protection(sc->target_mem_cgroup,
memcg,
sc->memcg_low_reclaim);
mem_cgroup_protection(sc->target_mem_cgroup, memcg,
&min, &low);
if (protection) {
if (min || low) {
/*
* Scale a cgroup's reclaim pressure by proportioning
* its current usage to its memory.low or memory.min
......@@ -2576,6 +2578,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
* hard protection.
*/
unsigned long cgroup_size = mem_cgroup_size(memcg);
unsigned long protection;
/* memory.low scaling, make sure we retry before OOM */
if (!sc->memcg_low_reclaim && low > min) {
protection = low;
sc->memcg_low_skipped = 1;
} else {
protection = min;
}
/* Avoid TOCTOU with earlier protection check */
cgroup_size = max(cgroup_size, protection);
......@@ -4413,11 +4424,13 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
.may_swap = 1,
.reclaim_idx = gfp_zone(gfp_mask),
};
unsigned long pflags;
trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
sc.gfp_mask);
cond_resched();
psi_memstall_enter(&pflags);
fs_reclaim_acquire(sc.gfp_mask);
/*
* We need to be able to allocate from the reserves for RECLAIM_UNMAP
......@@ -4442,6 +4455,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
fs_reclaim_release(sc.gfp_mask);
psi_memstall_leave(&pflags);
trace_mm_vmscan_node_reclaim_end(sc.nr_reclaimed);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment