Commit 995d03ae authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "15 fixes"

[ This does not merge the "fortify: use WARN instead of BUG for now"
  patch, which needs a bit of extra work to build cleanly with all
  configurations. Arnd is on it.   - Linus ]

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  ocfs2: don't clear SGID when inheriting ACLs
  mm: allow page_cache_get_speculative in interrupt context
  userfaultfd: non-cooperative: flush event_wqh at release time
  ipc: add missing container_of()s for randstruct
  cpuset: fix a deadlock due to incomplete patching of cpusets_enabled()
  userfaultfd_zeropage: return -ENOSPC in case mm has gone
  mm: take memory hotplug lock within numa_zonelist_order_handler()
  mm/page_io.c: fix oops during block io poll in swapin path
  zram: do not free pool->size_class
  kthread: fix documentation build warning
  kasan: avoid -Wmaybe-uninitialized warning
  userfaultfd: non-cooperative: notify about unmap of destination during mremap
  mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries
  pid: kill pidhash_size in pidhash_init()
  mm/hugetlb.c: __get_user_pages ignores certain follow_hugetlb_page errors
parents 8d3fe85f 19ec8e48
@@ -240,18 +240,6 @@ int ocfs2_set_acl(handle_t *handle,
 	switch (type) {
 	case ACL_TYPE_ACCESS:
 		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
-		if (acl) {
-			umode_t mode;
-
-			ret = posix_acl_update_mode(inode, &mode, &acl);
-			if (ret)
-				return ret;
-
-			ret = ocfs2_acl_set_mode(inode, di_bh,
-						 handle, mode);
-			if (ret)
-				return ret;
-		}
 		break;
 	case ACL_TYPE_DEFAULT:
 		name_index = OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
@@ -289,7 +277,19 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	had_lock = ocfs2_inode_lock_tracker(inode, &bh, 1, &oh);
 	if (had_lock < 0)
 		return had_lock;
+	if (type == ACL_TYPE_ACCESS && acl) {
+		umode_t mode;
+
+		status = posix_acl_update_mode(inode, &mode, &acl);
+		if (status)
+			goto unlock;
+
+		status = ocfs2_acl_set_mode(inode, bh, NULL, mode);
+		if (status)
+			goto unlock;
+	}
 	status = ocfs2_set_acl(NULL, inode, bh, type, acl, NULL, NULL);
+unlock:
 	ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock);
 	brelse(bh);
 	return status;
...
@@ -854,6 +854,9 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	__wake_up_locked_key(&ctx->fault_wqh, TASK_NORMAL, &range);
 	spin_unlock(&ctx->fault_pending_wqh.lock);
 
+	/* Flush pending events that may still wait on event_wqh */
+	wake_up_all(&ctx->event_wqh);
+
 	wake_up_poll(&ctx->fd_wqh, POLLHUP);
 	userfaultfd_ctx_put(ctx);
 	return 0;
@@ -1643,6 +1646,8 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 		ret = mfill_zeropage(ctx->mm, uffdio_zeropage.range.start,
 				     uffdio_zeropage.range.len);
 		mmput(ctx->mm);
+	} else {
+		return -ENOSPC;
 	}
 	if (unlikely(put_user(ret, &user_uffdio_zeropage->zeropage)))
 		return -EFAULT;
...
@@ -18,6 +18,19 @@
 #ifdef CONFIG_CPUSETS
 
+/*
+ * Static branch rewrites can happen in an arbitrary order for a given
+ * key. In code paths where we need to loop with read_mems_allowed_begin() and
+ * read_mems_allowed_retry() to get a consistent view of mems_allowed, we need
+ * to ensure that begin() always gets rewritten before retry() in the
+ * disabled -> enabled transition. If not, then if local irqs are disabled
+ * around the loop, we can deadlock since retry() would always be
+ * comparing the latest value of the mems_allowed seqcount against 0 as
+ * begin() still would see cpusets_enabled() as false. The enabled -> disabled
+ * transition should happen in reverse order for the same reasons (want to stop
+ * looking at real value of mems_allowed.sequence in retry() first).
+ */
+extern struct static_key_false cpusets_pre_enable_key;
 extern struct static_key_false cpusets_enabled_key;
 static inline bool cpusets_enabled(void)
 {
@@ -32,12 +45,14 @@ static inline int nr_cpusets(void)
 static inline void cpuset_inc(void)
 {
+	static_branch_inc(&cpusets_pre_enable_key);
 	static_branch_inc(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
 	static_branch_dec(&cpusets_enabled_key);
+	static_branch_dec(&cpusets_pre_enable_key);
 }
 
 extern int cpuset_init(void);
@@ -115,7 +130,7 @@ extern void cpuset_print_current_mems_allowed(void);
  */
 static inline unsigned int read_mems_allowed_begin(void)
 {
-	if (!cpusets_enabled())
+	if (!static_branch_unlikely(&cpusets_pre_enable_key))
 		return 0;
 
 	return read_seqcount_begin(&current->mems_allowed_seq);
@@ -129,7 +144,7 @@ static inline unsigned int read_mems_allowed_begin(void)
  */
 static inline bool read_mems_allowed_retry(unsigned int seq)
 {
-	if (!cpusets_enabled())
+	if (!static_branch_unlikely(&cpusets_enabled_key))
 		return false;
 
 	return read_seqcount_retry(&current->mems_allowed_seq, seq);
...
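The ordering requirement described in the new comment is easiest to see next to the read-side loop it protects. Below is a minimal sketch of that pattern; it is illustrative only, alloc_with_stable_mems() and try_allocation() are made-up placeholders, and the real loop lives in the page allocator:

static struct page *alloc_with_stable_mems(gfp_t gfp, unsigned int order)
{
	struct page *page;
	unsigned int cookie;

	do {
		/* 0 while cpusets are off, otherwise a mems_allowed seqcount snapshot */
		cookie = read_mems_allowed_begin();
		page = try_allocation(gfp, order);	/* placeholder for the real work */
	} while (!page && read_mems_allowed_retry(cookie));

	return page;
}

If retry() were patched to its enabled form before begin(), begin() would still return 0 while retry() compared a live seqcount against it, so the loop could spin forever; with local irqs disabled around it that spin becomes a deadlock. Keying begin() on cpusets_pre_enable_key, which is incremented first and decremented last, closes that window.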
@@ -15,7 +15,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
  * @threadfn: the function to run in the thread
  * @data: data pointer for @threadfn()
  * @namefmt: printf-style format string for the thread name
- * @...: arguments for @namefmt.
+ * @arg...: arguments for @namefmt.
  *
  * This macro will create a kthread on the current node, leaving it in
  * the stopped state. This is just a helper for kthread_create_on_node();
...
@@ -494,6 +494,10 @@ struct mm_struct {
 	 * PROT_NONE or PROT_NUMA mapped page.
 	 */
 	bool tlb_flush_pending;
+#endif
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	/* See flush_tlb_batched_pending() */
+	bool tlb_flush_batched;
 #endif
 	struct uprobes_state uprobes_state;
 #ifdef CONFIG_HUGETLB_PAGE
...
@@ -163,8 +163,6 @@ void release_pages(struct page **pages, int nr, bool cold);
  */
 static inline int page_cache_get_speculative(struct page *page)
 {
-	VM_BUG_ON(in_interrupt());
-
 #ifdef CONFIG_TINY_RCU
 # ifdef CONFIG_PREEMPT_COUNT
 	VM_BUG_ON(!in_atomic() && !irqs_disabled());
...
@@ -1034,7 +1034,8 @@ void msg_exit_ns(struct ipc_namespace *ns)
 static int sysvipc_msg_proc_show(struct seq_file *s, void *it)
 {
 	struct user_namespace *user_ns = seq_user_ns(s);
-	struct msg_queue *msq = it;
+	struct kern_ipc_perm *ipcp = it;
+	struct msg_queue *msq = container_of(ipcp, struct msg_queue, q_perm);
 
 	seq_printf(s,
 		   "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
...
@@ -2179,7 +2179,8 @@ void exit_sem(struct task_struct *tsk)
 static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
 {
 	struct user_namespace *user_ns = seq_user_ns(s);
-	struct sem_array *sma = it;
+	struct kern_ipc_perm *ipcp = it;
+	struct sem_array *sma = container_of(ipcp, struct sem_array, sem_perm);
 	time_t sem_otime;
 
 	/*
...
@@ -1380,9 +1380,11 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
 {
 	struct user_namespace *user_ns = seq_user_ns(s);
-	struct shmid_kernel *shp = it;
+	struct kern_ipc_perm *ipcp = it;
+	struct shmid_kernel *shp;
 	unsigned long rss = 0, swp = 0;
 
+	shp = container_of(ipcp, struct shmid_kernel, shm_perm);
 	shm_add_rss_swap(shp, &rss, &swp);
 
 #if BITS_PER_LONG <= 32
...
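All three sysvipc proc_show hunks make the same change: the seq_file iterator now hands the callback a struct kern_ipc_perm pointer, and the containing per-type structure is recovered with container_of() instead of a bare cast that silently assumed the permission member sits at offset 0. The sketch below (plain C, not kernel code; the structure names and the my_container_of macro are made up for illustration) shows why that assumption breaks once structure layout randomization can reorder members, and how container_of() stays correct:

#include <stddef.h>

/* Stand-in for the kernel's container_of(): member pointer -> outer pointer */
#define my_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct perm { int key; };

struct queue {
	long bytes;		/* randstruct may shuffle fields... */
	struct perm perm;	/* ...so 'perm' need not stay at offset 0 */
};

static struct queue *queue_of(struct perm *p)
{
	/* Correct for any layout; (struct queue *)p is only valid at offset 0 */
	return my_container_of(p, struct queue, perm);
}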
@@ -63,6 +63,7 @@
 #include <linux/cgroup.h>
 #include <linux/wait.h>
 
+DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
 DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);
 
 /* See "Frequency meter" comments, below. */
...
@@ -575,13 +575,10 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
  */
 void __init pidhash_init(void)
 {
-	unsigned int pidhash_size;
-
 	pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
 					   HASH_EARLY | HASH_SMALL | HASH_ZERO,
 					   &pidhash_shift, NULL,
 					   0, 4096);
-	pidhash_size = 1U << pidhash_shift;
 }
 
 void __init pidmap_init(void)
...
@@ -4078,6 +4078,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long vaddr = *position;
 	unsigned long remainder = *nr_pages;
 	struct hstate *h = hstate_vma(vma);
+	int err = -EFAULT;
 
 	while (vaddr < vma->vm_end && remainder) {
 		pte_t *pte;
@@ -4154,11 +4155,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 		ret = hugetlb_fault(mm, vma, vaddr, fault_flags);
 		if (ret & VM_FAULT_ERROR) {
-			int err = vm_fault_to_errno(ret, flags);
-
-			if (err)
-				return err;
-
+			err = vm_fault_to_errno(ret, flags);
 			remainder = 0;
 			break;
 		}
@@ -4213,7 +4210,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 */
 	*position = vaddr;
 
-	return i ? i : -EFAULT;
+	return i ? i : err;
 }
 
 #ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
...
@@ -498,6 +498,7 @@ extern struct workqueue_struct *mm_percpu_wq;
 #ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
 void try_to_unmap_flush(void);
 void try_to_unmap_flush_dirty(void);
+void flush_tlb_batched_pending(struct mm_struct *mm);
 #else
 static inline void try_to_unmap_flush(void)
 {
@@ -505,7 +506,9 @@ static inline void try_to_unmap_flush(void)
 static inline void try_to_unmap_flush_dirty(void)
 {
 }
+static inline void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+}
 #endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
 
 extern const struct trace_print_flags pageflag_names[];
...
@@ -401,6 +401,7 @@ void kasan_report(unsigned long addr, size_t size,
 	disable_trace_on_warning();
 
 	info.access_addr = (void *)addr;
+	info.first_bad_addr = (void *)addr;
 	info.access_size = size;
 	info.is_write = is_write;
 	info.ip = ip;
...
@@ -320,6 +320,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	tlb_remove_check_page_size_change(tlb, PAGE_SIZE);
 	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
...
@@ -1197,6 +1197,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	init_rss_vec(rss);
 	start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	pte = start_pte;
+	flush_tlb_batched_pending(mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		pte_t ptent = *pte;
...
@@ -64,6 +64,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 			atomic_read(&vma->vm_mm->mm_users) == 1)
 		target_node = numa_node_id();
 
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 	do {
 		oldpte = *pte;
...
@@ -152,6 +152,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	new_ptl = pte_lockptr(mm, new_pmd);
 	if (new_ptl != old_ptl)
 		spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
+	flush_tlb_batched_pending(vma->vm_mm);
 	arch_enter_lazy_mmu_mode();
 
 	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
@@ -428,6 +429,7 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
 		struct vm_userfaultfd_ctx *uf,
+		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = current->mm;
@@ -446,7 +448,7 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 	if (addr + old_len > new_addr && new_addr + new_len > addr)
 		goto out;
 
-	ret = do_munmap(mm, new_addr, new_len, NULL);
+	ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
 	if (ret)
 		goto out;
@@ -514,6 +516,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	unsigned long charged = 0;
 	bool locked = false;
 	struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
+	LIST_HEAD(uf_unmap_early);
 	LIST_HEAD(uf_unmap);
 
 	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
@@ -541,7 +544,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 	if (flags & MREMAP_FIXED) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap);
+				&locked, &uf, &uf_unmap_early, &uf_unmap);
 		goto out;
 	}
@@ -621,6 +624,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 		up_write(&current->mm->mmap_sem);
 	if (locked && new_len > old_len)
 		mm_populate(new_addr + old_len, new_len - old_len);
+	userfaultfd_unmap_complete(mm, &uf_unmap_early);
 	mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
 	userfaultfd_unmap_complete(mm, &uf_unmap);
 	return ret;
...
@@ -4891,9 +4891,11 @@ int numa_zonelist_order_handler(struct ctl_table *table, int write,
 				NUMA_ZONELIST_ORDER_LEN);
 			user_zonelist_order = oldval;
 		} else if (oldval != user_zonelist_order) {
+			mem_hotplug_begin();
 			mutex_lock(&zonelists_mutex);
 			build_all_zonelists(NULL, NULL);
 			mutex_unlock(&zonelists_mutex);
+			mem_hotplug_done();
 		}
 	}
 out:
...
@@ -22,6 +22,7 @@
 #include <linux/frontswap.h>
 #include <linux/blkdev.h>
 #include <linux/uio.h>
+#include <linux/sched/task.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -136,6 +137,7 @@ static void end_swap_bio_read(struct bio *bio)
 	WRITE_ONCE(bio->bi_private, NULL);
 	bio_put(bio);
 	wake_up_process(waiter);
+	put_task_struct(waiter);
 }
 
 int generic_swapfile_activate(struct swap_info_struct *sis,
@@ -378,6 +380,11 @@ int swap_readpage(struct page *page, bool do_poll)
 		goto out;
 	}
 	bdev = bio->bi_bdev;
+	/*
+	 * Keep this task valid during swap readpage because the oom killer may
+	 * attempt to access it in the page fault retry time check.
+	 */
+	get_task_struct(current);
 	bio->bi_private = current;
 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
 	count_vm_event(PSWPIN);
...
@@ -604,6 +604,13 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);
 	tlb_ubc->flush_required = true;
 
+	/*
+	 * Ensure compiler does not re-order the setting of tlb_flush_batched
+	 * before the PTE is cleared.
+	 */
+	barrier();
+	mm->tlb_flush_batched = true;
+
 	/*
 	 * If the PTE was dirty then it's best to assume it's writable. The
 	 * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
@@ -631,6 +638,35 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 	return should_defer;
 }
 
+/*
+ * Reclaim unmaps pages under the PTL but does not flush the TLB prior to
+ * releasing the PTL if TLB flushes are batched. It's possible for a parallel
+ * operation such as mprotect or munmap to race between reclaim unmapping
+ * the page and flushing the page. If this race occurs, it potentially allows
+ * access to data via a stale TLB entry. Tracking all mm's that have TLB
+ * batching in flight would be expensive during reclaim so instead track
+ * whether TLB batching occurred in the past and if so then do a flush here
+ * if required. This will cost one additional flush per reclaim cycle paid
+ * by the first operation at risk such as mprotect and munmap.
+ *
+ * This must be called under the PTL so that an access to tlb_flush_batched
+ * that is potentially a "reclaim vs mprotect/munmap/etc" race will synchronise
+ * via the PTL.
+ */
+void flush_tlb_batched_pending(struct mm_struct *mm)
+{
+	if (mm->tlb_flush_batched) {
+		flush_tlb_mm(mm);
+
+		/*
+		 * Do not allow the compiler to re-order the clearing of
+		 * tlb_flush_batched before the tlb is flushed.
+		 */
+		barrier();
+		mm->tlb_flush_batched = false;
+	}
+}
 #else
 static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
 {
...
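Every caller added in the madvise, memory, mprotect and mremap hunks above follows the same shape: take the page table lock, call flush_tlb_batched_pending() before the first PTE in the range is read or modified, and only then start the walk. A condensed, illustrative sketch of that caller-side pattern (example_pte_walk() is a made-up name, not one of the real functions):

static void example_pte_walk(struct mm_struct *mm, pmd_t *pmd,
			     unsigned long addr, unsigned long end)
{
	pte_t *orig_pte, *pte;
	spinlock_t *ptl;

	orig_pte = pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	/* Flush now if reclaim batched a TLB flush for this mm earlier */
	flush_tlb_batched_pending(mm);
	arch_enter_lazy_mmu_mode();

	for (; addr != end; pte++, addr += PAGE_SIZE) {
		/* ... inspect or modify *pte under the PTL ... */
	}

	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(orig_pte, ptl);
}

As the new comment notes, making the call under the PTL is what serialises this check against reclaim setting tlb_flush_batched in the unmap path.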
@@ -2453,7 +2453,6 @@ void zs_destroy_pool(struct zs_pool *pool)
 	}
 
 	destroy_cache(pool);
-	kfree(pool->size_class);
 	kfree(pool->name);
 	kfree(pool);
 }
...