Commit cb085634 authored by Linus Torvalds

Merge tag 'mm-hotfixes-stable-2023-04-19-16-36' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
 "22 hotfixes.

  19 are cc:stable and the remainder address issues which were
  introduced during this merge cycle, or aren't considered suitable for
  -stable backporting.

  19 are for MM and the remainder are for other subsystems"

* tag 'mm-hotfixes-stable-2023-04-19-16-36' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (22 commits)
  nilfs2: initialize unused bytes in segment summary blocks
  mm: page_alloc: skip regions with hugetlbfs pages when allocating 1G pages
  mm/mmap: regression fix for unmapped_area{_topdown}
  maple_tree: fix mas_empty_area() search
  maple_tree: make maple state reusable after mas_empty_area_rev()
  mm: kmsan: handle alloc failures in kmsan_ioremap_page_range()
  mm: kmsan: handle alloc failures in kmsan_vmap_pages_range_noflush()
  tools/Makefile: do missed s/vm/mm/
  mm: fix memory leak on mm_init error handling
  mm/page_alloc: fix potential deadlock on zonelist_update_seq seqlock
  kernel/sys.c: fix and improve control flow in __sys_setres[ug]id()
  Revert "userfaultfd: don't fail on unrecognized features"
  writeback, cgroup: fix null-ptr-deref write in bdi_split_work_to_wbs
  maple_tree: fix a potential memory leak, OOB access, or other unpredictable bug
  tools/mm/page_owner_sort.c: fix TGID output when cull=tg is used
  mailmap: update jtoppins' entry to reference correct email
  mm/mempolicy: fix use-after-free of VMA iterator
  mm/huge_memory.c: warn with pr_warn_ratelimited instead of VM_WARN_ON_ONCE_FOLIO
  mm/mprotect: fix do_mprotect_pkey() return on error
  mm/khugepaged: check again on anon uffd-wp during isolation
  ...
parents 23990b1a ef832747
@@ -232,6 +232,8 @@ Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
 John Crispin <john@phrozen.org> <blogic@openwrt.org>
 John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
 John Stultz <johnstul@us.ibm.com>
+<jon.toppins+linux@gmail.com> <jtoppins@cumulusnetworks.com>
+<jon.toppins+linux@gmail.com> <jtoppins@redhat.com>
 Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
 <josh@joshtriplett.org> <josh@freedesktop.org>
 <josh@joshtriplett.org> <josh@kernel.org>

@@ -978,6 +978,16 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
             continue;
         }
+        /*
+         * If wb_tryget fails, the wb has been shutdown, skip it.
+         *
+         * Pin @wb so that it stays on @bdi->wb_list. This allows
+         * continuing iteration from @wb after dropping and
+         * regrabbing rcu read lock.
+         */
+        if (!wb_tryget(wb))
+            continue;
         /* alloc failed, execute synchronously using on-stack fallback */
         work = &fallback_work;
         *work = *base_work;
@@ -986,13 +996,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
         work->done = &fallback_work_done;
         wb_queue_work(wb, work);
-        /*
-         * Pin @wb so that it stays on @bdi->wb_list. This allows
-         * continuing iteration from @wb after dropping and
-         * regrabbing rcu read lock.
-         */
-        wb_get(wb);
         last_wb = wb;
         rcu_read_unlock();

@@ -430,6 +430,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
     return 0;
 }
+/**
+ * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area
+ * @sci: segment constructor object
+ *
+ * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of
+ * the current segment summary block.
+ */
+static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci)
+{
+    struct nilfs_segsum_pointer *ssp;
+
+    ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr;
+    if (ssp->offset < ssp->bh->b_size)
+        memset(ssp->bh->b_data + ssp->offset, 0,
+               ssp->bh->b_size - ssp->offset);
+}
 static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
 {
     sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
@@ -438,6 +455,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
      * The current segment is filled up
      * (internal code)
      */
+    nilfs_segctor_zeropad_segsum(sci);
     sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
     return nilfs_segctor_reset_segment_buffer(sci);
 }
@@ -542,6 +560,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
             goto retry;
         }
         if (unlikely(required)) {
+            nilfs_segctor_zeropad_segsum(sci);
             err = nilfs_segbuf_extend_segsum(segbuf);
             if (unlikely(err))
                 goto failed;
@@ -1533,6 +1552,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
         nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
         sci->sc_stage = prev_stage;
     }
+    nilfs_segctor_zeropad_segsum(sci);
     nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
     return 0;

@@ -1955,8 +1955,10 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx,
     ret = -EFAULT;
     if (copy_from_user(&uffdio_api, buf, sizeof(uffdio_api)))
         goto out;
-    /* Ignore unsupported features (userspace built against newer kernel) */
-    features = uffdio_api.features & UFFD_API_FEATURES;
+    features = uffdio_api.features;
+    ret = -EINVAL;
+    if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))
+        goto err_out;
     ret = -EPERM;
     if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))
         goto err_out;

@@ -134,11 +134,12 @@ void kmsan_kfree_large(const void *ptr);
  * @page_shift: page_shift passed to vmap_range_noflush().
  *
  * KMSAN maps shadow and origin pages of @pages into contiguous ranges in
- * vmalloc metadata address range.
+ * vmalloc metadata address range. Returns 0 on success, callers must check
+ * for non-zero return value.
  */
-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
                     pgprot_t prot, struct page **pages,
                     unsigned int page_shift);
 /**
  * kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
@@ -159,11 +160,12 @@ void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end);
  * @page_shift: page_shift argument passed to vmap_range_noflush().
  *
  * KMSAN creates new metadata pages for the physical pages mapped into the
- * virtual memory.
+ * virtual memory. Returns 0 on success, callers must check for non-zero return
+ * value.
  */
-void kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
+int kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
                   phys_addr_t phys_addr, pgprot_t prot,
                   unsigned int page_shift);
 /**
  * kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call.
@@ -281,12 +283,13 @@ static inline void kmsan_kfree_large(const void *ptr)
 {
 }
-static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
+static inline int kmsan_vmap_pages_range_noflush(unsigned long start,
                           unsigned long end,
                           pgprot_t prot,
                           struct page **pages,
                           unsigned int page_shift)
 {
+    return 0;
 }
 static inline void kmsan_vunmap_range_noflush(unsigned long start,
@@ -294,12 +297,12 @@ static inline void kmsan_vunmap_range_noflush(unsigned long start,
 {
 }
-static inline void kmsan_ioremap_page_range(unsigned long start,
+static inline int kmsan_ioremap_page_range(unsigned long start,
                         unsigned long end,
-                        phys_addr_t phys_addr,
-                        pgprot_t prot,
-                        unsigned int page_shift)
+                        phys_addr_t phys_addr, pgprot_t prot,
+                        unsigned int page_shift)
 {
+    return 0;
 }
 static inline void kmsan_iounmap_page_range(unsigned long start,

@@ -1174,6 +1174,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 fail_pcpu:
     while (i > 0)
         percpu_counter_destroy(&mm->rss_stat[--i]);
+    destroy_context(mm);
 fail_nocontext:
     mm_free_pgd(mm);
 fail_nopgd:

@@ -664,6 +664,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
     struct cred *new;
     int retval;
     kuid_t kruid, keuid, ksuid;
+    bool ruid_new, euid_new, suid_new;
     kruid = make_kuid(ns, ruid);
     keuid = make_kuid(ns, euid);
@@ -678,25 +679,29 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
     if ((suid != (uid_t) -1) && !uid_valid(ksuid))
         return -EINVAL;
+    old = current_cred();
+
+    /* check for no-op */
+    if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) &&
+        (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) &&
+                                uid_eq(keuid, old->fsuid))) &&
+        (suid == (uid_t) -1 || uid_eq(ksuid, old->suid)))
+        return 0;
+
+    ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
+               !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid);
+    euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
+               !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid);
+    suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
+               !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid);
+    if ((ruid_new || euid_new || suid_new) &&
+        !ns_capable_setid(old->user_ns, CAP_SETUID))
+        return -EPERM;
     new = prepare_creds();
     if (!new)
         return -ENOMEM;
-    old = current_cred();
-
-    retval = -EPERM;
-    if (!ns_capable_setid(old->user_ns, CAP_SETUID)) {
-        if (ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) &&
-            !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid))
-            goto error;
-        if (euid != (uid_t) -1 && !uid_eq(keuid, old->uid) &&
-            !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid))
-            goto error;
-        if (suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) &&
-            !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid))
-            goto error;
-    }
     if (ruid != (uid_t) -1) {
         new->uid = kruid;
         if (!uid_eq(kruid, old->uid)) {
@@ -761,6 +766,7 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
     struct cred *new;
     int retval;
     kgid_t krgid, kegid, ksgid;
+    bool rgid_new, egid_new, sgid_new;
     krgid = make_kgid(ns, rgid);
     kegid = make_kgid(ns, egid);
@@ -773,23 +779,28 @@ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
     if ((sgid != (gid_t) -1) && !gid_valid(ksgid))
         return -EINVAL;
+    old = current_cred();
+
+    /* check for no-op */
+    if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) &&
+        (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) &&
+                                gid_eq(kegid, old->fsgid))) &&
+        (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid)))
+        return 0;
+
+    rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
+               !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid);
+    egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
+               !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid);
+    sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
+               !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid);
+    if ((rgid_new || egid_new || sgid_new) &&
+        !ns_capable_setid(old->user_ns, CAP_SETGID))
+        return -EPERM;
     new = prepare_creds();
     if (!new)
         return -ENOMEM;
-    old = current_cred();
-
-    retval = -EPERM;
-    if (!ns_capable_setid(old->user_ns, CAP_SETGID)) {
-        if (rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) &&
-            !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid))
-            goto error;
-        if (egid != (gid_t) -1 && !gid_eq(kegid, old->gid) &&
-            !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid))
-            goto error;
-        if (sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) &&
-            !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid))
-            goto error;
-    }
     if (rgid != (gid_t) -1)
         new->gid = krgid;

@@ -1303,26 +1303,21 @@ static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
     node = mas->alloc;
     node->request_count = 0;
     while (requested) {
-        max_req = MAPLE_ALLOC_SLOTS;
-        if (node->node_count) {
-            unsigned int offset = node->node_count;
-            slots = (void **)&node->slot[offset];
-            max_req -= offset;
-        } else {
-            slots = (void **)&node->slot;
-        }
+        max_req = MAPLE_ALLOC_SLOTS - node->node_count;
+        slots = (void **)&node->slot[node->node_count];
         max_req = min(requested, max_req);
         count = mt_alloc_bulk(gfp, max_req, slots);
         if (!count)
             goto nomem_bulk;
+        if (node->node_count == 0) {
+            node->slot[0]->node_count = 0;
+            node->slot[0]->request_count = 0;
+        }
         node->node_count += count;
         allocated += count;
         node = node->slot[0];
-        node->node_count = 0;
-        node->request_count = 0;
         requested -= count;
     }
     mas->alloc->total = allocated;
@@ -4970,7 +4965,8 @@ static inline void *mas_prev_entry(struct ma_state *mas, unsigned long min)
  * Return: True if found in a leaf, false otherwise.
  *
  */
-static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
+static bool mas_rev_awalk(struct ma_state *mas, unsigned long size,
+        unsigned long *gap_min, unsigned long *gap_max)
 {
     enum maple_type type = mte_node_type(mas->node);
     struct maple_node *node = mas_mn(mas);
@@ -5035,8 +5031,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
     if (unlikely(ma_is_leaf(type))) {
         mas->offset = offset;
-        mas->min = min;
-        mas->max = min + gap - 1;
+        *gap_min = min;
+        *gap_max = min + gap - 1;
         return true;
     }
@@ -5060,10 +5056,10 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
 {
     enum maple_type type = mte_node_type(mas->node);
     unsigned long pivot, min, gap = 0;
-    unsigned char offset;
-    unsigned long *gaps;
-    unsigned long *pivots = ma_pivots(mas_mn(mas), type);
-    void __rcu **slots = ma_slots(mas_mn(mas), type);
+    unsigned char offset, data_end;
+    unsigned long *gaps, *pivots;
+    void __rcu **slots;
+    struct maple_node *node;
     bool found = false;
     if (ma_is_dense(type)) {
@@ -5071,13 +5067,15 @@ static inline bool mas_anode_descend(struct ma_state *mas, unsigned long size)
         return true;
     }
-    gaps = ma_gaps(mte_to_node(mas->node), type);
+    node = mas_mn(mas);
+    pivots = ma_pivots(node, type);
+    slots = ma_slots(node, type);
+    gaps = ma_gaps(node, type);
     offset = mas->offset;
     min = mas_safe_min(mas, pivots, offset);
-    for (; offset < mt_slots[type]; offset++) {
-        pivot = mas_safe_pivot(mas, pivots, offset, type);
-        if (offset && !pivot)
-            break;
+    data_end = ma_data_end(node, type, pivots, mas->max);
+    for (; offset <= data_end; offset++) {
+        pivot = mas_logical_pivot(mas, pivots, offset, type);
         /* Not within lower bounds */
         if (mas->index > pivot)
@@ -5312,6 +5310,9 @@ int mas_empty_area(struct ma_state *mas, unsigned long min,
     unsigned long *pivots;
     enum maple_type mt;
+    if (min >= max)
+        return -EINVAL;
     if (mas_is_start(mas))
         mas_start(mas);
     else if (mas->offset >= 2)
@@ -5366,6 +5367,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
 {
     struct maple_enode *last = mas->node;
+    if (min >= max)
+        return -EINVAL;
     if (mas_is_start(mas)) {
         mas_start(mas);
         mas->offset = mas_data_end(mas);
@@ -5385,7 +5389,7 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
     mas->index = min;
     mas->last = max;
-    while (!mas_rev_awalk(mas, size)) {
+    while (!mas_rev_awalk(mas, size, &min, &max)) {
         if (last == mas->node) {
             if (!mas_rewind_node(mas))
                 return -EBUSY;
@@ -5400,17 +5404,9 @@ int mas_empty_area_rev(struct ma_state *mas, unsigned long min,
     if (unlikely(mas->offset == MAPLE_NODE_SLOTS))
         return -EBUSY;
-    /*
-     * mas_rev_awalk() has set mas->min and mas->max to the gap values. If
-     * the maximum is outside the window we are searching, then use the last
-     * location in the search.
-     * mas->max and mas->min is the range of the gap.
-     * mas->index and mas->last are currently set to the search range.
-     */
     /* Trim the upper limit to the max. */
-    if (mas->max <= mas->last)
-        mas->last = mas->max;
+    if (max <= mas->last)
+        mas->last = max;
     mas->index = mas->last - size + 1;
     return 0;

@@ -507,6 +507,15 @@ static LIST_HEAD(offline_cgwbs);
 static void cleanup_offline_cgwbs_workfn(struct work_struct *work);
 static DECLARE_WORK(cleanup_offline_cgwbs_work, cleanup_offline_cgwbs_workfn);
+static void cgwb_free_rcu(struct rcu_head *rcu_head)
+{
+    struct bdi_writeback *wb = container_of(rcu_head,
+            struct bdi_writeback, rcu);
+
+    percpu_ref_exit(&wb->refcnt);
+    kfree(wb);
+}
 static void cgwb_release_workfn(struct work_struct *work)
 {
     struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -529,11 +538,10 @@ static void cgwb_release_workfn(struct work_struct *work)
     list_del(&wb->offline_node);
     spin_unlock_irq(&cgwb_lock);
-    percpu_ref_exit(&wb->refcnt);
     wb_exit(wb);
     bdi_put(bdi);
     WARN_ON_ONCE(!list_empty(&wb->b_attached));
-    kfree_rcu(wb, rcu);
+    call_rcu(&wb->rcu, cgwb_free_rcu);
 }
 static void cgwb_release(struct percpu_ref *refcnt)

@@ -1838,10 +1838,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
     if (is_swap_pmd(*pmd)) {
         swp_entry_t entry = pmd_to_swp_entry(*pmd);
         struct page *page = pfn_swap_entry_to_page(entry);
+        pmd_t newpmd;
         VM_BUG_ON(!is_pmd_migration_entry(*pmd));
         if (is_writable_migration_entry(entry)) {
-            pmd_t newpmd;
             /*
              * A protection check is difficult so
              * just be safe and disable write
@@ -1855,8 +1855,16 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 newpmd = pmd_swp_mksoft_dirty(newpmd);
             if (pmd_swp_uffd_wp(*pmd))
                 newpmd = pmd_swp_mkuffd_wp(newpmd);
-            set_pmd_at(mm, addr, pmd, newpmd);
+        } else {
+            newpmd = *pmd;
         }
+        if (uffd_wp)
+            newpmd = pmd_swp_mkuffd_wp(newpmd);
+        else if (uffd_wp_resolve)
+            newpmd = pmd_swp_clear_uffd_wp(newpmd);
+        if (!pmd_same(*pmd, newpmd))
+            set_pmd_at(mm, addr, pmd, newpmd);
         goto unlock;
     }
 #endif
@@ -2657,9 +2665,10 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
     VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
     is_hzp = is_huge_zero_page(&folio->page);
-    VM_WARN_ON_ONCE_FOLIO(is_hzp, folio);
-    if (is_hzp)
+    if (is_hzp) {
+        pr_warn_ratelimited("Called split_huge_page for huge zero page\n");
         return -EBUSY;
+    }
     if (folio_test_writeback(folio))
         return -EBUSY;
@@ -3251,6 +3260,8 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
     pmdswp = swp_entry_to_pmd(entry);
     if (pmd_soft_dirty(pmdval))
         pmdswp = pmd_swp_mksoft_dirty(pmdswp);
+    if (pmd_uffd_wp(pmdval))
+        pmdswp = pmd_swp_mkuffd_wp(pmdswp);
     set_pmd_at(mm, address, pvmw->pmd, pmdswp);
     page_remove_rmap(page, vma, true);
     put_page(page);

@@ -572,6 +572,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
             result = SCAN_PTE_NON_PRESENT;
             goto out;
         }
+        if (pte_uffd_wp(pteval)) {
+            result = SCAN_PTE_UFFD_WP;
+            goto out;
+        }
         page = vm_normal_page(vma, address, pteval);
         if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
             result = SCAN_PAGE_NULL;

@@ -148,35 +148,74 @@ void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end)
  * into the virtual memory. If those physical pages already had shadow/origin,
  * those are ignored.
  */
-void kmsan_ioremap_page_range(unsigned long start, unsigned long end,
+int kmsan_ioremap_page_range(unsigned long start, unsigned long end,
                   phys_addr_t phys_addr, pgprot_t prot,
                   unsigned int page_shift)
 {
     gfp_t gfp_mask = GFP_KERNEL | __GFP_ZERO;
     struct page *shadow, *origin;
     unsigned long off = 0;
-    int nr;
+    int nr, err = 0, clean = 0, mapped;
     if (!kmsan_enabled || kmsan_in_runtime())
-        return;
+        return 0;
     nr = (end - start) / PAGE_SIZE;
     kmsan_enter_runtime();
-    for (int i = 0; i < nr; i++, off += PAGE_SIZE) {
+    for (int i = 0; i < nr; i++, off += PAGE_SIZE, clean = i) {
         shadow = alloc_pages(gfp_mask, 1);
         origin = alloc_pages(gfp_mask, 1);
-        __vmap_pages_range_noflush(
+        if (!shadow || !origin) {
+            err = -ENOMEM;
+            goto ret;
+        }
+        mapped = __vmap_pages_range_noflush(
             vmalloc_shadow(start + off),
             vmalloc_shadow(start + off + PAGE_SIZE), prot, &shadow,
             PAGE_SHIFT);
-        __vmap_pages_range_noflush(
+        if (mapped) {
+            err = mapped;
+            goto ret;
+        }
+        shadow = NULL;
+        mapped = __vmap_pages_range_noflush(
             vmalloc_origin(start + off),
             vmalloc_origin(start + off + PAGE_SIZE), prot, &origin,
             PAGE_SHIFT);
+        if (mapped) {
+            __vunmap_range_noflush(
+                vmalloc_shadow(start + off),
+                vmalloc_shadow(start + off + PAGE_SIZE));
+            err = mapped;
+            goto ret;
+        }
+        origin = NULL;
     }
+    /* Page mapping loop finished normally, nothing to clean up. */
+    clean = 0;
+ret:
+    if (clean > 0) {
+        /*
+         * Something went wrong. Clean up shadow/origin pages allocated
+         * on the last loop iteration, then delete mappings created
+         * during the previous iterations.
+         */
+        if (shadow)
+            __free_pages(shadow, 1);
+        if (origin)
+            __free_pages(origin, 1);
+        __vunmap_range_noflush(
+            vmalloc_shadow(start),
+            vmalloc_shadow(start + clean * PAGE_SIZE));
+        __vunmap_range_noflush(
+            vmalloc_origin(start),
+            vmalloc_origin(start + clean * PAGE_SIZE));
+    }
     flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
     flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
     kmsan_leave_runtime();
+    return err;
 }
 void kmsan_iounmap_page_range(unsigned long start, unsigned long end)

@@ -216,27 +216,29 @@ void kmsan_free_page(struct page *page, unsigned int order)
     kmsan_leave_runtime();
 }
-void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
+int kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
                     pgprot_t prot, struct page **pages,
                     unsigned int page_shift)
 {
     unsigned long shadow_start, origin_start, shadow_end, origin_end;
     struct page **s_pages, **o_pages;
-    int nr, mapped;
+    int nr, mapped, err = 0;
     if (!kmsan_enabled)
-        return;
+        return 0;
     shadow_start = vmalloc_meta((void *)start, KMSAN_META_SHADOW);
     shadow_end = vmalloc_meta((void *)end, KMSAN_META_SHADOW);
     if (!shadow_start)
-        return;
+        return 0;
     nr = (end - start) / PAGE_SIZE;
     s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
     o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
-    if (!s_pages || !o_pages)
+    if (!s_pages || !o_pages) {
+        err = -ENOMEM;
         goto ret;
+    }
     for (int i = 0; i < nr; i++) {
         s_pages[i] = shadow_page_for(pages[i]);
         o_pages[i] = origin_page_for(pages[i]);
@@ -249,10 +251,16 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
     kmsan_enter_runtime();
     mapped = __vmap_pages_range_noflush(shadow_start, shadow_end, prot,
                         s_pages, page_shift);
-    KMSAN_WARN_ON(mapped);
+    if (mapped) {
+        err = mapped;
+        goto ret;
+    }
     mapped = __vmap_pages_range_noflush(origin_start, origin_end, prot,
                         o_pages, page_shift);
-    KMSAN_WARN_ON(mapped);
+    if (mapped) {
+        err = mapped;
+        goto ret;
+    }
     kmsan_leave_runtime();
     flush_tlb_kernel_range(shadow_start, shadow_end);
     flush_tlb_kernel_range(origin_start, origin_end);
@@ -262,6 +270,7 @@ void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
 ret:
     kfree(s_pages);
     kfree(o_pages);
+    return err;
 }
 /* Allocate metadata for pages allocated at boot time. */

@@ -790,61 +790,50 @@ static int vma_replace_policy(struct vm_area_struct *vma,
     return err;
 }
-/* Step 2: apply policy to a range and do splits. */
-static int mbind_range(struct mm_struct *mm, unsigned long start,
-               unsigned long end, struct mempolicy *new_pol)
+/* Split or merge the VMA (if required) and apply the new policy */
+static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma,
+        struct vm_area_struct **prev, unsigned long start,
+        unsigned long end, struct mempolicy *new_pol)
 {
-    VMA_ITERATOR(vmi, mm, start);
-    struct vm_area_struct *prev;
-    struct vm_area_struct *vma;
-    int err = 0;
+    struct vm_area_struct *merged;
+    unsigned long vmstart, vmend;
     pgoff_t pgoff;
+    int err;
-    prev = vma_prev(&vmi);
-    vma = vma_find(&vmi, end);
-    if (WARN_ON(!vma))
+    vmend = min(end, vma->vm_end);
+    if (start > vma->vm_start) {
+        *prev = vma;
+        vmstart = start;
+    } else {
+        vmstart = vma->vm_start;
+    }
+    if (mpol_equal(vma_policy(vma), new_pol))
         return 0;
-    if (start > vma->vm_start)
-        prev = vma;
-    do {
-        unsigned long vmstart = max(start, vma->vm_start);
-        unsigned long vmend = min(end, vma->vm_end);
-        if (mpol_equal(vma_policy(vma), new_pol))
-            goto next;
-        pgoff = vma->vm_pgoff +
-            ((vmstart - vma->vm_start) >> PAGE_SHIFT);
-        prev = vma_merge(&vmi, mm, prev, vmstart, vmend, vma->vm_flags,
-                 vma->anon_vma, vma->vm_file, pgoff,
-                 new_pol, vma->vm_userfaultfd_ctx,
-                 anon_vma_name(vma));
-        if (prev) {
-            vma = prev;
-            goto replace;
-        }
-        if (vma->vm_start != vmstart) {
-            err = split_vma(&vmi, vma, vmstart, 1);
-            if (err)
-                goto out;
-        }
-        if (vma->vm_end != vmend) {
-            err = split_vma(&vmi, vma, vmend, 0);
-            if (err)
-                goto out;
-        }
-replace:
-        err = vma_replace_policy(vma, new_pol);
-        if (err)
-            goto out;
-next:
-        prev = vma;
-    } for_each_vma_range(vmi, vma, end);
-out:
-    return err;
+    pgoff = vma->vm_pgoff + ((vmstart - vma->vm_start) >> PAGE_SHIFT);
+    merged = vma_merge(vmi, vma->vm_mm, *prev, vmstart, vmend, vma->vm_flags,
+               vma->anon_vma, vma->vm_file, pgoff, new_pol,
+               vma->vm_userfaultfd_ctx, anon_vma_name(vma));
+    if (merged) {
+        *prev = merged;
+        return vma_replace_policy(merged, new_pol);
+    }
+    if (vma->vm_start != vmstart) {
+        err = split_vma(vmi, vma, vmstart, 1);
+        if (err)
+            return err;
+    }
+    if (vma->vm_end != vmend) {
+        err = split_vma(vmi, vma, vmend, 0);
+        if (err)
+            return err;
+    }
+    *prev = vma;
+    return vma_replace_policy(vma, new_pol);
 }
@@ -1259,6 +1248,8 @@ static long do_mbind(unsigned long start, unsigned long len,
              nodemask_t *nmask, unsigned long flags)
 {
     struct mm_struct *mm = current->mm;
+    struct vm_area_struct *vma, *prev;
+    struct vma_iterator vmi;
     struct mempolicy *new;
     unsigned long end;
     int err;
@@ -1328,7 +1319,13 @@ static long do_mbind(unsigned long start, unsigned long len,
         goto up_out;
     }
-    err = mbind_range(mm, start, end, new);
+    vma_iter_init(&vmi, mm, start);
+    prev = vma_prev(&vmi);
+    for_each_vma_range(vmi, vma, end) {
+        err = mbind_range(&vmi, vma, &prev, start, end, new);
+        if (err)
+            break;
+    }
     if (!err) {
         int nr_failed = 0;
@@ -1489,10 +1486,8 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
         unsigned long, home_node, unsigned long, flags)
 {
     struct mm_struct *mm = current->mm;
-    struct vm_area_struct *vma;
+    struct vm_area_struct *vma, *prev;
     struct mempolicy *new, *old;
-    unsigned long vmstart;
-    unsigned long vmend;
     unsigned long end;
     int err = -ENOENT;
     VMA_ITERATOR(vmi, mm, start);
@@ -1521,6 +1516,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
     if (end == start)
         return 0;
     mmap_write_lock(mm);
+    prev = vma_prev(&vmi);
     for_each_vma_range(vmi, vma, end) {
         /*
          * If any vma in the range got policy other than MPOL_BIND
@@ -1541,9 +1537,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
         }
         new->home_node = home_node;
-        vmstart = max(start, vma->vm_start);
-        vmend = min(end, vma->vm_end);
-        err = mbind_range(mm, vmstart, vmend, new);
+        err = mbind_range(&vmi, vma, &prev, start, end, new);
         mpol_put(new);
         if (err)
             break;

@@ -1518,7 +1518,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
  */
 static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
-    unsigned long length, gap;
+    unsigned long length, gap, low_limit;
+    struct vm_area_struct *tmp;
     MA_STATE(mas, &current->mm->mm_mt, 0, 0);
@@ -1527,12 +1528,29 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
     if (length < info->length)
         return -ENOMEM;
-    if (mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
-               length))
+    low_limit = info->low_limit;
+retry:
+    if (mas_empty_area(&mas, low_limit, info->high_limit - 1, length))
         return -ENOMEM;
     gap = mas.index;
     gap += (info->align_offset - gap) & info->align_mask;
+    tmp = mas_next(&mas, ULONG_MAX);
+    if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+        if (vm_start_gap(tmp) < gap + length - 1) {
+            low_limit = tmp->vm_end;
+            mas_reset(&mas);
+            goto retry;
+        }
+    } else {
+        tmp = mas_prev(&mas, 0);
+        if (tmp && vm_end_gap(tmp) > gap) {
+            low_limit = vm_end_gap(tmp);
+            mas_reset(&mas);
+            goto retry;
+        }
+    }
     return gap;
 }
@@ -1548,7 +1566,8 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
  */
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
-    unsigned long length, gap;
+    unsigned long length, gap, high_limit, gap_end;
+    struct vm_area_struct *tmp;
     MA_STATE(mas, &current->mm->mm_mt, 0, 0);
     /* Adjust search length to account for worst case alignment overhead */
@@ -1556,12 +1575,31 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
     if (length < info->length)
         return -ENOMEM;
-    if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+    high_limit = info->high_limit;
+retry:
+    if (mas_empty_area_rev(&mas, info->low_limit, high_limit - 1,
                 length))
         return -ENOMEM;
     gap = mas.last + 1 - info->length;
     gap -= (gap - info->align_offset) & info->align_mask;
+    gap_end = mas.last;
+    tmp = mas_next(&mas, ULONG_MAX);
+    if (tmp && (tmp->vm_flags & VM_GROWSDOWN)) { /* Avoid prev check if possible */
+        if (vm_start_gap(tmp) <= gap_end) {
+            high_limit = vm_start_gap(tmp);
+            mas_reset(&mas);
+            goto retry;
+        }
+    } else {
+        tmp = mas_prev(&mas, 0);
+        if (tmp && vm_end_gap(tmp) > gap) {
+            high_limit = tmp->vm_start;
+            mas_reset(&mas);
+            goto retry;
+        }
+    }
     return gap;
 }

@@ -838,7 +838,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
     }
     tlb_finish_mmu(&tlb);
-    if (vma_iter_end(&vmi) < end)
+    if (!error && vma_iter_end(&vmi) < end)
         error = -ENOMEM;
 out:

@@ -6632,7 +6632,21 @@ static void __build_all_zonelists(void *data)
     int nid;
     int __maybe_unused cpu;
     pg_data_t *self = data;
+    unsigned long flags;
+    /*
+     * Explicitly disable this CPU's interrupts before taking seqlock
+     * to prevent any IRQ handler from calling into the page allocator
+     * (e.g. GFP_ATOMIC) that could hit zonelist_iter_begin and livelock.
+     */
+    local_irq_save(flags);
+    /*
+     * Explicitly disable this CPU's synchronous printk() before taking
+     * seqlock to prevent any printk() from trying to hold port->lock, for
+     * tty_insert_flip_string_and_push_buffer() on other CPU might be
+     * calling kmalloc(GFP_ATOMIC | __GFP_NOWARN) with port->lock held.
+     */
+    printk_deferred_enter();
     write_seqlock(&zonelist_update_seq);
 #ifdef CONFIG_NUMA
@@ -6671,6 +6685,8 @@ static void __build_all_zonelists(void *data)
     }
     write_sequnlock(&zonelist_update_seq);
+    printk_deferred_exit();
+    local_irq_restore(flags);
 }
 static noinline void __init
@@ -9450,6 +9466,9 @@ static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
         if (PageReserved(page))
             return false;
+        if (PageHuge(page))
+            return false;
     }
     return true;
 }

@@ -222,7 +222,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
     if (lruvec)
         unlock_page_lruvec_irqrestore(lruvec, flags);
     folios_put(fbatch->folios, folio_batch_count(fbatch));
-    folio_batch_init(fbatch);
+    folio_batch_reinit(fbatch);
 }
 static void folio_batch_add_and_move(struct folio_batch *fbatch,

@@ -313,8 +313,8 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
                  ioremap_max_page_shift);
     flush_cache_vmap(addr, end);
     if (!err)
-        kmsan_ioremap_page_range(addr, end, phys_addr, prot,
+        err = kmsan_ioremap_page_range(addr, end, phys_addr, prot,
                  ioremap_max_page_shift);
     return err;
 }
@@ -605,7 +605,11 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
 int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
         pgprot_t prot, struct page **pages, unsigned int page_shift)
 {
-    kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
+    int ret = kmsan_vmap_pages_range_noflush(addr, end, prot, pages,
+                         page_shift);
+    if (ret)
+        return ret;
     return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
 }

@@ -39,7 +39,7 @@ help:
     @echo ' turbostat - Intel CPU idle stats and freq reporting tool'
     @echo ' usb - USB testing tools'
     @echo ' virtio - vhost test module'
-    @echo ' vm - misc vm tools'
+    @echo ' mm - misc mm tools'
     @echo ' wmi - WMI interface examples'
     @echo ' x86_energy_perf_policy - Intel energy policy tool'
     @echo ''
@@ -69,7 +69,7 @@ acpi: FORCE
 cpupower: FORCE
     $(call descend,power/$@)
-cgroup counter firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
+cgroup counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
     $(call descend,$@)
 bpf/%: FORCE
@@ -118,7 +118,7 @@ kvm_stat: FORCE
 all: acpi cgroup counter cpupower gpio hv firewire \
     perf selftests bootconfig spi turbostat usb \
-    virtio vm bpf x86_energy_perf_policy \
+    virtio mm bpf x86_energy_perf_policy \
     tmon freefall iio objtool kvm_stat wmi \
     pci debugging tracing thermal thermometer thermal-engine
@@ -128,7 +128,7 @@ acpi_install:
 cpupower_install:
     $(call descend,power/$(@:_install=),install)
-cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
+cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
     $(call descend,$(@:_install=),install)
 selftests_install:
@@ -158,7 +158,7 @@ kvm_stat_install:
 install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
     hv_install firewire_install iio_install \
     perf_install selftests_install turbostat_install usb_install \
-    virtio_install vm_install bpf_install x86_energy_perf_policy_install \
+    virtio_install mm_install bpf_install x86_energy_perf_policy_install \
     tmon_install freefall_install objtool_install kvm_stat_install \
     wmi_install pci_install debugging_install intel-speed-select_install \
     tracing_install thermometer_install thermal-engine_install
@@ -169,7 +169,7 @@ acpi_clean:
 cpupower_clean:
     $(call descend,power/cpupower,clean)
-cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
+cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
     $(call descend,$(@:_clean=),clean)
 libapi_clean:
@@ -211,7 +211,7 @@ build_clean:
 clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
     perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
-    vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+    mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
     freefall_clean build_clean libbpf_clean libsubcmd_clean \
     gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
     intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean

@@ -857,7 +857,7 @@ int main(int argc, char **argv)
         if (cull & CULL_PID || filter & FILTER_PID)
             fprintf(fout, ", PID %d", list[i].pid);
         if (cull & CULL_TGID || filter & FILTER_TGID)
-            fprintf(fout, ", TGID %d", list[i].pid);
+            fprintf(fout, ", TGID %d", list[i].tgid);
         if (cull & CULL_COMM || filter & FILTER_COMM)
            fprintf(fout, ", task_comm_name: %s", list[i].comm);
         if (cull & CULL_ALLOCATOR) {