Commit 15fb96a3 authored by Linus Torvalds

Merge tag 'mm-stable-2023-05-03-16-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull more MM updates from Andrew Morton:

 - Some DAMON cleanups from Kefeng Wang

 - Some KSM work from David Hildenbrand, to make the PR_SET_MEMORY_MERGE
   prctl's behavior more similar to KSM's MADV_UNMERGEABLE behavior:
   setting it to 0 now unmerges pages and clears VM_MERGEABLE (see the
   userspace sketch just before the diff below).

[ Andrew called these "final", but I suspect we'll have a series fixing
  up the fact that the last commit in the dmapools series in the
  previous pull seems to have unintentionally just reverted all the
  other commits in the same series..   - Linus ]

* tag 'mm-stable-2023-05-03-16-22' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
  mm: hwpoison: coredump: support recovery from dump_user_range()
  mm/page_alloc: add some comments to explain the possible hole in __pageblock_pfn_to_page()
  mm/ksm: move disabling KSM from s390/gmap code to KSM code
  selftests/ksm: ksm_functional_tests: add prctl unmerge test
  mm/ksm: unmerge and clear VM_MERGEABLE when setting PR_SET_MEMORY_MERGE=0
  mm/damon/paddr: fix missing folio_sz update in damon_pa_young()
  mm/damon/paddr: minor refactor of damon_pa_mark_accessed_or_deactivate()
  mm/damon/paddr: minor refactor of damon_pa_pageout()
parents 671e148d 245f0922
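
For reference, the prctl interface this series touches can be driven from
userspace roughly as sketched below, mirroring the selftest added further
down. This is a minimal, illustrative sketch only: the 2 MiB size, the 0xcf
fill value and the fallback PR_* defines are assumptions for the example,
not part of this merge.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_MEMORY_MERGE
#define PR_SET_MEMORY_MERGE	67	/* from include/uapi/linux/prctl.h */
#define PR_GET_MEMORY_MERGE	68
#endif

int main(void)
{
	const size_t size = 2 * 1024 * 1024;
	char *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (map == MAP_FAILED)
		return 1;
	/* Fill with identical pages so ksmd has something to merge. */
	memset(map, 0xcf, size);

	/* Opt the whole process into KSM (available since 6.4). */
	if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0))
		perror("PR_SET_MEMORY_MERGE=1");

	/*
	 * ... give ksmd time to merge, then disable again: with this
	 * series, disabling also unmerges the pages and clears
	 * VM_MERGEABLE instead of only clearing MMF_VM_MERGE_ANY.
	 */
	if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0))
		perror("PR_SET_MEMORY_MERGE=0");

	munmap(map, size);
	return 0;
}
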
@@ -2585,30 +2585,12 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
int gmap_mark_unmergeable(void)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
unsigned long vm_flags;
int ret;
VMA_ITERATOR(vmi, mm, 0);
/*
* Make sure to disable KSM (if enabled for the whole process or
* individual VMAs). Note that nothing currently hinders user space
* from re-enabling it.
*/
clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
for_each_vma(vmi, vma) {
/* Copy vm_flags to avoid partial modifications in ksm_madvise */
vm_flags = vma->vm_flags;
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
MADV_UNMERGEABLE, &vm_flags);
if (ret)
return ret;
vm_flags_reset(vma, vm_flags);
}
mm->def_flags &= ~VM_MERGEABLE;
return 0;
return ksm_disable(current->mm);
}
EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
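
The single ksm_disable() call above replaces the old open-coded VMA walk.
As the mm/ksm.c hunk further down asserts via mmap_assert_write_locked(),
the helper expects the mmap lock to be held for writing. A hypothetical
caller might look like the sketch below; the helper name is illustrative
and not part of this merge.

#include <linux/ksm.h>
#include <linux/mm_types.h>
#include <linux/mmap_lock.h>

/* Illustrative only: disable KSM for an mm, taking the required lock. */
static int example_ksm_disable(struct mm_struct *mm)
{
	int ret;

	if (mmap_write_lock_killable(mm))
		return -EINTR;
	/* Unmerges KSM pages, clears VM_MERGEABLE and MMF_VM_MERGE_ANY. */
	ret = ksm_disable(mm);
	mmap_write_unlock(mm);
	return ret;
}
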
@@ -882,6 +882,7 @@ static int dump_emit_page(struct coredump_params *cprm, struct page *page)
pos = file->f_pos;
bvec_set_page(&bvec, page, PAGE_SIZE, 0);
iov_iter_bvec(&iter, ITER_SOURCE, &bvec, 1, PAGE_SIZE);
iov_iter_set_copy_mc(&iter);
n = __kernel_write_iter(cprm->file, &iter, &pos);
if (n != PAGE_SIZE)
return 0;
@@ -21,6 +21,8 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
void ksm_add_vma(struct vm_area_struct *vma);
int ksm_enable_merge_any(struct mm_struct *mm);
int ksm_disable_merge_any(struct mm_struct *mm);
int ksm_disable(struct mm_struct *mm);
int __ksm_enter(struct mm_struct *mm);
void __ksm_exit(struct mm_struct *mm);
@@ -79,6 +81,11 @@ static inline void ksm_add_vma(struct vm_area_struct *vma)
{
}
static inline int ksm_disable(struct mm_struct *mm)
{
return 0;
}
static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
{
return 0;
@@ -42,6 +42,7 @@ struct iov_iter_state {
struct iov_iter {
u8 iter_type;
bool copy_mc;
bool nofault;
bool data_source;
bool user_backed;
@@ -256,8 +257,22 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
#ifdef CONFIG_ARCH_HAS_COPY_MC
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
static inline void iov_iter_set_copy_mc(struct iov_iter *i)
{
i->copy_mc = true;
}
static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
{
return i->copy_mc;
}
#else
#define _copy_mc_to_iter _copy_to_iter
static inline void iov_iter_set_copy_mc(struct iov_iter *i) { }
static inline bool iov_iter_is_copy_mc(const struct iov_iter *i)
{
return false;
}
#endif
size_t iov_iter_zero(size_t bytes, struct iov_iter *);
@@ -380,6 +395,7 @@ static inline void iov_iter_ubuf(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter) {
.iter_type = ITER_UBUF,
.copy_mc = false,
.user_backed = true,
.data_source = direction,
.ubuf = buf,
@@ -2695,16 +2695,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (mmap_write_lock_killable(me->mm))
return -EINTR;
if (arg2) {
if (arg2)
error = ksm_enable_merge_any(me->mm);
} else {
/*
* TODO: we might want disable KSM on all VMAs and
* trigger unsharing to completely disable KSM.
*/
clear_bit(MMF_VM_MERGE_ANY, &me->mm->flags);
error = 0;
}
else
error = ksm_disable_merge_any(me->mm);
mmap_write_unlock(me->mm);
break;
case PR_GET_MEMORY_MERGE:
@@ -434,6 +434,7 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter) {
.iter_type = ITER_IOVEC,
.copy_mc = false,
.nofault = false,
.user_backed = true,
.data_source = direction,
@@ -630,6 +631,14 @@ size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */
static void *memcpy_from_iter(struct iov_iter *i, void *to, const void *from,
size_t size)
{
if (iov_iter_is_copy_mc(i))
return (void *)copy_mc_to_kernel(to, from, size);
return memcpy(to, from, size);
}
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
if (WARN_ON_ONCE(!i->data_source))
@@ -639,7 +648,7 @@ size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
might_fault();
iterate_and_advance(i, bytes, base, len, off,
copyin(addr + off, base, len),
memcpy(addr + off, base, len)
memcpy_from_iter(i, addr + off, base, len)
)
return bytes;
@@ -862,7 +871,7 @@ size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t byt
}
iterate_and_advance(i, bytes, base, len, off,
copyin(p + off, base, len),
memcpy(p + off, base, len)
memcpy_from_iter(i, p + off, base, len)
)
kunmap_atomic(kaddr);
return bytes;
@@ -1043,6 +1052,7 @@ void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter){
.iter_type = ITER_KVEC,
.copy_mc = false,
.data_source = direction,
.kvec = kvec,
.nr_segs = nr_segs,
@@ -1059,6 +1069,7 @@ void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
WARN_ON(direction & ~(READ | WRITE));
*i = (struct iov_iter){
.iter_type = ITER_BVEC,
.copy_mc = false,
.data_source = direction,
.bvec = bvec,
.nr_segs = nr_segs,
@@ -1105,6 +1116,7 @@ void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
BUG_ON(direction & ~1);
*i = (struct iov_iter) {
.iter_type = ITER_XARRAY,
.copy_mc = false,
.data_source = direction,
.xarray = xarray,
.xarray_start = start,
@@ -1128,6 +1140,7 @@ void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
BUG_ON(direction != READ);
*i = (struct iov_iter){
.iter_type = ITER_DISCARD,
.copy_mc = false,
.data_source = false,
.count = count,
.iov_offset = 0
@@ -134,10 +134,8 @@ static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
}
need_lock = !folio_test_anon(folio) || folio_test_ksm(folio);
if (need_lock && !folio_trylock(folio)) {
folio_put(folio);
return false;
}
if (need_lock && !folio_trylock(folio))
goto out;
rmap_walk(folio, &rwc);
@@ -238,21 +236,18 @@ static unsigned long damon_pa_pageout(struct damon_region *r, struct damos *s)
if (!folio)
continue;
if (damos_pa_filter_out(s, folio)) {
folio_put(folio);
continue;
}
if (damos_pa_filter_out(s, folio))
goto put_folio;
folio_clear_referenced(folio);
folio_test_clear_young(folio);
if (!folio_isolate_lru(folio)) {
folio_put(folio);
continue;
}
if (!folio_isolate_lru(folio))
goto put_folio;
if (folio_test_unevictable(folio))
folio_putback_lru(folio);
else
list_add(&folio->lru, &folio_list);
put_folio:
folio_put(folio);
}
applied = reclaim_pages(&folio_list);
@@ -271,16 +266,15 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
if (!folio)
continue;
if (damos_pa_filter_out(s, folio)) {
folio_put(folio);
continue;
}
if (damos_pa_filter_out(s, folio))
goto put_folio;
if (mark_accessed)
folio_mark_accessed(folio);
else
folio_deactivate(folio);
applied += folio_nr_pages(folio);
put_folio:
folio_put(folio);
}
return applied * PAGE_SIZE;
@@ -2520,6 +2520,22 @@ static void __ksm_add_vma(struct vm_area_struct *vma)
vm_flags_set(vma, VM_MERGEABLE);
}
static int __ksm_del_vma(struct vm_area_struct *vma)
{
int err;
if (!(vma->vm_flags & VM_MERGEABLE))
return 0;
if (vma->anon_vma) {
err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
if (err)
return err;
}
vm_flags_clear(vma, VM_MERGEABLE);
return 0;
}
/**
* ksm_add_vma - Mark vma as mergeable if compatible
*
@@ -2542,6 +2558,20 @@ static void ksm_add_vmas(struct mm_struct *mm)
__ksm_add_vma(vma);
}
static int ksm_del_vmas(struct mm_struct *mm)
{
struct vm_area_struct *vma;
int err;
VMA_ITERATOR(vmi, mm, 0);
for_each_vma(vmi, vma) {
err = __ksm_del_vma(vma);
if (err)
return err;
}
return 0;
}
/**
* ksm_enable_merge_any - Add mm to mm ksm list and enable merging on all
* compatible VMA's
@@ -2569,6 +2599,46 @@ int ksm_enable_merge_any(struct mm_struct *mm)
return 0;
}
/**
* ksm_disable_merge_any - Disable merging on all compatible VMA's of the mm,
* previously enabled via ksm_enable_merge_any().
*
* Disabling merging implies unmerging any merged pages, like setting
* MADV_UNMERGEABLE would. If unmerging fails, the whole operation fails and
* merging on all compatible VMA's remains enabled.
*
* @mm: Pointer to mm
*
* Returns 0 on success, otherwise error code
*/
int ksm_disable_merge_any(struct mm_struct *mm)
{
int err;
if (!test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return 0;
err = ksm_del_vmas(mm);
if (err) {
ksm_add_vmas(mm);
return err;
}
clear_bit(MMF_VM_MERGE_ANY, &mm->flags);
return 0;
}
int ksm_disable(struct mm_struct *mm)
{
mmap_assert_write_locked(mm);
if (!test_bit(MMF_VM_MERGEABLE, &mm->flags))
return 0;
if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
return ksm_disable_merge_any(mm);
return ksm_del_vmas(mm);
}
int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
unsigned long end, int advice, unsigned long *vm_flags)
{
@@ -1502,6 +1502,15 @@ void __free_pages_core(struct page *page, unsigned int order)
* interleaving within a single pageblock. It is therefore sufficient to check
* the first and last page of a pageblock and avoid checking each individual
* page in a pageblock.
*
* Note: the function may return non-NULL struct page even for a page block
* which contains a memory hole (i.e. there is no physical memory for a subset
* of the pfn range). For example, if the pageblock order is MAX_ORDER, which
* will fall into 2 sub-sections, and the end pfn of the pageblock may be hole
* even though the start pfn is online and valid. This should be safe most of
* the time because struct pages are still initialized via init_unavailable_range()
* and pfn walkers shouldn't touch any physical memory range for which they do
* not recognize any specific metadata in struct pages.
*/
struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
unsigned long end_pfn, struct zone *zone)
@@ -91,9 +91,10 @@ static int ksm_merge(void)
return 0;
}
static char *mmap_and_merge_range(char val, unsigned long size)
static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl)
{
char *map;
int ret;
map = mmap(NULL, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
@@ -110,7 +111,17 @@ static char *mmap_and_merge_range(char val, unsigned long size)
/* Make sure each page contains the same values to merge them. */
memset(map, val, size);
if (madvise(map, size, MADV_MERGEABLE)) {
if (use_prctl) {
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
goto unmap;
} else if (ret) {
ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
goto unmap;
}
} else if (madvise(map, size, MADV_MERGEABLE)) {
ksft_test_result_fail("MADV_MERGEABLE failed\n");
goto unmap;
}
@@ -133,7 +144,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
map = mmap_and_merge_range(0xcf, size);
map = mmap_and_merge_range(0xcf, size, false);
if (map == MAP_FAILED)
return;
@@ -155,7 +166,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
map = mmap_and_merge_range(0xcf, size);
map = mmap_and_merge_range(0xcf, size, false);
if (map == MAP_FAILED)
return;
@@ -187,7 +198,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
map = mmap_and_merge_range(0xcf, size);
map = mmap_and_merge_range(0xcf, size, false);
if (map == MAP_FAILED)
return;
@@ -323,9 +334,31 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
static void test_prctl_unmerge(void)
{
const unsigned int size = 2 * MiB;
char *map;
ksft_print_msg("[RUN] %s\n", __func__);
map = mmap_and_merge_range(0xcf, size, true);
if (map == MAP_FAILED)
return;
if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
goto unmap;
}
ksft_test_result(!range_maps_duplicates(map, size),
"Pages were unmerged\n");
unmap:
munmap(map, size);
}
int main(int argc, char **argv)
{
unsigned int tests = 4;
unsigned int tests = 5;
int err;
#ifdef __NR_userfaultfd
@@ -355,6 +388,7 @@ int main(int argc, char **argv)
test_prctl();
test_prctl_fork();
test_prctl_unmerge();
err = ksft_get_fail_cnt();
if (err)