Commit eaf649eb authored by Minchan Kim, committed by Linus Torvalds

mm: swap: clean up swap readahead

Looking at the recent changes to swap readahead, I am very unhappy with
the current code structure, which branches between two swap readahead
algorithms in do_swap_page.  This patch cleans it up.

The main motivation is that the fault handler doesn't need to be aware of
the readahead algorithms; it should just call swapin_readahead.

As a first step, this patch cleans things up a little, but not completely
(I split the work to make review easier); the next patch will complete the
goal.

[minchan@kernel.org: do not check readahead flag with THP anon]
  Link: http://lkml.kernel.org/r/874lm83zho.fsf@yhuang-dev.intel.com
  Link: http://lkml.kernel.org/r/20180227232611.169883-1-minchan@kernel.org
Link: http://lkml.kernel.org/r/1509520520-32367-2-git-send-email-minchan@kernel.org
Link: http://lkml.kernel.org/r/20180220085249.151400-2-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e830c63a
...@@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t, ...@@ -424,12 +424,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
bool *new_page_allocated); bool *new_page_allocated);
extern struct page *swapin_readahead(swp_entry_t, gfp_t, extern struct page *swapin_readahead(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr); struct vm_area_struct *vma, unsigned long addr);
extern struct page *swap_readahead_detect(struct vm_fault *vmf,
struct vma_swap_readahead *swap_ra);
extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
struct vm_fault *vmf, struct vm_fault *vmf);
struct vma_swap_readahead *swap_ra);
/* linux/mm/swapfile.c */ /* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages; extern atomic_long_t nr_swap_pages;
...@@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void) ...@@ -548,15 +544,8 @@ static inline bool swap_use_vma_readahead(void)
return false; return false;
} }
static inline struct page *swap_readahead_detect( static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
struct vm_fault *vmf, struct vma_swap_readahead *swap_ra) gfp_t gfp_mask, struct vm_fault *vmf)
{
return NULL;
}
static inline struct page *do_swap_page_readahead(
swp_entry_t fentry, gfp_t gfp_mask,
struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
{ {
return NULL; return NULL;
} }
......
...@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range); ...@@ -2883,26 +2883,16 @@ EXPORT_SYMBOL(unmap_mapping_range);
int do_swap_page(struct vm_fault *vmf) int do_swap_page(struct vm_fault *vmf)
{ {
struct vm_area_struct *vma = vmf->vma; struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL, *swapcache = NULL; struct page *page = NULL, *swapcache;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
struct vma_swap_readahead swap_ra;
swp_entry_t entry; swp_entry_t entry;
pte_t pte; pte_t pte;
int locked; int locked;
int exclusive = 0; int exclusive = 0;
int ret = 0; int ret = 0;
bool vma_readahead = swap_use_vma_readahead();
if (vma_readahead) { if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
page = swap_readahead_detect(vmf, &swap_ra);
swapcache = page;
}
if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
if (page)
put_page(page);
goto out; goto out;
}
entry = pte_to_swp_entry(vmf->orig_pte); entry = pte_to_swp_entry(vmf->orig_pte);
if (unlikely(non_swap_entry(entry))) { if (unlikely(non_swap_entry(entry))) {
...@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf) ...@@ -2928,11 +2918,8 @@ int do_swap_page(struct vm_fault *vmf)
delayacct_set_flag(DELAYACCT_PF_SWAPIN); delayacct_set_flag(DELAYACCT_PF_SWAPIN);
if (!page) { page = lookup_swap_cache(entry, vma, vmf->address);
page = lookup_swap_cache(entry, vma_readahead ? vma : NULL, swapcache = page;
vmf->address);
swapcache = page;
}
if (!page) { if (!page) {
struct swap_info_struct *si = swp_swap_info(entry); struct swap_info_struct *si = swp_swap_info(entry);
...@@ -2949,9 +2936,9 @@ int do_swap_page(struct vm_fault *vmf) ...@@ -2949,9 +2936,9 @@ int do_swap_page(struct vm_fault *vmf)
swap_readpage(page, true); swap_readpage(page, true);
} }
} else { } else {
if (vma_readahead) if (swap_use_vma_readahead())
page = do_swap_page_readahead(entry, page = do_swap_page_readahead(entry,
GFP_HIGHUSER_MOVABLE, vmf, &swap_ra); GFP_HIGHUSER_MOVABLE, vmf);
else else
page = swapin_readahead(entry, page = swapin_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, vmf->address); GFP_HIGHUSER_MOVABLE, vma, vmf->address);
...@@ -2982,7 +2969,6 @@ int do_swap_page(struct vm_fault *vmf) ...@@ -2982,7 +2969,6 @@ int do_swap_page(struct vm_fault *vmf)
*/ */
ret = VM_FAULT_HWPOISON; ret = VM_FAULT_HWPOISON;
delayacct_clear_flag(DELAYACCT_PF_SWAPIN); delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
swapcache = page;
goto out_release; goto out_release;
} }
......
...@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, ...@@ -332,32 +332,43 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
unsigned long addr) unsigned long addr)
{ {
struct page *page; struct page *page;
unsigned long ra_info;
int win, hits, readahead;
page = find_get_page(swap_address_space(entry), swp_offset(entry)); page = find_get_page(swap_address_space(entry), swp_offset(entry));
INC_CACHE_INFO(find_total); INC_CACHE_INFO(find_total);
if (page) { if (page) {
bool vma_ra = swap_use_vma_readahead();
bool readahead;
INC_CACHE_INFO(find_success); INC_CACHE_INFO(find_success);
/*
* At the moment, we don't support PG_readahead for anon THP
* so let's bail out rather than confusing the readahead stat.
*/
if (unlikely(PageTransCompound(page))) if (unlikely(PageTransCompound(page)))
return page; return page;
readahead = TestClearPageReadahead(page); readahead = TestClearPageReadahead(page);
if (vma) { if (vma && vma_ra) {
ra_info = GET_SWAP_RA_VAL(vma); unsigned long ra_val;
win = SWAP_RA_WIN(ra_info); int win, hits;
hits = SWAP_RA_HITS(ra_info);
ra_val = GET_SWAP_RA_VAL(vma);
win = SWAP_RA_WIN(ra_val);
hits = SWAP_RA_HITS(ra_val);
if (readahead) if (readahead)
hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX); hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
atomic_long_set(&vma->swap_readahead_info, atomic_long_set(&vma->swap_readahead_info,
SWAP_RA_VAL(addr, win, hits)); SWAP_RA_VAL(addr, win, hits));
} }
if (readahead) { if (readahead) {
count_vm_event(SWAP_RA_HIT); count_vm_event(SWAP_RA_HIT);
if (!vma) if (!vma || !vma_ra)
atomic_inc(&swapin_readahead_hits); atomic_inc(&swapin_readahead_hits);
} }
} }
return page; return page;
} }
...@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask, ...@@ -586,8 +597,7 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
continue; continue;
if (page_allocated) { if (page_allocated) {
swap_readpage(page, false); swap_readpage(page, false);
if (offset != entry_offset && if (offset != entry_offset) {
likely(!PageTransCompound(page))) {
SetPageReadahead(page); SetPageReadahead(page);
count_vm_event(SWAP_RA); count_vm_event(SWAP_RA);
} }
...@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma, ...@@ -649,16 +659,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE)); PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
} }
struct page *swap_readahead_detect(struct vm_fault *vmf, static void swap_ra_info(struct vm_fault *vmf,
struct vma_swap_readahead *swap_ra) struct vma_swap_readahead *ra_info)
{ {
struct vm_area_struct *vma = vmf->vma; struct vm_area_struct *vma = vmf->vma;
unsigned long swap_ra_info; unsigned long ra_val;
struct page *page;
swp_entry_t entry; swp_entry_t entry;
unsigned long faddr, pfn, fpfn; unsigned long faddr, pfn, fpfn;
unsigned long start, end; unsigned long start, end;
pte_t *pte; pte_t *pte, *orig_pte;
unsigned int max_win, hits, prev_win, win, left; unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
pte_t *tpte; pte_t *tpte;
...@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, ...@@ -667,30 +676,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster), max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
SWAP_RA_ORDER_CEILING); SWAP_RA_ORDER_CEILING);
if (max_win == 1) { if (max_win == 1) {
swap_ra->win = 1; ra_info->win = 1;
return NULL; return;
} }
faddr = vmf->address; faddr = vmf->address;
entry = pte_to_swp_entry(vmf->orig_pte); orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
if ((unlikely(non_swap_entry(entry)))) entry = pte_to_swp_entry(*pte);
return NULL; if ((unlikely(non_swap_entry(entry)))) {
page = lookup_swap_cache(entry, vma, faddr); pte_unmap(orig_pte);
if (page) return;
return page; }
fpfn = PFN_DOWN(faddr); fpfn = PFN_DOWN(faddr);
swap_ra_info = GET_SWAP_RA_VAL(vma); ra_val = GET_SWAP_RA_VAL(vma);
pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info)); pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
prev_win = SWAP_RA_WIN(swap_ra_info); prev_win = SWAP_RA_WIN(ra_val);
hits = SWAP_RA_HITS(swap_ra_info); hits = SWAP_RA_HITS(ra_val);
swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits, ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
max_win, prev_win); max_win, prev_win);
atomic_long_set(&vma->swap_readahead_info, atomic_long_set(&vma->swap_readahead_info,
SWAP_RA_VAL(faddr, win, 0)); SWAP_RA_VAL(faddr, win, 0));
if (win == 1) if (win == 1) {
return NULL; pte_unmap(orig_pte);
return;
}
/* Copy the PTEs because the page table may be unmapped */ /* Copy the PTEs because the page table may be unmapped */
if (fpfn == pfn + 1) if (fpfn == pfn + 1)
...@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf, ...@@ -703,23 +714,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left, swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
&start, &end); &start, &end);
} }
swap_ra->nr_pte = end - start; ra_info->nr_pte = end - start;
swap_ra->offset = fpfn - start; ra_info->offset = fpfn - start;
pte = vmf->pte - swap_ra->offset; pte -= ra_info->offset;
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
swap_ra->ptes = pte; ra_info->ptes = pte;
#else #else
tpte = swap_ra->ptes; tpte = ra_info->ptes;
for (pfn = start; pfn != end; pfn++) for (pfn = start; pfn != end; pfn++)
*tpte++ = *pte++; *tpte++ = *pte++;
#endif #endif
pte_unmap(orig_pte);
return NULL;
} }
struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
struct vm_fault *vmf, struct vm_fault *vmf)
struct vma_swap_readahead *swap_ra)
{ {
struct blk_plug plug; struct blk_plug plug;
struct vm_area_struct *vma = vmf->vma; struct vm_area_struct *vma = vmf->vma;
...@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, ...@@ -728,12 +737,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
swp_entry_t entry; swp_entry_t entry;
unsigned int i; unsigned int i;
bool page_allocated; bool page_allocated;
struct vma_swap_readahead ra_info = {0,};
if (swap_ra->win == 1) swap_ra_info(vmf, &ra_info);
if (ra_info.win == 1)
goto skip; goto skip;
blk_start_plug(&plug); blk_start_plug(&plug);
for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte; for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
i++, pte++) { i++, pte++) {
pentry = *pte; pentry = *pte;
if (pte_none(pentry)) if (pte_none(pentry))
...@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, ...@@ -749,8 +760,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
continue; continue;
if (page_allocated) { if (page_allocated) {
swap_readpage(page, false); swap_readpage(page, false);
if (i != swap_ra->offset && if (i != ra_info.offset) {
likely(!PageTransCompound(page))) {
SetPageReadahead(page); SetPageReadahead(page);
count_vm_event(SWAP_RA); count_vm_event(SWAP_RA);
} }
...@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask, ...@@ -761,7 +771,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
lru_add_drain(); lru_add_drain();
skip: skip:
return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address, return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
swap_ra->win == 1); ra_info.win == 1);
} }
#ifdef CONFIG_SYSFS #ifdef CONFIG_SYSFS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment