Commit 62c9827c authored by Gang Li, committed by Linus Torvalds

shmem: fix a race between shmem_unused_huge_shrink and shmem_evict_inode

Fix a data race in commit 779750d2 ("shmem: split huge pages beyond
i_size under memory pressure").

Here are the call traces causing the race:

   Call Trace 1:
     shmem_unused_huge_shrink+0x3ae/0x410
     ? __list_lru_walk_one.isra.5+0x33/0x160
     super_cache_scan+0x17c/0x190
     shrink_slab.part.55+0x1ef/0x3f0
     shrink_node+0x10e/0x330
     kswapd+0x380/0x740
     kthread+0xfc/0x130
     ? mem_cgroup_shrink_node+0x170/0x170
     ? kthread_create_on_node+0x70/0x70
     ret_from_fork+0x1f/0x30

   Call Trace 2:
     shmem_evict_inode+0xd8/0x190
     evict+0xbe/0x1c0
     do_unlinkat+0x137/0x330
     do_syscall_64+0x76/0x120
     entry_SYSCALL_64_after_hwframe+0x3d/0xa2

A simple explanation:

Imagine there are 3 items in the local list (@list).  In the first
traversal, A is not deleted from @list.

  1)    A->B->C
        ^
        |
        pos (leave)

In the second traversal, B is deleted from @list.  Concurrently, A is
deleted from @list through shmem_evict_inode() since the last reference
to the inode is dropped by another thread.  Then @list is corrupted.

  2)    A->B->C
        ^  ^
        |  |
     evict pos (drop)

We should make sure the inode is either on the global list or deleted from
any local list before iput().

Fix this by moving inodes back to the global list before we put them.

[akpm@linux-foundation.org: coding style fixes]

Link: https://lkml.kernel.org/r/20211125064502.99983-1-ligang.bdlg@bytedance.com
Fixes: 779750d2 ("shmem: split huge pages beyond i_size under memory pressure")
Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a7605426
...@@ -554,7 +554,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, ...@@ -554,7 +554,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
struct shmem_inode_info *info; struct shmem_inode_info *info;
struct page *page; struct page *page;
unsigned long batch = sc ? sc->nr_to_scan : 128; unsigned long batch = sc ? sc->nr_to_scan : 128;
int removed = 0, split = 0; int split = 0;
if (list_empty(&sbinfo->shrinklist)) if (list_empty(&sbinfo->shrinklist))
return SHRINK_STOP; return SHRINK_STOP;
...@@ -569,7 +569,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, ...@@ -569,7 +569,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
/* inode is about to be evicted */ /* inode is about to be evicted */
if (!inode) { if (!inode) {
list_del_init(&info->shrinklist); list_del_init(&info->shrinklist);
removed++;
goto next; goto next;
} }
...@@ -577,12 +576,12 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, ...@@ -577,12 +576,12 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
if (round_up(inode->i_size, PAGE_SIZE) == if (round_up(inode->i_size, PAGE_SIZE) ==
round_up(inode->i_size, HPAGE_PMD_SIZE)) { round_up(inode->i_size, HPAGE_PMD_SIZE)) {
list_move(&info->shrinklist, &to_remove); list_move(&info->shrinklist, &to_remove);
removed++;
goto next; goto next;
} }
list_move(&info->shrinklist, &list); list_move(&info->shrinklist, &list);
next: next:
sbinfo->shrinklist_len--;
if (!--batch) if (!--batch)
break; break;
} }
...@@ -602,7 +601,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, ...@@ -602,7 +601,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
inode = &info->vfs_inode; inode = &info->vfs_inode;
if (nr_to_split && split >= nr_to_split) if (nr_to_split && split >= nr_to_split)
goto leave; goto move_back;
page = find_get_page(inode->i_mapping, page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
...@@ -616,38 +615,44 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, ...@@ -616,38 +615,44 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
} }
/* /*
* Leave the inode on the list if we failed to lock * Move the inode on the list back to shrinklist if we failed
* the page at this time. * to lock the page at this time.
* *
* Waiting for the lock may lead to deadlock in the * Waiting for the lock may lead to deadlock in the
* reclaim path. * reclaim path.
*/ */
if (!trylock_page(page)) { if (!trylock_page(page)) {
put_page(page); put_page(page);
goto leave; goto move_back;
} }
ret = split_huge_page(page); ret = split_huge_page(page);
unlock_page(page); unlock_page(page);
put_page(page); put_page(page);
/* If split failed leave the inode on the list */ /* If split failed move the inode on the list back to shrinklist */
if (ret) if (ret)
goto leave; goto move_back;
split++; split++;
drop: drop:
list_del_init(&info->shrinklist); list_del_init(&info->shrinklist);
removed++; goto put;
leave: move_back:
/*
* Make sure the inode is either on the global list or deleted
* from any local list before iput() since it could be deleted
* in another thread once we put the inode (then the local list
* is corrupted).
*/
spin_lock(&sbinfo->shrinklist_lock);
list_move(&info->shrinklist, &sbinfo->shrinklist);
sbinfo->shrinklist_len++;
spin_unlock(&sbinfo->shrinklist_lock);
put:
iput(inode); iput(inode);
} }
spin_lock(&sbinfo->shrinklist_lock);
list_splice_tail(&list, &sbinfo->shrinklist);
sbinfo->shrinklist_len -= removed;
spin_unlock(&sbinfo->shrinklist_lock);
return split; return split;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment