Commit 2d2b8d2b authored by Yu Zhao, committed by Linus Torvalds

mm/vmscan.c: fix potential deadlock in reclaim_pages()

Theoretically, without the protection of memalloc_noreclaim_save() and
memalloc_noreclaim_restore(), reclaim_pages() can recurse into the block
I/O layer and deadlock.
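
For reference, the memalloc_noreclaim_save()/memalloc_noreclaim_restore()
pair works by setting PF_MEMALLOC on the current task, so any allocation
made inside the scope will not enter direct reclaim (and therefore cannot
recurse back into reclaim from the block I/O layer). A simplified sketch
of the scoped-flag helpers, roughly as defined in linux/sched/mm.h:

    /* Simplified sketch; the real helpers live in include/linux/sched/mm.h. */
    static inline unsigned int memalloc_noreclaim_save(void)
    {
            unsigned int flags = current->flags & PF_MEMALLOC;

            current->flags |= PF_MEMALLOC;  /* allocations below skip direct reclaim */
            return flags;                   /* previous state, passed back to restore */
    }

    static inline void memalloc_noreclaim_restore(unsigned int flags)
    {
            current->flags = (current->flags & ~PF_MEMALLOC) | flags;
    }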

Querying 'reclaim_pages' in our kernel crash databases didn't yield
any results. So the deadlock seems unlikely to happen. A possible
explanation is that the only user of reclaim_pages(), i.e.,
MADV_PAGEOUT, is usually called before memory pressure builds up,
e.g., on Android and Chrome OS. Under such a condition, allocations in
the block I/O layer can be fulfilled without diverting to direct
reclaim and therefore the recursion is avoided.

Link: https://lkml.kernel.org/r/20210622074642.785473-1-yuzhao@google.com
Link: https://lkml.kernel.org/r/20210614194727.2684053-1-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4a8f021b
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1701,6 +1701,7 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 	unsigned int nr_reclaimed;
 	struct page *page, *next;
 	LIST_HEAD(clean_pages);
+	unsigned int noreclaim_flag;
 
 	list_for_each_entry_safe(page, next, page_list, lru) {
 		if (!PageHuge(page) && page_is_file_lru(page) &&
@@ -1711,8 +1712,17 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 		}
 	}
 
+	/*
+	 * We should be safe here since we are only dealing with file pages and
+	 * we are not kswapd and therefore cannot write dirty file pages. But
+	 * call memalloc_noreclaim_save() anyway, just in case these conditions
+	 * change in the future.
+	 */
+	noreclaim_flag = memalloc_noreclaim_save();
 	nr_reclaimed = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
 					&stat, true);
+	memalloc_noreclaim_restore(noreclaim_flag);
+
 	list_splice(&clean_pages, page_list);
 	mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE,
 			    -(long)nr_reclaimed);
@@ -2306,6 +2316,7 @@ unsigned long reclaim_pages(struct list_head *page_list)
 	LIST_HEAD(node_page_list);
 	struct reclaim_stat dummy_stat;
 	struct page *page;
+	unsigned int noreclaim_flag;
 	struct scan_control sc = {
 		.gfp_mask = GFP_KERNEL,
 		.priority = DEF_PRIORITY,
@@ -2314,6 +2325,8 @@ unsigned long reclaim_pages(struct list_head *page_list)
 		.may_swap = 1,
 	};
 
+	noreclaim_flag = memalloc_noreclaim_save();
+
 	while (!list_empty(page_list)) {
 		page = lru_to_page(page_list);
 		if (nid == NUMA_NO_NODE) {
@@ -2350,6 +2363,8 @@ unsigned long reclaim_pages(struct list_head *page_list)
 		}
 	}
 
+	memalloc_noreclaim_restore(noreclaim_flag);
+
 	return nr_reclaimed;
 }