Commit 0ed950d1 authored by Naoya Horiguchi's avatar Naoya Horiguchi Committed by Linus Torvalds

mm,hwpoison: make get_hwpoison_page() call get_any_page()

__get_hwpoison_page() could fail to grab refcount by some race condition,
so it's helpful if we can handle it by retrying.  We already have retry
logic, so make get_hwpoison_page() call get_any_page() when called from
memory_failure().

As a result, get_hwpoison_page() can return negative values (i.e.  error
code), so some callers are also changed to handle error cases.
soft_offline_page() does nothing for -EBUSY because that's enough and
users in userspace can easily handle it.  unpoison_memory() is also
unchanged because it's broken and need thorough fixes (will be done
later).

Link: https://lkml.kernel.org/r/20210603233632.2964832-3-nao.horiguchi@gmail.comSigned-off-by: default avatarNaoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent a3f5d80e
...@@ -5938,6 +5938,8 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb) ...@@ -5938,6 +5938,8 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
*hugetlb = true; *hugetlb = true;
if (HPageFreed(page) || HPageMigratable(page)) if (HPageFreed(page) || HPageMigratable(page))
ret = get_page_unless_zero(page); ret = get_page_unless_zero(page);
else
ret = -EBUSY;
} }
spin_unlock_irq(&hugetlb_lock); spin_unlock_irq(&hugetlb_lock);
return ret; return ret;
......
...@@ -1117,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page) ...@@ -1117,13 +1117,6 @@ static inline bool HWPoisonHandlable(struct page *page)
return PageLRU(page) || __PageMovable(page); return PageLRU(page) || __PageMovable(page);
} }
/**
* __get_hwpoison_page() - Get refcount for memory error handling:
* @page: raw error page (hit by memory error)
*
* Return: return 0 if failed to grab the refcount, otherwise true (some
* non-zero value.)
*/
static int __get_hwpoison_page(struct page *page) static int __get_hwpoison_page(struct page *page)
{ {
struct page *head = compound_head(page); struct page *head = compound_head(page);
...@@ -1168,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page) ...@@ -1168,15 +1161,6 @@ static int __get_hwpoison_page(struct page *page)
return 0; return 0;
} }
/*
* Safely get reference count of an arbitrary page.
*
* Returns 0 for a free page, 1 for an in-use page,
* -EIO for a page-type we cannot handle and -EBUSY if we raced with an
* allocation.
* We only incremented refcount in case the page was already in-use and it
* is a known type we can handle.
*/
static int get_any_page(struct page *p, unsigned long flags) static int get_any_page(struct page *p, unsigned long flags)
{ {
int ret = 0, pass = 0; int ret = 0, pass = 0;
...@@ -1186,50 +1170,77 @@ static int get_any_page(struct page *p, unsigned long flags) ...@@ -1186,50 +1170,77 @@ static int get_any_page(struct page *p, unsigned long flags)
count_increased = true; count_increased = true;
try_again: try_again:
if (!count_increased && !__get_hwpoison_page(p)) { if (!count_increased) {
if (page_count(p)) { ret = __get_hwpoison_page(p);
/* We raced with an allocation, retry. */ if (!ret) {
if (pass++ < 3) if (page_count(p)) {
goto try_again; /* We raced with an allocation, retry. */
ret = -EBUSY; if (pass++ < 3)
} else if (!PageHuge(p) && !is_free_buddy_page(p)) { goto try_again;
/* We raced with put_page, retry. */ ret = -EBUSY;
} else if (!PageHuge(p) && !is_free_buddy_page(p)) {
/* We raced with put_page, retry. */
if (pass++ < 3)
goto try_again;
ret = -EIO;
}
goto out;
} else if (ret == -EBUSY) {
/* We raced with freeing huge page to buddy, retry. */
if (pass++ < 3) if (pass++ < 3)
goto try_again; goto try_again;
ret = -EIO; goto out;
} }
}
if (PageHuge(p) || HWPoisonHandlable(p)) {
ret = 1;
} else { } else {
if (PageHuge(p) || HWPoisonHandlable(p)) { /*
ret = 1; * A page we cannot handle. Check whether we can turn
} else { * it into something we can handle.
/* */
* A page we cannot handle. Check whether we can turn if (pass++ < 3) {
* it into something we can handle.
*/
if (pass++ < 3) {
put_page(p);
shake_page(p, 1);
count_increased = false;
goto try_again;
}
put_page(p); put_page(p);
ret = -EIO; shake_page(p, 1);
count_increased = false;
goto try_again;
} }
put_page(p);
ret = -EIO;
} }
out:
return ret; return ret;
} }
static int get_hwpoison_page(struct page *p, unsigned long flags, /**
enum mf_flags ctxt) * get_hwpoison_page() - Get refcount for memory error handling
* @p: Raw error page (hit by memory error)
* @flags: Flags controlling behavior of error handling
*
* get_hwpoison_page() takes a page refcount of an error page to handle memory
* error on it, after checking that the error page is in a well-defined state
* (defined as a page-type we can successfully handle the memor error on it,
* such as LRU page and hugetlb page).
*
* Memory error handling could be triggered at any time on any type of page,
* so it's prone to race with typical memory management lifecycle (like
* allocation and free). So to avoid such races, get_hwpoison_page() takes
* extra care for the error page's state (as done in __get_hwpoison_page()),
* and has some retry logic in get_any_page().
*
* Return: 0 on failure,
* 1 on success for in-use pages in a well-defined state,
* -EIO for pages on which we can not handle memory errors,
* -EBUSY when get_hwpoison_page() has raced with page lifecycle
* operations like allocation and free.
*/
static int get_hwpoison_page(struct page *p, unsigned long flags)
{ {
int ret; int ret;
zone_pcp_disable(page_zone(p)); zone_pcp_disable(page_zone(p));
if (ctxt == MF_SOFT_OFFLINE) ret = get_any_page(p, flags);
ret = get_any_page(p, flags);
else
ret = __get_hwpoison_page(p);
zone_pcp_enable(page_zone(p)); zone_pcp_enable(page_zone(p));
return ret; return ret;
...@@ -1418,27 +1429,33 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) ...@@ -1418,27 +1429,33 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
num_poisoned_pages_inc(); num_poisoned_pages_inc();
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) { if (!(flags & MF_COUNT_INCREASED)) {
/* res = get_hwpoison_page(p, flags);
* Check "filter hit" and "race with other subpage." if (!res) {
*/ /*
lock_page(head); * Check "filter hit" and "race with other subpage."
if (PageHWPoison(head)) { */
if ((hwpoison_filter(p) && TestClearPageHWPoison(p)) lock_page(head);
|| (p != head && TestSetPageHWPoison(head))) { if (PageHWPoison(head)) {
num_poisoned_pages_dec(); if ((hwpoison_filter(p) && TestClearPageHWPoison(p))
unlock_page(head); || (p != head && TestSetPageHWPoison(head))) {
return 0; num_poisoned_pages_dec();
unlock_page(head);
return 0;
}
} }
unlock_page(head);
res = MF_FAILED;
if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
page_ref_inc(p);
res = MF_RECOVERED;
}
action_result(pfn, MF_MSG_FREE_HUGE, res);
return res == MF_RECOVERED ? 0 : -EBUSY;
} else if (res < 0) {
action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
return -EBUSY;
} }
unlock_page(head);
res = MF_FAILED;
if (!dissolve_free_huge_page(p) && take_page_off_buddy(p)) {
page_ref_inc(p);
res = MF_RECOVERED;
}
action_result(pfn, MF_MSG_FREE_HUGE, res);
return res == MF_RECOVERED ? 0 : -EBUSY;
} }
lock_page(head); lock_page(head);
...@@ -1641,28 +1658,35 @@ int memory_failure(unsigned long pfn, int flags) ...@@ -1641,28 +1658,35 @@ int memory_failure(unsigned long pfn, int flags)
* In fact it's dangerous to directly bump up page count from 0, * In fact it's dangerous to directly bump up page count from 0,
* that may make page_ref_freeze()/page_ref_unfreeze() mismatch. * that may make page_ref_freeze()/page_ref_unfreeze() mismatch.
*/ */
if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p, flags, 0)) { if (!(flags & MF_COUNT_INCREASED)) {
if (is_free_buddy_page(p)) { res = get_hwpoison_page(p, flags);
if (take_page_off_buddy(p)) { if (!res) {
page_ref_inc(p); if (is_free_buddy_page(p)) {
res = MF_RECOVERED; if (take_page_off_buddy(p)) {
} else { page_ref_inc(p);
/* We lost the race, try again */ res = MF_RECOVERED;
if (retry) { } else {
ClearPageHWPoison(p); /* We lost the race, try again */
num_poisoned_pages_dec(); if (retry) {
retry = false; ClearPageHWPoison(p);
goto try_again; num_poisoned_pages_dec();
retry = false;
goto try_again;
}
res = MF_FAILED;
} }
res = MF_FAILED; action_result(pfn, MF_MSG_BUDDY, res);
res = res == MF_RECOVERED ? 0 : -EBUSY;
} else {
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
res = -EBUSY;
} }
action_result(pfn, MF_MSG_BUDDY, res); goto unlock_mutex;
res = res == MF_RECOVERED ? 0 : -EBUSY; } else if (res < 0) {
} else { action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
res = -EBUSY; res = -EBUSY;
goto unlock_mutex;
} }
goto unlock_mutex;
} }
if (PageTransHuge(hpage)) { if (PageTransHuge(hpage)) {
...@@ -1940,7 +1964,7 @@ int unpoison_memory(unsigned long pfn) ...@@ -1940,7 +1964,7 @@ int unpoison_memory(unsigned long pfn)
return 0; return 0;
} }
if (!get_hwpoison_page(p, flags, 0)) { if (!get_hwpoison_page(p, flags)) {
if (TestClearPageHWPoison(p)) if (TestClearPageHWPoison(p))
num_poisoned_pages_dec(); num_poisoned_pages_dec();
unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
...@@ -2156,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags) ...@@ -2156,7 +2180,7 @@ int soft_offline_page(unsigned long pfn, int flags)
retry: retry:
get_online_mems(); get_online_mems();
ret = get_hwpoison_page(page, flags, MF_SOFT_OFFLINE); ret = get_hwpoison_page(page, flags);
put_online_mems(); put_online_mems();
if (ret > 0) { if (ret > 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment