Commit 3ad3d901 authored by Xiao Guangrong, committed by Linus Torvalds

mm: mmu_notifier: fix freed page still mapped in secondary MMU

mmu_notifier_release() is called when a process is exiting. It deletes
all of the mm's mmu notifiers. But at that point the pages belonging to
the process are still present in its page tables and on the LRU list, so
this race can happen:

      CPU 0                 CPU 1
mmu_notifier_release:    try_to_unmap:
   hlist_del_init_rcu(&mn->hlist);
                            ptep_clear_flush_notify:
                                  mmu notifier not found
                            free page  !!!!!!
                            /*
                             * At this point, the page has been
                             * freed, but it is still mapped in
                             * the secondary MMU.
                             */

  mn->ops->release(mn, mm);

The machine then becomes unstable and sometimes hits this bug:

[  738.075923] BUG: Bad page state in process migrate-perf  pfn:03bec
[  738.075931] page:ffffea00000efb00 count:0 mapcount:0 mapping:          (null) index:0x8076
[  738.075936] page flags: 0x20000000000014(referenced|dirty)

The same issue is present in mmu_notifier_unregister().

The fix is to call ->release before deleting the notifier, which
guarantees the page is unmapped from the secondary MMU before it can be
freed.
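
Condensed from the diff below, the fixed __mmu_notifier_release orders
the exit path like this (a sketch of the new logic, not the complete
function):

	void __mmu_notifier_release(struct mm_struct *mm)
	{
		struct mmu_notifier *mn;
		struct hlist_node *n;

		/*
		 * Let every registered driver flush its secondary-MMU
		 * mappings while its notifier is still on the list;
		 * holding the RCU read lock blocks
		 * mmu_notifier_unregister until ->release returns.
		 */
		rcu_read_lock();
		hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
			if (mn->ops->release)
				mn->ops->release(mn, mm);
		rcu_read_unlock();

		/* Only now unhash the notifiers. */
		spin_lock(&mm->mmu_notifier_mm->lock);
		while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
			mn = hlist_entry(mm->mmu_notifier_mm->list.first,
					 struct mmu_notifier, hlist);
			hlist_del_init_rcu(&mn->hlist);
		}
		spin_unlock(&mm->mmu_notifier_mm->lock);
	}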
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent bdf4f4d2
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -33,25 +33,14 @@
 void __mmu_notifier_release(struct mm_struct *mm)
 {
 	struct mmu_notifier *mn;
+	struct hlist_node *n;
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
-	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
-		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
-				 struct mmu_notifier,
-				 hlist);
-		/*
-		 * We arrived before mmu_notifier_unregister so
-		 * mmu_notifier_unregister will do nothing other than
-		 * to wait ->release to finish and
-		 * mmu_notifier_unregister to return.
-		 */
-		hlist_del_init_rcu(&mn->hlist);
-		/*
-		 * RCU here will block mmu_notifier_unregister until
-		 * ->release returns.
-		 */
-		rcu_read_lock();
-		spin_unlock(&mm->mmu_notifier_mm->lock);
-		/*
-		 * if ->release runs before mmu_notifier_unregister it
-		 * must be handled as it's the only way for the driver
+	/*
+	 * RCU here will block mmu_notifier_unregister until
+	 * ->release returns.
+	 */
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist)
+		/*
+		 * if ->release runs before mmu_notifier_unregister it
+		 * must be handled as it's the only way for the driver
@@ -62,7 +51,19 @@ void __mmu_notifier_release(struct mm_struct *mm)
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
-		rcu_read_unlock();
-		spin_lock(&mm->mmu_notifier_mm->lock);
+	rcu_read_unlock();
+
+	spin_lock(&mm->mmu_notifier_mm->lock);
+	while (unlikely(!hlist_empty(&mm->mmu_notifier_mm->list))) {
+		mn = hlist_entry(mm->mmu_notifier_mm->list.first,
+				 struct mmu_notifier,
+				 hlist);
+		/*
+		 * We arrived before mmu_notifier_unregister so
+		 * mmu_notifier_unregister will do nothing other than
+		 * to wait ->release to finish and
+		 * mmu_notifier_unregister to return.
+		 */
+		hlist_del_init_rcu(&mn->hlist);
 	}
 	spin_unlock(&mm->mmu_notifier_mm->lock);
 
@@ -284,16 +285,13 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 {
 	BUG_ON(atomic_read(&mm->mm_count) <= 0);
 
-	spin_lock(&mm->mmu_notifier_mm->lock);
 	if (!hlist_unhashed(&mn->hlist)) {
-		hlist_del_rcu(&mn->hlist);
-
 		/*
 		 * RCU here will force exit_mmap to wait ->release to finish
 		 * before freeing the pages.
 		 */
 		rcu_read_lock();
-		spin_unlock(&mm->mmu_notifier_mm->lock);
+
 		/*
 		 * exit_mmap will block in mmu_notifier_release to
 		 * guarantee ->release is called before freeing the
@@ -302,8 +300,11 @@ void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
 		if (mn->ops->release)
 			mn->ops->release(mn, mm);
 		rcu_read_unlock();
-	} else
+
+		spin_lock(&mm->mmu_notifier_mm->lock);
+		hlist_del_rcu(&mn->hlist);
 		spin_unlock(&mm->mmu_notifier_mm->lock);
+	}
 
 	/*
 	 * Wait any running method to finish, of course including
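
For reference, the driver side of this contract is the ->release
callback, which must flush every spte for the dying mm. A minimal
sketch of such a callback, with hypothetical example_* names (this
commit does not touch any driver):

	/* Hypothetical driver state embedding its notifier. */
	struct example_dev {
		struct mmu_notifier mn;
		/* ... secondary-MMU (spte) state ... */
	};

	static void example_release(struct mmu_notifier *mn,
				    struct mm_struct *mm)
	{
		struct example_dev *dev =
			container_of(mn, struct example_dev, mn);

		/*
		 * With the ordering fixed above, this runs before the
		 * mm's pages are freed, so every spte can still be
		 * flushed; example_flush_all_sptes() is hypothetical.
		 */
		example_flush_all_sptes(dev);
	}

	static const struct mmu_notifier_ops example_ops = {
		.release = example_release,
	};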