Commit 4ceb5db9 authored by Linus Torvalds

Fix get_user_pages() race for write access

There's no real guarantee that handle_mm_fault() will always be able to
break a COW situation - if an update from another thread ends up
modifying the page table in some way, handle_mm_fault() may end up
requiring us to re-try the operation.

That's normally fine, but get_user_pages() ended up re-trying it as a
read, and thus a write access could in theory end up losing the dirty
bit or be done on a page that had not been properly COW'ed.

This makes get_user_pages() always retry write accesses as write
accesses by making "follow_page()" require that a writable follow has
the dirty bit set.  That simplifies the code and solves the race: if the
COW break fails for some reason, we'll just loop around and try again.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 8d894c47
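
The change is easiest to read as the retry loop it produces. The sketch below is illustrative only, not the patched kernel source: the helper name lookup_one_page() and the collapsed error handling are assumptions, while follow_page(), handle_mm_fault() and the VM_FAULT_* codes are the interfaces the real loop in get_user_pages() (mm/memory.c) uses. The point is that a write lookup is now retried as a write until follow_page() sees a dirty PTE, i.e. until the COW break has actually stuck.

#include <linux/mm.h>

/*
 * Illustrative sketch (assumed helper name, simplified error paths):
 * the shape of the per-page lookup after this commit.  A write lookup
 * only succeeds once the PTE is dirty, so a failed or raced COW break
 * simply sends us around the loop again with the original write intent.
 */
static struct page *lookup_one_page(struct mm_struct *mm,
				    struct vm_area_struct *vma,
				    unsigned long start, int write)
{
	struct page *page;

	spin_lock(&mm->page_table_lock);
	while (!(page = follow_page(mm, start, write))) {
		/* No usable (for writes: dirty) PTE yet - fault it in. */
		spin_unlock(&mm->page_table_lock);
		switch (handle_mm_fault(mm, vma, start, write)) {
		case VM_FAULT_MINOR:
		case VM_FAULT_MAJOR:
			break;		/* fault handled, look again */
		default:
			return NULL;	/* SIGBUS/OOM: simplified here */
		}
		spin_lock(&mm->page_table_lock);
	}
	spin_unlock(&mm->page_table_lock);
	return page;
}

Compare this with the hunks below: the old code downgraded the retry to a read lookup via lookup_write, which is exactly the window the commit message describes.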
@@ -811,18 +811,15 @@ static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
 	pte = *ptep;
 	pte_unmap(ptep);
 	if (pte_present(pte)) {
-		if (write && !pte_write(pte))
+		if (write && !pte_dirty(pte))
 			goto out;
 		if (read && !pte_read(pte))
 			goto out;
 		pfn = pte_pfn(pte);
 		if (pfn_valid(pfn)) {
 			page = pfn_to_page(pfn);
-			if (accessed) {
-				if (write && !pte_dirty(pte) &&!PageDirty(page))
-					set_page_dirty(page);
+			if (accessed)
 				mark_page_accessed(page);
-			}
 			return page;
 		}
 	}
@@ -941,10 +938,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		spin_lock(&mm->page_table_lock);
 		do {
 			struct page *page;
-			int lookup_write = write;
 
 			cond_resched_lock(&mm->page_table_lock);
-			while (!(page = follow_page(mm, start, lookup_write))) {
+			while (!(page = follow_page(mm, start, write))) {
 				/*
 				 * Shortcut for anonymous pages. We don't want
 				 * to force the creation of pages tables for
@@ -952,8 +948,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 				 * nobody touched so far. This is important
 				 * for doing a core dump for these mappings.
 				 */
-				if (!lookup_write &&
-				    untouched_anonymous_page(mm,vma,start)) {
+				if (!write && untouched_anonymous_page(mm,vma,start)) {
 					page = ZERO_PAGE(start);
 					break;
 				}
@@ -972,14 +967,6 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			default:
 				BUG();
 			}
-			/*
-			 * Now that we have performed a write fault
-			 * and surely no longer have a shared page we
-			 * shouldn't write, we shouldn't ignore an
-			 * unwritable page in the page table if
-			 * we are forcing write access.
-			 */
-			lookup_write = write && !force;
 			spin_lock(&mm->page_table_lock);
 		}
 		if (pages) {
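
For context on how callers see this, here is a hedged usage sketch modeled loosely on access_process_vm(): the helper name, variable names and trimmed error paths are assumptions, but the get_user_pages() signature and mmap_sem locking match the interface shown in the hunks above. With this fix, asking for write access means the pinned page has really been COW-broken (and its PTE dirtied) before the caller writes to it.

#include <linux/mm.h>
#include <linux/pagemap.h>

/* Pin one page of a target mm for writing (sketch, error paths trimmed). */
static struct page *pin_page_for_write(struct task_struct *tsk,
				       struct mm_struct *mm,
				       unsigned long addr)
{
	struct page *page;
	struct vm_area_struct *vma;
	int ret;

	down_read(&mm->mmap_sem);
	ret = get_user_pages(tsk, mm, addr, 1 /* one page */,
			     1 /* write */, 1 /* force */, &page, &vma);
	up_read(&mm->mmap_sem);

	if (ret != 1)
		return NULL;
	/* Caller kmap()s the page, writes, calls set_page_dirty_lock(),
	 * then drops its reference. */
	return page;
}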