Commit a5b338f2 authored by Andrea Arcangeli's avatar Andrea Arcangeli Committed by Linus Torvalds

thp: update futex compound knowledge

Futex code is smarter than most other gup_fast O_DIRECT code and knows
about the compound internals.  However now doing a put_page(head_page)
will not release the pin on the tail page taken by gup-fast, leading to
all sort of refcounting bugchecks.  Getting a stable head_page is a little
tricky.

page_head = page is there because if this is not a tail page it's also the
page_head.  Only in case this is a tail page, compound_head is called,
otherwise it's guaranteed unnecessary.  And if it's a tail page
compound_head has to run atomically inside irq disabled section
__get_user_pages_fast before returning.  Otherwise ->first_page won't be a
stable pointer.

Disableing irq before __get_user_page_fast and releasing irq after running
compound_head is needed because if __get_user_page_fast returns == 1, it
means the huge pmd is established and cannot go away from under us.
pmdp_splitting_flush_notify in __split_huge_page_splitting will have to
wait for local_irq_enable before the IPI delivery can return.  This means
__split_huge_page_refcount can't be running from under us, and in turn
when we run compound_head(page) we're not reading a dangling pointer from
tailpage->first_page.  Then after we get to stable head page, we are
always safe to call compound_lock and after taking the compound lock on
head page we can finally re-check if the page returned by gup-fast is
still a tail page.  in which case we're set and we didn't need to split
the hugepage in order to take a futex on it.
Signed-off-by: default avatarAndrea Arcangeli <aarcange@redhat.com>
Acked-by: default avatarMel Gorman <mel@csn.ul.ie>
Acked-by: default avatarRik van Riel <riel@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent a95a82e9
...@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) ...@@ -233,7 +233,7 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
{ {
unsigned long address = (unsigned long)uaddr; unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
struct page *page; struct page *page, *page_head;
int err; int err;
/* /*
...@@ -265,11 +265,46 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) ...@@ -265,11 +265,46 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
if (err < 0) if (err < 0)
return err; return err;
page = compound_head(page); #ifdef CONFIG_TRANSPARENT_HUGEPAGE
lock_page(page); page_head = page;
if (!page->mapping) { if (unlikely(PageTail(page))) {
unlock_page(page);
put_page(page); put_page(page);
/* serialize against __split_huge_page_splitting() */
local_irq_disable();
if (likely(__get_user_pages_fast(address, 1, 1, &page) == 1)) {
page_head = compound_head(page);
/*
* page_head is valid pointer but we must pin
* it before taking the PG_lock and/or
* PG_compound_lock. The moment we re-enable
* irqs __split_huge_page_splitting() can
* return and the head page can be freed from
* under us. We can't take the PG_lock and/or
* PG_compound_lock on a page that could be
* freed from under us.
*/
if (page != page_head) {
get_page(page_head);
put_page(page);
}
local_irq_enable();
} else {
local_irq_enable();
goto again;
}
}
#else
page_head = compound_head(page);
if (page != page_head) {
get_page(page_head);
put_page(page);
}
#endif
lock_page(page_head);
if (!page_head->mapping) {
unlock_page(page_head);
put_page(page_head);
goto again; goto again;
} }
...@@ -280,20 +315,20 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key) ...@@ -280,20 +315,20 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key)
* it's a read-only handle, it's expected that futexes attach to * it's a read-only handle, it's expected that futexes attach to
* the object not the particular process. * the object not the particular process.
*/ */
if (PageAnon(page)) { if (PageAnon(page_head)) {
key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */ key->both.offset |= FUT_OFF_MMSHARED; /* ref taken on mm */
key->private.mm = mm; key->private.mm = mm;
key->private.address = address; key->private.address = address;
} else { } else {
key->both.offset |= FUT_OFF_INODE; /* inode-based key */ key->both.offset |= FUT_OFF_INODE; /* inode-based key */
key->shared.inode = page->mapping->host; key->shared.inode = page_head->mapping->host;
key->shared.pgoff = page->index; key->shared.pgoff = page_head->index;
} }
get_futex_key_refs(key); get_futex_key_refs(key);
unlock_page(page); unlock_page(page_head);
put_page(page); put_page(page_head);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment