Commit 6d75f366 authored by Johannes Weiner's avatar Johannes Weiner Committed by Linus Torvalds

lib: radix-tree: check accounting of existing slot replacement users

The bug in khugepaged fixed earlier in this series shows that radix tree
slot replacement is fragile; and it will become more so when not only
NULL<->!NULL transitions need to be caught but transitions from and to
exceptional entries as well.  We need checks.

Re-implement radix_tree_replace_slot() on top of the sanity-checked
__radix_tree_replace().  This requires existing callers to also pass the
radix tree root, but it'll warn us when somebody replaces slots with
contents that need proper accounting (transitions between NULL entries,
real entries, exceptional entries) and where a replacement through the
slot pointer would corrupt the radix tree node counts.

Link: http://lkml.kernel.org/r/20161117193021.GB23430@cmpxchg.orgSigned-off-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Suggested-by: default avatarJan Kara <jack@suse.cz>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent f7942430
...@@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr, ...@@ -1015,7 +1015,7 @@ static inline void gmap_insert_rmap(struct gmap *sg, unsigned long vmaddr,
if (slot) { if (slot) {
rmap->next = radix_tree_deref_slot_protected(slot, rmap->next = radix_tree_deref_slot_protected(slot,
&sg->guest_table_lock); &sg->guest_table_lock);
radix_tree_replace_slot(slot, rmap); radix_tree_replace_slot(&sg->host_to_rmap, slot, rmap);
} else { } else {
rmap->next = NULL; rmap->next = NULL;
radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT, radix_tree_insert(&sg->host_to_rmap, vmaddr >> PAGE_SHIFT,
......
...@@ -254,7 +254,7 @@ static void __init intc_subgroup_map(struct intc_desc_int *d) ...@@ -254,7 +254,7 @@ static void __init intc_subgroup_map(struct intc_desc_int *d)
radix_tree_tag_clear(&d->tree, entry->enum_id, radix_tree_tag_clear(&d->tree, entry->enum_id,
INTC_TAG_VIRQ_NEEDS_ALLOC); INTC_TAG_VIRQ_NEEDS_ALLOC);
radix_tree_replace_slot((void **)entries[i], radix_tree_replace_slot(&d->tree, (void **)entries[i],
&intc_irq_xlate[irq]); &intc_irq_xlate[irq]);
} }
......
...@@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot) ...@@ -342,7 +342,7 @@ static inline void *lock_slot(struct address_space *mapping, void **slot)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock); radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
entry |= RADIX_DAX_ENTRY_LOCK; entry |= RADIX_DAX_ENTRY_LOCK;
radix_tree_replace_slot(slot, (void *)entry); radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
return (void *)entry; return (void *)entry;
} }
...@@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot) ...@@ -356,7 +356,7 @@ static inline void *unlock_slot(struct address_space *mapping, void **slot)
radix_tree_deref_slot_protected(slot, &mapping->tree_lock); radix_tree_deref_slot_protected(slot, &mapping->tree_lock);
entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK; entry &= ~(unsigned long)RADIX_DAX_ENTRY_LOCK;
radix_tree_replace_slot(slot, (void *)entry); radix_tree_replace_slot(&mapping->page_tree, slot, (void *)entry);
return (void *)entry; return (void *)entry;
} }
......
...@@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg) ...@@ -249,20 +249,6 @@ static inline int radix_tree_exception(void *arg)
return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK); return unlikely((unsigned long)arg & RADIX_TREE_ENTRY_MASK);
} }
/**
* radix_tree_replace_slot - replace item in a slot
* @pslot: pointer to slot, returned by radix_tree_lookup_slot
* @item: new item to store in the slot.
*
* For use with radix_tree_lookup_slot(). Caller must hold tree write locked
* across slot lookup and replacement.
*/
static inline void radix_tree_replace_slot(void **pslot, void *item)
{
BUG_ON(radix_tree_is_internal_node(item));
rcu_assign_pointer(*pslot, item);
}
int __radix_tree_create(struct radix_tree_root *root, unsigned long index, int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
unsigned order, struct radix_tree_node **nodep, unsigned order, struct radix_tree_node **nodep,
void ***slotp); void ***slotp);
...@@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long); ...@@ -280,6 +266,8 @@ void **radix_tree_lookup_slot(struct radix_tree_root *, unsigned long);
void __radix_tree_replace(struct radix_tree_root *root, void __radix_tree_replace(struct radix_tree_root *root,
struct radix_tree_node *node, struct radix_tree_node *node,
void **slot, void *item); void **slot, void *item);
void radix_tree_replace_slot(struct radix_tree_root *root,
void **slot, void *item);
bool __radix_tree_delete_node(struct radix_tree_root *root, bool __radix_tree_delete_node(struct radix_tree_root *root,
struct radix_tree_node *node); struct radix_tree_node *node);
void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
......
...@@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) ...@@ -753,19 +753,10 @@ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
} }
EXPORT_SYMBOL(radix_tree_lookup); EXPORT_SYMBOL(radix_tree_lookup);
/** static void replace_slot(struct radix_tree_root *root,
* __radix_tree_replace - replace item in a slot
* @root: radix tree root
* @node: pointer to tree node
* @slot: pointer to slot in @node
* @item: new item to store in the slot.
*
* For use with __radix_tree_lookup(). Caller must hold tree write locked
* across slot lookup and replacement.
*/
void __radix_tree_replace(struct radix_tree_root *root,
struct radix_tree_node *node, struct radix_tree_node *node,
void **slot, void *item) void **slot, void *item,
bool warn_typeswitch)
{ {
void *old = rcu_dereference_raw(*slot); void *old = rcu_dereference_raw(*slot);
int exceptional; int exceptional;
...@@ -776,7 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root, ...@@ -776,7 +767,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
exceptional = !!radix_tree_exceptional_entry(item) - exceptional = !!radix_tree_exceptional_entry(item) -
!!radix_tree_exceptional_entry(old); !!radix_tree_exceptional_entry(old);
WARN_ON_ONCE(exceptional && !node && slot != (void **)&root->rnode); WARN_ON_ONCE(warn_typeswitch && exceptional);
if (node) if (node)
node->exceptional += exceptional; node->exceptional += exceptional;
...@@ -784,6 +775,50 @@ void __radix_tree_replace(struct radix_tree_root *root, ...@@ -784,6 +775,50 @@ void __radix_tree_replace(struct radix_tree_root *root,
rcu_assign_pointer(*slot, item); rcu_assign_pointer(*slot, item);
} }
/**
* __radix_tree_replace - replace item in a slot
* @root: radix tree root
* @node: pointer to tree node
* @slot: pointer to slot in @node
* @item: new item to store in the slot.
*
* For use with __radix_tree_lookup(). Caller must hold tree write locked
* across slot lookup and replacement.
*/
void __radix_tree_replace(struct radix_tree_root *root,
struct radix_tree_node *node,
void **slot, void *item)
{
/*
* This function supports replacing exceptional entries, but
* that needs accounting against the node unless the slot is
* root->rnode.
*/
replace_slot(root, node, slot, item,
!node && slot != (void **)&root->rnode);
}
/**
* radix_tree_replace_slot - replace item in a slot
* @root: radix tree root
* @slot: pointer to slot
* @item: new item to store in the slot.
*
* For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
* radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
* across slot lookup and replacement.
*
* NOTE: This cannot be used to switch between non-entries (empty slots),
* regular entries, and exceptional entries, as that requires accounting
* inside the radix tree node. When switching from one type of entry to
* another, use __radix_tree_lookup() and __radix_tree_replace().
*/
void radix_tree_replace_slot(struct radix_tree_root *root,
void **slot, void *item)
{
replace_slot(root, NULL, slot, item, true);
}
/** /**
* radix_tree_tag_set - set a tag on a radix tree node * radix_tree_tag_set - set a tag on a radix tree node
* @root: radix tree root * @root: radix tree root
......
...@@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space *mapping, ...@@ -147,7 +147,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
false); false);
} }
} }
radix_tree_replace_slot(slot, page); radix_tree_replace_slot(&mapping->page_tree, slot, page);
mapping->nrpages++; mapping->nrpages++;
if (node) { if (node) {
workingset_node_pages_inc(node); workingset_node_pages_inc(node);
...@@ -196,7 +196,7 @@ static void page_cache_tree_delete(struct address_space *mapping, ...@@ -196,7 +196,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
shadow = NULL; shadow = NULL;
} }
radix_tree_replace_slot(slot, shadow); radix_tree_replace_slot(&mapping->page_tree, slot, shadow);
if (!node) if (!node)
break; break;
......
...@@ -1426,7 +1426,7 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1426,7 +1426,7 @@ static void collapse_shmem(struct mm_struct *mm,
list_add_tail(&page->lru, &pagelist); list_add_tail(&page->lru, &pagelist);
/* Finally, replace with the new page. */ /* Finally, replace with the new page. */
radix_tree_replace_slot(slot, radix_tree_replace_slot(&mapping->page_tree, slot,
new_page + (index % HPAGE_PMD_NR)); new_page + (index % HPAGE_PMD_NR));
slot = radix_tree_iter_next(&iter); slot = radix_tree_iter_next(&iter);
...@@ -1538,7 +1538,8 @@ static void collapse_shmem(struct mm_struct *mm, ...@@ -1538,7 +1538,8 @@ static void collapse_shmem(struct mm_struct *mm,
/* Unfreeze the page. */ /* Unfreeze the page. */
list_del(&page->lru); list_del(&page->lru);
page_ref_unfreeze(page, 2); page_ref_unfreeze(page, 2);
radix_tree_replace_slot(slot, page); radix_tree_replace_slot(&mapping->page_tree,
slot, page);
spin_unlock_irq(&mapping->tree_lock); spin_unlock_irq(&mapping->tree_lock);
putback_lru_page(page); putback_lru_page(page);
unlock_page(page); unlock_page(page);
......
...@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping, ...@@ -482,7 +482,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
SetPageDirty(newpage); SetPageDirty(newpage);
} }
radix_tree_replace_slot(pslot, newpage); radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
/* /*
* Drop cache reference from old page by unfreezing * Drop cache reference from old page by unfreezing
...@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, ...@@ -556,7 +556,7 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
get_page(newpage); get_page(newpage);
radix_tree_replace_slot(pslot, newpage); radix_tree_replace_slot(&mapping->page_tree, pslot, newpage);
page_ref_unfreeze(page, expected_count - 1); page_ref_unfreeze(page, expected_count - 1);
......
...@@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space *mapping, ...@@ -49,7 +49,7 @@ static void clear_exceptional_entry(struct address_space *mapping,
goto unlock; goto unlock;
if (*slot != entry) if (*slot != entry)
goto unlock; goto unlock;
radix_tree_replace_slot(slot, NULL); radix_tree_replace_slot(&mapping->page_tree, slot, NULL);
mapping->nrexceptional--; mapping->nrexceptional--;
if (!node) if (!node)
goto unlock; goto unlock;
......
...@@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order) ...@@ -146,7 +146,7 @@ static void multiorder_check(unsigned long index, int order)
slot = radix_tree_lookup_slot(&tree, index); slot = radix_tree_lookup_slot(&tree, index);
free(*slot); free(*slot);
radix_tree_replace_slot(slot, item2); radix_tree_replace_slot(&tree, slot, item2);
for (i = min; i < max; i++) { for (i = min; i < max; i++) {
struct item *item = item_lookup(&tree, i); struct item *item = item_lookup(&tree, i);
assert(item != 0); assert(item != 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment