Commit 40137906 authored by Herbert Xu, committed by David S. Miller

rhashtable: Add nested tables

This patch adds code that handles GFP_ATOMIC kmalloc failure on
insertion.  As we cannot use vmalloc, we solve it by making our
hash table nested.  That is, we allocate single pages at each level
and reach our desired table size by nesting them.

When a nested table is created, only a single page is allocated
at the top-level.  Lower levels are allocated on demand during
insertion.  Therefore for each insertion to succeed, only two
(non-consecutive) pages are needed.

After a nested table is created, a rehash will be scheduled in
order to switch to a vmalloced table as soon as possible.  Also,
the rehash code will never rehash into a nested table.  If we
detect a nested table during a rehash, the rehash will be aborted
and a new rehash will be scheduled.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 9dbbfb0a
...@@ -61,6 +61,7 @@ struct rhlist_head { ...@@ -61,6 +61,7 @@ struct rhlist_head {
/** /**
* struct bucket_table - Table of hash buckets * struct bucket_table - Table of hash buckets
* @size: Number of hash buckets * @size: Number of hash buckets
* @nest: Number of bits of first-level nested table.
* @rehash: Current bucket being rehashed * @rehash: Current bucket being rehashed
* @hash_rnd: Random seed to fold into hash * @hash_rnd: Random seed to fold into hash
* @locks_mask: Mask to apply before accessing locks[] * @locks_mask: Mask to apply before accessing locks[]
...@@ -68,10 +69,12 @@ struct rhlist_head { ...@@ -68,10 +69,12 @@ struct rhlist_head {
* @walkers: List of active walkers * @walkers: List of active walkers
* @rcu: RCU structure for freeing the table * @rcu: RCU structure for freeing the table
* @future_tbl: Table under construction during rehashing * @future_tbl: Table under construction during rehashing
* @ntbl: Nested table used when out of memory.
* @buckets: size * hash buckets * @buckets: size * hash buckets
*/ */
struct bucket_table { struct bucket_table {
unsigned int size; unsigned int size;
unsigned int nest;
unsigned int rehash; unsigned int rehash;
u32 hash_rnd; u32 hash_rnd;
unsigned int locks_mask; unsigned int locks_mask;
...@@ -81,7 +84,7 @@ struct bucket_table { ...@@ -81,7 +84,7 @@ struct bucket_table {
struct bucket_table __rcu *future_tbl; struct bucket_table __rcu *future_tbl;
struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
}; };
/** /**
...@@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, ...@@ -374,6 +377,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
void *arg); void *arg);
void rhashtable_destroy(struct rhashtable *ht); void rhashtable_destroy(struct rhashtable *ht);
/*
 * Bucket accessors for nested tables. The inline rht_bucket*() helpers
 * call these whenever tbl->nest is non-zero.
 *
 * rht_bucket_nested_insert() is the variant used on the insertion path;
 * its result may be NULL (the fast insert path checks for that and
 * reports -ENOMEM).
 * NOTE(review): the definitions are not visible here - presumably the
 * insert variant allocates missing lower levels on demand; confirm
 * against the implementation.
 */
struct rhash_head __rcu **rht_bucket_nested(const struct bucket_table *tbl,
unsigned int hash);
struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht,
struct bucket_table *tbl,
unsigned int hash);
#define rht_dereference(p, ht) \ #define rht_dereference(p, ht) \
rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
...@@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht); ...@@ -389,6 +398,27 @@ void rhashtable_destroy(struct rhashtable *ht);
#define rht_entry(tpos, pos, member) \ #define rht_entry(tpos, pos, member) \
({ tpos = container_of(pos, typeof(*tpos), member); 1; }) ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
/**
 * rht_bucket - locate the bucket slot for a hash value (read-only view)
 * @tbl: bucket table to look in
 * @hash: the hash value / bucket index
 *
 * When @tbl is nested (tbl->nest is non-zero) the lookup is delegated to
 * rht_bucket_nested(); otherwise the slot is taken straight from
 * tbl->buckets[].
 *
 * Returns a pointer to the bucket head slot; the slot itself is const.
 */
static inline struct rhash_head __rcu *const *rht_bucket(
	const struct bucket_table *tbl, unsigned int hash)
{
	/* Nested tables are the uncommon case. */
	if (unlikely(tbl->nest))
		return rht_bucket_nested(tbl, hash);

	return &tbl->buckets[hash];
}
/**
 * rht_bucket_var - locate the writable bucket slot for a hash value
 * @tbl: bucket table to look in
 * @hash: the hash value / bucket index
 *
 * Same lookup as rht_bucket(), but the returned slot may be written
 * through, e.g. to unlink or replace the head of the chain.
 *
 * Returns a pointer to the bucket head slot.
 */
static inline struct rhash_head __rcu **rht_bucket_var(
	struct bucket_table *tbl, unsigned int hash)
{
	/* Nested tables are the uncommon case. */
	if (unlikely(tbl->nest))
		return rht_bucket_nested(tbl, hash);

	return &tbl->buckets[hash];
}
/**
 * rht_bucket_insert - locate the bucket slot to insert into
 * @ht: hash table that owns @tbl
 * @tbl: bucket table to insert into
 * @hash: the hash value / bucket index
 *
 * For a nested table (tbl->nest is non-zero) the slot is obtained from
 * rht_bucket_nested_insert(); otherwise it is taken straight from
 * tbl->buckets[].
 *
 * Returns a pointer to the bucket head slot. Callers must check the
 * result for NULL, which the insertion path reports as -ENOMEM.
 */
static inline struct rhash_head __rcu **rht_bucket_insert(
	struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash)
{
	/* Nested tables are the uncommon case. */
	if (unlikely(tbl->nest))
		return rht_bucket_nested_insert(ht, tbl, hash);

	return &tbl->buckets[hash];
}
/** /**
* rht_for_each_continue - continue iterating over hash chain * rht_for_each_continue - continue iterating over hash chain
* @pos: the &struct rhash_head to use as a loop cursor. * @pos: the &struct rhash_head to use as a loop cursor.
...@@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht); ...@@ -408,7 +438,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* @hash: the hash value / bucket index * @hash: the hash value / bucket index
*/ */
#define rht_for_each(pos, tbl, hash) \ #define rht_for_each(pos, tbl, hash) \
rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) rht_for_each_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
/** /**
* rht_for_each_entry_continue - continue iterating over hash chain * rht_for_each_entry_continue - continue iterating over hash chain
...@@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht); ...@@ -433,7 +463,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* @member: name of the &struct rhash_head within the hashable struct. * @member: name of the &struct rhash_head within the hashable struct.
*/ */
#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ #define rht_for_each_entry(tpos, pos, tbl, hash, member) \
rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ rht_for_each_entry_continue(tpos, pos, *rht_bucket(tbl, hash), \
tbl, hash, member) tbl, hash, member)
/** /**
...@@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht); ...@@ -448,13 +478,13 @@ void rhashtable_destroy(struct rhashtable *ht);
* This hash chain list-traversal primitive allows for the looped code to * This hash chain list-traversal primitive allows for the looped code to
* remove the loop cursor from the list. * remove the loop cursor from the list.
*/ */
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \
for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ for (pos = rht_dereference_bucket(*rht_bucket(tbl, hash), tbl, hash), \
next = !rht_is_a_nulls(pos) ? \ next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
(!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \
pos = next, \ pos = next, \
next = !rht_is_a_nulls(pos) ? \ next = !rht_is_a_nulls(pos) ? \
rht_dereference_bucket(pos->next, tbl, hash) : NULL) rht_dereference_bucket(pos->next, tbl, hash) : NULL)
/** /**
...@@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht); ...@@ -485,7 +515,7 @@ void rhashtable_destroy(struct rhashtable *ht);
* traversal is guarded by rcu_read_lock(). * traversal is guarded by rcu_read_lock().
*/ */
#define rht_for_each_rcu(pos, tbl, hash) \ #define rht_for_each_rcu(pos, tbl, hash) \
rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) rht_for_each_rcu_continue(pos, *rht_bucket(tbl, hash), tbl, hash)
/** /**
* rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
...@@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht); ...@@ -518,8 +548,8 @@ void rhashtable_destroy(struct rhashtable *ht);
* the _rcu mutation primitives such as rhashtable_insert() as long as the * the _rcu mutation primitives such as rhashtable_insert() as long as the
* traversal is guarded by rcu_read_lock(). * traversal is guarded by rcu_read_lock().
*/ */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \
rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ rht_for_each_entry_rcu_continue(tpos, pos, *rht_bucket(tbl, hash), \
tbl, hash, member) tbl, hash, member)
/** /**
...@@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup( ...@@ -565,7 +595,7 @@ static inline struct rhash_head *__rhashtable_lookup(
.ht = ht, .ht = ht,
.key = key, .key = key,
}; };
const struct bucket_table *tbl; struct bucket_table *tbl;
struct rhash_head *he; struct rhash_head *he;
unsigned int hash; unsigned int hash;
...@@ -697,8 +727,12 @@ static inline void *__rhashtable_insert_fast( ...@@ -697,8 +727,12 @@ static inline void *__rhashtable_insert_fast(
} }
elasticity = ht->elasticity; elasticity = ht->elasticity;
pprev = &tbl->buckets[hash]; pprev = rht_bucket_insert(ht, tbl, hash);
rht_for_each(head, tbl, hash) { data = ERR_PTR(-ENOMEM);
if (!pprev)
goto out;
rht_for_each_continue(head, *pprev, tbl, hash) {
struct rhlist_head *plist; struct rhlist_head *plist;
struct rhlist_head *list; struct rhlist_head *list;
...@@ -736,7 +770,7 @@ static inline void *__rhashtable_insert_fast( ...@@ -736,7 +770,7 @@ static inline void *__rhashtable_insert_fast(
if (unlikely(rht_grow_above_100(ht, tbl))) if (unlikely(rht_grow_above_100(ht, tbl)))
goto slow_path; goto slow_path;
head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); head = rht_dereference_bucket(*pprev, tbl, hash);
RCU_INIT_POINTER(obj->next, head); RCU_INIT_POINTER(obj->next, head);
if (rhlist) { if (rhlist) {
...@@ -746,7 +780,7 @@ static inline void *__rhashtable_insert_fast( ...@@ -746,7 +780,7 @@ static inline void *__rhashtable_insert_fast(
RCU_INIT_POINTER(list->next, NULL); RCU_INIT_POINTER(list->next, NULL);
} }
rcu_assign_pointer(tbl->buckets[hash], obj); rcu_assign_pointer(*pprev, obj);
atomic_inc(&ht->nelems); atomic_inc(&ht->nelems);
if (rht_grow_above_75(ht, tbl)) if (rht_grow_above_75(ht, tbl))
...@@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one( ...@@ -955,8 +989,8 @@ static inline int __rhashtable_remove_fast_one(
spin_lock_bh(lock); spin_lock_bh(lock);
pprev = &tbl->buckets[hash]; pprev = rht_bucket_var(tbl, hash);
rht_for_each(he, tbl, hash) { rht_for_each_continue(he, *pprev, tbl, hash) {
struct rhlist_head *list; struct rhlist_head *list;
list = container_of(he, struct rhlist_head, rhead); list = container_of(he, struct rhlist_head, rhead);
...@@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast( ...@@ -1107,8 +1141,8 @@ static inline int __rhashtable_replace_fast(
spin_lock_bh(lock); spin_lock_bh(lock);
pprev = &tbl->buckets[hash]; pprev = rht_bucket_var(tbl, hash);
rht_for_each(he, tbl, hash) { rht_for_each_continue(he, *pprev, tbl, hash) {
if (he != obj_old) { if (he != obj_old) {
pprev = &he->next; pprev = &he->next;
continue; continue;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment