Commit 355b590c authored by Eric Dumazet, committed by David S. Miller

ipv4: speedup ip_idents_reserve()

Under stress, ip_idents_reserve() accesses a contended
cache line twice, with non-optimal MESI transactions.

If we place the timestamps in a separate location, we reduce this
pressure by ~50% and allow atomic_add_return() to issue
a Request For Ownership.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 9dd3c797
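The layout change can be sketched outside the kernel. Below is a minimal C11 userspace analogue of the idea, not the kernel implementation (which follows in the diff): the names idents_reserve and IDENTS_SZ are illustrative, time() and rand() stand in for jiffies and prandom_u32_max(), and C11 atomics replace the kernel's atomic_t/cmpxchg primitives.

/* Sketch of the layout change in this commit: timestamps live in
 * their own array, so the atomic_fetch_add below targets a cache
 * line that was not just read for the timestamp check.
 * All names here are illustrative, not the kernel's.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define IDENTS_SZ 2048u

static _Atomic uint32_t idents[IDENTS_SZ];  /* hot: atomic add on every call */
static _Atomic uint32_t tstamps[IDENTS_SZ]; /* mostly read, rarely cmpxchg'd */

static uint32_t idents_reserve(uint32_t hash, uint32_t segs)
{
        _Atomic uint32_t *p_tstamp = &tstamps[hash % IDENTS_SZ];
        _Atomic uint32_t *p_id = &idents[hash % IDENTS_SZ];
        uint32_t old = atomic_load_explicit(p_tstamp, memory_order_relaxed);
        uint32_t now = (uint32_t)time(NULL); /* stand-in for jiffies */
        uint32_t delta = 0;

        /* Only the caller that wins the cmpxchg applies a random bump,
         * mirroring the privacy perturbation in the kernel code. */
        if (old != now &&
            atomic_compare_exchange_strong(p_tstamp, &old, now))
                delta = (uint32_t)rand() % (now - old);

        /* fetch_add returns the previous value, so old + delta matches
         * the kernel's atomic_add_return(segs + delta, p_id) - segs. */
        return atomic_fetch_add(p_id, segs + delta) + delta;
}

int main(void)
{
        srand((unsigned)time(NULL));
        /* Reserve 3 ids for a flow hashing to bucket 42. */
        printf("first id: %u\n", idents_reserve(42, 3));
        return 0;
}

The point of the two-array split is that the fetch-add is the only access to its cache line, so the CPU can request the line in exclusive state straight away; with the old struct layout, the same line was first pulled in shared state for the stamp32 read and then upgraded for the atomic add.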
@@ -457,12 +457,9 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
 }
 
 #define IP_IDENTS_SZ 2048u
-struct ip_ident_bucket {
-	atomic_t	id;
-	u32		stamp32;
-};
 
-static struct ip_ident_bucket *ip_idents __read_mostly;
+static atomic_t *ip_idents __read_mostly;
+static u32 *ip_tstamps __read_mostly;
 
 /* In order to protect privacy, we add a perturbation to identifiers
  * if one generator is seldom used. This makes hard for an attacker
@@ -470,15 +467,16 @@ static struct ip_ident_bucket *ip_idents __read_mostly;
  */
 u32 ip_idents_reserve(u32 hash, int segs)
 {
-	struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
-	u32 old = ACCESS_ONCE(bucket->stamp32);
+	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
+	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
+	u32 old = ACCESS_ONCE(*p_tstamp);
 	u32 now = (u32)jiffies;
 	u32 delta = 0;
 
-	if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+	if (old != now && cmpxchg(p_tstamp, old, now) == old)
 		delta = prandom_u32_max(now - old);
 
-	return atomic_add_return(segs + delta, &bucket->id) - segs;
+	return atomic_add_return(segs + delta, p_id) - segs;
 }
 EXPORT_SYMBOL(ip_idents_reserve);
 
@@ -2741,6 +2739,10 @@ int __init ip_rt_init(void)
 
 	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
 
+	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
+	if (!ip_tstamps)
+		panic("IP: failed to allocate ip_tstamps\n");
+
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
...