Commit 80c802f3 authored by Timo Teräs, committed by David S. Miller

xfrm: cache bundles instead of policies for outgoing flows

__xfrm_lookup() is called for each packet transmitted out of the
system. xfrm_find_bundle() does a linear search over the policy's
bundle list, which can kill system performance depending on how many
bundles are required per policy.

This modifies __xfrm_lookup() to store bundles directly in the flow
cache. If the cache lookup misses, we just create a new bundle instead
of doing the slow search. This means that we can now get multiple
xfrm_dst's for the same flow (on a per-CPU basis).
Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent fe1a5f03
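The caching pattern the patch adopts is easiest to see outside the kernel. Below is a minimal, self-contained userspace sketch, not kernel code: a per-flow cache slot holds the resolved "bundle" together with a generation number, the fast path is a hash lookup plus a cheap validity check, and a miss (or a stale entry) falls back to the slow resolver. All names here (flow_key, cached_bundle, lookup_bundle, resolve_bundle) are illustrative only; the genid field plays roughly the role of the policy_genid/xfrm_genid checks the patch adds to xfrm_bundle_ok().

/* Illustrative userspace model only -- not kernel code. It mimics the
 * pattern this commit adopts: the per-flow cache stores the resolved
 * object (the "bundle") together with small validity checks, so the
 * fast path is a hash lookup plus a check instead of a linear scan of
 * every bundle hanging off the matched policy. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct flow_key { unsigned saddr, daddr, oif; };

struct cached_bundle {
    struct flow_key key;
    int genid;          /* bumped when the backing policy changes */
    int valid;          /* cleared when the state behind it goes away */
};

#define CACHE_SLOTS 64
static struct cached_bundle *cache[CACHE_SLOTS];
static int current_genid = 1;

static unsigned hash_key(const struct flow_key *k)
{
    return (k->saddr * 31 + k->daddr * 17 + k->oif) % CACHE_SLOTS;
}

/* Slow path: build a new bundle for the flow ("template resolution"). */
static struct cached_bundle *resolve_bundle(const struct flow_key *k)
{
    struct cached_bundle *b = calloc(1, sizeof(*b));
    b->key = *k;
    b->genid = current_genid;
    b->valid = 1;
    return b;
}

/* Fast path: return the cached bundle if it is still usable, otherwise
 * resolve a fresh one and cache it (mirroring flow_cache_lookup()
 * calling the resolver when the old entry is rejected). */
static struct cached_bundle *lookup_bundle(const struct flow_key *k)
{
    unsigned slot = hash_key(k);
    struct cached_bundle *b = cache[slot];

    if (b && !memcmp(&b->key, k, sizeof(*k)) &&
        b->valid && b->genid == current_genid)
        return b;               /* cache hit, no list walk */

    free(b);                    /* miss, collision or stale: rebuild */
    b = resolve_bundle(k);
    cache[slot] = b;
    return b;
}

int main(void)
{
    struct flow_key k = { 0x0a000001, 0x0a000002, 2 };

    struct cached_bundle *b1 = lookup_bundle(&k);
    struct cached_bundle *b2 = lookup_bundle(&k);
    printf("same bundle reused: %s\n", b1 == b2 ? "yes" : "no");

    current_genid++;            /* policy changed: cached bundle is stale */
    printf("bundle genid after rebuild: %d\n", lookup_bundle(&k)->genid);
    return 0;
}

In the actual patch, the same division of labor is between flow_cache_lookup() on the fast path and the new xfrm_bundle_lookup() resolver plus the xfrm_bundle_flo_get()/xfrm_bundle_flo_check() validity callbacks shown in the diff below.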
include/net/xfrm.h
@@ -267,7 +267,6 @@ struct xfrm_policy_afinfo {
                                     xfrm_address_t *saddr,
                                     xfrm_address_t *daddr);
     int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
-    struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
     void (*decode_session)(struct sk_buff *skb,
                            struct flowi *fl,
                            int reverse);
@@ -483,13 +482,13 @@ struct xfrm_policy {
     struct timer_list       timer;
     struct flow_cache_object flo;
+    atomic_t                genid;
     u32                     priority;
     u32                     index;
     struct xfrm_mark        mark;
     struct xfrm_selector    selector;
     struct xfrm_lifetime_cfg lft;
     struct xfrm_lifetime_cur curlft;
-    struct dst_entry        *bundles;
     struct xfrm_policy_walk_entry walk;
     u8                      type;
     u8                      action;
@@ -879,11 +878,15 @@ struct xfrm_dst {
         struct rt6_info     rt6;
     } u;
     struct dst_entry *route;
+    struct flow_cache_object flo;
+    struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+    int num_pols, num_xfrms;
 #ifdef CONFIG_XFRM_SUB_POLICY
     struct flowi *origin;
     struct xfrm_selector *partner;
 #endif
-    u32 genid;
+    u32 xfrm_genid;
+    u32 policy_genid;
     u32 route_mtu_cached;
     u32 child_mtu_cached;
     u32 route_cookie;
@@ -893,6 +896,7 @@ struct xfrm_dst {
 #ifdef CONFIG_XFRM
 static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 {
+    xfrm_pols_put(xdst->pols, xdst->num_pols);
     dst_release(xdst->route);
     if (likely(xdst->u.dst.xfrm))
         xfrm_state_put(xdst->u.dst.xfrm);
net/ipv4/xfrm4_policy.c
@@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
     return 0;
 }
 
-static struct dst_entry *
-__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
-{
-    struct dst_entry *dst;
-
-    read_lock_bh(&policy->lock);
-    for (dst = policy->bundles; dst; dst = dst->next) {
-        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-        if (xdst->u.rt.fl.oif == fl->oif &&     /*XXX*/
-            xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
-            xdst->u.rt.fl.fl4_src == fl->fl4_src &&
-            xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
-            xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
-            dst_clone(dst);
-            break;
-        }
-    }
-    read_unlock_bh(&policy->lock);
-    return dst;
-}
-
 static int xfrm4_get_tos(struct flowi *fl)
 {
     return fl->fl4_tos;
@@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
     .dst_ops =          &xfrm4_dst_ops,
     .dst_lookup =       xfrm4_dst_lookup,
     .get_saddr =        xfrm4_get_saddr,
-    .find_bundle =      __xfrm4_find_bundle,
     .decode_session =   _decode_session4,
     .get_tos =          xfrm4_get_tos,
     .init_path =        xfrm4_init_path,
net/ipv6/xfrm6_policy.c
@@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net,
     return 0;
 }
 
-static struct dst_entry *
-__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
-{
-    struct dst_entry *dst;
-
-    /* Still not clear if we should set fl->fl6_{src,dst}... */
-    read_lock_bh(&policy->lock);
-    for (dst = policy->bundles; dst; dst = dst->next) {
-        struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-        struct in6_addr fl_dst_prefix, fl_src_prefix;
-
-        ipv6_addr_prefix(&fl_dst_prefix,
-                         &fl->fl6_dst,
-                         xdst->u.rt6.rt6i_dst.plen);
-        ipv6_addr_prefix(&fl_src_prefix,
-                         &fl->fl6_src,
-                         xdst->u.rt6.rt6i_src.plen);
-        if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
-            ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
-            xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
-                           (xdst->u.rt6.rt6i_dst.plen != 128 ||
-                            xdst->u.rt6.rt6i_src.plen != 128))) {
-            dst_clone(dst);
-            break;
-        }
-    }
-    read_unlock_bh(&policy->lock);
-    return dst;
-}
-
 static int xfrm6_get_tos(struct flowi *fl)
 {
     return 0;
@@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
     .dst_ops =          &xfrm6_dst_ops,
     .dst_lookup =       xfrm6_dst_lookup,
     .get_saddr =        xfrm6_get_saddr,
-    .find_bundle =      __xfrm6_find_bundle,
     .decode_session =   _decode_session6,
     .get_tos =          xfrm6_get_tos,
     .init_path =        xfrm6_init_path,
net/xfrm/xfrm_policy.c
@@ -37,6 +37,8 @@
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
+static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
+static struct dst_entry *xfrm_policy_sk_bundles;
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
@@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
+static int stale_bundle(struct dst_entry *dst);
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                 int dir);
@@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 {
     BUG_ON(!policy->walk.dead);
 
-    BUG_ON(policy->bundles);
-
     if (del_timer(&policy->timer))
         BUG();
@@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 {
-    struct dst_entry *dst;
-
-    while ((dst = policy->bundles) != NULL) {
-        policy->bundles = dst->next;
-        dst_free(dst);
-    }
+    atomic_inc(&policy->genid);
 
     if (del_timer(&policy->timer))
         atomic_dec(&policy->refcnt);
@@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
     struct xfrm_policy *delpol;
     struct hlist_head *chain;
     struct hlist_node *entry, *newpos;
-    struct dst_entry *gc_list;
     u32 mark = policy->mark.v & policy->mark.m;
 
     write_lock_bh(&xfrm_policy_lock);
@@ -622,34 +617,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
     else if (xfrm_bydst_should_resize(net, dir, NULL))
         schedule_work(&net->xfrm.policy_hash_work);
 
-    read_lock_bh(&xfrm_policy_lock);
-    gc_list = NULL;
-    entry = &policy->bydst;
-    hlist_for_each_entry_continue(policy, entry, bydst) {
-        struct dst_entry *dst;
-
-        write_lock(&policy->lock);
-        dst = policy->bundles;
-        if (dst) {
-            struct dst_entry *tail = dst;
-            while (tail->next)
-                tail = tail->next;
-            tail->next = gc_list;
-            gc_list = dst;
-
-            policy->bundles = NULL;
-        }
-        write_unlock(&policy->lock);
-    }
-    read_unlock_bh(&xfrm_policy_lock);
-
-    while (gc_list) {
-        struct dst_entry *dst = gc_list;
-
-        gc_list = dst->next;
-        dst_free(dst);
-    }
-
     return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
@@ -998,6 +965,19 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
     return ret;
 }
 
+static struct xfrm_policy *
+__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+    struct xfrm_policy *pol;
+
+    pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+    if (pol != NULL)
+        return pol;
+#endif
+    return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+}
+
 static struct flow_cache_object *
 xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
                    u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1007,21 +987,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
     if (old_obj)
         xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
-#ifdef CONFIG_XFRM_SUB_POLICY
-    pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-    if (IS_ERR(pol))
-        return ERR_CAST(pol);
-    if (pol)
-        goto found;
-#endif
-    pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-    if (IS_ERR(pol))
+    pol = __xfrm_policy_lookup(net, fl, family, dir);
+    if (IS_ERR_OR_NULL(pol))
         return ERR_CAST(pol);
-    if (pol)
-        goto found;
-    return NULL;
 
-found:
     /* Resolver returns two references:
      * one for cache and one for caller of flow_cache_lookup() */
     xfrm_pol_hold(pol);
@@ -1313,18 +1282,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
  * still valid.
  */
 
-static struct dst_entry *
-xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
-{
-    struct dst_entry *x;
-    struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-    if (unlikely(afinfo == NULL))
-        return ERR_PTR(-EINVAL);
-    x = afinfo->find_bundle(fl, policy);
-    xfrm_policy_put_afinfo(afinfo);
-    return x;
-}
-
 static inline int xfrm_get_tos(struct flowi *fl, int family)
 {
     struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1340,6 +1297,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
     return tos;
 }
 
+static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
+{
+    struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+    struct dst_entry *dst = &xdst->u.dst;
+
+    if (xdst->route == NULL) {
+        /* Dummy bundle - if it has xfrms we were not
+         * able to build bundle as template resolution failed.
+         * It means we need to try again resolving. */
+        if (xdst->num_xfrms > 0)
+            return NULL;
+    } else {
+        /* Real bundle */
+        if (stale_bundle(dst))
+            return NULL;
+    }
+
+    dst_hold(dst);
+    return flo;
+}
+
+static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
+{
+    struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+    struct dst_entry *dst = &xdst->u.dst;
+
+    if (!xdst->route)
+        return 0;
+    if (stale_bundle(dst))
+        return 0;
+
+    return 1;
+}
+
+static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
+{
+    struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+    struct dst_entry *dst = &xdst->u.dst;
+
+    dst_free(dst);
+}
+
+static const struct flow_cache_ops xfrm_bundle_fc_ops = {
+    .get = xfrm_bundle_flo_get,
+    .check = xfrm_bundle_flo_check,
+    .delete = xfrm_bundle_flo_delete,
+};
+
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
     struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1362,9 +1367,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
         BUG();
     }
     xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
     xfrm_policy_put_afinfo(afinfo);
 
+    xdst->flo.ops = &xfrm_bundle_fc_ops;
 
     return xdst;
 }
@@ -1402,6 +1408,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
     return err;
 }
 
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -1465,7 +1472,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
         dst_hold(dst);
 
         dst1->xfrm = xfrm[i];
-        xdst->genid = xfrm[i]->genid;
+        xdst->xfrm_genid = xfrm[i]->genid;
 
         dst1->obsolete = -1;
         dst1->flags |= DST_HOST;
@@ -1558,164 +1565,279 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
 #endif
 }
 
-static int stale_bundle(struct dst_entry *dst);
-
-/* Main function: finds/creates a bundle for given flow.
- *
- * At the moment we eat a raw IP route. Mostly to speed up lookups
- * on interfaces with disabled IPsec.
- */
-int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
-                  struct sock *sk, int flags)
-{
-    struct xfrm_policy *policy;
-    struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-    int npols;
-    int pol_dead;
-    int xfrm_nr;
-    int pi;
-    struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-    struct dst_entry *dst, *dst_orig = *dst_p;
-    int nx = 0;
-    int err;
-    u32 genid;
-    u16 family;
-    u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
-
-restart:
-    genid = atomic_read(&flow_cache_genid);
-    policy = NULL;
-    for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
-        pols[pi] = NULL;
-    npols = 0;
-    pol_dead = 0;
-    xfrm_nr = 0;
-
-    if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
-        policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
-        err = PTR_ERR(policy);
-        if (IS_ERR(policy)) {
-            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-            goto dropdst;
-        }
-    }
-
-    if (!policy) {
-        struct flow_cache_object *flo;
-
-        /* To accelerate a bit...  */
-        if ((dst_orig->flags & DST_NOXFRM) ||
-            !net->xfrm.policy_count[XFRM_POLICY_OUT])
-            goto nopol;
-
-        flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
-                                dir, xfrm_policy_lookup, NULL);
-        err = PTR_ERR(flo);
-        if (IS_ERR(flo)) {
-            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-            goto dropdst;
-        }
-        if (flo)
-            policy = container_of(flo, struct xfrm_policy, flo);
-        else
-            policy = NULL;
-    }
-
-    if (!policy)
-        goto nopol;
-
-    family = dst_orig->ops->family;
-    pols[0] = policy;
-    npols ++;
-    xfrm_nr += pols[0]->xfrm_nr;
-
-    err = -ENOENT;
-    if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
-        goto error;
-
-    policy->curlft.use_time = get_seconds();
-
-    switch (policy->action) {
-    default:
-    case XFRM_POLICY_BLOCK:
-        /* Prohibit the flow */
-        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
-        err = -EPERM;
-        goto error;
-
-    case XFRM_POLICY_ALLOW:
-#ifndef CONFIG_XFRM_SUB_POLICY
-        if (policy->xfrm_nr == 0) {
-            /* Flow passes not transformed. */
-            xfrm_pol_put(policy);
-            return 0;
-        }
-#endif
-
-        /* Try to find matching bundle.
-         *
-         * LATER: help from flow cache. It is optional, this
-         * is required only for output policy.
-         */
-        dst = xfrm_find_bundle(fl, policy, family);
-        if (IS_ERR(dst)) {
-            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-            err = PTR_ERR(dst);
-            goto error;
-        }
-
-        if (dst)
-            break;
-
-#ifdef CONFIG_XFRM_SUB_POLICY
-        if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-            pols[1] = xfrm_policy_lookup_bytype(net,
-                                                XFRM_POLICY_TYPE_MAIN,
-                                                fl, family,
-                                                XFRM_POLICY_OUT);
-            if (pols[1]) {
-                if (IS_ERR(pols[1])) {
-                    XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-                    err = PTR_ERR(pols[1]);
-                    goto error;
-                }
-                if (pols[1]->action == XFRM_POLICY_BLOCK) {
-                    XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
-                    err = -EPERM;
-                    goto error;
-                }
-                npols ++;
-                xfrm_nr += pols[1]->xfrm_nr;
-            }
-        }
-
-        /*
-         * Because neither flowi nor bundle information knows about
-         * transformation template size. On more than one policy usage
-         * we can realize whether all of them is bypass or not after
-         * they are searched. See above not-transformed bypass
-         * is surrounded by non-sub policy configuration, too.
-         */
-        if (xfrm_nr == 0) {
-            /* Flow passes not transformed. */
-            xfrm_pols_put(pols, npols);
-            return 0;
-        }
-
-#endif
-        nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-        if (unlikely(nx<0)) {
-            err = nx;
-            if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
-                /* EREMOTE tells the caller to generate
-                 * a one-shot blackhole route.
-                 */
-                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-                xfrm_pol_put(policy);
-                return -EREMOTE;
-            }
-            if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
-                DECLARE_WAITQUEUE(wait, current);
-
-                add_wait_queue(&net->xfrm.km_waitq, &wait);
+static int xfrm_expand_policies(struct flowi *fl, u16 family,
+                                struct xfrm_policy **pols,
+                                int *num_pols, int *num_xfrms)
+{
+    int i;
+
+    if (*num_pols == 0 || !pols[0]) {
+        *num_pols = 0;
+        *num_xfrms = 0;
+        return 0;
+    }
+    if (IS_ERR(pols[0]))
+        return PTR_ERR(pols[0]);
+
+    *num_xfrms = pols[0]->xfrm_nr;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+    if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
+        pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+        pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
+                                            XFRM_POLICY_TYPE_MAIN,
+                                            fl, family,
+                                            XFRM_POLICY_OUT);
+        if (pols[1]) {
+            if (IS_ERR(pols[1])) {
+                xfrm_pols_put(pols, *num_pols);
+                return PTR_ERR(pols[1]);
+            }
+            (*num_pols) ++;
+            (*num_xfrms) += pols[1]->xfrm_nr;
+        }
+    }
+#endif
+    for (i = 0; i < *num_pols; i++) {
+        if (pols[i]->action != XFRM_POLICY_ALLOW) {
+            *num_xfrms = -1;
+            break;
+        }
+    }
+
+    return 0;
+
+}
+
+static struct xfrm_dst *
+xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
+                               struct flowi *fl, u16 family,
+                               struct dst_entry *dst_orig)
+{
+    struct net *net = xp_net(pols[0]);
+    struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+    struct dst_entry *dst;
+    struct xfrm_dst *xdst;
+    int err;
+
+    /* Try to instantiate a bundle */
+    err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+    if (err < 0) {
+        if (err != -EAGAIN)
+            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+        return ERR_PTR(err);
+    }
+
+    dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+    if (IS_ERR(dst)) {
+        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+        return ERR_CAST(dst);
+    }
+
+    xdst = (struct xfrm_dst *)dst;
+    xdst->num_xfrms = err;
+    if (num_pols > 1)
+        err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+    else
+        err = xfrm_dst_update_origin(dst, fl);
+    if (unlikely(err)) {
+        dst_free(dst);
+        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+        return ERR_PTR(err);
+    }
+
+    xdst->num_pols = num_pols;
+    memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+    xdst->policy_genid = atomic_read(&pols[0]->genid);
+
+    return xdst;
+}
+
+static struct flow_cache_object *
+xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
+                   struct flow_cache_object *oldflo, void *ctx)
+{
+    struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+    struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+    struct xfrm_dst *xdst, *new_xdst;
+    int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
+
+    /* Check if the policies from old bundle are usable */
+    xdst = NULL;
+    if (oldflo) {
+        xdst = container_of(oldflo, struct xfrm_dst, flo);
+        num_pols = xdst->num_pols;
+        num_xfrms = xdst->num_xfrms;
+        pol_dead = 0;
+        for (i = 0; i < num_pols; i++) {
+            pols[i] = xdst->pols[i];
+            pol_dead |= pols[i]->walk.dead;
+        }
+        if (pol_dead) {
+            dst_free(&xdst->u.dst);
+            xdst = NULL;
+            num_pols = 0;
+            num_xfrms = 0;
+            oldflo = NULL;
+        }
+    }
+
+    /* Resolve policies to use if we couldn't get them from
+     * previous cache entry */
+    if (xdst == NULL) {
+        num_pols = 1;
+        pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+        err = xfrm_expand_policies(fl, family, pols,
+                                   &num_pols, &num_xfrms);
+        if (err < 0)
+            goto inc_error;
+        if (num_pols == 0)
+            return NULL;
+        if (num_xfrms <= 0)
+            goto make_dummy_bundle;
+    }
+
+    new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+    if (IS_ERR(new_xdst)) {
+        err = PTR_ERR(new_xdst);
+        if (err != -EAGAIN)
+            goto error;
+        if (oldflo == NULL)
+            goto make_dummy_bundle;
+        dst_hold(&xdst->u.dst);
+        return oldflo;
+    }
+
+    /* Kill the previous bundle */
+    if (xdst) {
+        /* The policies were stolen for newly generated bundle */
+        xdst->num_pols = 0;
+        dst_free(&xdst->u.dst);
+    }
+
+    /* Flow cache does not have reference, it dst_free()'s,
+     * but we do need to return one reference for original caller */
+    dst_hold(&new_xdst->u.dst);
+    return &new_xdst->flo;
+
+make_dummy_bundle:
+    /* We found policies, but there's no bundles to instantiate:
+     * either because the policy blocks, has no transformations or
+     * we could not build template (no xfrm_states).*/
+    xdst = xfrm_alloc_dst(net, family);
+    if (IS_ERR(xdst)) {
+        xfrm_pols_put(pols, num_pols);
+        return ERR_CAST(xdst);
+    }
+    xdst->num_pols = num_pols;
+    xdst->num_xfrms = num_xfrms;
+    memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+
+    dst_hold(&xdst->u.dst);
+    return &xdst->flo;
+
+inc_error:
+    XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+error:
+    if (xdst != NULL)
+        dst_free(&xdst->u.dst);
+    else
+        xfrm_pols_put(pols, num_pols);
+    return ERR_PTR(err);
+}
+
+/* Main function: finds/creates a bundle for given flow.
+ *
+ * At the moment we eat a raw IP route. Mostly to speed up lookups
+ * on interfaces with disabled IPsec.
+ */
+int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+                  struct sock *sk, int flags)
+{
+    struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+    struct flow_cache_object *flo;
+    struct xfrm_dst *xdst;
+    struct dst_entry *dst, *dst_orig = *dst_p, *route;
+    u16 family = dst_orig->ops->family;
+    u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+    int i, err, num_pols, num_xfrms, drop_pols = 0;
+
+restart:
+    dst = NULL;
+    xdst = NULL;
+    route = NULL;
+
+    if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
+        num_pols = 1;
+        pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+        err = xfrm_expand_policies(fl, family, pols,
+                                   &num_pols, &num_xfrms);
+        if (err < 0)
+            goto dropdst;
+
+        if (num_pols) {
+            if (num_xfrms <= 0) {
+                drop_pols = num_pols;
+                goto no_transform;
+            }
+
+            xdst = xfrm_resolve_and_create_bundle(
+                            pols, num_pols, fl,
+                            family, dst_orig);
+            if (IS_ERR(xdst)) {
+                xfrm_pols_put(pols, num_pols);
+                err = PTR_ERR(xdst);
+                goto dropdst;
+            }
+
+            spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+            xdst->u.dst.next = xfrm_policy_sk_bundles;
+            xfrm_policy_sk_bundles = &xdst->u.dst;
+            spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
+
+            route = xdst->route;
+        }
+    }
+
+    if (xdst == NULL) {
+        /* To accelerate a bit...  */
+        if ((dst_orig->flags & DST_NOXFRM) ||
+            !net->xfrm.policy_count[XFRM_POLICY_OUT])
+            goto nopol;
+
+        flo = flow_cache_lookup(net, fl, family, dir,
+                                xfrm_bundle_lookup, dst_orig);
+        if (flo == NULL)
+            goto nopol;
+        if (IS_ERR(flo)) {
+            err = PTR_ERR(flo);
+            goto dropdst;
+        }
+        xdst = container_of(flo, struct xfrm_dst, flo);
+
+        num_pols = xdst->num_pols;
+        num_xfrms = xdst->num_xfrms;
+        memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
+        route = xdst->route;
+    }
+
+    dst = &xdst->u.dst;
+    if (route == NULL && num_xfrms > 0) {
+        /* The only case when xfrm_bundle_lookup() returns a
+         * bundle with null route, is when the template could
+         * not be resolved. It means policies are there, but
+         * bundle could not be created, since we don't yet
+         * have the xfrm_state's. We need to wait for KM to
+         * negotiate new SA's or bail out with error.*/
+        if (net->xfrm.sysctl_larval_drop) {
+            /* EREMOTE tells the caller to generate
+             * a one-shot blackhole route. */
+            dst_release(dst);
+            xfrm_pols_put(pols, num_pols);
+            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+            return -EREMOTE;
+        }
+        if (flags & XFRM_LOOKUP_WAIT) {
+            DECLARE_WAITQUEUE(wait, current);
+
+            add_wait_queue(&net->xfrm.km_waitq, &wait);
@@ -1724,92 +1846,60 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
             set_current_state(TASK_RUNNING);
             remove_wait_queue(&net->xfrm.km_waitq, &wait);
 
-            nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-            if (nx == -EAGAIN && signal_pending(current)) {
-                XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-                err = -ERESTART;
-                goto error;
-            }
-            if (nx == -EAGAIN ||
-                genid != atomic_read(&flow_cache_genid)) {
-                xfrm_pols_put(pols, npols);
-                goto restart;
-            }
-            err = nx;
-        }
-        if (err < 0) {
-            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-            goto error;
-        }
-    }
-    if (nx == 0) {
-        /* Flow passes not transformed. */
-        xfrm_pols_put(pols, npols);
-        return 0;
-    }
-
-    dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
-    err = PTR_ERR(dst);
-    if (IS_ERR(dst)) {
-        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
-        goto error;
-    }
-
-    for (pi = 0; pi < npols; pi++)
-        pol_dead |= pols[pi]->walk.dead;
-
-    write_lock_bh(&policy->lock);
-    if (unlikely(pol_dead || stale_bundle(dst))) {
-        /* Wow! While we worked on resolving, this
-         * policy has gone. Retry. It is not paranoia,
-         * we just cannot enlist new bundle to dead object.
-         * We can't enlist stable bundles either.
-         */
-        write_unlock_bh(&policy->lock);
-        dst_free(dst);
-
-        if (pol_dead)
-            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
-        else
-            XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-        err = -EHOSTUNREACH;
-        goto error;
-    }
-
-    if (npols > 1)
-        err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-    else
-        err = xfrm_dst_update_origin(dst, fl);
-    if (unlikely(err)) {
-        write_unlock_bh(&policy->lock);
-        dst_free(dst);
-        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-        goto error;
-    }
-
-    dst->next = policy->bundles;
-    policy->bundles = dst;
-    dst_hold(dst);
-    write_unlock_bh(&policy->lock);
-    }
-    *dst_p = dst;
-    dst_release(dst_orig);
-    xfrm_pols_put(pols, npols);
-    return 0;
-
-error:
-    xfrm_pols_put(pols, npols);
-dropdst:
-    dst_release(dst_orig);
-    *dst_p = NULL;
-    return err;
-
-nopol:
-    err = -ENOENT;
-    if (flags & XFRM_LOOKUP_ICMP)
-        goto dropdst;
-    return 0;
+            if (!signal_pending(current)) {
+                dst_release(dst);
+                goto restart;
+            }
+
+            err = -ERESTART;
+        } else
+            err = -EAGAIN;
+
+        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+        goto error;
+    }
+
+no_transform:
+    if (num_pols == 0)
+        goto nopol;
+
+    if ((flags & XFRM_LOOKUP_ICMP) &&
+        !(pols[0]->flags & XFRM_POLICY_ICMP)) {
+        err = -ENOENT;
+        goto error;
+    }
+
+    for (i = 0; i < num_pols; i++)
+        pols[i]->curlft.use_time = get_seconds();
+
+    if (num_xfrms < 0) {
+        /* Prohibit the flow */
+        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
+        err = -EPERM;
+        goto error;
+    } else if (num_xfrms > 0) {
+        /* Flow transformed */
+        *dst_p = dst;
+        dst_release(dst_orig);
+    } else {
+        /* Flow passes untransformed */
+        dst_release(dst);
+    }
+ok:
+    xfrm_pols_put(pols, drop_pols);
+    return 0;
+
+nopol:
+    if (!(flags & XFRM_LOOKUP_ICMP))
+        goto ok;
+    err = -ENOENT;
+error:
+    dst_release(dst);
+dropdst:
+    dst_release(dst_orig);
+    *dst_p = NULL;
+    xfrm_pols_put(pols, drop_pols);
+    return err;
 }
 EXPORT_SYMBOL(__xfrm_lookup);
@@ -2161,69 +2251,22 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
     return dst;
 }
 
-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
-{
-    struct dst_entry *dst, **dstp;
-
-    write_lock(&pol->lock);
-    dstp = &pol->bundles;
-    while ((dst=*dstp) != NULL) {
-        if (func(dst)) {
-            *dstp = dst->next;
-            dst->next = *gc_list_p;
-            *gc_list_p = dst;
-        } else {
-            dstp = &dst->next;
-        }
-    }
-    write_unlock(&pol->lock);
-}
-
-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
-{
-    struct dst_entry *gc_list = NULL;
-    int dir;
-
-    read_lock_bh(&xfrm_policy_lock);
-    for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
-        struct xfrm_policy *pol;
-        struct hlist_node *entry;
-        struct hlist_head *table;
-        int i;
-
-        hlist_for_each_entry(pol, entry,
-                             &net->xfrm.policy_inexact[dir], bydst)
-            prune_one_bundle(pol, func, &gc_list);
-
-        table = net->xfrm.policy_bydst[dir].table;
-        for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-            hlist_for_each_entry(pol, entry, table + i, bydst)
-                prune_one_bundle(pol, func, &gc_list);
-        }
-    }
-    read_unlock_bh(&xfrm_policy_lock);
-
-    while (gc_list) {
-        struct dst_entry *dst = gc_list;
-
-        gc_list = dst->next;
-        dst_free(dst);
-    }
-}
-
-static int unused_bundle(struct dst_entry *dst)
-{
-    return !atomic_read(&dst->__refcnt);
-}
-
-static void __xfrm_garbage_collect(struct net *net)
-{
-    xfrm_prune_bundles(net, unused_bundle);
-}
-
-static int xfrm_flush_bundles(struct net *net)
-{
-    xfrm_prune_bundles(net, stale_bundle);
-    return 0;
-}
+static void __xfrm_garbage_collect(struct net *net)
+{
+    struct dst_entry *head, *next;
+
+    flow_cache_flush();
+
+    spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+    head = xfrm_policy_sk_bundles;
+    xfrm_policy_sk_bundles = NULL;
+    spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
+
+    while (head) {
+        next = head->next;
+        dst_free(head);
+        head = next;
+    }
+}
 
 static void xfrm_init_pmtu(struct dst_entry *dst)
@@ -2283,7 +2326,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
             return 0;
         if (dst->xfrm->km.state != XFRM_STATE_VALID)
             return 0;
-        if (xdst->genid != dst->xfrm->genid)
+        if (xdst->xfrm_genid != dst->xfrm->genid)
+            return 0;
+        if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
             return 0;
 
         if (strict && fl &&
@@ -2448,7 +2493,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
     switch (event) {
     case NETDEV_DOWN:
-        xfrm_flush_bundles(dev_net(dev));
+        __xfrm_garbage_collect(dev_net(dev));
     }
     return NOTIFY_DONE;
 }
@@ -2780,7 +2825,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
                                struct xfrm_migrate *m, int num_migrate)
 {
     struct xfrm_migrate *mp;
-    struct dst_entry *dst;
     int i, j, n = 0;
 
     write_lock_bh(&pol->lock);
@@ -2805,10 +2849,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
                                sizeof(pol->xfrm_vec[i].saddr));
             pol->xfrm_vec[i].encap_family = mp->new_family;
             /* flush bundles */
-            while ((dst = pol->bundles) != NULL) {
-                pol->bundles = dst->next;
-                dst_free(dst);
-            }
+            atomic_inc(&pol->genid);
         }
     }