Commit 586cf468, authored by Alexey Kuznetsov, committed by David S. Miller

[IPSEC] More work.

1. Expiration of SAs. Some missing updates of counters.
   Question: very strange, rfc defines use_time as time of the first use
   of SA. But kame setkey refers to this as lastuse.
2. Bug fixes for tunnel mode and forwarding.
3. Fix bugs in per-socket policy: policy entries do not leak but are destroyed,
   when socket is closed, and are cloned on children of listening sockets.
4. Implemented use policy: i.e. use ipsec if a SA is available,
   ignore if it is not.
5. Added sysctl to disable in/out policy on some devices.
   It is set on loopback by default.
6. Remove resolved reference from template. It is not used,
   but pollutes code.
7. Added all the SASTATEs, now they make sense.
parent b4903b22
...@@ -19,6 +19,8 @@ struct ipv4_devconf ...@@ -19,6 +19,8 @@ struct ipv4_devconf
int tag; int tag;
int arp_filter; int arp_filter;
int medium_id; int medium_id;
int no_xfrm;
int no_policy;
void *sysctl; void *sysctl;
}; };
......
...@@ -351,6 +351,8 @@ enum ...@@ -351,6 +351,8 @@ enum
NET_IPV4_CONF_TAG=12, NET_IPV4_CONF_TAG=12,
NET_IPV4_CONF_ARPFILTER=13, NET_IPV4_CONF_ARPFILTER=13,
NET_IPV4_CONF_MEDIUM_ID=14, NET_IPV4_CONF_MEDIUM_ID=14,
NET_IPV4_CONF_NOXFRM=15,
NET_IPV4_CONF_NOPOLICY=16,
}; };
/* /proc/sys/net/ipv6 */ /* /proc/sys/net/ipv6 */
......
...@@ -53,8 +53,6 @@ ...@@ -53,8 +53,6 @@
7. ->share Sharing mode. 7. ->share Sharing mode.
Q: how to implement private sharing mode? To add struct sock* to Q: how to implement private sharing mode? To add struct sock* to
flow id? flow id?
8. ->resolved If template uniquely resolves to a static xfrm_state,
the reference is stores here.
Having this template we search through SAD searching for entries Having this template we search through SAD searching for entries
with appropriate mode/proto/algo, permitted by selector. with appropriate mode/proto/algo, permitted by selector.
...@@ -114,6 +112,8 @@ struct xfrm_selector ...@@ -114,6 +112,8 @@ struct xfrm_selector
void *owner; void *owner;
}; };
#define XFRM_INF (~(u64)0)
struct xfrm_lifetime_cfg struct xfrm_lifetime_cfg
{ {
u64 soft_byte_limit; u64 soft_byte_limit;
...@@ -161,9 +161,9 @@ struct xfrm_state ...@@ -161,9 +161,9 @@ struct xfrm_state
/* Key manger bits */ /* Key manger bits */
struct { struct {
int state; u8 state;
u8 dying;
u32 seq; u32 seq;
u64 warn_bytes;
} km; } km;
/* Parameters of this state. */ /* Parameters of this state. */
...@@ -195,6 +195,7 @@ struct xfrm_state ...@@ -195,6 +195,7 @@ struct xfrm_state
} stats; } stats;
struct xfrm_lifetime_cur curlft; struct xfrm_lifetime_cur curlft;
struct timer_list timer;
/* Reference to data common to all the instances of this /* Reference to data common to all the instances of this
* transformer. */ * transformer. */
...@@ -255,13 +256,13 @@ struct xfrm_tmpl ...@@ -255,13 +256,13 @@ struct xfrm_tmpl
/* Sharing mode: unique, this session only, this user only etc. */ /* Sharing mode: unique, this session only, this user only etc. */
__u8 share; __u8 share;
/* May skip this transformation if no SA is found */
__u8 optional;
/* Bit mask of algos allowed for acquisition */ /* Bit mask of algos allowed for acquisition */
__u32 aalgos; __u32 aalgos;
__u32 ealgos; __u32 ealgos;
__u32 calgos; __u32 calgos;
/* If template statically resolved, hold ref here */
struct xfrm_state *resolved;
}; };
#define XFRM_MAX_DEPTH 3 #define XFRM_MAX_DEPTH 3
...@@ -419,11 +420,35 @@ static inline int xfrm_route_forward(struct sk_buff *skb) ...@@ -419,11 +420,35 @@ static inline int xfrm_route_forward(struct sk_buff *skb)
__xfrm_route_forward(skb); __xfrm_route_forward(skb);
} }
extern int __xfrm_sk_clone_policy(struct sock *sk);
/*
 * Clone the per-socket policies attached to @sk.
 *
 * Fast path: when neither policy slot is set there is nothing to clone
 * and 0 is returned without leaving the inline wrapper.  Otherwise the
 * work is delegated to the out-of-line __xfrm_sk_clone_policy() and its
 * return value is passed through unchanged (callers treat non-zero as
 * failure).
 *
 * Note: the slow path must call the double-underscore helper; calling
 * this wrapper again would recurse without bound.
 */
static inline int xfrm_sk_clone_policy(struct sock *sk)
{
	if (unlikely(sk->policy[0] || sk->policy[1]))
		return __xfrm_sk_clone_policy(sk);
	return 0;
}
extern void __xfrm_sk_free_policy(struct xfrm_policy *);
/*
 * Drop every per-socket policy attached to @sk.
 *
 * Each of the two policy slots that is non-NULL is handed to
 * __xfrm_sk_free_policy() and then cleared, so calling this again on
 * the same socket does nothing.
 */
static inline void xfrm_sk_free_policy(struct sock *sk)
{
	int slot;

	for (slot = 0; slot < 2; slot++) {
		if (unlikely(sk->policy[slot] != NULL)) {
			__xfrm_sk_free_policy(sk->policy[slot]);
			sk->policy[slot] = NULL;
		}
	}
}
extern void xfrm_state_init(void); extern void xfrm_state_init(void);
extern void xfrm_input_init(void); extern void xfrm_input_init(void);
extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *); extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *);
extern struct xfrm_state *xfrm_state_alloc(void); extern struct xfrm_state *xfrm_state_alloc(void);
extern struct xfrm_state *xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol); extern struct xfrm_state *xfrm_state_find(u32 daddr, u32 saddr, struct flowi *fl, struct xfrm_tmpl *tmpl,
struct xfrm_policy *pol, int *err);
extern int xfrm_state_check_expire(struct xfrm_state *x); extern int xfrm_state_check_expire(struct xfrm_state *x);
extern void xfrm_state_insert(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x);
extern int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb);
...@@ -437,7 +462,7 @@ extern int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl); ...@@ -437,7 +462,7 @@ extern int xfrm_check_selectors(struct xfrm_state **x, int n, struct flowi *fl);
extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_rcv(struct sk_buff *skb);
extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen); extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen);
struct xfrm_policy *xfrm_policy_alloc(void); struct xfrm_policy *xfrm_policy_alloc(int gfp);
extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl); struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
...@@ -450,6 +475,7 @@ extern void xfrm_policy_flush(void); ...@@ -450,6 +475,7 @@ extern void xfrm_policy_flush(void);
extern void xfrm_policy_kill(struct xfrm_policy *); extern void xfrm_policy_kill(struct xfrm_policy *);
extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
extern struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl); extern struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl);
extern int xfrm_flush_bundles(struct xfrm_state *x);
extern wait_queue_head_t *km_waitq; extern wait_queue_head_t *km_waitq;
extern void km_warn_expired(struct xfrm_state *x); extern void km_warn_expired(struct xfrm_state *x);
......
...@@ -110,6 +110,7 @@ ...@@ -110,6 +110,7 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/ipip.h> #include <net/ipip.h>
#include <net/inet_common.h> #include <net/inet_common.h>
#include <net/xfrm.h>
#ifdef CONFIG_IP_MROUTE #ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h> #include <linux/mroute.h>
#endif #endif
...@@ -196,6 +197,8 @@ void inet_sock_release(struct sock *sk) ...@@ -196,6 +197,8 @@ void inet_sock_release(struct sock *sk)
sock_orphan(sk); sock_orphan(sk);
xfrm_sk_free_policy(sk);
#ifdef INET_REFCNT_DEBUG #ifdef INET_REFCNT_DEBUG
if (atomic_read(&sk->refcnt) != 1) if (atomic_read(&sk->refcnt) != 1)
printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n",
......
...@@ -231,6 +231,7 @@ int ah_output(struct sk_buff *skb) ...@@ -231,6 +231,7 @@ int ah_output(struct sk_buff *skb)
skb->nh.raw = skb->data; skb->nh.raw = skb->data;
x->curlft.bytes += skb->len; x->curlft.bytes += skb->len;
x->curlft.packets++;
spin_unlock_bh(&x->lock); spin_unlock_bh(&x->lock);
if ((skb->dst = dst_pop(dst)) == NULL) if ((skb->dst = dst_pop(dst)) == NULL)
goto error; goto error;
......
...@@ -857,6 +857,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ...@@ -857,6 +857,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
inet_insert_ifa(ifa); inet_insert_ifa(ifa);
} }
in_dev->cnf.no_xfrm = 1;
in_dev->cnf.no_policy = 1;
} }
ip_mc_up(in_dev); ip_mc_up(in_dev);
break; break;
...@@ -1041,7 +1043,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, ...@@ -1041,7 +1043,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write,
static struct devinet_sysctl_table { static struct devinet_sysctl_table {
struct ctl_table_header *sysctl_header; struct ctl_table_header *sysctl_header;
ctl_table devinet_vars[15]; ctl_table devinet_vars[17];
ctl_table devinet_dev[2]; ctl_table devinet_dev[2];
ctl_table devinet_conf_dir[2]; ctl_table devinet_conf_dir[2];
ctl_table devinet_proto_dir[2]; ctl_table devinet_proto_dir[2];
...@@ -1160,6 +1162,22 @@ static struct devinet_sysctl_table { ...@@ -1160,6 +1162,22 @@ static struct devinet_sysctl_table {
.mode = 0644, .mode = 0644,
.proc_handler =&proc_dointvec, .proc_handler =&proc_dointvec,
}, },
{
.ctl_name = NET_IPV4_CONF_NOXFRM,
.procname = "disable_xfrm",
.data = &ipv4_devconf.no_xfrm,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler =&proc_dointvec,
},
{
.ctl_name = NET_IPV4_CONF_NOPOLICY,
.procname = "disable_policy",
.data = &ipv4_devconf.no_policy,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler =&proc_dointvec,
},
}, },
.devinet_dev = { .devinet_dev = {
{ {
......
...@@ -199,7 +199,7 @@ esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset, ...@@ -199,7 +199,7 @@ esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset,
/* Check that skb data bits are writable. If they are not, copy data /* Check that skb data bits are writable. If they are not, copy data
* to newly created private area. If "tailbits" is given, make sure that * to newly created private area. If "tailbits" is given, make sure that
* tailbits bytes beoynd current end of skb are writable. * tailbits bytes beyond current end of skb are writable.
* *
* Returns amount of elements of scatterlist to load for subsequent * Returns amount of elements of scatterlist to load for subsequent
* transformations and pointer to writable trailer skb. * transformations and pointer to writable trailer skb.
...@@ -433,6 +433,7 @@ int esp_output(struct sk_buff *skb) ...@@ -433,6 +433,7 @@ int esp_output(struct sk_buff *skb)
skb->nh.raw = skb->data; skb->nh.raw = skb->data;
x->curlft.bytes += skb->len; x->curlft.bytes += skb->len;
x->curlft.packets++;
spin_unlock_bh(&x->lock); spin_unlock_bh(&x->lock);
if ((skb->dst = dst_pop(dst)) == NULL) if ((skb->dst = dst_pop(dst)) == NULL)
goto error; goto error;
......
...@@ -1265,6 +1265,8 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, ...@@ -1265,6 +1265,8 @@ static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr,
atomic_set(&rth->u.dst.__refcnt, 1); atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST; rth->u.dst.flags= DST_HOST;
if (in_dev->cnf.no_policy)
rth->u.dst.flags |= DST_NOPOLICY;
rth->fl.fl4_dst = daddr; rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr; rth->rt_dst = daddr;
rth->fl.fl4_tos = tos; rth->fl.fl4_tos = tos;
...@@ -1470,6 +1472,10 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, ...@@ -1470,6 +1472,10 @@ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
atomic_set(&rth->u.dst.__refcnt, 1); atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST; rth->u.dst.flags= DST_HOST;
if (in_dev->cnf.no_policy)
rth->u.dst.flags |= DST_NOPOLICY;
if (in_dev->cnf.no_xfrm)
rth->u.dst.flags |= DST_NOXFRM;
rth->fl.fl4_dst = daddr; rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr; rth->rt_dst = daddr;
rth->fl.fl4_tos = tos; rth->fl.fl4_tos = tos;
...@@ -1547,6 +1553,8 @@ out: return err; ...@@ -1547,6 +1553,8 @@ out: return err;
atomic_set(&rth->u.dst.__refcnt, 1); atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST; rth->u.dst.flags= DST_HOST;
if (in_dev->cnf.no_policy)
rth->u.dst.flags |= DST_NOPOLICY;
rth->fl.fl4_dst = daddr; rth->fl.fl4_dst = daddr;
rth->rt_dst = daddr; rth->rt_dst = daddr;
rth->fl.fl4_tos = tos; rth->fl.fl4_tos = tos;
...@@ -1719,6 +1727,7 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) ...@@ -1719,6 +1727,7 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
unsigned flags = 0; unsigned flags = 0;
struct rtable *rth; struct rtable *rth;
struct net_device *dev_out = NULL; struct net_device *dev_out = NULL;
struct in_device *in_dev = NULL;
unsigned hash; unsigned hash;
int free_res = 0; int free_res = 0;
int err; int err;
...@@ -1895,6 +1904,10 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) ...@@ -1895,6 +1904,10 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
if (dev_out->flags & IFF_LOOPBACK) if (dev_out->flags & IFF_LOOPBACK)
flags |= RTCF_LOCAL; flags |= RTCF_LOCAL;
in_dev = in_dev_get(dev_out);
if (!in_dev)
goto e_inval;
if (res.type == RTN_BROADCAST) { if (res.type == RTN_BROADCAST) {
flags |= RTCF_BROADCAST | RTCF_LOCAL; flags |= RTCF_BROADCAST | RTCF_LOCAL;
if (res.fi) { if (res.fi) {
...@@ -1903,11 +1916,8 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) ...@@ -1903,11 +1916,8 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
} }
} else if (res.type == RTN_MULTICAST) { } else if (res.type == RTN_MULTICAST) {
flags |= RTCF_MULTICAST|RTCF_LOCAL; flags |= RTCF_MULTICAST|RTCF_LOCAL;
read_lock(&inetdev_lock); if (!ip_check_mc(in_dev, oldflp->fl4_dst))
if (!__in_dev_get(dev_out) ||
!ip_check_mc(__in_dev_get(dev_out), oldflp->fl4_dst))
flags &= ~RTCF_LOCAL; flags &= ~RTCF_LOCAL;
read_unlock(&inetdev_lock);
/* If multicast route do not exist use /* If multicast route do not exist use
default one, but do not gateway in this case. default one, but do not gateway in this case.
Yes, it is hack. Yes, it is hack.
...@@ -1924,6 +1934,10 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) ...@@ -1924,6 +1934,10 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
atomic_set(&rth->u.dst.__refcnt, 1); atomic_set(&rth->u.dst.__refcnt, 1);
rth->u.dst.flags= DST_HOST; rth->u.dst.flags= DST_HOST;
if (in_dev->cnf.no_xfrm)
rth->u.dst.flags |= DST_NOXFRM;
if (in_dev->cnf.no_policy)
rth->u.dst.flags |= DST_NOPOLICY;
rth->fl.fl4_dst = oldflp->fl4_dst; rth->fl.fl4_dst = oldflp->fl4_dst;
rth->fl.fl4_tos = tos; rth->fl.fl4_tos = tos;
rth->fl.fl4_src = oldflp->fl4_src; rth->fl.fl4_src = oldflp->fl4_src;
...@@ -1959,20 +1973,17 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) ...@@ -1959,20 +1973,17 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
} }
#ifdef CONFIG_IP_MROUTE #ifdef CONFIG_IP_MROUTE
if (res.type == RTN_MULTICAST) { if (res.type == RTN_MULTICAST) {
struct in_device *in_dev = in_dev_get(dev_out); if (IN_DEV_MFORWARD(in_dev) &&
if (in_dev) { !LOCAL_MCAST(oldflp->fl4_dst)) {
if (IN_DEV_MFORWARD(in_dev) && rth->u.dst.input = ip_mr_input;
!LOCAL_MCAST(oldflp->fl4_dst)) { rth->u.dst.output = ip_mc_output;
rth->u.dst.input = ip_mr_input;
rth->u.dst.output = ip_mc_output;
}
in_dev_put(in_dev);
} }
} }
#endif #endif
} }
rt_set_nexthop(rth, &res, 0); rt_set_nexthop(rth, &res, 0);
rth->rt_flags = flags; rth->rt_flags = flags;
...@@ -1983,6 +1994,8 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp) ...@@ -1983,6 +1994,8 @@ int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
fib_res_put(&res); fib_res_put(&res);
if (dev_out) if (dev_out)
dev_put(dev_out); dev_put(dev_out);
if (in_dev)
in_dev_put(in_dev);
out: return err; out: return err;
e_inval: e_inval:
......
...@@ -257,6 +257,7 @@ ...@@ -257,6 +257,7 @@
#include <net/icmp.h> #include <net/icmp.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/xfrm.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/ioctls.h> #include <asm/ioctls.h>
...@@ -1920,6 +1921,8 @@ void tcp_destroy_sock(struct sock *sk) ...@@ -1920,6 +1921,8 @@ void tcp_destroy_sock(struct sock *sk)
tcp_kill_sk_queues(sk); tcp_kill_sk_queues(sk);
xfrm_sk_free_policy(sk);
#ifdef INET_REFCNT_DEBUG #ifdef INET_REFCNT_DEBUG
if (atomic_read(&sk->refcnt) != 1) { if (atomic_read(&sk->refcnt) != 1) {
printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n", printk(KERN_DEBUG "Destruction TCP %p delayed, c=%d\n",
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include <linux/sysctl.h> #include <linux/sysctl.h>
#include <net/tcp.h> #include <net/tcp.h>
#include <net/inet_common.h> #include <net/inet_common.h>
#include <net/xfrm.h>
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
#define SYNC_INIT 0 /* let the user enable it */ #define SYNC_INIT 0 /* let the user enable it */
...@@ -685,6 +686,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, ...@@ -685,6 +686,13 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
if ((filter = newsk->filter) != NULL) if ((filter = newsk->filter) != NULL)
sk_filter_charge(newsk, filter); sk_filter_charge(newsk, filter);
#endif #endif
if (unlikely(xfrm_sk_clone_policy(newsk))) {
/* It is still raw copy of parent, so invalidate
* destructor and make plain sk_free() */
newsk->destruct = NULL;
sk_free(newsk);
return NULL;
}
/* Now setup tcp_opt */ /* Now setup tcp_opt */
newtp = tcp_sk(newsk); newtp = tcp_sk(newsk);
......
...@@ -81,6 +81,9 @@ int xfrm4_rcv(struct sk_buff *skb) ...@@ -81,6 +81,9 @@ int xfrm4_rcv(struct sk_buff *skb)
if (x->props.replay_window) if (x->props.replay_window)
xfrm_replay_advance(x, seq); xfrm_replay_advance(x, seq);
x->curlft.bytes += skb->len;
x->curlft.packets++;
spin_unlock(&x->lock); spin_unlock(&x->lock);
xfrm_vec[xfrm_nr++] = x; xfrm_vec[xfrm_nr++] = x;
......
This diff is collapsed.
...@@ -2,16 +2,11 @@ ...@@ -2,16 +2,11 @@
#include <linux/pfkeyv2.h> #include <linux/pfkeyv2.h>
#include <linux/ipsec.h> #include <linux/ipsec.h>
/* Each xfrm_state is linked to three tables: /* Each xfrm_state may be linked to two tables:
1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
2. Hash table by daddr to find what SAs exist for given 2. Hash table by daddr to find what SAs exist for given
destination/tunnel endpoint. (output) destination/tunnel endpoint. (output)
3. (optional, NI) Radix tree by _selector_ for the case,
when we have to find a tunnel mode SA appropriate for given flow,
but do not know tunnel endpoint. At the moment we do
not support this and assume that tunnel endpoint is given
by policy. (output)
*/ */
static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED; static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
...@@ -29,6 +24,82 @@ static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; ...@@ -29,6 +24,82 @@ static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
wait_queue_head_t *km_waitq; wait_queue_head_t *km_waitq;
#define ACQ_EXPIRES 30
static void __xfrm_state_delete(struct xfrm_state *x);
/*
 * Convert a timeout expressed in seconds into jiffies.
 *
 * The result is clamped: once @secs reaches (MAX_SCHEDULE_TIMEOUT-1)/HZ
 * the function returns MAX_SCHEDULE_TIMEOUT-1 instead of multiplying,
 * which keeps secs*HZ from exceeding that bound.
 */
unsigned long make_jiffies(long secs)
{
	/* Common case first: the product stays below the clamp. */
	if (secs < (MAX_SCHEDULE_TIMEOUT-1)/HZ)
		return secs*HZ;

	return MAX_SCHEDULE_TIMEOUT-1;
}
/*
 * Per-state lifetime timer callback.
 *
 * @data is the xfrm_state pointer cast to unsigned long.
 *
 * Under x->lock this:
 *  - does nothing for a state that is already DEAD;
 *  - expires the state (km_expired() + __xfrm_state_delete()) if it is
 *    marked EXPIRED or if a hard add/use time limit has passed;
 *  - otherwise issues a soft-expiry warning (km_warn_expired()) when a
 *    soft add/use time limit has passed, and
 *  - re-arms the timer for the nearest remaining deadline, if any.
 *
 * The function always ends with xfrm_state_put(); code that arms the
 * timer takes a matching reference when mod_timer() reports the timer
 * was not already pending.
 */
static void xfrm_timer_handler(unsigned long data)
{
struct xfrm_state *x = (struct xfrm_state*)data;
unsigned long now = (unsigned long)xtime.tv_sec;
/* Seconds until the nearest deadline; LONG_MAX means "none found". */
long next = LONG_MAX;
int warn = 0;
spin_lock(&x->lock);
if (x->km.state == XFRM_STATE_DEAD)
goto out;
if (x->km.state == XFRM_STATE_EXPIRED)
goto expired;
/* Hard limit measured from the time the state was added. */
if (x->lft.hard_add_expires_seconds) {
long tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
next = tmo;
}
/* Hard limit measured from use_time; skipped while use_time is 0. */
if (x->lft.hard_use_expires_seconds && x->curlft.use_time) {
long tmo = x->lft.hard_use_expires_seconds +
x->curlft.use_time - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
next = tmo;
}
/* km.dying is set: skip the soft checks and only track the hard
 * deadlines computed above. */
if (x->km.dying)
goto resched;
/* Soft limits: a passed limit only sets warn; a pending one may
 * shorten the next wake-up. */
if (x->lft.soft_add_expires_seconds) {
long tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0)
warn = 1;
else if (tmo < next)
next = tmo;
}
if (x->lft.soft_use_expires_seconds && x->curlft.use_time) {
long tmo = x->lft.soft_use_expires_seconds +
x->curlft.use_time - now;
if (tmo <= 0)
warn = 1;
else if (tmo < next)
next = tmo;
}
if (warn)
km_warn_expired(x);
resched:
/* Re-arm for the nearest deadline; take a reference for the timer
 * when mod_timer() returns 0 (timer was not pending). */
if (next != LONG_MAX &&
!mod_timer(&x->timer, jiffies + make_jiffies(next)))
atomic_inc(&x->refcnt);
goto out;
expired:
/* x->lock is already held here, hence the __ variant of delete. */
km_expired(x);
__xfrm_state_delete(x);
out:
spin_unlock(&x->lock);
/* Drop one reference on every path through the handler. */
xfrm_state_put(x);
}
struct xfrm_state *xfrm_state_alloc(void) struct xfrm_state *xfrm_state_alloc(void)
{ {
struct xfrm_state *x; struct xfrm_state *x;
...@@ -40,6 +111,14 @@ struct xfrm_state *xfrm_state_alloc(void) ...@@ -40,6 +111,14 @@ struct xfrm_state *xfrm_state_alloc(void)
atomic_set(&x->refcnt, 1); atomic_set(&x->refcnt, 1);
INIT_LIST_HEAD(&x->bydst); INIT_LIST_HEAD(&x->bydst);
INIT_LIST_HEAD(&x->byspi); INIT_LIST_HEAD(&x->byspi);
init_timer(&x->timer);
x->timer.function = xfrm_timer_handler;
x->timer.data = (unsigned long)x;
x->curlft.add_time = (unsigned long)xtime.tv_sec;
x->lft.soft_byte_limit = XFRM_INF;
x->lft.soft_packet_limit = XFRM_INF;
x->lft.hard_byte_limit = XFRM_INF;
x->lft.hard_packet_limit = XFRM_INF;
x->lock = SPIN_LOCK_UNLOCKED; x->lock = SPIN_LOCK_UNLOCKED;
} }
return x; return x;
...@@ -48,6 +127,8 @@ struct xfrm_state *xfrm_state_alloc(void) ...@@ -48,6 +127,8 @@ struct xfrm_state *xfrm_state_alloc(void)
void __xfrm_state_destroy(struct xfrm_state *x) void __xfrm_state_destroy(struct xfrm_state *x)
{ {
BUG_TRAP(x->km.state == XFRM_STATE_DEAD); BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
if (del_timer(&x->timer))
BUG();
if (x->aalg) if (x->aalg)
kfree(x->aalg); kfree(x->aalg);
if (x->ealg) if (x->ealg)
...@@ -59,11 +140,10 @@ void __xfrm_state_destroy(struct xfrm_state *x) ...@@ -59,11 +140,10 @@ void __xfrm_state_destroy(struct xfrm_state *x)
kfree(x); kfree(x);
} }
void xfrm_state_delete(struct xfrm_state *x) static void __xfrm_state_delete(struct xfrm_state *x)
{ {
int kill = 0; int kill = 0;
spin_lock_bh(&x->lock);
if (x->km.state != XFRM_STATE_DEAD) { if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD; x->km.state = XFRM_STATE_DEAD;
kill = 1; kill = 1;
...@@ -75,14 +155,24 @@ void xfrm_state_delete(struct xfrm_state *x) ...@@ -75,14 +155,24 @@ void xfrm_state_delete(struct xfrm_state *x)
atomic_dec(&x->refcnt); atomic_dec(&x->refcnt);
} }
spin_unlock(&xfrm_state_lock); spin_unlock(&xfrm_state_lock);
if (del_timer(&x->timer))
atomic_dec(&x->refcnt);
if (atomic_read(&x->refcnt) != 1)
xfrm_flush_bundles(x);
} }
spin_unlock_bh(&x->lock);
if (kill && x->type) if (kill && x->type)
x->type->destructor(x); x->type->destructor(x);
wake_up(km_waitq); wake_up(km_waitq);
} }
/*
 * Delete state @x.
 *
 * Locking wrapper: takes x->lock with bottom halves disabled, runs
 * __xfrm_state_delete() (which expects the lock to be held by its
 * caller), then releases the lock.
 */
void xfrm_state_delete(struct xfrm_state *x)
{
spin_lock_bh(&x->lock);
__xfrm_state_delete(x);
spin_unlock_bh(&x->lock);
}
void xfrm_state_flush(u8 proto) void xfrm_state_flush(u8 proto)
{ {
int i; int i;
...@@ -109,18 +199,21 @@ void xfrm_state_flush(u8 proto) ...@@ -109,18 +199,21 @@ void xfrm_state_flush(u8 proto)
} }
struct xfrm_state * struct xfrm_state *
xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol) xfrm_state_find(u32 daddr, u32 saddr, struct flowi *fl, struct xfrm_tmpl *tmpl,
struct xfrm_policy *pol, int *err)
{ {
unsigned h = ntohl(daddr); unsigned h = ntohl(daddr);
struct xfrm_state *x; struct xfrm_state *x;
int acquire_in_progress = 0; int acquire_in_progress = 0;
int error = 0; int error = 0;
struct xfrm_state *best = NULL;
h = (h ^ (h>>16)) % XFRM_DST_HSIZE; h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
spin_lock_bh(&xfrm_state_lock); spin_lock_bh(&xfrm_state_lock);
list_for_each_entry(x, xfrm_state_bydst+h, bydst) { list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
if (daddr == x->id.daddr.xfrm4_addr && if (daddr == x->id.daddr.xfrm4_addr &&
(saddr == x->props.saddr.xfrm4_addr || !saddr || !x->props.saddr.xfrm4_addr) &&
tmpl->mode == x->props.mode && tmpl->mode == x->props.mode &&
tmpl->id.proto == x->id.proto) { tmpl->id.proto == x->id.proto) {
/* Resolution logic: /* Resolution logic:
...@@ -139,9 +232,11 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm ...@@ -139,9 +232,11 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm
if (x->km.state == XFRM_STATE_VALID) { if (x->km.state == XFRM_STATE_VALID) {
if (!xfrm4_selector_match(&x->sel, fl)) if (!xfrm4_selector_match(&x->sel, fl))
continue; continue;
atomic_inc(&x->refcnt); if (!best ||
spin_unlock_bh(&xfrm_state_lock); best->km.dying > x->km.dying ||
return x; (best->km.dying == x->km.dying &&
best->curlft.add_time < x->curlft.add_time))
best = x;
} else if (x->km.state == XFRM_STATE_ACQ) { } else if (x->km.state == XFRM_STATE_ACQ) {
acquire_in_progress = 1; acquire_in_progress = 1;
} else if (x->km.state == XFRM_STATE_ERROR || } else if (x->km.state == XFRM_STATE_ERROR ||
...@@ -152,6 +247,12 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm ...@@ -152,6 +247,12 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm
} }
} }
if (best) {
atomic_inc(&best->refcnt);
spin_unlock_bh(&xfrm_state_lock);
return best;
}
x = NULL; x = NULL;
if (!error && !acquire_in_progress && if (!error && !acquire_in_progress &&
((x = xfrm_state_alloc()) != NULL)) { ((x = xfrm_state_alloc()) != NULL)) {
...@@ -172,10 +273,10 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm ...@@ -172,10 +273,10 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm
x->sel.ifindex = fl->oif; x->sel.ifindex = fl->oif;
x->id = tmpl->id; x->id = tmpl->id;
if (x->id.daddr.xfrm4_addr == 0) if (x->id.daddr.xfrm4_addr == 0)
x->id.daddr = x->sel.daddr; x->id.daddr.xfrm4_addr = daddr;
x->props.saddr = tmpl->saddr; x->props.saddr = tmpl->saddr;
if (x->props.saddr.xfrm4_addr == 0) if (x->props.saddr.xfrm4_addr == 0)
x->props.saddr = x->sel.saddr; x->props.saddr.xfrm4_addr = saddr;
x->props.mode = tmpl->mode; x->props.mode = tmpl->mode;
if (km_query(x, tmpl, pol) == 0) { if (km_query(x, tmpl, pol) == 0) {
...@@ -188,6 +289,9 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm ...@@ -188,6 +289,9 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm
list_add(&x->byspi, xfrm_state_byspi+h); list_add(&x->byspi, xfrm_state_byspi+h);
atomic_inc(&x->refcnt); atomic_inc(&x->refcnt);
} }
x->lft.hard_add_expires_seconds = ACQ_EXPIRES;
atomic_inc(&x->refcnt);
mod_timer(&x->timer, ACQ_EXPIRES*HZ);
} else { } else {
x->km.state = XFRM_STATE_DEAD; x->km.state = XFRM_STATE_DEAD;
xfrm_state_put(x); xfrm_state_put(x);
...@@ -195,6 +299,8 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm ...@@ -195,6 +299,8 @@ xfrm_state_find(u32 daddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm
} }
} }
spin_unlock_bh(&xfrm_state_lock); spin_unlock_bh(&xfrm_state_lock);
if (!x)
*err = acquire_in_progress ? -EAGAIN : -ENOMEM;
return x; return x;
} }
...@@ -213,26 +319,33 @@ void xfrm_state_insert(struct xfrm_state *x) ...@@ -213,26 +319,33 @@ void xfrm_state_insert(struct xfrm_state *x)
list_add(&x->byspi, xfrm_state_byspi+h); list_add(&x->byspi, xfrm_state_byspi+h);
atomic_inc(&x->refcnt); atomic_inc(&x->refcnt);
if (!mod_timer(&x->timer, jiffies + HZ))
atomic_inc(&x->refcnt);
spin_unlock_bh(&xfrm_state_lock); spin_unlock_bh(&xfrm_state_lock);
wake_up(km_waitq); wake_up(km_waitq);
} }
int xfrm_state_check_expire(struct xfrm_state *x) int xfrm_state_check_expire(struct xfrm_state *x)
{ {
if (!x->curlft.use_time)
x->curlft.use_time = (unsigned long)xtime.tv_sec;
if (x->km.state != XFRM_STATE_VALID) if (x->km.state != XFRM_STATE_VALID)
return -EINVAL; return -EINVAL;
if (x->lft.hard_byte_limit && if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.bytes >= x->lft.hard_byte_limit) { x->curlft.packets >= x->lft.hard_packet_limit) {
km_expired(x); km_expired(x);
if (!mod_timer(&x->timer, jiffies + ACQ_EXPIRES*HZ))
atomic_inc(&x->refcnt);
return -EINVAL; return -EINVAL;
} }
if (x->km.warn_bytes && if (!x->km.dying &&
x->curlft.bytes >= x->km.warn_bytes) { (x->curlft.bytes >= x->lft.soft_byte_limit ||
x->km.warn_bytes = 0; x->curlft.packets >= x->lft.soft_packet_limit))
km_warn_expired(x); km_warn_expired(x);
}
return 0; return 0;
} }
...@@ -309,6 +422,9 @@ xfrm_find_acq(u8 mode, u16 reqid, u8 proto, u32 daddr, u32 saddr) ...@@ -309,6 +422,9 @@ xfrm_find_acq(u8 mode, u16 reqid, u8 proto, u32 daddr, u32 saddr)
x0->id.proto = proto; x0->id.proto = proto;
x0->props.mode = mode; x0->props.mode = mode;
x0->props.reqid = reqid; x0->props.reqid = reqid;
x0->lft.hard_add_expires_seconds = ACQ_EXPIRES;
atomic_inc(&x0->refcnt);
mod_timer(&x0->timer, jiffies + ACQ_EXPIRES*HZ);
atomic_inc(&x0->refcnt); atomic_inc(&x0->refcnt);
list_add_tail(&x0->bydst, xfrm_state_bydst+h); list_add_tail(&x0->bydst, xfrm_state_bydst+h);
wake_up(km_waitq); wake_up(km_waitq);
...@@ -476,6 +592,7 @@ void km_warn_expired(struct xfrm_state *x) ...@@ -476,6 +592,7 @@ void km_warn_expired(struct xfrm_state *x)
{ {
struct xfrm_mgr *km; struct xfrm_mgr *km;
x->km.dying = 1;
read_lock(&xfrm_km_lock); read_lock(&xfrm_km_lock);
list_for_each_entry(km, &xfrm_km_list, list) list_for_each_entry(km, &xfrm_km_list, list)
km->notify(x, 0); km->notify(x, 0);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment