Commit c303a9b2 authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec

Steffen Klassert says:

====================
pull request (net): ipsec 2019-01-25

1) Several patches to fix the fallout from the recent
   tree based policy lookup work. From Florian Westphal.

2) Fix VTI for IPCOMP for 'not compressed' IPCOMP packets.
   We need an extra IPIP handler to process these packets
   correctly. From Su Yanjun.

3) Fix validation of template and selector families for
   MODE_ROUTEOPTIMIZATION with ipv4-in-ipv6 packets.
   This can lead to a stack-out-of-bounds because
   flowi4 struct is treated as flowi6 struct.
   Fix from Florian Westphal.

4) Restore the default behaviour of the xfrm set-mark
   in the output path. This was changed accidentally
   when mark setting was extended to the input path.
   From Benedict Wong.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1fc7f56d e2612cd4
......@@ -74,6 +74,33 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
return 0;
}
/*
 * Input path for IPIP packets handled by the VTI driver.
 *
 * A tunnel is looked up with ip_tunnel_lookup() using the receiving
 * device's ifindex and the source/destination addresses from the packet's
 * IP header. If one is found and the inbound xfrm policy check passes, the
 * tunnel is stored in the skb's xfrm tunnel control block, skb->dev is
 * pointed at the tunnel device and the packet is handed to xfrm_input().
 *
 * Returns -EINVAL when no tunnel is found, 0 after freeing a packet that
 * failed the policy check, and otherwise whatever xfrm_input() returns.
 */
static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
			  int encap_type)
{
	const struct iphdr *iph = ip_hdr(skb);
	struct net *net = dev_net(skb->dev);
	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
	struct ip_tunnel *tunnel;

	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
				  iph->saddr, iph->daddr, 0);
	if (!tunnel)
		return -EINVAL;

	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		/* policy rejected the packet: consume it here */
		kfree_skb(skb);
		return 0;
	}

	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
	skb->dev = tunnel->dev;

	return xfrm_input(skb, nexthdr, spi, encap_type);
}
static int vti_rcv(struct sk_buff *skb)
{
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
......@@ -82,6 +109,14 @@ static int vti_rcv(struct sk_buff *skb)
return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
}
static int vti_rcv_ipip(struct sk_buff *skb)
{
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0);
}
static int vti_rcv_cb(struct sk_buff *skb, int err)
{
unsigned short family;
......@@ -435,6 +470,12 @@ static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
.priority = 100,
};
/*
 * xfrm tunnel handler for IPIP packets: received packets go to
 * vti_rcv_ipip(), errors to vti4_err(). It is registered for AF_INET with
 * xfrm4_tunnel_register() in vti_init().
 */
static struct xfrm_tunnel ipip_handler __read_mostly = {
.handler = vti_rcv_ipip,
.err_handler = vti4_err,
.priority = 0,
};
static int __net_init vti_init_net(struct net *net)
{
int err;
......@@ -603,6 +644,13 @@ static int __init vti_init(void)
if (err < 0)
goto xfrm_proto_comp_failed;
msg = "ipip tunnel";
err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
if (err < 0) {
pr_info("%s: cant't register tunnel\n",__func__);
goto xfrm_tunnel_failed;
}
msg = "netlink interface";
err = rtnl_link_register(&vti_link_ops);
if (err < 0)
......@@ -612,6 +660,8 @@ static int __init vti_init(void)
rtnl_link_failed:
xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
xfrm_tunnel_failed:
xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
xfrm_proto_comp_failed:
xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
xfrm_proto_ah_failed:
......
......@@ -680,16 +680,6 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_unlock(&hash_resize_mutex);
}
/*
 * Re-initialise the hhead list head of every bin linked on
 * net->xfrm.inexact_bins.
 *
 * The caller must hold net->xfrm.xfrm_policy_lock; this is asserted via
 * lockdep.
 */
static void xfrm_hash_reset_inexact_table(struct net *net)
{
	struct xfrm_pol_inexact_bin *bin;

	lockdep_assert_held(&net->xfrm.xfrm_policy_lock);

	list_for_each_entry(bin, &net->xfrm.inexact_bins, inexact_bins)
		INIT_HLIST_HEAD(&bin->hhead);
}
/* Make sure *pol can be inserted into fastbin.
* Useful to check that later insert requests will be successful
* (provided xfrm_policy_lock is held throughout).
......@@ -833,13 +823,13 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
u16 family)
{
unsigned int matched_s, matched_d;
struct hlist_node *newpos = NULL;
struct xfrm_policy *policy, *p;
matched_s = 0;
matched_d = 0;
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
struct hlist_node *newpos = NULL;
bool matches_s, matches_d;
if (!policy->bydst_reinsert)
......@@ -849,16 +839,19 @@ static void xfrm_policy_inexact_list_reinsert(struct net *net,
policy->bydst_reinsert = false;
hlist_for_each_entry(p, &n->hhead, bydst) {
if (policy->priority >= p->priority)
if (policy->priority > p->priority)
newpos = &p->bydst;
else if (policy->priority == p->priority &&
policy->pos > p->pos)
newpos = &p->bydst;
else
break;
}
if (newpos)
hlist_add_behind(&policy->bydst, newpos);
hlist_add_behind_rcu(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, &n->hhead);
hlist_add_head_rcu(&policy->bydst, &n->hhead);
/* paranoia checks follow.
* Check that the reinserted policy matches at least
......@@ -893,12 +886,13 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
struct rb_root *new,
u16 family)
{
struct rb_node **p, *parent = NULL;
struct xfrm_pol_inexact_node *node;
struct rb_node **p, *parent;
/* we should not have another subtree here */
WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
restart:
parent = NULL;
p = &new->rb_node;
while (*p) {
u8 prefixlen;
......@@ -918,12 +912,11 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
} else {
struct xfrm_policy *tmp;
hlist_for_each_entry(tmp, &node->hhead, bydst)
tmp->bydst_reinsert = true;
hlist_for_each_entry(tmp, &n->hhead, bydst)
hlist_for_each_entry(tmp, &n->hhead, bydst) {
tmp->bydst_reinsert = true;
hlist_del_rcu(&tmp->bydst);
}
INIT_HLIST_HEAD(&node->hhead);
xfrm_policy_inexact_list_reinsert(net, node, family);
if (node->prefixlen == n->prefixlen) {
......@@ -935,8 +928,7 @@ static void xfrm_policy_inexact_node_reinsert(struct net *net,
kfree_rcu(n, rcu);
n = node;
n->prefixlen = prefixlen;
*p = new->rb_node;
parent = NULL;
goto restart;
}
}
......@@ -965,12 +957,11 @@ static void xfrm_policy_inexact_node_merge(struct net *net,
family);
}
hlist_for_each_entry(tmp, &v->hhead, bydst)
tmp->bydst_reinsert = true;
hlist_for_each_entry(tmp, &n->hhead, bydst)
hlist_for_each_entry(tmp, &v->hhead, bydst) {
tmp->bydst_reinsert = true;
hlist_del_rcu(&tmp->bydst);
}
INIT_HLIST_HEAD(&n->hhead);
xfrm_policy_inexact_list_reinsert(net, n, family);
}
......@@ -1235,6 +1226,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
write_seqcount_begin(&xfrm_policy_hash_generation);
/* make sure that we can insert the indirect policies again before
* we start with destructive action.
......@@ -1278,10 +1270,14 @@ static void xfrm_hash_rebuild(struct work_struct *work)
}
/* reset the bydst and inexact table in all directions */
xfrm_hash_reset_inexact_table(net);
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
struct hlist_node *n;
hlist_for_each_entry_safe(policy, n,
&net->xfrm.policy_inexact[dir],
bydst_inexact_list)
hlist_del_init(&policy->bydst_inexact_list);
hmask = net->xfrm.policy_bydst[dir].hmask;
odst = net->xfrm.policy_bydst[dir].table;
for (i = hmask; i >= 0; i--)
......@@ -1313,6 +1309,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
hlist_del_rcu(&policy->bydst);
if (!chain) {
void *p = xfrm_policy_inexact_insert(policy, dir, 0);
......@@ -1334,6 +1333,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
out_unlock:
__xfrm_policy_inexact_flush(net);
write_seqcount_end(&xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex);
......@@ -2600,7 +2600,10 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
dst_copy_metrics(dst1, dst);
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
__u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
__u32 mark = 0;
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
family = xfrm[i]->props.family;
dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
......
......@@ -1488,10 +1488,15 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
if (!ut[i].family)
ut[i].family = family;
if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&
(ut[i].family != prev_family))
return -EINVAL;
switch (ut[i].mode) {
case XFRM_MODE_TUNNEL:
case XFRM_MODE_BEET:
break;
default:
if (ut[i].family != prev_family)
return -EINVAL;
break;
}
if (ut[i].mode >= XFRM_MODE_MAX)
return -EINVAL;
......
......@@ -28,6 +28,19 @@ KEY_AES=0x0123456789abcdef0123456789012345
SPI1=0x1
SPI2=0x2
# Install the pair of ESP tunnel-mode policies for one tunnel endpoint.
#   $1  netns to configure
#   $2  tunnel endpoint address on this side
#   $3  tunnel endpoint address of the peer
#   $4  network on this side (src selector for 'out', dst selector for 'fwd')
#   $5  network on the peer side (dst selector for 'out', src selector for 'fwd')
do_esp_policy() {
	local netns=$1
	local local_ep=$2
	local remote_ep=$3
	local local_net=$4
	local remote_net=$5

	# 'out' policy: encrypt packets on their way out; this also covers
	# forwarded packets that need encapsulation.
	ip -net $netns xfrm policy add src $local_net dst $remote_net dir out tmpl src $local_ep dst $remote_ep proto esp mode tunnel priority 100 action allow

	# 'fwd' policy: forward decrypted packets once esp processing is done.
	ip -net $netns xfrm policy add src $remote_net dst $local_net dir fwd tmpl src $remote_ep dst $local_ep proto esp mode tunnel priority 100 action allow
}
do_esp() {
local ns=$1
local me=$2
......@@ -40,10 +53,59 @@ do_esp() {
ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
# to fwd decrypted packets after esp processing:
ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
do_esp_policy $ns $me $remote $lnet $rnet
}
# add policies with different netmasks, to make sure kernel carries
# the policies contained within new netmask over when search tree is
# re-built.
# peer netns that are supposed to be encapsulated via esp have addresses
# in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively.
#
# Adding a policy for '10.0.1.0/23' will make it necessary to
# alter the prefix of 10.0.1.0 subnet.
# In case new prefix overlaps with existing node, the node and all
# policies it carries need to be merged with the existing one(s).
#
# Do that here.
do_overlap()
{
	local netns=$1

	# neither network exists in the policy database yet, so new nodes are
	# added to the tree.
	ip -net $netns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block

	# the dst node exists already: a new node goes into the 10.0.0.0/24 tree.
	ip -net $netns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block

	# a 10.2.0.0/23 node, but for a different dst.
	ip -net $netns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block

	# dst now overlaps with the 10.0.1.0/24 ESP policy in fwd.
	# The kernel must 'promote' the existing node (10.0.0.0/24) to
	# 10.0.0.0/23. Since 10.0.0.0/23 also contains the existing
	# 10.0.1.0/24, that node has to be merged as well, including its
	# source-sorted subtrees.
	#
	# before:
	# 10.0.0.0/24 (node 1 in dst tree of the bin)
	#    10.1.0.0/24 (node in src tree of dst node 1)
	#    10.2.0.0/24 (node in src tree of dst node 1)
	# 10.0.1.0/24 (node 2 in dst tree of the bin)
	#    10.0.2.0/24 (node in src tree of dst node 2)
	#    10.2.0.0/24 (node in src tree of dst node 2)
	#
	# The 'policy add' below uses dst '10.0.0.0/23', so dst node 1 and
	# dst node 2 have to be merged together with their sub-trees.
	# Duplicates are not allowed, so the policies of the src node present
	# under both dst nodes (10.2.0.0/24) are merged as well.
	#
	# after the 'add', the internal search tree should look like this:
	# 10.0.0.0/23 (node in dst tree of bin)
	#    10.0.2.0/24 (node in src tree of dst node)
	#    10.1.0.0/24 (node in src tree of dst node)
	#    10.2.0.0/24 (node in src tree of dst node)
	#
	# i.e. the 10.0.0.0/24 and 10.0.1.0/24 nodes were merged into 10.0.0.0/23.
	ip -net $netns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block
}
do_esp_policy_get_check() {
......@@ -160,6 +222,41 @@ check_xfrm() {
return $lret
}
# Run the .254, .253 and .2 checks through check_xfrm and print one
# PASS/FAIL line per check, each tagged with $1.
# Returns 0 when every check passed, 1 otherwise.
check_exceptions()
{
	logpostfix="$1"
	local lret=0

	# .254: an exception is in place, so the ping must stay out of the tunnel.
	check_xfrm 0 254
	if [ $? -eq 0 ]; then
		echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)"
	else
		echo "FAIL: expected ping to .254 to fail ($logpostfix)"
		lret=1
	fi

	# .253: the direct policy exception means ipsec must be used.
	check_xfrm 1 253
	if [ $? -eq 0 ]; then
		echo "PASS: direct policy matches ($logpostfix)"
	else
		echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)"
		lret=1
	fi

	# .2: ordinary tunnel traffic, ipsec must be used.
	check_xfrm 1 2
	if [ $? -eq 0 ]; then
		echo "PASS: policy matches ($logpostfix)"
	else
		echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)"
		lret=1
	fi

	return $lret
}
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
......@@ -270,33 +367,45 @@ do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
# ping to .254 should now be excluded from the tunnel
check_xfrm 0 254
check_exceptions "exceptions"
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .254 to fail"
ret=1
else
echo "PASS: ping to .254 bypassed ipsec tunnel"
fi
# ping to .253 should use ipsec due to direct policy exception.
check_xfrm 1 253
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .253 to use ipsec tunnel"
ret=1
else
echo "PASS: direct policy matches"
fi
# insert block policies with adjacent/overlapping netmasks
do_overlap ns3
# ping to .2 should use ipsec.
check_xfrm 1 2
check_exceptions "exceptions and block policies"
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .2 to use ipsec tunnel"
ret=1
else
echo "PASS: policy matches"
fi
for n in ns3 ns4;do
ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125
sleep $((RANDOM%5))
done
check_exceptions "exceptions and block policies after hresh changes"
# full flush of policy db, check everything gets freed incl. internal meta data
ip -net ns3 xfrm policy flush
do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
# move inexact policies to hash table
ip -net ns3 xfrm policy set hthresh4 16 16
sleep $((RANDOM%5))
check_exceptions "exceptions and block policies after hthresh change in ns3"
# restore original hthresh settings -- move policies back to tables
for n in ns3 ns4;do
ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
sleep $((RANDOM%5))
done
check_exceptions "exceptions and block policies after hresh change to normal"
for i in 1 2 3 4;do ip netns del ns$i;done
exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment