Commit 90ffa72f authored by David S. Miller

Merge branch 'ipv6-gre-collect_md'

William Tu says:

====================
add ip6 gre and gretap collect_md mode

Similar to gre, vxlan, geneve, ipip tunnels, allow ip6gretap tunnels to
operate in collect metadata mode.  The first patch adds the support to
ip6_gre.c. The second patch enables unsetting the csum for ipv6 tunnel,
when using bpf_skb_[gs]et_tunnel_key() helpers.  Finally, the last patch
adds the ip6 gre and gretap tunnel test cases to BPF sample code.

The corresponding iproute2 patch:
https://marc.info/?l=linux-netdev&m=151216943128087&w=2
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents c34bc2b5 56ddd302
...@@ -3026,9 +3026,10 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, ...@@ -3026,9 +3026,10 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
IPV6_FLOWLABEL_MASK; IPV6_FLOWLABEL_MASK;
} else { } else {
info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
}
if (flags & BPF_F_ZERO_CSUM_TX) if (flags & BPF_F_ZERO_CSUM_TX)
info->key.tun_flags &= ~TUNNEL_CSUM; info->key.tun_flags &= ~TUNNEL_CSUM;
}
return 0; return 0;
} }
......
...@@ -56,6 +56,7 @@ ...@@ -56,6 +56,7 @@
#include <net/ip6_tunnel.h> #include <net/ip6_tunnel.h>
#include <net/gre.h> #include <net/gre.h>
#include <net/erspan.h> #include <net/erspan.h>
#include <net/dst_metadata.h>
static bool log_ecn_error = true; static bool log_ecn_error = true;
...@@ -69,6 +70,7 @@ static unsigned int ip6gre_net_id __read_mostly; ...@@ -69,6 +70,7 @@ static unsigned int ip6gre_net_id __read_mostly;
struct ip6gre_net { struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
struct ip6_tnl __rcu *collect_md_tun;
struct net_device *fb_tunnel_dev; struct net_device *fb_tunnel_dev;
}; };
...@@ -229,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, ...@@ -229,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand) if (cand)
return cand; return cand;
t = rcu_dereference(ign->collect_md_tun);
if (t && t->dev->flags & IFF_UP)
return t;
dev = ign->fb_tunnel_dev; dev = ign->fb_tunnel_dev;
if (dev->flags & IFF_UP) if (dev->flags & IFF_UP)
return netdev_priv(dev); return netdev_priv(dev);
...@@ -264,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) ...@@ -264,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{ {
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
if (t->parms.collect_md)
rcu_assign_pointer(ign->collect_md_tun, t);
rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t); rcu_assign_pointer(*tp, t);
} }
...@@ -273,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) ...@@ -273,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp; struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter; struct ip6_tnl *iter;
if (t->parms.collect_md)
rcu_assign_pointer(ign->collect_md_tun, NULL);
for (tp = ip6gre_bucket(ign, t); for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL; (iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) { tp = &iter->next) {
...@@ -463,7 +475,22 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi) ...@@ -463,7 +475,22 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key, &ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto); tpi->proto);
if (tunnel) { if (tunnel) {
if (tunnel->parms.collect_md) {
struct metadata_dst *tun_dst;
__be64 tun_id;
__be16 flags;
flags = tpi->flags;
tun_id = key32_to_tunnel_id(tpi->key);
tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
if (!tun_dst)
return PACKET_REJECT;
ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
} else {
ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error); ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
}
return PACKET_RCVD; return PACKET_RCVD;
} }
...@@ -633,8 +660,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, ...@@ -633,8 +660,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
/* Push GRE header. */ /* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
__be16 flags;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info ||
!(tun_info->mode & IP_TUNNEL_INFO_TX) ||
ip_tunnel_info_af(tun_info) != AF_INET6))
return -EINVAL;
key = &tun_info->key;
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = IPPROTO_GRE;
fl6->daddr = key->u.ipv6.dst;
fl6->flowlabel = key->label;
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id), 0);
} else {
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE); NEXTHDR_GRE);
...@@ -645,13 +702,15 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) ...@@ -645,13 +702,15 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev); struct ip6_tnl *t = netdev_priv(dev);
int encap_limit = -1; int encap_limit = -1;
struct flowi6 fl6; struct flowi6 fl6;
__u8 dsfield; __u8 dsfield = 0;
__u32 mtu; __u32 mtu;
int err; int err;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
prepare_ip6gre_xmit_ipv4(skb, dev, &fl6, &dsfield, &encap_limit); if (!t->parms.collect_md)
prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
&dsfield, &encap_limit);
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err) if (err)
...@@ -676,14 +735,15 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) ...@@ -676,14 +735,15 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int encap_limit = -1; int encap_limit = -1;
struct flowi6 fl6; struct flowi6 fl6;
__u8 dsfield; __u8 dsfield = 0;
__u32 mtu; __u32 mtu;
int err; int err;
if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
return -1; return -1;
if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit)) if (!t->parms.collect_md &&
prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
return -1; return -1;
if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM))) if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
...@@ -731,6 +791,7 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) ...@@ -731,6 +791,7 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit; encap_limit = t->parms.encap_limit;
if (!t->parms.collect_md)
memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)); err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
...@@ -1201,6 +1262,11 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) ...@@ -1201,6 +1262,11 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8; dev->mtu -= 8;
if (tunnel->parms.collect_md) {
dev->features |= NETIF_F_NETNS_LOCAL;
netif_keep_dst(dev);
}
return 0; return 0;
} }
...@@ -1215,6 +1281,9 @@ static int ip6gre_tunnel_init(struct net_device *dev) ...@@ -1215,6 +1281,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev); tunnel = netdev_priv(dev);
if (tunnel->parms.collect_md)
return 0;
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
...@@ -1464,6 +1533,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[], ...@@ -1464,6 +1533,9 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_ERSPAN_INDEX]) if (data[IFLA_GRE_ERSPAN_INDEX])
parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
} }
static int ip6gre_tap_init(struct net_device *dev) static int ip6gre_tap_init(struct net_device *dev)
...@@ -1622,8 +1694,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, ...@@ -1622,8 +1694,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
ip6gre_netlink_parms(data, &nt->parms); ip6gre_netlink_parms(data, &nt->parms);
if (nt->parms.collect_md) {
if (rtnl_dereference(ign->collect_md_tun))
return -EEXIST;
} else {
if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
return -EEXIST; return -EEXIST;
}
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev); eth_hw_addr_random(dev);
...@@ -1742,6 +1819,8 @@ static size_t ip6gre_get_size(const struct net_device *dev) ...@@ -1742,6 +1819,8 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) + nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */ /* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) + nla_total_size(2) +
/* IFLA_GRE_COLLECT_METADATA */
nla_total_size(0) +
/* IFLA_GRE_FWMARK */ /* IFLA_GRE_FWMARK */
nla_total_size(4) + nla_total_size(4) +
/* IFLA_GRE_ERSPAN_INDEX */ /* IFLA_GRE_ERSPAN_INDEX */
...@@ -1781,6 +1860,11 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) ...@@ -1781,6 +1860,11 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags)) t->encap.flags))
goto nla_put_failure; goto nla_put_failure;
if (p->collect_md) {
if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
goto nla_put_failure;
}
return 0; return 0;
nla_put_failure: nla_put_failure:
...@@ -1803,6 +1887,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { ...@@ -1803,6 +1887,7 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 }, [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
}; };
......
...@@ -861,7 +861,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb, ...@@ -861,7 +861,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
struct metadata_dst *tun_dst, struct metadata_dst *tun_dst,
bool log_ecn_err) bool log_ecn_err)
{ {
return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate, return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
log_ecn_err); log_ecn_err);
} }
EXPORT_SYMBOL(ip6_tnl_rcv); EXPORT_SYMBOL(ip6_tnl_rcv);
...@@ -979,6 +979,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, ...@@ -979,6 +979,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
int ret = 0; int ret = 0;
struct net *net = t->net; struct net *net = t->net;
if (t->parms.collect_md)
return 1;
if ((p->flags & IP6_TNL_F_CAP_XMIT) || if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) && ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
(ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) { (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
......
...@@ -81,6 +81,49 @@ int _gre_get_tunnel(struct __sk_buff *skb) ...@@ -81,6 +81,49 @@ int _gre_get_tunnel(struct __sk_buff *skb)
return TC_ACT_OK; return TC_ACT_OK;
} }
/*
 * Program placed in section "ip6gretap_set_tunnel".
 *
 * Builds a tunnel key (id 2, TTL 64, flow label 0xabcde, IPv6 remote ::11)
 * and hands it to bpf_skb_set_tunnel_key() with the IPv6 and zero-checksum
 * TX flags.
 *
 * Returns TC_ACT_OK when the helper succeeds; on a negative helper result
 * the error is reported through ERROR() and TC_ACT_SHOT is returned.
 */
SEC("ip6gretap_set_tunnel")
int _ip6gretap_set_tunnel(struct __sk_buff *skb)
{
	struct bpf_tunnel_key tkey;
	int ret;

	/* Zero every byte of the key before the individual fields are set. */
	__builtin_memset(&tkey, 0x0, sizeof(tkey));

	tkey.tunnel_id = 2;
	tkey.tunnel_tos = 0;
	tkey.tunnel_ttl = 64;
	tkey.tunnel_label = 0xabcde;
	tkey.remote_ipv6[3] = _htonl(0x11); /* ::11 */

	ret = bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey),
				     BPF_F_TUNINFO_IPV6 |
				     BPF_F_ZERO_CSUM_TX);
	if (ret >= 0)
		return TC_ACT_OK;

	ERROR(ret);
	return TC_ACT_SHOT;
}
/*
 * Program placed in section "ip6gretap_get_tunnel".
 *
 * Fetches the tunnel key of the packet with bpf_skb_get_tunnel_key()
 * (IPv6 flavour) and traces the tunnel id, the last 32-bit word of the
 * remote IPv6 address and the flow label.
 *
 * Returns TC_ACT_OK after tracing; on a negative helper result the error
 * is reported through ERROR() and TC_ACT_SHOT is returned.
 */
SEC("ip6gretap_get_tunnel")
int _ip6gretap_get_tunnel(struct __sk_buff *skb)
{
	char fmt[] = "key %d remote ip6 ::%x label %x\n";
	struct bpf_tunnel_key tkey;
	int ret;

	ret = bpf_skb_get_tunnel_key(skb, &tkey, sizeof(tkey),
				     BPF_F_TUNINFO_IPV6);
	if (ret >= 0) {
		bpf_trace_printk(fmt, sizeof(fmt), tkey.tunnel_id,
				 tkey.remote_ipv6[3], tkey.tunnel_label);
		return TC_ACT_OK;
	}

	ERROR(ret);
	return TC_ACT_SHOT;
}
SEC("erspan_set_tunnel") SEC("erspan_set_tunnel")
int _erspan_set_tunnel(struct __sk_buff *skb) int _erspan_set_tunnel(struct __sk_buff *skb)
{ {
......
...@@ -33,6 +33,30 @@ function add_gre_tunnel { ...@@ -33,6 +33,30 @@ function add_gre_tunnel {
ip addr add dev $DEV 10.1.1.200/24 ip addr add dev $DEV 10.1.1.200/24
} }
# Set up both ends of an IPv6 GRE tunnel of type $TYPE.
#   $DEV_NS - tunnel device created inside namespace at_ns0 (fixed key/endpoints)
#   $DEV    - tunnel device created in the current namespace with "external"
# Also gives the veth pair its IPv6 underlay addresses (::11 and ::22).
add_ip6gretap_tunnel() {
	# underlay: IPv6 addresses on the veth pair
	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
	ip netns exec at_ns0 ip link set dev veth0 up
	ip addr add dev veth1 ::22/96
	ip link set dev veth1 up

	# tunnel endpoint inside at_ns0: static key, flow label and addresses
	ip netns exec at_ns0 ip link add dev $DEV_NS type $TYPE \
		flowlabel 0xbcdef key 2 local ::11 remote ::22
	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
	ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
	ip netns exec at_ns0 ip link set dev $DEV_NS up

	# tunnel endpoint in the current namespace: created with "external"
	ip link add dev $DEV type $TYPE external
	ip addr add dev $DEV 10.1.1.200/24
	ip addr add dev $DEV fc80::200/24
	ip link set dev $DEV up
}
function add_erspan_tunnel { function add_erspan_tunnel {
# in namespace # in namespace
ip netns exec at_ns0 \ ip netns exec at_ns0 \
...@@ -113,6 +137,41 @@ function test_gre { ...@@ -113,6 +137,41 @@ function test_gre {
cleanup cleanup
} }
# ip6gre test case: create the tunnel pair, attach the ip6gretap BPF
# programs to $DEV, then ping across the underlay and the overlay.
test_ip6gre() {
	TYPE=ip6gre
	DEV_NS=ip6gre00
	DEV=ip6gre11

	config_device
	# the ip6gretap setup function is shared with this tunnel type
	add_ip6gretap_tunnel
	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel

	# underlay reachability
	ping6 -c 4 ::11

	# overlay, IPv4 carried over IPv6
	ip netns exec at_ns0 ping -c 1 10.1.1.200
	ping -c 1 10.1.1.100

	# overlay, IPv6 carried over IPv6
	ip netns exec at_ns0 ping6 -c 1 fc80::200

	cleanup
}
# ip6gretap test case: create the tunnel pair, attach the ip6gretap BPF
# programs to $DEV, then ping across the underlay and the overlay.
test_ip6gretap() {
	TYPE=ip6gretap
	DEV_NS=ip6gretap00
	DEV=ip6gretap11

	config_device
	add_ip6gretap_tunnel
	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel

	# underlay reachability
	ping6 -c 4 ::11

	# overlay, IPv4 carried over IPv6
	ip netns exec at_ns0 ping -i .2 -c 1 10.1.1.200
	ping -c 1 10.1.1.100

	# overlay, IPv6 carried over IPv6
	ip netns exec at_ns0 ping6 -c 1 fc80::200

	cleanup
}
function test_erspan { function test_erspan {
TYPE=erspan TYPE=erspan
DEV_NS=erspan00 DEV_NS=erspan00
...@@ -175,6 +234,8 @@ function cleanup { ...@@ -175,6 +234,8 @@ function cleanup {
ip link del veth1 ip link del veth1
ip link del ipip11 ip link del ipip11
ip link del gretap11 ip link del gretap11
ip link del ip6gre11
ip link del ip6gretap11
ip link del vxlan11 ip link del vxlan11
ip link del geneve11 ip link del geneve11
ip link del erspan11 ip link del erspan11
...@@ -187,6 +248,10 @@ trap cleanup 0 2 3 6 9 ...@@ -187,6 +248,10 @@ trap cleanup 0 2 3 6 9
cleanup cleanup
echo "Testing GRE tunnel..." echo "Testing GRE tunnel..."
test_gre test_gre
echo "Testing IP6GRE tunnel..."
test_ip6gre
echo "Testing IP6GRETAP tunnel..."
test_ip6gretap
echo "Testing ERSPAN tunnel..." echo "Testing ERSPAN tunnel..."
test_erspan test_erspan
echo "Testing VXLAN tunnel..." echo "Testing VXLAN tunnel..."
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment