Commit 2e62fa69 authored by David S. Miller's avatar David S. Miller

Merge branch 'vxlan_group_policy_extension'

Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements supports for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.

Simple usage example:

10.1.1.1:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
   # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
   # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3f3558bb 1dd144cf
......@@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}
/* Find VXLAN socket based on network namespace, address family and UDP port */
static struct vxlan_sock *vxlan_find_sock(struct net *net,
sa_family_t family, __be16 port)
/* Find VXLAN socket based on network namespace, address family and UDP port
* and enabled unshareable flags.
*/
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
__be16 port, u32 flags)
{
struct vxlan_sock *vs;
u32 match_flags = flags & VXLAN_F_UNSHAREABLE;
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
inet_sk(vs->sock->sk)->sk.sk_family == family)
inet_sk(vs->sock->sk)->sk.sk_family == family &&
(vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
return vs;
}
return NULL;
......@@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
sa_family_t family, __be16 port)
sa_family_t family, __be16 port,
u32 flags)
{
struct vxlan_sock *vs;
vs = vxlan_find_sock(net, family, port);
vs = vxlan_find_sock(net, family, port, flags);
if (!vs)
return NULL;
......@@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
continue;
vh2 = (struct vxlanhdr *)(p->data + off_vx);
if (vh->vx_vni != vh2->vx_vni) {
if (vh->vx_flags != vh2->vx_flags ||
vh->vx_vni != vh2->vx_vni) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
......@@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct vxlan_sock *vs;
struct vxlanhdr *vxh;
u32 flags, vni;
struct vxlan_metadata md = {0};
/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
......@@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
vni &= VXLAN_VID_MASK;
}
/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
struct vxlanhdr_gbp *gbp;
gbp = (struct vxlanhdr_gbp *)vxh;
md.gbp = ntohs(gbp->policy_id);
if (gbp->dont_learn)
md.gbp |= VXLAN_GBP_DONT_LEARN;
if (gbp->policy_applied)
md.gbp |= VXLAN_GBP_POLICY_APPLIED;
flags &= ~VXLAN_GBP_USED_BITS;
}
if (flags || (vni & ~VXLAN_VID_MASK)) {
/* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from
......@@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto bad_flags;
}
vs->rcv(vs, skb, vxh->vx_vni);
md.vni = vxh->vx_vni;
vs->rcv(vs, skb, &md);
return 0;
drop:
......@@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
return 1;
}
static void vxlan_rcv(struct vxlan_sock *vs,
struct sk_buff *skb, __be32 vx_vni)
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct vxlan_metadata *md)
{
struct iphdr *oip = NULL;
struct ipv6hdr *oip6 = NULL;
......@@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
int err = 0;
union vxlan_addr *remote_ip;
vni = ntohl(vx_vni) >> 8;
vni = ntohl(md->vni) >> 8;
/* Is this VNI defined? */
vxlan = vxlan_vs_find_vni(vs, vni);
if (!vxlan)
......@@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
goto drop;
skb_reset_network_header(skb);
skb->mark = md->gbp;
if (oip6)
err = IP6_ECN_decapsulate(oip6, skb);
......@@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}
static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
struct vxlan_metadata *md)
{
struct vxlanhdr_gbp *gbp;
gbp = (struct vxlanhdr_gbp *)vxh;
vxh->vx_flags |= htonl(VXLAN_HF_GBP);
if (md->gbp & VXLAN_GBP_DONT_LEARN)
gbp->dont_learn = 1;
if (md->gbp & VXLAN_GBP_POLICY_APPLIED)
gbp->policy_applied = 1;
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}
#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
__be16 src_port, __be16 dst_port,
struct vxlan_metadata *md, bool xnet)
{
struct vxlanhdr *vxh;
int min_headroom;
......@@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni;
vxh->vx_vni = md->vni;
if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
......@@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
}
}
if (vs->flags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vs, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
......@@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
__be16 src_port, __be16 dst_port,
struct vxlan_metadata *md, bool xnet)
{
struct vxlanhdr *vxh;
int min_headroom;
......@@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni;
vxh->vx_vni = md->vni;
if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
......@@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
}
}
if (vs->flags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vs, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
......@@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *old_iph;
struct flowi4 fl4;
union vxlan_addr *dst;
struct vxlan_metadata md;
__be16 src_port = 0, dst_port;
u32 vni;
__be16 df = 0;
......@@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ip_rt_put(rt);
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
dst->sa.sa_family, dst_port);
dst->sa.sa_family, dst_port,
vxlan->flags);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
......@@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
md.vni = htonl(vni << 8);
md.gbp = skb->mark;
err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
fl4.saddr, dst->sin.sin_addr.s_addr,
tos, ttl, df, src_port, dst_port,
htonl(vni << 8),
tos, ttl, df, src_port, dst_port, &md,
!net_eq(vxlan->net, dev_net(vxlan->dev)));
if (err < 0) {
/* skb is already freed. */
......@@ -1968,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_release(ndst);
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
dst->sa.sa_family, dst_port);
dst->sa.sa_family, dst_port,
vxlan->flags);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
......@@ -1976,10 +2031,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ttl = ttl ? : ip6_dst_hoplimit(ndst);
md.vni = htonl(vni << 8);
md.gbp = skb->mark;
err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
dev, &fl6.saddr, &fl6.daddr, 0, ttl,
src_port, dst_port, htonl(vni << 8),
src_port, dst_port, &md,
!net_eq(vxlan->net, dev_net(vxlan->dev)));
#endif
}
......@@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
vxlan->dst_port);
vxlan->dst_port, vxlan->flags);
if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
/* If we have a socket with same port already, reuse it */
vxlan_vs_add_dev(vs, vxlan);
......@@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
};
static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
......@@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
return vs;
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
if (vs && ((vs->rcv != rcv) ||
!atomic_add_unless(&vs->refcnt, 1, 0)))
vs = ERR_PTR(-EBUSY);
......@@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
vxlan->flags |= VXLAN_F_REMCSUM_RX;
if (data[IFLA_VXLAN_GBP])
vxlan->flags |= VXLAN_F_GBP;
if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
vxlan->dst_port)) {
vxlan->dst_port, vxlan->flags)) {
pr_info("duplicate VNI %u\n", vni);
return -EEXIST;
}
......@@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
goto nla_put_failure;
if (vxlan->flags & VXLAN_F_GBP &&
nla_put_flag(skb, IFLA_VXLAN_GBP))
goto nla_put_failure;
return 0;
nla_put_failure:
......
......@@ -97,7 +97,10 @@ struct ip_tunnel {
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#define TUNNEL_OAM __cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
struct tnl_ptk_info {
__be16 flags;
......
......@@ -11,15 +11,76 @@
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
/* VXLAN protocol header */
/*
* VXLAN Group Based Policy Extension:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |1|-|-|-|1|-|-|-|R|D|R|R|A|R|R|R| Group Policy ID |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VXLAN Network Identifier (VNI) | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* D = Don't Learn bit. When set, this bit indicates that the egress
* VTEP MUST NOT learn the source address of the encapsulated frame.
*
* A = Indicates that the group policy has already been applied to
* this packet. Policies MUST NOT be applied by devices when the
* A bit is set.
*
* [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
*/
struct vxlanhdr_gbp {
__u8 vx_flags;
#ifdef __LITTLE_ENDIAN_BITFIELD
__u8 reserved_flags1:3,
policy_applied:1,
reserved_flags2:2,
dont_learn:1,
reserved_flags3:1;
#elif defined(__BIG_ENDIAN_BITFIELD)
__u8 reserved_flags1:1,
dont_learn:1,
reserved_flags2:2,
policy_applied:1,
reserved_flags3:3;
#else
#error "Please fix <asm/byteorder.h>"
#endif
__be16 policy_id;
__be32 vx_vni;
};
#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF)
/* skb->mark mapping
*
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*/
#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16)
#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16)
#define VXLAN_GBP_ID_MASK (0xFFFF)
/* VXLAN protocol header:
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* |G|R|R|R|I|R|R|C| Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | VXLAN Network Identifier (VNI) | Reserved |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
*
* G = 1 Group Policy (VXLAN-GBP)
* I = 1 VXLAN Network Identifier (VNI) present
* C = 1 Remote checksum offload (RCO)
*/
struct vxlanhdr {
__be32 vx_flags;
__be32 vx_vni;
};
/* VXLAN header flags. */
#define VXLAN_HF_VNI 0x08000000
#define VXLAN_HF_RCO 0x00200000
#define VXLAN_HF_RCO BIT(24)
#define VXLAN_HF_VNI BIT(27)
#define VXLAN_HF_GBP BIT(31)
/* Remote checksum offload header option */
#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */
......@@ -32,8 +93,14 @@ struct vxlanhdr {
#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
struct vxlan_metadata {
__be32 vni;
u32 gbp;
};
struct vxlan_sock;
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key);
typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb,
struct vxlan_metadata *md);
/* per UDP socket information */
struct vxlan_sock {
......@@ -60,6 +127,10 @@ struct vxlan_sock {
#define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100
#define VXLAN_F_REMCSUM_TX 0x200
#define VXLAN_F_REMCSUM_RX 0x400
#define VXLAN_F_GBP 0x800
/* These flags must match in order for a socket to be shareable */
#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
......@@ -70,7 +141,8 @@ void vxlan_sock_release(struct vxlan_sock *vs);
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet);
__be16 src_port, __be16 dst_port, struct vxlan_metadata *md,
bool xnet);
static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
netdev_features_t features)
......
......@@ -372,6 +372,7 @@ enum {
IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
IFLA_VXLAN_REMCSUM_TX,
IFLA_VXLAN_REMCSUM_RX,
IFLA_VXLAN_GBP,
__IFLA_VXLAN_MAX
};
#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
......
......@@ -252,11 +252,21 @@ enum ovs_vport_attr {
#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
enum {
OVS_VXLAN_EXT_UNSPEC,
OVS_VXLAN_EXT_GBP, /* Flag or __u32 */
__OVS_VXLAN_EXT_MAX,
};
#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1)
/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels.
*/
enum {
OVS_TUNNEL_ATTR_UNSPEC,
OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */
OVS_TUNNEL_ATTR_EXTENSION,
__OVS_TUNNEL_ATTR_MAX
};
......@@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr {
OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */
OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */
OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */
__OVS_TUNNEL_KEY_ATTR_MAX
};
......
......@@ -691,7 +691,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
8)) - 1
> sizeof(key->tun_opts));
memcpy(GENEVE_OPTS(key, tun_info->options_len),
memcpy(TUN_METADATA_OPTS(key, tun_info->options_len),
tun_info->options, tun_info->options_len);
key->tun_opts_len = tun_info->options_len;
} else {
......
......@@ -53,7 +53,7 @@ struct ovs_key_ipv4_tunnel {
struct ovs_tunnel_info {
struct ovs_key_ipv4_tunnel tunnel;
const struct geneve_opt *options;
const void *options;
u8 options_len;
};
......@@ -61,10 +61,10 @@ struct ovs_tunnel_info {
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
*/
#define GENEVE_OPTS(flow_key, opt_len) \
((struct geneve_opt *)((flow_key)->tun_opts + \
FIELD_SIZEOF(struct sw_flow_key, tun_opts) - \
opt_len))
#define TUN_METADATA_OFFSET(opt_len) \
(FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be32 saddr, __be32 daddr,
......@@ -73,7 +73,7 @@ static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be16 tp_dst,
__be64 tun_id,
__be16 tun_flags,
const struct geneve_opt *opts,
const void *opts,
u8 opts_len)
{
tun_info->tunnel.tun_id = tun_id;
......@@ -105,7 +105,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
__be16 tp_dst,
__be64 tun_id,
__be16 tun_flags,
const struct geneve_opt *opts,
const void *opts,
u8 opts_len)
{
__ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
......
This diff is collapsed.
......@@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
opts_len = geneveh->opt_len * 4;
flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT |
flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
(udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) |
(geneveh->oam ? TUNNEL_OAM : 0) |
(geneveh->critical ? TUNNEL_CRIT_OPT : 0);
......@@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
__be16 sport;
struct rtable *rt;
struct flowi4 fl;
u8 vni[3];
u8 vni[3], opts_len, *opts;
__be16 df;
int err;
......@@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
tunnel_id_to_vni(tun_key->tun_id, vni);
skb->ignore_df = 1;
if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) {
opts = (u8 *)tun_info->options;
opts_len = tun_info->options_len;
} else {
opts = NULL;
opts_len = 0;
}
err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
tun_key->ipv4_dst, tun_key->ipv4_tos,
tun_key->ipv4_ttl, df, sport, dport,
tun_key->tun_flags, vni,
tun_info->options_len, (u8 *)tun_info->options,
tun_key->tun_flags, vni, opts_len, opts,
false);
if (err < 0)
ip_rt_put(rt);
......
......@@ -40,6 +40,7 @@
#include "datapath.h"
#include "vport.h"
#include "vport-vxlan.h"
/**
* struct vxlan_port - Keeps track of open UDP ports
......@@ -49,6 +50,7 @@
struct vxlan_port {
struct vxlan_sock *vs;
char name[IFNAMSIZ];
u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
};
static struct vport_ops ovs_vxlan_vport_ops;
......@@ -59,19 +61,30 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
}
/* Called with rcu_read_lock and BH disabled. */
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni)
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct vxlan_metadata *md)
{
struct ovs_tunnel_info tun_info;
struct vxlan_port *vxlan_port;
struct vport *vport = vs->data;
struct iphdr *iph;
struct ovs_vxlan_opts opts = {
.gbp = md->gbp,
};
__be64 key;
__be16 flags;
flags = TUNNEL_KEY;
vxlan_port = vxlan_vport(vport);
if (vxlan_port->exts & VXLAN_F_GBP)
flags |= TUNNEL_VXLAN_OPT;
/* Save outer tunnel values */
iph = ip_hdr(skb);
key = cpu_to_be64(ntohl(vx_vni) >> 8);
key = cpu_to_be64(ntohl(md->vni) >> 8);
ovs_flow_tun_info_init(&tun_info, iph,
udp_hdr(skb)->source, udp_hdr(skb)->dest,
key, TUNNEL_KEY, NULL, 0);
key, flags, &opts, sizeof(opts));
ovs_vport_receive(vport, skb, &tun_info);
}
......@@ -83,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
if (vxlan_port->exts) {
struct nlattr *exts;
exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
if (!exts)
return -EMSGSIZE;
if (vxlan_port->exts & VXLAN_F_GBP &&
nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
return -EMSGSIZE;
nla_nest_end(skb, exts);
}
return 0;
}
......@@ -95,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport)
ovs_vport_deferred_free(vport);
}
static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
};
static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
{
struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
struct vxlan_port *vxlan_port;
int err;
if (nla_len(attr) < sizeof(struct nlattr))
return -EINVAL;
err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
if (err < 0)
return err;
vxlan_port = vxlan_vport(vport);
if (exts[OVS_VXLAN_EXT_GBP])
vxlan_port->exts |= VXLAN_F_GBP;
return 0;
}
static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
......@@ -127,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
vxlan_port = vxlan_vport(vport);
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
if (a) {
err = vxlan_configure_exts(vport, a);
if (err) {
ovs_vport_free(vport);
goto error;
}
}
vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
vxlan_port->exts);
if (IS_ERR(vs)) {
ovs_vport_free(vport);
return (void *)vs;
......@@ -140,12 +203,28 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
return ERR_PTR(err);
}
static int vxlan_ext_gbp(struct sk_buff *skb)
{
const struct ovs_tunnel_info *tun_info;
const struct ovs_vxlan_opts *opts;
tun_info = OVS_CB(skb)->egress_tun_info;
opts = tun_info->options;
if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
tun_info->options_len >= sizeof(*opts))
return opts->gbp;
else
return 0;
}
static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
{
struct net *net = ovs_dp_get_net(vport->dp);
struct vxlan_port *vxlan_port = vxlan_vport(vport);
__be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
const struct ovs_key_ipv4_tunnel *tun_key;
struct vxlan_metadata md = {0};
struct rtable *rt;
struct flowi4 fl;
__be16 src_port;
......@@ -170,12 +249,14 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
skb->ignore_df = 1;
src_port = udp_flow_src_port(net, skb, 0, 0, true);
md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
md.gbp = vxlan_ext_gbp(skb);
err = vxlan_xmit_skb(vxlan_port->vs, rt, skb,
fl.saddr, tun_key->ipv4_dst,
tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
src_port, dst_port,
htonl(be64_to_cpu(tun_key->tun_id) << 8),
&md,
false);
if (err < 0)
ip_rt_put(rt);
......
#ifndef VPORT_VXLAN_H
#define VPORT_VXLAN_H 1
#include <linux/kernel.h>
#include <linux/types.h>
struct ovs_vxlan_opts {
__u32 gbp;
};
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment