Commit 25cd9ba0 authored by Simon Horman, committed by Pravin B Shelar

openvswitch: Add basic MPLS support to kernel

Allow datapath to recognize and extract MPLS labels into flow keys
and execute actions which push, pop, and set labels on packets.

Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer.

Cc: Ravi K <rkerur@gmail.com>
Cc: Leo Alterman <lalterman@nicira.com>
Cc: Isaku Yamahata <yamahata@valinux.co.jp>
Cc: Joe Stringer <joe@wand.net.nz>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Jesse Gross <jesse@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
parent 59b93b41
/*
* Copyright (c) 2014 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef _NET_MPLS_H
#define _NET_MPLS_H 1
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#define MPLS_HLEN 4
/* Report whether @eth_type (network byte order) is one of the two MPLS
 * ethertypes: unicast (ETH_P_MPLS_UC) or multicast (ETH_P_MPLS_MC).
 */
static inline bool eth_p_mpls(__be16 eth_type)
{
	if (eth_type == htons(ETH_P_MPLS_UC))
		return true;

	return eth_type == htons(ETH_P_MPLS_MC);
}
/*
 * Return a pointer to the byte that immediately follows the L2 header,
 * i.e. skb->mac_len bytes past the start of the mac header.
 *
 * For non-MPLS skbs this corresponds to the network header.
 * For MPLS skbs it lies before the network header, because the MPLS
 * label stack sits between the end of the mac header and the network
 * header. In other words, for MPLS skbs the returned pointer is the
 * top of the MPLS label stack.
 */
static inline unsigned char *skb_mpls_header(struct sk_buff *skb)
{
	unsigned char *l2_start = skb_mac_header(skb);

	return l2_start + skb->mac_len;
}
#endif
...@@ -293,6 +293,9 @@ enum ovs_key_attr { ...@@ -293,6 +293,9 @@ enum ovs_key_attr {
OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash
is not computed by the datapath. */ is not computed by the datapath. */
OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */
OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls.
* The implementation may restrict
* the accepted length of the array. */
#ifdef __KERNEL__ #ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */
...@@ -340,6 +343,10 @@ struct ovs_key_ethernet { ...@@ -340,6 +343,10 @@ struct ovs_key_ethernet {
__u8 eth_dst[ETH_ALEN]; __u8 eth_dst[ETH_ALEN];
}; };
/* Flow key payload for %OVS_KEY_ATTR_MPLS: a single MPLS label stack entry. */
struct ovs_key_mpls {
/* Label stack entry, stored big-endian as indicated by the __be32 type. */
__be32 mpls_lse;
};
struct ovs_key_ipv4 { struct ovs_key_ipv4 {
__be32 ipv4_src; __be32 ipv4_src;
__be32 ipv4_dst; __be32 ipv4_dst;
...@@ -483,6 +490,19 @@ enum ovs_userspace_attr { ...@@ -483,6 +490,19 @@ enum ovs_userspace_attr {
#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
/**
 * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument.
 * @mpls_lse: MPLS label stack entry to push.
 * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
 *
 * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
 * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other values are
 * rejected.
 */
struct ovs_action_push_mpls {
__be32 mpls_lse;
__be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
};
/** /**
* struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
* @vlan_tpid: Tag protocol identifier (TPID) to push. * @vlan_tpid: Tag protocol identifier (TPID) to push.
...@@ -534,6 +554,15 @@ struct ovs_action_hash { ...@@ -534,6 +554,15 @@ struct ovs_action_hash {
* @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
* @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
* the nested %OVS_SAMPLE_ATTR_* attributes. * the nested %OVS_SAMPLE_ATTR_* attributes.
* @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the
* top of the packet's MPLS label stack. Set the ethertype of the
* encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to
* indicate the new packet contents.
* @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the
* packet's MPLS label stack. Set the encapsulating frame's ethertype to
* indicate the new packet contents. This could potentially still be
* %ETH_P_MPLS_UC or %ETH_P_MPLS_MC if the resulting MPLS label stack is
* not empty. If there is no MPLS label stack, as determined by ethertype,
* no action is taken.
* *
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
...@@ -550,6 +579,9 @@ enum ovs_action_attr { ...@@ -550,6 +579,9 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */
OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
__OVS_ACTION_ATTR_MAX __OVS_ACTION_ATTR_MAX
}; };
......
...@@ -118,6 +118,7 @@ ...@@ -118,6 +118,7 @@
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/mpls.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <linux/in.h> #include <linux/in.h>
#include <linux/jhash.h> #include <linux/jhash.h>
...@@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, ...@@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features, netdev_features_t features,
__be16 type) __be16 type)
{ {
if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) if (eth_p_mpls(type))
features &= skb->dev->mpls_features; features &= skb->dev->mpls_features;
return features; return features;
......
...@@ -30,6 +30,7 @@ config OPENVSWITCH ...@@ -30,6 +30,7 @@ config OPENVSWITCH
config OPENVSWITCH_GRE config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support" tristate "Open vSwitch GRE tunneling support"
select NET_MPLS_GSO
depends on INET depends on INET
depends on OPENVSWITCH depends on OPENVSWITCH
depends on NET_IPGRE_DEMUX depends on NET_IPGRE_DEMUX
......
...@@ -28,10 +28,12 @@ ...@@ -28,10 +28,12 @@
#include <linux/in6.h> #include <linux/in6.h>
#include <linux/if_arp.h> #include <linux/if_arp.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <net/ip.h> #include <net/ip.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/checksum.h> #include <net/checksum.h>
#include <net/dsfield.h> #include <net/dsfield.h>
#include <net/mpls.h>
#include <net/sctp/checksum.h> #include <net/sctp/checksum.h>
#include "datapath.h" #include "datapath.h"
...@@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len) ...@@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len)
return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
} }
static int push_mpls(struct sk_buff *skb,
const struct ovs_action_push_mpls *mpls)
{
__be32 *new_mpls_lse;
struct ethhdr *hdr;
/* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */
if (skb->encapsulation)
return -ENOTSUPP;
if (skb_cow_head(skb, MPLS_HLEN) < 0)
return -ENOMEM;
skb_push(skb, MPLS_HLEN);
memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb),
skb->mac_len);
skb_reset_mac_header(skb);
new_mpls_lse = (__be32 *)skb_mpls_header(skb);
*new_mpls_lse = mpls->mpls_lse;
if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse,
MPLS_HLEN, 0));
hdr = eth_hdr(skb);
hdr->h_proto = mpls->mpls_ethertype;
skb_set_inner_protocol(skb, skb->protocol);
skb->protocol = mpls->mpls_ethertype;
return 0;
}
/* Remove the top MPLS label stack entry from @skb and write @ethertype
 * into the L2 header as the new ethertype.
 *
 * @skb:       packet to modify; skb->mac_len must describe the L2 header.
 * @ethertype: ethertype (network byte order) describing what follows the
 *             L2 header once the entry has been removed.
 *
 * Returns 0 on success or the error reported by make_writable().
 */
static int pop_mpls(struct sk_buff *skb, const __be16 ethertype)
{
	struct ethhdr *hdr;
	int err;

	err = make_writable(skb, skb->mac_len + MPLS_HLEN);
	if (unlikely(err))
		return err;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_sub(skb->csum,
				     csum_partial(skb_mpls_header(skb),
						  MPLS_HLEN, 0));

	/* Slide the L2 header forward over the entry being removed, then
	 * drop the now unused bytes at the front of the packet.
	 */
	memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb),
		skb->mac_len);

	__skb_pull(skb, MPLS_HLEN);
	skb_reset_mac_header(skb);

	/* skb_mpls_header() is used to locate the ethertype
	 * field correctly in the presence of VLAN tags.
	 */
	hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* The ethertype bytes are part of the checksummed data, so
		 * account for the old and new value before rewriting them.
		 */
		__be16 diff[] = { ~(hdr->h_proto), ethertype };

		skb->csum = ~csum_partial((char *)diff, sizeof(diff),
					  ~skb->csum);
	}

	hdr->h_proto = ethertype;
	if (eth_p_mpls(skb->protocol))
		skb->protocol = ethertype;

	return 0;
}
static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse)
{
__be32 *stack;
int err;
err = make_writable(skb, skb->mac_len + MPLS_HLEN);
if (unlikely(err))
return err;
stack = (__be32 *)skb_mpls_header(skb);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
__be32 diff[] = { ~(*stack), *mpls_lse };
skb->csum = ~csum_partial((char *)diff, sizeof(diff),
~skb->csum);
}
*stack = *mpls_lse;
return 0;
}
/* remove VLAN header from packet and update csum accordingly. */ /* remove VLAN header from packet and update csum accordingly. */
static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
{ {
...@@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) ...@@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
vlan_set_encap_proto(skb, vhdr); vlan_set_encap_proto(skb, vhdr);
skb->mac_header += VLAN_HLEN; skb->mac_header += VLAN_HLEN;
if (skb_network_offset(skb) < ETH_HLEN) if (skb_network_offset(skb) < ETH_HLEN)
skb_set_network_header(skb, ETH_HLEN); skb_set_network_header(skb, ETH_HLEN);
skb_reset_mac_len(skb);
/* Update mac_len for subsequent MPLS actions */
skb_reset_mac_len(skb);
return 0; return 0;
} }
...@@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla ...@@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla
if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag))
return -ENOMEM; return -ENOMEM;
/* Update mac_len for subsequent MPLS actions */
skb->mac_len += VLAN_HLEN;
if (skb->ip_summed == CHECKSUM_COMPLETE) if (skb->ip_summed == CHECKSUM_COMPLETE)
skb->csum = csum_add(skb->csum, csum_partial(skb->data skb->csum = csum_add(skb->csum, csum_partial(skb->data
...@@ -612,6 +704,10 @@ static int execute_set_action(struct sk_buff *skb, ...@@ -612,6 +704,10 @@ static int execute_set_action(struct sk_buff *skb,
case OVS_KEY_ATTR_SCTP: case OVS_KEY_ATTR_SCTP:
err = set_sctp(skb, nla_data(nested_attr)); err = set_sctp(skb, nla_data(nested_attr));
break; break;
case OVS_KEY_ATTR_MPLS:
err = set_mpls(skb, nla_data(nested_attr));
break;
} }
return err; return err;
...@@ -690,6 +786,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, ...@@ -690,6 +786,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a); execute_hash(skb, key, a);
break; break;
case OVS_ACTION_ATTR_PUSH_MPLS:
err = push_mpls(skb, nla_data(a));
break;
case OVS_ACTION_ATTR_POP_MPLS:
err = pop_mpls(skb, nla_get_be16(a));
break;
case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_PUSH_VLAN:
err = push_vlan(skb, nla_data(a)); err = push_vlan(skb, nla_data(a));
if (unlikely(err)) /* skb already freed. */ if (unlikely(err)) /* skb already freed. */
......
...@@ -560,7 +560,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) ...@@ -560,7 +560,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
goto err_flow_free; goto err_flow_free;
err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS],
&flow->key, 0, &acts); &flow->key, &acts);
if (err) if (err)
goto err_flow_free; goto err_flow_free;
...@@ -846,7 +846,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ...@@ -846,7 +846,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
goto err_kfree_flow; goto err_kfree_flow;
error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key,
0, &acts); &acts);
if (error) { if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
goto err_kfree_acts; goto err_kfree_acts;
...@@ -953,7 +953,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, ...@@ -953,7 +953,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
return acts; return acts;
ovs_flow_mask_key(&masked_key, key, mask); ovs_flow_mask_key(&masked_key, key, mask);
error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); error = ovs_nla_copy_actions(a, &masked_key, &acts);
if (error) { if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
kfree(acts); kfree(acts);
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <linux/if_arp.h> #include <linux/if_arp.h>
#include <linux/ip.h> #include <linux/ip.h>
#include <linux/ipv6.h> #include <linux/ipv6.h>
#include <linux/mpls.h>
#include <linux/sctp.h> #include <linux/sctp.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/tcp.h> #include <linux/tcp.h>
...@@ -42,6 +43,7 @@ ...@@ -42,6 +43,7 @@
#include <net/ip.h> #include <net/ip.h>
#include <net/ip_tunnels.h> #include <net/ip_tunnels.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/mpls.h>
#include <net/ndisc.h> #include <net/ndisc.h>
#include "datapath.h" #include "datapath.h"
...@@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) ...@@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
return -ENOMEM; return -ENOMEM;
skb_reset_network_header(skb); skb_reset_network_header(skb);
skb_reset_mac_len(skb);
__skb_push(skb, skb->data - skb_mac_header(skb)); __skb_push(skb, skb->data - skb_mac_header(skb));
/* Network layer. */ /* Network layer. */
...@@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) ...@@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
memset(&key->ip, 0, sizeof(key->ip)); memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4)); memset(&key->ipv4, 0, sizeof(key->ipv4));
} }
} else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;
/* In the presence of an MPLS label stack the end of the L2
* header and the beginning of the L3 header differ.
*
* Advance network_header to the beginning of the L3
* header. mac_len corresponds to the end of the L2 header.
*/
while (1) {
__be32 lse;
error = check_header(skb, skb->mac_len + stack_len);
if (unlikely(error))
return 0;
memcpy(&lse, skb_network_header(skb), MPLS_HLEN);
if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
skb_set_network_header(skb, skb->mac_len + stack_len);
if (lse & htonl(MPLS_LS_S_MASK))
break;
stack_len += MPLS_HLEN;
}
} else if (key->eth.type == htons(ETH_P_IPV6)) { } else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */ int nh_len; /* IPv6 Header + Extensions */
......
...@@ -102,12 +102,17 @@ struct sw_flow_key { ...@@ -102,12 +102,17 @@ struct sw_flow_key {
__be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
__be16 type; /* Ethernet frame type. */ __be16 type; /* Ethernet frame type. */
} eth; } eth;
struct { union {
u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ struct {
u8 tos; /* IP ToS. */ __be32 top_lse; /* top label stack entry */
u8 ttl; /* IP TTL/hop limit. */ } mpls;
u8 frag; /* One of OVS_FRAG_TYPE_*. */ struct {
} ip; u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */
u8 tos; /* IP ToS. */
u8 ttl; /* IP TTL/hop limit. */
u8 frag; /* One of OVS_FRAG_TYPE_*. */
} ip;
};
struct { struct {
__be16 src; /* TCP/UDP/SCTP source port. */ __be16 src; /* TCP/UDP/SCTP source port. */
__be16 dst; /* TCP/UDP/SCTP destination port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include <net/ip.h> #include <net/ip.h>
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/ndisc.h> #include <net/ndisc.h>
#include <net/mpls.h>
#include "flow_netlink.h" #include "flow_netlink.h"
...@@ -134,7 +135,8 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -134,7 +135,8 @@ static bool match_validate(const struct sw_flow_match *match,
| (1 << OVS_KEY_ATTR_ICMP) | (1 << OVS_KEY_ATTR_ICMP)
| (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ICMPV6)
| (1 << OVS_KEY_ATTR_ARP) | (1 << OVS_KEY_ATTR_ARP)
| (1 << OVS_KEY_ATTR_ND)); | (1 << OVS_KEY_ATTR_ND)
| (1 << OVS_KEY_ATTR_MPLS));
/* Always allowed mask fields. */ /* Always allowed mask fields. */
mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
...@@ -149,6 +151,12 @@ static bool match_validate(const struct sw_flow_match *match, ...@@ -149,6 +151,12 @@ static bool match_validate(const struct sw_flow_match *match,
mask_allowed |= 1 << OVS_KEY_ATTR_ARP; mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
} }
if (eth_p_mpls(match->key->eth.type)) {
key_expected |= 1 << OVS_KEY_ATTR_MPLS;
if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
}
if (match->key->eth.type == htons(ETH_P_IP)) { if (match->key->eth.type == htons(ETH_P_IP)) {
key_expected |= 1 << OVS_KEY_ATTR_IPV4; key_expected |= 1 << OVS_KEY_ATTR_IPV4;
if (match->mask && (match->mask->key.eth.type == htons(0xffff))) if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
...@@ -266,6 +274,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { ...@@ -266,6 +274,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
[OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
[OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
[OVS_KEY_ATTR_TUNNEL] = -1, [OVS_KEY_ATTR_TUNNEL] = -1,
[OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
}; };
static bool is_all_zero(const u8 *fp, size_t size) static bool is_all_zero(const u8 *fp, size_t size)
...@@ -735,6 +744,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, ...@@ -735,6 +744,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
attrs &= ~(1 << OVS_KEY_ATTR_ARP); attrs &= ~(1 << OVS_KEY_ATTR_ARP);
} }
if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
const struct ovs_key_mpls *mpls_key;
mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
SW_FLOW_KEY_PUT(match, mpls.top_lse,
mpls_key->mpls_lse, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
}
if (attrs & (1 << OVS_KEY_ATTR_TCP)) { if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
const struct ovs_key_tcp *tcp_key; const struct ovs_key_tcp *tcp_key;
...@@ -1140,6 +1159,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, ...@@ -1140,6 +1159,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_op = htons(output->ip.proto); arp_key->arp_op = htons(output->ip.proto);
ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
} else if (eth_p_mpls(swkey->eth.type)) {
struct ovs_key_mpls *mpls_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
if (!nla)
goto nla_put_failure;
mpls_key = nla_data(nla);
mpls_key->mpls_lse = output->mpls.top_lse;
} }
if ((swkey->eth.type == htons(ETH_P_IP) || if ((swkey->eth.type == htons(ETH_P_IP) ||
...@@ -1336,9 +1363,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, ...@@ -1336,9 +1363,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa,
a->nla_len = sfa->actions_len - st_offset; a->nla_len = sfa->actions_len - st_offset;
} }
static int ovs_nla_copy_actions__(const struct nlattr *attr,
const struct sw_flow_key *key,
int depth, struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci);
static int validate_and_copy_sample(const struct nlattr *attr, static int validate_and_copy_sample(const struct nlattr *attr,
const struct sw_flow_key *key, int depth, const struct sw_flow_key *key, int depth,
struct sw_flow_actions **sfa) struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci)
{ {
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions; const struct nlattr *probability, *actions;
...@@ -1375,7 +1408,8 @@ static int validate_and_copy_sample(const struct nlattr *attr, ...@@ -1375,7 +1408,8 @@ static int validate_and_copy_sample(const struct nlattr *attr,
if (st_acts < 0) if (st_acts < 0)
return st_acts; return st_acts;
err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa,
eth_type, vlan_tci);
if (err) if (err)
return err; return err;
...@@ -1385,10 +1419,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, ...@@ -1385,10 +1419,10 @@ static int validate_and_copy_sample(const struct nlattr *attr,
return 0; return 0;
} }
static int validate_tp_port(const struct sw_flow_key *flow_key) static int validate_tp_port(const struct sw_flow_key *flow_key,
__be16 eth_type)
{ {
if ((flow_key->eth.type == htons(ETH_P_IP) || if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) &&
flow_key->eth.type == htons(ETH_P_IPV6)) &&
(flow_key->tp.src || flow_key->tp.dst)) (flow_key->tp.src || flow_key->tp.dst))
return 0; return 0;
...@@ -1483,7 +1517,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, ...@@ -1483,7 +1517,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
static int validate_set(const struct nlattr *a, static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key, const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa, struct sw_flow_actions **sfa,
bool *set_tun) bool *set_tun, __be16 eth_type)
{ {
const struct nlattr *ovs_key = nla_data(a); const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key); int key_type = nla_type(ovs_key);
...@@ -1508,6 +1542,9 @@ static int validate_set(const struct nlattr *a, ...@@ -1508,6 +1542,9 @@ static int validate_set(const struct nlattr *a,
break; break;
case OVS_KEY_ATTR_TUNNEL: case OVS_KEY_ATTR_TUNNEL:
if (eth_p_mpls(eth_type))
return -EINVAL;
*set_tun = true; *set_tun = true;
err = validate_and_copy_set_tun(a, sfa); err = validate_and_copy_set_tun(a, sfa);
if (err) if (err)
...@@ -1515,7 +1552,7 @@ static int validate_set(const struct nlattr *a, ...@@ -1515,7 +1552,7 @@ static int validate_set(const struct nlattr *a,
break; break;
case OVS_KEY_ATTR_IPV4: case OVS_KEY_ATTR_IPV4:
if (flow_key->eth.type != htons(ETH_P_IP)) if (eth_type != htons(ETH_P_IP))
return -EINVAL; return -EINVAL;
if (!flow_key->ip.proto) if (!flow_key->ip.proto)
...@@ -1531,7 +1568,7 @@ static int validate_set(const struct nlattr *a, ...@@ -1531,7 +1568,7 @@ static int validate_set(const struct nlattr *a,
break; break;
case OVS_KEY_ATTR_IPV6: case OVS_KEY_ATTR_IPV6:
if (flow_key->eth.type != htons(ETH_P_IPV6)) if (eth_type != htons(ETH_P_IPV6))
return -EINVAL; return -EINVAL;
if (!flow_key->ip.proto) if (!flow_key->ip.proto)
...@@ -1553,19 +1590,24 @@ static int validate_set(const struct nlattr *a, ...@@ -1553,19 +1590,24 @@ static int validate_set(const struct nlattr *a,
if (flow_key->ip.proto != IPPROTO_TCP) if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL; return -EINVAL;
return validate_tp_port(flow_key); return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_UDP: case OVS_KEY_ATTR_UDP:
if (flow_key->ip.proto != IPPROTO_UDP) if (flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL; return -EINVAL;
return validate_tp_port(flow_key); return validate_tp_port(flow_key, eth_type);
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
return -EINVAL;
break;
case OVS_KEY_ATTR_SCTP: case OVS_KEY_ATTR_SCTP:
if (flow_key->ip.proto != IPPROTO_SCTP) if (flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL; return -EINVAL;
return validate_tp_port(flow_key); return validate_tp_port(flow_key, eth_type);
default: default:
return -EINVAL; return -EINVAL;
...@@ -1609,12 +1651,13 @@ static int copy_action(const struct nlattr *from, ...@@ -1609,12 +1651,13 @@ static int copy_action(const struct nlattr *from,
return 0; return 0;
} }
int ovs_nla_copy_actions(const struct nlattr *attr, static int ovs_nla_copy_actions__(const struct nlattr *attr,
const struct sw_flow_key *key, const struct sw_flow_key *key,
int depth, int depth, struct sw_flow_actions **sfa,
struct sw_flow_actions **sfa) __be16 eth_type, __be16 vlan_tci)
{ {
const struct nlattr *a; const struct nlattr *a;
bool out_tnl_port = false;
int rem, err; int rem, err;
if (depth >= SAMPLE_ACTION_DEPTH) if (depth >= SAMPLE_ACTION_DEPTH)
...@@ -1626,6 +1669,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, ...@@ -1626,6 +1669,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
[OVS_ACTION_ATTR_RECIRC] = sizeof(u32), [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
[OVS_ACTION_ATTR_USERSPACE] = (u32)-1, [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1, [OVS_ACTION_ATTR_SET] = (u32)-1,
...@@ -1655,6 +1700,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, ...@@ -1655,6 +1700,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
case OVS_ACTION_ATTR_OUTPUT: case OVS_ACTION_ATTR_OUTPUT:
if (nla_get_u32(a) >= DP_MAX_PORTS) if (nla_get_u32(a) >= DP_MAX_PORTS)
return -EINVAL; return -EINVAL;
out_tnl_port = false;
break; break;
case OVS_ACTION_ATTR_HASH: { case OVS_ACTION_ATTR_HASH: {
...@@ -1671,6 +1718,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, ...@@ -1671,6 +1718,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
} }
case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_POP_VLAN:
vlan_tci = htons(0);
break; break;
case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_PUSH_VLAN:
...@@ -1679,19 +1727,66 @@ int ovs_nla_copy_actions(const struct nlattr *attr, ...@@ -1679,19 +1727,66 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return -EINVAL; return -EINVAL;
if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
return -EINVAL; return -EINVAL;
vlan_tci = vlan->vlan_tci;
break; break;
case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_RECIRC:
break; break;
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nla_data(a);
/* The networking stack does not allow simultaneous
* tunnel and MPLS GSO.
*/
if (out_tnl_port)
return -EINVAL;
if (!eth_p_mpls(mpls->mpls_ethertype))
return -EINVAL;
/* Prohibit push MPLS other than to a white list
* for packets that have a known tag order.
*/
if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
(eth_type != htons(ETH_P_IP) &&
eth_type != htons(ETH_P_IPV6) &&
eth_type != htons(ETH_P_ARP) &&
eth_type != htons(ETH_P_RARP) &&
!eth_p_mpls(eth_type)))
return -EINVAL;
eth_type = mpls->mpls_ethertype;
break;
}
case OVS_ACTION_ATTR_POP_MPLS:
if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
!eth_p_mpls(eth_type))
return -EINVAL;
/* Disallow subsequent L2.5+ set and mpls_pop actions
* as there is no check here to ensure that the new
* eth_type is valid and thus set actions could
* write off the end of the packet or otherwise
* corrupt it.
*
* Support for these actions is planned using packet
* recirculation.
*/
eth_type = htons(0);
break;
case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SET:
err = validate_set(a, key, sfa, &skip_copy); err = validate_set(a, key, sfa,
&out_tnl_port, eth_type);
if (err) if (err)
return err; return err;
skip_copy = out_tnl_port;
break; break;
case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SAMPLE:
err = validate_and_copy_sample(a, key, depth, sfa); err = validate_and_copy_sample(a, key, depth, sfa,
eth_type, vlan_tci);
if (err) if (err)
return err; return err;
skip_copy = true; skip_copy = true;
...@@ -1713,6 +1808,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr, ...@@ -1713,6 +1808,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
return 0; return 0;
} }
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa)
{
return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type,
key->eth.tci);
}
static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb)
{ {
const struct nlattr *a; const struct nlattr *a;
......
...@@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, ...@@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
const struct nlattr *); const struct nlattr *);
int ovs_nla_copy_actions(const struct nlattr *attr, int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key, int depth, const struct sw_flow_key *key,
struct sw_flow_actions **sfa); struct sw_flow_actions **sfa);
int ovs_nla_put_actions(const struct nlattr *attr, int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb); int len, struct sk_buff *skb);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment