Commit 73d4d645 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'net-openvswitch-add-sample-multicasting'

Adrian Moreno says:

====================
net: openvswitch: Add sample multicasting.

** Background **
Currently, OVS supports several packet sampling mechanisms (sFlow,
per-bridge IPFIX, per-flow IPFIX). These end up being translated into a
userspace action that needs to be handled by ovs-vswitchd's handler
threads only to be forwarded to some third party application that
will somehow process the sample and provide observability on the
datapath.

A particularly interesting use-case is controller-driven
per-flow IPFIX sampling where the OpenFlow controller can add metadata
to samples (via two 32bit integers) and this metadata is then available
to the sample-collecting system for correlation.

** Problem **
The fact that sampled traffic share netlink sockets and handler thread
time with upcalls, apart from being a performance bottleneck in the
sample extraction itself, can severely compromise the datapath,
yielding this solution unfit for highly loaded production systems.

Users are left with little options other than guessing what sampling
rate will be OK for their traffic pattern and system load and dealing
with the lost accuracy.

Looking at available infrastructure, an obvious candidated would be
to use psample. However, it's current state does not help with the
use-case at stake because sampled packets do not contain user-defined
metadata.

** Proposal **
This series is an attempt to fix this situation by extending the
existing psample infrastructure to carry a variable length
user-defined cookie.

The main existing user of psample is tc's act_sample. It is also
extended to forward the action's cookie to psample.

Finally, a new OVS action (OVS_SAMPLE_ATTR_PSAMPLE) is created.
It accepts a group and an optional cookie and uses psample to
multicast the packet and the metadata.
====================

Link: https://patch.msgid.link/20240704085710.353845-1-amorenoz@redhat.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents 064fbc4e 30d772a0
...@@ -727,6 +727,12 @@ attribute-sets: ...@@ -727,6 +727,12 @@ attribute-sets:
name: dec-ttl name: dec-ttl
type: nest type: nest
nested-attributes: dec-ttl-attrs nested-attributes: dec-ttl-attrs
-
name: psample
type: nest
nested-attributes: psample-attrs
doc: |
Sends a packet sample to psample for external observation.
- -
name: tunnel-key-attrs name: tunnel-key-attrs
enum-name: ovs-tunnel-key-attr enum-name: ovs-tunnel-key-attr
...@@ -938,6 +944,17 @@ attribute-sets: ...@@ -938,6 +944,17 @@ attribute-sets:
- -
name: gbp name: gbp
type: u32 type: u32
-
name: psample-attrs
enum-name: ovs-psample-attr
name-prefix: ovs-psample-attr-
attributes:
-
name: group
type: u32
-
name: cookie
type: binary
operations: operations:
name-prefix: ovs-flow-cmd- name-prefix: ovs-flow-cmd-
......
...@@ -24,7 +24,10 @@ struct psample_metadata { ...@@ -24,7 +24,10 @@ struct psample_metadata {
u8 out_tc_valid:1, u8 out_tc_valid:1,
out_tc_occ_valid:1, out_tc_occ_valid:1,
latency_valid:1, latency_valid:1,
unused:5; rate_as_probability:1,
unused:4;
const u8 *user_cookie;
u32 user_cookie_len;
}; };
struct psample_group *psample_group_get(struct net *net, u32 group_num); struct psample_group *psample_group_get(struct net *net, u32 group_num);
......
...@@ -649,7 +649,8 @@ enum ovs_flow_attr { ...@@ -649,7 +649,8 @@ enum ovs_flow_attr {
* Actions are passed as nested attributes. * Actions are passed as nested attributes.
* *
* Executes the specified actions with the given probability on a per-packet * Executes the specified actions with the given probability on a per-packet
* basis. * basis. Nested actions will be able to access the probability value of the
* parent @OVS_ACTION_ATTR_SAMPLE.
*/ */
enum ovs_sample_attr { enum ovs_sample_attr {
OVS_SAMPLE_ATTR_UNSPEC, OVS_SAMPLE_ATTR_UNSPEC,
...@@ -914,6 +915,31 @@ struct check_pkt_len_arg { ...@@ -914,6 +915,31 @@ struct check_pkt_len_arg {
}; };
#endif #endif
#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
/**
* enum ovs_psample_attr - Attributes for %OVS_ACTION_ATTR_PSAMPLE
* action.
*
* @OVS_PSAMPLE_ATTR_GROUP: 32-bit number to identify the source of the
* sample.
* @OVS_PSAMPLE_ATTR_COOKIE: An optional variable-length binary cookie that
* contains user-defined metadata. The maximum length is
* OVS_PSAMPLE_COOKIE_MAX_SIZE bytes.
*
* Sends the packet to the psample multicast group with the specified group and
* cookie. It is possible to combine this action with the
* %OVS_ACTION_ATTR_TRUNC action to limit the size of the sample.
*/
enum ovs_psample_attr {
OVS_PSAMPLE_ATTR_GROUP = 1, /* u32 number. */
OVS_PSAMPLE_ATTR_COOKIE, /* Optional, user specified cookie. */
/* private: */
__OVS_PSAMPLE_ATTR_MAX
};
#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1)
/** /**
* enum ovs_action_attr - Action types. * enum ovs_action_attr - Action types.
* *
...@@ -966,6 +992,8 @@ struct check_pkt_len_arg { ...@@ -966,6 +992,8 @@ struct check_pkt_len_arg {
* of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
* argument. * argument.
* @OVS_ACTION_ATTR_DROP: Explicit drop action. * @OVS_ACTION_ATTR_DROP: Explicit drop action.
* @OVS_ACTION_ATTR_PSAMPLE: Send a sample of the packet to external observers
* via psample.
* *
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
...@@ -1004,6 +1032,7 @@ enum ovs_action_attr { ...@@ -1004,6 +1032,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */
OVS_ACTION_ATTR_DROP, /* u32 error code. */ OVS_ACTION_ATTR_DROP, /* u32 error code. */
OVS_ACTION_ATTR_PSAMPLE, /* Nested OVS_PSAMPLE_ATTR_*. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */ * from userspace. */
......
...@@ -8,7 +8,11 @@ enum { ...@@ -8,7 +8,11 @@ enum {
PSAMPLE_ATTR_ORIGSIZE, PSAMPLE_ATTR_ORIGSIZE,
PSAMPLE_ATTR_SAMPLE_GROUP, PSAMPLE_ATTR_SAMPLE_GROUP,
PSAMPLE_ATTR_GROUP_SEQ, PSAMPLE_ATTR_GROUP_SEQ,
PSAMPLE_ATTR_SAMPLE_RATE, PSAMPLE_ATTR_SAMPLE_RATE, /* u32, ratio between observed and
* sampled packets or scaled probability
* if PSAMPLE_ATTR_SAMPLE_PROBABILITY
* is set.
*/
PSAMPLE_ATTR_DATA, PSAMPLE_ATTR_DATA,
PSAMPLE_ATTR_GROUP_REFCOUNT, PSAMPLE_ATTR_GROUP_REFCOUNT,
PSAMPLE_ATTR_TUNNEL, PSAMPLE_ATTR_TUNNEL,
...@@ -19,6 +23,11 @@ enum { ...@@ -19,6 +23,11 @@ enum {
PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */ PSAMPLE_ATTR_LATENCY, /* u64, nanoseconds */
PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */ PSAMPLE_ATTR_TIMESTAMP, /* u64, nanoseconds */
PSAMPLE_ATTR_PROTO, /* u16 */ PSAMPLE_ATTR_PROTO, /* u16 */
PSAMPLE_ATTR_USER_COOKIE, /* binary, user provided data */
PSAMPLE_ATTR_SAMPLE_PROBABILITY,/* no argument, interpret rate in
* PSAMPLE_ATTR_SAMPLE_RATE as a
* probability scaled 0 - U32_MAX.
*/
__PSAMPLE_ATTR_MAX __PSAMPLE_ATTR_MAX
}; };
......
...@@ -10,6 +10,7 @@ config OPENVSWITCH ...@@ -10,6 +10,7 @@ config OPENVSWITCH
(NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \ (NF_CONNTRACK && ((!NF_DEFRAG_IPV6 || NF_DEFRAG_IPV6) && \
(!NF_NAT || NF_NAT) && \ (!NF_NAT || NF_NAT) && \
(!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT))) (!NETFILTER_CONNCOUNT || NETFILTER_CONNCOUNT)))
depends on PSAMPLE || !PSAMPLE
select LIBCRC32C select LIBCRC32C
select MPLS select MPLS
select NET_MPLS_GSO select NET_MPLS_GSO
......
...@@ -24,6 +24,11 @@ ...@@ -24,6 +24,11 @@
#include <net/checksum.h> #include <net/checksum.h>
#include <net/dsfield.h> #include <net/dsfield.h>
#include <net/mpls.h> #include <net/mpls.h>
#if IS_ENABLED(CONFIG_PSAMPLE)
#include <net/psample.h>
#endif
#include <net/sctp/checksum.h> #include <net/sctp/checksum.h>
#include "datapath.h" #include "datapath.h"
...@@ -1043,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb, ...@@ -1043,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
struct nlattr *sample_arg; struct nlattr *sample_arg;
int rem = nla_len(attr); int rem = nla_len(attr);
const struct sample_arg *arg; const struct sample_arg *arg;
u32 init_probability;
bool clone_flow_key; bool clone_flow_key;
int err;
/* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */ /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
sample_arg = nla_data(attr); sample_arg = nla_data(attr);
arg = nla_data(sample_arg); arg = nla_data(sample_arg);
actions = nla_next(sample_arg, &rem); actions = nla_next(sample_arg, &rem);
init_probability = OVS_CB(skb)->probability;
if ((arg->probability != U32_MAX) && if ((arg->probability != U32_MAX) &&
(!arg->probability || get_random_u32() > arg->probability)) { (!arg->probability || get_random_u32() > arg->probability)) {
...@@ -1057,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb, ...@@ -1057,9 +1065,16 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
return 0; return 0;
} }
OVS_CB(skb)->probability = arg->probability;
clone_flow_key = !arg->exec; clone_flow_key = !arg->exec;
return clone_execute(dp, skb, key, 0, actions, rem, last, err = clone_execute(dp, skb, key, 0, actions, rem, last,
clone_flow_key); clone_flow_key);
if (!last)
OVS_CB(skb)->probability = init_probability;
return err;
} }
/* When 'last' is true, clone() should always consume the 'skb'. /* When 'last' is true, clone() should always consume the 'skb'.
...@@ -1299,6 +1314,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) ...@@ -1299,6 +1314,44 @@ static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key)
return 0; return 0;
} }
#if IS_ENABLED(CONFIG_PSAMPLE)
static void execute_psample(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr)
{
struct psample_group psample_group = {};
struct psample_metadata md = {};
const struct nlattr *a;
u32 rate;
int rem;
nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) {
switch (nla_type(a)) {
case OVS_PSAMPLE_ATTR_GROUP:
psample_group.group_num = nla_get_u32(a);
break;
case OVS_PSAMPLE_ATTR_COOKIE:
md.user_cookie = nla_data(a);
md.user_cookie_len = nla_len(a);
break;
}
}
psample_group.net = ovs_dp_get_net(dp);
md.in_ifindex = OVS_CB(skb)->input_vport->dev->ifindex;
md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
md.rate_as_probability = 1;
rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
psample_sample_packet(&psample_group, skb, rate, &md);
}
#else
static void execute_psample(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr)
{}
#endif
/* Execute a list of actions against 'skb'. */ /* Execute a list of actions against 'skb'. */
static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, struct sw_flow_key *key,
...@@ -1502,6 +1555,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, ...@@ -1502,6 +1555,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
ovs_kfree_skb_reason(skb, reason); ovs_kfree_skb_reason(skb, reason);
return 0; return 0;
} }
case OVS_ACTION_ATTR_PSAMPLE:
execute_psample(dp, skb, a);
OVS_CB(skb)->cutlen = 0;
if (nla_is_last(a, rem)) {
consume_skb(skb);
return 0;
}
break;
} }
if (unlikely(err)) { if (unlikely(err)) {
......
...@@ -115,12 +115,15 @@ struct datapath { ...@@ -115,12 +115,15 @@ struct datapath {
* fragmented. * fragmented.
* @acts_origlen: The netlink size of the flow actions applied to this skb. * @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed. * @cutlen: The number of bytes from the packet end to be removed.
* @probability: The sampling probability that was applied to this skb; 0 means
* no sampling has occurred; U32_MAX means 100% probability.
*/ */
struct ovs_skb_cb { struct ovs_skb_cb {
struct vport *input_vport; struct vport *input_vport;
u16 mru; u16 mru;
u16 acts_origlen; u16 acts_origlen;
u32 cutlen; u32 cutlen;
u32 probability;
}; };
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
......
...@@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) ...@@ -64,6 +64,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_TRUNC:
case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_USERSPACE:
case OVS_ACTION_ATTR_DROP: case OVS_ACTION_ATTR_DROP:
case OVS_ACTION_ATTR_PSAMPLE:
break; break;
case OVS_ACTION_ATTR_CT: case OVS_ACTION_ATTR_CT:
...@@ -2409,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) ...@@ -2409,7 +2410,7 @@ static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
/* Whenever new actions are added, the need to update this /* Whenever new actions are added, the need to update this
* function should be considered. * function should be considered.
*/ */
BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 24); BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25);
if (!actions) if (!actions)
return; return;
...@@ -3157,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net, ...@@ -3157,6 +3158,28 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return 0; return 0;
} }
static int validate_psample(const struct nlattr *attr)
{
static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
[OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
[OVS_PSAMPLE_ATTR_COOKIE] = {
.type = NLA_BINARY,
.len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
},
};
struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
int err;
if (!IS_ENABLED(CONFIG_PSAMPLE))
return -EOPNOTSUPP;
err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
if (err)
return err;
return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
}
static int copy_action(const struct nlattr *from, static int copy_action(const struct nlattr *from,
struct sw_flow_actions **sfa, bool log) struct sw_flow_actions **sfa, bool log)
{ {
...@@ -3212,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, ...@@ -3212,6 +3235,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
[OVS_ACTION_ATTR_DROP] = sizeof(u32), [OVS_ACTION_ATTR_DROP] = sizeof(u32),
[OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
}; };
const struct ovs_action_push_vlan *vlan; const struct ovs_action_push_vlan *vlan;
int type = nla_type(a); int type = nla_type(a);
...@@ -3490,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, ...@@ -3490,6 +3514,12 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
return -EINVAL; return -EINVAL;
break; break;
case OVS_ACTION_ATTR_PSAMPLE:
err = validate_psample(a);
if (err)
return err;
break;
default: default:
OVS_NLERR(log, "Unknown Action type %d", type); OVS_NLERR(log, "Unknown Action type %d", type);
return -EINVAL; return -EINVAL;
......
...@@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb, ...@@ -500,6 +500,7 @@ int ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
OVS_CB(skb)->input_vport = vport; OVS_CB(skb)->input_vport = vport;
OVS_CB(skb)->mru = 0; OVS_CB(skb)->mru = 0;
OVS_CB(skb)->cutlen = 0; OVS_CB(skb)->cutlen = 0;
OVS_CB(skb)->probability = 0;
if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) { if (unlikely(dev_net(skb->dev) != ovs_dp_get_net(vport->dp))) {
u32 mark; u32 mark;
......
...@@ -376,6 +376,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, ...@@ -376,6 +376,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
void *data; void *data;
int ret; int ret;
if (!genl_has_listeners(&psample_nl_family, group->net,
PSAMPLE_NL_MCGRP_SAMPLE))
return;
meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) + meta_len = (in_ifindex ? nla_total_size(sizeof(u16)) : 0) +
(out_ifindex ? nla_total_size(sizeof(u16)) : 0) + (out_ifindex ? nla_total_size(sizeof(u16)) : 0) +
(md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) + (md->out_tc_valid ? nla_total_size(sizeof(u16)) : 0) +
...@@ -386,7 +390,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, ...@@ -386,7 +390,9 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
nla_total_size(sizeof(u32)) + /* group_num */ nla_total_size(sizeof(u32)) + /* group_num */
nla_total_size(sizeof(u32)) + /* seq */ nla_total_size(sizeof(u32)) + /* seq */
nla_total_size_64bit(sizeof(u64)) + /* timestamp */ nla_total_size_64bit(sizeof(u64)) + /* timestamp */
nla_total_size(sizeof(u16)); /* protocol */ nla_total_size(sizeof(u16)) + /* protocol */
(md->user_cookie_len ?
nla_total_size(md->user_cookie_len) : 0); /* user cookie */
#ifdef CONFIG_INET #ifdef CONFIG_INET
tun_info = skb_tunnel_info(skb); tun_info = skb_tunnel_info(skb);
...@@ -486,6 +492,14 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb, ...@@ -486,6 +492,14 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
} }
#endif #endif
if (md->user_cookie && md->user_cookie_len &&
nla_put(nl_skb, PSAMPLE_ATTR_USER_COOKIE, md->user_cookie_len,
md->user_cookie))
goto error;
if (md->rate_as_probability)
nla_put_flag(skb, PSAMPLE_ATTR_SAMPLE_PROBABILITY);
genlmsg_end(nl_skb, data); genlmsg_end(nl_skb, data);
genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0, genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC); PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
......
...@@ -167,7 +167,9 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb, ...@@ -167,7 +167,9 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
{ {
struct tcf_sample *s = to_sample(a); struct tcf_sample *s = to_sample(a);
struct psample_group *psample_group; struct psample_group *psample_group;
u8 cookie_data[TC_COOKIE_MAX_SIZE];
struct psample_metadata md = {}; struct psample_metadata md = {};
struct tc_cookie *user_cookie;
int retval; int retval;
tcf_lastuse_update(&s->tcf_tm); tcf_lastuse_update(&s->tcf_tm);
...@@ -189,6 +191,16 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb, ...@@ -189,6 +191,16 @@ TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb,
if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev)) if (skb_at_tc_ingress(skb) && tcf_sample_dev_ok_push(skb->dev))
skb_push(skb, skb->mac_len); skb_push(skb, skb->mac_len);
rcu_read_lock();
user_cookie = rcu_dereference(a->user_cookie);
if (user_cookie) {
memcpy(cookie_data, user_cookie->data,
user_cookie->len);
md.user_cookie = cookie_data;
md.user_cookie_len = user_cookie->len;
}
rcu_read_unlock();
md.trunc_size = s->truncate ? s->trunc_size : skb->len; md.trunc_size = s->truncate ? s->trunc_size : skb->len;
psample_sample_packet(psample_group, skb, s->rate, &md); psample_sample_packet(psample_group, skb, s->rate, &md);
......
...@@ -20,7 +20,8 @@ tests=" ...@@ -20,7 +20,8 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces upcall_interfaces ovs: test the upcall interfaces
drop_reason drop: test drop reasons are emitted" drop_reason drop: test drop reasons are emitted
psample psample: Sampling packets with psample"
info() { info() {
[ "${ovs_dir}" != "" ] && [ "${ovs_dir}" != "" ] &&
...@@ -105,12 +106,21 @@ ovs_netns_spawn_daemon() { ...@@ -105,12 +106,21 @@ ovs_netns_spawn_daemon() {
shift shift
netns=$1 netns=$1
shift shift
info "spawning cmd: $*" if [ "$netns" == "_default" ]; then
ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr & $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
else
ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
fi
pid=$! pid=$!
ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null" ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
} }
ovs_spawn_daemon() {
sbx=$1
shift
ovs_netns_spawn_daemon $sbx "_default" $*
}
ovs_add_netns_and_veths () { ovs_add_netns_and_veths () {
info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}" info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
ovs_sbx "$1" ip netns add "$3" || return 1 ovs_sbx "$1" ip netns add "$3" || return 1
...@@ -173,6 +183,19 @@ ovs_drop_reason_count() ...@@ -173,6 +183,19 @@ ovs_drop_reason_count()
return `echo "$perf_output" | grep "$pattern" | wc -l` return `echo "$perf_output" | grep "$pattern" | wc -l`
} }
ovs_test_flow_fails () {
ERR_MSG="Flow actions may not be safe on all matching packets"
PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
ovs_add_flow $@ &> /dev/null $@ && return 1
POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
if [ "$PRE_TEST" == "$POST_TEST" ]; then
return 1
fi
return 0
}
usage() { usage() {
echo echo
echo "$0 [OPTIONS] [TEST]..." echo "$0 [OPTIONS] [TEST]..."
...@@ -187,6 +210,92 @@ usage() { ...@@ -187,6 +210,92 @@ usage() {
exit 1 exit 1
} }
# psample test
# - use psample to observe packets
test_psample() {
sbx_add "test_psample" || return $?
# Add a datapath with per-vport dispatching.
ovs_add_dp "test_psample" psample -V 2:1 || return 1
info "create namespaces"
ovs_add_netns_and_veths "test_psample" "psample" \
client c0 c1 172.31.110.10/24 -u || return 1
ovs_add_netns_and_veths "test_psample" "psample" \
server s0 s1 172.31.110.20/24 -u || return 1
# Check if psample actions can be configured.
ovs_add_flow "test_psample" psample \
'in_port(1),eth(),eth_type(0x0806),arp()' 'psample(group=1)' &> /dev/null
if [ $? == 1 ]; then
info "no support for psample - skipping"
ovs_exit_sig
return $ksft_skip
fi
ovs_del_flows "test_psample" psample
# Test action verification.
OLDIFS=$IFS
IFS='*'
min_key='in_port(1),eth(),eth_type(0x0800),ipv4()'
for testcase in \
"cookie to large"*"psample(group=1,cookie=1615141312111009080706050403020100)" \
"no group with cookie"*"psample(cookie=abcd)" \
"no group"*"psample()";
do
set -- $testcase;
ovs_test_flow_fails "test_psample" psample $min_key $2
if [ $? == 1 ]; then
info "failed - $1"
return 1
fi
done
IFS=$OLDIFS
ovs_del_flows "test_psample" psample
# Allow ARP
ovs_add_flow "test_psample" psample \
'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
ovs_add_flow "test_psample" psample \
'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
# Sample first 14 bytes of all traffic.
ovs_add_flow "test_psample" psample \
"in_port(1),eth(),eth_type(0x0800),ipv4()" \
"trunc(14),psample(group=1,cookie=c0ffee),2"
# Sample all traffic. In this case, use a sample() action with both
# psample and an upcall emulating simultaneous local sampling and
# sFlow / IPFIX.
nlpid=$(grep -E "listening on upcall packet handler" \
$ovs_dir/s0.out | cut -d ":" -f 2 | tr -d ' ')
ovs_add_flow "test_psample" psample \
"in_port(2),eth(),eth_type(0x0800),ipv4()" \
"sample(sample=100%,actions(psample(group=2,cookie=eeff0c),userspace(pid=${nlpid},userdata=eeff0c))),1"
# Record psample data.
ovs_spawn_daemon "test_psample" python3 $ovs_base/ovs-dpctl.py psample-events
# Send a single ping.
sleep 1
ovs_sbx "test_psample" ip netns exec client ping -I c1 172.31.110.20 -c 1 || return 1
sleep 1
# We should have received one userspace action upcall and 2 psample packets.
grep -E "userspace action command" $ovs_dir/s0.out >/dev/null 2>&1 || return 1
# client -> server samples should only contain the first 14 bytes of the packet.
grep -E "rate:4294967295,group:1,cookie:c0ffee data:[0-9a-f]{28}$" \
$ovs_dir/stdout >/dev/null 2>&1 || return 1
grep -E "rate:4294967295,group:2,cookie:eeff0c" \
$ovs_dir/stdout >/dev/null 2>&1 || return 1
return 0
}
# drop_reason test # drop_reason test
# - drop packets and verify the right drop reason is reported # - drop packets and verify the right drop reason is reported
test_drop_reason() { test_drop_reason() {
......
...@@ -8,6 +8,7 @@ import argparse ...@@ -8,6 +8,7 @@ import argparse
import errno import errno
import ipaddress import ipaddress
import logging import logging
import math
import multiprocessing import multiprocessing
import re import re
import socket import socket
...@@ -27,8 +28,10 @@ try: ...@@ -27,8 +28,10 @@ try:
from pyroute2.netlink import genlmsg from pyroute2.netlink import genlmsg
from pyroute2.netlink import nla from pyroute2.netlink import nla
from pyroute2.netlink import nlmsg_atoms from pyroute2.netlink import nlmsg_atoms
from pyroute2.netlink.event import EventSocket
from pyroute2.netlink.exceptions import NetlinkError from pyroute2.netlink.exceptions import NetlinkError
from pyroute2.netlink.generic import GenericNetlinkSocket from pyroute2.netlink.generic import GenericNetlinkSocket
from pyroute2.netlink.nlsocket import Marshal
import pyroute2 import pyroute2
import pyroute2.iproute import pyroute2.iproute
...@@ -60,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2 ...@@ -60,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2
OVS_FLOW_CMD_GET = 3 OVS_FLOW_CMD_GET = 3
OVS_FLOW_CMD_SET = 4 OVS_FLOW_CMD_SET = 4
UINT32_MAX = 0xFFFFFFFF
def macstr(mac): def macstr(mac):
outstr = ":".join(["%02X" % i for i in mac]) outstr = ":".join(["%02X" % i for i in mac])
...@@ -281,6 +285,75 @@ def parse_extract_field( ...@@ -281,6 +285,75 @@ def parse_extract_field(
return str_skipped, data return str_skipped, data
def parse_attrs(actstr, attr_desc):
"""Parses the given action string and returns a list of netlink
attributes based on a list of attribute descriptions.
Each element in the attribute description list is a tuple such as:
(name, attr_name, parse_func)
where:
name: is the string representing the attribute
attr_name: is the name of the attribute as defined in the uAPI.
parse_func: is a callable accepting a string and returning either
a single object (the parsed attribute value) or a tuple of
two values (the parsed attribute value and the remaining string)
Returns a list of attributes and the remaining string.
"""
def parse_attr(actstr, key, func):
actstr = actstr[len(key) :]
if not func:
return None, actstr
delim = actstr[0]
actstr = actstr[1:]
if delim == "=":
pos = strcspn(actstr, ",)")
ret = func(actstr[:pos])
else:
ret = func(actstr)
if isinstance(ret, tuple):
(datum, actstr) = ret
else:
datum = ret
actstr = actstr[strcspn(actstr, ",)"):]
if delim == "(":
if not actstr or actstr[0] != ")":
raise ValueError("Action contains unbalanced parentheses")
actstr = actstr[1:]
actstr = actstr[strspn(actstr, ", ") :]
return datum, actstr
attrs = []
attr_desc = list(attr_desc)
while actstr and actstr[0] != ")" and attr_desc:
found = False
for i, (key, attr, func) in enumerate(attr_desc):
if actstr.startswith(key):
datum, actstr = parse_attr(actstr, key, func)
attrs.append([attr, datum])
found = True
del attr_desc[i]
if not found:
raise ValueError("Unknown attribute: '%s'" % actstr)
actstr = actstr[strspn(actstr, ", ") :]
if actstr[0] != ")":
raise ValueError("Action string contains extra garbage or has "
"unbalanced parenthesis: '%s'" % actstr)
return attrs, actstr[1:]
class ovs_dp_msg(genlmsg): class ovs_dp_msg(genlmsg):
# include the OVS version # include the OVS version
# We need a custom header rather than just being able to rely on # We need a custom header rather than just being able to rely on
...@@ -299,7 +372,7 @@ class ovsactions(nla): ...@@ -299,7 +372,7 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_SET", "ovskey"), ("OVS_ACTION_ATTR_SET", "ovskey"),
("OVS_ACTION_ATTR_PUSH_VLAN", "none"), ("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
("OVS_ACTION_ATTR_POP_VLAN", "flag"), ("OVS_ACTION_ATTR_POP_VLAN", "flag"),
("OVS_ACTION_ATTR_SAMPLE", "none"), ("OVS_ACTION_ATTR_SAMPLE", "sample"),
("OVS_ACTION_ATTR_RECIRC", "uint32"), ("OVS_ACTION_ATTR_RECIRC", "uint32"),
("OVS_ACTION_ATTR_HASH", "none"), ("OVS_ACTION_ATTR_HASH", "none"),
("OVS_ACTION_ATTR_PUSH_MPLS", "none"), ("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
...@@ -318,8 +391,85 @@ class ovsactions(nla): ...@@ -318,8 +391,85 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
("OVS_ACTION_ATTR_DEC_TTL", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"),
("OVS_ACTION_ATTR_DROP", "uint32"), ("OVS_ACTION_ATTR_DROP", "uint32"),
("OVS_ACTION_ATTR_PSAMPLE", "psample"),
) )
class psample(nla):
nla_flags = NLA_F_NESTED
nla_map = (
("OVS_PSAMPLE_ATTR_UNSPEC", "none"),
("OVS_PSAMPLE_ATTR_GROUP", "uint32"),
("OVS_PSAMPLE_ATTR_COOKIE", "array(uint8)"),
)
def dpstr(self, more=False):
args = "group=%d" % self.get_attr("OVS_PSAMPLE_ATTR_GROUP")
cookie = self.get_attr("OVS_PSAMPLE_ATTR_COOKIE")
if cookie:
args += ",cookie(%s)" % \
"".join(format(x, "02x") for x in cookie)
return "psample(%s)" % args
def parse(self, actstr):
desc = (
("group", "OVS_PSAMPLE_ATTR_GROUP", int),
("cookie", "OVS_PSAMPLE_ATTR_COOKIE",
lambda x: list(bytearray.fromhex(x)))
)
attrs, actstr = parse_attrs(actstr, desc)
for attr in attrs:
self["attrs"].append(attr)
return actstr
class sample(nla):
nla_flags = NLA_F_NESTED
nla_map = (
("OVS_SAMPLE_ATTR_UNSPEC", "none"),
("OVS_SAMPLE_ATTR_PROBABILITY", "uint32"),
("OVS_SAMPLE_ATTR_ACTIONS", "ovsactions"),
)
def dpstr(self, more=False):
args = []
args.append("sample={:.2f}%".format(
100 * self.get_attr("OVS_SAMPLE_ATTR_PROBABILITY") /
UINT32_MAX))
actions = self.get_attr("OVS_SAMPLE_ATTR_ACTIONS")
if actions:
args.append("actions(%s)" % actions.dpstr(more))
return "sample(%s)" % ",".join(args)
def parse(self, actstr):
def parse_nested_actions(actstr):
subacts = ovsactions()
parsed_len = subacts.parse(actstr)
return subacts, actstr[parsed_len :]
def percent_to_rate(percent):
percent = float(percent.strip('%'))
return int(math.floor(UINT32_MAX * (percent / 100.0) + .5))
desc = (
("sample", "OVS_SAMPLE_ATTR_PROBABILITY", percent_to_rate),
("actions", "OVS_SAMPLE_ATTR_ACTIONS", parse_nested_actions),
)
attrs, actstr = parse_attrs(actstr, desc)
for attr in attrs:
self["attrs"].append(attr)
return actstr
class ctact(nla): class ctact(nla):
nla_flags = NLA_F_NESTED nla_flags = NLA_F_NESTED
...@@ -441,13 +591,27 @@ class ovsactions(nla): ...@@ -441,13 +591,27 @@ class ovsactions(nla):
print_str += "userdata=" print_str += "userdata="
for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"): for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
print_str += "%x." % f print_str += "%x." % f
if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None: if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None:
print_str += "egress_tun_port=%d" % self.get_attr( print_str += "egress_tun_port=%d" % self.get_attr(
"OVS_USERSPACE_ATTR_TUN_PORT" "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT"
) )
print_str += ")" print_str += ")"
return print_str return print_str
def parse(self, actstr):
attrs_desc = (
("pid", "OVS_USERSPACE_ATTR_PID", int),
("userdata", "OVS_USERSPACE_ATTR_USERDATA",
lambda x: list(bytearray.fromhex(x))),
("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int)
)
attrs, actstr = parse_attrs(actstr, attrs_desc)
for attr in attrs:
self["attrs"].append(attr)
return actstr
def dpstr(self, more=False): def dpstr(self, more=False):
print_str = "" print_str = ""
...@@ -683,6 +847,37 @@ class ovsactions(nla): ...@@ -683,6 +847,37 @@ class ovsactions(nla):
self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
parsed = True parsed = True
elif parse_starts_block(actstr, "sample(", False):
sampleact = self.sample()
actstr = sampleact.parse(actstr[len("sample(") : ])
self["attrs"].append(["OVS_ACTION_ATTR_SAMPLE", sampleact])
parsed = True
elif parse_starts_block(actstr, "psample(", False):
psampleact = self.psample()
actstr = psampleact.parse(actstr[len("psample(") : ])
self["attrs"].append(["OVS_ACTION_ATTR_PSAMPLE", psampleact])
parsed = True
elif parse_starts_block(actstr, "userspace(", False):
uact = self.userspace()
actstr = uact.parse(actstr[len("userspace(") : ])
self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact])
parsed = True
elif parse_starts_block(actstr, "trunc(", False):
parencount += 1
actstr, val = parse_extract_field(
actstr,
"trunc(",
r"([0-9]+)",
int,
False,
None,
)
self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val])
parsed = True
actstr = actstr[strspn(actstr, ", ") :] actstr = actstr[strspn(actstr, ", ") :]
while parencount > 0: while parencount > 0:
parencount -= 1 parencount -= 1
...@@ -2267,10 +2462,70 @@ class OvsFlow(GenericNetlinkSocket): ...@@ -2267,10 +2462,70 @@ class OvsFlow(GenericNetlinkSocket):
print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True) print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True)
def execute(self, packetmsg): def execute(self, packetmsg):
print("userspace execute command") print("userspace execute command", flush=True)
def action(self, packetmsg): def action(self, packetmsg):
print("userspace action command") print("userspace action command", flush=True)
class psample_sample(genlmsg):
nla_map = (
("PSAMPLE_ATTR_IIFINDEX", "none"),
("PSAMPLE_ATTR_OIFINDEX", "none"),
("PSAMPLE_ATTR_ORIGSIZE", "none"),
("PSAMPLE_ATTR_SAMPLE_GROUP", "uint32"),
("PSAMPLE_ATTR_GROUP_SEQ", "none"),
("PSAMPLE_ATTR_SAMPLE_RATE", "uint32"),
("PSAMPLE_ATTR_DATA", "array(uint8)"),
("PSAMPLE_ATTR_GROUP_REFCOUNT", "none"),
("PSAMPLE_ATTR_TUNNEL", "none"),
("PSAMPLE_ATTR_PAD", "none"),
("PSAMPLE_ATTR_OUT_TC", "none"),
("PSAMPLE_ATTR_OUT_TC_OCC", "none"),
("PSAMPLE_ATTR_LATENCY", "none"),
("PSAMPLE_ATTR_TIMESTAMP", "none"),
("PSAMPLE_ATTR_PROTO", "none"),
("PSAMPLE_ATTR_USER_COOKIE", "array(uint8)"),
)
def dpstr(self):
fields = []
data = ""
for (attr, value) in self["attrs"]:
if attr == "PSAMPLE_ATTR_SAMPLE_GROUP":
fields.append("group:%d" % value)
if attr == "PSAMPLE_ATTR_SAMPLE_RATE":
fields.append("rate:%d" % value)
if attr == "PSAMPLE_ATTR_USER_COOKIE":
value = "".join(format(x, "02x") for x in value)
fields.append("cookie:%s" % value)
if attr == "PSAMPLE_ATTR_DATA" and len(value) > 0:
data = "data:%s" % "".join(format(x, "02x") for x in value)
return ("%s %s" % (",".join(fields), data)).strip()
class psample_msg(Marshal):
PSAMPLE_CMD_SAMPLE = 0
PSAMPLE_CMD_GET_GROUP = 1
PSAMPLE_CMD_NEW_GROUP = 2
PSAMPLE_CMD_DEL_GROUP = 3
PSAMPLE_CMD_SET_FILTER = 4
msg_map = {PSAMPLE_CMD_SAMPLE: psample_sample}
class PsampleEvent(EventSocket):
genl_family = "psample"
mcast_groups = ["packets"]
marshal_class = psample_msg
def read_samples(self):
while True:
try:
for msg in self.get():
print(msg.dpstr(), flush=True)
except NetlinkError as ne:
raise ne
def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
...@@ -2337,7 +2592,7 @@ def main(argv): ...@@ -2337,7 +2592,7 @@ def main(argv):
help="Increment 'verbose' output counter.", help="Increment 'verbose' output counter.",
default=0, default=0,
) )
subparsers = parser.add_subparsers() subparsers = parser.add_subparsers(dest="subcommand")
showdpcmd = subparsers.add_parser("show") showdpcmd = subparsers.add_parser("show")
showdpcmd.add_argument( showdpcmd.add_argument(
...@@ -2412,6 +2667,8 @@ def main(argv): ...@@ -2412,6 +2667,8 @@ def main(argv):
delfscmd = subparsers.add_parser("del-flows") delfscmd = subparsers.add_parser("del-flows")
delfscmd.add_argument("flsbr", help="Datapath name") delfscmd.add_argument("flsbr", help="Datapath name")
subparsers.add_parser("psample-events")
args = parser.parse_args() args = parser.parse_args()
if args.verbose > 0: if args.verbose > 0:
...@@ -2426,6 +2683,9 @@ def main(argv): ...@@ -2426,6 +2683,9 @@ def main(argv):
sys.setrecursionlimit(100000) sys.setrecursionlimit(100000)
if args.subcommand == "psample-events":
PsampleEvent().read_samples()
if hasattr(args, "showdp"): if hasattr(args, "showdp"):
found = False found = False
for iface in ndb.interfaces: for iface in ndb.interfaces:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment