Commit b65ef48c authored by Alexei Starovoitov

Merge branch 'XDP-hints: change RX-hash kfunc bpf_xdp_metadata_rx_hash'

Jesper Dangaard Brouer says:

====================

Current API for bpf_xdp_metadata_rx_hash() returns the raw RSS hash value,
but doesn't provide information on the RSS hash type (part of 6.3-rc).

This patchset proposes changing the function call signature by adding
a pointer argument through which the RSS hash type is returned.

The patchset also removes all bpf_printk() calls from the xdp_hw_metadata
program, which we expect driver developers to use. Instead, counters are
introduced for relaying e.g. skip and fail info.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 0646dc31 0f26b74e
......@@ -681,14 +681,32 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}
/*
 * XDP metadata callback: report the RSS hash stored in the current CQE and
 * the header types indicated by the CQE status bits.
 *
 * @ctx:      XDP context; cast to the driver's struct mlx4_en_xdp_buff.
 * @hash:     Out: cqe->immed_rss_invalid converted from big-endian.
 * @rss_type: Out: bitmask of XDP_RSS_* bits derived from cqe->status.
 *
 * Return: 0 on success, -ENODATA when NETIF_F_RXHASH is not set in the
 * device features.
 */
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
struct mlx4_cqe *cqe = _ctx->cqe;
enum xdp_rss_hash_type xht = 0;
__be16 status;
/* Neither output is written when RX hashing is disabled on the device. */
if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH)))
return -ENODATA;
*hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid);
*hash = be32_to_cpu(cqe->immed_rss_invalid);
/* status stays big-endian; the masks are byte-swapped instead of the field. */
status = cqe->status;
/*
 * L4 protocol bits use plain assignment, so if both the TCP and UDP status
 * bits were set the UDP value would win.
 *
 * NOTE(review): only the protocol-specific bit is stored here. The comment
 * in enum xdp_rss_hash_type says drivers MUST also set XDP_RSS_L4 when the
 * hash covers L4 - confirm whether "XDP_RSS_L4 |" is missing on these two
 * assignments.
 */
if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP))
xht = XDP_RSS_L4_TCP;
if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP))
xht = XDP_RSS_L4_UDP;
/* L3 bits are OR-ed on top of whatever L4 bit was chosen above. */
if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F))
xht |= XDP_RSS_L3_IPV4;
if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) {
xht |= XDP_RSS_L3_IPV6;
/* A non-zero ipv6_ext_mask is reported as XDP_RSS_L3_DYNHDR. */
if (cqe->ipv6_ext_mask)
xht |= XDP_RSS_L3_DYNHDR;
}
*rss_type = xht;
return 0;
}
......
......@@ -798,7 +798,8 @@ int mlx4_en_netdev_event(struct notifier_block *this,
struct xdp_md;
int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash);
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
/*
* Functions for time stamping
......
......@@ -34,6 +34,7 @@
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
#include <linux/bitfield.h>
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
......@@ -169,14 +170,72 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
/* Mapping HW RSS Type bits CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into 4 bits */
#define RSS_TYPE_MAX_TABLE 16 /* 4-bits max 16 entries */
#define RSS_L4 GENMASK(1, 0)
#define RSS_L3 GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */
/* Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4, sorted
 * numerically. Each value places the IP type in the RSS_L3 bits and the L4
 * type in the RSS_L4 bits, giving a 4-bit number that is used as the index
 * into mlx5_xdp_rss_type[].
 */
enum mlx5_rss_hash_type {
RSS_TYPE_NO_HASH = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
RSS_TYPE_L3_IPV4 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
RSS_TYPE_L4_IPV4_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
RSS_TYPE_L4_IPV4_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
RSS_TYPE_L4_IPV4_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
RSS_TYPE_L3_IPV6 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
RSS_TYPE_L4_IPV6_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
RSS_TYPE_L4_IPV6_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
RSS_TYPE_L4_IPV6_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
};
/* Lookup table from the 4-bit HW RSS type (enum mlx5_rss_hash_type) to the
 * XDP RSS hash type. The table has an entry for every possible 4-bit index
 * and invalid combinations map to XDP_RSS_TYPE_NONE (zero), so the lookup
 * needs no bounds check.
 */
static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
[RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_NONE,
[1] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[2] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[3] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[RSS_TYPE_L3_IPV4] = XDP_RSS_TYPE_L3_IPV4,
[RSS_TYPE_L4_IPV4_TCP] = XDP_RSS_TYPE_L4_IPV4_TCP,
[RSS_TYPE_L4_IPV4_UDP] = XDP_RSS_TYPE_L4_IPV4_UDP,
[RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
[RSS_TYPE_L3_IPV6] = XDP_RSS_TYPE_L3_IPV6,
[RSS_TYPE_L4_IPV6_TCP] = XDP_RSS_TYPE_L4_IPV6_TCP,
[RSS_TYPE_L4_IPV6_UDP] = XDP_RSS_TYPE_L4_IPV6_UDP,
[RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
[12] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[13] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[14] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[15] = XDP_RSS_TYPE_NONE, /* Implicit zero */
};
/*
 * XDP metadata callback: report the RSS hash result of the current CQE and
 * translate the CQE's RSS hash type field into an XDP RSS hash type.
 *
 * @ctx:      XDP context; cast to the driver's struct mlx5e_xdp_buff.
 * @hash:     Out: cqe->rss_hash_result converted from big-endian.
 * @rss_type: Out: entry of mlx5_xdp_rss_type[] selected by the CQE hash type.
 *
 * Return: 0 on success, -ENODATA when NETIF_F_RXHASH is not set in the
 * features of the RX queue's device.
 */
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
const struct mlx5_cqe64 *cqe = _ctx->cqe;
u32 hash_type, l4_type, ip_type, lookup;
/* Neither output is written when RX hashing is disabled on the device. */
if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
return -ENODATA;
*hash = be32_to_cpu(_ctx->cqe->rss_hash_result);
*hash = be32_to_cpu(cqe->rss_hash_result);
hash_type = cqe->rss_hash_type;
BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */
/*
 * The IP type is masked in place (it already sits in the RSS_L3 bits),
 * while the L4 type is extracted from its CQE field down to the low bits.
 * Their OR is therefore at most 4 bits wide and always a valid index into
 * the RSS_TYPE_MAX_TABLE-entry table.
 */
ip_type = hash_type & CQE_RSS_HTYPE_IP;
l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type);
lookup = ip_type | l4_type;
*rss_type = mlx5_xdp_rss_type[lookup];
return 0;
}
......
......@@ -1648,14 +1648,18 @@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}
/*
 * XDP metadata callback: report the hash of the skb attached to the XDP
 * context, together with a coarse RSS type.
 *
 * @ctx:      XDP context; cast to struct veth_xdp_buff.
 * @hash:     Out: value returned by skb_get_hash() for the attached skb.
 * @rss_type: Out: XDP_RSS_TYPE_L4_ANY when skb->l4_hash is set, otherwise
 *            XDP_RSS_TYPE_NONE.
 *
 * Return: 0 on success, -ENODATA when the context has no skb.
 */
static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
struct veth_xdp_buff *_ctx = (void *)ctx;
struct sk_buff *skb = _ctx->skb;
if (!_ctx->skb)
if (!skb)
return -ENODATA;
*hash = skb_get_hash(_ctx->skb);
*hash = skb_get_hash(skb);
/* Only the l4_hash flag is consulted here, so no specific protocol bit is reported. */
*rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE;
return 0;
}
......
......@@ -36,6 +36,7 @@
#include <linux/types.h>
#include <rdma/ib_verbs.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/bitfield.h>
#if defined(__LITTLE_ENDIAN)
#define MLX5_SET_HOST_ENDIANNESS 0
......@@ -980,14 +981,23 @@ enum {
};
enum {
CQE_RSS_HTYPE_IP = 0x3 << 2,
CQE_RSS_HTYPE_IP = GENMASK(3, 2),
/* cqe->rss_hash_type[3:2] - IP destination selected for hash
* (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved)
*/
CQE_RSS_HTYPE_L4 = 0x3 << 6,
CQE_RSS_IP_NONE = 0x0,
CQE_RSS_IPV4 = 0x1,
CQE_RSS_IPV6 = 0x2,
CQE_RSS_RESERVED = 0x3,
CQE_RSS_HTYPE_L4 = GENMASK(7, 6),
/* cqe->rss_hash_type[7:6] - L4 destination selected for hash
* (00 = none, 01 = TCP, 10 = UDP, 11 = IPSEC.SPI)
*/
CQE_RSS_L4_NONE = 0x0,
CQE_RSS_L4_TCP = 0x1,
CQE_RSS_L4_UDP = 0x2,
CQE_RSS_L4_IPSEC = 0x3,
};
enum {
......
......@@ -1624,7 +1624,8 @@ struct net_device_ops {
/*
 * Function pointers for reading RX metadata from an XDP context.
 *
 * @xmo_rx_timestamp: writes the RX timestamp through @timestamp.
 * @xmo_rx_hash:      writes the RX hash through @hash and, in the
 *                    three-argument form, the RSS hash type through
 *                    @rss_type.
 */
struct xdp_metadata_ops {
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
};
/**
......
......@@ -8,6 +8,7 @@
#include <linux/skbuff.h> /* skb_shared_info */
#include <uapi/linux/netdev.h>
#include <linux/bitfield.h>
/**
* DOC: XDP RX-queue information
......@@ -425,6 +426,52 @@ XDP_METADATA_KFUNC_xxx
MAX_XDP_METADATA_KFUNC,
};
/* RSS hash type returned together with the RX hash. Values are bitmasks:
 * the first part defines individual L3/L4 bits, the second part defines
 * named combinations of those bits.
 */
enum xdp_rss_hash_type {
/* First part: Individual bits for L3/L4 types */
XDP_RSS_L3_IPV4 = BIT(0),
XDP_RSS_L3_IPV6 = BIT(1),
/* The fixed (L3) IPv4 and IPv6 headers can both be followed by
* variable/dynamic headers, IPv4 called Options and IPv6 called
* Extension Headers. HW RSS type can contain this info.
*/
XDP_RSS_L3_DYNHDR = BIT(2),
/* When the RSS hash covers L4, drivers MUST set the XDP_RSS_L4 bit in
* addition to the protocol-specific bit. This eases interaction with
* SKBs and avoids reserving a fixed mask for future L4 protocol bits.
*/
XDP_RSS_L4 = BIT(3), /* L4 based hash, proto can be unknown */
XDP_RSS_L4_TCP = BIT(4),
XDP_RSS_L4_UDP = BIT(5),
XDP_RSS_L4_SCTP = BIT(6),
XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */
/* Second part: RSS hash type combinations used for driver HW mapping */
XDP_RSS_TYPE_NONE = 0,
/* L2 has no bit of its own; it is an alias of XDP_RSS_TYPE_NONE. */
XDP_RSS_TYPE_L2 = XDP_RSS_TYPE_NONE,
XDP_RSS_TYPE_L3_IPV4 = XDP_RSS_L3_IPV4,
XDP_RSS_TYPE_L3_IPV6 = XDP_RSS_L3_IPV6,
XDP_RSS_TYPE_L3_IPV4_OPT = XDP_RSS_L3_IPV4 | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L3_IPV6_EX = XDP_RSS_L3_IPV6 | XDP_RSS_L3_DYNHDR,
/* Generic L4 bit only: no L3 or protocol-specific bit is set. */
XDP_RSS_TYPE_L4_ANY = XDP_RSS_L4,
XDP_RSS_TYPE_L4_IPV4_TCP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,
/* IPv6 L4 combinations with the dynamic (extension) header bit added. */
XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
};
#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
......
......@@ -734,13 +734,21 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim
* bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
* @ctx: XDP context pointer.
* @hash: Return value pointer.
* @rss_type: Return value pointer for RSS type.
*
* The RSS hash type (@rss_type) specifies what portion of packet headers NIC
* hardware used when calculating RSS hash value. The RSS type can be decoded
* via &enum xdp_rss_hash_type either matching on individual L3/L4 bits
* ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types*
* ``XDP_RSS_TYPE_L*``.
*
* Return:
* * Returns 0 on success or ``-errno`` on error.
* * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc
* * ``-ENODATA`` : means no RX-hash available for this frame
*/
__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
return -EOPNOTSUPP;
}
......
......@@ -273,6 +273,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
return -1;
ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);
......
......@@ -12,10 +12,14 @@ struct {
__type(value, __u32);
} xsk SEC(".maps");
__u64 pkts_skip = 0;
__u64 pkts_fail = 0;
__u64 pkts_redir = 0;
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
__u32 *hash) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
......@@ -26,7 +30,7 @@ int rx(struct xdp_md *ctx)
struct udphdr *udp = NULL;
struct iphdr *iph = NULL;
struct xdp_meta *meta;
int ret;
int err;
data = (void *)(long)ctx->data;
data_end = (void *)(long)ctx->data_end;
......@@ -46,17 +50,20 @@ int rx(struct xdp_md *ctx)
udp = NULL;
}
if (!udp)
if (!udp) {
__sync_add_and_fetch(&pkts_skip, 1);
return XDP_PASS;
}
if (udp->dest != bpf_htons(9091))
/* Forwarding UDP:9091 to AF_XDP */
if (udp->dest != bpf_htons(9091)) {
__sync_add_and_fetch(&pkts_skip, 1);
return XDP_PASS;
}
bpf_printk("forwarding UDP:9091 to AF_XDP");
ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
if (ret != 0) {
bpf_printk("bpf_xdp_adjust_meta returned %d", ret);
err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
if (err) {
__sync_add_and_fetch(&pkts_fail, 1);
return XDP_PASS;
}
......@@ -65,20 +72,19 @@ int rx(struct xdp_md *ctx)
meta = data_meta;
if (meta + 1 > data) {
bpf_printk("bpf_xdp_adjust_meta doesn't appear to work");
__sync_add_and_fetch(&pkts_fail, 1);
return XDP_PASS;
}
if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
else
err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
if (err)
meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */
if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
bpf_printk("populated rx_hash with %u", meta->rx_hash);
else
meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
if (err < 0)
meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */
__sync_add_and_fetch(&pkts_redir, 1);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
......
......@@ -21,8 +21,8 @@ struct {
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
__u32 *hash) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
SEC("xdp")
int rx(struct xdp_md *ctx)
......@@ -56,7 +56,7 @@ int rx(struct xdp_md *ctx)
if (timestamp == 0)
meta->rx_timestamp = 1;
bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
......
......@@ -5,17 +5,18 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
__u32 *hash) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
int called;
/*
 * Program in the "freplace/rx" section: invokes the RX-hash metadata kfunc,
 * increments the global `called` counter and returns XDP_PASS. The kfunc's
 * return value and outputs are deliberately not inspected.
 */
SEC("freplace/rx")
int freplace_rx(struct xdp_md *ctx)
{
enum xdp_rss_hash_type type = 0;
u32 hash = 0;
/* Call _any_ metadata function to make sure we don't crash. */
bpf_xdp_metadata_rx_hash(ctx, &hash);
bpf_xdp_metadata_rx_hash(ctx, &hash, &type);
called++;
return XDP_PASS;
}
......
......@@ -141,7 +141,11 @@ static void verify_xdp_metadata(void *data)
meta = data - sizeof(*meta);
printf("rx_timestamp: %llu\n", meta->rx_timestamp);
printf("rx_hash: %u\n", meta->rx_hash);
if (meta->rx_hash_err < 0)
printf("No rx_hash err=%d\n", meta->rx_hash_err);
else
printf("rx_hash: 0x%X with RSS type:0x%X\n",
meta->rx_hash, meta->rx_hash_type);
}
static void verify_skb_metadata(int fd)
......@@ -212,7 +216,9 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd)
while (true) {
errno = 0;
ret = poll(fds, rxq + 1, 1000);
printf("poll: %d (%d)\n", ret, errno);
printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
ret, errno, bpf_obj->bss->pkts_skip,
bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
if (ret < 0)
break;
if (ret == 0)
......
......@@ -12,4 +12,8 @@
/*
 * Metadata record that the BPF rx programs write into the XDP metadata area
 * and that the userspace side reads back from just before the packet data.
 */
struct xdp_meta {
/* RX timestamp; the BPF side stores 0 when no timestamp is available. */
__u64 rx_timestamp;
__u32 rx_hash;
/*
 * Shares storage: the kfunc writes rx_hash_type, while the BPF side stores
 * the negative return code in rx_hash_err when the RX-hash kfunc fails.
 * Readers treat rx_hash_err < 0 as "no hash".
 */
union {
__u32 rx_hash_type;
__s32 rx_hash_err;
};
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment