Commit 341ac980 authored by Stanislav Fomichev's avatar Stanislav Fomichev Committed by Alexei Starovoitov

xsk: Support tx_metadata_len

For zerocopy mode, tx_desc->addr can point to an arbitrary offset
and carry some TX metadata in the headroom. For copy mode, there
is no way currently to populate skb metadata.

Introduce new tx_metadata_len umem config option that indicates how many
bytes to treat as metadata. Metadata bytes come prior to tx_desc address
(same as in RX case).

The size of the metadata has mostly the same constraints as XDP:
- less than 256 bytes
- 8-byte aligned (compared to 4-byte alignment on xdp, due to 8-byte
  timestamp in the completion)
- non-zero

This data is not interpreted in any way right now.
Reviewed-by: default avatarSong Yoong Siang <yoong.siang.song@intel.com>
Signed-off-by: default avatarStanislav Fomichev <sdf@google.com>
Reviewed-by: default avatarJakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/r/20231127190319.1190813-2-sdf@google.comSigned-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parent 40d0eb02
......@@ -30,6 +30,7 @@ struct xdp_umem {
struct user_struct *user;
refcount_t users;
u8 flags;
u8 tx_metadata_len;
bool zc;
struct page **pgs;
int id;
......
......@@ -77,6 +77,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
bool dma_need_sync;
......
......@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
__u32 tx_metadata_len;
};
struct xdp_statistics {
......
......@@ -199,6 +199,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
return -EINVAL;
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
......@@ -207,6 +210,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
......
......@@ -1283,6 +1283,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
struct xdp_umem_reg_v2 {
__u64 addr; /* Start of packet data area */
__u64 len; /* Length of packet data area */
__u32 chunk_size;
__u32 headroom;
__u32 flags;
};
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
......@@ -1326,8 +1334,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
else if (optlen < sizeof(mr))
else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
else if (optlen < sizeof(mr))
mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
......
......@@ -85,6 +85,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
pool->tx_metadata_len = umem->tx_metadata_len;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
......
......@@ -143,15 +143,17 @@ static inline bool xp_unused_options_set(u32 options)
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 offset = desc->addr & (pool->chunk_size - 1);
u64 addr = desc->addr - pool->tx_metadata_len;
u64 len = desc->len + pool->tx_metadata_len;
u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
if (offset + desc->len > pool->chunk_size)
if (offset + len > pool->chunk_size)
return false;
if (desc->addr >= pool->addrs_cnt)
if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
......@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
if (desc->len > pool->chunk_size)
if (len > pool->chunk_size)
return false;
if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
......
......@@ -76,6 +76,7 @@ struct xdp_umem_reg {
__u32 chunk_size;
__u32 headroom;
__u32 flags;
__u32 tx_metadata_len;
};
struct xdp_statistics {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment