Commit bb18fc7a authored by Paolo Abeni

Merge branch 'abstract-page-from-net-stack'

Mina Almasry says:

====================
Abstract page from net stack

This series is a prerequisite to the devmem TCP series. For a full
snapshot of the code which includes these changes, feel free to check:

https://github.com/mina/linux/commits/tcpdevmem-rfcv5/

Currently these components in the net stack use the struct page
directly:

1. Drivers.
2. Page pool.
3. skb_frag_t.

To add support for new (non struct page) memory types to the net stack, we
must first abstract the current memory type.

Originally the plan was to reuse struct page* for the new memory types,
and to set the LSB on the page* to indicate it's not really a page.
However, for safe compiler type checking we need to introduce a new type.

netmem_ref is introduced to abstract the underlying memory type.
Currently it's a no-op abstraction that is always a struct page underneath.
In parallel there is an ongoing effort to add support for devmem to the
net stack:

https://lore.kernel.org/netdev/20231208005250.2910004-1-almasrymina@google.com/

Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Yunsheng Lin <linyunsheng@huawei.com>
Cc: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
====================

Link: https://lore.kernel.org/r/20240214223405.1972973-1-almasrymina@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 74293ea1 21d2e673
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#endif #endif
#include <net/net_debug.h> #include <net/net_debug.h>
#include <net/dropreason-core.h> #include <net/dropreason-core.h>
#include <net/netmem.h>
/** /**
* DOC: skb checksums * DOC: skb checksums
...@@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags; ...@@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags;
*/ */
#define GSO_BY_FRAGS 0xFFFF #define GSO_BY_FRAGS 0xFFFF
typedef struct bio_vec skb_frag_t; typedef struct skb_frag {
netmem_ref netmem;
unsigned int len;
unsigned int offset;
} skb_frag_t;
/** /**
* skb_frag_size() - Returns the size of a skb fragment * skb_frag_size() - Returns the size of a skb fragment
...@@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t; ...@@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t;
*/ */
static inline unsigned int skb_frag_size(const skb_frag_t *frag) static inline unsigned int skb_frag_size(const skb_frag_t *frag)
{ {
return frag->bv_len; return frag->len;
} }
/** /**
...@@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) ...@@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
*/ */
static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
{ {
frag->bv_len = size; frag->len = size;
} }
/** /**
...@@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) ...@@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
*/ */
static inline void skb_frag_size_add(skb_frag_t *frag, int delta) static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
{ {
frag->bv_len += delta; frag->len += delta;
} }
/** /**
...@@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) ...@@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
*/ */
static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
{ {
frag->bv_len -= delta; frag->len -= delta;
} }
/** /**
...@@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p) ...@@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p)
* skb_frag_foreach_page - loop over pages in a fragment * skb_frag_foreach_page - loop over pages in a fragment
* *
* @f: skb frag to operate on * @f: skb frag to operate on
* @f_off: offset from start of f->bv_page * @f_off: offset from start of f->netmem
* @f_len: length from f_off to loop over * @f_len: length from f_off to loop over
* @p: (temp var) current page * @p: (temp var) current page
* @p_off: (temp var) offset from start of current page, * @p_off: (temp var) offset from start of current page,
...@@ -2429,22 +2434,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) ...@@ -2429,22 +2434,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
return skb_headlen(skb) + __skb_pagelen(skb); return skb_headlen(skb) + __skb_pagelen(skb);
} }
static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
netmem_ref netmem, int off,
int size)
{
frag->netmem = netmem;
frag->offset = off;
skb_frag_size_set(frag, size);
}
static inline void skb_frag_fill_page_desc(skb_frag_t *frag, static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
struct page *page, struct page *page,
int off, int size) int off, int size)
{ {
frag->bv_page = page; skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
frag->bv_offset = off; }
skb_frag_size_set(frag, size);
static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
int i, netmem_ref netmem,
int off, int size)
{
skb_frag_t *frag = &shinfo->frags[i];
skb_frag_fill_netmem_desc(frag, netmem, off, size);
} }
static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
int i, struct page *page, int i, struct page *page,
int off, int size) int off, int size)
{ {
skb_frag_t *frag = &shinfo->frags[i]; __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
size);
skb_frag_fill_page_desc(frag, page, off, size);
} }
/** /**
...@@ -2460,10 +2480,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) ...@@ -2460,10 +2480,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
} }
/** /**
* __skb_fill_page_desc - initialise a paged fragment in an skb * __skb_fill_netmem_desc - initialise a fragment in an skb
* @skb: buffer containing fragment to be initialised * @skb: buffer containing fragment to be initialised
* @i: paged fragment index to initialise * @i: fragment index to initialise
* @page: the page to use for this fragment * @netmem: the netmem to use for this fragment
* @off: the offset to the data with @page * @off: the offset to the data with @page
* @size: the length of the data * @size: the length of the data
* *
...@@ -2472,10 +2492,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) ...@@ -2472,10 +2492,12 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
* *
* Does not take any additional reference on the fragment. * Does not take any additional reference on the fragment.
*/ */
static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size) netmem_ref netmem, int off, int size)
{ {
__skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); struct page *page = netmem_to_page(netmem);
__skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);
/* Propagate page pfmemalloc to the skb if we can. The problem is /* Propagate page pfmemalloc to the skb if we can. The problem is
* that not all callers have unique ownership of the page but rely * that not all callers have unique ownership of the page but rely
...@@ -2483,7 +2505,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, ...@@ -2483,7 +2505,20 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
*/ */
page = compound_head(page); page = compound_head(page);
if (page_is_pfmemalloc(page)) if (page_is_pfmemalloc(page))
skb->pfmemalloc = true; skb->pfmemalloc = true;
}
static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size)
{
__skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
}
static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
netmem_ref netmem, int off, int size)
{
__skb_fill_netmem_desc(skb, i, netmem, off, size);
skb_shinfo(skb)->nr_frags = i + 1;
} }
/** /**
...@@ -2503,8 +2538,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, ...@@ -2503,8 +2538,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
static inline void skb_fill_page_desc(struct sk_buff *skb, int i, static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
struct page *page, int off, int size) struct page *page, int off, int size)
{ {
__skb_fill_page_desc(skb, i, page, off, size); skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
skb_shinfo(skb)->nr_frags = i + 1;
} }
/** /**
...@@ -2528,8 +2562,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, ...@@ -2528,8 +2562,16 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
shinfo->nr_frags = i + 1; shinfo->nr_frags = i + 1;
} }
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
int size, unsigned int truesize); int off, int size, unsigned int truesize);
static inline void skb_add_rx_frag(struct sk_buff *skb, int i,
struct page *page, int off, int size,
unsigned int truesize)
{
skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
truesize);
}
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize); unsigned int truesize);
...@@ -3378,7 +3420,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, ...@@ -3378,7 +3420,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page,
*/ */
static inline unsigned int skb_frag_off(const skb_frag_t *frag) static inline unsigned int skb_frag_off(const skb_frag_t *frag)
{ {
return frag->bv_offset; return frag->offset;
} }
/** /**
...@@ -3388,7 +3430,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) ...@@ -3388,7 +3430,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
*/ */
static inline void skb_frag_off_add(skb_frag_t *frag, int delta) static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
{ {
frag->bv_offset += delta; frag->offset += delta;
} }
/** /**
...@@ -3398,7 +3440,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) ...@@ -3398,7 +3440,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
*/ */
static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
{ {
frag->bv_offset = offset; frag->offset = offset;
} }
/** /**
...@@ -3409,7 +3451,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) ...@@ -3409,7 +3451,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
static inline void skb_frag_off_copy(skb_frag_t *fragto, static inline void skb_frag_off_copy(skb_frag_t *fragto,
const skb_frag_t *fragfrom) const skb_frag_t *fragfrom)
{ {
fragto->bv_offset = fragfrom->bv_offset; fragto->offset = fragfrom->offset;
} }
/** /**
...@@ -3420,7 +3462,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, ...@@ -3420,7 +3462,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
*/ */
static inline struct page *skb_frag_page(const skb_frag_t *frag) static inline struct page *skb_frag_page(const skb_frag_t *frag)
{ {
return frag->bv_page; return netmem_to_page(frag->netmem);
} }
/** /**
...@@ -3528,7 +3570,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) ...@@ -3528,7 +3570,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
static inline void skb_frag_page_copy(skb_frag_t *fragto, static inline void skb_frag_page_copy(skb_frag_t *fragto,
const skb_frag_t *fragfrom) const skb_frag_t *fragfrom)
{ {
fragto->bv_page = fragfrom->bv_page; fragto->netmem = fragfrom->netmem;
} }
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
......
/* SPDX-License-Identifier: GPL-2.0
*
* Network memory
*
* Author: Mina Almasry <almasrymina@google.com>
*/
#ifndef _NET_NETMEM_H
#define _NET_NETMEM_H
/**
 * typedef netmem_ref - an opaque type marking a reference to generic
 * network memory.
 *
 * A netmem_ref currently is always a reference to a struct page. This
 * abstraction is introduced so support for new memory types can be added.
 *
 * The value is carried as an unsigned long rather than a pointer, so it must
 * not be dereferenced or cast by hand; convert with page_to_netmem() and
 * netmem_to_page() instead.
 *
 * NOTE(review): __bitwise presumably makes sparse treat this as a distinct
 * type from plain integers - confirm against the sparse documentation.
 *
 * Use the supplied helpers to obtain the underlying memory pointer and fields.
 */
typedef unsigned long __bitwise netmem_ref;
/* netmem_to_page - recover the struct page behind a netmem_ref.
 *
 * The conversion is meant to fail (return NULL) for a netmem_ref that is not
 * struct page backed. At present struct page is the only possible netmem, so
 * no check is performed and this helper never fails.
 */
static inline struct page *netmem_to_page(netmem_ref netmem)
{
	struct page *page = (__force struct page *)netmem;

	return page;
}
/* Converting from page to netmem is always safe, because a page can always be
* a netmem.
*/
static inline netmem_ref page_to_netmem(struct page *page)
{
return (__force netmem_ref)page;
}
#endif /* _NET_NETMEM_H */
...@@ -115,6 +115,24 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init; ...@@ -115,6 +115,24 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init;
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags); EXPORT_SYMBOL(sysctl_max_skb_frags);
/* kcm_write_msgs() relies on casting paged frags to bio_vec to use
* iov_iter_bvec(). These static asserts ensure the cast is valid as long as the
* netmem is a page.
*/
static_assert(offsetof(struct bio_vec, bv_page) ==
offsetof(skb_frag_t, netmem));
static_assert(sizeof_field(struct bio_vec, bv_page) ==
sizeof_field(skb_frag_t, netmem));
static_assert(offsetof(struct bio_vec, bv_len) == offsetof(skb_frag_t, len));
static_assert(sizeof_field(struct bio_vec, bv_len) ==
sizeof_field(skb_frag_t, len));
static_assert(offsetof(struct bio_vec, bv_offset) ==
offsetof(skb_frag_t, offset));
static_assert(sizeof_field(struct bio_vec, bv_offset) ==
sizeof_field(skb_frag_t, offset));
#undef FN #undef FN
#define FN(reason) [SKB_DROP_REASON_##reason] = #reason, #define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
static const char * const drop_reasons[] = { static const char * const drop_reasons[] = {
...@@ -845,17 +863,17 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, ...@@ -845,17 +863,17 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
} }
EXPORT_SYMBOL(__napi_alloc_skb); EXPORT_SYMBOL(__napi_alloc_skb);
void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
int size, unsigned int truesize) int off, int size, unsigned int truesize)
{ {
DEBUG_NET_WARN_ON_ONCE(size > truesize); DEBUG_NET_WARN_ON_ONCE(size > truesize);
skb_fill_page_desc(skb, i, page, off, size); skb_fill_netmem_desc(skb, i, netmem, off, size);
skb->len += size; skb->len += size;
skb->data_len += size; skb->data_len += size;
skb->truesize += truesize; skb->truesize += truesize;
} }
EXPORT_SYMBOL(skb_add_rx_frag); EXPORT_SYMBOL(skb_add_rx_frag_netmem);
void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
unsigned int truesize) unsigned int truesize)
...@@ -1999,10 +2017,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) ...@@ -1999,10 +2017,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
/* skb frags point to kernel buffers */ /* skb frags point to kernel buffers */
for (i = 0; i < new_frags - 1; i++) { for (i = 0; i < new_frags - 1; i++) {
__skb_fill_page_desc(skb, i, head, 0, psize); __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize);
head = (struct page *)page_private(head); head = (struct page *)page_private(head);
} }
__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0,
d_off);
skb_shinfo(skb)->nr_frags = new_frags; skb_shinfo(skb)->nr_frags = new_frags;
release: release:
...@@ -3740,7 +3759,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) ...@@ -3740,7 +3759,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
if (plen) { if (plen) {
page = virt_to_head_page(from->head); page = virt_to_head_page(from->head);
offset = from->data - (unsigned char *)page_address(page); offset = from->data - (unsigned char *)page_address(page);
__skb_fill_page_desc(to, 0, page, offset, plen); __skb_fill_netmem_desc(to, 0, page_to_netmem(page),
offset, plen);
get_page(page); get_page(page);
j = 1; j = 1;
len -= plen; len -= plen;
......
...@@ -627,7 +627,8 @@ static int kcm_write_msgs(struct kcm_sock *kcm) ...@@ -627,7 +627,8 @@ static int kcm_write_msgs(struct kcm_sock *kcm)
skb = txm->frag_skb; skb = txm->frag_skb;
} }
if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { if (WARN_ON(!skb_shinfo(skb)->nr_frags) ||
WARN_ON_ONCE(!skb_frag_page(&skb_shinfo(skb)->frags[0]))) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
...@@ -637,8 +638,8 @@ static int kcm_write_msgs(struct kcm_sock *kcm) ...@@ -637,8 +638,8 @@ static int kcm_write_msgs(struct kcm_sock *kcm)
msize += skb_frag_size(&skb_shinfo(skb)->frags[i]); msize += skb_frag_size(&skb_shinfo(skb)->frags[i]);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, (const struct bio_vec *)skb_shinfo(skb)->frags,
msize); skb_shinfo(skb)->nr_frags, msize);
iov_iter_advance(&msg.msg_iter, txm->frag_offset); iov_iter_advance(&msg.msg_iter, txm->frag_offset);
do { do {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment