Commit 578b0546 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'shrink-struct-ubuf_info'

Pavel Begunkov says:

====================
shrink struct ubuf_info

struct ubuf_info is large but not all fields are needed for all
cases. We have limited space in io_uring for it and large ubuf_info
prevents some struct embedding, even though we use only a subset
of the fields. It's also not very clean trying to use this typeless
extra space.

Shrink struct ubuf_info to only necessary fields used in generic paths,
namely ->callback, ->refcnt and ->flags, which take only 16 bytes. And
make MSG_ZEROCOPY and some other users embed it into a larger struct
ubuf_info_msgzc mimicking the former ubuf_info.

Note, xen/vhost may also have some cleaning on top by creating
new structs containing ubuf_info but with proper types.
====================

Link: https://lore.kernel.org/r/cover.1663892211.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 929a6cdf e7d2b510
...@@ -62,7 +62,7 @@ struct pending_tx_info { ...@@ -62,7 +62,7 @@ struct pending_tx_info {
* ubuf_to_vif is a helper which finds the struct xenvif from a pointer * ubuf_to_vif is a helper which finds the struct xenvif from a pointer
* to this field. * to this field.
*/ */
struct ubuf_info callback_struct; struct ubuf_info_msgzc callback_struct;
}; };
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE) #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, XEN_PAGE_SIZE)
......
...@@ -591,8 +591,8 @@ int xenvif_init_queue(struct xenvif_queue *queue) ...@@ -591,8 +591,8 @@ int xenvif_init_queue(struct xenvif_queue *queue)
} }
for (i = 0; i < MAX_PENDING_REQS; i++) { for (i = 0; i < MAX_PENDING_REQS; i++) {
queue->pending_tx_info[i].callback_struct = (struct ubuf_info) queue->pending_tx_info[i].callback_struct = (struct ubuf_info_msgzc)
{ .callback = xenvif_zerocopy_callback, { { .callback = xenvif_zerocopy_callback },
{ { .ctx = NULL, { { .ctx = NULL,
.desc = i } } }; .desc = i } } };
queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE; queue->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
......
...@@ -133,7 +133,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue, ...@@ -133,7 +133,7 @@ static inline unsigned long idx_to_kaddr(struct xenvif_queue *queue,
/* Find the containing VIF's structure from a pointer in pending_tx_info array /* Find the containing VIF's structure from a pointer in pending_tx_info array
*/ */
static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info *ubuf) static inline struct xenvif_queue *ubuf_to_queue(const struct ubuf_info_msgzc *ubuf)
{ {
u16 pending_idx = ubuf->desc; u16 pending_idx = ubuf->desc;
struct pending_tx_info *temp = struct pending_tx_info *temp =
...@@ -1228,11 +1228,12 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) ...@@ -1228,11 +1228,12 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
return work_done; return work_done;
} }
void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf_base,
bool zerocopy_success) bool zerocopy_success)
{ {
unsigned long flags; unsigned long flags;
pending_ring_idx_t index; pending_ring_idx_t index;
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct xenvif_queue *queue = ubuf_to_queue(ubuf); struct xenvif_queue *queue = ubuf_to_queue(ubuf);
/* This is the only place where we grab this lock, to protect callbacks /* This is the only place where we grab this lock, to protect callbacks
...@@ -1241,7 +1242,7 @@ void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf, ...@@ -1241,7 +1242,7 @@ void xenvif_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *ubuf,
spin_lock_irqsave(&queue->callback_lock, flags); spin_lock_irqsave(&queue->callback_lock, flags);
do { do {
u16 pending_idx = ubuf->desc; u16 pending_idx = ubuf->desc;
ubuf = (struct ubuf_info *) ubuf->ctx; ubuf = (struct ubuf_info_msgzc *) ubuf->ctx;
BUG_ON(queue->dealloc_prod - queue->dealloc_cons >= BUG_ON(queue->dealloc_prod - queue->dealloc_cons >=
MAX_PENDING_REQS); MAX_PENDING_REQS);
index = pending_index(queue->dealloc_prod); index = pending_index(queue->dealloc_prod);
......
...@@ -118,7 +118,7 @@ struct vhost_net_virtqueue { ...@@ -118,7 +118,7 @@ struct vhost_net_virtqueue {
/* Number of XDP frames batched */ /* Number of XDP frames batched */
int batched_xdp; int batched_xdp;
/* an array of userspace buffers info */ /* an array of userspace buffers info */
struct ubuf_info *ubuf_info; struct ubuf_info_msgzc *ubuf_info;
/* Reference counting for outstanding ubufs. /* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */ * Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs; struct vhost_net_ubuf_ref *ubufs;
...@@ -382,8 +382,9 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net, ...@@ -382,8 +382,9 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
} }
static void vhost_zerocopy_callback(struct sk_buff *skb, static void vhost_zerocopy_callback(struct sk_buff *skb,
struct ubuf_info *ubuf, bool success) struct ubuf_info *ubuf_base, bool success)
{ {
struct ubuf_info_msgzc *ubuf = uarg_to_msgzc(ubuf_base);
struct vhost_net_ubuf_ref *ubufs = ubuf->ctx; struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
struct vhost_virtqueue *vq = ubufs->vq; struct vhost_virtqueue *vq = ubufs->vq;
int cnt; int cnt;
...@@ -871,7 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) ...@@ -871,7 +872,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
size_t len, total_len = 0; size_t len, total_len = 0;
int err; int err;
struct vhost_net_ubuf_ref *ubufs; struct vhost_net_ubuf_ref *ubufs;
struct ubuf_info *ubuf; struct ubuf_info_msgzc *ubuf;
bool zcopy_used; bool zcopy_used;
int sent_pkts = 0; int sent_pkts = 0;
...@@ -907,14 +908,14 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock) ...@@ -907,14 +908,14 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
ubuf = nvq->ubuf_info + nvq->upend_idx; ubuf = nvq->ubuf_info + nvq->upend_idx;
vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head); vq->heads[nvq->upend_idx].id = cpu_to_vhost32(vq, head);
vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS;
ubuf->callback = vhost_zerocopy_callback;
ubuf->ctx = nvq->ubufs; ubuf->ctx = nvq->ubufs;
ubuf->desc = nvq->upend_idx; ubuf->desc = nvq->upend_idx;
ubuf->flags = SKBFL_ZEROCOPY_FRAG; ubuf->ubuf.callback = vhost_zerocopy_callback;
refcount_set(&ubuf->refcnt, 1); ubuf->ubuf.flags = SKBFL_ZEROCOPY_FRAG;
refcount_set(&ubuf->ubuf.refcnt, 1);
msg.msg_control = &ctl; msg.msg_control = &ctl;
ctl.type = TUN_MSG_UBUF; ctl.type = TUN_MSG_UBUF;
ctl.ptr = ubuf; ctl.ptr = &ubuf->ubuf;
msg.msg_controllen = sizeof(ctl); msg.msg_controllen = sizeof(ctl);
ubufs = nvq->ubufs; ubufs = nvq->ubufs;
atomic_inc(&ubufs->refcount); atomic_inc(&ubufs->refcount);
......
...@@ -533,6 +533,13 @@ enum { ...@@ -533,6 +533,13 @@ enum {
struct ubuf_info { struct ubuf_info {
void (*callback)(struct sk_buff *, struct ubuf_info *, void (*callback)(struct sk_buff *, struct ubuf_info *,
bool zerocopy_success); bool zerocopy_success);
refcount_t refcnt;
u8 flags;
};
struct ubuf_info_msgzc {
struct ubuf_info ubuf;
union { union {
struct { struct {
unsigned long desc; unsigned long desc;
...@@ -545,8 +552,6 @@ struct ubuf_info { ...@@ -545,8 +552,6 @@ struct ubuf_info {
u32 bytelen; u32 bytelen;
}; };
}; };
refcount_t refcnt;
u8 flags;
struct mmpin { struct mmpin {
struct user_struct *user; struct user_struct *user;
...@@ -555,6 +560,8 @@ struct ubuf_info { ...@@ -555,6 +560,8 @@ struct ubuf_info {
}; };
#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
#define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \
ubuf)
int mm_account_pinned_pages(struct mmpin *mmp, size_t size); int mm_account_pinned_pages(struct mmpin *mmp, size_t size);
void mm_unaccount_pinned_pages(struct mmpin *mmp); void mm_unaccount_pinned_pages(struct mmpin *mmp);
......
...@@ -1188,7 +1188,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); ...@@ -1188,7 +1188,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{ {
struct ubuf_info *uarg; struct ubuf_info_msgzc *uarg;
struct sk_buff *skb; struct sk_buff *skb;
WARN_ON_ONCE(!in_task()); WARN_ON_ONCE(!in_task());
...@@ -1206,19 +1206,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) ...@@ -1206,19 +1206,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return NULL; return NULL;
} }
uarg->callback = msg_zerocopy_callback; uarg->ubuf.callback = msg_zerocopy_callback;
uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1;
uarg->len = 1; uarg->len = 1;
uarg->bytelen = size; uarg->bytelen = size;
uarg->zerocopy = 1; uarg->zerocopy = 1;
uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
refcount_set(&uarg->refcnt, 1); refcount_set(&uarg->ubuf.refcnt, 1);
sock_hold(sk); sock_hold(sk);
return uarg; return &uarg->ubuf;
} }
static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg)
{ {
return container_of((void *)uarg, struct sk_buff, cb); return container_of((void *)uarg, struct sk_buff, cb);
} }
...@@ -1227,6 +1227,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, ...@@ -1227,6 +1227,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
struct ubuf_info *uarg) struct ubuf_info *uarg)
{ {
if (uarg) { if (uarg) {
struct ubuf_info_msgzc *uarg_zc;
const u32 byte_limit = 1 << 19; /* limit to a few TSO */ const u32 byte_limit = 1 << 19; /* limit to a few TSO */
u32 bytelen, next; u32 bytelen, next;
...@@ -1242,8 +1243,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, ...@@ -1242,8 +1243,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
return NULL; return NULL;
} }
bytelen = uarg->bytelen + size; uarg_zc = uarg_to_msgzc(uarg);
if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { bytelen = uarg_zc->bytelen + size;
if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) {
/* TCP can create new skb to attach new uarg */ /* TCP can create new skb to attach new uarg */
if (sk->sk_type == SOCK_STREAM) if (sk->sk_type == SOCK_STREAM)
goto new_alloc; goto new_alloc;
...@@ -1251,11 +1253,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, ...@@ -1251,11 +1253,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size,
} }
next = (u32)atomic_read(&sk->sk_zckey); next = (u32)atomic_read(&sk->sk_zckey);
if ((u32)(uarg->id + uarg->len) == next) { if ((u32)(uarg_zc->id + uarg_zc->len) == next) {
if (mm_account_pinned_pages(&uarg->mmp, size)) if (mm_account_pinned_pages(&uarg_zc->mmp, size))
return NULL; return NULL;
uarg->len++; uarg_zc->len++;
uarg->bytelen = bytelen; uarg_zc->bytelen = bytelen;
atomic_set(&sk->sk_zckey, ++next); atomic_set(&sk->sk_zckey, ++next);
/* no extra ref when appending to datagram (MSG_MORE) */ /* no extra ref when appending to datagram (MSG_MORE) */
...@@ -1291,7 +1293,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) ...@@ -1291,7 +1293,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len)
return true; return true;
} }
static void __msg_zerocopy_callback(struct ubuf_info *uarg) static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg)
{ {
struct sk_buff *tail, *skb = skb_from_uarg(uarg); struct sk_buff *tail, *skb = skb_from_uarg(uarg);
struct sock_exterr_skb *serr; struct sock_exterr_skb *serr;
...@@ -1344,19 +1346,21 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg) ...@@ -1344,19 +1346,21 @@ static void __msg_zerocopy_callback(struct ubuf_info *uarg)
/* Completion callback for MSG_ZEROCOPY ubufs.
 *
 * @skb may be NULL (e.g. when invoked from msg_zerocopy_put_abort()).
 * Accumulates the per-fragment zerocopy outcome into the msgzc state
 * (any copied fragment clears ->zerocopy), and once the final reference
 * on the generic ubuf_info is dropped, queues the completion
 * notification via __msg_zerocopy_callback().
 */
void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg,
			   bool success)
{
	struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg);

	uarg_zc->zerocopy = uarg_zc->zerocopy & success;

	if (refcount_dec_and_test(&uarg->refcnt))
		__msg_zerocopy_callback(uarg_zc);
}
EXPORT_SYMBOL_GPL(msg_zerocopy_callback);
void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
{ {
struct sock *sk = skb_from_uarg(uarg)->sk; struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk;
atomic_dec(&sk->sk_zckey); atomic_dec(&sk->sk_zckey);
uarg->len--; uarg_to_msgzc(uarg)->len--;
if (have_uref) if (have_uref)
msg_zerocopy_callback(NULL, uarg, true); msg_zerocopy_callback(NULL, uarg, true);
......
...@@ -1043,7 +1043,7 @@ static int __ip_append_data(struct sock *sk, ...@@ -1043,7 +1043,7 @@ static int __ip_append_data(struct sock *sk,
paged = true; paged = true;
zc = true; zc = true;
} else { } else {
uarg->zerocopy = 0; uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref); skb_zcopy_set(skb, uarg, &extra_uref);
} }
} }
......
...@@ -1239,7 +1239,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) ...@@ -1239,7 +1239,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
} }
zc = sk->sk_route_caps & NETIF_F_SG; zc = sk->sk_route_caps & NETIF_F_SG;
if (!zc) if (!zc)
uarg->zerocopy = 0; uarg_to_msgzc(uarg)->zerocopy = 0;
} }
} }
......
...@@ -1567,7 +1567,7 @@ static int __ip6_append_data(struct sock *sk, ...@@ -1567,7 +1567,7 @@ static int __ip6_append_data(struct sock *sk,
paged = true; paged = true;
zc = true; zc = true;
} else { } else {
uarg->zerocopy = 0; uarg_to_msgzc(uarg)->zerocopy = 0;
skb_zcopy_set(skb, uarg, &extra_uref); skb_zcopy_set(skb, uarg, &extra_uref);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment