Commit 0ea488ff authored by John Fastabend, committed by Alexei Starovoitov

bpf: sockmap, convert bpf_compute_data_pointers to bpf_*_sk_skb

In commit

  8108a775 ("bpf: bpf_compute_data uses incorrect cb structure")

we added the routine bpf_compute_data_end_sk_skb() to compute the
correct data_end values, but this has since been lost. In kernel
v4.14 this was correct and the above patch was applied in its
entirety. Then, when v4.14 was merged into the v4.15-rc1 net-next
tree, we lost the piece that renamed bpf_compute_data_pointers to
the new function bpf_compute_data_end_sk_skb. This was done here,

e1ea2f98 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")

where it conflicted with the following rename patch,

6aaae2b6 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")

Finally, after a refactor, I thought the function
bpf_compute_data_end_sk_skb() itself was no longer needed, and it
was erroneously removed.

However, we never reverted the sk_skb_convert_ctx_access() usage of
tcp_skb_cb, which had been committed and survived the merge conflict.
Here we fix this by adding back the helper and the *_data_end_sk_skb()
usage. Using the bpf_skb_data_end mapping is not correct, because it
expects a qdisc_skb_cb object, but at the sock layer this is not the
case. Even though it happens to work here, because we don't overwrite
any data in use at the socket layer and the cb structure is cleared
later, it has the potential to create subtle issues. Even more
concretely, the filter.c access check uses tcp_skb_cb.

And, by some act of chance, the two layouts happen to line up. First,
bpf_skb_data_end:

struct bpf_skb_data_end {
        struct qdisc_skb_cb        qdisc_cb;             /*     0    28 */

        /* XXX 4 bytes hole, try to pack */

        void *                     data_meta;            /*    32     8 */
        void *                     data_end;             /*    40     8 */

        /* size: 48, cachelines: 1, members: 3 */
        /* sum members: 44, holes: 1, sum holes: 4 */
        /* last cacheline: 48 bytes */
};

and then tcp_skb_cb,

struct tcp_skb_cb {
	[...]
                struct {
                        __u32      flags;                /*    24     4 */
                        struct sock * sk_redir;          /*    32     8 */
                        void *     data_end;             /*    40     8 */
                } bpf;                                   /*          24 */
        };

So when we use offsetof() to track down the byte offset, we get 40 in
either case and everything continues to work. Fix this mess and use the
correct structures; it is unclear how long this would actually have
kept working before someone moved the structs around.
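
To make the coincidence concrete, it can be reproduced in a standalone
userspace program. Below is a minimal sketch using simplified stand-in
structs that only mirror the pahole layouts above (the *_stub names are
illustrative, not kernel types; offsets assume an LP64 target):

#include <stddef.h>
#include <stdio.h>

struct qdisc_skb_cb_stub { char bytes[28]; };    /* 28 bytes, align 1 */

struct bpf_skb_data_end_stub {
        struct qdisc_skb_cb_stub qdisc_cb;       /* offset  0 */
        /* 4 byte hole: the void * below forces 8-byte alignment */
        void *data_meta;                         /* offset 32 */
        void *data_end;                          /* offset 40 */
};

struct tcp_skb_cb_stub {
        char pad[24];                            /* fields before the union */
        struct {
                unsigned int flags;              /* offset 24 */
                void *sk_redir;                  /* offset 32 (8-byte aligned) */
                void *data_end;                  /* offset 40 */
        } bpf;
};

int main(void)
{
        /* Both print 40, which is why the wrong mapping happened to work. */
        printf("%zu %zu\n",
               offsetof(struct bpf_skb_data_end_stub, data_end),
               offsetof(struct tcp_skb_cb_stub, bpf.data_end));
        return 0;
}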
Reported-by: Martin KaFai Lau <kafai@fb.com>
Fixes: e1ea2f98 ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net")
Fixes: 6aaae2b6 ("bpf: rename bpf_compute_data_end into bpf_compute_data_pointers")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 7ebc14d5
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -828,6 +828,10 @@ struct tcp_skb_cb {
 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
 
+static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
+{
+        TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
+}
 
 #if IS_ENABLED(CONFIG_IPV6)
 /* This is the variant of inet6_iif() that must be used by TCP,
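
For contrast with the helper added above, the generic
bpf_compute_data_pointers() path writes through skb->cb interpreted as
struct bpf_skb_data_end, i.e. the qdisc_skb_cb-based layout. Roughly,
as a sketch of this era's net/core/filter.c helper (not a verbatim
copy):

static void bpf_compute_data_pointers(struct sk_buff *skb)
{
        struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;

        /* Scribbles data_meta/data_end over the qdisc_skb_cb-based
         * layout; at the socket layer skb->cb holds a tcp_skb_cb
         * instead, hence the dedicated *_sk_skb() variant above.
         */
        cb->data_meta = skb->data + skb_metadata_len(skb);
        cb->data_end  = skb->data + skb_headlen(skb);
}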
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1236,7 +1236,7 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
          */
         TCP_SKB_CB(skb)->bpf.sk_redir = NULL;
         skb->sk = psock->sock;
-        bpf_compute_data_pointers(skb);
+        bpf_compute_data_end_sk_skb(skb);
         preempt_disable();
         rc = (*prog->bpf_func)(skb, prog->insnsi);
         preempt_enable();
@@ -1491,7 +1491,7 @@ static int smap_parse_func_strparser(struct strparser *strp,
          * any socket yet.
          */
         skb->sk = psock->sock;
-        bpf_compute_data_pointers(skb);
+        bpf_compute_data_end_sk_skb(skb);
         rc = (*prog->bpf_func)(skb, prog->insnsi);
         skb->sk = NULL;
         rcu_read_unlock();
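
The tcp_skb_cb slot is the one that matters because, when an SK_SKB
program reads skb->data_end, the context rewrite loads the value from
exactly that cb offset. Schematically, the data_end case in filter.c's
sk_skb_convert_ctx_access() looks like the following (a sketch for
orientation, not a verbatim quote):

case offsetof(struct __sk_buff, data_end):
        off  = si->off;
        off -= offsetof(struct __sk_buff, data_end);
        off += offsetof(struct sk_buff, cb);
        off += offsetof(struct tcp_skb_cb, bpf.data_end);
        *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
                              si->src_reg, off);
        break;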
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1762,6 +1762,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
         .arg2_type      = ARG_ANYTHING,
 };
 
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+                                           unsigned int write_len)
+{
+        int err = __bpf_try_make_writable(skb, write_len);
+
+        bpf_compute_data_end_sk_skb(skb);
+        return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+        /* Idea is the following: should the needed direct read/write
+         * test fail during runtime, we can pull in more data and redo
+         * again, since implicitly, we invalidate previous checks here.
+         *
+         * Or, since we know how much we need to make read/writeable,
+         * this can be done once at the program beginning for direct
+         * access case. By this we overcome limitations of only current
+         * headroom being accessible.
+         */
+        return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+        .func           = sk_skb_pull_data,
+        .gpl_only       = false,
+        .ret_type       = RET_INTEGER,
+        .arg1_type      = ARG_PTR_TO_CTX,
+        .arg2_type      = ARG_ANYTHING,
+};
+
 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
            u64, from, u64, to, u64, flags)
 {
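
From the BPF program's point of view nothing changes: an SK_SKB program
keeps the usual pull-then-recheck pattern described in the comment
above. A hypothetical minimal verdict program (libbpf-style section
name; illustrative only):

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_skb/stream_verdict")
int verdict_prog(struct __sk_buff *skb)
{
        void *data, *data_end;

        /* Make at least 4 bytes linear; the helper invalidates any
         * previously verified packet pointers, so reload and recheck.
         */
        if (bpf_skb_pull_data(skb, 4))
                return SK_DROP;

        data = (void *)(long)skb->data;
        data_end = (void *)(long)skb->data_end;
        if (data + 4 > data_end)
                return SK_DROP;

        return SK_PASS;
}

char _license[] SEC("license") = "GPL";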
@@ -2864,8 +2895,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
         return __skb_trim_rcsum(skb, new_len);
 }
 
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
-           u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+                                        u64 flags)
 {
         u32 max_len = __bpf_skb_max_len(skb);
         u32 min_len = __bpf_skb_min_len(skb);
@@ -2901,6 +2932,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
                 if (!ret && skb_is_gso(skb))
                         skb_gso_reset(skb);
         }
+        return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+           u64, flags)
+{
+        int ret = __bpf_skb_change_tail(skb, new_len, flags);
 
         bpf_compute_data_pointers(skb);
         return ret;
@@ -2915,8 +2953,26 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
         .arg3_type      = ARG_ANYTHING,
 };
 
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
-           u64, flags)
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+           u64, flags)
+{
+        int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+        bpf_compute_data_end_sk_skb(skb);
+        return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+        .func           = sk_skb_change_tail,
+        .gpl_only       = false,
+        .ret_type       = RET_INTEGER,
+        .arg1_type      = ARG_PTR_TO_CTX,
+        .arg2_type      = ARG_ANYTHING,
+        .arg3_type      = ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+                                        u64 flags)
 {
         u32 max_len = __bpf_skb_max_len(skb);
         u32 new_len = skb->len + head_room;
@@ -2942,8 +2998,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
                 skb_reset_mac_header(skb);
         }
 
+        return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+           u64, flags)
+{
+        int ret = __bpf_skb_change_head(skb, head_room, flags);
+
         bpf_compute_data_pointers(skb);
-        return 0;
+        return ret;
 }
 
 static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2955,6 +3019,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
         .arg3_type      = ARG_ANYTHING,
 };
 
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+           u64, flags)
+{
+        int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+        bpf_compute_data_end_sk_skb(skb);
+        return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+        .func           = sk_skb_change_head,
+        .gpl_only       = false,
+        .ret_type       = RET_INTEGER,
+        .arg1_type      = ARG_PTR_TO_CTX,
+        .arg2_type      = ARG_ANYTHING,
+        .arg3_type      = ARG_ANYTHING,
+};
+
 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
 {
         return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -4618,9 +4699,12 @@ bool bpf_helper_changes_pkt_data(void *func)
             func == bpf_skb_store_bytes ||
             func == bpf_skb_change_proto ||
             func == bpf_skb_change_head ||
+            func == sk_skb_change_head ||
             func == bpf_skb_change_tail ||
+            func == sk_skb_change_tail ||
             func == bpf_skb_adjust_room ||
             func == bpf_skb_pull_data ||
+            func == sk_skb_pull_data ||
             func == bpf_clone_redirect ||
             func == bpf_l3_csum_replace ||
             func == bpf_l4_csum_replace ||
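
Listing the new helpers here matters because the verifier consults
bpf_helper_changes_pkt_data() after each helper call and, when it
returns true, invalidates all previously verified packet pointers.
Schematically, in kernel/bpf/verifier.c (a sketch of the
check_helper_call() usage, not a verbatim quote):

/* check_helper_call(), sketch */
changes_data = bpf_helper_changes_pkt_data(fn->func);

/* ... record the call's effects ... */

if (changes_data)
        clear_all_pkt_pointers(env);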
@@ -4872,11 +4956,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
         case BPF_FUNC_skb_load_bytes:
                 return &bpf_skb_load_bytes_proto;
         case BPF_FUNC_skb_pull_data:
-                return &bpf_skb_pull_data_proto;
+                return &sk_skb_pull_data_proto;
         case BPF_FUNC_skb_change_tail:
-                return &bpf_skb_change_tail_proto;
+                return &sk_skb_change_tail_proto;
         case BPF_FUNC_skb_change_head:
-                return &bpf_skb_change_head_proto;
+                return &sk_skb_change_head_proto;
         case BPF_FUNC_get_socket_cookie:
                 return &bpf_get_socket_cookie_proto;
         case BPF_FUNC_get_socket_uid: