Commit e95c6cf4 authored by David S. Miller

Merge branch 'sockmap-fixes'

John Fastabend says:

====================
sockmap fixes for net

The following implements a set of fixes for sockmap and changes the
API slightly in a few places to reduce preempt_disable/enable scope.
We do this here in net because it requires an API change and this
avoids getting stuck with legacy API going forward.

The short description:

Access to skb mark is removed, it is problematic when we add
features in the future because mark is a union and used by the
TCP/socket code internally. We don't want to expose this to the
BPF programs or let programs change the values.

The other change is caching metadata in the skb itself between
when the BPF program returns a redirect code and the core code
implements the redirect. This avoids having per cpu metadata.

Finally, tighten restriction on using sockmap to CAP_NET_ADMIN and
only SOCK_STREAM sockets.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 1cc276ce 9ef2a8cd
...@@ -728,7 +728,7 @@ void xdp_do_flush_map(void); ...@@ -728,7 +728,7 @@ void xdp_do_flush_map(void);
void bpf_warn_invalid_xdp_action(u32 act); void bpf_warn_invalid_xdp_action(u32 act);
void bpf_warn_invalid_xdp_redirect(u32 ifindex); void bpf_warn_invalid_xdp_redirect(u32 ifindex);
struct sock *do_sk_redirect_map(void); struct sock *do_sk_redirect_map(struct sk_buff *skb);
#ifdef CONFIG_BPF_JIT #ifdef CONFIG_BPF_JIT
extern int bpf_jit_enable; extern int bpf_jit_enable;
......
...@@ -840,6 +840,11 @@ struct tcp_skb_cb { ...@@ -840,6 +840,11 @@ struct tcp_skb_cb {
struct inet6_skb_parm h6; struct inet6_skb_parm h6;
#endif #endif
} header; /* For incoming skbs */ } header; /* For incoming skbs */
struct {
__u32 key;
__u32 flags;
struct bpf_map *map;
} bpf;
}; };
}; };
......
...@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) ...@@ -78,6 +78,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
int err = -EINVAL; int err = -EINVAL;
u64 cost; u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */ /* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 || if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
......
...@@ -39,6 +39,7 @@ ...@@ -39,6 +39,7 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/list.h> #include <linux/list.h>
#include <net/strparser.h> #include <net/strparser.h>
#include <net/tcp.h>
struct bpf_stab { struct bpf_stab {
struct bpf_map map; struct bpf_map map;
...@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) ...@@ -101,9 +102,16 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
return SK_DROP; return SK_DROP;
skb_orphan(skb); skb_orphan(skb);
/* We need to ensure that BPF metadata for maps is also cleared
* when we orphan the skb so that we don't have the possibility
* to reference a stale map.
*/
TCP_SKB_CB(skb)->bpf.map = NULL;
skb->sk = psock->sock; skb->sk = psock->sock;
bpf_compute_data_end(skb); bpf_compute_data_end(skb);
preempt_disable();
rc = (*prog->bpf_func)(skb, prog->insnsi); rc = (*prog->bpf_func)(skb, prog->insnsi);
preempt_enable();
skb->sk = NULL; skb->sk = NULL;
return rc; return rc;
...@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) ...@@ -114,17 +122,10 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
struct sock *sk; struct sock *sk;
int rc; int rc;
/* Because we use per cpu values to feed input from sock redirect
* in BPF program to do_sk_redirect_map() call we need to ensure we
* are not preempted. RCU read lock is not sufficient in this case
* with CONFIG_PREEMPT_RCU enabled so we must be explicit here.
*/
preempt_disable();
rc = smap_verdict_func(psock, skb); rc = smap_verdict_func(psock, skb);
switch (rc) { switch (rc) {
case SK_REDIRECT: case SK_REDIRECT:
sk = do_sk_redirect_map(); sk = do_sk_redirect_map(skb);
preempt_enable();
if (likely(sk)) { if (likely(sk)) {
struct smap_psock *peer = smap_psock_sk(sk); struct smap_psock *peer = smap_psock_sk(sk);
...@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) ...@@ -141,8 +142,6 @@ static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
/* Fall through and free skb otherwise */ /* Fall through and free skb otherwise */
case SK_DROP: case SK_DROP:
default: default:
if (rc != SK_REDIRECT)
preempt_enable();
kfree_skb(skb); kfree_skb(skb);
} }
} }
...@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) ...@@ -487,6 +486,9 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
int err = -EINVAL; int err = -EINVAL;
u64 cost; u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */ /* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 || if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE) attr->value_size != 4 || attr->map_flags & ~BPF_F_NUMA_NODE)
...@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map, ...@@ -840,6 +842,12 @@ static int sock_map_update_elem(struct bpf_map *map,
return -EINVAL; return -EINVAL;
} }
if (skops.sk->sk_type != SOCK_STREAM ||
skops.sk->sk_protocol != IPPROTO_TCP) {
fput(socket->file);
return -EOPNOTSUPP;
}
err = sock_map_ctx_update_elem(&skops, map, key, flags); err = sock_map_ctx_update_elem(&skops, map, key, flags);
fput(socket->file); fput(socket->file);
return err; return err;
......
...@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = { ...@@ -1839,31 +1839,31 @@ static const struct bpf_func_proto bpf_redirect_proto = {
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_ANYTHING,
}; };
BPF_CALL_3(bpf_sk_redirect_map, struct bpf_map *, map, u32, key, u64, flags) BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct bpf_map *, map, u32, key, u64, flags)
{ {
struct redirect_info *ri = this_cpu_ptr(&redirect_info); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
if (unlikely(flags)) if (unlikely(flags))
return SK_ABORTED; return SK_ABORTED;
ri->ifindex = key; tcb->bpf.key = key;
ri->flags = flags; tcb->bpf.flags = flags;
ri->map = map; tcb->bpf.map = map;
return SK_REDIRECT; return SK_REDIRECT;
} }
struct sock *do_sk_redirect_map(void) struct sock *do_sk_redirect_map(struct sk_buff *skb)
{ {
struct redirect_info *ri = this_cpu_ptr(&redirect_info); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
struct sock *sk = NULL; struct sock *sk = NULL;
if (ri->map) { if (tcb->bpf.map) {
sk = __sock_map_lookup_elem(ri->map, ri->ifindex); sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
ri->ifindex = 0; tcb->bpf.key = 0;
ri->map = NULL; tcb->bpf.map = NULL;
/* we do not clear flags for future lookup */
} }
return sk; return sk;
...@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = { ...@@ -1873,9 +1873,10 @@ static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
.func = bpf_sk_redirect_map, .func = bpf_sk_redirect_map,
.gpl_only = false, .gpl_only = false,
.ret_type = RET_INTEGER, .ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR, .arg1_type = ARG_PTR_TO_CTX,
.arg2_type = ARG_ANYTHING, .arg2_type = ARG_CONST_MAP_PTR,
.arg3_type = ARG_ANYTHING, .arg3_type = ARG_ANYTHING,
.arg4_type = ARG_ANYTHING,
}; };
BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
...@@ -3683,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size, ...@@ -3683,7 +3684,6 @@ static bool sk_skb_is_valid_access(int off, int size,
{ {
if (type == BPF_WRITE) { if (type == BPF_WRITE) {
switch (off) { switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_index): case bpf_ctx_range(struct __sk_buff, tc_index):
case bpf_ctx_range(struct __sk_buff, priority): case bpf_ctx_range(struct __sk_buff, priority):
break; break;
...@@ -3693,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size, ...@@ -3693,6 +3693,7 @@ static bool sk_skb_is_valid_access(int off, int size,
} }
switch (off) { switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, tc_classid):
return false; return false;
case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data):
......
...@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb) ...@@ -62,7 +62,7 @@ int bpf_prog2(struct __sk_buff *skb)
ret = 1; ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret); bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(&sock_map, ret, 0); return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
} }
SEC("sockops") SEC("sockops")
......
...@@ -569,9 +569,10 @@ union bpf_attr { ...@@ -569,9 +569,10 @@ union bpf_attr {
* @flags: reserved for future use * @flags: reserved for future use
* Return: 0 on success or negative error code * Return: 0 on success or negative error code
* *
* int bpf_sk_redirect_map(map, key, flags) * int bpf_sk_redirect_map(skb, map, key, flags)
* Redirect skb to a sock in map using key as a lookup key for the * Redirect skb to a sock in map using key as a lookup key for the
* sock in map. * sock in map.
* @skb: pointer to skb
* @map: pointer to sockmap * @map: pointer to sockmap
* @key: key to lookup sock in map * @key: key to lookup sock in map
* @flags: reserved for future use * @flags: reserved for future use
......
...@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) = ...@@ -65,7 +65,7 @@ static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval, static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
int optlen) = int optlen) =
(void *) BPF_FUNC_setsockopt; (void *) BPF_FUNC_setsockopt;
static int (*bpf_sk_redirect_map)(void *map, int key, int flags) = static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
(void *) BPF_FUNC_sk_redirect_map; (void *) BPF_FUNC_sk_redirect_map;
static int (*bpf_sock_map_update)(void *map, void *key, void *value, static int (*bpf_sock_map_update)(void *map, void *key, void *value,
unsigned long long flags) = unsigned long long flags) =
......
...@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb) ...@@ -61,8 +61,8 @@ int bpf_prog2(struct __sk_buff *skb)
bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk); bpf_printk("verdict: data[0] = redir(%u:%u)\n", map, sk);
if (!map) if (!map)
return bpf_sk_redirect_map(&sock_map_rx, sk, 0); return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
return bpf_sk_redirect_map(&sock_map_tx, sk, 0); return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
} }
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
...@@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data) ...@@ -466,7 +466,7 @@ static void test_sockmap(int tasks, void *data)
int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc; int one = 1, map_fd_rx, map_fd_tx, map_fd_break, s, sc, rc;
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break; struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_break;
int ports[] = {50200, 50201, 50202, 50204}; int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, sfd[6] = {0xdeadbeef}; int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0}; u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog; int parse_prog, verdict_prog;
struct sockaddr_in addr; struct sockaddr_in addr;
...@@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data) ...@@ -548,6 +548,16 @@ static void test_sockmap(int tasks, void *data)
goto out_sockmap; goto out_sockmap;
} }
/* Test update with unsupported UDP socket */
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
if (!err) {
printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
i, udp);
goto out_sockmap;
}
/* Test update without programs */ /* Test update without programs */
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY); err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
......
...@@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = { ...@@ -1130,15 +1130,27 @@ static struct bpf_test tests[] = {
.errstr = "invalid bpf_context access", .errstr = "invalid bpf_context access",
}, },
{ {
"check skb->mark is writeable by SK_SKB", "invalid access of skb->mark for SK_SKB",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
.result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB,
.errstr = "invalid bpf_context access",
},
{
"check skb->mark is not writeable by SK_SKB",
.insns = { .insns = {
BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct __sk_buff, mark)), offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}, },
.result = ACCEPT, .result = REJECT,
.prog_type = BPF_PROG_TYPE_SK_SKB, .prog_type = BPF_PROG_TYPE_SK_SKB,
.errstr = "invalid bpf_context access",
}, },
{ {
"check skb->tc_index is writeable by SK_SKB", "check skb->tc_index is writeable by SK_SKB",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment