Commit ff267287 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-06-08

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix in the BPF verifier to reject modified ctx pointers on helper
   functions, from Daniel.

2) Fix in BPF kselftests for get_cgroup_id_user() helper to only
   record the cgroup id for a provided pid in order to reduce test
   failures from processes interfering with the test, from Yonghong.

3) Fix a crash in AF_XDP's mem accounting when the process owning
   the sock has CAP_IPC_LOCK capabilities set, from Daniel.

4) Fix an issue for AF_XDP on 32 bit machines where XDP_UMEM_PGOFF_*_RING
   defines need ULL suffixes and use loff_t type as they are otherwise
   truncated, from Geert.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8d97ca6b c09290c5
...@@ -63,8 +63,8 @@ struct xdp_statistics { ...@@ -63,8 +63,8 @@ struct xdp_statistics {
/* Pgoff for mmaping the rings */ /* Pgoff for mmaping the rings */
#define XDP_PGOFF_RX_RING 0 #define XDP_PGOFF_RX_RING 0
#define XDP_PGOFF_TX_RING 0x80000000 #define XDP_PGOFF_TX_RING 0x80000000
#define XDP_UMEM_PGOFF_FILL_RING 0x100000000 #define XDP_UMEM_PGOFF_FILL_RING 0x100000000ULL
#define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000 #define XDP_UMEM_PGOFF_COMPLETION_RING 0x180000000ULL
/* Rx/Tx descriptor */ /* Rx/Tx descriptor */
struct xdp_desc { struct xdp_desc {
......
...@@ -1617,6 +1617,30 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env, ...@@ -1617,6 +1617,30 @@ static int get_callee_stack_depth(struct bpf_verifier_env *env,
} }
#endif #endif
/* Ensure a context-pointer register has not been modified.
 *
 * A ctx pointer may only be dereferenced or handed to a helper in its
 * original form: no fixed offset added (reg->off == 0) and a variable
 * offset that is the known constant zero.
 *
 * @env:   verifier environment, used for emitting the rejection message
 * @reg:   state of the register holding the ctx pointer
 * @regno: register number, only used in the rejection message
 *
 * Returns 0 if the pointer is unmodified, -EACCES otherwise.
 */
static int check_ctx_reg(struct bpf_verifier_env *env,
			 const struct bpf_reg_state *reg, int regno)
{
	char var_off_str[48];

	/* A fixed offset has been added to the ctx pointer. */
	if (reg->off != 0) {
		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
			regno, reg->off);
		return -EACCES;
	}

	/* Unmodified: the variable offset is a constant and that constant is 0. */
	if (tnum_is_const(reg->var_off) && !reg->var_off.value)
		return 0;

	/* Variable (or non-zero constant) offset: report it and reject. */
	tnum_strn(var_off_str, sizeof(var_off_str), reg->var_off);
	verbose(env, "variable ctx access var_off=%s disallowed\n", var_off_str);
	return -EACCES;
}
/* truncate register to smaller size (in bytes) /* truncate register to smaller size (in bytes)
* must be called with size < BPF_REG_SIZE * must be called with size < BPF_REG_SIZE
*/ */
...@@ -1686,24 +1710,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn ...@@ -1686,24 +1710,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
verbose(env, "R%d leaks addr into ctx\n", value_regno); verbose(env, "R%d leaks addr into ctx\n", value_regno);
return -EACCES; return -EACCES;
} }
/* ctx accesses must be at a fixed offset, so that we can
* determine what type of data were returned.
*/
if (reg->off) {
verbose(env,
"dereference of modified ctx ptr R%d off=%d+%d, ctx+const is allowed, ctx+const+const is not\n",
regno, reg->off, off - reg->off);
return -EACCES;
}
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); err = check_ctx_reg(env, reg, regno);
verbose(env, if (err < 0)
"variable ctx access var_off=%s off=%d size=%d", return err;
tn_buf, off, size);
return -EACCES;
}
err = check_ctx_access(env, insn_idx, off, size, t, &reg_type); err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
if (!err && t == BPF_READ && value_regno >= 0) { if (!err && t == BPF_READ && value_regno >= 0) {
/* ctx access returns either a scalar, or a /* ctx access returns either a scalar, or a
...@@ -1984,6 +1995,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, ...@@ -1984,6 +1995,9 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
expected_type = PTR_TO_CTX; expected_type = PTR_TO_CTX;
if (type != expected_type) if (type != expected_type)
goto err_type; goto err_type;
err = check_ctx_reg(env, reg, regno);
if (err < 0)
return err;
} else if (arg_type_is_mem_ptr(arg_type)) { } else if (arg_type_is_mem_ptr(arg_type)) {
expected_type = PTR_TO_STACK; expected_type = PTR_TO_STACK;
/* One exception here. In case function allows for NULL to be /* One exception here. In case function allows for NULL to be
......
...@@ -132,8 +132,10 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem) ...@@ -132,8 +132,10 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{ {
atomic_long_sub(umem->npgs, &umem->user->locked_vm); if (umem->user) {
free_uid(umem->user); atomic_long_sub(umem->npgs, &umem->user->locked_vm);
free_uid(umem->user);
}
} }
static void xdp_umem_release(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem)
......
...@@ -643,7 +643,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, ...@@ -643,7 +643,7 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname,
static int xsk_mmap(struct file *file, struct socket *sock, static int xsk_mmap(struct file *file, struct socket *sock,
struct vm_area_struct *vma) struct vm_area_struct *vma)
{ {
unsigned long offset = vma->vm_pgoff << PAGE_SHIFT; loff_t offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
unsigned long size = vma->vm_end - vma->vm_start; unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk); struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL; struct xsk_queue *q = NULL;
......
...@@ -11,12 +11,24 @@ struct bpf_map_def SEC("maps") cg_ids = { ...@@ -11,12 +11,24 @@ struct bpf_map_def SEC("maps") cg_ids = {
.max_entries = 1, .max_entries = 1,
}; };
/* Single-entry array map (__u32 key, __u32 value) holding the PID that the
 * tracepoint program is allowed to record. The trace program below looks up
 * key 0 and returns early unless the stored value equals the PID of the
 * current task, so other processes hitting the tracepoint are ignored.
 */
struct bpf_map_def SEC("maps") pidmap = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(__u32),
.value_size = sizeof(__u32),
.max_entries = 1,
};
SEC("tracepoint/syscalls/sys_enter_nanosleep") SEC("tracepoint/syscalls/sys_enter_nanosleep")
int trace(void *ctx) int trace(void *ctx)
{ {
__u32 key = 0; __u32 pid = bpf_get_current_pid_tgid();
__u32 key = 0, *expected_pid;
__u64 *val; __u64 *val;
expected_pid = bpf_map_lookup_elem(&pidmap, &key);
if (!expected_pid || *expected_pid != pid)
return 0;
val = bpf_map_lookup_elem(&cg_ids, &key); val = bpf_map_lookup_elem(&cg_ids, &key);
if (val) if (val)
*val = bpf_get_current_cgroup_id(); *val = bpf_get_current_cgroup_id();
......
...@@ -50,13 +50,13 @@ int main(int argc, char **argv) ...@@ -50,13 +50,13 @@ int main(int argc, char **argv)
const char *probe_name = "syscalls/sys_enter_nanosleep"; const char *probe_name = "syscalls/sys_enter_nanosleep";
const char *file = "get_cgroup_id_kern.o"; const char *file = "get_cgroup_id_kern.o";
int err, bytes, efd, prog_fd, pmu_fd; int err, bytes, efd, prog_fd, pmu_fd;
int cgroup_fd, cgidmap_fd, pidmap_fd;
struct perf_event_attr attr = {}; struct perf_event_attr attr = {};
int cgroup_fd, cgidmap_fd;
struct bpf_object *obj; struct bpf_object *obj;
__u64 kcgid = 0, ucgid; __u64 kcgid = 0, ucgid;
__u32 key = 0, pid;
int exit_code = 1; int exit_code = 1;
char buf[256]; char buf[256];
__u32 key = 0;
err = setup_cgroup_environment(); err = setup_cgroup_environment();
if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err, if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
...@@ -81,6 +81,14 @@ int main(int argc, char **argv) ...@@ -81,6 +81,14 @@ int main(int argc, char **argv)
cgidmap_fd, errno)) cgidmap_fd, errno))
goto close_prog; goto close_prog;
pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
pidmap_fd, errno))
goto close_prog;
pid = getpid();
bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
snprintf(buf, sizeof(buf), snprintf(buf, sizeof(buf),
"/sys/kernel/debug/tracing/events/%s/id", probe_name); "/sys/kernel/debug/tracing/events/%s/id", probe_name);
efd = open(buf, O_RDONLY, 0); efd = open(buf, O_RDONLY, 0);
......
...@@ -8647,7 +8647,7 @@ static struct bpf_test tests[] = { ...@@ -8647,7 +8647,7 @@ static struct bpf_test tests[] = {
offsetof(struct __sk_buff, mark)), offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(), BPF_EXIT_INSN(),
}, },
.errstr = "dereference of modified ctx ptr R1 off=68+8, ctx+const is allowed, ctx+const+const is not", .errstr = "dereference of modified ctx ptr",
.result = REJECT, .result = REJECT,
.prog_type = BPF_PROG_TYPE_SCHED_CLS, .prog_type = BPF_PROG_TYPE_SCHED_CLS,
}, },
...@@ -12258,6 +12258,62 @@ static struct bpf_test tests[] = { ...@@ -12258,6 +12258,62 @@ static struct bpf_test tests[] = {
.result = ACCEPT, .result = ACCEPT,
.retval = 5, .retval = 5,
}, },
{
"pass unmodified ctx pointer to helper",
.insns = {
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_csum_update),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = ACCEPT,
},
{
"pass modified ctx pointer to helper, 1",
.insns = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_csum_update),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "dereference of modified ctx ptr",
},
{
"pass modified ctx pointer to helper, 2",
.insns = {
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_socket_cookie),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.result_unpriv = REJECT,
.result = REJECT,
.errstr_unpriv = "dereference of modified ctx ptr",
.errstr = "dereference of modified ctx ptr",
},
{
"pass modified ctx pointer to helper, 3",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_csum_update),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.errstr = "variable ctx access var_off=(0x0; 0x4)",
},
}; };
static int probe_filter_length(const struct bpf_insn *fp) static int probe_filter_length(const struct bpf_insn *fp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment