Commit 4af20ab9 authored by Andrii Nakryiko

Merge branch 'bpf-fix-accesses-to-uninit-stack-slots'

Andrei Matei says:

====================
bpf: fix accesses to uninit stack slots

Fix two related issues around verifying stack accesses:
1. accesses to uninitialized stack memory were allowed inconsistently
2. the maximum stack depth needed for a program was not always
maintained correctly

The two issues are fixed together in one commit because the code for one
affects the other.

V4 to V5:
- target bpf-next (Alexei)

V3 to V4:
- minor fixup to comment in patch 1 (Eduard)
- C89-style in patch 3 (Andrii)

V2 to V3:
- address review comments from Andrii and Eduard
- drop new verifier tests in favor of editing existing tests to check
  for stack depth
- append a patch with a bit of cleanup coming out of the previous review
====================
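
For illustration, a minimal selftest-style sketch of the behavior after this series (not part of the series itself; the test name is hypothetical and it is modeled on the "uninitialized stack2" test updated below, using the bpf_misc.h macros the selftests in this diff already use). A privileged, direct read of never-written stack is accepted and the touched slot still counts toward the recorded stack depth, while unprivileged programs are rejected as before:

SEC("socket")
__description("uninit stack read sketch")
/* privileged: the uninitialized read is allowed and 8 bytes of stack are accounted */
__success __log_level(4) __msg("stack depth 8")
/* unprivileged: reads of uninitialized stack are still rejected */
__failure_unpriv __msg_unpriv("invalid read from stack")
__naked void uninit_stack_read_sketch(void)
{
	asm volatile ("					\
	r2 = r10;					\
	r0 = *(u32*)(r2 - 8);			\
	exit;						\
"	::: __clobber_all);
}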

Link: https://lore.kernel.org/r/20231208032519.260451-1-andreimatei1@gmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents 8b7b0e5f 2929bfac
......@@ -321,7 +321,17 @@ struct bpf_func_state {
/* The following fields should be last. See copy_func_state() */
int acquired_refs;
struct bpf_reference_state *refs;
/* The state of the stack. Each element of the array describes BPF_REG_SIZE
* (i.e. 8) bytes worth of stack memory.
* stack[0] represents bytes [*(r10-8)..*(r10-1)]
* stack[1] represents bytes [*(r10-16)..*(r10-9)]
* ...
* stack[allocated_stack/8 - 1] represents [*(r10-allocated_stack)..*(r10-allocated_stack+7)]
*/
struct bpf_stack_state *stack;
/* Size of the current stack, in bytes. The stack state is tracked below, in
* `stack`. allocated_stack is always a multiple of BPF_REG_SIZE.
*/
int allocated_stack;
};
......@@ -658,6 +668,10 @@ struct bpf_verifier_env {
int exception_callback_subprog;
bool explore_alu_limits;
bool allow_ptr_leaks;
/* Allow access to uninitialized stack memory. Writes with fixed offset are
* always allowed, so this refers to reads (with fixed or variable offset),
* to writes with variable offset and to indirect (helper) accesses.
*/
bool allow_uninit_stack;
bool bpf_capable;
bool bypass_spec_v1;
......
......@@ -1259,9 +1259,16 @@ static int resize_reference_state(struct bpf_func_state *state, size_t n)
return 0;
}
static int grow_stack_state(struct bpf_func_state *state, int size)
/* Possibly update state->allocated_stack to be at least size bytes. Also
* possibly update the function's high-water mark in its bpf_subprog_info.
*/
static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
{
size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;
/* The stack size is always a multiple of BPF_REG_SIZE. */
size = round_up(size, BPF_REG_SIZE);
n = size / BPF_REG_SIZE;
if (old_n >= n)
return 0;
......@@ -1271,6 +1278,11 @@ static int grow_stack_state(struct bpf_func_state *state, int size)
return -ENOMEM;
state->allocated_stack = size;
/* update known max for given subprogram */
if (env->subprog_info[state->subprogno].stack_depth < size)
env->subprog_info[state->subprogno].stack_depth = size;
return 0;
}
......@@ -4440,9 +4452,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
struct bpf_reg_state *reg = NULL;
int insn_flags = insn_stack_access_flags(state->frameno, spi);
err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
if (err)
return err;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
* so it's aligned access and [off, off + size) are within stack limits
*/
......@@ -4595,10 +4604,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
(!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
writing_zero = true;
err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
if (err)
return err;
for (i = min_off; i < max_off; i++) {
int spi;
......@@ -5774,20 +5779,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
strict);
}
static int update_stack_depth(struct bpf_verifier_env *env,
const struct bpf_func_state *func,
int off)
{
u16 stack = env->subprog_info[func->subprogno].stack_depth;
if (stack >= -off)
return 0;
/* update known max for given subprogram */
env->subprog_info[func->subprogno].stack_depth = -off;
return 0;
}
/* starting from main bpf function walk all instructions of the function
* and recursively walk all callees that given function can call.
* Ignore jump and exit insns.
......@@ -6577,13 +6568,14 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env,
* The minimum valid offset is -MAX_BPF_STACK for writes, and
* -state->allocated_stack for reads.
*/
static int check_stack_slot_within_bounds(s64 off,
struct bpf_func_state *state,
enum bpf_access_type t)
static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
s64 off,
struct bpf_func_state *state,
enum bpf_access_type t)
{
int min_valid_off;
if (t == BPF_WRITE)
if (t == BPF_WRITE || env->allow_uninit_stack)
min_valid_off = -MAX_BPF_STACK;
else
min_valid_off = -state->allocated_stack;
......@@ -6632,7 +6624,7 @@ static int check_stack_access_within_bounds(
max_off = reg->smax_value + off + access_size;
}
err = check_stack_slot_within_bounds(min_off, state, type);
err = check_stack_slot_within_bounds(env, min_off, state, type);
if (!err && max_off > 0)
err = -EINVAL; /* out of stack access into non-negative offsets */
......@@ -6647,8 +6639,13 @@ static int check_stack_access_within_bounds(
verbose(env, "invalid variable-offset%s stack R%d var_off=%s off=%d size=%d\n",
err_extra, regno, tn_buf, off, access_size);
}
return err;
}
return err;
/* Note that there is no stack access with offset zero, so the needed stack
* size is -min_off, not -min_off+1.
*/
return grow_stack_state(env, state, -min_off /* size */);
}
/* check whether memory at (regno + off) is accessible for t = (read | write)
......@@ -6663,7 +6660,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
{
struct bpf_reg_state *regs = cur_regs(env);
struct bpf_reg_state *reg = regs + regno;
struct bpf_func_state *state;
int size, err = 0;
size = bpf_size_to_bytes(bpf_size);
......@@ -6806,11 +6802,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
if (err)
return err;
state = func(env, reg);
err = update_stack_depth(env, state, off);
if (err)
return err;
if (t == BPF_READ)
err = check_stack_read(env, regno, off, size,
value_regno);
......@@ -7004,7 +6995,8 @@ static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_i
/* When register 'regno' is used to read the stack (either directly or through
* a helper function) make sure that it's within stack boundary and, depending
* on the access type, that all elements of the stack are initialized.
* on the access type and privileges, that all elements of the stack are
* initialized.
*
* 'off' includes 'regno->off', but not its dynamic part (if any).
*
......@@ -7112,8 +7104,11 @@ static int check_stack_range_initialized(
slot = -i - 1;
spi = slot / BPF_REG_SIZE;
if (state->allocated_stack <= slot)
goto err;
if (state->allocated_stack <= slot) {
verbose(env, "verifier bug: allocated_stack too small");
return -EFAULT;
}
stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
if (*stype == STACK_MISC)
goto mark;
......@@ -7137,7 +7132,6 @@ static int check_stack_range_initialized(
goto mark;
}
err:
if (tnum_is_const(reg->var_off)) {
verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
err_extra, regno, min_off, i - min_off, access_size);
......@@ -7162,7 +7156,7 @@ static int check_stack_range_initialized(
* helper may write to the entire memory range.
*/
}
return update_stack_depth(env, state, min_off);
return 0;
}
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
......
......@@ -846,7 +846,7 @@ __naked int delayed_precision_mark(void)
"call %[bpf_iter_num_next];"
"if r0 == 0 goto 2f;"
"if r6 != 42 goto 3f;"
"r7 = -32;"
"r7 = -33;"
"call %[bpf_get_prandom_u32];"
"r6 = r0;"
"goto 1b;\n"
......
......@@ -13,7 +13,7 @@ __noinline int foo(int (*arr)[10])
}
SEC("cgroup_skb/ingress")
__failure __msg("invalid indirect read from stack")
__success
int global_func16(struct __sk_buff *skb)
{
int array[10];
......
......@@ -27,8 +27,8 @@ __naked void stack_out_of_bounds(void)
SEC("socket")
__description("uninitialized stack1")
__failure __msg("invalid indirect read from stack")
__failure_unpriv
__success __log_level(4) __msg("stack depth 8")
__failure_unpriv __msg_unpriv("invalid indirect read from stack")
__naked void uninitialized_stack1(void)
{
asm volatile (" \
......@@ -45,8 +45,8 @@ __naked void uninitialized_stack1(void)
SEC("socket")
__description("uninitialized stack2")
__failure __msg("invalid read from stack")
__failure_unpriv
__success __log_level(4) __msg("stack depth 8")
__failure_unpriv __msg_unpriv("invalid read from stack")
__naked void uninitialized_stack2(void)
{
asm volatile (" \
......
......@@ -5,9 +5,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
SEC("cgroup/sysctl")
SEC("socket")
__description("ARG_PTR_TO_LONG uninitialized")
__failure __msg("invalid indirect read from stack R4 off -16+0 size 8")
__success
__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8")
__naked void arg_ptr_to_long_uninitialized(void)
{
asm volatile (" \
......
......@@ -5,9 +5,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
SEC("tc")
SEC("socket")
__description("raw_stack: no skb_load_bytes")
__failure __msg("invalid read from stack R6 off=-8 size=8")
__success
__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8")
__naked void stack_no_skb_load_bytes(void)
{
asm volatile (" \
......
......@@ -59,9 +59,10 @@ __naked void stack_read_priv_vs_unpriv(void)
" ::: __clobber_all);
}
SEC("lwt_in")
SEC("cgroup/skb")
__description("variable-offset stack read, uninitialized")
__failure __msg("invalid variable-offset read from stack R2")
__success
__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
__naked void variable_offset_stack_read_uninitialized(void)
{
asm volatile (" \
......@@ -83,12 +84,55 @@ __naked void variable_offset_stack_read_uninitialized(void)
SEC("socket")
__description("variable-offset stack write, priv vs unpriv")
__success __failure_unpriv
__success
/* Check that the maximum stack depth is correctly maintained according to the
* maximum possible variable offset.
*/
__log_level(4) __msg("stack depth 16")
__failure_unpriv
/* Variable stack access is rejected for unprivileged.
*/
__msg_unpriv("R2 variable stack access prohibited for !root")
__retval(0)
__naked void stack_write_priv_vs_unpriv(void)
{
asm volatile (" \
/* Get an unknown value */ \
r2 = *(u32*)(r1 + 0); \
/* Make it small and 8-byte aligned */ \
r2 &= 8; \
r2 -= 16; \
/* Add it to fp. We now have either fp-8 or \
* fp-16, but we don't know which \
*/ \
r2 += r10; \
/* Dereference it for a stack write */ \
r0 = 0; \
*(u64*)(r2 + 0) = r0; \
exit; \
" ::: __clobber_all);
}
/* Similar to the previous test, but this time also perform a read from the
* address written to with a variable offset. The read is allowed, showing that,
after a variable-offset write, a privileged program can read the slots that
were in the range of that write (even if the verifier doesn't actually know if
the slot being read was really written to or not).
*
Despite this test being mostly a superset, the previous test is also kept
because it checks the stack depth in the case where there is no read.
*/
SEC("socket")
__description("variable-offset stack write followed by read")
__success
/* Check that the maximum stack depth is correctly maintained according to the
* maximum possible variable offset.
*/
__log_level(4) __msg("stack depth 16")
__failure_unpriv
__msg_unpriv("R2 variable stack access prohibited for !root")
__retval(0)
__naked void stack_write_followed_by_read(void)
{
asm volatile (" \
/* Get an unknown value */ \
......@@ -103,12 +147,7 @@ __naked void stack_write_priv_vs_unpriv(void)
/* Dereference it for a stack write */ \
r0 = 0; \
*(u64*)(r2 + 0) = r0; \
/* Now read from the address we just wrote. This shows\
* that, after a variable-offset write, a priviledged\
* program can read the slots that were in the range of\
* that write (even if the verifier doesn't actually know\
* if the slot being read was really written to or not.\
*/ \
/* Now read from the address we just wrote. */ \
r3 = *(u64*)(r2 + 0); \
r0 = 0; \
exit; \
......@@ -282,9 +321,10 @@ __naked void access_min_out_of_bound(void)
: __clobber_all);
}
SEC("lwt_in")
SEC("cgroup/skb")
__description("indirect variable-offset stack access, min_off < min_initialized")
__failure __msg("invalid indirect read from stack R2 var_off")
__success
__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
__naked void access_min_off_min_initialized(void)
{
asm volatile (" \
......
......@@ -83,17 +83,6 @@
.result = REJECT,
.errstr = "!read_ok",
},
{
"Can't use cmpxchg on uninit memory",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 3),
BPF_MOV64_IMM(BPF_REG_2, 4),
BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8),
BPF_EXIT_INSN(),
},
.result = REJECT,
.errstr = "invalid read from stack",
},
{
"BPF_W cmpxchg should zero top 32 bits",
.insns = {
......
......@@ -1505,7 +1505,9 @@
.prog_type = BPF_PROG_TYPE_XDP,
.fixup_map_hash_8b = { 23 },
.result = REJECT,
.errstr = "invalid read from stack R7 off=-16 size=8",
.errstr = "R0 invalid mem access 'scalar'",
.result_unpriv = REJECT,
.errstr_unpriv = "invalid read from stack R7 off=-16 size=8",
},
{
"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
......