Commit 6fb3f727 authored by Andrii Nakryiko's avatar Andrii Nakryiko

Merge branch 'improvements-for-tracking-scalars-in-the-bpf-verifier'

Maxim Mikityanskiy says:

====================
Improvements for tracking scalars in the BPF verifier

From: Maxim Mikityanskiy <maxim@isovalent.com>

The goal of this series is to extend the verifier's capabilities of
tracking scalars when they are spilled to stack, especially when the
spill or fill is narrowing. It also contains a fix by Eduard for
infinite loop detection and a state pruning optimization by Eduard that
compensates for a verification complexity regression introduced by
tracking unbounded scalars. These improvements reduce the surface of
false rejections that I saw while working on Cilium codebase.

Patches 1-9 of the original series were previously applied in v2.

Patches 1-2 (Maxim): Support the case when boundary checks are first
performed after the register was spilled to the stack.

Patches 3-4 (Maxim): Support narrowing fills.

Patches 5-6 (Eduard): Optimization for state pruning in stacksafe() to
mitigate the verification complexity regression.

veristat -e file,prog,states -f '!states_diff<50' -f '!states_pct<10' -f '!states_a<10' -f '!states_b<10' -C ...

 * Without patch 5:

File                  Program   States (A)  States (B)  States    (DIFF)
--------------------  --------  ----------  ----------  ----------------
pyperf100.bpf.o       on_event        4878        6528   +1650 (+33.83%)
pyperf180.bpf.o       on_event        6936       11032   +4096 (+59.05%)
pyperf600.bpf.o       on_event       22271       39455  +17184 (+77.16%)
pyperf600_iter.bpf.o  on_event         400         490     +90 (+22.50%)
strobemeta.bpf.o      on_event        4895       14028  +9133 (+186.58%)

 * With patch 5:

File                     Program        States (A)  States (B)  States   (DIFF)
-----------------------  -------------  ----------  ----------  ---------------
bpf_xdp.o                tail_lb_ipv4         2770        2224   -546 (-19.71%)
pyperf100.bpf.o          on_event             4878        5848   +970 (+19.89%)
pyperf180.bpf.o          on_event             6936        8868  +1932 (+27.85%)
pyperf600.bpf.o          on_event            22271       29656  +7385 (+33.16%)
pyperf600_iter.bpf.o     on_event              400         450    +50 (+12.50%)
xdp_synproxy_kern.bpf.o  syncookie_tc          280         226    -54 (-19.29%)
xdp_synproxy_kern.bpf.o  syncookie_xdp         302         228    -74 (-24.50%)

v2 changes:

Fixed comments in patch 1, moved endianness checks to header files in
patch 12 where possible, added Eduard's ACKs.

v3 changes:

Maxim: Removed __is_scalar_unbounded altogether, addressed Andrii's
comments.

Eduard: Patch #5 (#14 in v2) changed significantly:
- Logical changes:
  - Handling of STACK_{MISC,ZERO} mix turned out to be incorrect:
    a mix of MISC and ZERO in old state is not equivalent to e.g.
    just MISC is current state, because verifier could have deduced
    zero scalars from ZERO slots in old state for some loads.
  - There is no reason to limit the change only to cases when
    old or current stack is a spill of unbounded scalar,
    it is valid to compare any 64-bit scalar spill with fake
    register impersonating MISC.
  - STACK_ZERO vs spilled zero case was dropped,
    after recent changes for zero handling by Andrii and Yonghong
    it is hard (impossible?) to conjure all ZERO slots for an spi.
    => the case does not make any difference in veristat results.
- Use global static variable for unbound_reg (Andrii)
- Code shuffling to remove duplication in stacksafe() (Andrii)
====================

Link: https://lore.kernel.org/r/20240127175237.526726-1-maxtram95@gmail.comSigned-off-by: default avatarAndrii Nakryiko <andrii@kernel.org>
parents 943b043a 73a28d9d
......@@ -919,6 +919,15 @@ static inline void mark_verifier_state_scratched(struct bpf_verifier_env *env)
env->scratched_stack_slots = ~0ULL;
}
static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_size)
{
#ifdef __BIG_ENDIAN
off -= spill_size - fill_size;
#endif
return !(off % BPF_REG_SIZE);
}
const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type);
const char *dynptr_type_str(enum bpf_dynptr_type type);
const char *iter_type_str(const struct btf *btf, u32 btf_id);
......
......@@ -1155,6 +1155,12 @@ static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
stack->spilled_ptr.type == SCALAR_VALUE;
}
static bool is_spilled_scalar_reg64(const struct bpf_stack_state *stack)
{
return stack->slot_type[0] == STACK_SPILL &&
stack->spilled_ptr.type == SCALAR_VALUE;
}
/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
* case they are equivalent, or it's STACK_ZERO, in which case we preserve
* more precise STACK_ZERO.
......@@ -2264,8 +2270,7 @@ static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
}
/* Mark a register as having a completely unknown (scalar) value. */
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
static void __mark_reg_unknown_imprecise(struct bpf_reg_state *reg)
{
/*
* Clear type, off, and union(map_ptr, range) and
......@@ -2277,10 +2282,20 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
reg->ref_obj_id = 0;
reg->var_off = tnum_unknown;
reg->frameno = 0;
reg->precise = !env->bpf_capable;
reg->precise = false;
__mark_reg_unbounded(reg);
}
/* Mark a register as having a completely unknown (scalar) value,
* initialize .precise as true when not bpf capable.
*/
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
struct bpf_reg_state *reg)
{
__mark_reg_unknown_imprecise(reg);
reg->precise = !env->bpf_capable;
}
static void mark_reg_unknown(struct bpf_verifier_env *env,
struct bpf_reg_state *regs, u32 regno)
{
......@@ -4380,20 +4395,6 @@ static u64 reg_const_value(struct bpf_reg_state *reg, bool subreg32)
return subreg32 ? tnum_subreg(reg->var_off).value : reg->var_off.value;
}
static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
{
return tnum_is_unknown(reg->var_off) &&
reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
reg->umin_value == 0 && reg->umax_value == U64_MAX &&
reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
}
static bool register_is_bounded(struct bpf_reg_state *reg)
{
return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
}
static bool __is_pointer_value(bool allow_ptr_leaks,
const struct bpf_reg_state *reg)
{
......@@ -4504,7 +4505,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
return err;
mark_stack_slot_scratched(env, spi);
if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) && env->bpf_capable) {
if (reg && !(off % BPF_REG_SIZE) && reg->type == SCALAR_VALUE && env->bpf_capable) {
bool reg_value_fits;
reg_value_fits = get_reg_width(reg) <= BITS_PER_BYTE * size;
......@@ -4792,7 +4793,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
if (dst_regno < 0)
return 0;
if (!(off % BPF_REG_SIZE) && size == spill_size) {
if (size <= spill_size &&
bpf_stack_narrow_access_ok(off, size, spill_size)) {
/* The earlier check_reg_arg() has decided the
* subreg_def for this insn. Save it first.
*/
......@@ -4800,6 +4802,12 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
copy_register_state(&state->regs[dst_regno], reg);
state->regs[dst_regno].subreg_def = subreg_def;
/* Break the relation on a narrowing fill.
* coerce_reg_to_size will adjust the boundaries.
*/
if (get_reg_width(reg) > size * BITS_PER_BYTE)
state->regs[dst_regno].id = 0;
} else {
int spill_cnt = 0, zero_cnt = 0;
......@@ -6075,10 +6083,10 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
* values are also truncated so we push 64-bit bounds into
* 32-bit bounds. Above were truncated < 32-bits already.
*/
if (size < 4) {
if (size < 4)
__mark_reg32_unbounded(reg);
reg_bounds_sync(reg);
}
reg_bounds_sync(reg);
}
static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
......@@ -16493,6 +16501,43 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
}
}
static struct bpf_reg_state unbound_reg;
static __init int unbound_reg_init(void)
{
__mark_reg_unknown_imprecise(&unbound_reg);
unbound_reg.live |= REG_LIVE_READ;
return 0;
}
late_initcall(unbound_reg_init);
static bool is_stack_all_misc(struct bpf_verifier_env *env,
struct bpf_stack_state *stack)
{
u32 i;
for (i = 0; i < ARRAY_SIZE(stack->slot_type); ++i) {
if ((stack->slot_type[i] == STACK_MISC) ||
(stack->slot_type[i] == STACK_INVALID && env->allow_uninit_stack))
continue;
return false;
}
return true;
}
static struct bpf_reg_state *scalar_reg_for_stack(struct bpf_verifier_env *env,
struct bpf_stack_state *stack)
{
if (is_spilled_scalar_reg64(stack))
return &stack->spilled_ptr;
if (is_stack_all_misc(env, stack))
return &unbound_reg;
return NULL;
}
static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
{
......@@ -16531,6 +16576,20 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
if (i >= cur->allocated_stack)
return false;
/* 64-bit scalar spill vs all slots MISC and vice versa.
* Load from all slots MISC produces unbound scalar.
* Construct a fake register for such stack and call
* regsafe() to ensure scalar ids are compared.
*/
old_reg = scalar_reg_for_stack(env, &old->stack[spi]);
cur_reg = scalar_reg_for_stack(env, &cur->stack[spi]);
if (old_reg && cur_reg) {
if (!regsafe(env, old_reg, cur_reg, idmap, exact))
return false;
i += BPF_REG_SIZE - 1;
continue;
}
/* if old state was safe with misc data in the stack
* it will be safe with zero-initialized stack.
* The opposite is not true
......
......@@ -217,7 +217,7 @@ __naked void uninit_u32_from_the_stack(void)
SEC("tc")
__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data")
__failure __msg("invalid access to packet")
__success __retval(0)
__naked void u16_offset_to_skb_data(void)
{
asm volatile (" \
......@@ -225,13 +225,19 @@ __naked void u16_offset_to_skb_data(void)
r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
w4 = 20; \
*(u32*)(r10 - 8) = r4; \
r4 = *(u16*)(r10 - 8); \
"
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
"r4 = *(u16*)(r10 - 8);"
#else
"r4 = *(u16*)(r10 - 6);"
#endif
" \
r0 = r2; \
/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
r0 += r4; \
/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
if r0 > r3 goto l0_%=; \
/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
/* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
r0 = *(u32*)(r2 + 0); \
l0_%=: r0 = 0; \
exit; \
......@@ -268,7 +274,7 @@ l0_%=: r0 = 0; \
}
SEC("tc")
__description("Spill a u32 const scalar. Refill as u16 from fp-6. Offset to skb->data")
__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data")
__failure __msg("invalid access to packet")
__naked void _6_offset_to_skb_data(void)
{
......@@ -277,7 +283,13 @@ __naked void _6_offset_to_skb_data(void)
r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
w4 = 20; \
*(u32*)(r10 - 8) = r4; \
r4 = *(u16*)(r10 - 6); \
"
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
"r4 = *(u16*)(r10 - 6);"
#else
"r4 = *(u16*)(r10 - 8);"
#endif
" \
r0 = r2; \
/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
r0 += r4; \
......@@ -452,9 +464,9 @@ l0_%=: r1 >>= 16; \
SEC("raw_tp")
__log_level(2)
__success
__msg("fp-8=0m??mmmm")
__msg("fp-16=00mm??mm")
__msg("fp-24=00mm???m")
__msg("fp-8=0m??scalar()")
__msg("fp-16=00mm??scalar()")
__msg("fp-24=00mm???scalar()")
__naked void spill_subregs_preserve_stack_zero(void)
{
asm volatile (
......@@ -940,4 +952,296 @@ l0_%=: r0 = 0; \
: __clobber_all);
}
SEC("xdp")
__description("spill unbounded reg, then range check src")
__success __retval(0)
__naked void spill_unbounded(void)
{
asm volatile (" \
/* Produce an unbounded scalar. */ \
call %[bpf_get_prandom_u32]; \
/* Spill r0 to stack. */ \
*(u64*)(r10 - 8) = r0; \
/* Boundary check on r0. */ \
if r0 > 16 goto l0_%=; \
/* Fill r0 from stack. */ \
r0 = *(u64*)(r10 - 8); \
/* Boundary check on r0 with predetermined result. */\
if r0 <= 16 goto l0_%=; \
/* Dead branch: the verifier should prune it. Do an invalid memory\
* access if the verifier follows it. \
*/ \
r0 = *(u64*)(r9 + 0); \
l0_%=: r0 = 0; \
exit; \
" :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
SEC("xdp")
__description("32-bit fill after 64-bit spill")
__success __retval(0)
__naked void fill_32bit_after_spill_64bit(void)
{
asm volatile(" \
/* Randomize the upper 32 bits. */ \
call %[bpf_get_prandom_u32]; \
r0 <<= 32; \
/* 64-bit spill r0 to stack. */ \
*(u64*)(r10 - 8) = r0; \
/* 32-bit fill r0 from stack. */ \
"
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
"r0 = *(u32*)(r10 - 8);"
#else
"r0 = *(u32*)(r10 - 4);"
#endif
" \
/* Boundary check on r0 with predetermined result. */\
if r0 == 0 goto l0_%=; \
/* Dead branch: the verifier should prune it. Do an invalid memory\
* access if the verifier follows it. \
*/ \
r0 = *(u64*)(r9 + 0); \
l0_%=: exit; \
" :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
SEC("xdp")
__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID")
__success __retval(0)
__naked void fill_32bit_after_spill_64bit_preserve_id(void)
{
asm volatile (" \
/* Randomize the lower 32 bits. */ \
call %[bpf_get_prandom_u32]; \
w0 &= 0xffffffff; \
/* 64-bit spill r0 to stack - should assign an ID. */\
*(u64*)(r10 - 8) = r0; \
/* 32-bit fill r1 from stack - should preserve the ID. */\
"
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
"r1 = *(u32*)(r10 - 8);"
#else
"r1 = *(u32*)(r10 - 4);"
#endif
" \
/* Compare r1 with another register to trigger find_equal_scalars. */\
r2 = 0; \
if r1 != r2 goto l0_%=; \
/* The result of this comparison is predefined. */\
if r0 == r2 goto l0_%=; \
/* Dead branch: the verifier should prune it. Do an invalid memory\
* access if the verifier follows it. \
*/ \
r0 = *(u64*)(r9 + 0); \
exit; \
l0_%=: r0 = 0; \
exit; \
" :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
SEC("xdp")
__description("32-bit fill after 64-bit spill should clear ID")
__failure __msg("math between ctx pointer and 4294967295 is not allowed")
__naked void fill_32bit_after_spill_64bit_clear_id(void)
{
asm volatile (" \
r6 = r1; \
/* Roll one bit to force the verifier to track both branches. */\
call %[bpf_get_prandom_u32]; \
r0 &= 0x8; \
/* Put a large number into r1. */ \
r1 = 0xffffffff; \
r1 <<= 32; \
r1 += r0; \
/* 64-bit spill r1 to stack - should assign an ID. */\
*(u64*)(r10 - 8) = r1; \
/* 32-bit fill r2 from stack - should clear the ID. */\
"
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
"r2 = *(u32*)(r10 - 8);"
#else
"r2 = *(u32*)(r10 - 4);"
#endif
" \
/* Compare r2 with another register to trigger find_equal_scalars.\
* Having one random bit is important here, otherwise the verifier cuts\
* the corners. If the ID was mistakenly preserved on fill, this would\
* cause the verifier to think that r1 is also equal to zero in one of\
* the branches, and equal to eight on the other branch.\
*/ \
r3 = 0; \
if r2 != r3 goto l0_%=; \
l0_%=: r1 >>= 32; \
/* The verifier shouldn't propagate r2's range to r1, so it should\
* still remember r1 = 0xffffffff and reject the below.\
*/ \
r6 += r1; \
r0 = *(u32*)(r6 + 0); \
exit; \
" :
: __imm(bpf_get_prandom_u32)
: __clobber_all);
}
/* stacksafe(): check if stack spill of an imprecise scalar in old state
* is considered equivalent to STACK_{MISC,INVALID} in cur state.
*/
SEC("socket")
__success __log_level(2)
__msg("8: (79) r1 = *(u64 *)(r10 -8)")
__msg("8: safe")
__msg("processed 11 insns")
/* STACK_INVALID should prevent verifier in unpriv mode from
* considering states equivalent and force an error on second
* verification path (entry - label 1 - label 2).
*/
__failure_unpriv
__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
__msg_unpriv("9: (95) exit")
__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
__msg_unpriv("invalid read from stack off -8+2 size 8")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void old_imprecise_scalar_vs_cur_stack_misc(void)
{
asm volatile(
/* get a random value for branching */
"call %[bpf_ktime_get_ns];"
"if r0 == 0 goto 1f;"
/* conjure scalar at fp-8 */
"r0 = 42;"
"*(u64*)(r10 - 8) = r0;"
"goto 2f;"
"1:"
/* conjure STACK_{MISC,INVALID} at fp-8 */
"call %[bpf_ktime_get_ns];"
"*(u16*)(r10 - 8) = r0;"
"*(u16*)(r10 - 4) = r0;"
"2:"
/* read fp-8, should be considered safe on second visit */
"r1 = *(u64*)(r10 - 8);"
"exit;"
:
: __imm(bpf_ktime_get_ns)
: __clobber_all);
}
/* stacksafe(): check that stack spill of a precise scalar in old state
* is not considered equivalent to STACK_MISC in cur state.
*/
SEC("socket")
__success __log_level(2)
/* verifier should visit 'if r1 == 0x2a ...' two times:
* - once for path entry - label 2;
* - once for path entry - label 1 - label 2.
*/
__msg("if r1 == 0x2a goto pc+0")
__msg("if r1 == 0x2a goto pc+0")
__msg("processed 15 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void old_precise_scalar_vs_cur_stack_misc(void)
{
asm volatile(
/* get a random value for branching */
"call %[bpf_ktime_get_ns];"
"if r0 == 0 goto 1f;"
/* conjure scalar at fp-8 */
"r0 = 42;"
"*(u64*)(r10 - 8) = r0;"
"goto 2f;"
"1:"
/* conjure STACK_MISC at fp-8 */
"call %[bpf_ktime_get_ns];"
"*(u64*)(r10 - 8) = r0;"
"*(u32*)(r10 - 4) = r0;"
"2:"
/* read fp-8, should not be considered safe on second visit */
"r1 = *(u64*)(r10 - 8);"
/* use r1 in precise context */
"if r1 == 42 goto +0;"
"exit;"
:
: __imm(bpf_ktime_get_ns)
: __clobber_all);
}
/* stacksafe(): check if STACK_MISC in old state is considered
* equivalent to stack spill of a scalar in cur state.
*/
SEC("socket")
__success __log_level(2)
__msg("8: (79) r0 = *(u64 *)(r10 -8)")
__msg("8: safe")
__msg("processed 11 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void old_stack_misc_vs_cur_scalar(void)
{
asm volatile(
/* get a random value for branching */
"call %[bpf_ktime_get_ns];"
"if r0 == 0 goto 1f;"
/* conjure STACK_{MISC,INVALID} at fp-8 */
"call %[bpf_ktime_get_ns];"
"*(u16*)(r10 - 8) = r0;"
"*(u16*)(r10 - 4) = r0;"
"goto 2f;"
"1:"
/* conjure scalar at fp-8 */
"r0 = 42;"
"*(u64*)(r10 - 8) = r0;"
"2:"
/* read fp-8, should be considered safe on second visit */
"r0 = *(u64*)(r10 - 8);"
"exit;"
:
: __imm(bpf_ktime_get_ns)
: __clobber_all);
}
/* stacksafe(): check that STACK_MISC in old state is not considered
* equivalent to stack spill of a non-scalar in cur state.
*/
SEC("socket")
__success __log_level(2)
/* verifier should process exit instructions twice:
* - once for path entry - label 2;
* - once for path entry - label 1 - label 2.
*/
__msg("r1 = *(u64 *)(r10 -8)")
__msg("exit")
__msg("r1 = *(u64 *)(r10 -8)")
__msg("exit")
__msg("processed 11 insns")
__flag(BPF_F_TEST_STATE_FREQ)
__naked void old_stack_misc_vs_cur_ctx_ptr(void)
{
asm volatile(
/* remember context pointer in r9 */
"r9 = r1;"
/* get a random value for branching */
"call %[bpf_ktime_get_ns];"
"if r0 == 0 goto 1f;"
/* conjure STACK_MISC at fp-8 */
"call %[bpf_ktime_get_ns];"
"*(u64*)(r10 - 8) = r0;"
"*(u32*)(r10 - 4) = r0;"
"goto 2f;"
"1:"
/* conjure context pointer in fp-8 */
"*(u64*)(r10 - 8) = r9;"
"2:"
/* read fp-8, should not be considered safe on second visit */
"r1 = *(u64*)(r10 - 8);"
"exit;"
:
: __imm(bpf_ktime_get_ns)
: __clobber_all);
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment