Commit dfab99df authored by Chuyi Zhou, committed by Alexei Starovoitov

bpf: teach the verifier to enforce css_iter and task_iter in RCU CS

css_iter and task_iter should be used inside an RCU critical section (CS).
Specifically, in sleepable progs an explicit bpf_rcu_read_lock() is needed
before using these iters. In normal bpf progs, which have an implicit
rcu_read_lock(), it's OK to use them directly.

This patch adds a new KF flag, KF_RCU_PROTECTED, for bpf_iter_task_new and
bpf_iter_css_new. It means the kfunc should be used in an RCU CS. We check
whether we are in an RCU CS before invoking this kfunc. If RCU protection
is guaranteed, we set st->type = PTR_TO_STACK | MEM_RCU. Once the user does
rcu_unlock during the iteration, the MEM_RCU state of the regs is cleared
and they are marked PTR_UNTRUSTED. is_iter_reg_valid_init() will reject the
iter if reg->type is UNTRUSTED.

It is worth noting that currently, bpf_rcu_read_unlock does not
clear the state of the STACK_ITER reg, since bpf_for_each_spilled_reg
only considers STACK_SPILL. This patch also lets bpf_for_each_spilled_reg
search STACK_ITER.

Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20231018061746.111364-6-zhouchuyi@bytedance.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parent 7251d090
...@@ -386,19 +386,18 @@ struct bpf_verifier_state { ...@@ -386,19 +386,18 @@ struct bpf_verifier_state {
u32 jmp_history_cnt; u32 jmp_history_cnt;
}; };
#define bpf_get_spilled_reg(slot, frame) \ #define bpf_get_spilled_reg(slot, frame, mask) \
(((slot < frame->allocated_stack / BPF_REG_SIZE) && \ (((slot < frame->allocated_stack / BPF_REG_SIZE) && \
(frame->stack[slot].slot_type[0] == STACK_SPILL)) \ ((1 << frame->stack[slot].slot_type[0]) & (mask))) \
? &frame->stack[slot].spilled_ptr : NULL) ? &frame->stack[slot].spilled_ptr : NULL)
/* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */ /* Iterate over 'frame', setting 'reg' to either NULL or a spilled register. */
#define bpf_for_each_spilled_reg(iter, frame, reg) \ #define bpf_for_each_spilled_reg(iter, frame, reg, mask) \
for (iter = 0, reg = bpf_get_spilled_reg(iter, frame); \ for (iter = 0, reg = bpf_get_spilled_reg(iter, frame, mask); \
iter < frame->allocated_stack / BPF_REG_SIZE; \ iter < frame->allocated_stack / BPF_REG_SIZE; \
iter++, reg = bpf_get_spilled_reg(iter, frame)) iter++, reg = bpf_get_spilled_reg(iter, frame, mask))
/* Invoke __expr over regsiters in __vst, setting __state and __reg */ #define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr) \
#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
({ \ ({ \
struct bpf_verifier_state *___vstate = __vst; \ struct bpf_verifier_state *___vstate = __vst; \
int ___i, ___j; \ int ___i, ___j; \
...@@ -410,7 +409,7 @@ struct bpf_verifier_state { ...@@ -410,7 +409,7 @@ struct bpf_verifier_state {
__reg = &___regs[___j]; \ __reg = &___regs[___j]; \
(void)(__expr); \ (void)(__expr); \
} \ } \
bpf_for_each_spilled_reg(___j, __state, __reg) { \ bpf_for_each_spilled_reg(___j, __state, __reg, __mask) { \
if (!__reg) \ if (!__reg) \
continue; \ continue; \
(void)(__expr); \ (void)(__expr); \
...@@ -418,6 +417,10 @@ struct bpf_verifier_state { ...@@ -418,6 +417,10 @@ struct bpf_verifier_state {
} \ } \
}) })
/* Invoke __expr over regsiters in __vst, setting __state and __reg */
#define bpf_for_each_reg_in_vstate(__vst, __state, __reg, __expr) \
bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, 1 << STACK_SPILL, __expr)
/* linked list of verifier states used to prune search */ /* linked list of verifier states used to prune search */
struct bpf_verifier_state_list { struct bpf_verifier_state_list {
struct bpf_verifier_state state; struct bpf_verifier_state state;
......
...@@ -74,6 +74,7 @@ ...@@ -74,6 +74,7 @@
#define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */ #define KF_ITER_NEW (1 << 8) /* kfunc implements BPF iter constructor */
#define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */ #define KF_ITER_NEXT (1 << 9) /* kfunc implements BPF iter next method */
#define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */ #define KF_ITER_DESTROY (1 << 10) /* kfunc implements BPF iter destructor */
#define KF_RCU_PROTECTED (1 << 11) /* kfunc should be protected by rcu cs when they are invoked */
/* /*
* Tag marking a kernel function as a kfunc. This is meant to minimize the * Tag marking a kernel function as a kfunc. This is meant to minimize the
......
...@@ -2563,10 +2563,10 @@ BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY) ...@@ -2563,10 +2563,10 @@ BTF_ID_FLAGS(func, bpf_iter_task_vma_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_task_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_task_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_task_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_iter_css_new, KF_ITER_NEW | KF_TRUSTED_ARGS | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_css_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_iter_css_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_adjust)
......
...@@ -1173,7 +1173,12 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg ...@@ -1173,7 +1173,12 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
static void __mark_reg_known_zero(struct bpf_reg_state *reg); static void __mark_reg_known_zero(struct bpf_reg_state *reg);
static bool in_rcu_cs(struct bpf_verifier_env *env);
static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta);
static int mark_stack_slots_iter(struct bpf_verifier_env *env, static int mark_stack_slots_iter(struct bpf_verifier_env *env,
struct bpf_kfunc_call_arg_meta *meta,
struct bpf_reg_state *reg, int insn_idx, struct bpf_reg_state *reg, int insn_idx,
struct btf *btf, u32 btf_id, int nr_slots) struct btf *btf, u32 btf_id, int nr_slots)
{ {
...@@ -1194,6 +1199,12 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env, ...@@ -1194,6 +1199,12 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
__mark_reg_known_zero(st); __mark_reg_known_zero(st);
st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
if (is_kfunc_rcu_protected(meta)) {
if (in_rcu_cs(env))
st->type |= MEM_RCU;
else
st->type |= PTR_UNTRUSTED;
}
st->live |= REG_LIVE_WRITTEN; st->live |= REG_LIVE_WRITTEN;
st->ref_obj_id = i == 0 ? id : 0; st->ref_obj_id = i == 0 ? id : 0;
st->iter.btf = btf; st->iter.btf = btf;
...@@ -1268,7 +1279,7 @@ static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env, ...@@ -1268,7 +1279,7 @@ static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
return true; return true;
} }
static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
struct btf *btf, u32 btf_id, int nr_slots) struct btf *btf, u32 btf_id, int nr_slots)
{ {
struct bpf_func_state *state = func(env, reg); struct bpf_func_state *state = func(env, reg);
...@@ -1276,26 +1287,28 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_ ...@@ -1276,26 +1287,28 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_
spi = iter_get_spi(env, reg, nr_slots); spi = iter_get_spi(env, reg, nr_slots);
if (spi < 0) if (spi < 0)
return false; return -EINVAL;
for (i = 0; i < nr_slots; i++) { for (i = 0; i < nr_slots; i++) {
struct bpf_stack_state *slot = &state->stack[spi - i]; struct bpf_stack_state *slot = &state->stack[spi - i];
struct bpf_reg_state *st = &slot->spilled_ptr; struct bpf_reg_state *st = &slot->spilled_ptr;
if (st->type & PTR_UNTRUSTED)
return -EPROTO;
/* only main (first) slot has ref_obj_id set */ /* only main (first) slot has ref_obj_id set */
if (i == 0 && !st->ref_obj_id) if (i == 0 && !st->ref_obj_id)
return false; return -EINVAL;
if (i != 0 && st->ref_obj_id) if (i != 0 && st->ref_obj_id)
return false; return -EINVAL;
if (st->iter.btf != btf || st->iter.btf_id != btf_id) if (st->iter.btf != btf || st->iter.btf_id != btf_id)
return false; return -EINVAL;
for (j = 0; j < BPF_REG_SIZE; j++) for (j = 0; j < BPF_REG_SIZE; j++)
if (slot->slot_type[j] != STACK_ITER) if (slot->slot_type[j] != STACK_ITER)
return false; return -EINVAL;
} }
return true; return 0;
} }
/* Check if given stack slot is "special": /* Check if given stack slot is "special":
...@@ -7640,15 +7653,24 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id ...@@ -7640,15 +7653,24 @@ static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_id
return err; return err;
} }
err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots); err = mark_stack_slots_iter(env, meta, reg, insn_idx, meta->btf, btf_id, nr_slots);
if (err) if (err)
return err; return err;
} else { } else {
/* iter_next() or iter_destroy() expect initialized iter state*/ /* iter_next() or iter_destroy() expect initialized iter state*/
if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) { err = is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots);
switch (err) {
case 0:
break;
case -EINVAL:
verbose(env, "expected an initialized iter_%s as arg #%d\n", verbose(env, "expected an initialized iter_%s as arg #%d\n",
iter_type_str(meta->btf, btf_id), regno); iter_type_str(meta->btf, btf_id), regno);
return -EINVAL; return err;
case -EPROTO:
verbose(env, "expected an RCU CS when using %s\n", meta->func_name);
return err;
default:
return err;
} }
spi = iter_get_spi(env, reg, nr_slots); spi = iter_get_spi(env, reg, nr_slots);
...@@ -10231,6 +10253,11 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta) ...@@ -10231,6 +10253,11 @@ static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_RCU; return meta->kfunc_flags & KF_RCU;
} }
static bool is_kfunc_rcu_protected(struct bpf_kfunc_call_arg_meta *meta)
{
return meta->kfunc_flags & KF_RCU_PROTECTED;
}
static bool __kfunc_param_match_suffix(const struct btf *btf, static bool __kfunc_param_match_suffix(const struct btf *btf,
const struct btf_param *arg, const struct btf_param *arg,
const char *suffix) const char *suffix)
...@@ -11582,6 +11609,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -11582,6 +11609,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
if (env->cur_state->active_rcu_lock) { if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state; struct bpf_func_state *state;
struct bpf_reg_state *reg; struct bpf_reg_state *reg;
u32 clear_mask = (1 << STACK_SPILL) | (1 << STACK_ITER);
if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) { if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n"); verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
...@@ -11592,7 +11620,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, ...@@ -11592,7 +11620,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
verbose(env, "nested rcu read lock (kernel function %s)\n", func_name); verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
return -EINVAL; return -EINVAL;
} else if (rcu_unlock) { } else if (rcu_unlock) {
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ bpf_for_each_reg_in_vstate_mask(env->cur_state, state, reg, clear_mask, ({
if (reg->type & MEM_RCU) { if (reg->type & MEM_RCU) {
reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL); reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
reg->type |= PTR_UNTRUSTED; reg->type |= PTR_UNTRUSTED;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment