Commit 23a1f682 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2020-10-01

The following pull-request contains BPF updates for your *net-next* tree.

We've added 90 non-merge commits during the last 8 day(s) which contain
a total of 103 files changed, 7662 insertions(+), 1894 deletions(-).

Note that once bpf(/net) tree gets merged into net-next, there will be a small
merge conflict in tools/lib/bpf/btf.c between commit 12450081 ("libbpf: Fix
native endian assumption when parsing BTF") from the bpf tree and the commit
3289959b ("libbpf: Support BTF loading and raw data output in both endianness")
from the bpf-next tree. Correct resolution would be to stick with bpf-next, it
should look like:

  [...]
        /* check BTF magic */
        if (fread(&magic, 1, sizeof(magic), f) < sizeof(magic)) {
                err = -EIO;
                goto err_out;
        }
        if (magic != BTF_MAGIC && magic != bswap_16(BTF_MAGIC)) {
                /* definitely not a raw BTF */
                err = -EPROTO;
                goto err_out;
        }

        /* get file size */
  [...]

The main changes are:

1) Add bpf_snprintf_btf() and bpf_seq_printf_btf() helpers to support displaying
   BTF-based kernel data structures out of BPF programs, from Alan Maguire.

2) Speed up RCU tasks trace grace periods by a factor of 50 & fix a few race
   conditions exposed by it. It was discussed to take these via BPF and
   networking tree to get better testing exposure, from Paul E. McKenney.

3) Support multi-attach for freplace programs, needed for incremental attachment
   of multiple XDP progs using libxdp dispatcher model, from Toke Høiland-Jørgensen.

4) libbpf support for appending new BTF types at the end of BTF object, allowing
   intrusive changes of prog's BTF (useful for future linking), from Andrii Nakryiko.

5) Several BPF helper improvements e.g. avoid atomic op in cookie generator and add
   a redirect helper into neighboring subsys, from Daniel Borkmann.

6) Allow map updates on sockmaps from bpf_iter context in order to migrate sockmaps
   from one to another, from Lorenz Bauer.

7) Fix 32 bit to 64 bit assignment from latest alu32 bounds tracking which caused
   a verifier issue due to type downgrade to scalar, from John Fastabend.

8) Follow-up on tail-call support in BPF subprogs which optimizes x64 JIT prologue
   and epilogue sections, from Maciej Fijalkowski.

9) Add an option to perf RB map to improve sharing of event entries by avoiding
   remove-on-close behavior. Also, add BPF_PROG_TEST_RUN for raw_tracepoint, from Song Liu.

10) Fix a crash in AF_XDP's socket_release when memory allocation for UMEMs fails,
    from Magnus Karlsson.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 7c89d9d9 6208689f
......@@ -281,7 +281,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
if (tail_call_reachable)
EMIT1(0x50); /* push rax */
*pprog = prog;
......@@ -407,9 +408,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
int off1 = 49;
int off2 = 38;
int off3 = 16;
int off1 = 42;
int off2 = 31;
int off3 = 9;
int cnt = 0;
/* count the additional bytes used for popping callee regs from stack
......@@ -421,6 +422,12 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
off2 += pop_bytes;
off3 += pop_bytes;
if (stack_depth) {
off1 += 7;
off2 += 7;
off3 += 7;
}
/*
* rdi - pointer to ctx
* rsi - pointer to bpf_array
......@@ -465,8 +472,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
prog = *pprog;
EMIT1(0x58); /* pop rax */
EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
round_up(stack_depth, 8));
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */
round_up(stack_depth, 8));
/* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */
EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */
......@@ -491,7 +499,7 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
int tcc_off = -4 - round_up(stack_depth, 8);
u8 *prog = *pprog;
int pop_bytes = 0;
int off1 = 27;
int off1 = 20;
int poke_off;
int cnt = 0;
......@@ -506,10 +514,14 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
* total bytes for:
* - nop5/ jmpq $off
* - pop callee regs
* - sub rsp, $val
* - sub rsp, $val if depth > 0
* - pop rax
*/
poke_off = X86_PATCH_SIZE + pop_bytes + 7 + 1;
poke_off = X86_PATCH_SIZE + pop_bytes + 1;
if (stack_depth) {
poke_off += 7;
off1 += 7;
}
/*
* if (tail_call_cnt > MAX_TAIL_CALL_CNT)
......@@ -533,7 +545,8 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
pop_callee_regs(pprog, callee_regs_used);
prog = *pprog;
EMIT1(0x58); /* pop rax */
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
if (stack_depth)
EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
prog += X86_PATCH_SIZE;
......@@ -1441,8 +1454,6 @@ xadd: if (is_imm8(insn->off))
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
pop_callee_regs(&prog, callee_regs_used);
if (tail_call_reachable)
EMIT1(0x59); /* pop rcx, get rid of tail_call_cnt */
EMIT1(0xC9); /* leave */
EMIT1(0xC3); /* ret */
break;
......
......@@ -292,6 +292,7 @@ enum bpf_arg_type {
ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */
ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */
ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */
ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
__BPF_ARG_TYPE_MAX,
};
......@@ -382,8 +383,22 @@ enum bpf_reg_type {
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */
PTR_TO_BTF_ID, /* reg points to kernel struct */
PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */
/* PTR_TO_BTF_ID points to a kernel struct that does not need
* to be null checked by the BPF program. This does not imply the
* pointer is _not_ null and in practice this can easily be a null
* pointer when reading pointer chains. The assumption is program
* context will handle null pointer dereference typically via fault
* handling. The verifier must keep this in mind and can make no
* assumptions about null or non-null when doing branch analysis.
* Further, when passed into helpers the helpers can not, without
* additional context, assume the value is non-null.
*/
PTR_TO_BTF_ID,
/* PTR_TO_BTF_ID_OR_NULL points to a kernel struct that has not
* been checked for null. Used primarily to inform the verifier
* an explicit null check is required for this struct.
*/
PTR_TO_BTF_ID_OR_NULL,
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */
PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */
......@@ -591,6 +606,13 @@ struct bpf_trampoline {
struct bpf_ksym ksym;
};
/*
 * Description of an attach target. Passed by pointer to
 * bpf_trampoline_get() and bpf_check_attach_target().
 * NOTE(review): the per-field notes below are inferred from the field
 * names and types only - confirm against the code that fills them in.
 */
struct bpf_attach_target_info {
struct btf_func_model fmodel; /* presumably the function model of the target */
long tgt_addr; /* presumably the address of the target */
const char *tgt_name; /* presumably the name of the target */
const struct btf_type *tgt_type; /* presumably the BTF type of the target */
};
#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
struct bpf_dispatcher_prog {
......@@ -618,9 +640,10 @@ static __always_inline unsigned int bpf_dispatcher_nop_func(
return bpf_func(ctx, insnsi);
}
#ifdef CONFIG_BPF_JIT
struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr);
struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info);
void bpf_trampoline_put(struct bpf_trampoline *tr);
#define BPF_DISPATCHER_INIT(_name) { \
.mutex = __MUTEX_INITIALIZER(_name.mutex), \
......@@ -665,17 +688,20 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym);
void bpf_ksym_add(struct bpf_ksym *ksym);
void bpf_ksym_del(struct bpf_ksym *ksym);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
struct bpf_trampoline *tr)
{
return NULL;
return -ENOTSUPP;
}
static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog,
struct bpf_trampoline *tr)
{
return -ENOTSUPP;
}
static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
static inline struct bpf_trampoline *bpf_trampoline_get(u64 key,
struct bpf_attach_target_info *tgt_info)
{
return -ENOTSUPP;
return ERR_PTR(-EOPNOTSUPP);
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
#define DEFINE_BPF_DISPATCHER(name)
......@@ -739,7 +765,11 @@ struct bpf_prog_aux {
u32 max_rdonly_access;
u32 max_rdwr_access;
const struct bpf_ctx_arg_aux *ctx_arg_info;
struct bpf_prog *linked_prog;
struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
struct bpf_prog *dst_prog;
struct bpf_trampoline *dst_trampoline;
enum bpf_prog_type saved_dst_prog_type;
enum bpf_attach_type saved_dst_attach_type;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
bool offload_requested;
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
......@@ -747,7 +777,6 @@ struct bpf_prog_aux {
bool sleepable;
bool tail_call_reachable;
enum bpf_tramp_prog_type trampoline_prog_type;
struct bpf_trampoline *trampoline;
struct hlist_node tramp_hlist;
/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
const struct btf_type *attach_func_proto;
......@@ -1340,6 +1369,8 @@ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr,
union bpf_attr __user *uattr);
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
struct btf *bpf_get_btf_vmlinux(void);
/* Map specifics */
struct xdp_buff;
struct sk_buff;
......@@ -1381,6 +1412,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr);
bool btf_ctx_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info);
......@@ -1402,7 +1436,7 @@ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *regs);
int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
struct bpf_reg_state *reg);
int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
struct btf *btf, const struct btf_type *t);
struct bpf_prog *bpf_prog_by_id(u32 id);
......@@ -1793,6 +1827,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
......
......@@ -347,8 +347,9 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
{
return (log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
log->level == BPF_LOG_KERNEL;
return log &&
((log->level && log->ubuf && !bpf_verifier_log_full(log)) ||
log->level == BPF_LOG_KERNEL);
}
#define BPF_MAX_SUBPROGS 256
......@@ -449,4 +450,17 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
int check_ctx_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
/*
 * Build the 64-bit trampoline key for an attach target.
 *
 * The low 32 bits always hold @btf_id. When @tgt_prog is non-NULL, the id
 * of that program (tgt_prog->aux->id) is placed in the high 32 bits;
 * otherwise the key is just @btf_id.
 *
 * Defined in this header rather than in bpf.h because it has to
 * dereference tgt_prog.
 */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
					     u32 btf_id)
{
	u64 key = btf_id;

	if (tgt_prog)
		key |= (u64)tgt_prog->aux->id << 32;

	return key;
}
int bpf_check_attach_target(struct bpf_verifier_log *log,
const struct bpf_prog *prog,
const struct bpf_prog *tgt_prog,
u32 btf_id,
struct bpf_attach_target_info *tgt_info);
#endif /* _LINUX_BPF_VERIFIER_H */
......@@ -6,6 +6,7 @@
#include <linux/types.h>
#include <uapi/linux/btf.h>
#include <uapi/linux/bpf.h>
#define BTF_TYPE_EMIT(type) ((void)(type *)0)
......@@ -13,6 +14,7 @@ struct btf;
struct btf_member;
struct btf_type;
union bpf_attr;
struct btf_show;
extern const struct file_operations btf_fops;
......@@ -46,8 +48,45 @@ int btf_get_info_by_fd(const struct btf *btf,
const struct btf_type *btf_type_id_size(const struct btf *btf,
u32 *type_id,
u32 *ret_size);
/*
* Options to control show behaviour.
* - BTF_SHOW_COMPACT: no formatting around type information
* - BTF_SHOW_NONAME: no struct/union member names/types
* - BTF_SHOW_PTR_RAW: show raw (unobfuscated) pointer values;
* equivalent to %px.
* - BTF_SHOW_ZERO: show zero-valued struct/union members; they
* are not displayed by default
* - BTF_SHOW_UNSAFE: skip use of bpf_probe_read() to safely read
* data before displaying it.
*/
#define BTF_SHOW_COMPACT BTF_F_COMPACT
#define BTF_SHOW_NONAME BTF_F_NONAME
#define BTF_SHOW_PTR_RAW BTF_F_PTR_RAW
#define BTF_SHOW_ZERO BTF_F_ZERO
#define BTF_SHOW_UNSAFE (1ULL << 4)
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m);
int btf_type_seq_show_flags(const struct btf *btf, u32 type_id, void *obj,
struct seq_file *m, u64 flags);
/*
* Copy len bytes of string representation of obj of BTF type_id into buf.
*
* @btf: struct btf object
* @type_id: type id of type obj points to
* @obj: pointer to typed data
* @buf: buffer to write to
* @len: maximum length to write to buf
* @flags: show options (see above)
*
* Return: length that would have been/was copied as per snprintf, or
* negative error.
*/
int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
char *buf, int len, u64 flags);
int btf_get_fd_by_id(u32 id);
u32 btf_id(const struct btf *btf);
bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_COOKIE_H
#define __LINUX_COOKIE_H
#include <linux/atomic.h>
#include <linux/percpu.h>
#include <asm/local.h>
/* Per-CPU state used by gen_cookie_next(). */
struct pcpu_gen_cookie {
local_t nesting; /* incremented on entry to gen_cookie_next(), decremented before it returns */
u64 last; /* most recent value produced from this per-CPU state */
} __aligned(16);
/* A cookie generator: a pointer to per-CPU state plus two shared 64-bit counters. */
struct gen_cookie {
struct pcpu_gen_cookie __percpu *local; /* per-CPU state, accessed via this_cpu_ptr() */
atomic64_t forward_last ____cacheline_aligned_in_smp; /* advanced in steps of COOKIE_LOCAL_BATCH by gen_cookie_next() */
atomic64_t reverse_last; /* decremented by gen_cookie_next() when the per-CPU nesting count is not 1 */
};
/*
 * Step by which gen_cookie_next() advances gen_cookie::forward_last.
 * gen_cookie_next() masks with (COOKIE_LOCAL_BATCH - 1), so this value
 * needs to stay a power of two.
 */
#define COOKIE_LOCAL_BATCH 4096
/*
 * Define a static struct gen_cookie called @name together with its static
 * per-CPU state (named __<name>); both shared counters start at 0.
 */
#define DEFINE_COOKIE(name) \
static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name); \
static struct gen_cookie name = { \
.local = &__##name, \
.forward_last = ATOMIC64_INIT(0), \
.reverse_last = ATOMIC64_INIT(0), \
}
/*
 * Produce the next value from cookie generator @gc.
 *
 * Works on the current CPU's struct pcpu_gen_cookie reached through
 * gc->local. The per-CPU nesting counter is incremented on entry and
 * decremented again before returning.
 *
 * NOTE(review): this_cpu_ptr() is used here without disabling preemption,
 * so callers presumably have to run non-preemptibly already - confirm.
 *
 * Return: the generated 64-bit value.
 */
static __always_inline u64 gen_cookie_next(struct gen_cookie *gc)
{
struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local);
u64 val;
/* A result of 1 means this is the only call currently using this per-CPU state. */
if (likely(local_inc_return(&local->nesting) == 1)) {
val = local->last;
/*
 * On SMP builds, when the low bits of the per-CPU value are all zero
 * (i.e. it is a multiple of COOKIE_LOCAL_BATCH), reserve a fresh range
 * of COOKIE_LOCAL_BATCH values from the shared forward counter and
 * continue from the start of that range.
 */
if (__is_defined(CONFIG_SMP) &&
unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) {
s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH,
&gc->forward_last);
val = next - COOKIE_LOCAL_BATCH;
}
local->last = ++val;
} else {
/* Nesting count was not 1: take the value from the shared, downward-counting counter instead. */
val = atomic64_dec_return(&gc->reverse_last);
}
local_dec(&local->nesting);
return val;
}
#endif /* __LINUX_COOKIE_H */
......@@ -50,6 +50,7 @@ static inline void rcu_read_lock_trace(void)
struct task_struct *t = current;
WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
barrier();
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
t->trc_reader_special.b.need_mb)
smp_mb(); // Pairs with update-side barriers
......@@ -72,6 +73,9 @@ static inline void rcu_read_unlock_trace(void)
rcu_lock_release(&rcu_trace_lock_map);
nesting = READ_ONCE(t->trc_reader_nesting) - 1;
barrier(); // Critical section before disabling.
// Disable IPI-based setting of .need_qs.
WRITE_ONCE(t->trc_reader_nesting, INT_MIN);
if (likely(!READ_ONCE(t->trc_reader_special.s)) || nesting) {
WRITE_ONCE(t->trc_reader_nesting, nesting);
return; // We assume shallow reader nesting.
......
......@@ -2548,6 +2548,11 @@ static inline int skb_mac_header_was_set(const struct sk_buff *skb)
return skb->mac_header != (typeof(skb->mac_header))~0U;
}
static inline void skb_unset_mac_header(struct sk_buff *skb)
{
skb->mac_header = (typeof(skb->mac_header))~0U;
}
static inline void skb_reset_mac_header(struct sk_buff *skb)
{
skb->mac_header = skb->data - skb->head;
......
......@@ -25,7 +25,19 @@ void sock_diag_unregister(const struct sock_diag_handler *h);
void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
u64 sock_gen_cookie(struct sock *sk);
u64 __sock_gen_cookie(struct sock *sk);
/*
 * Wrapper around __sock_gen_cookie() that runs it with preemption
 * disabled and returns its result.
 */
static inline u64 sock_gen_cookie(struct sock *sk)
{
u64 cookie;
/* __sock_gen_cookie() is called only between preempt_disable()/preempt_enable(). */
preempt_disable();
cookie = __sock_gen_cookie(sk);
preempt_enable();
return cookie;
}
int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie);
void sock_diag_save_cookie(struct sock *sk, __u32 *cookie);
......
......@@ -20,8 +20,6 @@ void bpf_sk_storage_free(struct sock *sk);
extern const struct bpf_func_proto bpf_sk_storage_get_proto;
extern const struct bpf_func_proto bpf_sk_storage_delete_proto;
extern const struct bpf_func_proto sk_storage_get_btf_proto;
extern const struct bpf_func_proto sk_storage_delete_btf_proto;
struct bpf_local_storage_elem;
struct bpf_sk_storage_diag;
......
......@@ -230,7 +230,7 @@ extern struct list_head net_namespace_list;
struct net *get_net_ns_by_pid(pid_t pid);
struct net *get_net_ns_by_fd(int fd);
u64 net_gen_cookie(struct net *net);
u64 __net_gen_cookie(struct net *net);
#ifdef CONFIG_SYSCTL
void ipx_register_sysctl(void);
......
......@@ -414,6 +414,9 @@ enum {
/* Enable memory-mapping BPF map */
BPF_F_MMAPABLE = (1U << 10),
/* Share perf_event among processes */
BPF_F_PRESERVE_ELEMS = (1U << 11),
};
/* Flags for BPF_PROG_QUERY. */
......@@ -424,6 +427,11 @@ enum {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
/* Flags for BPF_PROG_TEST_RUN */
/* If set, run the test on the cpu specified by bpf_attr.test.cpu */
#define BPF_F_TEST_RUN_ON_CPU (1U << 0)
/* type for BPF_ENABLE_STATS */
enum bpf_stats_type {
/* enabled run_time_ns and run_cnt */
......@@ -566,6 +574,8 @@ union bpf_attr {
*/
__aligned_u64 ctx_in;
__aligned_u64 ctx_out;
__u32 flags;
__u32 cpu;
} test;
struct { /* anonymous struct used by BPF_*_GET_*_ID */
......@@ -632,8 +642,13 @@ union bpf_attr {
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
union {
__u32 target_btf_id; /* btf_id of target to attach to */
struct {
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
};
};
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
......@@ -2512,7 +2527,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
* long bpf_sk_release(struct bpf_sock *sock)
* long bpf_sk_release(void *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
......@@ -2692,7 +2707,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
* long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* long bpf_tcp_check_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
......@@ -2861,6 +2876,7 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf-local-storage cannot be found.
* **-EINVAL** if sk is not a fullsock (e.g. a request_sock).
*
* long bpf_send_signal(u32 sig)
* Description
......@@ -2877,7 +2893,7 @@ union bpf_attr {
*
* **-EAGAIN** if bpf program can try again.
*
* s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* s64 bpf_tcp_gen_syncookie(void *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Try to issue a SYN cookie for the packet with corresponding
* IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
......@@ -3106,7 +3122,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* long bpf_sk_assign(struct sk_buff *skb, void *sk, u64 flags)
* Description
* Helper is overloaded depending on BPF program type. This
* description applies to **BPF_PROG_TYPE_SCHED_CLS** and
......@@ -3234,11 +3250,11 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
* u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
* u64 bpf_sk_cgroup_id(void *sk)
* Description
* Return the cgroup v2 id of the socket *sk*.
*
* *sk* must be a non-**NULL** pointer to a full socket, e.g. one
* *sk* must be a non-**NULL** pointer to a socket, e.g. one
* returned from **bpf_sk_lookup_xxx**\ (),
* **bpf_sk_fullsock**\ (), etc. The format of returned id is
* same as in **bpf_skb_cgroup_id**\ ().
......@@ -3248,7 +3264,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
* u64 bpf_sk_ancestor_cgroup_id(void *sk, int ancestor_level)
* Description
* Return id of cgroup v2 that is ancestor of cgroup associated
* with the *sk* at the *ancestor_level*. The root cgroup is at
......@@ -3586,6 +3602,72 @@ union bpf_attr {
* the data in *dst*. This is a wrapper of **copy_from_user**\ ().
* Return
* 0 on success, or a negative error in case of failure.
*
* long bpf_snprintf_btf(char *str, u32 str_size, struct btf_ptr *ptr, u32 btf_ptr_size, u64 flags)
* Description
* Use BTF to store a string representation of *ptr*->ptr in *str*,
* using *ptr*->type_id. This value should specify the type
* that *ptr*->ptr points to. LLVM __builtin_btf_type_id(type, 1)
* can be used to look up vmlinux BTF type ids. Traversing the
* data structure using BTF, the type information and values are
* stored in the first *str_size* - 1 bytes of *str*. Safe copy of
* the pointer data is carried out to avoid kernel crashes during
* operation. Smaller types can use string space on the stack;
* larger programs can use map data to store the string
* representation.
*
* The string can be subsequently shared with userspace via
* bpf_perf_event_output() or ring buffer interfaces.
* bpf_trace_printk() is to be avoided as it places too small
* a limit on string size to be useful.
*
* *flags* is a combination of
*
* **BTF_F_COMPACT**
* no formatting around type information
* **BTF_F_NONAME**
* no struct/union member names/types
* **BTF_F_PTR_RAW**
* show raw (unobfuscated) pointer values;
* equivalent to printk specifier %px.
* **BTF_F_ZERO**
* show zero-valued struct/union members; they
* are not displayed by default
*
* Return
* The number of bytes that were written (or would have been
* written if output had to be truncated due to string size),
* or a negative error in cases of failure.
*
* long bpf_seq_printf_btf(struct seq_file *m, struct btf_ptr *ptr, u32 ptr_size, u64 flags)
* Description
* Use BTF to write to the seq_file *m* a string representation of
* *ptr*->ptr, using *ptr*->type_id as per bpf_snprintf_btf().
* *flags* are identical to those used for bpf_snprintf_btf.
* Return
* 0 on success or a negative error in case of failure.
*
* u64 bpf_skb_cgroup_classid(struct sk_buff *skb)
* Description
* See **bpf_get_cgroup_classid**\ () for the main description.
* This helper differs from **bpf_get_cgroup_classid**\ () in that
* the cgroup v1 net_cls class is retrieved only from the *skb*'s
* associated socket instead of the current process.
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
* long bpf_redirect_neigh(u32 ifindex, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*
* and fill in L2 addresses from neighboring subsystem. This helper
* is somewhat similar to **bpf_redirect**\ (), except that it
* fills in e.g. MAC addresses based on the L3 information from
* the packet. This helper is supported for IPv4 and IPv6 protocols.
* The *flags* argument is reserved and must be 0. The helper is
* currently only supported for tc BPF program types.
* Return
* The helper returns **TC_ACT_REDIRECT** on success or
* **TC_ACT_SHOT** on error.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
......@@ -3737,6 +3819,10 @@ union bpf_attr {
FN(inode_storage_delete), \
FN(d_path), \
FN(copy_from_user), \
FN(snprintf_btf), \
FN(seq_printf_btf), \
FN(skb_cgroup_classid), \
FN(redirect_neigh), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
......@@ -4845,4 +4931,34 @@ struct bpf_sk_lookup {
__u32 local_port; /* Host byte order */
};
/*
* struct btf_ptr is used for typed pointer representation; the
* type id is used to render the pointer data as the appropriate type
* via the bpf_snprintf_btf() helper described above. A flags field -
* potentially to specify additional details about the BTF pointer
* (rather than its mode of display) - is included for future use.
* Display flags - BTF_F_* - are passed to bpf_snprintf_btf separately.
*/
struct btf_ptr {
void *ptr; /* pointer to the data to be rendered */
__u32 type_id; /* BTF type id of the data that ptr points to */
__u32 flags; /* BTF ptr flags; unused at present. */
};
/*
* Flags to control bpf_snprintf_btf() behaviour.
* - BTF_F_COMPACT: no formatting around type information
* - BTF_F_NONAME: no struct/union member names/types
* - BTF_F_PTR_RAW: show raw (unobfuscated) pointer values;
* equivalent to %px.
* - BTF_F_ZERO: show zero-valued struct/union members; they
* are not displayed by default
*/
/*
 * NOTE: the kernel-internal BTF_SHOW_* macros alias these values and
 * additionally define BTF_SHOW_UNSAFE as (1ULL << 4); a flag added here
 * would need to avoid colliding with that bit.
 */
enum {
BTF_F_COMPACT = (1ULL << 0),
BTF_F_NONAME = (1ULL << 1),
BTF_F_PTR_RAW = (1ULL << 2),
BTF_F_ZERO = (1ULL << 3),
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -15,7 +15,8 @@
#include "map_in_map.h"
#define ARRAY_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)
(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
BPF_F_PRESERVE_ELEMS)
static void bpf_array_free_percpu(struct bpf_array *array)
{
......@@ -64,6 +65,10 @@ int array_map_alloc_check(union bpf_attr *attr)
attr->map_flags & BPF_F_MMAPABLE)
return -EINVAL;
if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
attr->map_flags & BPF_F_PRESERVE_ELEMS)
return -EINVAL;
if (attr->value_size > KMALLOC_MAX_SIZE)
/* if value_size is bigger, the user space won't be able to
* access the elements.
......@@ -1134,6 +1139,9 @@ static void perf_event_fd_array_release(struct bpf_map *map,
struct bpf_event_entry *ee;
int i;
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
return;
rcu_read_lock();
for (i = 0; i < array->map.max_entries; i++) {
ee = READ_ONCE(array->ptrs[i]);
......@@ -1143,12 +1151,19 @@ static void perf_event_fd_array_release(struct bpf_map *map,
rcu_read_unlock();
}
static void perf_event_fd_array_map_free(struct bpf_map *map)
{
if (map->map_flags & BPF_F_PRESERVE_ELEMS)
bpf_fd_array_map_clear(map);
fd_array_map_free(map);
}
static int perf_event_array_map_btf_id;
const struct bpf_map_ops perf_event_array_map_ops = {
.map_meta_equal = bpf_map_meta_equal,
.map_alloc_check = fd_array_map_alloc_check,
.map_alloc = array_map_alloc,
.map_free = fd_array_map_free,
.map_free = perf_event_fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
.map_delete_elem = fd_array_map_delete_elem,
......
......@@ -88,8 +88,8 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
mutex_lock(&seq->lock);
if (!seq->buf) {
seq->size = PAGE_SIZE;
seq->buf = kmalloc(seq->size, GFP_KERNEL);
seq->size = PAGE_SIZE << 3;
seq->buf = kvmalloc(seq->size, GFP_KERNEL);
if (!seq->buf) {
err = -ENOMEM;
goto done;
......
......@@ -56,9 +56,9 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_inode_storage_delete:
return &bpf_inode_storage_delete_proto;
case BPF_FUNC_sk_storage_get:
return &sk_storage_get_btf_proto;
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &sk_storage_delete_btf_proto;
return &bpf_sk_storage_delete_proto;
default:
return tracing_prog_func_proto(func_id, prog);
}
......
This diff is collapsed.
......@@ -99,6 +99,7 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
mutex_init(&fp->aux->used_maps_mutex);
mutex_init(&fp->aux->dst_mutex);
return fp;
}
......@@ -255,6 +256,7 @@ void __bpf_prog_free(struct bpf_prog *fp)
{
if (fp->aux) {
mutex_destroy(&fp->aux->used_maps_mutex);
mutex_destroy(&fp->aux->dst_mutex);
free_percpu(fp->aux->stats);
kfree(fp->aux->poke_tab);
kfree(fp->aux);
......@@ -2138,7 +2140,8 @@ static void bpf_prog_free_deferred(struct work_struct *work)
if (aux->prog->has_callchain_buf)
put_callchain_buffers();
#endif
bpf_trampoline_put(aux->trampoline);
if (aux->dst_trampoline)
bpf_trampoline_put(aux->dst_trampoline);
for (i = 0; i < aux->func_cnt; i++)
bpf_jit_free(aux->func[i]);
if (aux->func_cnt) {
......@@ -2154,8 +2157,8 @@ void bpf_prog_free(struct bpf_prog *fp)
{
struct bpf_prog_aux *aux = fp->aux;
if (aux->linked_prog)
bpf_prog_put(aux->linked_prog);
if (aux->dst_prog)
bpf_prog_put(aux->dst_prog);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
schedule_work(&aux->work);
}
......@@ -2216,6 +2219,8 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
......
......@@ -155,8 +155,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
kthread_stop(rcpu->kthread);
}
static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
struct xdp_frame *xdpf,
static struct sk_buff *cpu_map_build_skb(struct xdp_frame *xdpf,
struct sk_buff *skb)
{
unsigned int hard_start_headroom;
......@@ -365,7 +364,7 @@ static int cpu_map_kthread_run(void *data)
struct sk_buff *skb = skbs[i];
int ret;
skb = cpu_map_build_skb(rcpu, xdpf, skb);
skb = cpu_map_build_skb(xdpf, skb);
if (!skb) {
xdp_return_frame(xdpf);
continue;
......
......@@ -683,6 +683,10 @@ bpf_base_func_proto(enum bpf_func_id func_id)
if (!perfmon_capable())
return NULL;
return bpf_get_trace_printk_proto();
case BPF_FUNC_snprintf_btf:
if (!perfmon_capable())
return NULL;
return &bpf_snprintf_btf_proto;
case BPF_FUNC_jiffies64:
return &bpf_jiffies64_proto;
default:
......
/FEATURE-DUMP.libbpf
/bpf_helper_defs.h
/feature
/bpf_preload_umd
......@@ -12,6 +12,8 @@ userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
userprogs := bpf_preload_umd
clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
bpf_preload_umd-objs := iterators/iterators.o
bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
......
......@@ -42,7 +42,7 @@ struct bpf_prog_aux {
__u32 id;
char name[16];
const char *attach_func_name;
struct bpf_prog *linked_prog;
struct bpf_prog *dst_prog;
struct bpf_func_info *func_info;
struct btf *btf;
};
......@@ -108,7 +108,7 @@ int dump_bpf_prog(struct bpf_iter__bpf_prog *ctx)
BPF_SEQ_PRINTF(seq, "%4u %-16s %s %s\n", aux->id,
get_name(aux->btf, aux->func_info[0].type_id, aux->name),
aux->attach_func_name, aux->linked_prog->aux->name);
aux->attach_func_name, aux->dst_prog->aux->name);
return 0;
}
char LICENSE[] SEC("license") = "GPL";
......@@ -191,7 +191,7 @@ int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
rcu_read_lock();
sk = reuseport_array_lookup_elem(map, key);
if (sk) {
*(u64 *)value = sock_gen_cookie(sk);
*(u64 *)value = __sock_gen_cookie(sk);
err = 0;
} else {
err = -ENOENT;
......
......@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/bpf_lirc.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
......@@ -2154,14 +2155,14 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
prog->expected_attach_type = attr->expected_attach_type;
prog->aux->attach_btf_id = attr->attach_btf_id;
if (attr->attach_prog_fd) {
struct bpf_prog *tgt_prog;
struct bpf_prog *dst_prog;
tgt_prog = bpf_prog_get(attr->attach_prog_fd);
if (IS_ERR(tgt_prog)) {
err = PTR_ERR(tgt_prog);
dst_prog = bpf_prog_get(attr->attach_prog_fd);
if (IS_ERR(dst_prog)) {
err = PTR_ERR(dst_prog);
goto free_prog_nouncharge;
}
prog->aux->linked_prog = tgt_prog;
prog->aux->dst_prog = dst_prog;
}
prog->aux->offload_requested = !!attr->prog_ifindex;
......@@ -2345,8 +2346,12 @@ void bpf_link_put(struct bpf_link *link)
if (!atomic64_dec_and_test(&link->refcnt))
return;
INIT_WORK(&link->work, bpf_link_put_deferred);
schedule_work(&link->work);
if (in_atomic()) {
INIT_WORK(&link->work, bpf_link_put_deferred);
schedule_work(&link->work);
} else {
bpf_link_free(link);
}
}
static int bpf_link_release(struct inode *inode, struct file *filp)
......@@ -2494,11 +2499,23 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
/* Link object for a tracing program attached through a trampoline. */
struct bpf_tracing_link {
struct bpf_link link;
enum bpf_attach_type attach_type;
/* trampoline the program is linked to; put when the link is released */
struct bpf_trampoline *trampoline;
/* target program; NULL if the target is a kernel function */
struct bpf_prog *tgt_prog;
};
static void bpf_tracing_link_release(struct bpf_link *link)
{
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link);
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog,
tr_link->trampoline));
bpf_trampoline_put(tr_link->trampoline);
/* tgt_prog is NULL if target is a kernel function */
if (tr_link->tgt_prog)
bpf_prog_put(tr_link->tgt_prog);
}
static void bpf_tracing_link_dealloc(struct bpf_link *link)
......@@ -2538,10 +2555,15 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
.fill_link_info = bpf_tracing_link_fill_link_info,
};
static int bpf_tracing_prog_attach(struct bpf_prog *prog)
static int bpf_tracing_prog_attach(struct bpf_prog *prog,
int tgt_prog_fd,
u32 btf_id)
{
struct bpf_link_primer link_primer;
struct bpf_prog *tgt_prog = NULL;
struct bpf_trampoline *tr = NULL;
struct bpf_tracing_link *link;
u64 key = 0;
int err;
switch (prog->type) {
......@@ -2570,6 +2592,28 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
goto out_put_prog;
}
if (!!tgt_prog_fd != !!btf_id) {
err = -EINVAL;
goto out_put_prog;
}
if (tgt_prog_fd) {
/* For now we only allow new targets for BPF_PROG_TYPE_EXT */
if (prog->type != BPF_PROG_TYPE_EXT) {
err = -EINVAL;
goto out_put_prog;
}
tgt_prog = bpf_prog_get(tgt_prog_fd);
if (IS_ERR(tgt_prog)) {
err = PTR_ERR(tgt_prog);
tgt_prog = NULL;
goto out_put_prog;
}
key = bpf_trampoline_compute_key(tgt_prog, btf_id);
}
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
......@@ -2579,20 +2623,100 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
&bpf_tracing_link_lops, prog);
link->attach_type = prog->expected_attach_type;
err = bpf_link_prime(&link->link, &link_primer);
if (err) {
kfree(link);
goto out_put_prog;
mutex_lock(&prog->aux->dst_mutex);
/* There are a few possible cases here:
*
* - if prog->aux->dst_trampoline is set, the program was just loaded
* and not yet attached to anything, so we can use the values stored
* in prog->aux
*
* - if prog->aux->dst_trampoline is NULL, the program has already been
* attached to a target and its initial target was cleared (below)
*
* - if tgt_prog != NULL, the caller specified tgt_prog_fd +
* target_btf_id using the link_create API.
*
* - if tgt_prog == NULL, this function was called using the old
* raw_tracepoint_open API, and we need a target from prog->aux
*
* The combination of no saved target in prog->aux, and no target
* specified on load is illegal, and we reject that here.
*/
if (!prog->aux->dst_trampoline && !tgt_prog) {
err = -ENOENT;
goto out_unlock;
}
err = bpf_trampoline_link_prog(prog);
if (!prog->aux->dst_trampoline ||
(key && key != prog->aux->dst_trampoline->key)) {
/* If there is no saved target, or the specified target is
* different from the destination specified at load time, we
* need a new trampoline and a check for compatibility
*/
struct bpf_attach_target_info tgt_info = {};
err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id,
&tgt_info);
if (err)
goto out_unlock;
tr = bpf_trampoline_get(key, &tgt_info);
if (!tr) {
err = -ENOMEM;
goto out_unlock;
}
} else {
/* The caller didn't specify a target, or the target was the
* same as the destination supplied during program load. This
* means we can reuse the trampoline and reference from program
* load time, and there is no need to allocate a new one. This
* can only happen once for any program, as the saved values in
* prog->aux are cleared below.
*/
tr = prog->aux->dst_trampoline;
tgt_prog = prog->aux->dst_prog;
}
err = bpf_link_prime(&link->link, &link_primer);
if (err)
goto out_unlock;
err = bpf_trampoline_link_prog(prog, tr);
if (err) {
bpf_link_cleanup(&link_primer);
goto out_put_prog;
link = NULL;
goto out_unlock;
}
link->tgt_prog = tgt_prog;
link->trampoline = tr;
/* Always clear the trampoline and target prog from prog->aux to make
* sure the original attach destination is not kept alive after a
* program is (re-)attached to another target.
*/
if (prog->aux->dst_prog &&
(tgt_prog_fd || tr != prog->aux->dst_trampoline))
/* got extra prog ref from syscall, or attaching to different prog */
bpf_prog_put(prog->aux->dst_prog);
if (prog->aux->dst_trampoline && tr != prog->aux->dst_trampoline)
/* we allocated a new trampoline, so free the old one */
bpf_trampoline_put(prog->aux->dst_trampoline);
prog->aux->dst_prog = NULL;
prog->aux->dst_trampoline = NULL;
mutex_unlock(&prog->aux->dst_mutex);
return bpf_link_settle(&link_primer);
out_unlock:
if (tr && tr != prog->aux->dst_trampoline)
bpf_trampoline_put(tr);
mutex_unlock(&prog->aux->dst_mutex);
kfree(link);
out_put_prog:
if (tgt_prog_fd && tgt_prog)
bpf_prog_put(tgt_prog);
bpf_prog_put(prog);
return err;
}
......@@ -2706,7 +2830,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
tp_name = prog->aux->attach_func_name;
break;
}
return bpf_tracing_prog_attach(prog);
return bpf_tracing_prog_attach(prog, 0, 0);
case BPF_PROG_TYPE_RAW_TRACEPOINT:
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
if (strncpy_from_user(buf,
......@@ -2975,7 +3099,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
}
}
#define BPF_PROG_TEST_RUN_LAST_FIELD test.ctx_out
#define BPF_PROG_TEST_RUN_LAST_FIELD test.cpu
static int bpf_prog_test_run(const union bpf_attr *attr,
union bpf_attr __user *uattr)
......@@ -3890,10 +4014,15 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
if (attr->link_create.attach_type == BPF_TRACE_ITER &&
prog->expected_attach_type == BPF_TRACE_ITER)
return bpf_iter_link_attach(attr, prog);
if (attr->link_create.attach_type != prog->expected_attach_type)
return -EINVAL;
if (prog->expected_attach_type == BPF_TRACE_ITER)
return bpf_iter_link_attach(attr, prog);
else if (prog->type == BPF_PROG_TYPE_EXT)
return bpf_tracing_prog_attach(prog,
attr->link_create.target_fd,
attr->link_create.target_btf_id);
return -EINVAL;
}
......@@ -3907,18 +4036,25 @@ static int link_create(union bpf_attr *attr)
if (CHECK_ATTR(BPF_LINK_CREATE))
return -EINVAL;
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC)
return -EINVAL;
prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
prog = bpf_prog_get(attr->link_create.prog_fd);
if (IS_ERR(prog))
return PTR_ERR(prog);
ret = bpf_prog_attach_check_attach_type(prog,
attr->link_create.attach_type);
if (ret)
goto err_out;
goto out;
if (prog->type == BPF_PROG_TYPE_EXT) {
ret = tracing_bpf_link_attach(attr, prog);
goto out;
}
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC || ptype != prog->type) {
ret = -EINVAL;
goto out;
}
switch (ptype) {
case BPF_PROG_TYPE_CGROUP_SKB:
......@@ -3946,7 +4082,7 @@ static int link_create(union bpf_attr *attr)
ret = -EINVAL;
}
err_out:
out:
if (ret < 0)
bpf_prog_put(prog);
return ret;
......
......@@ -65,7 +65,7 @@ static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
bpf_image_ksym_add(tr->image, ksym);
}
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
struct bpf_trampoline *tr;
struct hlist_head *head;
......@@ -261,14 +261,12 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog)
}
}
int bpf_trampoline_link_prog(struct bpf_prog *prog)
int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
{
enum bpf_tramp_prog_type kind;
struct bpf_trampoline *tr;
int err = 0;
int cnt;
tr = prog->aux->trampoline;
kind = bpf_attach_type_to_tramp(prog);
mutex_lock(&tr->mutex);
if (tr->extension_prog) {
......@@ -301,7 +299,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
}
hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]);
tr->progs_cnt[kind]++;
err = bpf_trampoline_update(prog->aux->trampoline);
err = bpf_trampoline_update(tr);
if (err) {
hlist_del(&prog->aux->tramp_hlist);
tr->progs_cnt[kind]--;
......@@ -312,13 +310,11 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog)
}
/* bpf_trampoline_unlink_prog() should never fail. */
int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr)
{
enum bpf_tramp_prog_type kind;
struct bpf_trampoline *tr;
int err;
tr = prog->aux->trampoline;
kind = bpf_attach_type_to_tramp(prog);
mutex_lock(&tr->mutex);
if (kind == BPF_TRAMP_REPLACE) {
......@@ -330,12 +326,32 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
}
hlist_del(&prog->aux->tramp_hlist);
tr->progs_cnt[kind]--;
err = bpf_trampoline_update(prog->aux->trampoline);
err = bpf_trampoline_update(tr);
out:
mutex_unlock(&tr->mutex);
return err;
}
/* Look up the trampoline for @key and, if no target address has been
 * recorded on it yet, fill in the function model and address from @tgt_info.
 *
 * Returns the trampoline, or NULL when bpf_trampoline_lookup() returns NULL.
 */
struct bpf_trampoline *bpf_trampoline_get(u64 key,
					  struct bpf_attach_target_info *tgt_info)
{
	struct bpf_trampoline *tramp = bpf_trampoline_lookup(key);

	if (!tramp)
		return NULL;

	mutex_lock(&tramp->mutex);
	if (!tramp->func.addr) {
		/* No target recorded yet: take model and address from tgt_info. */
		memcpy(&tramp->func.model, &tgt_info->fmodel,
		       sizeof(tgt_info->fmodel));
		tramp->func.addr = (void *)tgt_info->tgt_addr;
	}
	mutex_unlock(&tramp->mutex);

	return tramp;
}
void bpf_trampoline_put(struct bpf_trampoline *tr)
{
if (!tr)
......
This diff is collapsed.
......@@ -28,6 +28,8 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp);
* @kthread_ptr: This flavor's grace-period/callback-invocation kthread.
* @gp_func: This flavor's grace-period-wait function.
* @gp_state: Grace period's most recent state transition (debugging).
* @gp_sleep: Per-grace-period sleep to prevent CPU-bound looping.
* @init_fract: Initial backoff sleep interval.
* @gp_jiffies: Time of last @gp_state transition.
* @gp_start: Most recent grace-period start in jiffies.
* @n_gps: Number of grace periods completed since boot.
......@@ -48,6 +50,8 @@ struct rcu_tasks {
struct wait_queue_head cbs_wq;
raw_spinlock_t cbs_lock;
int gp_state;
int gp_sleep;
int init_fract;
unsigned long gp_jiffies;
unsigned long gp_start;
unsigned long n_gps;
......@@ -81,7 +85,7 @@ static struct rcu_tasks rt_name = \
DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu);
/* Avoid IPIing CPUs early in the grace period. */
#define RCU_TASK_IPI_DELAY (HZ / 2)
#define RCU_TASK_IPI_DELAY (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) ? HZ / 2 : 0)
static int rcu_task_ipi_delay __read_mostly = RCU_TASK_IPI_DELAY;
module_param(rcu_task_ipi_delay, int, 0644);
......@@ -231,7 +235,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
cond_resched();
}
/* Paranoid sleep to keep this from entering a tight loop */
schedule_timeout_idle(HZ/10);
schedule_timeout_idle(rtp->gp_sleep);
set_tasks_gp_state(rtp, RTGS_WAIT_CBS);
}
......@@ -329,8 +333,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
*/
lastreport = jiffies;
/* Start off with HZ/10 wait and slowly back off to 1 HZ wait. */
fract = 10;
// Start off with initial wait and slowly back off to 1 HZ wait.
fract = rtp->init_fract;
if (fract > HZ)
fract = HZ;
for (;;) {
bool firstreport;
......@@ -553,6 +559,8 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
static int __init rcu_spawn_tasks_kthread(void)
{
rcu_tasks.gp_sleep = HZ / 10;
rcu_tasks.init_fract = 10;
rcu_tasks.pregp_func = rcu_tasks_pregp_step;
rcu_tasks.pertask_func = rcu_tasks_pertask;
rcu_tasks.postscan_func = rcu_tasks_postscan;
......@@ -685,6 +693,7 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks_rude);
static int __init rcu_spawn_tasks_rude_kthread(void)
{
rcu_tasks_rude.gp_sleep = HZ / 10;
rcu_spawn_tasks_kthread_generic(&rcu_tasks_rude);
return 0;
}
......@@ -745,9 +754,9 @@ static DEFINE_PER_CPU(bool, trc_ipi_to_cpu);
// The number of detections of task quiescent state relying on
// heavyweight readers executing explicit memory barriers.
unsigned long n_heavy_reader_attempts;
unsigned long n_heavy_reader_updates;
unsigned long n_heavy_reader_ofl_updates;
static unsigned long n_heavy_reader_attempts;
static unsigned long n_heavy_reader_updates;
static unsigned long n_heavy_reader_ofl_updates;
void call_rcu_tasks_trace(struct rcu_head *rhp, rcu_callback_t func);
DEFINE_RCU_TASKS(rcu_tasks_trace, rcu_tasks_wait_gp, call_rcu_tasks_trace,
......@@ -821,6 +830,12 @@ static void trc_read_check_handler(void *t_in)
WRITE_ONCE(t->trc_reader_checked, true);
goto reset_ipi;
}
// If we are racing with an rcu_read_unlock_trace(), try again later.
if (unlikely(t->trc_reader_nesting < 0)) {
if (WARN_ON_ONCE(atomic_dec_and_test(&trc_n_readers_need_end)))
wake_up(&trc_wait);
goto reset_ipi;
}
WRITE_ONCE(t->trc_reader_checked, true);
// Get here if the task is in a read-side critical section. Set
......@@ -911,7 +926,8 @@ static void trc_wait_for_one_reader(struct task_struct *t,
// If currently running, send an IPI, either way, add to list.
trc_add_holdout(t, bhp);
if (task_curr(t) && time_after(jiffies, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
if (task_curr(t) &&
time_after(jiffies + 1, rcu_tasks_trace.gp_start + rcu_task_ipi_delay)) {
// The task is currently running, so try IPIing it.
cpu = task_cpu(t);
......@@ -1072,15 +1088,17 @@ static void rcu_tasks_trace_postgp(struct rcu_tasks *rtp)
if (ret)
break; // Count reached zero.
// Stall warning time, so make a list of the offenders.
rcu_read_lock();
for_each_process_thread(g, t)
if (READ_ONCE(t->trc_reader_special.b.need_qs))
trc_add_holdout(t, &holdouts);
rcu_read_unlock();
firstreport = true;
list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list)
if (READ_ONCE(t->trc_reader_special.b.need_qs)) {
list_for_each_entry_safe(t, g, &holdouts, trc_holdout_list) {
if (READ_ONCE(t->trc_reader_special.b.need_qs))
show_stalled_task_trace(t, &firstreport);
trc_del_holdout(t);
}
trc_del_holdout(t); // Release task_struct reference.
}
if (firstreport)
pr_err("INFO: rcu_tasks_trace detected stalls? (Counter/taskslist mismatch?)\n");
show_stalled_ipi_trace();
......@@ -1163,6 +1181,17 @@ EXPORT_SYMBOL_GPL(rcu_barrier_tasks_trace);
static int __init rcu_spawn_tasks_trace_kthread(void)
{
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) {
rcu_tasks_trace.gp_sleep = HZ / 10;
rcu_tasks_trace.init_fract = 10;
} else {
rcu_tasks_trace.gp_sleep = HZ / 200;
if (rcu_tasks_trace.gp_sleep <= 0)
rcu_tasks_trace.gp_sleep = 1;
rcu_tasks_trace.init_fract = HZ / 5;
if (rcu_tasks_trace.init_fract <= 0)
rcu_tasks_trace.init_fract = 1;
}
rcu_tasks_trace.pregp_func = rcu_tasks_trace_pregp_step;
rcu_tasks_trace.pertask_func = rcu_tasks_trace_pertask;
rcu_tasks_trace.postscan_func = rcu_tasks_trace_postscan;
......
......@@ -7,6 +7,7 @@
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
......@@ -16,6 +17,9 @@
#include <linux/error-injection.h>
#include <linux/btf_ids.h>
#include <uapi/linux/bpf.h>
#include <uapi/linux/btf.h>
#include <asm/tlb.h>
#include "trace_probe.h"
......@@ -67,6 +71,10 @@ static struct bpf_raw_event_map *bpf_get_raw_tracepoint_module(const char *name)
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
u64 flags, const struct btf **btf,
s32 *btf_id);
/**
* trace_call_bpf - invoke BPF program
* @call: tracepoint event
......@@ -772,6 +780,31 @@ static const struct bpf_func_proto bpf_seq_write_proto = {
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
/* BPF helper: validate @ptr, @btf_ptr_size and @flags through
 * bpf_btf_printf_prepare() and, on success, show the object at ptr->ptr
 * into seq_file @m using the resolved BTF type.
 *
 * Returns the error from bpf_btf_printf_prepare(), otherwise the result of
 * btf_type_seq_show_flags().
 */
BPF_CALL_4(bpf_seq_printf_btf, struct seq_file *, m, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *vmlinux_btf;
	s32 type_id;
	int err;

	err = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &vmlinux_btf,
				     &type_id);
	if (err)
		return err;

	return btf_type_seq_show_flags(vmlinux_btf, type_id, ptr->ptr, m,
				       flags);
}
/* Helper prototype for bpf_seq_printf_btf(): GPL-only, returns an integer.
 * arg1 is a BTF-typed pointer with BTF id btf_seq_file_ids[0], arg2/arg3 are
 * a memory pointer and its constant size (zero allowed), and arg4 is not
 * type-constrained.
 */
static const struct bpf_func_proto bpf_seq_printf_btf_proto = {
.func = bpf_seq_printf_btf,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_BTF_ID,
.arg1_btf_id = &btf_seq_file_ids[0],
.arg2_type = ARG_PTR_TO_MEM,
.arg3_type = ARG_CONST_SIZE_OR_ZERO,
.arg4_type = ARG_ANYTHING,
};
static __always_inline int
get_map_perf_counter(struct bpf_map *map, u64 flags,
u64 *value, u64 *enabled, u64 *running)
......@@ -1147,6 +1180,65 @@ static const struct bpf_func_proto bpf_d_path_proto = {
.allowed = bpf_d_path_allowed,
};
#define BTF_F_ALL (BTF_F_COMPACT | BTF_F_NONAME | \
BTF_F_PTR_RAW | BTF_F_ZERO)
/* Validate the arguments shared by the BTF printing helpers and resolve the
 * type to display.
 *
 * @ptr:          descriptor supplied by the caller; ptr->type_id selects the
 *                type
 * @btf_ptr_size: must equal sizeof(struct btf_ptr)
 * @flags:        must be a subset of BTF_F_ALL
 * @btf:          out: BTF object returned by bpf_get_btf_vmlinux()
 * @btf_id:       out: type id taken from ptr->type_id
 *
 * Returns 0 on success; -EINVAL for bad flags, size or type_id, or when
 * bpf_get_btf_vmlinux() returns NULL; the encoded error when it returns an
 * ERR_PTR; -ENOENT when the type id cannot be resolved.
 */
static int bpf_btf_printf_prepare(struct btf_ptr *ptr, u32 btf_ptr_size,
				  u64 flags, const struct btf **btf,
				  s32 *btf_id)
{
	const struct btf_type *t;

	if (unlikely(flags & ~(BTF_F_ALL)))
		return -EINVAL;

	if (btf_ptr_size != sizeof(struct btf_ptr))
		return -EINVAL;

	*btf = bpf_get_btf_vmlinux();

	/* PTR_ERR(NULL) is 0, which callers would treat as success and then
	 * go on to use a NULL btf; report NULL as an explicit error instead.
	 */
	if (IS_ERR_OR_NULL(*btf))
		return IS_ERR(*btf) ? PTR_ERR(*btf) : -EINVAL;

	if (ptr->type_id > 0)
		*btf_id = ptr->type_id;
	else
		return -EINVAL;

	/* t is only read when it was assigned (short-circuit on *btf_id). */
	if (*btf_id > 0)
		t = btf_type_by_id(*btf, *btf_id);
	if (*btf_id <= 0 || !t)
		return -ENOENT;

	return 0;
}
/* BPF helper: validate @ptr, @btf_ptr_size and @flags through
 * bpf_btf_printf_prepare() and, on success, format the object at ptr->ptr
 * into the buffer @str of @str_size bytes using the resolved BTF type.
 *
 * Returns the error from bpf_btf_printf_prepare(), otherwise the result of
 * btf_type_snprintf_show().
 */
BPF_CALL_5(bpf_snprintf_btf, char *, str, u32, str_size, struct btf_ptr *, ptr,
	   u32, btf_ptr_size, u64, flags)
{
	const struct btf *vmlinux_btf;
	s32 type_id;
	int err;

	err = bpf_btf_printf_prepare(ptr, btf_ptr_size, flags, &vmlinux_btf,
				     &type_id);
	if (err)
		return err;

	return btf_type_snprintf_show(vmlinux_btf, type_id, ptr->ptr, str,
				      str_size, flags);
}
/* Helper prototype for bpf_snprintf_btf(): not GPL-only, returns an integer.
 * arg1/arg2 are the output buffer and its constant size, arg3/arg4 are a
 * memory pointer and its constant size, and arg5 is not type-constrained.
 */
const struct bpf_func_proto bpf_snprintf_btf_proto = {
.func = bpf_snprintf_btf,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_MEM,
.arg2_type = ARG_CONST_SIZE,
.arg3_type = ARG_PTR_TO_MEM,
.arg4_type = ARG_CONST_SIZE,
.arg5_type = ARG_ANYTHING,
};
const struct bpf_func_proto *
bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
......@@ -1233,6 +1325,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_task_stack_proto;
case BPF_FUNC_copy_from_user:
return prog->aux->sleepable ? &bpf_copy_from_user_proto : NULL;
case BPF_FUNC_snprintf_btf:
return &bpf_snprintf_btf_proto;
default:
return NULL;
}
......@@ -1630,6 +1724,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return prog->expected_attach_type == BPF_TRACE_ITER ?
&bpf_seq_write_proto :
NULL;
case BPF_FUNC_seq_printf_btf:
return prog->expected_attach_type == BPF_TRACE_ITER ?
&bpf_seq_printf_btf_proto :
NULL;
case BPF_FUNC_d_path:
return &bpf_d_path_proto;
default:
......@@ -1678,6 +1776,7 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
};
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
.test_run = bpf_prog_test_run_raw_tp,
};
const struct bpf_verifier_ops tracing_verifier_ops = {
......
......@@ -11,6 +11,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/error-injection.h>
#include <linux/smp.h>
#define CREATE_TRACE_POINTS
#include <trace/events/bpf_test_run.h>
......@@ -204,6 +205,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
int b = 2, err = -EFAULT;
u32 retval = 0;
if (kattr->test.flags || kattr->test.cpu)
return -EINVAL;
switch (prog->expected_attach_type) {
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
......@@ -236,6 +240,84 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog,
return err;
}
/* Arguments and result for one raw tracepoint test run; passed as the
 * opaque data pointer to __bpf_prog_test_run_raw_tp().
 */
struct bpf_raw_tp_test_run_info {
struct bpf_prog *prog; /* program to run */
void *ctx; /* context handed to the program; may be NULL */
u32 retval; /* out: value returned by BPF_PROG_RUN() */
};
static void
__bpf_prog_test_run_raw_tp(void *data)
{
struct bpf_raw_tp_test_run_info *info = data;
rcu_read_lock();
info->retval = BPF_PROG_RUN(info->prog, info->ctx);
rcu_read_unlock();
}
/* Test-run a raw tracepoint program once, optionally on a chosen CPU.
 *
 * Rejects (-EINVAL) requests that set data_in/out, ctx_out, duration or
 * repeat, a ctx_size_in smaller than prog->aux->max_ctx_offset, or a
 * non-zero cpu without BPF_F_TEST_RUN_ON_CPU. The context is copied from
 * user space into a kernel buffer, the program is run either directly or
 * via smp_call_function_single() on the requested CPU, and the program's
 * return value is copied to uattr->test.retval.
 *
 * Returns 0 on success, -ENOMEM or -EFAULT on allocation/copy failure,
 * -ENXIO if the requested CPU is out of range or offline, or the error
 * from smp_call_function_single().
 *
 * NOTE(review): ctx_size_in is only bounded from below here; confirm that
 * an arbitrarily large user-supplied size is acceptable for kzalloc().
 */
int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
const union bpf_attr *kattr,
union bpf_attr __user *uattr)
{
void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
__u32 ctx_size_in = kattr->test.ctx_size_in;
struct bpf_raw_tp_test_run_info info;
int cpu = kattr->test.cpu, err = 0;
int current_cpu;
/* doesn't support data_in/out, ctx_out, duration, or repeat */
if (kattr->test.data_in || kattr->test.data_out ||
kattr->test.ctx_out || kattr->test.duration ||
kattr->test.repeat)
return -EINVAL;
if (ctx_size_in < prog->aux->max_ctx_offset)
return -EINVAL;
/* a CPU number is only meaningful together with BPF_F_TEST_RUN_ON_CPU */
if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
return -EINVAL;
if (ctx_size_in) {
info.ctx = kzalloc(ctx_size_in, GFP_USER);
if (!info.ctx)
return -ENOMEM;
if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
err = -EFAULT;
goto out;
}
} else {
/* no context supplied; kfree(NULL) at "out" is then a no-op */
info.ctx = NULL;
}
info.prog = prog;
current_cpu = get_cpu();
/* run directly when no CPU was requested or it is the one we are on */
if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
cpu == current_cpu) {
__bpf_prog_test_run_raw_tp(&info);
} else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
/* smp_call_function_single() also checks cpu_online()
* after csd_lock(). However, since cpu is from user
* space, let's do an extra quick check to filter out
* invalid value before smp_call_function_single().
*/
err = -ENXIO;
} else {
err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
&info, 1);
}
put_cpu();
/* info.retval is only written when the program actually ran (err == 0) */
if (!err &&
copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
err = -EFAULT;
out:
kfree(info.ctx);
return err;
}
static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
{
void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
......@@ -410,6 +492,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
void *data;
int ret;
if (kattr->test.flags || kattr->test.cpu)
return -EINVAL;
data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
if (IS_ERR(data))
......@@ -607,6 +692,9 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
return -EINVAL;
if (kattr->test.flags || kattr->test.cpu)
return -EINVAL;
if (size < ETH_HLEN)
return -EINVAL;
......
......@@ -269,7 +269,7 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
{
struct bpf_local_storage_data *sdata;
if (flags > BPF_SK_STORAGE_GET_F_CREATE)
if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
return (unsigned long)NULL;
sdata = sk_storage_lookup(sk, map, true);
......@@ -299,6 +299,9 @@ BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
if (!sk || !sk_fullsock(sk))
return -EINVAL;
if (refcount_inc_not_zero(&sk->sk_refcnt)) {
int err;
......@@ -355,7 +358,7 @@ const struct bpf_func_proto bpf_sk_storage_get_proto = {
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_SOCKET,
.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
......@@ -375,27 +378,7 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = {
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_SOCKET,
};
const struct bpf_func_proto sk_storage_get_btf_proto = {
.func = bpf_sk_storage_get,
.gpl_only = false,
.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
const struct bpf_func_proto sk_storage_delete_btf_proto = {
.func = bpf_sk_storage_delete,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_BTF_ID,
.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};
struct bpf_sk_storage_diag {
......
This diff is collapsed.
......@@ -19,6 +19,7 @@
#include <linux/net_namespace.h>
#include <linux/sched/task.h>
#include <linux/uidgid.h>
#include <linux/cookie.h>
#include <net/sock.h>
#include <net/netlink.h>
......@@ -69,16 +70,16 @@ EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
static atomic64_t cookie_gen;
DEFINE_COOKIE(net_cookie);
u64 net_gen_cookie(struct net *net)
u64 __net_gen_cookie(struct net *net)
{
while (1) {
u64 res = atomic64_read(&net->net_cookie);
if (res)
return res;
res = atomic64_inc_return(&cookie_gen);
res = gen_cookie_next(&net_cookie);
atomic64_cmpxchg(&net->net_cookie, 0, res);
}
}
......@@ -1101,7 +1102,10 @@ static int __init net_ns_init(void)
panic("Could not allocate generic netns");
rcu_assign_pointer(init_net.gen, ng);
net_gen_cookie(&init_net);
preempt_disable();
__net_gen_cookie(&init_net);
preempt_enable();
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net, &init_user_ns))
......
......@@ -11,7 +11,7 @@
#include <linux/tcp.h>
#include <linux/workqueue.h>
#include <linux/nospec.h>
#include <linux/cookie.h>
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>
......@@ -19,16 +19,17 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;
static atomic64_t cookie_gen;
u64 sock_gen_cookie(struct sock *sk)
DEFINE_COOKIE(sock_cookie);
u64 __sock_gen_cookie(struct sock *sk)
{
while (1) {
u64 res = atomic64_read(&sk->sk_cookie);
if (res)
return res;
res = atomic64_inc_return(&cookie_gen);
res = gen_cookie_next(&sock_cookie);
atomic64_cmpxchg(&sk->sk_cookie, 0, res);
}
}
......
......@@ -401,7 +401,7 @@ static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
if (!sk)
return ERR_PTR(-ENOENT);
sock_gen_cookie(sk);
__sock_gen_cookie(sk);
return &sk->sk_cookie;
}
......@@ -610,6 +610,9 @@ static int sock_map_update_elem(struct bpf_map *map, void *key,
struct sock *sk = (struct sock *)value;
int ret;
if (unlikely(!sk || !sk_fullsock(sk)))
return -EINVAL;
if (!sock_map_sk_is_suitable(sk))
return -EOPNOTSUPP;
......@@ -1206,7 +1209,7 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
if (!sk)
return ERR_PTR(-ENOENT);
sock_gen_cookie(sk);
__sock_gen_cookie(sk);
return &sk->sk_cookie;
}
......
......@@ -28,22 +28,6 @@ static u32 unsupported_ops[] = {
static const struct btf_type *tcp_sock_type;
static u32 tcp_sock_id, sock_id;
static struct bpf_func_proto btf_sk_storage_get_proto __read_mostly;
static struct bpf_func_proto btf_sk_storage_delete_proto __read_mostly;
static void convert_sk_func_proto(struct bpf_func_proto *to, const struct bpf_func_proto *from)
{
int i;
*to = *from;
for (i = 0; i < ARRAY_SIZE(to->arg_type); i++) {
if (to->arg_type[i] == ARG_PTR_TO_SOCKET) {
to->arg_type[i] = ARG_PTR_TO_BTF_ID;
to->arg_btf_id[i] = &tcp_sock_id;
}
}
}
static int bpf_tcp_ca_init(struct btf *btf)
{
s32 type_id;
......@@ -59,9 +43,6 @@ static int bpf_tcp_ca_init(struct btf *btf)
tcp_sock_id = type_id;
tcp_sock_type = btf_type_by_id(btf, tcp_sock_id);
convert_sk_func_proto(&btf_sk_storage_get_proto, &bpf_sk_storage_get_proto);
convert_sk_func_proto(&btf_sk_storage_delete_proto, &bpf_sk_storage_delete_proto);
return 0;
}
......@@ -188,9 +169,9 @@ bpf_tcp_ca_get_func_proto(enum bpf_func_id func_id,
case BPF_FUNC_tcp_send_ack:
return &bpf_tcp_send_ack_proto;
case BPF_FUNC_sk_storage_get:
return &btf_sk_storage_get_proto;
return &bpf_sk_storage_get_proto;
case BPF_FUNC_sk_storage_delete:
return &btf_sk_storage_delete_proto;
return &bpf_sk_storage_delete_proto;
default:
return bpf_base_func_proto(func_id);
}
......
......@@ -703,6 +703,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
xs->pool = xp_create_and_assign_umem(xs,
umem_xs->umem);
if (!xs->pool) {
err = -ENOMEM;
sockfd_put(sock);
goto out_unlock;
}
......
......@@ -96,7 +96,7 @@ struct xsk_queue {
* seen and read by the consumer.
*
* The consumer peeks into the ring to see if the producer has written
* any new entries. If so, the producer can then read these entries
* any new entries. If so, the consumer can then read these entries
* and when it is done reading them release them back to the producer
* so that the producer can use these slots to fill in new entries.
*
......
......@@ -31,28 +31,30 @@ struct {
#define PARSE_IP 3
#define PARSE_IPV6 4
/* protocol dispatch routine.
* It tail-calls next BPF program depending on eth proto
* Note, we could have used:
* bpf_tail_call(skb, &jmp_table, proto);
* but it would need large prog_array
/* Protocol dispatch routine. It tail-calls next BPF program depending
* on eth proto. Note, we could have used ...
*
* bpf_tail_call(skb, &jmp_table, proto);
*
* ... but it would need large prog_array and cannot be optimised given
* the map key is not static.
*/
static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
{
switch (proto) {
case ETH_P_8021Q:
case ETH_P_8021AD:
bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN);
break;
case ETH_P_MPLS_UC:
case ETH_P_MPLS_MC:
bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS);
break;
case ETH_P_IP:
bpf_tail_call(skb, &jmp_table, PARSE_IP);
bpf_tail_call_static(skb, &jmp_table, PARSE_IP);
break;
case ETH_P_IPV6:
bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6);
break;
}
}
......
......@@ -433,6 +433,7 @@ class PrinterHelpers(Printer):
'struct sk_msg_md',
'struct xdp_md',
'struct path',
'struct btf_ptr',
]
known_types = {
'...',
......@@ -474,6 +475,7 @@ class PrinterHelpers(Printer):
'struct udp6_sock',
'struct task_struct',
'struct path',
'struct btf_ptr',
}
mapped_types = {
'u8': '__u8',
......
This diff is collapsed.
......@@ -98,19 +98,18 @@ PC_FILE = libbpf.pc
ifdef EXTRA_CFLAGS
CFLAGS := $(EXTRA_CFLAGS)
else
CFLAGS := -g -Wall
CFLAGS := -g -O2
endif
# Append required CFLAGS
override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum
override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
# flags specific for shared library
SHLIB_FLAGS := -DSHARED
SHLIB_FLAGS := -DSHARED -fPIC
ifeq ($(VERBOSE),1)
Q =
......
This diff is collapsed.
......@@ -174,8 +174,9 @@ struct bpf_link_create_opts {
__u32 flags;
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
__u32 target_btf_id;
};
/* Names the final member of struct bpf_link_create_opts (target_btf_id). */
#define bpf_link_create_opts__last_field target_btf_id
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
......@@ -234,7 +235,7 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);
/* btf points at btf_size bytes of BTF data and is taken as const, so the
 * caller's buffer is not written through this pointer. log_buf and
 * log_buf_size describe a caller-supplied log buffer; do_log presumably
 * toggles logging - NOTE(review): confirm against the implementation.
 */
LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf,
			    __u32 log_buf_size, bool do_log);
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
......@@ -251,6 +252,32 @@ struct bpf_prog_bind_opts {
LIBBPF_API int bpf_prog_bind_map(int prog_fd, int map_fd,
const struct bpf_prog_bind_opts *opts);
/* Options block for bpf_prog_test_run_opts(), which receives it by pointer.
 * Members annotated "out" are filled in as results; members annotated
 * "in"/"out" carry a capacity on entry and an actual length on return.
 * NOTE(review): the meaning of repeat, flags and cpu is not documented
 * here - confirm against the kernel UAPI before relying on it.
 */
struct bpf_test_run_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
const void *data_in; /* optional */
void *data_out; /* optional */
__u32 data_size_in;
__u32 data_size_out; /* in: max length of data_out
* out: length of data_out
*/
const void *ctx_in; /* optional */
void *ctx_out; /* optional */
__u32 ctx_size_in;
__u32 ctx_size_out; /* in: max length of ctx_out
* out: length of ctx_out
*/
__u32 retval; /* out: return code of the BPF program */
int repeat;
__u32 duration; /* out: average per repetition in ns */
__u32 flags;
__u32 cpu;
};
/* Names the final member of struct bpf_test_run_opts. */
#define bpf_test_run_opts__last_field cpu
LIBBPF_API int bpf_prog_test_run_opts(int prog_fd,
struct bpf_test_run_opts *opts);
#ifdef __cplusplus
} /* extern "C" */
#endif
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
/* SPDX-License-Identifier: GPL-2.0 */
#define SOCKMAP_MAX_ENTRIES (64)
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment