Commit da4e023e authored by Daniel Borkmann

Merge branch 'bpf-prog-stats'

Alexei Starovoitov says:

====================
Introduce per-program stats to monitor the usage of BPF.

v2->v3:
- rename to run_time_ns/run_cnt everywhere

v1->v2:
- fixed u64 stats on 32-bit archs. Thanks Eric
- use more verbose run_time_ns in json output as suggested by Andrii
- refactored prog_alloc and clarified behavior of stats in subprogs
====================
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parents 143bdc2e 88ad472b
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/rbtree_latch.h> #include <linux/rbtree_latch.h>
#include <linux/numa.h> #include <linux/numa.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/u64_stats_sync.h>
struct bpf_verifier_env; struct bpf_verifier_env;
struct perf_event; struct perf_event;
...@@ -340,6 +341,12 @@ enum bpf_cgroup_storage_type { ...@@ -340,6 +341,12 @@ enum bpf_cgroup_storage_type {
#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX #define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
/*
 * Run statistics of a BPF program. bpf_prog_aux holds a __percpu pointer
 * to this structure, so each CPU updates its own copy; BPF_PROG_RUN() is
 * the writer and bpf_prog_get_stats() sums the copies for reporting.
 */
struct bpf_prog_stats {
/* incremented once per program run while stats collection is enabled */
u64 cnt;
/* accumulated sched_clock() delta measured around each counted run */
u64 nsecs;
/* brackets updates of the two counters so readers can retry for a consistent pair */
struct u64_stats_sync syncp;
};
struct bpf_prog_aux { struct bpf_prog_aux {
atomic_t refcnt; atomic_t refcnt;
u32 used_map_cnt; u32 used_map_cnt;
...@@ -389,6 +396,7 @@ struct bpf_prog_aux { ...@@ -389,6 +396,7 @@ struct bpf_prog_aux {
* main prog always has linfo_idx == 0 * main prog always has linfo_idx == 0
*/ */
u32 linfo_idx; u32 linfo_idx;
struct bpf_prog_stats __percpu *stats;
union { union {
struct work_struct work; struct work_struct work;
struct rcu_head rcu; struct rcu_head rcu;
...@@ -559,6 +567,7 @@ void bpf_map_area_free(void *base); ...@@ -559,6 +567,7 @@ void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
extern int sysctl_unprivileged_bpf_disabled; extern int sysctl_unprivileged_bpf_disabled;
extern int sysctl_bpf_stats_enabled;
int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_prog_new_fd(struct bpf_prog *prog);
......
...@@ -533,7 +533,24 @@ struct sk_filter { ...@@ -533,7 +533,24 @@ struct sk_filter {
struct bpf_prog *prog; struct bpf_prog *prog;
}; };
#define BPF_PROG_RUN(filter, ctx) ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); }) DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
/*
 * BPF_PROG_RUN - run BPF program @prog on @ctx; evaluates to its u32 result.
 *
 * cant_sleep() is invoked before the program is called. When the
 * bpf_stats_enabled_key static branch is enabled, the call is bracketed with
 * sched_clock() and the elapsed time plus one run are added to this CPU's
 * copy of prog->aux->stats inside a u64_stats update section. When the key
 * is disabled the program is simply called.
 *
 * Macro hygiene:
 *  - every use of @prog is parenthesized so that any pointer expression can
 *    be passed safely;
 *  - the internal locals carry a "__" prefix so they cannot capture
 *    identifiers named "ret", "stats" or "start" that appear in the caller's
 *    @prog or @ctx expressions.
 *
 * Note: @prog is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define BPF_PROG_RUN(prog, ctx)	({					\
	u32 __ret;							\
	cant_sleep();							\
	if (static_branch_unlikely(&bpf_stats_enabled_key)) {		\
		struct bpf_prog_stats *__stats;				\
		u64 __start = sched_clock();				\
		__ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
		__stats = this_cpu_ptr((prog)->aux->stats);		\
		u64_stats_update_begin(&__stats->syncp);		\
		__stats->cnt++;						\
		__stats->nsecs += sched_clock() - __start;		\
		u64_stats_update_end(&__stats->syncp);			\
	} else {							\
		__ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
	}								\
	__ret; })
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
...@@ -764,6 +781,7 @@ void bpf_prog_free_jited_linfo(struct bpf_prog *prog); ...@@ -764,6 +781,7 @@ void bpf_prog_free_jited_linfo(struct bpf_prog *prog);
void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog); void bpf_prog_free_unused_jited_linfo(struct bpf_prog *prog);
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags); gfp_t gfp_extra_flags);
void __bpf_prog_free(struct bpf_prog *fp); void __bpf_prog_free(struct bpf_prog *fp);
......
...@@ -2813,6 +2813,8 @@ struct bpf_prog_info { ...@@ -2813,6 +2813,8 @@ struct bpf_prog_info {
__u32 jited_line_info_rec_size; __u32 jited_line_info_rec_size;
__u32 nr_prog_tags; __u32 nr_prog_tags;
__aligned_u64 prog_tags; __aligned_u64 prog_tags;
__u64 run_time_ns;
__u64 run_cnt;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
struct bpf_map_info { struct bpf_map_info {
......
...@@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns ...@@ -78,7 +78,7 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns
return NULL; return NULL;
} }
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags)
{ {
gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags; gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
struct bpf_prog_aux *aux; struct bpf_prog_aux *aux;
...@@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) ...@@ -104,6 +104,26 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
return fp; return fp;
} }
/*
 * Allocate a BPF program together with its per-CPU run statistics.
 *
 * @size:            passed through to bpf_prog_alloc_no_stats()
 * @gfp_extra_flags: extra GFP flags, OR-ed into GFP_KERNEL | __GFP_ZERO for
 *                   the per-CPU stats allocation and passed through to
 *                   bpf_prog_alloc_no_stats()
 *
 * Returns the new program, or NULL if either the program or its stats could
 * not be allocated. On stats allocation failure the partially built program
 * (aux and the program itself) is released before returning.
 */
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog *prog;
	int cpu;

	prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags);
	if (!prog)
		return NULL;

	prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
	if (!prog->aux->stats) {
		kfree(prog->aux);
		vfree(prog);
		return NULL;
	}

	/*
	 * aux->stats is a __percpu pointer: it must not be dereferenced
	 * directly, and every possible CPU owns a separate syncp that needs
	 * its own initialization.
	 */
	for_each_possible_cpu(cpu) {
		struct bpf_prog_stats *pstats;

		pstats = per_cpu_ptr(prog->aux->stats, cpu);
		u64_stats_init(&pstats->syncp);
	}

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_alloc); EXPORT_SYMBOL_GPL(bpf_prog_alloc);
int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog)
...@@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, ...@@ -231,7 +251,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
void __bpf_prog_free(struct bpf_prog *fp) void __bpf_prog_free(struct bpf_prog *fp)
{ {
if (fp->aux) {
free_percpu(fp->aux->stats);
kfree(fp->aux); kfree(fp->aux);
}
vfree(fp); vfree(fp);
} }
...@@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to, ...@@ -2069,6 +2092,10 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
return -EFAULT; return -EFAULT;
} }
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
int sysctl_bpf_stats_enabled __read_mostly;
/* All definitions of tracepoints related to BPF. */ /* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <linux/bpf_trace.h> #include <linux/bpf_trace.h>
......
...@@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp) ...@@ -1283,24 +1283,54 @@ static int bpf_prog_release(struct inode *inode, struct file *filp)
return 0; return 0;
} }
/*
 * Sum the per-CPU run counters of @prog over all possible CPUs and store
 * the totals in @stats->nsecs and @stats->cnt. @stats->syncp is not written.
 */
static void bpf_prog_get_stats(const struct bpf_prog *prog,
			       struct bpf_prog_stats *stats)
{
	u64 total_nsecs = 0;
	u64 total_cnt = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		const struct bpf_prog_stats *pcpu_stats =
			per_cpu_ptr(prog->aux->stats, cpu);
		u64 snap_nsecs, snap_cnt;
		unsigned int seq;

		/* Re-read both counters until the fetch section reports no retry. */
		do {
			seq = u64_stats_fetch_begin_irq(&pcpu_stats->syncp);
			snap_nsecs = pcpu_stats->nsecs;
			snap_cnt = pcpu_stats->cnt;
		} while (u64_stats_fetch_retry_irq(&pcpu_stats->syncp, seq));

		total_nsecs += snap_nsecs;
		total_cnt += snap_cnt;
	}

	stats->nsecs = total_nsecs;
	stats->cnt = total_cnt;
}
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp) static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
{ {
const struct bpf_prog *prog = filp->private_data; const struct bpf_prog *prog = filp->private_data;
char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
struct bpf_prog_stats stats;
bpf_prog_get_stats(prog, &stats);
bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
seq_printf(m, seq_printf(m,
"prog_type:\t%u\n" "prog_type:\t%u\n"
"prog_jited:\t%u\n" "prog_jited:\t%u\n"
"prog_tag:\t%s\n" "prog_tag:\t%s\n"
"memlock:\t%llu\n" "memlock:\t%llu\n"
"prog_id:\t%u\n", "prog_id:\t%u\n"
"run_time_ns:\t%llu\n"
"run_cnt:\t%llu\n",
prog->type, prog->type,
prog->jited, prog->jited,
prog_tag, prog_tag,
prog->pages * 1ULL << PAGE_SHIFT, prog->pages * 1ULL << PAGE_SHIFT,
prog->aux->id); prog->aux->id,
stats.nsecs,
stats.cnt);
} }
#endif #endif
...@@ -2122,6 +2152,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -2122,6 +2152,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_prog_info __user *uinfo = u64_to_user_ptr(attr->info.info);
struct bpf_prog_info info = {}; struct bpf_prog_info info = {};
u32 info_len = attr->info.info_len; u32 info_len = attr->info.info_len;
struct bpf_prog_stats stats;
char __user *uinsns; char __user *uinsns;
u32 ulen; u32 ulen;
int err; int err;
...@@ -2161,6 +2192,10 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog, ...@@ -2161,6 +2192,10 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
if (err) if (err)
return err; return err;
bpf_prog_get_stats(prog, &stats);
info.run_time_ns = stats.nsecs;
info.run_cnt = stats.cnt;
if (!capable(CAP_SYS_ADMIN)) { if (!capable(CAP_SYS_ADMIN)) {
info.jited_prog_len = 0; info.jited_prog_len = 0;
info.xlated_prog_len = 0; info.xlated_prog_len = 0;
......
...@@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env) ...@@ -7320,7 +7320,12 @@ static int jit_subprogs(struct bpf_verifier_env *env)
subprog_end = env->subprog_info[i + 1].start; subprog_end = env->subprog_info[i + 1].start;
len = subprog_end - subprog_start; len = subprog_end - subprog_start;
func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); /* BPF_PROG_RUN doesn't call subprogs directly,
* hence main prog stats include the runtime of subprogs.
* Subprogs don't have IDs and are not reachable via prog_get_next_id,
* so func[i]->aux->stats will never be accessed and stays NULL.
*/
func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
if (!func[i]) if (!func[i])
goto out_free; goto out_free;
memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
......
...@@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, ...@@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#endif #endif
static int proc_dopipe_max_size(struct ctl_table *table, int write, static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos); void __user *buffer, size_t *lenp, loff_t *ppos);
static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos);
#ifdef CONFIG_MAGIC_SYSRQ #ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */ /* Note: sysrq code uses its own private copy */
...@@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = { ...@@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = {
.extra2 = &one, .extra2 = &one,
}, },
#endif #endif
{
.procname = "bpf_stats_enabled",
.data = &sysctl_bpf_stats_enabled,
.maxlen = sizeof(sysctl_bpf_stats_enabled),
.mode = 0644,
.proc_handler = proc_dointvec_minmax_bpf_stats,
.extra1 = &zero,
.extra2 = &one,
},
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
{ {
.procname = "panic_on_rcu_stall", .procname = "panic_on_rcu_stall",
...@@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, ...@@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
#endif /* CONFIG_PROC_SYSCTL */ #endif /* CONFIG_PROC_SYSCTL */
/*
 * sysctl handler for "bpf_stats_enabled".
 *
 * Writes require CAP_SYS_ADMIN (-EPERM otherwise). The value is parsed by
 * proc_dointvec_minmax() into a local copy through a shadow ctl_table, so
 * table->data is only updated after a successful write; the
 * bpf_stats_enabled_key static branch is then switched to match the new
 * value. Reads, and failed writes, return the proc_dointvec_minmax() result
 * without touching the key.
 */
static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
					  void __user *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct ctl_table shadow = *table;
	int new_val = *(int *)table->data;
	int err;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	shadow.data = &new_val;
	err = proc_dointvec_minmax(&shadow, write, buffer, lenp, ppos);
	if (!write || err)
		return err;

	/* Successful write: publish the value and flip the static key. */
	*(int *)table->data = new_val;
	if (new_val)
		static_branch_enable(&bpf_stats_enabled_key);
	else
		static_branch_disable(&bpf_stats_enabled_key);

	return err;
}
/* /*
* No sense putting this after each symbol definition, twice, * No sense putting this after each symbol definition, twice,
* exception granted :-) * exception granted :-)
......
...@@ -171,7 +171,7 @@ EXAMPLES ...@@ -171,7 +171,7 @@ EXAMPLES
:: ::
10: xdp name some_prog tag 005a3d2123620c8b gpl 10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10
loaded_at 2017-09-29T20:11:00+0000 uid 0 loaded_at 2017-09-29T20:11:00+0000 uid 0
xlated 528B jited 370B memlock 4096B map_ids 10 xlated 528B jited 370B memlock 4096B map_ids 10
...@@ -184,6 +184,8 @@ EXAMPLES ...@@ -184,6 +184,8 @@ EXAMPLES
"type": "xdp", "type": "xdp",
"tag": "005a3d2123620c8b", "tag": "005a3d2123620c8b",
"gpl_compatible": true, "gpl_compatible": true,
"run_time_ns": 81632,
"run_cnt": 10,
"loaded_at": 1506715860, "loaded_at": 1506715860,
"uid": 0, "uid": 0,
"bytes_xlated": 528, "bytes_xlated": 528,
......
...@@ -214,6 +214,10 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) ...@@ -214,6 +214,10 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
info->tag[4], info->tag[5], info->tag[6], info->tag[7]); info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible); jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
if (info->run_time_ns) {
jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns);
jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt);
}
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
...@@ -277,6 +281,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) ...@@ -277,6 +281,9 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
fprint_hex(stdout, info->tag, BPF_TAG_SIZE, ""); fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("%s", info->gpl_compatible ? " gpl" : ""); printf("%s", info->gpl_compatible ? " gpl" : "");
if (info->run_time_ns)
printf(" run_time_ns %lld run_cnt %lld",
info->run_time_ns, info->run_cnt);
printf("\n"); printf("\n");
if (info->load_time) { if (info->load_time) {
......
...@@ -2813,6 +2813,8 @@ struct bpf_prog_info { ...@@ -2813,6 +2813,8 @@ struct bpf_prog_info {
__u32 jited_line_info_rec_size; __u32 jited_line_info_rec_size;
__u32 nr_prog_tags; __u32 nr_prog_tags;
__aligned_u64 prog_tags; __aligned_u64 prog_tags;
__u64 run_time_ns;
__u64 run_cnt;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
struct bpf_map_info { struct bpf_map_info {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment