Commit b625030c authored by Alexei Starovoitov

Merge branch 'bpf: add percpu stats for bpf_map'

Anton Protopopov says:

====================
This series adds a mechanism for maps to update per-cpu counters on element
insertions/deletions. The sum of these counters can be read via a new kfunc from
map iterator and tracing programs (a short usage sketch appears below).

The following patches are present in the series:

  * Patch 1 adds a generic per-cpu counter to struct bpf_map
  * Patch 2 adds a new kfunc to access the sum of per-cpu counters
  * Patch 3 utilizes this mechanism for hash-based maps
  * Patch 4 extends the preloaded map iterator to dump the sum
  * Patch 5 adds a self-test for the change

The reason for adding this functionality in our case (Cilium) is to get signals
about how full some heavily used maps are and what the actual dynamic profile of
map capacity looks like. For LRU maps there is no other way to obtain this
information. The original presentation can be found here [1].

  [1] https://lpc.events/event/16/contributions/1368/
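
As a quick illustration, a consumer of the new kfunc can be as small as the
following sketch (condensed from the selftest in patch 5; the program name is
illustrative, and the usual vmlinux.h/bpf_helpers.h includes are assumed):

  __s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;

  SEC("iter/bpf_map")
  int dump_map_counts(struct bpf_iter__bpf_map *ctx)
  {
          struct bpf_map *map = ctx->map;

          /* Sum the per-cpu counters for this map and print the total */
          if (map)
                  BPF_SEQ_PRINTF(ctx->meta->seq, "%u: %lld\n",
                                 map->id, bpf_map_sum_elem_count(map));
          return 0;
  }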

v4 -> v5:
* don't pass useless empty opts when creating a link, pass NULL (Hou)
* add a debug message (Hou)
* make code more readable (Alexei)
* remove the selftest which only checked that elem_count != NULL

v3 -> v4:
* fix selftests:
  * added test code for batch map operations
  * added a test for BPF_MAP_TYPE_HASH_OF_MAPS (Hou)
  * added tests for BPF_MAP_TYPE_LRU* with BPF_F_NO_COMMON_LRU (Hou)
  * map_info was called multiple times unnecessarily (Hou)
  * small fixes, plus fixes for some memory leaks (Hou)
* fixed wrong error path for freeing a non-prealloc map (Hou)
* fixed counters for batch delete operations (Hou)

v2 -> v3:
* split commits to better represent update logic (Alexei)
* remove filter from kfunc to allow all tracing programs (Alexei)
* extend selftests (Alexei)

v1 -> v2:
* make the counters generic part of struct bpf_map (Alexei)
* don't use map_info and /proc/self/fdinfo in favor of a kfunc (Alexei)
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents fd283ab1 6c1b8cb6
@@ -275,6 +275,7 @@ struct bpf_map {
} owner;
bool bypass_spec_v1;
bool frozen; /* write-once; write-protected by freeze_mutex */
s64 __percpu *elem_count;
};
static inline const char *btf_field_type_name(enum btf_field_type type)
@@ -2040,6 +2041,35 @@ bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, size_t align,
}
#endif
static inline int
bpf_map_init_elem_count(struct bpf_map *map)
{
size_t size = sizeof(*map->elem_count), align = size;
gfp_t flags = GFP_USER | __GFP_NOWARN;
map->elem_count = bpf_map_alloc_percpu(map, size, align, flags);
if (!map->elem_count)
return -ENOMEM;
return 0;
}
static inline void
bpf_map_free_elem_count(struct bpf_map *map)
{
free_percpu(map->elem_count);
}
static inline void bpf_map_inc_elem_count(struct bpf_map *map)
{
this_cpu_inc(*map->elem_count);
}
static inline void bpf_map_dec_elem_count(struct bpf_map *map)
{
this_cpu_dec(*map->elem_count);
}
extern int sysctl_unprivileged_bpf_disabled;
static inline bool bpf_allow_ptr_leaks(void)
@@ -302,6 +302,7 @@ static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
struct htab_elem *l;
if (node) {
bpf_map_inc_elem_count(&htab->map);
l = container_of(node, struct htab_elem, lru_node);
memcpy(l->key, key, htab->map.key_size);
return l;
@@ -510,12 +511,16 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
htab->n_buckets > U32_MAX / sizeof(struct bucket))
goto free_htab;
err = bpf_map_init_elem_count(&htab->map);
if (err)
goto free_htab;
err = -ENOMEM;
htab->buckets = bpf_map_area_alloc(htab->n_buckets *
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
-		goto free_htab;
+		goto free_elem_count;
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++) {
htab->map_locked[i] = bpf_map_alloc_percpu(&htab->map,
@@ -593,6 +598,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
bpf_map_area_free(htab->buckets);
bpf_mem_alloc_destroy(&htab->pcpu_ma);
bpf_mem_alloc_destroy(&htab->ma);
free_elem_count:
bpf_map_free_elem_count(&htab->map);
free_htab:
lockdep_unregister_key(&htab->lockdep_key);
bpf_map_area_free(htab);
@@ -804,6 +811,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
if (l == tgt_l) {
hlist_nulls_del_rcu(&l->hash_node);
check_and_free_fields(htab, l);
bpf_map_dec_elem_count(&htab->map);
break;
}
@@ -900,6 +908,8 @@ static bool is_map_full(struct bpf_htab *htab)
static void inc_elem_count(struct bpf_htab *htab)
{
bpf_map_inc_elem_count(&htab->map);
if (htab->use_percpu_counter)
percpu_counter_add_batch(&htab->pcount, 1, PERCPU_COUNTER_BATCH);
else
@@ -908,6 +918,8 @@ static void inc_elem_count(struct bpf_htab *htab)
static void dec_elem_count(struct bpf_htab *htab)
{
bpf_map_dec_elem_count(&htab->map);
if (htab->use_percpu_counter)
percpu_counter_add_batch(&htab->pcount, -1, PERCPU_COUNTER_BATCH);
else
@@ -920,6 +932,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
htab_put_fd_value(htab, l);
if (htab_is_prealloc(htab)) {
bpf_map_dec_elem_count(&htab->map);
check_and_free_fields(htab, l);
__pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
@@ -1000,6 +1013,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
if (!l)
return ERR_PTR(-E2BIG);
l_new = container_of(l, struct htab_elem, fnode);
bpf_map_inc_elem_count(&htab->map);
}
} else {
if (is_map_full(htab))
@@ -1168,6 +1182,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
check_and_free_fields(htab, elem);
bpf_map_dec_elem_count(&htab->map);
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}
@@ -1357,8 +1372,10 @@ static long __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
err:
htab_unlock_bucket(htab, b, hash, flags);
err_lock_bucket:
-	if (l_new)
+	if (l_new) {
+		bpf_map_dec_elem_count(&htab->map);
 		bpf_lru_push_free(&htab->lru, &l_new->lru_node);
+	}
return ret;
}
@@ -1523,6 +1540,7 @@ static void htab_map_free(struct bpf_map *map)
prealloc_destroy(htab);
}
bpf_map_free_elem_count(map);
free_percpu(htab->extra_elems);
bpf_map_area_free(htab->buckets);
bpf_mem_alloc_destroy(&htab->pcpu_ma);
@@ -93,7 +93,7 @@ static struct bpf_iter_reg bpf_map_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map, map),
-			  PTR_TO_BTF_ID_OR_NULL },
+			  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.seq_info = &bpf_map_seq_info,
};
@@ -193,3 +193,40 @@ static int __init bpf_map_iter_init(void)
}
late_initcall(bpf_map_iter_init);
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
__bpf_kfunc s64 bpf_map_sum_elem_count(struct bpf_map *map)
{
s64 *pcount;
s64 ret = 0;
int cpu;
if (!map || !map->elem_count)
return 0;
for_each_possible_cpu(cpu) {
pcount = per_cpu_ptr(map->elem_count, cpu);
ret += READ_ONCE(*pcount);
}
return ret;
}
__diag_pop();
BTF_SET8_START(bpf_map_iter_kfunc_ids)
BTF_ID_FLAGS(func, bpf_map_sum_elem_count, KF_TRUSTED_ARGS)
BTF_SET8_END(bpf_map_iter_kfunc_ids)
static const struct btf_kfunc_id_set bpf_map_iter_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_map_iter_kfunc_ids,
};
static int init_subsystem(void)
{
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_map_iter_kfunc_set);
}
late_initcall(init_subsystem);
@@ -73,6 +73,8 @@ static const char *get_name(struct btf *btf, long btf_id, const char *fallback)
return str + name_off;
}
__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
@@ -84,9 +86,12 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
return 0;
if (seq_num == 0)
BPF_SEQ_PRINTF(seq, " id name max_entries\n");
BPF_SEQ_PRINTF(seq, " id name max_entries cur_entries\n");
BPF_SEQ_PRINTF(seq, "%4u %-16s %10d %10lld\n",
map->id, map->name, map->max_entries,
bpf_map_sum_elem_count(map));
BPF_SEQ_PRINTF(seq, "%4u %-16s%6d\n", map->id, map->name, map->max_entries);
return 0;
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
__u32 target_id;
__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct bpf_map *map = ctx->map;
if (map && map->id == target_id)
BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
return 0;
}
char _license[] SEC("license") = "GPL";
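
For completeness, one way userspace might read the counter through the iterator
program above is sketched here (not part of this series; assumes libbpf, error
handling trimmed, names illustrative):

  #include <bpf/libbpf.h>
  #include <stdlib.h>
  #include <unistd.h>

  /* Attach an iter/bpf_map program and parse the count it prints. */
  static long read_map_count(struct bpf_program *prog)
  {
          struct bpf_link *link;
          char buf[32] = {};
          int iter_fd;
          long cnt = -1;

          link = bpf_program__attach_iter(prog, NULL); /* NULL opts, as of v5 */
          if (!link)
                  return -1;

          iter_fd = bpf_iter_create(bpf_link__fd(link));
          if (iter_fd >= 0) {
                  if (read(iter_fd, buf, sizeof(buf) - 1) > 0)
                          cnt = atol(buf);
                  close(iter_fd);
          }
          bpf_link__destroy(link);
          return cnt;
  }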