Commit a10423b8, authored by Alexei Starovoitov, committed by David S. Miller

bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map

The primary use case is a latency histogram array, where a bpf program
computes the latency of block requests or other events and stores the
histogram of latencies into an array of 64 elements.
All cpus are constantly running, so a normal increment is not accurate,
bpf_xadd causes cache ping-pong, and this per-cpu approach allows the
fastest collision-free counters.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 824bd0ce
...@@ -151,6 +151,7 @@ struct bpf_array { ...@@ -151,6 +151,7 @@ struct bpf_array {
union { union {
char value[0] __aligned(8); char value[0] __aligned(8);
void *ptrs[0] __aligned(8); void *ptrs[0] __aligned(8);
void __percpu *pptrs[0] __aligned(8);
}; };
}; };
#define MAX_TAIL_CALL_CNT 32 #define MAX_TAIL_CALL_CNT 32
......
...@@ -82,6 +82,7 @@ enum bpf_map_type { ...@@ -82,6 +82,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_PROG_ARRAY, BPF_MAP_TYPE_PROG_ARRAY,
BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_MAP_TYPE_PERF_EVENT_ARRAY,
BPF_MAP_TYPE_PERCPU_HASH, BPF_MAP_TYPE_PERCPU_HASH,
BPF_MAP_TYPE_PERCPU_ARRAY,
}; };
enum bpf_prog_type { enum bpf_prog_type {
......
...@@ -17,11 +17,39 @@ ...@@ -17,11 +17,39 @@
#include <linux/filter.h> #include <linux/filter.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
/* Hand every per-cpu pointer stored in array->pptrs[] to free_percpu(). */
static void bpf_array_free_percpu(struct bpf_array *array)
{
	int slot = 0;

	/* walk all map.max_entries slots, one free_percpu() call per slot */
	while (slot < array->map.max_entries) {
		free_percpu(array->pptrs[slot]);
		slot++;
	}
}
static int bpf_array_alloc_percpu(struct bpf_array *array)
{
void __percpu *ptr;
int i;
for (i = 0; i < array->map.max_entries; i++) {
ptr = __alloc_percpu_gfp(array->elem_size, 8,
GFP_USER | __GFP_NOWARN);
if (!ptr) {
bpf_array_free_percpu(array);
return -ENOMEM;
}
array->pptrs[i] = ptr;
}
return 0;
}
/* Called from syscall */ /* Called from syscall */
static struct bpf_map *array_map_alloc(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr)
{ {
bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
struct bpf_array *array; struct bpf_array *array;
u32 elem_size, array_size; u64 array_size;
u32 elem_size;
/* check sanity of attributes */ /* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 || if (attr->max_entries == 0 || attr->key_size != 4 ||
...@@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) ...@@ -36,12 +64,16 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
elem_size = round_up(attr->value_size, 8); elem_size = round_up(attr->value_size, 8);
/* check round_up into zero and u32 overflow */ array_size = sizeof(*array);
if (elem_size == 0 || if (percpu)
attr->max_entries > (U32_MAX - PAGE_SIZE - sizeof(*array)) / elem_size) array_size += (u64) attr->max_entries * sizeof(void *);
else
array_size += (u64) attr->max_entries * elem_size;
/* make sure there is no u32 overflow later in round_up() */
if (array_size >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
array_size = sizeof(*array) + attr->max_entries * elem_size;
/* allocate all map elements and zero-initialize them */ /* allocate all map elements and zero-initialize them */
array = kzalloc(array_size, GFP_USER | __GFP_NOWARN); array = kzalloc(array_size, GFP_USER | __GFP_NOWARN);
...@@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) ...@@ -52,12 +84,25 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
} }
/* copy mandatory map attributes */ /* copy mandatory map attributes */
array->map.map_type = attr->map_type;
array->map.key_size = attr->key_size; array->map.key_size = attr->key_size;
array->map.value_size = attr->value_size; array->map.value_size = attr->value_size;
array->map.max_entries = attr->max_entries; array->map.max_entries = attr->max_entries;
array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
array->elem_size = elem_size; array->elem_size = elem_size;
if (!percpu)
goto out;
array_size += (u64) attr->max_entries * elem_size * num_possible_cpus();
if (array_size >= U32_MAX - PAGE_SIZE ||
elem_size > PCPU_MIN_UNIT_SIZE || bpf_array_alloc_percpu(array)) {
kvfree(array);
return ERR_PTR(-ENOMEM);
}
out:
array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT;
return &array->map; return &array->map;
} }
...@@ -67,12 +112,24 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) ...@@ -67,12 +112,24 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key)
struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key; u32 index = *(u32 *)key;
if (index >= array->map.max_entries) if (unlikely(index >= array->map.max_entries))
return NULL; return NULL;
return array->value + array->elem_size * index; return array->value + array->elem_size * index;
} }
/* Called from eBPF program */
static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key;
if (unlikely(index >= array->map.max_entries))
return NULL;
return this_cpu_ptr(array->pptrs[index]);
}
/* Called from syscall */ /* Called from syscall */
static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key) static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{ {
...@@ -99,19 +156,24 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -99,19 +156,24 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
struct bpf_array *array = container_of(map, struct bpf_array, map); struct bpf_array *array = container_of(map, struct bpf_array, map);
u32 index = *(u32 *)key; u32 index = *(u32 *)key;
if (map_flags > BPF_EXIST) if (unlikely(map_flags > BPF_EXIST))
/* unknown flags */ /* unknown flags */
return -EINVAL; return -EINVAL;
if (index >= array->map.max_entries) if (unlikely(index >= array->map.max_entries))
/* all elements were pre-allocated, cannot insert a new one */ /* all elements were pre-allocated, cannot insert a new one */
return -E2BIG; return -E2BIG;
if (map_flags == BPF_NOEXIST) if (unlikely(map_flags == BPF_NOEXIST))
/* all elements already exist */ /* all elements already exist */
return -EEXIST; return -EEXIST;
memcpy(array->value + array->elem_size * index, value, map->value_size); if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
memcpy(this_cpu_ptr(array->pptrs[index]),
value, map->value_size);
else
memcpy(array->value + array->elem_size * index,
value, map->value_size);
return 0; return 0;
} }
...@@ -133,6 +195,9 @@ static void array_map_free(struct bpf_map *map) ...@@ -133,6 +195,9 @@ static void array_map_free(struct bpf_map *map)
*/ */
synchronize_rcu(); synchronize_rcu();
if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
kvfree(array); kvfree(array);
} }
...@@ -150,9 +215,24 @@ static struct bpf_map_type_list array_type __read_mostly = { ...@@ -150,9 +215,24 @@ static struct bpf_map_type_list array_type __read_mostly = {
.type = BPF_MAP_TYPE_ARRAY, .type = BPF_MAP_TYPE_ARRAY,
}; };
/*
 * Callback table for BPF_MAP_TYPE_PERCPU_ARRAY maps. Only the lookup
 * callback is a percpu_-specific function; every other slot points at an
 * array_map_* handler.
 */
static const struct bpf_map_ops percpu_array_ops = {
	/* lifetime */
	.map_alloc		= array_map_alloc,
	.map_free		= array_map_free,
	/* element access */
	.map_lookup_elem	= percpu_array_map_lookup_elem,
	.map_update_elem	= array_map_update_elem,
	.map_delete_elem	= array_map_delete_elem,
	/* key iteration */
	.map_get_next_key	= array_map_get_next_key,
};
/* Ties the BPF_MAP_TYPE_PERCPU_ARRAY map type to percpu_array_ops. */
static struct bpf_map_type_list percpu_array_type __read_mostly = {
	.type	= BPF_MAP_TYPE_PERCPU_ARRAY,
	.ops	= &percpu_array_ops,
};
static int __init register_array_map(void) static int __init register_array_map(void)
{ {
bpf_register_map_type(&array_type); bpf_register_map_type(&array_type);
bpf_register_map_type(&percpu_array_type);
return 0; return 0;
} }
late_initcall(register_array_map); late_initcall(register_array_map);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment