Commit f14b488d authored by David S. Miller

Merge branch 'bpf-map-prealloc'

Alexei Starovoitov says:

====================
bpf: map pre-alloc

v1->v2:
. fix a few issues spotted by Daniel
. convert stackmap to pre-allocation as well
. add a workaround for a lockdep false positive
. add pcpu_freelist_populate to be used by hashmap and stackmap

This patch set switches the bpf hash map to use pre-allocation by default
and introduces a BPF_F_NO_PREALLOC flag to keep the old behavior for cases
where full map pre-allocation is too expensive in memory.
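
For illustration, a minimal userspace sketch of opting out of pre-allocation
(hedged: it assumes the updated uapi header from this series is installed;
the raw-syscall wrapper mirrors the samples/bpf/libbpf.c change further down,
and create_hash_map() is an illustrative helper, not part of the series):

    #include <string.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/bpf.h>

    static int create_hash_map(int no_prealloc)
    {
            union bpf_attr attr;

            /* zero the whole union: the kernel rejects non-zero bytes
             * past the last field of the command being used
             */
            memset(&attr, 0, sizeof(attr));
            attr.map_type = BPF_MAP_TYPE_HASH;
            attr.key_size = sizeof(long long);
            attr.value_size = sizeof(long long);
            attr.max_entries = 1024;
            /* default (0) means fully pre-allocated elements;
             * BPF_F_NO_PREALLOC keeps the old allocate-on-update behavior
             */
            attr.map_flags = no_prealloc ? BPF_F_NO_PREALLOC : 0;

            return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
    }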

Some time back Daniel Wagner reported crashes when a bpf hash map is
used to compute time intervals between preempt_disable->preempt_enable,
and recently Tom Zanussi reported a deadlock in the iovisor/bcc funccount
tool when it is used to count the number of invocations of kernel
'*spin*' functions. Both problems are due to recursive use of the slub
allocator and can only be solved by pre-allocating all map elements.

A lot of different solutions were considered. Many were implemented,
but in the end pre-allocation proved to be the only feasible answer.
Pre-allocation itself was implemented four different ways:
- simple free-list with a single lock
- percpu_ida with optimizations
- blk-mq-tag variant customized for the bpf use case
- percpu_freelist
For bpf-style alloc/free patterns, percpu_freelist performs best and is
the one implemented in this patch set.

Patch 1 fixes simple deadlocks due to missing recursion checks.
Patch 2 introduces percpu_freelist.
Patch 3 converts the hash map to pre-allocation; detailed performance
numbers are in that patch.
Patch 5 converts stackmap to pre-allocation.
Patches 6-9 prepare the test infrastructure.
Patch 10 adds a stress test for the hash map infrastructure: it attaches
to spin_lock functions, and bpf_map_update/delete are called from
different contexts.
Patch 11 adds a stress test for bpf_get_stackid.
Patch 12 adds a map performance test.
Reported-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Reported-by: Tom Zanussi <tom.zanussi@linux.intel.com>
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 8aba8b83 c3f85cff
@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf.h>
 #include <linux/workqueue.h>
 #include <linux/file.h>
+#include <linux/percpu.h>

 struct bpf_map;
@@ -36,6 +37,7 @@ struct bpf_map {
 	u32 key_size;
 	u32 value_size;
 	u32 max_entries;
+	u32 map_flags;
 	u32 pages;
 	struct user_struct *user;
 	const struct bpf_map_ops *ops;
@@ -163,6 +165,8 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);

 #ifdef CONFIG_BPF_SYSCALL
+DECLARE_PER_CPU(int, bpf_prog_active);
+
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
@@ -175,6 +179,7 @@ struct bpf_map *__bpf_map_get(struct fd f);
 void bpf_map_inc(struct bpf_map *map, bool uref);
 void bpf_map_put_with_uref(struct bpf_map *map);
 void bpf_map_put(struct bpf_map *map);
+int bpf_map_precharge_memlock(u32 pages);

 extern int sysctl_unprivileged_bpf_disabled;
@@ -190,6 +195,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 			   u64 flags);
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 flags);
+int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);

 /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
  * forced to use 'long' read/writes to try to atomically copy long counters.
...
@@ -101,12 +101,15 @@ enum bpf_prog_type {
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */

+#define BPF_F_NO_PREALLOC	(1U << 0)
+
 union bpf_attr {
 	struct { /* anonymous struct used by BPF_MAP_CREATE command */
 		__u32	map_type;	/* one of enum bpf_map_type */
 		__u32	key_size;	/* size of key in bytes */
 		__u32	value_size;	/* size of value in bytes */
 		__u32	max_entries;	/* max number of entries in a map */
+		__u32	map_flags;	/* prealloc or not */
 	};

 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
...
 obj-y := core.o

 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif

@@ -53,7 +53,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size == 0)
+	    attr->value_size == 0 || attr->map_flags)
 		return ERR_PTR(-EINVAL);

 	if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
...
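(Array maps are always fully pre-allocated by construction, so there is no
NO_PREALLOC variant to select here; rejecting every non-zero map_flags value
also keeps the new attribute field extensible, since kernels refuse flags
they do not understand instead of silently ignoring them.)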
...
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#include "percpu_freelist.h"
int pcpu_freelist_init(struct pcpu_freelist *s)
{
int cpu;
s->freelist = alloc_percpu(struct pcpu_freelist_head);
if (!s->freelist)
return -ENOMEM;
for_each_possible_cpu(cpu) {
struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu);
raw_spin_lock_init(&head->lock);
head->first = NULL;
}
return 0;
}
void pcpu_freelist_destroy(struct pcpu_freelist *s)
{
free_percpu(s->freelist);
}
static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
struct pcpu_freelist_node *node)
{
raw_spin_lock(&head->lock);
node->next = head->first;
head->first = node;
raw_spin_unlock(&head->lock);
}
void pcpu_freelist_push(struct pcpu_freelist *s,
struct pcpu_freelist_node *node)
{
struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
__pcpu_freelist_push(head, node);
}
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
u32 nr_elems)
{
struct pcpu_freelist_head *head;
unsigned long flags;
int i, cpu, pcpu_entries;
pcpu_entries = nr_elems / num_possible_cpus() + 1;
i = 0;
/* disable irq to work around a lockdep false positive;
 * in bpf usage pcpu_freelist_populate() will never race
 * with pcpu_freelist_push()
 */
local_irq_save(flags);
for_each_possible_cpu(cpu) {
again:
head = per_cpu_ptr(s->freelist, cpu);
__pcpu_freelist_push(head, buf);
i++;
buf += elem_size;
if (i == nr_elems)
break;
if (i % pcpu_entries)
goto again;
}
local_irq_restore(flags);
}
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
{
struct pcpu_freelist_head *head;
struct pcpu_freelist_node *node;
int orig_cpu, cpu;
orig_cpu = cpu = raw_smp_processor_id();
while (1) {
head = per_cpu_ptr(s->freelist, cpu);
raw_spin_lock(&head->lock);
node = head->first;
if (node) {
head->first = node->next;
raw_spin_unlock(&head->lock);
return node;
}
raw_spin_unlock(&head->lock);
cpu = cpumask_next(cpu, cpu_possible_mask);
if (cpu >= nr_cpu_ids)
cpu = 0;
if (cpu == orig_cpu)
return NULL;
}
}
/* Copyright (c) 2016 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
#ifndef __PERCPU_FREELIST_H__
#define __PERCPU_FREELIST_H__
#include <linux/spinlock.h>
#include <linux/percpu.h>
struct pcpu_freelist_head {
struct pcpu_freelist_node *first;
raw_spinlock_t lock;
};
struct pcpu_freelist {
struct pcpu_freelist_head __percpu *freelist;
};
struct pcpu_freelist_node {
struct pcpu_freelist_node *next;
};
void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
u32 nr_elems);
int pcpu_freelist_init(struct pcpu_freelist *);
void pcpu_freelist_destroy(struct pcpu_freelist *s);
#endif
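
A hedged sketch of the intended lifecycle of this API, modeled on how the
hashmap and stackmap conversions in this series use it (the my_elem and
my_pool_* names are illustrative, not part of the series; the freelist node
must be the first member so the cast of pop()'s return value is valid):

    struct my_elem {
            struct pcpu_freelist_node fnode; /* first member: pop() returns this */
            u64 payload;
    };

    /* buf must hold nr_elems * sizeof(struct my_elem) bytes, e.g. from vzalloc() */
    static int my_pool_create(struct pcpu_freelist *fl, void *buf, u32 nr_elems)
    {
            int err = pcpu_freelist_init(fl);

            if (err)
                    return err;
            /* spreads elements across cpus in chunks of
             * nr_elems / num_possible_cpus() + 1: with 4 possible cpus and
             * 100 elements the per-cpu lists get 26/26/26/22 elements
             */
            pcpu_freelist_populate(fl, buf, sizeof(struct my_elem), nr_elems);
            return 0;
    }

    static struct my_elem *my_alloc(struct pcpu_freelist *fl)
    {
            /* pops from the local cpu's list first, then steals from the
             * others; returns NULL only when every per-cpu list is empty
             */
            return (struct my_elem *)pcpu_freelist_pop(fl);
    }

    static void my_free(struct pcpu_freelist *fl, struct my_elem *e)
    {
            pcpu_freelist_push(fl, &e->fnode);
    }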
@@ -10,9 +10,10 @@
 #include <linux/vmalloc.h>
 #include <linux/stacktrace.h>
 #include <linux/perf_event.h>
+#include "percpu_freelist.h"

 struct stack_map_bucket {
-	struct rcu_head rcu;
+	struct pcpu_freelist_node fnode;
 	u32 hash;
 	u32 nr;
 	u64 ip[];
@@ -20,10 +21,34 @@ struct stack_map_bucket {

 struct bpf_stack_map {
 	struct bpf_map map;
+	void *elems;
+	struct pcpu_freelist freelist;
 	u32 n_buckets;
-	struct stack_map_bucket __rcu *buckets[];
+	struct stack_map_bucket *buckets[];
 };

+static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
+{
+	u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size;
+	int err;
+
+	smap->elems = vzalloc(elem_size * smap->map.max_entries);
+	if (!smap->elems)
+		return -ENOMEM;
+
+	err = pcpu_freelist_init(&smap->freelist);
+	if (err)
+		goto free_elems;
+
+	pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
+			       smap->map.max_entries);
+	return 0;
+
+free_elems:
+	vfree(smap->elems);
+	return err;
+}
+
 /* Called from syscall */
 static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 {
@@ -35,6 +60,9 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	if (!capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EPERM);

+	if (attr->map_flags)
+		return ERR_PTR(-EINVAL);
+
 	/* check sanity of attributes */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
 	    value_size < 8 || value_size % 8 ||
@@ -67,12 +95,22 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	smap->n_buckets = n_buckets;
 	smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

+	err = bpf_map_precharge_memlock(smap->map.pages);
+	if (err)
+		goto free_smap;
+
 	err = get_callchain_buffers();
 	if (err)
 		goto free_smap;

+	err = prealloc_elems_and_freelist(smap);
+	if (err)
+		goto put_buffers;
+
 	return &smap->map;

+put_buffers:
+	put_callchain_buffers();
 free_smap:
 	kvfree(smap);
 	return ERR_PTR(err);
@@ -118,7 +156,7 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
 	ips = trace->ip + skip + init_nr;
 	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
 	id = hash & (smap->n_buckets - 1);
-	bucket = rcu_dereference(smap->buckets[id]);
+	bucket = READ_ONCE(smap->buckets[id]);

 	if (bucket && bucket->hash == hash) {
 		if (flags & BPF_F_FAST_STACK_CMP)
@@ -132,19 +170,18 @@ static u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5)
 	if (bucket && !(flags & BPF_F_REUSE_STACKID))
 		return -EEXIST;

-	new_bucket = kmalloc(sizeof(struct stack_map_bucket) + map->value_size,
-			     GFP_ATOMIC | __GFP_NOWARN);
+	new_bucket = (struct stack_map_bucket *)
+		pcpu_freelist_pop(&smap->freelist);
 	if (unlikely(!new_bucket))
 		return -ENOMEM;

 	memcpy(new_bucket->ip, ips, trace_len);
-	memset(new_bucket->ip + trace_len / 8, 0, map->value_size - trace_len);
 	new_bucket->hash = hash;
 	new_bucket->nr = trace_nr;

 	old_bucket = xchg(&smap->buckets[id], new_bucket);
 	if (old_bucket)
-		kfree_rcu(old_bucket, rcu);
+		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
 	return id;
 }

@@ -157,17 +194,34 @@ const struct bpf_func_proto bpf_get_stackid_proto = {
 	.arg3_type = ARG_ANYTHING,
 };

-/* Called from syscall or from eBPF program */
+/* Called from eBPF program */
 static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+/* Called from syscall */
+int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
-	struct stack_map_bucket *bucket;
-	u32 id = *(u32 *)key;
+	struct stack_map_bucket *bucket, *old_bucket;
+	u32 id = *(u32 *)key, trace_len;

 	if (unlikely(id >= smap->n_buckets))
-		return NULL;
+		return -ENOENT;

-	bucket = rcu_dereference(smap->buckets[id]);
-	return bucket ? bucket->ip : NULL;
+	bucket = xchg(&smap->buckets[id], NULL);
+	if (!bucket)
+		return -ENOENT;
+
+	trace_len = bucket->nr * sizeof(u64);
+	memcpy(value, bucket->ip, trace_len);
+	memset(value + trace_len, 0, map->value_size - trace_len);
+
+	old_bucket = xchg(&smap->buckets[id], bucket);
+	if (old_bucket)
+		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
+	return 0;
 }

 static int stack_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
@@ -193,7 +247,7 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key)
 	old_bucket = xchg(&smap->buckets[id], NULL);
 	if (old_bucket) {
-		kfree_rcu(old_bucket, rcu);
+		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
 		return 0;
 	} else {
 		return -ENOENT;
@@ -204,13 +258,12 @@ static int stack_map_delete_elem(struct bpf_map *map, void *key)
 static void stack_map_free(struct bpf_map *map)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
-	int i;

+	/* wait for bpf programs to complete before freeing stack map */
 	synchronize_rcu();

-	for (i = 0; i < smap->n_buckets; i++)
-		if (smap->buckets[i])
-			kfree_rcu(smap->buckets[i], rcu);
+	vfree(smap->elems);
+	pcpu_freelist_destroy(&smap->freelist);
 	kvfree(smap);

 	put_callchain_buffers();
 }
...
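Two things are worth noting in the conversion above. First,
bpf_map_precharge_memlock() lets stack_map_alloc() fail early, before the
potentially large vzalloc() in prealloc_elems_and_freelist(), when the
RLIMIT_MEMLOCK budget would be exceeded anyway; the real charging still
happens later on the normal map-create path. Second, the userspace view is
unchanged: the id returned by bpf_get_stackid() remains the lookup key and
the value remains an array of u64 instruction pointers, now copied out via
bpf_stackmap_copy(). A hedged sketch using the samples/bpf wrappers from
this series (MAX_STACK_DEPTH and print_stack() are illustrative):

    #include <stdio.h>
    #include "libbpf.h"
    #include "bpf_load.h"

    #define MAX_STACK_DEPTH 127 /* assumes value_size == 127 * sizeof(__u64) */

    /* assumes load_kallsyms() was called once at startup */
    static void print_stack(int map_fd, __u32 stack_id)
    {
            __u64 ips[MAX_STACK_DEPTH] = {};
            int i;

            /* routed to bpf_stackmap_copy(): the bucket is taken out of
             * the table while it is copied, so the freelist cannot
             * recycle it mid-copy
             */
            if (bpf_lookup_elem(map_fd, &stack_id, ips) != 0)
                    return;

            for (i = 0; i < MAX_STACK_DEPTH && ips[i]; i++)
                    printf("%s\n", ksym_search(ips[i])->name);
    }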
@@ -18,6 +18,8 @@
 #include <linux/filter.h>
 #include <linux/version.h>

+DEFINE_PER_CPU(int, bpf_prog_active);
+
 int sysctl_unprivileged_bpf_disabled __read_mostly;

 static LIST_HEAD(bpf_map_types);
@@ -46,6 +48,19 @@ void bpf_register_map_type(struct bpf_map_type_list *tl)
 	list_add(&tl->list_node, &bpf_map_types);
 }

+int bpf_map_precharge_memlock(u32 pages)
+{
+	struct user_struct *user = get_current_user();
+	unsigned long memlock_limit, cur;
+
+	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	cur = atomic_long_read(&user->locked_vm);
+	free_uid(user);
+	if (cur + pages > memlock_limit)
+		return -EPERM;
+	return 0;
+}
+
 static int bpf_map_charge_memlock(struct bpf_map *map)
 {
 	struct user_struct *user = get_current_user();
@@ -151,7 +166,7 @@ int bpf_map_new_fd(struct bpf_map *map)
 		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
 		   sizeof(attr->CMD##_LAST_FIELD)) != NULL

-#define BPF_MAP_CREATE_LAST_FIELD max_entries
+#define BPF_MAP_CREATE_LAST_FIELD map_flags
 /* called via syscall */
 static int map_create(union bpf_attr *attr)
 {
@@ -275,6 +290,8 @@ static int map_lookup_elem(union bpf_attr *attr)
 		err = bpf_percpu_hash_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
 		err = bpf_percpu_array_copy(map, key, value);
+	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
+		err = bpf_stackmap_copy(map, key, value);
 	} else {
 		rcu_read_lock();
 		ptr = map->ops->map_lookup_elem(map, key);
@@ -347,6 +364,11 @@ static int map_update_elem(union bpf_attr *attr)
 	if (copy_from_user(value, uvalue, value_size) != 0)
 		goto free_value;

+	/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
+	 * inside bpf map update or delete otherwise deadlocks are possible
+	 */
+	preempt_disable();
+	__this_cpu_inc(bpf_prog_active);
 	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
 		err = bpf_percpu_hash_update(map, key, value, attr->flags);
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
@@ -356,6 +378,8 @@ static int map_update_elem(union bpf_attr *attr)
 		err = map->ops->map_update_elem(map, key, value, attr->flags);
 		rcu_read_unlock();
 	}
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();

 free_value:
 	kfree(value);
@@ -394,9 +418,13 @@ static int map_delete_elem(union bpf_attr *attr)
 	if (copy_from_user(key, ukey, map->key_size) != 0)
 		goto free_key;

+	preempt_disable();
+	__this_cpu_inc(bpf_prog_active);
 	rcu_read_lock();
 	err = map->ops->map_delete_elem(map, key);
 	rcu_read_unlock();
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();

 free_key:
 	kfree(key);
...
@@ -13,8 +13,6 @@
 #include <linux/ctype.h>
 #include "trace.h"

-static DEFINE_PER_CPU(int, bpf_prog_active);
-
 /**
  * trace_call_bpf - invoke BPF program
  * @prog: BPF program
...
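The hunk above only removes the file-local counter now that syscall.c
defines the shared one. The consumer side (added by patch 1 of this series,
not shown in this merge view) is trace_call_bpf(), which uses the counter
roughly as sketched here; treat this as a paraphrase of the idea, not the
verbatim patch:

    unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
    {
            unsigned int ret;

            if (in_nmi()) /* not supported yet */
                    return 1;

            preempt_disable();

            if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
                    /* a bpf program is already running on this cpu:
                     * don't recurse into another one
                     */
                    ret = 0;
                    goto out;
            }

            rcu_read_lock();
            ret = BPF_PROG_RUN(prog, ctx);
            rcu_read_unlock();

    out:
            __this_cpu_dec(bpf_prog_active);
            preempt_enable();

            return ret;
    }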
@@ -61,6 +61,7 @@ struct bpf_map_def {
 	unsigned int key_size;
 	unsigned int value_size;
 	unsigned int max_entries;
+	unsigned int map_flags;
 };

 static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
...
@@ -157,9 +157,13 @@ static int load_maps(struct bpf_map_def *maps, int len)
 		map_fd[i] = bpf_create_map(maps[i].type,
 					   maps[i].key_size,
 					   maps[i].value_size,
-					   maps[i].max_entries);
-		if (map_fd[i] < 0)
+					   maps[i].max_entries,
+					   maps[i].map_flags);
+		if (map_fd[i] < 0) {
+			printf("failed to create a map: %d %s\n",
+			       errno, strerror(errno));
 			return 1;
+		}

 		if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
 			prog_array_fd = map_fd[i];
@@ -343,3 +347,65 @@ void read_trace_pipe(void)
 		}
 	}
 }
#define MAX_SYMS 300000
static struct ksym syms[MAX_SYMS];
static int sym_cnt;
static int ksym_cmp(const void *p1, const void *p2)
{
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
}
int load_kallsyms(void)
{
FILE *f = fopen("/proc/kallsyms", "r");
char func[256], buf[256];
char symbol;
void *addr;
int i = 0;
if (!f)
return -ENOENT;
while (!feof(f)) {
if (!fgets(buf, sizeof(buf), f))
break;
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
if (!addr)
continue;
syms[i].addr = (long) addr;
syms[i].name = strdup(func);
i++;
}
sym_cnt = i;
qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
return 0;
}
struct ksym *ksym_search(long key)
{
int start = 0, end = sym_cnt;
int result;
while (start < end) {
size_t mid = start + (end - start) / 2;
result = key - syms[mid].addr;
if (result < 0)
end = mid;
else if (result > 0)
start = mid + 1;
else
return &syms[mid];
}
if (start >= 1 && syms[start - 1].addr < key &&
key < syms[start].addr)
/* valid ksym */
return &syms[start - 1];
/* out of range. return _stext */
return &syms[0];
}
@@ -23,5 +23,11 @@ extern int event_fd[MAX_PROGS];
 int load_bpf_file(char *path);
 void read_trace_pipe(void);

+struct ksym {
+	long addr;
+	char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
 #endif
@@ -44,7 +44,7 @@ static void usage(void)
 static int bpf_map_create(void)
 {
 	return bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(uint32_t),
-			      sizeof(uint32_t), 1024);
+			      sizeof(uint32_t), 1024, 0);
 }

 static int bpf_prog_create(const char *object)
...
@@ -19,13 +19,14 @@ static __u64 ptr_to_u64(void *ptr)
 }

 int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
-		   int max_entries)
+		   int max_entries, int map_flags)
 {
 	union bpf_attr attr = {
 		.map_type = map_type,
 		.key_size = key_size,
 		.value_size = value_size,
-		.max_entries = max_entries
+		.max_entries = max_entries,
+		.map_flags = map_flags,
 	};

 	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
...
@@ -5,7 +5,7 @@
 struct bpf_insn;

 int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
-		   int max_entries);
+		   int max_entries, int map_flags);
 int bpf_update_elem(int fd, void *key, void *value, unsigned long long flags);
 int bpf_lookup_elem(int fd, void *key, void *value);
 int bpf_delete_elem(int fd, void *key);
...
@@ -18,80 +18,15 @@
 #include "libbpf.h"
 #include "bpf_load.h"

-#define MAX_SYMS 300000
 #define PRINT_RAW_ADDR 0

-static struct ksym {
-	long addr;
-	char *name;
-} syms[MAX_SYMS];
-static int sym_cnt;
-
-static int ksym_cmp(const void *p1, const void *p2)
-{
-	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-static int load_kallsyms(void)
-{
-	FILE *f = fopen("/proc/kallsyms", "r");
-	char func[256], buf[256];
-	char symbol;
-	void *addr;
-	int i = 0;
-
-	if (!f)
-		return -ENOENT;
-
-	while (!feof(f)) {
-		if (!fgets(buf, sizeof(buf), f))
-			break;
-		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
-			break;
-		if (!addr)
-			continue;
-		syms[i].addr = (long) addr;
-		syms[i].name = strdup(func);
-		i++;
-	}
-	sym_cnt = i;
-	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
-	return 0;
-}
-
-static void *search(long key)
-{
-	int start = 0, end = sym_cnt;
-	int result;
-
-	while (start < end) {
-		size_t mid = start + (end - start) / 2;
-
-		result = key - syms[mid].addr;
-		if (result < 0)
-			end = mid;
-		else if (result > 0)
-			start = mid + 1;
-		else
-			return &syms[mid];
-	}
-
-	if (start >= 1 && syms[start - 1].addr < key &&
-	    key < syms[start].addr)
-		/* valid ksym */
-		return &syms[start - 1];
-
-	/* out of range. return _stext */
-	return &syms[0];
-}
-
 static void print_ksym(__u64 addr)
 {
 	struct ksym *sym;

 	if (!addr)
 		return;

-	sym = search(addr);
+	sym = ksym_search(addr);
 	if (PRINT_RAW_ADDR)
 		printf("%s/%llx;", sym->name, addr);
 	else
...
@@ -34,7 +34,7 @@ static int test_sock(void)
 	long long value = 0, tcp_cnt, udp_cnt, icmp_cnt;

 	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
-				256);
+				256, 0);
 	if (map_fd < 0) {
 		printf("failed to create map '%s'\n", strerror(errno));
 		goto cleanup;
...
@@ -2,6 +2,7 @@
  * Testsuite for eBPF maps
  *
  * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of version 2 of the GNU General Public
@@ -17,13 +18,16 @@
 #include <stdlib.h>
 #include "libbpf.h"

+static int map_flags;
+
 /* sanity tests for map API */
 static void test_hashmap_sanity(int i, void *data)
 {
 	long long key, next_key, value;
 	int map_fd;

-	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value), 2);
+	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+				2, map_flags);
 	if (map_fd < 0) {
 		printf("failed to create hashmap '%s'\n", strerror(errno));
 		exit(1);
@@ -99,7 +103,7 @@ static void test_percpu_hashmap_sanity(int task, void *data)
 	int map_fd, i;

 	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
-				sizeof(value[0]), 2);
+				sizeof(value[0]), 2, map_flags);
 	if (map_fd < 0) {
 		printf("failed to create hashmap '%s'\n", strerror(errno));
 		exit(1);
@@ -188,7 +192,8 @@ static void test_arraymap_sanity(int i, void *data)
 	int key, next_key, map_fd;
 	long long value;

-	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value), 2);
+	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+				2, 0);
 	if (map_fd < 0) {
 		printf("failed to create arraymap '%s'\n", strerror(errno));
 		exit(1);
@@ -244,7 +249,7 @@ static void test_percpu_arraymap_many_keys(void)
 	int key, map_fd, i;

 	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-				sizeof(values[0]), nr_keys);
+				sizeof(values[0]), nr_keys, 0);
 	if (map_fd < 0) {
 		printf("failed to create per-cpu arraymap '%s'\n",
 		       strerror(errno));
@@ -275,7 +280,7 @@ static void test_percpu_arraymap_sanity(int i, void *data)
 	int key, next_key, map_fd;

 	map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
-				sizeof(values[0]), 2);
+				sizeof(values[0]), 2, 0);
 	if (map_fd < 0) {
 		printf("failed to create arraymap '%s'\n", strerror(errno));
 		exit(1);
@@ -336,7 +341,7 @@ static void test_map_large(void)

 	/* allocate 4Mbyte of memory */
 	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
-				MAP_SIZE);
+				MAP_SIZE, map_flags);
 	if (map_fd < 0) {
 		printf("failed to create large map '%s'\n", strerror(errno));
 		exit(1);
@@ -421,7 +426,7 @@ static void test_map_parallel(void)
 	int data[2];

 	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
-				MAP_SIZE);
+				MAP_SIZE, map_flags);
 	if (map_fd < 0) {
 		printf("failed to create map for parallel test '%s'\n",
 		       strerror(errno));
@@ -463,7 +468,7 @@ static void test_map_parallel(void)
 	assert(bpf_get_next_key(map_fd, &key, &key) == -1 && errno == ENOENT);
 }

-int main(void)
+static void run_all_tests(void)
 {
 	test_hashmap_sanity(0, NULL);
 	test_percpu_hashmap_sanity(0, NULL);
@@ -474,6 +479,14 @@ int main(void)
 	test_map_large();
 	test_map_parallel();
 	test_map_stress();
+}
+
+int main(void)
+{
+	map_flags = 0;
+	run_all_tests();
+
+	map_flags = BPF_F_NO_PREALLOC;
+	run_all_tests();

 	printf("test_maps: OK\n");
 	return 0;
 }
@@ -1198,7 +1198,7 @@ static int create_map(void)
 	int map_fd;

 	map_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
-				sizeof(long long), sizeof(long long), 1024);
+				sizeof(long long), sizeof(long long), 1024, 0);
 	if (map_fd < 0)
 		printf("failed to create map '%s'\n", strerror(errno));
@@ -1210,7 +1210,7 @@ static int create_prog_array(void)
 	int map_fd;

 	map_fd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY,
-				sizeof(int), sizeof(int), 4);
+				sizeof(int), sizeof(int), 4, 0);
 	if (map_fd < 0)
 		printf("failed to create prog_array '%s'\n", strerror(errno));
...