Commit 990bca1f authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf-batch-ops'

Brian Vazquez says:

====================
This patch series introduce batch ops that can be added to bpf maps to
lookup/lookup_and_delete/update/delete more than 1 element at the time,
this is specially useful when syscall overhead is a problem and in case
of hmap it will provide a reliable way of traversing them.

The implementation inclues a generic approach that could potentially be
used by any bpf map and adds it to arraymap, it also includes the specific
implementation of hashmaps which are traversed using buckets instead
of keys.

The bpf syscall subcommands introduced are:

  BPF_MAP_LOOKUP_BATCH
  BPF_MAP_LOOKUP_AND_DELETE_BATCH
  BPF_MAP_UPDATE_BATCH
  BPF_MAP_DELETE_BATCH

The UAPI attribute is:

  struct { /* struct used by BPF_MAP_*_BATCH commands */
         __aligned_u64   in_batch;       /* start batch,
                                          * NULL to start from beginning
                                          */
         __aligned_u64   out_batch;      /* output: next start batch */
         __aligned_u64   keys;
         __aligned_u64   values;
         __u32           count;          /* input/output:
                                          * input: # of key/value
                                          * elements
                                          * output: # of filled elements
                                          */
         __u32           map_fd;
         __u64           elem_flags;
         __u64           flags;
  } batch;

in_batch and out_batch are only used for lookup and lookup_and_delete since
those are the only two operations that attempt to traverse the map.

update/delete batch ops should provide the keys/values that user wants
to modify.

Here are the previous discussions on the batch processing:
 - https://lore.kernel.org/bpf/20190724165803.87470-1-brianvv@google.com/
 - https://lore.kernel.org/bpf/20190829064502.2750303-1-yhs@fb.com/
 - https://lore.kernel.org/bpf/20190906225434.3635421-1-yhs@fb.com/

Changelog sinve v4:
 - Remove unnecessary checks from libbpf API (Andrii Nakryiko)
 - Move DECLARE_LIBBPF_OPTS with all var declarations (Andrii Nakryiko)
 - Change bucket internal buffer size to 5 entries (Yonghong Song)
 - Fix some minor bugs in hashtab batch ops implementation (Yonghong Song)

Changelog sinve v3:
 - Do not use copy_to_user inside atomic region (Yonghong Song)
 - Use _opts approach on libbpf APIs (Andrii Nakryiko)
 - Drop generic_map_lookup_and_delete_batch support
 - Free malloc-ed memory in tests (Yonghong Song)
 - Reverse christmas tree (Yonghong Song)
 - Add acked labels

Changelog sinve v2:
 - Add generic batch support for lpm_trie and test it (Yonghong Song)
 - Use define MAP_LOOKUP_RETRIES for retries (John Fastabend)
 - Return errors directly and remove labels (Yonghong Song)
 - Insert new API functions into libbpf alphabetically (Yonghong Song)
 - Change hlist_nulls_for_each_entry_rcu to
   hlist_nulls_for_each_entry_safe in htab batch ops (Yonghong Song)

Changelog since v1:
 - Fix SOB ordering and remove Co-authored-by tag (Alexei Starovoitov)

Changelog since RFC:
 - Change batch to in_batch and out_batch to support more flexible opaque
   values to iterate the bpf maps.
 - Remove update/delete specific batch ops for htab and use the generic
   implementations instead.
====================
Acked-by: default avatarYonghong Song <yhs@fb.com>
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 83e4b88b f0fac2ce
......@@ -44,6 +44,15 @@ struct bpf_map_ops {
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
void (*map_release_uref)(struct bpf_map *map);
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_update_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
/* funcs callable from userspace and from eBPF programs */
void *(*map_lookup_elem)(struct bpf_map *map, void *key);
......@@ -982,6 +991,15 @@ void *bpf_map_area_alloc(u64 size, int numa_node);
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int generic_map_update_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
int generic_map_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
extern int sysctl_unprivileged_bpf_disabled;
......
......@@ -107,6 +107,10 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
BPF_MAP_FREEZE,
BPF_BTF_GET_NEXT_ID,
BPF_MAP_LOOKUP_BATCH,
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
};
enum bpf_map_type {
......@@ -420,6 +424,23 @@ union bpf_attr {
__u64 flags;
};
struct { /* struct used by BPF_MAP_*_BATCH commands */
__aligned_u64 in_batch; /* start batch,
* NULL to start from beginning
*/
__aligned_u64 out_batch; /* output: next start batch */
__aligned_u64 keys;
__aligned_u64 values;
__u32 count; /* input/output:
* input: # of key/value
* elements
* output: # of filled elements
*/
__u32 map_fd;
__u64 elem_flags;
__u64 flags;
} batch;
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
......
......@@ -503,6 +503,8 @@ const struct bpf_map_ops array_map_ops = {
.map_mmap = array_map_mmap,
.map_seq_show_elem = array_map_seq_show_elem,
.map_check_btf = array_map_check_btf,
.map_lookup_batch = generic_map_lookup_batch,
.map_update_batch = generic_map_update_batch,
};
const struct bpf_map_ops percpu_array_map_ops = {
......
......@@ -17,6 +17,16 @@
(BPF_F_NO_PREALLOC | BPF_F_NO_COMMON_LRU | BPF_F_NUMA_NODE | \
BPF_F_ACCESS_MASK | BPF_F_ZERO_SEED)
#define BATCH_OPS(_name) \
.map_lookup_batch = \
_name##_map_lookup_batch, \
.map_lookup_and_delete_batch = \
_name##_map_lookup_and_delete_batch, \
.map_update_batch = \
generic_map_update_batch, \
.map_delete_batch = \
generic_map_delete_batch
struct bucket {
struct hlist_nulls_head head;
raw_spinlock_t lock;
......@@ -1232,6 +1242,256 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr,
bool do_delete, bool is_lru_map,
bool is_percpu)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
u32 bucket_cnt, total, key_size, value_size, roundup_key_size;
void *keys = NULL, *values = NULL, *value, *dst_key, *dst_val;
void __user *uvalues = u64_to_user_ptr(attr->batch.values);
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
unsigned long flags;
struct htab_elem *l;
struct bucket *b;
int ret = 0;
elem_map_flags = attr->batch.elem_flags;
if ((elem_map_flags & ~BPF_F_LOCK) ||
((elem_map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
return -EINVAL;
map_flags = attr->batch.flags;
if (map_flags)
return -EINVAL;
max_count = attr->batch.count;
if (!max_count)
return 0;
if (put_user(0, &uattr->batch.count))
return -EFAULT;
batch = 0;
if (ubatch && copy_from_user(&batch, ubatch, sizeof(batch)))
return -EFAULT;
if (batch >= htab->n_buckets)
return -ENOENT;
key_size = htab->map.key_size;
roundup_key_size = round_up(htab->map.key_size, 8);
value_size = htab->map.value_size;
size = round_up(value_size, 8);
if (is_percpu)
value_size = size * num_possible_cpus();
total = 0;
/* while experimenting with hash tables with sizes ranging from 10 to
* 1000, it was observed that a bucket can have upto 5 entries.
*/
bucket_size = 5;
alloc:
/* We cannot do copy_from_user or copy_to_user inside
* the rcu_read_lock. Allocate enough space here.
*/
keys = kvmalloc(key_size * bucket_size, GFP_USER | __GFP_NOWARN);
values = kvmalloc(value_size * bucket_size, GFP_USER | __GFP_NOWARN);
if (!keys || !values) {
ret = -ENOMEM;
goto after_loop;
}
again:
preempt_disable();
this_cpu_inc(bpf_prog_active);
rcu_read_lock();
again_nocopy:
dst_key = keys;
dst_val = values;
b = &htab->buckets[batch];
head = &b->head;
raw_spin_lock_irqsave(&b->lock, flags);
bucket_cnt = 0;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
bucket_cnt++;
if (bucket_cnt > (max_count - total)) {
if (total == 0)
ret = -ENOSPC;
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
preempt_enable();
goto after_loop;
}
if (bucket_cnt > bucket_size) {
bucket_size = bucket_cnt;
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
preempt_enable();
kvfree(keys);
kvfree(values);
goto alloc;
}
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
memcpy(dst_key, l->key, key_size);
if (is_percpu) {
int off = 0, cpu;
void __percpu *pptr;
pptr = htab_elem_get_ptr(l, map->key_size);
for_each_possible_cpu(cpu) {
bpf_long_memcpy(dst_val + off,
per_cpu_ptr(pptr, cpu), size);
off += size;
}
} else {
value = l->key + roundup_key_size;
if (elem_map_flags & BPF_F_LOCK)
copy_map_value_locked(map, dst_val, value,
true);
else
copy_map_value(map, dst_val, value);
check_and_init_map_lock(map, dst_val);
}
if (do_delete) {
hlist_nulls_del_rcu(&l->hash_node);
if (is_lru_map)
bpf_lru_push_free(&htab->lru, &l->lru_node);
else
free_htab_elem(htab, l);
}
dst_key += key_size;
dst_val += value_size;
}
raw_spin_unlock_irqrestore(&b->lock, flags);
/* If we are not copying data, we can go to next bucket and avoid
* unlocking the rcu.
*/
if (!bucket_cnt && (batch + 1 < htab->n_buckets)) {
batch++;
goto again_nocopy;
}
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
preempt_enable();
if (bucket_cnt && (copy_to_user(ukeys + total * key_size, keys,
key_size * bucket_cnt) ||
copy_to_user(uvalues + total * value_size, values,
value_size * bucket_cnt))) {
ret = -EFAULT;
goto after_loop;
}
total += bucket_cnt;
batch++;
if (batch >= htab->n_buckets) {
ret = -ENOENT;
goto after_loop;
}
goto again;
after_loop:
if (ret == -EFAULT)
goto out;
/* copy # of entries and next batch */
ubatch = u64_to_user_ptr(attr->batch.out_batch);
if (copy_to_user(ubatch, &batch, sizeof(batch)) ||
put_user(total, &uattr->batch.count))
ret = -EFAULT;
out:
kvfree(keys);
kvfree(values);
return ret;
}
static int
htab_percpu_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
false, true);
}
static int
htab_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
false, true);
}
static int
htab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
false, false);
}
static int
htab_map_lookup_and_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
false, false);
}
static int
htab_lru_percpu_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
true, true);
}
static int
htab_lru_percpu_map_lookup_and_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
true, true);
}
static int
htab_lru_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, false,
true, false);
}
static int
htab_lru_map_lookup_and_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
return __htab_map_lookup_and_delete_batch(map, attr, uattr, true,
true, false);
}
const struct bpf_map_ops htab_map_ops = {
.map_alloc_check = htab_map_alloc_check,
.map_alloc = htab_map_alloc,
......@@ -1242,6 +1502,7 @@ const struct bpf_map_ops htab_map_ops = {
.map_delete_elem = htab_map_delete_elem,
.map_gen_lookup = htab_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
BATCH_OPS(htab),
};
const struct bpf_map_ops htab_lru_map_ops = {
......@@ -1255,6 +1516,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
.map_delete_elem = htab_lru_map_delete_elem,
.map_gen_lookup = htab_lru_map_gen_lookup,
.map_seq_show_elem = htab_map_seq_show_elem,
BATCH_OPS(htab_lru),
};
/* Called from eBPF program */
......@@ -1368,6 +1630,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
.map_update_elem = htab_percpu_map_update_elem,
.map_delete_elem = htab_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
BATCH_OPS(htab_percpu),
};
const struct bpf_map_ops htab_lru_percpu_map_ops = {
......@@ -1379,6 +1642,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
.map_update_elem = htab_lru_percpu_map_update_elem,
.map_delete_elem = htab_lru_map_delete_elem,
.map_seq_show_elem = htab_percpu_map_seq_show_elem,
BATCH_OPS(htab_lru_percpu),
};
static int fd_htab_map_alloc_check(union bpf_attr *attr)
......
......@@ -129,6 +129,152 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
return map;
}
static u32 bpf_map_value_size(struct bpf_map *map)
{
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
return round_up(map->value_size, 8) * num_possible_cpus();
else if (IS_FD_MAP(map))
return sizeof(u32);
else
return map->value_size;
}
static void maybe_wait_bpf_programs(struct bpf_map *map)
{
/* Wait for any running BPF programs to complete so that
* userspace, when we return to it, knows that all programs
* that could be running use the new map value.
*/
if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
synchronize_rcu();
}
static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
void *value, __u64 flags)
{
int err;
/* Need to create a kthread, thus must support schedule */
if (bpf_map_is_dev_bound(map)) {
return bpf_map_offload_update_elem(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
return map->ops->map_update_elem(map, key, value, flags);
} else if (IS_FD_PROG_ARRAY(map)) {
return bpf_fd_array_map_update_elem(map, f.file, key, value,
flags);
}
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
* inside bpf map update or delete otherwise deadlocks are possible
*/
preempt_disable();
__this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_update(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_update(map, key, value,
flags);
} else if (IS_FD_ARRAY(map)) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
flags);
rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
rcu_read_lock();
err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
flags);
rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
/* rcu_read_lock() is not needed */
err = bpf_fd_reuseport_array_update_elem(map, key, value,
flags);
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
map->map_type == BPF_MAP_TYPE_STACK) {
err = map->ops->map_push_elem(map, value, flags);
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, flags);
rcu_read_unlock();
}
__this_cpu_dec(bpf_prog_active);
preempt_enable();
maybe_wait_bpf_programs(map);
return err;
}
static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
__u64 flags)
{
void *ptr;
int err;
if (bpf_map_is_dev_bound(map))
return bpf_map_offload_lookup_elem(map, key, value);
preempt_disable();
this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_copy(map, key, value);
} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
err = bpf_fd_array_map_lookup_elem(map, key, value);
} else if (IS_FD_HASH(map)) {
err = bpf_fd_htab_map_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
map->map_type == BPF_MAP_TYPE_STACK) {
err = map->ops->map_peek_elem(map, value);
} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
/* struct_ops map requires directly updating "value" */
err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
} else {
rcu_read_lock();
if (map->ops->map_lookup_elem_sys_only)
ptr = map->ops->map_lookup_elem_sys_only(map, key);
else
ptr = map->ops->map_lookup_elem(map, key);
if (IS_ERR(ptr)) {
err = PTR_ERR(ptr);
} else if (!ptr) {
err = -ENOENT;
} else {
err = 0;
if (flags & BPF_F_LOCK)
/* lock 'ptr' and copy everything but lock */
copy_map_value_locked(map, value, ptr, true);
else
copy_map_value(map, value, ptr);
/* mask lock, since value wasn't zero inited */
check_and_init_map_lock(map, value);
}
rcu_read_unlock();
}
this_cpu_dec(bpf_prog_active);
preempt_enable();
maybe_wait_bpf_programs(map);
return err;
}
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
{
/* We really just want to fail instead of triggering OOM killer
......@@ -827,7 +973,7 @@ static int map_lookup_elem(union bpf_attr *attr)
void __user *uvalue = u64_to_user_ptr(attr->value);
int ufd = attr->map_fd;
struct bpf_map *map;
void *key, *value, *ptr;
void *key, *value;
u32 value_size;
struct fd f;
int err;
......@@ -859,75 +1005,14 @@ static int map_lookup_elem(union bpf_attr *attr)
goto err_put;
}
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
value_size = round_up(map->value_size, 8) * num_possible_cpus();
else if (IS_FD_MAP(map))
value_size = sizeof(u32);
else
value_size = map->value_size;
value_size = bpf_map_value_size(map);
err = -ENOMEM;
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
goto free_key;
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_lookup_elem(map, key, value);
goto done;
}
preempt_disable();
this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_copy(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
err = bpf_stackmap_copy(map, key, value);
} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
err = bpf_fd_array_map_lookup_elem(map, key, value);
} else if (IS_FD_HASH(map)) {
err = bpf_fd_htab_map_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
err = bpf_fd_reuseport_array_lookup_elem(map, key, value);
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
map->map_type == BPF_MAP_TYPE_STACK) {
err = map->ops->map_peek_elem(map, value);
} else if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
/* struct_ops map requires directly updating "value" */
err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
} else {
rcu_read_lock();
if (map->ops->map_lookup_elem_sys_only)
ptr = map->ops->map_lookup_elem_sys_only(map, key);
else
ptr = map->ops->map_lookup_elem(map, key);
if (IS_ERR(ptr)) {
err = PTR_ERR(ptr);
} else if (!ptr) {
err = -ENOENT;
} else {
err = 0;
if (attr->flags & BPF_F_LOCK)
/* lock 'ptr' and copy everything but lock */
copy_map_value_locked(map, value, ptr, true);
else
copy_map_value(map, value, ptr);
/* mask lock, since value wasn't zero inited */
check_and_init_map_lock(map, value);
}
rcu_read_unlock();
}
this_cpu_dec(bpf_prog_active);
preempt_enable();
done:
err = bpf_map_copy_value(map, key, value, attr->flags);
if (err)
goto free_value;
......@@ -946,16 +1031,6 @@ static int map_lookup_elem(union bpf_attr *attr)
return err;
}
static void maybe_wait_bpf_programs(struct bpf_map *map)
{
/* Wait for any running BPF programs to complete so that
* userspace, when we return to it, knows that all programs
* that could be running use the new map value.
*/
if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
synchronize_rcu();
}
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
......@@ -1011,61 +1086,8 @@ static int map_update_elem(union bpf_attr *attr)
if (copy_from_user(value, uvalue, value_size) != 0)
goto free_value;
/* Need to create a kthread, thus must support schedule */
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_update_elem(map, key, value, attr->flags);
goto out;
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
map->map_type == BPF_MAP_TYPE_SOCKHASH ||
map->map_type == BPF_MAP_TYPE_SOCKMAP ||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
err = map->ops->map_update_elem(map, key, value, attr->flags);
goto out;
} else if (IS_FD_PROG_ARRAY(map)) {
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
goto out;
}
err = bpf_map_update_value(map, f, key, value, attr->flags);
/* must increment bpf_prog_active to avoid kprobe+bpf triggering from
* inside bpf map update or delete otherwise deadlocks are possible
*/
preempt_disable();
__this_cpu_inc(bpf_prog_active);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
err = bpf_percpu_cgroup_storage_update(map, key, value,
attr->flags);
} else if (IS_FD_ARRAY(map)) {
rcu_read_lock();
err = bpf_fd_array_map_update_elem(map, f.file, key, value,
attr->flags);
rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
rcu_read_lock();
err = bpf_fd_htab_map_update_elem(map, f.file, key, value,
attr->flags);
rcu_read_unlock();
} else if (map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY) {
/* rcu_read_lock() is not needed */
err = bpf_fd_reuseport_array_update_elem(map, key, value,
attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_QUEUE ||
map->map_type == BPF_MAP_TYPE_STACK) {
err = map->ops->map_push_elem(map, value, attr->flags);
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, attr->flags);
rcu_read_unlock();
}
__this_cpu_dec(bpf_prog_active);
preempt_enable();
maybe_wait_bpf_programs(map);
out:
free_value:
kfree(value);
free_key:
......@@ -1196,6 +1218,214 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
}
int generic_map_delete_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
void __user *keys = u64_to_user_ptr(attr->batch.keys);
u32 cp, max_count;
int err = 0;
void *key;
if (attr->batch.elem_flags & ~BPF_F_LOCK)
return -EINVAL;
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)) {
return -EINVAL;
}
max_count = attr->batch.count;
if (!max_count)
return 0;
for (cp = 0; cp < max_count; cp++) {
key = __bpf_copy_key(keys + cp * map->key_size, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
break;
}
if (bpf_map_is_dev_bound(map)) {
err = bpf_map_offload_delete_elem(map, key);
break;
}
preempt_disable();
__this_cpu_inc(bpf_prog_active);
rcu_read_lock();
err = map->ops->map_delete_elem(map, key);
rcu_read_unlock();
__this_cpu_dec(bpf_prog_active);
preempt_enable();
maybe_wait_bpf_programs(map);
if (err)
break;
}
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
return err;
}
int generic_map_update_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
void __user *values = u64_to_user_ptr(attr->batch.values);
void __user *keys = u64_to_user_ptr(attr->batch.keys);
u32 value_size, cp, max_count;
int ufd = attr->map_fd;
void *key, *value;
struct fd f;
int err = 0;
f = fdget(ufd);
if (attr->batch.elem_flags & ~BPF_F_LOCK)
return -EINVAL;
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map)) {
return -EINVAL;
}
value_size = bpf_map_value_size(map);
max_count = attr->batch.count;
if (!max_count)
return 0;
value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
if (!value)
return -ENOMEM;
for (cp = 0; cp < max_count; cp++) {
key = __bpf_copy_key(keys + cp * map->key_size, map->key_size);
if (IS_ERR(key)) {
err = PTR_ERR(key);
break;
}
err = -EFAULT;
if (copy_from_user(value, values + cp * value_size, value_size))
break;
err = bpf_map_update_value(map, f, key, value,
attr->batch.elem_flags);
if (err)
break;
}
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
kfree(value);
kfree(key);
return err;
}
#define MAP_LOOKUP_RETRIES 3
int generic_map_lookup_batch(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
void __user *values = u64_to_user_ptr(attr->batch.values);
void __user *keys = u64_to_user_ptr(attr->batch.keys);
void *buf, *buf_prevkey, *prev_key, *key, *value;
int err, retry = MAP_LOOKUP_RETRIES;
u32 value_size, cp, max_count;
bool first_key = false;
if (attr->batch.elem_flags & ~BPF_F_LOCK)
return -EINVAL;
if ((attr->batch.elem_flags & BPF_F_LOCK) &&
!map_value_has_spin_lock(map))
return -EINVAL;
value_size = bpf_map_value_size(map);
max_count = attr->batch.count;
if (!max_count)
return 0;
if (put_user(0, &uattr->batch.count))
return -EFAULT;
buf_prevkey = kmalloc(map->key_size, GFP_USER | __GFP_NOWARN);
if (!buf_prevkey)
return -ENOMEM;
buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
if (!buf) {
kvfree(buf_prevkey);
return -ENOMEM;
}
err = -EFAULT;
first_key = false;
prev_key = NULL;
if (ubatch && copy_from_user(buf_prevkey, ubatch, map->key_size))
goto free_buf;
key = buf;
value = key + map->key_size;
if (ubatch)
prev_key = buf_prevkey;
for (cp = 0; cp < max_count;) {
rcu_read_lock();
err = map->ops->map_get_next_key(map, prev_key, key);
rcu_read_unlock();
if (err)
break;
err = bpf_map_copy_value(map, key, value,
attr->batch.elem_flags);
if (err == -ENOENT) {
if (retry) {
retry--;
continue;
}
err = -EINTR;
break;
}
if (err)
goto free_buf;
if (copy_to_user(keys + cp * map->key_size, key,
map->key_size)) {
err = -EFAULT;
goto free_buf;
}
if (copy_to_user(values + cp * value_size, value, value_size)) {
err = -EFAULT;
goto free_buf;
}
if (!prev_key)
prev_key = buf_prevkey;
swap(prev_key, key);
retry = MAP_LOOKUP_RETRIES;
cp++;
}
if (err == -EFAULT)
goto free_buf;
if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
(cp && copy_to_user(uobatch, prev_key, map->key_size))))
err = -EFAULT;
free_buf:
kfree(buf_prevkey);
kfree(buf);
return err;
}
#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
static int map_lookup_and_delete_elem(union bpf_attr *attr)
......@@ -3052,6 +3282,61 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
return err;
}
#define BPF_MAP_BATCH_LAST_FIELD batch.flags
#define BPF_DO_BATCH(fn) \
do { \
if (!fn) { \
err = -ENOTSUPP; \
goto err_put; \
} \
err = fn(map, attr, uattr); \
} while (0)
static int bpf_map_do_batch(const union bpf_attr *attr,
union bpf_attr __user *uattr,
int cmd)
{
struct bpf_map *map;
int err, ufd;
struct fd f;
if (CHECK_ATTR(BPF_MAP_BATCH))
return -EINVAL;
ufd = attr->batch.map_fd;
f = fdget(ufd);
map = __bpf_map_get(f);
if (IS_ERR(map))
return PTR_ERR(map);
if ((cmd == BPF_MAP_LOOKUP_BATCH ||
cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) &&
!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) {
err = -EPERM;
goto err_put;
}
if (cmd != BPF_MAP_LOOKUP_BATCH &&
!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
err = -EPERM;
goto err_put;
}
if (cmd == BPF_MAP_LOOKUP_BATCH)
BPF_DO_BATCH(map->ops->map_lookup_batch);
else if (cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH)
BPF_DO_BATCH(map->ops->map_lookup_and_delete_batch);
else if (cmd == BPF_MAP_UPDATE_BATCH)
BPF_DO_BATCH(map->ops->map_update_batch);
else
BPF_DO_BATCH(map->ops->map_delete_batch);
err_put:
fdput(f);
return err;
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
......@@ -3149,6 +3434,19 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
err = map_lookup_and_delete_elem(&attr);
break;
case BPF_MAP_LOOKUP_BATCH:
err = bpf_map_do_batch(&attr, uattr, BPF_MAP_LOOKUP_BATCH);
break;
case BPF_MAP_LOOKUP_AND_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr,
BPF_MAP_LOOKUP_AND_DELETE_BATCH);
break;
case BPF_MAP_UPDATE_BATCH:
err = bpf_map_do_batch(&attr, uattr, BPF_MAP_UPDATE_BATCH);
break;
case BPF_MAP_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
break;
default:
err = -EINVAL;
break;
......
......@@ -107,6 +107,10 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_ELEM,
BPF_MAP_FREEZE,
BPF_BTF_GET_NEXT_ID,
BPF_MAP_LOOKUP_BATCH,
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
};
enum bpf_map_type {
......@@ -420,6 +424,23 @@ union bpf_attr {
__u64 flags;
};
struct { /* struct used by BPF_MAP_*_BATCH commands */
__aligned_u64 in_batch; /* start batch,
* NULL to start from beginning
*/
__aligned_u64 out_batch; /* output: next start batch */
__aligned_u64 keys;
__aligned_u64 values;
__u32 count; /* input/output:
* input: # of key/value
* elements
* output: # of filled elements
*/
__u32 map_fd;
__u64 elem_flags;
__u64 flags;
} batch;
struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
......
......@@ -452,6 +452,64 @@ int bpf_map_freeze(int fd)
return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
}
static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
void *out_batch, void *keys, void *values,
__u32 *count,
const struct bpf_map_batch_opts *opts)
{
union bpf_attr attr = {};
int ret;
if (!OPTS_VALID(opts, bpf_map_batch_opts))
return -EINVAL;
memset(&attr, 0, sizeof(attr));
attr.batch.map_fd = fd;
attr.batch.in_batch = ptr_to_u64(in_batch);
attr.batch.out_batch = ptr_to_u64(out_batch);
attr.batch.keys = ptr_to_u64(keys);
attr.batch.values = ptr_to_u64(values);
attr.batch.count = *count;
attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
attr.batch.flags = OPTS_GET(opts, flags, 0);
ret = sys_bpf(cmd, &attr, sizeof(attr));
*count = attr.batch.count;
return ret;
}
int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
NULL, keys, NULL, count, opts);
}
int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
void *values, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
out_batch, keys, values, count, opts);
}
int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
void *keys, void *values, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
fd, in_batch, out_batch, keys, values,
count, opts);
}
int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
const struct bpf_map_batch_opts *opts)
{
return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
keys, values, count, opts);
}
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr;
......
......@@ -127,6 +127,28 @@ LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
LIBBPF_API int bpf_map_freeze(int fd);
struct bpf_map_batch_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u64 elem_flags;
__u64 flags;
};
#define bpf_map_batch_opts__last_field flags
LIBBPF_API int bpf_map_delete_batch(int fd, void *keys,
__u32 *count,
const struct bpf_map_batch_opts *opts);
LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,
void *keys, void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,
void *out_batch, void *keys,
void *values, __u32 *count,
const struct bpf_map_batch_opts *opts);
LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values,
__u32 *count,
const struct bpf_map_batch_opts *opts);
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);
......
......@@ -214,6 +214,10 @@ LIBBPF_0.0.7 {
btf_dump__emit_type_decl;
bpf_link__disconnect;
bpf_map__attach_struct_ops;
bpf_map_delete_batch;
bpf_map_lookup_and_delete_batch;
bpf_map_lookup_batch;
bpf_map_update_batch;
bpf_object__find_program_by_name;
bpf_object__attach_skeleton;
bpf_object__destroy_skeleton;
......
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <test_maps.h>
static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
int *values)
{
int i, err;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
for (i = 0; i < max_entries; i++) {
keys[i] = i;
values[i] = i + 1;
}
err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
}
static void map_batch_verify(int *visited, __u32 max_entries,
int *keys, int *values)
{
int i;
memset(visited, 0, max_entries * sizeof(*visited));
for (i = 0; i < max_entries; i++) {
CHECK(keys[i] + 1 != values[i], "key/value checking",
"error: i %d key %d value %d\n", i, keys[i], values[i]);
visited[i] = 1;
}
for (i = 0; i < max_entries; i++) {
CHECK(visited[i] != 1, "visited checking",
"error: keys array at index %d missing\n", i);
}
}
void test_array_map_batch_ops(void)
{
struct bpf_create_map_attr xattr = {
.name = "array_map",
.map_type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
};
int map_fd, *keys, *values, *visited;
__u32 count, total, total_success;
const __u32 max_entries = 10;
bool nospace_err;
__u64 batch = 0;
int err, step;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
xattr.max_entries = max_entries;
map_fd = bpf_create_map_xattr(&xattr);
CHECK(map_fd == -1,
"bpf_create_map_xattr()", "error:%s\n", strerror(errno));
keys = malloc(max_entries * sizeof(int));
values = malloc(max_entries * sizeof(int));
visited = malloc(max_entries * sizeof(int));
CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
strerror(errno));
/* populate elements to the map */
map_batch_update(map_fd, max_entries, keys, values);
/* test 1: lookup in a loop with various steps. */
total_success = 0;
for (step = 1; step < max_entries; step++) {
map_batch_update(map_fd, max_entries, keys, values);
map_batch_verify(visited, max_entries, keys, values);
memset(keys, 0, max_entries * sizeof(*keys));
memset(values, 0, max_entries * sizeof(*values));
batch = 0;
total = 0;
/* iteratively lookup/delete elements with 'step'
* elements each.
*/
count = step;
nospace_err = false;
while (true) {
err = bpf_map_lookup_batch(map_fd,
total ? &batch : NULL, &batch,
keys + total,
values + total,
&count, &opts);
CHECK((err && errno != ENOENT), "lookup with steps",
"error: %s\n", strerror(errno));
total += count;
if (err)
break;
}
if (nospace_err == true)
continue;
CHECK(total != max_entries, "lookup with steps",
"total = %u, max_entries = %u\n", total, max_entries);
map_batch_verify(visited, max_entries, keys, values);
total_success++;
}
CHECK(total_success == 0, "check total_success",
"unexpected failure\n");
printf("%s:PASS\n", __func__);
free(keys);
free(values);
free(visited);
}
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <bpf_util.h>
#include <test_maps.h>
static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
void *values, bool is_pcpu)
{
typedef BPF_DECLARE_PERCPU(int, value);
value *v = NULL;
int i, j, err;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
if (is_pcpu)
v = (value *)values;
for (i = 0; i < max_entries; i++) {
keys[i] = i + 1;
if (is_pcpu)
for (j = 0; j < bpf_num_possible_cpus(); j++)
bpf_percpu(v[i], j) = i + 2 + j;
else
((int *)values)[i] = i + 2;
}
err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
}
static void map_batch_verify(int *visited, __u32 max_entries,
int *keys, void *values, bool is_pcpu)
{
typedef BPF_DECLARE_PERCPU(int, value);
value *v = NULL;
int i, j;
if (is_pcpu)
v = (value *)values;
memset(visited, 0, max_entries * sizeof(*visited));
for (i = 0; i < max_entries; i++) {
if (is_pcpu) {
for (j = 0; j < bpf_num_possible_cpus(); j++) {
CHECK(keys[i] + 1 + j != bpf_percpu(v[i], j),
"key/value checking",
"error: i %d j %d key %d value %d\n",
i, j, keys[i], bpf_percpu(v[i], j));
}
} else {
CHECK(keys[i] + 1 != ((int *)values)[i],
"key/value checking",
"error: i %d key %d value %d\n", i, keys[i],
((int *)values)[i]);
}
visited[i] = 1;
}
for (i = 0; i < max_entries; i++) {
CHECK(visited[i] != 1, "visited checking",
"error: keys array at index %d missing\n", i);
}
}
void __test_map_lookup_and_delete_batch(bool is_pcpu)
{
__u32 batch, count, total, total_success;
typedef BPF_DECLARE_PERCPU(int, value);
int map_fd, *keys, *visited, key;
const __u32 max_entries = 10;
value pcpu_values[max_entries];
int err, step, value_size;
bool nospace_err;
void *values;
struct bpf_create_map_attr xattr = {
.name = "hash_map",
.map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH :
BPF_MAP_TYPE_HASH,
.key_size = sizeof(int),
.value_size = sizeof(int),
};
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
xattr.max_entries = max_entries;
map_fd = bpf_create_map_xattr(&xattr);
CHECK(map_fd == -1,
"bpf_create_map_xattr()", "error:%s\n", strerror(errno));
value_size = is_pcpu ? sizeof(value) : sizeof(int);
keys = malloc(max_entries * sizeof(int));
if (is_pcpu)
values = pcpu_values;
else
values = malloc(max_entries * sizeof(int));
visited = malloc(max_entries * sizeof(int));
CHECK(!keys || !values || !visited, "malloc()",
"error:%s\n", strerror(errno));
/* test 1: lookup/delete an empty hash table, -ENOENT */
count = max_entries;
err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
values, &count, &opts);
CHECK((err && errno != ENOENT), "empty map",
"error: %s\n", strerror(errno));
/* populate elements to the map */
map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
/* test 2: lookup/delete with count = 0, success */
count = 0;
err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
values, &count, &opts);
CHECK(err, "count = 0", "error: %s\n", strerror(errno));
/* test 3: lookup/delete with count = max_entries, success */
memset(keys, 0, max_entries * sizeof(*keys));
memset(values, 0, max_entries * value_size);
count = max_entries;
err = bpf_map_lookup_and_delete_batch(map_fd, NULL, &batch, keys,
values, &count, &opts);
CHECK((err && errno != ENOENT), "count = max_entries",
"error: %s\n", strerror(errno));
CHECK(count != max_entries, "count = max_entries",
"count = %u, max_entries = %u\n", count, max_entries);
map_batch_verify(visited, max_entries, keys, values, is_pcpu);
/* bpf_map_get_next_key() should return -ENOENT for an empty map. */
err = bpf_map_get_next_key(map_fd, NULL, &key);
CHECK(!err, "bpf_map_get_next_key()", "error: %s\n", strerror(errno));
/* test 4: lookup/delete in a loop with various steps. */
total_success = 0;
for (step = 1; step < max_entries; step++) {
map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
memset(keys, 0, max_entries * sizeof(*keys));
memset(values, 0, max_entries * value_size);
total = 0;
/* iteratively lookup/delete elements with 'step'
* elements each
*/
count = step;
nospace_err = false;
while (true) {
err = bpf_map_lookup_batch(map_fd,
total ? &batch : NULL,
&batch, keys + total,
values +
total * value_size,
&count, &opts);
/* It is possible that we are failing due to buffer size
* not big enough. In such cases, let us just exit and
* go with large steps. Not that a buffer size with
* max_entries should always work.
*/
if (err && errno == ENOSPC) {
nospace_err = true;
break;
}
CHECK((err && errno != ENOENT), "lookup with steps",
"error: %s\n", strerror(errno));
total += count;
if (err)
break;
}
if (nospace_err == true)
continue;
CHECK(total != max_entries, "lookup with steps",
"total = %u, max_entries = %u\n", total, max_entries);
map_batch_verify(visited, max_entries, keys, values, is_pcpu);
total = 0;
count = step;
while (total < max_entries) {
if (max_entries - total < step)
count = max_entries - total;
err = bpf_map_delete_batch(map_fd,
keys + total,
&count, &opts);
CHECK((err && errno != ENOENT), "delete batch",
"error: %s\n", strerror(errno));
total += count;
if (err)
break;
}
CHECK(total != max_entries, "delete with steps",
"total = %u, max_entries = %u\n", total, max_entries);
/* check map is empty, errono == ENOENT */
err = bpf_map_get_next_key(map_fd, NULL, &key);
CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
"error: %s\n", strerror(errno));
/* iteratively lookup/delete elements with 'step'
* elements each
*/
map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
memset(keys, 0, max_entries * sizeof(*keys));
memset(values, 0, max_entries * value_size);
total = 0;
count = step;
nospace_err = false;
while (true) {
err = bpf_map_lookup_and_delete_batch(map_fd,
total ? &batch : NULL,
&batch, keys + total,
values +
total * value_size,
&count, &opts);
/* It is possible that we are failing due to buffer size
* not big enough. In such cases, let us just exit and
* go with large steps. Not that a buffer size with
* max_entries should always work.
*/
if (err && errno == ENOSPC) {
nospace_err = true;
break;
}
CHECK((err && errno != ENOENT), "lookup with steps",
"error: %s\n", strerror(errno));
total += count;
if (err)
break;
}
if (nospace_err == true)
continue;
CHECK(total != max_entries, "lookup/delete with steps",
"total = %u, max_entries = %u\n", total, max_entries);
map_batch_verify(visited, max_entries, keys, values, is_pcpu);
err = bpf_map_get_next_key(map_fd, NULL, &key);
CHECK(!err, "bpf_map_get_next_key()", "error: %s\n",
strerror(errno));
total_success++;
}
CHECK(total_success == 0, "check total_success",
"unexpected failure\n");
free(keys);
free(visited);
if (!is_pcpu)
free(values);
}
void htab_map_batch_ops(void)
{
__test_map_lookup_and_delete_batch(false);
printf("test_%s:PASS\n", __func__);
}
void htab_percpu_map_batch_ops(void)
{
__test_map_lookup_and_delete_batch(true);
printf("test_%s:PASS\n", __func__);
}
void test_htab_map_batch_ops(void)
{
htab_map_batch_ops();
htab_percpu_map_batch_ops();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment