Commit c9ae8c96 authored by Martin KaFai Lau

Merge branch 'fixes for concurrent htab updates'

Hou Tao says:

====================

From: Hou Tao <houtao1@huawei.com>

Hi,

The patchset aims to fix the issues found while investigating the
syzkaller problem reported in [0]. It seems that concurrent updates to
the same hash-table bucket may fail, as shown in patch 1.

Patch 1 uses preempt_disable() to fix the problem for the
htab_use_raw_lock() case. For the !htab_use_raw_lock() case, the problem
is left to the "BPF specific memory allocator" patchset [1], in which
!htab_use_raw_lock() will be removed.
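As an illustration (a sketch only, simplified from the htab_lock_bucket()
hunk below; the task names are illustrative), the interleaving that can
produce the spurious failure on a CONFIG_PREEMPT kernel, and the change
patch 1 makes for the raw-lock case, look roughly like this:

/*
 * Sketch only. Under CONFIG_PREEMPT, migrate_disable() does not disable
 * preemption, so a task can be preempted after bumping the per-cpu
 * map_locked counter but before taking the bucket lock:
 *
 *   task A (CPU 0)                          task B (CPU 0)
 *   migrate_disable()
 *   __this_cpu_inc_return(map_locked) == 1
 *     ... preempted here ...
 *                                           migrate_disable()
 *                                           __this_cpu_inc_return(map_locked) == 2
 *                                           -> update fails with -EBUSY even
 *                                              though no bucket lock is held
 *
 * Patch 1 therefore disables preemption for the htab_use_raw_lock() case:
 */
	if (use_raw_lock)
		preempt_disable();	/* closes the preemption window above */
	else
		migrate_disable();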

Patch 2 fixes the out-of-bound memory read problem reported in [0]. The
problem has the same root cause as the one fixed in patch 1, and it is
fixed by correctly handling the -EBUSY returned from htab_lock_bucket().
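For reference, the corrected error path can be sketched as follows (a
simplified excerpt of the __htab_map_lookup_and_delete_batch() hunk below,
with comments added here): the batch loop runs with instrumentation
disabled and the RCU read lock held, so a failing htab_lock_bucket() has
to undo both before bailing out instead of silently skipping the bucket:

	if (locked) {
		ret = htab_lock_bucket(htab, b, batch, &flags);
		if (ret) {
			/* both were taken before the loop, so drop them here */
			rcu_read_unlock();
			bpf_enable_instrumentation();
			goto after_loop;	/* return the error to the caller */
		}
	}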

Patch 3 adds two test cases for hash-table update: one for the reentrancy
of bpf_map_update_elem(), and another for concurrent updates of the same
hash-table bucket.

Comments are always welcome.

Regards,
Tao

[0]: https://lore.kernel.org/bpf/CACkBjsbuxaR6cv0kXJoVnBfL9ZJXjjoUcMpw_Ogc313jSrg14A@mail.gmail.com/
[1]: https://lore.kernel.org/bpf/20220819214232.18784-1-alexei.starovoitov@gmail.com/

Change Log:

v4:
 * rebased on bpf-next
 * add htab_update to DENYLIST.s390x

v3: https://lore.kernel.org/bpf/20220829023709.1958204-1-houtao@huaweicloud.com/
 * patch 1: update commit message and add Fixes tag
 * patch 2: add Fixes tag
 * patch 3: elaborate the description of test cases

v2: https://lore.kernel.org/bpf/bd60ef93-1c6a-2db2-557d-b09b92ad22bd@huaweicloud.com/
 * Note in the commit message that the fix is for the CONFIG_PREEMPT case,
   and add a Reviewed-by tag for patch 1
 * Drop patch "bpf: Allow normally concurrent map updates for !htab_use_raw_lock() case"

v1: https://lore.kernel.org/bpf/20220821033223.2598791-1-houtao@huaweicloud.com/
====================
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
parents 19707294 1c636b62
kernel/bpf/hashtab.c:

@@ -162,17 +162,25 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
 				   unsigned long *pflags)
 {
 	unsigned long flags;
+	bool use_raw_lock;
 
 	hash = hash & HASHTAB_MAP_LOCK_MASK;
 
-	migrate_disable();
+	use_raw_lock = htab_use_raw_lock(htab);
+	if (use_raw_lock)
+		preempt_disable();
+	else
+		migrate_disable();
 	if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
 		__this_cpu_dec(*(htab->map_locked[hash]));
-		migrate_enable();
+		if (use_raw_lock)
+			preempt_enable();
+		else
+			migrate_enable();
 		return -EBUSY;
 	}
 
-	if (htab_use_raw_lock(htab))
+	if (use_raw_lock)
 		raw_spin_lock_irqsave(&b->raw_lock, flags);
 	else
 		spin_lock_irqsave(&b->lock, flags);
@@ -185,12 +193,17 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
 				      struct bucket *b, u32 hash,
 				      unsigned long flags)
 {
+	bool use_raw_lock = htab_use_raw_lock(htab);
+
 	hash = hash & HASHTAB_MAP_LOCK_MASK;
-	if (htab_use_raw_lock(htab))
+	if (use_raw_lock)
 		raw_spin_unlock_irqrestore(&b->raw_lock, flags);
 	else
 		spin_unlock_irqrestore(&b->lock, flags);
 	__this_cpu_dec(*(htab->map_locked[hash]));
-	migrate_enable();
+	if (use_raw_lock)
+		preempt_enable();
+	else
+		migrate_enable();
 }
@@ -1691,8 +1704,11 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
 	/* do not grab the lock unless need it (bucket_cnt > 0). */
 	if (locked) {
 		ret = htab_lock_bucket(htab, b, batch, &flags);
-		if (ret)
-			goto next_batch;
+		if (ret) {
+			rcu_read_unlock();
+			bpf_enable_instrumentation();
+			goto after_loop;
+		}
 	}
 	bucket_cnt = 0;
tools/testing/selftests/bpf/DENYLIST.s390x:

@@ -68,3 +68,4 @@ unpriv_bpf_disabled # fentry
 setget_sockopt # attach unexpected error: -524 (trampoline)
 cb_refs # expected error message unexpected error: -524 (trampoline)
 cgroup_hierarchical_stats # JIT does not support calling kernel function (kfunc)
+htab_update # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
tools/testing/selftests/bpf/prog_tests/htab_update.c (new file):

// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <sched.h>
#include <stdbool.h>
#include <test_progs.h>
#include "htab_update.skel.h"

struct htab_update_ctx {
	int fd;
	int loop;
	bool stop;
};

static void test_reenter_update(void)
{
	struct htab_update *skel;
	unsigned int key, value;
	int err;

	skel = htab_update__open();
	if (!ASSERT_OK_PTR(skel, "htab_update__open"))
		return;

	/* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
	bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
	err = htab_update__load(skel);
	if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
		goto out;

	skel->bss->pid = getpid();
	err = htab_update__attach(skel);
	if (!ASSERT_OK(err, "htab_update__attach"))
		goto out;

	/* Will trigger the reentrancy of bpf_map_update_elem() */
	key = 0;
	value = 0;
	err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
	if (!ASSERT_OK(err, "add element"))
		goto out;

	ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
out:
	htab_update__destroy(skel);
}

static void *htab_update_thread(void *arg)
{
	struct htab_update_ctx *ctx = arg;
	cpu_set_t cpus;
	int i;

	/* Pinned on CPU 0 */
	CPU_ZERO(&cpus);
	CPU_SET(0, &cpus);
	pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus);

	i = 0;
	while (i++ < ctx->loop && !ctx->stop) {
		unsigned int key = 0, value = 0;
		int err;

		err = bpf_map_update_elem(ctx->fd, &key, &value, 0);
		if (err) {
			ctx->stop = true;
			return (void *)(long)err;
		}
	}

	return NULL;
}

static void test_concurrent_update(void)
{
	struct htab_update_ctx ctx;
	struct htab_update *skel;
	unsigned int i, nr;
	pthread_t *tids;
	int err;

	skel = htab_update__open_and_load();
	if (!ASSERT_OK_PTR(skel, "htab_update__open_and_load"))
		return;

	ctx.fd = bpf_map__fd(skel->maps.htab);
	ctx.loop = 1000;
	ctx.stop = false;

	nr = 4;
	tids = calloc(nr, sizeof(*tids));
	if (!ASSERT_NEQ(tids, NULL, "no mem"))
		goto out;

	for (i = 0; i < nr; i++) {
		err = pthread_create(&tids[i], NULL, htab_update_thread, &ctx);
		if (!ASSERT_OK(err, "pthread_create")) {
			unsigned int j;

			ctx.stop = true;
			for (j = 0; j < i; j++)
				pthread_join(tids[j], NULL);
			goto out;
		}
	}

	for (i = 0; i < nr; i++) {
		void *thread_err = NULL;

		pthread_join(tids[i], &thread_err);
		ASSERT_EQ(thread_err, NULL, "update error");
	}

out:
	if (tids)
		free(tids);
	htab_update__destroy(skel);
}

void test_htab_update(void)
{
	if (test__start_subtest("reenter_update"))
		test_reenter_update();
	if (test__start_subtest("concurrent_update"))
		test_concurrent_update();
}
tools/testing/selftests/bpf/progs/htab_update.c (new file):

// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 1);
	__uint(key_size, sizeof(__u32));
	__uint(value_size, sizeof(__u32));
} htab SEC(".maps");

int pid = 0;
int update_err = 0;

SEC("?fentry/lookup_elem_raw")
int lookup_elem_raw(void *ctx)
{
	__u32 key = 0, value = 1;

	if ((bpf_get_current_pid_tgid() >> 32) != pid)
		return 0;

	update_err = bpf_map_update_elem(&htab, &key, &value, 0);
	return 0;
}