Commit fdcd4467 authored by Paolo Abeni's avatar Paolo Abeni

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2024-02-22

The following pull-request contains BPF updates for your *net* tree.

We've added 11 non-merge commits during the last 24 day(s) which contain
a total of 15 files changed, 217 insertions(+), 17 deletions(-).

The main changes are:

1) Fix a syzkaller-triggered oops when attempting to read the vsyscall
   page through bpf_probe_read_kernel and friends, from Hou Tao.

2) Fix a kernel panic due to uninitialized iter position pointer in
   bpf_iter_task, from Yafang Shao.

3) Fix a race between bpf_timer_cancel_and_free and bpf_timer_cancel,
   from Martin KaFai Lau.

4) Fix a xsk warning in skb_add_rx_frag() (under CONFIG_DEBUG_NET)
   due to incorrect truesize accounting, from Sebastian Andrzej Siewior.

5) Fix a NULL pointer dereference in sk_psock_verdict_data_ready,
   from Shigeru Yoshida.

6) Fix a resolve_btfids warning when bpf_cpumask symbol cannot be
   resolved, from Hari Bathini.

bpf-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  bpf, sockmap: Fix NULL pointer dereference in sk_psock_verdict_data_ready()
  selftests/bpf: Add negative test cases for task iter
  bpf: Fix an issue due to uninitialized bpf_iter_task
  selftests/bpf: Test racing between bpf_timer_cancel_and_free and bpf_timer_cancel
  bpf: Fix racing between bpf_timer_cancel_and_free and bpf_timer_cancel
  selftest/bpf: Test the read of vsyscall page under x86-64
  x86/mm: Disallow vsyscall page read for copy_from_kernel_nofault()
  x86/mm: Move is_vsyscall_vaddr() into asm/vsyscall.h
  bpf, scripts: Correct GPL license name
  xsk: Add truesize to skb_add_rx_frag().
  bpf: Fix warning for bpf_cpumask in verifier
====================

Link: https://lore.kernel.org/r/20240221231826.1404-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
parents 3489182b 4cd12c60
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/seqlock.h> #include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h> #include <uapi/asm/vsyscall.h>
#include <asm/page_types.h>
#ifdef CONFIG_X86_VSYSCALL_EMULATION #ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void); extern void map_vsyscall(void);
...@@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code, ...@@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code,
} }
#endif #endif
/*
 * The (legacy) vsyscall page is the long page in the kernel portion
 * of the address space that has user-accessible permissions.
 */
static inline bool is_vsyscall_vaddr(unsigned long vaddr)
{
	/* Compare on page granularity: any address inside the page counts. */
	unsigned long page_start = vaddr & PAGE_MASK;

	return unlikely(page_start == VSYSCALL_ADDR);
}
#endif /* _ASM_X86_VSYSCALL_H */ #endif /* _ASM_X86_VSYSCALL_H */
...@@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, ...@@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl); show_opcodes(regs, loglvl);
} }
/*
* The (legacy) vsyscall page is the long page in the kernel portion
* of the address space that has user-accessible permissions.
*/
static bool is_vsyscall_vaddr(unsigned long vaddr)
{
return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
}
static void static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code) unsigned long address, u32 pkey, int si_code)
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <asm/vsyscall.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{ {
...@@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) ...@@ -15,6 +17,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
if (vaddr < TASK_SIZE_MAX + PAGE_SIZE) if (vaddr < TASK_SIZE_MAX + PAGE_SIZE)
return false; return false;
/*
* Reading from the vsyscall page may cause an unhandled fault in
* certain cases. Though it is at an address above TASK_SIZE_MAX, it is
* usually considered as a user space address.
*/
if (is_vsyscall_vaddr(vaddr))
return false;
/* /*
* Allow everything during early boot before 'x86_virt_bits' * Allow everything during early boot before 'x86_virt_bits'
* is initialized. Needed for instruction decoding in early * is initialized. Needed for instruction decoding in early
......
...@@ -1101,6 +1101,7 @@ struct bpf_hrtimer { ...@@ -1101,6 +1101,7 @@ struct bpf_hrtimer {
struct bpf_prog *prog; struct bpf_prog *prog;
void __rcu *callback_fn; void __rcu *callback_fn;
void *value; void *value;
struct rcu_head rcu;
}; };
/* the actual struct hidden inside uapi struct bpf_timer */ /* the actual struct hidden inside uapi struct bpf_timer */
...@@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) ...@@ -1332,6 +1333,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
if (in_nmi()) if (in_nmi())
return -EOPNOTSUPP; return -EOPNOTSUPP;
rcu_read_lock();
__bpf_spin_lock_irqsave(&timer->lock); __bpf_spin_lock_irqsave(&timer->lock);
t = timer->timer; t = timer->timer;
if (!t) { if (!t) {
...@@ -1353,6 +1355,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer) ...@@ -1353,6 +1355,7 @@ BPF_CALL_1(bpf_timer_cancel, struct bpf_timer_kern *, timer)
* if it was running. * if it was running.
*/ */
ret = ret ?: hrtimer_cancel(&t->timer); ret = ret ?: hrtimer_cancel(&t->timer);
rcu_read_unlock();
return ret; return ret;
} }
...@@ -1407,7 +1410,7 @@ void bpf_timer_cancel_and_free(void *val) ...@@ -1407,7 +1410,7 @@ void bpf_timer_cancel_and_free(void *val)
*/ */
if (this_cpu_read(hrtimer_running) != t) if (this_cpu_read(hrtimer_running) != t)
hrtimer_cancel(&t->timer); hrtimer_cancel(&t->timer);
kfree(t); kfree_rcu(t, rcu);
} }
BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr) BPF_CALL_2(bpf_kptr_xchg, void *, map_value, void *, ptr)
......
...@@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it, ...@@ -978,6 +978,8 @@ __bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) != BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
__alignof__(struct bpf_iter_task)); __alignof__(struct bpf_iter_task));
kit->pos = NULL;
switch (flags) { switch (flags) {
case BPF_TASK_ITER_ALL_THREADS: case BPF_TASK_ITER_ALL_THREADS:
case BPF_TASK_ITER_ALL_PROCS: case BPF_TASK_ITER_ALL_PROCS:
......
...@@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc) ...@@ -5227,7 +5227,9 @@ BTF_ID(struct, prog_test_ref_kfunc)
#ifdef CONFIG_CGROUPS #ifdef CONFIG_CGROUPS
BTF_ID(struct, cgroup) BTF_ID(struct, cgroup)
#endif #endif
#ifdef CONFIG_BPF_JIT
BTF_ID(struct, bpf_cpumask) BTF_ID(struct, bpf_cpumask)
#endif
BTF_ID(struct, task_struct) BTF_ID(struct, task_struct)
BTF_SET_END(rcu_protected_types) BTF_SET_END(rcu_protected_types)
......
...@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk) ...@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock(); rcu_read_lock();
psock = sk_psock(sk); psock = sk_psock(sk);
if (psock) if (psock) {
psock->saved_data_ready(sk); read_lock_bh(&sk->sk_callback_lock);
sk_psock_data_ready(sk, psock);
read_unlock_bh(&sk->sk_callback_lock);
}
rcu_read_unlock(); rcu_read_unlock();
} }
} }
......
...@@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, ...@@ -722,7 +722,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
memcpy(vaddr, buffer, len); memcpy(vaddr, buffer, len);
kunmap_local(vaddr); kunmap_local(vaddr);
skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
} }
if (first_frag && desc->options & XDP_TX_METADATA) { if (first_frag && desc->options & XDP_TX_METADATA) {
......
...@@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode ...@@ -513,7 +513,7 @@ eBPF programs can have an associated license, passed along with the bytecode
instructions to the kernel when the programs are loaded. The format for that instructions to the kernel when the programs are loaded. The format for that
string is identical to the one in use for kernel modules (Dual licenses, such string is identical to the one in use for kernel modules (Dual licenses, such
as "Dual BSD/GPL", may be used). Some helper functions are only accessible to as "Dual BSD/GPL", may be used). Some helper functions are only accessible to
programs that are compatible with the GNU Privacy License (GPL). programs that are compatible with the GNU General Public License (GNU GPL).
In order to use such helpers, the eBPF program must be loaded with the correct In order to use such helpers, the eBPF program must be loaded with the correct
license string passed (via **attr**) to the **bpf**\\ () system call, and this license string passed (via **attr**) to the **bpf**\\ () system call, and this
......
...@@ -193,6 +193,7 @@ static void subtest_task_iters(void) ...@@ -193,6 +193,7 @@ static void subtest_task_iters(void)
ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
pthread_mutex_unlock(&do_nothing_mutex); pthread_mutex_unlock(&do_nothing_mutex);
for (int i = 0; i < thread_num; i++) for (int i = 0; i < thread_num; i++)
ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join"); ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
#include "test_progs.h"
#include "read_vsyscall.skel.h"
#if defined(__x86_64__)
/* For VSYSCALL_ADDR */
#include <asm/vsyscall.h>
#else
/* To prevent build failure on non-x86 arch */
#define VSYSCALL_ADDR 0UL
#endif
/* Expected result of each BPF read helper when pointed at the vsyscall
 * page: helpers treating it as a kernel address fail with -ERANGE, helpers
 * treating it as a user address fail with -EFAULT.  The array order must
 * match the read_ret[] slot order filled in by the BPF programs
 * (read_ret[0..5] in do_probe_read, read_ret[6..7] in do_copy_from_user).
 */
struct read_ret_desc {
const char *name;
int ret;
} all_read[] = {
{ .name = "probe_read_kernel", .ret = -ERANGE },
{ .name = "probe_read_kernel_str", .ret = -ERANGE },
{ .name = "probe_read", .ret = -ERANGE },
{ .name = "probe_read_str", .ret = -ERANGE },
{ .name = "probe_read_user", .ret = -EFAULT },
{ .name = "probe_read_user_str", .ret = -EFAULT },
{ .name = "copy_from_user", .ret = -EFAULT },
{ .name = "copy_from_user_task", .ret = -EFAULT },
};
/* Verify that the BPF probe-read/copy helpers return the expected error
 * codes (rather than faulting) when asked to read the x86-64 vsyscall page.
 */
void test_read_vsyscall(void)
{
struct read_vsyscall *skel;
unsigned int i;
int err;

/* The vsyscall page only exists on x86-64; skip everywhere else. */
#if !defined(__x86_64__)
test__skip();
return;
#endif
skel = read_vsyscall__open_and_load();
if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
return;
/* Restrict the attached programs to reads triggered by this process. */
skel->bss->target_pid = getpid();
err = read_vsyscall__attach(skel);
if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
goto out;
/* Userspace may not have a vsyscall page mapped at all (e.g. with
 * LEGACY_VSYSCALL_NONE), but that does not affect the error codes
 * returned by the helpers.
 */
skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
/* usleep() triggers the sys_nanosleep fentry programs. */
usleep(1);
for (i = 0; i < ARRAY_SIZE(all_read); i++)
ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
out:
read_vsyscall__destroy(skel);
}
...@@ -4,10 +4,29 @@ ...@@ -4,10 +4,29 @@
#include "timer.skel.h" #include "timer.skel.h"
#include "timer_failure.skel.h" #include "timer_failure.skel.h"
#define NUM_THR 8

/* Worker thread: repeatedly run the BPF program whose fd is passed via
 * arg, stopping early on any failure.  Exits with its argument so the
 * joining thread can sanity-check the return value.
 */
static void *spin_lock_thread(void *arg)
{
	int *prog_fd = arg;
	LIBBPF_OPTS(bpf_test_run_opts, topts);

	for (int round = 0; round < 10000; round++) {
		int err = bpf_prog_test_run_opts(*prog_fd, &topts);

		if (!ASSERT_OK(err, "test_run_opts err"))
			break;
		if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
			break;
	}
	pthread_exit(arg);
}
static int timer(struct timer *timer_skel) static int timer(struct timer *timer_skel)
{ {
int err, prog_fd; int i, err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts); LIBBPF_OPTS(bpf_test_run_opts, topts);
pthread_t thread_id[NUM_THR];
void *ret;
err = timer__attach(timer_skel); err = timer__attach(timer_skel);
if (!ASSERT_OK(err, "timer_attach")) if (!ASSERT_OK(err, "timer_attach"))
...@@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel) ...@@ -43,6 +62,20 @@ static int timer(struct timer *timer_skel)
/* check that code paths completed */ /* check that code paths completed */
ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok"); ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
prog_fd = bpf_program__fd(timer_skel->progs.race);
for (i = 0; i < NUM_THR; i++) {
err = pthread_create(&thread_id[i], NULL,
&spin_lock_thread, &prog_fd);
if (!ASSERT_OK(err, "pthread_create"))
break;
}
while (i) {
err = pthread_join(thread_id[--i], &ret);
if (ASSERT_OK(err, "pthread_join"))
ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
}
return 0; return 0;
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
pid_t target_pid; pid_t target_pid;
int procs_cnt, threads_cnt, proc_threads_cnt; int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt;
void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym;
...@@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx) ...@@ -26,6 +26,16 @@ int iter_task_for_each_sleep(void *ctx)
procs_cnt = threads_cnt = proc_threads_cnt = 0; procs_cnt = threads_cnt = proc_threads_cnt = 0;
bpf_rcu_read_lock(); bpf_rcu_read_lock();
bpf_for_each(task, pos, NULL, ~0U) {
/* Below instructions shouldn't be executed for invalid flags */
invalid_cnt++;
}
bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) {
/* Below instructions shouldn't be executed for invalid task__nullable */
invalid_cnt++;
}
bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
if (pos->pid == target_pid) if (pos->pid == target_pid)
procs_cnt++; procs_cnt++;
......
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
int target_pid = 0;
void *user_ptr = 0;
int read_ret[8];
char _license[] SEC("license") = "GPL";
/* Non-sleepable fentry program on sys_nanosleep: exercises the
 * probe-read helpers against user_ptr (set by userspace to the vsyscall
 * address) and records each helper's return code in read_ret[0..5].
 */
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
{
char buf[8];

/* Only react to the nanosleep issued by the test process itself. */
if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
return 0;

read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
return 0;
}
/* Sleepable ("fentry.s") counterpart: the copy_from_user helpers may
 * sleep, so they need a sleepable program.  Results go in read_ret[6..7].
 */
SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
int do_copy_from_user(void *ctx)
{
char buf[8];

/* Only react to the nanosleep issued by the test process itself. */
if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
return 0;

read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
bpf_get_current_task_btf(), 0);
return 0;
}
...@@ -51,7 +51,8 @@ struct { ...@@ -51,7 +51,8 @@ struct {
__uint(max_entries, 1); __uint(max_entries, 1);
__type(key, int); __type(key, int);
__type(value, struct elem); __type(value, struct elem);
} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"); } abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"),
race_array SEC(".maps");
__u64 bss_data; __u64 bss_data;
__u64 abs_data; __u64 abs_data;
...@@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a) ...@@ -390,3 +391,34 @@ int BPF_PROG2(test5, int, a)
return 0; return 0;
} }
/* Timer callback for race(): immediately re-arms the timer (1ms) so the
 * timer keeps firing while concurrent race() runs call bpf_timer_cancel(),
 * stressing the bpf_timer_cancel_and_free vs bpf_timer_cancel race.
 */
static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer)
{
bpf_timer_start(timer, 1000000, 0);
return 0;
}
/* Syscall program run concurrently from many threads: each run
 * (re)initializes, arms, and cancels the same map timer so that cancel
 * races against the timer firing and being freed.  Returns 0 on success,
 * 1 on an unexpected setup failure.
 */
SEC("syscall")
int race(void *ctx)
{
struct bpf_timer *timer;
int err, race_key = 0;
struct elem init;

__builtin_memset(&init, 0, sizeof(struct elem));
bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);

timer = bpf_map_lookup_elem(&race_array, &race_key);
if (!timer)
return 1;

/* -EBUSY just means another thread already initialized this timer. */
err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
if (err && err != -EBUSY)
return 1;

bpf_timer_set_callback(timer, race_timer_callback);
bpf_timer_start(timer, 0, 0);
bpf_timer_cancel(timer);
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment