Commit 2900005e authored by Yan Zhu's avatar Yan Zhu Committed by Daniel Borkmann

bpf: Move BPF sysctls from kernel/sysctl.c to BPF core

We're moving sysctls out of kernel/sysctl.c as it is a mess. We
already moved all filesystem sysctls out. And with time the goal
is to move all sysctls out to their own subsystem/actual user.

kernel/sysctl.c has grown to an insane mess and its easy to run
into conflicts with it. The effort to move them out into various
subsystems is part of this.
Signed-off-by: default avatarYan Zhu <zhuyan34@huawei.com>
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/bpf/20220407070759.29506-1-zhuyan34@huawei.com
parent 31231092
......@@ -4908,3 +4908,90 @@ const struct bpf_verifier_ops bpf_syscall_verifier_ops = {
const struct bpf_prog_ops bpf_syscall_prog_ops = {
.test_run = bpf_prog_test_run_syscall,
};
#ifdef CONFIG_SYSCTL
static int bpf_stats_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct static_key *key = (struct static_key *)table->data;
static int saved_val;
int val, ret;
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(val),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&bpf_stats_enabled_mutex);
val = saved_val;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret && val != saved_val) {
if (val)
static_key_slow_inc(key);
else
static_key_slow_dec(key);
saved_val = val;
}
mutex_unlock(&bpf_stats_enabled_mutex);
return ret;
}
void __weak unpriv_ebpf_notify(int new_state)
{
}
static int bpf_unpriv_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, unpriv_enable = *(int *)table->data;
bool locked_state = unpriv_enable == 1;
struct ctl_table tmp = *table;
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
tmp.data = &unpriv_enable;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
if (locked_state && unpriv_enable != 1)
return -EPERM;
*(int *)table->data = unpriv_enable;
}
unpriv_ebpf_notify(unpriv_enable);
return ret;
}
static struct ctl_table bpf_syscall_table[] = {
{
.procname = "unprivileged_bpf_disabled",
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
.data = &bpf_stats_enabled_key.key,
.maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = bpf_stats_handler,
},
{ }
};
static int __init bpf_syscall_sysctl_init(void)
{
register_sysctl_init("kernel", bpf_syscall_table);
return 0;
}
late_initcall(bpf_syscall_sysctl_init);
#endif /* CONFIG_SYSCTL */
......@@ -62,7 +62,6 @@
#include <linux/binfmts.h>
#include <linux/sched/sysctl.h>
#include <linux/kexec.h>
#include <linux/bpf.h>
#include <linux/mount.h>
#include <linux/userfaultfd_k.h>
#include <linux/latencytop.h>
......@@ -148,66 +147,6 @@ static const int max_extfrag_threshold = 1000;
#endif /* CONFIG_SYSCTL */
#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
static int bpf_stats_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct static_key *key = (struct static_key *)table->data;
static int saved_val;
int val, ret;
struct ctl_table tmp = {
.data = &val,
.maxlen = sizeof(val),
.mode = table->mode,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
};
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
mutex_lock(&bpf_stats_enabled_mutex);
val = saved_val;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret && val != saved_val) {
if (val)
static_key_slow_inc(key);
else
static_key_slow_dec(key);
saved_val = val;
}
mutex_unlock(&bpf_stats_enabled_mutex);
return ret;
}
void __weak unpriv_ebpf_notify(int new_state)
{
}
static int bpf_unpriv_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
int ret, unpriv_enable = *(int *)table->data;
bool locked_state = unpriv_enable == 1;
struct ctl_table tmp = *table;
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
tmp.data = &unpriv_enable;
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
if (locked_state && unpriv_enable != 1)
return -EPERM;
*(int *)table->data = unpriv_enable;
}
unpriv_ebpf_notify(unpriv_enable);
return ret;
}
#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */
/*
* /proc/sys support
*/
......@@ -2299,24 +2238,6 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_BPF_SYSCALL
{
.procname = "unprivileged_bpf_disabled",
.data = &sysctl_unprivileged_bpf_disabled,
.maxlen = sizeof(sysctl_unprivileged_bpf_disabled),
.mode = 0644,
.proc_handler = bpf_unpriv_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO,
},
{
.procname = "bpf_stats_enabled",
.data = &bpf_stats_enabled_key.key,
.maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
.proc_handler = bpf_stats_handler,
},
#endif
#if defined(CONFIG_TREE_RCU)
{
.procname = "panic_on_rcu_stall",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment