Commit b9d37bbb authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Alexei Starovoitov says:

====================
pull-request: bpf 2020-06-17

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 2 day(s) which contain
a total of 14 files changed, 158 insertions(+), 59 deletions(-).

The main changes are:

1) Important fix for bpf_probe_read_kernel_str() return value, from Andrii.

2) [gs]etsockopt fix for large optlen, from Stanislav.

3) devmap allocation fix, from Toke.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 69119673 8030e250
...@@ -86,6 +86,20 @@ then the next program in the chain (A) will see those changes, ...@@ -86,6 +86,20 @@ then the next program in the chain (A) will see those changes,
*not* the original input ``setsockopt`` arguments. The potentially *not* the original input ``setsockopt`` arguments. The potentially
modified values will be then passed down to the kernel. modified values will be then passed down to the kernel.
Large optval
============
When the ``optval`` is greater than the ``PAGE_SIZE``, the BPF program
can access only the first ``PAGE_SIZE`` of that data. So it has two options:
* Set ``optlen`` to zero, which indicates that the kernel should
use the original buffer from the userspace. Any modifications
done by the BPF program to the ``optval`` are ignored.
* Set ``optlen`` to a value less than ``PAGE_SIZE``, which
indicates that the kernel should use BPF's trimmed ``optval``.
When the BPF program returns with the ``optlen`` greater than
``PAGE_SIZE``, the userspace will receive ``EFAULT`` errno.
Example Example
======= =======
......
...@@ -3168,7 +3168,7 @@ union bpf_attr { ...@@ -3168,7 +3168,7 @@ union bpf_attr {
* Return * Return
* The id is returned or 0 in case the id could not be retrieved. * The id is returned or 0 in case the id could not be retrieved.
* *
* void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description * Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*. * Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
......
...@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp, ...@@ -1276,16 +1276,23 @@ static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen) static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{ {
if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0) if (unlikely(max_optlen < 0))
return -EINVAL; return -EINVAL;
if (unlikely(max_optlen > PAGE_SIZE)) {
/* We don't expose optvals that are greater than PAGE_SIZE
* to the BPF program.
*/
max_optlen = PAGE_SIZE;
}
ctx->optval = kzalloc(max_optlen, GFP_USER); ctx->optval = kzalloc(max_optlen, GFP_USER);
if (!ctx->optval) if (!ctx->optval)
return -ENOMEM; return -ENOMEM;
ctx->optval_end = ctx->optval + max_optlen; ctx->optval_end = ctx->optval + max_optlen;
return 0; return max_optlen;
} }
static void sockopt_free_buf(struct bpf_sockopt_kern *ctx) static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
...@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -1319,13 +1326,13 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
*/ */
max_optlen = max_t(int, 16, *optlen); max_optlen = max_t(int, 16, *optlen);
ret = sockopt_alloc_buf(&ctx, max_optlen); max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
if (ret) if (max_optlen < 0)
return ret; return max_optlen;
ctx.optlen = *optlen; ctx.optlen = *optlen;
if (copy_from_user(ctx.optval, optval, *optlen) != 0) { if (copy_from_user(ctx.optval, optval, min(*optlen, max_optlen)) != 0) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
} }
...@@ -1353,9 +1360,15 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level, ...@@ -1353,9 +1360,15 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
/* export any potential modifications */ /* export any potential modifications */
*level = ctx.level; *level = ctx.level;
*optname = ctx.optname; *optname = ctx.optname;
/* optlen == 0 from BPF indicates that we should
* use original userspace data.
*/
if (ctx.optlen != 0) {
*optlen = ctx.optlen; *optlen = ctx.optlen;
*kernel_optval = ctx.optval; *kernel_optval = ctx.optval;
} }
}
out: out:
if (ret) if (ret)
...@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1385,12 +1398,12 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
__cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT)) __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
return retval; return retval;
ret = sockopt_alloc_buf(&ctx, max_optlen);
if (ret)
return ret;
ctx.optlen = max_optlen; ctx.optlen = max_optlen;
max_optlen = sockopt_alloc_buf(&ctx, max_optlen);
if (max_optlen < 0)
return max_optlen;
if (!retval) { if (!retval) {
/* If kernel getsockopt finished successfully, /* If kernel getsockopt finished successfully,
* copy whatever was returned to the user back * copy whatever was returned to the user back
...@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1404,10 +1417,8 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out; goto out;
} }
if (ctx.optlen > max_optlen) if (copy_from_user(ctx.optval, optval,
ctx.optlen = max_optlen; min(ctx.optlen, max_optlen)) != 0) {
if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
} }
...@@ -1436,11 +1447,13 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level, ...@@ -1436,11 +1447,13 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
goto out; goto out;
} }
if (ctx.optlen != 0) {
if (copy_to_user(optval, ctx.optval, ctx.optlen) || if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
put_user(ctx.optlen, optlen)) { put_user(ctx.optlen, optlen)) {
ret = -EFAULT; ret = -EFAULT;
goto out; goto out;
} }
}
ret = ctx.retval; ret = ctx.retval;
......
...@@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list); ...@@ -86,12 +86,13 @@ static DEFINE_PER_CPU(struct list_head, dev_flush_list);
static DEFINE_SPINLOCK(dev_map_lock); static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list); static LIST_HEAD(dev_map_list);
static struct hlist_head *dev_map_create_hash(unsigned int entries) static struct hlist_head *dev_map_create_hash(unsigned int entries,
int numa_node)
{ {
int i; int i;
struct hlist_head *hash; struct hlist_head *hash;
hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL); hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node);
if (hash != NULL) if (hash != NULL)
for (i = 0; i < entries; i++) for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]); INIT_HLIST_HEAD(&hash[i]);
...@@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) ...@@ -145,7 +146,8 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
return -EINVAL; return -EINVAL;
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets,
dtab->map.numa_node);
if (!dtab->dev_index_head) if (!dtab->dev_index_head)
goto free_charge; goto free_charge;
...@@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map) ...@@ -232,7 +234,7 @@ static void dev_map_free(struct bpf_map *map)
} }
} }
kfree(dtab->dev_index_head); bpf_map_area_free(dtab->dev_index_head);
} else { } else {
for (i = 0; i < dtab->map.max_entries; i++) { for (i = 0; i < dtab->map.max_entries; i++) {
struct bpf_dtab_netdev *dev; struct bpf_dtab_netdev *dev;
......
...@@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr) ...@@ -241,7 +241,7 @@ bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr)
if (unlikely(ret < 0)) if (unlikely(ret < 0))
goto fail; goto fail;
return 0; return ret;
fail: fail:
memset(dst, 0, size); memset(dst, 0, size);
return ret; return ret;
......
...@@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) ...@@ -462,6 +462,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
xdpf->len = totsize - metasize; xdpf->len = totsize - metasize;
xdpf->headroom = 0; xdpf->headroom = 0;
xdpf->metasize = metasize; xdpf->metasize = metasize;
xdpf->frame_sz = PAGE_SIZE;
xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
xsk_buff_free(xdp); xsk_buff_free(xdp);
......
...@@ -509,11 +509,8 @@ static void *alloc_rec_per_cpu(int record_size) ...@@ -509,11 +509,8 @@ static void *alloc_rec_per_cpu(int record_size)
{ {
unsigned int nr_cpus = bpf_num_possible_cpus(); unsigned int nr_cpus = bpf_num_possible_cpus();
void *array; void *array;
size_t size;
size = record_size * nr_cpus; array = calloc(nr_cpus, record_size);
array = malloc(size);
memset(array, 0, size);
if (!array) { if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
...@@ -528,8 +525,7 @@ static struct stats_record *alloc_stats_record(void) ...@@ -528,8 +525,7 @@ static struct stats_record *alloc_stats_record(void)
int i; int i;
/* Alloc main stats_record structure */ /* Alloc main stats_record structure */
rec = malloc(sizeof(*rec)); rec = calloc(1, sizeof(*rec));
memset(rec, 0, sizeof(*rec));
if (!rec) { if (!rec) {
fprintf(stderr, "Mem alloc error\n"); fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
......
...@@ -207,11 +207,8 @@ static struct datarec *alloc_record_per_cpu(void) ...@@ -207,11 +207,8 @@ static struct datarec *alloc_record_per_cpu(void)
{ {
unsigned int nr_cpus = bpf_num_possible_cpus(); unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec *array; struct datarec *array;
size_t size;
size = sizeof(struct datarec) * nr_cpus; array = calloc(nr_cpus, sizeof(struct datarec));
array = malloc(size);
memset(array, 0, size);
if (!array) { if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
...@@ -226,11 +223,11 @@ static struct stats_record *alloc_stats_record(void) ...@@ -226,11 +223,11 @@ static struct stats_record *alloc_stats_record(void)
size = sizeof(*rec) + n_cpus * sizeof(struct record); size = sizeof(*rec) + n_cpus * sizeof(struct record);
rec = malloc(size); rec = malloc(size);
memset(rec, 0, size);
if (!rec) { if (!rec) {
fprintf(stderr, "Mem alloc error\n"); fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
} }
memset(rec, 0, size);
rec->rx_cnt.cpu = alloc_record_per_cpu(); rec->rx_cnt.cpu = alloc_record_per_cpu();
rec->redir_err.cpu = alloc_record_per_cpu(); rec->redir_err.cpu = alloc_record_per_cpu();
rec->kthread.cpu = alloc_record_per_cpu(); rec->kthread.cpu = alloc_record_per_cpu();
......
...@@ -198,11 +198,8 @@ static struct datarec *alloc_record_per_cpu(void) ...@@ -198,11 +198,8 @@ static struct datarec *alloc_record_per_cpu(void)
{ {
unsigned int nr_cpus = bpf_num_possible_cpus(); unsigned int nr_cpus = bpf_num_possible_cpus();
struct datarec *array; struct datarec *array;
size_t size;
size = sizeof(struct datarec) * nr_cpus; array = calloc(nr_cpus, sizeof(struct datarec));
array = malloc(size);
memset(array, 0, size);
if (!array) { if (!array) {
fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus); fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
...@@ -214,11 +211,8 @@ static struct record *alloc_record_per_rxq(void) ...@@ -214,11 +211,8 @@ static struct record *alloc_record_per_rxq(void)
{ {
unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries; unsigned int nr_rxqs = bpf_map__def(rx_queue_index_map)->max_entries;
struct record *array; struct record *array;
size_t size;
size = sizeof(struct record) * nr_rxqs; array = calloc(nr_rxqs, sizeof(struct record));
array = malloc(size);
memset(array, 0, size);
if (!array) { if (!array) {
fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs); fprintf(stderr, "Mem alloc error (nr_rxqs:%u)\n", nr_rxqs);
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
...@@ -232,8 +226,7 @@ static struct stats_record *alloc_stats_record(void) ...@@ -232,8 +226,7 @@ static struct stats_record *alloc_stats_record(void)
struct stats_record *rec; struct stats_record *rec;
int i; int i;
rec = malloc(sizeof(*rec)); rec = calloc(1, sizeof(struct stats_record));
memset(rec, 0, sizeof(*rec));
if (!rec) { if (!rec) {
fprintf(stderr, "Mem alloc error\n"); fprintf(stderr, "Mem alloc error\n");
exit(EXIT_FAIL_MEM); exit(EXIT_FAIL_MEM);
......
...@@ -49,7 +49,7 @@ MAP COMMANDS ...@@ -49,7 +49,7 @@ MAP COMMANDS
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** } | | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** }
DESCRIPTION DESCRIPTION
=========== ===========
......
...@@ -49,6 +49,7 @@ const char * const map_type_name[] = { ...@@ -49,6 +49,7 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_STACK] = "stack", [BPF_MAP_TYPE_STACK] = "stack",
[BPF_MAP_TYPE_SK_STORAGE] = "sk_storage", [BPF_MAP_TYPE_SK_STORAGE] = "sk_storage",
[BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops", [BPF_MAP_TYPE_STRUCT_OPS] = "struct_ops",
[BPF_MAP_TYPE_RINGBUF] = "ringbuf",
}; };
const size_t map_type_name_size = ARRAY_SIZE(map_type_name); const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
...@@ -1590,7 +1591,7 @@ static int do_help(int argc, char **argv) ...@@ -1590,7 +1591,7 @@ static int do_help(int argc, char **argv)
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops }\n" " queue | stack | sk_storage | struct_ops | ringbuf }\n"
" " HELP_SPEC_OPTIONS "\n" " " HELP_SPEC_OPTIONS "\n"
"", "",
bin_name, argv[-2]); bin_name, argv[-2]);
......
...@@ -3168,7 +3168,7 @@ union bpf_attr { ...@@ -3168,7 +3168,7 @@ union bpf_attr {
* Return * Return
* The id is returned or 0 in case the id could not be retrieved. * The id is returned or 0 in case the id could not be retrieved.
* *
* void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags) * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
* Description * Description
* Copy *size* bytes from *data* into a ring buffer *ringbuf*. * Copy *size* bytes from *data* into a ring buffer *ringbuf*.
* If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
......
...@@ -13,6 +13,7 @@ static int getsetsockopt(void) ...@@ -13,6 +13,7 @@ static int getsetsockopt(void)
char cc[16]; /* TCP_CA_NAME_MAX */ char cc[16]; /* TCP_CA_NAME_MAX */
} buf = {}; } buf = {};
socklen_t optlen; socklen_t optlen;
char *big_buf = NULL;
fd = socket(AF_INET, SOCK_STREAM, 0); fd = socket(AF_INET, SOCK_STREAM, 0);
if (fd < 0) { if (fd < 0) {
...@@ -22,24 +23,31 @@ static int getsetsockopt(void) ...@@ -22,24 +23,31 @@ static int getsetsockopt(void)
/* IP_TOS - BPF bypass */ /* IP_TOS - BPF bypass */
buf.u8[0] = 0x08; optlen = getpagesize() * 2;
err = setsockopt(fd, SOL_IP, IP_TOS, &buf, 1); big_buf = calloc(1, optlen);
if (!big_buf) {
log_err("Couldn't allocate two pages");
goto err;
}
*(int *)big_buf = 0x08;
err = setsockopt(fd, SOL_IP, IP_TOS, big_buf, optlen);
if (err) { if (err) {
log_err("Failed to call setsockopt(IP_TOS)"); log_err("Failed to call setsockopt(IP_TOS)");
goto err; goto err;
} }
buf.u8[0] = 0x00; memset(big_buf, 0, optlen);
optlen = 1; optlen = 1;
err = getsockopt(fd, SOL_IP, IP_TOS, &buf, &optlen); err = getsockopt(fd, SOL_IP, IP_TOS, big_buf, &optlen);
if (err) { if (err) {
log_err("Failed to call getsockopt(IP_TOS)"); log_err("Failed to call getsockopt(IP_TOS)");
goto err; goto err;
} }
if (buf.u8[0] != 0x08) { if (*(int *)big_buf != 0x08) {
log_err("Unexpected getsockopt(IP_TOS) buf[0] 0x%02x != 0x08", log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
buf.u8[0]); *(int *)big_buf);
goto err; goto err;
} }
...@@ -78,6 +86,28 @@ static int getsetsockopt(void) ...@@ -78,6 +86,28 @@ static int getsetsockopt(void)
goto err; goto err;
} }
/* IP_FREEBIND - BPF can't access optval past PAGE_SIZE */
optlen = getpagesize() * 2;
memset(big_buf, 0, optlen);
err = setsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, optlen);
if (err != 0) {
log_err("Failed to call setsockopt, ret=%d", err);
goto err;
}
err = getsockopt(fd, SOL_IP, IP_FREEBIND, big_buf, &optlen);
if (err != 0) {
log_err("Failed to call getsockopt, ret=%d", err);
goto err;
}
if (optlen != 1 || *(__u8 *)big_buf != 0x55) {
log_err("Unexpected IP_FREEBIND getsockopt, optlen=%d, optval=0x%x",
optlen, *(__u8 *)big_buf);
}
/* SO_SNDBUF is overwritten */ /* SO_SNDBUF is overwritten */
buf.u32 = 0x01010101; buf.u32 = 0x01010101;
...@@ -124,9 +154,11 @@ static int getsetsockopt(void) ...@@ -124,9 +154,11 @@ static int getsetsockopt(void)
goto err; goto err;
} }
free(big_buf);
close(fd); close(fd);
return 0; return 0;
err: err:
free(big_buf);
close(fd); close(fd);
return -1; return -1;
} }
......
...@@ -8,6 +8,10 @@ ...@@ -8,6 +8,10 @@
char _license[] SEC("license") = "GPL"; char _license[] SEC("license") = "GPL";
__u32 _version SEC("version") = 1; __u32 _version SEC("version") = 1;
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
#define SOL_CUSTOM 0xdeadbeef #define SOL_CUSTOM 0xdeadbeef
struct sockopt_sk { struct sockopt_sk {
...@@ -28,12 +32,14 @@ int _getsockopt(struct bpf_sockopt *ctx) ...@@ -28,12 +32,14 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval; __u8 *optval = ctx->optval;
struct sockopt_sk *storage; struct sockopt_sk *storage;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS; /* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel * let next BPF program in the cgroup chain or kernel
* handle it. * handle it.
*/ */
ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
return 1; return 1;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
/* Not interested in SOL_SOCKET:SO_SNDBUF; /* Not interested in SOL_SOCKET:SO_SNDBUF;
...@@ -51,6 +57,26 @@ int _getsockopt(struct bpf_sockopt *ctx) ...@@ -51,6 +57,26 @@ int _getsockopt(struct bpf_sockopt *ctx)
return 1; return 1;
} }
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
ctx->retval = 0; /* Reset system call return value to zero */
/* Always export 0x55 */
optval[0] = 0x55;
ctx->optlen = 1;
/* Userspace buffer is PAGE_SIZE * 2, but BPF
* program can only see the first PAGE_SIZE
* bytes of data.
*/
if (optval_end - optval != PAGE_SIZE)
return 0; /* EPERM, unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM) if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */ return 0; /* EPERM, deny everything except custom level */
...@@ -81,12 +107,14 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -81,12 +107,14 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval; __u8 *optval = ctx->optval;
struct sockopt_sk *storage; struct sockopt_sk *storage;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS; /* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel * let next BPF program in the cgroup chain or kernel
* handle it. * handle it.
*/ */
ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
return 1; return 1;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
/* Overwrite SO_SNDBUF value */ /* Overwrite SO_SNDBUF value */
...@@ -112,6 +140,28 @@ int _setsockopt(struct bpf_sockopt *ctx) ...@@ -112,6 +140,28 @@ int _setsockopt(struct bpf_sockopt *ctx)
return 1; return 1;
} }
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
if (ctx->optlen != PAGE_SIZE * 2)
return 0; /* EPERM, unexpected data size */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
/* Make sure we can trim the buffer. */
optval[0] = 0;
ctx->optlen = 1;
/* Userspace buffer is PAGE_SIZE * 2, but BPF
* program can only see the first PAGE_SIZE
* bytes of data.
*/
if (optval_end - optval != PAGE_SIZE)
return 0; /* EPERM, unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM) if (ctx->level != SOL_CUSTOM)
return 0; /* EPERM, deny everything except custom level */ return 0; /* EPERM, deny everything except custom level */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment