Commit 6e3bf9b0 authored by David S. Miller's avatar David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2018-08-18

The following pull-request contains BPF updates for your *net* tree.

The main changes are:

1) Fix a BPF selftest failure in test_cgroup_storage due to rlimit
   restrictions, from Yonghong.

2) Fix a suspicious RCU rcu_dereference_check() warning triggered
   from removing a device's XDP memory allocator by using the correct
   rhashtable lookup function, from Tariq.

3) A batch of BPF sockmap and ULP fixes mainly fixing leaks and races
   as well as enforcing module aliases for ULPs. Another fix for BPF
   map redirect to make them work again with tail calls, from Daniel.

4) Fix XDP BPF samples to unload their programs upon SIGTERM, from Jesper.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3fe49d69 f6069b9a
......@@ -543,7 +543,6 @@ struct bpf_redirect_info {
u32 flags;
struct bpf_map *map;
struct bpf_map *map_to_flush;
unsigned long map_owner;
u32 kern_flags;
};
......@@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void)
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
const struct bpf_insn *patch, u32 len);
void bpf_clear_redirect_map(struct bpf_map *map);
static inline bool xdp_return_frame_no_direct(void)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
......
......@@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp);
void tcp_get_available_ulp(char *buf, size_t len);
void tcp_cleanup_ulp(struct sock *sk);
#define MODULE_ALIAS_TCP_ULP(name) \
__MODULE_INFO(alias, alias_userspace, name); \
__MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
/* Call BPF_SOCK_OPS program that returns an int. If the return value
* is < 0, then the BPF op failed (for example if the loaded BPF
* program does not support the chosen operation or there is no BPF
......
......@@ -147,9 +147,8 @@ struct _bpf_dtab_netdev {
#define devmap_ifindex(fwd, map) \
(!fwd ? 0 : \
(!map ? 0 : \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)))
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \
......
......@@ -1579,7 +1579,7 @@ static bool bpf_prog_array_copy_core(struct bpf_prog_array __rcu *array,
struct bpf_prog_array_item *item;
int i = 0;
item = rcu_dereference(array)->items;
item = rcu_dereference_check(array, 1)->items;
for (; item->prog; item++) {
if (item->prog == &dummy_bpf_prog.prog)
continue;
......
......@@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map)
* It does __not__ ensure pending flush operations (if any) are
* complete.
*/
bpf_clear_redirect_map(map);
synchronize_rcu();
/* To ensure all pending flush operations have completed wait for flush
......
......@@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map)
list_del_rcu(&dtab->list);
spin_unlock(&dev_map_lock);
bpf_clear_redirect_map(map);
synchronize_rcu();
/* To ensure all pending flush operations have completed wait for flush
......
......@@ -58,6 +58,7 @@ struct bpf_stab {
struct bpf_map map;
struct sock **sock_map;
struct bpf_sock_progs progs;
raw_spinlock_t lock;
};
struct bucket {
......@@ -89,9 +90,9 @@ enum smap_psock_state {
struct smap_psock_map_entry {
struct list_head list;
struct bpf_map *map;
struct sock **entry;
struct htab_elem __rcu *hash_link;
struct bpf_htab __rcu *htab;
};
struct smap_psock {
......@@ -343,13 +344,18 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
e = psock_map_pop(sk, psock);
while (e) {
if (e->entry) {
osk = cmpxchg(e->entry, sk, NULL);
struct bpf_stab *stab = container_of(e->map, struct bpf_stab, map);
raw_spin_lock_bh(&stab->lock);
osk = *e->entry;
if (osk == sk) {
*e->entry = NULL;
smap_release_sock(psock, sk);
}
raw_spin_unlock_bh(&stab->lock);
} else {
struct htab_elem *link = rcu_dereference(e->hash_link);
struct bpf_htab *htab = rcu_dereference(e->htab);
struct bpf_htab *htab = container_of(e->map, struct bpf_htab, map);
struct hlist_head *head;
struct htab_elem *l;
struct bucket *b;
......@@ -370,6 +376,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
}
raw_spin_unlock_bh(&b->lock);
}
kfree(e);
e = psock_map_pop(sk, psock);
}
rcu_read_unlock();
......@@ -1641,6 +1648,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
raw_spin_lock_init(&stab->lock);
/* make sure page count doesn't overflow */
cost = (u64) stab->map.max_entries * sizeof(struct sock *);
......@@ -1675,8 +1683,10 @@ static void smap_list_map_remove(struct smap_psock *psock,
spin_lock_bh(&psock->maps_lock);
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
if (e->entry == entry)
if (e->entry == entry) {
list_del(&e->list);
kfree(e);
}
}
spin_unlock_bh(&psock->maps_lock);
}
......@@ -1690,8 +1700,10 @@ static void smap_list_hash_remove(struct smap_psock *psock,
list_for_each_entry_safe(e, tmp, &psock->maps, list) {
struct htab_elem *c = rcu_dereference(e->hash_link);
if (c == hash_link)
if (c == hash_link) {
list_del(&e->list);
kfree(e);
}
}
spin_unlock_bh(&psock->maps_lock);
}
......@@ -1711,14 +1723,15 @@ static void sock_map_free(struct bpf_map *map)
* and a grace period expire to ensure psock is really safe to remove.
*/
rcu_read_lock();
raw_spin_lock_bh(&stab->lock);
for (i = 0; i < stab->map.max_entries; i++) {
struct smap_psock *psock;
struct sock *sock;
sock = xchg(&stab->sock_map[i], NULL);
sock = stab->sock_map[i];
if (!sock)
continue;
stab->sock_map[i] = NULL;
psock = smap_psock_sk(sock);
/* This check handles a racing sock event that can get the
* sk_callback_lock before this case but after xchg happens
......@@ -1730,6 +1743,7 @@ static void sock_map_free(struct bpf_map *map)
smap_release_sock(psock, sock);
}
}
raw_spin_unlock_bh(&stab->lock);
rcu_read_unlock();
sock_map_remove_complete(stab);
......@@ -1773,19 +1787,23 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
if (k >= map->max_entries)
return -EINVAL;
sock = xchg(&stab->sock_map[k], NULL);
raw_spin_lock_bh(&stab->lock);
sock = stab->sock_map[k];
stab->sock_map[k] = NULL;
raw_spin_unlock_bh(&stab->lock);
if (!sock)
return -EINVAL;
psock = smap_psock_sk(sock);
if (!psock)
goto out;
if (psock->bpf_parse)
return 0;
if (psock->bpf_parse) {
write_lock_bh(&sock->sk_callback_lock);
smap_stop_sock(psock, sock);
write_unlock_bh(&sock->sk_callback_lock);
}
smap_list_map_remove(psock, &stab->sock_map[k]);
smap_release_sock(psock, sock);
out:
return 0;
}
......@@ -1821,11 +1839,9 @@ static int sock_map_delete_elem(struct bpf_map *map, void *key)
static int __sock_map_ctx_update_elem(struct bpf_map *map,
struct bpf_sock_progs *progs,
struct sock *sock,
struct sock **map_link,
void *key)
{
struct bpf_prog *verdict, *parse, *tx_msg;
struct smap_psock_map_entry *e = NULL;
struct smap_psock *psock;
bool new = false;
int err = 0;
......@@ -1898,14 +1914,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
new = true;
}
if (map_link) {
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e) {
err = -ENOMEM;
goto out_free;
}
}
/* 3. At this point we have a reference to a valid psock that is
* running. Attach any BPF programs needed.
*/
......@@ -1927,17 +1935,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
write_unlock_bh(&sock->sk_callback_lock);
}
/* 4. Place psock in sockmap for use and stop any programs on
* the old sock assuming its not the same sock we are replacing
* it with. Because we can only have a single set of programs if
* old_sock has a strp we can stop it.
*/
if (map_link) {
e->entry = map_link;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
}
return err;
out_free:
smap_release_sock(psock, sock);
......@@ -1948,7 +1945,6 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
}
if (tx_msg)
bpf_prog_put(tx_msg);
kfree(e);
return err;
}
......@@ -1958,36 +1954,57 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
{
struct bpf_stab *stab = container_of(map, struct bpf_stab, map);
struct bpf_sock_progs *progs = &stab->progs;
struct sock *osock, *sock;
struct sock *osock, *sock = skops->sk;
struct smap_psock_map_entry *e;
struct smap_psock *psock;
u32 i = *(u32 *)key;
int err;
if (unlikely(flags > BPF_EXIST))
return -EINVAL;
if (unlikely(i >= stab->map.max_entries))
return -E2BIG;
sock = READ_ONCE(stab->sock_map[i]);
if (flags == BPF_EXIST && !sock)
return -ENOENT;
else if (flags == BPF_NOEXIST && sock)
return -EEXIST;
e = kzalloc(sizeof(*e), GFP_ATOMIC | __GFP_NOWARN);
if (!e)
return -ENOMEM;
sock = skops->sk;
err = __sock_map_ctx_update_elem(map, progs, sock, &stab->sock_map[i],
key);
err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto out;
osock = xchg(&stab->sock_map[i], sock);
if (osock) {
struct smap_psock *opsock = smap_psock_sk(osock);
/* psock guaranteed to be present. */
psock = smap_psock_sk(sock);
raw_spin_lock_bh(&stab->lock);
osock = stab->sock_map[i];
if (osock && flags == BPF_NOEXIST) {
err = -EEXIST;
goto out_unlock;
}
if (!osock && flags == BPF_EXIST) {
err = -ENOENT;
goto out_unlock;
}
smap_list_map_remove(opsock, &stab->sock_map[i]);
smap_release_sock(opsock, osock);
e->entry = &stab->sock_map[i];
e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
stab->sock_map[i] = sock;
if (osock) {
psock = smap_psock_sk(osock);
smap_list_map_remove(psock, &stab->sock_map[i]);
smap_release_sock(psock, osock);
}
raw_spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
smap_release_sock(psock, sock);
raw_spin_unlock_bh(&stab->lock);
out:
kfree(e);
return err;
}
......@@ -2350,7 +2367,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
b = __select_bucket(htab, hash);
head = &b->head;
err = __sock_map_ctx_update_elem(map, progs, sock, NULL, key);
err = __sock_map_ctx_update_elem(map, progs, sock, key);
if (err)
goto err;
......@@ -2376,8 +2393,7 @@ static int sock_hash_ctx_update_elem(struct bpf_sock_ops_kern *skops,
}
rcu_assign_pointer(e->hash_link, l_new);
rcu_assign_pointer(e->htab,
container_of(map, struct bpf_htab, map));
e->map = map;
spin_lock_bh(&psock->maps_lock);
list_add_tail(&e->list, &psock->maps);
spin_unlock_bh(&psock->maps_lock);
......
......@@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
goto patch_call_imm;
}
if (insn->imm == BPF_FUNC_redirect_map) {
/* Note, we cannot use prog directly as imm as subsequent
* rewrites would still change the prog pointer. The only
* stable address we can use is aux, which also works with
* prog clones during blinding.
*/
u64 addr = (unsigned long)prog->aux;
struct bpf_insn r4_ld[] = {
BPF_LD_IMM64(BPF_REG_4, addr),
*insn,
};
cnt = ARRAY_SIZE(r4_ld);
new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt);
if (!new_prog)
return -ENOMEM;
delta += cnt - 1;
env->prog = prog = new_prog;
insn = new_prog->insnsi + i + delta;
}
patch_call_imm:
fn = env->ops->get_func_proto(insn->imm, env->prog);
/* all functions that have prototype and verifier allowed
......
......@@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map)
struct xsk_map *m = container_of(map, struct xsk_map, map);
int i;
bpf_clear_redirect_map(map);
synchronize_net();
for (i = 0; i < map->max_entries; i++) {
......
......@@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
}
}
static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
unsigned long aux)
void bpf_clear_redirect_map(struct bpf_map *map)
{
return (unsigned long)xdp_prog->aux != aux;
struct bpf_redirect_info *ri;
int cpu;
for_each_possible_cpu(cpu) {
ri = per_cpu_ptr(&bpf_redirect_info, cpu);
/* Avoid polluting remote cacheline due to writes if
* not needed. Once we pass this test, we need the
* cmpxchg() to make sure it hasn't been changed in
* the meantime by remote CPU.
*/
if (unlikely(READ_ONCE(ri->map) == map))
cmpxchg(&ri->map, map, NULL);
}
}
static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
struct bpf_prog *xdp_prog, struct bpf_map *map)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
void *fwd = NULL;
int err;
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
map = NULL;
goto err;
}
WRITE_ONCE(ri->map, NULL);
fwd = __xdp_map_lookup_elem(map, index);
if (!fwd) {
......@@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
struct net_device *fwd;
u32 index = ri->ifindex;
int err;
if (ri->map)
return xdp_do_redirect_map(dev, xdp, xdp_prog);
if (map)
return xdp_do_redirect_map(dev, xdp, xdp_prog, map);
fwd = dev_get_by_index_rcu(dev_net(dev), index);
ri->ifindex = 0;
......@@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
struct bpf_prog *xdp_prog,
struct bpf_map *map)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
unsigned long map_owner = ri->map_owner;
struct bpf_map *map = ri->map;
u32 index = ri->ifindex;
void *fwd = NULL;
int err = 0;
ri->ifindex = 0;
ri->map = NULL;
ri->map_owner = 0;
WRITE_ONCE(ri->map, NULL);
if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
err = -EFAULT;
map = NULL;
goto err;
}
fwd = __xdp_map_lookup_elem(map, index);
if (unlikely(!fwd)) {
err = -EINVAL;
......@@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
u32 index = ri->ifindex;
struct net_device *fwd;
int err = 0;
if (ri->map)
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog);
if (map)
return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
map);
ri->ifindex = 0;
fwd = dev_get_by_index_rcu(dev_net(dev), index);
if (unlikely(!fwd)) {
......@@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
ri->ifindex = ifindex;
ri->flags = flags;
ri->map = NULL;
ri->map_owner = 0;
WRITE_ONCE(ri->map, NULL);
return XDP_REDIRECT;
}
......@@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
unsigned long, map_owner)
BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
......@@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags
ri->ifindex = ifindex;
ri->flags = flags;
ri->map = map;
ri->map_owner = map_owner;
WRITE_ONCE(ri->map, map);
return XDP_REDIRECT;
}
/* Note, arg4 is hidden from users and populated by the verifier
* with the right pointer.
*/
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
.func = bpf_xdp_redirect_map,
.gpl_only = false,
......
......@@ -98,23 +98,15 @@ static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
struct xdp_mem_allocator *xa;
int id = xdp_rxq->mem.id;
int err;
if (id == 0)
return;
mutex_lock(&mem_id_lock);
xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
if (!xa) {
mutex_unlock(&mem_id_lock);
return;
}
err = rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params);
WARN_ON(err);
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
if (xa && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
mutex_unlock(&mem_id_lock);
}
......
......@@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name)
#ifdef CONFIG_MODULES
if (!ulp && capable(CAP_NET_ADMIN)) {
rcu_read_unlock();
request_module("%s", name);
request_module("tcp-ulp-%s", name);
rcu_read_lock();
ulp = tcp_ulp_find(name);
}
......@@ -129,6 +129,8 @@ void tcp_cleanup_ulp(struct sock *sk)
if (icsk->icsk_ulp_ops->release)
icsk->icsk_ulp_ops->release(sk);
module_put(icsk->icsk_ulp_ops->owner);
icsk->icsk_ulp_ops = NULL;
}
/* Change upper layer protocol for socket */
......
......@@ -45,6 +45,7 @@
MODULE_AUTHOR("Mellanox Technologies");
MODULE_DESCRIPTION("Transport Layer Security Support");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS_TCP_ULP("tls");
enum {
TLSV4,
......
......@@ -679,8 +679,9 @@ int main(int argc, char **argv)
return EXIT_FAIL_OPTION;
}
/* Remove XDP program when program is interrupted */
/* Remove XDP program when program is interrupted or killed */
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
......
......@@ -567,8 +567,9 @@ int main(int argc, char **argv)
exit(EXIT_FAIL_BPF);
}
/* Remove XDP program when program is interrupted */
/* Remove XDP program when program is interrupted or killed */
signal(SIGINT, int_exit);
signal(SIGTERM, int_exit);
if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
fprintf(stderr, "link set xdp fd failed\n");
......
......@@ -5,6 +5,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment