Commit 41f57cfd authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

Alexei Starovoitov says:

====================
pull-request: bpf 2020-02-19

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 10 day(s) which contain
a total of 10 files changed, 93 insertions(+), 31 deletions(-).

The main changes are:

1) batched bpf hashtab fixes from Brian and Yonghong.

2) various selftests and libbpf fixes.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents fca07a93 b9aff38d
......@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
* The same effect can be attained with the more generic
* **bpf_redirect_map**\ (), which requires specific maps to be
* used but offers better performance.
* The same effect can also be attained with the more generic
* **bpf_redirect_map**\ (), which uses a BPF map to store the
* redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
......@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
* When used to redirect packets to net devices, this helper
* provides a high performance increase over **bpf_redirect**\ ().
* This is due to various implementation details of the underlying
* mechanisms, one of which is the fact that **bpf_redirect_map**\
* () tries to send packet as a "bulk" to the device.
* See also bpf_redirect(), which only supports redirecting to an
* ifindex, but doesn't require a map to do so.
* Return
* **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
* **XDP_REDIRECT** on success, or the value of the two lower bits
of the *flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
......
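For context on the two helpers whose documentation is reworded above, here is a minimal XDP sketch; it is not part of this pull request, and the devmap name, its size, and the choice of XDP_ABORTED as the error value carried in the lower flag bits are illustrative assumptions.

/* Minimal sketch of an XDP program using the helpers discussed above.
 * Assumes a libbpf build with BTF-defined maps; names and sizes are
 * made up for illustration.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 64);
	__type(key, __u32);
	__type(value, __u32);
} tx_ports SEC(".maps");

SEC("xdp")
int xdp_redirect_example(struct xdp_md *ctx)
{
	__u32 key = 0;

	/* Redirect target comes from the map; if the lookup fails, the
	 * helper returns the two lower bits of the flags argument
	 * (XDP_ABORTED here), matching the updated Return description.
	 */
	return bpf_redirect_map(&tx_ports, key, XDP_ABORTED);

	/* Equivalent without a map, redirecting to a fixed ifindex:
	 *	return bpf_redirect(3, 0);
	 */
}

char _license[] SEC("license") = "GPL";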
......@@ -4142,9 +4142,9 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
* EFAULT - verifier bug
* 0 - 99% match. The last 1% is validated by the verifier.
*/
int btf_check_func_type_match(struct bpf_verifier_log *log,
struct btf *btf1, const struct btf_type *t1,
struct btf *btf2, const struct btf_type *t2)
static int btf_check_func_type_match(struct bpf_verifier_log *log,
struct btf *btf1, const struct btf_type *t1,
struct btf *btf2, const struct btf_type *t2)
{
const struct btf_param *args1, *args2;
const char *fn1, *fn2, *s1, *s2;
......
......@@ -56,6 +56,7 @@ struct htab_elem {
union {
struct bpf_htab *htab;
struct pcpu_freelist_node fnode;
struct htab_elem *batch_flink;
};
};
};
......@@ -126,6 +127,17 @@ static void htab_free_elems(struct bpf_htab *htab)
bpf_map_area_free(htab->elems);
}
/* The LRU list has a lock (lru_lock). Each htab bucket has a lock
* (bucket_lock). If both locks need to be acquired together, the lock
* order is always lru_lock -> bucket_lock and this only happens in
* bpf_lru_list.c logic. For example, certain code path of
* bpf_lru_pop_free(), which is called by function prealloc_lru_pop(),
* will acquire lru_lock first followed by acquiring bucket_lock.
*
* In hashtab.c, to avoid deadlock, lock acquisition of
* bucket_lock followed by lru_lock is not allowed. In such cases,
* bucket_lock needs to be released first before acquiring lru_lock.
*/
static struct htab_elem *prealloc_lru_pop(struct bpf_htab *htab, void *key,
u32 hash)
{
......@@ -1256,10 +1268,12 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
void *ubatch = u64_to_user_ptr(attr->batch.in_batch);
u32 batch, max_count, size, bucket_size;
struct htab_elem *node_to_free = NULL;
u64 elem_map_flags, map_flags;
struct hlist_nulls_head *head;
struct hlist_nulls_node *n;
unsigned long flags;
unsigned long flags = 0;
bool locked = false;
struct htab_elem *l;
struct bucket *b;
int ret = 0;
......@@ -1319,15 +1333,25 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
dst_val = values;
b = &htab->buckets[batch];
head = &b->head;
raw_spin_lock_irqsave(&b->lock, flags);
/* do not grab the lock unless we need it (bucket_cnt > 0). */
if (locked)
raw_spin_lock_irqsave(&b->lock, flags);
bucket_cnt = 0;
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
bucket_cnt++;
if (bucket_cnt && !locked) {
locked = true;
goto again_nocopy;
}
if (bucket_cnt > (max_count - total)) {
if (total == 0)
ret = -ENOSPC;
/* Note that since bucket_cnt > 0 here, it is implicit
* that the lock was grabbed, so release it.
*/
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
......@@ -1337,6 +1361,9 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
if (bucket_cnt > bucket_size) {
bucket_size = bucket_cnt;
/* Note that since bucket_cnt > 0 here, it is implicit
* that the lock was grabbed, so release it.
*/
raw_spin_unlock_irqrestore(&b->lock, flags);
rcu_read_unlock();
this_cpu_dec(bpf_prog_active);
......@@ -1346,6 +1373,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
goto alloc;
}
/* Next block is only safe to run if you have grabbed the lock */
if (!locked)
goto next_batch;
hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
memcpy(dst_key, l->key, key_size);
......@@ -1370,16 +1401,33 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
}
if (do_delete) {
hlist_nulls_del_rcu(&l->hash_node);
if (is_lru_map)
bpf_lru_push_free(&htab->lru, &l->lru_node);
else
/* bpf_lru_push_free() will acquire lru_lock, which
* may cause deadlock. See comments in function
* prealloc_lru_pop(). Let us do bpf_lru_push_free()
* after releasing the bucket lock.
*/
if (is_lru_map) {
l->batch_flink = node_to_free;
node_to_free = l;
} else {
free_htab_elem(htab, l);
}
}
dst_key += key_size;
dst_val += value_size;
}
raw_spin_unlock_irqrestore(&b->lock, flags);
locked = false;
while (node_to_free) {
l = node_to_free;
node_to_free = node_to_free->batch_flink;
bpf_lru_push_free(&htab->lru, &l->lru_node);
}
next_batch:
/* If we are not copying data, we can go to next bucket and avoid
* unlocking the rcu.
*/
......
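The hashtab change above avoids the bucket_lock -> lru_lock inversion by deferring bpf_lru_push_free() until the bucket lock is dropped, chaining elements through the new batch_flink pointer. The following is a small userspace sketch of that deferred-free pattern; it is not kernel code, and every name in it is illustrative.

/* Userspace sketch (not kernel code) of the deferred-free pattern used in
 * __htab_map_lookup_and_delete_batch(): elements are unlinked from a
 * bucket while its spinlock is held, chained through a "flink" pointer
 * (like batch_flink), and only freed after the lock is released, so the
 * free path may take other locks without violating the lock order
 * described in the new comment above prealloc_lru_pop().
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct elem {
	int key;
	struct elem *next;	/* bucket chain */
	struct elem *flink;	/* deferred-free chain */
};

struct bucket {
	pthread_spinlock_t lock;
	struct elem *head;
};

static void drain_bucket(struct bucket *b)
{
	struct elem *to_free = NULL, *e;

	pthread_spin_lock(&b->lock);
	while ((e = b->head)) {
		b->head = e->next;
		e->flink = to_free;	/* chain instead of freeing under the lock */
		to_free = e;
	}
	pthread_spin_unlock(&b->lock);

	while (to_free) {		/* lock dropped: safe to free or take other locks */
		e = to_free;
		to_free = to_free->flink;
		printf("freeing key %d\n", e->key);
		free(e);
	}
}

int main(void)
{
	struct bucket b = { .head = NULL };

	pthread_spin_init(&b.lock, PTHREAD_PROCESS_PRIVATE);
	for (int i = 0; i < 3; i++) {
		struct elem *e = calloc(1, sizeof(*e));

		e->key = i;
		e->next = b.head;
		b.head = e;
	}
	drain_bucket(&b);
	pthread_spin_destroy(&b.lock);
	return 0;
}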
......@@ -321,7 +321,7 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
ulen = info->jited_prog_len;
info->jited_prog_len = aux->offload->jited_len;
if (info->jited_prog_len & ulen) {
if (info->jited_prog_len && ulen) {
uinsns = u64_to_user_ptr(info->jited_prog_insns);
ulen = min_t(u32, info->jited_prog_len, ulen);
if (copy_to_user(uinsns, aux->offload->jited_image, ulen)) {
......
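The offload.c hunk above replaces a bitwise '&' with a logical '&&' in the length check. A tiny standalone demonstration of the difference follows; the lengths are made-up values, not taken from the kernel.

/* Demonstration (not kernel code) of why the bitwise '&' was wrong: two
 * non-zero lengths can still AND to zero bitwise, which would skip copying
 * the JITed image even though both the kernel and the user buffer report a
 * size.
 */
#include <stdio.h>

int main(void)
{
	unsigned int jited_prog_len = 4, ulen = 2;

	printf("bitwise  4 & 2  = %u\n", jited_prog_len & ulen);	/* 0: copy skipped */
	printf("logical  4 && 2 = %d\n", jited_prog_len && ulen);	/* 1: copy performed */
	return 0;
}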
......@@ -217,6 +217,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
xskq_prod_submit(xs->rx);
__xskq_cons_release(xs->umem->fq);
sock_def_readable(&xs->sk);
}
......@@ -304,6 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
}
rcu_read_unlock();
......
......@@ -271,7 +271,8 @@ static inline void xskq_cons_release(struct xsk_queue *q)
{
/* To improve performance, only update local state here.
* Reflect this to global state when we get new entries
* from the ring in xskq_cons_get_entries().
* from the ring in xskq_cons_get_entries() and whenever
* Rx or Tx processing are completed in the NAPI loop.
*/
q->cached_cons++;
}
......
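The xsk changes above publish the cached consumer pointer to the shared ring when Rx or Tx processing completes in the NAPI loop, not only when new entries are fetched. Below is a simplified sketch of that cached-pointer pattern; it is not the kernel's xsk_queue implementation, and all names are illustrative.

/* Simplified sketch (not the kernel implementation) of the cached consumer
 * pointer the comment above refers to: the hot path only bumps a local
 * copy, and the shared ring pointer is published at batch boundaries, e.g.
 * when Rx/Tx processing in the NAPI loop is done.
 */
#include <stdatomic.h>
#include <stdint.h>

struct ring {
	_Atomic uint32_t consumer;	/* shared with the producer */
};

struct cons_queue {
	struct ring *ring;
	uint32_t cached_cons;		/* local state, cheap to update */
};

/* Per-entry release: local state only, like xskq_cons_release(). */
static inline void cons_release(struct cons_queue *q)
{
	q->cached_cons++;
}

/* Batch-boundary publish, like __xskq_cons_release(): make the consumed
 * entries visible so the producer can reuse them.
 */
static inline void cons_publish(struct cons_queue *q)
{
	atomic_store_explicit(&q->ring->consumer, q->cached_cons,
			      memory_order_release);
}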
......@@ -1045,9 +1045,9 @@ union bpf_attr {
* supports redirection to the egress interface, and accepts no
* flag at all.
*
* The same effect can be attained with the more generic
* **bpf_redirect_map**\ (), which requires specific maps to be
* used but offers better performance.
* The same effect can also be attained with the more generic
* **bpf_redirect_map**\ (), which uses a BPF map to store the
* redirect target instead of providing it directly to the helper.
* Return
* For XDP, the helper returns **XDP_REDIRECT** on success or
* **XDP_ABORTED** on error. For other program types, the values
......@@ -1611,13 +1611,11 @@ union bpf_attr {
* the caller. Any higher bits in the *flags* argument must be
* unset.
*
* When used to redirect packets to net devices, this helper
* provides a high performance increase over **bpf_redirect**\ ().
* This is due to various implementation details of the underlying
* mechanisms, one of which is the fact that **bpf_redirect_map**\
* () tries to send packet as a "bulk" to the device.
* See also bpf_redirect(), which only supports redirecting to an
* ifindex, but doesn't require a map to do so.
* Return
* **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
* **XDP_REDIRECT** on success, or the value of the two lower bits
of the *flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
......
......@@ -24,6 +24,7 @@
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
......@@ -1283,7 +1284,7 @@ static size_t bpf_map_mmap_sz(const struct bpf_map *map)
static char *internal_map_name(struct bpf_object *obj,
enum libbpf_map_type type)
{
char map_name[BPF_OBJ_NAME_LEN];
char map_name[BPF_OBJ_NAME_LEN], *p;
const char *sfx = libbpf_type_to_btf_name[type];
int sfx_len = max((size_t)7, strlen(sfx));
int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
......@@ -1292,6 +1293,11 @@ static char *internal_map_name(struct bpf_object *obj,
snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
sfx_len, libbpf_type_to_btf_name[type]);
/* sanitise map name to characters allowed by kernel */
for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
if (!isalnum(*p) && *p != '_' && *p != '.')
*p = '_';
return strdup(map_name);
}
......
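The libbpf hunk above rewrites characters the kernel rejects in object names before they are used for internal maps. A standalone sketch of the same rule follows; the function name and the sample object name are illustrative, not libbpf API.

/* Standalone sketch of the sanitisation rule added above: BPF object/map
 * names may only contain alphanumerics, '_' and '.', so any other byte
 * (for instance the '-' in an object named "xdp-dump") is replaced by '_'.
 */
#include <ctype.h>
#include <stdio.h>

static void sanitise_name(char *name)
{
	for (char *p = name; *p; p++)
		if (!isalnum((unsigned char)*p) && *p != '_' && *p != '.')
			*p = '_';
}

int main(void)
{
	char name[] = "xdp-dump.bss";

	sanitise_name(name);
	printf("%s\n", name);	/* prints "xdp_dump.bss" */
	return 0;
}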
......@@ -506,8 +506,10 @@ static void test_syncookie(int type, sa_family_t family)
.pass_on_failure = 0,
};
if (type != SOCK_STREAM)
if (type != SOCK_STREAM) {
test__skip();
return;
}
/*
* +1 for TCP-SYN and
......@@ -822,8 +824,10 @@ void test_select_reuseport(void)
goto out;
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
if (saved_tcp_fo < 0)
goto out;
saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0)
if (saved_tcp_syncookie < 0)
goto out;
if (enable_fastopen())
......
......@@ -3,6 +3,11 @@
#include "test_progs.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
#define TCP_REPAIR_ON 1
#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
static int connected_socket_v4(void)
{
struct sockaddr_in addr = {
......
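The last hunk adds the TCP_REPAIR constants used by connected_socket_v4() in the selftest. Below is a hedged sketch of how repair mode yields a "connected" TCP socket without a live peer; the address, port, and error handling are illustrative, and the caller needs CAP_NET_ADMIN.

/* Sketch (illustrative, requires CAP_NET_ADMIN) of how the TCP_REPAIR
 * constants defined above let a test build a "connected" TCP socket
 * without a live peer: with repair mode on, connect() just records the
 * destination instead of performing a handshake.
 */
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

#define TCP_REPAIR		19
#define TCP_REPAIR_ON		1
#define TCP_REPAIR_OFF_NO_WP	-1	/* turn off without window probes */

int fake_connected_socket_v4(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(80),
		.sin_addr.s_addr = inet_addr("127.0.0.1"),
	};
	int on = TCP_REPAIR_ON, off = TCP_REPAIR_OFF_NO_WP;
	int fd;

	fd = socket(AF_INET, SOCK_STREAM, 0);
	if (fd < 0)
		return -1;
	if (setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on)) ||
	    connect(fd, (struct sockaddr *)&addr, sizeof(addr)) ||
	    setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &off, sizeof(off))) {
		close(fd);
		return -1;
	}
	return fd;
}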