Commit d3406913 authored by Alexei Starovoitov

Merge branch 'devmap_hash'

Toke Høiland-Jørgensen says:

====================
This series adds a new map type, devmap_hash, that works like the existing
devmap type, but uses a hash-based indexing scheme. This is useful for the use
case where a devmap is indexed by ifindex (for instance for use with the routing
table lookup helper). For this use case, the regular devmap needs to be sized
after the maximum ifindex number, not the number of devices in it. A hash-based
indexing scheme makes it possible to size the map after the number of devices it
should contain instead.

This was previously part of my patch series that also turned the regular
bpf_redirect() helper into a map-based one; for this series I just pulled out
the patches that introduced the new map type.

Changelog:

v5:

- Dynamically set the number of hash buckets by rounding up max_entries to the
  nearest power of two (mirroring the regular hashmap), as suggested by Jesper.

v4:

- Remove check_memlock parameter that was left over from an earlier patch
  series.
- Reorder struct members to avoid holes.

v3:

- Rework the split into different patches
- Use spin_lock_irqsave()
- Also add documentation and bash completion definitions for bpftool

v2:

- Split commit adding the new map type so uapi and tools changes are separate.

Changes to these patches since the previous series:

- Rebase on top of the other devmap changes (makes this one simpler!)
- Don't enforce key==val, but allow arbitrary indexes.
- Rename the type to devmap_hash to reflect the fact that it's just a hashmap now.
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents 475e31f8 1375dc4a
...@@ -713,7 +713,7 @@ struct xdp_buff; ...@@ -713,7 +713,7 @@ struct xdp_buff;
struct sk_buff; struct sk_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_insert_ctx(struct bpf_map *map, u32 index); struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
void __dev_map_flush(struct bpf_map *map); void __dev_map_flush(struct bpf_map *map);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx); struct net_device *dev_rx);
...@@ -721,7 +721,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, ...@@ -721,7 +721,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog); struct bpf_prog *xdp_prog);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
void __cpu_map_flush(struct bpf_map *map); void __cpu_map_flush(struct bpf_map *map);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx); struct net_device *dev_rx);
...@@ -801,8 +800,10 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, ...@@ -801,8 +800,10 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map,
return NULL; return NULL;
} }
static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
u32 key)
{ {
return NULL;
} }
static inline void __dev_map_flush(struct bpf_map *map) static inline void __dev_map_flush(struct bpf_map *map)
...@@ -834,10 +835,6 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) ...@@ -834,10 +835,6 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
return NULL; return NULL;
} }
static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index)
{
}
static inline void __cpu_map_flush(struct bpf_map *map) static inline void __cpu_map_flush(struct bpf_map *map)
{ {
} }
......
...@@ -62,6 +62,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) ...@@ -62,6 +62,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
#ifdef CONFIG_NET #ifdef CONFIG_NET
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops)
#if defined(CONFIG_BPF_STREAM_PARSER) #if defined(CONFIG_BPF_STREAM_PARSER)
BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops)
......
...@@ -175,7 +175,8 @@ struct _bpf_dtab_netdev { ...@@ -175,7 +175,8 @@ struct _bpf_dtab_netdev {
#endif /* __DEVMAP_OBJ_TYPE */ #endif /* __DEVMAP_OBJ_TYPE */
#define devmap_ifindex(fwd, map) \ #define devmap_ifindex(fwd, map) \
((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ ((map->map_type == BPF_MAP_TYPE_DEVMAP || \
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? \
((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)
#define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \
......
...@@ -134,6 +134,7 @@ enum bpf_map_type { ...@@ -134,6 +134,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE, BPF_MAP_TYPE_SK_STORAGE,
BPF_MAP_TYPE_DEVMAP_HASH,
}; };
/* Note that tracing related programs such as /* Note that tracing related programs such as
......
...@@ -37,6 +37,12 @@ ...@@ -37,6 +37,12 @@
* notifier hook walks the map we know that new dev references can not be * notifier hook walks the map we know that new dev references can not be
* added by the user because core infrastructure ensures dev_get_by_index() * added by the user because core infrastructure ensures dev_get_by_index()
* calls will fail at this point. * calls will fail at this point.
*
* The devmap_hash type is a map type which interprets keys as ifindexes and
* indexes these using a hashmap. This allows maps that use ifindex as key to be
* densely packed instead of having holes in the lookup array for unused
* ifindexes. The setup and packet enqueue/send code is shared between the two
* types of devmap; only the lookup and insertion is different.
*/ */
#include <linux/bpf.h> #include <linux/bpf.h>
#include <net/xdp.h> #include <net/xdp.h>
...@@ -59,10 +65,11 @@ struct xdp_bulk_queue { ...@@ -59,10 +65,11 @@ struct xdp_bulk_queue {
struct bpf_dtab_netdev { struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */ struct net_device *dev; /* must be first member, due to tracepoint */
struct hlist_node index_hlist;
struct bpf_dtab *dtab; struct bpf_dtab *dtab;
unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq; struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu; struct rcu_head rcu;
unsigned int idx; /* keep track of map index for tracepoint */
}; };
struct bpf_dtab { struct bpf_dtab {
...@@ -70,33 +77,45 @@ struct bpf_dtab { ...@@ -70,33 +77,45 @@ struct bpf_dtab {
struct bpf_dtab_netdev **netdev_map; struct bpf_dtab_netdev **netdev_map;
struct list_head __percpu *flush_list; struct list_head __percpu *flush_list;
struct list_head list; struct list_head list;
/* these are only used for DEVMAP_HASH type maps */
struct hlist_head *dev_index_head;
spinlock_t index_lock;
unsigned int items;
u32 n_buckets;
}; };
static DEFINE_SPINLOCK(dev_map_lock); static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list); static LIST_HEAD(dev_map_list);
static struct bpf_map *dev_map_alloc(union bpf_attr *attr) static struct hlist_head *dev_map_create_hash(unsigned int entries)
{
int i;
struct hlist_head *hash;
hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL);
if (hash != NULL)
for (i = 0; i < entries; i++)
INIT_HLIST_HEAD(&hash[i]);
return hash;
}
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{ {
struct bpf_dtab *dtab;
int err, cpu; int err, cpu;
u64 cost; u64 cost;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
/* check sanity of attributes */ /* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 || if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
return ERR_PTR(-EINVAL); return -EINVAL;
/* Lookup returns a pointer straight to dev->ifindex, so make sure the /* Lookup returns a pointer straight to dev->ifindex, so make sure the
* verifier prevents writes from the BPF side * verifier prevents writes from the BPF side
*/ */
attr->map_flags |= BPF_F_RDONLY_PROG; attr->map_flags |= BPF_F_RDONLY_PROG;
dtab = kzalloc(sizeof(*dtab), GFP_USER);
if (!dtab)
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&dtab->map, attr); bpf_map_init_from_attr(&dtab->map, attr);
...@@ -104,12 +123,18 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) ...@@ -104,12 +123,18 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
cost += sizeof(struct list_head) * num_possible_cpus(); cost += sizeof(struct list_head) * num_possible_cpus();
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
if (!dtab->n_buckets) /* Overflow check */
return -EINVAL;
cost += sizeof(struct hlist_head) * dtab->n_buckets;
}
/* if map size is larger than memlock limit, reject it */ /* if map size is larger than memlock limit, reject it */
err = bpf_map_charge_init(&dtab->map.memory, cost); err = bpf_map_charge_init(&dtab->map.memory, cost);
if (err) if (err)
goto free_dtab; return -EINVAL;
err = -ENOMEM;
dtab->flush_list = alloc_percpu(struct list_head); dtab->flush_list = alloc_percpu(struct list_head);
if (!dtab->flush_list) if (!dtab->flush_list)
...@@ -124,19 +149,48 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) ...@@ -124,19 +149,48 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
if (!dtab->netdev_map) if (!dtab->netdev_map)
goto free_percpu; goto free_percpu;
spin_lock(&dev_map_lock); if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
list_add_tail_rcu(&dtab->list, &dev_map_list); dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
spin_unlock(&dev_map_lock); if (!dtab->dev_index_head)
goto free_map_area;
return &dtab->map; spin_lock_init(&dtab->index_lock);
}
return 0;
free_map_area:
bpf_map_area_free(dtab->netdev_map);
free_percpu: free_percpu:
free_percpu(dtab->flush_list); free_percpu(dtab->flush_list);
free_charge: free_charge:
bpf_map_charge_finish(&dtab->map.memory); bpf_map_charge_finish(&dtab->map.memory);
free_dtab: return -ENOMEM;
kfree(dtab); }
return ERR_PTR(err);
static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
{
struct bpf_dtab *dtab;
int err;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
dtab = kzalloc(sizeof(*dtab), GFP_USER);
if (!dtab)
return ERR_PTR(-ENOMEM);
err = dev_map_init_map(dtab, attr);
if (err) {
kfree(dtab);
return ERR_PTR(err);
}
spin_lock(&dev_map_lock);
list_add_tail_rcu(&dtab->list, &dev_map_list);
spin_unlock(&dev_map_lock);
return &dtab->map;
} }
static void dev_map_free(struct bpf_map *map) static void dev_map_free(struct bpf_map *map)
...@@ -188,6 +242,7 @@ static void dev_map_free(struct bpf_map *map) ...@@ -188,6 +242,7 @@ static void dev_map_free(struct bpf_map *map)
free_percpu(dtab->flush_list); free_percpu(dtab->flush_list);
bpf_map_area_free(dtab->netdev_map); bpf_map_area_free(dtab->netdev_map);
kfree(dtab->dev_index_head);
kfree(dtab); kfree(dtab);
} }
...@@ -208,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) ...@@ -208,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
return 0; return 0;
} }
static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
int idx)
{
return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)];
}
/*
 * Look up the entry stored under @key in a DEVMAP_HASH map.
 *
 * Walks the bucket chosen by dev_map_index_hash() with the RCU hlist
 * iterator and returns the first entry whose idx equals @key, or NULL when
 * the bucket holds no such entry.
 */
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct hlist_head *bucket = dev_map_index_hash(dtab, key);
	struct bpf_dtab_netdev *entry;

	hlist_for_each_entry_rcu(entry, bucket, index_hlist) {
		if (entry->idx == key)
			return entry;
	}

	return NULL;
}
/*
 * Iterate over the keys of a DEVMAP_HASH map.
 *
 * @map:      the map being walked
 * @key:      key to continue from, or NULL to start from the beginning
 * @next_key: receives the idx of the next entry (written as a u32)
 *
 * Returns 0 and stores the next key in @next_key, or -ENOENT when no
 * further entry is found. A NULL @key, or a @key that is not present in the
 * map, restarts the walk from bucket 0.
 */
static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
void *next_key)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
u32 idx, *next = next_key;
struct bpf_dtab_netdev *dev, *next_dev;
struct hlist_head *head;
/* bucket cursor; stays 0 when the walk (re)starts from the first bucket */
int i = 0;
if (!key)
goto find_first;
idx = *(u32 *)key;
dev = __dev_map_hash_lookup_elem(map, idx);
if (!dev)
goto find_first;
/* first choice: the entry chained after @key's entry in the same bucket */
next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)),
struct bpf_dtab_netdev, index_hlist);
if (next_dev) {
*next = next_dev->idx;
return 0;
}
/* @key's entry was last in its bucket: resume at the following bucket */
i = idx & (dtab->n_buckets - 1);
i++;
find_first:
/* return the head entry of the first non-empty bucket at or after i */
for (; i < dtab->n_buckets; i++) {
head = dev_map_index_hash(dtab, i);
next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
struct bpf_dtab_netdev,
index_hlist);
if (next_dev) {
*next = next_dev->idx;
return 0;
}
}
return -ENOENT;
}
static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
bool in_napi_ctx) bool in_napi_ctx)
{ {
...@@ -235,7 +354,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, ...@@ -235,7 +354,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
out: out:
bq->count = 0; bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx,
sent, drops, bq->dev_rx, dev, err); sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL; bq->dev_rx = NULL;
__list_del_clearprev(&bq->flush_node); __list_del_clearprev(&bq->flush_node);
...@@ -363,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key) ...@@ -363,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
return dev ? &dev->ifindex : NULL; return dev ? &dev->ifindex : NULL;
} }
static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
*(u32 *)key);
struct net_device *dev = obj ? obj->dev : NULL;
return dev ? &dev->ifindex : NULL;
}
static void dev_map_flush_old(struct bpf_dtab_netdev *dev) static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{ {
if (dev->dev->netdev_ops->ndo_xdp_xmit) { if (dev->dev->netdev_ops->ndo_xdp_xmit) {
...@@ -412,17 +540,74 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) ...@@ -412,17 +540,74 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
return 0; return 0;
} }
static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
u64 map_flags)
{ {
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns; struct bpf_dtab_netdev *old_dev;
int k = *(u32 *)key;
unsigned long flags;
int ret = -ENOENT;
spin_lock_irqsave(&dtab->index_lock, flags);
old_dev = __dev_map_hash_lookup_elem(map, k);
if (old_dev) {
dtab->items--;
hlist_del_init_rcu(&old_dev->index_hlist);
call_rcu(&old_dev->rcu, __dev_map_entry_free);
ret = 0;
}
spin_unlock_irqrestore(&dtab->index_lock, flags);
return ret;
}
static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab *dtab,
u32 ifindex,
unsigned int idx)
{
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev;
struct xdp_bulk_queue *bq;
int cpu;
dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return ERR_PTR(-ENOMEM);
}
for_each_possible_cpu(cpu) {
bq = per_cpu_ptr(dev->bulkq, cpu);
bq->obj = dev;
}
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev);
return ERR_PTR(-EINVAL);
}
dev->idx = idx;
dev->dtab = dtab;
return dev;
}
static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct bpf_dtab_netdev *dev, *old_dev; struct bpf_dtab_netdev *dev, *old_dev;
u32 ifindex = *(u32 *)value; u32 ifindex = *(u32 *)value;
struct xdp_bulk_queue *bq;
u32 i = *(u32 *)key; u32 i = *(u32 *)key;
int cpu;
if (unlikely(map_flags > BPF_EXIST)) if (unlikely(map_flags > BPF_EXIST))
return -EINVAL; return -EINVAL;
...@@ -434,31 +619,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -434,31 +619,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) { if (!ifindex) {
dev = NULL; dev = NULL;
} else { } else {
dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node); dev = __dev_map_alloc_node(net, dtab, ifindex, i);
if (!dev) if (IS_ERR(dev))
return -ENOMEM; return PTR_ERR(dev);
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}
for_each_possible_cpu(cpu) {
bq = per_cpu_ptr(dev->bulkq, cpu);
bq->obj = dev;
}
dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev);
return -EINVAL;
}
dev->bit = i;
dev->dtab = dtab;
} }
/* Use call_rcu() here to ensure rcu critical sections have completed /* Use call_rcu() here to ensure rcu critical sections have completed
...@@ -472,6 +635,63 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, ...@@ -472,6 +635,63 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
return 0; return 0;
} }
/*
 * map_update_elem callback for array-style DEVMAP maps.
 *
 * Forwards to __dev_map_update_elem(), passing the network namespace taken
 * from current->nsproxy as the namespace to resolve the ifindex in.
 */
static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
			       u64 map_flags)
{
	struct net *caller_ns = current->nsproxy->net_ns;

	return __dev_map_update_elem(caller_ns, map, key, value, map_flags);
}
/*
 * Insert or replace the DEVMAP_HASH entry stored under *key.
 *
 * @net:       network namespace used to resolve the ifindex in *value
 * @map:       the DEVMAP_HASH map
 * @key:       pointer to the u32 index to store the entry under
 * @value:     pointer to the u32 ifindex of the target device (must be != 0)
 * @map_flags: BPF_ANY, BPF_NOEXIST or BPF_EXIST
 *
 * Returns 0 on success, -EINVAL for bad flags or a zero ifindex, -EEXIST if
 * BPF_NOEXIST was given and the key is already present, -E2BIG if the map
 * already holds max_entries items, or the error from __dev_map_alloc_node().
 *
 * The lookup of the existing entry and the BPF_NOEXIST check are performed
 * with index_lock held. Doing them before taking the lock lets two
 * concurrent updaters of the same key act on the same stale lookup result:
 * both may insert (leaving duplicate nodes and counting the item twice), or
 * both may unlink and free the same old entry.
 */
static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
				     void *key, void *value, u64 map_flags)
{
	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
	struct bpf_dtab_netdev *dev, *old_dev;
	u32 ifindex = *(u32 *)value;
	u32 idx = *(u32 *)key;
	unsigned long flags;
	int err = -EEXIST;

	if (unlikely(map_flags > BPF_EXIST || !ifindex))
		return -EINVAL;

	spin_lock_irqsave(&dtab->index_lock, flags);

	old_dev = __dev_map_hash_lookup_elem(map, idx);
	if (old_dev && (map_flags & BPF_NOEXIST))
		goto out_err;

	/* __dev_map_alloc_node() allocates with GFP_ATOMIC, so it may run here */
	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto out_err;
	}

	if (old_dev) {
		hlist_del_rcu(&old_dev->index_hlist);
	} else {
		if (dtab->items >= dtab->map.max_entries) {
			spin_unlock_irqrestore(&dtab->index_lock, flags);
			/* new node was never linked; release it via the RCU path */
			call_rcu(&dev->rcu, __dev_map_entry_free);
			return -E2BIG;
		}
		dtab->items++;
	}

	hlist_add_head_rcu(&dev->index_hlist,
			   dev_map_index_hash(dtab, idx));
	spin_unlock_irqrestore(&dtab->index_lock, flags);

	/* free the replaced entry only after the lock has been dropped */
	if (old_dev)
		call_rcu(&old_dev->rcu, __dev_map_entry_free);

	return 0;

out_err:
	spin_unlock_irqrestore(&dtab->index_lock, flags);
	return err;
}
/*
 * map_update_elem callback for DEVMAP_HASH maps.
 *
 * Forwards to __dev_map_hash_update_elem(), passing the network namespace
 * taken from current->nsproxy as the namespace to resolve the ifindex in.
 */
static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
				    u64 map_flags)
{
	struct net *caller_ns = current->nsproxy->net_ns;

	return __dev_map_hash_update_elem(caller_ns, map, key, value,
					  map_flags);
}
const struct bpf_map_ops dev_map_ops = { const struct bpf_map_ops dev_map_ops = {
.map_alloc = dev_map_alloc, .map_alloc = dev_map_alloc,
.map_free = dev_map_free, .map_free = dev_map_free,
...@@ -482,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = { ...@@ -482,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = {
.map_check_btf = map_check_no_btf, .map_check_btf = map_check_no_btf,
}; };
/*
 * Operations table for DEVMAP_HASH maps. Allocation and teardown use the
 * same dev_map_alloc/dev_map_free callbacks as dev_map_ops; key iteration,
 * lookup, update and delete use the hash-specific implementations.
 */
const struct bpf_map_ops dev_map_hash_ops = {
.map_alloc = dev_map_alloc,
.map_free = dev_map_free,
.map_get_next_key = dev_map_hash_get_next_key,
.map_lookup_elem = dev_map_hash_lookup_elem,
.map_update_elem = dev_map_hash_update_elem,
.map_delete_elem = dev_map_hash_delete_elem,
.map_check_btf = map_check_no_btf,
};
static int dev_map_notification(struct notifier_block *notifier, static int dev_map_notification(struct notifier_block *notifier,
ulong event, void *ptr) ulong event, void *ptr)
{ {
......
...@@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, ...@@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
goto error; goto error;
break; break;
case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
if (func_id != BPF_FUNC_redirect_map && if (func_id != BPF_FUNC_redirect_map &&
func_id != BPF_FUNC_map_lookup_elem) func_id != BPF_FUNC_map_lookup_elem)
goto error; goto error;
...@@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, ...@@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
break; break;
case BPF_FUNC_redirect_map: case BPF_FUNC_redirect_map:
if (map->map_type != BPF_MAP_TYPE_DEVMAP && if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_CPUMAP &&
map->map_type != BPF_MAP_TYPE_XSKMAP) map->map_type != BPF_MAP_TYPE_XSKMAP)
goto error; goto error;
......
...@@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, ...@@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
int err; int err;
switch (map->map_type) { switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP: { case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH: {
struct bpf_dtab_netdev *dst = fwd; struct bpf_dtab_netdev *dst = fwd;
err = dev_map_enqueue(dst, xdp, dev_rx); err = dev_map_enqueue(dst, xdp, dev_rx);
...@@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void) ...@@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void)
if (map) { if (map) {
switch (map->map_type) { switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
__dev_map_flush(map); __dev_map_flush(map);
break; break;
case BPF_MAP_TYPE_CPUMAP: case BPF_MAP_TYPE_CPUMAP:
...@@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) ...@@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
switch (map->map_type) { switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP:
return __dev_map_lookup_elem(map, index); return __dev_map_lookup_elem(map, index);
case BPF_MAP_TYPE_DEVMAP_HASH:
return __dev_map_hash_lookup_elem(map, index);
case BPF_MAP_TYPE_CPUMAP: case BPF_MAP_TYPE_CPUMAP:
return __cpu_map_lookup_elem(map, index); return __cpu_map_lookup_elem(map, index);
case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_XSKMAP:
...@@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ...@@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
ri->tgt_value = NULL; ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL); WRITE_ONCE(ri->map, NULL);
if (map->map_type == BPF_MAP_TYPE_DEVMAP) { if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
struct bpf_dtab_netdev *dst = fwd; struct bpf_dtab_netdev *dst = fwd;
err = dev_map_generic_redirect(dst, skb, xdp_prog); err = dev_map_generic_redirect(dst, skb, xdp_prog);
......
...@@ -46,7 +46,7 @@ MAP COMMANDS ...@@ -46,7 +46,7 @@ MAP COMMANDS
| *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** | *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash**
| | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** | | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash**
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** } | | **queue** | **stack** }
......
...@@ -489,8 +489,8 @@ _bpftool() ...@@ -489,8 +489,8 @@ _bpftool()
perf_event_array percpu_hash percpu_array \ perf_event_array percpu_hash percpu_array \
stack_trace cgroup_array lru_hash \ stack_trace cgroup_array lru_hash \
lru_percpu_hash lpm_trie array_of_maps \ lru_percpu_hash lpm_trie array_of_maps \
hash_of_maps devmap sockmap cpumap xskmap \ hash_of_maps devmap devmap_hash sockmap cpumap \
sockhash cgroup_storage reuseport_sockarray \ xskmap sockhash cgroup_storage reuseport_sockarray \
percpu_cgroup_storage queue stack' -- \ percpu_cgroup_storage queue stack' -- \
"$cur" ) ) "$cur" ) )
return 0 return 0
......
...@@ -37,6 +37,7 @@ const char * const map_type_name[] = { ...@@ -37,6 +37,7 @@ const char * const map_type_name[] = {
[BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
[BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
[BPF_MAP_TYPE_DEVMAP] = "devmap", [BPF_MAP_TYPE_DEVMAP] = "devmap",
[BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash",
[BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
[BPF_MAP_TYPE_CPUMAP] = "cpumap", [BPF_MAP_TYPE_CPUMAP] = "cpumap",
[BPF_MAP_TYPE_XSKMAP] = "xskmap", [BPF_MAP_TYPE_XSKMAP] = "xskmap",
...@@ -1271,7 +1272,7 @@ static int do_help(int argc, char **argv) ...@@ -1271,7 +1272,7 @@ static int do_help(int argc, char **argv)
" TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n" " TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n"
" percpu_array | stack_trace | cgroup_array | lru_hash |\n" " percpu_array | stack_trace | cgroup_array | lru_hash |\n"
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | sockmap | cpumap | xskmap | sockhash |\n" " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
" " HELP_SPEC_OPTIONS "\n" " " HELP_SPEC_OPTIONS "\n"
"", "",
......
...@@ -134,6 +134,7 @@ enum bpf_map_type { ...@@ -134,6 +134,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE, BPF_MAP_TYPE_SK_STORAGE,
BPF_MAP_TYPE_DEVMAP_HASH,
}; };
/* Note that tracing related programs such as /* Note that tracing related programs such as
......
...@@ -244,6 +244,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) ...@@ -244,6 +244,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_ARRAY_OF_MAPS:
case BPF_MAP_TYPE_HASH_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS:
case BPF_MAP_TYPE_DEVMAP: case BPF_MAP_TYPE_DEVMAP:
case BPF_MAP_TYPE_DEVMAP_HASH:
case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_SOCKMAP:
case BPF_MAP_TYPE_CPUMAP: case BPF_MAP_TYPE_CPUMAP:
case BPF_MAP_TYPE_XSKMAP: case BPF_MAP_TYPE_XSKMAP:
......
...@@ -508,6 +508,21 @@ static void test_devmap(unsigned int task, void *data) ...@@ -508,6 +508,21 @@ static void test_devmap(unsigned int task, void *data)
close(fd); close(fd);
} }
/*
 * Smoke test: create a two-entry BPF_MAP_TYPE_DEVMAP_HASH map with u32 key
 * and value sizes and close it again. Prints a message and exits with
 * status 1 if the map cannot be created. Both parameters are unused.
 */
static void test_devmap_hash(unsigned int task, void *data)
{
	__u32 key, value;
	int map_fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key),
				    sizeof(value), 2, 0);

	if (map_fd < 0) {
		printf("Failed to create devmap_hash '%s'!\n", strerror(errno));
		exit(1);
	}

	close(map_fd);
}
static void test_queuemap(unsigned int task, void *data) static void test_queuemap(unsigned int task, void *data)
{ {
const int MAP_SIZE = 32; const int MAP_SIZE = 32;
...@@ -1684,6 +1699,7 @@ static void run_all_tests(void) ...@@ -1684,6 +1699,7 @@ static void run_all_tests(void)
test_arraymap_percpu_many_keys(); test_arraymap_percpu_many_keys();
test_devmap(0, NULL); test_devmap(0, NULL);
test_devmap_hash(0, NULL);
test_sockmap(0, NULL); test_sockmap(0, NULL);
test_map_large(); test_map_large();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment