Commit f088cabf authored by Andrii Nakryiko

Merge branch 'bpf-add-a-generic-bits-iterator'

Yafang Shao says:

====================
bpf: Add a generic bits iterator

Three new kfuncs, namely bpf_iter_bits_{new,next,destroy}, have been
added for the new bpf_iter_bits functionality. These kfuncs enable the
iteration of the bits from a given address and a given number of bits.

- bpf_iter_bits_new
  Initialize a new bits iterator for a given memory area. Due to the
  limitation of bpf memalloc, the max number of bits to be iterated
  over is (4096 * 8).
- bpf_iter_bits_next
  Get the next bit in a bpf_iter_bits
- bpf_iter_bits_destroy
  Destroy a bpf_iter_bits

The bits iterator can be used in any context and on any address.

Changes:
- v7->v8:
  Refine the interface to avoid dealing with endianness (Andrii)
- v6->v7:
  Fix endianness error for non-long-aligned data (Andrii)
- v5->v6:
  Add positive tests (Andrii)
- v4->v5:
  Simplify test cases (Andrii)
- v3->v4:
  - Fix endianness error on s390x (Andrii)
  - zero-initialize kit->bits_copy and zero out nr_bits (Andrii)
- v2->v3:
  Optimization for u64/u32 mask (Andrii)
- v1->v2:
  Simplify the CPU number verification code to avoid the failure on s390x
  (Eduard)
- bpf: Add bpf_iter_cpumask
  https://lwn.net/Articles/961104/
- bpf: Add new bpf helper bpf_for_each_cpu
  https://lwn.net/Articles/939939/
====================

Link: https://lore.kernel.org/r/20240517023034.48138-1-laoar.shao@gmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
parents fbe3e847 6ba7acdb
......@@ -2744,6 +2744,122 @@ __bpf_kfunc void bpf_preempt_enable(void)
preempt_enable();
}
/*
 * Opaque bits-iterator state as seen by BPF programs: two 64-bit words,
 * 8-byte aligned. The bpf_iter_bits_*() kfuncs cast a pointer to this
 * structure to struct bpf_iter_bits_kern to access the real fields.
 */
struct bpf_iter_bits {
__u64 __opaque[2];
} __aligned(8);
/*
 * Kernel-side layout of struct bpf_iter_bits. bpf_iter_bits_new() asserts at
 * build time that both structures have the same size and alignment.
 */
struct bpf_iter_bits_kern {
union {
/* Pointer to an allocated copy of the bitmap; used when nr_bits != 64. */
unsigned long *bits;
/*
 * Inline copy of the bitmap; used when nr_bits == 64.
 * NOTE(review): bpf_iter_bits_new() copies 8 bytes into this field, which
 * presumably only fits when unsigned long is 64 bits wide - confirm for
 * 32-bit builds.
 */
unsigned long bits_copy;
};
/* Number of bits covered by the bitmap; 0 makes the iterator yield nothing. */
u32 nr_bits;
/* Search cursor: the next lookup starts at bit + 1; initialized to -1. */
int bit;
} __aligned(8);
/* Largest @nr_words bpf_iter_bits_new() accepts (512 * 8 bytes = 4096 bytes). */
#define BPF_ITER_BITS_MAX_WORDS 512

/**
 * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
 * @it: The new bpf_iter_bits to be created
 * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over
 * @nr_words: The size of the specified memory area, measured in 8-byte units.
 * Due to the limitation of memalloc, it can't be greater than 512.
 *
 * This function initializes a new bpf_iter_bits structure for iterating over
 * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It
 * copies the data of the memory area to the newly created bpf_iter_bits @it for
 * subsequent iteration operations.
 *
 * The iterator is always left in a well-defined empty state on failure, so
 * bpf_iter_bits_next() returns NULL and bpf_iter_bits_destroy() frees nothing.
 *
 * On success, 0 is returned. On failure, a negative errno is returned:
 * -EINVAL if @unsafe_ptr__ign is NULL or @nr_words is 0, -E2BIG if @nr_words
 * exceeds the documented limit, -ENOMEM if the copy buffer cannot be
 * allocated, or the error of the failed read of the memory area (reported as
 * -EFAULT on the single-word path).
 */
__bpf_kfunc int
bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
{
	struct bpf_iter_bits_kern *kit = (void *)it;
	u32 nr_bytes, nr_bits;
	int err;

	BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
	BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
		     __alignof__(struct bpf_iter_bits));

	/* Start from the empty state so every early return is safe to destroy. */
	kit->nr_bits = 0;
	kit->bits_copy = 0;
	kit->bit = -1;

	if (!unsafe_ptr__ign || !nr_words)
		return -EINVAL;

	/*
	 * Bound nr_words before multiplying: the u32 products below can wrap.
	 * E.g. nr_words = 0x04000001 gives nr_bytes = 0x20000008, whose bit
	 * count truncates to 64 and would select the inline path with a huge
	 * read length. The allocator may still refuse sizes close to the
	 * limit; that case is reported as -ENOMEM further down.
	 */
	if (nr_words > BPF_ITER_BITS_MAX_WORDS)
		return -E2BIG;

	nr_bytes = nr_words * sizeof(u64);
	nr_bits = BYTES_TO_BITS(nr_bytes);

	/* Optimization for u64 mask */
	if (nr_bits == 64) {
		err = bpf_probe_read_kernel_common(&kit->bits_copy, nr_bytes, unsafe_ptr__ign);
		if (err)
			return -EFAULT;

		kit->nr_bits = nr_bits;
		return 0;
	}

	/* Fallback to memalloc */
	kit->bits = bpf_mem_alloc(&bpf_global_ma, nr_bytes);
	if (!kit->bits)
		return -ENOMEM;

	err = bpf_probe_read_kernel_common(kit->bits, nr_bytes, unsafe_ptr__ign);
	if (err) {
		bpf_mem_free(&bpf_global_ma, kit->bits);
		/* Do not leave a dangling pointer behind in the iterator state. */
		kit->bits = NULL;
		return err;
	}

	kit->nr_bits = nr_bits;
	return 0;
}
/**
 * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
 * @it: The bpf_iter_bits to be checked
 *
 * This function returns a pointer to a number holding the index of the next
 * set bit in the bits, searching upwards from the previously returned index.
 *
 * If there are no further bits available, it returns NULL. Once exhausted,
 * the iterator keeps returning NULL on every subsequent call.
 */
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
{
	struct bpf_iter_bits_kern *kit = (void *)it;
	/*
	 * Both values are handled as int on purpose: kit->bit starts at -1 and
	 * must not be converted to unsigned when compared against nr_bits.
	 */
	int bit = kit->bit, nr_bits = kit->nr_bits;
	const unsigned long *bits;

	/* Nothing to iterate (failed/empty init) or already exhausted. */
	if (!nr_bits || bit >= nr_bits)
		return NULL;

	bits = nr_bits == 64 ? &kit->bits_copy : kit->bits;
	bit = find_next_bit(bits, nr_bits, bit + 1);

	/*
	 * Record the result even when the search ran off the end. Exhaustion is
	 * remembered in kit->bit rather than by zeroing kit->nr_bits, because
	 * bpf_iter_bits_destroy() relies on nr_bits to decide whether an
	 * allocated bitmap has to be freed; zeroing it here would leak the
	 * buffer of every fully consumed multi-word iterator.
	 */
	kit->bit = bit;
	if (bit >= nr_bits)
		return NULL;

	return &kit->bit;
}
/**
 * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
 * @it: The bpf_iter_bits to be destroyed
 *
 * Release whatever the bpf_iter_bits still holds. A bitmap buffer is handed
 * back to the allocator only when the iterator covers more than 64 bits;
 * otherwise the call does nothing.
 */
__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
{
	struct bpf_iter_bits_kern *state = (void *)it;

	if (state->nr_bits > 64)
		bpf_mem_free(&bpf_global_ma, state->bits);
}
__bpf_kfunc_end_defs();
BTF_KFUNCS_START(generic_btf_ids)
......@@ -2826,6 +2942,9 @@ BTF_ID_FLAGS(func, bpf_wq_set_callback_impl)
BTF_ID_FLAGS(func, bpf_wq_start)
BTF_ID_FLAGS(func, bpf_preempt_disable)
BTF_ID_FLAGS(func, bpf_preempt_enable)
/*
 * Generic bits iterator: constructor, next (flagged as possibly returning
 * NULL) and destructor kfuncs.
 */
BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
BTF_KFUNCS_END(common_btf_ids)
static const struct btf_kfunc_id_set common_kfunc_set = {
......
......@@ -85,6 +85,7 @@
#include "verifier_xadd.skel.h"
#include "verifier_xdp.skel.h"
#include "verifier_xdp_direct_packet_access.skel.h"
#include "verifier_bits_iter.skel.h"
#define MAX_ENTRIES 11
......@@ -200,6 +201,7 @@ void test_verifier_var_off(void) { RUN(verifier_var_off); }
void test_verifier_xadd(void) { RUN(verifier_xadd); }
void test_verifier_xdp(void) { RUN(verifier_xdp); }
void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
/* Test entry point: invokes RUN() on the verifier_bits_iter skeleton. */
void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
......
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "task_kfunc_common.h"
char _license[] SEC("license") = "GPL";
/*
 * Declarations of the bits-iterator kfuncs under test. The third argument of
 * bpf_iter_bits_new() is a count of 8-byte words, matching the kernel-side
 * definition, not a count of bits.
 */
int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign,
		      u32 nr_words) __ksym __weak;
int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
/*
 * Negative test: the iterator is created and advanced but never destroyed.
 * The program is expected to be rejected with "Unreleased reference".
 */
SEC("iter.s/cgroup")
__description("bits iter without destroy")
__failure __msg("Unreleased reference")
int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
struct bpf_iter_bits it;
u64 data = 1;
bpf_iter_bits_new(&it, &data, 1);
bpf_iter_bits_next(&it);
/* Deliberately no bpf_iter_bits_destroy() call before returning. */
return 0;
}
/*
 * Negative test: bpf_iter_bits_next() is called on a NULL iterator pointer
 * that never went through bpf_iter_bits_new(). The program is expected to be
 * rejected with "expected an initialized iter_bits as arg #1".
 */
SEC("iter/cgroup")
__description("uninitialized iter in ->next()")
__failure __msg("expected an initialized iter_bits as arg #1")
int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
struct bpf_iter_bits *it = NULL;
bpf_iter_bits_next(it);
return 0;
}
/*
 * Negative test: bpf_iter_bits_destroy() is called on a zero-initialized
 * stack iterator that never went through bpf_iter_bits_new(). The program is
 * expected to be rejected with "expected an initialized iter_bits as arg #1".
 */
SEC("iter/cgroup")
__description("uninitialized iter in ->destroy()")
__failure __msg("expected an initialized iter_bits as arg #1")
int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
struct bpf_iter_bits it = {};
bpf_iter_bits_destroy(&it);
return 0;
}
/* Iterating over a NULL source pointer is expected to visit no bits at all. */
SEC("syscall")
__description("null pointer")
__success __retval(0)
int null_pointer(void)
{
	int *pos;
	int visited = 0;

	bpf_for_each(bits, pos, NULL, 1) {
		visited += 1;
	}

	return visited;
}
/* Single-word bitmap: counts the set bits of one u64 (expected: 10). */
SEC("syscall")
__description("bits copy")
__success __retval(10)
int bits_copy(void)
{
	/* Set bits per hex digit f,7,3,1,0: 4 + 3 + 2 + 1 + 0 = 10 */
	u64 mask = 0xf7310UL;
	int *pos;
	int visited = 0;

	bpf_for_each(bits, pos, &mask, 1) {
		visited += 1;
	}

	return visited;
}
/* Two-word bitmap: counts the set bits across both u64 words (expected: 64). */
SEC("syscall")
__description("bits memalloc")
__success __retval(64)
int bits_memalloc(void)
{
	u64 words[2];
	int *pos;
	int visited = 0;

	/* Every byte is 0xf0, i.e. 4 set bits in each of the 16 bytes. */
	__builtin_memset(&words, 0xf0, sizeof(words));

	bpf_for_each(bits, pos, &words[0], sizeof(words) / sizeof(u64)) {
		visited += 1;
	}

	return visited;
}
/* The value handed out by the iterator is the index of the set bit (here 8). */
SEC("syscall")
__description("bit index")
__success __retval(8)
int bit_index(void)
{
	u64 mask = 0x100;
	int *pos;
	int last_idx = 0;

	bpf_for_each(bits, pos, &mask, 1) {
		/* Index 0 is ignored; any other index is remembered. */
		if (*pos != 0)
			last_idx = *pos;
	}

	return last_idx;
}
/*
 * Requests more words than the iterator supports; the loop is expected to
 * visit no bits even though the source words are all ones.
 */
SEC("syscall")
__description("bits nomem")
__success __retval(0)
int bits_nomem(void)
{
	u64 words[4];
	int *pos;
	int visited = 0;

	__builtin_memset(&words, 0xff, sizeof(words));

	/* 513 words: deliberately above the 512-word limit. */
	bpf_for_each(bits, pos, &words[0], 513) {
		visited += 1;
	}

	return visited;
}
/* Only the first of two words is iterated, so just its single set bit counts. */
SEC("syscall")
__description("fewer words")
__success __retval(1)
int fewer_words(void)
{
	u64 words[2] = {0x1, 0xff};
	int *pos;
	int visited = 0;

	bpf_for_each(bits, pos, &words[0], 1) {
		visited += 1;
	}

	return visited;
}
/* A word count of zero is expected to produce an empty iteration. */
SEC("syscall")
__description("zero words")
__success __retval(0)
int zero_words(void)
{
	u64 words[2] = {0x1, 0xff};
	int *pos;
	int visited = 0;

	bpf_for_each(bits, pos, &words[0], 0) {
		visited += 1;
	}

	return visited;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment