Commit 0ac10dc1 authored by Alexei Starovoitov's avatar Alexei Starovoitov

Merge branch 'bpf_iter-uapi-fix'

Yonghong Song says:

====================
Andrii raised a concern that current uapi for bpf iterator map
element is a little restrictive and not suitable for future potential
complex customization. This is a valid suggestion, considering people
may indeed add more complex custimization to the iterator, e.g.,
cgroup_id + user_id, etc. for task or task_file. Another example might
be map_id plus additional control so that the bpf iterator may bail
out a bucket earlier if a bucket has too many elements which may hold
lock too long and impact other parts of systems.

Patch #1 modified uapi with kernel changes. Patch #2
adjusted libbpf api accordingly.

Changelogs:
  v3 -> v4:
    . add a forward declaration of bpf_iter_link_info in
      tools/lib/bpf/bpf.h in case that libbpf is built against
      not-latest uapi bpf.h.
    . target the patch set to "bpf" instead of "bpf-next"
  v2 -> v3:
    . undo "not reject iter_info.map.map_fd == 0" from v1.
      In the future map_fd may become optional, so let us use map_fd == 0
      indicating the map_fd is not set by user space.
    . add link_info_len to bpf_iter_attach_opts to ensure always correct
      link_info_len from user. Otherwise, libbpf may deduce incorrect
      link_info_len if it uses different uapi header than the user app.
  v1 -> v2:
    . ensure link_create target_fd/flags == 0 since they are not used. (Andrii)
    . if either of iter_info ptr == 0 or iter_info_len == 0, but not both,
      return error to user space. (Andrii)
    . do not reject iter_info.map.map_fd == 0, go ahead to use it trying to
      get a map reference since the map_fd is required for map_elem iterator.
    . use bpf_iter_link_info in bpf_iter_attach_opts instead of map_fd.
      this way, user space is responsible to set up bpf_iter_link_info and
      libbpf just passes the data to the kernel, simplifying libbpf design.
      (Andrii)
====================
Signed-off-by: default avatarAlexei Starovoitov <ast@kernel.org>
parents 6bcaf41f 74fc097d
......@@ -1214,15 +1214,17 @@ struct bpf_iter_aux_info {
struct bpf_map *map;
};
typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux);
typedef void (*bpf_iter_detach_target_t)(struct bpf_iter_aux_info *aux);
#define BPF_ITER_CTX_ARG_MAX 2
struct bpf_iter_reg {
const char *target;
bpf_iter_check_target_t check_target;
bpf_iter_attach_target_t attach_target;
bpf_iter_detach_target_t detach_target;
u32 ctx_arg_info_size;
enum bpf_iter_link_info req_linfo;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
const struct bpf_iter_seq_info *seq_info;
};
......
......@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type */
};
union bpf_iter_link_info {
struct {
__u32 map_fd;
} map;
};
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
......@@ -249,13 +255,6 @@ enum bpf_link_type {
MAX_BPF_LINK_TYPE,
};
enum bpf_iter_link_info {
BPF_ITER_LINK_UNSPEC = 0,
BPF_ITER_LINK_MAP_FD = 1,
MAX_BPF_ITER_LINK_INFO,
};
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
......@@ -623,6 +622,8 @@ union bpf_attr {
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
......
......@@ -338,8 +338,8 @@ static void bpf_iter_link_release(struct bpf_link *link)
struct bpf_iter_link *iter_link =
container_of(link, struct bpf_iter_link, link);
if (iter_link->aux.map)
bpf_map_put_with_uref(iter_link->aux.map);
if (iter_link->tinfo->reg_info->detach_target)
iter_link->tinfo->reg_info->detach_target(&iter_link->aux);
}
static void bpf_iter_link_dealloc(struct bpf_link *link)
......@@ -390,15 +390,35 @@ bool bpf_link_is_iter(struct bpf_link *link)
int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
union bpf_iter_link_info __user *ulinfo;
struct bpf_link_primer link_primer;
struct bpf_iter_target_info *tinfo;
struct bpf_iter_aux_info aux = {};
union bpf_iter_link_info linfo;
struct bpf_iter_link *link;
u32 prog_btf_id, target_fd;
u32 prog_btf_id, linfo_len;
bool existed = false;
struct bpf_map *map;
int err;
if (attr->link_create.target_fd || attr->link_create.flags)
return -EINVAL;
memset(&linfo, 0, sizeof(union bpf_iter_link_info));
ulinfo = u64_to_user_ptr(attr->link_create.iter_info);
linfo_len = attr->link_create.iter_info_len;
if (!ulinfo ^ !linfo_len)
return -EINVAL;
if (ulinfo) {
err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo),
linfo_len);
if (err)
return err;
linfo_len = min_t(u32, linfo_len, sizeof(linfo));
if (copy_from_user(&linfo, ulinfo, linfo_len))
return -EFAULT;
}
prog_btf_id = prog->aux->attach_btf_id;
mutex_lock(&targets_mutex);
list_for_each_entry(tinfo, &targets, list) {
......@@ -411,13 +431,6 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
if (!existed)
return -ENOENT;
/* Make sure user supplied flags are target expected. */
target_fd = attr->link_create.target_fd;
if (attr->link_create.flags != tinfo->reg_info->req_linfo)
return -EINVAL;
if (!attr->link_create.flags && target_fd)
return -EINVAL;
link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
if (!link)
return -ENOMEM;
......@@ -431,28 +444,15 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
return err;
}
if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) {
map = bpf_map_get_with_uref(target_fd);
if (IS_ERR(map)) {
err = PTR_ERR(map);
goto cleanup_link;
}
aux.map = map;
err = tinfo->reg_info->check_target(prog, &aux);
if (tinfo->reg_info->attach_target) {
err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux);
if (err) {
bpf_map_put_with_uref(map);
goto cleanup_link;
bpf_link_cleanup(&link_primer);
return err;
}
link->aux.map = map;
}
return bpf_link_settle(&link_primer);
cleanup_link:
bpf_link_cleanup(&link_primer);
return err;
}
static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
......
......@@ -98,12 +98,21 @@ static struct bpf_iter_reg bpf_map_reg_info = {
.seq_info = &bpf_map_seq_info,
};
static int bpf_iter_check_map(struct bpf_prog *prog,
static int bpf_iter_attach_map(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
{
u32 key_acc_size, value_acc_size, key_size, value_size;
struct bpf_map *map = aux->map;
struct bpf_map *map;
bool is_percpu = false;
int err = -EINVAL;
if (!linfo->map.map_fd)
return -EBADF;
map = bpf_map_get_with_uref(linfo->map.map_fd);
if (IS_ERR(map))
return PTR_ERR(map);
if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
......@@ -112,7 +121,7 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
else if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_LRU_HASH &&
map->map_type != BPF_MAP_TYPE_ARRAY)
return -EINVAL;
goto put_map;
key_acc_size = prog->aux->max_rdonly_access;
value_acc_size = prog->aux->max_rdwr_access;
......@@ -122,10 +131,22 @@ static int bpf_iter_check_map(struct bpf_prog *prog,
else
value_size = round_up(map->value_size, 8) * num_possible_cpus();
if (key_acc_size > key_size || value_acc_size > value_size)
return -EACCES;
if (key_acc_size > key_size || value_acc_size > value_size) {
err = -EACCES;
goto put_map;
}
aux->map = map;
return 0;
put_map:
bpf_map_put_with_uref(map);
return err;
}
static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
bpf_map_put_with_uref(aux->map);
}
DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
......@@ -133,8 +154,8 @@ DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta,
static const struct bpf_iter_reg bpf_map_elem_reg_info = {
.target = "bpf_map_elem",
.check_target = bpf_iter_check_map,
.req_linfo = BPF_ITER_LINK_MAP_FD,
.attach_target = bpf_iter_attach_map,
.detach_target = bpf_iter_detach_map,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_map_elem, key),
......
......@@ -3883,7 +3883,7 @@ static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *
return -EINVAL;
}
#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
#define BPF_LINK_CREATE_LAST_FIELD link_create.iter_info_len
static int link_create(union bpf_attr *attr)
{
enum bpf_prog_type ptype;
......
......@@ -1384,18 +1384,39 @@ static int bpf_iter_init_sk_storage_map(void *priv_data,
return 0;
}
static int bpf_iter_check_map(struct bpf_prog *prog,
static int bpf_iter_attach_map(struct bpf_prog *prog,
union bpf_iter_link_info *linfo,
struct bpf_iter_aux_info *aux)
{
struct bpf_map *map = aux->map;
struct bpf_map *map;
int err = -EINVAL;
if (!linfo->map.map_fd)
return -EBADF;
map = bpf_map_get_with_uref(linfo->map.map_fd);
if (IS_ERR(map))
return PTR_ERR(map);
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
return -EINVAL;
goto put_map;
if (prog->aux->max_rdonly_access > map->value_size)
return -EACCES;
if (prog->aux->max_rdonly_access > map->value_size) {
err = -EACCES;
goto put_map;
}
aux->map = map;
return 0;
put_map:
bpf_map_put_with_uref(map);
return err;
}
static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
bpf_map_put_with_uref(aux->map);
}
static const struct seq_operations bpf_sk_storage_map_seq_ops = {
......@@ -1414,8 +1435,8 @@ static const struct bpf_iter_seq_info iter_seq_info = {
static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
.target = "bpf_sk_storage_map",
.check_target = bpf_iter_check_map,
.req_linfo = BPF_ITER_LINK_MAP_FD,
.attach_target = bpf_iter_attach_map,
.detach_target = bpf_iter_detach_map,
.ctx_arg_info_size = 2,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
......
......@@ -11,6 +11,7 @@
static int do_pin(int argc, char **argv)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
union bpf_iter_link_info linfo;
const char *objfile, *path;
struct bpf_program *prog;
struct bpf_object *obj;
......@@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv)
map_fd = map_parse_fd(&argc, &argv);
if (map_fd < 0)
return -1;
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = map_fd;
iter_opts.link_info = &linfo;
iter_opts.link_info_len = sizeof(linfo);
}
}
......@@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv)
goto close_obj;
}
if (map_fd >= 0)
iter_opts.map_fd = map_fd;
link = bpf_program__attach_iter(prog, &iter_opts);
if (IS_ERR(link)) {
err = PTR_ERR(link);
......
......@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type */
};
union bpf_iter_link_info {
struct {
__u32 map_fd;
} map;
};
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
......@@ -249,13 +255,6 @@ enum bpf_link_type {
MAX_BPF_LINK_TYPE,
};
enum bpf_iter_link_info {
BPF_ITER_LINK_UNSPEC = 0,
BPF_ITER_LINK_MAP_FD = 1,
MAX_BPF_ITER_LINK_INFO,
};
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
......@@ -623,6 +622,8 @@ union bpf_attr {
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
__aligned_u64 iter_info; /* extra bpf_iter_link_info */
__u32 iter_info_len; /* iter_info length */
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
......
......@@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
attr.link_create.iter_info =
ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
......
......@@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags;
union bpf_iter_link_info *iter_info;
__u32 iter_info_len;
};
#define bpf_link_create_opts__last_field flags
#define bpf_link_create_opts__last_field iter_info_len
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
......
......@@ -8306,10 +8306,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
return ERR_PTR(-EINVAL);
if (OPTS_HAS(opts, map_fd)) {
target_fd = opts->map_fd;
link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
}
link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
......
......@@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
struct bpf_iter_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 map_fd;
union bpf_iter_link_info *link_info;
__u32 link_info_len;
};
#define bpf_iter_attach_opts__last_field map_fd
#define bpf_iter_attach_opts__last_field link_info_len
LIBBPF_API struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
......
......@@ -468,6 +468,7 @@ static void test_bpf_hash_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_hash_map *skel;
int err, i, len, map_fd, iter_fd;
union bpf_iter_link_info linfo;
__u64 val, expected_val = 0;
struct bpf_link *link;
struct key_t {
......@@ -490,13 +491,16 @@ static void test_bpf_hash_map(void)
goto out;
/* iterator with hashmap2 and hashmap3 should fail */
opts.map_fd = bpf_map__fd(skel->maps.hashmap2);
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(!IS_ERR(link), "attach_iter",
"attach_iter for hashmap2 unexpected succeeded\n"))
goto out;
opts.map_fd = bpf_map__fd(skel->maps.hashmap3);
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(!IS_ERR(link), "attach_iter",
"attach_iter for hashmap3 unexpected succeeded\n"))
......@@ -519,7 +523,7 @@ static void test_bpf_hash_map(void)
goto out;
}
opts.map_fd = map_fd;
linfo.map.map_fd = map_fd;
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
......@@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_percpu_hash_map *skel;
int err, i, j, len, map_fd, iter_fd;
union bpf_iter_link_info linfo;
__u32 expected_val = 0;
struct bpf_link *link;
struct key_t {
......@@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void)
goto out;
}
opts.map_fd = map_fd;
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = map_fd;
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
......@@ -649,6 +657,7 @@ static void test_bpf_array_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
__u32 expected_key = 0, res_first_key;
struct bpf_iter_bpf_array_map *skel;
union bpf_iter_link_info linfo;
int err, i, map_fd, iter_fd;
struct bpf_link *link;
char buf[64] = {};
......@@ -673,7 +682,10 @@ static void test_bpf_array_map(void)
goto out;
}
opts.map_fd = map_fd;
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = map_fd;
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
......@@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_percpu_array_map *skel;
__u32 expected_key = 0, expected_val = 0;
union bpf_iter_link_info linfo;
int err, i, j, map_fd, iter_fd;
struct bpf_link *link;
char buf[64];
......@@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void)
goto out;
}
opts.map_fd = map_fd;
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = map_fd;
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
......@@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
int err, i, len, map_fd, iter_fd, num_sockets;
struct bpf_iter_bpf_sk_storage_map *skel;
union bpf_iter_link_info linfo;
int sock_fd[3] = {-1, -1, -1};
__u32 val, expected_val = 0;
struct bpf_link *link;
......@@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void)
goto out;
}
opts.map_fd = map_fd;
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = map_fd;
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
......@@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_test_kern5 *skel;
union bpf_iter_link_info linfo;
struct bpf_link *link;
skel = bpf_iter_test_kern5__open_and_load();
......@@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void)
"skeleton open_and_load failed\n"))
return;
opts.map_fd = bpf_map__fd(skel->maps.hashmap1);
memset(&linfo, 0, sizeof(linfo));
linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
opts.link_info = &linfo;
opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
bpf_link__destroy(link);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment