Commit 8596a75f authored by Alexei Starovoitov

Merge branch 'cgroup-bpf_link'

Andrii Nakryiko says:

====================
bpf_link abstraction itself was formalized in [0] with justifications for why
its semantics is a good fit for attaching BPF programs of various types. This
patch set adds bpf_link-based BPF program attachment mechanism for cgroup BPF
programs.

Cgroup BPF link is semantically compatible with current BPF_F_ALLOW_MULTI
semantics of attaching cgroup BPF programs directly. Thus cgroup bpf_link can
co-exist with legacy BPF program multi-attachment.

bpf_link is destroyed and automatically detached when the last open FD holding
the reference to bpf_link is closed. This means that by default, when the
process that created bpf_link exits, attached BPF program will be
automatically detached due to bpf_link's clean up code. Cgroup bpf_link, like
any other bpf_link, can be pinned in BPF FS and by those means survive the
exit of process that created the link. This is useful in many scenarios to
provide long-living BPF program attachments. Pinning also means that there
could be many owners of bpf_link through independent FDs.

Additionally, auto-detachment of cgroup bpf_link is implemented. When cgroup is
dying it will automatically detach all active bpf_links. This ensures that
cgroup clean up is not delayed due to active bpf_link even despite no chance
for any BPF program to be run for a given cgroup. In that sense it's similar
to existing behavior of dropping refcnt of attached bpf_prog. But in the case
of bpf_link, bpf_link is not destroyed and is still available to user as long
as at least one active FD is still open (or if it's pinned in BPF FS).

There are two main cgroup-specific differences between bpf_link-based and
direct bpf_prog-based attachment.

First, as opposed to direct bpf_prog attachment, cgroup itself doesn't "own"
bpf_link, which makes it possible to auto-clean up attached bpf_link when user
process abruptly exits without explicitly detaching BPF program. This makes
for a safe default behavior proven in BPF tracing program types. But bpf_link
doesn't bump cgroup->bpf.refcnt as well and because of that doesn't prevent
cgroup from cleaning up its BPF state.

Second, only owners of bpf_link (those who created bpf_link in the first place
or obtained a new FD by opening bpf_link from BPF FS) can detach and/or update
it. This makes sure that no other process can accidentally remove/replace BPF
program.

This patch set also implements LINK_UPDATE sub-command, which allows replacing
bpf_link's underlying bpf_prog, similarly to BPF_F_REPLACE flag
behavior for direct bpf_prog cgroup attachment. Similarly to LINK_CREATE, it
is supposed to be generic command for different types of bpf_links.

  [0] https://lore.kernel.org/bpf/20200228223948.360936-1-andriin@fb.com/

v2->v3:
  - revert back to just MULTI mode (Alexei);
  - fix tinyconfig compilation warning (kbuild test robot);

v1->v2:
  - implement exclusive and overridable exclusive modes (Andrey Ignatov);
  - fix build for !CONFIG_CGROUP_BPF build;
  - add more selftests for non-multi mode and inter-operability;
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents e5ffcc91 7cccee42
...@@ -51,9 +51,18 @@ struct bpf_cgroup_storage { ...@@ -51,9 +51,18 @@ struct bpf_cgroup_storage {
struct rcu_head rcu; struct rcu_head rcu;
}; };
/* cgroup flavour of bpf_link: embeds the generic bpf_link and records where
 * the link is attached.
 */
struct bpf_cgroup_link {
/* generic link; container_of() on this member recovers the cgroup link */
struct bpf_link link;
/* cgroup the link is attached to; NULL once the link has been auto-detached */
struct cgroup *cgroup;
/* attach type the link was created with */
enum bpf_attach_type type;
};
extern const struct bpf_link_ops bpf_cgroup_link_lops;
struct bpf_prog_list { struct bpf_prog_list {
struct list_head node; struct list_head node;
struct bpf_prog *prog; struct bpf_prog *prog;
struct bpf_cgroup_link *link;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
}; };
...@@ -84,20 +93,27 @@ struct cgroup_bpf { ...@@ -84,20 +93,27 @@ struct cgroup_bpf {
int cgroup_bpf_inherit(struct cgroup *cgrp); int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp); void cgroup_bpf_offline(struct cgroup *cgrp);
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *replace_prog, struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags); enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type); enum bpf_attach_type type);
int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
struct bpf_prog *new_prog);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr); union bpf_attr __user *uattr);
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *replace_prog, enum bpf_attach_type type, struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type,
u32 flags); u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags); enum bpf_attach_type type);
int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
struct bpf_prog *new_prog);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr); union bpf_attr __user *uattr);
...@@ -332,11 +348,13 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, ...@@ -332,11 +348,13 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog); enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr, int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype); enum bpf_prog_type ptype);
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int cgroup_bpf_prog_query(const union bpf_attr *attr, int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr); union bpf_attr __user *uattr);
#else #else
struct bpf_prog; struct bpf_prog;
struct bpf_link;
struct cgroup_bpf {}; struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
...@@ -354,6 +372,19 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr, ...@@ -354,6 +372,19 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
return -EINVAL; return -EINVAL;
} }
static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
struct bpf_prog *prog)
{
return -EINVAL;
}
static inline int cgroup_bpf_replace(struct bpf_link *link,
struct bpf_prog *old_prog,
struct bpf_prog *new_prog)
{
return -EINVAL;
}
static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr) union bpf_attr __user *uattr)
{ {
......
...@@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled; ...@@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled;
int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_prog_new_fd(struct bpf_prog *prog);
struct bpf_link; struct bpf_link {
atomic64_t refcnt;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
};
struct bpf_link_ops { struct bpf_link_ops {
void (*release)(struct bpf_link *link); void (*release)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link);
}; };
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog); struct bpf_prog *prog);
void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
int link_fd);
void bpf_link_inc(struct bpf_link *link); void bpf_link_inc(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link); void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link);
......
...@@ -111,6 +111,8 @@ enum bpf_cmd { ...@@ -111,6 +111,8 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH, BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH, BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH, BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
}; };
enum bpf_map_type { enum bpf_map_type {
...@@ -541,7 +543,7 @@ union bpf_attr { ...@@ -541,7 +543,7 @@ union bpf_attr {
__u32 prog_cnt; __u32 prog_cnt;
} query; } query;
struct { struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name; __u64 name;
__u32 prog_fd; __u32 prog_fd;
} raw_tracepoint; } raw_tracepoint;
...@@ -569,6 +571,24 @@ union bpf_attr { ...@@ -569,6 +571,24 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */ __u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */ __u64 probe_addr; /* output: probe_addr */
} task_fd_query; } task_fd_query;
struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
__u32 target_fd; /* object to attach to */
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
/* new program fd to update link with */
__u32 new_prog_fd;
__u32 flags; /* extra flags */
/* expected link's program fd; is specified only if
* BPF_F_REPLACE flag is set in flags */
__u32 old_prog_fd;
} link_update;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF /* The description below is an attempt at providing documentation to eBPF
......
...@@ -80,6 +80,17 @@ static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[]) ...@@ -80,6 +80,17 @@ static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
bpf_cgroup_storage_unlink(storages[stype]); bpf_cgroup_storage_unlink(storages[stype]);
} }
/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops the cgroup reference held by the link and clears link->cgroup;
 * bpf_cgroup_link_release() treats a NULL cgroup as "already detached".
 * Neither the bpf_prog reference nor the link memory is released here; the
 * memory is freed by the link's dealloc() callback.
 */
static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
{
cgroup_put(link->cgroup);
link->cgroup = NULL;
}
/** /**
* cgroup_bpf_release() - put references of all bpf programs and * cgroup_bpf_release() - put references of all bpf programs and
* release all cgroup bpf data * release all cgroup bpf data
...@@ -100,7 +111,10 @@ static void cgroup_bpf_release(struct work_struct *work) ...@@ -100,7 +111,10 @@ static void cgroup_bpf_release(struct work_struct *work)
list_for_each_entry_safe(pl, tmp, progs, node) { list_for_each_entry_safe(pl, tmp, progs, node) {
list_del(&pl->node); list_del(&pl->node);
if (pl->prog)
bpf_prog_put(pl->prog); bpf_prog_put(pl->prog);
if (pl->link)
bpf_cgroup_link_auto_detach(pl->link);
bpf_cgroup_storages_unlink(pl->storage); bpf_cgroup_storages_unlink(pl->storage);
bpf_cgroup_storages_free(pl->storage); bpf_cgroup_storages_free(pl->storage);
kfree(pl); kfree(pl);
...@@ -134,6 +148,18 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref) ...@@ -134,6 +148,18 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref)
queue_work(system_wq, &cgrp->bpf.release_work); queue_work(system_wq, &cgrp->bpf.release_work);
} }
/* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
* link or direct prog.
*/
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
if (pl->prog)
return pl->prog;
if (pl->link)
return pl->link->link.prog;
return NULL;
}
/* count number of elements in the list. /* count number of elements in the list.
* it's slow but the list cannot be long * it's slow but the list cannot be long
*/ */
...@@ -143,7 +169,7 @@ static u32 prog_list_length(struct list_head *head) ...@@ -143,7 +169,7 @@ static u32 prog_list_length(struct list_head *head)
u32 cnt = 0; u32 cnt = 0;
list_for_each_entry(pl, head, node) { list_for_each_entry(pl, head, node) {
if (!pl->prog) if (!prog_list_prog(pl))
continue; continue;
cnt++; cnt++;
} }
...@@ -212,11 +238,11 @@ static int compute_effective_progs(struct cgroup *cgrp, ...@@ -212,11 +238,11 @@ static int compute_effective_progs(struct cgroup *cgrp,
continue; continue;
list_for_each_entry(pl, &p->bpf.progs[type], node) { list_for_each_entry(pl, &p->bpf.progs[type], node) {
if (!pl->prog) if (!prog_list_prog(pl))
continue; continue;
item = &progs->items[cnt]; item = &progs->items[cnt];
item->prog = pl->prog; item->prog = prog_list_prog(pl);
bpf_cgroup_storages_assign(item->cgroup_storage, bpf_cgroup_storages_assign(item->cgroup_storage,
pl->storage); pl->storage);
cnt++; cnt++;
...@@ -333,19 +359,60 @@ static int update_effective_progs(struct cgroup *cgrp, ...@@ -333,19 +359,60 @@ static int update_effective_progs(struct cgroup *cgrp,
#define BPF_CGROUP_MAX_PROGS 64 #define BPF_CGROUP_MAX_PROGS 64
/* Pick the bpf_prog_list entry that an attach operation should reuse.
 *
 * Returns the entry to overwrite, NULL when a fresh entry has to be added,
 * or an ERR_PTR(): -EINVAL when @prog or @link is already on the list,
 * -ENOENT when @replace_prog is given but not found on the list.
 */
static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *entry;

	if (!allow_multi)
		/* single-attach case: the first entry, if any, is reused */
		return list_empty(progs) ? NULL :
		       list_first_entry(progs, typeof(*entry), node);

	list_for_each_entry(entry, progs, node) {
		/* disallow attaching the same prog or the same link twice */
		if ((prog && entry->prog == prog) ||
		    (link && entry->link == link))
			return ERR_PTR(-EINVAL);
	}

	/* plain multi-attach: nothing to replace, caller adds a new entry */
	if (!replace_prog)
		return NULL;

	/* direct prog multi-attach w/ replacement case */
	list_for_each_entry(entry, progs, node) {
		if (entry->prog == replace_prog)
			return entry;
	}

	/* prog to replace not found for cgroup */
	return ERR_PTR(-ENOENT);
}
/** /**
* __cgroup_bpf_attach() - Attach the program to a cgroup, and * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
* propagate the change to descendants * propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse * @cgrp: The cgroup which descendants to traverse
* @prog: A program to attach * @prog: A program to attach
* @link: A link to attach
* @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
* @type: Type of attach operation * @type: Type of attach operation
* @flags: Option flags * @flags: Option flags
* *
* Exactly one of @prog or @link can be non-null.
* Must be called with cgroup_mutex held. * Must be called with cgroup_mutex held.
*/ */
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *replace_prog, struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags) enum bpf_attach_type type, u32 flags)
{ {
u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)); u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
...@@ -353,13 +420,19 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ...@@ -353,13 +420,19 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *old_prog = NULL; struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL}; *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
struct bpf_prog_list *pl, *replace_pl = NULL; struct bpf_prog_list *pl;
int err; int err;
if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) || if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI))) ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */ /* invalid combination */
return -EINVAL; return -EINVAL;
if (link && (prog || replace_prog))
/* only either link or prog/replace_prog can be specified */
return -EINVAL;
if (!!replace_prog != !!(flags & BPF_F_REPLACE))
/* replace_prog implies BPF_F_REPLACE, and vice versa */
return -EINVAL;
if (!hierarchy_allows_attach(cgrp, type)) if (!hierarchy_allows_attach(cgrp, type))
return -EPERM; return -EPERM;
...@@ -374,26 +447,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ...@@ -374,26 +447,15 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG; return -E2BIG;
if (flags & BPF_F_ALLOW_MULTI) { pl = find_attach_entry(progs, prog, link, replace_prog,
list_for_each_entry(pl, progs, node) { flags & BPF_F_ALLOW_MULTI);
if (pl->prog == prog) if (IS_ERR(pl))
/* disallow attaching the same prog twice */ return PTR_ERR(pl);
return -EINVAL;
if (pl->prog == replace_prog)
replace_pl = pl;
}
if ((flags & BPF_F_REPLACE) && !replace_pl)
/* prog to replace not found for cgroup */
return -ENOENT;
} else if (!list_empty(progs)) {
replace_pl = list_first_entry(progs, typeof(*pl), node);
}
if (bpf_cgroup_storages_alloc(storage, prog)) if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog))
return -ENOMEM; return -ENOMEM;
if (replace_pl) { if (pl) {
pl = replace_pl;
old_prog = pl->prog; old_prog = pl->prog;
bpf_cgroup_storages_unlink(pl->storage); bpf_cgroup_storages_unlink(pl->storage);
bpf_cgroup_storages_assign(old_storage, pl->storage); bpf_cgroup_storages_assign(old_storage, pl->storage);
...@@ -407,6 +469,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ...@@ -407,6 +469,7 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
} }
pl->prog = prog; pl->prog = prog;
pl->link = link;
bpf_cgroup_storages_assign(pl->storage, storage); bpf_cgroup_storages_assign(pl->storage, storage);
cgrp->bpf.flags[type] = saved_flags; cgrp->bpf.flags[type] = saved_flags;
...@@ -414,80 +477,173 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, ...@@ -414,80 +477,173 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (err) if (err)
goto cleanup; goto cleanup;
static_branch_inc(&cgroup_bpf_enabled_key);
bpf_cgroup_storages_free(old_storage); bpf_cgroup_storages_free(old_storage);
if (old_prog) { if (old_prog)
bpf_prog_put(old_prog); bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key); else
} static_branch_inc(&cgroup_bpf_enabled_key);
bpf_cgroup_storages_link(storage, cgrp, type); bpf_cgroup_storages_link(pl->storage, cgrp, type);
return 0; return 0;
cleanup: cleanup:
/* and cleanup the prog list */ if (old_prog) {
pl->prog = old_prog; pl->prog = old_prog;
pl->link = NULL;
}
bpf_cgroup_storages_free(pl->storage); bpf_cgroup_storages_free(pl->storage);
bpf_cgroup_storages_assign(pl->storage, old_storage); bpf_cgroup_storages_assign(pl->storage, old_storage);
bpf_cgroup_storages_link(pl->storage, cgrp, type); bpf_cgroup_storages_link(pl->storage, cgrp, type);
if (!replace_pl) { if (!old_prog) {
list_del(&pl->node); list_del(&pl->node);
kfree(pl); kfree(pl);
} }
return err; return err;
} }
/* Swap updated BPF program for given link in effective program arrays across
* all descendant cgroups. This function is guaranteed to succeed.
*
* For every descendant of @cgrp (visited in pre-order, @cgrp included) the
* slot that belongs to @link in that descendant's effective array for @type
* is located and overwritten with link->link.prog. No array is reallocated;
* only the prog pointer of one existing item is written per descendant.
*/
static void replace_effective_prog(struct cgroup *cgrp,
enum bpf_attach_type type,
struct bpf_cgroup_link *link)
{
struct bpf_prog_array_item *item;
struct cgroup_subsys_state *css;
struct bpf_prog_array *progs;
struct bpf_prog_list *pl;
struct list_head *head;
struct cgroup *cg;
int pos;
css_for_each_descendant_pre(css, &cgrp->self) {
struct cgroup *desc = container_of(css, struct cgroup, self);
/* skip descendants whose bpf refcnt has already dropped to zero */
if (percpu_ref_is_zero(&desc->bpf.refcnt))
continue;
/* find position of link in effective progs array */
/* walk from desc up through its ancestors, counting entries that carry
 * a program; presumably this mirrors the ordering used when the
 * effective array was computed - TODO confirm against
 * compute_effective_progs()
 */
for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
/* once something was counted, ancestors without ALLOW_MULTI are skipped */
if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
continue;
head = &cg->bpf.progs[type];
list_for_each_entry(pl, head, node) {
/* entries without a prog or link do not occupy a slot */
if (!prog_list_prog(pl))
continue;
if (pl->link == link)
goto found;
pos++;
}
}
found:
/* cg is NULL only if the walk ran past the root without finding link */
BUG_ON(!cg);
progs = rcu_dereference_protected(
desc->bpf.effective[type],
lockdep_is_held(&cgroup_mutex));
item = &progs->items[pos];
/* single-pointer store, paired with readers of the effective array */
WRITE_ONCE(item->prog, link->link.prog);
}
}
/** /**
* __cgroup_bpf_detach() - Detach the program from a cgroup, and * __cgroup_bpf_replace() - Replace link's program and propagate the change
* to descendants
* @cgrp: The cgroup which descendants to traverse
* @link: A link for which to replace BPF program
* @new_prog: A new BPF program to install in place of the link's current one
*
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
struct bpf_prog *new_prog)
{
struct list_head *progs = &cgrp->bpf.progs[link->type];
struct bpf_prog *old_prog;
struct bpf_prog_list *pl;
bool found = false;
if (link->link.prog->type != new_prog->type)
return -EINVAL;
list_for_each_entry(pl, progs, node) {
if (pl->link == link) {
found = true;
break;
}
}
if (!found)
return -ENOENT;
old_prog = xchg(&link->link.prog, new_prog);
replace_effective_prog(cgrp, link->type, link);
bpf_prog_put(old_prog);
return 0;
}
/* Locate the bpf_prog_list entry that a detach operation refers to.
 *
 * Returns the matching entry, ERR_PTR(-ENOENT) when nothing matches (or the
 * list is empty), or ERR_PTR(-EINVAL) when a multi-attach detach names
 * neither a prog nor a link.
 */
static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{
	struct bpf_prog_list *entry;

	if (allow_multi) {
		/* to detach MULTI prog the user has to specify valid FD
		 * of the program or link to be detached
		 */
		if (!prog && !link)
			return ERR_PTR(-EINVAL);

		/* find the prog or link and detach it */
		list_for_each_entry(entry, progs, node) {
			if (entry->prog == prog && entry->link == link)
				return entry;
		}
		return ERR_PTR(-ENOENT);
	}

	/* report error when trying to detach and nothing is attached */
	if (list_empty(progs))
		return ERR_PTR(-ENOENT);

	/* to maintain backward compatibility NONE and OVERRIDE cgroups
	 * allow detaching with invalid FD (prog==NULL) in legacy mode
	 */
	return list_first_entry(progs, typeof(*entry), node);
}
/**
* __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
* propagate the change to descendants * propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse * @cgrp: The cgroup which descendants to traverse
* @prog: A program to detach or NULL * @prog: A program to detach or NULL
* @link: A link to detach or NULL
* @type: Type of detach operation * @type: Type of detach operation
* *
* At most one of @prog or @link can be non-NULL.
* Must be called with cgroup_mutex held. * Must be called with cgroup_mutex held.
*/ */
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type) struct bpf_cgroup_link *link, enum bpf_attach_type type)
{ {
struct list_head *progs = &cgrp->bpf.progs[type]; struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type]; u32 flags = cgrp->bpf.flags[type];
struct bpf_prog *old_prog = NULL;
struct bpf_prog_list *pl; struct bpf_prog_list *pl;
struct bpf_prog *old_prog;
int err; int err;
if (flags & BPF_F_ALLOW_MULTI) { if (prog && link)
if (!prog) /* only one of prog or link can be specified */
/* to detach MULTI prog the user has to specify valid FD
* of the program to be detached
*/
return -EINVAL; return -EINVAL;
} else {
if (list_empty(progs))
/* report error when trying to detach and nothing is attached */
return -ENOENT;
}
if (flags & BPF_F_ALLOW_MULTI) { pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
/* find the prog and detach it */ if (IS_ERR(pl))
list_for_each_entry(pl, progs, node) { return PTR_ERR(pl);
if (pl->prog != prog)
continue; /* mark it deleted, so it's ignored while recomputing effective */
old_prog = prog;
/* mark it deleted, so it's ignored while
* recomputing effective
*/
pl->prog = NULL;
break;
}
if (!old_prog)
return -ENOENT;
} else {
/* to maintain backward compatibility NONE and OVERRIDE cgroups
* allow detaching with invalid FD (prog==NULL)
*/
pl = list_first_entry(progs, typeof(*pl), node);
old_prog = pl->prog; old_prog = pl->prog;
pl->prog = NULL; pl->prog = NULL;
} pl->link = NULL;
err = update_effective_progs(cgrp, type); err = update_effective_progs(cgrp, type);
if (err) if (err)
...@@ -501,14 +657,15 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, ...@@ -501,14 +657,15 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
if (list_empty(progs)) if (list_empty(progs))
/* last program was detached, reset flags to zero */ /* last program was detached, reset flags to zero */
cgrp->bpf.flags[type] = 0; cgrp->bpf.flags[type] = 0;
if (old_prog)
bpf_prog_put(old_prog); bpf_prog_put(old_prog);
static_branch_dec(&cgroup_bpf_enabled_key); static_branch_dec(&cgroup_bpf_enabled_key);
return 0; return 0;
cleanup: cleanup:
/* and restore back old_prog */ /* restore back prog or link */
pl->prog = old_prog; pl->prog = old_prog;
pl->link = link;
return err; return err;
} }
...@@ -521,6 +678,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, ...@@ -521,6 +678,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
struct list_head *progs = &cgrp->bpf.progs[type]; struct list_head *progs = &cgrp->bpf.progs[type];
u32 flags = cgrp->bpf.flags[type]; u32 flags = cgrp->bpf.flags[type];
struct bpf_prog_array *effective; struct bpf_prog_array *effective;
struct bpf_prog *prog;
int cnt, ret = 0, i; int cnt, ret = 0, i;
effective = rcu_dereference_protected(cgrp->bpf.effective[type], effective = rcu_dereference_protected(cgrp->bpf.effective[type],
...@@ -551,7 +709,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, ...@@ -551,7 +709,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
i = 0; i = 0;
list_for_each_entry(pl, progs, node) { list_for_each_entry(pl, progs, node) {
id = pl->prog->aux->id; prog = prog_list_prog(pl);
id = prog->aux->id;
if (copy_to_user(prog_ids + i, &id, sizeof(id))) if (copy_to_user(prog_ids + i, &id, sizeof(id)))
return -EFAULT; return -EFAULT;
if (++i == cnt) if (++i == cnt)
...@@ -581,8 +740,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr, ...@@ -581,8 +740,8 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
} }
} }
ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type, ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
attr->attach_flags); attr->attach_type, attr->attach_flags);
if (replace_prog) if (replace_prog)
bpf_prog_put(replace_prog); bpf_prog_put(replace_prog);
...@@ -604,7 +763,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) ...@@ -604,7 +763,7 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
if (IS_ERR(prog)) if (IS_ERR(prog))
prog = NULL; prog = NULL;
ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
if (prog) if (prog)
bpf_prog_put(prog); bpf_prog_put(prog);
...@@ -612,6 +771,90 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) ...@@ -612,6 +771,90 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
return ret; return ret;
} }
/* release() callback of bpf_cgroup_link_lops: detach the link from its
 * cgroup and drop the cgroup reference held by the link. Does nothing when
 * the link's cgroup pointer is already NULL (auto-detached).
 */
static void bpf_cgroup_link_release(struct bpf_link *link)
{
struct bpf_cgroup_link *cg_link =
container_of(link, struct bpf_cgroup_link, link);
/* link might have been auto-detached by dying cgroup already,
* in that case our work is done here
*/
if (!cg_link->cgroup)
return;
mutex_lock(&cgroup_mutex);
/* re-check cgroup under lock again */
if (!cg_link->cgroup) {
mutex_unlock(&cgroup_mutex);
return;
}
/* detach by link (prog argument is NULL); a failure here only warns */
WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
cg_link->type));
mutex_unlock(&cgroup_mutex);
/* drop the cgroup reference outside of cgroup_mutex */
cgroup_put(cg_link->cgroup);
}
/* dealloc() callback of bpf_cgroup_link_lops: free the bpf_cgroup_link that
 * embeds @link.
 */
static void bpf_cgroup_link_dealloc(struct bpf_link *link)
{
	kfree(container_of(link, struct bpf_cgroup_link, link));
}
/* bpf_link callbacks for cgroup links: release() detaches the link from its
 * cgroup, dealloc() frees the bpf_cgroup_link memory. The address of this
 * table is also what identifies a bpf_link as a cgroup link.
 */
const struct bpf_link_ops bpf_cgroup_link_lops = {
.release = bpf_cgroup_link_release,
.dealloc = bpf_cgroup_link_dealloc,
};
/* Create a cgroup bpf_link for @prog and attach it to the cgroup named by
 * attr->link_create.target_fd, using attr->link_create.attach_type.
 *
 * Returns the new link FD on success or a negative errno. link_create.flags
 * must be zero. On failure the reference on @prog is not dropped here; the
 * caller is expected to do that.
 */
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_cgroup_link *link;
struct file *link_file;
struct cgroup *cgrp;
int err, link_fd;
/* no flags are defined for cgroup links */
if (attr->link_create.flags)
return -EINVAL;
cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
err = -ENOMEM;
goto out_put_cgroup;
}
bpf_link_init(&link->link, &bpf_cgroup_link_lops, prog);
link->cgroup = cgrp;
link->type = attr->link_create.attach_type;
/* reserve the file and FD before attaching; the FD is installed last */
link_file = bpf_link_new_file(&link->link, &link_fd);
if (IS_ERR(link_file)) {
/* no file owns the link yet, so it is freed directly */
kfree(link);
err = PTR_ERR(link_file);
goto out_put_cgroup;
}
/* links are always attached in multi mode */
err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
BPF_F_ALLOW_MULTI);
if (err) {
/* the file exists now, so tear the link down via bpf_link_cleanup();
 * the cgroup reference is still dropped below
 */
bpf_link_cleanup(&link->link, link_file, link_fd);
goto out_put_cgroup;
}
fd_install(link_fd, link_file);
return link_fd;
out_put_cgroup:
cgroup_put(cgrp);
return err;
}
int cgroup_bpf_prog_query(const union bpf_attr *attr, int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr) union bpf_attr __user *uattr)
{ {
......
...@@ -2175,13 +2175,6 @@ static int bpf_obj_get(const union bpf_attr *attr) ...@@ -2175,13 +2175,6 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags); attr->file_flags);
} }
struct bpf_link {
atomic64_t refcnt;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
};
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog) struct bpf_prog *prog)
{ {
...@@ -2195,7 +2188,7 @@ void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, ...@@ -2195,7 +2188,7 @@ void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
* anon_inode's release() call. This helper manages marking bpf_link as * anon_inode's release() call. This helper manages marking bpf_link as
* defunct, releases anon_inode file and puts reserved FD. * defunct, releases anon_inode file and puts reserved FD.
*/ */
static void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
int link_fd) int link_fd)
{ {
link->prog = NULL; link->prog = NULL;
...@@ -2266,6 +2259,10 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) ...@@ -2266,6 +2259,10 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
link_type = "raw_tracepoint"; link_type = "raw_tracepoint";
else if (link->ops == &bpf_tracing_link_lops) else if (link->ops == &bpf_tracing_link_lops)
link_type = "tracing"; link_type = "tracing";
#ifdef CONFIG_CGROUP_BPF
else if (link->ops == &bpf_cgroup_link_lops)
link_type = "cgroup";
#endif
else else
link_type = "unknown"; link_type = "unknown";
...@@ -3553,6 +3550,104 @@ static int bpf_map_do_batch(const union bpf_attr *attr, ...@@ -3553,6 +3550,104 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
return err; return err;
} }
#define BPF_LINK_CREATE_LAST_FIELD link_create.flags
static int link_create(union bpf_attr *attr)
{
enum bpf_prog_type ptype;
struct bpf_prog *prog;
int ret;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
if (CHECK_ATTR(BPF_LINK_CREATE))
return -EINVAL;
ptype = attach_type_to_prog_type(attr->link_create.attach_type);
if (ptype == BPF_PROG_TYPE_UNSPEC)
return -EINVAL;
prog = bpf_prog_get_type(attr->link_create.prog_fd, ptype);
if (IS_ERR(prog))
return PTR_ERR(prog);
ret = bpf_prog_attach_check_attach_type(prog,
attr->link_create.attach_type);
if (ret)
goto err_out;
switch (ptype) {
case BPF_PROG_TYPE_CGROUP_SKB:
case BPF_PROG_TYPE_CGROUP_SOCK:
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
case BPF_PROG_TYPE_SOCK_OPS:
case BPF_PROG_TYPE_CGROUP_DEVICE:
case BPF_PROG_TYPE_CGROUP_SYSCTL:
case BPF_PROG_TYPE_CGROUP_SOCKOPT:
ret = cgroup_bpf_link_attach(attr, prog);
break;
default:
ret = -EINVAL;
}
err_out:
if (ret < 0)
bpf_prog_put(prog);
return ret;
}
#define BPF_LINK_UPDATE_LAST_FIELD link_update.old_prog_fd

/*
 * Handler for the BPF_LINK_UPDATE command: replace the program behind an
 * existing link.
 *
 * Requires CAP_NET_ADMIN. The only accepted flag is BPF_F_REPLACE; when it
 * is set, attr->link_update.old_prog_fd names the program the link is
 * expected to hold currently, and it is passed on as old_prog. Only cgroup
 * links are supported (via cgroup_bpf_replace()); any other link type
 * yields -EINVAL.
 *
 * Returns 0 on success or a negative errno. On failure the reference on the
 * new program is dropped; the old program reference, if taken, is always
 * dropped. The link reference obtained from bpf_link_get_from_fd() is
 * released on every path after it was taken.
 */
static int link_update(union bpf_attr *attr)
{
	struct bpf_prog *old_prog = NULL, *new_prog;
	struct bpf_link *link;
	u32 flags;
	int ret;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (CHECK_ATTR(BPF_LINK_UPDATE))
		return -EINVAL;

	flags = attr->link_update.flags;
	if (flags & ~BPF_F_REPLACE)
		return -EINVAL;

	link = bpf_link_get_from_fd(attr->link_update.link_fd);
	if (IS_ERR(link))
		return PTR_ERR(link);

	new_prog = bpf_prog_get(attr->link_update.new_prog_fd);
	if (IS_ERR(new_prog)) {
		/* must not return directly: the link reference is already held */
		ret = PTR_ERR(new_prog);
		goto out_put_link;
	}

	if (flags & BPF_F_REPLACE) {
		old_prog = bpf_prog_get(attr->link_update.old_prog_fd);
		if (IS_ERR(old_prog)) {
			ret = PTR_ERR(old_prog);
			/* keep the cleanup below from putting an error pointer */
			old_prog = NULL;
			goto out_put_progs;
		}
	}

#ifdef CONFIG_CGROUP_BPF
	if (link->ops == &bpf_cgroup_link_lops) {
		ret = cgroup_bpf_replace(link, old_prog, new_prog);
		goto out_put_progs;
	}
#endif
	ret = -EINVAL;

out_put_progs:
	if (old_prog)
		bpf_prog_put(old_prog);
	/* on success the new program reference stays with the link */
	if (ret)
		bpf_prog_put(new_prog);
out_put_link:
	/* drop the reference taken by bpf_link_get_from_fd() */
	bpf_link_put(link);
	return ret;
}
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{ {
union bpf_attr attr = {}; union bpf_attr attr = {};
...@@ -3663,6 +3758,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz ...@@ -3663,6 +3758,12 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_DELETE_BATCH: case BPF_MAP_DELETE_BATCH:
err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH); err = bpf_map_do_batch(&attr, uattr, BPF_MAP_DELETE_BATCH);
break; break;
case BPF_LINK_CREATE:
err = link_create(&attr);
break;
case BPF_LINK_UPDATE:
err = link_update(&attr);
break;
default: default:
err = -EINVAL; err = -EINVAL;
break; break;
......
...@@ -6303,27 +6303,58 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) ...@@ -6303,27 +6303,58 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#endif /* CONFIG_SOCK_CGROUP_DATA */ #endif /* CONFIG_SOCK_CGROUP_DATA */
#ifdef CONFIG_CGROUP_BPF #ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *replace_prog, enum bpf_attach_type type, struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type,
u32 flags) u32 flags)
{ {
int ret; int ret;
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags); ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
return ret; return ret;
} }
/*
 * Replace the program of a cgroup BPF link.
 *
 * @link:     link to update; must use bpf_cgroup_link_lops
 * @old_prog: if non-NULL, the program the link must currently hold
 * @new_prog: program to install
 *
 * Returns -EINVAL if @link is not a cgroup link or its cgroup pointer is
 * already cleared, -EPERM if @old_prog is given and differs from the link's
 * current program, otherwise the result of __cgroup_bpf_replace(). The
 * checks and the replacement run under cgroup_mutex.
 */
int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
		       struct bpf_prog *new_prog)
{
	struct bpf_cgroup_link *cgrp_link;
	int err;

	if (link->ops != &bpf_cgroup_link_lops)
		return -EINVAL;

	cgrp_link = container_of(link, struct bpf_cgroup_link, link);

	mutex_lock(&cgroup_mutex);
	if (!cgrp_link->cgroup) {
		/* link might have been auto-released by dying cgroup, so fail */
		err = -EINVAL;
	} else if (old_prog && link->prog != old_prog) {
		/* caller's expectation about the current program is wrong */
		err = -EPERM;
	} else {
		err = __cgroup_bpf_replace(cgrp_link->cgroup, cgrp_link,
					   new_prog);
	}
	mutex_unlock(&cgroup_mutex);

	return err;
}
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags) enum bpf_attach_type type)
{ {
int ret; int ret;
mutex_lock(&cgroup_mutex); mutex_lock(&cgroup_mutex);
ret = __cgroup_bpf_detach(cgrp, prog, type); ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
return ret; return ret;
} }
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr) union bpf_attr __user *uattr)
{ {
......
...@@ -111,6 +111,8 @@ enum bpf_cmd { ...@@ -111,6 +111,8 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH, BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH, BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH, BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
}; };
enum bpf_map_type { enum bpf_map_type {
...@@ -541,7 +543,7 @@ union bpf_attr { ...@@ -541,7 +543,7 @@ union bpf_attr {
__u32 prog_cnt; __u32 prog_cnt;
} query; } query;
struct { struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name; __u64 name;
__u32 prog_fd; __u32 prog_fd;
} raw_tracepoint; } raw_tracepoint;
...@@ -569,6 +571,24 @@ union bpf_attr { ...@@ -569,6 +571,24 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */ __u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */ __u64 probe_addr; /* output: probe_addr */
} task_fd_query; } task_fd_query;
struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
__u32 target_fd; /* object to attach to */
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
/* new program fd to update link with */
__u32 new_prog_fd;
__u32 flags; /* extra flags */
/* expected link's program fd; is specified only if
* BPF_F_REPLACE flag is set in flags */
__u32 old_prog_fd;
} link_update;
} __attribute__((aligned(8))); } __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF /* The description below is an attempt at providing documentation to eBPF
......
...@@ -585,6 +585,40 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) ...@@ -585,6 +585,40 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr)); return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
} }
/*
 * Issue the BPF_LINK_CREATE command.
 *
 * @prog_fd:     FD of the BPF program to attach
 * @target_fd:   FD of the object to attach to
 * @attach_type: attach type passed to the kernel
 * @opts:        optional options struct; only validated, no field is read
 *
 * Returns -EINVAL if @opts fails OPTS_VALID(), otherwise the result of
 * sys_bpf().
 */
int bpf_link_create(int prog_fd, int target_fd,
		    enum bpf_attach_type attach_type,
		    const struct bpf_link_create_opts *opts)
{
	union bpf_attr attr;

	if (!OPTS_VALID(opts, bpf_link_create_opts))
		return -EINVAL;

	/* zero the whole union so unused fields reach the kernel as 0 */
	memset(&attr, 0, sizeof(attr));
	attr.link_create.attach_type = attach_type;
	attr.link_create.target_fd = target_fd;
	attr.link_create.prog_fd = prog_fd;

	return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
/*
 * Issue the BPF_LINK_UPDATE command.
 *
 * @link_fd:     FD of the link to update
 * @new_prog_fd: FD of the program to install into the link
 * @opts:        optional options; flags and old_prog_fd are read from it and
 *               default to 0 when absent
 *
 * Returns -EINVAL if @opts fails OPTS_VALID(), otherwise the result of
 * sys_bpf().
 */
int bpf_link_update(int link_fd, int new_prog_fd,
		    const struct bpf_link_update_opts *opts)
{
	union bpf_attr attr;

	if (!OPTS_VALID(opts, bpf_link_update_opts))
		return -EINVAL;

	/* zero the whole union so unused fields reach the kernel as 0 */
	memset(&attr, 0, sizeof(attr));
	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
	attr.link_update.flags = OPTS_GET(opts, flags, 0);
	attr.link_update.new_prog_fd = new_prog_fd;
	attr.link_update.link_fd = link_fd;

	return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
}
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt) __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{ {
......
...@@ -168,6 +168,25 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); ...@@ -168,6 +168,25 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type); enum bpf_attach_type type);
/* Options for bpf_link_create(); currently carries no settings besides sz. */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
};
/* names the last field of the struct (NOTE(review): presumably consumed by OPTS_VALID - confirm) */
#define bpf_link_create_opts__last_field sz
/*
 * Issue BPF_LINK_CREATE for prog_fd / target_fd / attach_type.
 * Returns -EINVAL when opts does not validate, otherwise the syscall result.
 */
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
const struct bpf_link_create_opts *opts);
/* Options for bpf_link_update(); fields that are not supplied are treated as 0. */
struct bpf_link_update_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags; /* extra flags */
__u32 old_prog_fd; /* expected old program FD */
};
/* names the last field of the struct (NOTE(review): presumably consumed by OPTS_VALID - confirm) */
#define bpf_link_update_opts__last_field old_prog_fd
/*
 * Issue BPF_LINK_UPDATE to install new_prog_fd into the link behind link_fd.
 * Returns -EINVAL when opts does not validate, otherwise the syscall result.
 */
LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
const struct bpf_link_update_opts *opts);
struct bpf_prog_test_run_attr { struct bpf_prog_test_run_attr {
int prog_fd; int prog_fd;
int repeat; int repeat;
......
...@@ -6978,6 +6978,12 @@ struct bpf_link { ...@@ -6978,6 +6978,12 @@ struct bpf_link {
bool disconnected; bool disconnected;
}; };
/*
 * Swap the BPF program behind @link for @prog.
 *
 * No update options are passed, so no flags are set and no expected old
 * program is specified. Returns the result of bpf_link_update().
 */
int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
{
	int link_fd = bpf_link__fd(link);
	int new_prog_fd = bpf_program__fd(prog);

	return bpf_link_update(link_fd, new_prog_fd, NULL);
}
/* Release "ownership" of underlying BPF resource (typically, BPF program /* Release "ownership" of underlying BPF resource (typically, BPF program
* attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
* link, when destructed through bpf_link__destroy() call won't attempt to * link, when destructed through bpf_link__destroy() call won't attempt to
...@@ -7533,6 +7539,46 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec, ...@@ -7533,6 +7539,46 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
return bpf_program__attach_lsm(prog); return bpf_program__attach_lsm(prog);
} }
/*
 * Attach @prog to the cgroup referenced by @cgroup_fd through a bpf_link.
 *
 * The attach type is the program's expected attach type; if that is zero,
 * the attach type of the section definition matching the program's title is
 * used when one is found.
 *
 * Returns the new link on success. On failure returns ERR_PTR(): -EINVAL if
 * the program has no FD yet, -ENOMEM if the link cannot be allocated, or
 * -errno from the failed bpf_link_create() call.
 */
struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
{
	enum bpf_attach_type attach_type;
	const struct bpf_sec_def *def;
	char errbuf[STRERR_BUFSIZE];
	struct bpf_link *lnk;
	int pfd, lfd;

	pfd = bpf_program__fd(prog);
	if (pfd < 0) {
		pr_warn("program '%s': can't attach before loaded\n",
			bpf_program__title(prog, false));
		return ERR_PTR(-EINVAL);
	}

	lnk = calloc(1, sizeof(*lnk));
	if (!lnk)
		return ERR_PTR(-ENOMEM);
	lnk->detach = &bpf_link__detach_fd;

	attach_type = bpf_program__get_expected_attach_type(prog);
	if (!attach_type) {
		/* fall back to the attach type implied by the section name */
		def = find_sec_def(bpf_program__title(prog, false));
		if (def)
			attach_type = def->attach_type;
	}

	lfd = bpf_link_create(pfd, cgroup_fd, attach_type, NULL);
	if (lfd >= 0) {
		lnk->fd = lfd;
		return lnk;
	}

	/* capture errno before free()/logging get a chance to change it */
	lfd = -errno;
	free(lnk);
	pr_warn("program '%s': failed to attach to cgroup: %s\n",
		bpf_program__title(prog, false),
		libbpf_strerror_r(lfd, errbuf, sizeof(errbuf)));
	return ERR_PTR(lfd);
}
struct bpf_link *bpf_program__attach(struct bpf_program *prog) struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{ {
const struct bpf_sec_def *sec_def; const struct bpf_sec_def *sec_def;
......
...@@ -224,6 +224,8 @@ LIBBPF_API int bpf_link__fd(const struct bpf_link *link); ...@@ -224,6 +224,8 @@ LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link); LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path); LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
LIBBPF_API int bpf_link__unpin(struct bpf_link *link); LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
...@@ -245,13 +247,17 @@ bpf_program__attach_tracepoint(struct bpf_program *prog, ...@@ -245,13 +247,17 @@ bpf_program__attach_tracepoint(struct bpf_program *prog,
LIBBPF_API struct bpf_link * LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog, bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name); const char *tp_name);
LIBBPF_API struct bpf_link * LIBBPF_API struct bpf_link *
bpf_program__attach_trace(struct bpf_program *prog); bpf_program__attach_trace(struct bpf_program *prog);
LIBBPF_API struct bpf_link * LIBBPF_API struct bpf_link *
bpf_program__attach_lsm(struct bpf_program *prog); bpf_program__attach_lsm(struct bpf_program *prog);
LIBBPF_API struct bpf_link *
bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
struct bpf_map; struct bpf_map;
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
struct bpf_insn; struct bpf_insn;
/* /*
......
...@@ -243,7 +243,11 @@ LIBBPF_0.0.8 { ...@@ -243,7 +243,11 @@ LIBBPF_0.0.8 {
bpf_link__pin; bpf_link__pin;
bpf_link__pin_path; bpf_link__pin_path;
bpf_link__unpin; bpf_link__unpin;
bpf_link__update_program;
bpf_link_create;
bpf_link_update;
bpf_map__set_initial_value; bpf_map__set_initial_value;
bpf_program__attach_cgroup;
bpf_program__attach_lsm; bpf_program__attach_lsm;
bpf_program__is_lsm; bpf_program__is_lsm;
bpf_program__set_attach_target; bpf_program__set_attach_target;
......
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
#include "test_cgroup_link.skel.h"
static __u32 duration = 0;
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
static struct test_cgroup_link *skel = NULL;
/*
 * Reset both call counters in the skeleton's .bss, run one ping via
 * PING_CMD, then compare the counters with the expected values.
 *
 * Returns 0 when both counters match, -EINVAL on the first mismatch (the
 * second counter is not checked once the first one fails).
 */
int ping_and_check(int exp_calls, int exp_alt_calls)
{
	/* every measurement starts from zeroed counters */
	skel->bss->calls = 0;
	skel->bss->alt_calls = 0;

	CHECK_FAIL(system(PING_CMD));

	if (CHECK(skel->bss->calls != exp_calls, "call_cnt",
		  "exp %d, got %d\n", exp_calls, skel->bss->calls) ||
	    CHECK(skel->bss->alt_calls != exp_alt_calls, "alt_call_cnt",
		  "exp %d, got %d\n", exp_alt_calls, skel->bss->alt_calls))
		return -EINVAL;

	return 0;
}
/*
 * End-to-end test of link-based cgroup attachment.
 *
 * Creates a chain of nested cgroups, joins the innermost one and attaches the
 * "egress" program to every level through bpf_program__attach_cgroup().
 * ping_and_check() is then used after each step to compare the per-program
 * call counters with the expected values while the test:
 *   - queries effective programs and attach flags,
 *   - destroys and re-creates the innermost link,
 *   - mixes links with legacy multi and legacy exclusive attachments,
 *   - replaces link programs, with and without BPF_F_REPLACE,
 *   - closes the cgroup FDs and finally removes the cgroups.
 * Any failing step jumps to the common cleanup path.
 */
void test_cgroup_link(void)
{
struct {
const char *path;
int fd;
} cgs[] = {
{ "/cg1" },
{ "/cg1/cg2" },
{ "/cg1/cg2/cg3" },
{ "/cg1/cg2/cg3/cg4" },
};
int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
__u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
int i = 0, err, prog_fd;
/* true while a legacy (non-link) attachment exists that cleanup must undo */
bool detach_legacy = false;
skel = test_cgroup_link__open_and_load();
if (CHECK(!skel, "skel_open_load", "failed to open/load skeleton\n"))
return;
prog_fd = bpf_program__fd(skel->progs.egress);
err = setup_cgroup_environment();
if (CHECK(err, "cg_init", "failed: %d\n", err))
goto cleanup;
for (i = 0; i < cg_nr; i++) {
cgs[i].fd = create_and_get_cgroup(cgs[i].path);
if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
goto cleanup;
}
/* move this process into the innermost cgroup */
err = join_cgroup(cgs[last_cg].path);
if (CHECK(err, "cg_join", "fail: %d\n", err))
goto cleanup;
/* attach the same program to every cgroup level via a link */
for (i = 0; i < cg_nr; i++) {
links[i] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[i].fd);
if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
i, PTR_ERR(links[i])))
goto cleanup;
}
/* one call per attached level is expected */
ping_and_check(cg_nr, 0);
/* query the number of effective progs and attach flags in root cg */
err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
&prog_cnt);
CHECK_FAIL(err);
CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
goto cleanup;
/* query the number of effective progs in last cg */
err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, NULL, NULL,
&prog_cnt);
CHECK_FAIL(err);
/* NOTE(review): the query above passed NULL for attach_flags, so this
 * re-checks the value left over from the previous query - confirm intent
 */
CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
cg_nr, prog_cnt))
goto cleanup;
/* query the effective prog IDs in last cg */
err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags,
prog_ids, &prog_cnt);
CHECK_FAIL(err);
CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
cg_nr, prog_cnt))
goto cleanup;
/* every level holds the same program, so all reported IDs should be equal */
for (i = 1; i < prog_cnt; i++) {
CHECK(prog_ids[i - 1] != prog_ids[i], "prog_id_check",
"idx %d, prev id %d, cur id %d\n",
i, prog_ids[i - 1], prog_ids[i]);
}
/* detach bottom program and ping again */
bpf_link__destroy(links[last_cg]);
links[last_cg] = NULL;
ping_and_check(cg_nr - 1, 0);
/* mix in with non link-based multi-attachments */
err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
BPF_CGROUP_INET_EGRESS, BPF_F_ALLOW_MULTI);
if (CHECK(err, "cg_attach_legacy", "errno=%d\n", errno))
goto cleanup;
detach_legacy = true;
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
PTR_ERR(links[last_cg])))
goto cleanup;
/* legacy attachment and link in the last cgroup are both expected to run */
ping_and_check(cg_nr + 1, 0);
/* detach link */
bpf_link__destroy(links[last_cg]);
links[last_cg] = NULL;
/* detach legacy */
err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
goto cleanup;
detach_legacy = false;
/* attach legacy exclusive prog attachment */
err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
BPF_CGROUP_INET_EGRESS, 0);
if (CHECK(err, "cg_attach_exclusive", "errno=%d\n", errno))
goto cleanup;
detach_legacy = true;
/* attempt to mix in with multi-attach bpf_link */
tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
bpf_link__destroy(tmp_link);
goto cleanup;
}
ping_and_check(cg_nr, 0);
/* detach */
err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
goto cleanup;
detach_legacy = false;
ping_and_check(cg_nr - 1, 0);
/* attach back link-based one */
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
PTR_ERR(links[last_cg])))
goto cleanup;
ping_and_check(cg_nr, 0);
/* check legacy exclusive prog can't be attached */
err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
BPF_CGROUP_INET_EGRESS, 0);
if (CHECK(!err, "cg_attach_exclusive", "unexpected success")) {
bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
goto cleanup;
}
/* replace BPF programs inside their links for all but first link */
for (i = 1; i < cg_nr; i++) {
err = bpf_link__update_program(links[i], skel->progs.egress_alt);
if (CHECK(err, "prog_upd", "link #%d\n", i))
goto cleanup;
}
/* only the first link still runs egress; the rest now run egress_alt */
ping_and_check(1, cg_nr - 1);
/* Attempt program update with wrong expected BPF program */
link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress_alt);
link_upd_opts.flags = BPF_F_REPLACE;
err = bpf_link_update(bpf_link__fd(links[0]),
bpf_program__fd(skel->progs.egress_alt),
&link_upd_opts);
if (CHECK(err == 0 || errno != EPERM, "prog_cmpxchg1",
"unexpectedly succeeded, err %d, errno %d\n", err, -errno))
goto cleanup;
/* Compare-exchange single link program from egress to egress_alt */
link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress);
link_upd_opts.flags = BPF_F_REPLACE;
err = bpf_link_update(bpf_link__fd(links[0]),
bpf_program__fd(skel->progs.egress_alt),
&link_upd_opts);
if (CHECK(err, "prog_cmpxchg2", "errno %d\n", -errno))
goto cleanup;
/* ping */
ping_and_check(0, cg_nr);
/* close cgroup FDs before detaching links */
for (i = 0; i < cg_nr; i++) {
if (cgs[i].fd > 0) {
close(cgs[i].fd);
/* mark as closed so the cleanup path does not close it again */
cgs[i].fd = -1;
}
}
/* BPF programs should still get called */
ping_and_check(0, cg_nr);
/* leave cgroup and remove them, don't detach programs */
cleanup_cgroup_environment();
/* BPF programs should have been auto-detached */
ping_and_check(0, 0);
/* common exit path; also reached by falling through after a full run */
cleanup:
if (detach_legacy)
bpf_prog_detach2(prog_fd, cgs[last_cg].fd,
BPF_CGROUP_INET_EGRESS);
for (i = 0; i < cg_nr; i++) {
/* skip slots holding an error pointer from a failed attach */
if (!IS_ERR(links[i]))
bpf_link__destroy(links[i]);
}
test_cgroup_link__destroy(skel);
for (i = 0; i < cg_nr; i++) {
if (cgs[i].fd > 0)
close(cgs[i].fd);
}
cleanup_cgroup_environment();
}
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
int calls = 0;
int alt_calls = 0;
/*
 * Primary egress program: atomically increments the global "calls" counter
 * on every invocation and always returns 1.
 * NOTE(review): for cgroup_skb programs a return of 1 is presumably the
 * "let the packet through" verdict - confirm.
 */
SEC("cgroup_skb/egress1")
int egress(struct __sk_buff *skb)
{
__sync_fetch_and_add(&calls, 1);
return 1;
}
/*
 * Alternative egress program: atomically increments the global "alt_calls"
 * counter on every invocation and always returns 1, so it can be told apart
 * from egress() by which counter moves.
 * NOTE(review): for cgroup_skb programs a return of 1 is presumably the
 * "let the packet through" verdict - confirm.
 */
SEC("cgroup_skb/egress2")
int egress_alt(struct __sk_buff *skb)
{
__sync_fetch_and_add(&alt_calls, 1);
return 1;
}
char _license[] SEC("license") = "GPL";
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment