Commit babf3164 authored by Andrii Nakryiko's avatar Andrii Nakryiko Committed by Daniel Borkmann

bpf: Add bpf_link_new_file that doesn't install FD

Add bpf_link_new_file() API for cases when we need to ensure anon_inode is
successfully created before we proceed with expensive BPF program attachment
procedure, which will require equally (if not more so) expensive and
potentially failing compensation detachment procedure just because anon_inode
creation failed. This API makes it possible to simplify the code by first
ensuring that the anon_inode is created, and only then, after the BPF program
is attached, proceeding with fd_install(), which can't fail.

After anon_inode file is created, link can't be just kfree()'d anymore,
because its destruction will be performed by deferred file_operations->release
call. To support this, the bpf_link API now requires specifying two separate
operations: release() and dealloc(), the former performing detachment only,
while the latter frees the memory used by the bpf_link itself. dealloc() needs
to be specified because
struct bpf_link is frequently embedded into link type-specific container
struct (e.g., struct bpf_raw_tp_link), so bpf_link itself doesn't know how to
properly free the memory. In case when anon_inode file was successfully
created, but subsequent BPF attachment failed, bpf_link needs to be marked as
"defunct", so that file's release() callback will perform only memory
deallocation, but no detachment.

Convert raw tracepoint and tracing attachment to new API and eliminate
detachment from error handling path.
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200309231051.1270337-1-andriin@fb.com
parent aad32f4c
...@@ -1070,13 +1070,16 @@ struct bpf_link; ...@@ -1070,13 +1070,16 @@ struct bpf_link;
struct bpf_link_ops { struct bpf_link_ops {
void (*release)(struct bpf_link *link); void (*release)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link);
}; };
void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog); struct bpf_prog *prog);
void bpf_link_defunct(struct bpf_link *link);
void bpf_link_inc(struct bpf_link *link); void bpf_link_inc(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link); void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link);
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
struct bpf_link *bpf_link_get_from_fd(u32 ufd); struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
......
...@@ -2188,6 +2188,11 @@ void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, ...@@ -2188,6 +2188,11 @@ void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
link->prog = prog; link->prog = prog;
} }
/* Mark a bpf_link as "defunct": the anon_inode file for it was created, but
 * the subsequent BPF program attachment failed. Clearing link->prog is the
 * flag that tells the file's deferred release path (bpf_link_free()) to skip
 * ->release() (detachment) and bpf_prog_put(), and to perform only memory
 * deallocation via ->dealloc().
 *
 * NOTE(review): caller must still hold its own reference on link->prog and
 * drop it separately — this helper only clears the link's pointer.
 */
void bpf_link_defunct(struct bpf_link *link)
{
	link->prog = NULL;
}
void bpf_link_inc(struct bpf_link *link) void bpf_link_inc(struct bpf_link *link)
{ {
atomic64_inc(&link->refcnt); atomic64_inc(&link->refcnt);
...@@ -2196,14 +2201,13 @@ void bpf_link_inc(struct bpf_link *link) ...@@ -2196,14 +2201,13 @@ void bpf_link_inc(struct bpf_link *link)
/* bpf_link_free is guaranteed to be called from process context */ /* bpf_link_free is guaranteed to be called from process context */
static void bpf_link_free(struct bpf_link *link) static void bpf_link_free(struct bpf_link *link)
{ {
struct bpf_prog *prog; if (link->prog) {
/* detach BPF program, clean up used resources */
/* remember prog locally, because release below will free link memory */
prog = link->prog;
/* extra clean up and kfree of container link struct */
link->ops->release(link); link->ops->release(link);
/* no more accesing of link members after this point */ bpf_prog_put(link->prog);
bpf_prog_put(prog); }
/* free bpf_link and its containing memory */
link->ops->dealloc(link);
} }
static void bpf_link_put_deferred(struct work_struct *work) static void bpf_link_put_deferred(struct work_struct *work)
...@@ -2281,6 +2285,33 @@ int bpf_link_new_fd(struct bpf_link *link) ...@@ -2281,6 +2285,33 @@ int bpf_link_new_fd(struct bpf_link *link)
return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
} }
/* Similar to bpf_link_new_fd, create an anon_inode file for the given
 * bpf_link, but instead of immediately installing the fd into the fdtable,
 * just reserve the fd and return the file. The caller then needs to either
 * install it with fd_install(fd, file), or back out with fput(file) and
 * put_unused_fd(fd).
 *
 * This is useful for cases when bpf_link attachment/detachment are
 * complicated and expensive operations that should be delayed until all of
 * the fd reservation and anon_inode creation have succeeded.
 *
 * Returns the new file on success, or an ERR_PTR on failure (in which case
 * *reserved_fd is left untouched and no fd remains reserved).
 */
struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
{
	struct file *file;
	int fd;

	fd = get_unused_fd_flags(O_CLOEXEC);
	if (fd < 0)
		return ERR_PTR(fd);

	/* NOTE(review): the anon_inode name here is "bpf_link", while
	 * bpf_link_new_fd() uses "bpf-link" — confirm whether this
	 * inconsistency (visible in /proc/<pid>/fd) is intentional.
	 */
	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
	if (IS_ERR(file)) {
		/* undo the fd reservation; nothing else was committed */
		put_unused_fd(fd);
		return file;
	}

	/* fd stays reserved but NOT installed until the caller decides */
	*reserved_fd = fd;
	return file;
}
struct bpf_link *bpf_link_get_from_fd(u32 ufd) struct bpf_link *bpf_link_get_from_fd(u32 ufd)
{ {
struct fd f = fdget(ufd); struct fd f = fdget(ufd);
...@@ -2305,21 +2336,27 @@ struct bpf_tracing_link { ...@@ -2305,21 +2336,27 @@ struct bpf_tracing_link {
}; };
static void bpf_tracing_link_release(struct bpf_link *link) static void bpf_tracing_link_release(struct bpf_link *link)
{
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
}
static void bpf_tracing_link_dealloc(struct bpf_link *link)
{ {
struct bpf_tracing_link *tr_link = struct bpf_tracing_link *tr_link =
container_of(link, struct bpf_tracing_link, link); container_of(link, struct bpf_tracing_link, link);
WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog));
kfree(tr_link); kfree(tr_link);
} }
static const struct bpf_link_ops bpf_tracing_link_lops = { static const struct bpf_link_ops bpf_tracing_link_lops = {
.release = bpf_tracing_link_release, .release = bpf_tracing_link_release,
.dealloc = bpf_tracing_link_dealloc,
}; };
static int bpf_tracing_prog_attach(struct bpf_prog *prog) static int bpf_tracing_prog_attach(struct bpf_prog *prog)
{ {
struct bpf_tracing_link *link; struct bpf_tracing_link *link;
struct file *link_file;
int link_fd, err; int link_fd, err;
if (prog->expected_attach_type != BPF_TRACE_FENTRY && if (prog->expected_attach_type != BPF_TRACE_FENTRY &&
...@@ -2337,20 +2374,24 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) ...@@ -2337,20 +2374,24 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
} }
bpf_link_init(&link->link, &bpf_tracing_link_lops, prog); bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
err = bpf_trampoline_link_prog(prog); link_file = bpf_link_new_file(&link->link, &link_fd);
if (err) if (IS_ERR(link_file)) {
goto out_free_link; kfree(link);
err = PTR_ERR(link_file);
goto out_put_prog;
}
link_fd = bpf_link_new_fd(&link->link); err = bpf_trampoline_link_prog(prog);
if (link_fd < 0) { if (err) {
WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); bpf_link_defunct(&link->link);
err = link_fd; fput(link_file);
goto out_free_link; put_unused_fd(link_fd);
goto out_put_prog;
} }
fd_install(link_fd, link_file);
return link_fd; return link_fd;
out_free_link:
kfree(link);
out_put_prog: out_put_prog:
bpf_prog_put(prog); bpf_prog_put(prog);
return err; return err;
...@@ -2368,19 +2409,28 @@ static void bpf_raw_tp_link_release(struct bpf_link *link) ...@@ -2368,19 +2409,28 @@ static void bpf_raw_tp_link_release(struct bpf_link *link)
bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog); bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
bpf_put_raw_tracepoint(raw_tp->btp); bpf_put_raw_tracepoint(raw_tp->btp);
}
static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
{
struct bpf_raw_tp_link *raw_tp =
container_of(link, struct bpf_raw_tp_link, link);
kfree(raw_tp); kfree(raw_tp);
} }
static const struct bpf_link_ops bpf_raw_tp_lops = { static const struct bpf_link_ops bpf_raw_tp_lops = {
.release = bpf_raw_tp_link_release, .release = bpf_raw_tp_link_release,
.dealloc = bpf_raw_tp_link_dealloc,
}; };
#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
static int bpf_raw_tracepoint_open(const union bpf_attr *attr) static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
{ {
struct bpf_raw_tp_link *raw_tp; struct bpf_raw_tp_link *link;
struct bpf_raw_event_map *btp; struct bpf_raw_event_map *btp;
struct file *link_file;
struct bpf_prog *prog; struct bpf_prog *prog;
const char *tp_name; const char *tp_name;
char buf[128]; char buf[128];
...@@ -2431,28 +2481,32 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) ...@@ -2431,28 +2481,32 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
goto out_put_prog; goto out_put_prog;
} }
raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); link = kzalloc(sizeof(*link), GFP_USER);
if (!raw_tp) { if (!link) {
err = -ENOMEM; err = -ENOMEM;
goto out_put_btp; goto out_put_btp;
} }
bpf_link_init(&raw_tp->link, &bpf_raw_tp_lops, prog); bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
raw_tp->btp = btp; link->btp = btp;
err = bpf_probe_register(raw_tp->btp, prog); link_file = bpf_link_new_file(&link->link, &link_fd);
if (err) if (IS_ERR(link_file)) {
goto out_free_tp; kfree(link);
err = PTR_ERR(link_file);
goto out_put_btp;
}
link_fd = bpf_link_new_fd(&raw_tp->link); err = bpf_probe_register(link->btp, prog);
if (link_fd < 0) { if (err) {
bpf_probe_unregister(raw_tp->btp, prog); bpf_link_defunct(&link->link);
err = link_fd; fput(link_file);
goto out_free_tp; put_unused_fd(link_fd);
goto out_put_btp;
} }
fd_install(link_fd, link_file);
return link_fd; return link_fd;
out_free_tp:
kfree(raw_tp);
out_put_btp: out_put_btp:
bpf_put_raw_tracepoint(btp); bpf_put_raw_tracepoint(btp);
out_put_prog: out_put_prog:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment