Commit f0d74c4d, authored by Kui-Feng Lee, committed by Andrii Nakryiko

bpf: Parameterize task iterators.

Allow creating an iterator that loops through resources of one
thread/process.

Previously, people could only create iterators that loop through the
files, vmas, and tasks of every task in the system, even when they were
interested in only the resources of a specific task or process.  By
passing additional parameters, people can now create an iterator that
goes through either all resources or only the resources of one task.
Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
parent 87dbdc23
...@@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags); ...@@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
extern int bpf_iter_ ## target(args); \ extern int bpf_iter_ ## target(args); \
int __init bpf_iter_ ## target(args) { return 0; } int __init bpf_iter_ ## target(args) { return 0; }
/*
 * Selects which tasks a BPF task iterator walks.
 *
 * A task iterator can be given parameters that restrict it to a subset
 * of the tasks in the system:
 *
 * BPF_TASK_ITER_ALL  - the default; resources of every task are visited.
 * BPF_TASK_ITER_TID  - resources of a single task, identified by its tid.
 * BPF_TASK_ITER_TGID - resources of every task belonging to one process
 *                      (task group).
 */
enum bpf_iter_task_type {
	BPF_TASK_ITER_ALL  = 0,
	BPF_TASK_ITER_TID  = 1,
	BPF_TASK_ITER_TGID = 2,
};
struct bpf_iter_aux_info { struct bpf_iter_aux_info {
/* for map_elem iter */ /* for map_elem iter */
struct bpf_map *map; struct bpf_map *map;
...@@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info { ...@@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info {
struct cgroup *start; /* starting cgroup */ struct cgroup *start; /* starting cgroup */
enum bpf_cgroup_iter_order order; enum bpf_cgroup_iter_order order;
} cgroup; } cgroup;
struct {
enum bpf_iter_task_type type;
u32 pid;
} task;
}; };
typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog, typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
......
...@@ -110,6 +110,12 @@ union bpf_iter_link_info { ...@@ -110,6 +110,12 @@ union bpf_iter_link_info {
__u32 cgroup_fd; __u32 cgroup_fd;
__u64 cgroup_id; __u64 cgroup_id;
} cgroup; } cgroup;
/* Parameters of task iterators. */
struct {
__u32 tid;
__u32 pid;
__u32 pid_fd;
} task;
}; };
/* BPF syscall commands, see bpf(2) man-page for more details. */ /* BPF syscall commands, see bpf(2) man-page for more details. */
......
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
/* Iteration state that the task, task_file and task_vma iterators each
 * reach through info->common.
 */
struct bpf_iter_seq_task_common { struct bpf_iter_seq_task_common {
/* pid namespace used for every pid lookup; init_seq_pidns() takes it
 * from task_active_pid_ns(current).
 */
struct pid_namespace *ns; struct pid_namespace *ns;
/* which tasks to visit; copied from aux->task.type by init_seq_pidns() */
enum bpf_iter_task_type type;
/* the tid (BPF_TASK_ITER_TID) or tgid (BPF_TASK_ITER_TGID) to restrict
 * to; copied from aux->task.pid by init_seq_pidns()
 */
u32 pid;
/* tid of the thread task_group_seq_get_next() handed out most recently */
u32 pid_visiting;
}; };
struct bpf_iter_seq_task_info { struct bpf_iter_seq_task_info {
...@@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info { ...@@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info {
u32 tid; u32 tid;
}; };
/*
 * Return the next thread of the thread group whose tgid is common->pid.
 *
 * @common: iterator state. ->pid is the tgid being walked, ->ns the pid
 *          namespace used for lookups, and ->pid_visiting records the tid
 *          of the thread handed out most recently.
 * @tid: in/out cursor. Zero on the very first call; on success it is set
 *       to the tid of the returned thread. It is restored to its previous
 *       value when the group is exhausted.
 * @skip_if_dup_files: when true, skip threads whose ->files is the same
 *       table as their group leader's.
 *
 * task_seq_get_next() calls this with rcu_read_lock() held.
 *
 * Returns a task with a reference held (dropped by the caller with
 * put_task_struct()), or NULL when a lookup fails or every thread of the
 * group has been visited.
 */
static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
						   u32 *tid,
						   bool skip_if_dup_files)
{
	struct task_struct *task, *next_task;
	struct pid *pid;
	u32 saved_tid;

	if (!*tid) {
		/* The first time, the iterator calls this function. */
		pid = find_pid_ns(common->pid, common->ns);
		if (!pid)
			return NULL;

		task = get_pid_task(pid, PIDTYPE_TGID);
		if (!task)
			return NULL;

		*tid = common->pid;
		common->pid_visiting = common->pid;

		return task;
	}

	/* If the control returns to user space and comes back to the
	 * kernel again, *tid and common->pid_visiting should be the
	 * same for task_seq_start() to pick up the correct task.
	 */
	if (*tid == common->pid_visiting) {
		pid = find_pid_ns(common->pid_visiting, common->ns);
		/* Same explicit check as the other two lookups in this function. */
		if (!pid)
			return NULL;

		task = get_pid_task(pid, PIDTYPE_PID);

		return task;
	}

	/* Resume from the thread handed out last time. */
	pid = find_pid_ns(common->pid_visiting, common->ns);
	if (!pid)
		return NULL;

	task = get_pid_task(pid, PIDTYPE_PID);
	if (!task)
		return NULL;

retry:
	if (!pid_alive(task)) {
		put_task_struct(task);
		return NULL;
	}

	next_task = next_thread(task);
	/* From here on @task is no longer pinned and must not be dereferenced. */
	put_task_struct(task);
	if (!next_task)
		return NULL;

	saved_tid = *tid;
	*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
	if (!*tid || *tid == common->pid) {
		/* Run out of tasks of a process.  The tasks of a
		 * thread_group are linked as circular linked list.
		 */
		*tid = saved_tid;
		return NULL;
	}

	get_task_struct(next_task);
	common->pid_visiting = *tid;

	/* The decision is about the thread we are about to return, so look
	 * at next_task.  The previous thread (@task) was already released
	 * above; testing it would both inspect the wrong thread and touch a
	 * task_struct we no longer hold a reference on.
	 */
	if (skip_if_dup_files &&
	    next_task->files == next_task->group_leader->files) {
		task = next_task;
		goto retry;
	}

	return next_task;
}
static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
u32 *tid, u32 *tid,
bool skip_if_dup_files) bool skip_if_dup_files)
{ {
struct task_struct *task = NULL; struct task_struct *task = NULL;
struct pid *pid; struct pid *pid;
if (common->type == BPF_TASK_ITER_TID) {
if (*tid && *tid != common->pid)
return NULL;
rcu_read_lock();
pid = find_pid_ns(common->pid, common->ns);
if (pid) {
task = get_pid_task(pid, PIDTYPE_TGID);
*tid = common->pid;
}
rcu_read_unlock();
return task;
}
if (common->type == BPF_TASK_ITER_TGID) {
rcu_read_lock();
task = task_group_seq_get_next(common, tid, skip_if_dup_files);
rcu_read_unlock();
return task;
}
rcu_read_lock(); rcu_read_lock();
retry: retry:
pid = find_ge_pid(*tid, ns); pid = find_ge_pid(*tid, common->ns);
if (pid) { if (pid) {
*tid = pid_nr_ns(pid, ns); *tid = pid_nr_ns(pid, common->ns);
task = get_pid_task(pid, PIDTYPE_PID); task = get_pid_task(pid, PIDTYPE_PID);
if (!task) { if (!task) {
++*tid; ++*tid;
...@@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos) ...@@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
struct bpf_iter_seq_task_info *info = seq->private; struct bpf_iter_seq_task_info *info = seq->private;
struct task_struct *task; struct task_struct *task;
task = task_seq_get_next(info->common.ns, &info->tid, false); task = task_seq_get_next(&info->common, &info->tid, false);
if (!task) if (!task)
return NULL; return NULL;
...@@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos) ...@@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos; ++*pos;
++info->tid; ++info->tid;
put_task_struct((struct task_struct *)v); put_task_struct((struct task_struct *)v);
task = task_seq_get_next(info->common.ns, &info->tid, false); task = task_seq_get_next(&info->common, &info->tid, false);
if (!task) if (!task)
return NULL; return NULL;
...@@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v) ...@@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v)
put_task_struct((struct task_struct *)v); put_task_struct((struct task_struct *)v);
} }
/*
 * attach_target callback shared by the "task", "task_file" and "task_vma"
 * iterators: translate the user-supplied linfo->task selectors into
 * aux->task.
 *
 * At most one of linfo->task.tid, .pid and .pid_fd may be non-zero:
 *   - none set : aux->task.type = BPF_TASK_ITER_ALL (aux->task.pid untouched)
 *   - tid      : BPF_TASK_ITER_TID,  aux->task.pid = tid
 *   - pid      : BPF_TASK_ITER_TGID, aux->task.pid = pid
 *   - pid_fd   : BPF_TASK_ITER_TGID, aux->task.pid = the pid number of the
 *                pidfd's struct pid in the caller's active pid namespace
 *
 * Returns 0 on success, -EINVAL when more than one selector is set, or the
 * error encoded by pidfd_get_pid() when the pidfd cannot be resolved.
 */
static int bpf_iter_attach_task(struct bpf_prog *prog,
				union bpf_iter_link_info *linfo,
				struct bpf_iter_aux_info *aux)
{
	unsigned int pidfd_flags;
	struct pid *target;

	/* The three selectors are mutually exclusive. */
	if (!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd > 1)
		return -EINVAL;

	if (linfo->task.tid) {
		aux->task.type = BPF_TASK_ITER_TID;
		aux->task.pid = linfo->task.tid;
		return 0;
	}

	if (linfo->task.pid) {
		aux->task.type = BPF_TASK_ITER_TGID;
		aux->task.pid = linfo->task.pid;
		return 0;
	}

	if (!linfo->task.pid_fd) {
		/* No selector given: iterate over everything. */
		aux->task.type = BPF_TASK_ITER_ALL;
		return 0;
	}

	/* The type is recorded before the lookup, so it is set even when the
	 * lookup below fails.
	 */
	aux->task.type = BPF_TASK_ITER_TGID;

	target = pidfd_get_pid(linfo->task.pid_fd, &pidfd_flags);
	if (IS_ERR(target))
		return PTR_ERR(target);

	aux->task.pid = pid_nr_ns(target, task_active_pid_ns(current));
	/* Only the number is kept; drop the struct pid obtained above. */
	put_pid(target);

	return 0;
}
static const struct seq_operations task_seq_ops = { static const struct seq_operations task_seq_ops = {
.start = task_seq_start, .start = task_seq_start,
.next = task_seq_next, .next = task_seq_next,
...@@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info { ...@@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info {
static struct file * static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{ {
struct pid_namespace *ns = info->common.ns; u32 saved_tid = info->tid;
u32 curr_tid = info->tid;
struct task_struct *curr_task; struct task_struct *curr_task;
unsigned int curr_fd = info->fd; unsigned int curr_fd = info->fd;
...@@ -151,21 +285,18 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) ...@@ -151,21 +285,18 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
curr_task = info->task; curr_task = info->task;
curr_fd = info->fd; curr_fd = info->fd;
} else { } else {
curr_task = task_seq_get_next(ns, &curr_tid, true); curr_task = task_seq_get_next(&info->common, &info->tid, true);
if (!curr_task) { if (!curr_task) {
info->task = NULL; info->task = NULL;
info->tid = curr_tid;
return NULL; return NULL;
} }
/* set info->task and info->tid */ /* set info->task */
info->task = curr_task; info->task = curr_task;
if (curr_tid == info->tid) { if (saved_tid == info->tid)
curr_fd = info->fd; curr_fd = info->fd;
} else { else
info->tid = curr_tid;
curr_fd = 0; curr_fd = 0;
}
} }
rcu_read_lock(); rcu_read_lock();
...@@ -186,9 +317,15 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info) ...@@ -186,9 +317,15 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
/* the current task is done, go to the next task */ /* the current task is done, go to the next task */
rcu_read_unlock(); rcu_read_unlock();
put_task_struct(curr_task); put_task_struct(curr_task);
if (info->common.type == BPF_TASK_ITER_TID) {
info->task = NULL;
return NULL;
}
info->task = NULL; info->task = NULL;
info->fd = 0; info->fd = 0;
curr_tid = ++(info->tid); saved_tid = ++(info->tid);
goto again; goto again;
} }
...@@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) ...@@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
struct bpf_iter_seq_task_common *common = priv_data; struct bpf_iter_seq_task_common *common = priv_data;
common->ns = get_pid_ns(task_active_pid_ns(current)); common->ns = get_pid_ns(task_active_pid_ns(current));
common->type = aux->task.type;
common->pid = aux->task.pid;
return 0; return 0;
} }
...@@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op { ...@@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op {
static struct vm_area_struct * static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{ {
struct pid_namespace *ns = info->common.ns;
enum bpf_task_vma_iter_find_op op; enum bpf_task_vma_iter_find_op op;
struct vm_area_struct *curr_vma; struct vm_area_struct *curr_vma;
struct task_struct *curr_task; struct task_struct *curr_task;
u32 curr_tid = info->tid; u32 saved_tid = info->tid;
/* If this function returns a non-NULL vma, it holds a reference to /* If this function returns a non-NULL vma, it holds a reference to
* the task_struct, and holds read lock on vma->mm->mmap_lock. * the task_struct, and holds read lock on vma->mm->mmap_lock.
...@@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) ...@@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
} }
} else { } else {
again: again:
curr_task = task_seq_get_next(ns, &curr_tid, true); curr_task = task_seq_get_next(&info->common, &info->tid, true);
if (!curr_task) { if (!curr_task) {
info->tid = curr_tid + 1; info->tid++;
goto finish; goto finish;
} }
if (curr_tid != info->tid) { if (saved_tid != info->tid) {
info->tid = curr_tid;
/* new task, process the first vma */ /* new task, process the first vma */
op = task_vma_iter_first_vma; op = task_vma_iter_first_vma;
} else { } else {
...@@ -430,9 +568,12 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info) ...@@ -430,9 +568,12 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
return curr_vma; return curr_vma;
next_task: next_task:
if (info->common.type == BPF_TASK_ITER_TID)
goto finish;
put_task_struct(curr_task); put_task_struct(curr_task);
info->task = NULL; info->task = NULL;
curr_tid++; info->tid++;
goto again; goto again;
finish: finish:
...@@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = { ...@@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
static struct bpf_iter_reg task_reg_info = { static struct bpf_iter_reg task_reg_info = {
.target = "task", .target = "task",
.attach_target = bpf_iter_attach_task,
.feature = BPF_ITER_RESCHED, .feature = BPF_ITER_RESCHED,
.ctx_arg_info_size = 1, .ctx_arg_info_size = 1,
.ctx_arg_info = { .ctx_arg_info = {
...@@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = { ...@@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
static struct bpf_iter_reg task_file_reg_info = { static struct bpf_iter_reg task_file_reg_info = {
.target = "task_file", .target = "task_file",
.attach_target = bpf_iter_attach_task,
.feature = BPF_ITER_RESCHED, .feature = BPF_ITER_RESCHED,
.ctx_arg_info_size = 2, .ctx_arg_info_size = 2,
.ctx_arg_info = { .ctx_arg_info = {
...@@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = { ...@@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
static struct bpf_iter_reg task_vma_reg_info = { static struct bpf_iter_reg task_vma_reg_info = {
.target = "task_vma", .target = "task_vma",
.attach_target = bpf_iter_attach_task,
.feature = BPF_ITER_RESCHED, .feature = BPF_ITER_RESCHED,
.ctx_arg_info_size = 2, .ctx_arg_info_size = 2,
.ctx_arg_info = { .ctx_arg_info = {
......
...@@ -110,6 +110,12 @@ union bpf_iter_link_info { ...@@ -110,6 +110,12 @@ union bpf_iter_link_info {
__u32 cgroup_fd; __u32 cgroup_fd;
__u64 cgroup_id; __u64 cgroup_id;
} cgroup; } cgroup;
/* Parameters of task iterators. */
struct {
__u32 tid;
__u32 pid;
__u32 pid_fd;
} task;
}; };
/* BPF syscall commands, see bpf(2) man-page for more details. */ /* BPF syscall commands, see bpf(2) man-page for more details. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment