Commit a3d4fd7a authored by Ingo Molnar

Merge branch 'uprobes/core' of git://git.kernel.org/pub/scm/linux/kernel/git/oleg/misc into perf/core

Improve uprobes performance by adding 'pre-filtering' support,
by Oleg Nesterov:

Baseline, without the probe installed:

	# time perl -e 'syscall -1 for 1..100_000'
	real    0m0.040s
	user    0m0.027s
	sys     0m0.010s

	# perf probe -x /lib/libc.so.6 syscall
	# perf record -e probe_libc:syscall sleep 100 &

Before this series:

	# time perl -e 'syscall -1 for 1..100_000'
	real    0m1.714s
	user    0m0.103s
	sys     0m1.607s

After:

	# time perl -e 'syscall -1 for 1..100_000'
	real    0m0.037s
	user    0m0.013s
	sys     0m0.023s
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents 661e5915 b2fe8ba6
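
At the heart of the series is a new, mm-based filter callback on struct uprobe_consumer (see the include/linux/uprobes.h hunk below): the register, unregister and mmap paths now consult each consumer's filter and only install breakpoints into address spaces the consumer actually wants to trace, so unprobed processes no longer pay for the probe. A minimal sketch of a consumer against the new interface; the my_* names and the target-mm bookkeeping are hypothetical, not part of this series:

	/* Hypothetical out-of-tree consumer, for illustration only. */
	static struct mm_struct *my_target_mm;	/* the one mm we want to probe */

	static int my_handler(struct uprobe_consumer *self, struct pt_regs *regs)
	{
		/* 0 keeps the breakpoint; UPROBE_HANDLER_REMOVE votes to
		   remove it from current->mm (see handler_chain() below). */
		return 0;
	}

	static bool my_filter(struct uprobe_consumer *self,
			      enum uprobe_filter_ctx ctx, struct mm_struct *mm)
	{
		/* consulted on register/unregister/mmap: install the
		   breakpoint only into the address space we care about */
		return mm == my_target_mm;
	}

	static struct uprobe_consumer my_consumer = {
		.handler = my_handler,
		.filter  = my_filter,
	};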
@@ -680,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 		if (auprobe->insn[i] == 0x66)
 			continue;
-		if (auprobe->insn[i] == 0x90)
+		if (auprobe->insn[i] == 0x90) {
+			regs->ip += i + 1;
 			return true;
+		}
 		break;
 	}
...
@@ -135,16 +135,21 @@ struct hw_perf_event {
 		struct { /* software */
 			struct hrtimer	hrtimer;
 		};
+		struct { /* tracepoint */
+			struct task_struct	*tp_target;
+			/* for tp_event->class */
+			struct list_head	tp_list;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
+			struct arch_hw_breakpoint	info;
+			struct list_head		bp_list;
 			/*
 			 * Crufty hack to avoid the chicken and egg
 			 * problem hw_breakpoint has with context
 			 * creation and event initalization.
 			 */
 			struct task_struct		*bp_target;
-			struct arch_hw_breakpoint	info;
-			struct list_head		bp_list;
 		};
 #endif
 	};
...
@@ -35,13 +35,20 @@ struct inode;
 # include <asm/uprobes.h>
 #endif

+#define UPROBE_HANDLER_REMOVE		1
+#define UPROBE_HANDLER_MASK		1
+
+enum uprobe_filter_ctx {
+	UPROBE_FILTER_REGISTER,
+	UPROBE_FILTER_UNREGISTER,
+	UPROBE_FILTER_MMAP,
+};
+
 struct uprobe_consumer {
 	int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
-	/*
-	 * filter is optional; If a filter exists, handler is run
-	 * if and only if filter returns true.
-	 */
-	bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
+	bool (*filter)(struct uprobe_consumer *self,
+				enum uprobe_filter_ctx ctx,
+				struct mm_struct *mm);

 	struct uprobe_consumer *next;
 };
@@ -94,6 +101,7 @@ extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsign
 extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool);
 extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
 extern int uprobe_mmap(struct vm_area_struct *vma);
 extern void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end);
@@ -117,6 +125,11 @@ uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
 	return -ENOSYS;
 }
+static inline int
+uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool add)
+{
+	return -ENOSYS;
+}
 static inline void
 uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
 {
...
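
The new uprobe_apply() pairs with the filter: once a consumer is registered, it can ask the core to re-evaluate its filter across all mapped mms when the set of tasks it wants to trace changes (this is what the perf bits in trace_uprobe.c use from uprobe_perf_open/close). A hedged usage sketch, reusing the hypothetical my_consumer from above:

	/* Sketch only; inode/offset identify the probed file location. */
	ret = uprobe_register(inode, offset, &my_consumer);	/* install per filter */

	/* later, my_filter() starts accepting another mm: */
	ret = uprobe_apply(inode, offset, &my_consumer, true);	/* add missing bp's */

	/* and when it stops accepting it: */
	ret = uprobe_apply(inode, offset, &my_consumer, false);	/* remove stale bp's */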
@@ -6162,11 +6162,14 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,

 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
+
+		if (attr->type == PERF_TYPE_TRACEPOINT)
+			event->hw.tp_target = task;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		/*
 		 * hw_breakpoint is a bit difficult here..
 		 */
-		if (attr->type == PERF_TYPE_BREAKPOINT)
+		else if (attr->type == PERF_TYPE_BREAKPOINT)
 			event->hw.bp_target = task;
 #endif
 	}
...
@@ -27,6 +27,7 @@
 #include <linux/pagemap.h>	/* read_mapping_page */
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/export.h>
 #include <linux/rmap.h>		/* anon_vma_prepare */
 #include <linux/mmu_notifier.h>	/* set_pte_at_notify */
 #include <linux/swap.h>		/* try_to_free_swap */
@@ -41,58 +42,31 @@
 #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE

 static struct rb_root uprobes_tree = RB_ROOT;
-
-static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
-
-#define UPROBES_HASH_SZ	13
-
 /*
- * We need separate register/unregister and mmap/munmap lock hashes because
- * of mmap_sem nesting.
- *
- * uprobe_register() needs to install probes on (potentially) all processes
- * and thus needs to acquire multiple mmap_sems (consequtively, not
- * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
- * for the particular process doing the mmap.
- *
- * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
- * because of lock order against i_mmap_mutex. This means there's a hole in
- * the register vma iteration where a mmap() can happen.
- *
- * Thus uprobe_register() can race with uprobe_mmap() and we can try and
- * install a probe where one is already installed.
+ * allows us to skip the uprobe_mmap if there are no uprobe events active
+ * at this time.  Probably a fine grained per inode count is better?
  */
+#define no_uprobe_events()	RB_EMPTY_ROOT(&uprobes_tree)

-/* serialize (un)register */
-static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
-
-#define uprobes_hash(v)		(&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
+static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
+
+#define UPROBES_HASH_SZ		13

 /* serialize uprobe->pending_list */
 static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
 #define uprobes_mmap_hash(v)	(&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])

 static struct percpu_rw_semaphore dup_mmap_sem;

-/*
- * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
- * events active at this time.  Probably a fine grained per inode count is
- * better?
- */
-static atomic_t uprobe_events = ATOMIC_INIT(0);
-
 /* Have a copy of original instruction */
 #define UPROBE_COPY_INSN	0
-/* Dont run handlers when first register/ last unregister in progress*/
-#define UPROBE_RUN_HANDLER	1
 /* Can skip singlestep */
-#define UPROBE_SKIP_SSTEP	2
+#define UPROBE_SKIP_SSTEP	1

 struct uprobe {
 	struct rb_node		rb_node;	/* node in the rb tree */
 	atomic_t		ref;
+	struct rw_semaphore	register_rwsem;
 	struct rw_semaphore	consumer_rwsem;
-	struct mutex		copy_mutex;	/* TODO: kill me and UPROBE_COPY_INSN */
 	struct list_head	pending_list;
 	struct uprobe_consumer	*consumers;
 	struct inode		*inode;		/* Also hold a ref to inode */
@@ -430,9 +404,6 @@ static struct uprobe *insert_uprobe(struct uprobe *uprobe)
 	u = __insert_uprobe(uprobe);
 	spin_unlock(&uprobes_treelock);

-	/* For now assume that the instruction need not be single-stepped */
-	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);
-
 	return u;
 }
@@ -452,8 +423,10 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 	uprobe->inode = igrab(inode);
 	uprobe->offset = offset;
+	init_rwsem(&uprobe->register_rwsem);
 	init_rwsem(&uprobe->consumer_rwsem);
-	mutex_init(&uprobe->copy_mutex);
+	/* For now assume that the instruction need not be single-stepped */
+	__set_bit(UPROBE_SKIP_SSTEP, &uprobe->flags);

 	/* add to uprobes_tree, sorted on inode:offset */
 	cur_uprobe = insert_uprobe(uprobe);
@@ -463,38 +436,17 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
 		kfree(uprobe);
 		uprobe = cur_uprobe;
 		iput(inode);
-	} else {
-		atomic_inc(&uprobe_events);
 	}

 	return uprobe;
 }

-static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
-{
-	struct uprobe_consumer *uc;
-
-	if (!test_bit(UPROBE_RUN_HANDLER, &uprobe->flags))
-		return;
-
-	down_read(&uprobe->consumer_rwsem);
-	for (uc = uprobe->consumers; uc; uc = uc->next) {
-		if (!uc->filter || uc->filter(uc, current))
-			uc->handler(uc, regs);
-	}
-	up_read(&uprobe->consumer_rwsem);
-}
-
-/* Returns the previous consumer */
-static struct uprobe_consumer *
-consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
+static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
 	down_write(&uprobe->consumer_rwsem);
 	uc->next = uprobe->consumers;
 	uprobe->consumers = uc;
 	up_write(&uprobe->consumer_rwsem);
-
-	return uc->next;
 }

 /*
@@ -588,7 +540,8 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
 		return ret;

-	mutex_lock(&uprobe->copy_mutex);
+	/* TODO: move this into _register, until then we abuse this sem. */
+	down_write(&uprobe->consumer_rwsem);
 	if (test_bit(UPROBE_COPY_INSN, &uprobe->flags))
 		goto out;
@@ -612,7 +565,30 @@ static int prepare_uprobe(struct uprobe *uprobe, struct file *file,
 	set_bit(UPROBE_COPY_INSN, &uprobe->flags);

  out:
-	mutex_unlock(&uprobe->copy_mutex);
+	up_write(&uprobe->consumer_rwsem);
+
+	return ret;
+}
+
+static inline bool consumer_filter(struct uprobe_consumer *uc,
+				   enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+	return !uc->filter || uc->filter(uc, ctx, mm);
+}
+
+static bool filter_chain(struct uprobe *uprobe,
+			 enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+	struct uprobe_consumer *uc;
+	bool ret = false;
+
+	down_read(&uprobe->consumer_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		ret = consumer_filter(uc, ctx, mm);
+		if (ret)
+			break;
+	}
+	up_read(&uprobe->consumer_rwsem);

 	return ret;
 }
@@ -624,16 +600,6 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 	bool first_uprobe;
 	int ret;

-	/*
-	 * If probe is being deleted, unregister thread could be done with
-	 * the vma-rmap-walk through. Adding a probe now can be fatal since
-	 * nobody will be able to cleanup. Also we could be from fork or
-	 * mremap path, where the probe might have already been inserted.
-	 * Hence behave as if probe already existed.
-	 */
-	if (!uprobe->consumers)
-		return 0;
-
 	ret = prepare_uprobe(uprobe, vma->vm_file, mm, vaddr);
 	if (ret)
 		return ret;
@@ -658,14 +624,14 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
 static int
 remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
 {
-	/* can happen if uprobe_register() fails */
-	if (!test_bit(MMF_HAS_UPROBES, &mm->flags))
-		return 0;
-
 	set_bit(MMF_RECALC_UPROBES, &mm->flags);
 	return set_orig_insn(&uprobe->arch, mm, vaddr);
 }

+static inline bool uprobe_is_active(struct uprobe *uprobe)
+{
+	return !RB_EMPTY_NODE(&uprobe->rb_node);
+}
+
 /*
  * There could be threads that have already hit the breakpoint. They
  * will recheck the current insn and restart if find_uprobe() fails.
@@ -673,12 +639,15 @@ remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vad
  */
 static void delete_uprobe(struct uprobe *uprobe)
 {
+	if (WARN_ON(!uprobe_is_active(uprobe)))
+		return;
+
 	spin_lock(&uprobes_treelock);
 	rb_erase(&uprobe->rb_node, &uprobes_tree);
 	spin_unlock(&uprobes_treelock);
+	RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */
 	iput(uprobe->inode);
 	put_uprobe(uprobe);
-	atomic_dec(&uprobe_events);
 }
 struct map_info {
@@ -764,8 +733,10 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 	return curr;
 }

-static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
+static int
+register_for_each_vma(struct uprobe *uprobe, struct uprobe_consumer *new)
 {
+	bool is_register = !!new;
 	struct map_info *info;
 	int err = 0;
@@ -794,10 +765,16 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 		    vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
 			goto unlock;

-		if (is_register)
-			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
-		else
-			err |= remove_breakpoint(uprobe, mm, info->vaddr);
+		if (is_register) {
+			/* consult only the "caller", new consumer. */
+			if (consumer_filter(new,
+					UPROBE_FILTER_REGISTER, mm))
+				err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+		} else if (test_bit(MMF_HAS_UPROBES, &mm->flags)) {
+			if (!filter_chain(uprobe,
+					UPROBE_FILTER_UNREGISTER, mm))
+				err |= remove_breakpoint(uprobe, mm, info->vaddr);
+		}

  unlock:
 		up_write(&mm->mmap_sem);
@@ -810,17 +787,23 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
 	return err;
 }

-static int __uprobe_register(struct uprobe *uprobe)
+static int __uprobe_register(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
-	return register_for_each_vma(uprobe, true);
+	consumer_add(uprobe, uc);
+	return register_for_each_vma(uprobe, uc);
 }

-static void __uprobe_unregister(struct uprobe *uprobe)
+static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *uc)
 {
-	if (!register_for_each_vma(uprobe, false))
-		delete_uprobe(uprobe);
+	int err;
+
+	if (!consumer_del(uprobe, uc))	/* WARN? */
+		return;

+	err = register_for_each_vma(uprobe, NULL);
 	/* TODO : cant unregister? schedule a worker thread */
+	if (!uprobe->consumers && !err)
+		delete_uprobe(uprobe);
 }

 /*
@@ -845,31 +828,59 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
 	struct uprobe *uprobe;
 	int ret;

-	if (!inode || !uc || uc->next)
-		return -EINVAL;
-
+	/* Racy, just to catch the obvious mistakes */
 	if (offset > i_size_read(inode))
 		return -EINVAL;

-	ret = 0;
-	mutex_lock(uprobes_hash(inode));
+ retry:
 	uprobe = alloc_uprobe(inode, offset);
-
-	if (!uprobe) {
-		ret = -ENOMEM;
-	} else if (!consumer_add(uprobe, uc)) {
-		ret = __uprobe_register(uprobe);
-		if (ret) {
-			uprobe->consumers = NULL;
-			__uprobe_unregister(uprobe);
-		} else {
-			set_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
-		}
+	if (!uprobe)
+		return -ENOMEM;
+	/*
+	 * We can race with uprobe_unregister()->delete_uprobe().
+	 * Check uprobe_is_active() and retry if it is false.
+	 */
+	down_write(&uprobe->register_rwsem);
+	ret = -EAGAIN;
+	if (likely(uprobe_is_active(uprobe))) {
+		ret = __uprobe_register(uprobe, uc);
+		if (ret)
+			__uprobe_unregister(uprobe, uc);
 	}
+	up_write(&uprobe->register_rwsem);
+	put_uprobe(uprobe);

-	mutex_unlock(uprobes_hash(inode));
-	if (uprobe)
-		put_uprobe(uprobe);
+	if (unlikely(ret == -EAGAIN))
+		goto retry;
 	return ret;
 }
+EXPORT_SYMBOL_GPL(uprobe_register);
+
+/*
+ * uprobe_apply - unregister a already registered probe.
+ * @inode: the file in which the probe has to be removed.
+ * @offset: offset from the start of the file.
+ * @uc: consumer which wants to add more or remove some breakpoints
+ * @add: add or remove the breakpoints
+ */
+int uprobe_apply(struct inode *inode, loff_t offset,
+			struct uprobe_consumer *uc, bool add)
+{
+	struct uprobe *uprobe;
+	struct uprobe_consumer *con;
+	int ret = -ENOENT;
+
+	uprobe = find_uprobe(inode, offset);
+	if (!uprobe)
+		return ret;
+
+	down_write(&uprobe->register_rwsem);
+	for (con = uprobe->consumers; con && con != uc ; con = con->next)
+		;
+	if (con)
+		ret = register_for_each_vma(uprobe, add ? uc : NULL);
+	up_write(&uprobe->register_rwsem);
+	put_uprobe(uprobe);
+
+	return ret;
+}
@@ -884,24 +895,42 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
 {
 	struct uprobe *uprobe;

-	if (!inode || !uc)
-		return;
-
 	uprobe = find_uprobe(inode, offset);
 	if (!uprobe)
 		return;

-	mutex_lock(uprobes_hash(inode));
+	down_write(&uprobe->register_rwsem);
+	__uprobe_unregister(uprobe, uc);
+	up_write(&uprobe->register_rwsem);
+	put_uprobe(uprobe);
+}
+EXPORT_SYMBOL_GPL(uprobe_unregister);

-	if (consumer_del(uprobe, uc)) {
-		if (!uprobe->consumers) {
-			__uprobe_unregister(uprobe);
-			clear_bit(UPROBE_RUN_HANDLER, &uprobe->flags);
-		}
+static int unapply_uprobe(struct uprobe *uprobe, struct mm_struct *mm)
+{
+	struct vm_area_struct *vma;
+	int err = 0;
+
+	down_read(&mm->mmap_sem);
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		unsigned long vaddr;
+		loff_t offset;
+
+		if (!valid_vma(vma, false) ||
+		    vma->vm_file->f_mapping->host != uprobe->inode)
+			continue;
+
+		offset = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
+		if (uprobe->offset < offset ||
+		    uprobe->offset >= offset + vma->vm_end - vma->vm_start)
+			continue;
+
+		vaddr = offset_to_vaddr(vma, uprobe->offset);
+		err |= remove_breakpoint(uprobe, mm, vaddr);
 	}
+	up_read(&mm->mmap_sem);

-	mutex_unlock(uprobes_hash(inode));
-	put_uprobe(uprobe);
+	return err;
 }

 static struct rb_node *
@@ -978,7 +1007,7 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	struct uprobe *uprobe, *u;
 	struct inode *inode;

-	if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
+	if (no_uprobe_events() || !valid_vma(vma, true))
 		return 0;

 	inode = vma->vm_file->f_mapping->host;
@@ -987,9 +1016,14 @@ int uprobe_mmap(struct vm_area_struct *vma)
 	mutex_lock(uprobes_mmap_hash(inode));
 	build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
-
+	/*
+	 * We can race with uprobe_unregister(), this uprobe can be already
+	 * removed. But in this case filter_chain() must return false, all
+	 * consumers have gone away.
+	 */
 	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
-		if (!fatal_signal_pending(current)) {
+		if (!fatal_signal_pending(current) &&
+		    filter_chain(uprobe, UPROBE_FILTER_MMAP, vma->vm_mm)) {
 			unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 			install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
 		}
@@ -1024,7 +1058,7 @@ vma_has_uprobes(struct vm_area_struct *vma, unsigned long start, unsigned long e
  */
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
+	if (no_uprobe_events() || !valid_vma(vma, false))
 		return;

 	if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
/* Slot allocation for XOL */ /* Slot allocation for XOL */
static int xol_add_vma(struct xol_area *area) static int xol_add_vma(struct xol_area *area)
{ {
struct mm_struct *mm; struct mm_struct *mm = current->mm;
int ret; int ret = -EALREADY;
area->page = alloc_page(GFP_HIGHUSER);
if (!area->page)
return -ENOMEM;
ret = -EALREADY;
mm = current->mm;
down_write(&mm->mmap_sem); down_write(&mm->mmap_sem);
if (mm->uprobes_state.xol_area) if (mm->uprobes_state.xol_area)
goto fail; goto fail;
ret = -ENOMEM; ret = -ENOMEM;
/* Try to map as high as possible, this is only a hint. */ /* Try to map as high as possible, this is only a hint. */
area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0);
if (area->vaddr & ~PAGE_MASK) { if (area->vaddr & ~PAGE_MASK) {
...@@ -1072,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area) ...@@ -1072,54 +1098,53 @@ static int xol_add_vma(struct xol_area *area)
smp_wmb(); /* pairs with get_xol_area() */ smp_wmb(); /* pairs with get_xol_area() */
mm->uprobes_state.xol_area = area; mm->uprobes_state.xol_area = area;
ret = 0; ret = 0;
fail:
fail:
up_write(&mm->mmap_sem); up_write(&mm->mmap_sem);
if (ret)
__free_page(area->page);
return ret; return ret;
} }

-static struct xol_area *get_xol_area(struct mm_struct *mm)
-{
-	struct xol_area *area;
-
-	area = mm->uprobes_state.xol_area;
-	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
-
-	return area;
-}
-
 /*
- * xol_alloc_area - Allocate process's xol_area.
- * This area will be used for storing instructions for execution out of
- * line.
+ * get_xol_area - Allocate process's xol_area if necessary.
+ * This area will be used for storing instructions for execution out of line.
  *
  * Returns the allocated area or NULL.
  */
-static struct xol_area *xol_alloc_area(void)
+static struct xol_area *get_xol_area(void)
 {
+	struct mm_struct *mm = current->mm;
 	struct xol_area *area;

+	area = mm->uprobes_state.xol_area;
+	if (area)
+		goto ret;
+
 	area = kzalloc(sizeof(*area), GFP_KERNEL);
 	if (unlikely(!area))
-		return NULL;
+		goto out;

 	area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long), GFP_KERNEL);
 	if (!area->bitmap)
-		goto fail;
+		goto free_area;
+
+	area->page = alloc_page(GFP_HIGHUSER);
+	if (!area->page)
+		goto free_bitmap;

 	init_waitqueue_head(&area->wq);
 	if (!xol_add_vma(area))
 		return area;

- fail:
+	__free_page(area->page);
+ free_bitmap:
 	kfree(area->bitmap);
+ free_area:
 	kfree(area);
-
-	return get_xol_area(current->mm);
+ out:
+	area = mm->uprobes_state.xol_area;
+ ret:
+	smp_read_barrier_depends();	/* pairs with wmb in xol_add_vma() */
+	return area;
 }
 /*
@@ -1185,33 +1210,26 @@ static unsigned long xol_take_insn_slot(struct xol_area *area)
 }

 /*
- * xol_get_insn_slot - If was not allocated a slot, then
- * allocate a slot.
+ * xol_get_insn_slot - allocate a slot for xol.
  * Returns the allocated slot address or 0.
  */
-static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot_addr)
+static unsigned long xol_get_insn_slot(struct uprobe *uprobe)
 {
 	struct xol_area *area;
 	unsigned long offset;
+	unsigned long xol_vaddr;
 	void *vaddr;

-	area = get_xol_area(current->mm);
-	if (!area) {
-		area = xol_alloc_area();
-		if (!area)
-			return 0;
-	}
-	current->utask->xol_vaddr = xol_take_insn_slot(area);
+	area = get_xol_area();
+	if (!area)
+		return 0;

-	/*
-	 * Initialize the slot if xol_vaddr points to valid
-	 * instruction slot.
-	 */
-	if (unlikely(!current->utask->xol_vaddr))
+	xol_vaddr = xol_take_insn_slot(area);
+	if (unlikely(!xol_vaddr))
 		return 0;

-	current->utask->vaddr = slot_addr;
-
-	offset = current->utask->xol_vaddr & ~PAGE_MASK;
+	/* Initialize the slot */
+	offset = xol_vaddr & ~PAGE_MASK;
 	vaddr = kmap_atomic(area->page);
 	memcpy(vaddr + offset, uprobe->arch.insn, MAX_UINSN_BYTES);
 	kunmap_atomic(vaddr);
@@ -1221,7 +1239,7 @@ static unsigned long xol_get_insn_slot(struct uprobe *uprobe, unsigned long slot
  */
 	flush_dcache_page(area->page);

-	return current->utask->xol_vaddr;
+	return xol_vaddr;
 }
 /*
@@ -1239,8 +1257,7 @@ static void xol_free_insn_slot(struct task_struct *tsk)
 		return;

 	slot_addr = tsk->utask->xol_vaddr;
-
-	if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
+	if (unlikely(!slot_addr))
 		return;

 	area = tsk->mm->uprobes_state.xol_area;
@@ -1302,33 +1319,48 @@ void uprobe_copy_process(struct task_struct *t)
 }

 /*
- * Allocate a uprobe_task object for the task.
- * Called when the thread hits a breakpoint for the first time.
+ * Allocate a uprobe_task object for the task if if necessary.
+ * Called when the thread hits a breakpoint.
  *
  * Returns:
  * - pointer to new uprobe_task on success
  * - NULL otherwise
  */
-static struct uprobe_task *add_utask(void)
+static struct uprobe_task *get_utask(void)
 {
-	struct uprobe_task *utask;
-
-	utask = kzalloc(sizeof *utask, GFP_KERNEL);
-	if (unlikely(!utask))
-		return NULL;
-
-	current->utask = utask;
-	return utask;
+	if (!current->utask)
+		current->utask = kzalloc(sizeof(struct uprobe_task), GFP_KERNEL);
+	return current->utask;
 }

 /* Prepare to single-step probed instruction out of line. */
 static int
-pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long vaddr)
+pre_ssout(struct uprobe *uprobe, struct pt_regs *regs, unsigned long bp_vaddr)
 {
-	if (xol_get_insn_slot(uprobe, vaddr) && !arch_uprobe_pre_xol(&uprobe->arch, regs))
-		return 0;
+	struct uprobe_task *utask;
+	unsigned long xol_vaddr;
+	int err;
+
+	utask = get_utask();
+	if (!utask)
+		return -ENOMEM;
+
+	xol_vaddr = xol_get_insn_slot(uprobe);
+	if (!xol_vaddr)
+		return -ENOMEM;
+
+	utask->xol_vaddr = xol_vaddr;
+	utask->vaddr = bp_vaddr;
+
+	err = arch_uprobe_pre_xol(&uprobe->arch, regs);
+	if (unlikely(err)) {
+		xol_free_insn_slot(current);
+		return err;
+	}

-	return -EFAULT;
+	utask->active_uprobe = uprobe;
+	utask->state = UTASK_SSTEP;
+	return 0;
 }
 /*
@@ -1390,6 +1422,7 @@ static void mmf_recalc_uprobes(struct mm_struct *mm)
 		 * This is not strictly accurate, we can race with
 		 * uprobe_unregister() and see the already removed
 		 * uprobe if delete_uprobe() was not yet called.
+		 * Or this uprobe can be filtered out.
 		 */
 		if (vma_has_uprobes(vma, vma->vm_start, vma->vm_end))
 			return;
@@ -1451,13 +1484,33 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
 	return uprobe;
 }

+static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
+{
+	struct uprobe_consumer *uc;
+	int remove = UPROBE_HANDLER_REMOVE;
+
+	down_read(&uprobe->register_rwsem);
+	for (uc = uprobe->consumers; uc; uc = uc->next) {
+		int rc = uc->handler(uc, regs);
+
+		WARN(rc & ~UPROBE_HANDLER_MASK,
+			"bad rc=0x%x from %pf()\n", rc, uc->handler);
+		remove &= rc;
+	}
+
+	if (remove && uprobe->consumers) {
+		WARN_ON(!uprobe_is_active(uprobe));
+		unapply_uprobe(uprobe, current->mm);
+	}
+	up_read(&uprobe->register_rwsem);
+}
+
 /*
  * Run handler and ask thread to singlestep.
  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
  */
 static void handle_swbp(struct pt_regs *regs)
 {
-	struct uprobe_task *utask;
 	struct uprobe *uprobe;
 	unsigned long bp_vaddr;
 	int uninitialized_var(is_swbp);
@@ -1482,6 +1535,10 @@ static void handle_swbp(struct pt_regs *regs)
 		}
 		return;
 	}
+
+	/* change it in advance for ->handler() and restart */
+	instruction_pointer_set(regs, bp_vaddr);
+
 	/*
 	 * TODO: move copy_insn/etc into _register and remove this hack.
 	 * After we hit the bp, _unregister + _register can install the
@@ -1489,32 +1546,16 @@ static void handle_swbp(struct pt_regs *regs)
 	 */
 	smp_rmb(); /* pairs with wmb() in install_breakpoint() */
 	if (unlikely(!test_bit(UPROBE_COPY_INSN, &uprobe->flags)))
-		goto restart;
-
-	utask = current->utask;
-	if (!utask) {
-		utask = add_utask();
-		/* Cannot allocate; re-execute the instruction. */
-		if (!utask)
-			goto restart;
-	}
+		goto out;

 	handler_chain(uprobe, regs);
 	if (can_skip_sstep(uprobe, regs))
 		goto out;

-	if (!pre_ssout(uprobe, regs, bp_vaddr)) {
-		utask->active_uprobe = uprobe;
-		utask->state = UTASK_SSTEP;
+	if (!pre_ssout(uprobe, regs, bp_vaddr))
 		return;
-	}

-restart:
-	/*
-	 * cannot singlestep; cannot skip instruction;
-	 * re-execute the instruction.
-	 */
-	instruction_pointer_set(regs, bp_vaddr);
+	/* can_skip_sstep() succeeded, or restart if can't singlestep */
 out:
 	put_uprobe(uprobe);
 }
@@ -1608,10 +1649,8 @@ static int __init init_uprobes(void)
 {
 	int i;

-	for (i = 0; i < UPROBES_HASH_SZ; i++) {
-		mutex_init(&uprobes_mutex[i]);
+	for (i = 0; i < UPROBES_HASH_SZ; i++)
 		mutex_init(&uprobes_mmap_mutex[i]);
-	}

 	if (percpu_init_rwsem(&dup_mmap_sem))
 		return -ENOMEM;
...
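
Note the other half of the filtering scheme visible above: a handler's return value is now meaningful. handler_chain() ANDs the return codes of all consumers, and only when every one of them returns UPROBE_HANDLER_REMOVE does unapply_uprobe() strip the breakpoints from current->mm. A sketch of a handler using that convention (the still_interested() predicate is hypothetical):

	static int my_handler(struct uprobe_consumer *self, struct pt_regs *regs)
	{
		if (!still_interested(current->mm))	/* hypothetical predicate */
			return UPROBE_HANDLER_REMOVE;	/* unanimous -> unapply */

		/* ... consume the event ... */
		return 0;				/* keep the breakpoint */
	}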
@@ -712,6 +712,12 @@ static int ptrace_regset(struct task_struct *task, int req, unsigned int type,
 					     kiov->iov_len, kiov->iov_base);
 }

+/*
+ * This is declared in linux/regset.h and defined in machine-dependent
+ * code.  We put the export here, near the primary machine-neutral use,
+ * to ensure no machine forgets it.
+ */
+EXPORT_SYMBOL_GPL(task_user_regset_view);
 #endif

 int ptrace_request(struct task_struct *child, long request,
...
@@ -66,7 +66,6 @@
 #define TP_FLAG_TRACE	1
 #define TP_FLAG_PROFILE	2
 #define TP_FLAG_REGISTERED	4
-#define TP_FLAG_UPROBE	8

 /* data_rloc: data relative location, compatible with u32 */
...
@@ -28,20 +28,21 @@

 #define UPROBE_EVENT_SYSTEM	"uprobes"

+struct trace_uprobe_filter {
+	rwlock_t		rwlock;
+	int			nr_systemwide;
+	struct list_head	perf_events;
+};
+
 /*
  * uprobe event core functions
  */
-struct trace_uprobe;
-struct uprobe_trace_consumer {
-	struct uprobe_consumer		cons;
-	struct trace_uprobe		*tu;
-};
-
 struct trace_uprobe {
 	struct list_head		list;
 	struct ftrace_event_class	class;
 	struct ftrace_event_call	call;
-	struct uprobe_trace_consumer	*consumer;
+	struct trace_uprobe_filter	filter;
+	struct uprobe_consumer		consumer;
 	struct inode			*inode;
 	char				*filename;
 	unsigned long			offset;
@@ -64,6 +65,18 @@ static LIST_HEAD(uprobe_list);

 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs);

+static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter)
+{
+	rwlock_init(&filter->rwlock);
+	filter->nr_systemwide = 0;
+	INIT_LIST_HEAD(&filter->perf_events);
+}
+
+static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter)
+{
+	return !filter->nr_systemwide && list_empty(&filter->perf_events);
+}
+
 /*
  * Allocate new trace_uprobe and initialize it (including uprobes).
  */
@@ -92,6 +105,8 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs)
 		goto error;

 	INIT_LIST_HEAD(&tu->list);
+	tu->consumer.handler = uprobe_dispatcher;
+	init_trace_uprobe_filter(&tu->filter);
 	return tu;

 error:
@@ -253,16 +268,18 @@ static int create_trace_uprobe(int argc, char **argv)
 	if (ret)
 		goto fail_address_parse;

+	ret = kstrtoul(arg, 0, &offset);
+	if (ret)
+		goto fail_address_parse;
+
 	inode = igrab(path.dentry->d_inode);
-	if (!S_ISREG(inode->i_mode)) {
+	path_put(&path);
+
+	if (!inode || !S_ISREG(inode->i_mode)) {
 		ret = -EINVAL;
 		goto fail_address_parse;
 	}

-	ret = kstrtoul(arg, 0, &offset);
-	if (ret)
-		goto fail_address_parse;
-
 	argc -= 2;
 	argv += 2;
@@ -469,7 +486,7 @@ static const struct file_operations uprobe_profile_ops = {
 };

 /* uprobe handler */
-static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct uprobe_trace_entry_head *entry;
 	struct ring_buffer_event *event;
@@ -479,8 +496,6 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	unsigned long irq_flags;
 	struct ftrace_event_call *call = &tu->call;

-	tu->nhit++;
-
 	local_save_flags(irq_flags);
 	pc = preempt_count();
@@ -489,16 +504,18 @@ static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
 						  size, irq_flags, pc);
 	if (!event)
-		return;
+		return 0;

 	entry = ring_buffer_event_data(event);
-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+	entry->ip = instruction_pointer(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);

 	if (!filter_current_check_discard(buffer, call, entry, event))
 		trace_buffer_unlock_commit(buffer, event, irq_flags, pc);
+
+	return 0;
 }

 /* Event entry printers */
@@ -537,42 +554,43 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
 	return TRACE_TYPE_PARTIAL_LINE;
 }

-static int probe_event_enable(struct trace_uprobe *tu, int flag)
+static inline bool is_trace_uprobe_enabled(struct trace_uprobe *tu)
 {
-	struct uprobe_trace_consumer *utc;
-	int ret = 0;
+	return tu->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE);
+}

-	if (!tu->inode || tu->consumer)
-		return -EINTR;
+typedef bool (*filter_func_t)(struct uprobe_consumer *self,
+				enum uprobe_filter_ctx ctx,
+				struct mm_struct *mm);
+
+static int
+probe_event_enable(struct trace_uprobe *tu, int flag, filter_func_t filter)
+{
+	int ret = 0;

-	utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL);
-	if (!utc)
+	if (is_trace_uprobe_enabled(tu))
 		return -EINTR;

-	utc->cons.handler = uprobe_dispatcher;
-	utc->cons.filter = NULL;
-	ret = uprobe_register(tu->inode, tu->offset, &utc->cons);
-	if (ret) {
-		kfree(utc);
-		return ret;
-	}
+	WARN_ON(!uprobe_filter_is_empty(&tu->filter));

 	tu->flags |= flag;
-	utc->tu = tu;
-	tu->consumer = utc;
+	tu->consumer.filter = filter;
+	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);
+	if (ret)
+		tu->flags &= ~flag;

-	return 0;
+	return ret;
 }

 static void probe_event_disable(struct trace_uprobe *tu, int flag)
 {
-	if (!tu->inode || !tu->consumer)
+	if (!is_trace_uprobe_enabled(tu))
 		return;

-	uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons);
+	WARN_ON(!uprobe_filter_is_empty(&tu->filter));
+
+	uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
 	tu->flags &= ~flag;
-	kfree(tu->consumer);
-	tu->consumer = NULL;
 }

 static int uprobe_event_define_fields(struct ftrace_event_call *event_call)
@@ -646,8 +664,96 @@ static int set_print_fmt(struct trace_uprobe *tu)
 }

 #ifdef CONFIG_PERF_EVENTS
+static bool
+__uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
+{
+	struct perf_event *event;
+
+	if (filter->nr_systemwide)
+		return true;
+
+	list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
+		if (event->hw.tp_target->mm == mm)
+			return true;
+	}
+
+	return false;
+}
+
+static inline bool
+uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
+{
+	return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
+}
+
+static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		/*
+		 * event->parent != NULL means copy_process(), we can avoid
+		 * uprobe_apply(). current->mm must be probed and we can rely
+		 * on dup_mmap() which preserves the already installed bp's.
+		 *
+		 * attr.enable_on_exec means that exec/mmap will install the
+		 * breakpoints we need.
+		 */
+		done = tu->filter.nr_systemwide ||
+			event->parent || event->attr.enable_on_exec ||
+			uprobe_filter_event(tu, event);
+		list_add(&event->hw.tp_list, &tu->filter.perf_events);
+	} else {
+		done = tu->filter.nr_systemwide;
+		tu->filter.nr_systemwide++;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, true);
+
+	return 0;
+}
+
+static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
+{
+	bool done;
+
+	write_lock(&tu->filter.rwlock);
+	if (event->hw.tp_target) {
+		list_del(&event->hw.tp_list);
+		done = tu->filter.nr_systemwide ||
+			(event->hw.tp_target->flags & PF_EXITING) ||
+			uprobe_filter_event(tu, event);
+	} else {
+		tu->filter.nr_systemwide--;
+		done = tu->filter.nr_systemwide;
+	}
+	write_unlock(&tu->filter.rwlock);
+
+	if (!done)
+		uprobe_apply(tu->inode, tu->offset, &tu->consumer, false);
+
+	return 0;
+}
+
+static bool uprobe_perf_filter(struct uprobe_consumer *uc,
+				enum uprobe_filter_ctx ctx, struct mm_struct *mm)
+{
+	struct trace_uprobe *tu;
+	int ret;
+
+	tu = container_of(uc, struct trace_uprobe, consumer);
+	read_lock(&tu->filter.rwlock);
+	ret = __uprobe_perf_filter(&tu->filter, mm);
+	read_unlock(&tu->filter.rwlock);
+
+	return ret;
+}
+
 /* uprobe profile handler */
-static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
+static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tu->call;
 	struct uprobe_trace_entry_head *entry;
@@ -656,11 +762,14 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	int size, __size, i;
 	int rctx;

+	if (!uprobe_perf_filter(&tu->consumer, 0, current->mm))
+		return UPROBE_HANDLER_REMOVE;
+
 	__size = sizeof(*entry) + tu->size;
 	size = ALIGN(__size + sizeof(u32), sizeof(u64));
 	size -= sizeof(u32);
 	if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough"))
-		return;
+		return 0;

 	preempt_disable();
@@ -668,7 +777,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
 	if (!entry)
 		goto out;

-	entry->ip = uprobe_get_swbp_addr(task_pt_regs(current));
+	entry->ip = instruction_pointer(task_pt_regs(current));
 	data = (u8 *)&entry[1];
 	for (i = 0; i < tu->nr_args; i++)
 		call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset);
@@ -678,6 +787,7 @@ static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs)
  out:
 	preempt_enable();
+	return 0;
 }
 #endif	/* CONFIG_PERF_EVENTS */
@@ -688,7 +798,7 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,

 	switch (type) {
 	case TRACE_REG_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_TRACE);
+		return probe_event_enable(tu, TP_FLAG_TRACE, NULL);

 	case TRACE_REG_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_TRACE);
@@ -696,11 +806,18 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,

 #ifdef CONFIG_PERF_EVENTS
 	case TRACE_REG_PERF_REGISTER:
-		return probe_event_enable(tu, TP_FLAG_PROFILE);
+		return probe_event_enable(tu, TP_FLAG_PROFILE, uprobe_perf_filter);

 	case TRACE_REG_PERF_UNREGISTER:
 		probe_event_disable(tu, TP_FLAG_PROFILE);
 		return 0;
+
+	case TRACE_REG_PERF_OPEN:
+		return uprobe_perf_open(tu, data);
+
+	case TRACE_REG_PERF_CLOSE:
+		return uprobe_perf_close(tu, data);
+
 #endif
 	default:
 		return 0;
@@ -710,22 +827,20 @@ int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type,

 static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
 {
-	struct uprobe_trace_consumer *utc;
 	struct trace_uprobe *tu;
+	int ret = 0;

-	utc = container_of(con, struct uprobe_trace_consumer, cons);
-	tu = utc->tu;
-	if (!tu || tu->consumer != utc)
-		return 0;
+	tu = container_of(con, struct trace_uprobe, consumer);
+	tu->nhit++;

 	if (tu->flags & TP_FLAG_TRACE)
-		uprobe_trace_func(tu, regs);
+		ret |= uprobe_trace_func(tu, regs);

 #ifdef CONFIG_PERF_EVENTS
 	if (tu->flags & TP_FLAG_PROFILE)
-		uprobe_perf_func(tu, regs);
+		ret |= uprobe_perf_func(tu, regs);
 #endif
-	return 0;
+	return ret;
 }

 static struct trace_event_functions uprobe_funcs = {
...
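
Putting the trace_uprobe pieces together, the rough control flow for a per-task perf event looks like this (a sketch of the paths added above, not literal code):

	/*
	 * perf_event_open() for one pid:
	 *   TRACE_REG_PERF_OPEN -> uprobe_perf_open()
	 *     adds the event to tu->filter.perf_events (or bumps
	 *     nr_systemwide for a system-wide event), then calls
	 *     uprobe_apply(..., true) so the breakpoint reaches only
	 *     the mms accepted by uprobe_perf_filter().
	 *
	 * breakpoint hit in an mm nobody traces:
	 *   uprobe_perf_func() returns UPROBE_HANDLER_REMOVE and
	 *   handler_chain() unapplies the uprobe from current->mm,
	 *   which is what restores the near-native timings quoted
	 *   in the commit message above.
	 */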