Commit 24f1e32c authored by Frederic Weisbecker

hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events

This patch rebases the implementation of the breakpoints API on top of
perf event instances.

Each breakpoint is now a perf event that handles the
register scheduling, thread/cpu attachment, etc.

The new layering is now made as follows:

       ptrace       kgdb      ftrace   perf syscall
          \          |          /         /
           \         |         /         /
                                        /
            Core breakpoint API        /
                                      /
                     |               /
                     |              /

              Breakpoints perf events

                     |
                     |

               Breakpoints PMU ---- Debug Register constraints handling
                                    (Part of core breakpoint API)
                     |
                     |

             Hardware debug registers

Reasons for this rewrite:

- Use the centralized/optimized pmu registers scheduling,
  implying an easier arch integration
- More powerful register handling: perf attributes (pinned/flexible
  events, exclusive/non-exclusive, tunable period, etc...)

Impact:

- New perf ABI: the hardware breakpoints counters
- Ptrace breakpoints setting remains tricky and still needs some per
  thread breakpoints references.

Todo (in order):

- Support breakpoints perf counter events for perf tools (ie: implement
  perf_bpcounter_event())
- Support from perf tools

Changes in v2:

- Follow the perf "event" rename
- The ptrace regression has been fixed (ptrace breakpoint perf events
  weren't released when a task ended)
- Drop the struct hw_breakpoint and store generic fields in
  perf_event_attr.
- Separate core and arch specific headers, drop
  asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h
- Use new generic len/type for breakpoint
- Handle off case: when breakpoints api is not supported by an arch

Changes in v3:

- Fix broken CONFIG_KVM, we need to propagate the breakpoint api
  changes to kvm when we exit the guest and restore the bp registers
  to the host.

Changes in v4:

- Drop the hw_breakpoint_restore() stub as it is only used by KVM
- EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a
  module
- Restore the breakpoints unconditionally on kvm guest exit:
  TIF_DEBUG_THREAD no longer covers every case of running
  breakpoints and vcpu->arch.switch_db_regs might not always be
  set when the guest used debug registers.
  (Waiting for a reliable optimization)

Changes in v5:

- Split-up the asm-generic/hw-breakpoint.h moving to
  linux/hw_breakpoint.h into a separate patch
- Optimize the breakpoints restoring while switching from kvm guest
  to host. We only want to restore the state if we have active
  breakpoints on the host, otherwise we don't care about messed-up
  address registers.
- Add asm/hw_breakpoint.h to Kbuild
- Fix bad breakpoint type in trace_selftest.c

Changes in v6:

- Fix wrong header inclusion in trace.h (triggered a build
  error with CONFIG_FTRACE_SELFTEST)

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jan Kiszka <jan.kiszka@web.de>
Cc: Jiri Slaby <jirislaby@gmail.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: Paul Mundt <lethal@linux-sh.org>
parent 2da3e160
...@@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES ...@@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES
config HAVE_HW_BREAKPOINT config HAVE_HW_BREAKPOINT
bool bool
depends on HAVE_PERF_EVENTS
select ANON_INODES
select PERF_EVENTS
source "kernel/gcov/Kconfig" source "kernel/gcov/Kconfig"
...@@ -10,6 +10,7 @@ header-y += ptrace-abi.h ...@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
header-y += sigcontext32.h header-y += sigcontext32.h
header-y += ucontext.h header-y += ucontext.h
header-y += processor-flags.h header-y += processor-flags.h
header-y += hw_breakpoint.h
unifdef-y += e820.h unifdef-y += e820.h
unifdef-y += ist.h unifdef-y += ist.h
......
...@@ -75,13 +75,8 @@ ...@@ -75,13 +75,8 @@
*/ */
#ifdef __KERNEL__ #ifdef __KERNEL__
/* For process management */ DECLARE_PER_CPU(unsigned long, dr7);
extern void flush_thread_hw_breakpoint(struct task_struct *tsk);
extern int copy_thread_hw_breakpoint(struct task_struct *tsk,
struct task_struct *child, unsigned long clone_flags);
/* For CPU management */
extern void load_debug_registers(void);
static inline void hw_breakpoint_disable(void) static inline void hw_breakpoint_disable(void)
{ {
/* Zero the control register for HW Breakpoint */ /* Zero the control register for HW Breakpoint */
...@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void) ...@@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void)
set_debugreg(0UL, 3); set_debugreg(0UL, 3);
} }
#ifdef CONFIG_KVM
extern void hw_breakpoint_restore(void);
#endif
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_X86_DEBUGREG_H */ #endif /* _ASM_X86_DEBUGREG_H */
...@@ -4,6 +4,11 @@ ...@@ -4,6 +4,11 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
#define __ARCH_HW_BREAKPOINT_H #define __ARCH_HW_BREAKPOINT_H
/*
* The name should probably be handled at a higher level,
* when dealing with the user (display/resolving).
*/
struct arch_hw_breakpoint { struct arch_hw_breakpoint {
char *name; /* Contains name of the symbol to set bkpt */ char *name; /* Contains name of the symbol to set bkpt */
unsigned long address; unsigned long address;
...@@ -12,44 +17,57 @@ struct arch_hw_breakpoint { ...@@ -12,44 +17,57 @@ struct arch_hw_breakpoint {
}; };
#include <linux/kdebug.h> #include <linux/kdebug.h>
#include <linux/hw_breakpoint.h> #include <linux/percpu.h>
#include <linux/list.h>
/* Available HW breakpoint length encodings */ /* Available HW breakpoint length encodings */
#define HW_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_1 0x40
#define HW_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_2 0x44
#define HW_BREAKPOINT_LEN_4 0x4c #define X86_BREAKPOINT_LEN_4 0x4c
#define HW_BREAKPOINT_LEN_EXECUTE 0x40 #define X86_BREAKPOINT_LEN_EXECUTE 0x40
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
#define HW_BREAKPOINT_LEN_8 0x48 #define X86_BREAKPOINT_LEN_8 0x48
#endif #endif
/* Available HW breakpoint type encodings */ /* Available HW breakpoint type encodings */
/* trigger on instruction execute */ /* trigger on instruction execute */
#define HW_BREAKPOINT_EXECUTE 0x80 #define X86_BREAKPOINT_EXECUTE 0x80
/* trigger on memory write */ /* trigger on memory write */
#define HW_BREAKPOINT_WRITE 0x81 #define X86_BREAKPOINT_WRITE 0x81
/* trigger on memory read or write */ /* trigger on memory read or write */
#define HW_BREAKPOINT_RW 0x83 #define X86_BREAKPOINT_RW 0x83
/* Total number of available HW breakpoint registers */ /* Total number of available HW breakpoint registers */
#define HBP_NUM 4 #define HBP_NUM 4
extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; struct perf_event;
DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); struct pmu;
extern unsigned int hbp_user_refcount[HBP_NUM];
extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk);
extern void arch_uninstall_thread_hw_breakpoint(void);
extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
struct task_struct *tsk); struct task_struct *tsk);
extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk);
extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk);
extern void arch_update_kernel_hw_breakpoint(void *);
extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
unsigned long val, void *data); unsigned long val, void *data);
int arch_install_hw_breakpoint(struct perf_event *bp);
void arch_uninstall_hw_breakpoint(struct perf_event *bp);
void hw_breakpoint_pmu_read(struct perf_event *bp);
void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
extern void
arch_fill_perf_breakpoint(struct perf_event *bp);
unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
extern int arch_bp_generic_fields(int x86_len, int x86_type,
int *gen_len, int *gen_type);
extern struct pmu perf_ops_bp;
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _I386_HW_BREAKPOINT_H */ #endif /* _I386_HW_BREAKPOINT_H */
...@@ -423,6 +423,8 @@ extern unsigned int xstate_size; ...@@ -423,6 +423,8 @@ extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *); extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep; extern struct kmem_cache *task_xstate_cachep;
struct perf_event;
struct thread_struct { struct thread_struct {
/* Cached TLS descriptors: */ /* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
...@@ -444,12 +446,10 @@ struct thread_struct { ...@@ -444,12 +446,10 @@ struct thread_struct {
unsigned long fs; unsigned long fs;
#endif #endif
unsigned long gs; unsigned long gs;
/* Hardware debugging registers: */ /* Save middle states of ptrace breakpoints */
unsigned long debugreg[HBP_NUM]; struct perf_event *ptrace_bps[HBP_NUM];
unsigned long debugreg6; /* Debug status used for traps, single steps, etc... */
unsigned long debugreg7; unsigned long debugreg6;
/* Hardware breakpoint info */
struct hw_breakpoint *hbp[HBP_NUM];
/* Fault info: */ /* Fault info: */
unsigned long cr2; unsigned long cr2;
unsigned long trap_no; unsigned long trap_no;
......
This diff is collapsed.
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include <linux/clockchips.h> #include <linux/clockchips.h>
#include <linux/random.h> #include <linux/random.h>
#include <trace/events/power.h> #include <trace/events/power.h>
#include <linux/hw_breakpoint.h>
#include <asm/system.h> #include <asm/system.h>
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/syscalls.h> #include <asm/syscalls.h>
...@@ -18,7 +19,6 @@ ...@@ -18,7 +19,6 @@
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/ds.h> #include <asm/ds.h>
#include <asm/debugreg.h> #include <asm/debugreg.h>
#include <asm/hw_breakpoint.h>
unsigned long idle_halt; unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt); EXPORT_SYMBOL(idle_halt);
...@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk) ...@@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk)
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL; tsk->thread.xstate = NULL;
} }
if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
flush_thread_hw_breakpoint(tsk);
WARN(tsk->thread.ds_ctx, "leaking DS context\n"); WARN(tsk->thread.ds_ctx, "leaking DS context\n");
} }
...@@ -107,8 +105,7 @@ void flush_thread(void) ...@@ -107,8 +105,7 @@ void flush_thread(void)
} }
#endif #endif
if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) flush_ptrace_hw_breakpoint(tsk);
flush_thread_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/* /*
* Forget coprocessor state.. * Forget coprocessor state..
......
...@@ -59,7 +59,6 @@ ...@@ -59,7 +59,6 @@
#include <asm/syscalls.h> #include <asm/syscalls.h>
#include <asm/ds.h> #include <asm/ds.h>
#include <asm/debugreg.h> #include <asm/debugreg.h>
#include <asm/hw_breakpoint.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
...@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.io_bitmap_ptr = NULL; p->thread.io_bitmap_ptr = NULL;
tsk = current; tsk = current;
err = -ENOMEM; err = -ENOMEM;
if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG)))
if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
goto out;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
...@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
err = do_set_thread_area(p, -1, err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0); (struct user_desc __user *)childregs->si, 0);
out:
if (err && p->thread.io_bitmap_ptr) { if (err && p->thread.io_bitmap_ptr) {
kfree(p->thread.io_bitmap_ptr); kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0; p->thread.io_bitmap_max = 0;
} }
if (err)
flush_thread_hw_breakpoint(p);
clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL; p->thread.ds_ctx = NULL;
...@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
lazy_load_gs(next->gs); lazy_load_gs(next->gs);
percpu_write(current_task, next_p); percpu_write(current_task, next_p);
/*
* There's a problem with moving the arch_install_thread_hw_breakpoint()
* call before current is updated. Suppose a kernel breakpoint is
* triggered in between the two, the hw-breakpoint handler will see that
* the 'current' task does not have TIF_DEBUG flag set and will think it
* is leftover from an old task (lazy switching) and will erase it. Then
* until the next context switch, no user-breakpoints will be installed.
*
* The real problem is that it's impossible to update both current and
* physical debug registers at the same instant, so there will always be
* a window in which they disagree and a breakpoint might get triggered.
* Since we use lazy switching, we are forced to assume that a
* disagreement means that current is correct and the exception is due
* to lazy debug register switching.
*/
if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
arch_install_thread_hw_breakpoint(next_p);
return prev_p; return prev_p;
} }
......
...@@ -53,7 +53,6 @@ ...@@ -53,7 +53,6 @@
#include <asm/syscalls.h> #include <asm/syscalls.h>
#include <asm/ds.h> #include <asm/ds.h>
#include <asm/debugreg.h> #include <asm/debugreg.h>
#include <asm/hw_breakpoint.h>
asmlinkage extern void ret_from_fork(void); asmlinkage extern void ret_from_fork(void);
...@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task) ...@@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task)
BUG(); BUG();
} }
} }
if (unlikely(dead_task->thread.debugreg7))
flush_thread_hw_breakpoint(dead_task);
} }
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
...@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
savesegment(ds, p->thread.ds); savesegment(ds, p->thread.ds);
err = -ENOMEM; err = -ENOMEM;
if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
if (copy_thread_hw_breakpoint(me, p, clone_flags))
goto out;
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
...@@ -351,8 +346,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -351,8 +346,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
kfree(p->thread.io_bitmap_ptr); kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0; p->thread.io_bitmap_max = 0;
} }
if (err)
flush_thread_hw_breakpoint(p);
return err; return err;
} }
...@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) ...@@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/ */
if (preload_fpu) if (preload_fpu)
__math_state_restore(); __math_state_restore();
/*
* There's a problem with moving the arch_install_thread_hw_breakpoint()
* call before current is updated. Suppose a kernel breakpoint is
* triggered in between the two, the hw-breakpoint handler will see that
* the 'current' task does not have TIF_DEBUG flag set and will think it
* is leftover from an old task (lazy switching) and will erase it. Then
* until the next context switch, no user-breakpoints will be installed.
*
* The real problem is that it's impossible to update both current and
* physical debug registers at the same instant, so there will always be
* a window in which they disagree and a breakpoint might get triggered.
* Since we use lazy switching, we are forced to assume that a
* disagreement means that current is correct and the exception is due
* to lazy debug register switching.
*/
if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG)))
arch_install_thread_hw_breakpoint(next_p);
return prev_p; return prev_p;
} }
......
...@@ -22,6 +22,8 @@ ...@@ -22,6 +22,8 @@
#include <linux/seccomp.h> #include <linux/seccomp.h>
#include <linux/signal.h> #include <linux/signal.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
...@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, ...@@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target,
return ret; return ret;
} }
/* static void ptrace_triggered(struct perf_event *bp, void *data)
* Decode the length and type bits for a particular breakpoint as
* stored in debug register 7. Return the "enabled" status.
*/
static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len,
unsigned *type)
{
int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
*len = (bp_info & 0xc) | 0x40;
*type = (bp_info & 0x3) | 0x80;
return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
}
static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
{ {
struct thread_struct *thread = &(current->thread);
int i; int i;
struct thread_struct *thread = &(current->thread);
/* /*
* Store in the virtual DR6 register the fact that the breakpoint * Store in the virtual DR6 register the fact that the breakpoint
* was hit so the thread's debugger will see it. * was hit so the thread's debugger will see it.
*/ */
for (i = 0; i < hbp_kernel_pos; i++) for (i = 0; i < HBP_NUM; i++) {
/* if (thread->ptrace_bps[i] == bp)
* We will check bp->info.address against the address stored in
* thread's hbp structure and not debugreg[i]. This is to ensure
* that the corresponding bit for 'i' in DR7 register is enabled
*/
if (bp->info.address == thread->hbp[i]->info.address)
break; break;
}
thread->debugreg6 |= (DR_TRAP0 << i); thread->debugreg6 |= (DR_TRAP0 << i);
} }
/*
* Walk through every ptrace breakpoint for this thread and
* build the dr7 value on top of their attributes.
*
*/
static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
int i;
int dr7 = 0;
struct arch_hw_breakpoint *info;
for (i = 0; i < HBP_NUM; i++) {
if (bp[i] && !bp[i]->attr.disabled) {
info = counter_arch_bp(bp[i]);
dr7 |= encode_dr7(i, info->len, info->type);
}
}
return dr7;
}
/* /*
* Handle ptrace writes to debug register 7. * Handle ptrace writes to debug register 7.
*/ */
static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{ {
struct thread_struct *thread = &(tsk->thread); struct thread_struct *thread = &(tsk->thread);
unsigned long old_dr7 = thread->debugreg7; unsigned long old_dr7;
int i, orig_ret = 0, rc = 0; int i, orig_ret = 0, rc = 0;
int enabled, second_pass = 0; int enabled, second_pass = 0;
unsigned len, type; unsigned len, type;
struct hw_breakpoint *bp; int gen_len, gen_type;
struct perf_event *bp;
data &= ~DR_CONTROL_RESERVED; data &= ~DR_CONTROL_RESERVED;
old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
restore: restore:
/* /*
* Loop through all the hardware breakpoints, making the * Loop through all the hardware breakpoints, making the
...@@ -496,11 +503,12 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) ...@@ -496,11 +503,12 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
*/ */
for (i = 0; i < HBP_NUM; i++) { for (i = 0; i < HBP_NUM; i++) {
enabled = decode_dr7(data, i, &len, &type); enabled = decode_dr7(data, i, &len, &type);
bp = thread->hbp[i]; bp = thread->ptrace_bps[i];
if (!enabled) { if (!enabled) {
if (bp) { if (bp) {
/* Don't unregister the breakpoints right-away, /*
* Don't unregister the breakpoints right-away,
* unless all register_user_hw_breakpoint() * unless all register_user_hw_breakpoint()
* requests have succeeded. This prevents * requests have succeeded. This prevents
* any window of opportunity for debug * any window of opportunity for debug
...@@ -508,27 +516,45 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) ...@@ -508,27 +516,45 @@ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
*/ */
if (!second_pass) if (!second_pass)
continue; continue;
unregister_user_hw_breakpoint(tsk, bp); thread->ptrace_bps[i] = NULL;
kfree(bp); unregister_hw_breakpoint(bp);
} }
continue; continue;
} }
/*
* We should have at least an inactive breakpoint at this
* slot. It means the user is writing dr7 without having
* written the address register first
*/
if (!bp) { if (!bp) {
rc = -ENOMEM; rc = -EINVAL;
bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); break;
if (bp) { }
bp->info.address = thread->debugreg[i];
bp->triggered = ptrace_triggered; rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
bp->info.len = len;
bp->info.type = type;
rc = register_user_hw_breakpoint(tsk, bp);
if (rc)
kfree(bp);
}
} else
rc = modify_user_hw_breakpoint(tsk, bp);
if (rc) if (rc)
break; break;
/*
* This is a temporary thing as bp is unregistered/registered
* to simulate modification
*/
bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
gen_type, bp->callback,
tsk, true);
thread->ptrace_bps[i] = NULL;
if (!bp) { /* incorrect bp, or we have a bug in bp API */
rc = -EINVAL;
break;
}
if (IS_ERR(bp)) {
rc = PTR_ERR(bp);
bp = NULL;
break;
}
thread->ptrace_bps[i] = bp;
} }
/* /*
* Make a second pass to free the remaining unused breakpoints * Make a second pass to free the remaining unused breakpoints
...@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) ...@@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
struct thread_struct *thread = &(tsk->thread); struct thread_struct *thread = &(tsk->thread);
unsigned long val = 0; unsigned long val = 0;
if (n < HBP_NUM) if (n < HBP_NUM) {
val = thread->debugreg[n]; struct perf_event *bp;
else if (n == 6) bp = thread->ptrace_bps[n];
if (!bp)
return 0;
val = bp->hw.info.address;
} else if (n == 6) {
val = thread->debugreg6; val = thread->debugreg6;
else if (n == 7) } else if (n == 7) {
val = thread->debugreg7; val = ptrace_get_dr7(thread->ptrace_bps);
}
return val; return val;
} }
static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
unsigned long addr)
{
struct perf_event *bp;
struct thread_struct *t = &tsk->thread;
if (!t->ptrace_bps[nr]) {
/*
* Put stub len and type to register (reserve) an inactive but
* correct bp
*/
bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
HW_BREAKPOINT_W,
ptrace_triggered, tsk,
false);
} else {
bp = t->ptrace_bps[nr];
t->ptrace_bps[nr] = NULL;
bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
bp->attr.bp_type,
bp->callback,
tsk,
bp->attr.disabled);
}
if (!bp)
return -EIO;
/*
* CHECKME: the previous code returned -EIO if the addr wasn't a
* valid task virtual addr. The new one will return -EINVAL in this
* case.
* -EINVAL may be what we want for in-kernel breakpoints users, but
* -EIO looks better for ptrace, since we refuse a register writing
* for the user. And anyway this is the previous behaviour.
*/
if (IS_ERR(bp))
return PTR_ERR(bp);
t->ptrace_bps[nr] = bp;
return 0;
}
/* /*
* Handle PTRACE_POKEUSR calls for the debug register area. * Handle PTRACE_POKEUSR calls for the debug register area.
*/ */
...@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) ...@@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
return -EIO; return -EIO;
if (n == 6) { if (n == 6) {
tsk->thread.debugreg6 = val; thread->debugreg6 = val;
goto ret_path; goto ret_path;
} }
if (n < HBP_NUM) { if (n < HBP_NUM) {
if (thread->hbp[n]) { rc = ptrace_set_breakpoint_addr(tsk, n, val);
if (arch_check_va_in_userspace(val, if (rc)
thread->hbp[n]->info.len) == 0) { return rc;
rc = -EIO;
goto ret_path;
}
thread->hbp[n]->info.address = val;
}
thread->debugreg[n] = val;
} }
/* All that's left is DR7 */ /* All that's left is DR7 */
if (n == 7) if (n == 7)
......
...@@ -64,7 +64,6 @@ ...@@ -64,7 +64,6 @@
#include <asm/apic.h> #include <asm/apic.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/uv/uv.h> #include <asm/uv/uv.h>
#include <asm/debugreg.h>
#include <linux/mc146818rtc.h> #include <linux/mc146818rtc.h>
#include <asm/smpboot_hooks.h> #include <asm/smpboot_hooks.h>
...@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused) ...@@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused)
x86_cpuinit.setup_percpu_clockev(); x86_cpuinit.setup_percpu_clockev();
wmb(); wmb();
load_debug_registers();
cpu_idle(); cpu_idle();
} }
...@@ -1269,7 +1267,6 @@ void cpu_disable_common(void) ...@@ -1269,7 +1267,6 @@ void cpu_disable_common(void)
remove_cpu_from_maps(cpu); remove_cpu_from_maps(cpu);
unlock_vector_lock(); unlock_vector_lock();
fixup_irqs(); fixup_irqs();
hw_breakpoint_disable();
} }
int native_cpu_disable(void) int native_cpu_disable(void)
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "trace.h" #include "trace.h"
#include <asm/debugreg.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/desc.h> #include <asm/desc.h>
...@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
trace_kvm_entry(vcpu->vcpu_id); trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu, kvm_run); kvm_x86_ops->run(vcpu, kvm_run);
if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { /*
set_debugreg(current->thread.debugreg[0], 0); * If the guest has used debug registers, at least dr7
set_debugreg(current->thread.debugreg[1], 1); * will be disabled while returning to the host.
set_debugreg(current->thread.debugreg[2], 2); * If we don't have active breakpoints in the host, we don't
set_debugreg(current->thread.debugreg[3], 3); * care about the messed up debug address registers. But if
set_debugreg(current->thread.debugreg6, 6); * we have some of them active, restore the old state.
set_debugreg(current->thread.debugreg7, 7); */
} if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK)
hw_breakpoint_restore();
set_bit(KVM_REQ_KICK, &vcpu->requests); set_bit(KVM_REQ_KICK, &vcpu->requests);
local_irq_enable(); local_irq_enable();
......
...@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt) ...@@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr4 = read_cr4(); ctxt->cr4 = read_cr4();
ctxt->cr8 = read_cr8(); ctxt->cr8 = read_cr8();
#endif #endif
hw_breakpoint_disable();
} }
/* Needed by apm.c */ /* Needed by apm.c */
...@@ -144,11 +143,6 @@ static void fix_processor_context(void) ...@@ -144,11 +143,6 @@ static void fix_processor_context(void)
#endif #endif
load_TR_desc(); /* This does ltr */ load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */ load_LDT(&current->active_mm->context); /* This does lldt */
/*
* Now maybe reload the debug registers
*/
load_debug_registers();
} }
/** /**
......
#ifndef _LINUX_HW_BREAKPOINT_H #ifndef _LINUX_HW_BREAKPOINT_H
#define _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H
#include <linux/perf_event.h>
#ifdef __KERNEL__ enum {
#include <linux/list.h> HW_BREAKPOINT_LEN_1 = 1,
#include <linux/types.h> HW_BREAKPOINT_LEN_2 = 2,
#include <linux/kallsyms.h> HW_BREAKPOINT_LEN_4 = 4,
HW_BREAKPOINT_LEN_8 = 8,
/**
* struct hw_breakpoint - unified kernel/user-space hardware breakpoint
* @triggered: callback invoked after target address access
* @info: arch-specific breakpoint info (address, length, and type)
*
* %hw_breakpoint structures are the kernel's way of representing
* hardware breakpoints. These are data breakpoints
* (also known as "watchpoints", triggered on data access), and the breakpoint's
* target address can be located in either kernel space or user space.
*
* The breakpoint's address, length, and type are highly
* architecture-specific. The values are encoded in the @info field; you
* specify them when registering the breakpoint. To examine the encoded
* values use hw_breakpoint_get_{kaddress,uaddress,len,type}(), declared
* below.
*
* The address is specified as a regular kernel pointer (for kernel-space
* breakponts) or as an %__user pointer (for user-space breakpoints).
* With register_user_hw_breakpoint(), the address must refer to a
* location in user space. The breakpoint will be active only while the
* requested task is running. Conversely with
* register_kernel_hw_breakpoint(), the address must refer to a location
* in kernel space, and the breakpoint will be active on all CPUs
* regardless of the current task.
*
* The length is the breakpoint's extent in bytes, which is subject to
* certain limitations. include/asm/hw_breakpoint.h contains macros
* defining the available lengths for a specific architecture. Note that
* the address's alignment must match the length. The breakpoint will
* catch accesses to any byte in the range from address to address +
* (length - 1).
*
* The breakpoint's type indicates the sort of access that will cause it
* to trigger. Possible values may include:
*
* %HW_BREAKPOINT_RW (triggered on read or write access),
* %HW_BREAKPOINT_WRITE (triggered on write access), and
* %HW_BREAKPOINT_READ (triggered on read access).
*
* Appropriate macros are defined in include/asm/hw_breakpoint.h; not all
* possibilities are available on all architectures. Execute breakpoints
* must have length equal to the special value %HW_BREAKPOINT_LEN_EXECUTE.
*
* When a breakpoint gets hit, the @triggered callback is
* invoked in_interrupt with a pointer to the %hw_breakpoint structure and the
* processor registers.
* Data breakpoints occur after the memory access has taken place.
* Breakpoints are disabled during execution @triggered, to avoid
* recursive traps and allow unhindered access to breakpointed memory.
*
* This sample code sets a breakpoint on pid_max and registers a callback
* function for writes to that variable. Note that it is not portable
* as written, because not all architectures support HW_BREAKPOINT_LEN_4.
*
* ----------------------------------------------------------------------
*
* #include <asm/hw_breakpoint.h>
*
* struct hw_breakpoint my_bp;
*
* static void my_triggered(struct hw_breakpoint *bp, struct pt_regs *regs)
* {
* printk(KERN_DEBUG "Inside triggered routine of breakpoint exception\n");
* dump_stack();
* .......<more debugging output>........
* }
*
* static struct hw_breakpoint my_bp;
*
* static int init_module(void)
* {
* ..........<do anything>............
* my_bp.info.type = HW_BREAKPOINT_WRITE;
* my_bp.info.len = HW_BREAKPOINT_LEN_4;
*
* my_bp.installed = (void *)my_bp_installed;
*
* rc = register_kernel_hw_breakpoint(&my_bp);
* ..........<do anything>............
* }
*
* static void cleanup_module(void)
* {
* ..........<do anything>............
* unregister_kernel_hw_breakpoint(&my_bp);
* ..........<do anything>............
* }
*
* ----------------------------------------------------------------------
*/
struct hw_breakpoint {
void (*triggered)(struct hw_breakpoint *, struct pt_regs *);
struct arch_hw_breakpoint info;
}; };
/* enum {
* len and type values are defined in include/asm/hw_breakpoint.h. HW_BREAKPOINT_R = 1,
* Available values vary according to the architecture. On i386 the HW_BREAKPOINT_W = 2,
* possibilities are: HW_BREAKPOINT_X = 4,
* };
* HW_BREAKPOINT_LEN_1
* HW_BREAKPOINT_LEN_2 static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
* HW_BREAKPOINT_LEN_4 {
* HW_BREAKPOINT_RW return &bp->hw.info;
* HW_BREAKPOINT_READ }
*
* On other architectures HW_BREAKPOINT_LEN_8 may be available, and the static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
* 1-, 2-, and 4-byte lengths may be unavailable. There also may be {
* HW_BREAKPOINT_WRITE. You can use #ifdef to check at compile time. return bp->attr.bp_addr;
*/ }
/* Accessor: hand back the bp_type field stored in the event's attributes. */
static inline int hw_breakpoint_type(struct perf_event *bp)
{
	int type = bp->attr.bp_type;

	return type;
}
/* Accessor: hand back the bp_len field stored in the event's attributes. */
static inline int hw_breakpoint_len(struct perf_event *bp)
{
	int len = bp->attr.bp_len;

	return len;
}
#ifdef CONFIG_HAVE_HW_BREAKPOINT
extern struct perf_event *
register_user_hw_breakpoint(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
struct task_struct *tsk,
bool active);
/* FIXME: only change from the attr, and don't unregister */
extern struct perf_event *
modify_user_hw_breakpoint(struct perf_event *bp,
unsigned long addr,
int len,
int type,
perf_callback_t triggered,
struct task_struct *tsk,
bool active);
extern int register_user_hw_breakpoint(struct task_struct *tsk,
struct hw_breakpoint *bp);
extern int modify_user_hw_breakpoint(struct task_struct *tsk,
struct hw_breakpoint *bp);
extern void unregister_user_hw_breakpoint(struct task_struct *tsk,
struct hw_breakpoint *bp);
/* /*
* Kernel breakpoints are not associated with any particular thread. * Kernel breakpoints are not associated with any particular thread.
*/ */
extern int register_kernel_hw_breakpoint(struct hw_breakpoint *bp); extern struct perf_event *
extern void unregister_kernel_hw_breakpoint(struct hw_breakpoint *bp); register_wide_hw_breakpoint_cpu(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
int cpu,
bool active);
extern struct perf_event **
register_wide_hw_breakpoint(unsigned long addr,
int len,
int type,
perf_callback_t triggered,
bool active);
extern int register_perf_hw_breakpoint(struct perf_event *bp);
extern int __register_perf_hw_breakpoint(struct perf_event *bp);
extern void unregister_hw_breakpoint(struct perf_event *bp);
extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
extern int reserve_bp_slot(struct perf_event *bp);
extern void release_bp_slot(struct perf_event *bp);
extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
#else /* !CONFIG_HAVE_HW_BREAKPOINT */
/*
 * Fallbacks for the !CONFIG_HAVE_HW_BREAKPOINT case: every entry point of
 * the breakpoint API is still callable, but registration helpers return
 * NULL or -ENOSYS and the teardown helpers have empty bodies.
 */

/* Always returns NULL. */
static inline struct perf_event *
register_user_hw_breakpoint(unsigned long addr, int len, int type,
			    perf_callback_t triggered,
			    struct task_struct *tsk, bool active)
{
	return NULL;
}

/* Always returns NULL. */
static inline struct perf_event *
modify_user_hw_breakpoint(struct perf_event *bp, unsigned long addr,
			  int len, int type, perf_callback_t triggered,
			  struct task_struct *tsk, bool active)
{
	return NULL;
}

/* Always returns NULL. */
static inline struct perf_event *
register_wide_hw_breakpoint_cpu(unsigned long addr, int len, int type,
				perf_callback_t triggered, int cpu,
				bool active)
{
	return NULL;
}

/* Always returns NULL. */
static inline struct perf_event **
register_wide_hw_breakpoint(unsigned long addr, int len, int type,
			    perf_callback_t triggered, bool active)
{
	return NULL;
}

/* Always fails with -ENOSYS. */
static inline int register_perf_hw_breakpoint(struct perf_event *bp)
{
	return -ENOSYS;
}

/* Always fails with -ENOSYS. */
static inline int __register_perf_hw_breakpoint(struct perf_event *bp)
{
	return -ENOSYS;
}

/* Nothing to unregister. */
static inline void unregister_hw_breakpoint(struct perf_event *bp)
{
}

/* Nothing to unregister. */
static inline void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
{
}

/* Always fails with -ENOSYS. */
static inline int reserve_bp_slot(struct perf_event *bp)
{
	return -ENOSYS;
}

/* Nothing to release. */
static inline void release_bp_slot(struct perf_event *bp)
{
}

/* Nothing to flush. */
static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
{
}
extern unsigned int hbp_kernel_pos; #endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* __KERNEL__ */ #endif /* _LINUX_HW_BREAKPOINT_H */
#endif /* _LINUX_HW_BREAKPOINT_H */
...@@ -18,6 +18,10 @@ ...@@ -18,6 +18,10 @@
#include <linux/ioctl.h> #include <linux/ioctl.h>
#include <asm/byteorder.h> #include <asm/byteorder.h>
#ifdef CONFIG_HAVE_HW_BREAKPOINT
#include <asm/hw_breakpoint.h>
#endif
/* /*
* User-space ABI bits: * User-space ABI bits:
*/ */
...@@ -31,6 +35,7 @@ enum perf_type_id { ...@@ -31,6 +35,7 @@ enum perf_type_id {
PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_TRACEPOINT = 2,
PERF_TYPE_HW_CACHE = 3, PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4, PERF_TYPE_RAW = 4,
PERF_TYPE_BREAKPOINT = 5,
PERF_TYPE_MAX, /* non-ABI */ PERF_TYPE_MAX, /* non-ABI */
}; };
...@@ -207,6 +212,15 @@ struct perf_event_attr { ...@@ -207,6 +212,15 @@ struct perf_event_attr {
__u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_events; /* wakeup every n events */
__u32 wakeup_watermark; /* bytes before wakeup */ __u32 wakeup_watermark; /* bytes before wakeup */
}; };
union {
struct { /* Hardware breakpoint info */
__u64 bp_addr;
__u32 bp_type;
__u32 bp_len;
};
};
__u32 __reserved_2; __u32 __reserved_2;
__u64 __reserved_3; __u64 __reserved_3;
...@@ -476,6 +490,11 @@ struct hw_perf_event { ...@@ -476,6 +490,11 @@ struct hw_perf_event {
atomic64_t count; atomic64_t count;
struct hrtimer hrtimer; struct hrtimer hrtimer;
}; };
#ifdef CONFIG_HAVE_HW_BREAKPOINT
union { /* breakpoint */
struct arch_hw_breakpoint info;
};
#endif
}; };
atomic64_t prev_count; atomic64_t prev_count;
u64 sample_period; u64 sample_period;
...@@ -588,7 +607,7 @@ struct perf_event { ...@@ -588,7 +607,7 @@ struct perf_event {
u64 tstamp_running; u64 tstamp_running;
u64 tstamp_stopped; u64 tstamp_stopped;
struct perf_event_attr attr; struct perf_event_attr attr;
struct hw_perf_event hw; struct hw_perf_event hw;
struct perf_event_context *ctx; struct perf_event_context *ctx;
...@@ -643,6 +662,8 @@ struct perf_event { ...@@ -643,6 +662,8 @@ struct perf_event {
perf_callback_t callback; perf_callback_t callback;
perf_callback_t event_callback;
#endif /* CONFIG_PERF_EVENTS */ #endif /* CONFIG_PERF_EVENTS */
}; };
...@@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate; ...@@ -831,6 +852,7 @@ extern int sysctl_perf_event_sample_rate;
extern void perf_event_init(void); extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count, extern void perf_tp_event(int event_id, u64 addr, u64 count,
void *record, int entry_size); void *record, int entry_size);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags #ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \ #define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
...@@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } ...@@ -865,6 +887,8 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
static inline void static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi, perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { } struct pt_regs *regs, u64 addr) { }
static inline void
perf_bp_event(struct perf_event *event, void *data) { }
static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_comm(struct task_struct *tsk) { }
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include <linux/init_task.h> #include <linux/init_task.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <trace/events/sched.h> #include <trace/events/sched.h>
#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/unistd.h> #include <asm/unistd.h>
...@@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code) ...@@ -979,6 +980,10 @@ NORET_TYPE void do_exit(long code)
proc_exit_connector(tsk); proc_exit_connector(tsk);
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
flush_ptrace_hw_breakpoint(tsk);
/* /*
* Flush inherited counters to the parent - before the parent * Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications. * gets woken up by child-exit notifications.
......
This diff is collapsed.
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/perf_event.h> #include <linux/perf_event.h>
#include <linux/ftrace_event.h> #include <linux/ftrace_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h> #include <asm/irq_regs.h>
...@@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event) ...@@ -4229,6 +4230,51 @@ static void perf_event_free_filter(struct perf_event *event)
#endif /* CONFIG_EVENT_PROFILE */ #endif /* CONFIG_EVENT_PROFILE */
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
 * Installed as the ->destroy callback of breakpoint events by
 * bp_perf_event_init(); it passes the event to release_bp_slot().
 */
static void bp_perf_event_destroy(struct perf_event *event)
{
release_bp_slot(event);
}
/*
 * Initialise a PERF_TYPE_BREAKPOINT event.
 *
 * Registers the breakpoint, installs bp_perf_event_destroy() as the
 * event's ->destroy callback and returns the breakpoint pmu.  If the
 * registration fails, its error code is returned wrapped in ERR_PTR().
 */
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
	int ret;

	/*
	 * An event that was not created through the perf syscall arrives
	 * with its breakpoint already filled in; the presence of a callback
	 * is used to tell the two cases apart.
	 * FIXME: manage to get triggered to NULL if it comes from syscalls
	 */
	if (bp->callback)
		ret = __register_perf_hw_breakpoint(bp);
	else
		ret = register_perf_hw_breakpoint(bp);

	if (ret)
		return ERR_PTR(ret);

	bp->destroy = bp_perf_event_destroy;

	return &perf_ops_bp;
}
/*
 * Placeholder for breakpoint event handling: takes the breakpoint event and
 * an opaque pointer (named regs here) and currently does nothing.
 */
void perf_bp_event(struct perf_event *bp, void *regs)
{
/* TODO: not implemented yet; the body is intentionally empty for now */
}
#else
/* !CONFIG_HAVE_HW_BREAKPOINT variant: empty body, nothing is released. */
static void bp_perf_event_destroy(struct perf_event *event)
{
}
/*
 * !CONFIG_HAVE_HW_BREAKPOINT variant: no breakpoint is registered and no pmu
 * is returned; the result is always NULL.
 */
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
return NULL;
}
/* !CONFIG_HAVE_HW_BREAKPOINT variant: intentionally empty. */
void perf_bp_event(struct perf_event *bp, void *regs)
{
}
#endif
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
static void sw_perf_event_destroy(struct perf_event *event) static void sw_perf_event_destroy(struct perf_event *event)
...@@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr, ...@@ -4375,6 +4421,11 @@ perf_event_alloc(struct perf_event_attr *attr,
pmu = tp_perf_event_init(event); pmu = tp_perf_event_init(event);
break; break;
case PERF_TYPE_BREAKPOINT:
pmu = bp_perf_event_init(event);
break;
default: default:
break; break;
} }
...@@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, ...@@ -4686,7 +4737,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
ctx = find_get_context(pid, cpu); ctx = find_get_context(pid, cpu);
if (IS_ERR(ctx)) if (IS_ERR(ctx))
return NULL ; return NULL;
event = perf_event_alloc(attr, cpu, ctx, NULL, event = perf_event_alloc(attr, cpu, ctx, NULL,
NULL, callback, GFP_KERNEL); NULL, callback, GFP_KERNEL);
......
...@@ -11,14 +11,11 @@ ...@@ -11,14 +11,11 @@
#include <linux/ftrace.h> #include <linux/ftrace.h>
#include <trace/boot.h> #include <trace/boot.h>
#include <linux/kmemtrace.h> #include <linux/kmemtrace.h>
#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h> #include <linux/trace_seq.h>
#include <linux/ftrace_event.h> #include <linux/ftrace_event.h>
#ifdef CONFIG_KSYM_TRACER
#include <asm/hw_breakpoint.h>
#endif
enum trace_type { enum trace_type {
__TRACE_FIRST_TYPE = 0, __TRACE_FIRST_TYPE = 0,
......
...@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry, ...@@ -372,11 +372,11 @@ FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
F_STRUCT( F_STRUCT(
__field( unsigned long, ip ) __field( unsigned long, ip )
__field( unsigned char, type ) __field( unsigned char, type )
__array( char , ksym_name, KSYM_NAME_LEN )
__array( char , cmd, TASK_COMM_LEN ) __array( char , cmd, TASK_COMM_LEN )
__field( unsigned long, addr )
), ),
F_printk("ip: %pF type: %d ksym_name: %s cmd: %s", F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
(void *)__entry->ip, (unsigned int)__entry->type, (void *)__entry->ip, (unsigned int)__entry->type,
__entry->ksym_name, __entry->cmd) (void *)__entry->addr, __entry->cmd)
); );
...@@ -29,7 +29,11 @@ ...@@ -29,7 +29,11 @@
#include "trace_stat.h" #include "trace_stat.h"
#include "trace.h" #include "trace.h"
/* For now, let us restrict the no. of symbols traced simultaneously to number #include <linux/hw_breakpoint.h>
#include <asm/hw_breakpoint.h>
/*
* For now, let us restrict the no. of symbols traced simultaneously to number
* of available hardware breakpoint registers. * of available hardware breakpoint registers.
*/ */
#define KSYM_TRACER_MAX HBP_NUM #define KSYM_TRACER_MAX HBP_NUM
...@@ -37,8 +41,10 @@ ...@@ -37,8 +41,10 @@
#define KSYM_TRACER_OP_LEN 3 /* rw- */ #define KSYM_TRACER_OP_LEN 3 /* rw- */
struct trace_ksym { struct trace_ksym {
struct hw_breakpoint *ksym_hbp; struct perf_event **ksym_hbp;
unsigned long ksym_addr; unsigned long ksym_addr;
int type;
int len;
#ifdef CONFIG_PROFILE_KSYM_TRACER #ifdef CONFIG_PROFILE_KSYM_TRACER
unsigned long counter; unsigned long counter;
#endif #endif
...@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) ...@@ -75,10 +81,11 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
} }
#endif /* CONFIG_PROFILE_KSYM_TRACER */ #endif /* CONFIG_PROFILE_KSYM_TRACER */
void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) void ksym_hbp_handler(struct perf_event *hbp, void *data)
{ {
struct ring_buffer_event *event; struct ring_buffer_event *event;
struct ksym_trace_entry *entry; struct ksym_trace_entry *entry;
struct pt_regs *regs = data;
struct ring_buffer *buffer; struct ring_buffer *buffer;
int pc; int pc;
...@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs) ...@@ -96,12 +103,12 @@ void ksym_hbp_handler(struct hw_breakpoint *hbp, struct pt_regs *regs)
entry = ring_buffer_event_data(event); entry = ring_buffer_event_data(event);
entry->ip = instruction_pointer(regs); entry->ip = instruction_pointer(regs);
entry->type = hbp->info.type; entry->type = hw_breakpoint_type(hbp);
strlcpy(entry->ksym_name, hbp->info.name, KSYM_SYMBOL_LEN); entry->addr = hw_breakpoint_addr(hbp);
strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
#ifdef CONFIG_PROFILE_KSYM_TRACER #ifdef CONFIG_PROFILE_KSYM_TRACER
ksym_collect_stats(hbp->info.address); ksym_collect_stats(hw_breakpoint_addr(hbp));
#endif /* CONFIG_PROFILE_KSYM_TRACER */ #endif /* CONFIG_PROFILE_KSYM_TRACER */
trace_buffer_unlock_commit(buffer, event, 0, pc); trace_buffer_unlock_commit(buffer, event, 0, pc);
...@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str) ...@@ -120,31 +127,21 @@ static int ksym_trace_get_access_type(char *str)
int access = 0; int access = 0;
if (str[0] == 'r') if (str[0] == 'r')
access += 4; access |= HW_BREAKPOINT_R;
else if (str[0] != '-')
return -EINVAL;
if (str[1] == 'w') if (str[1] == 'w')
access += 2; access |= HW_BREAKPOINT_W;
else if (str[1] != '-')
return -EINVAL;
if (str[2] != '-') if (str[2] == 'x')
return -EINVAL; access |= HW_BREAKPOINT_X;
switch (access) { switch (access) {
case 6: case HW_BREAKPOINT_W:
access = HW_BREAKPOINT_RW; case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
break; return access;
case 4: default:
access = -EINVAL; return -EINVAL;
break;
case 2:
access = HW_BREAKPOINT_WRITE;
break;
} }
return access;
} }
/* /*
...@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) ...@@ -194,36 +191,33 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
if (!entry) if (!entry)
return -ENOMEM; return -ENOMEM;
entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); entry->type = op;
if (!entry->ksym_hbp) entry->ksym_addr = addr;
goto err; entry->len = HW_BREAKPOINT_LEN_4;
entry->ksym_hbp->info.name = kstrdup(ksymname, GFP_KERNEL); ret = -EAGAIN;
if (!entry->ksym_hbp->info.name) entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
goto err; entry->len, entry->type,
ksym_hbp_handler, true);
entry->ksym_hbp->info.type = op; if (IS_ERR(entry->ksym_hbp)) {
entry->ksym_addr = entry->ksym_hbp->info.address = addr; entry->ksym_hbp = NULL;
#ifdef CONFIG_X86 ret = PTR_ERR(entry->ksym_hbp);
entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4; }
#endif
entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
ret = register_kernel_hw_breakpoint(entry->ksym_hbp); if (!entry->ksym_hbp) {
if (ret < 0) {
printk(KERN_INFO "ksym_tracer request failed. Try again" printk(KERN_INFO "ksym_tracer request failed. Try again"
" later!!\n"); " later!!\n");
ret = -EAGAIN;
goto err; goto err;
} }
hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
ksym_filter_entry_count++; ksym_filter_entry_count++;
return 0; return 0;
err: err:
if (entry->ksym_hbp)
kfree(entry->ksym_hbp->info.name);
kfree(entry->ksym_hbp);
kfree(entry); kfree(entry);
return ret; return ret;
} }
...@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, ...@@ -244,10 +238,10 @@ static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
mutex_lock(&ksym_tracer_mutex); mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
ret = trace_seq_printf(s, "%s:", entry->ksym_hbp->info.name); ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE) if (entry->type == HW_BREAKPOINT_W)
ret = trace_seq_puts(s, "-w-\n"); ret = trace_seq_puts(s, "-w-\n");
else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW) else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
ret = trace_seq_puts(s, "rw-\n"); ret = trace_seq_puts(s, "rw-\n");
WARN_ON_ONCE(!ret); WARN_ON_ONCE(!ret);
} }
...@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void) ...@@ -269,12 +263,10 @@ static void __ksym_trace_reset(void)
mutex_lock(&ksym_tracer_mutex); mutex_lock(&ksym_tracer_mutex);
hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
ksym_hlist) { ksym_hlist) {
unregister_kernel_hw_breakpoint(entry->ksym_hbp); unregister_wide_hw_breakpoint(entry->ksym_hbp);
ksym_filter_entry_count--; ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist)); hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu(); synchronize_rcu();
kfree(entry->ksym_hbp->info.name);
kfree(entry->ksym_hbp);
kfree(entry); kfree(entry);
} }
mutex_unlock(&ksym_tracer_mutex); mutex_unlock(&ksym_tracer_mutex);
...@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file, ...@@ -327,7 +319,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
if (entry->ksym_addr == ksym_addr) { if (entry->ksym_addr == ksym_addr) {
/* Check for malformed request: (6) */ /* Check for malformed request: (6) */
if (entry->ksym_hbp->info.type != op) if (entry->type != op)
changed = 1; changed = 1;
else else
goto out; goto out;
...@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file, ...@@ -335,18 +327,21 @@ static ssize_t ksym_trace_filter_write(struct file *file,
} }
} }
if (changed) { if (changed) {
unregister_kernel_hw_breakpoint(entry->ksym_hbp); unregister_wide_hw_breakpoint(entry->ksym_hbp);
entry->ksym_hbp->info.type = op; entry->type = op;
if (op > 0) { if (op > 0) {
ret = register_kernel_hw_breakpoint(entry->ksym_hbp); entry->ksym_hbp =
if (ret == 0) register_wide_hw_breakpoint(entry->ksym_addr,
entry->len, entry->type,
ksym_hbp_handler, true);
if (IS_ERR(entry->ksym_hbp))
entry->ksym_hbp = NULL;
if (!entry->ksym_hbp)
goto out; goto out;
} }
ksym_filter_entry_count--; ksym_filter_entry_count--;
hlist_del_rcu(&(entry->ksym_hlist)); hlist_del_rcu(&(entry->ksym_hlist));
synchronize_rcu(); synchronize_rcu();
kfree(entry->ksym_hbp->info.name);
kfree(entry->ksym_hbp);
kfree(entry); kfree(entry);
ret = 0; ret = 0;
goto out; goto out;
...@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter) ...@@ -413,16 +408,16 @@ static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
trace_assign_type(field, entry); trace_assign_type(field, entry);
ret = trace_seq_printf(s, "%11s-%-5d [%03d] %-30s ", field->cmd, ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
entry->pid, iter->cpu, field->ksym_name); entry->pid, iter->cpu, (char *)field->addr);
if (!ret) if (!ret)
return TRACE_TYPE_PARTIAL_LINE; return TRACE_TYPE_PARTIAL_LINE;
switch (field->type) { switch (field->type) {
case HW_BREAKPOINT_WRITE: case HW_BREAKPOINT_W:
ret = trace_seq_printf(s, " W "); ret = trace_seq_printf(s, " W ");
break; break;
case HW_BREAKPOINT_RW: case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
ret = trace_seq_printf(s, " RW "); ret = trace_seq_printf(s, " RW ");
break; break;
default: default:
...@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v) ...@@ -490,14 +485,13 @@ static int ksym_tracer_stat_show(struct seq_file *m, void *v)
entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
if (entry->ksym_hbp) access_type = entry->type;
access_type = entry->ksym_hbp->info.type;
switch (access_type) { switch (access_type) {
case HW_BREAKPOINT_WRITE: case HW_BREAKPOINT_W:
seq_puts(m, " W "); seq_puts(m, " W ");
break; break;
case HW_BREAKPOINT_RW: case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
seq_puts(m, " RW "); seq_puts(m, " RW ");
break; break;
default: default:
......
...@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) ...@@ -828,7 +828,8 @@ trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
ksym_selftest_dummy = 0; ksym_selftest_dummy = 0;
/* Register the read-write tracing request */ /* Register the read-write tracing request */
ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW, ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY,
HW_BREAKPOINT_R | HW_BREAKPOINT_W,
(unsigned long)(&ksym_selftest_dummy)); (unsigned long)(&ksym_selftest_dummy));
if (ret < 0) { if (ret < 0) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment