Commit 0ad5d703 authored by Ingo Molnar

Merge branch 'tracing/hw-branch-tracing' into tracing/core

Merge reason: this topic is ready for upstream now. It passed
              Oleg's review and Andrew had no further mm/*
              objections/observations either.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parents 44347d94 1cb81b14
...@@ -506,7 +506,6 @@ config X86_PTRACE_BTS ...@@ -506,7 +506,6 @@ config X86_PTRACE_BTS
bool "Branch Trace Store" bool "Branch Trace Store"
default y default y
depends on X86_DEBUGCTLMSR depends on X86_DEBUGCTLMSR
depends on BROKEN
---help--- ---help---
This adds a ptrace interface to the hardware's branch trace store. This adds a ptrace interface to the hardware's branch trace store.
......
...@@ -167,6 +167,15 @@ config IOMMU_LEAK ...@@ -167,6 +167,15 @@ config IOMMU_LEAK
Add a simple leak tracer to the IOMMU code. This is useful when you Add a simple leak tracer to the IOMMU code. This is useful when you
are debugging a buggy device driver that leaks IOMMU mappings. are debugging a buggy device driver that leaks IOMMU mappings.
config X86_DS_SELFTEST
bool "DS selftest"
default y
depends on DEBUG_KERNEL
depends on X86_DS
---help---
Perform Debug Store selftests at boot time.
If in doubt, say "N".
config HAVE_MMIOTRACE_SUPPORT config HAVE_MMIOTRACE_SUPPORT
def_bool y def_bool y
......
...@@ -15,8 +15,8 @@ ...@@ -15,8 +15,8 @@
* - buffer allocation (memory accounting) * - buffer allocation (memory accounting)
* *
* *
* Copyright (C) 2007-2008 Intel Corporation. * Copyright (C) 2007-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/ */
#ifndef _ASM_X86_DS_H #ifndef _ASM_X86_DS_H
...@@ -83,8 +83,10 @@ enum ds_feature { ...@@ -83,8 +83,10 @@ enum ds_feature {
* The interrupt threshold is independent from the overflow callback * The interrupt threshold is independent from the overflow callback
* to allow users to use their own overflow interrupt handling mechanism. * to allow users to use their own overflow interrupt handling mechanism.
* *
* task: the task to request recording for; * The function might sleep.
* NULL for per-cpu recording on the current cpu *
* task: the task to request recording for
* cpu: the cpu to request recording for
* base: the base pointer for the (non-pageable) buffer; * base: the base pointer for the (non-pageable) buffer;
* size: the size of the provided buffer in bytes * size: the size of the provided buffer in bytes
* ovfl: pointer to a function to be called on buffer overflow; * ovfl: pointer to a function to be called on buffer overflow;
...@@ -93,19 +95,28 @@ enum ds_feature { ...@@ -93,19 +95,28 @@ enum ds_feature {
* -1 if no interrupt threshold is requested. * -1 if no interrupt threshold is requested.
* flags: a bit-mask of the above flags * flags: a bit-mask of the above flags
*/ */
extern struct bts_tracer *ds_request_bts(struct task_struct *task, extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size, void *base, size_t size,
bts_ovfl_callback_t ovfl, bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags); size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
void *base, size_t size, bts_ovfl_callback_t ovfl,
pebs_ovfl_callback_t ovfl, size_t th, unsigned int flags);
size_t th, unsigned int flags); extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags);
/* /*
* Release BTS or PEBS resources * Release BTS or PEBS resources
* Suspend and resume BTS or PEBS tracing * Suspend and resume BTS or PEBS tracing
* *
* Must be called with irq's enabled.
*
* tracer: the tracer handle returned from ds_request_~() * tracer: the tracer handle returned from ds_request_~()
*/ */
extern void ds_release_bts(struct bts_tracer *tracer); extern void ds_release_bts(struct bts_tracer *tracer);
...@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); ...@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
extern void ds_suspend_pebs(struct pebs_tracer *tracer); extern void ds_suspend_pebs(struct pebs_tracer *tracer);
extern void ds_resume_pebs(struct pebs_tracer *tracer); extern void ds_resume_pebs(struct pebs_tracer *tracer);
/*
 * Release BTS or PEBS resources (noirq variants)
 * Suspend and resume BTS or PEBS tracing (noirq variants)
 *
 * Unlike the functions declared without the _noirq suffix, these
 * may be called with irq's disabled. In exchange, they are restricted
 * in who may call them:
 *
 * Cpu tracers must call this on the traced cpu.
 * Task tracers must call ds_release_~_noirq() for themselves.
 *
 * Returns 0 if successful;
 * -EPERM if the cpu tracer does not trace the current cpu.
 * -EPERM if the task tracer does not trace itself.
 *
 * tracer: the tracer handle returned from ds_request_~()
 */
extern int ds_release_bts_noirq(struct bts_tracer *tracer);
extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
/* /*
* The raw DS buffer state as it is used for BTS and PEBS recording. * The raw DS buffer state as it is used for BTS and PEBS recording.
...@@ -170,9 +203,9 @@ struct bts_struct { ...@@ -170,9 +203,9 @@ struct bts_struct {
} lbr; } lbr;
/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
struct { struct {
__u64 jiffies; __u64 clock;
pid_t pid; pid_t pid;
} timestamp; } event;
} variant; } variant;
}; };
...@@ -201,8 +234,12 @@ struct bts_trace { ...@@ -201,8 +234,12 @@ struct bts_trace {
struct pebs_trace { struct pebs_trace {
struct ds_trace ds; struct ds_trace ds;
/* the PEBS reset value */ /* the number of valid counters in the below array */
unsigned long long reset_value; unsigned int counters;
#define MAX_PEBS_COUNTERS 4
/* the counter reset value */
unsigned long long counter_reset[MAX_PEBS_COUNTERS];
}; };
...@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); ...@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
* Returns 0 on success; -Eerrno on error * Returns 0 on success; -Eerrno on error
* *
* tracer: the tracer handle returned from ds_request_pebs() * tracer: the tracer handle returned from ds_request_pebs()
* counter: the index of the counter
* value: the new counter reset value * value: the new counter reset value
*/ */
extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
unsigned int counter, u64 value);
/* /*
* Initialization * Initialization
...@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); ...@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
*/ */
extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
/*
* Task clone/init and cleanup work
*/
extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
extern void ds_exit_thread(struct task_struct *tsk);
#else /* CONFIG_X86_DS */ #else /* CONFIG_X86_DS */
struct cpuinfo_x86; struct cpuinfo_x86;
static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
static inline void ds_switch_to(struct task_struct *prev, static inline void ds_switch_to(struct task_struct *prev,
struct task_struct *next) {} struct task_struct *next) {}
static inline void ds_copy_thread(struct task_struct *tsk,
struct task_struct *father) {}
static inline void ds_exit_thread(struct task_struct *tsk) {}
#endif /* CONFIG_X86_DS */ #endif /* CONFIG_X86_DS */
#endif /* _ASM_X86_DS_H */ #endif /* _ASM_X86_DS_H */
...@@ -460,14 +460,8 @@ struct thread_struct { ...@@ -460,14 +460,8 @@ struct thread_struct {
unsigned io_bitmap_max; unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr; unsigned long debugctlmsr;
#ifdef CONFIG_X86_DS /* Debug Store context; see asm/ds.h */
/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
struct ds_context *ds_ctx; struct ds_context *ds_ctx;
#endif /* CONFIG_X86_DS */
#ifdef CONFIG_X86_PTRACE_BTS
/* the signal to send on a bts buffer overflow */
unsigned int bts_ovfl_signal;
#endif /* CONFIG_X86_PTRACE_BTS */
}; };
static inline unsigned long native_get_debugreg(int regno) static inline unsigned long native_get_debugreg(int regno)
...@@ -795,6 +789,21 @@ static inline unsigned long get_debugctlmsr(void) ...@@ -795,6 +789,21 @@ static inline unsigned long get_debugctlmsr(void)
return debugctlmsr; return debugctlmsr;
} }
static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
{
u64 debugctlmsr = 0;
u32 val1, val2;
#ifndef CONFIG_X86_DEBUGCTLMSR
if (boot_cpu_data.x86 < 6)
return 0;
#endif
rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
debugctlmsr = val1 | ((u64)val2 << 32);
return debugctlmsr;
}
static inline void update_debugctlmsr(unsigned long debugctlmsr) static inline void update_debugctlmsr(unsigned long debugctlmsr)
{ {
#ifndef CONFIG_X86_DEBUGCTLMSR #ifndef CONFIG_X86_DEBUGCTLMSR
...@@ -804,6 +813,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) ...@@ -804,6 +813,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
} }
static inline void update_debugctlmsr_on_cpu(int cpu,
unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
if (boot_cpu_data.x86 < 6)
return;
#endif
wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
(u32)((u64)debugctlmsr),
(u32)((u64)debugctlmsr >> 32));
}
/* /*
* from system description table in BIOS. Mostly for MCA use, but * from system description table in BIOS. Mostly for MCA use, but
* others may find it useful: * others may find it useful:
......
...@@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, ...@@ -235,12 +235,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate); struct user_desc __user *info, int can_allocate);
extern void x86_ptrace_untrace(struct task_struct *); #ifdef CONFIG_X86_PTRACE_BTS
extern void x86_ptrace_fork(struct task_struct *child, extern void ptrace_bts_untrace(struct task_struct *tsk);
unsigned long clone_flags);
#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk)
#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) #endif /* CONFIG_X86_PTRACE_BTS */
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
......
...@@ -44,6 +44,7 @@ obj-y += process.o ...@@ -44,6 +44,7 @@ obj-y += process.o
obj-y += i387.o xsave.o obj-y += i387.o xsave.o
obj-y += ptrace.o obj-y += ptrace.o
obj-$(CONFIG_X86_DS) += ds.o obj-$(CONFIG_X86_DS) += ds.o
obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_X86_32) += tls.o
obj-$(CONFIG_IA32_EMULATION) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o
obj-y += step.o obj-y += step.o
......
...@@ -19,45 +19,61 @@ ...@@ -19,45 +19,61 @@
* Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
*/ */
#include <linux/kernel.h>
#include <asm/ds.h>
#include <linux/errno.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/slab.h> #include <linux/errno.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/kernel.h> #include <linux/trace_clock.h>
#include <asm/ds.h>
#include "ds_selftest.h"
/* /*
* The configuration for a particular DS hardware implementation. * The configuration for a particular DS hardware implementation:
*/ */
struct ds_configuration { struct ds_configuration {
/* the name of the configuration */ /* The name of the configuration: */
const char *name; const char *name;
/* the size of one pointer-typed field in the DS structure and
in the BTS and PEBS buffers in bytes; /* The size of pointer-typed fields in DS, BTS, and PEBS: */
this covers the first 8 DS fields related to buffer management. */ unsigned char sizeof_ptr_field;
unsigned char sizeof_field;
/* the size of a BTS/PEBS record in bytes */ /* The size of a BTS/PEBS record in bytes: */
unsigned char sizeof_rec[2]; unsigned char sizeof_rec[2];
/* a series of bit-masks to control various features indexed
* by enum ds_feature */ /* The number of pebs counter reset values in the DS structure. */
unsigned long ctl[dsf_ctl_max]; unsigned char nr_counter_reset;
/* Control bit-masks indexed by enum ds_feature: */
unsigned long ctl[dsf_ctl_max];
}; };
static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); static struct ds_configuration ds_cfg __read_mostly;
/* Maximal size of a DS configuration: */
#define MAX_SIZEOF_DS 0x80
#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) /* Maximal size of a BTS record: */
#define MAX_SIZEOF_BTS (3 * 8)
#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ /* BTS and PEBS buffer alignment: */
#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ #define DS_ALIGNMENT (1 << 3)
#define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */
#define BTS_CONTROL \ /* Number of buffer pointers in DS: */
(ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ #define NUM_DS_PTR_FIELDS 8
ds_cfg.ctl[dsf_bts_overflow])
/* Size of a pebs reset value in DS: */
#define PEBS_RESET_FIELD_SIZE 8
/* Mask of control bits in the DS MSR register: */
#define BTS_CONTROL \
( ds_cfg.ctl[dsf_bts] | \
ds_cfg.ctl[dsf_bts_kernel] | \
ds_cfg.ctl[dsf_bts_user] | \
ds_cfg.ctl[dsf_bts_overflow] )
/* /*
* A BTS or PEBS tracer. * A BTS or PEBS tracer.
...@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); ...@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
* to identify tracers. * to identify tracers.
*/ */
struct ds_tracer { struct ds_tracer {
/* the DS context (partially) owned by this tracer */ /* The DS context (partially) owned by this tracer. */
struct ds_context *context; struct ds_context *context;
/* the buffer provided on ds_request() and its size in bytes */ /* The buffer provided on ds_request() and its size in bytes. */
void *buffer; void *buffer;
size_t size; size_t size;
}; };
struct bts_tracer { struct bts_tracer {
/* the common DS part */ /* The common DS part: */
struct ds_tracer ds; struct ds_tracer ds;
/* the trace including the DS configuration */
struct bts_trace trace; /* The trace including the DS configuration: */
/* buffer overflow notification function */ struct bts_trace trace;
bts_ovfl_callback_t ovfl;
/* Buffer overflow notification function: */
bts_ovfl_callback_t ovfl;
/* Active flags affecting trace collection. */
unsigned int flags;
}; };
struct pebs_tracer { struct pebs_tracer {
/* the common DS part */ /* The common DS part: */
struct ds_tracer ds; struct ds_tracer ds;
/* the trace including the DS configuration */
struct pebs_trace trace; /* The trace including the DS configuration: */
/* buffer overflow notification function */ struct pebs_trace trace;
pebs_ovfl_callback_t ovfl;
/* Buffer overflow notification function: */
pebs_ovfl_callback_t ovfl;
}; };
/* /*
...@@ -97,6 +120,7 @@ struct pebs_tracer { ...@@ -97,6 +120,7 @@ struct pebs_tracer {
* *
* The DS configuration consists of the following fields; different * The DS configuration consists of the following fields; different
* architectures vary in the size of those fields. * architectures vary in the size of those fields.
*
* - double-word aligned base linear address of the BTS buffer * - double-word aligned base linear address of the BTS buffer
* - write pointer into the BTS buffer * - write pointer into the BTS buffer
* - end linear address of the BTS buffer (one byte beyond the end of * - end linear address of the BTS buffer (one byte beyond the end of
...@@ -135,21 +159,22 @@ enum ds_field { ...@@ -135,21 +159,22 @@ enum ds_field {
}; };
enum ds_qualifier { enum ds_qualifier {
ds_bts = 0, ds_bts = 0,
ds_pebs ds_pebs
}; };
static inline unsigned long ds_get(const unsigned char *base, static inline unsigned long
enum ds_qualifier qual, enum ds_field field) ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
{ {
base += (ds_cfg.sizeof_field * (field + (4 * qual))); base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
return *(unsigned long *)base; return *(unsigned long *)base;
} }
static inline void ds_set(unsigned char *base, enum ds_qualifier qual, static inline void
enum ds_field field, unsigned long value) ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
unsigned long value)
{ {
base += (ds_cfg.sizeof_field * (field + (4 * qual))); base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
(*(unsigned long *)base) = value; (*(unsigned long *)base) = value;
} }
...@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, ...@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
*/ */
static DEFINE_SPINLOCK(ds_lock); static DEFINE_SPINLOCK(ds_lock);
/* /*
* We either support (system-wide) per-cpu or per-thread allocation. * We either support (system-wide) per-cpu or per-thread allocation.
* We distinguish the two based on the task_struct pointer, where a * We distinguish the two based on the task_struct pointer, where a
...@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock); ...@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock);
*/ */
static atomic_t tracers = ATOMIC_INIT(0); static atomic_t tracers = ATOMIC_INIT(0);
static inline void get_tracer(struct task_struct *task) static inline int get_tracer(struct task_struct *task)
{ {
if (task) int error;
spin_lock_irq(&ds_lock);
if (task) {
error = -EPERM;
if (atomic_read(&tracers) < 0)
goto out;
atomic_inc(&tracers); atomic_inc(&tracers);
else } else {
error = -EPERM;
if (atomic_read(&tracers) > 0)
goto out;
atomic_dec(&tracers); atomic_dec(&tracers);
}
error = 0;
out:
spin_unlock_irq(&ds_lock);
return error;
} }
static inline void put_tracer(struct task_struct *task) static inline void put_tracer(struct task_struct *task)
...@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task) ...@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task)
atomic_inc(&tracers); atomic_inc(&tracers);
} }
static inline int check_tracer(struct task_struct *task)
{
return task ?
(atomic_read(&tracers) >= 0) :
(atomic_read(&tracers) <= 0);
}
/* /*
* The DS context is either attached to a thread or to a cpu: * The DS context is either attached to a thread or to a cpu:
* - in the former case, the thread_struct contains a pointer to the * - in the former case, the thread_struct contains a pointer to the
...@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task) ...@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task)
* deallocated when the last user puts the context. * deallocated when the last user puts the context.
*/ */
struct ds_context { struct ds_context {
/* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ /* The DS configuration; goes into MSR_IA32_DS_AREA: */
unsigned char ds[MAX_SIZEOF_DS]; unsigned char ds[MAX_SIZEOF_DS];
/* the owner of the BTS and PEBS configuration, respectively */
struct bts_tracer *bts_master; /* The owner of the BTS and PEBS configuration, respectively: */
struct pebs_tracer *pebs_master; struct bts_tracer *bts_master;
/* use count */ struct pebs_tracer *pebs_master;
unsigned long count;
/* a pointer to the context location inside the thread_struct
* or the per_cpu context array */
struct ds_context **this;
/* a pointer to the task owning this context, or NULL, if the
* context is owned by a cpu */
struct task_struct *task;
};
static DEFINE_PER_CPU(struct ds_context *, system_context_array); /* Use count: */
unsigned long count;
#define system_context per_cpu(system_context_array, smp_processor_id()) /* Pointer to the context pointer field: */
struct ds_context **this;
/* The traced task; NULL for cpu tracing: */
struct task_struct *task;
/* The traced cpu; only valid if task is NULL: */
int cpu;
};
static DEFINE_PER_CPU(struct ds_context *, cpu_context);
static inline struct ds_context *ds_get_context(struct task_struct *task)
static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
{ {
struct ds_context **p_context = struct ds_context **p_context =
(task ? &task->thread.ds_ctx : &system_context); (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
struct ds_context *context = NULL; struct ds_context *context = NULL;
struct ds_context *new_context = NULL; struct ds_context *new_context = NULL;
unsigned long irq;
/* Chances are small that we already have a context. */ /* Chances are small that we already have a context. */
new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
if (!new_context) if (!new_context)
return NULL; return NULL;
spin_lock_irqsave(&ds_lock, irq); spin_lock_irq(&ds_lock);
context = *p_context; context = *p_context;
if (!context) { if (likely(!context)) {
context = new_context; context = new_context;
context->this = p_context; context->this = p_context;
context->task = task; context->task = task;
context->cpu = cpu;
context->count = 0; context->count = 0;
if (task)
set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
if (!task || (task == current))
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
*p_context = context; *p_context = context;
} }
context->count++; context->count++;
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
if (context != new_context) if (context != new_context)
kfree(new_context); kfree(new_context);
...@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) ...@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
return context; return context;
} }
static inline void ds_put_context(struct ds_context *context) static void ds_put_context(struct ds_context *context)
{ {
struct task_struct *task;
unsigned long irq; unsigned long irq;
if (!context) if (!context)
...@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context) ...@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context)
*(context->this) = NULL; *(context->this) = NULL;
if (context->task) task = context->task;
clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
if (task)
clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
if (!context->task || (context->task == current)) /*
wrmsrl(MSR_IA32_DS_AREA, 0); * We leave the (now dangling) pointer to the DS configuration in
* the DS_AREA msr. This is as good or as bad as replacing it with
* NULL - the hardware would crash if we enabled tracing.
*
* This saves us some problems with having to write an msr on a
* different cpu while preventing others from doing the same for the
* next context for that same cpu.
*/
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irqrestore(&ds_lock, irq);
/* The context might still be in use for context switching. */
if (task && (task != current))
wait_task_context_switch(task);
kfree(context); kfree(context);
} }
/*
 * Install the DS configuration of a context into MSR_IA32_DS_AREA.
 *
 * context: the DS context whose ds[] area is to be installed
 *
 * For a cpu context, the MSR is written on context->cpu.
 * For a task context, the MSR is written only if the task is current;
 * in either case the task's TIF_DS_AREA_MSR flag is set.
 */
static void ds_install_ds_area(struct ds_context *context)
{
	unsigned long ds = (unsigned long)context->ds;

	/*
	 * There is a race between the bts master and the pebs master.
	 *
	 * The thread/cpu access is synchronized via get/put_cpu() for
	 * task tracing and via wrmsr_on_cpu for cpu tracing.
	 *
	 * If bts and pebs are collected for the same task or same cpu,
	 * the same configuration is written twice.
	 */
	if (!context->task) {
		/* Cpu tracing: write the two halves on the traced cpu. */
		wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
			     (u32)((u64)ds), (u32)((u64)ds >> 32));
		return;
	}

	/* Task tracing. */
	get_cpu();
	if (context->task == current)
		wrmsrl(MSR_IA32_DS_AREA, ds);
	set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
	put_cpu();
}
/* /*
* Call the tracer's callback on a buffer overflow. * Call the tracer's callback on a buffer overflow.
...@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) ...@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
* The remainder of any partially written record is zeroed out. * The remainder of any partially written record is zeroed out.
* *
* context: the DS context * context: the DS context
* qual: the buffer type * qual: the buffer type
* record: the data to write * record: the data to write
* size: the size of the data * size: the size of the data
*/ */
static int ds_write(struct ds_context *context, enum ds_qualifier qual, static int ds_write(struct ds_context *context, enum ds_qualifier qual,
const void *record, size_t size) const void *record, size_t size)
...@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, ...@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
unsigned long write_size, adj_write_size; unsigned long write_size, adj_write_size;
/* /*
* write as much as possible without producing an * Write as much as possible without producing an
* overflow interrupt. * overflow interrupt.
* *
* interrupt_threshold must either be * Interrupt_threshold must either be
* - bigger than absolute_maximum or * - bigger than absolute_maximum or
* - point to a record between buffer_base and absolute_maximum * - point to a record between buffer_base and absolute_maximum
* *
* index points to a valid record. * Index points to a valid record.
*/ */
base = ds_get(context->ds, qual, ds_buffer_base); base = ds_get(context->ds, qual, ds_buffer_base);
index = ds_get(context->ds, qual, ds_index); index = ds_get(context->ds, qual, ds_index);
...@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, ...@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
write_end = min(end, int_th); write_end = min(end, int_th);
/* if we are already beyond the interrupt threshold, /*
* we fill the entire buffer */ * If we are already beyond the interrupt threshold,
* we fill the entire buffer.
*/
if (write_end <= index) if (write_end <= index)
write_end = end; write_end = end;
...@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, ...@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
adj_write_size *= ds_cfg.sizeof_rec[qual]; adj_write_size *= ds_cfg.sizeof_rec[qual];
/* zero out trailing bytes */ /* Zero out trailing bytes. */
memset((char *)index + write_size, 0, memset((char *)index + write_size, 0,
adj_write_size - write_size); adj_write_size - write_size);
index += adj_write_size; index += adj_write_size;
...@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, ...@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
* Later architectures use 64bit pointers throughout, whereas earlier * Later architectures use 64bit pointers throughout, whereas earlier
* architectures use 32bit pointers in 32bit mode. * architectures use 32bit pointers in 32bit mode.
* *
* We compute the base address for the first 8 fields based on: * We compute the base address for the fields based on:
* - the field size stored in the DS configuration * - the field size stored in the DS configuration
* - the relative field position * - the relative field position
* *
...@@ -431,23 +501,23 @@ enum bts_field { ...@@ -431,23 +501,23 @@ enum bts_field {
bts_to, bts_to,
bts_flags, bts_flags,
bts_qual = bts_from, bts_qual = bts_from,
bts_jiffies = bts_to, bts_clock = bts_to,
bts_pid = bts_flags, bts_pid = bts_flags,
bts_qual_mask = (bts_qual_max - 1), bts_qual_mask = (bts_qual_max - 1),
bts_escape = ((unsigned long)-1 & ~bts_qual_mask) bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
}; };
static inline unsigned long bts_get(const char *base, enum bts_field field) static inline unsigned long bts_get(const char *base, enum bts_field field)
{ {
base += (ds_cfg.sizeof_field * field); base += (ds_cfg.sizeof_ptr_field * field);
return *(unsigned long *)base; return *(unsigned long *)base;
} }
static inline void bts_set(char *base, enum bts_field field, unsigned long val) static inline void bts_set(char *base, enum bts_field field, unsigned long val)
{ {
base += (ds_cfg.sizeof_field * field);; base += (ds_cfg.sizeof_ptr_field * field);;
(*(unsigned long *)base) = val; (*(unsigned long *)base) = val;
} }
...@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) ...@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
* *
* return: bytes read/written on success; -Eerrno, otherwise * return: bytes read/written on success; -Eerrno, otherwise
*/ */
static int bts_read(struct bts_tracer *tracer, const void *at, static int
struct bts_struct *out) bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
{ {
if (!tracer) if (!tracer)
return -EINVAL; return -EINVAL;
...@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at, ...@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at,
memset(out, 0, sizeof(*out)); memset(out, 0, sizeof(*out));
if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); out->variant.event.clock = bts_get(at, bts_clock);
out->variant.timestamp.pid = bts_get(at, bts_pid); out->variant.event.pid = bts_get(at, bts_pid);
} else { } else {
out->qualifier = bts_branch; out->qualifier = bts_branch;
out->variant.lbr.from = bts_get(at, bts_from); out->variant.lbr.from = bts_get(at, bts_from);
...@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) ...@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
case bts_task_arrives: case bts_task_arrives:
case bts_task_departs: case bts_task_departs:
bts_set(raw, bts_qual, (bts_escape | in->qualifier)); bts_set(raw, bts_qual, (bts_escape | in->qualifier));
bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); bts_set(raw, bts_clock, in->variant.event.clock);
bts_set(raw, bts_pid, in->variant.timestamp.pid); bts_set(raw, bts_pid, in->variant.event.pid);
break; break;
default: default:
return -EINVAL; return -EINVAL;
...@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, ...@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
unsigned int flags) { unsigned int flags) {
unsigned long buffer, adj; unsigned long buffer, adj;
/* adjust the buffer address and size to meet alignment /*
* Adjust the buffer address and size to meet alignment
* constraints: * constraints:
* - buffer is double-word aligned * - buffer is double-word aligned
* - size is multiple of record size * - size is multiple of record size
...@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, ...@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
trace->begin = (void *)buffer; trace->begin = (void *)buffer;
trace->top = trace->begin; trace->top = trace->begin;
trace->end = (void *)(buffer + size); trace->end = (void *)(buffer + size);
/* The value for 'no threshold' is -1, which will set the /*
* The value for 'no threshold' is -1, which will set the
* threshold outside of the buffer, just like we want it. * threshold outside of the buffer, just like we want it.
*/ */
ith *= ds_cfg.sizeof_rec[qual];
trace->ith = (void *)(buffer + size - ith); trace->ith = (void *)(buffer + size - ith);
trace->flags = flags; trace->flags = flags;
...@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, ...@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
enum ds_qualifier qual, struct task_struct *task, enum ds_qualifier qual, struct task_struct *task,
void *base, size_t size, size_t th, unsigned int flags) int cpu, void *base, size_t size, size_t th)
{ {
struct ds_context *context; struct ds_context *context;
int error; int error;
size_t req_size;
error = -EOPNOTSUPP;
if (!ds_cfg.sizeof_rec[qual])
goto out;
error = -EINVAL; error = -EINVAL;
if (!base) if (!base)
goto out; goto out;
/* we require some space to do alignment adjustments below */ req_size = ds_cfg.sizeof_rec[qual];
/* We might need space for alignment adjustments. */
if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
req_size += DS_ALIGNMENT;
error = -EINVAL; error = -EINVAL;
if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) if (size < req_size)
goto out; goto out;
if (th != (size_t)-1) { if (th != (size_t)-1) {
...@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, ...@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
tracer->size = size; tracer->size = size;
error = -ENOMEM; error = -ENOMEM;
context = ds_get_context(task); context = ds_get_context(task, cpu);
if (!context) if (!context)
goto out; goto out;
tracer->context = context; tracer->context = context;
ds_init_ds_trace(trace, qual, base, size, th, flags); /*
* Defer any tracer-specific initialization work for the context until
* context ownership has been clarified.
*/
error = 0; error = 0;
out: out:
return error; return error;
} }
struct bts_tracer *ds_request_bts(struct task_struct *task, static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
void *base, size_t size, void *base, size_t size,
bts_ovfl_callback_t ovfl, size_t th, bts_ovfl_callback_t ovfl, size_t th,
unsigned int flags) unsigned int flags)
{ {
struct bts_tracer *tracer; struct bts_tracer *tracer;
unsigned long irq;
int error; int error;
/* Buffer overflow notification is not yet implemented. */
error = -EOPNOTSUPP; error = -EOPNOTSUPP;
if (!ds_cfg.ctl[dsf_bts]) if (ovfl)
goto out; goto out;
/* buffer overflow notification is not yet implemented */ error = get_tracer(task);
error = -EOPNOTSUPP; if (error < 0)
if (ovfl)
goto out; goto out;
error = -ENOMEM; error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer) if (!tracer)
goto out; goto out_put_tracer;
tracer->ovfl = ovfl; tracer->ovfl = ovfl;
/* Do some more error checking and acquire a tracing context. */
error = ds_request(&tracer->ds, &tracer->trace.ds, error = ds_request(&tracer->ds, &tracer->trace.ds,
ds_bts, task, base, size, th, flags); ds_bts, task, cpu, base, size, th);
if (error < 0) if (error < 0)
goto out_tracer; goto out_tracer;
/* Claim the bts part of the tracing context we acquired above. */
spin_lock_irqsave(&ds_lock, irq); spin_lock_irq(&ds_lock);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
get_tracer(task);
error = -EPERM; error = -EPERM;
if (tracer->ds.context->bts_master) if (tracer->ds.context->bts_master)
goto out_put_tracer; goto out_unlock;
tracer->ds.context->bts_master = tracer; tracer->ds.context->bts_master = tracer;
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
/*
* Now that we own the bts part of the context, let's complete the
* initialization for that part.
*/
ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
ds_install_ds_area(tracer->ds.context);
tracer->trace.read = bts_read; tracer->trace.read = bts_read;
tracer->trace.write = bts_write; tracer->trace.write = bts_write;
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); /* Start tracing. */
ds_resume_bts(tracer); ds_resume_bts(tracer);
return tracer; return tracer;
out_put_tracer:
put_tracer(task);
out_unlock: out_unlock:
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
ds_put_context(tracer->ds.context); ds_put_context(tracer->ds.context);
out_tracer: out_tracer:
kfree(tracer); kfree(tracer);
out_put_tracer:
put_tracer(task);
out: out:
return ERR_PTR(error); return ERR_PTR(error);
} }
struct pebs_tracer *ds_request_pebs(struct task_struct *task, struct bts_tracer *ds_request_bts_task(struct task_struct *task,
void *base, size_t size, void *base, size_t size,
pebs_ovfl_callback_t ovfl, size_t th, bts_ovfl_callback_t ovfl,
unsigned int flags) size_t th, unsigned int flags)
{
return ds_request_bts(task, 0, base, size, ovfl, th, flags);
}
struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
bts_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
}
static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
void *base, size_t size,
pebs_ovfl_callback_t ovfl, size_t th,
unsigned int flags)
{ {
struct pebs_tracer *tracer; struct pebs_tracer *tracer;
unsigned long irq;
int error; int error;
/* buffer overflow notification is not yet implemented */ /* Buffer overflow notification is not yet implemented. */
error = -EOPNOTSUPP; error = -EOPNOTSUPP;
if (ovfl) if (ovfl)
goto out; goto out;
error = get_tracer(task);
if (error < 0)
goto out;
error = -ENOMEM; error = -ENOMEM;
tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
if (!tracer) if (!tracer)
goto out; goto out_put_tracer;
tracer->ovfl = ovfl; tracer->ovfl = ovfl;
/* Do some more error checking and acquire a tracing context. */
error = ds_request(&tracer->ds, &tracer->trace.ds, error = ds_request(&tracer->ds, &tracer->trace.ds,
ds_pebs, task, base, size, th, flags); ds_pebs, task, cpu, base, size, th);
if (error < 0) if (error < 0)
goto out_tracer; goto out_tracer;
spin_lock_irqsave(&ds_lock, irq); /* Claim the pebs part of the tracing context we acquired above. */
spin_lock_irq(&ds_lock);
error = -EPERM;
if (!check_tracer(task))
goto out_unlock;
get_tracer(task);
error = -EPERM; error = -EPERM;
if (tracer->ds.context->pebs_master) if (tracer->ds.context->pebs_master)
goto out_put_tracer; goto out_unlock;
tracer->ds.context->pebs_master = tracer; tracer->ds.context->pebs_master = tracer;
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
/*
* Now that we own the pebs part of the context, let's complete the
* initialization for that part.
*/
ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
ds_install_ds_area(tracer->ds.context);
/* Start tracing. */
ds_resume_pebs(tracer); ds_resume_pebs(tracer);
return tracer; return tracer;
out_put_tracer:
put_tracer(task);
out_unlock: out_unlock:
spin_unlock_irqrestore(&ds_lock, irq); spin_unlock_irq(&ds_lock);
ds_put_context(tracer->ds.context); ds_put_context(tracer->ds.context);
out_tracer: out_tracer:
kfree(tracer); kfree(tracer);
out_put_tracer:
put_tracer(task);
out: out:
return ERR_PTR(error); return ERR_PTR(error);
} }
void ds_release_bts(struct bts_tracer *tracer) struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{ {
if (!tracer) return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
return; }
ds_suspend_bts(tracer); struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
pebs_ovfl_callback_t ovfl,
size_t th, unsigned int flags)
{
return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
}
static void ds_free_bts(struct bts_tracer *tracer)
{
struct task_struct *task;
task = tracer->ds.context->task;
WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
tracer->ds.context->bts_master = NULL; tracer->ds.context->bts_master = NULL;
put_tracer(tracer->ds.context->task); /* Make sure tracing stopped and the tracer is not in use. */
if (task && (task != current))
wait_task_context_switch(task);
ds_put_context(tracer->ds.context); ds_put_context(tracer->ds.context);
put_tracer(task);
kfree(tracer); kfree(tracer);
} }
void ds_release_bts(struct bts_tracer *tracer)
{
might_sleep();
if (!tracer)
return;
ds_suspend_bts(tracer);
ds_free_bts(tracer);
}
int ds_release_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long irq;
int error;
if (!tracer)
return 0;
task = tracer->ds.context->task;
local_irq_save(irq);
error = -EPERM;
if (!task &&
(tracer->ds.context->cpu != smp_processor_id()))
goto out;
error = -EPERM;
if (task && (task != current))
goto out;
ds_suspend_bts_noirq(tracer);
ds_free_bts(tracer);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static void update_task_debugctlmsr(struct task_struct *task,
unsigned long debugctlmsr)
{
task->thread.debugctlmsr = debugctlmsr;
get_cpu();
if (task == current)
update_debugctlmsr(debugctlmsr);
put_cpu();
}
void ds_suspend_bts(struct bts_tracer *tracer) void ds_suspend_bts(struct bts_tracer *tracer)
{ {
struct task_struct *task; struct task_struct *task;
unsigned long debugctlmsr;
int cpu;
if (!tracer) if (!tracer)
return; return;
tracer->flags = 0;
task = tracer->ds.context->task; task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
if (!task || (task == current)) WARN_ON(!task && irqs_disabled());
update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
if (task) { debugctlmsr = (task ?
task->thread.debugctlmsr &= ~BTS_CONTROL; task->thread.debugctlmsr :
get_debugctlmsr_on_cpu(cpu));
debugctlmsr &= ~BTS_CONTROL;
if (!task->thread.debugctlmsr) if (task)
clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); update_task_debugctlmsr(task, debugctlmsr);
} else
update_debugctlmsr_on_cpu(cpu, debugctlmsr);
} }
void ds_resume_bts(struct bts_tracer *tracer) int ds_suspend_bts_noirq(struct bts_tracer *tracer)
{ {
struct task_struct *task; struct task_struct *task;
unsigned long control; unsigned long debugctlmsr, irq;
int cpu, error = 0;
if (!tracer) if (!tracer)
return; return 0;
tracer->flags = 0;
task = tracer->ds.context->task; task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
local_irq_save(irq);
error = -EPERM;
if (!task && (cpu != smp_processor_id()))
goto out;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr());
debugctlmsr &= ~BTS_CONTROL;
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr(debugctlmsr);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static unsigned long ds_bts_control(struct bts_tracer *tracer)
{
unsigned long control;
control = ds_cfg.ctl[dsf_bts]; control = ds_cfg.ctl[dsf_bts];
if (!(tracer->trace.ds.flags & BTS_KERNEL)) if (!(tracer->trace.ds.flags & BTS_KERNEL))
...@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer) ...@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer)
if (!(tracer->trace.ds.flags & BTS_USER)) if (!(tracer->trace.ds.flags & BTS_USER))
control |= ds_cfg.ctl[dsf_bts_user]; control |= ds_cfg.ctl[dsf_bts_user];
if (task) { return control;
task->thread.debugctlmsr |= control;
set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
}
if (!task || (task == current))
update_debugctlmsr(get_debugctlmsr() | control);
} }
void ds_release_pebs(struct pebs_tracer *tracer) void ds_resume_bts(struct bts_tracer *tracer)
{ {
struct task_struct *task;
unsigned long debugctlmsr;
int cpu;
if (!tracer) if (!tracer)
return; return;
ds_suspend_pebs(tracer); tracer->flags = tracer->trace.ds.flags;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
WARN_ON(!task && irqs_disabled());
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr_on_cpu(cpu));
debugctlmsr |= ds_bts_control(tracer);
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr_on_cpu(cpu, debugctlmsr);
}
int ds_resume_bts_noirq(struct bts_tracer *tracer)
{
struct task_struct *task;
unsigned long debugctlmsr, irq;
int cpu, error = 0;
if (!tracer)
return 0;
tracer->flags = tracer->trace.ds.flags;
task = tracer->ds.context->task;
cpu = tracer->ds.context->cpu;
local_irq_save(irq);
error = -EPERM;
if (!task && (cpu != smp_processor_id()))
goto out;
debugctlmsr = (task ?
task->thread.debugctlmsr :
get_debugctlmsr());
debugctlmsr |= ds_bts_control(tracer);
if (task)
update_task_debugctlmsr(task, debugctlmsr);
else
update_debugctlmsr(debugctlmsr);
error = 0;
out:
local_irq_restore(irq);
return error;
}
static void ds_free_pebs(struct pebs_tracer *tracer)
{
struct task_struct *task;
task = tracer->ds.context->task;
WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
tracer->ds.context->pebs_master = NULL; tracer->ds.context->pebs_master = NULL;
put_tracer(tracer->ds.context->task);
ds_put_context(tracer->ds.context); ds_put_context(tracer->ds.context);
put_tracer(task);
kfree(tracer); kfree(tracer);
} }
void ds_release_pebs(struct pebs_tracer *tracer)
{
might_sleep();
if (!tracer)
return;
ds_suspend_pebs(tracer);
ds_free_pebs(tracer);
}
int ds_release_pebs_noirq(struct pebs_tracer *tracer)
{
struct task_struct *task;
unsigned long irq;
int error;
if (!tracer)
return 0;
task = tracer->ds.context->task;
local_irq_save(irq);
error = -EPERM;
if (!task &&
(tracer->ds.context->cpu != smp_processor_id()))
goto out;
error = -EPERM;
if (task && (task != current))
goto out;
ds_suspend_pebs_noirq(tracer);
ds_free_pebs(tracer);
error = 0;
out:
local_irq_restore(irq);
return error;
}
void ds_suspend_pebs(struct pebs_tracer *tracer) void ds_suspend_pebs(struct pebs_tracer *tracer)
{ {
} }
int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
return 0;
}
void ds_resume_pebs(struct pebs_tracer *tracer) void ds_resume_pebs(struct pebs_tracer *tracer)
{ {
} }
int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
return 0;
}
const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
{ {
if (!tracer) if (!tracer)
...@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) ...@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
return NULL; return NULL;
ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
tracer->trace.reset_value =
*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); tracer->trace.counters = ds_cfg.nr_counter_reset;
memcpy(tracer->trace.counter_reset,
tracer->ds.context->ds +
(NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
return &tracer->trace; return &tracer->trace;
} }
...@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer) ...@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
tracer->trace.ds.top = tracer->trace.ds.begin; tracer->trace.ds.top = tracer->trace.ds.begin;
ds_set(tracer->ds.context->ds, ds_bts, ds_index, ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
(unsigned long)tracer->trace.ds.top); (unsigned long)tracer->trace.ds.top);
return 0; return 0;
} }
int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) int ds_set_pebs_reset(struct pebs_tracer *tracer,
unsigned int counter, u64 value)
{ {
if (!tracer) if (!tracer)
return -EINVAL; return -EINVAL;
*(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; if (ds_cfg.nr_counter_reset < counter)
return -EINVAL;
*(u64 *)(tracer->ds.context->ds +
(NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
(counter * PEBS_RESET_FIELD_SIZE)) = value;
return 0; return 0;
} }
...@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = { ...@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = {
.ctl[dsf_bts] = (1 << 2) | (1 << 3), .ctl[dsf_bts] = (1 << 2) | (1 << 3),
.ctl[dsf_bts_kernel] = (1 << 5), .ctl[dsf_bts_kernel] = (1 << 5),
.ctl[dsf_bts_user] = (1 << 6), .ctl[dsf_bts_user] = (1 << 6),
.nr_counter_reset = 1,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10,
#else
.sizeof_rec[ds_pebs] = sizeof(long) * 18,
#endif
}; };
static const struct ds_configuration ds_cfg_pentium_m = { static const struct ds_configuration ds_cfg_pentium_m = {
.name = "Pentium M", .name = "Pentium M",
.ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts] = (1 << 6) | (1 << 7),
.nr_counter_reset = 1,
.sizeof_field = sizeof(long),
.sizeof_rec[ds_bts] = sizeof(long) * 3,
#ifdef __i386__
.sizeof_rec[ds_pebs] = sizeof(long) * 10,
#else
.sizeof_rec[ds_pebs] = sizeof(long) * 18,
#endif
}; };
static const struct ds_configuration ds_cfg_core2_atom = { static const struct ds_configuration ds_cfg_core2_atom = {
.name = "Core 2/Atom", .name = "Core 2/Atom",
.ctl[dsf_bts] = (1 << 6) | (1 << 7), .ctl[dsf_bts] = (1 << 6) | (1 << 7),
.ctl[dsf_bts_kernel] = (1 << 9), .ctl[dsf_bts_kernel] = (1 << 9),
.ctl[dsf_bts_user] = (1 << 10), .ctl[dsf_bts_user] = (1 << 10),
.nr_counter_reset = 1,
.sizeof_field = 8, };
.sizeof_rec[ds_bts] = 8 * 3, static const struct ds_configuration ds_cfg_core_i7 = {
.sizeof_rec[ds_pebs] = 8 * 18, .name = "Core i7",
.ctl[dsf_bts] = (1 << 6) | (1 << 7),
.ctl[dsf_bts_kernel] = (1 << 9),
.ctl[dsf_bts_user] = (1 << 10),
.nr_counter_reset = 4,
}; };
static void static void
ds_configure(const struct ds_configuration *cfg) ds_configure(const struct ds_configuration *cfg,
struct cpuinfo_x86 *cpu)
{ {
unsigned long nr_pebs_fields = 0;
printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
#ifdef __i386__
nr_pebs_fields = 10;
#else
nr_pebs_fields = 18;
#endif
/*
* Starting with version 2, architectural performance
* monitoring supports a format specifier.
*/
if ((cpuid_eax(0xa) & 0xff) > 1) {
unsigned long perf_capabilities, format;
rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
format = (perf_capabilities >> 8) & 0xf;
switch (format) {
case 0:
nr_pebs_fields = 18;
break;
case 1:
nr_pebs_fields = 22;
break;
default:
printk(KERN_INFO
"[ds] unknown PEBS format: %lu\n", format);
nr_pebs_fields = 0;
break;
}
}
memset(&ds_cfg, 0, sizeof(ds_cfg)); memset(&ds_cfg, 0, sizeof(ds_cfg));
ds_cfg = *cfg; ds_cfg = *cfg;
printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); ds_cfg.sizeof_ptr_field =
(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3;
ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
if (!cpu_has_bts) { if (!cpu_has(cpu, X86_FEATURE_BTS)) {
ds_cfg.ctl[dsf_bts] = 0; ds_cfg.sizeof_rec[ds_bts] = 0;
printk(KERN_INFO "[ds] bts not available\n"); printk(KERN_INFO "[ds] bts not available\n");
} }
if (!cpu_has_pebs) if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
ds_cfg.sizeof_rec[ds_pebs] = 0;
printk(KERN_INFO "[ds] pebs not available\n"); printk(KERN_INFO "[ds] pebs not available\n");
}
printk(KERN_INFO "[ds] sizes: address: %u bit, ",
8 * ds_cfg.sizeof_ptr_field);
printk("bts/pebs record: %u/%u bytes\n",
ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
} }
void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
{ {
/* Only configure the first cpu. Others are identical. */
if (ds_cfg.name)
return;
switch (c->x86) { switch (c->x86) {
case 0x6: case 0x6:
switch (c->x86_model) { switch (c->x86_model) {
case 0x9: case 0x9:
case 0xd: /* Pentium M */ case 0xd: /* Pentium M */
ds_configure(&ds_cfg_pentium_m); ds_configure(&ds_cfg_pentium_m, c);
break; break;
case 0xf: case 0xf:
case 0x17: /* Core2 */ case 0x17: /* Core2 */
case 0x1c: /* Atom */ case 0x1c: /* Atom */
ds_configure(&ds_cfg_core2_atom); ds_configure(&ds_cfg_core2_atom, c);
break;
case 0x1a: /* Core i7 */
ds_configure(&ds_cfg_core_i7, c);
break; break;
case 0x1a: /* i7 */
default: default:
/* sorry, don't know about them */ /* Sorry, don't know about them. */
break; break;
} }
break; break;
...@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) ...@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
case 0x0: case 0x0:
case 0x1: case 0x1:
case 0x2: /* Netburst */ case 0x2: /* Netburst */
ds_configure(&ds_cfg_netburst); ds_configure(&ds_cfg_netburst, c);
break; break;
default: default:
/* sorry, don't know about them */ /* Sorry, don't know about them. */
break; break;
} }
break; break;
default: default:
/* sorry, don't know about them */ /* Sorry, don't know about them. */
break; break;
} }
} }
static inline void ds_take_timestamp(struct ds_context *context,
enum bts_qualifier qualifier,
struct task_struct *task)
{
struct bts_tracer *tracer = context->bts_master;
struct bts_struct ts;
/* Prevent compilers from reading the tracer pointer twice. */
barrier();
if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
return;
memset(&ts, 0, sizeof(ts));
ts.qualifier = qualifier;
ts.variant.event.clock = trace_clock_global();
ts.variant.event.pid = task->pid;
bts_write(tracer, &ts);
}
/* /*
* Change the DS configuration from tracing prev to tracing next. * Change the DS configuration from tracing prev to tracing next.
*/ */
void ds_switch_to(struct task_struct *prev, struct task_struct *next) void ds_switch_to(struct task_struct *prev, struct task_struct *next)
{ {
struct ds_context *prev_ctx = prev->thread.ds_ctx; struct ds_context *prev_ctx = prev->thread.ds_ctx;
struct ds_context *next_ctx = next->thread.ds_ctx; struct ds_context *next_ctx = next->thread.ds_ctx;
unsigned long debugctlmsr = next->thread.debugctlmsr;
/* Make sure all data is read before we start. */
barrier();
if (prev_ctx) { if (prev_ctx) {
update_debugctlmsr(0); update_debugctlmsr(0);
if (prev_ctx->bts_master && ds_take_timestamp(prev_ctx, bts_task_departs, prev);
(prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
struct bts_struct ts = {
.qualifier = bts_task_departs,
.variant.timestamp.jiffies = jiffies_64,
.variant.timestamp.pid = prev->pid
};
bts_write(prev_ctx->bts_master, &ts);
}
} }
if (next_ctx) { if (next_ctx) {
if (next_ctx->bts_master && ds_take_timestamp(next_ctx, bts_task_arrives, next);
(next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
struct bts_struct ts = {
.qualifier = bts_task_arrives,
.variant.timestamp.jiffies = jiffies_64,
.variant.timestamp.pid = next->pid
};
bts_write(next_ctx->bts_master, &ts);
}
wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
} }
update_debugctlmsr(next->thread.debugctlmsr); update_debugctlmsr(debugctlmsr);
} }
void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) static __init int ds_selftest(void)
{ {
clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); if (ds_cfg.sizeof_rec[ds_bts]) {
tsk->thread.ds_ctx = NULL; int error;
}
void ds_exit_thread(struct task_struct *tsk) error = ds_selftest_bts();
{ if (error) {
WARN(1, "[ds] selftest failed. disabling bts.\n");
ds_cfg.sizeof_rec[ds_bts] = 0;
}
}
if (ds_cfg.sizeof_rec[ds_pebs]) {
int error;
error = ds_selftest_pebs();
if (error) {
WARN(1, "[ds] selftest failed. disabling pebs.\n");
ds_cfg.sizeof_rec[ds_pebs] = 0;
}
}
return 0;
} }
device_initcall(ds_selftest);
/*
* Debug Store support - selftest
*
*
* Copyright (C) 2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2009
*/
#include "ds_selftest.h"
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <asm/ds.h>
/* Size in bytes of the on-stack trace buffer used by most of the selftests. */
#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */
/* Size in bytes of the buffer used for the minimal-buffer task selftest. */
#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */
/*
 * State shared between ds_selftest_bts() and the test body
 * ds_selftest_bts_cpu().
 */
struct ds_selftest_bts_conf {
/* Tracer under test; may hold an ERR_PTR() value if the request failed. */
struct bts_tracer *tracer;
/* Result of the most recent test step; negative on failure. */
int error;
/* Callbacks used to stop and restart tracing for the tracer under test. */
int (*suspend)(struct bts_tracer *);
int (*resume)(struct bts_tracer *);
};
static int ds_selftest_bts_consistency(const struct bts_trace *trace)
{
int error = 0;
if (!trace) {
printk(KERN_CONT "failed to access trace...");
/* Bail out. Other tests are pointless. */
return -1;
}
if (!trace->read) {
printk(KERN_CONT "bts read not available...");
error = -1;
}
/* Do some sanity checks on the trace configuration. */
if (!trace->ds.n) {
printk(KERN_CONT "empty bts buffer...");
error = -1;
}
if (!trace->ds.size) {
printk(KERN_CONT "bad bts trace setup...");
error = -1;
}
if (trace->ds.end !=
(char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
printk(KERN_CONT "bad bts buffer setup...");
error = -1;
}
/*
* We allow top in [begin; end], since its not clear when the
* overflow adjustment happens: after the increment or before the
* write.
*/
if ((trace->ds.top < trace->ds.begin) ||
(trace->ds.end < trace->ds.top)) {
printk(KERN_CONT "bts top out of bounds...");
error = -1;
}
return error;
}
static int ds_selftest_bts_read(struct bts_tracer *tracer,
const struct bts_trace *trace,
const void *from, const void *to)
{
const unsigned char *at;
/*
* Check a few things which do not belong to this test.
* They should be covered by other tests.
*/
if (!trace)
return -1;
if (!trace->read)
return -1;
if (to < from)
return -1;
if (from < trace->ds.begin)
return -1;
if (trace->ds.end < to)
return -1;
if (!trace->ds.size)
return -1;
/* Now to the test itself. */
for (at = from; (void *)at < to; at += trace->ds.size) {
struct bts_struct bts;
unsigned long index;
int error;
if (((void *)at - trace->ds.begin) % trace->ds.size) {
printk(KERN_CONT
"read from non-integer index...");
return -1;
}
index = ((void *)at - trace->ds.begin) / trace->ds.size;
memset(&bts, 0, sizeof(bts));
error = trace->read(tracer, at, &bts);
if (error < 0) {
printk(KERN_CONT
"error reading bts trace at [%lu] (0x%p)...",
index, at);
return error;
}
switch (bts.qualifier) {
case BTS_BRANCH:
break;
default:
printk(KERN_CONT
"unexpected bts entry %llu at [%lu] (0x%p)...",
bts.qualifier, index, at);
return -1;
}
}
return 0;
}
/*
 * Core bts selftest for a single tracer.
 *
 * @arg points to a struct ds_selftest_bts_conf holding the tracer under test
 * and the suspend/resume callbacks to use.  The outcome is stored in
 * conf->error (0 on success, negative on failure).
 *
 * The void * signature allows this function to be called directly or to be
 * handed to smp_call_function_single().
 */
static void ds_selftest_bts_cpu(void *arg)
{
struct ds_selftest_bts_conf *conf = arg;
const struct bts_trace *trace;
void *top;
/*
 * The request failed: record its error code and clear the tracer pointer
 * so that the caller does not hand an error pointer to a release function.
 */
if (IS_ERR(conf->tracer)) {
conf->error = PTR_ERR(conf->tracer);
conf->tracer = NULL;
printk(KERN_CONT
"initialization failed (err: %d)...", conf->error);
return;
}
/* We should meanwhile have enough trace. */
conf->error = conf->suspend(conf->tracer);
if (conf->error < 0)
return;
/* Let's see if we can access the trace. */
trace = ds_read_bts(conf->tracer);
conf->error = ds_selftest_bts_consistency(trace);
if (conf->error < 0)
return;
/* If everything went well, we should have a few trace entries. */
if (trace->ds.top == trace->ds.begin) {
/*
* It is possible but highly unlikely that we got a
* buffer overflow and end up at exactly the same
* position we started from.
* Let's issue a warning, but continue.
*/
printk(KERN_CONT "no trace/overflow...");
}
/* Let's try to read the trace we collected. */
conf->error =
ds_selftest_bts_read(conf->tracer, trace,
trace->ds.begin, trace->ds.top);
if (conf->error < 0)
return;
/*
* Let's read the trace again.
* Since we suspended tracing, we should get the same result.
*/
top = trace->ds.top;
trace = ds_read_bts(conf->tracer);
conf->error = ds_selftest_bts_consistency(trace);
if (conf->error < 0)
return;
if (top != trace->ds.top) {
printk(KERN_CONT "suspend not working...");
conf->error = -1;
return;
}
/* Let's collect some more trace - see if resume is working. */
conf->error = conf->resume(conf->tracer);
if (conf->error < 0)
return;
conf->error = conf->suspend(conf->tracer);
if (conf->error < 0)
return;
trace = ds_read_bts(conf->tracer);
conf->error = ds_selftest_bts_consistency(trace);
if (conf->error < 0)
return;
/* Compare the new top against the one saved before the resume. */
if (trace->ds.top == top) {
/*
* It is possible but highly unlikely that we got a
* buffer overflow and end up at exactly the same
* position we started from.
* Let's issue a warning and check the full trace.
*/
printk(KERN_CONT
"no resume progress/overflow...");
conf->error =
ds_selftest_bts_read(conf->tracer, trace,
trace->ds.begin, trace->ds.end);
} else if (trace->ds.top < top) {
/*
* We had a buffer overflow - the entire buffer should
* contain trace records.
*/
conf->error =
ds_selftest_bts_read(conf->tracer, trace,
trace->ds.begin, trace->ds.end);
} else {
/*
* It is quite likely that the buffer did not overflow.
* Let's just check the delta trace.
*/
conf->error =
ds_selftest_bts_read(conf->tracer, trace, top,
trace->ds.top);
}
if (conf->error < 0)
return;
conf->error = 0;
}
/*
 * Adapter giving ds_suspend_bts() the int-returning signature expected by
 * the suspend callback of struct ds_selftest_bts_conf.  Always returns 0.
 */
static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
{
ds_suspend_bts(tracer);
return 0;
}
/*
 * Adapter giving ds_resume_bts() the int-returning signature expected by
 * the resume callback of struct ds_selftest_bts_conf.  Always returns 0.
 */
static int ds_resume_bts_wrap(struct bts_tracer *tracer)
{
ds_resume_bts(tracer);
return 0;
}
/*
 * Adapter with a void (*)(void *) signature so that ds_release_bts_noirq()
 * can be passed to smp_call_function_single().  The result of the release
 * is deliberately discarded.
 */
static void ds_release_bts_noirq_wrap(void *tracer)
{
(void)ds_release_bts_noirq(tracer);
}
/*
 * Negative test: try to release a cpu tracer from a cpu other than the
 * traced one.  The release is expected to be refused with -EPERM; any
 * other result is reported.
 *
 * The attempt is only made when we are not running on @cpu.
 *
 * Returns -1 if the release call returned 0, and 0 in all other cases.
 */
static int ds_selftest_bts_bad_release_noirq(int cpu,
					     struct bts_tracer *tracer)
{
	int result = -EPERM;

	/* Stay on this cpu while we compare against @cpu. */
	get_cpu();

	if (smp_processor_id() != cpu) {
		result = ds_release_bts_noirq(tracer);
		if (result != -EPERM)
			printk(KERN_CONT "release on wrong cpu...");
	}

	put_cpu();

	if (result == 0)
		return -1;

	return 0;
}
/*
 * Negative test: request cpu tracing on @cpu while task tracing is
 * active.  The request is expected to fail with -EPERM; any other
 * outcome is reported.
 *
 * Returns -1 if the request unexpectedly succeeded (the tracer is
 * released again in that case), and 0 otherwise.
 */
static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
{
	struct bts_tracer *tracer;
	int error;

	tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
				    (size_t)-1, BTS_KERNEL);

	if (IS_ERR(tracer)) {
		error = PTR_ERR(tracer);
	} else {
		/* We got a tracer we should not have got; give it back. */
		ds_release_bts(tracer);
		error = 0;
	}

	if (error != -EPERM)
		printk(KERN_CONT "cpu/task tracing overlap...");

	if (error == 0)
		return -1;

	return 0;
}
/*
 * Negative test: request task tracing for the current task while cpu
 * tracing is active.  The request is expected to fail with -EPERM; any
 * other outcome is reported.
 *
 * Returns -1 if the request unexpectedly succeeded (the tracer is
 * released again in that case), and 0 otherwise.
 */
static int ds_selftest_bts_bad_request_task(void *buffer)
{
	struct bts_tracer *tracer;
	int error;

	tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
				     (size_t)-1, BTS_KERNEL);

	if (IS_ERR(tracer)) {
		error = PTR_ERR(tracer);
	} else {
		/* We got a tracer we should not have got; give it back. */
		error = 0;
		ds_release_bts(tracer);
	}

	if (error != -EPERM)
		printk(KERN_CONT "task/cpu tracing overlap...");

	if (error == 0)
		return -1;

	return 0;
}
/*
 * Run the bts selftests.
 *
 * For every online cpu, cpu tracing is tested twice: once with the regular
 * suspend/resume/release functions called from this cpu, and once with the
 * _noirq variants run on the traced cpu via smp_call_function_single().
 * Afterwards, task tracing of the current task is tested, again with the
 * regular and the _noirq variants; the latter uses a small buffer and runs
 * with interrupts disabled.
 *
 * Each positive test is followed by a negative test that checks that cpu
 * and task tracing cannot be requested at the same time.
 *
 * Prints the result and returns 0 if all tests passed, or a negative value
 * on the first failure.
 */
int ds_selftest_bts(void)
{
struct ds_selftest_bts_conf conf;
unsigned char buffer[BUFFER_SIZE], *small_buffer;
unsigned long irq;
int cpu;
printk(KERN_INFO "[ds] bts selftest...");
conf.error = 0;
/*
 * Carve the small buffer out of buffer: round the start of buffer up to
 * a multiple of 8 and skip another 8 bytes.
 */
small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
get_online_cpus();
for_each_online_cpu(cpu) {
/* Cpu tracing, regular variants, driven from the current cpu. */
conf.suspend = ds_suspend_bts_wrap;
conf.resume = ds_resume_bts_wrap;
conf.tracer =
ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
ds_selftest_bts_cpu(&conf);
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_task(buffer);
/* Release before bailing out so the tracer is not leaked on failure. */
ds_release_bts(conf.tracer);
if (conf.error < 0)
goto out;
/* Cpu tracing, _noirq variants, executed on the traced cpu itself. */
conf.suspend = ds_suspend_bts_noirq;
conf.resume = ds_resume_bts_noirq;
conf.tracer =
ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
if (conf.error >= 0) {
conf.error =
ds_selftest_bts_bad_release_noirq(cpu,
conf.tracer);
/* We must not release the tracer twice. */
if (conf.error < 0)
conf.tracer = NULL;
}
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_task(buffer);
smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
conf.tracer, 1);
if (conf.error < 0)
goto out;
}
/* Task tracing of the current task, regular variants. */
conf.suspend = ds_suspend_bts_wrap;
conf.resume = ds_resume_bts_wrap;
conf.tracer =
ds_request_bts_task(current, buffer, BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
ds_selftest_bts_cpu(&conf);
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
ds_release_bts(conf.tracer);
if (conf.error < 0)
goto out;
/* Task tracing of the current task, _noirq variants, small buffer. */
conf.suspend = ds_suspend_bts_noirq;
conf.resume = ds_resume_bts_noirq;
conf.tracer =
ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
local_irq_save(irq);
ds_selftest_bts_cpu(&conf);
/*
 * NOTE(review): the negative request below is issued with interrupts
 * disabled - confirm that the request path cannot sleep or re-enable
 * interrupts in this situation.
 */
if (conf.error >= 0)
conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
/* The result of this release is not checked. */
ds_release_bts_noirq(conf.tracer);
local_irq_restore(irq);
if (conf.error < 0)
goto out;
conf.error = 0;
out:
put_online_cpus();
printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
return conf.error;
}
/*
 * PEBS selftest entry point.
 *
 * Currently a stub: no checks are performed and success (0) is
 * reported unconditionally.
 */
int ds_selftest_pebs(void)
{
return 0;
}
/*
* Debug Store support - selftest
*
*
* Copyright (C) 2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@intel.com>, 2009
*/
/*
 * Include guard: the fallback stubs below are definitions, so pulling
 * this header in twice would otherwise redefine them.
 */
#ifndef _X86_DS_SELFTEST_H
#define _X86_DS_SELFTEST_H

#ifdef CONFIG_X86_DS_SELFTEST
/* Real selftests; each returns 0 on success and a negative value on failure. */
extern int ds_selftest_bts(void);
extern int ds_selftest_pebs(void);
#else
/* Selftests compiled out: report success without doing anything. */
static inline int ds_selftest_bts(void) { return 0; }
static inline int ds_selftest_pebs(void) { return 0; }
#endif

#endif /* _X86_DS_SELFTEST_H */
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#include <asm/idle.h> #include <asm/idle.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/i387.h> #include <asm/i387.h>
#include <asm/ds.h>
unsigned long idle_halt; unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt); EXPORT_SYMBOL(idle_halt);
...@@ -45,6 +46,8 @@ void free_thread_xstate(struct task_struct *tsk) ...@@ -45,6 +46,8 @@ void free_thread_xstate(struct task_struct *tsk)
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL; tsk->thread.xstate = NULL;
} }
WARN(tsk->thread.ds_ctx, "leaking DS context\n");
} }
void free_thread_info(struct thread_info *ti) void free_thread_info(struct thread_info *ti)
...@@ -83,8 +86,6 @@ void exit_thread(void) ...@@ -83,8 +86,6 @@ void exit_thread(void)
put_cpu(); put_cpu();
kfree(bp); kfree(bp);
} }
ds_exit_thread(current);
} }
void flush_thread(void) void flush_thread(void)
......
...@@ -290,7 +290,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -290,7 +290,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.io_bitmap_max = 0; p->thread.io_bitmap_max = 0;
} }
ds_copy_thread(p, current); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
p->thread.debugctlmsr = 0; p->thread.debugctlmsr = 0;
......
...@@ -335,7 +335,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -335,7 +335,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
goto out; goto out;
} }
ds_copy_thread(p, me); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
p->thread.ds_ctx = NULL;
clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
p->thread.debugctlmsr = 0; p->thread.debugctlmsr = 0;
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/seccomp.h> #include <linux/seccomp.h>
#include <linux/signal.h> #include <linux/signal.h>
#include <linux/workqueue.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
...@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target, ...@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target,
} }
#ifdef CONFIG_X86_PTRACE_BTS #ifdef CONFIG_X86_PTRACE_BTS
/*
* A branch trace store context.
*
* Contexts may only be installed by ptrace_bts_config() and only for
* ptraced tasks.
*
* Contexts are destroyed when the tracee is detached from the tracer.
* The actual destruction work requires interrupts enabled, so the
* work is deferred and will be scheduled during __ptrace_unlink().
*
* Contexts hold an additional task_struct reference on the traced
* task, as well as a reference on the tracer's mm.
*
* Ptrace already holds a task_struct for the duration of ptrace operations,
* but since destruction is deferred, it may be executed after both
* tracer and tracee exited.
*/
/*
 * Per-tracee state of the ptrace BTS extension. Allocated by
 * alloc_bts_context() and torn down from a work item, see
 * free_bts_context().
 */
struct bts_context {
/* The branch trace handle; released with ds_release_bts() on teardown. */
struct bts_tracer *tracer;
/*
 * The buffer used to store the branch trace and its size in bytes.
 * Set up by alloc_bts_buffer(), reset by free_bts_buffer().
 */
void *buffer;
unsigned int size;
/*
 * The mm that paid for the above buffer. A reference is taken when the
 * buffer is allocated and dropped, after the refund, when it is freed.
 */
struct mm_struct *mm;
/*
 * The task this context belongs to. The context holds a task_struct
 * reference for as long as it exists.
 */
struct task_struct *task;
/* The signal to send on a bts buffer overflow. */
unsigned int bts_ovfl_signal;
/* The work struct to destroy a context (deferred via schedule_work()). */
struct work_struct work;
};
/*
 * Allocate a zeroed branch trace buffer of @size bytes for @context.
 *
 * The memory is first accounted to the calling task's mm via
 * account_locked_memory(), using the caller's resource limits. On
 * success the context records the buffer, its size and a reference on
 * the caller's mm, which free_bts_buffer() later uses for the refund.
 *
 * Returns 0 on success, the negative error from the accounting call if
 * that fails, or -ENOMEM if the buffer itself cannot be allocated. In
 * both failure cases @context is left untouched and nothing remains
 * accounted.
 */
static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
{
	void *buffer;
	int err;

	err = account_locked_memory(current->mm, current->signal->rlim, size);
	if (err < 0)
		return err;

	buffer = kzalloc(size, GFP_KERNEL);
	if (!buffer) {
		/*
		 * err holds the (successful) accounting result at this
		 * point; it must be set explicitly or the failure would
		 * be reported as success.
		 */
		err = -ENOMEM;
		goto out_refund;
	}

	context->buffer = buffer;
	context->size = size;
	context->mm = get_task_mm(current);

	return 0;

 out_refund:
	/* Give back what was charged above. */
	refund_locked_memory(current->mm, size);
	return err;
}
/*
 * Undo alloc_bts_buffer(): free the trace buffer of @context, refund
 * the accounted memory to the mm that paid for it and drop the
 * reference on that mm. All three fields are reset afterwards.
 *
 * Does nothing if the context has no buffer.
 */
static inline void free_bts_buffer(struct bts_context *context)
{
	if (context->buffer) {
		kfree(context->buffer);
		context->buffer = NULL;

		/* The refund needs the mm, so it comes before mmput(). */
		refund_locked_memory(context->mm, context->size);
		context->size = 0;

		mmput(context->mm);
		context->mm = NULL;
	}
}
/*
 * Work handler that destroys a branch trace context.
 *
 * @w is the work member embedded in the context. The tracer is
 * released, the task reference held by the context is dropped, the
 * buffer is freed and finally the context itself.
 */
static void free_bts_context_work(struct work_struct *w)
{
	struct bts_context *ctx = container_of(w, struct bts_context, work);

	ds_release_bts(ctx->tracer);
	put_task_struct(ctx->task);
	free_bts_buffer(ctx);

	kfree(ctx);
}
/*
 * Schedule the destruction of @context.
 *
 * The actual teardown happens later in free_bts_context_work(), run
 * from the context's embedded work item; the caller must not touch
 * @context after this call.
 */
static inline void free_bts_context(struct bts_context *context)
{
INIT_WORK(&context->work, free_bts_context_work);
schedule_work(&context->work);
}
static inline struct bts_context *alloc_bts_context(struct task_struct *task)
{
struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
if (context) {
context->task = task;
task->bts = context;
get_task_struct(task);
}
return context;
}
static int ptrace_bts_read_record(struct task_struct *child, size_t index, static int ptrace_bts_read_record(struct task_struct *child, size_t index,
struct bts_struct __user *out) struct bts_struct __user *out)
{ {
struct bts_context *context;
const struct bts_trace *trace; const struct bts_trace *trace;
struct bts_struct bts; struct bts_struct bts;
const unsigned char *at; const unsigned char *at;
int error; int error;
trace = ds_read_bts(child->bts); context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace) if (!trace)
return -EPERM; return -ESRCH;
at = trace->ds.top - ((index + 1) * trace->ds.size); at = trace->ds.top - ((index + 1) * trace->ds.size);
if ((void *)at < trace->ds.begin) if ((void *)at < trace->ds.begin)
...@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, ...@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
if (!trace->read) if (!trace->read)
return -EOPNOTSUPP; return -EOPNOTSUPP;
error = trace->read(child->bts, at, &bts); error = trace->read(context->tracer, at, &bts);
if (error < 0) if (error < 0)
return error; return error;
...@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child, ...@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child,
long size, long size,
struct bts_struct __user *out) struct bts_struct __user *out)
{ {
struct bts_context *context;
const struct bts_trace *trace; const struct bts_trace *trace;
const unsigned char *at; const unsigned char *at;
int error, drained = 0; int error, drained = 0;
trace = ds_read_bts(child->bts); context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace) if (!trace)
return -EPERM; return -ESRCH;
if (!trace->read) if (!trace->read)
return -EOPNOTSUPP; return -EOPNOTSUPP;
...@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child, ...@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child,
for (at = trace->ds.begin; (void *)at < trace->ds.top; for (at = trace->ds.begin; (void *)at < trace->ds.top;
out++, drained++, at += trace->ds.size) { out++, drained++, at += trace->ds.size) {
struct bts_struct bts; struct bts_struct bts;
int error;
error = trace->read(child->bts, at, &bts); error = trace->read(context->tracer, at, &bts);
if (error < 0) if (error < 0)
return error; return error;
...@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child, ...@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child,
memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
error = ds_reset_bts(child->bts); error = ds_reset_bts(context->tracer);
if (error < 0) if (error < 0)
return error; return error;
return drained; return drained;
} }
static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
{
child->bts_buffer = alloc_locked_buffer(size);
if (!child->bts_buffer)
return -ENOMEM;
child->bts_size = size;
return 0;
}
static void ptrace_bts_free_buffer(struct task_struct *child)
{
free_locked_buffer(child->bts_buffer, child->bts_size);
child->bts_buffer = NULL;
child->bts_size = 0;
}
static int ptrace_bts_config(struct task_struct *child, static int ptrace_bts_config(struct task_struct *child,
long cfg_size, long cfg_size,
const struct ptrace_bts_config __user *ucfg) const struct ptrace_bts_config __user *ucfg)
{ {
struct bts_context *context;
struct ptrace_bts_config cfg; struct ptrace_bts_config cfg;
unsigned int flags = 0; unsigned int flags = 0;
...@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child, ...@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child,
if (copy_from_user(&cfg, ucfg, sizeof(cfg))) if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
return -EFAULT; return -EFAULT;
if (child->bts) { context = child->bts;
ds_release_bts(child->bts); if (!context)
child->bts = NULL; context = alloc_bts_context(child);
} if (!context)
return -ENOMEM;
if (cfg.flags & PTRACE_BTS_O_SIGNAL) { if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
if (!cfg.signal) if (!cfg.signal)
return -EINVAL; return -EINVAL;
child->thread.bts_ovfl_signal = cfg.signal;
return -EOPNOTSUPP; return -EOPNOTSUPP;
context->bts_ovfl_signal = cfg.signal;
} }
if ((cfg.flags & PTRACE_BTS_O_ALLOC) && ds_release_bts(context->tracer);
(cfg.size != child->bts_size)) { context->tracer = NULL;
int error;
ptrace_bts_free_buffer(child); if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
int err;
error = ptrace_bts_allocate_buffer(child, cfg.size); free_bts_buffer(context);
if (error < 0) if (!cfg.size)
return error; return 0;
err = alloc_bts_buffer(context, cfg.size);
if (err < 0)
return err;
} }
if (cfg.flags & PTRACE_BTS_O_TRACE) if (cfg.flags & PTRACE_BTS_O_TRACE)
...@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child, ...@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child,
if (cfg.flags & PTRACE_BTS_O_SCHED) if (cfg.flags & PTRACE_BTS_O_SCHED)
flags |= BTS_TIMESTAMPS; flags |= BTS_TIMESTAMPS;
child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, context->tracer =
/* ovfl = */ NULL, /* th = */ (size_t)-1, ds_request_bts_task(child, context->buffer, context->size,
flags); NULL, (size_t)-1, flags);
if (IS_ERR(child->bts)) { if (unlikely(IS_ERR(context->tracer))) {
int error = PTR_ERR(child->bts); int error = PTR_ERR(context->tracer);
ptrace_bts_free_buffer(child);
child->bts = NULL;
free_bts_buffer(context);
context->tracer = NULL;
return error; return error;
} }
...@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child, ...@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child,
long cfg_size, long cfg_size,
struct ptrace_bts_config __user *ucfg) struct ptrace_bts_config __user *ucfg)
{ {
struct bts_context *context;
const struct bts_trace *trace; const struct bts_trace *trace;
struct ptrace_bts_config cfg; struct ptrace_bts_config cfg;
context = child->bts;
if (!context)
return -ESRCH;
if (cfg_size < sizeof(cfg)) if (cfg_size < sizeof(cfg))
return -EIO; return -EIO;
trace = ds_read_bts(child->bts); trace = ds_read_bts(context->tracer);
if (!trace) if (!trace)
return -EPERM; return -ESRCH;
memset(&cfg, 0, sizeof(cfg)); memset(&cfg, 0, sizeof(cfg));
cfg.size = trace->ds.end - trace->ds.begin; cfg.size = trace->ds.end - trace->ds.begin;
cfg.signal = child->thread.bts_ovfl_signal; cfg.signal = context->bts_ovfl_signal;
cfg.bts_size = sizeof(struct bts_struct); cfg.bts_size = sizeof(struct bts_struct);
if (cfg.signal) if (cfg.signal)
cfg.flags |= PTRACE_BTS_O_SIGNAL; cfg.flags |= PTRACE_BTS_O_SIGNAL;
...@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child, ...@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child,
static int ptrace_bts_clear(struct task_struct *child) static int ptrace_bts_clear(struct task_struct *child)
{ {
struct bts_context *context;
const struct bts_trace *trace; const struct bts_trace *trace;
trace = ds_read_bts(child->bts); context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace) if (!trace)
return -EPERM; return -ESRCH;
memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
return ds_reset_bts(child->bts); return ds_reset_bts(context->tracer);
} }
static int ptrace_bts_size(struct task_struct *child) static int ptrace_bts_size(struct task_struct *child)
{ {
struct bts_context *context;
const struct bts_trace *trace; const struct bts_trace *trace;
trace = ds_read_bts(child->bts); context = child->bts;
if (!context)
return -ESRCH;
trace = ds_read_bts(context->tracer);
if (!trace) if (!trace)
return -EPERM; return -ESRCH;
return (trace->ds.top - trace->ds.begin) / trace->ds.size; return (trace->ds.top - trace->ds.begin) / trace->ds.size;
} }
static void ptrace_bts_fork(struct task_struct *tsk) /*
{ * Called from __ptrace_unlink() after the child has been moved back
tsk->bts = NULL; * to its original parent.
tsk->bts_buffer = NULL; */
tsk->bts_size = 0; void ptrace_bts_untrace(struct task_struct *child)
tsk->thread.bts_ovfl_signal = 0;
}
static void ptrace_bts_untrace(struct task_struct *child)
{ {
if (unlikely(child->bts)) { if (unlikely(child->bts)) {
ds_release_bts(child->bts); free_bts_context(child->bts);
child->bts = NULL; child->bts = NULL;
/* We cannot update total_vm and locked_vm since
child's mm is already gone. But we can reclaim the
memory. */
kfree(child->bts_buffer);
child->bts_buffer = NULL;
child->bts_size = 0;
} }
} }
static void ptrace_bts_detach(struct task_struct *child)
{
/*
* Ptrace_detach() races with ptrace_untrace() in case
* the child dies and is reaped by another thread.
*
* We only do the memory accounting at this point and
* leave the buffer deallocation and the bts tracer
* release to ptrace_bts_untrace() which will be called
* later on with tasklist_lock held.
*/
release_locked_buffer(child->bts_buffer, child->bts_size);
}
#else
static inline void ptrace_bts_fork(struct task_struct *tsk) {}
static inline void ptrace_bts_detach(struct task_struct *child) {}
static inline void ptrace_bts_untrace(struct task_struct *child) {}
#endif /* CONFIG_X86_PTRACE_BTS */ #endif /* CONFIG_X86_PTRACE_BTS */
void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
{
ptrace_bts_fork(child);
}
void x86_ptrace_untrace(struct task_struct *child)
{
ptrace_bts_untrace(child);
}
/* /*
* Called by kernel/ptrace.c when detaching.. * Called by kernel/ptrace.c when detaching..
* *
...@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child) ...@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child)
#ifdef TIF_SYSCALL_EMU #ifdef TIF_SYSCALL_EMU
clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
#endif #endif
ptrace_bts_detach(child);
} }
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
......
...@@ -19,6 +19,7 @@ struct anon_vma; ...@@ -19,6 +19,7 @@ struct anon_vma;
struct file_ra_state; struct file_ra_state;
struct user_struct; struct user_struct;
struct writeback_control; struct writeback_control;
struct rlimit;
#ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */ #ifndef CONFIG_DISCONTIGMEM /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr; extern unsigned long max_mapnr;
...@@ -1319,8 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page, ...@@ -1319,8 +1320,8 @@ int vmemmap_populate_basepages(struct page *start_page,
int vmemmap_populate(struct page *start_page, unsigned long pages, int node); int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
void vmemmap_populate_print_last(void); void vmemmap_populate_print_last(void);
extern void *alloc_locked_buffer(size_t size); extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
extern void free_locked_buffer(void *buffer, size_t size); size_t size);
extern void release_locked_buffer(void *buffer, size_t size); extern void refund_locked_memory(struct mm_struct *mm, size_t size);
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */ #endif /* _LINUX_MM_H */
...@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child, ...@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child,
struct task_struct *new_parent); struct task_struct *new_parent);
extern void __ptrace_unlink(struct task_struct *child); extern void __ptrace_unlink(struct task_struct *child);
extern void exit_ptrace(struct task_struct *tracer); extern void exit_ptrace(struct task_struct *tracer);
extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
#define PTRACE_MODE_READ 1 #define PTRACE_MODE_READ 1
#define PTRACE_MODE_ATTACH 2 #define PTRACE_MODE_ATTACH 2
/* Returns 0 on success, -errno on denial. */ /* Returns 0 on success, -errno on denial. */
...@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task) ...@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task)
#define arch_ptrace_untrace(task) do { } while (0) #define arch_ptrace_untrace(task) do { } while (0)
#endif #endif
#ifndef arch_ptrace_fork
/*
* Do machine-specific work to initialize a new task.
*
* This is called from copy_process().
*/
#define arch_ptrace_fork(child, clone_flags) do { } while (0)
#endif
extern int task_current_syscall(struct task_struct *target, long *callno, extern int task_current_syscall(struct task_struct *target, long *callno,
unsigned long args[6], unsigned int maxargs, unsigned long args[6], unsigned int maxargs,
unsigned long *sp, unsigned long *pc); unsigned long *sp, unsigned long *pc);
......
...@@ -96,8 +96,8 @@ struct exec_domain; ...@@ -96,8 +96,8 @@ struct exec_domain;
struct futex_pi_state; struct futex_pi_state;
struct robust_list_head; struct robust_list_head;
struct bio; struct bio;
struct bts_tracer;
struct fs_struct; struct fs_struct;
struct bts_context;
/* /*
* List of flags we want to share for kernel threads, * List of flags we want to share for kernel threads,
...@@ -1209,18 +1209,11 @@ struct task_struct { ...@@ -1209,18 +1209,11 @@ struct task_struct {
struct list_head ptraced; struct list_head ptraced;
struct list_head ptrace_entry; struct list_head ptrace_entry;
#ifdef CONFIG_X86_PTRACE_BTS
/* /*
* This is the tracer handle for the ptrace BTS extension. * This is the tracer handle for the ptrace BTS extension.
* This field actually belongs to the ptracer task. * This field actually belongs to the ptracer task.
*/ */
struct bts_tracer *bts; struct bts_context *bts;
/*
* The buffer to hold the BTS data.
*/
void *bts_buffer;
size_t bts_size;
#endif /* CONFIG_X86_PTRACE_BTS */
/* PID/PID hash table linkage. */ /* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX]; struct pid_link pids[PIDTYPE_MAX];
...@@ -2003,8 +1996,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from); ...@@ -2003,8 +1996,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
extern char *get_task_comm(char *to, struct task_struct *tsk); extern char *get_task_comm(char *to, struct task_struct *tsk);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
extern void wait_task_context_switch(struct task_struct *p);
extern unsigned long wait_task_inactive(struct task_struct *, long match_state); extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else #else
static inline void wait_task_context_switch(struct task_struct *p) {}
static inline unsigned long wait_task_inactive(struct task_struct *p, static inline unsigned long wait_task_inactive(struct task_struct *p,
long match_state) long match_state)
{ {
......
...@@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o ...@@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o obj-$(CONFIG_SLOW_WORK) += slow-work.o
......
...@@ -1088,8 +1088,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1088,8 +1088,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#ifdef CONFIG_DEBUG_MUTEXES #ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */ p->blocked_on = NULL; /* not blocked yet */
#endif #endif
if (unlikely(current->ptrace))
ptrace_fork(p, clone_flags); p->bts = NULL;
/* Perform scheduler related setup. Assign this task to a CPU. */ /* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags); sched_fork(p, clone_flags);
......
...@@ -24,16 +24,6 @@ ...@@ -24,16 +24,6 @@
#include <linux/uaccess.h> #include <linux/uaccess.h>
/*
* Initialize a new task whose father had been ptraced.
*
* Called from copy_process().
*/
void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
{
arch_ptrace_fork(child, clone_flags);
}
/* /*
* ptrace a task: make the debugger its new parent and * ptrace a task: make the debugger its new parent and
* move it to the ptrace list. * move it to the ptrace list.
......
...@@ -2010,6 +2010,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) ...@@ -2010,6 +2010,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
return 1; return 1;
} }
/*
 * wait_task_context_switch - wait for a thread to complete at least one
 *                            context switch.
 *
 * Busy-waits until @p is either no longer running or its switch
 * counters show that a context switch has completed.
 *
 * @p must not be current.
 */
void wait_task_context_switch(struct task_struct *p)
{
	const unsigned long start_nvcsw = p->nvcsw;
	const unsigned long start_nivcsw = p->nivcsw;
	unsigned long flags;
	struct rq *rq;
	int running;

	for (;;) {
		/*
		 * The runqueue is assigned before the actual context
		 * switch, so the running state has to be sampled under
		 * the runqueue lock.
		 *
		 * An unlocked pre-check would rarely help: it is very
		 * likely that the lock is needed in every iteration.
		 */
		rq = task_rq_lock(p, &flags);
		running = task_running(rq, p);
		task_rq_unlock(rq, &flags);

		if (likely(!running))
			return;

		/*
		 * The switch count is incremented before the actual
		 * context switch. Seeing two increments therefore
		 * guarantees that at least one switch completed.
		 */
		if ((p->nvcsw - start_nvcsw) > 1 ||
		    (p->nivcsw - start_nivcsw) > 1)
			return;

		cpu_relax();
	}
}
/* /*
* wait_task_inactive - wait for a thread to unschedule. * wait_task_inactive - wait for a thread to unschedule.
* *
......
...@@ -15,12 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES ...@@ -15,12 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif endif
#
# Make the trace clocks available generally: it's infrastructure
# relied on by ptrace for example:
#
obj-y += trace_clock.o
obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
obj-$(CONFIG_RING_BUFFER) += ring_buffer.o obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
obj-$(CONFIG_TRACING) += trace.o obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_TRACING) += trace_output.o obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o obj-$(CONFIG_TRACING) += trace_printk.o
......
...@@ -538,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace, ...@@ -538,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
struct trace_array *tr); struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace, extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr); struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */ #endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data); extern void *head_page(struct trace_array_cpu *data);
......
/* /*
* h/w branch tracer for x86 based on bts * h/w branch tracer for x86 based on BTS
* *
* Copyright (C) 2008-2009 Intel Corporation. * Copyright (C) 2008-2009 Intel Corporation.
* Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009 * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
*/ */
#include <linux/spinlock.h>
#include <linux/kallsyms.h> #include <linux/kallsyms.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/ftrace.h> #include <linux/ftrace.h>
...@@ -15,110 +14,119 @@ ...@@ -15,110 +14,119 @@
#include <asm/ds.h> #include <asm/ds.h>
#include "trace.h"
#include "trace_output.h" #include "trace_output.h"
#include "trace.h"
#define SIZEOF_BTS (1 << 13) #define BTS_BUFFER_SIZE (1 << 13)
/*
* The tracer lock protects the below per-cpu tracer array.
* It needs to be held to:
* - start tracing on all cpus
* - stop tracing on all cpus
* - start tracing on a single hotplug cpu
* - stop tracing on a single hotplug cpu
* - read the trace from all cpus
* - read the trace from a single cpu
*/
static DEFINE_SPINLOCK(bts_tracer_lock);
static DEFINE_PER_CPU(struct bts_tracer *, tracer); static DEFINE_PER_CPU(struct bts_tracer *, tracer);
static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
#define this_tracer per_cpu(tracer, smp_processor_id()) #define this_tracer per_cpu(tracer, smp_processor_id())
#define this_buffer per_cpu(buffer, smp_processor_id())
static int __read_mostly trace_hw_branches_enabled; static int trace_hw_branches_enabled __read_mostly;
static int trace_hw_branches_suspended __read_mostly;
static struct trace_array *hw_branch_trace __read_mostly; static struct trace_array *hw_branch_trace __read_mostly;
/* static void bts_trace_init_cpu(int cpu)
* Start tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_start_cpu(void *arg)
{ {
if (this_tracer) per_cpu(tracer, cpu) =
ds_release_bts(this_tracer); ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
NULL, (size_t)-1, BTS_KERNEL);
this_tracer =
ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS, if (IS_ERR(per_cpu(tracer, cpu)))
/* ovfl = */ NULL, /* th = */ (size_t)-1, per_cpu(tracer, cpu) = NULL;
BTS_KERNEL);
if (IS_ERR(this_tracer)) {
this_tracer = NULL;
return;
}
} }
static void bts_trace_start(struct trace_array *tr) static int bts_trace_init(struct trace_array *tr)
{ {
spin_lock(&bts_tracer_lock); int cpu;
hw_branch_trace = tr;
trace_hw_branches_enabled = 0;
on_each_cpu(bts_trace_start_cpu, NULL, 1); get_online_cpus();
trace_hw_branches_enabled = 1; for_each_online_cpu(cpu) {
bts_trace_init_cpu(cpu);
spin_unlock(&bts_tracer_lock); if (likely(per_cpu(tracer, cpu)))
trace_hw_branches_enabled = 1;
}
trace_hw_branches_suspended = 0;
put_online_cpus();
/* If we could not enable tracing on a single cpu, we fail. */
return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
} }
/* static void bts_trace_reset(struct trace_array *tr)
* Stop tracing on the current cpu.
* The argument is ignored.
*
* pre: bts_tracer_lock must be locked.
*/
static void bts_trace_stop_cpu(void *arg)
{ {
if (this_tracer) { int cpu;
ds_release_bts(this_tracer);
this_tracer = NULL; get_online_cpus();
for_each_online_cpu(cpu) {
if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
} }
trace_hw_branches_enabled = 0;
trace_hw_branches_suspended = 0;
put_online_cpus();
} }
static void bts_trace_stop(struct trace_array *tr) static void bts_trace_start(struct trace_array *tr)
{ {
spin_lock(&bts_tracer_lock); int cpu;
trace_hw_branches_enabled = 0; get_online_cpus();
on_each_cpu(bts_trace_stop_cpu, NULL, 1); for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 0;
put_online_cpus();
}
spin_unlock(&bts_tracer_lock); static void bts_trace_stop(struct trace_array *tr)
{
int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
trace_hw_branches_suspended = 1;
put_online_cpus();
} }
static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
unsigned long action, void *hcpu) unsigned long action, void *hcpu)
{ {
unsigned int cpu = (unsigned long)hcpu; int cpu = (long)hcpu;
spin_lock(&bts_tracer_lock);
if (!trace_hw_branches_enabled)
goto out;
switch (action) { switch (action) {
case CPU_ONLINE: case CPU_ONLINE:
case CPU_DOWN_FAILED: case CPU_DOWN_FAILED:
smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); /* The notification is sent with interrupts enabled. */
if (trace_hw_branches_enabled) {
bts_trace_init_cpu(cpu);
if (trace_hw_branches_suspended &&
likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
}
break; break;
case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE:
smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); /* The notification is sent with interrupts enabled. */
break; if (likely(per_cpu(tracer, cpu))) {
ds_release_bts(per_cpu(tracer, cpu));
per_cpu(tracer, cpu) = NULL;
}
} }
out:
spin_unlock(&bts_tracer_lock);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { ...@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
.notifier_call = bts_hotcpu_handler .notifier_call = bts_hotcpu_handler
}; };
static int bts_trace_init(struct trace_array *tr)
{
hw_branch_trace = tr;
bts_trace_start(tr);
return 0;
}
static void bts_trace_reset(struct trace_array *tr)
{
bts_trace_stop(tr);
}
static void bts_trace_print_header(struct seq_file *m) static void bts_trace_print_header(struct seq_file *m)
{ {
seq_puts(m, "# CPU# TO <- FROM\n"); seq_puts(m, "# CPU# TO <- FROM\n");
...@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m) ...@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m)
static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
{ {
unsigned long symflags = TRACE_ITER_SYM_OFFSET;
struct trace_entry *entry = iter->ent; struct trace_entry *entry = iter->ent;
struct trace_seq *seq = &iter->seq; struct trace_seq *seq = &iter->seq;
struct hw_branch_entry *it; struct hw_branch_entry *it;
unsigned long symflags = TRACE_ITER_SYM_OFFSET;
trace_assign_type(it, entry); trace_assign_type(it, entry);
...@@ -226,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at) ...@@ -226,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
/* /*
* Collect the trace on the current cpu and write it into the ftrace buffer. * Collect the trace on the current cpu and write it into the ftrace buffer.
* *
* pre: bts_tracer_lock must be locked * pre: tracing must be suspended on the current cpu
*/ */
static void trace_bts_cpu(void *arg) static void trace_bts_cpu(void *arg)
{ {
struct trace_array *tr = (struct trace_array *) arg; struct trace_array *tr = (struct trace_array *)arg;
const struct bts_trace *trace; const struct bts_trace *trace;
unsigned char *at; unsigned char *at;
...@@ -243,10 +237,9 @@ static void trace_bts_cpu(void *arg) ...@@ -243,10 +237,9 @@ static void trace_bts_cpu(void *arg)
if (unlikely(!this_tracer)) if (unlikely(!this_tracer))
return; return;
ds_suspend_bts(this_tracer);
trace = ds_read_bts(this_tracer); trace = ds_read_bts(this_tracer);
if (!trace) if (!trace)
goto out; return;
for (at = trace->ds.top; (void *)at < trace->ds.end; for (at = trace->ds.top; (void *)at < trace->ds.end;
at += trace->ds.size) at += trace->ds.size)
...@@ -255,18 +248,27 @@ static void trace_bts_cpu(void *arg) ...@@ -255,18 +248,27 @@ static void trace_bts_cpu(void *arg)
for (at = trace->ds.begin; (void *)at < trace->ds.top; for (at = trace->ds.begin; (void *)at < trace->ds.top;
at += trace->ds.size) at += trace->ds.size)
trace_bts_at(trace, at); trace_bts_at(trace, at);
out:
ds_resume_bts(this_tracer);
} }
static void trace_bts_prepare(struct trace_iterator *iter) static void trace_bts_prepare(struct trace_iterator *iter)
{ {
spin_lock(&bts_tracer_lock); int cpu;
get_online_cpus();
for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_suspend_bts(per_cpu(tracer, cpu));
/*
* We need to collect the trace on the respective cpu since ftrace
* implicitly adds the record for the current cpu.
* Once that is more flexible, we could collect the data from any cpu.
*/
on_each_cpu(trace_bts_cpu, iter->tr, 1); on_each_cpu(trace_bts_cpu, iter->tr, 1);
spin_unlock(&bts_tracer_lock); for_each_online_cpu(cpu)
if (likely(per_cpu(tracer, cpu)))
ds_resume_bts(per_cpu(tracer, cpu));
put_online_cpus();
} }
static void trace_bts_close(struct trace_iterator *iter) static void trace_bts_close(struct trace_iterator *iter)
...@@ -276,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter) ...@@ -276,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter)
void trace_hw_branch_oops(void) void trace_hw_branch_oops(void)
{ {
spin_lock(&bts_tracer_lock); if (this_tracer) {
ds_suspend_bts_noirq(this_tracer);
trace_bts_cpu(hw_branch_trace); trace_bts_cpu(hw_branch_trace);
ds_resume_bts_noirq(this_tracer);
spin_unlock(&bts_tracer_lock); }
} }
struct tracer bts_tracer __read_mostly = struct tracer bts_tracer __read_mostly =
...@@ -293,7 +295,10 @@ struct tracer bts_tracer __read_mostly = ...@@ -293,7 +295,10 @@ struct tracer bts_tracer __read_mostly =
.start = bts_trace_start, .start = bts_trace_start,
.stop = bts_trace_stop, .stop = bts_trace_stop,
.open = trace_bts_prepare, .open = trace_bts_prepare,
.close = trace_bts_close .close = trace_bts_close,
#ifdef CONFIG_FTRACE_SELFTEST
.selftest = trace_selftest_startup_hw_branches,
#endif /* CONFIG_FTRACE_SELFTEST */
}; };
__init static int init_bts_trace(void) __init static int init_bts_trace(void)
......
...@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry) ...@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_BRANCH: case TRACE_BRANCH:
case TRACE_GRAPH_ENT: case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET: case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
return 1; return 1;
} }
return 0; return 0;
...@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ...@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
#else #else
# define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; }) # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_DYNAMIC_FTRACE */
/* /*
* Simple verification test of ftrace function tracer. * Simple verification test of ftrace function tracer.
* Enable ftrace, sleep 1/10 second, and then read the trace * Enable ftrace, sleep 1/10 second, and then read the trace
...@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) ...@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
return ret; return ret;
} }
#endif /* CONFIG_BRANCH_TRACER */ #endif /* CONFIG_BRANCH_TRACER */
#ifdef CONFIG_HW_BRANCH_TRACER
/*
 * Startup selftest for the hw-branch tracer.
 *
 * Initializes @trace on @tr, invokes the tracer's open() callback on a
 * temporary iterator so the per-cpu hardware trace is collected while
 * tracing is still running, then stops tracing and checks the buffer
 * with trace_test_buffer().
 *
 * Returns 0 on success, -1 if the tracer has no open() callback or no
 * entries were found, -ENOMEM if the temporary iterator cannot be
 * allocated, or the error reported by tracer_init() or
 * trace_test_buffer().
 */
int
trace_selftest_startup_hw_branches(struct tracer *trace,
				   struct trace_array *tr)
{
	struct trace_iterator *iter;
	struct tracer tracer;
	unsigned long count;
	int ret;

	if (!trace->open) {
		printk(KERN_CONT "missing open function...");
		return -1;
	}

	ret = tracer_init(trace, tr);
	if (ret) {
		warn_failed_init_tracer(trace, ret);
		return ret;
	}

	/*
	 * The hw-branch tracer needs to collect the trace from the various
	 * cpu trace buffers - before tracing is stopped.
	 */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		/*
		 * The tracer was successfully initialized above; undo that
		 * before bailing out so it is not left set up on this path.
		 */
		trace->reset(tr);
		return -ENOMEM;
	}

	/* Hand open() a private copy of the tracer via the iterator. */
	memcpy(&tracer, trace, sizeof(tracer));

	iter->trace = &tracer;
	iter->tr = tr;
	iter->pos = -1;
	mutex_init(&iter->mutex);

	trace->open(iter);

	/* The iterator is only needed for the open() call. */
	mutex_destroy(&iter->mutex);
	kfree(iter);

	tracing_stop();

	ret = trace_test_buffer(tr, &count);
	trace->reset(tr);
	tracing_start();

	/* A buffer that checks out but holds no entries is a failure. */
	if (!ret && !count) {
		printk(KERN_CONT "no entries found..");
		ret = -1;
	}

	return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */
...@@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user) ...@@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user)
free_uid(user); free_uid(user);
} }
void *alloc_locked_buffer(size_t size) int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
size_t size)
{ {
unsigned long rlim, vm, pgsz; unsigned long lim, vm, pgsz;
void *buffer = NULL; int error = -ENOMEM;
pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem); down_write(&mm->mmap_sem);
rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
vm = current->mm->total_vm + pgsz;
if (rlim < vm)
goto out;
rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
vm = current->mm->locked_vm + pgsz; vm = mm->total_vm + pgsz;
if (rlim < vm) if (lim < vm)
goto out; goto out;
buffer = kzalloc(size, GFP_KERNEL); lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
if (!buffer) vm = mm->locked_vm + pgsz;
if (lim < vm)
goto out; goto out;
current->mm->total_vm += pgsz; mm->total_vm += pgsz;
current->mm->locked_vm += pgsz; mm->locked_vm += pgsz;
error = 0;
out: out:
up_write(&current->mm->mmap_sem); up_write(&mm->mmap_sem);
return buffer; return error;
} }
void release_locked_buffer(void *buffer, size_t size) void refund_locked_memory(struct mm_struct *mm, size_t size)
{ {
unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
down_write(&current->mm->mmap_sem); down_write(&mm->mmap_sem);
current->mm->total_vm -= pgsz;
current->mm->locked_vm -= pgsz;
up_write(&current->mm->mmap_sem);
}
void free_locked_buffer(void *buffer, size_t size) mm->total_vm -= pgsz;
{ mm->locked_vm -= pgsz;
release_locked_buffer(buffer, size);
kfree(buffer); up_write(&mm->mmap_sem);
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment