Commit 0fd0e3da authored by Pekka Paalanen, committed by Thomas Gleixner

x86: mmiotrace full patch, preview 1

kmmio.c handles the list of mmio probes with callbacks, the list of traced
pages, and attaching into the page fault handler and die notifier. It
arms, traps and disarms the given pages; this is the core of mmiotrace.
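
To make the arming step concrete, here is a minimal sketch of the idea,
not the kmmio.c code itself: the present bit of a traced page's PTE is
cleared, so every access to that page takes a page fault that kmmio can
intercept. Apart from lookup_address(), which this patch already uses,
the helper name and the exact PTE handling below are illustrative only.

#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/* Illustrative only: arm one traced page by making it non-present. */
static void arm_traced_page(unsigned long address)
{
	unsigned int level;
	pte_t *pte = lookup_address(address, &level);	/* page table walk */

	if (!pte)
		return;
	/* Clear _PAGE_PRESENT: the next access to this page will fault. */
	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
	__flush_tlb_one(address);
}

Disarming restores the bit; the fault handler lets the trapped access
complete by single-stepping with the page temporarily present, then arms
it again.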

mmio-mod.c is the user interface: it hooks into the ioremap functions and
registers the mmio probes. It also decodes the required information
from trapped mmio accesses via the pre and post callbacks in each probe.
Currently, hooking into the ioremap functions works by redefining the
symbols of the target (binary) kernel module, so that it calls the traced
versions of the functions.
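
As a rough usage sketch of what one such hook boils down to (the *_cb
names and the single static probe are illustrative; the real mmio-mod.c
keeps a trace entry per mapping, but the kmmio API is the one added by
this patch):

#include <linux/mmiotrace.h>
#include <asm/io.h>

static void pre_cb(struct kmmio_probe *p, struct pt_regs *regs,
						unsigned long addr)
{
	/* decode the faulting MMIO access here (mmio-mod.c uses pf_in.c) */
}

static void post_cb(struct kmmio_probe *p, unsigned long condition,
						struct pt_regs *regs)
{
	/* the access has completed; record the value read or written */
}

static struct kmmio_probe example_probe;	/* one mapping only, for brevity */

void __iomem *ioremap_nocache_traced(unsigned long offset, unsigned long size)
{
	void __iomem *p = ioremap_nocache(offset, size);

	if (!p)
		return NULL;
	example_probe.addr = (unsigned long)p;
	example_probe.len = size;
	example_probe.pre_handler = pre_cb;
	example_probe.post_handler = post_cb;
	register_kmmio_probe(&example_probe);
	return p;
}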

The most notable changes since the last discussion are:
- kmmio.c is a built-in, not part of the module
- direct call from fault.c to kmmio.c, removing all dynamic hooks
- prepare for unregistering probes at any time
- make kmmio re-initializable and accessible to more than one user
- rewrite kmmio locking to remove all spinlocks from page fault path

Can I abuse call_rcu() like I do in kmmio.c:unregister_kmmio_probe()
or is there a better way?

The function called via call_rcu() itself calls call_rcu() again;
will this work or break? There I need a second RCU grace period
after the first grace period for page faults.
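
For reference, the chained pattern being asked about looks roughly like
this; every name here is invented for the example, only the call_rcu()
chaining itself is the point:

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct release_work {
	struct rcu_head rcu;
	void *payload;	/* must only be freed after both grace periods */
};

static void release_after_second_gp(struct rcu_head *head)
{
	struct release_work *w = container_of(head, struct release_work, rcu);

	kfree(w->payload);	/* two grace periods have now passed */
	kfree(w);
}

static void queue_second_gp(struct rcu_head *head)
{
	/* first grace period is over; chain one more callback */
	call_rcu(head, release_after_second_gp);
}

/* in the unregister path: call_rcu(&w->rcu, queue_second_gp); */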

Mmiotrace itself (mmio-mod.c) is still a module; I am going to attack
that next. At some point I will start looking into how to make mmiotrace
a tracer component of ftrace (thanks for the hint, Ingo). Ftrace should
make the user space part of mmiotracing as simple as
'cat /debug/trace/mmio > dump.txt'.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
parent f5136380
@@ -15,7 +15,6 @@ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
struct mm_struct init_mm = INIT_MM(init_mm);
EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */
EXPORT_SYMBOL_GPL(init_mm);
/*
* Initial thread structure.
......
obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o
obj-$(CONFIG_MMIOTRACE) += mmiotrace.o
mmiotrace-objs := pf_in.o kmmio.o mmio-mod.o
mmiotrace-objs := pf_in.o mmio-mod.o
obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o
#ifndef _LINUX_KMMIO_H
#define _LINUX_KMMIO_H
#include <linux/list.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/version.h>
#include <linux/kdebug.h>
struct kmmio_probe;
struct kmmio_fault_page;
struct pt_regs;
typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
struct pt_regs *, unsigned long addr);
typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
unsigned long condition, struct pt_regs *);
struct kmmio_probe {
struct list_head list;
/* start location of the probe point */
unsigned long addr;
/* length of the probe region */
unsigned long len;
/* Called before addr is executed. */
kmmio_pre_handler_t pre_handler;
/* Called after addr is executed, unless... */
kmmio_post_handler_t post_handler;
};
struct kmmio_fault_page {
struct list_head list;
/* location of the fault page */
unsigned long page;
int count;
};
/* kmmio is active by some kmmio_probes? */
static inline int is_kmmio_active(void)
{
extern unsigned int kmmio_count;
return kmmio_count;
}
int init_kmmio(void);
void cleanup_kmmio(void);
int register_kmmio_probe(struct kmmio_probe *p);
void unregister_kmmio_probe(struct kmmio_probe *p);
#endif /* _LINUX_KMMIO_H */
@@ -32,7 +32,6 @@
#include <asm/atomic.h>
#include <linux/percpu.h>
#include "kmmio.h"
#include "pf_in.h"
/* This app's relay channel files will appear in /debug/mmio-trace */
@@ -129,18 +128,17 @@ static void print_pte(unsigned long address)
pte_t *pte = lookup_address(address, &level);
if (!pte) {
printk(KERN_ERR "Error in %s: no pte for page 0x%08lx\n",
__FUNCTION__, address);
pr_err(MODULE_NAME ": Error in %s: no pte for page 0x%08lx\n",
__func__, address);
return;
}
if (level == PG_LEVEL_2M) {
printk(KERN_EMERG MODULE_NAME ": 4MB pages are not "
"currently supported: %lx\n",
address);
pr_emerg(MODULE_NAME ": 4MB pages are not currently "
"supported: %lx\n", address);
BUG();
}
printk(KERN_DEBUG MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
pr_info(MODULE_NAME ": pte for 0x%lx: 0x%lx 0x%lx\n",
address, pte_val(*pte),
pte_val(*pte) & _PAGE_PRESENT);
}
@@ -152,7 +150,7 @@ static void print_pte(unsigned long address)
static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
{
const struct trap_reason *my_reason = &get_cpu_var(pf_reason);
printk(KERN_EMERG MODULE_NAME ": unexpected fault for address: %lx, "
pr_emerg(MODULE_NAME ": unexpected fault for address: %lx, "
"last fault for address: %lx\n",
addr, my_reason->addr);
print_pte(addr);
@@ -160,20 +158,17 @@ static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr)
print_symbol(KERN_EMERG "faulting EIP is at %s\n", regs->ip);
print_symbol(KERN_EMERG "last faulting EIP was at %s\n",
my_reason->ip);
printk(KERN_EMERG
"eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
printk(KERN_EMERG
"esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
regs->si, regs->di, regs->bp, regs->sp);
#else
print_symbol(KERN_EMERG "faulting RIP is at %s\n", regs->ip);
print_symbol(KERN_EMERG "last faulting RIP was at %s\n",
my_reason->ip);
printk(KERN_EMERG "rax: %016lx rcx: %016lx rdx: %016lx\n",
pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n",
regs->ax, regs->cx, regs->dx);
printk(KERN_EMERG "rsi: %016lx rdi: %016lx "
"rbp: %016lx rsp: %016lx\n",
pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n",
regs->si, regs->di, regs->bp, regs->sp);
#endif
put_cpu_var(pf_reason);
@@ -251,10 +246,15 @@ static void post(struct kmmio_probe *p, unsigned long condition,
struct trap_reason *my_reason = &get_cpu_var(pf_reason);
struct mm_io_header_rw *my_trace = &get_cpu_var(cpu_trace);
/*
* XXX: This might not get called, if the probe is removed while
* trace hit is on flight.
*/
/* this should always return the active_trace count to 0 */
my_reason->active_traces--;
if (my_reason->active_traces) {
printk(KERN_EMERG MODULE_NAME ": unexpected post handler");
pr_emerg(MODULE_NAME ": unexpected post handler");
BUG();
}
@@ -283,14 +283,13 @@ static int subbuf_start_handler(struct rchan_buf *buf, void *subbuf,
atomic_t *drop = &per_cpu(dropped, cpu);
int count;
if (relay_buf_full(buf)) {
if (atomic_inc_return(drop) == 1) {
printk(KERN_ERR MODULE_NAME ": cpu %d buffer full!\n",
cpu);
}
if (atomic_inc_return(drop) == 1)
pr_err(MODULE_NAME ": cpu %d buffer full!\n", cpu);
return 0;
} else if ((count = atomic_read(drop))) {
printk(KERN_ERR MODULE_NAME
": cpu %d buffer no longer full, "
}
count = atomic_read(drop);
if (count) {
pr_err(MODULE_NAME ": cpu %d buffer no longer full, "
"missed %d events.\n",
cpu, count);
atomic_sub(count, drop);
@@ -407,8 +406,8 @@ static void ioremap_trace_core(unsigned long offset, unsigned long size,
/* Don't trace the low PCI/ISA area, it's always mapped.. */
if (!ISA_trace && (offset < ISA_END_ADDRESS) &&
(offset + size > ISA_START_ADDRESS)) {
printk(KERN_NOTICE MODULE_NAME ": Ignoring map of low "
"PCI/ISA area (0x%lx-0x%lx)\n",
pr_notice(MODULE_NAME ": Ignoring map of low PCI/ISA area "
"(0x%lx-0x%lx)\n",
offset, offset + size);
return;
}
@@ -418,7 +417,7 @@
void __iomem *ioremap_cache_trace(unsigned long offset, unsigned long size)
{
void __iomem *p = ioremap_cache(offset, size);
printk(KERN_DEBUG MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
pr_debug(MODULE_NAME ": ioremap_cache(0x%lx, 0x%lx) = %p\n",
offset, size, p);
ioremap_trace_core(offset, size, p);
return p;
@@ -428,7 +427,7 @@ EXPORT_SYMBOL(ioremap_cache_trace);
void __iomem *ioremap_nocache_trace(unsigned long offset, unsigned long size)
{
void __iomem *p = ioremap_nocache(offset, size);
printk(KERN_DEBUG MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
pr_debug(MODULE_NAME ": ioremap_nocache(0x%lx, 0x%lx) = %p\n",
offset, size, p);
ioremap_trace_core(offset, size, p);
return p;
@@ -455,7 +454,7 @@ void iounmap_trace(volatile void __iomem *addr)
};
struct remap_trace *trace;
struct remap_trace *tmp;
printk(KERN_DEBUG MODULE_NAME ": Unmapping %p.\n", addr);
pr_debug(MODULE_NAME ": Unmapping %p.\n", addr);
record_timestamp(&event.header);
spin_lock(&trace_list_lock);
@@ -481,7 +480,7 @@ static void clear_trace_list(void)
spin_lock(&trace_list_lock);
list_for_each_entry_safe(trace, tmp, &trace_list, list) {
printk(KERN_WARNING MODULE_NAME ": purging non-iounmapped "
pr_warning(MODULE_NAME ": purging non-iounmapped "
"trace @0x%08lx, size 0x%lx.\n",
trace->probe.addr, trace->probe.len);
if (!nommiotrace)
@@ -500,39 +499,37 @@ static int __init init(void)
dir = debugfs_create_dir(APP_DIR, NULL);
if (!dir) {
printk(KERN_ERR MODULE_NAME
": Couldn't create relay app directory.\n");
pr_err(MODULE_NAME ": Couldn't create relay app directory.\n");
return -ENOMEM;
}
chan = create_channel(subbuf_size, n_subbufs);
if (!chan) {
debugfs_remove(dir);
printk(KERN_ERR MODULE_NAME
": relay app channel creation failed\n");
pr_err(MODULE_NAME ": relay app channel creation failed\n");
return -ENOMEM;
}
init_kmmio();
reference_kmmio();
proc_marker_file = create_proc_entry(MARKER_FILE, 0, NULL);
if (proc_marker_file)
proc_marker_file->write_proc = write_marker;
printk(KERN_DEBUG MODULE_NAME ": loaded.\n");
pr_debug(MODULE_NAME ": loaded.\n");
if (nommiotrace)
printk(KERN_DEBUG MODULE_NAME ": MMIO tracing disabled.\n");
pr_info(MODULE_NAME ": MMIO tracing disabled.\n");
if (ISA_trace)
printk(KERN_WARNING MODULE_NAME
": Warning! low ISA range will be traced.\n");
pr_warning(MODULE_NAME ": Warning! low ISA range will be "
"traced.\n");
return 0;
}
static void __exit cleanup(void)
{
printk(KERN_DEBUG MODULE_NAME ": unload...\n");
pr_debug(MODULE_NAME ": unload...\n");
clear_trace_list();
cleanup_kmmio();
unreference_kmmio();
remove_proc_entry(MARKER_FILE, NULL);
destroy_channel();
if (dir)
......
@@ -19,7 +19,7 @@
*
*/
/* $Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp $
/* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp
* Copyright by Intel Crop., 2002
* Louis Zhuang (louis.zhuang@intel.com)
*
......
@@ -41,8 +41,7 @@ static void do_test(void)
{
void __iomem *p = ioremap_nocache_trace(mmio_address, 0x4000);
if (!p) {
printk(KERN_ERR MODULE_NAME ": could not ioremap IO memory, "
"aborting.\n");
pr_err(MODULE_NAME ": could not ioremap, aborting.\n");
return;
}
do_write_test(p);
@@ -53,14 +52,14 @@ static void do_test(void)
static int __init init(void)
{
if (mmio_address == 0) {
printk(KERN_ERR MODULE_NAME ": you have to use the module "
"argument mmio_address.\n");
printk(KERN_ERR MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
pr_err(MODULE_NAME ": you have to use the module argument "
"mmio_address.\n");
pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS"
" YOU REALLY KNOW WHAT YOU ARE DOING!\n");
return -ENXIO;
}
printk(KERN_WARNING MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx "
"in PCI address space, and writing "
"rubbish in there.\n", mmio_address);
do_test();
@@ -69,7 +68,7 @@ static int __init init(void)
static void __exit cleanup(void)
{
printk(KERN_DEBUG MODULE_NAME ": unloaded.\n");
pr_debug(MODULE_NAME ": unloaded.\n");
}
module_init(init);
......
@@ -10,6 +10,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mmiotrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -49,60 +50,14 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)
#ifdef CONFIG_MMIOTRACE_HOOKS
static pf_handler_func mmiotrace_pf_handler; /* protected by RCU */
static DEFINE_SPINLOCK(mmiotrace_handler_lock);
int mmiotrace_register_pf(pf_handler_func new_pfh)
{
int ret = 0;
unsigned long flags;
spin_lock_irqsave(&mmiotrace_handler_lock, flags);
if (mmiotrace_pf_handler)
ret = -EBUSY;
else
mmiotrace_pf_handler = new_pfh;
spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(mmiotrace_register_pf);
/**
* mmiotrace_unregister_pf:
* The caller must ensure @old_pfh is not in use anymore before freeing it.
* This function does not guarantee it. The handler function pointer is
* protected by RCU, so you can do this by e.g. calling synchronize_rcu().
*/
int mmiotrace_unregister_pf(pf_handler_func old_pfh)
{
int ret = 0;
unsigned long flags;
spin_lock_irqsave(&mmiotrace_handler_lock, flags);
if (mmiotrace_pf_handler != old_pfh)
ret = -EPERM;
else
mmiotrace_pf_handler = NULL;
spin_unlock_irqrestore(&mmiotrace_handler_lock, flags);
return ret;
}
EXPORT_SYMBOL_GPL(mmiotrace_unregister_pf);
#endif /* CONFIG_MMIOTRACE_HOOKS */
/* returns non-zero if do_page_fault() should return */
static inline int call_mmiotrace(struct pt_regs *regs,
unsigned long error_code,
unsigned long address)
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
#ifdef CONFIG_MMIOTRACE_HOOKS
int ret = 0;
rcu_read_lock();
if (mmiotrace_pf_handler)
ret = mmiotrace_pf_handler(regs, error_code, address);
rcu_read_unlock();
return ret;
#else
return 0;
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
return -1;
#endif
return 0;
}
static inline int notify_page_fault(struct pt_regs *regs)
@@ -657,7 +612,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (notify_page_fault(regs))
return;
if (call_mmiotrace(regs, error_code, address))
if (unlikely(kmmio_fault(regs, address)))
return;
/*
......
@@ -35,11 +35,4 @@ extern void show_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
typedef int (*pf_handler_func)(struct pt_regs *regs,
unsigned long error_code,
unsigned long address);
extern int mmiotrace_register_pf(pf_handler_func new_pfh);
extern int mmiotrace_unregister_pf(pf_handler_func old_pfh);
#endif
@@ -3,6 +3,44 @@
#include <asm/types.h>
#ifdef __KERNEL__
#include <linux/list.h>
struct kmmio_probe;
struct pt_regs;
typedef void (*kmmio_pre_handler_t)(struct kmmio_probe *,
struct pt_regs *, unsigned long addr);
typedef void (*kmmio_post_handler_t)(struct kmmio_probe *,
unsigned long condition, struct pt_regs *);
struct kmmio_probe {
struct list_head list;
unsigned long addr; /* start location of the probe point */
unsigned long len; /* length of the probe region */
kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */
kmmio_post_handler_t post_handler; /* Called after addr is executed */
};
/* kmmio is active by some kmmio_probes? */
static inline int is_kmmio_active(void)
{
extern unsigned int kmmio_count;
return kmmio_count;
}
extern void reference_kmmio(void);
extern void unreference_kmmio(void);
extern int register_kmmio_probe(struct kmmio_probe *p);
extern void unregister_kmmio_probe(struct kmmio_probe *p);
/* Called from page fault handler. */
extern int kmmio_handler(struct pt_regs *regs, unsigned long addr);
#endif /* __KERNEL__ */
/*
* If you change anything here, you must bump MMIO_VERSION.
* This is the relay data format for user space.
......