/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/sched/rt.h>

#include "trace.h"
#include "trace_output.h"

/*
 * On boot up, the ring buffer is set to the minimum size, so that
 * we do not waste memory on systems that are not using tracing.
 */
bool ring_buffer_expanded;

/*
 * We need to change this state when a selftest is running.
 * A selftest will inspect the ring buffer to count the entries
 * inserted during the selftest, although concurrent insertions
 * into the ring buffer, such as trace_printk(), could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;

/*
 * If a tracer is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;

/* Pipe tracepoints to printk */
struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;

/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}

/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
static DEFINE_PER_CPU(bool, trace_cmdline_save);

/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;

cpumask_var_t __read_mostly	tracing_buffer_mask;

/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console.  This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is default off, but you can enable it by either specifying
 * "ftrace_dump_on_oops" on the kernel command line, or setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 */

enum ftrace_dump_mode ftrace_dump_on_oops;

/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;

#ifdef CONFIG_TRACE_ENUM_MAP_FILE
/* Map of enums to their values, for "enum_map" file */
struct trace_enum_map_head {
	struct module			*mod;
	unsigned long			length;
};

union trace_enum_map_item;

struct trace_enum_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "enum_string"
	 */
	union trace_enum_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_enum_mutex);

/*
 * The trace_enum_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved enum_map items.
 */
union trace_enum_map_item {
	struct trace_enum_map		map;
	struct trace_enum_map_head	head;
	struct trace_enum_map_tail	tail;
};

static union trace_enum_map_item *trace_enum_maps;
#endif /* CONFIG_TRACE_ENUM_MAP_FILE */

static int tracing_set_tracer(struct trace_array *tr, const char *buf);

#define MAX_TRACER_SIZE		100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;

static int __init set_cmdline_ftrace(char *str)
{
	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	ring_buffer_expanded = true;
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);

static int __init set_ftrace_dump_on_oops(char *str)
{
	if (*str++ != '=' || !*str) {
		ftrace_dump_on_oops = DUMP_ALL;
		return 1;
	}

	if (!strcmp("orig_cpu", str)) {
		ftrace_dump_on_oops = DUMP_ORIG;
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);

static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);

static int __init boot_alloc_snapshot(char *str)
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);

static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 0;
}
__setup("trace_options=", set_trace_boot_options);

static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 0;
}
__setup("trace_clock=", set_trace_boot_clock);

static int __init set_tracepoint_printk(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);
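
/*
 * Illustrative examples of the boot parameters handled above (not an
 * exhaustive list): "trace_options=sym-addr,stacktrace" applies trace
 * options early, "trace_clock=global" selects the trace clock, and
 * "tp_printk" pipes tracepoint events to printk.  The strings are only
 * stashed here and applied once tracing is initialized.
 */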

unsigned long long ns2usecs(cycle_t nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}

/* trace_flags holds trace_options default values */
#define TRACE_DEFAULT_FLAGS						\
	(FUNCTION_DEFAULT_FLAGS |					\
	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)

/* trace_options that are only supported by global_trace */
#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)

/* trace_flags that are default zero for instances */
#define ZEROED_TRACE_FLAGS \
	TRACE_ITER_EVENT_FORK

/*
 * The global_trace is the descriptor that holds the tracing
 * buffers for the live tracing. For each CPU, it contains
 * a linked list of pages that will store trace entries. The
 * page descriptor of the pages in the memory is used to hold
 * the linked list by linking the lru item in the page descriptor
 * to each of the pages in the buffer per CPU.
 *
 * For each active CPU there is a data field that holds the
 * pages for the buffer for that CPU. Each CPU has the same number
 * of pages allocated for its buffer.
 */
static struct trace_array global_trace = {
	.trace_flags = TRACE_DEFAULT_FLAGS,
};

LIST_HEAD(ftrace_trace_arrays);

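/*
 * trace_array_get - take a reference on a trace array
 * @this_tr: trace array to take a reference on
 *
 * Looks up @this_tr on the list of trace arrays and, if it is still
 * there, bumps its reference count so the instance cannot go away
 * while it is being used.  Returns 0 on success, -ENODEV otherwise.
 */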
int trace_array_get(struct trace_array *this_tr)
{
	struct trace_array *tr;
	int ret = -ENODEV;

	mutex_lock(&trace_types_lock);
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr == this_tr) {
			tr->ref++;
			ret = 0;
			break;
		}
	}
	mutex_unlock(&trace_types_lock);

	return ret;
}

static void __trace_array_put(struct trace_array *this_tr)
{
	WARN_ON(!this_tr->ref);
	this_tr->ref--;
}

void trace_array_put(struct trace_array *this_tr)
{
	mutex_lock(&trace_types_lock);
	__trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
}

int call_filter_check_discard(struct trace_event_call *call, void *rec,
			      struct ring_buffer *buffer,
			      struct ring_buffer_event *event)
{
	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
		ring_buffer_discard_commit(buffer, event);
		return 1;
	}

	return 0;
}

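/*
 * buffer_ftrace_now - return the normalized time stamp of @buf for @cpu,
 * falling back to trace_clock_local() early in boot before the ring
 * buffer has been allocated.
 */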
static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
	u64 ts;

	/* Early boot up does not have a buffer yet */
	if (!buf->buffer)
		return trace_clock_local();

	ts = ring_buffer_time_stamp(buf->buffer, cpu);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);

	return ts;
}
cycle_t ftrace_now(int cpu)
{
	return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
}

/**
 * tracing_is_enabled - Show if global_trace has been disabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" to be used in fast paths such as for
 * the irqsoff tracer. But it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on() which is a little
 * slower, but accurate.
 */
int tracing_is_enabled(void)
{
	/*
	 * For quick access (irqsoff uses this in fast path), just
	 * return the mirror variable of the state of the ring buffer.
	 * It's a little racy, but we don't really care.
	 */
	smp_rmb();
	return !global_trace.buffer_disabled;
}

/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;

/* trace_types holds a linked list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);

/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which are returned by ring_buffer_peek() ..etc)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewritten
 *      by the events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to system.
 *
 * These primitives allow multiple processes to access different cpu ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */

#ifdef CONFIG_SMP
static DECLARE_RWSEM(all_cpu_access_lock);
static DEFINE_PER_CPU(struct mutex, cpu_access_lock);

static inline void trace_access_lock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		/* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
	} else {
		/* gain it for accessing a cpu ring buffer. */

		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);

		/* Secondly block other access to this @cpu ring buffer. */
		mutex_lock(&per_cpu(cpu_access_lock, cpu));
	}
}

static inline void trace_access_unlock(int cpu)
{
	if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
	}
}

static inline void trace_access_lock_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		mutex_init(&per_cpu(cpu_access_lock, cpu));
}

#else

static DEFINE_MUTEX(access_lock);

static inline void trace_access_lock(int cpu)
{
	(void)cpu;
	mutex_lock(&access_lock);
}

static inline void trace_access_unlock(int cpu)
{
	(void)cpu;
	mutex_unlock(&access_lock);
}

static inline void trace_access_lock_init(void)
{
}

#endif

#ifdef CONFIG_STACKTRACE
static void __ftrace_trace_stack(struct ring_buffer *buffer,
				 unsigned long flags,
				 int skip, int pc, struct pt_regs *regs);
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs);
#else
static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
					unsigned long flags,
					int skip, int pc, struct pt_regs *regs)
{
}
static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs)
{
}

#endif

static void tracer_tracing_on(struct trace_array *tr)
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_on(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 0;
	/* Make the flag seen by readers */
	smp_wmb();
}

/**
 * tracing_on - enable tracing buffers
 *
 * This function enables tracing buffers that may have been
 * disabled with tracing_off.
 */
void tracing_on(void)
{
	tracer_tracing_on(&global_trace);
}
EXPORT_SYMBOL_GPL(tracing_on);

/**
 * __trace_puts - write a constant string into the trace buffer.
 * @ip:	   The address of the caller
 * @str:   The constant string to write
 * @size:  The size of the string.
 */
int __trace_puts(unsigned long ip, const char *str, int size)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	int alloc;
	int pc;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	pc = preempt_count();

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	alloc = sizeof(*entry) + size + 2; /* possible \n added */

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					  irq_flags, pc);
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, str, size);

	/* Add a newline if necessary */
	if (entry->buf[size - 1] != '\n') {
		entry->buf[size] = '\n';
		entry->buf[size + 1] = '\0';
	} else
		entry->buf[size] = '\0';

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);

	return size;
}
EXPORT_SYMBOL_GPL(__trace_puts);

/**
 * __trace_bputs - write the pointer to a constant string into trace buffer
 * @ip:	   The address of the caller
 * @str:   The constant string to write to the buffer to
 */
int __trace_bputs(unsigned long ip, const char *str)
{
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct bputs_entry *entry;
	unsigned long irq_flags;
	int size = sizeof(struct bputs_entry);
	int pc;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	pc = preempt_count();

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	local_save_flags(irq_flags);
	buffer = global_trace.trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					  irq_flags, pc);
	if (!event)
		return 0;

	entry = ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->str			= str;

	__buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);

	return 1;
}
EXPORT_SYMBOL_GPL(__trace_bputs);

#ifdef CONFIG_TRACER_SNAPSHOT
/**
 * tracing_snapshot - take a snapshot of the current buffer.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 *
 * Note, make sure to allocate the snapshot with either
 * a tracing_snapshot_alloc(), or by doing it manually
 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
 *
 * If the snapshot buffer is not allocated, it will stop tracing.
 * Basically making a permanent snapshot.
 */
void tracing_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	struct tracer *tracer = tr->current_trace;
	unsigned long flags;

630 631 632 633 634 635
	if (in_nmi()) {
		internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
		internal_trace_puts("*** snapshot is being ignored        ***\n");
		return;
	}

636
	if (!tr->allocated_snapshot) {
637 638
		internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
		internal_trace_puts("*** stopping trace here!   ***\n");
639 640 641 642 643 644
		tracing_off();
		return;
	}

	/* Note, snapshot can not be used when the tracer uses it */
	if (tracer->use_max_tr) {
645 646
		internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
		internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
647 648 649 650 651 652 653
		return;
	}

	local_irq_save(flags);
	update_max_tr(tr, current, smp_processor_id());
	local_irq_restore(flags);
}
654
EXPORT_SYMBOL_GPL(tracing_snapshot);
655 656 657

static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
					struct trace_buffer *size_buf, int cpu_id);
658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);

static int alloc_snapshot(struct trace_array *tr)
{
	int ret;

	if (!tr->allocated_snapshot) {

		/* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				   &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			return ret;

		tr->allocated_snapshot = true;
	}

	return 0;
}

678
static void free_snapshot(struct trace_array *tr)
679 680 681 682 683 684 685 686 687 688 689
{
	/*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}
690

691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
/**
 * tracing_alloc_snapshot - allocate snapshot buffer.
 *
 * This only allocates the snapshot buffer if it isn't already
 * allocated - it doesn't also take a snapshot.
 *
 * This is meant to be used in cases where the snapshot buffer needs
 * to be set up for events that can't sleep but need to be able to
 * trigger a snapshot.
 */
int tracing_alloc_snapshot(void)
{
	struct trace_array *tr = &global_trace;
	int ret;

	ret = alloc_snapshot(tr);
	WARN_ON(ret < 0);

	return ret;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);

/**
 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
 *
 * This is similar to tracing_snapshot(), but it will allocate the
 * snapshot buffer if it isn't already allocated. Use this only
 * where it is safe to sleep, as the allocation may sleep.
 *
 * This causes a swap between the snapshot buffer and the current live
 * tracing buffer. You can use this to take snapshots of the live
 * trace when some condition is triggered, but continue to trace.
 */
void tracing_snapshot_alloc(void)
{
	int ret;

728 729
	ret = tracing_alloc_snapshot();
	if (ret < 0)
730
		return;
731 732 733

	tracing_snapshot();
}
734
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
735 736 737 738 739
#else
void tracing_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
}
740
EXPORT_SYMBOL_GPL(tracing_snapshot);
741 742 743 744 745 746
int tracing_alloc_snapshot(void)
{
	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
747 748 749 750 751
void tracing_snapshot_alloc(void)
{
	/* Give warning */
	tracing_snapshot();
}
752
EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
753 754
#endif /* CONFIG_TRACER_SNAPSHOT */

755
static void tracer_tracing_off(struct trace_array *tr)
756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771
{
	if (tr->trace_buffer.buffer)
		ring_buffer_record_off(tr->trace_buffer.buffer);
	/*
	 * This flag is looked at when buffers haven't been allocated
	 * yet, or by some tracers (like irqsoff), that just want to
	 * know if the ring buffer has been disabled, but it can handle
	 * races of where it gets disabled but we still do a record.
	 * As the check is in the fast path of the tracers, it is more
	 * important to be fast than accurate.
	 */
	tr->buffer_disabled = 1;
	/* Make the flag seen by readers */
	smp_wmb();
}

772 773 774 775 776 777 778 779 780 781
/**
 * tracing_off - turn off tracing buffers
 *
 * This function stops the tracing buffers from recording data.
 * It does not disable any overhead the tracers themselves may
 * be causing. This function simply causes all recording to
 * the ring buffers to fail.
 */
void tracing_off(void)
{
782
	tracer_tracing_off(&global_trace);
783 784 785
}
EXPORT_SYMBOL_GPL(tracing_off);

786 787 788 789 790 791
void disable_trace_on_warning(void)
{
	if (__disable_trace_on_warning)
		tracing_off();
}

792 793 794 795 796 797
/**
 * tracer_tracing_is_on - show real state of ring buffer enabled
 * @tr : the trace array to know if ring buffer is enabled
 *
 * Shows real state of the ring buffer if it is enabled or not.
 */
798
static int tracer_tracing_is_on(struct trace_array *tr)
799 800 801 802 803 804
{
	if (tr->trace_buffer.buffer)
		return ring_buffer_record_is_on(tr->trace_buffer.buffer);
	return !tr->buffer_disabled;
}

805 806 807 808 809
/**
 * tracing_is_on - show state of ring buffers enabled
 */
int tracing_is_on(void)
{
810
	return tracer_tracing_is_on(&global_trace);
811 812 813
}
EXPORT_SYMBOL_GPL(tracing_is_on);

814
static int __init set_buf_size(char *str)
815
{
816
	unsigned long buf_size;
817

818 819
	if (!str)
		return 0;
820
	buf_size = memparse(str, &str);
821
	/* nr_entries can not be zero */
822
	if (buf_size == 0)
823
		return 0;
824
	trace_buf_size = buf_size;
825 826
	return 1;
}
827
__setup("trace_buf_size=", set_buf_size);
828

829 830
static int __init set_tracing_thresh(char *str)
{
831
	unsigned long threshold;
832 833 834 835
	int ret;

	if (!str)
		return 0;
836
	ret = kstrtoul(str, 0, &threshold);
837 838
	if (ret < 0)
		return 0;
839
	tracing_thresh = threshold * 1000;
840 841 842 843
	return 1;
}
__setup("tracing_thresh=", set_tracing_thresh);

unsigned long nsecs_to_usecs(unsigned long nsecs)
{
	return nsecs / 1000;
}

849 850 851 852 853 854 855 856 857
/*
 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
 * It uses C(a, b) where 'a' is the enum name and 'b' is the string that
 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
 * of strings in the order that the enums were defined.
 */
#undef C
#define C(a, b) b

/* These must match the bit positions in trace_iterator_flags */
static const char *trace_options[] = {
	TRACE_FLAGS
	NULL
};

864 865 866
static struct {
	u64 (*func)(void);
	const char *name;
867
	int in_ns;		/* is this clock in nanoseconds? */
868
} trace_clocks[] = {
869 870 871
	{ trace_clock_local,		"local",	1 },
	{ trace_clock_global,		"global",	1 },
	{ trace_clock_counter,		"counter",	0 },
872
	{ trace_clock_jiffies,		"uptime",	0 },
873 874
	{ trace_clock,			"perf",		1 },
	{ ktime_get_mono_fast_ns,	"mono",		1 },
875
	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
876
	ARCH_TRACE_CLOCKS
877 878
};

879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955
/*
 * trace_parser_get_init - gets the buffer for trace parser
 */
int trace_parser_get_init(struct trace_parser *parser, int size)
{
	memset(parser, 0, sizeof(*parser));

	parser->buffer = kmalloc(size, GFP_KERNEL);
	if (!parser->buffer)
		return 1;

	parser->size = size;
	return 0;
}

/*
 * trace_parser_put - frees the buffer for trace parser
 */
void trace_parser_put(struct trace_parser *parser)
{
	kfree(parser->buffer);
}

/*
 * trace_get_user - reads the user input string separated by space
 * (matched by isspace(ch))
 *
 * For each string found the 'struct trace_parser' is updated,
 * and the function returns.
 *
 * Returns number of bytes read.
 *
 * See kernel/trace/trace.h for 'struct trace_parser' details.
 */
int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
	size_t cnt, loff_t *ppos)
{
	char ch;
	size_t read = 0;
	ssize_t ret;

	if (!*ppos)
		trace_parser_clear(parser);

	ret = get_user(ch, ubuf++);
	if (ret)
		goto out;

	read++;
	cnt--;

	/*
	 * The parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
	 */
	if (!parser->cont) {
		/* skip white space */
		while (cnt && isspace(ch)) {
			ret = get_user(ch, ubuf++);
			if (ret)
				goto out;
			read++;
			cnt--;
		}

		/* only spaces were written */
		if (isspace(ch)) {
			*ppos += read;
			ret = read;
			goto out;
		}

		parser->idx = 0;
	}

	/* read the non-space input */
	while (cnt && !isspace(ch)) {
956
		if (parser->idx < parser->size - 1)
957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
			parser->buffer[parser->idx++] = ch;
		else {
			ret = -EINVAL;
			goto out;
		}
		ret = get_user(ch, ubuf++);
		if (ret)
			goto out;
		read++;
		cnt--;
	}

	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch)) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
973
	} else if (parser->idx < parser->size - 1) {
974 975
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
976 977 978
	} else {
		ret = -EINVAL;
		goto out;
979 980 981 982 983 984 985 986 987
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}

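/*
 * Illustrative sketch of how the parser helpers above are typically used
 * from a write() handler (hypothetical caller, not a call site here):
 *
 *	trace_parser_get_init(&parser, PAGE_SIZE);
 *	read = trace_get_user(&parser, ubuf, cnt, ppos);
 *	if (read >= 0 && trace_parser_loaded(&parser))
 *		handle_token(parser.buffer);	// hypothetical consumer
 *	trace_parser_put(&parser);
 */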
/* TODO add a seq_buf_to_buffer() */
989
static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
990 991 992
{
	int len;

993
	if (trace_seq_used(s) <= s->seq.readpos)
994 995
		return -EBUSY;

996
	len = trace_seq_used(s) - s->seq.readpos;
997 998
	if (cnt > len)
		cnt = len;
999
	memcpy(buf, s->buffer + s->seq.readpos, cnt);
1000

1001
	s->seq.readpos += cnt;
1002 1003 1004
	return cnt;
}

1005 1006
unsigned long __read_mostly	tracing_thresh;

1007 1008 1009 1010 1011 1012 1013 1014 1015
#ifdef CONFIG_TRACER_MAX_TRACE
/*
 * Copy the new maximum trace into the separate maximum-trace
 * structure. (this way the maximum trace is permanently saved,
 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
 */
static void
__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
1016 1017 1018 1019
	struct trace_buffer *trace_buf = &tr->trace_buffer;
	struct trace_buffer *max_buf = &tr->max_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1020

1021 1022
	max_buf->cpu = cpu;
	max_buf->time_start = data->preempt_timestamp;
1023

1024
	max_data->saved_latency = tr->max_latency;
1025 1026
	max_data->critical_start = data->critical_start;
	max_data->critical_end = data->critical_end;
1027

1028
	memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1029
	max_data->pid = tsk->pid;
1030 1031 1032 1033 1034 1035 1036 1037 1038
	/*
	 * If tsk == current, then use current_uid(), as that does not use
	 * RCU. The irq tracer can be called out of RCU scope.
	 */
	if (tsk == current)
		max_data->uid = current_uid();
	else
		max_data->uid = task_uid(tsk);

1039 1040 1041
	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
	max_data->policy = tsk->policy;
	max_data->rt_priority = tsk->rt_priority;
1042 1043 1044 1045 1046

	/* record this tasks comm */
	tracing_record_cmdline(tsk);
}

/**
 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
 * @tr: tracer
 * @tsk: the task with the latency
 * @cpu: The cpu that initiated the trace.
 *
 * Flip the buffers between the @tr and the max_tr and record information
 * about which task was the cause of this latency.
 */
void
1057 1058
update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
1059
	struct ring_buffer *buf;
1060

1061
	if (tr->stop_count)
1062 1063
		return;

1064
	WARN_ON_ONCE(!irqs_disabled());
1065

1066
	if (!tr->allocated_snapshot) {
1067
		/* Only the nop tracer should hit this when disabling */
1068
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1069
		return;
1070
	}
1071

1072
	arch_spin_lock(&tr->max_lock);
1073

1074 1075 1076
	buf = tr->trace_buffer.buffer;
	tr->trace_buffer.buffer = tr->max_buffer.buffer;
	tr->max_buffer.buffer = buf;
1077

1078
	__update_max_tr(tr, tsk, cpu);
1079
	arch_spin_unlock(&tr->max_lock);
1080 1081 1082 1083 1084 1085 1086
}

/**
 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr - tracer
 * @tsk - task with the latency
 * @cpu - the cpu of the buffer to copy.
 *
 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1089
 */
void
1091 1092
update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
{
1093
	int ret;
1094

1095
	if (tr->stop_count)
1096 1097
		return;

1098
	WARN_ON_ONCE(!irqs_disabled());
1099
	if (!tr->allocated_snapshot) {
1100
		/* Only the nop tracer should hit this when disabling */
1101
		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1102
		return;
1103
	}
1104

1105
	arch_spin_lock(&tr->max_lock);
1106

1107
	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1108

1109 1110 1111 1112 1113 1114 1115
	if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer due to a commit taking
		 * place on this CPU. We fail to record, but we reset
		 * the max trace buffer (no one writes directly to it)
		 * and flag that it failed.
		 */
1116
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1117 1118 1119 1120
			"Failed to swap buffers due to commit in progress\n");
	}

	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1121 1122

	__update_max_tr(tr, tsk, cpu);
1123
	arch_spin_unlock(&tr->max_lock);
1124
}
1125
#endif /* CONFIG_TRACER_MAX_TRACE */
1126

1127
static int wait_on_pipe(struct trace_iterator *iter, bool full)
1128
{
1129 1130
	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
1131
		return 0;
1132

1133 1134
	return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
				full);
1135 1136
}

1137 1138 1139 1140 1141 1142
#ifdef CONFIG_FTRACE_STARTUP_TEST
static int run_tracer_selftest(struct tracer *type)
{
	struct trace_array *tr = &global_trace;
	struct tracer *saved_tracer = tr->current_trace;
	int ret;
1143

1144 1145
	if (!type->selftest || tracing_selftest_disabled)
		return 0;
1146 1147

	/*
1148 1149 1150 1151 1152
	 * Run a selftest on this tracer.
	 * Here we reset the trace buffer, and set the current
	 * tracer to be this tracer. The tracer can then run some
	 * internal tracing to verify that everything is in order.
	 * If we fail, we do not register this tracer.
1153
	 */
1154
	tracing_reset_online_cpus(&tr->trace_buffer);
1155

1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
	tr->current_trace = type;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		/* If we expanded the buffers, make sure the max is expanded too */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
		tr->allocated_snapshot = true;
	}
#endif

	/* the test is responsible for initializing and enabling */
	pr_info("Testing tracer %s: ", type->name);
	ret = type->selftest(type, tr);
	/* the test is responsible for resetting too */
	tr->current_trace = saved_tracer;
	if (ret) {
		printk(KERN_CONT "FAILED!\n");
		/* Add the warning after printing 'FAILED' */
		WARN_ON(1);
		return -1;
	}
	/* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (type->use_max_tr) {
		tr->allocated_snapshot = false;
1185

1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
		/* Shrink the max buffer again */
		if (ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
	}
#endif

	printk(KERN_CONT "PASSED\n");
	return 0;
}
#else
static inline int run_tracer_selftest(struct tracer *type)
{
	return 0;
1200
}
1201
#endif /* CONFIG_FTRACE_STARTUP_TEST */
1202

1203 1204
static void add_tracer_options(struct trace_array *tr, struct tracer *t);

1205 1206
static void __init apply_trace_boot_options(void);

/**
 * register_tracer - register a tracer with the ftrace system.
 * @type - the plugin for the tracer
 *
 * Register a new plugin tracer.
 */
1213
int __init register_tracer(struct tracer *type)
1214 1215 1216 1217 1218 1219 1220 1221 1222
{
	struct tracer *t;
	int ret = 0;

	if (!type->name) {
		pr_info("Tracer must have a name\n");
		return -1;
	}

1223
	if (strlen(type->name) >= MAX_TRACER_SIZE) {
1224 1225 1226 1227
		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
		return -1;
	}

1228
	mutex_lock(&trace_types_lock);
	tracing_selftest_running = true;

1232 1233 1234
	for (t = trace_types; t; t = t->next) {
		if (strcmp(type->name, t->name) == 0) {
			/* already found */
1235
			pr_info("Tracer %s already registered\n",
1236 1237 1238 1239 1240 1241
				type->name);
			ret = -1;
			goto out;
		}
	}

1242 1243
	if (!type->set_flag)
		type->set_flag = &dummy_set_flag;
1244 1245 1246
	if (!type->flags) {
		/*allocate a dummy tracer_flags*/
		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1247 1248 1249 1250
		if (!type->flags) {
			ret = -ENOMEM;
			goto out;
		}
1251 1252 1253
		type->flags->val = 0;
		type->flags->opts = dummy_tracer_opt;
	} else
1254 1255
		if (!type->flags->opts)
			type->flags->opts = dummy_tracer_opt;
1256

1257 1258 1259
	/* store the tracer for __set_tracer_option */
	type->flags->trace = type;

1260 1261 1262
	ret = run_tracer_selftest(type);
	if (ret < 0)
		goto out;
	type->next = trace_types;
	trace_types = type;
1266
	add_tracer_options(&global_trace, type);

 out:
	tracing_selftest_running = false;
1270 1271
	mutex_unlock(&trace_types_lock);

	if (ret || !default_bootup_tracer)
		goto out_unlock;

1275
	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
		goto out_unlock;

	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
	/* Do we want this tracer to start on bootup? */
1280
	tracing_set_tracer(&global_trace, type->name);
	default_bootup_tracer = NULL;
1282 1283 1284

	apply_trace_boot_options();

	/* disable other selftests, since this will break it. */
1286
	tracing_selftest_disabled = true;
1287
#ifdef CONFIG_FTRACE_STARTUP_TEST
	printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
	       type->name);
1290 1291
#endif

 out_unlock:
1293 1294 1295
	return ret;
}

1296
void tracing_reset(struct trace_buffer *buf, int cpu)
1297
{
1298
	struct ring_buffer *buffer = buf->buffer;
1299

1300 1301 1302
	if (!buffer)
		return;

1303 1304 1305 1306
	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_sched();
1307
	ring_buffer_reset_cpu(buffer, cpu);
1308 1309 1310 1311

	ring_buffer_record_enable(buffer);
}

1312
void tracing_reset_online_cpus(struct trace_buffer *buf)
1313
{
1314
	struct ring_buffer *buffer = buf->buffer;
1315 1316
	int cpu;

1317 1318 1319
	if (!buffer)
		return;

1320 1321 1322 1323 1324
	ring_buffer_record_disable(buffer);

	/* Make sure all commits have finished */
	synchronize_sched();

1325
	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1326 1327

	for_each_online_cpu(cpu)
1328
		ring_buffer_reset_cpu(buffer, cpu);
1329 1330

	ring_buffer_record_enable(buffer);
1331 1332
}

1333
/* Must have trace_types_lock held */
1334
void tracing_reset_all_online_cpus(void)
1335
{
1336 1337 1338
	struct trace_array *tr;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1339 1340 1341 1342
		tracing_reset_online_cpus(&tr->trace_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
		tracing_reset_online_cpus(&tr->max_buffer);
#endif
1343
	}
1344 1345
}

1346
#define SAVED_CMDLINES_DEFAULT 128
1347
#define NO_CMDLINE_MAP UINT_MAX
1348
static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1349 1350 1351 1352 1353 1354 1355 1356
struct saved_cmdlines_buffer {
	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
	unsigned *map_cmdline_to_pid;
	unsigned cmdline_num;
	int cmdline_idx;
	char *saved_cmdlines;
};
static struct saved_cmdlines_buffer *savedcmd;
1357 1358

/* temporary disable recording */
1359
static atomic_t trace_record_cmdline_disabled __read_mostly;
1360

1361 1362 1363 1364 1365 1366
static inline char *get_saved_cmdlines(int idx)
{
	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
}

static inline void set_cmdline(int idx, const char *cmdline)
1367
{
1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398
	memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
}

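/*
 * allocate_cmdlines_buffer - allocate storage for @val saved task comms
 *
 * Sets up the cmdline-index-to-pid map and the comm strings themselves,
 * initializing both maps to NO_CMDLINE_MAP.  Returns 0 or -ENOMEM.
 */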
static int allocate_cmdlines_buffer(unsigned int val,
				    struct saved_cmdlines_buffer *s)
{
	s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
					GFP_KERNEL);
	if (!s->map_cmdline_to_pid)
		return -ENOMEM;

	s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
	if (!s->saved_cmdlines) {
		kfree(s->map_cmdline_to_pid);
		return -ENOMEM;
	}

	s->cmdline_idx = 0;
	s->cmdline_num = val;
	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
	       sizeof(s->map_pid_to_cmdline));
	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
	       val * sizeof(*s->map_cmdline_to_pid));

	return 0;
}

static int trace_create_savedcmd(void)
{
	int ret;

1399
	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410
	if (!savedcmd)
		return -ENOMEM;

	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
	if (ret < 0) {
		kfree(savedcmd);
		savedcmd = NULL;
		return -ENOMEM;
	}

	return 0;
1411 1412
}

1413 1414
int is_tracing_stopped(void)
{
1415
	return global_trace.stop_count;
1416 1417
}

1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431
/**
 * tracing_start - quick start of the tracer
 *
 * If tracing is enabled but was stopped by tracing_stop,
 * this will start the tracer back up.
 */
void tracing_start(void)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

1432 1433 1434
	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (--global_trace.stop_count) {
		if (global_trace.stop_count < 0) {
1435 1436
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
1437
			global_trace.stop_count = 0;
1438
		}
1439 1440 1441
		goto out;
	}

1442
	/* Prevent the buffers from switching */
1443
	arch_spin_lock(&global_trace.max_lock);
1444

1445
	buffer = global_trace.trace_buffer.buffer;
1446 1447 1448
	if (buffer)
		ring_buffer_record_enable(buffer);

1449 1450
#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
1451 1452
	if (buffer)
		ring_buffer_record_enable(buffer);
1453
#endif
1454

1455
	arch_spin_unlock(&global_trace.max_lock);
1456

1457
 out:
1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}

static void tracing_start_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	if (tracing_disabled)
		return;

	/* If global, we need to also start the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_start();

	raw_spin_lock_irqsave(&tr->start_lock, flags);

	if (--tr->stop_count) {
		if (tr->stop_count < 0) {
			/* Someone screwed up their debugging */
			WARN_ON_ONCE(1);
			tr->stop_count = 0;
		}
		goto out;
	}

1484
	buffer = tr->trace_buffer.buffer;
1485 1486 1487 1488 1489
	if (buffer)
		ring_buffer_record_enable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502
}

/**
 * tracing_stop - quick stop of the tracer
 *
 * Light weight way to stop tracing. Use in conjunction with
 * tracing_start.
 */
void tracing_stop(void)
{
	struct ring_buffer *buffer;
	unsigned long flags;

1503 1504
	raw_spin_lock_irqsave(&global_trace.start_lock, flags);
	if (global_trace.stop_count++)
1505 1506
		goto out;

1507
	/* Prevent the buffers from switching */
1508
	arch_spin_lock(&global_trace.max_lock);
1509

1510
	buffer = global_trace.trace_buffer.buffer;
1511 1512 1513
	if (buffer)
		ring_buffer_record_disable(buffer);

1514 1515
#ifdef CONFIG_TRACER_MAX_TRACE
	buffer = global_trace.max_buffer.buffer;
1516 1517
	if (buffer)
		ring_buffer_record_disable(buffer);
1518
#endif
1519

1520
	arch_spin_unlock(&global_trace.max_lock);
1521

1522
 out:
1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538
	raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
}

static void tracing_stop_tr(struct trace_array *tr)
{
	struct ring_buffer *buffer;
	unsigned long flags;

	/* If global, we need to also stop the max tracer */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return tracing_stop();

	raw_spin_lock_irqsave(&tr->start_lock, flags);
	if (tr->stop_count++)
		goto out;

1539
	buffer = tr->trace_buffer.buffer;
1540 1541 1542 1543 1544
	if (buffer)
		ring_buffer_record_disable(buffer);

 out:
	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1545 1546
}

void trace_stop_cmdline_recording(void);
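/*
 * trace_save_cmdline - remember the comm of @tsk for trace output
 *
 * Stores tsk->comm in the savedcmd buffer, indexed by pid, so that
 * later output can map a pid back to a task name.  Returns 1 on
 * success, 0 if the pid is out of range or the lock is contended.
 */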
static int trace_save_cmdline(struct task_struct *tsk)
1550
{
1551
	unsigned pid, idx;
1552 1553

	if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1554
		return 0;
1555 1556 1557 1558 1559 1560 1561

	/*
	 * It's not the end of the world if we don't get
	 * the lock, but we also don't want to spin
	 * nor do we want to disable interrupts,
	 * so if we miss here, then better luck next time.
	 */
1562
	if (!arch_spin_trylock(&trace_cmdline_lock))
1563
		return 0;
1564

1565
	idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1566
	if (idx == NO_CMDLINE_MAP) {
1567
		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1568

1569 1570 1571 1572 1573 1574
		/*
		 * Check whether the cmdline buffer at idx has a pid
		 * mapped. We are going to overwrite that entry so we
		 * need to clear the map_pid_to_cmdline. Otherwise we
		 * would read the new comm for the old pid.
		 */
1575
		pid = savedcmd->map_cmdline_to_pid[idx];
1576
		if (pid != NO_CMDLINE_MAP)
1577
			savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1578

1579 1580
		savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
		savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1581

1582
		savedcmd->cmdline_idx = idx;
1583 1584
	}

1585
	set_cmdline(idx, tsk->comm);
1586

1587
	arch_spin_unlock(&trace_cmdline_lock);
1588 1589

	return 1;
1590 1591
}

1592
static void __trace_find_cmdline(int pid, char comm[])
1593 1594 1595
{
	unsigned map;

1596 1597 1598 1599
	if (!pid) {
		strcpy(comm, "<idle>");
		return;
	}
1600

1601 1602 1603 1604 1605
	if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
		return;
	}

1606 1607 1608 1609
	if (pid > PID_MAX_DEFAULT) {
		strcpy(comm, "<...>");
		return;
	}
1610

1611
	map = savedcmd->map_pid_to_cmdline[pid];
1612
	if (map != NO_CMDLINE_MAP)
1613
		strcpy(comm, get_saved_cmdlines(map));
1614 1615
	else
		strcpy(comm, "<...>");
1616 1617 1618 1619 1620 1621 1622 1623
}

void trace_find_cmdline(int pid, char comm[])
{
	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	__trace_find_cmdline(pid, comm);
1624

1625
	arch_spin_unlock(&trace_cmdline_lock);
1626
	preempt_enable();
1627 1628
}

void tracing_record_cmdline(struct task_struct *tsk)
1630
{
1631
	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1632 1633
		return;

1634 1635 1636
	if (!__this_cpu_read(trace_cmdline_save))
		return;

1637 1638
	if (trace_save_cmdline(tsk))
		__this_cpu_write(trace_cmdline_save, false);
1639 1640
}

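/*
 * tracing_generic_entry_update - fill in the fields common to all events
 *
 * Records the current pid, the preempt count and the irq/softirq/NMI and
 * need-resched state into @entry for every trace entry type.
 */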
void
tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
			     int pc)
1644 1645 1646
{
	struct task_struct *tsk = current;

1647 1648 1649
	entry->preempt_count		= pc & 0xff;
	entry->pid			= (tsk) ? tsk->pid : 0;
	entry->flags =
1650
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
1651
		(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
1652 1653 1654
#else
		TRACE_FLAG_IRQS_NOSUPPORT |
#endif
1655
		((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
1656 1657
		((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
		((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
1658 1659
		(tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
		(test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
1660
}
1661
EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
1662

1663 1664 1665 1666 1667
struct ring_buffer_event *
trace_buffer_lock_reserve(struct ring_buffer *buffer,
			  int type,
			  unsigned long len,
			  unsigned long flags, int pc)
1668 1669 1670
{
	struct ring_buffer_event *event;

1671
	event = ring_buffer_lock_reserve(buffer, len);
1672 1673 1674 1675 1676 1677 1678 1679 1680 1681
	if (event != NULL) {
		struct trace_entry *ent = ring_buffer_event_data(event);

		tracing_generic_entry_update(ent, flags, pc);
		ent->type = type;
	}

	return event;
}

1682 1683 1684 1685 1686 1687 1688
void
__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
{
	__this_cpu_write(trace_cmdline_save, true);
	ring_buffer_unlock_commit(buffer, event);
}

1689 1690 1691 1692
void trace_buffer_unlock_commit(struct trace_array *tr,
				struct ring_buffer *buffer,
				struct ring_buffer_event *event,
				unsigned long flags, int pc)
1693
{
1694
	__buffer_unlock_commit(buffer, event);
1695

1696
	ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
1697
	ftrace_trace_userstack(buffer, flags, pc);
1698
}
1699
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
1700

1701 1702
static struct ring_buffer *temp_buffer;

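/*
 * trace_event_buffer_lock_reserve - reserve a ring buffer event for a
 * trace event file.  If the buffer is off but triggers are armed, the
 * event is staged in temp_buffer so the trigger can still see its data.
 */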
struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
1705
			  struct trace_event_file *trace_file,
1706 1707 1708
			  int type, unsigned long len,
			  unsigned long flags, int pc)
{
1709 1710
	struct ring_buffer_event *entry;

1711
	*current_rb = trace_file->tr->trace_buffer.buffer;
1712
	entry = trace_buffer_lock_reserve(*current_rb,
1713
					 type, len, flags, pc);
1714 1715 1716 1717 1718 1719
	/*
	 * If tracing is off, but we have triggers enabled
	 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursion
	 * safe and will not be recorded anywhere.
	 */
1720
	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
1721 1722 1723 1724 1725
		*current_rb = temp_buffer;
		entry = trace_buffer_lock_reserve(*current_rb,
						  type, len, flags, pc);
	}
	return entry;
1726 1727 1728
}
EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);

1729
struct ring_buffer_event *
1730 1731
trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
				  int type, unsigned long len,
1732 1733
				  unsigned long flags, int pc)
{
1734
	*current_rb = global_trace.trace_buffer.buffer;
1735
	return trace_buffer_lock_reserve(*current_rb,
1736 1737
					 type, len, flags, pc);
}
1738
EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
1739

1740 1741
void trace_buffer_unlock_commit_regs(struct trace_array *tr,
				     struct ring_buffer *buffer,
1742 1743 1744
				     struct ring_buffer_event *event,
				     unsigned long flags, int pc,
				     struct pt_regs *regs)
1745
{
1746
	__buffer_unlock_commit(buffer, event);
1747

1748
	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
1749 1750
	ftrace_trace_userstack(buffer, flags, pc);
}
1751
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
1752

1753 1754
void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
					 struct ring_buffer_event *event)
1755
{
1756
	ring_buffer_discard_commit(buffer, event);
1757
}
1758
EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
1759

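/*
 * trace_function - record a function entry (ip and parent ip) into the
 * trace buffer of @tr, honoring any filter attached to the function event.
 */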
void
trace_function(struct trace_array *tr,
1762 1763
	       unsigned long ip, unsigned long parent_ip, unsigned long flags,
	       int pc)
1764
{
1765
	struct trace_event_call *call = &event_function;
1766
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
1767
	struct ring_buffer_event *event;
1768
	struct ftrace_entry *entry;
1769

1770
	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
1771
					  flags, pc);
1772 1773 1774
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
1775 1776
	entry->ip			= ip;
	entry->parent_ip		= parent_ip;
1777

1778
	if (!call_filter_check_discard(call, entry, buffer, event))
1779
		__buffer_unlock_commit(buffer, event);
1780 1781
}

1782
#ifdef CONFIG_STACKTRACE
1783 1784 1785 1786 1787 1788 1789 1790 1791

#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
struct ftrace_stack {
	unsigned long		calls[FTRACE_STACK_MAX_ENTRIES];
};

static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
static DEFINE_PER_CPU(int, ftrace_stack_reserve);

1792
static void __ftrace_trace_stack(struct ring_buffer *buffer,
1793
				 unsigned long flags,
1794
				 int skip, int pc, struct pt_regs *regs)
{
1796
	struct trace_event_call *call = &event_kernel_stack;
1797
	struct ring_buffer_event *event;
1798
	struct stack_entry *entry;
	struct stack_trace trace;
1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813
	int use_stack;
	int size = FTRACE_STACK_ENTRIES;

	trace.nr_entries	= 0;
	trace.skip		= skip;

	/*
	 * Since events can happen in NMIs there's no safe way to
	 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
	 * or NMI comes in, it will just have to use the default
	 * FTRACE_STACK_SIZE.
	 */
	preempt_disable_notrace();

1814
	use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
1815 1816 1817 1818 1819 1820 1821 1822 1823
	/*
	 * We don't need any atomic variables, just a barrier.
	 * If an interrupt comes in, we don't care, because it would
	 * have exited and put the counter back to what we want.
	 * We just need a barrier to keep gcc from moving things
	 * around.
	 */
	barrier();
	if (use_stack == 1) {
1824
		trace.entries		= this_cpu_ptr(ftrace_stack.calls);
1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838
		trace.max_entries	= FTRACE_STACK_MAX_ENTRIES;

		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);

		if (trace.nr_entries > size)
			size = trace.nr_entries;
	} else
		/* From now on, use_stack is a boolean */
		use_stack = 0;

	size *= sizeof(unsigned long);
	event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
1841
					  sizeof(*entry) + size, flags, pc);
1842
	if (!event)
1843 1844
		goto out;
	entry = ring_buffer_event_data(event);
Ingo Molnar's avatar
Ingo Molnar committed
1845

1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860
	memset(&entry->caller, 0, size);

	if (use_stack)
		memcpy(&entry->caller, trace.entries,
		       trace.nr_entries * sizeof(unsigned long));
	else {
		trace.max_entries	= FTRACE_STACK_ENTRIES;
		trace.entries		= entry->caller;
		if (regs)
			save_stack_trace_regs(regs, &trace);
		else
			save_stack_trace(&trace);
	}

	entry->size = trace.nr_entries;

1862
	if (!call_filter_check_discard(call, entry, buffer, event))
1863
		__buffer_unlock_commit(buffer, event);
1864 1865 1866 1867

 out:
	/* Again, don't let gcc optimize things here */
	barrier();
1868
	__this_cpu_dec(ftrace_stack_reserve);
1869 1870
	preempt_enable_notrace();

Ingo Molnar's avatar
Ingo Molnar committed
1871 1872
}

static inline void ftrace_trace_stack(struct trace_array *tr,
				      struct ring_buffer *buffer,
				      unsigned long flags,
				      int skip, int pc, struct pt_regs *regs)
{
	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
		return;

	__ftrace_trace_stack(buffer, flags, skip, pc, regs);
}

void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
		   int pc)
{
	__ftrace_trace_stack(tr->trace_buffer.buffer, flags, skip, pc, NULL);
}

/**
 * trace_dump_stack - record a stack back trace in the trace buffer
 * @skip: Number of functions to skip (helper handlers)
 */
void trace_dump_stack(int skip)
{
	unsigned long flags;

	if (tracing_disabled || tracing_selftest_running)
		return;

	local_save_flags(flags);

	/*
	 * Skip 3 more, seems to get us at the caller of
	 * this function.
	 */
	skip += 3;
	__ftrace_trace_stack(global_trace.trace_buffer.buffer,
			     flags, skip, preempt_count(), NULL);
}
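
/*
 * Example (illustrative): any kernel code that wants its current call
 * chain recorded in the trace buffer can simply do
 *
 *	trace_dump_stack(0);
 *
 * the "skip += 3" above hides the frames added by trace_dump_stack()
 * itself so the recorded trace starts at the caller.
 */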

static DEFINE_PER_CPU(int, user_stack_count);

void
ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
{
	struct trace_event_call *call = &event_user_stack;
	struct ring_buffer_event *event;
	struct userstack_entry *entry;
	struct stack_trace trace;

	if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
		return;

	/*
	 * NMIs can not handle page faults, even with fix ups.
	 * The save user stack can (and often does) fault.
	 */
	if (unlikely(in_nmi()))
		return;

	/*
	 * prevent recursion, since the user stack tracing may
	 * trigger other kernel events.
	 */
	preempt_disable();
	if (__this_cpu_read(user_stack_count))
		goto out;

	__this_cpu_inc(user_stack_count);

	event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					  sizeof(*entry), flags, pc);
	if (!event)
		goto out_drop_count;
	entry	= ring_buffer_event_data(event);

	entry->tgid		= current->tgid;
	memset(&entry->caller, 0, sizeof(entry->caller));

	trace.nr_entries	= 0;
	trace.max_entries	= FTRACE_STACK_ENTRIES;
	trace.skip		= 0;
	trace.entries		= entry->caller;

	save_stack_trace_user(&trace);
	if (!call_filter_check_discard(call, entry, buffer, event))
		__buffer_unlock_commit(buffer, event);

 out_drop_count:
	__this_cpu_dec(user_stack_count);
 out:
	preempt_enable();
}

#ifdef UNUSED
static void __trace_userstack(struct trace_array *tr, unsigned long flags)
{
	ftrace_trace_userstack(tr, flags, preempt_count());
}
#endif /* UNUSED */

#endif /* CONFIG_STACKTRACE */

/* created for use with alloc_percpu */
struct trace_buffer_struct {
	char buffer[TRACE_BUF_SIZE];
};

static struct trace_buffer_struct *trace_percpu_buffer;
static struct trace_buffer_struct *trace_percpu_sirq_buffer;
static struct trace_buffer_struct *trace_percpu_irq_buffer;
static struct trace_buffer_struct *trace_percpu_nmi_buffer;

/*
 * The buffer used is dependent on the context. There is a per cpu
 * buffer for normal context, softirq context, hard irq context and
 * for NMI context. This allows for lockless recording.
 *
 * Note, if the buffers failed to be allocated, then this returns NULL
 */
static char *get_trace_buf(void)
{
	struct trace_buffer_struct *percpu_buffer;

	/*
	 * If we have allocated per cpu buffers, then we do not
	 * need to do any locking.
	 */
	if (in_nmi())
		percpu_buffer = trace_percpu_nmi_buffer;
	else if (in_irq())
		percpu_buffer = trace_percpu_irq_buffer;
	else if (in_softirq())
		percpu_buffer = trace_percpu_sirq_buffer;
	else
		percpu_buffer = trace_percpu_buffer;

	if (!percpu_buffer)
		return NULL;

	return this_cpu_ptr(&percpu_buffer->buffer[0]);
}
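
/*
 * Note: the callers of get_trace_buf() (trace_vbprintk() and
 * __trace_array_vprintk()) disable preemption before calling it, so the
 * this_cpu_ptr() above refers to one stable CPU for as long as the
 * buffer is in use.
 */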

static int alloc_percpu_trace_buffer(void)
{
	struct trace_buffer_struct *buffers;
	struct trace_buffer_struct *sirq_buffers;
	struct trace_buffer_struct *irq_buffers;
	struct trace_buffer_struct *nmi_buffers;

	buffers = alloc_percpu(struct trace_buffer_struct);
	if (!buffers)
		goto err_warn;

	sirq_buffers = alloc_percpu(struct trace_buffer_struct);
	if (!sirq_buffers)
		goto err_sirq;

	irq_buffers = alloc_percpu(struct trace_buffer_struct);
	if (!irq_buffers)
		goto err_irq;

	nmi_buffers = alloc_percpu(struct trace_buffer_struct);
	if (!nmi_buffers)
		goto err_nmi;

	trace_percpu_buffer = buffers;
	trace_percpu_sirq_buffer = sirq_buffers;
	trace_percpu_irq_buffer = irq_buffers;
	trace_percpu_nmi_buffer = nmi_buffers;

	return 0;

 err_nmi:
	free_percpu(irq_buffers);
 err_irq:
	free_percpu(sirq_buffers);
 err_sirq:
	free_percpu(buffers);
 err_warn:
	WARN(1, "Could not allocate percpu trace_printk buffer");
	return -ENOMEM;
}
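
/*
 * Note: the err_* labels above unwind in reverse order of allocation, so
 * whichever alloc_percpu() fails, only the buffers that were already
 * allocated get freed before warning and returning -ENOMEM.
 */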

static int buffers_allocated;

void trace_printk_init_buffers(void)
{
	if (buffers_allocated)
		return;

	if (alloc_percpu_trace_buffer())
		return;

	/* trace_printk() is for debug use only. Don't use it in production. */

	pr_warn("\n");
	pr_warn("**********************************************************\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
	pr_warn("** unsafe for production use.                           **\n");
	pr_warn("**                                                      **\n");
	pr_warn("** If you see this message and you are not debugging    **\n");
	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
	pr_warn("**                                                      **\n");
	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
	pr_warn("**********************************************************\n");

	/* Expand the buffers to set size */
	tracing_update_buffers();

	buffers_allocated = 1;

	/*
	 * trace_printk_init_buffers() can be called by modules.
	 * If that happens, then we need to start cmdline recording
	 * directly here. If the global_trace.buffer is already
	 * allocated here, then this was called by module code.
	 */
	if (global_trace.trace_buffer.buffer)
		tracing_start_cmdline_record();
}

void trace_printk_start_comm(void)
{
	/* Start tracing comms if trace printk is set */
	if (!buffers_allocated)
		return;
	tracing_start_cmdline_record();
}

static void trace_printk_start_stop_comm(int enabled)
{
	if (!buffers_allocated)
		return;

	if (enabled)
		tracing_start_cmdline_record();
	else
		tracing_stop_cmdline_record();
}
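
/*
 * Note: a trace_printk() with a compile-time constant format normally
 * ends up in trace_vbprintk() below; only the format pointer and the
 * vbin_printf()-encoded arguments are stored, and they are expanded to
 * text when the trace is read.  Runtime-built formats take the plain
 * text trace_vprintk() path further down instead.
 */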

/**
 * trace_vbprintk - write binary msg to tracing buffer
 * @ip: The address of the caller
 * @fmt: The format string; only its pointer is stored, so it must outlive
 *       the trace (trace_printk() passes constant formats)
 * @args: Arguments for @fmt
 */
int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
{
	struct trace_event_call *call = &event_bprint;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct trace_array *tr = &global_trace;
	struct bprint_entry *entry;
	unsigned long flags;
	char *tbuffer;
	int len = 0, size, pc;

	if (unlikely(tracing_selftest_running || tracing_disabled))
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	pc = preempt_count();
	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out;
	}

	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);

	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
		goto out;

	local_save_flags(flags);
	size = sizeof(*entry) + sizeof(u32) * len;
	buffer = tr->trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
					  flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->ip			= ip;
	entry->fmt			= fmt;

	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
	}

out:
	preempt_enable_notrace();
	unpause_graph_tracing();

	return len;
}
EXPORT_SYMBOL_GPL(trace_vbprintk);

static int
__trace_array_vprintk(struct ring_buffer *buffer,
		      unsigned long ip, const char *fmt, va_list args)
{
	struct trace_event_call *call = &event_print;
	struct ring_buffer_event *event;
	int len = 0, size, pc;
	struct print_entry *entry;
	unsigned long flags;
	char *tbuffer;

	if (tracing_disabled || tracing_selftest_running)
		return 0;

	/* Don't pollute graph traces with trace_vprintk internals */
	pause_graph_tracing();

	pc = preempt_count();
	preempt_disable_notrace();

	tbuffer = get_trace_buf();
	if (!tbuffer) {
		len = 0;
		goto out;
	}

	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);

	local_save_flags(flags);
	size = sizeof(*entry) + len + 1;
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					  flags, pc);
	if (!event)
		goto out;
	entry = ring_buffer_event_data(event);
	entry->ip = ip;

	memcpy(&entry->buf, tbuffer, len + 1);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
	}
 out:
	preempt_enable_notrace();
	unpause_graph_tracing();

	return len;
}

int trace_array_vprintk(struct trace_array *tr,
			unsigned long ip, const char *fmt, va_list args)
{
	return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
}

int trace_array_printk(struct trace_array *tr,
		       unsigned long ip, const char *fmt, ...)
{
	int ret;
	va_list ap;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	va_start(ap, fmt);
	ret = trace_array_vprintk(tr, ip, fmt, ap);
	va_end(ap);
	return ret;
}

int trace_array_printk_buf(struct ring_buffer *buffer,
			   unsigned long ip, const char *fmt, ...)
{
	int ret;
	va_list ap;

	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
		return 0;

	va_start(ap, fmt);
	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
	va_end(ap);
	return ret;
}

int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
}
EXPORT_SYMBOL_GPL(trace_vprintk);

static void trace_iterator_increment(struct trace_iterator *iter)
{
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);

	iter->idx++;
	if (buf_iter)
		ring_buffer_read(buf_iter, NULL);
}

static struct trace_entry *
peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
		unsigned long *lost_events)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);

	if (buf_iter)
		event = ring_buffer_iter_peek(buf_iter, ts);
	else
		event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
					 lost_events);

	if (event) {
		iter->ent_size = ring_buffer_event_length(event);
		return ring_buffer_event_data(event);
	}
	iter->ent_size = 0;
	return NULL;
}
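
/*
 * __find_next_entry() is the merge step of the iterator: it peeks at the
 * next event of each CPU buffer (or just one, for per_cpu trace files)
 * and returns the entry with the smallest timestamp, so the per-CPU ring
 * buffers are presented to the reader as a single time-ordered stream.
 */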

static struct trace_entry *
__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
		  unsigned long *missing_events, u64 *ent_ts)
{
	struct ring_buffer *buffer = iter->trace_buffer->buffer;
	struct trace_entry *ent, *next = NULL;
	unsigned long lost_events = 0, next_lost = 0;
	int cpu_file = iter->cpu_file;
	u64 next_ts = 0, ts;
	int next_cpu = -1;
	int next_size = 0;
	int cpu;

	/*
	 * If we are in a per_cpu trace file, don't bother by iterating over
	 * all cpu and peek directly.
	 */
	if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
		if (ent_cpu)
			*ent_cpu = cpu_file;

		return ent;
	}

	for_each_tracing_cpu(cpu) {

		if (ring_buffer_empty_cpu(buffer, cpu))
			continue;

		ent = peek_next_entry(iter, cpu, &ts, &lost_events);

		/*
		 * Pick the entry with the smallest timestamp:
		 */
		if (ent && (!next || ts < next_ts)) {
			next = ent;
			next_cpu = cpu;
			next_ts = ts;
			next_lost = lost_events;
			next_size = iter->ent_size;
		}
	}

	iter->ent_size = next_size;

	if (ent_cpu)
		*ent_cpu = next_cpu;

	if (ent_ts)
		*ent_ts = next_ts;

	if (missing_events)
		*missing_events = next_lost;

	return next;
}

/* Find the next real entry, without updating the iterator itself */
struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
					  int *ent_cpu, u64 *ent_ts)
{
	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
}

/* Find the next real entry, and increment the iterator to the next entry */
void *trace_find_next_entry_inc(struct trace_iterator *iter)
{
	iter->ent = __find_next_entry(iter, &iter->cpu,
				      &iter->lost_events, &iter->ts);

	if (iter->ent)
		trace_iterator_increment(iter);

	return iter->ent ? iter : NULL;
}

static void trace_consume(struct trace_iterator *iter)
{
	ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
			    &iter->lost_events);
}

static void *s_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	int i = (int)*pos;
	void *ent;

	WARN_ON_ONCE(iter->leftover);

	(*pos)++;

	/* can't go backwards */
	if (iter->idx > i)
		return NULL;

	if (iter->idx < 0)
		ent = trace_find_next_entry_inc(iter);
	else
		ent = iter;

	while (ent && iter->idx < i)
		ent = trace_find_next_entry_inc(iter);

	iter->pos = *pos;

	return ent;
}

void tracing_iter_reset(struct trace_iterator *iter, int cpu)
{
	struct ring_buffer_event *event;
	struct ring_buffer_iter *buf_iter;
	unsigned long entries = 0;
	u64 ts;

	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;

	buf_iter = trace_buffer_iter(iter, cpu);
	if (!buf_iter)
		return;

	ring_buffer_iter_reset(buf_iter);

	/*
	 * We could have the case with the max latency tracers
	 * that a reset never took place on a cpu. This is evident
	 * by the timestamp being before the start of the buffer.
	 */
	while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
		if (ts >= iter->trace_buffer->time_start)
			break;
		entries++;
		ring_buffer_read(buf_iter, NULL);
	}

	per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
}

/*
 * The current tracer is copied to avoid a global locking
 * all around.
 */
static void *s_start(struct seq_file *m, loff_t *pos)
{
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	int cpu_file = iter->cpu_file;
	void *p = NULL;
	loff_t l = 0;
	int cpu;

	/*
	 * copy the tracer to avoid using a global lock all around.
	 * iter->trace is a copy of current_trace, the pointer to the
	 * name may be used instead of a strcmp(), as iter->trace->name
	 * will point to the same string as current_trace->name.
	 */
	mutex_lock(&trace_types_lock);
	if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
		*iter->trace = *tr->current_trace;
	mutex_unlock(&trace_types_lock);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->trace->use_max_tr)
		return ERR_PTR(-EBUSY);
#endif

	if (!iter->snapshot)
		atomic_inc(&trace_record_cmdline_disabled);

	if (*pos != iter->pos) {
		iter->ent = NULL;
		iter->cpu = 0;
		iter->idx = -1;

		if (cpu_file == RING_BUFFER_ALL_CPUS) {
			for_each_tracing_cpu(cpu)
				tracing_iter_reset(iter, cpu);
		} else
			tracing_iter_reset(iter, cpu_file);

		iter->leftover = 0;
		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
			;

	} else {
		/*
		 * If we overflowed the seq_file before, then we want
		 * to just reuse the trace_seq buffer again.
		 */
		if (iter->leftover)
			p = iter;
		else {
			l = *pos - 1;
			p = s_next(m, p, &l);
		}
	}

	trace_event_read_lock();
	trace_access_lock(cpu_file);
	return p;
}

static void s_stop(struct seq_file *m, void *p)
{
	struct trace_iterator *iter = m->private;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->trace->use_max_tr)
		return;
#endif

	if (!iter->snapshot)
		atomic_dec(&trace_record_cmdline_disabled);

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
}
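
/*
 * s_start()/s_stop() bracket every read of the "trace" seq_file: for a
 * non-snapshot read, cmdline recording is paused, and the event read lock
 * plus the trace_access lock for the CPU(s) being dumped are taken in
 * s_start(); all of that is undone here in s_stop().
 */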

static void
get_total_entries(struct trace_buffer *buf,
		  unsigned long *total, unsigned long *entries)
{
	unsigned long count;
	int cpu;

	*total = 0;
	*entries = 0;

	for_each_tracing_cpu(cpu) {
		count = ring_buffer_entries_cpu(buf->buffer, cpu);
		/*
		 * If this buffer has skipped entries, then we hold all
		 * entries for the trace and we need to ignore the
		 * ones before the time stamp.
		 */
		if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
			count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
			/* total is the same as the entries */
			*total += count;
		} else
			*total += count +
				ring_buffer_overrun_cpu(buf->buffer, cpu);
		*entries += count;
	}
}

static void print_lat_help_header(struct seq_file *m)
{
	seq_puts(m, "#                  _------=> CPU#            \n"
		    "#                 / _-----=> irqs-off        \n"
		    "#                | / _----=> need-resched    \n"
		    "#                || / _---=> hardirq/softirq \n"
		    "#                ||| / _--=> preempt-depth   \n"
		    "#                |||| /     delay            \n"
		    "#  cmd     pid   ||||| time  |   caller      \n"
		    "#     \\   /      |||||  \\    |   /         \n");
}

static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
{
	unsigned long total;
	unsigned long entries;

	get_total_entries(buf, &total, &entries);
	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
		   entries, total, num_online_cpus());
	seq_puts(m, "#\n");
}

static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
{
	print_event_info(buf, m);
	seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
		    "#              | |       |          |         |\n");
}

static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
{
	print_event_info(buf, m);
	seq_puts(m, "#                              _-----=> irqs-off\n"
		    "#                             / _----=> need-resched\n"
		    "#                            | / _---=> hardirq/softirq\n"
		    "#                            || / _--=> preempt-depth\n"
		    "#                            ||| /     delay\n"
		    "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
		    "#              | |       |   ||||       |         |\n");
}

void
print_trace_header(struct seq_file *m, struct trace_iterator *iter)
{
	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_buffer *buf = iter->trace_buffer;
	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
	struct tracer *type = iter->trace;
	unsigned long entries;
	unsigned long total;
	const char *name = "preemption";

	name = type->name;

	get_total_entries(buf, &total, &entries);

	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
		   name, UTS_RELEASE);
	seq_puts(m, "# -----------------------------------"
		 "---------------------------------\n");
	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
		   nsecs_to_usecs(data->saved_latency),
		   entries,
		   total,
		   buf->cpu,
#if defined(CONFIG_PREEMPT_NONE)
		   "server",
#elif defined(CONFIG_PREEMPT_VOLUNTARY)
		   "desktop",
#elif defined(CONFIG_PREEMPT)
		   "preempt",
#else
		   "unknown",
#endif
		   /* These are reserved for later use */
		   0, 0, 0, 0);
#ifdef CONFIG_SMP
	seq_printf(m, " #P:%d)\n", num_online_cpus());
#else
	seq_puts(m, ")\n");
#endif
	seq_puts(m, "#    -----------------\n");
	seq_printf(m, "#    | task: %.16s-%d "
		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
		   data->comm, data->pid,
		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
		   data->policy, data->rt_priority);
	seq_puts(m, "#    -----------------\n");

	if (data->critical_start) {
		seq_puts(m, "#  => started at: ");
		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#  => ended at:   ");
		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
		trace_print_seq(m, &iter->seq);
		seq_puts(m, "\n#\n");
	}

	seq_puts(m, "#\n");
}

2654 2655 2656
static void test_cpu_buff_start(struct trace_iterator *iter)
{
	struct trace_seq *s = &iter->seq;
2657
	struct trace_array *tr = iter->tr;
2658

2659
	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
2660 2661 2662 2663 2664
		return;

	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
		return;

2665
	if (iter->started && cpumask_test_cpu(iter->cpu, iter->started))
2666 2667
		return;

2668
	if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
2669 2670
		return;

2671 2672
	if (iter->started)
		cpumask_set_cpu(iter->cpu, iter->started);
2673 2674 2675 2676 2677

	/* Don't print started cpu buffer for the first entry of the trace */
	if (iter->idx > 1)
		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
				iter->cpu);
2678 2679
}

2680
static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
2681
{
2682
	struct trace_array *tr = iter->tr;
2683
	struct trace_seq *s = &iter->seq;
2684
	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
	struct trace_entry *entry;
2686
	struct trace_event *event;
2687

	entry = iter->ent;
2689

2690 2691
	test_cpu_buff_start(iter);

2692
	event = ftrace_find_event(entry->type);
2693

2694
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2695 2696 2697 2698
		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
			trace_print_lat_context(iter);
		else
			trace_print_context(iter);
2699
	}
2700

2701 2702 2703
	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

2704
	if (event)
2705
		return event->funcs->trace(iter, sym_flags, event);
2706

2707
	trace_seq_printf(s, "Unknown type %d\n", entry->type);
2708

2709
	return trace_handle_return(s);
2710 2711
}

2712
static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
{
2714
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
2717
	struct trace_event *event;

	entry = iter->ent;
2720

2721
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
2722 2723 2724 2725 2726
		trace_seq_printf(s, "%d %d %llu ",
				 entry->pid, iter->cpu, iter->ts);

	if (trace_seq_has_overflowed(s))
		return TRACE_TYPE_PARTIAL_LINE;

2728
	event = ftrace_find_event(entry->type);
2729
	if (event)
2730
		return event->funcs->raw(iter, 0, event);
2731

2732
	trace_seq_printf(s, "%d ?\n", entry->type);
2733

2734
	return trace_handle_return(s);
}

2737
static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
2738
{
2739
	struct trace_array *tr = iter->tr;
2740 2741 2742
	struct trace_seq *s = &iter->seq;
	unsigned char newline = '\n';
	struct trace_entry *entry;
2743
	struct trace_event *event;
2744 2745

	entry = iter->ent;
2746

2747
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2748 2749 2750 2751 2752
		SEQ_PUT_HEX_FIELD(s, entry->pid);
		SEQ_PUT_HEX_FIELD(s, iter->cpu);
		SEQ_PUT_HEX_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
2753
	}
2754

2755
	event = ftrace_find_event(entry->type);
2756
	if (event) {
2757
		enum print_line_t ret = event->funcs->hex(iter, 0, event);
2758 2759 2760
		if (ret != TRACE_TYPE_HANDLED)
			return ret;
	}
2761

2762
	SEQ_PUT_FIELD(s, newline);
2763

2764
	return trace_handle_return(s);
2765 2766
}

2767
static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
{
2769
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *entry;
2772
	struct trace_event *event;

	entry = iter->ent;
2775

2776
	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
2777 2778 2779 2780 2781
		SEQ_PUT_FIELD(s, entry->pid);
		SEQ_PUT_FIELD(s, iter->cpu);
		SEQ_PUT_FIELD(s, iter->ts);
		if (trace_seq_has_overflowed(s))
			return TRACE_TYPE_PARTIAL_LINE;
2782
	}

2784
	event = ftrace_find_event(entry->type);
2785 2786
	return event ? event->funcs->binary(iter, 0, event) :
		TRACE_TYPE_HANDLED;
}

2789
int trace_empty(struct trace_iterator *iter)
2790
{
2791
	struct ring_buffer_iter *buf_iter;
2792 2793
	int cpu;

2794
	/* If we are looking at one CPU buffer, only check that one */
2795
	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
2796
		cpu = iter->cpu_file;
2797 2798 2799
		buf_iter = trace_buffer_iter(iter, cpu);
		if (buf_iter) {
			if (!ring_buffer_iter_empty(buf_iter))
2800 2801
				return 0;
		} else {
2802
			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2803 2804 2805 2806 2807
				return 0;
		}
		return 1;
	}

2808
	for_each_tracing_cpu(cpu) {
2809 2810 2811
		buf_iter = trace_buffer_iter(iter, cpu);
		if (buf_iter) {
			if (!ring_buffer_iter_empty(buf_iter))
2812 2813
				return 0;
		} else {
2814
			if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
2815 2816
				return 0;
		}
2817
	}
2818

2819
	return 1;
2820 2821
}

2822
/*  Called with trace_event_read_lock() held. */
2823
enum print_line_t print_trace_line(struct trace_iterator *iter)
{
2825 2826
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
2827 2828
	enum print_line_t ret;

2829 2830 2831 2832 2833 2834
	if (iter->lost_events) {
		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
				 iter->cpu, iter->lost_events);
		if (trace_seq_has_overflowed(&iter->seq))
			return TRACE_TYPE_PARTIAL_LINE;
	}
2835

2836 2837 2838 2839 2840
	if (iter->trace && iter->trace->print_line) {
		ret = iter->trace->print_line(iter);
		if (ret != TRACE_TYPE_UNHANDLED)
			return ret;
	}
2841

2842 2843 2844 2845 2846
	if (iter->ent->type == TRACE_BPUTS &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
		return trace_print_bputs_msg_only(iter);

2847 2848 2849
	if (iter->ent->type == TRACE_BPRINT &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2850
		return trace_print_bprintk_msg_only(iter);
2851

2852 2853 2854
	if (iter->ent->type == TRACE_PRINT &&
			trace_flags & TRACE_ITER_PRINTK &&
			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
2855
		return trace_print_printk_msg_only(iter);
2856

	if (trace_flags & TRACE_ITER_BIN)
		return print_bin_fmt(iter);

2860 2861 2862
	if (trace_flags & TRACE_ITER_HEX)
		return print_hex_fmt(iter);

	if (trace_flags & TRACE_ITER_RAW)
		return print_raw_fmt(iter);

	return print_trace_fmt(iter);
}
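
/*
 * Note on precedence in print_trace_line() above: lost-event warnings are
 * emitted first, a tracer's own print_line() callback gets the next say,
 * then the printk-msg-only shortcuts, then the bin/hex/raw options, and
 * finally the default formatting of print_trace_fmt().
 */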

2869 2870 2871
void trace_latency_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
2872
	struct trace_array *tr = iter->tr;
2873 2874 2875 2876 2877 2878 2879 2880

	/* print nothing if the buffers are empty */
	if (trace_empty(iter))
		return;

	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
		print_trace_header(m, iter);

2881
	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
2882 2883 2884
		print_lat_help_header(m);
}

2885 2886 2887
void trace_default_header(struct seq_file *m)
{
	struct trace_iterator *iter = m->private;
2888 2889
	struct trace_array *tr = iter->tr;
	unsigned long trace_flags = tr->trace_flags;
2890

2891 2892 2893
	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
		return;

2894 2895 2896 2897 2898 2899 2900 2901
	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
		/* print nothing if the buffers are empty */
		if (trace_empty(iter))
			return;
		print_trace_header(m, iter);
		if (!(trace_flags & TRACE_ITER_VERBOSE))
			print_lat_help_header(m);
	} else {
2902 2903
		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
			if (trace_flags & TRACE_ITER_IRQ_INFO)
2904
				print_func_help_header_irq(iter->trace_buffer, m);
2905
			else
2906
				print_func_help_header(iter->trace_buffer, m);
2907
		}
2908 2909 2910
	}
}

2911 2912 2913 2914
static void test_ftrace_alive(struct seq_file *m)
{
	if (!ftrace_is_dead())
		return;
2915 2916
	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
		    "#          MAY BE MISSING FUNCTION EVENTS\n");
2917 2918
}

2919
#ifdef CONFIG_TRACER_MAX_TRACE
2920
static void show_snapshot_main_help(struct seq_file *m)
2921
{
2922 2923 2924 2925 2926 2927
	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer.\n"
		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
2928
}
2929 2930 2931

static void show_snapshot_percpu_help(struct seq_file *m)
{
2932
	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
2933
#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
2934 2935
	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
2936
#else
2937 2938
	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
		    "#                     Must use main snapshot file to allocate.\n");
2939
#endif
2940 2941 2942
	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2' works with any number that\n"
		    "#                       is not a '0' or '1')\n");
2943 2944
}

2945 2946
static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
{
2947
	if (iter->tr->allocated_snapshot)
2948
		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
2949
	else
2950
		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
2951

2952
	seq_puts(m, "# Snapshot commands:\n");
2953 2954 2955 2956
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
		show_snapshot_main_help(m);
	else
		show_snapshot_percpu_help(m);
2957 2958 2959 2960 2961 2962
}
#else
/* Should never be called */
static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
#endif

2963 2964 2965
static int s_show(struct seq_file *m, void *v)
{
	struct trace_iterator *iter = v;
2966
	int ret;
2967 2968 2969 2970 2971

	if (iter->ent == NULL) {
		if (iter->tr) {
			seq_printf(m, "# tracer: %s\n", iter->trace->name);
			seq_puts(m, "#\n");
2972
			test_ftrace_alive(m);
2973
		}
2974 2975 2976
		if (iter->snapshot && trace_empty(iter))
			print_snapshot_help(m, iter);
		else if (iter->trace && iter->trace->print_header)
2977
			iter->trace->print_header(m);
2978 2979 2980
		else
			trace_default_header(m);

2981 2982 2983 2984 2985 2986 2987 2988 2989 2990
	} else if (iter->leftover) {
		/*
		 * If we filled the seq_file buffer earlier, we
		 * want to just show it now.
		 */
		ret = trace_print_seq(m, &iter->seq);

		/* ret should this time be zero, but you never know */
		iter->leftover = ret;

2991
	} else {
		print_trace_line(iter);
2993 2994 2995 2996 2997 2998 2999 3000 3001
		ret = trace_print_seq(m, &iter->seq);
		/*
		 * If we overflow the seq_file buffer, then it will
		 * ask us for this data again at start up.
		 * Use that instead.
		 *  ret is 0 if seq_file write succeeded.
		 *        -1 otherwise.
		 */
		iter->leftover = ret;
3002 3003 3004 3005 3006
	}

	return 0;
}

3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017
/*
 * Should be used after trace_array_get(), trace_types_lock
 * ensures that i_cdev was already initialized.
 */
static inline int tracing_get_cpu(struct inode *inode)
{
	if (inode->i_cdev) /* See trace_create_cpu_file() */
		return (long)inode->i_cdev - 1;
	return RING_BUFFER_ALL_CPUS;
}
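
/*
 * Note: this relies on trace_create_cpu_file() storing "cpu + 1" in
 * i_cdev, so a NULL i_cdev (the top level files) maps to
 * RING_BUFFER_ALL_CPUS while per-cpu files decode back to their CPU
 * number.
 */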

3018
static const struct seq_operations tracer_seq_ops = {
	.start		= s_start,
	.next		= s_next,
	.stop		= s_stop,
	.show		= s_show,
3023 3024
};

static struct trace_iterator *
3026
__tracing_open(struct inode *inode, struct file *file, bool snapshot)
3027
{
3028
	struct trace_array *tr = inode->i_private;
3029
	struct trace_iterator *iter;
3030
	int cpu;
3031

3032 3033
	if (tracing_disabled)
		return ERR_PTR(-ENODEV);

3035
	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3036 3037
	if (!iter)
		return ERR_PTR(-ENOMEM);
3038

3039
	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3040
				    GFP_KERNEL);
3041 3042 3043
	if (!iter->buffer_iter)
		goto release;

3044 3045 3046 3047
	/*
	 * We make a copy of the current tracer to avoid concurrent
	 * changes on it while we are reading.
	 */
3048
	mutex_lock(&trace_types_lock);
3049
	iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3050
	if (!iter->trace)
3051
		goto fail;
3052

3053
	*iter->trace = *tr->current_trace;
3054

3055
	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3056 3057
		goto fail;

3058 3059 3060
	iter->tr = tr;

#ifdef CONFIG_TRACER_MAX_TRACE
3061 3062
	/* Currently only the top directory has a snapshot */
	if (tr->current_trace->print_max || snapshot)
3063
		iter->trace_buffer = &tr->max_buffer;
3064
	else
3065 3066
#endif
		iter->trace_buffer = &tr->trace_buffer;
3067
	iter->snapshot = snapshot;
3068
	iter->pos = -1;
3069
	iter->cpu_file = tracing_get_cpu(inode);
3070
	mutex_init(&iter->mutex);
3071

3072 3073
	/* Notify the tracer early; before we stop tracing. */
	if (iter->trace && iter->trace->open)
3074
		iter->trace->open(iter);
3075

3076
	/* Annotate start of buffers if we had overruns */
3077
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
3078 3079
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

3080
	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
3081
	if (trace_clocks[tr->clock_id].in_ns)
3082 3083
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

3084 3085
	/* stop the trace while dumping if we are not opening "snapshot" */
	if (!iter->snapshot)
3086
		tracing_stop_tr(tr);
3087

3088
	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3089 3090
		for_each_tracing_cpu(cpu) {
			iter->buffer_iter[cpu] =
3091
				ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3092 3093 3094 3095
		}
		ring_buffer_read_prepare_sync();
		for_each_tracing_cpu(cpu) {
			ring_buffer_read_start(iter->buffer_iter[cpu]);
3096
			tracing_iter_reset(iter, cpu);
3097 3098 3099
		}
	} else {
		cpu = iter->cpu_file;
3100
		iter->buffer_iter[cpu] =
3101
			ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3102 3103
		ring_buffer_read_prepare_sync();
		ring_buffer_read_start(iter->buffer_iter[cpu]);
3104
		tracing_iter_reset(iter, cpu);
3105 3106
	}

3107 3108 3109
	mutex_unlock(&trace_types_lock);

	return iter;
3110

3111
 fail:
3112
	mutex_unlock(&trace_types_lock);
3113
	kfree(iter->trace);
3114
	kfree(iter->buffer_iter);
3115
release:
3116 3117
	seq_release_private(inode, file);
	return ERR_PTR(-ENOMEM);
3118 3119 3120 3121
}

int tracing_open_generic(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

3125 3126 3127 3128
	filp->private_data = inode->i_private;
	return 0;
}

3129 3130 3131 3132 3133
bool tracing_is_disabled(void)
{
	return (tracing_disabled) ? true : false;
}

3134 3135 3136 3137
/*
 * Open and update trace_array ref count.
 * Must have the current trace_array passed to it.
 */
3138
static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152
{
	struct trace_array *tr = inode->i_private;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	filp->private_data = inode->i_private;

	return 0;
}

3153
static int tracing_release(struct inode *inode, struct file *file)
3154
{
3155
	struct trace_array *tr = inode->i_private;
3156
	struct seq_file *m = file->private_data;
3157
	struct trace_iterator *iter;
3158
	int cpu;
3159

3160
	if (!(file->f_mode & FMODE_READ)) {
3161
		trace_array_put(tr);
3162
		return 0;
3163
	}
3164

3165
	/* Writes do not use seq_file */
3166
	iter = m->private;
3167
	mutex_lock(&trace_types_lock);
3168

3169 3170 3171 3172 3173
	for_each_tracing_cpu(cpu) {
		if (iter->buffer_iter[cpu])
			ring_buffer_read_finish(iter->buffer_iter[cpu]);
	}

3174 3175 3176
	if (iter->trace && iter->trace->close)
		iter->trace->close(iter);

3177 3178
	if (!iter->snapshot)
		/* reenable tracing if it was previously enabled */
3179
		tracing_start_tr(tr);
3180 3181 3182

	__trace_array_put(tr);

3183 3184
	mutex_unlock(&trace_types_lock);

3185
	mutex_destroy(&iter->mutex);
3186
	free_cpumask_var(iter->started);
3187
	kfree(iter->trace);
3188
	kfree(iter->buffer_iter);
3189
	seq_release_private(inode, file);
3190

3191 3192 3193
	return 0;
}

3194 3195 3196 3197 3198
static int tracing_release_generic_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);
3199 3200 3201
	return 0;
}

3202 3203 3204 3205 3206 3207 3208 3209 3210
static int tracing_single_release_tr(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;

	trace_array_put(tr);

	return single_release(inode, file);
}

3211 3212
static int tracing_open(struct inode *inode, struct file *file)
{
3213
	struct trace_array *tr = inode->i_private;
3214 3215
	struct trace_iterator *iter;
	int ret = 0;
3216

3217 3218 3219
	if (trace_array_get(tr) < 0)
		return -ENODEV;

3220
	/* If this file was open for write, then erase contents */
3221 3222 3223 3224
	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
		int cpu = tracing_get_cpu(inode);

		if (cpu == RING_BUFFER_ALL_CPUS)
3225
			tracing_reset_online_cpus(&tr->trace_buffer);
3226
		else
3227
			tracing_reset(&tr->trace_buffer, cpu);
3228
	}
3229

3230
	if (file->f_mode & FMODE_READ) {
3231
		iter = __tracing_open(inode, file, false);
3232 3233
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
3234
		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3235 3236
			iter->iter_flags |= TRACE_FILE_LAT_FMT;
	}
3237 3238 3239 3240

	if (ret < 0)
		trace_array_put(tr);

3241 3242 3243
	return ret;
}

3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264
/*
 * Some tracers are not suitable for instance buffers.
 * A tracer is always available for the global array (toplevel)
 * or if it explicitly states that it is.
 */
static bool
trace_ok_for_array(struct tracer *t, struct trace_array *tr)
{
	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
}

/* Find the next tracer that this trace array may use */
static struct tracer *
get_tracer_for_array(struct trace_array *tr, struct tracer *t)
{
	while (t && !trace_ok_for_array(t, tr))
		t = t->next;

	return t;
}

static void *
3266 3267
t_next(struct seq_file *m, void *v, loff_t *pos)
{
3268
	struct trace_array *tr = m->private;
3269
	struct tracer *t = v;
3270 3271 3272 3273

	(*pos)++;

	if (t)
3274
		t = get_tracer_for_array(tr, t->next);
3275 3276 3277 3278 3279 3280

	return t;
}

static void *t_start(struct seq_file *m, loff_t *pos)
{
3281
	struct trace_array *tr = m->private;
3282
	struct tracer *t;
3283 3284 3285
	loff_t l = 0;

	mutex_lock(&trace_types_lock);
3286 3287 3288 3289

	t = get_tracer_for_array(tr, trace_types);
	for (; t && l < *pos; t = t_next(m, t, &l))
			;
3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305

	return t;
}

static void t_stop(struct seq_file *m, void *p)
{
	mutex_unlock(&trace_types_lock);
}

static int t_show(struct seq_file *m, void *v)
{
	struct tracer *t = v;

	if (!t)
		return 0;

3306
	seq_puts(m, t->name);
3307 3308 3309 3310 3311 3312 3313 3314
	if (t->next)
		seq_putc(m, ' ');
	else
		seq_putc(m, '\n');

	return 0;
}

3315
static const struct seq_operations show_traces_seq_ops = {
	.start		= t_start,
	.next		= t_next,
	.stop		= t_stop,
	.show		= t_show,
3320 3321 3322 3323
};

static int show_traces_open(struct inode *inode, struct file *file)
{
3324 3325 3326 3327
	struct trace_array *tr = inode->i_private;
	struct seq_file *m;
	int ret;

	if (tracing_disabled)
		return -ENODEV;

3331 3332 3333 3334 3335 3336 3337 3338
	ret = seq_open(file, &show_traces_seq_ops);
	if (ret)
		return ret;

	m = file->private_data;
	m->private = tr;

	return 0;
3339 3340
}

3341 3342 3343 3344 3345 3346 3347
static ssize_t
tracing_write_stub(struct file *filp, const char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	return count;
}

3348
loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
3349
{
3350 3351
	int ret;

3352
	if (file->f_mode & FMODE_READ)
3353
		ret = seq_lseek(file, offset, whence);
3354
	else
3355 3356 3357
		file->f_pos = ret = 0;

	return ret;
3358 3359
}

3360
static const struct file_operations tracing_fops = {
	.open		= tracing_open,
	.read		= seq_read,
3363
	.write		= tracing_write_stub,
3364
	.llseek		= tracing_lseek,
	.release	= tracing_release,
3366 3367
};

3368
static const struct file_operations show_traces_fops = {
	.open		= show_traces_open,
	.read		= seq_read,
	.release	= seq_release,
3372
	.llseek		= seq_lseek,
};

3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386
/*
 * The tracer itself will not take this lock, but still we want
 * to provide a consistent cpumask to user-space:
 */
static DEFINE_MUTEX(tracing_cpumask_update_lock);

/*
 * Temporary storage for the character representation of the
 * CPU bitmask (and one more byte for the newline):
 */
static char mask_str[NR_CPUS + 1];

static ssize_t
tracing_cpumask_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
3391
	struct trace_array *tr = file_inode(filp)->i_private;
3392
	int len;

	mutex_lock(&tracing_cpumask_update_lock);
3395

3396 3397 3398
	len = snprintf(mask_str, count, "%*pb\n",
		       cpumask_pr_args(tr->tracing_cpumask));
	if (len >= count) {
3399 3400 3401 3402 3403 3404
		count = -EINVAL;
		goto out_err;
	}
	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);

out_err:
	mutex_unlock(&tracing_cpumask_update_lock);

	return count;
}

static ssize_t
tracing_cpumask_write(struct file *filp, const char __user *ubuf,
		      size_t count, loff_t *ppos)
{
3414
	struct trace_array *tr = file_inode(filp)->i_private;
3415
	cpumask_var_t tracing_cpumask_new;
3416
	int err, cpu;
3417 3418 3419

	if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
		return -ENOMEM;

3421
	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
	if (err)
3423 3424
		goto err_unlock;

3425 3426
	mutex_lock(&tracing_cpumask_update_lock);

3427
	local_irq_disable();
3428
	arch_spin_lock(&tr->max_lock);
3429
	for_each_tracing_cpu(cpu) {
3430 3431 3432 3433
		/*
		 * Increase/decrease the disabled counter if we are
		 * about to flip a bit in the cpumask:
		 */
3434
		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3435
				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3436 3437
			atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
			ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
3438
		}
3439
		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
3440
				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
3441 3442
			atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
			ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
3443 3444
		}
	}
3445
	arch_spin_unlock(&tr->max_lock);
3446
	local_irq_enable();
3447

3448
	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
3449 3450

	mutex_unlock(&tracing_cpumask_update_lock);
3451
	free_cpumask_var(tracing_cpumask_new);

	return count;
3454 3455

err_unlock:
3456
	free_cpumask_var(tracing_cpumask_new);
3457 3458

	return err;
}

3461
static const struct file_operations tracing_cpumask_fops = {
3462
	.open		= tracing_open_generic_tr,
	.read		= tracing_cpumask_read,
	.write		= tracing_cpumask_write,
3465
	.release	= tracing_release_generic_tr,
3466
	.llseek		= generic_file_llseek,
3467 3468
};

3469
static int tracing_trace_options_show(struct seq_file *m, void *v)
3470
{
3471
	struct tracer_opt *trace_opts;
3472
	struct trace_array *tr = m->private;
3473 3474
	u32 tracer_flags;
	int i;
3475

3476
	mutex_lock(&trace_types_lock);
3477 3478
	tracer_flags = tr->current_trace->flags->val;
	trace_opts = tr->current_trace->flags->opts;
3479

3480
	for (i = 0; trace_options[i]; i++) {
3481
		if (tr->trace_flags & (1 << i))
3482
			seq_printf(m, "%s\n", trace_options[i]);
3483
		else
3484
			seq_printf(m, "no%s\n", trace_options[i]);
3485 3486
	}

3487 3488
	for (i = 0; trace_opts[i].name; i++) {
		if (tracer_flags & trace_opts[i].bit)
3489
			seq_printf(m, "%s\n", trace_opts[i].name);
3490
		else
3491
			seq_printf(m, "no%s\n", trace_opts[i].name);
3492
	}
3493
	mutex_unlock(&trace_types_lock);
3494

3495
	return 0;
3496 3497
}

3498
static int __set_tracer_option(struct trace_array *tr,
3499 3500 3501
			       struct tracer_flags *tracer_flags,
			       struct tracer_opt *opts, int neg)
{
3502
	struct tracer *trace = tracer_flags->trace;
3503
	int ret;
3504

3505
	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
3506 3507 3508 3509 3510 3511 3512 3513
	if (ret)
		return ret;

	if (neg)
		tracer_flags->val &= ~opts->bit;
	else
		tracer_flags->val |= opts->bit;
	return 0;
3514 3515
}

3516
/* Try to assign a tracer specific option */
3517
static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
3518
{
3519
	struct tracer *trace = tr->current_trace;
3520
	struct tracer_flags *tracer_flags = trace->flags;
3521
	struct tracer_opt *opts = NULL;
3522
	int i;
3523

3524 3525
	for (i = 0; tracer_flags->opts[i].name; i++) {
		opts = &tracer_flags->opts[i];
3526

3527
		if (strcmp(cmp, opts->name) == 0)
3528
			return __set_tracer_option(tr, trace->flags, opts, neg);
3529 3530
	}

3531
	return -EINVAL;
3532 3533
}

3534 3535 3536 3537 3538 3539 3540 3541 3542
/* Some tracers require overwrite to stay enabled */
int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
{
	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
		return -1;

	return 0;
}
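
/*
 * Note: latency tracers that must keep ring-buffer overwrite mode enabled
 * use trace_keep_overwrite() (directly or from their own ->flag_changed
 * callback) so that set_tracer_flag() below refuses to clear
 * TRACE_ITER_OVERWRITE while they are running.
 */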

3543
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
3544 3545
{
	/* do nothing if flag is already set */
3546
	if (!!(tr->trace_flags & mask) == !!enabled)
3547 3548 3549
		return 0;

	/* Give the tracer a chance to approve the change */
3550
	if (tr->current_trace->flag_changed)
3551
		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
3552
			return -EINVAL;
3553 3554

	if (enabled)
3555
		tr->trace_flags |= mask;
3556
	else
3557
		tr->trace_flags &= ~mask;
3558 3559 3560

	if (mask == TRACE_ITER_RECORD_CMD)
		trace_event_enable_cmd_record(enabled);
3561

3562 3563 3564
	if (mask == TRACE_ITER_EVENT_FORK)
		trace_event_follow_fork(tr, enabled);

3565
	if (mask == TRACE_ITER_OVERWRITE) {
3566
		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
3567
#ifdef CONFIG_TRACER_MAX_TRACE
3568
		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
3569 3570
#endif
	}
3571

3572
	if (mask == TRACE_ITER_PRINTK) {
3573
		trace_printk_start_stop_comm(enabled);
3574 3575
		trace_printk_control(enabled);
	}
3576 3577

	return 0;
3578 3579
}

3580
static int trace_set_options(struct trace_array *tr, char *option)
3581
{
3582
	char *cmp;
3583
	int neg = 0;
3584
	int ret = -ENODEV;
3585
	int i;
3586
	size_t orig_len = strlen(option);
3587

3588
	cmp = strstrip(option);
3589

3590
	if (strncmp(cmp, "no", 2) == 0) {
3591 3592 3593 3594
		neg = 1;
		cmp += 2;
	}

3595 3596
	mutex_lock(&trace_types_lock);

3597
	for (i = 0; trace_options[i]; i++) {
3598
		if (strcmp(cmp, trace_options[i]) == 0) {
3599
			ret = set_tracer_flag(tr, 1 << i, !neg);
3600 3601 3602
			break;
		}
	}
3603 3604

	/* If no option could be set, test the specific tracer options */
3605
	if (!trace_options[i])
3606
		ret = set_tracer_option(tr, cmp, neg);
3607 3608

	mutex_unlock(&trace_types_lock);
3609

3610 3611 3612 3613 3614 3615 3616
	/*
	 * If the first trailing whitespace is replaced with '\0' by strstrip,
	 * turn it back into a space.
	 */
	if (orig_len > strlen(option))
		option[strlen(option)] = ' ';

3617 3618 3619
	return ret;
}
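
/*
 * Example (illustrative): "echo noprint-parent > trace_options" arrives
 * here as option == "noprint-parent"; the leading "no" sets neg, and the
 * remainder is first matched against the generic trace_options[] names
 * and then, if nothing matched, against the current tracer's own flags.
 */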

3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630
static void __init apply_trace_boot_options(void)
{
	char *buf = trace_boot_options_buf;
	char *option;

	while (true) {
		option = strsep(&buf, ",");

		if (!option)
			break;

3631 3632
		if (*option)
			trace_set_options(&global_trace, option);
3633 3634 3635 3636 3637 3638 3639

		/* Put back the comma to allow this to be called again */
		if (buf)
			*(buf - 1) = ',';
	}
}

3640 3641 3642 3643
static ssize_t
tracing_trace_options_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
3644 3645
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
3646
	char buf[64];
3647
	int ret;
3648 3649 3650 3651

	if (cnt >= sizeof(buf))
		return -EINVAL;

3652
	if (copy_from_user(buf, ubuf, cnt))
3653 3654
		return -EFAULT;

3655 3656
	buf[cnt] = 0;

3657
	ret = trace_set_options(tr, buf);
3658 3659
	if (ret < 0)
		return ret;
3660

3661
	*ppos += cnt;
3662 3663 3664 3665

	return cnt;
}

3666 3667
static int tracing_trace_options_open(struct inode *inode, struct file *file)
{
3668
	struct trace_array *tr = inode->i_private;
3669
	int ret;
3670

3671 3672
	if (tracing_disabled)
		return -ENODEV;
3673

3674 3675 3676
	if (trace_array_get(tr) < 0)
		return -ENODEV;

3677 3678 3679 3680 3681
	ret = single_open(file, tracing_trace_options_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
3682 3683
}

3684
static const struct file_operations tracing_iter_fops = {
3685 3686 3687
	.open		= tracing_trace_options_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
3688
	.release	= tracing_single_release_tr,
3689
	.write		= tracing_trace_options_write,
3690 3691
};

static const char readme_msg[] =
	"tracing mini-HOWTO:\n\n"
	"# echo 0 > tracing_on : quick way to disable tracing\n"
	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
	" Important files:\n"
	"  trace\t\t\t- The static contents of the buffer\n"
	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
	"  current_tracer\t- function and latency tracers\n"
	"  available_tracers\t- list of configured tracers for current_tracer\n"
	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
	"  trace_clock\t\t- change the clock used to order events\n"
	"       local:   Per cpu clock but may not be synced across CPUs\n"
	"      global:   Synced across CPUs but slows tracing down.\n"
	"     counter:   Not a clock, but just an increment\n"
	"      uptime:   Jiffy counter from time of boot\n"
	"        perf:   Same clock that perf events use\n"
#ifdef CONFIG_X86_64
	"     x86-tsc:   TSC cycle counter\n"
#endif
	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
	"  tracing_cpumask\t- Limit which CPUs to trace\n"
	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
	"\t\t\t  Remove sub-buffer with rmdir\n"
	"  trace_options\t\t- Set format or modify how tracing happens\n"
	"\t\t\t  Disable an option by adding a suffix 'no' to the\n"
	"\t\t\t  option name\n"
	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"\n  available_filter_functions - list of functions that can be filtered on\n"
	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
	"\t\t\t  functions\n"
	"\t     accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t     modules: Can select a group via module\n"
	"\t      Format: :mod:<module-name>\n"
	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
	"\t    triggers: a command to perform when function is hit\n"
	"\t      Format: <function>:<trigger>[:count]\n"
	"\t     trigger: traceon, traceoff\n"
	"\t\t      enable_event:<system>:<event>\n"
	"\t\t      disable_event:<system>:<event>\n"
#ifdef CONFIG_STACKTRACE
	"\t\t      stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t      snapshot\n"
#endif
	"\t\t      dump\n"
	"\t\t      cpudump\n"
	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
	"\t     The first one will disable tracing every time do_fault is hit\n"
	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t       The first time do_trap is hit and it disables tracing, the\n"
	"\t       counter will decrement to 2. If tracing is already disabled,\n"
	"\t       the counter will not decrement. It only decrements when the\n"
	"\t       trigger did work\n"
	"\t     To remove trigger without count:\n"
	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
	"\t     To remove trigger with a count:\n"
	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
	"\t    modules: Can select a group via module command :mod:\n"
	"\t    Does not accept triggers\n"
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_TRACER
	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
	"\t\t    (function)\n"
#endif
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
	"\t\t\t  snapshot buffer. Read the contents for more\n"
	"\t\t\t  information\n"
#endif
#ifdef CONFIG_STACK_TRACER
	"  stack_trace\t\t- Shows the max stack trace when active\n"
	"  stack_max_size\t- Shows current max stack size that was traced\n"
	"\t\t\t  Write into this file to reset the max size (trigger a\n"
	"\t\t\t  new trace)\n"
#ifdef CONFIG_DYNAMIC_FTRACE
	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
	"\t\t\t  traces\n"
#endif
#endif /* CONFIG_STACK_TRACER */
	"  events/\t\t- Directory containing all trace event subsystems:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
	"\t\t\t  events\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"  events/<system>/<event>/\t- Directory containing control files for\n"
	"\t\t\t  <event>:\n"
	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
	"      filter\t\t- If set, only events passing filter are traced\n"
	"      trigger\t\t- If set, a command to perform when event is hit\n"
	"\t    Format: <trigger>[:count][if <filter>]\n"
	"\t   trigger: traceon, traceoff\n"
	"\t            enable_event:<system>:<event>\n"
	"\t            disable_event:<system>:<event>\n"
#ifdef CONFIG_HIST_TRIGGERS
	"\t            enable_hist:<system>:<event>\n"
	"\t            disable_hist:<system>:<event>\n"
#endif
#ifdef CONFIG_STACKTRACE
	"\t\t    stacktrace\n"
#endif
#ifdef CONFIG_TRACER_SNAPSHOT
	"\t\t    snapshot\n"
#endif
#ifdef CONFIG_HIST_TRIGGERS
	"\t\t    hist (see below)\n"
#endif
	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
	"\t                  events/block/block_unplug/trigger\n"
	"\t   The first disables tracing every time block_unplug is hit.\n"
	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
	"\t   Like function triggers, the counter is only decremented if it\n"
	"\t    enabled or disabled tracing.\n"
	"\t   To remove a trigger without a count:\n"
	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
	"\t   To remove a trigger with a count:\n"
	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
	"\t   Filters can be ignored when removing a trigger.\n"
#ifdef CONFIG_HIST_TRIGGERS
	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
	"\t    Format: hist:keys=<field1[,field2,...]>\n"
	"\t            [:values=<field1[,field2,...]>]\n"
	"\t            [:sort=<field1[,field2,...]>]\n"
	"\t            [:size=#entries]\n"
	"\t            [:pause][:continue][:clear]\n"
	"\t            [:name=histname1]\n"
	"\t            [if <filter>]\n\n"
	"\t    When a matching event is hit, an entry is added to a hash\n"
	"\t    table using the key(s) and value(s) named, and the value of a\n"
	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
	"\t    correspond to fields in the event's format description.  Keys\n"
	"\t    can be any field, or the special string 'stacktrace'.\n"
	"\t    Compound keys consisting of up to two fields can be specified\n"
	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
	"\t    fields.  Sort keys consisting of up to two fields can be\n"
	"\t    specified using the 'sort' keyword.  The sort direction can\n"
	"\t    be modified by appending '.descending' or '.ascending' to a\n"
	"\t    sort field.  The 'size' parameter can be used to specify more\n"
	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
	"\t    its histogram data will be shared with other triggers of the\n"
	"\t    same name, and trigger hits will update this common data.\n\n"
	"\t    Reading the 'hist' file for the event will dump the hash\n"
	"\t    table in its entirety to stdout.  If there are multiple hist\n"
	"\t    triggers attached to an event, there will be a table for each\n"
	"\t    trigger in the output.  The table displayed for a named\n"
	"\t    trigger will be the same as any other instance having the\n"
	"\t    same name.  The default format used to display a given field\n"
	"\t    can be modified by appending any of the following modifiers\n"
	"\t    to the field name, as applicable:\n\n"
	"\t            .hex        display a number as a hex value\n"
	"\t            .sym        display an address as a symbol\n"
	"\t            .sym-offset display an address as a symbol and offset\n"
	"\t            .execname   display a common_pid as a program name\n"
	"\t            .syscall    display a syscall id as a syscall name\n\n"
	"\t            .log2       display log2 value rather than raw number\n\n"
	"\t    The 'pause' parameter can be used to pause an existing hist\n"
	"\t    trigger or to start a hist trigger but not log any events\n"
	"\t    until told to do so.  'continue' can be used to start or\n"
	"\t    restart a paused hist trigger.\n\n"
	"\t    The 'clear' parameter will clear the contents of a running\n"
	"\t    hist trigger and leave its current paused/active state\n"
	"\t    unchanged.\n\n"
	"\t    The enable_hist and disable_hist triggers can be used to\n"
	"\t    have one event conditionally start and stop another event's\n"
	"\t    already-attached hist trigger.  The syntax is analogous to\n"
	"\t    the enable_event and disable_event triggers.\n"
#endif
;

static ssize_t
tracing_readme_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	return simple_read_from_buffer(ubuf, cnt, ppos,
					readme_msg, strlen(readme_msg));
}

static const struct file_operations tracing_readme_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_readme_read,
	.llseek		= generic_file_llseek,
};

static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
	unsigned int *ptr = v;

	if (*pos || m->count)
		ptr++;

	(*pos)++;

	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
	     ptr++) {
		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
			continue;

		return ptr;
	}

	return NULL;
}

static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
{
	void *v;
	loff_t l = 0;

	preempt_disable();
	arch_spin_lock(&trace_cmdline_lock);

	v = &savedcmd->map_cmdline_to_pid[0];
	while (l <= *pos) {
		v = saved_cmdlines_next(m, v, &l);
		if (!v)
			return NULL;
	}

	return v;
}

static void saved_cmdlines_stop(struct seq_file *m, void *v)
{
	arch_spin_unlock(&trace_cmdline_lock);
	preempt_enable();
}

static int saved_cmdlines_show(struct seq_file *m, void *v)
{
	char buf[TASK_COMM_LEN];
	unsigned int *pid = v;

	__trace_find_cmdline(*pid, buf);
	seq_printf(m, "%d %s\n", *pid, buf);
	return 0;
}

static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
	.start		= saved_cmdlines_start,
	.next		= saved_cmdlines_next,
	.stop		= saved_cmdlines_stop,
	.show		= saved_cmdlines_show,
};

static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
}

static const struct file_operations tracing_saved_cmdlines_fops = {
	.open		= tracing_saved_cmdlines_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static ssize_t
tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
				 size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	arch_spin_lock(&trace_cmdline_lock);
	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
	arch_spin_unlock(&trace_cmdline_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
{
	kfree(s->saved_cmdlines);
	kfree(s->map_cmdline_to_pid);
	kfree(s);
}

static int tracing_resize_saved_cmdlines(unsigned int val)
{
	struct saved_cmdlines_buffer *s, *savedcmd_temp;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	if (allocate_cmdlines_buffer(val, s) < 0) {
		kfree(s);
		return -ENOMEM;
	}

	arch_spin_lock(&trace_cmdline_lock);
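	/*
	 * Switch the live savedcmd pointer over while holding
	 * trace_cmdline_lock; the old buffer is only freed after the
	 * lock has been dropped.
	 */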
	savedcmd_temp = savedcmd;
	savedcmd = s;
	arch_spin_unlock(&trace_cmdline_lock);
	free_saved_cmdlines_buffer(savedcmd_temp);

	return 0;
}

static ssize_t
tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
				  size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry and at most PID_MAX_DEFAULT */
	if (!val || val > PID_MAX_DEFAULT)
		return -EINVAL;

	ret = tracing_resize_saved_cmdlines((unsigned int)val);
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static const struct file_operations tracing_saved_cmdlines_size_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_saved_cmdlines_size_read,
	.write		= tracing_saved_cmdlines_size_write,
};

#ifdef CONFIG_TRACE_ENUM_MAP_FILE
static union trace_enum_map_item *
update_enum_map(union trace_enum_map_item *ptr)
{
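	/*
	 * An entry with a NULL enum_string is not a real map: it is the
	 * zeroed tail marker of the current array.  Follow tail.next
	 * (when set) to the next chained array, skipping its head entry.
	 */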
	if (!ptr->map.enum_string) {
		if (ptr->tail.next) {
			ptr = ptr->tail.next;
			/* Set ptr to the next real item (skip head) */
			ptr++;
		} else
			return NULL;
	}
	return ptr;
}

static void *enum_map_next(struct seq_file *m, void *v, loff_t *pos)
{
	union trace_enum_map_item *ptr = v;

	/*
	 * Paranoid! If ptr points to end, we don't want to increment past it.
	 * This really should never happen.
	 */
	ptr = update_enum_map(ptr);
	if (WARN_ON_ONCE(!ptr))
		return NULL;

	ptr++;

	(*pos)++;

	ptr = update_enum_map(ptr);

	return ptr;
}

static void *enum_map_start(struct seq_file *m, loff_t *pos)
{
	union trace_enum_map_item *v;
	loff_t l = 0;

	mutex_lock(&trace_enum_mutex);

	v = trace_enum_maps;
	if (v)
		v++;

	while (v && l < *pos) {
		v = enum_map_next(m, v, &l);
	}

	return v;
}

static void enum_map_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&trace_enum_mutex);
}

static int enum_map_show(struct seq_file *m, void *v)
{
	union trace_enum_map_item *ptr = v;

	seq_printf(m, "%s %ld (%s)\n",
		   ptr->map.enum_string, ptr->map.enum_value,
		   ptr->map.system);

	return 0;
}

static const struct seq_operations tracing_enum_map_seq_ops = {
	.start		= enum_map_start,
	.next		= enum_map_next,
	.stop		= enum_map_stop,
	.show		= enum_map_show,
};

static int tracing_enum_map_open(struct inode *inode, struct file *filp)
{
	if (tracing_disabled)
		return -ENODEV;

	return seq_open(filp, &tracing_enum_map_seq_ops);
}

static const struct file_operations tracing_enum_map_fops = {
	.open		= tracing_enum_map_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static inline union trace_enum_map_item *
trace_enum_jmp_to_tail(union trace_enum_map_item *ptr)
{
	/* Return tail of array given the head */
	return ptr + ptr->head.length + 1;
}

static void
trace_insert_enum_map_file(struct module *mod, struct trace_enum_map **start,
			   int len)
{
	struct trace_enum_map **stop;
	struct trace_enum_map **map;
	union trace_enum_map_item *map_array;
	union trace_enum_map_item *ptr;

	stop = start + len;

	/*
	 * The trace_enum_maps contains the map plus a head and tail item,
	 * where the head holds the module and length of array, and the
	 * tail holds a pointer to the next list.
	 */
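	/*
	 * Rough layout of one allocated chunk:
	 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: next ]
	 */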
	map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
	if (!map_array) {
		pr_warn("Unable to allocate trace enum mapping\n");
		return;
	}

	mutex_lock(&trace_enum_mutex);

	if (!trace_enum_maps)
		trace_enum_maps = map_array;
	else {
		ptr = trace_enum_maps;
		for (;;) {
			ptr = trace_enum_jmp_to_tail(ptr);
			if (!ptr->tail.next)
				break;
			ptr = ptr->tail.next;

		}
		ptr->tail.next = map_array;
	}
	map_array->head.mod = mod;
	map_array->head.length = len;
	map_array++;

	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
		map_array->map = **map;
		map_array++;
	}
	memset(map_array, 0, sizeof(*map_array));

	mutex_unlock(&trace_enum_mutex);
}

static void trace_create_enum_file(struct dentry *d_tracer)
{
	trace_create_file("enum_map", 0444, d_tracer,
			  NULL, &tracing_enum_map_fops);
}

#else /* CONFIG_TRACE_ENUM_MAP_FILE */
static inline void trace_create_enum_file(struct dentry *d_tracer) { }
static inline void trace_insert_enum_map_file(struct module *mod,
			      struct trace_enum_map **start, int len) { }
#endif /* !CONFIG_TRACE_ENUM_MAP_FILE */

static void trace_insert_enum_map(struct module *mod,
				  struct trace_enum_map **start, int len)
{
	struct trace_enum_map **map;

	if (len <= 0)
		return;

	map = start;

	trace_event_enum_update(map, len);

	trace_insert_enum_map_file(mod, start, len);
}

static ssize_t
tracing_set_trace_read(struct file *filp, char __user *ubuf,
		       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[MAX_TRACER_SIZE+2];
	int r;

	mutex_lock(&trace_types_lock);
	r = sprintf(buf, "%s\n", tr->current_trace->name);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

int tracer_init(struct tracer *t, struct trace_array *tr)
{
	tracing_reset_online_cpus(&tr->trace_buffer);
	return t->init(tr);
}

static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
{
	int cpu;

	for_each_tracing_cpu(cpu)
		per_cpu_ptr(buf->data, cpu)->entries = val;
}

#ifdef CONFIG_TRACER_MAX_TRACE
/* resize @tr's buffer to the size of @size_tr's entries */
static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
					struct trace_buffer *size_buf, int cpu_id)
{
	int cpu, ret = 0;

	if (cpu_id == RING_BUFFER_ALL_CPUS) {
		for_each_tracing_cpu(cpu) {
			ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
			if (ret < 0)
				break;
			per_cpu_ptr(trace_buf->data, cpu)->entries =
				per_cpu_ptr(size_buf->data, cpu)->entries;
		}
	} else {
		ret = ring_buffer_resize(trace_buf->buffer,
				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
		if (ret == 0)
			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
				per_cpu_ptr(size_buf->data, cpu_id)->entries;
	}

	return ret;
}
#endif /* CONFIG_TRACER_MAX_TRACE */

static int __tracing_resize_ring_buffer(struct trace_array *tr,
					unsigned long size, int cpu)
{
	int ret;

	/*
	 * If kernel or user changes the size of the ring buffer
	 * we use the size that was given, and we can forget about
	 * expanding it later.
	 */
	ring_buffer_expanded = true;

	/* May be called before buffers are initialized */
	if (!tr->trace_buffer.buffer)
		return 0;

	ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
	if (ret < 0)
		return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
	    !tr->current_trace->use_max_tr)
		goto out;

	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
	if (ret < 0) {
		int r = resize_buffer_duplicate_size(&tr->trace_buffer,
						     &tr->trace_buffer, cpu);
		if (r < 0) {
			/*
			 * AARGH! We are left with different
			 * size max buffer!!!!
			 * The max buffer is our "snapshot" buffer.
			 * When a tracer needs a snapshot (one of the
			 * latency tracers), it swaps the max buffer
			 * with the saved snapshot. We succeeded in updating the
			 * size of the main buffer, but failed to
			 * update the size of the max buffer. But when we tried
			 * to reset the main buffer to the original size, we
			 * failed there too. This is very unlikely to
			 * happen, but if it does, warn and kill all
			 * tracing.
			 */
			WARN_ON(1);
			tracing_disabled = 1;
		}
		return ret;
	}

	if (cpu == RING_BUFFER_ALL_CPUS)
		set_buffer_entries(&tr->max_buffer, size);
	else
		per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;

 out:
#endif /* CONFIG_TRACER_MAX_TRACE */

	if (cpu == RING_BUFFER_ALL_CPUS)
		set_buffer_entries(&tr->trace_buffer, size);
	else
		per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;

	return ret;
}

static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
					  unsigned long size, int cpu_id)
{
	int ret = size;

	mutex_lock(&trace_types_lock);

	if (cpu_id != RING_BUFFER_ALL_CPUS) {
		/* make sure, this cpu is enabled in the mask */
		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
			ret = -EINVAL;
			goto out;
		}
	}

	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
	if (ret < 0)
		ret = -ENOMEM;

out:
	mutex_unlock(&trace_types_lock);

	return ret;
}


/**
 * tracing_update_buffers - used by tracing facility to expand ring buffers
 *
 * To save memory when tracing is never used on a system that has it
 * configured in, the ring buffers are set to a minimum size. Once a
 * user starts to use the tracing facility, they need to grow to
 * their default size.
 *
 * This function is to be called when a tracer is about to be used.
 */
int tracing_update_buffers(void)
{
	int ret = 0;

	mutex_lock(&trace_types_lock);
	if (!ring_buffer_expanded)
		ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
	mutex_unlock(&trace_types_lock);

	return ret;
}

struct trace_option_dentry;

static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer);

/*
 * Used to clear out the tracer before deletion of an instance.
 * Must have trace_types_lock held.
 */
static void tracing_set_nop(struct trace_array *tr)
{
	if (tr->current_trace == &nop_trace)
		return;
	
	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	tr->current_trace = &nop_trace;
}

static void add_tracer_options(struct trace_array *tr, struct tracer *t)
{
	/* Only enable if the directory has been created already. */
	if (!tr->dir)
		return;

	create_trace_option_files(tr, t);
}

static int tracing_set_tracer(struct trace_array *tr, const char *buf)
{
	struct tracer *t;
#ifdef CONFIG_TRACER_MAX_TRACE
	bool had_max_tr;
#endif
	int ret = 0;

	mutex_lock(&trace_types_lock);

	if (!ring_buffer_expanded) {
		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
						RING_BUFFER_ALL_CPUS);
		if (ret < 0)
			goto out;
		ret = 0;
	}

	for (t = trace_types; t; t = t->next) {
		if (strcmp(t->name, buf) == 0)
			break;
	}
	if (!t) {
		ret = -EINVAL;
		goto out;
	}
	if (t == tr->current_trace)
		goto out;

	/* Some tracers are only allowed for the top level buffer */
	if (!trace_ok_for_array(t, tr)) {
		ret = -EINVAL;
		goto out;
	}

	/* If trace pipe files are being read, we can't change the tracer */
	if (tr->current_trace->ref) {
		ret = -EBUSY;
		goto out;
	}

	trace_branch_disable();

	tr->current_trace->enabled--;

	if (tr->current_trace->reset)
		tr->current_trace->reset(tr);

	/* Current trace needs to be nop_trace before synchronize_sched */
	tr->current_trace = &nop_trace;

#ifdef CONFIG_TRACER_MAX_TRACE
	had_max_tr = tr->allocated_snapshot;

	if (had_max_tr && !t->use_max_tr) {
		/*
		 * We need to make sure that the update_max_tr sees that
		 * current_trace changed to nop_trace to keep it from
		 * swapping the buffers after we resize it.
		 * The update_max_tr is called from interrupts disabled
		 * so a synchronized_sched() is sufficient.
		 */
		synchronize_sched();
		free_snapshot(tr);
	}
#endif

#ifdef CONFIG_TRACER_MAX_TRACE
	if (t->use_max_tr && !had_max_tr) {
		ret = alloc_snapshot(tr);
		if (ret < 0)
			goto out;
	}
#endif

	if (t->init) {
		ret = tracer_init(t, tr);
		if (ret)
			goto out;
	}

	tr->current_trace = t;
	tr->current_trace->enabled++;
	trace_branch_enable(tr);
 out:
	mutex_unlock(&trace_types_lock);

	return ret;
}

static ssize_t
tracing_set_trace_write(struct file *filp, const char __user *ubuf,
			size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[MAX_TRACER_SIZE+1];
	int i;
	size_t ret;
	int err;

	ret = cnt;

	if (cnt > MAX_TRACER_SIZE)
		cnt = MAX_TRACER_SIZE;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	/* strip ending whitespace. */
	for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
		buf[i] = 0;

	err = tracing_set_tracer(tr, buf);
	if (err)
		return err;

	*ppos += ret;

	return ret;
}

static ssize_t
tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	char buf[64];
	int r;

	r = snprintf(buf, sizeof(buf), "%ld\n",
		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
	if (r > sizeof(buf))
		r = sizeof(buf);
	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static ssize_t
tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	*ptr = val * 1000;

	return cnt;
}

static ssize_t
tracing_thresh_read(struct file *filp, char __user *ubuf,
		    size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
}

static ssize_t
tracing_thresh_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	int ret;

	mutex_lock(&trace_types_lock);
	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
	if (ret < 0)
		goto out;

	if (tr->current_trace->update_thresh) {
		ret = tr->current_trace->update_thresh(tr);
		if (ret < 0)
			goto out;
	}

	ret = cnt;
out:
	mutex_unlock(&trace_types_lock);

	return ret;
}

#ifdef CONFIG_TRACER_MAX_TRACE

static ssize_t
tracing_max_lat_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
}

static ssize_t
tracing_max_lat_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
}

#endif

static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	int ret = 0;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	mutex_lock(&trace_types_lock);

	/* create a buffer to store the information to pass to userspace */
	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
	if (!iter) {
		ret = -ENOMEM;
		__trace_array_put(tr);
		goto out;
	}

	trace_seq_init(&iter->seq);
	iter->trace = tr->current_trace;

	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
		ret = -ENOMEM;
		goto fail;
	}

	/* trace pipe does not show start of buffer */
	cpumask_setall(iter->started);

	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
		iter->iter_flags |= TRACE_FILE_LAT_FMT;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	iter->tr = tr;
	iter->trace_buffer = &tr->trace_buffer;
	iter->cpu_file = tracing_get_cpu(inode);
	mutex_init(&iter->mutex);
	filp->private_data = iter;

	if (iter->trace->pipe_open)
		iter->trace->pipe_open(iter);

	nonseekable_open(inode, filp);

	tr->current_trace->ref++;
out:
	mutex_unlock(&trace_types_lock);
	return ret;

fail:
	kfree(iter->trace);
	kfree(iter);
	__trace_array_put(tr);
	mutex_unlock(&trace_types_lock);
	return ret;
}

static int tracing_release_pipe(struct inode *inode, struct file *file)
{
	struct trace_iterator *iter = file->private_data;
	struct trace_array *tr = inode->i_private;

	mutex_lock(&trace_types_lock);

	tr->current_trace->ref--;

	if (iter->trace->pipe_close)
		iter->trace->pipe_close(iter);

	mutex_unlock(&trace_types_lock);

	free_cpumask_var(iter->started);
	mutex_destroy(&iter->mutex);
	kfree(iter);

	trace_array_put(tr);

	return 0;
}

static unsigned int
trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
{
	struct trace_array *tr = iter->tr;

	/* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
		return POLLIN | POLLRDNORM;

	if (tr->trace_flags & TRACE_ITER_BLOCK)
		/*
		 * Always select as readable when in blocking mode
		 */
		return POLLIN | POLLRDNORM;
	else
		return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
					     filp, poll_table);
}

static unsigned int
tracing_poll_pipe(struct file *filp, poll_table *poll_table)
{
	struct trace_iterator *iter = filp->private_data;

	return trace_poll(iter, filp, poll_table);
}

/* Must be called with iter->mutex held. */
static int tracing_wait_pipe(struct file *filp)
{
	struct trace_iterator *iter = filp->private_data;
	int ret;

	while (trace_empty(iter)) {

		if ((filp->f_flags & O_NONBLOCK)) {
			return -EAGAIN;
		}

		/*
		 * We block until we read something and tracing is disabled.
		 * We still block if tracing is disabled, but we have never
		 * read anything. This allows a user to cat this file, and
		 * then enable tracing. But after we have read something,
		 * we give an EOF when tracing is again disabled.
		 *
		 * iter->pos will be 0 if we haven't read anything.
		 */
		if (!tracing_is_on() && iter->pos)
			break;

		mutex_unlock(&iter->mutex);

		ret = wait_on_pipe(iter, false);

		mutex_lock(&iter->mutex);

		if (ret)
			return ret;
	}

	return 1;
}

/*
 * Consumer reader.
 */
static ssize_t
tracing_read_pipe(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	struct trace_iterator *iter = filp->private_data;
	ssize_t sret;

	/* return any leftover data */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (sret != -EBUSY)
		return sret;

	trace_seq_init(&iter->seq);

	/*
	 * Avoid more than one consumer on a single file descriptor
	 * This is just a matter of traces coherency, the ring buffer itself
	 * is protected.
	 */
	mutex_lock(&iter->mutex);
	if (iter->trace->read) {
		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
		if (sret)
			goto out;
	}

waitagain:
	sret = tracing_wait_pipe(filp);
	if (sret <= 0)
		goto out;

	/* stop when tracing is finished */
	if (trace_empty(iter)) {
		sret = 0;
		goto out;
	}

	if (cnt >= PAGE_SIZE)
		cnt = PAGE_SIZE - 1;

	/* reset all but tr, trace, and overruns */
	memset(&iter->seq, 0,
	       sizeof(struct trace_iterator) -
	       offsetof(struct trace_iterator, seq));
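	/*
	 * The memset above wipes everything from the 'seq' member to the
	 * end of the iterator; members laid out before 'seq' are kept.
	 */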
	cpumask_clear(iter->started);
	iter->pos = -1;

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);
	while (trace_find_next_entry_inc(iter) != NULL) {
		enum print_line_t ret;
		int save_len = iter->seq.seq.len;

		ret = print_trace_line(iter);
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			/* don't print partial lines */
			iter->seq.seq.len = save_len;
			break;
		}
		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);

		if (trace_seq_used(&iter->seq) >= cnt)
			break;

		/*
		 * Setting the full flag means we reached the trace_seq buffer
		 * size and we should leave by partial output condition above.
		 * One of the trace_seq_* functions is not used properly.
		 */
		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
			  iter->ent->type);
	}
	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();

	/* Now copy what we have to the user */
	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
	if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
		trace_seq_init(&iter->seq);

	/*
	 * If there was nothing to send to user, in spite of consuming trace
	 * entries, go back to wait for more entries.
	 */
	if (sret == -EBUSY)
		goto waitagain;

out:
	mutex_unlock(&iter->mutex);

	return sret;
}

static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
				     unsigned int idx)
{
	__free_page(spd->pages[idx]);
}

static const struct pipe_buf_operations tracing_pipe_buf_ops = {
	.can_merge		= 0,
	.confirm		= generic_pipe_buf_confirm,
	.release		= generic_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= generic_pipe_buf_get,
};

static size_t
tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
{
	size_t count;
	int save_len;
	int ret;

	/* Seq buffer is page-sized, exactly what we need. */
	for (;;) {
		save_len = iter->seq.seq.len;
		ret = print_trace_line(iter);

		if (trace_seq_has_overflowed(&iter->seq)) {
			iter->seq.seq.len = save_len;
			break;
		}

		/*
		 * This should not be hit, because it should only
		 * be set if the iter->seq overflowed. But check it
		 * anyway to be safe.
		 */
		if (ret == TRACE_TYPE_PARTIAL_LINE) {
			iter->seq.seq.len = save_len;
			break;
		}

		count = trace_seq_used(&iter->seq) - save_len;
		if (rem < count) {
			rem = 0;
			iter->seq.seq.len = save_len;
			break;
		}

		if (ret != TRACE_TYPE_NO_CONSUME)
			trace_consume(iter);
		rem -= count;
		if (!trace_find_next_entry_inc(iter))	{
			rem = 0;
			iter->ent = NULL;
			break;
		}
	}

	return rem;
}

static ssize_t tracing_splice_read_pipe(struct file *filp,
					loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t len,
					unsigned int flags)
{
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct trace_iterator *iter = filp->private_data;
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages	= 0, /* This gets updated below. */
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.flags		= flags,
		.ops		= &tracing_pipe_buf_ops,
		.spd_release	= tracing_spd_release_pipe,
	};
	ssize_t ret;
	size_t rem;
	unsigned int i;

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

	mutex_lock(&iter->mutex);

	if (iter->trace->splice_read) {
		ret = iter->trace->splice_read(iter, filp,
					       ppos, pipe, len, flags);
		if (ret)
			goto out_err;
	}

	ret = tracing_wait_pipe(filp);
	if (ret <= 0)
		goto out_err;

	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
		ret = -EFAULT;
		goto out_err;
	}

	trace_event_read_lock();
	trace_access_lock(iter->cpu_file);

	/* Fill as many pages as possible. */
	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
		spd.pages[i] = alloc_page(GFP_KERNEL);
		if (!spd.pages[i])
			break;

		rem = tracing_fill_pipe_page(rem, iter);

		/* Copy the data into the page, so we can start over. */
		ret = trace_seq_to_buffer(&iter->seq,
					  page_address(spd.pages[i]),
					  trace_seq_used(&iter->seq));
		if (ret < 0) {
			__free_page(spd.pages[i]);
			break;
		}
		spd.partial[i].offset = 0;
		spd.partial[i].len = trace_seq_used(&iter->seq);

		trace_seq_init(&iter->seq);
	}

	trace_access_unlock(iter->cpu_file);
	trace_event_read_unlock();
	mutex_unlock(&iter->mutex);

	spd.nr_pages = i;

	if (i)
		ret = splice_to_pipe(pipe, &spd);
	else
		ret = 0;
out:
	splice_shrink_spd(&spd);
	return ret;

out_err:
	mutex_unlock(&iter->mutex);
	goto out;
}

static ssize_t
tracing_entries_read(struct file *filp, char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	int cpu = tracing_get_cpu(inode);
	char buf[64];
	int r = 0;
	ssize_t ret;

	mutex_lock(&trace_types_lock);

	if (cpu == RING_BUFFER_ALL_CPUS) {
		int cpu, buf_size_same;
		unsigned long size;

		size = 0;
		buf_size_same = 1;
		/* check if all cpu sizes are same */
		for_each_tracing_cpu(cpu) {
			/* fill in the size from first enabled cpu */
			if (size == 0)
				size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
			if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
				buf_size_same = 0;
				break;
			}
		}

		if (buf_size_same) {
			if (!ring_buffer_expanded)
				r = sprintf(buf, "%lu (expanded: %lu)\n",
					    size >> 10,
					    trace_buf_size >> 10);
			else
				r = sprintf(buf, "%lu\n", size >> 10);
		} else
			r = sprintf(buf, "X\n");
	} else
		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);

	mutex_unlock(&trace_types_lock);

	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
	return ret;
}

static ssize_t
tracing_entries_write(struct file *filp, const char __user *ubuf,
		      size_t cnt, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	/* must have at least 1 entry */
	if (!val)
		return -EINVAL;

	/* value is in KB */
	val <<= 10;
	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static ssize_t
tracing_total_entries_read(struct file *filp, char __user *ubuf,
				size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r, cpu;
	unsigned long size = 0, expanded_size = 0;

	mutex_lock(&trace_types_lock);
	for_each_tracing_cpu(cpu) {
		size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
		if (!ring_buffer_expanded)
			expanded_size += trace_buf_size >> 10;
	}
	if (ring_buffer_expanded)
		r = sprintf(buf, "%lu\n", size);
	else
		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
	mutex_unlock(&trace_types_lock);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	/*
	 * There is no need to read what the user has written; this function
	 * only exists so that an "echo" into the file does not report an error
	 */

	*ppos += cnt;

	return cnt;
}

static int
tracing_free_buffer_release(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;

	/* disable tracing ? */
	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
		tracer_tracing_off(tr);
	/* resize the ring buffer to 0 */
	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);

	trace_array_put(tr);

	return 0;
}

static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
					size_t cnt, loff_t *fpos)
{
	unsigned long addr = (unsigned long)ubuf;
	struct trace_array *tr = filp->private_data;
	struct ring_buffer_event *event;
	struct ring_buffer *buffer;
	struct print_entry *entry;
	unsigned long irq_flags;
	struct page *pages[2];
	void *map_page[2];
	int nr_pages = 1;
	ssize_t written;
	int offset;
	int size;
	int len;
	int ret;
	int i;

	if (tracing_disabled)
		return -EINVAL;

	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
		return -EINVAL;

	if (cnt > TRACE_BUF_SIZE)
		cnt = TRACE_BUF_SIZE;

	/*
	 * Userspace is injecting traces into the kernel trace buffer.
	 * We want to be as non intrusive as possible.
	 * To do so, we do not want to allocate any special buffers
	 * or take any locks, but instead write the userspace data
	 * straight into the ring buffer.
	 *
	 * First we need to pin the userspace buffer into memory,
	 * which it most likely already is, because the caller just referenced it.
	 * But there's no guarantee that it is. By using get_user_pages_fast()
	 * and kmap_atomic/kunmap_atomic() we can get access to the
	 * pages directly. We then write the data directly into the
	 * ring buffer.
	 */
	BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);

	/* check if we cross pages */
	if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
		nr_pages = 2;
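
	/*
	 * Example: with 4K pages, a 32-byte write starting at offset
	 * 0xff0 of a page ends at offset 0x00f of the next page, so
	 * both pages have to be pinned and mapped below.
	 */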

	offset = addr & (PAGE_SIZE - 1);
	addr &= PAGE_MASK;

	ret = get_user_pages_fast(addr, nr_pages, 0, pages);
	if (ret < nr_pages) {
		while (--ret >= 0)
			put_page(pages[ret]);
		written = -EFAULT;
		goto out;
	}

	for (i = 0; i < nr_pages; i++)
		map_page[i] = kmap_atomic(pages[i]);

	local_save_flags(irq_flags);
	size = sizeof(*entry) + cnt + 2; /* possible \n added */
	buffer = tr->trace_buffer.buffer;
	event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					  irq_flags, preempt_count());
	if (!event) {
		/* Ring buffer disabled, return as if not open for write */
		written = -EBADF;
		goto out_unlock;
	}

	entry = ring_buffer_event_data(event);
	entry->ip = _THIS_IP_;

	if (nr_pages == 2) {
		len = PAGE_SIZE - offset;
		memcpy(&entry->buf, map_page[0] + offset, len);
		memcpy(&entry->buf[len], map_page[1], cnt - len);
	} else
		memcpy(&entry->buf, map_page[0] + offset, cnt);

	if (entry->buf[cnt - 1] != '\n') {
		entry->buf[cnt] = '\n';
		entry->buf[cnt + 1] = '\0';
	} else
		entry->buf[cnt] = '\0';

	__buffer_unlock_commit(buffer, event);

	written = cnt;

	*fpos += written;

 out_unlock:
	for (i = nr_pages - 1; i >= 0; i--) {
		kunmap_atomic(map_page[i]);
		put_page(pages[i]);
	}
 out:
	return written;
}

static int tracing_clock_show(struct seq_file *m, void *v)
{
	struct trace_array *tr = m->private;
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
		seq_printf(m,
			"%s%s%s%s", i ? " " : "",
			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
			i == tr->clock_id ? "]" : "");
	seq_putc(m, '\n');

	return 0;
}

static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
		if (strcmp(trace_clocks[i].name, clockstr) == 0)
			break;
	}
	if (i == ARRAY_SIZE(trace_clocks))
		return -EINVAL;

	mutex_lock(&trace_types_lock);

	tr->clock_id = i;

	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);

	/*
	 * New clock may not be consistent with the previous clock.
	 * Reset the buffer so that it doesn't have incomparable timestamps.
	 */
	tracing_reset_online_cpus(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
	tracing_reset_online_cpus(&tr->max_buffer);
#endif

	mutex_unlock(&trace_types_lock);

	return 0;
}

static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *fpos)
{
	struct seq_file *m = filp->private_data;
	struct trace_array *tr = m->private;
	char buf[64];
	const char *clockstr;
	int ret;

	if (cnt >= sizeof(buf))
		return -EINVAL;

	if (copy_from_user(buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;

	clockstr = strstrip(buf);

	ret = tracing_set_clock(tr, clockstr);
	if (ret)
		return ret;

	*fpos += cnt;

	return cnt;
}

static int tracing_clock_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	int ret;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr))
		return -ENODEV;

	ret = single_open(file, tracing_clock_show, inode->i_private);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

struct ftrace_buffer_info {
	struct trace_iterator	iter;
	void			*spare;
	unsigned int		read;
};

#ifdef CONFIG_TRACER_SNAPSHOT
static int tracing_snapshot_open(struct inode *inode, struct file *file)
{
	struct trace_array *tr = inode->i_private;
	struct trace_iterator *iter;
	struct seq_file *m;
	int ret = 0;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	if (file->f_mode & FMODE_READ) {
		iter = __tracing_open(inode, file, true);
		if (IS_ERR(iter))
			ret = PTR_ERR(iter);
	} else {
		/* Writes still need the seq_file to hold the private data */
		ret = -ENOMEM;
		m = kzalloc(sizeof(*m), GFP_KERNEL);
		if (!m)
			goto out;
		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
		if (!iter) {
			kfree(m);
			goto out;
		}
		ret = 0;

		iter->tr = tr;
		iter->trace_buffer = &tr->max_buffer;
		iter->cpu_file = tracing_get_cpu(inode);
		m->private = iter;
		file->private_data = m;
	}
out:
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

static ssize_t
tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
		       loff_t *ppos)
{
	struct seq_file *m = filp->private_data;
	struct trace_iterator *iter = m->private;
	struct trace_array *tr = iter->tr;
	unsigned long val;
	int ret;

	ret = tracing_update_buffers();
	if (ret < 0)
		return ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	mutex_lock(&trace_types_lock);
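
	/*
	 * Value semantics, as implemented below: writing 0 frees the
	 * snapshot buffer (all-CPUs only), 1 allocates it if needed and
	 * takes a snapshot by swapping buffers, and anything else just
	 * clears the snapshot buffer's contents.
	 */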

	if (tr->current_trace->use_max_tr) {
		ret = -EBUSY;
		goto out;
	}

	switch (val) {
	case 0:
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
		if (tr->allocated_snapshot)
			free_snapshot(tr);
		break;
	case 1:
/* Only allow per-cpu swap if the ring buffer supports it */
#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
			ret = -EINVAL;
			break;
		}
#endif
		if (!tr->allocated_snapshot) {
			ret = alloc_snapshot(tr);
			if (ret < 0)
				break;
		}
		local_irq_disable();
		/* Now, we're going to swap */
		if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
			update_max_tr(tr, current, smp_processor_id());
		else
			update_max_tr_single(tr, current, iter->cpu_file);
		local_irq_enable();
		break;
	default:
		if (tr->allocated_snapshot) {
			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
				tracing_reset_online_cpus(&tr->max_buffer);
			else
				tracing_reset(&tr->max_buffer, iter->cpu_file);
		}
		break;
	}

	if (ret >= 0) {
		*ppos += cnt;
		ret = cnt;
	}
out:
	mutex_unlock(&trace_types_lock);
	return ret;
}

static int tracing_snapshot_release(struct inode *inode, struct file *file)
{
	struct seq_file *m = file->private_data;
	int ret;

	ret = tracing_release(inode, file);

	if (file->f_mode & FMODE_READ)
		return ret;

	/* If write only, the seq_file is just a stub */
	if (m)
		kfree(m->private);
	kfree(m);

	return 0;
}

static int tracing_buffers_open(struct inode *inode, struct file *filp);
static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
				    size_t count, loff_t *ppos);
static int tracing_buffers_release(struct inode *inode, struct file *file);
static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);

static int snapshot_raw_open(struct inode *inode, struct file *filp)
{
	struct ftrace_buffer_info *info;
	int ret;

	ret = tracing_buffers_open(inode, filp);
	if (ret < 0)
		return ret;

	info = filp->private_data;

	if (info->iter.trace->use_max_tr) {
		tracing_buffers_release(inode, filp);
		return -EBUSY;
	}

	info->iter.snapshot = true;
	info->iter.trace_buffer = &info->iter.tr->max_buffer;

	return ret;
}

#endif /* CONFIG_TRACER_SNAPSHOT */


static const struct file_operations tracing_thresh_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_thresh_read,
	.write		= tracing_thresh_write,
	.llseek		= generic_file_llseek,
};

#ifdef CONFIG_TRACER_MAX_TRACE
static const struct file_operations tracing_max_lat_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_max_lat_read,
	.write		= tracing_max_lat_write,
	.llseek		= generic_file_llseek,
};
#endif

static const struct file_operations set_tracer_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_set_trace_read,
	.write		= tracing_set_trace_write,
	.llseek		= generic_file_llseek,
};

static const struct file_operations tracing_pipe_fops = {
	.open		= tracing_open_pipe,
	.poll		= tracing_poll_pipe,
	.read		= tracing_read_pipe,
	.splice_read	= tracing_splice_read_pipe,
	.release	= tracing_release_pipe,
	.llseek		= no_llseek,
};

static const struct file_operations tracing_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_entries_read,
	.write		= tracing_entries_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_total_entries_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_total_entries_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations tracing_free_buffer_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_free_buffer_write,
	.release	= tracing_free_buffer_release,
};

static const struct file_operations tracing_mark_fops = {
	.open		= tracing_open_generic_tr,
	.write		= tracing_mark_write,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

static const struct file_operations trace_clock_fops = {
	.open		= tracing_clock_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= tracing_single_release_tr,
	.write		= tracing_clock_write,
};

#ifdef CONFIG_TRACER_SNAPSHOT
static const struct file_operations snapshot_fops = {
	.open		= tracing_snapshot_open,
	.read		= seq_read,
	.write		= tracing_snapshot_write,
5593
	.llseek		= tracing_lseek,
5594
	.release	= tracing_snapshot_release,
5595 5596
};

5597 5598 5599 5600 5601 5602
static const struct file_operations snapshot_raw_fops = {
	.open		= snapshot_raw_open,
	.read		= tracing_buffers_read,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
5603 5604
};

5605 5606
#endif /* CONFIG_TRACER_SNAPSHOT */

5607 5608
static int tracing_buffers_open(struct inode *inode, struct file *filp)
{
	struct trace_array *tr = inode->i_private;
	struct ftrace_buffer_info *info;
	int ret;

	if (tracing_disabled)
		return -ENODEV;

	if (trace_array_get(tr) < 0)
		return -ENODEV;

	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (!info) {
		trace_array_put(tr);
		return -ENOMEM;
	}

	mutex_lock(&trace_types_lock);

	info->iter.tr		= tr;
	info->iter.cpu_file	= tracing_get_cpu(inode);
	info->iter.trace	= tr->current_trace;
	info->iter.trace_buffer = &tr->trace_buffer;
	info->spare		= NULL;
	/* Force reading ring buffer for first read */
	info->read		= (unsigned int)-1;

	filp->private_data = info;

	tr->current_trace->ref++;

	mutex_unlock(&trace_types_lock);

	ret = nonseekable_open(inode, filp);
	if (ret < 0)
		trace_array_put(tr);

	return ret;
}

static unsigned int
tracing_buffers_poll(struct file *filp, poll_table *poll_table)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;

	return trace_poll(iter, filp, poll_table);
}

static ssize_t
tracing_buffers_read(struct file *filp, char __user *ubuf,
		     size_t count, loff_t *ppos)
{
	struct ftrace_buffer_info *info = filp->private_data;
	struct trace_iterator *iter = &info->iter;
	ssize_t ret;
	ssize_t size;

	if (!count)
		return 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	if (!info->spare)
		info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
							  iter->cpu_file);
	if (!info->spare)
		return -ENOMEM;

	/* Do we have previous read data to read? */
	if (info->read < PAGE_SIZE)
		goto read;

 again:
	trace_access_lock(iter->cpu_file);
	ret = ring_buffer_read_page(iter->trace_buffer->buffer,
				    &info->spare,
				    count,
				    iter->cpu_file, 0);
	trace_access_unlock(iter->cpu_file);

	if (ret < 0) {
		if (trace_empty(iter)) {
			if ((filp->f_flags & O_NONBLOCK))
				return -EAGAIN;

			ret = wait_on_pipe(iter, false);
			if (ret)
				return ret;

			goto again;
		}
		return 0;
	}

	info->read = 0;
 read:
	size = PAGE_SIZE - info->read;
	if (size > count)
		size = count;

	ret = copy_to_user(ubuf, info->spare + info->read, size);
	if (ret == size)
		return -EFAULT;

	size -= ret;

	*ppos += size;
	info->read += size;

	return size;
}

static int tracing_buffers_release(struct inode *inode, struct file *file)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;

	mutex_lock(&trace_types_lock);

	iter->tr->current_trace->ref--;

	__trace_array_put(iter->tr);

	if (info->spare)
		ring_buffer_free_read_page(iter->trace_buffer->buffer, info->spare);
	kfree(info);

	mutex_unlock(&trace_types_lock);

	return 0;
}

struct buffer_ref {
	struct ring_buffer	*buffer;
	void			*page;
	int			ref;
};

static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	if (--ref->ref)
		return;

	ring_buffer_free_read_page(ref->buffer, ref->page);
	kfree(ref);
	buf->private = 0;
}

static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
				struct pipe_buffer *buf)
{
	struct buffer_ref *ref = (struct buffer_ref *)buf->private;

	ref->ref++;
}

/* Pipe buffer operations for a buffer. */
static const struct pipe_buf_operations buffer_pipe_buf_ops = {
	.can_merge		= 0,
	.confirm		= generic_pipe_buf_confirm,
	.release		= buffer_pipe_buf_release,
	.steal			= generic_pipe_buf_steal,
	.get			= buffer_pipe_buf_get,
};

/*
 * Callback from splice_to_pipe(), if we need to release some pages
 * at the end of the spd in case we error'ed out in filling the pipe.
 */
static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
{
	struct buffer_ref *ref =
		(struct buffer_ref *)spd->partial[i].private;

	if (--ref->ref)
		return;

	ring_buffer_free_read_page(ref->buffer, ref->page);
	kfree(ref);
	spd->partial[i].private = 0;
}

static ssize_t
tracing_buffers_splice_read(struct file *file, loff_t *ppos,
			    struct pipe_inode_info *pipe, size_t len,
			    unsigned int flags)
{
	struct ftrace_buffer_info *info = file->private_data;
	struct trace_iterator *iter = &info->iter;
	struct partial_page partial_def[PIPE_DEF_BUFFERS];
	struct page *pages_def[PIPE_DEF_BUFFERS];
	struct splice_pipe_desc spd = {
		.pages		= pages_def,
		.partial	= partial_def,
		.nr_pages_max	= PIPE_DEF_BUFFERS,
		.flags		= flags,
		.ops		= &buffer_pipe_buf_ops,
		.spd_release	= buffer_spd_release,
	};
	struct buffer_ref *ref;
	int entries, size, i;
	ssize_t ret = 0;

#ifdef CONFIG_TRACER_MAX_TRACE
	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
		return -EBUSY;
#endif

	/* Reject unaligned requests before any pipe buffers are grown. */
	if (*ppos & (PAGE_SIZE - 1))
		return -EINVAL;

	if (len & (PAGE_SIZE - 1)) {
		if (len < PAGE_SIZE)
			return -EINVAL;
		len &= PAGE_MASK;
	}

	if (splice_grow_spd(pipe, &spd))
		return -ENOMEM;

 again:
	trace_access_lock(iter->cpu_file);
	entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);

	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
		struct page *page;
		int r;

		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
		if (!ref) {
			ret = -ENOMEM;
			break;
		}

		ref->ref = 1;
		ref->buffer = iter->trace_buffer->buffer;
		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
		if (!ref->page) {
			ret = -ENOMEM;
			kfree(ref);
			break;
		}

		r = ring_buffer_read_page(ref->buffer, &ref->page,
					  len, iter->cpu_file, 1);
		if (r < 0) {
			ring_buffer_free_read_page(ref->buffer, ref->page);
			kfree(ref);
			break;
		}

		/*
		 * zero out any left over data, this is going to
		 * user land.
		 */
		size = ring_buffer_page_len(ref->page);
		if (size < PAGE_SIZE)
			memset(ref->page + size, 0, PAGE_SIZE - size);

		page = virt_to_page(ref->page);

		spd.pages[i] = page;
		spd.partial[i].len = PAGE_SIZE;
		spd.partial[i].offset = 0;
		spd.partial[i].private = (unsigned long)ref;
		spd.nr_pages++;
		*ppos += PAGE_SIZE;

		entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
	}

	trace_access_unlock(iter->cpu_file);
	spd.nr_pages = i;

	/* did we read anything? */
	if (!spd.nr_pages) {
		if (ret)
			goto out;

		ret = -EAGAIN;
		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
			goto out;

		ret = wait_on_pipe(iter, true);
		if (ret)
			goto out;

		goto again;
	}

	ret = splice_to_pipe(pipe, &spd);
out:
	/* The grown spd must be shrunk on every exit path. */
	splice_shrink_spd(&spd);

	return ret;
}
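/*
 * Illustrative sketch only (not part of the kernel build, paths and sizes
 * are assumptions): a user-space reader of per_cpu/cpuN/trace_pipe_raw
 * typically splices whole pages of ring-buffer data into a pipe, e.g.:
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY | O_NONBLOCK);
 *	int p[2];
 *	pipe(p);
 *	ssize_t n = splice(fd, NULL, p[1], NULL, 16 * 4096, SPLICE_F_NONBLOCK);
 *
 * As enforced above, the file offset must stay page aligned and the request
 * length is rounded down to a multiple of PAGE_SIZE.
 */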

static const struct file_operations tracing_buffers_fops = {
	.open		= tracing_buffers_open,
	.read		= tracing_buffers_read,
	.poll		= tracing_buffers_poll,
	.release	= tracing_buffers_release,
	.splice_read	= tracing_buffers_splice_read,
	.llseek		= no_llseek,
};

static ssize_t
tracing_stats_read(struct file *filp, char __user *ubuf,
		   size_t count, loff_t *ppos)
{
	struct inode *inode = file_inode(filp);
	struct trace_array *tr = inode->i_private;
	struct trace_buffer *trace_buf = &tr->trace_buffer;
	int cpu = tracing_get_cpu(inode);
	struct trace_seq *s;
	unsigned long cnt;
	unsigned long long t;
	unsigned long usec_rem;

	s = kmalloc(sizeof(*s), GFP_KERNEL);
	if (!s)
		return -ENOMEM;

	trace_seq_init(s);

	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "entries: %ld\n", cnt);

	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "overrun: %ld\n", cnt);

	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "commit overrun: %ld\n", cnt);

	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "bytes: %ld\n", cnt);

	if (trace_clocks[tr->clock_id].in_ns) {
		/* local or global for trace_clock */
		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
								t, usec_rem);

		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
		usec_rem = do_div(t, USEC_PER_SEC);
		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
	} else {
		/* counter or tsc mode for trace_clock */
		trace_seq_printf(s, "oldest event ts: %llu\n",
				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));

		trace_seq_printf(s, "now ts: %llu\n",
				ring_buffer_time_stamp(trace_buf->buffer, cpu));
	}

	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "dropped events: %ld\n", cnt);

	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
	trace_seq_printf(s, "read events: %ld\n", cnt);

	count = simple_read_from_buffer(ubuf, count, ppos,
					s->buffer, trace_seq_used(s));

	kfree(s);

	return count;
}
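/*
 * For reference, reading per_cpu/cpuN/stats yields one "key: value" line per
 * counter gathered above; the numbers below are only an illustration:
 *
 *	entries: 129
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 7632
 *	oldest event ts: 2725.805036
 *	now ts: 2726.107510
 *	dropped events: 0
 *	read events: 0
 */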

static const struct file_operations tracing_stats_fops = {
	.open		= tracing_open_generic_tr,
	.read		= tracing_stats_read,
	.llseek		= generic_file_llseek,
	.release	= tracing_release_generic_tr,
};

#ifdef CONFIG_DYNAMIC_FTRACE

int __weak ftrace_arch_read_dyn_info(char *buf, int size)
{
	return 0;
}

static ssize_t
tracing_read_dyn_info(struct file *filp, char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	static char ftrace_dyn_info_buffer[1024];
	static DEFINE_MUTEX(dyn_info_mutex);
	unsigned long *p = filp->private_data;
	char *buf = ftrace_dyn_info_buffer;
	int size = ARRAY_SIZE(ftrace_dyn_info_buffer);
	int r;

	mutex_lock(&dyn_info_mutex);
	r = sprintf(buf, "%ld ", *p);

	r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r);
	buf[r++] = '\n';

	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);

	mutex_unlock(&dyn_info_mutex);

	return r;
}

static const struct file_operations tracing_dyn_info_fops = {
	.open		= tracing_open_generic,
	.read		= tracing_read_dyn_info,
	.llseek		= generic_file_llseek,
};
#endif /* CONFIG_DYNAMIC_FTRACE */

#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
static void
ftrace_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	tracing_snapshot();
}

static void
ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip, void **data)
{
	unsigned long *count = (long *)data;

	if (!*count)
		return;

	if (*count != -1)
		(*count)--;

	tracing_snapshot();
}

static int
ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
		      struct ftrace_probe_ops *ops, void *data)
{
	long count = (long)data;

	seq_printf(m, "%ps:", (void *)ip);

	seq_puts(m, "snapshot");

	if (count == -1)
		seq_puts(m, ":unlimited\n");
	else
		seq_printf(m, ":count=%ld\n", count);

	return 0;
}

static struct ftrace_probe_ops snapshot_probe_ops = {
	.func			= ftrace_snapshot,
	.print			= ftrace_snapshot_print,
};

static struct ftrace_probe_ops snapshot_count_probe_ops = {
	.func			= ftrace_count_snapshot,
	.print			= ftrace_snapshot_print,
};

static int
ftrace_trace_snapshot_callback(struct ftrace_hash *hash,
			       char *glob, char *cmd, char *param, int enable)
{
	struct ftrace_probe_ops *ops;
	void *count = (void *)-1;
	char *number;
	int ret;

	/* hash funcs only work with set_ftrace_filter */
	if (!enable)
		return -EINVAL;

	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;

	if (glob[0] == '!') {
		unregister_ftrace_function_probe_func(glob+1, ops);
		return 0;
	}

	if (!param)
		goto out_reg;

	number = strsep(&param, ":");

	if (!strlen(number))
		goto out_reg;

	/*
	 * We use the callback data field (which is a pointer)
	 * as our counter.
	 */
	ret = kstrtoul(number, 0, (unsigned long *)&count);
	if (ret)
		return ret;

 out_reg:
	ret = register_ftrace_function_probe(glob, ops, count);

	if (ret >= 0)
		alloc_snapshot(&global_trace);

	return ret < 0 ? ret : 0;
}

static struct ftrace_func_command ftrace_snapshot_cmd = {
	.name			= "snapshot",
	.func			= ftrace_trace_snapshot_callback,
};
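/*
 * Illustrative usage (a sketch, not kernel code): with CONFIG_DYNAMIC_FTRACE
 * and CONFIG_TRACER_SNAPSHOT enabled, the command registered above is driven
 * through set_ftrace_filter, e.g.:
 *
 *	echo 'schedule:snapshot'   > set_ftrace_filter   # snapshot on every hit
 *	echo 'schedule:snapshot:3' > set_ftrace_filter   # only the first 3 hits
 *	echo '!schedule:snapshot'  > set_ftrace_filter   # remove the probe
 *
 * "schedule" is just an example function name.
 */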

static __init int register_snapshot_cmd(void)
{
	return register_ftrace_command(&ftrace_snapshot_cmd);
}
#else
static inline __init int register_snapshot_cmd(void) { return 0; }
#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */

static struct dentry *tracing_get_dentry(struct trace_array *tr)
{
	if (WARN_ON(!tr->dir))
		return ERR_PTR(-ENODEV);

	/* Top directory uses NULL as the parent */
	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
		return NULL;

	/* All sub buffers have a descriptor */
	return tr->dir;
}

static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
{
	struct dentry *d_tracer;

	if (tr->percpu_dir)
		return tr->percpu_dir;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);

	WARN_ONCE(!tr->percpu_dir,
		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);

	return tr->percpu_dir;
}

static struct dentry *
trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
		      void *data, long cpu, const struct file_operations *fops)
{
	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);

	if (ret) /* See tracing_get_cpu() */
		d_inode(ret)->i_cdev = (void *)(cpu + 1);
	return ret;
}

static void
tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
{
	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
	struct dentry *d_cpu;
	char cpu_dir[30]; /* 30 characters should be more than enough */

	if (!d_percpu)
		return;

	snprintf(cpu_dir, 30, "cpu%ld", cpu);
	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
	if (!d_cpu) {
		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
		return;
	}

	/* per cpu trace_pipe */
	trace_create_cpu_file("trace_pipe", 0444, d_cpu,
				tr, cpu, &tracing_pipe_fops);

	/* per cpu trace */
	trace_create_cpu_file("trace", 0644, d_cpu,
				tr, cpu, &tracing_fops);

	trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
				tr, cpu, &tracing_buffers_fops);

	trace_create_cpu_file("stats", 0444, d_cpu,
				tr, cpu, &tracing_stats_fops);

	trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
				tr, cpu, &tracing_entries_fops);

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_cpu_file("snapshot", 0644, d_cpu,
				tr, cpu, &snapshot_fops);

	trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
				tr, cpu, &snapshot_raw_fops);
#endif
}

#ifdef CONFIG_FTRACE_SELFTEST
/* Let selftest have access to static functions in this file */
#include "trace_selftest.c"
#endif

static ssize_t
trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
			loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	char *buf;

	if (topt->flags->val & topt->opt->bit)
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	struct trace_option_dentry *topt = filp->private_data;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	if (!!(topt->flags->val & topt->opt->bit) != val) {
		mutex_lock(&trace_types_lock);
		ret = __set_tracer_option(topt->tr, topt->flags,
					  topt->opt, !val);
		mutex_unlock(&trace_types_lock);
		if (ret)
			return ret;
	}

	*ppos += cnt;

	return cnt;
}

static const struct file_operations trace_options_fops = {
	.open = tracing_open_generic,
	.read = trace_options_read,
	.write = trace_options_write,
	.llseek	= generic_file_llseek,
};

/*
 * In order to pass in both the trace_array descriptor as well as the index
 * to the flag that the trace option file represents, the trace_array
 * has a character array of trace_flags_index[], which holds the index
 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
 * The address of this character array is passed to the flag option file
 * read/write callbacks.
 *
 * In order to extract both the index and the trace_array descriptor,
 * get_tr_index() uses the following algorithm.
 *
 *   idx = *ptr;
 *
 * As the pointer itself contains the address of the index (remember
 * index[1] == 1).
 *
 * Then to get the trace_array descriptor, by subtracting that index
 * from the ptr, we get to the start of the index itself.
 *
 *   ptr - idx == &index[0]
 *
 * Then a simple container_of() from that pointer gets us to the
 * trace_array descriptor.
 */
static void get_tr_index(void *data, struct trace_array **ptr,
			 unsigned int *pindex)
{
	*pindex = *(unsigned char *)data;

	*ptr = container_of(data - *pindex, struct trace_array,
			    trace_flags_index);
}
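/*
 * Worked example (values are illustrative only): for the file backing flag
 * index 3, the callbacks receive data == &tr->trace_flags_index[3].  Since
 * trace_flags_index[3] holds 3, get_tr_index() reads *pindex == 3, steps
 * back with data - 3 to &tr->trace_flags_index[0], and container_of() then
 * recovers the enclosing trace_array.
 */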

static ssize_t
trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
			loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	char *buf;

	get_tr_index(tr_index, &tr, &index);

	if (tr->trace_flags & (1 << index))
		buf = "1\n";
	else
		buf = "0\n";

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
}

static ssize_t
trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
			 loff_t *ppos)
{
	void *tr_index = filp->private_data;
	struct trace_array *tr;
	unsigned int index;
	unsigned long val;
	int ret;

	get_tr_index(tr_index, &tr, &index);

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (val != 0 && val != 1)
		return -EINVAL;

	mutex_lock(&trace_types_lock);
	ret = set_tracer_flag(tr, 1 << index, val);
	mutex_unlock(&trace_types_lock);

	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static const struct file_operations trace_options_core_fops = {
	.open = tracing_open_generic,
	.read = trace_options_core_read,
	.write = trace_options_core_write,
	.llseek = generic_file_llseek,
};

struct dentry *trace_create_file(const char *name,
				 umode_t mode,
				 struct dentry *parent,
				 void *data,
				 const struct file_operations *fops)
{
	struct dentry *ret;

	ret = tracefs_create_file(name, mode, parent, data, fops);
	if (!ret)
		pr_warn("Could not create tracefs '%s' entry\n", name);

	return ret;
}


static struct dentry *trace_options_init_dentry(struct trace_array *tr)
{
	struct dentry *d_tracer;

	if (tr->options)
		return tr->options;

	d_tracer = tracing_get_dentry(tr);
	if (IS_ERR(d_tracer))
		return NULL;

	tr->options = tracefs_create_dir("options", d_tracer);
	if (!tr->options) {
		pr_warn("Could not create tracefs directory 'options'\n");
		return NULL;
	}

	return tr->options;
}

static void
create_trace_option_file(struct trace_array *tr,
			 struct trace_option_dentry *topt,
			 struct tracer_flags *flags,
			 struct tracer_opt *opt)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	topt->flags = flags;
	topt->opt = opt;
	topt->tr = tr;

	topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
				    &trace_options_fops);

}

static void
create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
{
	struct trace_option_dentry *topts;
	struct trace_options *tr_topts;
	struct tracer_flags *flags;
	struct tracer_opt *opts;
	int cnt;
	int i;

	if (!tracer)
		return;

	flags = tracer->flags;

	if (!flags || !flags->opts)
		return;

	/*
	 * If this is an instance, only create flags for tracers
	 * the instance may have.
	 */
	if (!trace_ok_for_array(tracer, tr))
		return;

	for (i = 0; i < tr->nr_topts; i++) {
		/* Make sure there's no duplicate flags. */
		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
			return;
	}

	opts = flags->opts;

	for (cnt = 0; opts[cnt].name; cnt++)
		;

	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
	if (!topts)
		return;

	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
			    GFP_KERNEL);
	if (!tr_topts) {
		kfree(topts);
		return;
	}

	tr->topts = tr_topts;
	tr->topts[tr->nr_topts].tracer = tracer;
	tr->topts[tr->nr_topts].topts = topts;
	tr->nr_topts++;

	for (cnt = 0; opts[cnt].name; cnt++) {
		create_trace_option_file(tr, &topts[cnt], flags,
					 &opts[cnt]);
		WARN_ONCE(topts[cnt].entry == NULL,
			  "Failed to create trace option: %s",
			  opts[cnt].name);
	}
}

static struct dentry *
create_trace_option_core_file(struct trace_array *tr,
			      const char *option, long index)
{
	struct dentry *t_options;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return NULL;

	return trace_create_file(option, 0644, t_options,
				 (void *)&tr->trace_flags_index[index],
				 &trace_options_core_fops);
}

static void create_trace_options_dir(struct trace_array *tr)
{
	struct dentry *t_options;
	bool top_level = tr == &global_trace;
	int i;

	t_options = trace_options_init_dentry(tr);
	if (!t_options)
		return;

	for (i = 0; trace_options[i]; i++) {
		if (top_level ||
		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
			create_trace_option_core_file(tr, trace_options[i], i);
	}
}
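/*
 * Illustrative note (an assumption about the resulting layout, not extra
 * kernel code): the files created above show up under the instance's
 * options/ directory and accept "0" or "1", e.g.:
 *
 *	cat  /sys/kernel/tracing/options/sym-offset
 *	echo 1 > /sys/kernel/tracing/options/sym-offset
 *
 * "sym-offset" is just one example flag name; tracers may add their own.
 */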

static ssize_t
rb_simple_read(struct file *filp, char __user *ubuf,
	       size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	char buf[64];
	int r;

	r = tracer_tracing_is_on(tr);
	r = sprintf(buf, "%d\n", r);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
}

static ssize_t
rb_simple_write(struct file *filp, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	struct trace_array *tr = filp->private_data;
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	unsigned long val;
	int ret;

	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
	if (ret)
		return ret;

	if (buffer) {
		mutex_lock(&trace_types_lock);
		if (val) {
			tracer_tracing_on(tr);
			if (tr->current_trace->start)
				tr->current_trace->start(tr);
		} else {
			tracer_tracing_off(tr);
			if (tr->current_trace->stop)
				tr->current_trace->stop(tr);
		}
		mutex_unlock(&trace_types_lock);
	}

	(*ppos)++;

	return cnt;
}

static const struct file_operations rb_simple_fops = {
	.open		= tracing_open_generic_tr,
	.read		= rb_simple_read,
	.write		= rb_simple_write,
	.release	= tracing_release_generic_tr,
	.llseek		= default_llseek,
};

struct dentry *trace_instance_dir;

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);

static int
allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
{
	enum ring_buffer_flags rb_flags;

	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;

	buf->tr = tr;

	buf->buffer = ring_buffer_alloc(size, rb_flags);
	if (!buf->buffer)
		return -ENOMEM;

	buf->data = alloc_percpu(struct trace_array_cpu);
	if (!buf->data) {
		ring_buffer_free(buf->buffer);
		return -ENOMEM;
	}

	/* Allocate the first page for all buffers */
	set_buffer_entries(&tr->trace_buffer,
			   ring_buffer_size(tr->trace_buffer.buffer, 0));

	return 0;
}

static int allocate_trace_buffers(struct trace_array *tr, int size)
{
	int ret;

	ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
	if (ret)
		return ret;

#ifdef CONFIG_TRACER_MAX_TRACE
	ret = allocate_trace_buffer(tr, &tr->max_buffer,
				    allocate_snapshot ? size : 1);
	if (WARN_ON(ret)) {
		ring_buffer_free(tr->trace_buffer.buffer);
		free_percpu(tr->trace_buffer.data);
		return -ENOMEM;
	}
	tr->allocated_snapshot = allocate_snapshot;

	/*
	 * Only the top level trace array gets its snapshot allocated
	 * from the kernel command line.
	 */
	allocate_snapshot = false;
#endif
	return 0;
}

static void free_trace_buffer(struct trace_buffer *buf)
{
	if (buf->buffer) {
		ring_buffer_free(buf->buffer);
		buf->buffer = NULL;
		free_percpu(buf->data);
		buf->data = NULL;
	}
}

static void free_trace_buffers(struct trace_array *tr)
{
	if (!tr)
		return;

	free_trace_buffer(&tr->trace_buffer);

#ifdef CONFIG_TRACER_MAX_TRACE
	free_trace_buffer(&tr->max_buffer);
#endif
}

static void init_trace_flags_index(struct trace_array *tr)
{
	int i;

	/* Used by the trace options files */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
		tr->trace_flags_index[i] = i;
}

static void __update_tracer_options(struct trace_array *tr)
{
	struct tracer *t;

	for (t = trace_types; t; t = t->next)
		add_tracer_options(tr, t);
}

static void update_tracer_options(struct trace_array *tr)
{
	mutex_lock(&trace_types_lock);
	__update_tracer_options(tr);
	mutex_unlock(&trace_types_lock);
}

static int instance_mkdir(const char *name)
{
	struct trace_array *tr;
	int ret;

	mutex_lock(&trace_types_lock);

	ret = -EEXIST;
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0)
			goto out_unlock;
	}

	ret = -ENOMEM;
	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
	if (!tr)
		goto out_unlock;

	tr->name = kstrdup(name, GFP_KERNEL);
	if (!tr->name)
		goto out_free_tr;

	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
		goto out_free_tr;

	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;

	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&tr->start_lock);

	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	tr->current_trace = &nop_trace;

	INIT_LIST_HEAD(&tr->systems);
	INIT_LIST_HEAD(&tr->events);

	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
		goto out_free_tr;

	tr->dir = tracefs_create_dir(name, trace_instance_dir);
	if (!tr->dir)
		goto out_free_tr;

	ret = event_trace_add_tracer(tr->dir, tr);
	if (ret) {
		tracefs_remove_recursive(tr->dir);
		goto out_free_tr;
	}

	init_tracer_tracefs(tr, tr->dir);
	init_trace_flags_index(tr);
	__update_tracer_options(tr);

	list_add(&tr->list, &ftrace_trace_arrays);

	mutex_unlock(&trace_types_lock);

	return 0;

 out_free_tr:
	free_trace_buffers(tr);
	free_cpumask_var(tr->tracing_cpumask);
	kfree(tr->name);
	kfree(tr);

 out_unlock:
	mutex_unlock(&trace_types_lock);

	return ret;

}

static int instance_rmdir(const char *name)
{
	struct trace_array *tr;
	int found = 0;
	int ret;
	int i;

	mutex_lock(&trace_types_lock);

	ret = -ENODEV;
	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (tr->name && strcmp(tr->name, name) == 0) {
			found = 1;
			break;
		}
	}
	if (!found)
		goto out_unlock;

	ret = -EBUSY;
	if (tr->ref || (tr->current_trace && tr->current_trace->ref))
		goto out_unlock;

	list_del(&tr->list);

	/* Disable all the flags that were enabled coming in */
	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
		if ((1 << i) & ZEROED_TRACE_FLAGS)
			set_tracer_flag(tr, 1 << i, 0);
	}

	tracing_set_nop(tr);
	event_trace_del_tracer(tr);
	ftrace_destroy_function_files(tr);
	tracefs_remove_recursive(tr->dir);
	free_trace_buffers(tr);

	for (i = 0; i < tr->nr_topts; i++) {
		kfree(tr->topts[i].topts);
	}
	kfree(tr->topts);

	kfree(tr->name);
	kfree(tr);

	ret = 0;

 out_unlock:
	mutex_unlock(&trace_types_lock);

	return ret;
}

static __init void create_trace_instances(struct dentry *d_tracer)
{
	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
							 instance_mkdir,
							 instance_rmdir);
	if (WARN_ON(!trace_instance_dir))
		return;
}
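/*
 * Illustrative usage (the mount point is an assumption, not extra kernel
 * code): instance_mkdir()/instance_rmdir() above back plain mkdir/rmdir in
 * the instances directory, e.g.:
 *
 *	mkdir /sys/kernel/tracing/instances/foo   # new trace_array "foo"
 *	rmdir /sys/kernel/tracing/instances/foo   # tears it down again
 *
 * rmdir fails with -EBUSY while the instance still has readers holding a
 * reference.
 */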

static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", 0444, d_tracer,
			tr, &show_traces_fops);

	trace_create_file("current_tracer", 0644, d_tracer,
			tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", 0644, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", 0644, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", 0644, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", 0444, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", 0644, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	trace_create_file("trace_clock", 0644, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", 0644, d_tracer,
			  tr, &rb_simple_fops);

	create_trace_options_dir(tr);

#ifdef CONFIG_TRACER_MAX_TRACE
	trace_create_file("tracing_max_latency", 0644, d_tracer,
			&tr->max_latency, &tracing_max_lat_fops);
#endif

	if (ftrace_create_function_files(tr, d_tracer))
		WARN(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", 0644, d_tracer,
			  tr, &snapshot_fops);
#endif

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

}

static struct vfsmount *trace_automount(void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;
	mnt = vfs_kern_mount(type, 0, "tracefs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		return NULL;
	mntget(mnt);

	return mnt;
}

/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code
 * and expects to return the dentry of the top level tracing directory.
 */
struct dentry *tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return NULL;

	if (WARN_ON(!tracefs_initialized()) ||
		(IS_ENABLED(CONFIG_DEBUG_FS) &&
		 WARN_ON(!debugfs_initialized())))
		return ERR_PTR(-ENODEV);

	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
	if (!tr->dir) {
		pr_warn_once("Could not create debugfs directory 'tracing'\n");
		return ERR_PTR(-ENOMEM);
	}

	return NULL;
}
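/*
 * Illustrative note (paths are the conventional mount points, not taken from
 * this file): with the automount above, the same files are reachable both at
 * the native tracefs mount and under debugfs, e.g.:
 *
 *	/sys/kernel/tracing/trace
 *	/sys/kernel/debug/tracing/trace
 */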

extern struct trace_enum_map *__start_ftrace_enum_maps[];
extern struct trace_enum_map *__stop_ftrace_enum_maps[];

static void __init trace_enum_init(void)
{
	int len;

	len = __stop_ftrace_enum_maps - __start_ftrace_enum_maps;
	trace_insert_enum_map(NULL, __start_ftrace_enum_maps, len);
}

#ifdef CONFIG_MODULES
static void trace_module_add_enums(struct module *mod)
{
	if (!mod->num_trace_enums)
		return;

	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	trace_insert_enum_map(mod, mod->trace_enums, mod->num_trace_enums);
}

#ifdef CONFIG_TRACE_ENUM_MAP_FILE
static void trace_module_remove_enums(struct module *mod)
{
	union trace_enum_map_item *map;
	union trace_enum_map_item **last = &trace_enum_maps;

	if (!mod->num_trace_enums)
		return;

	mutex_lock(&trace_enum_mutex);

	map = trace_enum_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_enum_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	*last = trace_enum_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_enum_mutex);
}
#else
static inline void trace_module_remove_enums(struct module *mod) { }
#endif /* CONFIG_TRACE_ENUM_MAP_FILE */

static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_enums(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_enums(mod);
		break;
	}

	return 0;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */

static __init int tracer_init_tracefs(void)
{
	struct dentry *d_tracer;

	trace_access_lock_init();

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	init_tracer_tracefs(&global_trace, d_tracer);

	trace_create_file("tracing_thresh", 0644, d_tracer,
			&global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, d_tracer,
			NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, d_tracer,
			NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_enum_init();

	trace_create_enum_file(d_tracer);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
			&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif

	create_trace_instances(d_tracer);

	update_tracer_options(&global_trace);

	return 0;
}

static int trace_panic_handler(struct notifier_block *this,
			       unsigned long event, void *unused)
{
	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);
	return NOTIFY_OK;
}

static struct notifier_block trace_panic_notifier = {
	.notifier_call  = trace_panic_handler,
	.next           = NULL,
	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
};

static int trace_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	switch (val) {
	case DIE_OOPS:
		if (ftrace_dump_on_oops)
			ftrace_dump(ftrace_dump_on_oops);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.priority = 200
};

/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero ended, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->trace_buffer = &global_trace.trace_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUs to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill all what we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		/* reset all but tr, trace, and overruns */
		memset(&iter.seq, 0,
		       sizeof(struct trace_iterator) -
		       offsetof(struct trace_iterator, seq));
		iter.iter_flags |= TRACE_FILE_LAT_FMT;
		iter.pos = -1;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/* Used for event triggers */
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_free_cpumask;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
		WARN_ON(1);
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

void __init trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (WARN_ON(!tracepoint_print_iter))
			tracepoint_printk = 0;
	}
	tracer_alloc_buffers();
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The default tracer at boot buffer is an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall(clear_boot_tracer);