ftrace: add tracing of context switches
This patch adds context switch tracing, of the format of:
_------=> CPU#
/ _-----=> irqs-off
| / _----=> need-resched
|| / _---=> hardirq/softirq
||| / _--=> preempt-depth
|||| /
||||| delay
cmd pid ||||| time | pid:prio:state
\ / ||||| \ | /
swapper-0 1d..3 137us+: 0:140:R --> 2912:120
sshd-2912 1d..3 216us+: 2912:120:S --> 0:140
swapper-0 1d..3 261us+: 0:140:R --> 2912:120
bash-2920 0d..3 267us+: 2920:120:S --> 0:140
sshd-2912 1d..3 330us!: 2912:120:S --> 0:140
swapper-0 1d..3 2389us+: 0:140:R --> 2847:120
yum-upda-2847 1d..3 2411us!: 2847:120:S --> 0:140
swapper-0 0d..3 11089us+: 0:140:R --> 3139:120
gdm-bina-3139 0d..3 11113us!: 3139:120:S --> 0:140
swapper-0 1d..3 102328us+: 0:140:R --> 2847:120
yum-upda-2847 1d..3 102348us!: 2847:120:S --> 0:140
"sched_switch" is added to /debugfs/tracing/available_tracers
[ Eugene Teo <eugeneteo@kernel.sg: remove unused tracing_sched_switch_enabled ]
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1399f37..5d6aa92 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -13,6 +13,7 @@
depends on DEBUG_KERNEL && HAVE_FTRACE
select FRAME_POINTER
select TRACING
+ select CONTEXT_SWITCH_TRACER
help
Enable the kernel to trace every kernel function. This is done
by using a compiler feature to insert a small, 5-byte No-Operation
@@ -21,3 +22,13 @@
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
small and not measurable even in micro-benchmarks.
+
+config CONTEXT_SWITCH_TRACER
+ bool "Trace process context switches"
+ depends on DEBUG_KERNEL
+ select TRACING
+ select MARKERS
+ help
+ This tracer gets called from the context switch and records
+ all switching of tasks.
+
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 6bb5e50..6b54ceb 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_FTRACE) += libftrace.o
obj-$(CONFIG_TRACING) += trace.o
+obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_FTRACE) += trace_functions.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
new file mode 100644
index 0000000..3e4771d
--- /dev/null
+++ b/kernel/trace/trace_sched_switch.c
@@ -0,0 +1,116 @@
+/*
+ * trace context switch
+ *
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/marker.h>
+#include <linux/ftrace.h>
+
+#include "trace.h"
+
+static struct trace_array *ctx_trace;
+static int __read_mostly tracer_enabled;
+
+static void notrace
+ctx_switch_func(struct task_struct *prev, struct task_struct *next)
+{
+ struct trace_array *tr = ctx_trace;
+ struct trace_array_cpu *data;
+ unsigned long flags;
+ long disabled;
+ int cpu;
+
+ if (!tracer_enabled)
+ return;
+
+ raw_local_irq_save(flags);
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+ disabled = atomic_inc_return(&data->disabled);
+
+ if (likely(disabled == 1))
+ tracing_sched_switch_trace(tr, data, prev, next, flags);
+
+ atomic_dec(&data->disabled);
+ raw_local_irq_restore(flags);
+}
+
+void ftrace_ctx_switch(struct task_struct *prev, struct task_struct *next)
+{
+ tracing_record_cmdline(prev);
+
+ /*
+ * If tracer_switch_func only points to the local
+ * switch func, it still needs the ptr passed to it.
+ */
+ ctx_switch_func(prev, next);
+
+ /*
+ * Chain to the wakeup tracer (this is a NOP if disabled):
+ */
+ wakeup_sched_switch(prev, next);
+}
+
+static notrace void sched_switch_reset(struct trace_array *tr)
+{
+ int cpu;
+
+ tr->time_start = now(tr->cpu);
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr->data[cpu]);
+}
+
+static notrace void start_sched_trace(struct trace_array *tr)
+{
+ sched_switch_reset(tr);
+ tracer_enabled = 1;
+}
+
+static notrace void stop_sched_trace(struct trace_array *tr)
+{
+ tracer_enabled = 0;
+}
+
+static notrace void sched_switch_trace_init(struct trace_array *tr)
+{
+ ctx_trace = tr;
+
+ if (tr->ctrl)
+ start_sched_trace(tr);
+}
+
+static notrace void sched_switch_trace_reset(struct trace_array *tr)
+{
+ if (tr->ctrl)
+ stop_sched_trace(tr);
+}
+
+static void sched_switch_trace_ctrl_update(struct trace_array *tr)
+{
+ /* When starting a new trace, reset the buffers */
+ if (tr->ctrl)
+ start_sched_trace(tr);
+ else
+ stop_sched_trace(tr);
+}
+
+static struct tracer sched_switch_trace __read_mostly =
+{
+ .name = "sched_switch",
+ .init = sched_switch_trace_init,
+ .reset = sched_switch_trace_reset,
+ .ctrl_update = sched_switch_trace_ctrl_update,
+};
+
+__init static int init_sched_switch_trace(void)
+{
+ return register_tracer(&sched_switch_trace);
+}
+device_initcall(init_sched_switch_trace);