Commit f0fd8a8d authored by Arjan van de Ven's avatar Arjan van de Ven Committed by Linus Torvalds

[PATCH] schedule profiling

From: William Lee Irwin III <wli@holomorphy.com>

The patch (from Ingo) below is quite interesting, it allows the use of
readprofile not for statistical time sampling, but for seeing where calls to
schedule() come from, so it can give some insight to the "where do my context
switches come from" question.

Boot with `profile=schedule' to activate this feature.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent e271fc85
......@@ -914,7 +914,10 @@ running once the system is up.
Ranges are in pairs (memory base and size).
profile= [KNL] Enable kernel profiling via /proc/profile
(param: profile step/bucket size as a power of 2)
{ schedule | <number> }
(param: schedule - profile schedule points)
(param: profile step/bucket size as a power of 2 for
statistical time based profiling)
prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
before loading.
......
......@@ -68,27 +68,13 @@ extern atomic_t irq_mis_count;
#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs))
/*
* The profiling function is SMP safe. (nothing can mess
* around with "current", and the profiling counters are
* updated with atomic operations). This is especially
* useful with a profiling multiplier != 1
*/
static inline void x86_do_profile(struct pt_regs * regs)
static inline void __do_profile(unsigned long eip)
{
unsigned long eip;
extern unsigned long prof_cpu_mask;
profile_hook(regs);
if (user_mode(regs))
return;
if (!prof_buffer)
return;
eip = regs->eip;
/*
* Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
* (default is all CPUs.)
......@@ -108,6 +94,24 @@ static inline void x86_do_profile(struct pt_regs * regs)
atomic_inc((atomic_t *)&prof_buffer[eip]);
}
#define kern_profile(eip) __do_profile(eip)
/*
* The profiling function is SMP safe. (nothing can mess
* around with "current", and the profiling counters are
* updated with atomic operations). This is especially
* useful with a profiling multiplier != 1
*/
/*
 * Per-tick profiling hook.  SMP safe: nothing here touches "current",
 * and the profile counters are updated with atomic operations, which
 * also makes this usable with a profiling multiplier != 1.
 */
static inline void x86_do_profile(struct pt_regs * regs)
{
	profile_hook(regs);

	/* Bucket the tick only for kernel-mode EIPs in time-profiling mode (1). */
	if (prof_on == 1 && !user_mode(regs))
		__do_profile(regs->eip);
}
#if defined(CONFIG_X86_IO_APIC)
static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i)
{
......
......@@ -18,10 +18,18 @@ int prof_on;
/*
 * Parse the "profile=" boot parameter.
 *
 * Accepted forms:
 *   profile=<number>           - statistical time-based profiling;
 *                                <number> is the bucket shift
 *   profile=schedule           - profile schedule() call points
 *   profile=schedule,<number>  - schedule profiling with the given shift
 *
 * Always returns 1 (parameter consumed).
 */
int __init profile_setup(char * str)
{
	int par;

	if (!strncmp(str, "schedule", 8)) {
		prof_on = 2;
		printk(KERN_INFO "kernel schedule profiling enabled\n");
		/*
		 * "schedule" is 8 characters, so an optional ",<number>"
		 * suffix starts at str[8]; skip past the comma so
		 * get_option() can parse the shift.  (Testing str[7],
		 * as before, can never match: it is always 'e' once
		 * the strncmp above has succeeded.)
		 */
		if (str[8] == ',')
			str += 9;
	}
	if (get_option(&str, &par)) {
		prof_shift = par;
		/* don't clobber schedule-profiling mode (2) with mode 1 */
		if (!prof_on)
			prof_on = 1;
		printk(KERN_INFO "kernel profiling enabled (shift: %ld)\n",
			prof_shift);
	}
	return 1;
}
......
......@@ -3220,6 +3220,10 @@ static int setscheduler(pid_t pid, int policy, struct sched_param __user *param)
policy != SCHED_NORMAL)
goto out_unlock;
}
#ifdef kern_profile
if (unlikely(prof_on == 2))
__do_profile((unsigned long)__builtin_return_address(0));
#endif
/*
* Valid priorities for SCHED_FIFO and SCHED_RR are
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment