diff -aurpN -X /home/fletch/.diff.exclude 272-config_irqbal/arch/i386/kernel/apic.c 274-percpu_real_loadavg/arch/i386/kernel/apic.c
--- 272-config_irqbal/arch/i386/kernel/apic.c	Wed Feb  4 23:03:15 2004
+++ 274-percpu_real_loadavg/arch/i386/kernel/apic.c	Wed Feb 11 10:14:18 2004
@@ -1030,7 +1030,7 @@ int setup_profiling_timer(unsigned int m
  * multiplier is 1 and it can be changed by writing the new multiplier
  * value into /proc/profile.
  */
-
+extern void calc_load_cpu(int cpu);
 inline void smp_local_timer_interrupt(struct pt_regs * regs)
 {
 	int cpu = smp_processor_id();
@@ -1058,6 +1058,7 @@ inline void smp_local_timer_interrupt(st
 
 #ifdef CONFIG_SMP
 		update_process_times(user_mode(regs));
+		calc_load_cpu(cpu);
 #endif
 	}
 
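
Note: smp_local_timer_interrupt() fires once per local APIC timer tick on
each CPU, so the per-CPU countdown in calc_load_cpu() (see the kernel/timer.c
hunk below) expires once every LOAD_FREQ ticks. With HZ=1000, the 2.6
default on i386, LOAD_FREQ = 5*HZ = 5000 ticks, i.e. one update roughly
every five seconds per CPU, the same cadence as the global calc_load().
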
diff -aurpN -X /home/fletch/.diff.exclude 272-config_irqbal/fs/proc/proc_misc.c 274-percpu_real_loadavg/fs/proc/proc_misc.c
--- 272-config_irqbal/fs/proc/proc_misc.c	Wed Feb 11 10:13:53 2004
+++ 274-percpu_real_loadavg/fs/proc/proc_misc.c	Wed Feb 11 10:14:18 2004
@@ -134,6 +134,41 @@ static struct vmalloc_info get_vmalloc_i
 	return vmi;
 }
 
+static int real_loadavg_read_proc(char *page, char **start, off_t off,
+				 int count, int *eof, void *data)
+{
+	int a, b, c, cpu;
+	int len;
+
+	a = tasks_running[0] + (FIXED_1/200);
+	b = tasks_running[1] + (FIXED_1/200);
+	c = tasks_running[2] + (FIXED_1/200);
+	len = sprintf(page,"Domain    load1    load2    load3  nr_run/nr_thrd\n");
+	len += sprintf(page+len,"SYSTEM %5d.%02d %5d.%02d %5d.%02d %7ld/%7d\n",
+		LOAD_INT(a), LOAD_FRAC(a),
+		LOAD_INT(b), LOAD_FRAC(b),
+		LOAD_INT(c), LOAD_FRAC(c),
+		nr_running(), nr_threads);
+	for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+		unsigned long nr_run;
+		if (!cpu_online(cpu))
+			continue;
+		preempt_disable();
+		a = per_cpu(cpu_tasks_running,cpu)[0] + (FIXED_1/200);
+		b = per_cpu(cpu_tasks_running,cpu)[1] + (FIXED_1/200);
+		c = per_cpu(cpu_tasks_running,cpu)[2] + (FIXED_1/200);
+		nr_run = nr_running_cpu(cpu);
+		preempt_enable();
+		len += sprintf(page+len, "%5d  %5d.%02d %5d.%02d %5d.%02d %7ld/%7d\n",
+			cpu,
+			LOAD_INT(a), LOAD_FRAC(a),
+			LOAD_INT(b), LOAD_FRAC(b),
+			LOAD_INT(c), LOAD_FRAC(c),
+			nr_run, nr_threads);
+	}
+	return proc_calc_metrics(page, start, off, count, eof, len);
+}
+
 static int uptime_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
@@ -755,6 +790,7 @@ void __init proc_misc_init(void)
 		int (*read_proc)(char*,char**,off_t,int,int*,void*);
 	} *p, simple_ones[] = {
 		{"loadavg",     loadavg_read_proc},
+		{"real_loadavg",real_loadavg_read_proc},
 		{"uptime",	uptime_read_proc},
 		{"meminfo",	meminfo_read_proc},
 		{"version",	version_read_proc},
diff -aurpN -X /home/fletch/.diff.exclude 272-config_irqbal/include/linux/sched.h 274-percpu_real_loadavg/include/linux/sched.h
--- 272-config_irqbal/include/linux/sched.h	Wed Feb 11 09:02:46 2004
+++ 274-percpu_real_loadavg/include/linux/sched.h	Wed Feb 11 10:14:18 2004
@@ -71,7 +71,9 @@ struct exec_domain;
  *    the EXP_n values would be 1981, 2034 and 2043 if still using only
  *    11 bit fractions.
  */
-extern unsigned long avenrun[];		/* Load averages */
+extern unsigned long avenrun[];				/* Load averages */
+extern unsigned long tasks_running[3];			/* Real load averages */
+DECLARE_PER_CPU(unsigned long[3],cpu_tasks_running);	/* Real load averages per cpu */
 
 #define FSHIFT		11		/* nr of bits of precision */
 #define FIXED_1		(1<<FSHIFT)	/* 1.0 as fixed-point */
@@ -93,6 +97,7 @@ extern int last_pid;
 DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
+extern unsigned long nr_running_cpu(int i);
 extern unsigned long nr_uninterruptible(void);
 extern unsigned long nr_iowait(void);
 
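
Note: the DECLARE_PER_CPU() in this header pairs with the DEFINE_PER_CPU()
in kernel/timer.c below; every CPU gets its own copy of the three-element
array, and per_cpu(var, cpu) picks out one CPU's copy. A minimal sketch of
the pattern, using a hypothetical variable name:

	/* one definition site, e.g. in a .c file */
	DEFINE_PER_CPU(unsigned long[3], demo_avgs);

	/* read slot 0 of some CPU's copy; demo_avgs is hypothetical */
	unsigned long demo_read(int cpu)
	{
		return per_cpu(demo_avgs, cpu)[0];
	}
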
diff -aurpN -X /home/fletch/.diff.exclude 272-config_irqbal/kernel/sched.c 274-percpu_real_loadavg/kernel/sched.c
--- 272-config_irqbal/kernel/sched.c	Wed Feb 11 09:02:50 2004
+++ 274-percpu_real_loadavg/kernel/sched.c	Wed Feb 11 10:14:18 2004
@@ -1039,6 +1039,11 @@ unsigned long nr_running(void)
 	return sum;
 }
 
+unsigned long nr_running_cpu(int cpu)
+{
+	return cpu_rq(cpu)->nr_running;
+}
+
 unsigned long nr_uninterruptible(void)
 {
 	unsigned long i, sum = 0;
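
Note: nr_running_cpu() reads cpu_rq(cpu)->nr_running without taking the
runqueue lock; a momentarily stale value is acceptable for statistics.
A sketch of a hypothetical consumer that sums the per-CPU counts, much as
nr_running() itself iterates the runqueues:

	/* hypothetical helper, not part of this patch */
	unsigned long demo_total_running(void)
	{
		unsigned long cpu, sum = 0;

		for (cpu = 0; cpu < NR_CPUS; cpu++)
			if (cpu_online(cpu))
				sum += nr_running_cpu(cpu);
		return sum;
	}
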
diff -aurpN -X /home/fletch/.diff.exclude 272-config_irqbal/kernel/timer.c 274-percpu_real_loadavg/kernel/timer.c
--- 272-config_irqbal/kernel/timer.c	Wed Feb  4 23:03:42 2004
+++ 274-percpu_real_loadavg/kernel/timer.c	Wed Feb 11 10:14:18 2004
@@ -765,6 +765,8 @@ static unsigned long count_active_tasks(
  * Requires xtime_lock to access.
  */
 unsigned long avenrun[3];
+unsigned long tasks_running[3];
+DEFINE_PER_CPU(unsigned long[3],cpu_tasks_running);
 
 /*
  * calc_load - given tick count, update the avenrun load estimates.
@@ -772,7 +774,7 @@ unsigned long avenrun[3];
  */
 static inline void calc_load(unsigned long ticks)
 {
-	unsigned long active_tasks; /* fixed-point */
+	unsigned long active_tasks, running_tasks; /* fixed-point */
 	static int count = LOAD_FREQ;
 
 	count -= ticks;
@@ -782,7 +784,37 @@ static inline void calc_load(unsigned lo
 		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
+		running_tasks = nr_running() * FIXED_1;
+		CALC_LOAD(tasks_running[0], EXP_1,  running_tasks);
+		CALC_LOAD(tasks_running[1], EXP_5,  running_tasks);
+		CALC_LOAD(tasks_running[2], EXP_15, running_tasks);
 	}
+}
+
+/*
+ * This does the frequency calculation a little differently from the
+ * global version above.  It never looks at the kernel's concept of
+ * time; it just updates the stats once every LOAD_FREQ calls into the
+ * function.
+ *
+ * Using jiffies would be more accurate, but these _are_ just
+ * statistics, so they're not worth messing with xtime_lock and
+ * company.  If we miss an interrupt or two, big deal.
+ */
+void calc_load_cpu(int cpu)
+{
+	unsigned long running_tasks;
+	static DEFINE_PER_CPU(int, count) = { LOAD_FREQ };
+
+	per_cpu(count, cpu)--;
+	if (per_cpu(count, cpu) != 0)
+		return;
+
+	per_cpu(count, cpu) += LOAD_FREQ;
+	running_tasks = nr_running_cpu(cpu) * FIXED_1;
+	CALC_LOAD(per_cpu(cpu_tasks_running, cpu)[0], EXP_1,  running_tasks);
+	CALC_LOAD(per_cpu(cpu_tasks_running, cpu)[1], EXP_5,  running_tasks);
+	CALC_LOAD(per_cpu(cpu_tasks_running, cpu)[2], EXP_15, running_tasks);
 }
 
 /* jiffies at the most recent update of wall time */
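
Note: CALC_LOAD() is the kernel's fixed-point exponential decay, defined in
include/linux/sched.h along with FSHIFT, FIXED_1 and the EXP_* constants.
A standalone userspace sketch of the arithmetic, with the macros adapted
from that header and a made-up workload of two runnable tasks:

	#include <stdio.h>

	#define FSHIFT	11			/* bits of precision */
	#define FIXED_1	(1 << FSHIFT)		/* 1.0 as fixed-point */
	#define EXP_1	1884			/* 1/exp(5sec/1min) */
	#define LOAD_INT(x)  ((x) >> FSHIFT)
	#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

	#define CALC_LOAD(load, exp, n) \
		load *= exp; \
		load += (n) * (FIXED_1 - (exp)); \
		load >>= FSHIFT;

	int main(void)
	{
		unsigned long load = 0;	/* starts idle */
		int i;

		/* twelve 5-second samples with 2 tasks runnable: the
		 * 1-minute figure climbs toward 2.00 but never jumps */
		for (i = 0; i < 12; i++) {
			CALC_LOAD(load, EXP_1, 2 * FIXED_1);
			printf("after %2d samples: %lu.%02lu\n",
			       i + 1, LOAD_INT(load), LOAD_FRAC(load));
		}
		return 0;
	}
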