Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/ppc64/oprofile/Kconfig b/arch/ppc64/oprofile/Kconfig
new file mode 100644
index 0000000..5ade198
--- /dev/null
+++ b/arch/ppc64/oprofile/Kconfig
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+	depends on EXPERIMENTAL
+
+config PROFILING
+	bool "Profiling support (EXPERIMENTAL)"
+	help
+	  Say Y here to enable the extended profiling support mechanisms used
+	  by profilers such as OProfile.
+	  
+
+config OPROFILE
+	tristate "OProfile system profiling (EXPERIMENTAL)"
+	depends on PROFILING
+	help
+	  OProfile is a profiling system capable of profiling the
+	  whole system, include the kernel, kernel modules, libraries,
+	  and applications.
+
+	  If unsure, say N.
+
+endmenu
+
diff --git a/arch/ppc64/oprofile/Makefile b/arch/ppc64/oprofile/Makefile
new file mode 100644
index 0000000..162dbf0
--- /dev/null
+++ b/arch/ppc64/oprofile/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
+		oprof.o cpu_buffer.o buffer_sync.o \
+		event_buffer.o oprofile_files.o \
+		oprofilefs.o oprofile_stats.o \
+		timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) common.o op_model_rs64.o op_model_power4.o
diff --git a/arch/ppc64/oprofile/common.c b/arch/ppc64/oprofile/common.c
new file mode 100644
index 0000000..b28bfda
--- /dev/null
+++ b/arch/ppc64/oprofile/common.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * Based on alpha version.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/errno.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/pmc.h>
+
+#include "op_impl.h"
+
+extern struct op_ppc64_model op_model_rs64;
+extern struct op_ppc64_model op_model_power4;
+static struct op_ppc64_model *model;
+
+static struct op_counter_config ctr[OP_MAX_COUNTER];
+static struct op_system_config sys;
+
+static void op_handle_interrupt(struct pt_regs *regs)
+{
+	model->handle_interrupt(regs, ctr);
+}
+
+static int op_ppc64_setup(void)
+{
+	int err;
+
+	/* Grab the hardware */
+	err = reserve_pmc_hardware(op_handle_interrupt);
+	if (err)
+		return err;
+
+	/* Pre-compute the values to stuff in the hardware registers.  */
+	model->reg_setup(ctr, &sys, model->num_counters);
+
+	/* Configure the registers on all cpus.  */
+	on_each_cpu(model->cpu_setup, NULL, 0, 1);
+
+	return 0;
+}
+
+static void op_ppc64_shutdown(void)
+{
+	release_pmc_hardware();
+}
+
+static void op_ppc64_cpu_start(void *dummy)
+{
+	model->start(ctr);
+}
+
+static int op_ppc64_start(void)
+{
+	on_each_cpu(op_ppc64_cpu_start, NULL, 0, 1);
+	return 0;
+}
+
+static inline void op_ppc64_cpu_stop(void *dummy)
+{
+	model->stop();
+}
+
+static void op_ppc64_stop(void)
+{
+	on_each_cpu(op_ppc64_cpu_stop, NULL, 0, 1);
+}
+
+static int op_ppc64_create_files(struct super_block *sb, struct dentry *root)
+{
+	int i;
+
+	/*
+	 * There is one mmcr0, mmcr1 and mmcra for setting the events for
+	 * all of the counters.
+	 */
+	oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
+	oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
+	oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
+
+	for (i = 0; i < model->num_counters; ++i) {
+		struct dentry *dir;
+		char buf[3];
+
+		snprintf(buf, sizeof buf, "%d", i);
+		dir = oprofilefs_mkdir(sb, root, buf);
+
+		oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
+		oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
+		oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
+		/*
+		 * We dont support per counter user/kernel selection, but
+		 * we leave the entries because userspace expects them
+		 */
+		oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
+		oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
+		oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
+	}
+
+	oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel);
+	oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user);
+	oprofilefs_create_ulong(sb, root, "backtrace_spinlocks",
+				&sys.backtrace_spinlocks);
+
+	/* Default to tracing both kernel and user */
+	sys.enable_kernel = 1;
+	sys.enable_user = 1;
+
+	/* Turn on backtracing through spinlocks by default */
+	sys.backtrace_spinlocks = 1;
+
+	return 0;
+}
+
+int __init oprofile_arch_init(struct oprofile_operations *ops)
+{
+	unsigned int pvr;
+
+	pvr = mfspr(SPRN_PVR);
+
+	switch (PVR_VER(pvr)) {
+		case PV_630:
+		case PV_630p:
+			model = &op_model_rs64;
+			model->num_counters = 8;
+			ops->cpu_type = "ppc64/power3";
+			break;
+
+		case PV_NORTHSTAR:
+		case PV_PULSAR:
+		case PV_ICESTAR:
+		case PV_SSTAR:
+			model = &op_model_rs64;
+			model->num_counters = 8;
+			ops->cpu_type = "ppc64/rs64";
+			break;
+
+		case PV_POWER4:
+		case PV_POWER4p:
+			model = &op_model_power4;
+			model->num_counters = 8;
+			ops->cpu_type = "ppc64/power4";
+			break;
+
+		case PV_970:
+		case PV_970FX:
+			model = &op_model_power4;
+			model->num_counters = 8;
+			ops->cpu_type = "ppc64/970";
+			break;
+
+		case PV_POWER5:
+		case PV_POWER5p:
+			model = &op_model_power4;
+			model->num_counters = 6;
+			ops->cpu_type = "ppc64/power5";
+			break;
+
+		default:
+			return -ENODEV;
+	}
+
+	ops->create_files = op_ppc64_create_files;
+	ops->setup = op_ppc64_setup;
+	ops->shutdown = op_ppc64_shutdown;
+	ops->start = op_ppc64_start;
+	ops->stop = op_ppc64_stop;
+
+	printk(KERN_INFO "oprofile: using %s performance monitoring.\n",
+	       ops->cpu_type);
+
+	return 0;
+}
+
+void oprofile_arch_exit(void)
+{
+}
diff --git a/arch/ppc64/oprofile/op_impl.h b/arch/ppc64/oprofile/op_impl.h
new file mode 100644
index 0000000..7fa7eaa
--- /dev/null
+++ b/arch/ppc64/oprofile/op_impl.h
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * Based on alpha version.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef OP_IMPL_H
+#define OP_IMPL_H 1
+
+#define OP_MAX_COUNTER 8
+
+/* Per-counter configuration as set via oprofilefs.  */
+struct op_counter_config {
+	unsigned long valid;
+	unsigned long enabled;
+	unsigned long event;
+	unsigned long count;
+	unsigned long kernel;
+	/* We dont support per counter user/kernel selection */
+	unsigned long user;
+	unsigned long unit_mask;
+};
+
+/* System-wide configuration as set via oprofilefs.  */
+struct op_system_config {
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcra;
+	unsigned long enable_kernel;
+	unsigned long enable_user;
+	unsigned long backtrace_spinlocks;
+};
+
+/* Per-arch configuration */
+struct op_ppc64_model {
+	void (*reg_setup) (struct op_counter_config *,
+			   struct op_system_config *,
+			   int num_counters);
+	void (*cpu_setup) (void *);
+	void (*start) (struct op_counter_config *);
+	void (*stop) (void);
+	void (*handle_interrupt) (struct pt_regs *,
+				  struct op_counter_config *);
+	int num_counters;
+};
+
+static inline unsigned int ctr_read(unsigned int i)
+{
+	switch(i) {
+	case 0:
+		return mfspr(SPRN_PMC1);
+	case 1:
+		return mfspr(SPRN_PMC2);
+	case 2:
+		return mfspr(SPRN_PMC3);
+	case 3:
+		return mfspr(SPRN_PMC4);
+	case 4:
+		return mfspr(SPRN_PMC5);
+	case 5:
+		return mfspr(SPRN_PMC6);
+	case 6:
+		return mfspr(SPRN_PMC7);
+	case 7:
+		return mfspr(SPRN_PMC8);
+	default:
+		return 0;
+	}
+}
+
+static inline void ctr_write(unsigned int i, unsigned int val)
+{
+	switch(i) {
+	case 0:
+		mtspr(SPRN_PMC1, val);
+		break;
+	case 1:
+		mtspr(SPRN_PMC2, val);
+		break;
+	case 2:
+		mtspr(SPRN_PMC3, val);
+		break;
+	case 3:
+		mtspr(SPRN_PMC4, val);
+		break;
+	case 4:
+		mtspr(SPRN_PMC5, val);
+		break;
+	case 5:
+		mtspr(SPRN_PMC6, val);
+		break;
+	case 6:
+		mtspr(SPRN_PMC7, val);
+		break;
+	case 7:
+		mtspr(SPRN_PMC8, val);
+		break;
+	default:
+		break;
+	}
+}
+
+#endif
diff --git a/arch/ppc64/oprofile/op_model_power4.c b/arch/ppc64/oprofile/op_model_power4.c
new file mode 100644
index 0000000..3d103d6
--- /dev/null
+++ b/arch/ppc64/oprofile/op_model_power4.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+#include <asm/systemcfg.h>
+#include <asm/rtas.h>
+
+#define dbg(args...)
+
+#include "op_impl.h"
+
+static unsigned long reset_value[OP_MAX_COUNTER];
+
+static int num_counters;
+static int oprofile_running;
+static int mmcra_has_sihv;
+
+/* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
+static u32 mmcr0_val;
+static u64 mmcr1_val;
+static u32 mmcra_val;
+
+/*
+ * Since we do not have an NMI, backtracing through spinlocks is
+ * only a best guess. In light of this, allow it to be disabled at
+ * runtime.
+ */
+static int backtrace_spinlocks;
+
+static void power4_reg_setup(struct op_counter_config *ctr,
+			     struct op_system_config *sys,
+			     int num_ctrs)
+{
+	int i;
+
+	num_counters = num_ctrs;
+
+	/*
+	 * SIHV / SIPR bits are only implemented on POWER4+ (GQ) and above.
+	 * However we disable it on all POWER4 until we verify it works
+	 * (I was seeing some strange behaviour last time I tried).
+	 *
+	 * It has been verified to work on POWER5 so we enable it there.
+	 */
+	if (cpu_has_feature(CPU_FTR_MMCRA_SIHV))
+		mmcra_has_sihv = 1;
+
+	/*
+	 * The performance counter event settings are given in the mmcr0,
+	 * mmcr1 and mmcra values passed from the user in the
+	 * op_system_config structure (sys variable).
+	 */
+	mmcr0_val = sys->mmcr0;
+	mmcr1_val = sys->mmcr1;
+	mmcra_val = sys->mmcra;
+
+	backtrace_spinlocks = sys->backtrace_spinlocks;
+
+	for (i = 0; i < num_counters; ++i)
+		reset_value[i] = 0x80000000UL - ctr[i].count;
+
+	/* setup user and kernel profiling */
+	if (sys->enable_kernel)
+		mmcr0_val &= ~MMCR0_KERNEL_DISABLE;
+	else
+		mmcr0_val |= MMCR0_KERNEL_DISABLE;
+
+	if (sys->enable_user)
+		mmcr0_val &= ~MMCR0_PROBLEM_DISABLE;
+	else
+		mmcr0_val |= MMCR0_PROBLEM_DISABLE;
+}
+
+extern void ppc64_enable_pmcs(void);
+
+static void power4_cpu_setup(void *unused)
+{
+	unsigned int mmcr0 = mmcr0_val;
+	unsigned long mmcra = mmcra_val;
+
+	ppc64_enable_pmcs();
+
+	/* set the freeze bit */
+	mmcr0 |= MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
+	mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	mtspr(SPRN_MMCR1, mmcr1_val);
+
+	mmcra |= MMCRA_SAMPLE_ENABLE;
+	mtspr(SPRN_MMCRA, mmcra);
+
+	dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
+	    mfspr(SPRN_MMCR0));
+	dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
+	    mfspr(SPRN_MMCR1));
+	dbg("setup on cpu %d, mmcra %lx\n", smp_processor_id(),
+	    mfspr(SPRN_MMCRA));
+}
+
+static void power4_start(struct op_counter_config *ctr)
+{
+	int i;
+	unsigned int mmcr0;
+
+	/* set the PMM bit (see comment below) */
+	mtmsrd(mfmsr() | MSR_PMM);
+
+	for (i = 0; i < num_counters; ++i) {
+		if (ctr[i].enabled) {
+			ctr_write(i, reset_value[i]);
+		} else {
+			ctr_write(i, 0);
+		}
+	}
+
+	mmcr0 = mfspr(SPRN_MMCR0);
+
+	/*
+	 * We must clear the PMAO bit on some (GQ) chips. Just do it
+	 * all the time
+	 */
+	mmcr0 &= ~MMCR0_PMAO;
+
+	/*
+	 * now clear the freeze bit, counting will not start until we
+	 * rfid from this excetion, because only at that point will
+	 * the PMM bit be cleared
+	 */
+	mmcr0 &= ~MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	oprofile_running = 1;
+
+	dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
+}
+
+static void power4_stop(void)
+{
+	unsigned int mmcr0;
+
+	/* freeze counters */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	mmcr0 |= MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	oprofile_running = 0;
+
+	dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
+
+	mb();
+}
+
+/* Fake functions used by canonicalize_pc */
+static void __attribute_used__ hypervisor_bucket(void)
+{
+}
+
+static void __attribute_used__ rtas_bucket(void)
+{
+}
+
+static void __attribute_used__ kernel_unknown_bucket(void)
+{
+}
+
+static unsigned long check_spinlock_pc(struct pt_regs *regs,
+				       unsigned long profile_pc)
+{
+	unsigned long pc = instruction_pointer(regs);
+
+	/*
+	 * If both the SIAR (sampled instruction) and the perfmon exception
+	 * occurred in a spinlock region then we account the sample to the
+	 * calling function. This isnt 100% correct, we really need soft
+	 * IRQ disable so we always get the perfmon exception at the
+	 * point at which the SIAR is set.
+	 */
+	if (backtrace_spinlocks && in_lock_functions(pc) &&
+			in_lock_functions(profile_pc))
+		return regs->link;
+	else
+		return profile_pc;
+}
+
+/*
+ * On GQ and newer the MMCRA stores the HV and PR bits at the time
+ * the SIAR was sampled. We use that to work out if the SIAR was sampled in
+ * the hypervisor, our exception vectors or RTAS.
+ */
+static unsigned long get_pc(struct pt_regs *regs)
+{
+	unsigned long pc = mfspr(SPRN_SIAR);
+	unsigned long mmcra;
+
+	/* Cant do much about it */
+	if (!mmcra_has_sihv)
+		return check_spinlock_pc(regs, pc);
+
+	mmcra = mfspr(SPRN_MMCRA);
+
+	/* Were we in the hypervisor? */
+	if ((systemcfg->platform == PLATFORM_PSERIES_LPAR) &&
+	    (mmcra & MMCRA_SIHV))
+		/* function descriptor madness */
+		return *((unsigned long *)hypervisor_bucket);
+
+	/* We were in userspace, nothing to do */
+	if (mmcra & MMCRA_SIPR)
+		return pc;
+
+#ifdef CONFIG_PPC_RTAS
+	/* Were we in RTAS? */
+	if (pc >= rtas.base && pc < (rtas.base + rtas.size))
+		/* function descriptor madness */
+		return *((unsigned long *)rtas_bucket);
+#endif
+
+	/* Were we in our exception vectors or SLB real mode miss handler? */
+	if (pc < 0x1000000UL)
+		return (unsigned long)__va(pc);
+
+	/* Not sure where we were */
+	if (pc < KERNELBASE)
+		/* function descriptor madness */
+		return *((unsigned long *)kernel_unknown_bucket);
+
+	return check_spinlock_pc(regs, pc);
+}
+
+static int get_kernel(unsigned long pc)
+{
+	int is_kernel;
+
+	if (!mmcra_has_sihv) {
+		is_kernel = (pc >= KERNELBASE);
+	} else {
+		unsigned long mmcra = mfspr(SPRN_MMCRA);
+		is_kernel = ((mmcra & MMCRA_SIPR) == 0);
+	}
+
+	return is_kernel;
+}
+
+static void power4_handle_interrupt(struct pt_regs *regs,
+				    struct op_counter_config *ctr)
+{
+	unsigned long pc;
+	int is_kernel;
+	int val;
+	int i;
+	unsigned int mmcr0;
+
+	pc = get_pc(regs);
+	is_kernel = get_kernel(pc);
+
+	/* set the PMM bit (see comment below) */
+	mtmsrd(mfmsr() | MSR_PMM);
+
+	for (i = 0; i < num_counters; ++i) {
+		val = ctr_read(i);
+		if (val < 0) {
+			if (oprofile_running && ctr[i].enabled) {
+				oprofile_add_pc(pc, is_kernel, i);
+				ctr_write(i, reset_value[i]);
+			} else {
+				ctr_write(i, 0);
+			}
+		}
+	}
+
+	mmcr0 = mfspr(SPRN_MMCR0);
+
+	/* reset the perfmon trigger */
+	mmcr0 |= MMCR0_PMXE;
+
+	/*
+	 * We must clear the PMAO bit on some (GQ) chips. Just do it
+	 * all the time
+	 */
+	mmcr0 &= ~MMCR0_PMAO;
+
+	/*
+	 * now clear the freeze bit, counting will not start until we
+	 * rfid from this exception, because only at that point will
+	 * the PMM bit be cleared
+	 */
+	mmcr0 &= ~MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+}
+
+struct op_ppc64_model op_model_power4 = {
+	.reg_setup		= power4_reg_setup,
+	.cpu_setup		= power4_cpu_setup,
+	.start			= power4_start,
+	.stop			= power4_stop,
+	.handle_interrupt	= power4_handle_interrupt,
+};
diff --git a/arch/ppc64/oprofile/op_model_rs64.c b/arch/ppc64/oprofile/op_model_rs64.c
new file mode 100644
index 0000000..bcec506
--- /dev/null
+++ b/arch/ppc64/oprofile/op_model_rs64.c
@@ -0,0 +1,219 @@
+/*
+ * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <asm/ptrace.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/cputable.h>
+
+#define dbg(args...)
+
+#include "op_impl.h"
+
+static void ctrl_write(unsigned int i, unsigned int val)
+{
+	unsigned int tmp = 0;
+	unsigned long shift = 0, mask = 0;
+
+	dbg("ctrl_write %d %x\n", i, val);
+
+	switch(i) {
+	case 0:
+		tmp = mfspr(SPRN_MMCR0);
+		shift = 6;
+		mask = 0x7F;
+		break;
+	case 1:
+		tmp = mfspr(SPRN_MMCR0);
+		shift = 0;
+		mask = 0x3F;
+		break;
+	case 2:
+		tmp = mfspr(SPRN_MMCR1);
+		shift = 31 - 4;
+		mask = 0x1F;
+		break;
+	case 3:
+		tmp = mfspr(SPRN_MMCR1);
+		shift = 31 - 9;
+		mask = 0x1F;
+		break;
+	case 4:
+		tmp = mfspr(SPRN_MMCR1);
+		shift = 31 - 14;
+		mask = 0x1F;
+		break;
+	case 5:
+		tmp = mfspr(SPRN_MMCR1);
+		shift = 31 - 19;
+		mask = 0x1F;
+		break;
+	case 6:
+		tmp = mfspr(SPRN_MMCR1);
+		shift = 31 - 24;
+		mask = 0x1F;
+		break;
+	case 7:
+		tmp = mfspr(SPRN_MMCR1);
+		shift = 31 - 28;
+		mask = 0xF;
+		break;
+	}
+
+	tmp = tmp & ~(mask << shift);
+	tmp |= val << shift;
+
+	switch(i) {
+		case 0:
+		case 1:
+			mtspr(SPRN_MMCR0, tmp);
+			break;
+		default:
+			mtspr(SPRN_MMCR1, tmp);
+	}
+
+	dbg("ctrl_write mmcr0 %lx mmcr1 %lx\n", mfspr(SPRN_MMCR0),
+	       mfspr(SPRN_MMCR1));
+}
+
+static unsigned long reset_value[OP_MAX_COUNTER];
+
+static int num_counters;
+
+static void rs64_reg_setup(struct op_counter_config *ctr,
+			   struct op_system_config *sys,
+			   int num_ctrs)
+{
+	int i;
+
+	num_counters = num_ctrs;
+
+	for (i = 0; i < num_counters; ++i)
+		reset_value[i] = 0x80000000UL - ctr[i].count;
+
+	/* XXX setup user and kernel profiling */
+}
+
+static void rs64_cpu_setup(void *unused)
+{
+	unsigned int mmcr0;
+
+	/* reset MMCR0 and set the freeze bit */
+	mmcr0 = MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	/* reset MMCR1, MMCRA */
+	mtspr(SPRN_MMCR1, 0);
+
+	if (cpu_has_feature(CPU_FTR_MMCRA))
+		mtspr(SPRN_MMCRA, 0);
+
+	mmcr0 |= MMCR0_FCM1|MMCR0_PMXE|MMCR0_FCECE;
+	/* Only applies to POWER3, but should be safe on RS64 */
+	mmcr0 |= MMCR0_PMC1CE|MMCR0_PMCjCE;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	dbg("setup on cpu %d, mmcr0 %lx\n", smp_processor_id(),
+	    mfspr(SPRN_MMCR0));
+	dbg("setup on cpu %d, mmcr1 %lx\n", smp_processor_id(),
+	    mfspr(SPRN_MMCR1));
+}
+
+static void rs64_start(struct op_counter_config *ctr)
+{
+	int i;
+	unsigned int mmcr0;
+
+	/* set the PMM bit (see comment below) */
+	mtmsrd(mfmsr() | MSR_PMM);
+
+	for (i = 0; i < num_counters; ++i) {
+		if (ctr[i].enabled) {
+			ctr_write(i, reset_value[i]);
+			ctrl_write(i, ctr[i].event);
+		} else {
+			ctr_write(i, 0);
+		}
+	}
+
+	mmcr0 = mfspr(SPRN_MMCR0);
+
+	/*
+	 * now clear the freeze bit, counting will not start until we
+	 * rfid from this excetion, because only at that point will
+	 * the PMM bit be cleared
+	 */
+	mmcr0 &= ~MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	dbg("start on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
+}
+
+static void rs64_stop(void)
+{
+	unsigned int mmcr0;
+
+	/* freeze counters */
+	mmcr0 = mfspr(SPRN_MMCR0);
+	mmcr0 |= MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+
+	dbg("stop on cpu %d, mmcr0 %x\n", smp_processor_id(), mmcr0);
+
+	mb();
+}
+
+static void rs64_handle_interrupt(struct pt_regs *regs,
+				  struct op_counter_config *ctr)
+{
+	unsigned int mmcr0;
+	int val;
+	int i;
+	unsigned long pc = mfspr(SPRN_SIAR);
+	int is_kernel = (pc >= KERNELBASE);
+
+	/* set the PMM bit (see comment below) */
+	mtmsrd(mfmsr() | MSR_PMM);
+
+	for (i = 0; i < num_counters; ++i) {
+		val = ctr_read(i);
+		if (val < 0) {
+			if (ctr[i].enabled) {
+				oprofile_add_pc(pc, is_kernel, i);
+				ctr_write(i, reset_value[i]);
+			} else {
+				ctr_write(i, 0);
+			}
+		}
+	}
+
+	mmcr0 = mfspr(SPRN_MMCR0);
+
+	/* reset the perfmon trigger */
+	mmcr0 |= MMCR0_PMXE;
+
+	/*
+	 * now clear the freeze bit, counting will not start until we
+	 * rfid from this exception, because only at that point will
+	 * the PMM bit be cleared
+	 */
+	mmcr0 &= ~MMCR0_FC;
+	mtspr(SPRN_MMCR0, mmcr0);
+}
+
+struct op_ppc64_model op_model_rs64 = {
+	.reg_setup		= rs64_reg_setup,
+	.cpu_setup		= rs64_cpu_setup,
+	.start			= rs64_start,
+	.stop			= rs64_stop,
+	.handle_interrupt	= rs64_handle_interrupt,
+};