[SPARC]: Try to start getting SMP back into shape.

Todo items:
 - IRQ_INPROGRESS flag - use sparc64 irq buckets, or generic irq_desc?
 - sun4d
 - re-indent large chunks of sun4m_smp.c
 - some places assume sequential cpu numbering (i.e. 0,1 instead of 0,2)

Last I checked (with 2.6.14), random programs segfault with dual
HyperSPARC.  And with SuperSPARC II's, it seems stable but will
eventually die from a write lock error (wrong lock owner or something).

I haven't tried the HyperSPARC + highmem combination recently, so that
may still be a problem.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index 1dde312..70b375a 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -40,15 +40,11 @@
 extern void calibrate_delay(void);
 
 extern volatile int smp_processors_ready;
-extern int smp_num_cpus;
 extern volatile unsigned long cpu_callin_map[NR_CPUS];
 extern unsigned char boot_cpu_id;
-extern int smp_activated;
-extern volatile int __cpu_number_map[NR_CPUS];
-extern volatile int __cpu_logical_map[NR_CPUS];
-extern volatile unsigned long ipi_count;
-extern volatile int smp_process_available;
-extern volatile int smp_commenced;
+
+extern cpumask_t smp_commenced_mask;
+
 extern int __smp4m_processor_id(void);
 
 /*#define SMP_DEBUG*/
@@ -77,8 +73,6 @@
 	local_flush_cache_all();
 	local_flush_tlb_all();
 
-	set_irq_udt(boot_cpu_id);
-
 	/* Get our local ticker going. */
 	smp_setup_percpu_timer();
 
@@ -95,8 +89,9 @@
 	 * to call the scheduler code.
 	 */
 	/* Allow master to continue. */
-	swap((unsigned long *)&cpu_callin_map[cpuid], 1);
+	swap(&cpu_callin_map[cpuid], 1);
 
+	/* XXX: What's up with all the flushes? */
 	local_flush_cache_all();
 	local_flush_tlb_all();
 	
@@ -111,13 +106,14 @@
 	atomic_inc(&init_mm.mm_count);
 	current->active_mm = &init_mm;
 
-	while(!smp_commenced)
-		barrier();
-
-	local_flush_cache_all();
-	local_flush_tlb_all();
+	while (!cpu_isset(cpuid, smp_commenced_mask))
+		mb();
 
 	local_irq_enable();
+
+	cpu_set(cpuid, cpu_online_map);
+	/* last one in gets all the interrupts (for testing) */
+	set_irq_udt(boot_cpu_id);
 }
 
 extern void init_IRQ(void);
@@ -134,102 +130,76 @@
 
 void __init smp4m_boot_cpus(void)
 {
-	int cpucount = 0;
-	int i, mid;
-
-	printk("Entering SMP Mode...\n");
-
-	local_irq_enable();
-	cpus_clear(cpu_present_map);
-
-	for (i = 0; !cpu_find_by_instance(i, NULL, &mid); i++)
-		cpu_set(mid, cpu_present_map);
-
-	for(i=0; i < NR_CPUS; i++) {
-		__cpu_number_map[i] = -1;
-		__cpu_logical_map[i] = -1;
-	}
-
-	__cpu_number_map[boot_cpu_id] = 0;
-	__cpu_logical_map[0] = boot_cpu_id;
-	current_thread_info()->cpu = boot_cpu_id;
-
-	smp_store_cpu_info(boot_cpu_id);
-	set_irq_udt(boot_cpu_id);
 	smp_setup_percpu_timer();
 	local_flush_cache_all();
-	if(cpu_find_by_instance(1, NULL, NULL))
-		return;  /* Not an MP box. */
-	for(i = 0; i < NR_CPUS; i++) {
-		if(i == boot_cpu_id)
-			continue;
+}
 
-		if (cpu_isset(i, cpu_present_map)) {
-			extern unsigned long sun4m_cpu_startup;
-			unsigned long *entry = &sun4m_cpu_startup;
-			struct task_struct *p;
-			int timeout;
+int smp4m_boot_one_cpu(int i)
+{
+	extern unsigned long sun4m_cpu_startup;
+	unsigned long *entry = &sun4m_cpu_startup;
+	struct task_struct *p;
+	int timeout;
+	int cpu_node;
 
-			/* Cook up an idler for this guy. */
-			p = fork_idle(i);
-			cpucount++;
-			current_set[i] = task_thread_info(p);
-			/* See trampoline.S for details... */
-			entry += ((i-1) * 3);
+	cpu_find_by_mid(i, &cpu_node);
 
-			/*
-			 * Initialize the contexts table
-			 * Since the call to prom_startcpu() trashes the structure,
-			 * we need to re-initialize it for each cpu
-			 */
-			smp_penguin_ctable.which_io = 0;
-			smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys;
-			smp_penguin_ctable.reg_size = 0;
+	/* Cook up an idler for this guy. */
+	p = fork_idle(i);
+	current_set[i] = task_thread_info(p);
+	/* See trampoline.S for details... */
+	entry += ((i-1) * 3);
 
-			/* whirrr, whirrr, whirrrrrrrrr... */
-			printk("Starting CPU %d at %p\n", i, entry);
-			local_flush_cache_all();
-			prom_startcpu(cpu_data(i).prom_node,
-				      &smp_penguin_ctable, 0, (char *)entry);
+	/*
+	 * Initialize the contexts table
+	 * Since the call to prom_startcpu() trashes the structure,
+	 * we need to re-initialize it for each cpu
+	 */
+	smp_penguin_ctable.which_io = 0;
+	smp_penguin_ctable.phys_addr = (unsigned int) srmmu_ctx_table_phys;
+	smp_penguin_ctable.reg_size = 0;
 
-			/* wheee... it's going... */
-			for(timeout = 0; timeout < 10000; timeout++) {
-				if(cpu_callin_map[i])
-					break;
-				udelay(200);
-			}
-			if(cpu_callin_map[i]) {
-				/* Another "Red Snapper". */
-				__cpu_number_map[i] = i;
-				__cpu_logical_map[i] = i;
-			} else {
-				cpucount--;
-				printk("Processor %d is stuck.\n", i);
-			}
-		}
-		if(!(cpu_callin_map[i])) {
-			cpu_clear(i, cpu_present_map);
-			__cpu_number_map[i] = -1;
-		}
-	}
+	/* whirrr, whirrr, whirrrrrrrrr... */
+	printk("Starting CPU %d at %p\n", i, entry);
 	local_flush_cache_all();
-	if(cpucount == 0) {
-		printk("Error: only one Processor found.\n");
-		cpu_present_map = cpumask_of_cpu(smp_processor_id());
-	} else {
-		unsigned long bogosum = 0;
-		for_each_present_cpu(i)
-			bogosum += cpu_data(i).udelay_val;
-		printk("Total of %d Processors activated (%lu.%02lu BogoMIPS).\n",
-		       cpucount + 1,
-		       bogosum/(500000/HZ),
-		       (bogosum/(5000/HZ))%100);
-		smp_activated = 1;
-		smp_num_cpus = cpucount + 1;
+	prom_startcpu(cpu_node,
+		      &smp_penguin_ctable, 0, (char *)entry);
+
+	/* wheee... it's going... */
+	for(timeout = 0; timeout < 10000; timeout++) {
+		if(cpu_callin_map[i])
+			break;
+		udelay(200);
 	}
 
+	if (!(cpu_callin_map[i])) {
+		printk("Processor %d is stuck.\n", i);
+		return -ENODEV;
+	}
+
+	local_flush_cache_all();
+	return 0;
+}
+
+void __init smp4m_smp_done(void)
+{
+	int i, first;
+	int *prev;
+
+	/* setup cpu list for irq rotation */
+	first = 0;
+	prev = &first;
+	for (i = 0; i < NR_CPUS; i++) {
+		if (cpu_online(i)) {
+			*prev = i;
+			prev = &cpu_data(i).next;
+		}
+	}
+	*prev = first;
+	local_flush_cache_all();
+
 	/* Free unneeded trap tables */
-	if (!cpu_isset(i, cpu_present_map)) {
+	if (!cpu_isset(1, cpu_present_map)) {
 		ClearPageReserved(virt_to_page(trapbase_cpu1));
 		init_page_count(virt_to_page(trapbase_cpu1));
 		free_page((unsigned long)trapbase_cpu1);
@@ -263,6 +233,9 @@
  */
 void smp4m_irq_rotate(int cpu)
 {
+	int next = cpu_data(cpu).next;
+	if (next != cpu)
+		set_irq_udt(next);
 }
 
 /* Cross calls, in order to work efficiently and atomically do all
@@ -289,7 +262,7 @@
 
 	smp_cpu_in_msg[me]++;
 	if(target == MSG_ALL_BUT_SELF || target == MSG_ALL) {
-		mask = cpu_present_map;
+		mask = cpu_online_map;
 		if(target == MSG_ALL_BUT_SELF)
 			cpu_clear(me, mask);
 		for(i = 0; i < 4; i++) {
@@ -314,8 +287,8 @@
 	unsigned long arg3;
 	unsigned long arg4;
 	unsigned long arg5;
-	unsigned long processors_in[NR_CPUS];  /* Set when ipi entered. */
-	unsigned long processors_out[NR_CPUS]; /* Set when ipi exited. */
+	unsigned long processors_in[SUN4M_NCPUS];  /* Set when ipi entered. */
+	unsigned long processors_out[SUN4M_NCPUS]; /* Set when ipi exited. */
 } ccall_info;
 
 static DEFINE_SPINLOCK(cross_call_lock);
@@ -324,8 +297,7 @@
 void smp4m_cross_call(smpfunc_t func, unsigned long arg1, unsigned long arg2,
 		    unsigned long arg3, unsigned long arg4, unsigned long arg5)
 {
-	if(smp_processors_ready) {
-		register int ncpus = smp_num_cpus;
+		register int ncpus = SUN4M_NCPUS;
 		unsigned long flags;
 
 		spin_lock_irqsave(&cross_call_lock, flags);
@@ -340,7 +312,7 @@
 
 		/* Init receive/complete mapping, plus fire the IPI's off. */
 		{
-			cpumask_t mask = cpu_present_map;
+			cpumask_t mask = cpu_online_map;
 			register int i;
 
 			cpu_clear(smp_processor_id(), mask);
@@ -373,7 +345,6 @@
 		}
 
 		spin_unlock_irqrestore(&cross_call_lock, flags);
-	}
 }
 
 /* Running cross calls. */