sched: zap the migration init / cache-hot balancing code
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 1fac215..40e40f9 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -10,7 +10,6 @@
 #include <linux/pagemap.h>
 #include <linux/threads.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/delay.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
 #include <asm/prom.h>
+#include <asm/mdesc.h>
 
 extern void calibrate_delay(void);
 
+int sparc64_multi_core __read_mostly;
+
 /* Please don't make this stuff initdata!!!  --DaveM */
 unsigned char boot_cpu_id;
 
@@ -51,6 +53,8 @@ cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
 cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
 cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
        { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
+cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
+       { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
 static cpumask_t smp_commenced_mask;
 static cpumask_t cpu_callout_map;
 
@@ -76,53 +80,6 @@ void smp_bogo(struct seq_file *m)
                           i, cpu_data(i).clock_tick);
 }
 
-void __init smp_store_cpu_info(int id)
-{
-       struct device_node *dp;
-       int def;
-
-       cpu_data(id).udelay_val                 = loops_per_jiffy;
-
-       cpu_find_by_mid(id, &dp);
-       cpu_data(id).clock_tick =
-               of_getintprop_default(dp, "clock-frequency", 0);
-
-       def = ((tlb_type == hypervisor) ? (8 * 1024) : (16 * 1024));
-       cpu_data(id).dcache_size =
-               of_getintprop_default(dp, "dcache-size", def);
-
-       def = 32;
-       cpu_data(id).dcache_line_size =
-               of_getintprop_default(dp, "dcache-line-size", def);
-
-       def = 16 * 1024;
-       cpu_data(id).icache_size =
-               of_getintprop_default(dp, "icache-size", def);
-
-       def = 32;
-       cpu_data(id).icache_line_size =
-               of_getintprop_default(dp, "icache-line-size", def);
-
-       def = ((tlb_type == hypervisor) ?
-              (3 * 1024 * 1024) :
-              (4 * 1024 * 1024));
-       cpu_data(id).ecache_size =
-               of_getintprop_default(dp, "ecache-size", def);
-
-       def = 64;
-       cpu_data(id).ecache_line_size =
-               of_getintprop_default(dp, "ecache-line-size", def);
-
-       printk("CPU[%d]: Caches "
-              "D[sz(%d):line_sz(%d)] "
-              "I[sz(%d):line_sz(%d)] "
-              "E[sz(%d):line_sz(%d)]\n",
-              id,
-              cpu_data(id).dcache_size, cpu_data(id).dcache_line_size,
-              cpu_data(id).icache_size, cpu_data(id).icache_line_size,
-              cpu_data(id).ecache_size, cpu_data(id).ecache_line_size);
-}
-
 extern void setup_sparc64_timer(void);
 
 static volatile unsigned long callin_flag = 0;
@@ -146,7 +103,7 @@ void __init smp_callin(void)
        local_irq_enable();
 
        calibrate_delay();
-       smp_store_cpu_info(cpuid);
+       cpu_data(cpuid).udelay_val = loops_per_jiffy;
        callin_flag = 1;
        __asm__ __volatile__("membar #Sync\n\t"
                             "flush  %%g6" : : : "memory");
@@ -341,9 +298,8 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
 
                prom_startcpu_cpuid(cpu, entry, cookie);
        } else {
-               struct device_node *dp;
+               struct device_node *dp = of_find_node_by_cpuid(cpu);
 
-               cpu_find_by_mid(cpu, &dp);
                prom_startcpu(dp->node, entry, cookie);
        }
 
@@ -448,7 +404,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
 {
        u64 pstate, ver;
-       int nack_busy_id, is_jbus;
+       int nack_busy_id, is_jbus, need_more;
 
        if (cpus_empty(mask))
                return;
@@ -464,6 +420,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
        __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
 
 retry:
+       need_more = 0;
        __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
                             : : "r" (pstate), "i" (PSTATE_IE));
 
@@ -492,6 +449,10 @@ retry:
                                : /* no outputs */
                                : "r" (target), "i" (ASI_INTR_W));
                        nack_busy_id++;
+                       if (nack_busy_id == 32) {
+                               need_more = 1;
+                               break;
+                       }
                }
        }
 
@@ -508,6 +469,16 @@ retry:
                        if (dispatch_stat == 0UL) {
                                __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
                                                     : : "r" (pstate));
+                               if (unlikely(need_more)) {
+                                       int i, cnt = 0;
+                                       for_each_cpu_mask(i, mask) {
+                                               cpu_clear(i, mask);
+                                               cnt++;
+                                               if (cnt == 32)
+                                                       break;
+                                       }
+                                       goto retry;
+                               }
                                return;
                        }
                        if (!--stuck)
@@ -545,6 +516,8 @@ retry:
                                if ((dispatch_stat & check_mask) == 0)
                                        cpu_clear(i, mask);
                                this_busy_nack += 2;
+                               if (this_busy_nack == 64)
+                                       break;
                        }
 
                        goto retry;
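
The hunks above cap each Cheetah cross-call dispatch at 32 targets: the dispatch status register holds one busy/nack bit pair per entry (which is why this_busy_nack steps by 2 and stops at 64), so when the mask holds more CPUs the first 32 are sent, cleared from the mask once they complete, and the remainder is handled on the next retry pass. Below is a standalone sketch of that chunking pattern, using a hypothetical cpu_mask_t bitmask and send_xcall() helper rather than the kernel's cpumask API; it simplifies by clearing each target as it is sent.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for a CPU mask: one bit per CPU, up to 64 CPUs. */
typedef uint64_t cpu_mask_t;

#define DISPATCH_LIMIT 32	/* one busy/nack pair per entry in a 64-bit status word */

/* Pretend to deliver a cross-call to one CPU. */
static void send_xcall(int cpu)
{
	printf("xcall -> cpu %d\n", cpu);
}

/* Deliver to every CPU in mask, at most DISPATCH_LIMIT targets per pass. */
static void xcall_deliver_chunked(cpu_mask_t mask)
{
	while (mask) {
		int sent = 0;

		for (int cpu = 0; cpu < 64 && sent < DISPATCH_LIMIT; cpu++) {
			if (mask & (1ULL << cpu)) {
				send_xcall(cpu);
				mask &= ~(1ULL << cpu);	/* like cpu_clear() */
				sent++;
			}
		}
		/* In the patched kernel code this corresponds to the
		 * "goto retry" taken when need_more was set and the
		 * first 32 dispatches all completed. */
	}
}

int main(void)
{
	xcall_deliver_chunked(0xffffffffffULL);	/* 40 CPUs set: two passes */
	return 0;
}
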
@@ -562,6 +535,9 @@ static void hypervisor_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t
        unsigned long flags, status;
        int cnt, retries, this_cpu, prev_sent, i;
 
+       if (cpus_empty(mask))
+               return;
+
        /* We have to do this whole thing with interrupts fully disabled.
         * Otherwise if we send an xcall from interrupt context it will
         * corrupt both our mondo block and cpu list state.
@@ -1187,101 +1163,64 @@ int setup_profiling_timer(unsigned int multiplier)
        return -EINVAL;
 }
 
-static void __init smp_tune_scheduling(void)
-{
-       struct device_node *dp;
-       int instance;
-       unsigned int def, smallest = ~0U;
-
-       def = ((tlb_type == hypervisor) ?
-              (3 * 1024 * 1024) :
-              (4 * 1024 * 1024));
-
-       instance = 0;
-       while (!cpu_find_by_instance(instance, &dp, NULL)) {
-               unsigned int val;
-
-               val = of_getintprop_default(dp, "ecache-size", def);
-               if (val < smallest)
-                       smallest = val;
-
-               instance++;
-       }
-
-       /* Any value less than 256K is nonsense.  */
-       if (smallest < (256U * 1024U))
-               smallest = 256 * 1024;
-
-       max_cache_size = smallest;
-
-       if (smallest < 1U * 1024U * 1024U)
-               printk(KERN_INFO "Using max_cache_size of %uKB\n",
-                      smallest / 1024U);
-       else
-               printk(KERN_INFO "Using max_cache_size of %uMB\n",
-                      smallest / 1024U / 1024U);
-}
-
 /* Constrain the number of cpus to max_cpus.  */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
        int i;
 
        if (num_possible_cpus() > max_cpus) {
-               int instance, mid;
-
-               instance = 0;
-               while (!cpu_find_by_instance(instance, NULL, &mid)) {
-                       if (mid != boot_cpu_id) {
-                               cpu_clear(mid, phys_cpu_present_map);
-                               cpu_clear(mid, cpu_present_map);
+               for_each_possible_cpu(i) {
+                       if (i != boot_cpu_id) {
+                               cpu_clear(i, phys_cpu_present_map);
+                               cpu_clear(i, cpu_present_map);
                                if (num_possible_cpus() <= max_cpus)
                                        break;
                        }
-                       instance++;
                }
        }
 
+       cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
+}
+
+void __devinit smp_prepare_boot_cpu(void)
+{
+}
+
+void __devinit smp_fill_in_sib_core_maps(void)
+{
+       unsigned int i;
+
        for_each_possible_cpu(i) {
-               if (tlb_type == hypervisor) {
-                       int j;
+               unsigned int j;
 
-                       /* XXX get this mapping from machine description */
-                       for_each_possible_cpu(j) {
-                               if ((j >> 2) == (i >> 2))
-                                       cpu_set(j, cpu_sibling_map[i]);
-                       }
-               } else {
-                       cpu_set(i, cpu_sibling_map[i]);
+               if (cpu_data(i).core_id == 0) {
+                       cpu_set(i, cpu_core_map[i]);
+                       continue;
+               }
+
+               for_each_possible_cpu(j) {
+                       if (cpu_data(i).core_id ==
+                           cpu_data(j).core_id)
+                               cpu_set(j, cpu_core_map[i]);
                }
        }
 
-       smp_store_cpu_info(boot_cpu_id);
-       smp_tune_scheduling();
-}
+       for_each_possible_cpu(i) {
+               unsigned int j;
 
-/* Set this up early so that things like the scheduler can init
- * properly.  We use the same cpu mask for both the present and
- * possible cpu map.
- */
-void __init smp_setup_cpu_possible_map(void)
-{
-       int instance, mid;
+               if (cpu_data(i).proc_id == -1) {
+                       cpu_set(i, cpu_sibling_map[i]);
+                       continue;
+               }
 
-       instance = 0;
-       while (!cpu_find_by_instance(instance, NULL, &mid)) {
-               if (mid < NR_CPUS) {
-                       cpu_set(mid, phys_cpu_present_map);
-                       cpu_set(mid, cpu_present_map);
+               for_each_possible_cpu(j) {
+                       if (cpu_data(i).proc_id ==
+                           cpu_data(j).proc_id)
+                               cpu_set(j, cpu_sibling_map[i]);
                }
-               instance++;
        }
 }
 
-void __devinit smp_prepare_boot_cpu(void)
-{
-}
-
 int __cpuinit __cpu_up(unsigned int cpu)
 {
        int ret = smp_boot_one_cpu(cpu);
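
The new smp_fill_in_sib_core_maps() above derives the topology masks directly from per-cpu data: CPUs sharing a core_id end up in each other's cpu_core_map, CPUs sharing a proc_id become siblings in cpu_sibling_map, and a CPU that reports no id (core_id == 0, proc_id == -1) is mapped only to itself. The following is a standalone sketch of the same grouping, using hypothetical core_id/proc_id arrays and plain bitmasks in place of cpu_data() and cpumask_t.

#include <stdio.h>

#define NCPUS 8

/* Hypothetical per-cpu topology ids; in the kernel these are
 * cpu_data(i).core_id and cpu_data(i).proc_id, filled in from the
 * machine description. */
static int core_id[NCPUS] = { 1, 1, 2, 2, 0, 3, 3, 3 };
static int proc_id[NCPUS] = { 0, 0, 0, 0, -1, 1, 1, 1 };

static unsigned int core_map[NCPUS];	/* bit j set: cpu j shares cpu i's core */
static unsigned int sibling_map[NCPUS];	/* bit j set: cpu j shares cpu i's proc */

static void fill_in_sib_core_maps(void)
{
	for (int i = 0; i < NCPUS; i++) {
		if (core_id[i] == 0) {		/* no core info: self only */
			core_map[i] = 1u << i;
		} else {
			for (int j = 0; j < NCPUS; j++)
				if (core_id[i] == core_id[j])
					core_map[i] |= 1u << j;
		}

		if (proc_id[i] == -1) {		/* no proc info: self only */
			sibling_map[i] = 1u << i;
		} else {
			for (int j = 0; j < NCPUS; j++)
				if (proc_id[i] == proc_id[j])
					sibling_map[i] |= 1u << j;
		}
	}
}

int main(void)
{
	fill_in_sib_core_maps();
	for (int i = 0; i < NCPUS; i++)
		printf("cpu%d core_map=%#x sibling_map=%#x\n",
		       i, core_map[i], sibling_map[i]);
	return 0;
}
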
@@ -1335,7 +1274,7 @@ unsigned long __per_cpu_shift __read_mostly;
 EXPORT_SYMBOL(__per_cpu_base);
 EXPORT_SYMBOL(__per_cpu_shift);
 
-void __init setup_per_cpu_areas(void)
+void __init real_setup_per_cpu_areas(void)
 {
        unsigned long goal, size, i;
        char *ptr;