cpuidle: Single/Global registration of idle states
This patch makes the cpuidle_states structure global (single copy)
instead of per-cpu. The statistics needed on per-cpu basis
by the governor are kept per-cpu. This simplifies the cpuidle
subsystem as state registration is done by single cpu only.
Having single copy of cpuidle_states saves memory. Rare case
of asymmetric C-states can be handled within the cpuidle driver
and architectures such as POWER do not have asymmetric C-states.
Having single/global registration of all the idle states,
dynamic C-state transitions on x86 are handled by
the boot cpu. Here, the boot cpu would disable all the devices,
re-populate the states and later enable all the devices,
irrespective of the cpu that would receive the notification first.
Reference:
https://lkml.org/lkml/2011/4/25/83
Signed-off-by: Deepthi Dharwar <deepthi@linux.vnet.ibm.com>
Signed-off-by: Trinabh Gupta <g.trinabh@gmail.com>
Tested-by: Jean Pihet <j-pihet@ti.com>
Reviewed-by: Kevin Hilman <khilman@ti.com>
Acked-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Kevin Hilman <khilman@ti.com>
Signed-off-by: Len Brown <len.brown@intel.com>
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 3aa8d4c..5be9d59 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -81,7 +81,8 @@
static unsigned int lapic_timer_reliable_states = (1 << 1); /* Default to only C1 */
static struct cpuidle_device __percpu *intel_idle_cpuidle_devices;
-static int intel_idle(struct cpuidle_device *dev, int index);
+static int intel_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index);
static struct cpuidle_state *cpuidle_state_table;
@@ -227,13 +228,15 @@
/**
* intel_idle
* @dev: cpuidle_device
+ * @drv: cpuidle driver
* @index: index of cpuidle state
*
*/
-static int intel_idle(struct cpuidle_device *dev, int index)
+static int intel_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
{
unsigned long ecx = 1; /* break on interrupt flag */
- struct cpuidle_state *state = &dev->states[index];
+ struct cpuidle_state *state = &drv->states[index];
struct cpuidle_state_usage *state_usage = &dev->states_usage[index];
unsigned long eax = (unsigned long)cpuidle_get_statedata(state_usage);
unsigned int cstate;
@@ -420,6 +423,60 @@
return;
}
/*
+ * intel_idle_cpuidle_driver_init()
+ * allocate, initialize cpuidle_states
+ */
+static int intel_idle_cpuidle_driver_init(void)
+{
+ int cstate;
+ struct cpuidle_driver *drv = &intel_idle_driver;
+
+ drv->state_count = 1;
+
+ for (cstate = 1; cstate < MWAIT_MAX_NUM_CSTATES; ++cstate) {
+ int num_substates;
+
+ if (cstate > max_cstate) {
+ printk(PREFIX "max_cstate %d reached\n",
+ max_cstate);
+ break;
+ }
+
+ /* does the state exist in CPUID.MWAIT? */
+ num_substates = (mwait_substates >> ((cstate) * 4))
+ & MWAIT_SUBSTATE_MASK;
+ if (num_substates == 0)
+ continue;
+ /* is the state not enabled? */
+ if (cpuidle_state_table[cstate].enter == NULL) {
+ /* does the driver not know about the state? */
+ if (*cpuidle_state_table[cstate].name == '\0')
+ pr_debug(PREFIX "unaware of model 0x%x"
+ " MWAIT %d please"
+ " contact lenb@kernel.org",
+ boot_cpu_data.x86_model, cstate);
+ continue;
+ }
+
+ if ((cstate > 2) &&
+ !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ mark_tsc_unstable("TSC halts in idle"
+ " states deeper than C2");
+
+ drv->states[drv->state_count] = /* structure copy */
+ cpuidle_state_table[cstate];
+
+ drv->state_count += 1;
+ }
+
+ if (auto_demotion_disable_flags)
+ smp_call_function(auto_demotion_disable, NULL, 1);
+
+ return 0;
+}
+
+
+/*
* intel_idle_cpuidle_devices_init()
* allocate, initialize, register cpuidle_devices
*/
@@ -453,23 +510,9 @@
continue;
/* is the state not enabled? */
if (cpuidle_state_table[cstate].enter == NULL) {
- /* does the driver not know about the state? */
- if (*cpuidle_state_table[cstate].name == '\0')
- pr_debug(PREFIX "unaware of model 0x%x"
- " MWAIT %d please"
- " contact lenb@kernel.org",
- boot_cpu_data.x86_model, cstate);
continue;
}
- if ((cstate > 2) &&
- !boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
- mark_tsc_unstable("TSC halts in idle"
- " states deeper than C2");
-
- dev->states[dev->state_count] = /* structure copy */
- cpuidle_state_table[cstate];
-
dev->states_usage[dev->state_count].driver_data =
(void *)get_driver_data(cstate);
@@ -484,8 +527,6 @@
return -EIO;
}
}
- if (auto_demotion_disable_flags)
- smp_call_function(auto_demotion_disable, NULL, 1);
return 0;
}
@@ -503,6 +544,7 @@
if (retval)
return retval;
+ intel_idle_cpuidle_driver_init();
retval = cpuidle_register_driver(&intel_idle_driver);
if (retval) {
printk(KERN_DEBUG PREFIX "intel_idle yielding to %s",