#include <asm/cpu_device_id.h>
#include <asm/cpufeature.h>
+ #if IS_ENABLED(CONFIG_ACPI)
+ #include <acpi/processor.h>
+ #endif
+
#define BYT_RATIOS 0x66a
#define BYT_VIDS 0x66b
#define BYT_TURBO_RATIOS 0x66c
#define int_tofp(X) ((int64_t)(X) << FRAC_BITS)
#define fp_toint(X) ((X) >> FRAC_BITS)
-
static inline int32_t mul_fp(int32_t x, int32_t y)
{
return ((int64_t)x * (int64_t)y) >> FRAC_BITS;
int current_pstate;
int min_pstate;
int max_pstate;
+ int max_pstate_physical;
int scaling;
int turbo_pstate;
};
u64 prev_mperf;
u64 prev_tsc;
struct sample sample;
+ #if IS_ENABLED(CONFIG_ACPI)
+ struct acpi_processor_performance acpi_perf_data;
+ #endif
};
static struct cpudata **all_cpu_data;
struct pstate_funcs {
int (*get_max)(void);
+ int (*get_max_physical)(void);
int (*get_min)(void);
int (*get_turbo)(void);
int (*get_scaling)(void);
static struct pstate_adjust_policy pid_params;
static struct pstate_funcs pstate_funcs;
static int hwp_active;
+ static int no_acpi_perf;
struct perf_limits {
int no_turbo;
int max_sysfs_pct;
int min_policy_pct;
int min_sysfs_pct;
+ int max_perf_ctl;
+ int min_perf_ctl;
};
static struct perf_limits limits = {
.max_sysfs_pct = 100,
.min_policy_pct = 0,
.min_sysfs_pct = 0,
+ .max_perf_ctl = 0,
+ .min_perf_ctl = 0,
};
+ #if IS_ENABLED(CONFIG_ACPI)
+ /*
+ * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
+ * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
+ * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
+ * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
+ * target ratio 0x17. The _PSS control value stores in a format which can be
+ * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
+ * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
+ * This function converts the _PSS control value to intel pstate driver format
+ * for comparison and assignment.
+ */
+ static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
+ {
+ return cpu->acpi_perf_data.states[index].control >> 8;
+ }
+
+ static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
+ {
+ struct cpudata *cpu;
+ int ret;
+ bool turbo_absent = false;
+ int max_pstate_index;
+ int min_pss_ctl, max_pss_ctl, turbo_pss_ctl;
+ int i;
+
+ cpu = all_cpu_data[policy->cpu];
+
+ pr_debug("intel_pstate: default limits 0x%x 0x%x 0x%x\n",
+ cpu->pstate.min_pstate, cpu->pstate.max_pstate,
+ cpu->pstate.turbo_pstate);
+
+ if (!cpu->acpi_perf_data.shared_cpu_map &&
+ zalloc_cpumask_var_node(&cpu->acpi_perf_data.shared_cpu_map,
+ GFP_KERNEL, cpu_to_node(policy->cpu))) {
+ return -ENOMEM;
+ }
+
+ ret = acpi_processor_register_performance(&cpu->acpi_perf_data,
+ policy->cpu);
+ if (ret)
+ return ret;
+
+ /*
+ * Check if the control value in _PSS is for PERF_CTL MSR, which should
+ * guarantee that the states returned by it map to the states in our
+ * list directly.
+ */
+ if (cpu->acpi_perf_data.control_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE)
+ return -EIO;
+
+ pr_debug("intel_pstate: CPU%u - ACPI _PSS perf data\n", policy->cpu);
+ for (i = 0; i < cpu->acpi_perf_data.state_count; i++)
+ pr_debug(" %cP%d: %u MHz, %u mW, 0x%x\n",
+ (i == cpu->acpi_perf_data.state ? '*' : ' '), i,
+ (u32) cpu->acpi_perf_data.states[i].core_frequency,
+ (u32) cpu->acpi_perf_data.states[i].power,
+ (u32) cpu->acpi_perf_data.states[i].control);
+
+ /*
+ * If there is only one entry _PSS, simply ignore _PSS and continue as
+ * usual without taking _PSS into account
+ */
+ if (cpu->acpi_perf_data.state_count < 2)
+ return 0;
+
+ turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
+ min_pss_ctl = convert_to_native_pstate_format(cpu,
+ cpu->acpi_perf_data.state_count - 1);
+ /* Check if there is a turbo freq in _PSS */
+ if (turbo_pss_ctl <= cpu->pstate.max_pstate &&
+ turbo_pss_ctl > cpu->pstate.min_pstate) {
+ pr_debug("intel_pstate: no turbo range exists in _PSS\n");
+ limits.no_turbo = limits.turbo_disabled = 1;
+ cpu->pstate.turbo_pstate = cpu->pstate.max_pstate;
+ turbo_absent = true;
+ }
+
+ /* Check if the max non turbo p state < Intel P state max */
+ max_pstate_index = turbo_absent ? 0 : 1;
+ max_pss_ctl = convert_to_native_pstate_format(cpu, max_pstate_index);
+ if (max_pss_ctl < cpu->pstate.max_pstate &&
+ max_pss_ctl > cpu->pstate.min_pstate)
+ cpu->pstate.max_pstate = max_pss_ctl;
+
+ /* check If min perf > Intel P State min */
+ if (min_pss_ctl > cpu->pstate.min_pstate &&
+ min_pss_ctl < cpu->pstate.max_pstate) {
+ cpu->pstate.min_pstate = min_pss_ctl;
+ policy->cpuinfo.min_freq = min_pss_ctl * cpu->pstate.scaling;
+ }
+
+ if (turbo_absent)
+ policy->cpuinfo.max_freq = cpu->pstate.max_pstate *
+ cpu->pstate.scaling;
+ else {
+ policy->cpuinfo.max_freq = cpu->pstate.turbo_pstate *
+ cpu->pstate.scaling;
+ /*
+ * The _PSS table doesn't contain whole turbo frequency range.
+ * This just contains +1 MHZ above the max non turbo frequency,
+ * with control value corresponding to max turbo ratio. But
+ * when cpufreq set policy is called, it will call with this
+ * max frequency, which will cause a reduced performance as
+ * this driver uses real max turbo frequency as the max
+ * frequeny. So correct this frequency in _PSS table to
+ * correct max turbo frequency based on the turbo ratio.
+ * Also need to convert to MHz as _PSS freq is in MHz.
+ */
+ cpu->acpi_perf_data.states[0].core_frequency =
+ turbo_pss_ctl * 100;
+ }
+
+ pr_debug("intel_pstate: Updated limits using _PSS 0x%x 0x%x 0x%x\n",
+ cpu->pstate.min_pstate, cpu->pstate.max_pstate,
+ cpu->pstate.turbo_pstate);
+ pr_debug("intel_pstate: policy max_freq=%d Khz min_freq = %d KHz\n",
+ policy->cpuinfo.max_freq, policy->cpuinfo.min_freq);
+
+ return 0;
+ }
+
+ static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+ {
+ struct cpudata *cpu;
+
+ if (!no_acpi_perf)
+ return 0;
+
+ cpu = all_cpu_data[policy->cpu];
+ acpi_processor_unregister_performance(policy->cpu);
+ return 0;
+ }
+
+ #else
+ static int intel_pstate_init_perf_limits(struct cpufreq_policy *policy)
+ {
+ return 0;
+ }
+
+ static int intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
+ {
+ return 0;
+ }
+ #endif
+
static inline void pid_reset(struct _pid *pid, int setpoint, int busy,
int deadband, int integral) {
pid->setpoint = setpoint;
return (value >> 40) & 0xFF;
}
- static int core_get_max_pstate(void)
+ static int core_get_max_pstate_physical(void)
{
u64 value;
return (value >> 8) & 0xFF;
}
+ static int core_get_max_pstate(void)
+ {
+ u64 tar;
+ u64 plat_info;
+ int max_pstate;
+ int err;
+
+ rdmsrl(MSR_PLATFORM_INFO, plat_info);
+ max_pstate = (plat_info >> 8) & 0xFF;
+
+ err = rdmsrl_safe(MSR_TURBO_ACTIVATION_RATIO, &tar);
+ if (!err) {
+ /* Do some sanity checking for safety */
+ if (plat_info & 0x600000000) {
+ u64 tdp_ctrl;
+ u64 tdp_ratio;
+ int tdp_msr;
+
+ err = rdmsrl_safe(MSR_CONFIG_TDP_CONTROL, &tdp_ctrl);
+ if (err)
+ goto skip_tar;
+
+ tdp_msr = MSR_CONFIG_TDP_NOMINAL + tdp_ctrl;
+ err = rdmsrl_safe(tdp_msr, &tdp_ratio);
+ if (err)
+ goto skip_tar;
+
+ if (tdp_ratio - 1 == tar) {
+ max_pstate = tar;
+ pr_debug("max_pstate=TAC %x\n", max_pstate);
+ } else {
+ goto skip_tar;
+ }
+ }
+ }
+
+ skip_tar:
+ return max_pstate;
+ }
+
static int core_get_turbo_pstate(void)
{
u64 value;
},
.funcs = {
.get_max = core_get_max_pstate,
+ .get_max_physical = core_get_max_pstate_physical,
.get_min = core_get_min_pstate,
.get_turbo = core_get_turbo_pstate,
.get_scaling = core_get_scaling,
},
.funcs = {
.get_max = byt_get_max_pstate,
+ .get_max_physical = byt_get_max_pstate,
.get_min = byt_get_min_pstate,
.get_turbo = byt_get_turbo_pstate,
.set = byt_set_pstate,
},
.funcs = {
.get_max = core_get_max_pstate,
+ .get_max_physical = core_get_max_pstate_physical,
.get_min = core_get_min_pstate,
.get_turbo = knl_get_turbo_pstate,
.get_scaling = core_get_scaling,
* policy, or by cpu specific default values determined through
* experimentation.
*/
- max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
- *max = clamp_t(int, max_perf_adj,
- cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
+ if (limits.max_perf_ctl && limits.max_sysfs_pct >=
+ limits.max_policy_pct) {
+ *max = limits.max_perf_ctl;
+ } else {
+ max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf),
+ limits.max_perf));
+ *max = clamp_t(int, max_perf_adj, cpu->pstate.min_pstate,
+ cpu->pstate.turbo_pstate);
+ }
- min_perf = fp_toint(mul_fp(int_tofp(max_perf), limits.min_perf));
- *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+ if (limits.min_perf_ctl) {
+ *min = limits.min_perf_ctl;
+ } else {
+ min_perf = fp_toint(mul_fp(int_tofp(max_perf),
+ limits.min_perf));
+ *min = clamp_t(int, min_perf, cpu->pstate.min_pstate, max_perf);
+ }
}
static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate, bool force)
{
cpu->pstate.min_pstate = pstate_funcs.get_min();
cpu->pstate.max_pstate = pstate_funcs.get_max();
+ cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical();
cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
cpu->pstate.scaling = pstate_funcs.get_scaling();
sample->freq = fp_toint(
mul_fp(int_tofp(
- cpu->pstate.max_pstate * cpu->pstate.scaling / 100),
+ cpu->pstate.max_pstate_physical *
+ cpu->pstate.scaling / 100),
core_pct));
sample->core_pct_busy = (int32_t)core_pct;
local_irq_save(flags);
rdmsrl(MSR_IA32_APERF, aperf);
rdmsrl(MSR_IA32_MPERF, mperf);
+ if (cpu->prev_mperf == mperf) {
+ local_irq_restore(flags);
+ return;
+ }
+
tsc = rdtsc();
local_irq_restore(flags);
* specified pstate.
*/
core_busy = cpu->sample.core_pct_busy;
- max_pstate = int_tofp(cpu->pstate.max_pstate);
+ max_pstate = int_tofp(cpu->pstate.max_pstate_physical);
current_pstate = int_tofp(cpu->pstate.current_pstate);
core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
static int intel_pstate_set_policy(struct cpufreq_policy *policy)
{
+ #if IS_ENABLED(CONFIG_ACPI)
+ struct cpudata *cpu;
+ int i;
+ #endif
+ pr_debug("intel_pstate: %s max %u policy->max %u\n", __func__,
+ policy->cpuinfo.max_freq, policy->max);
if (!policy->cpuinfo.max_freq)
return -ENODEV;
limits.max_perf_pct = 100;
limits.max_perf = int_tofp(1);
limits.no_turbo = 0;
+ limits.max_perf_ctl = 0;
+ limits.min_perf_ctl = 0;
return 0;
}
limits.min_perf = div_fp(int_tofp(limits.min_perf_pct), int_tofp(100));
limits.max_perf = div_fp(int_tofp(limits.max_perf_pct), int_tofp(100));
+ #if IS_ENABLED(CONFIG_ACPI)
+ cpu = all_cpu_data[policy->cpu];
+ for (i = 0; i < cpu->acpi_perf_data.state_count; i++) {
+ int control;
+
+ control = convert_to_native_pstate_format(cpu, i);
+ if (control * cpu->pstate.scaling == policy->max)
+ limits.max_perf_ctl = control;
+ if (control * cpu->pstate.scaling == policy->min)
+ limits.min_perf_ctl = control;
+ }
+
+ pr_debug("intel_pstate: max %u policy_max %u perf_ctl [0x%x-0x%x]\n",
+ policy->cpuinfo.max_freq, policy->max, limits.min_perf_ctl,
+ limits.max_perf_ctl);
+ #endif
+
if (hwp_active)
intel_pstate_hwp_set();
policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
policy->cpuinfo.max_freq =
cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+ if (!no_acpi_perf)
+ intel_pstate_init_perf_limits(policy);
+ /*
+ * If there is no acpi perf data or error, we ignore and use Intel P
+ * state calculated limits, So this is not fatal error.
+ */
policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
cpumask_set_cpu(policy->cpu, policy->cpus);
return 0;
}
+ static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
+ {
+ return intel_pstate_exit_perf_limits(policy);
+ }
+
static struct cpufreq_driver intel_pstate_driver = {
.flags = CPUFREQ_CONST_LOOPS,
.verify = intel_pstate_verify_policy,
.setpolicy = intel_pstate_set_policy,
.get = intel_pstate_get,
.init = intel_pstate_cpu_init,
+ .exit = intel_pstate_cpu_exit,
.stop_cpu = intel_pstate_stop_cpu,
.name = "intel_pstate",
};
static void copy_cpu_funcs(struct pstate_funcs *funcs)
{
pstate_funcs.get_max = funcs->get_max;
+ pstate_funcs.get_max_physical = funcs->get_max_physical;
pstate_funcs.get_min = funcs->get_min;
pstate_funcs.get_turbo = funcs->get_turbo;
pstate_funcs.get_scaling = funcs->get_scaling;
}
#if IS_ENABLED(CONFIG_ACPI)
- #include <acpi/processor.h>
static bool intel_pstate_no_acpi_pss(void)
{
force_load = 1;
if (!strcmp(str, "hwp_only"))
hwp_only = 1;
+ if (!strcmp(str, "no_acpi"))
+ no_acpi_perf = 1;
+
return 0;
}
early_param("intel_pstate", intel_pstate_setup);