ANDROID: thermal: cpu_cooling: Migrate to using the EM framework

The newly introduced Energy Model framework manages power cost tables in
a generic way. Moreover, it supports a wide variety of models since the
tables can come from DT, firmware, etc. On the other hand, the
cpu_cooling subsystem manages its own power cost tables using only DT
information.

In order to avoid the duplication of data in the kernel, and in order to
enable IPA with EMs comming from more than just DT, remove the private
tables from cpu_cooling.c and migrate it to using the centralized EM
framework.

The case where the thermal subsystem is used without an Energy Model
(cpufreq_cooling_ops) is handled by looking at CPUFreq's frequency table
which is already a dependency for cpu_cooling.c anyway.

Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Change-Id: I064e4b0e56d1c52b82a4dc800a1fa90456429911
This commit is contained in:
Quentin Perret 2018-09-04 12:51:08 +01:00
parent 4cb65352a3
commit 2054105d16

View File

@ -31,6 +31,7 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/cpu_cooling.h>
#include <linux/energy_model.h>
#include <trace/events/thermal.h>
@ -48,19 +49,6 @@
* ...
*/
/**
* struct freq_table - frequency table along with power entries
* @frequency: frequency in KHz
* @power: power in mW
*
* This structure is built when the cooling device registers and helps
* in translating frequency to power and vice versa.
*/
struct freq_table {
u32 frequency;
u32 power;
};
/**
* struct time_in_idle - Idle time stats
* @time: previous reading of the absolute time that this cpu was idle
@ -82,7 +70,7 @@ struct time_in_idle {
* frequency.
* @max_level: maximum cooling level. One less than total number of valid
* cpufreq frequencies.
* @freq_table: Freq table in descending order of frequencies
* @em: Reference on the Energy Model of the device
* @cdev: thermal_cooling_device pointer to keep track of the
* registered cooling device.
* @policy: cpufreq policy.
@ -98,7 +86,7 @@ struct cpufreq_cooling_device {
unsigned int cpufreq_state;
unsigned int clipped_freq;
unsigned int max_level;
struct freq_table *freq_table; /* In descending order */
struct em_perf_domain *em;
struct thermal_cooling_device *cdev;
struct cpufreq_policy *policy;
struct list_head node;
@ -111,26 +99,6 @@ static LIST_HEAD(cpufreq_cdev_list);
/* Below code defines functions to be used for cpufreq as cooling device */
/**
* get_level: Find the level for a particular frequency
* @cpufreq_cdev: cpufreq_cdev for which the property is required
* @freq: Frequency
*
* Return: level corresponding to the frequency.
*/
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
unsigned int freq)
{
struct freq_table *freq_table = cpufreq_cdev->freq_table;
unsigned long level;
for (level = 1; level <= cpufreq_cdev->max_level; level++)
if (freq > freq_table[level].frequency)
break;
return level - 1;
}
/**
* cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
* @nb: struct notifier_block * with callback info.
@ -184,105 +152,52 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
#ifdef CONFIG_ENERGY_MODEL
/**
* update_freq_table() - Update the freq table with power numbers
* @cpufreq_cdev: the cpufreq cooling device in which to update the table
* @capacitance: dynamic power coefficient for these cpus
* get_level: Find the level for a particular frequency
* @cpufreq_cdev: cpufreq_cdev for which the property is required
* @freq: Frequency
*
* Update the freq table with power numbers. This table will be used in
* cpu_power_to_freq() and cpu_freq_to_power() to convert between power and
* frequency efficiently. Power is stored in mW, frequency in KHz. The
* resulting table is in descending order.
*
* Return: 0 on success, -EINVAL if there are no OPPs for any CPUs,
* or -ENOMEM if we run out of memory.
* Return: level corresponding to the frequency.
*/
static int update_freq_table(struct cpufreq_cooling_device *cpufreq_cdev,
u32 capacitance)
static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
unsigned int freq)
{
struct freq_table *freq_table = cpufreq_cdev->freq_table;
struct dev_pm_opp *opp;
struct device *dev = NULL;
int num_opps = 0, cpu = cpufreq_cdev->policy->cpu, i;
int i;
dev = get_cpu_device(cpu);
if (unlikely(!dev)) {
dev_warn(&cpufreq_cdev->cdev->device,
"No cpu device for cpu %d\n", cpu);
return -ENODEV;
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
if (freq > cpufreq_cdev->em->table[i].frequency)
break;
}
num_opps = dev_pm_opp_get_opp_count(dev);
if (num_opps < 0)
return num_opps;
/*
* The cpufreq table is also built from the OPP table and so the count
* should match.
*/
if (num_opps != cpufreq_cdev->max_level + 1) {
dev_warn(dev, "Number of OPPs not matching with max_levels\n");
return -EINVAL;
}
for (i = 0; i <= cpufreq_cdev->max_level; i++) {
unsigned long freq = freq_table[i].frequency * 1000;
u32 freq_mhz = freq_table[i].frequency / 1000;
u64 power;
u32 voltage_mv;
/*
* Find ceil frequency as 'freq' may be slightly lower than OPP
* freq due to truncation while converting to kHz.
*/
opp = dev_pm_opp_find_freq_ceil(dev, &freq);
if (IS_ERR(opp)) {
dev_err(dev, "failed to get opp for %lu frequency\n",
freq);
return -EINVAL;
}
voltage_mv = dev_pm_opp_get_voltage(opp) / 1000;
dev_pm_opp_put(opp);
/*
* Do the multiplication with MHz and millivolt so as
* to not overflow.
*/
power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv;
do_div(power, 1000000000);
/* power is stored in mW */
freq_table[i].power = power;
}
return 0;
return cpufreq_cdev->max_level - i - 1;
}
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
u32 freq)
{
int i;
struct freq_table *freq_table = cpufreq_cdev->freq_table;
for (i = 1; i <= cpufreq_cdev->max_level; i++)
if (freq > freq_table[i].frequency)
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
if (freq > cpufreq_cdev->em->table[i].frequency)
break;
}
return freq_table[i - 1].power;
return cpufreq_cdev->em->table[i + 1].power;
}
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
u32 power)
{
int i;
struct freq_table *freq_table = cpufreq_cdev->freq_table;
for (i = 1; i <= cpufreq_cdev->max_level; i++)
if (power > freq_table[i].power)
for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
if (power > cpufreq_cdev->em->table[i].power)
break;
}
return freq_table[i - 1].frequency;
return cpufreq_cdev->em->table[i + 1].frequency;
}
/**
@ -332,6 +247,7 @@ static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
}
#endif
/* cpufreq cooling device callback functions are defined below */
@ -374,6 +290,30 @@ static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
return 0;
}
static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
unsigned long state)
{
struct cpufreq_policy *policy;
unsigned long idx;
#ifdef CONFIG_ENERGY_MODEL
/* Use the Energy Model table if available */
if (cpufreq_cdev->em) {
idx = cpufreq_cdev->max_level - state;
return cpufreq_cdev->em->table[idx].frequency;
}
#endif
/* Otherwise, fallback on the CPUFreq table */
policy = cpufreq_cdev->policy;
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
idx = cpufreq_cdev->max_level - state;
else
idx = state;
return policy->freq_table[idx].frequency;
}
/**
* cpufreq_set_cur_state - callback function to set the current cooling state.
* @cdev: thermal cooling device pointer.
@ -398,7 +338,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
if (cpufreq_cdev->cpufreq_state == state)
return 0;
clip_freq = cpufreq_cdev->freq_table[state].frequency;
clip_freq = get_state_freq(cpufreq_cdev, state);
cpufreq_cdev->cpufreq_state = state;
cpufreq_cdev->clipped_freq = clip_freq;
@ -407,6 +347,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
return 0;
}
#ifdef CONFIG_ENERGY_MODEL
/**
* cpufreq_get_requested_power() - get the current power
* @cdev: &thermal_cooling_device pointer
@ -497,7 +438,7 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
struct thermal_zone_device *tz,
unsigned long state, u32 *power)
{
unsigned int freq, num_cpus;
unsigned int freq, num_cpus, idx;
struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
/* Request state should be less than max_level */
@ -506,7 +447,8 @@ static int cpufreq_state2power(struct thermal_cooling_device *cdev,
num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
freq = cpufreq_cdev->freq_table[state].frequency;
idx = cpufreq_cdev->max_level - state;
freq = cpufreq_cdev->em->table[idx].frequency;
*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
return 0;
@ -553,14 +495,6 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev,
return 0;
}
/* Bind cpufreq callbacks to thermal cooling device ops */
static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
.get_max_state = cpufreq_get_max_state,
.get_cur_state = cpufreq_get_cur_state,
.set_cur_state = cpufreq_set_cur_state,
};
static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
.get_max_state = cpufreq_get_max_state,
.get_cur_state = cpufreq_get_cur_state,
@ -569,32 +503,27 @@ static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = {
.state2power = cpufreq_state2power,
.power2state = cpufreq_power2state,
};
#endif
/* Bind cpufreq callbacks to thermal cooling device ops */
static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
.get_max_state = cpufreq_get_max_state,
.get_cur_state = cpufreq_get_cur_state,
.set_cur_state = cpufreq_set_cur_state,
};
/* Notifier for cpufreq policy change */
static struct notifier_block thermal_cpufreq_notifier_block = {
.notifier_call = cpufreq_thermal_notifier,
};
static unsigned int find_next_max(struct cpufreq_frequency_table *table,
unsigned int prev_max)
{
struct cpufreq_frequency_table *pos;
unsigned int max = 0;
cpufreq_for_each_valid_entry(pos, table) {
if (pos->frequency > max && pos->frequency < prev_max)
max = pos->frequency;
}
return max;
}
/**
* __cpufreq_cooling_register - helper function to create cpufreq cooling device
* @np: a valid struct device_node to the cooling device device tree node
* @policy: cpufreq policy
* Normally this should be same as cpufreq policy->related_cpus.
* @capacitance: dynamic power coefficient for these cpus
* @try_model: true if a power model should be used
*
* This interface function registers the cpufreq cooling device with the name
* "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@ -606,12 +535,12 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table,
*/
static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node *np,
struct cpufreq_policy *policy, u32 capacitance)
struct cpufreq_policy *policy, bool try_model)
{
struct thermal_cooling_device *cdev;
struct cpufreq_cooling_device *cpufreq_cdev;
char dev_name[THERMAL_NAME_LENGTH];
unsigned int freq, i, num_cpus;
unsigned int i, num_cpus;
int ret;
struct thermal_cooling_device_ops *cooling_ops;
bool first;
@ -645,54 +574,36 @@ __cpufreq_cooling_register(struct device_node *np,
/* max_level is an index, not a counter */
cpufreq_cdev->max_level = i - 1;
cpufreq_cdev->freq_table = kmalloc_array(i,
sizeof(*cpufreq_cdev->freq_table),
GFP_KERNEL);
if (!cpufreq_cdev->freq_table) {
cdev = ERR_PTR(-ENOMEM);
goto free_idle_time;
}
#ifdef CONFIG_ENERGY_MODEL
if (try_model) {
struct em_perf_domain *em = em_cpu_get(policy->cpu);
if (!em || !cpumask_equal(policy->cpus, to_cpumask(em->cpus))) {
cdev = ERR_PTR(-EINVAL);
goto free_idle_time;
}
cpufreq_cdev->em = em;
cooling_ops = &cpufreq_power_cooling_ops;
} else
#endif
cooling_ops = &cpufreq_cooling_ops;
ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
if (ret < 0) {
cdev = ERR_PTR(ret);
goto free_table;
goto free_idle_time;
}
cpufreq_cdev->id = ret;
snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
cpufreq_cdev->id);
/* Fill freq-table in descending order of frequencies */
for (i = 0, freq = -1; i <= cpufreq_cdev->max_level; i++) {
freq = find_next_max(policy->freq_table, freq);
cpufreq_cdev->freq_table[i].frequency = freq;
/* Warn for duplicate entries */
if (!freq)
pr_warn("%s: table has duplicate entries\n", __func__);
else
pr_debug("%s: freq:%u KHz\n", __func__, freq);
}
if (capacitance) {
ret = update_freq_table(cpufreq_cdev, capacitance);
if (ret) {
cdev = ERR_PTR(ret);
goto remove_ida;
}
cooling_ops = &cpufreq_power_cooling_ops;
} else {
cooling_ops = &cpufreq_cooling_ops;
}
cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
cooling_ops);
if (IS_ERR(cdev))
goto remove_ida;
cpufreq_cdev->clipped_freq = cpufreq_cdev->freq_table[0].frequency;
cpufreq_cdev->clipped_freq = get_state_freq(cpufreq_cdev, 0);
cpufreq_cdev->cdev = cdev;
mutex_lock(&cooling_list_lock);
@ -709,8 +620,6 @@ __cpufreq_cooling_register(struct device_node *np,
remove_ida:
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_table:
kfree(cpufreq_cdev->freq_table);
free_idle_time:
kfree(cpufreq_cdev->idle_time);
free_cdev:
@ -732,7 +641,7 @@ free_cdev:
struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy *policy)
{
return __cpufreq_cooling_register(NULL, policy, 0);
return __cpufreq_cooling_register(NULL, policy, false);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
@ -760,7 +669,6 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
{
struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
struct thermal_cooling_device *cdev = NULL;
u32 capacitance = 0;
if (!np) {
pr_err("cpu_cooling: OF node not available for cpu%d\n",
@ -769,10 +677,7 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy)
}
if (of_find_property(np, "#cooling-cells", NULL)) {
of_property_read_u32(np, "dynamic-power-coefficient",
&capacitance);
cdev = __cpufreq_cooling_register(np, policy, capacitance);
cdev = __cpufreq_cooling_register(np, policy, true);
if (IS_ERR(cdev)) {
pr_err("cpu_cooling: cpu%d is not running as cooling device: %ld\n",
policy->cpu, PTR_ERR(cdev));
@ -814,7 +719,6 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
thermal_cooling_device_unregister(cpufreq_cdev->cdev);
ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
kfree(cpufreq_cdev->idle_time);
kfree(cpufreq_cdev->freq_table);
kfree(cpufreq_cdev);
}
EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);