xref: /OK3568_Linux_fs/kernel/drivers/thermal/cpufreq_cooling.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *  linux/drivers/thermal/cpufreq_cooling.c
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  *  Copyright (C) 2012-2018 Linaro Limited.
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  *  Authors:	Amit Daniel <amit.kachhap@linaro.org>
10*4882a593Smuzhiyun  *		Viresh Kumar <viresh.kumar@linaro.org>
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  */
13*4882a593Smuzhiyun #include <linux/cpu.h>
14*4882a593Smuzhiyun #include <linux/cpufreq.h>
15*4882a593Smuzhiyun #include <linux/cpu_cooling.h>
16*4882a593Smuzhiyun #include <linux/energy_model.h>
17*4882a593Smuzhiyun #include <linux/err.h>
18*4882a593Smuzhiyun #include <linux/export.h>
19*4882a593Smuzhiyun #include <linux/idr.h>
20*4882a593Smuzhiyun #include <linux/pm_opp.h>
21*4882a593Smuzhiyun #include <linux/pm_qos.h>
22*4882a593Smuzhiyun #include <linux/slab.h>
23*4882a593Smuzhiyun #include <linux/thermal.h>
24*4882a593Smuzhiyun 
25*4882a593Smuzhiyun #include <trace/events/thermal.h>
26*4882a593Smuzhiyun 
27*4882a593Smuzhiyun /*
28*4882a593Smuzhiyun  * Cooling state <-> CPUFreq frequency
29*4882a593Smuzhiyun  *
30*4882a593Smuzhiyun  * Cooling states are translated to frequencies throughout this driver and this
31*4882a593Smuzhiyun  * is the relation between them.
32*4882a593Smuzhiyun  *
33*4882a593Smuzhiyun  * Highest cooling state corresponds to lowest possible frequency.
34*4882a593Smuzhiyun  *
35*4882a593Smuzhiyun  * i.e.
36*4882a593Smuzhiyun  *	level 0 --> 1st Max Freq
37*4882a593Smuzhiyun  *	level 1 --> 2nd Max Freq
38*4882a593Smuzhiyun  *	...
39*4882a593Smuzhiyun  */
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun /**
42*4882a593Smuzhiyun  * struct time_in_idle - Idle time stats
43*4882a593Smuzhiyun  * @time: previous reading of the absolute time that this cpu was idle
44*4882a593Smuzhiyun  * @timestamp: wall time of the last invocation of get_cpu_idle_time_us()
45*4882a593Smuzhiyun  */
46*4882a593Smuzhiyun struct time_in_idle {
47*4882a593Smuzhiyun 	u64 time;
48*4882a593Smuzhiyun 	u64 timestamp;
49*4882a593Smuzhiyun };
50*4882a593Smuzhiyun 
51*4882a593Smuzhiyun /**
52*4882a593Smuzhiyun  * struct cpufreq_cooling_device - data for cooling device with cpufreq
53*4882a593Smuzhiyun  * @id: unique integer value corresponding to each cpufreq_cooling_device
54*4882a593Smuzhiyun  *	registered.
55*4882a593Smuzhiyun  * @last_load: load measured by the latest call to cpufreq_get_requested_power()
56*4882a593Smuzhiyun  * @cpufreq_state: integer value representing the current state of cpufreq
57*4882a593Smuzhiyun  *	cooling	devices.
58*4882a593Smuzhiyun  * @max_level: maximum cooling level. One less than total number of valid
59*4882a593Smuzhiyun  *	cpufreq frequencies.
60*4882a593Smuzhiyun  * @em: Reference on the Energy Model of the device
61*4882a593Smuzhiyun  * @cdev: thermal_cooling_device pointer to keep track of the
62*4882a593Smuzhiyun  *	registered cooling device.
63*4882a593Smuzhiyun  * @policy: cpufreq policy.
64*4882a593Smuzhiyun  * @node: list_head to link all cpufreq_cooling_device together.
65*4882a593Smuzhiyun  * @idle_time: idle time stats
66*4882a593Smuzhiyun  * @qos_req: PM QoS contraint to apply
67*4882a593Smuzhiyun  *
68*4882a593Smuzhiyun  * This structure is required for keeping information of each registered
69*4882a593Smuzhiyun  * cpufreq_cooling_device.
70*4882a593Smuzhiyun  */
71*4882a593Smuzhiyun struct cpufreq_cooling_device {
72*4882a593Smuzhiyun 	int id;
73*4882a593Smuzhiyun 	u32 last_load;
74*4882a593Smuzhiyun 	unsigned int cpufreq_state;
75*4882a593Smuzhiyun 	unsigned int max_level;
76*4882a593Smuzhiyun 	struct em_perf_domain *em;
77*4882a593Smuzhiyun 	struct cpufreq_policy *policy;
78*4882a593Smuzhiyun 	struct list_head node;
79*4882a593Smuzhiyun 	struct time_in_idle *idle_time;
80*4882a593Smuzhiyun 	struct freq_qos_request qos_req;
81*4882a593Smuzhiyun };
82*4882a593Smuzhiyun 
/* Source of the unique ->id given to each cpufreq cooling device. */
static DEFINE_IDA(cpufreq_ida);

/*
 * Registered cpufreq cooling devices, linked through ->node.  Additions to
 * the list are made with cooling_list_lock held.
 */
static LIST_HEAD(cpufreq_cdev_list);
static DEFINE_MUTEX(cooling_list_lock);
86*4882a593Smuzhiyun 
87*4882a593Smuzhiyun #ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
88*4882a593Smuzhiyun /**
89*4882a593Smuzhiyun  * get_level: Find the level for a particular frequency
90*4882a593Smuzhiyun  * @cpufreq_cdev: cpufreq_cdev for which the property is required
91*4882a593Smuzhiyun  * @freq: Frequency
92*4882a593Smuzhiyun  *
93*4882a593Smuzhiyun  * Return: level corresponding to the frequency.
94*4882a593Smuzhiyun  */
get_level(struct cpufreq_cooling_device * cpufreq_cdev,unsigned int freq)95*4882a593Smuzhiyun static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev,
96*4882a593Smuzhiyun 			       unsigned int freq)
97*4882a593Smuzhiyun {
98*4882a593Smuzhiyun 	int i;
99*4882a593Smuzhiyun 
100*4882a593Smuzhiyun 	for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) {
101*4882a593Smuzhiyun 		if (freq > cpufreq_cdev->em->table[i].frequency)
102*4882a593Smuzhiyun 			break;
103*4882a593Smuzhiyun 	}
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun 	return cpufreq_cdev->max_level - i - 1;
106*4882a593Smuzhiyun }
107*4882a593Smuzhiyun 
/*
 * Look up the Energy Model power matching @freq.
 *
 * The table is scanned downwards from entry max_level - 1 until an entry with
 * a frequency strictly below @freq is found; the power of the entry right
 * above that one is returned.  If @freq is above entry max_level - 1, that is
 * the power of entry max_level; if no entry is below @freq, it is the power
 * of entry 0.
 */
static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 freq)
{
	struct em_perf_domain *em = cpufreq_cdev->em;
	int idx = cpufreq_cdev->max_level - 1;

	while (idx >= 0 && freq <= em->table[idx].frequency)
		idx--;

	return em->table[idx + 1].power;
}
120*4882a593Smuzhiyun 
/*
 * Look up the frequency that fits within a power budget.
 *
 * Scans the Energy Model table downwards from entry max_level and returns the
 * frequency of the first entry whose power does not exceed @power.  Entry 0
 * is returned when none of the higher entries fits.
 */
static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev,
			     u32 power)
{
	struct em_perf_domain *em = cpufreq_cdev->em;
	int idx = cpufreq_cdev->max_level;

	while (idx > 0 && power < em->table[idx].power)
		idx--;

	return em->table[idx].frequency;
}
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun /**
135*4882a593Smuzhiyun  * get_load() - get load for a cpu since last updated
136*4882a593Smuzhiyun  * @cpufreq_cdev:	&struct cpufreq_cooling_device for this cpu
137*4882a593Smuzhiyun  * @cpu:	cpu number
138*4882a593Smuzhiyun  * @cpu_idx:	index of the cpu in time_in_idle*
139*4882a593Smuzhiyun  *
140*4882a593Smuzhiyun  * Return: The average load of cpu @cpu in percentage since this
141*4882a593Smuzhiyun  * function was last called.
142*4882a593Smuzhiyun  */
get_load(struct cpufreq_cooling_device * cpufreq_cdev,int cpu,int cpu_idx)143*4882a593Smuzhiyun static u32 get_load(struct cpufreq_cooling_device *cpufreq_cdev, int cpu,
144*4882a593Smuzhiyun 		    int cpu_idx)
145*4882a593Smuzhiyun {
146*4882a593Smuzhiyun 	u32 load;
147*4882a593Smuzhiyun 	u64 now, now_idle, delta_time, delta_idle;
148*4882a593Smuzhiyun 	struct time_in_idle *idle_time = &cpufreq_cdev->idle_time[cpu_idx];
149*4882a593Smuzhiyun 
150*4882a593Smuzhiyun 	now_idle = get_cpu_idle_time(cpu, &now, 0);
151*4882a593Smuzhiyun 	delta_idle = now_idle - idle_time->time;
152*4882a593Smuzhiyun 	delta_time = now - idle_time->timestamp;
153*4882a593Smuzhiyun 
154*4882a593Smuzhiyun 	if (delta_time <= delta_idle)
155*4882a593Smuzhiyun 		load = 0;
156*4882a593Smuzhiyun 	else
157*4882a593Smuzhiyun 		load = div64_u64(100 * (delta_time - delta_idle), delta_time);
158*4882a593Smuzhiyun 
159*4882a593Smuzhiyun 	idle_time->time = now_idle;
160*4882a593Smuzhiyun 	idle_time->timestamp = now;
161*4882a593Smuzhiyun 
162*4882a593Smuzhiyun 	return load;
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun /**
166*4882a593Smuzhiyun  * get_dynamic_power() - calculate the dynamic power
167*4882a593Smuzhiyun  * @cpufreq_cdev:	&cpufreq_cooling_device for this cdev
168*4882a593Smuzhiyun  * @freq:	current frequency
169*4882a593Smuzhiyun  *
170*4882a593Smuzhiyun  * Return: the dynamic power consumed by the cpus described by
171*4882a593Smuzhiyun  * @cpufreq_cdev.
172*4882a593Smuzhiyun  */
get_dynamic_power(struct cpufreq_cooling_device * cpufreq_cdev,unsigned long freq)173*4882a593Smuzhiyun static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_cdev,
174*4882a593Smuzhiyun 			     unsigned long freq)
175*4882a593Smuzhiyun {
176*4882a593Smuzhiyun 	u32 raw_cpu_power;
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun 	raw_cpu_power = cpu_freq_to_power(cpufreq_cdev, freq);
179*4882a593Smuzhiyun 	return (raw_cpu_power * cpufreq_cdev->last_load) / 100;
180*4882a593Smuzhiyun }
181*4882a593Smuzhiyun 
182*4882a593Smuzhiyun /**
183*4882a593Smuzhiyun  * cpufreq_get_requested_power() - get the current power
184*4882a593Smuzhiyun  * @cdev:	&thermal_cooling_device pointer
185*4882a593Smuzhiyun  * @power:	pointer in which to store the resulting power
186*4882a593Smuzhiyun  *
187*4882a593Smuzhiyun  * Calculate the current power consumption of the cpus in milliwatts
188*4882a593Smuzhiyun  * and store it in @power.  This function should actually calculate
189*4882a593Smuzhiyun  * the requested power, but it's hard to get the frequency that
190*4882a593Smuzhiyun  * cpufreq would have assigned if there were no thermal limits.
191*4882a593Smuzhiyun  * Instead, we calculate the current power on the assumption that the
192*4882a593Smuzhiyun  * immediate future will look like the immediate past.
193*4882a593Smuzhiyun  *
194*4882a593Smuzhiyun  * We use the current frequency and the average load since this
195*4882a593Smuzhiyun  * function was last called.  In reality, there could have been
196*4882a593Smuzhiyun  * multiple opps since this function was last called and that affects
197*4882a593Smuzhiyun  * the load calculation.  While it's not perfectly accurate, this
198*4882a593Smuzhiyun  * simplification is good enough and works.  REVISIT this, as more
199*4882a593Smuzhiyun  * complex code may be needed if experiments show that it's not
200*4882a593Smuzhiyun  * accurate enough.
201*4882a593Smuzhiyun  *
202*4882a593Smuzhiyun  * Return: 0 on success, -E* if getting the static power failed.
203*4882a593Smuzhiyun  */
cpufreq_get_requested_power(struct thermal_cooling_device * cdev,u32 * power)204*4882a593Smuzhiyun static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev,
205*4882a593Smuzhiyun 				       u32 *power)
206*4882a593Smuzhiyun {
207*4882a593Smuzhiyun 	unsigned long freq;
208*4882a593Smuzhiyun 	int i = 0, cpu;
209*4882a593Smuzhiyun 	u32 total_load = 0;
210*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
211*4882a593Smuzhiyun 	struct cpufreq_policy *policy = cpufreq_cdev->policy;
212*4882a593Smuzhiyun 	u32 *load_cpu = NULL;
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	freq = cpufreq_quick_get(policy->cpu);
215*4882a593Smuzhiyun 
216*4882a593Smuzhiyun 	if (trace_thermal_power_cpu_get_power_enabled()) {
217*4882a593Smuzhiyun 		u32 ncpus = cpumask_weight(policy->related_cpus);
218*4882a593Smuzhiyun 
219*4882a593Smuzhiyun 		load_cpu = kcalloc(ncpus, sizeof(*load_cpu), GFP_KERNEL);
220*4882a593Smuzhiyun 	}
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	for_each_cpu(cpu, policy->related_cpus) {
223*4882a593Smuzhiyun 		u32 load;
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 		if (cpu_online(cpu))
226*4882a593Smuzhiyun 			load = get_load(cpufreq_cdev, cpu, i);
227*4882a593Smuzhiyun 		else
228*4882a593Smuzhiyun 			load = 0;
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 		total_load += load;
231*4882a593Smuzhiyun 		if (load_cpu)
232*4882a593Smuzhiyun 			load_cpu[i] = load;
233*4882a593Smuzhiyun 
234*4882a593Smuzhiyun 		i++;
235*4882a593Smuzhiyun 	}
236*4882a593Smuzhiyun 
237*4882a593Smuzhiyun 	cpufreq_cdev->last_load = total_load;
238*4882a593Smuzhiyun 
239*4882a593Smuzhiyun 	*power = get_dynamic_power(cpufreq_cdev, freq);
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	if (load_cpu) {
242*4882a593Smuzhiyun 		trace_thermal_power_cpu_get_power(policy->related_cpus, freq,
243*4882a593Smuzhiyun 						  load_cpu, i, *power);
244*4882a593Smuzhiyun 
245*4882a593Smuzhiyun 		kfree(load_cpu);
246*4882a593Smuzhiyun 	}
247*4882a593Smuzhiyun 
248*4882a593Smuzhiyun 	return 0;
249*4882a593Smuzhiyun }
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun /**
252*4882a593Smuzhiyun  * cpufreq_state2power() - convert a cpu cdev state to power consumed
253*4882a593Smuzhiyun  * @cdev:	&thermal_cooling_device pointer
254*4882a593Smuzhiyun  * @state:	cooling device state to be converted
255*4882a593Smuzhiyun  * @power:	pointer in which to store the resulting power
256*4882a593Smuzhiyun  *
257*4882a593Smuzhiyun  * Convert cooling device state @state into power consumption in
258*4882a593Smuzhiyun  * milliwatts assuming 100% load.  Store the calculated power in
259*4882a593Smuzhiyun  * @power.
260*4882a593Smuzhiyun  *
261*4882a593Smuzhiyun  * Return: 0 on success, -EINVAL if the cooling device state could not
262*4882a593Smuzhiyun  * be converted into a frequency or other -E* if there was an error
263*4882a593Smuzhiyun  * when calculating the static power.
264*4882a593Smuzhiyun  */
cpufreq_state2power(struct thermal_cooling_device * cdev,unsigned long state,u32 * power)265*4882a593Smuzhiyun static int cpufreq_state2power(struct thermal_cooling_device *cdev,
266*4882a593Smuzhiyun 			       unsigned long state, u32 *power)
267*4882a593Smuzhiyun {
268*4882a593Smuzhiyun 	unsigned int freq, num_cpus, idx;
269*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	/* Request state should be less than max_level */
272*4882a593Smuzhiyun 	if (state > cpufreq_cdev->max_level)
273*4882a593Smuzhiyun 		return -EINVAL;
274*4882a593Smuzhiyun 
275*4882a593Smuzhiyun 	num_cpus = cpumask_weight(cpufreq_cdev->policy->cpus);
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	idx = cpufreq_cdev->max_level - state;
278*4882a593Smuzhiyun 	freq = cpufreq_cdev->em->table[idx].frequency;
279*4882a593Smuzhiyun 	*power = cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus;
280*4882a593Smuzhiyun 
281*4882a593Smuzhiyun 	return 0;
282*4882a593Smuzhiyun }
283*4882a593Smuzhiyun 
284*4882a593Smuzhiyun /**
285*4882a593Smuzhiyun  * cpufreq_power2state() - convert power to a cooling device state
286*4882a593Smuzhiyun  * @cdev:	&thermal_cooling_device pointer
287*4882a593Smuzhiyun  * @power:	power in milliwatts to be converted
288*4882a593Smuzhiyun  * @state:	pointer in which to store the resulting state
289*4882a593Smuzhiyun  *
290*4882a593Smuzhiyun  * Calculate a cooling device state for the cpus described by @cdev
291*4882a593Smuzhiyun  * that would allow them to consume at most @power mW and store it in
292*4882a593Smuzhiyun  * @state.  Note that this calculation depends on external factors
293*4882a593Smuzhiyun  * such as the cpu load or the current static power.  Calling this
294*4882a593Smuzhiyun  * function with the same power as input can yield different cooling
295*4882a593Smuzhiyun  * device states depending on those external factors.
296*4882a593Smuzhiyun  *
297*4882a593Smuzhiyun  * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if
298*4882a593Smuzhiyun  * the calculated frequency could not be converted to a valid state.
299*4882a593Smuzhiyun  * The latter should not happen unless the frequencies available to
300*4882a593Smuzhiyun  * cpufreq have changed since the initialization of the cpu cooling
301*4882a593Smuzhiyun  * device.
302*4882a593Smuzhiyun  */
cpufreq_power2state(struct thermal_cooling_device * cdev,u32 power,unsigned long * state)303*4882a593Smuzhiyun static int cpufreq_power2state(struct thermal_cooling_device *cdev,
304*4882a593Smuzhiyun 			       u32 power, unsigned long *state)
305*4882a593Smuzhiyun {
306*4882a593Smuzhiyun 	unsigned int target_freq;
307*4882a593Smuzhiyun 	u32 last_load, normalised_power;
308*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
309*4882a593Smuzhiyun 	struct cpufreq_policy *policy = cpufreq_cdev->policy;
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun 	last_load = cpufreq_cdev->last_load ?: 1;
312*4882a593Smuzhiyun 	normalised_power = (power * 100) / last_load;
313*4882a593Smuzhiyun 	target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power);
314*4882a593Smuzhiyun 
315*4882a593Smuzhiyun 	*state = get_level(cpufreq_cdev, target_freq);
316*4882a593Smuzhiyun 	trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state,
317*4882a593Smuzhiyun 				      power);
318*4882a593Smuzhiyun 	return 0;
319*4882a593Smuzhiyun }
320*4882a593Smuzhiyun 
em_is_sane(struct cpufreq_cooling_device * cpufreq_cdev,struct em_perf_domain * em)321*4882a593Smuzhiyun static inline bool em_is_sane(struct cpufreq_cooling_device *cpufreq_cdev,
322*4882a593Smuzhiyun 			      struct em_perf_domain *em) {
323*4882a593Smuzhiyun 	struct cpufreq_policy *policy;
324*4882a593Smuzhiyun 	unsigned int nr_levels;
325*4882a593Smuzhiyun 
326*4882a593Smuzhiyun 	if (!em)
327*4882a593Smuzhiyun 		return false;
328*4882a593Smuzhiyun 
329*4882a593Smuzhiyun 	policy = cpufreq_cdev->policy;
330*4882a593Smuzhiyun 	if (!cpumask_equal(policy->related_cpus, em_span_cpus(em))) {
331*4882a593Smuzhiyun 		pr_err("The span of pd %*pbl is misaligned with cpufreq policy %*pbl\n",
332*4882a593Smuzhiyun 			cpumask_pr_args(em_span_cpus(em)),
333*4882a593Smuzhiyun 			cpumask_pr_args(policy->related_cpus));
334*4882a593Smuzhiyun 		return false;
335*4882a593Smuzhiyun 	}
336*4882a593Smuzhiyun 
337*4882a593Smuzhiyun 	nr_levels = cpufreq_cdev->max_level + 1;
338*4882a593Smuzhiyun 	if (em_pd_nr_perf_states(em) != nr_levels) {
339*4882a593Smuzhiyun 		pr_err("The number of performance states in pd %*pbl (%u) doesn't match the number of cooling levels (%u)\n",
340*4882a593Smuzhiyun 			cpumask_pr_args(em_span_cpus(em)),
341*4882a593Smuzhiyun 			em_pd_nr_perf_states(em), nr_levels);
342*4882a593Smuzhiyun 		return false;
343*4882a593Smuzhiyun 	}
344*4882a593Smuzhiyun 
345*4882a593Smuzhiyun 	return true;
346*4882a593Smuzhiyun }
347*4882a593Smuzhiyun #endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */
348*4882a593Smuzhiyun 
get_state_freq(struct cpufreq_cooling_device * cpufreq_cdev,unsigned long state)349*4882a593Smuzhiyun static unsigned int get_state_freq(struct cpufreq_cooling_device *cpufreq_cdev,
350*4882a593Smuzhiyun 				   unsigned long state)
351*4882a593Smuzhiyun {
352*4882a593Smuzhiyun 	struct cpufreq_policy *policy;
353*4882a593Smuzhiyun 	unsigned long idx;
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun #ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
356*4882a593Smuzhiyun 	/* Use the Energy Model table if available */
357*4882a593Smuzhiyun 	if (cpufreq_cdev->em) {
358*4882a593Smuzhiyun 		idx = cpufreq_cdev->max_level - state;
359*4882a593Smuzhiyun 		return cpufreq_cdev->em->table[idx].frequency;
360*4882a593Smuzhiyun 	}
361*4882a593Smuzhiyun #endif
362*4882a593Smuzhiyun 
363*4882a593Smuzhiyun 	/* Otherwise, fallback on the CPUFreq table */
364*4882a593Smuzhiyun 	policy = cpufreq_cdev->policy;
365*4882a593Smuzhiyun 	if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
366*4882a593Smuzhiyun 		idx = cpufreq_cdev->max_level - state;
367*4882a593Smuzhiyun 	else
368*4882a593Smuzhiyun 		idx = state;
369*4882a593Smuzhiyun 
370*4882a593Smuzhiyun 	return policy->freq_table[idx].frequency;
371*4882a593Smuzhiyun }
372*4882a593Smuzhiyun 
373*4882a593Smuzhiyun /* cpufreq cooling device callback functions are defined below */
374*4882a593Smuzhiyun 
375*4882a593Smuzhiyun /**
376*4882a593Smuzhiyun  * cpufreq_get_max_state - callback function to get the max cooling state.
377*4882a593Smuzhiyun  * @cdev: thermal cooling device pointer.
378*4882a593Smuzhiyun  * @state: fill this variable with the max cooling state.
379*4882a593Smuzhiyun  *
380*4882a593Smuzhiyun  * Callback for the thermal cooling device to return the cpufreq
381*4882a593Smuzhiyun  * max cooling state.
382*4882a593Smuzhiyun  *
383*4882a593Smuzhiyun  * Return: 0 on success, an error code otherwise.
384*4882a593Smuzhiyun  */
cpufreq_get_max_state(struct thermal_cooling_device * cdev,unsigned long * state)385*4882a593Smuzhiyun static int cpufreq_get_max_state(struct thermal_cooling_device *cdev,
386*4882a593Smuzhiyun 				 unsigned long *state)
387*4882a593Smuzhiyun {
388*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
389*4882a593Smuzhiyun 
390*4882a593Smuzhiyun 	*state = cpufreq_cdev->max_level;
391*4882a593Smuzhiyun 	return 0;
392*4882a593Smuzhiyun }
393*4882a593Smuzhiyun 
394*4882a593Smuzhiyun /**
395*4882a593Smuzhiyun  * cpufreq_get_cur_state - callback function to get the current cooling state.
396*4882a593Smuzhiyun  * @cdev: thermal cooling device pointer.
397*4882a593Smuzhiyun  * @state: fill this variable with the current cooling state.
398*4882a593Smuzhiyun  *
399*4882a593Smuzhiyun  * Callback for the thermal cooling device to return the cpufreq
400*4882a593Smuzhiyun  * current cooling state.
401*4882a593Smuzhiyun  *
402*4882a593Smuzhiyun  * Return: 0 on success, an error code otherwise.
403*4882a593Smuzhiyun  */
cpufreq_get_cur_state(struct thermal_cooling_device * cdev,unsigned long * state)404*4882a593Smuzhiyun static int cpufreq_get_cur_state(struct thermal_cooling_device *cdev,
405*4882a593Smuzhiyun 				 unsigned long *state)
406*4882a593Smuzhiyun {
407*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	*state = cpufreq_cdev->cpufreq_state;
410*4882a593Smuzhiyun 
411*4882a593Smuzhiyun 	return 0;
412*4882a593Smuzhiyun }
413*4882a593Smuzhiyun 
414*4882a593Smuzhiyun /**
415*4882a593Smuzhiyun  * cpufreq_set_cur_state - callback function to set the current cooling state.
416*4882a593Smuzhiyun  * @cdev: thermal cooling device pointer.
417*4882a593Smuzhiyun  * @state: set this variable to the current cooling state.
418*4882a593Smuzhiyun  *
419*4882a593Smuzhiyun  * Callback for the thermal cooling device to change the cpufreq
420*4882a593Smuzhiyun  * current cooling state.
421*4882a593Smuzhiyun  *
422*4882a593Smuzhiyun  * Return: 0 on success, an error code otherwise.
423*4882a593Smuzhiyun  */
cpufreq_set_cur_state(struct thermal_cooling_device * cdev,unsigned long state)424*4882a593Smuzhiyun static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev,
425*4882a593Smuzhiyun 				 unsigned long state)
426*4882a593Smuzhiyun {
427*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata;
428*4882a593Smuzhiyun 	struct cpumask *cpus;
429*4882a593Smuzhiyun 	unsigned int frequency;
430*4882a593Smuzhiyun 	unsigned long max_capacity, capacity;
431*4882a593Smuzhiyun 	int ret;
432*4882a593Smuzhiyun 
433*4882a593Smuzhiyun 	/* Request state should be less than max_level */
434*4882a593Smuzhiyun 	if (state > cpufreq_cdev->max_level)
435*4882a593Smuzhiyun 		return -EINVAL;
436*4882a593Smuzhiyun 
437*4882a593Smuzhiyun 	/* Check if the old cooling action is same as new cooling action */
438*4882a593Smuzhiyun 	if (cpufreq_cdev->cpufreq_state == state)
439*4882a593Smuzhiyun 		return 0;
440*4882a593Smuzhiyun 
441*4882a593Smuzhiyun 	frequency = get_state_freq(cpufreq_cdev, state);
442*4882a593Smuzhiyun 
443*4882a593Smuzhiyun 	ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency);
444*4882a593Smuzhiyun 	if (ret >= 0) {
445*4882a593Smuzhiyun 		cpufreq_cdev->cpufreq_state = state;
446*4882a593Smuzhiyun 		cpus = cpufreq_cdev->policy->related_cpus;
447*4882a593Smuzhiyun 		max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus));
448*4882a593Smuzhiyun 		capacity = frequency * max_capacity;
449*4882a593Smuzhiyun 		capacity /= cpufreq_cdev->policy->cpuinfo.max_freq;
450*4882a593Smuzhiyun 		arch_set_thermal_pressure(cpus, max_capacity - capacity);
451*4882a593Smuzhiyun 		ret = 0;
452*4882a593Smuzhiyun 	}
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 	return ret;
455*4882a593Smuzhiyun }
456*4882a593Smuzhiyun 
457*4882a593Smuzhiyun /* Bind cpufreq callbacks to thermal cooling device ops */
458*4882a593Smuzhiyun 
459*4882a593Smuzhiyun static struct thermal_cooling_device_ops cpufreq_cooling_ops = {
460*4882a593Smuzhiyun 	.get_max_state		= cpufreq_get_max_state,
461*4882a593Smuzhiyun 	.get_cur_state		= cpufreq_get_cur_state,
462*4882a593Smuzhiyun 	.set_cur_state		= cpufreq_set_cur_state,
463*4882a593Smuzhiyun };
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun /**
466*4882a593Smuzhiyun  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
467*4882a593Smuzhiyun  * @np: a valid struct device_node to the cooling device device tree node
468*4882a593Smuzhiyun  * @policy: cpufreq policy
469*4882a593Smuzhiyun  * Normally this should be same as cpufreq policy->related_cpus.
470*4882a593Smuzhiyun  * @em: Energy Model of the cpufreq policy
471*4882a593Smuzhiyun  *
472*4882a593Smuzhiyun  * This interface function registers the cpufreq cooling device with the name
473*4882a593Smuzhiyun  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
474*4882a593Smuzhiyun  * cooling devices. It also gives the opportunity to link the cooling device
475*4882a593Smuzhiyun  * with a device tree node, in order to bind it via the thermal DT code.
476*4882a593Smuzhiyun  *
477*4882a593Smuzhiyun  * Return: a valid struct thermal_cooling_device pointer on success,
478*4882a593Smuzhiyun  * on failure, it returns a corresponding ERR_PTR().
479*4882a593Smuzhiyun  */
480*4882a593Smuzhiyun static struct thermal_cooling_device *
__cpufreq_cooling_register(struct device_node * np,struct cpufreq_policy * policy,struct em_perf_domain * em)481*4882a593Smuzhiyun __cpufreq_cooling_register(struct device_node *np,
482*4882a593Smuzhiyun 			struct cpufreq_policy *policy,
483*4882a593Smuzhiyun 			struct em_perf_domain *em)
484*4882a593Smuzhiyun {
485*4882a593Smuzhiyun 	struct thermal_cooling_device *cdev;
486*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev;
487*4882a593Smuzhiyun 	char dev_name[THERMAL_NAME_LENGTH];
488*4882a593Smuzhiyun 	unsigned int i, num_cpus;
489*4882a593Smuzhiyun 	struct device *dev;
490*4882a593Smuzhiyun 	int ret;
491*4882a593Smuzhiyun 	struct thermal_cooling_device_ops *cooling_ops;
492*4882a593Smuzhiyun 
493*4882a593Smuzhiyun 	dev = get_cpu_device(policy->cpu);
494*4882a593Smuzhiyun 	if (unlikely(!dev)) {
495*4882a593Smuzhiyun 		pr_warn("No cpu device for cpu %d\n", policy->cpu);
496*4882a593Smuzhiyun 		return ERR_PTR(-ENODEV);
497*4882a593Smuzhiyun 	}
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 
500*4882a593Smuzhiyun 	if (IS_ERR_OR_NULL(policy)) {
501*4882a593Smuzhiyun 		pr_err("%s: cpufreq policy isn't valid: %p\n", __func__, policy);
502*4882a593Smuzhiyun 		return ERR_PTR(-EINVAL);
503*4882a593Smuzhiyun 	}
504*4882a593Smuzhiyun 
505*4882a593Smuzhiyun 	i = cpufreq_table_count_valid_entries(policy);
506*4882a593Smuzhiyun 	if (!i) {
507*4882a593Smuzhiyun 		pr_debug("%s: CPUFreq table not found or has no valid entries\n",
508*4882a593Smuzhiyun 			 __func__);
509*4882a593Smuzhiyun 		return ERR_PTR(-ENODEV);
510*4882a593Smuzhiyun 	}
511*4882a593Smuzhiyun 
512*4882a593Smuzhiyun 	cpufreq_cdev = kzalloc(sizeof(*cpufreq_cdev), GFP_KERNEL);
513*4882a593Smuzhiyun 	if (!cpufreq_cdev)
514*4882a593Smuzhiyun 		return ERR_PTR(-ENOMEM);
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 	cpufreq_cdev->policy = policy;
517*4882a593Smuzhiyun 	num_cpus = cpumask_weight(policy->related_cpus);
518*4882a593Smuzhiyun 	cpufreq_cdev->idle_time = kcalloc(num_cpus,
519*4882a593Smuzhiyun 					 sizeof(*cpufreq_cdev->idle_time),
520*4882a593Smuzhiyun 					 GFP_KERNEL);
521*4882a593Smuzhiyun 	if (!cpufreq_cdev->idle_time) {
522*4882a593Smuzhiyun 		cdev = ERR_PTR(-ENOMEM);
523*4882a593Smuzhiyun 		goto free_cdev;
524*4882a593Smuzhiyun 	}
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun 	/* max_level is an index, not a counter */
527*4882a593Smuzhiyun 	cpufreq_cdev->max_level = i - 1;
528*4882a593Smuzhiyun 
529*4882a593Smuzhiyun 	ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
530*4882a593Smuzhiyun 	if (ret < 0) {
531*4882a593Smuzhiyun 		cdev = ERR_PTR(ret);
532*4882a593Smuzhiyun 		goto free_idle_time;
533*4882a593Smuzhiyun 	}
534*4882a593Smuzhiyun 	cpufreq_cdev->id = ret;
535*4882a593Smuzhiyun 
536*4882a593Smuzhiyun 	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
537*4882a593Smuzhiyun 		 cpufreq_cdev->id);
538*4882a593Smuzhiyun 
539*4882a593Smuzhiyun 	cooling_ops = &cpufreq_cooling_ops;
540*4882a593Smuzhiyun 
541*4882a593Smuzhiyun #ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
542*4882a593Smuzhiyun 	if (em_is_sane(cpufreq_cdev, em)) {
543*4882a593Smuzhiyun 		cpufreq_cdev->em = em;
544*4882a593Smuzhiyun 		cooling_ops->get_requested_power = cpufreq_get_requested_power;
545*4882a593Smuzhiyun 		cooling_ops->state2power = cpufreq_state2power;
546*4882a593Smuzhiyun 		cooling_ops->power2state = cpufreq_power2state;
547*4882a593Smuzhiyun 	} else
548*4882a593Smuzhiyun #endif
549*4882a593Smuzhiyun 	if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED) {
550*4882a593Smuzhiyun 		pr_err("%s: unsorted frequency tables are not supported\n",
551*4882a593Smuzhiyun 		       __func__);
552*4882a593Smuzhiyun 		cdev = ERR_PTR(-EINVAL);
553*4882a593Smuzhiyun 		goto remove_ida;
554*4882a593Smuzhiyun 	}
555*4882a593Smuzhiyun 
556*4882a593Smuzhiyun 	ret = freq_qos_add_request(&policy->constraints,
557*4882a593Smuzhiyun 				   &cpufreq_cdev->qos_req, FREQ_QOS_MAX,
558*4882a593Smuzhiyun 				   get_state_freq(cpufreq_cdev, 0));
559*4882a593Smuzhiyun 	if (ret < 0) {
560*4882a593Smuzhiyun 		pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
561*4882a593Smuzhiyun 		       ret);
562*4882a593Smuzhiyun 		cdev = ERR_PTR(ret);
563*4882a593Smuzhiyun 		goto remove_ida;
564*4882a593Smuzhiyun 	}
565*4882a593Smuzhiyun 
566*4882a593Smuzhiyun 	cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
567*4882a593Smuzhiyun 						  cooling_ops);
568*4882a593Smuzhiyun 	if (IS_ERR(cdev))
569*4882a593Smuzhiyun 		goto remove_qos_req;
570*4882a593Smuzhiyun 
571*4882a593Smuzhiyun 	mutex_lock(&cooling_list_lock);
572*4882a593Smuzhiyun 	list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
573*4882a593Smuzhiyun 	mutex_unlock(&cooling_list_lock);
574*4882a593Smuzhiyun 
575*4882a593Smuzhiyun 	return cdev;
576*4882a593Smuzhiyun 
577*4882a593Smuzhiyun remove_qos_req:
578*4882a593Smuzhiyun 	freq_qos_remove_request(&cpufreq_cdev->qos_req);
579*4882a593Smuzhiyun remove_ida:
580*4882a593Smuzhiyun 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
581*4882a593Smuzhiyun free_idle_time:
582*4882a593Smuzhiyun 	kfree(cpufreq_cdev->idle_time);
583*4882a593Smuzhiyun free_cdev:
584*4882a593Smuzhiyun 	kfree(cpufreq_cdev);
585*4882a593Smuzhiyun 	return cdev;
586*4882a593Smuzhiyun }
587*4882a593Smuzhiyun 
588*4882a593Smuzhiyun /**
589*4882a593Smuzhiyun  * cpufreq_cooling_register - function to create cpufreq cooling device.
590*4882a593Smuzhiyun  * @policy: cpufreq policy
591*4882a593Smuzhiyun  *
592*4882a593Smuzhiyun  * This interface function registers the cpufreq cooling device with the name
593*4882a593Smuzhiyun  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
594*4882a593Smuzhiyun  * cooling devices.
595*4882a593Smuzhiyun  *
596*4882a593Smuzhiyun  * Return: a valid struct thermal_cooling_device pointer on success,
597*4882a593Smuzhiyun  * on failure, it returns a corresponding ERR_PTR().
598*4882a593Smuzhiyun  */
599*4882a593Smuzhiyun struct thermal_cooling_device *
cpufreq_cooling_register(struct cpufreq_policy * policy)600*4882a593Smuzhiyun cpufreq_cooling_register(struct cpufreq_policy *policy)
601*4882a593Smuzhiyun {
602*4882a593Smuzhiyun 	return __cpufreq_cooling_register(NULL, policy, NULL);
603*4882a593Smuzhiyun }
604*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
605*4882a593Smuzhiyun 
606*4882a593Smuzhiyun /**
607*4882a593Smuzhiyun  * of_cpufreq_cooling_register - function to create cpufreq cooling device.
608*4882a593Smuzhiyun  * @policy: cpufreq policy
609*4882a593Smuzhiyun  *
610*4882a593Smuzhiyun  * This interface function registers the cpufreq cooling device with the name
611*4882a593Smuzhiyun  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
612*4882a593Smuzhiyun  * cooling devices. Using this API, the cpufreq cooling device will be
613*4882a593Smuzhiyun  * linked to the device tree node provided.
614*4882a593Smuzhiyun  *
615*4882a593Smuzhiyun  * Using this function, the cooling device will implement the power
616*4882a593Smuzhiyun  * extensions by using a simple cpu power model.  The cpus must have
617*4882a593Smuzhiyun  * registered their OPPs using the OPP library.
618*4882a593Smuzhiyun  *
619*4882a593Smuzhiyun  * It also takes into account, if property present in policy CPU node, the
620*4882a593Smuzhiyun  * static power consumed by the cpu.
621*4882a593Smuzhiyun  *
622*4882a593Smuzhiyun  * Return: a valid struct thermal_cooling_device pointer on success,
623*4882a593Smuzhiyun  * and NULL on failure.
624*4882a593Smuzhiyun  */
625*4882a593Smuzhiyun struct thermal_cooling_device *
of_cpufreq_cooling_register(struct cpufreq_policy * policy)626*4882a593Smuzhiyun of_cpufreq_cooling_register(struct cpufreq_policy *policy)
627*4882a593Smuzhiyun {
628*4882a593Smuzhiyun 	struct device_node *np = of_get_cpu_node(policy->cpu, NULL);
629*4882a593Smuzhiyun 	struct thermal_cooling_device *cdev = NULL;
630*4882a593Smuzhiyun 
631*4882a593Smuzhiyun 	if (!np) {
632*4882a593Smuzhiyun 		pr_err("cpufreq_cooling: OF node not available for cpu%d\n",
633*4882a593Smuzhiyun 		       policy->cpu);
634*4882a593Smuzhiyun 		return NULL;
635*4882a593Smuzhiyun 	}
636*4882a593Smuzhiyun 
637*4882a593Smuzhiyun 	if (of_find_property(np, "#cooling-cells", NULL)) {
638*4882a593Smuzhiyun 		struct em_perf_domain *em = em_cpu_get(policy->cpu);
639*4882a593Smuzhiyun 
640*4882a593Smuzhiyun 		cdev = __cpufreq_cooling_register(np, policy, em);
641*4882a593Smuzhiyun 		if (IS_ERR(cdev)) {
642*4882a593Smuzhiyun 			pr_err("cpufreq_cooling: cpu%d failed to register as cooling device: %ld\n",
643*4882a593Smuzhiyun 			       policy->cpu, PTR_ERR(cdev));
644*4882a593Smuzhiyun 			cdev = NULL;
645*4882a593Smuzhiyun 		}
646*4882a593Smuzhiyun 	}
647*4882a593Smuzhiyun 
648*4882a593Smuzhiyun 	of_node_put(np);
649*4882a593Smuzhiyun 	return cdev;
650*4882a593Smuzhiyun }
651*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register);
652*4882a593Smuzhiyun 
653*4882a593Smuzhiyun /**
654*4882a593Smuzhiyun  * cpufreq_cooling_unregister - function to remove cpufreq cooling device.
655*4882a593Smuzhiyun  * @cdev: thermal cooling device pointer.
656*4882a593Smuzhiyun  *
657*4882a593Smuzhiyun  * This interface function unregisters the "thermal-cpufreq-%x" cooling device.
658*4882a593Smuzhiyun  */
cpufreq_cooling_unregister(struct thermal_cooling_device * cdev)659*4882a593Smuzhiyun void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
660*4882a593Smuzhiyun {
661*4882a593Smuzhiyun 	struct cpufreq_cooling_device *cpufreq_cdev;
662*4882a593Smuzhiyun 
663*4882a593Smuzhiyun 	if (!cdev)
664*4882a593Smuzhiyun 		return;
665*4882a593Smuzhiyun 
666*4882a593Smuzhiyun 	cpufreq_cdev = cdev->devdata;
667*4882a593Smuzhiyun 
668*4882a593Smuzhiyun 	mutex_lock(&cooling_list_lock);
669*4882a593Smuzhiyun 	list_del(&cpufreq_cdev->node);
670*4882a593Smuzhiyun 	mutex_unlock(&cooling_list_lock);
671*4882a593Smuzhiyun 
672*4882a593Smuzhiyun 	thermal_cooling_device_unregister(cdev);
673*4882a593Smuzhiyun 	freq_qos_remove_request(&cpufreq_cdev->qos_req);
674*4882a593Smuzhiyun 	ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
675*4882a593Smuzhiyun 	kfree(cpufreq_cdev->idle_time);
676*4882a593Smuzhiyun 	kfree(cpufreq_cdev);
677*4882a593Smuzhiyun }
678*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);
679