xref: /OK3568_Linux_fs/kernel/drivers/thermal/intel/intel_powerclamp.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * intel_powerclamp.c - package c-state idle injection
 *
 * Copyright (c) 2012, Intel Corporation.
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 *     Jacob Pan <jacob.jun.pan@linux.intel.com>
 *
 *	TODO:
 *           1. Better handle wakeups from external interrupts. Currently a
 *              fixed compensation is added to the clamping duration when an
 *              excessive number of wakeups is observed during idle time. The
 *              reason is that for external interrupts that need no ack,
 *              clamping down the cpu in non-irq context does not reduce the
 *              irq rate. In the majority of cases clamping down the cpu does
 *              help reduce irqs as well; we should be able to differentiate
 *              the two cases and give a quantitative solution for the irqs
 *              we can control, perhaps based on get_cpu_iowait_time_us().
 *
 *	     2. Synchronization with other hw blocks.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/cpu.h>
#include <linux/thermal.h>
#include <linux/slab.h>
#include <linux/tick.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/sched/rt.h>
#include <uapi/linux/sched/types.h>

#include <asm/nmi.h>
#include <asm/msr.h>
#include <asm/mwait.h>
#include <asm/cpu_device_id.h>
#include <asm/hardirq.h>

#define MAX_TARGET_RATIO (50U)
/* For each undisturbed clamping period (no extra wakeups during idle time),
 * we increment the confidence counter for the given target ratio.
 * CONFIDENCE_OK defines the level at which runtime calibration results are
 * considered valid.
 */
#define CONFIDENCE_OK (3)
/* Default idle injection duration; the driver adjusts the sleep time to meet
 * the target idle ratio, similar to frequency modulation.
 */
#define DEFAULT_DURATION_JIFFIES (6)

static unsigned int target_mwait;
static struct dentry *debug_dir;

/* user selected target */
static unsigned int set_target_ratio;
static unsigned int current_ratio;
static bool should_skip;
static bool reduce_irq;
static atomic_t idle_wakeup_counter;
static unsigned int control_cpu; /* The cpu assigned to collect stats and
				  * update control parameters. Defaults to
				  * the BSP, but the BSP can be offlined.
				  */
static bool clamping;

struct powerclamp_worker_data {
	struct kthread_worker *worker;
	struct kthread_work balancing_work;
	struct kthread_delayed_work idle_injection_work;
	unsigned int cpu;
	unsigned int count;
	unsigned int guard;
	unsigned int window_size_now;
	unsigned int target_ratio;
	unsigned int duration_jiffies;
	bool clamping;
};

static struct powerclamp_worker_data __percpu *worker_data;
static struct thermal_cooling_device *cooling_dev;
static unsigned long *cpu_clamping_mask;  /* bit map for tracking per cpu
					   * clamping kthread worker
					   */

static unsigned int duration;
static unsigned int pkg_cstate_ratio_cur;
static unsigned int window_size;

static int duration_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_duration;

	ret = kstrtoul(arg, 10, &new_duration);
	if (ret)
		goto exit;
	if (new_duration > 25 || new_duration < 6) {
		pr_err("Out of recommended range %lu, between 6-25ms\n",
			new_duration);
		ret = -EINVAL;
		goto exit;
	}

	duration = clamp(new_duration, 6ul, 25ul);
	smp_mb();

exit:

	return ret;
}

static const struct kernel_param_ops duration_ops = {
	.set = duration_set,
	.get = param_get_int,
};

module_param_cb(duration, &duration_ops, &duration, 0644);
MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
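
/*
 * Runtime usage example: with the 0644 permissions above, the duration can
 * be changed on the fly through the standard module parameter sysfs path
 * (shell session shown for illustration only):
 *
 *   # echo 10 > /sys/module/intel_powerclamp/parameters/duration
 *   # cat /sys/module/intel_powerclamp/parameters/duration
 *   10
 *
 * Writes outside the recommended 6-25 ms range are rejected with -EINVAL.
 */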

struct powerclamp_calibration_data {
	unsigned long confidence;  /* used for calibration, basically a counter
				    * that gets incremented each time a
				    * clamping period completes without extra
				    * wakeups. Once the counter reaches a given
				    * level, the compensation is deemed usable.
				    */
	unsigned long steady_comp; /* steady state compensation used when
				    * no extra wakeups occurred.
				    */
	unsigned long dynamic_comp; /* compensate excessive wakeups from idle,
				     * mostly from external interrupts.
				     */
};

static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];

static int window_size_set(const char *arg, const struct kernel_param *kp)
{
	int ret = 0;
	unsigned long new_window_size;

	ret = kstrtoul(arg, 10, &new_window_size);
	if (ret)
		goto exit_win;
	if (new_window_size > 10 || new_window_size < 2) {
		pr_err("Out of recommended window size %lu, between 2-10\n",
			new_window_size);
		ret = -EINVAL;
		goto exit_win;
	}

	window_size = clamp(new_window_size, 2ul, 10ul);
	smp_mb();

exit_win:

	return ret;
}

static const struct kernel_param_ops window_size_ops = {
	.set = window_size_set,
	.get = param_get_int,
};

module_param_cb(window_size, &window_size_ops, &window_size, 0644);
MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
	"\tpowerclamp controls idle ratio within this window. larger\n"
	"\twindow size results in slower response time but smoother\n"
	"\tclamping results. defaults to 2.");
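
/*
 * For example, with the default window_size = 2 the controlling CPU
 * re-evaluates the measured package C-state ratio every two idle injection
 * cycles; raising window_size averages over more cycles, smoothing the
 * result at the cost of slower response:
 *
 *   # echo 10 > /sys/module/intel_powerclamp/parameters/window_size
 */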

static void find_target_mwait(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0;
	unsigned int highest_subcstate = 0;
	int i;

	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
		return;

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
	    !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
		return;

	edx >>= MWAIT_SUBSTATE_SIZE;
	for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
		if (edx & MWAIT_SUBSTATE_MASK) {
			highest_cstate = i;
			highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
		}
	}
	target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		(highest_subcstate - 1);

}
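
/*
 * Worked example for the hint computed above (illustrative CPUID values):
 * if CPUID leaf 5 returns EDX = 0x2220, i.e. two sub-states each for the
 * first three MWAIT C-states past C0, the loop ends with highest_cstate = 2
 * and highest_subcstate = 2, so target_mwait = (2 << 4) | (2 - 1) = 0x21
 * (MWAIT_SUBSTATE_SIZE is 4, one nibble per C-state in EDX).
 */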

struct pkg_cstate_info {
	bool skip;
	int msr_index;
	int cstate_id;
};

#define PKG_CSTATE_INIT(id) {				\
		.msr_index = MSR_PKG_C##id##_RESIDENCY, \
		.cstate_id = id				\
			}
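
/*
 * For example, PKG_CSTATE_INIT(6) expands to
 * { .msr_index = MSR_PKG_C6_RESIDENCY, .cstate_id = 6 }.
 */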

static struct pkg_cstate_info pkg_cstates[] = {
	PKG_CSTATE_INIT(2),
	PKG_CSTATE_INIT(3),
	PKG_CSTATE_INIT(6),
	PKG_CSTATE_INIT(7),
	PKG_CSTATE_INIT(8),
	PKG_CSTATE_INIT(9),
	PKG_CSTATE_INIT(10),
	{NULL},
};

static bool has_pkg_state_counter(void)
{
	u64 val;
	struct pkg_cstate_info *info = pkg_cstates;

	/* check if any one of the counter msrs exists */
	while (info->msr_index) {
		if (!rdmsrl_safe(info->msr_index, &val))
			return true;
		info++;
	}

	return false;
}

static u64 pkg_state_counter(void)
{
	u64 val;
	u64 count = 0;
	struct pkg_cstate_info *info = pkg_cstates;

	while (info->msr_index) {
		if (!info->skip) {
			if (!rdmsrl_safe(info->msr_index, &val))
				count += val;
			else
				info->skip = true;
		}
		info++;
	}

	return count;
}

static unsigned int get_compensation(int ratio)
{
	unsigned int comp = 0;

	/* we only use compensation if all adjacent ones are good */
	if (ratio == 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio + 1].steady_comp +
			cal_data[ratio + 2].steady_comp) / 3;
	} else if (ratio == MAX_TARGET_RATIO - 1 &&
		cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio - 2].steady_comp) / 3;
	} else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
		cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
		cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
		comp = (cal_data[ratio].steady_comp +
			cal_data[ratio - 1].steady_comp +
			cal_data[ratio + 1].steady_comp) / 3;
	}

	/* REVISIT: simple penalty of double idle injection */
	if (reduce_irq)
		comp = ratio;
	/* do not exceed limit */
	if (comp + ratio >= MAX_TARGET_RATIO)
		comp = MAX_TARGET_RATIO - ratio - 1;

	return comp;
}
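
/*
 * For instance, with a target ratio of 20 and calibrated steady_comp
 * values of 2, 3 and 4 for ratios 19, 20 and 21 (each with confidence >=
 * CONFIDENCE_OK), the averaged compensation is (2 + 3 + 4) / 3 = 3, so
 * the driver injects 23% idle to reach a measured 20%.
 */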

static void adjust_compensation(int target_ratio, unsigned int win)
{
	int delta;
	struct powerclamp_calibration_data *d = &cal_data[target_ratio];

	/*
	 * Do not adjust the compensation if the confidence level has already
	 * been reached, or if there were too many wakeups during the last
	 * idle injection period: in that case we cannot trust the data.
	 */
	if (d->confidence >= CONFIDENCE_OK ||
		atomic_read(&idle_wakeup_counter) >
		win * num_online_cpus())
		return;

	delta = set_target_ratio - current_ratio;
	/* filter out bad data */
	if (delta >= 0 && delta <= (1+target_ratio/10)) {
		if (d->steady_comp)
			d->steady_comp =
				roundup(delta+d->steady_comp, 2)/2;
		else
			d->steady_comp = delta;
		d->confidence++;
	}
}

static bool powerclamp_adjust_controls(unsigned int target_ratio,
				unsigned int guard, unsigned int win)
{
	static u64 msr_last, tsc_last;
	u64 msr_now, tsc_now;
	u64 val64;

	/* check result for the last window */
	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		current_ratio = 1;
	else if (tsc_now-tsc_last) {
		val64 = 100*(msr_now-msr_last);
		do_div(val64, (tsc_now-tsc_last));
		current_ratio = val64;
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	adjust_compensation(target_ratio, win);
	/*
	 * too many external interrupts, set flag such
	 * that we can take measures later.
	 */
	reduce_irq = atomic_read(&idle_wakeup_counter) >=
		2 * win * num_online_cpus();

	atomic_set(&idle_wakeup_counter, 0);
	/* if we are above target+guard, skip */
	return set_target_ratio + guard <= current_ratio;
}
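
/*
 * The ratio computed above is the package C-state residency expressed as
 * a percentage of elapsed wall time; on the platforms this driver targets
 * the residency MSRs count at TSC frequency, so:
 *
 *   current_ratio = 100 * (msr_now - msr_last) / (tsc_now - tsc_last)
 *
 * e.g. a residency delta of 500 million counts over a TSC delta of
 * 2 billion gives current_ratio = 25.
 */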

static void clamp_balancing_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;
	int sleeptime;
	unsigned long target_jiffies;
	unsigned int compensated_ratio;
	int interval; /* jiffies to sleep for each attempt */

	w_data = container_of(work, struct powerclamp_worker_data,
			      balancing_work);

	/*
	 * make sure user selected ratio does not take effect until
	 * the next round. adjust target_ratio if user has changed
	 * target such that we can converge quickly.
	 */
	w_data->target_ratio = READ_ONCE(set_target_ratio);
	w_data->guard = 1 + w_data->target_ratio / 20;
	w_data->window_size_now = window_size;
	w_data->duration_jiffies = msecs_to_jiffies(duration);
	w_data->count++;

	/*
	 * systems may have different ability to enter package level
	 * c-states, thus we need to compensate the injected idle ratio
	 * to achieve the actual target reported by the HW.
	 */
	compensated_ratio = w_data->target_ratio +
		get_compensation(w_data->target_ratio);
	if (compensated_ratio <= 0)
		compensated_ratio = 1;
	interval = w_data->duration_jiffies * 100 / compensated_ratio;

	/* align idle time */
	target_jiffies = roundup(jiffies, interval);
	sleeptime = target_jiffies - jiffies;
	if (sleeptime <= 0)
		sleeptime = 1;

	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_delayed_work(w_data->worker,
					   &w_data->idle_injection_work,
					   sleeptime);
}
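
/*
 * To illustrate the interval arithmetic above with the defaults:
 * duration_jiffies = 6 at a compensated ratio of 25 gives
 * interval = 6 * 100 / 25 = 24, so each cycle contributes 6 idle jiffies
 * out of every 24, i.e. the requested 25% idle ratio. Aligning
 * target_jiffies to a multiple of the interval keeps the injection
 * periods synchronized across CPUs, which is what allows the package
 * as a whole to enter a C-state.
 */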

static void clamp_idle_injection_func(struct kthread_work *work)
{
	struct powerclamp_worker_data *w_data;

	w_data = container_of(work, struct powerclamp_worker_data,
			      idle_injection_work.work);

	/*
	 * only the elected controlling cpu can collect stats and update
	 * control parameters.
	 */
	if (w_data->cpu == control_cpu &&
	    !(w_data->count % w_data->window_size_now)) {
		should_skip =
			powerclamp_adjust_controls(w_data->target_ratio,
						   w_data->guard,
						   w_data->window_size_now);
		smp_mb();
	}

	if (should_skip)
		goto balance;

	play_idle(jiffies_to_usecs(w_data->duration_jiffies));

balance:
	if (clamping && w_data->clamping && cpu_online(w_data->cpu))
		kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

/*
 * 1 HZ polling while clamping is active, useful for userspace
 * to monitor actual idle ratio.
 */
static void poll_pkg_cstate(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
static void poll_pkg_cstate(struct work_struct *dummy)
{
	static u64 msr_last;
	static u64 tsc_last;

	u64 msr_now;
	u64 tsc_now;
	u64 val64;

	msr_now = pkg_state_counter();
	tsc_now = rdtsc();

	/* calculate pkg cstate vs tsc ratio */
	if (!msr_last || !tsc_last)
		pkg_cstate_ratio_cur = 1;
	else {
		if (tsc_now - tsc_last) {
			val64 = 100 * (msr_now - msr_last);
			do_div(val64, (tsc_now - tsc_last));
			pkg_cstate_ratio_cur = val64;
		}
	}

	/* update record */
	msr_last = msr_now;
	tsc_last = tsc_now;

	if (clamping)
		schedule_delayed_work(&poll_pkg_cstate_work, HZ);
}

static void start_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);
	struct kthread_worker *worker;

	worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu);
	if (IS_ERR(worker))
		return;

	w_data->worker = worker;
	w_data->count = 0;
	w_data->cpu = cpu;
	w_data->clamping = true;
	set_bit(cpu, cpu_clamping_mask);
	sched_set_fifo(worker->task);
	kthread_init_work(&w_data->balancing_work, clamp_balancing_func);
	kthread_init_delayed_work(&w_data->idle_injection_work,
				  clamp_idle_injection_func);
	kthread_queue_work(w_data->worker, &w_data->balancing_work);
}

static void stop_power_clamp_worker(unsigned long cpu)
{
	struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu);

	if (!w_data->worker)
		return;

	w_data->clamping = false;
	/*
	 * Make sure that all works that get queued after this point see
	 * the clamping disabled. The counterpart is not needed because
	 * there is an implicit memory barrier when the queued work
	 * is processed.
	 */
	smp_wmb();
	kthread_cancel_work_sync(&w_data->balancing_work);
	kthread_cancel_delayed_work_sync(&w_data->idle_injection_work);
	/*
	 * The balancing work still might be queued here because
	 * the handling of the "clamping" variable, cancel, and queue
	 * operations are not synchronized via a lock. But it is not
	 * a big deal. The balancing work is fast and destroying the
	 * kthread worker will wait for it.
	 */
	clear_bit(w_data->cpu, cpu_clamping_mask);
	kthread_destroy_worker(w_data->worker);

	w_data->worker = NULL;
}

static int start_power_clamp(void)
{
	unsigned long cpu;

	set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1);
	/* prevent cpu hotplug */
	get_online_cpus();

	/* prefer BSP */
	control_cpu = cpumask_first(cpu_online_mask);

	clamping = true;
	schedule_delayed_work(&poll_pkg_cstate_work, 0);

	/* start one kthread worker per online cpu */
	for_each_online_cpu(cpu) {
		start_power_clamp_worker(cpu);
	}
	put_online_cpus();

	return 0;
}

static void end_power_clamp(void)
{
	int i;

	/*
	 * Block requeuing in all the kthread workers. They will flush and
	 * stop faster.
	 */
	clamping = false;
	if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) {
		for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) {
			pr_debug("clamping worker for cpu %d alive, destroy\n",
				 i);
			stop_power_clamp_worker(i);
		}
	}
}

static int powerclamp_cpu_online(unsigned int cpu)
{
	if (!clamping)
		return 0;
	start_power_clamp_worker(cpu);
	/* prefer BSP as controlling CPU */
	if (cpu == 0) {
		control_cpu = 0;
		smp_mb();
	}
	return 0;
}

static int powerclamp_cpu_predown(unsigned int cpu)
{
	if (!clamping)
		return 0;

	stop_power_clamp_worker(cpu);
	if (cpu != control_cpu)
		return 0;

	control_cpu = cpumask_first(cpu_online_mask);
	if (control_cpu == cpu)
		control_cpu = cpumask_next(cpu, cpu_online_mask);
	smp_mb();
	return 0;
}

static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	*state = MAX_TARGET_RATIO;

	return 0;
}

static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long *state)
{
	if (clamping)
		*state = pkg_cstate_ratio_cur;
	else
		/* to save power, do not poll idle ratio while not clamping */
		*state = -1; /* indicates invalid state */

	return 0;
}

static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
				 unsigned long new_target_ratio)
{
	int ret = 0;

	new_target_ratio = clamp(new_target_ratio, 0UL,
				(unsigned long) (MAX_TARGET_RATIO-1));
	if (set_target_ratio == 0 && new_target_ratio > 0) {
		pr_info("Start idle injection to reduce power\n");
		set_target_ratio = new_target_ratio;
		ret = start_power_clamp();
		goto exit_set;
	} else if (set_target_ratio > 0 && new_target_ratio == 0) {
		pr_info("Stop forced idle injection\n");
		end_power_clamp();
		set_target_ratio = 0;
	} else /* adjust currently running */ {
		set_target_ratio = new_target_ratio;
		/* make new set_target_ratio visible to other cpus */
		smp_mb();
	}

exit_set:
	return ret;
}
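
/*
 * From userspace the target ratio is driven through the generic thermal
 * sysfs cooling device interface; the cooling_deviceN index varies per
 * system, e.g.:
 *
 *   # grep -l intel_powerclamp /sys/class/thermal/cooling_device* /type
 *   # echo 25 > /sys/class/thermal/cooling_deviceN/cur_state
 *
 * requests roughly 25% injected idle time, and writing 0 stops injection.
 */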

/* bind to generic thermal layer as cooling device */
static struct thermal_cooling_device_ops powerclamp_cooling_ops = {
	.get_max_state = powerclamp_get_max_state,
	.get_cur_state = powerclamp_get_cur_state,
	.set_cur_state = powerclamp_set_cur_state,
};

static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
	X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

static int __init powerclamp_probe(void)
{

	if (!x86_match_cpu(intel_powerclamp_ids)) {
		pr_err("CPU does not support MWAIT\n");
		return -ENODEV;
	}

	/* The goal for idle time alignment is to achieve package cstate. */
	if (!has_pkg_state_counter()) {
		pr_info("No package C-state available\n");
		return -ENODEV;
	}

	/* find the deepest mwait value */
	find_target_mwait();

	return 0;
}

static int powerclamp_debug_show(struct seq_file *m, void *unused)
{
	int i = 0;

	seq_printf(m, "controlling cpu: %d\n", control_cpu);
	seq_printf(m, "pct confidence steady dynamic (compensation)\n");
	for (i = 0; i < MAX_TARGET_RATIO; i++) {
		seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
			i,
			cal_data[i].confidence,
			cal_data[i].steady_comp,
			cal_data[i].dynamic_comp);
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);

static inline void powerclamp_create_debug_files(void)
{
	debug_dir = debugfs_create_dir("intel_powerclamp", NULL);

	debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
			    &powerclamp_debug_fops);
}
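
/*
 * With debugfs mounted at its usual location, the calibration table can
 * be inspected at runtime:
 *
 *   # cat /sys/kernel/debug/intel_powerclamp/powerclamp_calib
 *   controlling cpu: 0
 *   pct confidence steady dynamic (compensation)
 *   ...
 *
 * (one row per target ratio, in the format printed by
 * powerclamp_debug_show() above).
 */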

static enum cpuhp_state hp_state;

static int __init powerclamp_init(void)
{
	int retval;
	int bitmap_size;

	bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long);
	cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL);
	if (!cpu_clamping_mask)
		return -ENOMEM;

	/* probe cpu features and ids here */
	retval = powerclamp_probe();
	if (retval)
		goto exit_free;

	/* set default limit, maybe adjusted during runtime based on feedback */
	window_size = 2;
	retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					   "thermal/intel_powerclamp:online",
					   powerclamp_cpu_online,
					   powerclamp_cpu_predown);
	if (retval < 0)
		goto exit_free;

	hp_state = retval;

	worker_data = alloc_percpu(struct powerclamp_worker_data);
	if (!worker_data) {
		retval = -ENOMEM;
		goto exit_unregister;
	}

	cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
						&powerclamp_cooling_ops);
	if (IS_ERR(cooling_dev)) {
		retval = -ENODEV;
		goto exit_free_thread;
	}

	if (!duration)
		duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES);

	powerclamp_create_debug_files();

	return 0;

exit_free_thread:
	free_percpu(worker_data);
exit_unregister:
	cpuhp_remove_state_nocalls(hp_state);
exit_free:
	kfree(cpu_clamping_mask);
	return retval;
}
module_init(powerclamp_init);

static void __exit powerclamp_exit(void)
{
	end_power_clamp();
	cpuhp_remove_state_nocalls(hp_state);
	free_percpu(worker_data);
	thermal_cooling_device_unregister(cooling_dev);
	kfree(cpu_clamping_mask);

	cancel_delayed_work_sync(&poll_pkg_cstate_work);
	debugfs_remove_recursive(debug_dir);
}
module_exit(powerclamp_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");