xref: /OK3568_Linux_fs/kernel/drivers/cpufreq/cpufreq_governor.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * drivers/cpufreq/cpufreq_governor.c
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * CPUFREQ governors common code
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Copyright	(C) 2001 Russell King
8*4882a593Smuzhiyun  *		(C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>.
9*4882a593Smuzhiyun  *		(C) 2003 Jun Nakajima <jun.nakajima@intel.com>
10*4882a593Smuzhiyun  *		(C) 2009 Alexander Clouter <alex@digriz.org.uk>
11*4882a593Smuzhiyun  *		(c) 2012 Viresh Kumar <viresh.kumar@linaro.org>
12*4882a593Smuzhiyun  */
13*4882a593Smuzhiyun 
14*4882a593Smuzhiyun #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15*4882a593Smuzhiyun 
16*4882a593Smuzhiyun #include <linux/export.h>
17*4882a593Smuzhiyun #include <linux/kernel_stat.h>
18*4882a593Smuzhiyun #include <linux/slab.h>
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include "cpufreq_governor.h"
21*4882a593Smuzhiyun 
22*4882a593Smuzhiyun #define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL	(2 * TICK_NSEC / NSEC_PER_USEC)
23*4882a593Smuzhiyun 
24*4882a593Smuzhiyun static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
25*4882a593Smuzhiyun 
26*4882a593Smuzhiyun static DEFINE_MUTEX(gov_dbs_data_mutex);
27*4882a593Smuzhiyun 
28*4882a593Smuzhiyun /* Common sysfs tunables */
29*4882a593Smuzhiyun /*
30*4882a593Smuzhiyun  * store_sampling_rate - update sampling rate effective immediately if needed.
31*4882a593Smuzhiyun  *
32*4882a593Smuzhiyun  * If new rate is smaller than the old, simply updating
33*4882a593Smuzhiyun  * dbs.sampling_rate might not be appropriate. For example, if the
34*4882a593Smuzhiyun  * original sampling_rate was 1 second and the requested new sampling rate is 10
35*4882a593Smuzhiyun  * ms because the user needs immediate reaction from ondemand governor, but not
36*4882a593Smuzhiyun  * sure if higher frequency will be required or not, then, the governor may
37*4882a593Smuzhiyun  * change the sampling rate too late; up to 1 second later. Thus, if we are
38*4882a593Smuzhiyun  * reducing the sampling rate, we need to make the new value effective
39*4882a593Smuzhiyun  * immediately.
40*4882a593Smuzhiyun  *
41*4882a593Smuzhiyun  * This must be called with dbs_data->mutex held, otherwise traversing
42*4882a593Smuzhiyun  * policy_dbs_list isn't safe.
43*4882a593Smuzhiyun  */
store_sampling_rate(struct gov_attr_set * attr_set,const char * buf,size_t count)44*4882a593Smuzhiyun ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
45*4882a593Smuzhiyun 			    size_t count)
46*4882a593Smuzhiyun {
47*4882a593Smuzhiyun 	struct dbs_data *dbs_data = to_dbs_data(attr_set);
48*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
49*4882a593Smuzhiyun 	unsigned int sampling_interval;
50*4882a593Smuzhiyun 	int ret;
51*4882a593Smuzhiyun 
52*4882a593Smuzhiyun 	ret = sscanf(buf, "%u", &sampling_interval);
53*4882a593Smuzhiyun 	if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
54*4882a593Smuzhiyun 		return -EINVAL;
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun 	dbs_data->sampling_rate = sampling_interval;
57*4882a593Smuzhiyun 
58*4882a593Smuzhiyun 	/*
59*4882a593Smuzhiyun 	 * We are operating under dbs_data->mutex and so the list and its
60*4882a593Smuzhiyun 	 * entries can't be freed concurrently.
61*4882a593Smuzhiyun 	 */
62*4882a593Smuzhiyun 	list_for_each_entry(policy_dbs, &attr_set->policy_list, list) {
63*4882a593Smuzhiyun 		mutex_lock(&policy_dbs->update_mutex);
64*4882a593Smuzhiyun 		/*
65*4882a593Smuzhiyun 		 * On 32-bit architectures this may race with the
66*4882a593Smuzhiyun 		 * sample_delay_ns read in dbs_update_util_handler(), but that
67*4882a593Smuzhiyun 		 * really doesn't matter.  If the read returns a value that's
68*4882a593Smuzhiyun 		 * too big, the sample will be skipped, but the next invocation
69*4882a593Smuzhiyun 		 * of dbs_update_util_handler() (when the update has been
70*4882a593Smuzhiyun 		 * completed) will take a sample.
71*4882a593Smuzhiyun 		 *
72*4882a593Smuzhiyun 		 * If this runs in parallel with dbs_work_handler(), we may end
73*4882a593Smuzhiyun 		 * up overwriting the sample_delay_ns value that it has just
74*4882a593Smuzhiyun 		 * written, but it will be corrected next time a sample is
75*4882a593Smuzhiyun 		 * taken, so it shouldn't be significant.
76*4882a593Smuzhiyun 		 */
77*4882a593Smuzhiyun 		gov_update_sample_delay(policy_dbs, 0);
78*4882a593Smuzhiyun 		mutex_unlock(&policy_dbs->update_mutex);
79*4882a593Smuzhiyun 	}
80*4882a593Smuzhiyun 
81*4882a593Smuzhiyun 	return count;
82*4882a593Smuzhiyun }
83*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(store_sampling_rate);
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun /**
86*4882a593Smuzhiyun  * gov_update_cpu_data - Update CPU load data.
87*4882a593Smuzhiyun  * @dbs_data: Top-level governor data pointer.
88*4882a593Smuzhiyun  *
89*4882a593Smuzhiyun  * Update CPU load data for all CPUs in the domain governed by @dbs_data
90*4882a593Smuzhiyun  * (that may be a single policy or a bunch of them if governor tunables are
91*4882a593Smuzhiyun  * system-wide).
92*4882a593Smuzhiyun  *
93*4882a593Smuzhiyun  * Call under the @dbs_data mutex.
94*4882a593Smuzhiyun  */
gov_update_cpu_data(struct dbs_data * dbs_data)95*4882a593Smuzhiyun void gov_update_cpu_data(struct dbs_data *dbs_data)
96*4882a593Smuzhiyun {
97*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
98*4882a593Smuzhiyun 
99*4882a593Smuzhiyun 	list_for_each_entry(policy_dbs, &dbs_data->attr_set.policy_list, list) {
100*4882a593Smuzhiyun 		unsigned int j;
101*4882a593Smuzhiyun 
102*4882a593Smuzhiyun 		for_each_cpu(j, policy_dbs->policy->cpus) {
103*4882a593Smuzhiyun 			struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun 			j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time,
106*4882a593Smuzhiyun 								  dbs_data->io_is_busy);
107*4882a593Smuzhiyun 			if (dbs_data->ignore_nice_load)
108*4882a593Smuzhiyun 				j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
109*4882a593Smuzhiyun 		}
110*4882a593Smuzhiyun 	}
111*4882a593Smuzhiyun }
112*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(gov_update_cpu_data);
113*4882a593Smuzhiyun 
dbs_update(struct cpufreq_policy * policy)114*4882a593Smuzhiyun unsigned int dbs_update(struct cpufreq_policy *policy)
115*4882a593Smuzhiyun {
116*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs = policy->governor_data;
117*4882a593Smuzhiyun 	struct dbs_data *dbs_data = policy_dbs->dbs_data;
118*4882a593Smuzhiyun 	unsigned int ignore_nice = dbs_data->ignore_nice_load;
119*4882a593Smuzhiyun 	unsigned int max_load = 0, idle_periods = UINT_MAX;
120*4882a593Smuzhiyun 	unsigned int sampling_rate, io_busy, j;
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun 	/*
123*4882a593Smuzhiyun 	 * Sometimes governors may use an additional multiplier to increase
124*4882a593Smuzhiyun 	 * sample delays temporarily.  Apply that multiplier to sampling_rate
125*4882a593Smuzhiyun 	 * so as to keep the wake-up-from-idle detection logic a bit
126*4882a593Smuzhiyun 	 * conservative.
127*4882a593Smuzhiyun 	 */
128*4882a593Smuzhiyun 	sampling_rate = dbs_data->sampling_rate * policy_dbs->rate_mult;
129*4882a593Smuzhiyun 	/*
130*4882a593Smuzhiyun 	 * For the purpose of ondemand, waiting for disk IO is an indication
131*4882a593Smuzhiyun 	 * that you're performance critical, and not that the system is actually
132*4882a593Smuzhiyun 	 * idle, so do not add the iowait time to the CPU idle time then.
133*4882a593Smuzhiyun 	 */
134*4882a593Smuzhiyun 	io_busy = dbs_data->io_is_busy;
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 	/* Get Absolute Load */
137*4882a593Smuzhiyun 	for_each_cpu(j, policy->cpus) {
138*4882a593Smuzhiyun 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
139*4882a593Smuzhiyun 		u64 update_time, cur_idle_time;
140*4882a593Smuzhiyun 		unsigned int idle_time, time_elapsed;
141*4882a593Smuzhiyun 		unsigned int load;
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 		cur_idle_time = get_cpu_idle_time(j, &update_time, io_busy);
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 		time_elapsed = update_time - j_cdbs->prev_update_time;
146*4882a593Smuzhiyun 		j_cdbs->prev_update_time = update_time;
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 		idle_time = cur_idle_time - j_cdbs->prev_cpu_idle;
149*4882a593Smuzhiyun 		j_cdbs->prev_cpu_idle = cur_idle_time;
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun 		if (ignore_nice) {
152*4882a593Smuzhiyun 			u64 cur_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
153*4882a593Smuzhiyun 
154*4882a593Smuzhiyun 			idle_time += div_u64(cur_nice - j_cdbs->prev_cpu_nice, NSEC_PER_USEC);
155*4882a593Smuzhiyun 			j_cdbs->prev_cpu_nice = cur_nice;
156*4882a593Smuzhiyun 		}
157*4882a593Smuzhiyun 
158*4882a593Smuzhiyun 		if (unlikely(!time_elapsed)) {
159*4882a593Smuzhiyun 			/*
160*4882a593Smuzhiyun 			 * That can only happen when this function is called
161*4882a593Smuzhiyun 			 * twice in a row with a very short interval between the
162*4882a593Smuzhiyun 			 * calls, so the previous load value can be used then.
163*4882a593Smuzhiyun 			 */
164*4882a593Smuzhiyun 			load = j_cdbs->prev_load;
165*4882a593Smuzhiyun 		} else if (unlikely((int)idle_time > 2 * sampling_rate &&
166*4882a593Smuzhiyun 				    j_cdbs->prev_load)) {
167*4882a593Smuzhiyun 			/*
168*4882a593Smuzhiyun 			 * If the CPU had gone completely idle and a task has
169*4882a593Smuzhiyun 			 * just woken up on this CPU now, it would be unfair to
170*4882a593Smuzhiyun 			 * calculate 'load' the usual way for this elapsed
171*4882a593Smuzhiyun 			 * time-window, because it would show near-zero load,
172*4882a593Smuzhiyun 			 * irrespective of how CPU intensive that task actually
173*4882a593Smuzhiyun 			 * was. This is undesirable for latency-sensitive bursty
174*4882a593Smuzhiyun 			 * workloads.
175*4882a593Smuzhiyun 			 *
176*4882a593Smuzhiyun 			 * To avoid this, reuse the 'load' from the previous
177*4882a593Smuzhiyun 			 * time-window and give this task a chance to start with
178*4882a593Smuzhiyun 			 * a reasonably high CPU frequency. However, that
179*4882a593Smuzhiyun 			 * shouldn't be over-done, lest we get stuck at a high
180*4882a593Smuzhiyun 			 * load (high frequency) for too long, even when the
181*4882a593Smuzhiyun 			 * current system load has actually dropped down, so
182*4882a593Smuzhiyun 			 * clear prev_load to guarantee that the load will be
183*4882a593Smuzhiyun 			 * computed again next time.
184*4882a593Smuzhiyun 			 *
185*4882a593Smuzhiyun 			 * Detecting this situation is easy: an unusually large
186*4882a593Smuzhiyun 			 * 'idle_time' (as compared to the sampling rate)
187*4882a593Smuzhiyun 			 * indicates this scenario.
188*4882a593Smuzhiyun 			 */
189*4882a593Smuzhiyun 			load = j_cdbs->prev_load;
190*4882a593Smuzhiyun 			j_cdbs->prev_load = 0;
191*4882a593Smuzhiyun 		} else {
192*4882a593Smuzhiyun 			if (time_elapsed >= idle_time) {
193*4882a593Smuzhiyun 				load = 100 * (time_elapsed - idle_time) / time_elapsed;
194*4882a593Smuzhiyun 			} else {
195*4882a593Smuzhiyun 				/*
196*4882a593Smuzhiyun 				 * That can happen if idle_time is returned by
197*4882a593Smuzhiyun 				 * get_cpu_idle_time_jiffy().  In that case
198*4882a593Smuzhiyun 				 * idle_time is roughly equal to the difference
199*4882a593Smuzhiyun 				 * between time_elapsed and "busy time" obtained
200*4882a593Smuzhiyun 				 * from CPU statistics.  Then, the "busy time"
201*4882a593Smuzhiyun 				 * can end up being greater than time_elapsed
202*4882a593Smuzhiyun 				 * (for example, if jiffies_64 and the CPU
203*4882a593Smuzhiyun 				 * statistics are updated by different CPUs),
204*4882a593Smuzhiyun 				 * so idle_time may in fact be negative.  That
205*4882a593Smuzhiyun 				 * means, though, that the CPU was busy all
206*4882a593Smuzhiyun 				 * the time (on the rough average) during the
207*4882a593Smuzhiyun 				 * last sampling interval and 100 can be
208*4882a593Smuzhiyun 				 * returned as the load.
209*4882a593Smuzhiyun 				 */
210*4882a593Smuzhiyun 				load = (int)idle_time < 0 ? 100 : 0;
211*4882a593Smuzhiyun 			}
212*4882a593Smuzhiyun 			j_cdbs->prev_load = load;
213*4882a593Smuzhiyun 		}
214*4882a593Smuzhiyun 
215*4882a593Smuzhiyun 		if (unlikely((int)idle_time > 2 * sampling_rate)) {
216*4882a593Smuzhiyun 			unsigned int periods = idle_time / sampling_rate;
217*4882a593Smuzhiyun 
218*4882a593Smuzhiyun 			if (periods < idle_periods)
219*4882a593Smuzhiyun 				idle_periods = periods;
220*4882a593Smuzhiyun 		}
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 		if (load > max_load)
223*4882a593Smuzhiyun 			max_load = load;
224*4882a593Smuzhiyun 	}
225*4882a593Smuzhiyun 
226*4882a593Smuzhiyun 	policy_dbs->idle_periods = idle_periods;
227*4882a593Smuzhiyun 
228*4882a593Smuzhiyun 	return max_load;
229*4882a593Smuzhiyun }
230*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(dbs_update);
231*4882a593Smuzhiyun 
dbs_work_handler(struct work_struct * work)232*4882a593Smuzhiyun static void dbs_work_handler(struct work_struct *work)
233*4882a593Smuzhiyun {
234*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
235*4882a593Smuzhiyun 	struct cpufreq_policy *policy;
236*4882a593Smuzhiyun 	struct dbs_governor *gov;
237*4882a593Smuzhiyun 
238*4882a593Smuzhiyun 	policy_dbs = container_of(work, struct policy_dbs_info, work);
239*4882a593Smuzhiyun 	policy = policy_dbs->policy;
240*4882a593Smuzhiyun 	gov = dbs_governor_of(policy);
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 	/*
243*4882a593Smuzhiyun 	 * Make sure cpufreq_governor_limits() isn't evaluating load or the
244*4882a593Smuzhiyun 	 * ondemand governor isn't updating the sampling rate in parallel.
245*4882a593Smuzhiyun 	 */
246*4882a593Smuzhiyun 	mutex_lock(&policy_dbs->update_mutex);
247*4882a593Smuzhiyun 	gov_update_sample_delay(policy_dbs, gov->gov_dbs_update(policy));
248*4882a593Smuzhiyun 	mutex_unlock(&policy_dbs->update_mutex);
249*4882a593Smuzhiyun 
250*4882a593Smuzhiyun 	/* Allow the utilization update handler to queue up more work. */
251*4882a593Smuzhiyun 	atomic_set(&policy_dbs->work_count, 0);
252*4882a593Smuzhiyun 	/*
253*4882a593Smuzhiyun 	 * If the update below is reordered with respect to the sample delay
254*4882a593Smuzhiyun 	 * modification, the utilization update handler may end up using a stale
255*4882a593Smuzhiyun 	 * sample delay value.
256*4882a593Smuzhiyun 	 */
257*4882a593Smuzhiyun 	smp_wmb();
258*4882a593Smuzhiyun 	policy_dbs->work_in_progress = false;
259*4882a593Smuzhiyun }
260*4882a593Smuzhiyun 
dbs_irq_work(struct irq_work * irq_work)261*4882a593Smuzhiyun static void dbs_irq_work(struct irq_work *irq_work)
262*4882a593Smuzhiyun {
263*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	policy_dbs = container_of(irq_work, struct policy_dbs_info, irq_work);
266*4882a593Smuzhiyun 	schedule_work_on(smp_processor_id(), &policy_dbs->work);
267*4882a593Smuzhiyun }
268*4882a593Smuzhiyun 
/*
 * dbs_update_util_handler - Utilization update hook for dbs governors.
 * @data: hook data embedded in the per-CPU struct cpu_dbs_info.
 * @time: timestamp of this update, compared against last_sample_time.
 * @flags: unused here.
 *
 * Decides whether it is time to take a new sample for the policy this CPU
 * belongs to and, if so, records the sample time and queues the irq_work
 * that kicks off dbs_work_handler().
 */
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
				    unsigned int flags)
{
	struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
	struct policy_dbs_info *pdbs = cdbs->policy_dbs;
	u64 last_sample, elapsed_ns;

	if (!cpufreq_this_cpu_can_update(pdbs->policy))
		return;

	/*
	 * Queuing new work may not be allowed at this point, either because
	 * work is already queued up or in progress (checked here), or because
	 * too little time has passed since the previous sample (checked
	 * below).
	 */
	if (pdbs->work_in_progress)
		return;

	/*
	 * Keep the reads below from being reordered before the
	 * work_in_progress check; otherwise the sample_delay_ns value used in
	 * the comparison may be stale.
	 */
	smp_rmb();
	last_sample = READ_ONCE(pdbs->last_sample_time);
	elapsed_ns = time - last_sample;
	if ((s64)elapsed_ns < pdbs->sample_delay_ns)
		return;

	/*
	 * A non-shared policy can queue the irq_work right away.  For a
	 * shared one, exactly one of the CPUs sharing the policy must win.
	 */
	if (pdbs->is_shared) {
		if (!atomic_add_unless(&pdbs->work_count, 1, 1))
			return;

		/*
		 * Another CPU has updated last_sample_time since we read it,
		 * so this sample is no longer ours to take: release the work
		 * counter and bail out.
		 */
		if (unlikely(last_sample != READ_ONCE(pdbs->last_sample_time))) {
			atomic_set(&pdbs->work_count, 0);
			return;
		}
	}

	pdbs->last_sample_time = time;
	pdbs->work_in_progress = true;
	irq_work_queue(&pdbs->irq_work);
}
321*4882a593Smuzhiyun 
gov_set_update_util(struct policy_dbs_info * policy_dbs,unsigned int delay_us)322*4882a593Smuzhiyun static void gov_set_update_util(struct policy_dbs_info *policy_dbs,
323*4882a593Smuzhiyun 				unsigned int delay_us)
324*4882a593Smuzhiyun {
325*4882a593Smuzhiyun 	struct cpufreq_policy *policy = policy_dbs->policy;
326*4882a593Smuzhiyun 	int cpu;
327*4882a593Smuzhiyun 
328*4882a593Smuzhiyun 	gov_update_sample_delay(policy_dbs, delay_us);
329*4882a593Smuzhiyun 	policy_dbs->last_sample_time = 0;
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun 	for_each_cpu(cpu, policy->cpus) {
332*4882a593Smuzhiyun 		struct cpu_dbs_info *cdbs = &per_cpu(cpu_dbs, cpu);
333*4882a593Smuzhiyun 
334*4882a593Smuzhiyun 		cpufreq_add_update_util_hook(cpu, &cdbs->update_util,
335*4882a593Smuzhiyun 					     dbs_update_util_handler);
336*4882a593Smuzhiyun 	}
337*4882a593Smuzhiyun }
338*4882a593Smuzhiyun 
gov_clear_update_util(struct cpufreq_policy * policy)339*4882a593Smuzhiyun static inline void gov_clear_update_util(struct cpufreq_policy *policy)
340*4882a593Smuzhiyun {
341*4882a593Smuzhiyun 	int i;
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 	for_each_cpu(i, policy->cpus)
344*4882a593Smuzhiyun 		cpufreq_remove_update_util_hook(i);
345*4882a593Smuzhiyun 
346*4882a593Smuzhiyun 	synchronize_rcu();
347*4882a593Smuzhiyun }
348*4882a593Smuzhiyun 
alloc_policy_dbs_info(struct cpufreq_policy * policy,struct dbs_governor * gov)349*4882a593Smuzhiyun static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy,
350*4882a593Smuzhiyun 						     struct dbs_governor *gov)
351*4882a593Smuzhiyun {
352*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
353*4882a593Smuzhiyun 	int j;
354*4882a593Smuzhiyun 
355*4882a593Smuzhiyun 	/* Allocate memory for per-policy governor data. */
356*4882a593Smuzhiyun 	policy_dbs = gov->alloc();
357*4882a593Smuzhiyun 	if (!policy_dbs)
358*4882a593Smuzhiyun 		return NULL;
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun 	policy_dbs->policy = policy;
361*4882a593Smuzhiyun 	mutex_init(&policy_dbs->update_mutex);
362*4882a593Smuzhiyun 	atomic_set(&policy_dbs->work_count, 0);
363*4882a593Smuzhiyun 	init_irq_work(&policy_dbs->irq_work, dbs_irq_work);
364*4882a593Smuzhiyun 	INIT_WORK(&policy_dbs->work, dbs_work_handler);
365*4882a593Smuzhiyun 
366*4882a593Smuzhiyun 	/* Set policy_dbs for all CPUs, online+offline */
367*4882a593Smuzhiyun 	for_each_cpu(j, policy->related_cpus) {
368*4882a593Smuzhiyun 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
369*4882a593Smuzhiyun 
370*4882a593Smuzhiyun 		j_cdbs->policy_dbs = policy_dbs;
371*4882a593Smuzhiyun 	}
372*4882a593Smuzhiyun 	return policy_dbs;
373*4882a593Smuzhiyun }
374*4882a593Smuzhiyun 
free_policy_dbs_info(struct policy_dbs_info * policy_dbs,struct dbs_governor * gov)375*4882a593Smuzhiyun static void free_policy_dbs_info(struct policy_dbs_info *policy_dbs,
376*4882a593Smuzhiyun 				 struct dbs_governor *gov)
377*4882a593Smuzhiyun {
378*4882a593Smuzhiyun 	int j;
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 	mutex_destroy(&policy_dbs->update_mutex);
381*4882a593Smuzhiyun 
382*4882a593Smuzhiyun 	for_each_cpu(j, policy_dbs->policy->related_cpus) {
383*4882a593Smuzhiyun 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
384*4882a593Smuzhiyun 
385*4882a593Smuzhiyun 		j_cdbs->policy_dbs = NULL;
386*4882a593Smuzhiyun 		j_cdbs->update_util.func = NULL;
387*4882a593Smuzhiyun 	}
388*4882a593Smuzhiyun 	gov->free(policy_dbs);
389*4882a593Smuzhiyun }
390*4882a593Smuzhiyun 
cpufreq_dbs_governor_init(struct cpufreq_policy * policy)391*4882a593Smuzhiyun int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
392*4882a593Smuzhiyun {
393*4882a593Smuzhiyun 	struct dbs_governor *gov = dbs_governor_of(policy);
394*4882a593Smuzhiyun 	struct dbs_data *dbs_data;
395*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
396*4882a593Smuzhiyun 	int ret = 0;
397*4882a593Smuzhiyun 
398*4882a593Smuzhiyun 	/* State should be equivalent to EXIT */
399*4882a593Smuzhiyun 	if (policy->governor_data)
400*4882a593Smuzhiyun 		return -EBUSY;
401*4882a593Smuzhiyun 
402*4882a593Smuzhiyun 	policy_dbs = alloc_policy_dbs_info(policy, gov);
403*4882a593Smuzhiyun 	if (!policy_dbs)
404*4882a593Smuzhiyun 		return -ENOMEM;
405*4882a593Smuzhiyun 
406*4882a593Smuzhiyun 	/* Protect gov->gdbs_data against concurrent updates. */
407*4882a593Smuzhiyun 	mutex_lock(&gov_dbs_data_mutex);
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun 	dbs_data = gov->gdbs_data;
410*4882a593Smuzhiyun 	if (dbs_data) {
411*4882a593Smuzhiyun 		if (WARN_ON(have_governor_per_policy())) {
412*4882a593Smuzhiyun 			ret = -EINVAL;
413*4882a593Smuzhiyun 			goto free_policy_dbs_info;
414*4882a593Smuzhiyun 		}
415*4882a593Smuzhiyun 		policy_dbs->dbs_data = dbs_data;
416*4882a593Smuzhiyun 		policy->governor_data = policy_dbs;
417*4882a593Smuzhiyun 
418*4882a593Smuzhiyun 		gov_attr_set_get(&dbs_data->attr_set, &policy_dbs->list);
419*4882a593Smuzhiyun 		goto out;
420*4882a593Smuzhiyun 	}
421*4882a593Smuzhiyun 
422*4882a593Smuzhiyun 	dbs_data = kzalloc(sizeof(*dbs_data), GFP_KERNEL);
423*4882a593Smuzhiyun 	if (!dbs_data) {
424*4882a593Smuzhiyun 		ret = -ENOMEM;
425*4882a593Smuzhiyun 		goto free_policy_dbs_info;
426*4882a593Smuzhiyun 	}
427*4882a593Smuzhiyun 
428*4882a593Smuzhiyun 	gov_attr_set_init(&dbs_data->attr_set, &policy_dbs->list);
429*4882a593Smuzhiyun 
430*4882a593Smuzhiyun 	ret = gov->init(dbs_data);
431*4882a593Smuzhiyun 	if (ret)
432*4882a593Smuzhiyun 		goto free_policy_dbs_info;
433*4882a593Smuzhiyun 
434*4882a593Smuzhiyun 	/*
435*4882a593Smuzhiyun 	 * The sampling interval should not be less than the transition latency
436*4882a593Smuzhiyun 	 * of the CPU and it also cannot be too small for dbs_update() to work
437*4882a593Smuzhiyun 	 * correctly.
438*4882a593Smuzhiyun 	 */
439*4882a593Smuzhiyun 	dbs_data->sampling_rate = max_t(unsigned int,
440*4882a593Smuzhiyun 					CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
441*4882a593Smuzhiyun 					cpufreq_policy_transition_delay_us(policy));
442*4882a593Smuzhiyun 
443*4882a593Smuzhiyun 	if (!have_governor_per_policy())
444*4882a593Smuzhiyun 		gov->gdbs_data = dbs_data;
445*4882a593Smuzhiyun 
446*4882a593Smuzhiyun 	policy_dbs->dbs_data = dbs_data;
447*4882a593Smuzhiyun 	policy->governor_data = policy_dbs;
448*4882a593Smuzhiyun 
449*4882a593Smuzhiyun 	gov->kobj_type.sysfs_ops = &governor_sysfs_ops;
450*4882a593Smuzhiyun 	ret = kobject_init_and_add(&dbs_data->attr_set.kobj, &gov->kobj_type,
451*4882a593Smuzhiyun 				   get_governor_parent_kobj(policy),
452*4882a593Smuzhiyun 				   "%s", gov->gov.name);
453*4882a593Smuzhiyun 	if (!ret)
454*4882a593Smuzhiyun 		goto out;
455*4882a593Smuzhiyun 
456*4882a593Smuzhiyun 	/* Failure, so roll back. */
457*4882a593Smuzhiyun 	pr_err("initialization failed (dbs_data kobject init error %d)\n", ret);
458*4882a593Smuzhiyun 
459*4882a593Smuzhiyun 	kobject_put(&dbs_data->attr_set.kobj);
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 	policy->governor_data = NULL;
462*4882a593Smuzhiyun 
463*4882a593Smuzhiyun 	if (!have_governor_per_policy())
464*4882a593Smuzhiyun 		gov->gdbs_data = NULL;
465*4882a593Smuzhiyun 	gov->exit(dbs_data);
466*4882a593Smuzhiyun 	kfree(dbs_data);
467*4882a593Smuzhiyun 
468*4882a593Smuzhiyun free_policy_dbs_info:
469*4882a593Smuzhiyun 	free_policy_dbs_info(policy_dbs, gov);
470*4882a593Smuzhiyun 
471*4882a593Smuzhiyun out:
472*4882a593Smuzhiyun 	mutex_unlock(&gov_dbs_data_mutex);
473*4882a593Smuzhiyun 	return ret;
474*4882a593Smuzhiyun }
475*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_init);
476*4882a593Smuzhiyun 
cpufreq_dbs_governor_exit(struct cpufreq_policy * policy)477*4882a593Smuzhiyun void cpufreq_dbs_governor_exit(struct cpufreq_policy *policy)
478*4882a593Smuzhiyun {
479*4882a593Smuzhiyun 	struct dbs_governor *gov = dbs_governor_of(policy);
480*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs = policy->governor_data;
481*4882a593Smuzhiyun 	struct dbs_data *dbs_data = policy_dbs->dbs_data;
482*4882a593Smuzhiyun 	unsigned int count;
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 	/* Protect gov->gdbs_data against concurrent updates. */
485*4882a593Smuzhiyun 	mutex_lock(&gov_dbs_data_mutex);
486*4882a593Smuzhiyun 
487*4882a593Smuzhiyun 	count = gov_attr_set_put(&dbs_data->attr_set, &policy_dbs->list);
488*4882a593Smuzhiyun 
489*4882a593Smuzhiyun 	policy->governor_data = NULL;
490*4882a593Smuzhiyun 
491*4882a593Smuzhiyun 	if (!count) {
492*4882a593Smuzhiyun 		if (!have_governor_per_policy())
493*4882a593Smuzhiyun 			gov->gdbs_data = NULL;
494*4882a593Smuzhiyun 
495*4882a593Smuzhiyun 		gov->exit(dbs_data);
496*4882a593Smuzhiyun 		kfree(dbs_data);
497*4882a593Smuzhiyun 	}
498*4882a593Smuzhiyun 
499*4882a593Smuzhiyun 	free_policy_dbs_info(policy_dbs, gov);
500*4882a593Smuzhiyun 
501*4882a593Smuzhiyun 	mutex_unlock(&gov_dbs_data_mutex);
502*4882a593Smuzhiyun }
503*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_exit);
504*4882a593Smuzhiyun 
cpufreq_dbs_governor_start(struct cpufreq_policy * policy)505*4882a593Smuzhiyun int cpufreq_dbs_governor_start(struct cpufreq_policy *policy)
506*4882a593Smuzhiyun {
507*4882a593Smuzhiyun 	struct dbs_governor *gov = dbs_governor_of(policy);
508*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs = policy->governor_data;
509*4882a593Smuzhiyun 	struct dbs_data *dbs_data = policy_dbs->dbs_data;
510*4882a593Smuzhiyun 	unsigned int sampling_rate, ignore_nice, j;
511*4882a593Smuzhiyun 	unsigned int io_busy;
512*4882a593Smuzhiyun 
513*4882a593Smuzhiyun 	if (!policy->cur)
514*4882a593Smuzhiyun 		return -EINVAL;
515*4882a593Smuzhiyun 
516*4882a593Smuzhiyun 	policy_dbs->is_shared = policy_is_shared(policy);
517*4882a593Smuzhiyun 	policy_dbs->rate_mult = 1;
518*4882a593Smuzhiyun 
519*4882a593Smuzhiyun 	sampling_rate = dbs_data->sampling_rate;
520*4882a593Smuzhiyun 	ignore_nice = dbs_data->ignore_nice_load;
521*4882a593Smuzhiyun 	io_busy = dbs_data->io_is_busy;
522*4882a593Smuzhiyun 
523*4882a593Smuzhiyun 	for_each_cpu(j, policy->cpus) {
524*4882a593Smuzhiyun 		struct cpu_dbs_info *j_cdbs = &per_cpu(cpu_dbs, j);
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun 		j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_update_time, io_busy);
527*4882a593Smuzhiyun 		/*
528*4882a593Smuzhiyun 		 * Make the first invocation of dbs_update() compute the load.
529*4882a593Smuzhiyun 		 */
530*4882a593Smuzhiyun 		j_cdbs->prev_load = 0;
531*4882a593Smuzhiyun 
532*4882a593Smuzhiyun 		if (ignore_nice)
533*4882a593Smuzhiyun 			j_cdbs->prev_cpu_nice = kcpustat_field(&kcpustat_cpu(j), CPUTIME_NICE, j);
534*4882a593Smuzhiyun 	}
535*4882a593Smuzhiyun 
536*4882a593Smuzhiyun 	gov->start(policy);
537*4882a593Smuzhiyun 
538*4882a593Smuzhiyun 	gov_set_update_util(policy_dbs, sampling_rate);
539*4882a593Smuzhiyun 	return 0;
540*4882a593Smuzhiyun }
541*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_start);
542*4882a593Smuzhiyun 
cpufreq_dbs_governor_stop(struct cpufreq_policy * policy)543*4882a593Smuzhiyun void cpufreq_dbs_governor_stop(struct cpufreq_policy *policy)
544*4882a593Smuzhiyun {
545*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs = policy->governor_data;
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun 	gov_clear_update_util(policy_dbs->policy);
548*4882a593Smuzhiyun 	irq_work_sync(&policy_dbs->irq_work);
549*4882a593Smuzhiyun 	cancel_work_sync(&policy_dbs->work);
550*4882a593Smuzhiyun 	atomic_set(&policy_dbs->work_count, 0);
551*4882a593Smuzhiyun 	policy_dbs->work_in_progress = false;
552*4882a593Smuzhiyun }
553*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_stop);
554*4882a593Smuzhiyun 
cpufreq_dbs_governor_limits(struct cpufreq_policy * policy)555*4882a593Smuzhiyun void cpufreq_dbs_governor_limits(struct cpufreq_policy *policy)
556*4882a593Smuzhiyun {
557*4882a593Smuzhiyun 	struct policy_dbs_info *policy_dbs;
558*4882a593Smuzhiyun 
559*4882a593Smuzhiyun 	/* Protect gov->gdbs_data against cpufreq_dbs_governor_exit() */
560*4882a593Smuzhiyun 	mutex_lock(&gov_dbs_data_mutex);
561*4882a593Smuzhiyun 	policy_dbs = policy->governor_data;
562*4882a593Smuzhiyun 	if (!policy_dbs)
563*4882a593Smuzhiyun 		goto out;
564*4882a593Smuzhiyun 
565*4882a593Smuzhiyun 	mutex_lock(&policy_dbs->update_mutex);
566*4882a593Smuzhiyun 	cpufreq_policy_apply_limits(policy);
567*4882a593Smuzhiyun 	gov_update_sample_delay(policy_dbs, 0);
568*4882a593Smuzhiyun 	mutex_unlock(&policy_dbs->update_mutex);
569*4882a593Smuzhiyun 
570*4882a593Smuzhiyun out:
571*4882a593Smuzhiyun 	mutex_unlock(&gov_dbs_data_mutex);
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun EXPORT_SYMBOL_GPL(cpufreq_dbs_governor_limits);
574