xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/ipa/mali_kbase_ipa.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2016-2022 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <linux/thermal.h>
23 #include <linux/devfreq_cooling.h>
24 #include <linux/of.h>
25 #include "mali_kbase.h"
26 #include "mali_kbase_ipa.h"
27 #include "mali_kbase_ipa_debugfs.h"
28 #include "mali_kbase_ipa_simple.h"
29 #include "backend/gpu/mali_kbase_pm_internal.h"
30 #include "backend/gpu/mali_kbase_devfreq.h"
31 #include <linux/pm_opp.h>
32 
33 #define KBASE_IPA_FALLBACK_MODEL_NAME "mali-simple-power-model"
34 
35 /* Polling by thermal governor starts when the temperature exceeds the certain
36  * trip point. In order to have meaningful value for the counters, when the
37  * polling starts and first call to kbase_get_real_power() is made, it is
38  * required to reset the counter values every now and then.
39  * It is reasonable to do the reset every second if no polling is being done,
40  * the counter model implementation also assumes max sampling interval of 1 sec.
41  */
42 #define RESET_INTERVAL_MS ((s64)1000)
43 
kbase_ipa_model_recalculate(struct kbase_ipa_model * model)44 int kbase_ipa_model_recalculate(struct kbase_ipa_model *model)
45 {
46 	int err = 0;
47 
48 	lockdep_assert_held(&model->kbdev->ipa.lock);
49 
50 	if (model->ops->recalculate) {
51 		err = model->ops->recalculate(model);
52 		if (err) {
53 			dev_err(model->kbdev->dev,
54 				"recalculation of power model %s returned error %d\n",
55 				model->ops->name, err);
56 		}
57 	}
58 
59 	return err;
60 }
61 
kbase_ipa_model_ops_find(struct kbase_device * kbdev,const char * name)62 const struct kbase_ipa_model_ops *kbase_ipa_model_ops_find(struct kbase_device *kbdev,
63 							   const char *name)
64 {
65 	if (!strcmp(name, kbase_simple_ipa_model_ops.name))
66 		return &kbase_simple_ipa_model_ops;
67 
68 	return kbase_ipa_counter_model_ops_find(kbdev, name);
69 }
70 KBASE_EXPORT_TEST_API(kbase_ipa_model_ops_find);
71 
/* Map a GPU ID to a model name. Prefers a counter-based model matched to the
 * GPU ID; falls back to the simple model name when no match exists.
 */
const char *kbase_ipa_model_name_from_id(u32 gpu_id)
{
	const char *name = kbase_ipa_counter_model_name_from_id(gpu_id);

	return name ? name : KBASE_IPA_FALLBACK_MODEL_NAME;
}
KBASE_EXPORT_TEST_API(kbase_ipa_model_name_from_id);
83 
/* Look up the devicetree node compatible with "arm,<model-name>" for @model.
 * On success the returned node carries an elevated refcount which the caller
 * must drop with of_node_put(). Returns NULL when absent; a warning is
 * printed (once per model) if @dt_required.
 */
static struct device_node *get_model_dt_node(struct kbase_ipa_model *model,
					     bool dt_required)
{
	struct device_node *node;
	char compat[64];

	if (unlikely(!scnprintf(compat, sizeof(compat), "arm,%s", model->ops->name)))
		return NULL;

	/* of_find_compatible_node() will call of_node_put() on the root node,
	 * so take a reference on it first.
	 */
	of_node_get(model->kbdev->dev->of_node);
	node = of_find_compatible_node(model->kbdev->dev->of_node, NULL,
				       compat);

	if (!node && !model->missing_dt_node_warning) {
		if (dt_required)
			dev_warn(model->kbdev->dev,
			"Couldn't find power_model DT node matching \'%s\'\n",
			compat);
		model->missing_dt_node_warning = true;
	}

	return node;
}
109 
/**
 * kbase_ipa_model_add_param_s32 - Add an s32 (array) model parameter, sourced
 *                                 from the devicetree where available.
 * @model:       pointer to the IPA model
 * @name:        name of the DT property / parameter
 * @addr:        array of @num_elems s32 values to populate
 * @num_elems:   number of elements in @addr
 * @dt_required: if true, warn when the DT property is missing
 *
 * Reads @num_elems u32 values from the model's DT node into @addr. If the
 * property is required but missing, the array is zeroed (with a warning).
 * A debugfs entry is then created for every element.
 *
 * Return: 0 on success, otherwise an error code.
 */
int kbase_ipa_model_add_param_s32(struct kbase_ipa_model *model,
				  const char *name, s32 *addr,
				  size_t num_elems, bool dt_required)
{
	/* Fix: drop the dead "err = -EINVAL" initializer (it was always
	 * overwritten below) and make the loop index size_t to match
	 * num_elems (avoids a signed/unsigned comparison).
	 */
	int err;
	size_t i;
	struct device_node *model_dt_node = get_model_dt_node(model,
								dt_required);
	char *origin;

	err = of_property_read_u32_array(model_dt_node, name, (u32 *)addr, num_elems);
	/* We're done with model_dt_node now, so drop the reference taken in
	 * get_model_dt_node()/of_find_compatible_node().
	 */
	of_node_put(model_dt_node);

	if (err && dt_required) {
		/* Required property missing: fall back to all-zero values.
		 * Note err is intentionally left set here; it is overwritten
		 * by the debugfs registration below.
		 */
		memset(addr, 0, sizeof(s32) * num_elems);
		dev_warn(model->kbdev->dev,
			 "Error %d, no DT entry: %s.%s = %zu*[0]\n",
			 err, model->ops->name, name, num_elems);
		origin = "zero";
	} else if (err && !dt_required) {
		origin = "default";
	} else /* !err */ {
		origin = "DT";
	}

	/* Create a unique debugfs entry for each element */
	for (i = 0; i < num_elems; ++i) {
		char elem_name[32];

		if (num_elems == 1) {
			if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s", name))) {
				err = -ENOMEM;
				goto exit;
			}
		} else {
			/* %zu matches the size_t index; the generated name is
			 * textually identical to the previous %d formatting.
			 */
			if (unlikely(!scnprintf(elem_name, sizeof(elem_name), "%s.%zu", name, i))) {
				err = -ENOMEM;
				goto exit;
			}
		}

		dev_dbg(model->kbdev->dev, "%s.%s = %d (%s)\n",
			model->ops->name, elem_name, addr[i], origin);

		err = kbase_ipa_model_param_add(model, elem_name,
						&addr[i], sizeof(s32),
						PARAM_TYPE_S32);
		if (err)
			goto exit;
	}
exit:
	return err;
}
165 
/**
 * kbase_ipa_model_add_param_string - Add a string model parameter, sourced
 *                                    from the devicetree where available.
 * @model:       pointer to the IPA model
 * @name:        name of the DT property / parameter
 * @addr:        buffer the string is copied into
 * @size:        size of @addr in bytes, including the NUL terminator
 *               (assumed > 0 — TODO confirm all callers guarantee this,
 *               otherwise size - 1 underflows)
 * @dt_required: if true, warn when the DT property is missing
 *
 * Unlike the s32 variant, a missing-but-required property is not treated as
 * an error: @addr is set to the empty string and err is cleared. A debugfs
 * entry for the parameter is then created.
 *
 * Return: 0 on success, otherwise an error code.
 */
int kbase_ipa_model_add_param_string(struct kbase_ipa_model *model,
				     const char *name, char *addr,
				     size_t size, bool dt_required)
{
	int err;
	struct device_node *model_dt_node = get_model_dt_node(model,
								dt_required);
	const char *string_prop_value = "";
	char *origin;

	err = of_property_read_string(model_dt_node, name,
				      &string_prop_value);

	/* We're done with model_dt_node now, so drop the reference taken in
	 * get_model_dt_node()/of_find_compatible_node().
	 */
	of_node_put(model_dt_node);

	if (err && dt_required) {
		/* Required property missing: fall back to the empty string
		 * and clear the error.
		 */
		strncpy(addr, "", size - 1);
		dev_warn(model->kbdev->dev,
			 "Error %d, no DT entry: %s.%s = \'%s\'\n",
			 err, model->ops->name, name, addr);
		err = 0;
		origin = "zero";
	} else if (err && !dt_required) {
		origin = "default";
	} else /* !err */ {
		strncpy(addr, string_prop_value, size - 1);
		origin = "DT";
	}

	/* strncpy() does not guarantee NUL termination, so terminate
	 * explicitly in every case.
	 */
	addr[size - 1] = '\0';

	dev_dbg(model->kbdev->dev, "%s.%s = \'%s\' (%s)\n",
		model->ops->name, name, string_prop_value, origin);

	err = kbase_ipa_model_param_add(model, name, addr, size,
					PARAM_TYPE_STRING);
	return err;
}
207 
kbase_ipa_term_model(struct kbase_ipa_model * model)208 void kbase_ipa_term_model(struct kbase_ipa_model *model)
209 {
210 	if (!model)
211 		return;
212 
213 	lockdep_assert_held(&model->kbdev->ipa.lock);
214 
215 	if (model->ops->term)
216 		model->ops->term(model);
217 
218 	kbase_ipa_model_param_free_all(model);
219 
220 	kfree(model);
221 }
222 KBASE_EXPORT_TEST_API(kbase_ipa_term_model);
223 
kbase_ipa_init_model(struct kbase_device * kbdev,const struct kbase_ipa_model_ops * ops)224 struct kbase_ipa_model *kbase_ipa_init_model(struct kbase_device *kbdev,
225 					     const struct kbase_ipa_model_ops *ops)
226 {
227 	struct kbase_ipa_model *model;
228 	int err;
229 
230 	lockdep_assert_held(&kbdev->ipa.lock);
231 
232 	if (!ops || !ops->name)
233 		return NULL;
234 
235 	model = kzalloc(sizeof(struct kbase_ipa_model), GFP_KERNEL);
236 	if (!model)
237 		return NULL;
238 
239 	model->kbdev = kbdev;
240 	model->ops = ops;
241 	INIT_LIST_HEAD(&model->params);
242 
243 	err = model->ops->init(model);
244 	if (err) {
245 		dev_err(kbdev->dev,
246 			"init of power model \'%s\' returned error %d\n",
247 			ops->name, err);
248 		kfree(model);
249 		return NULL;
250 	}
251 
252 	err = kbase_ipa_model_recalculate(model);
253 	if (err) {
254 		kbase_ipa_term_model(model);
255 		return NULL;
256 	}
257 
258 	return model;
259 }
260 KBASE_EXPORT_TEST_API(kbase_ipa_init_model);
261 
kbase_ipa_term_locked(struct kbase_device * kbdev)262 static void kbase_ipa_term_locked(struct kbase_device *kbdev)
263 {
264 	lockdep_assert_held(&kbdev->ipa.lock);
265 
266 	/* Clean up the models */
267 	if (kbdev->ipa.configured_model != kbdev->ipa.fallback_model)
268 		kbase_ipa_term_model(kbdev->ipa.configured_model);
269 	kbase_ipa_term_model(kbdev->ipa.fallback_model);
270 
271 	kbdev->ipa.configured_model = NULL;
272 	kbdev->ipa.fallback_model = NULL;
273 }
274 
/**
 * kbase_ipa_init - Initialize the IPA power-model framework for a device.
 * @kbdev: Device pointer
 *
 * Always instantiates the simple (fallback) model, then selects a configured
 * model from the "ipa-model" DT property or, failing that, from the GPU ID.
 * If the configured model cannot be initialized, the fallback model is used
 * for both roles.
 *
 * Return: 0 on success, otherwise an error code (all models are torn down
 * again on failure).
 */
int kbase_ipa_init(struct kbase_device *kbdev)
{

	const char *model_name;
	const struct kbase_ipa_model_ops *ops;
	struct kbase_ipa_model *default_model = NULL;
	int err;

	mutex_init(&kbdev->ipa.lock);
	/*
	 * Lock during init to avoid warnings from lockdep_assert_held (there
	 * shouldn't be any concurrent access yet).
	 */
	mutex_lock(&kbdev->ipa.lock);

	/* The simple IPA model must *always* be present.*/
	ops = kbase_ipa_model_ops_find(kbdev, KBASE_IPA_FALLBACK_MODEL_NAME);

	default_model = kbase_ipa_init_model(kbdev, ops);
	if (!default_model) {
		err = -EINVAL;
		goto end;
	}

	kbdev->ipa.fallback_model = default_model;
	/* An explicit "ipa-model" DT property takes precedence over
	 * inference from the GPU ID.
	 */
	err = of_property_read_string(kbdev->dev->of_node,
				      "ipa-model",
				      &model_name);
	if (err) {
		/* Attempt to load a match from GPU-ID */
		u32 gpu_id;

		gpu_id = kbdev->gpu_props.props.raw_props.gpu_id;
		model_name = kbase_ipa_model_name_from_id(gpu_id);
		dev_dbg(kbdev->dev,
			"Inferring model from GPU ID 0x%x: \'%s\'\n",
			gpu_id, model_name);
		err = 0;
	} else {
		dev_dbg(kbdev->dev,
			"Using ipa-model parameter from DT: \'%s\'\n",
			model_name);
	}

	if (strcmp(KBASE_IPA_FALLBACK_MODEL_NAME, model_name) != 0) {
		ops = kbase_ipa_model_ops_find(kbdev, model_name);
		kbdev->ipa.configured_model = kbase_ipa_init_model(kbdev, ops);
		if (!kbdev->ipa.configured_model) {
			/* Non-fatal: fall back to the simple model. */
			dev_warn(kbdev->dev,
				"Failed to initialize ipa-model: \'%s\'\n"
				"Falling back on default model\n",
				model_name);
			kbdev->ipa.configured_model = default_model;
		}
	} else {
		kbdev->ipa.configured_model = default_model;
	}

	kbdev->ipa.last_sample_time = ktime_get_raw();

end:
	if (err)
		kbase_ipa_term_locked(kbdev);
	else
		dev_info(kbdev->dev,
			 "Using configured power model %s, and fallback %s\n",
			 kbdev->ipa.configured_model->ops->name,
			 kbdev->ipa.fallback_model->ops->name);

	mutex_unlock(&kbdev->ipa.lock);
	return err;
}
KBASE_EXPORT_TEST_API(kbase_ipa_init);
348 
/* Counterpart to kbase_ipa_init(): tear down both IPA models under the lock,
 * then destroy the lock itself. Must not race with any power queries.
 */
void kbase_ipa_term(struct kbase_device *kbdev)
{
	mutex_lock(&kbdev->ipa.lock);
	kbase_ipa_term_locked(kbdev);
	mutex_unlock(&kbdev->ipa.lock);

	mutex_destroy(&kbdev->ipa.lock);
}
KBASE_EXPORT_TEST_API(kbase_ipa_term);
358 
359 /**
360  * kbase_scale_dynamic_power() - Scale a dynamic power coefficient to an OPP
361  * @c:		Dynamic model coefficient, in pW/(Hz V^2). Should be in range
362  *		0 < c < 2^26 to prevent overflow.
363  * @freq:	Frequency, in Hz. Range: 2^23 < freq < 2^30 (~8MHz to ~1GHz)
364  * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
365  *
366  * Keep a record of the approximate range of each value at every stage of the
367  * calculation, to ensure we don't overflow. This makes heavy use of the
368  * approximations 1000 = 2^10 and 1000000 = 2^20, but does the actual
369  * calculations in decimal for increased accuracy.
370  *
371  * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
372  */
kbase_scale_dynamic_power(const u32 c,const u32 freq,const u32 voltage)373 static u32 kbase_scale_dynamic_power(const u32 c, const u32 freq,
374 				     const u32 voltage)
375 {
376 	/* Range: 2^8 < v2 < 2^16 m(V^2) */
377 	const u32 v2 = (voltage * voltage) / 1000;
378 
379 	/* Range: 2^3 < f_MHz < 2^10 MHz */
380 	const u32 f_MHz = freq / 1000000;
381 
382 	/* Range: 2^11 < v2f_big < 2^26 kHz V^2 */
383 	const u32 v2f_big = v2 * f_MHz;
384 
385 	/* Range: 2^1 < v2f < 2^16 MHz V^2 */
386 	const u32 v2f = v2f_big / 1000;
387 
388 	/* Range (working backwards from next line): 0 < v2fc < 2^23 uW.
389 	 * Must be < 2^42 to avoid overflowing the return value.
390 	 */
391 	const u64 v2fc = (u64) c * (u64) v2f;
392 
393 	/* Range: 0 < v2fc / 1000 < 2^13 mW */
394 	return div_u64(v2fc, 1000);
395 }
396 
397 /**
398  * kbase_scale_static_power() - Scale a static power coefficient to an OPP
399  * @c:		Static model coefficient, in uW/V^3. Should be in range
400  *		0 < c < 2^32 to prevent overflow.
401  * @voltage:	Voltage, in mV. Range: 2^9 < voltage < 2^13 (~0.5V to ~8V)
402  *
403  * Return: Power consumption, in mW. Range: 0 < p < 2^13 (0W to ~8W)
404  */
kbase_scale_static_power(const u32 c,const u32 voltage)405 static u32 kbase_scale_static_power(const u32 c, const u32 voltage)
406 {
407 	/* Range: 2^8 < v2 < 2^16 m(V^2) */
408 	const u32 v2 = (voltage * voltage) / 1000;
409 
410 	/* Range: 2^17 < v3_big < 2^29 m(V^2) mV */
411 	const u32 v3_big = v2 * voltage;
412 
413 	/* Range: 2^7 < v3 < 2^19 m(V^3) */
414 	const u32 v3 = v3_big / 1000;
415 
416 	/*
417 	 * Range (working backwards from next line): 0 < v3c_big < 2^33 nW.
418 	 * The result should be < 2^52 to avoid overflowing the return value.
419 	 */
420 	const u64 v3c_big = (u64) c * (u64) v3;
421 
422 	/* Range: 0 < v3c_big / 1000000 < 2^13 mW */
423 	return div_u64(v3c_big, 1000000);
424 }
425 
/* Note that the GPU has entered protected mode so that the next power sample
 * uses the fallback model (see get_current_model()). Caller must hold
 * kbdev->hwaccess_lock.
 */
void kbase_ipa_protection_mode_switch_event(struct kbase_device *kbdev)
{
	lockdep_assert_held(&kbdev->hwaccess_lock);

	/* Record the event of GPU entering protected mode. */
	kbdev->ipa_protection_mode_switched = true;
}
433 
/* Select the model to use for the next power sample: the fallback model if
 * the GPU entered protected mode since the last sample (counters would be
 * unreliable) or if the fallback is being forced, otherwise the configured
 * model. Also clears the protected-mode event flag when it is safe to do so.
 * Caller must hold kbdev->ipa.lock; hwaccess_lock is taken internally to
 * read/clear the protected-mode state consistently.
 */
static struct kbase_ipa_model *get_current_model(struct kbase_device *kbdev)
{
	struct kbase_ipa_model *model;
	unsigned long flags;

	lockdep_assert_held(&kbdev->ipa.lock);

	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);

	if (kbdev->ipa_protection_mode_switched ||
			kbdev->ipa.force_fallback_model)
		model = kbdev->ipa.fallback_model;
	else
		model = kbdev->ipa.configured_model;

	/*
	 * Having taken cognizance of the fact that whether GPU earlier
	 * protected mode or not, the event can be now reset (if GPU is not
	 * currently in protected mode) so that configured model is used
	 * for the next sample.
	 */
	if (!kbdev->protected_mode)
		kbdev->ipa_protection_mode_switched = false;

	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);

	return model;
}
462 
get_static_power_locked(struct kbase_device * kbdev,struct kbase_ipa_model * model,unsigned long voltage)463 static u32 get_static_power_locked(struct kbase_device *kbdev,
464 				   struct kbase_ipa_model *model,
465 				   unsigned long voltage)
466 {
467 	u32 power = 0;
468 	int err;
469 	u32 power_coeff;
470 
471 	lockdep_assert_held(&model->kbdev->ipa.lock);
472 
473 	if (!model->ops->get_static_coeff)
474 		model = kbdev->ipa.fallback_model;
475 
476 	if (model->ops->get_static_coeff) {
477 		err = model->ops->get_static_coeff(model, &power_coeff);
478 		if (!err)
479 			power = kbase_scale_static_power(power_coeff,
480 							 (u32) voltage);
481 	}
482 
483 	return power;
484 }
485 
/* devfreq cooling callback for static power on pre-5.10 kernels. The
 * signature (and the way kbdev is obtained) depends on the kernel version /
 * PWRSOFT configuration; on newer kernels this split static/dynamic API was
 * replaced by get_real_power.
 */
#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
static unsigned long kbase_get_static_power(struct devfreq *df,
					    unsigned long voltage)
#else
static unsigned long kbase_get_static_power(unsigned long voltage)
#endif
{
	struct kbase_ipa_model *model;
	u32 power = 0;
#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
#else
	/* No devfreq handle on this config: look the device up (takes a
	 * reference that must be released below).
	 */
	struct kbase_device *kbdev = kbase_find_device(-1);
#endif

	if (!kbdev)
		return 0ul;

	mutex_lock(&kbdev->ipa.lock);

	model = get_current_model(kbdev);
	power = get_static_power_locked(kbdev, model, voltage);

	mutex_unlock(&kbdev->ipa.lock);

#if !(defined(CONFIG_MALI_PWRSOFT_765) ||                                      \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
	kbase_release_device(kbdev);
#endif

	return power;
}
#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */
522 
/**
 * opp_translate_freq_voltage() - Translate nominal OPP frequency from
 *                                devicetree into the real frequency for
 *                                top-level and shader cores.
 * @kbdev:            Device pointer
 * @nominal_freq:     Nominal frequency in Hz.
 * @nominal_voltage:  Nominal voltage, in mV.
 * @freqs:            Pointer to array of real frequency values.
 * @volts:            Pointer to array of voltages.
 *
 * If there are 2 clock domains, then top-level and shader cores can operate
 * at different frequency and voltage level. The nominal frequency ("opp-hz")
 * used by devfreq from the devicetree may not be same as the real frequency
 * at which top-level and shader cores are operating, so a translation is
 * needed.
 * Nominal voltage shall always be same as the real voltage for top-level.
 */
static void opp_translate_freq_voltage(struct kbase_device *kbdev,
				       unsigned long nominal_freq,
				       unsigned long nominal_voltage,
				       unsigned long *freqs,
				       unsigned long *volts)
{
#if IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
	/* An arbitrary voltage and frequency value can be chosen for testing
	 * in no mali configuration which may not match with any OPP level.
	 */
	freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_freq;
	volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL] = nominal_voltage;

	freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_freq;
	volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] = nominal_voltage;
#else
	u64 core_mask;
	unsigned int i;

	kbase_devfreq_opp_translate(kbdev, nominal_freq, &core_mask,
				    freqs, volts);
	CSTD_UNUSED(core_mask);

	/* Convert micro volts to milli volts */
	for (i = 0; i < kbdev->nr_clocks; i++)
		volts[i] /= 1000;

	/* With a single clock domain the shader cores share the top-level
	 * frequency and voltage.
	 */
	if (kbdev->nr_clocks == 1) {
		freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
		volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES] =
			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];
	}
#endif
}
575 
/* devfreq cooling callback for dynamic power on pre-5.10 kernels. Always
 * uses the fallback (simple) model, since this legacy API provides no
 * utilization data for counter-based scaling. Signature depends on kernel
 * version / PWRSOFT configuration.
 */
#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
static unsigned long kbase_get_dynamic_power(struct devfreq *df,
					     unsigned long freq,
					     unsigned long voltage)
#else
static unsigned long kbase_get_dynamic_power(unsigned long freq,
					     unsigned long voltage)
#endif
{
	struct kbase_ipa_model *model;
	unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
	unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
	u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
	u32 power = 0;
	int err = 0;
#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
#else
	/* No devfreq handle on this config: look the device up (takes a
	 * reference that must be released below).
	 */
	struct kbase_device *kbdev = kbase_find_device(-1);
#endif

	if (!kbdev)
		return 0ul;

	mutex_lock(&kbdev->ipa.lock);

	model = kbdev->ipa.fallback_model;

	err = model->ops->get_dynamic_coeff(model, power_coeffs);

	if (!err) {
		opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts);

		power = kbase_scale_dynamic_power(
			power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);

		/* Here unlike kbase_get_real_power(), shader core frequency is
		 * used for the scaling as simple power model is used to obtain
		 * the value of dynamic coefficient (which is a fixed value
		 * retrieved from the device tree).
		 */
		power += kbase_scale_dynamic_power(
			 power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES],
			 freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES],
			 volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);
	} else
		dev_err_ratelimited(kbdev->dev,
				    "Model %s returned error code %d\n",
				    model->ops->name, err);

	mutex_unlock(&kbdev->ipa.lock);

#if !(defined(CONFIG_MALI_PWRSOFT_765) ||                                      \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
	kbase_release_device(kbdev);
#endif

	return power;
}
#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */
641 
/**
 * kbase_get_real_power_locked - Compute the GPU's current power draw, in mW.
 * @kbdev:   Device pointer
 * @power:   Where to store the computed power (mW)
 * @freq:    Nominal OPP frequency, in Hz
 * @voltage: Nominal OPP voltage, in mV
 *
 * Uses the current (configured or fallback) model to obtain dynamic power
 * coefficients, scales them per block, scales by GPU utilization (unless the
 * sampling interval was too long), and finally adds static power.
 * Caller must hold kbdev->ipa.lock.
 *
 * Return: 0 on success, or an error code if even the fallback model failed.
 */
int kbase_get_real_power_locked(struct kbase_device *kbdev, u32 *power,
				unsigned long freq,
				unsigned long voltage)
{
	struct kbase_ipa_model *model;
	unsigned long freqs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
	unsigned long volts[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
	u32 power_coeffs[KBASE_IPA_BLOCK_TYPE_NUM] = {0};
	struct kbasep_pm_metrics diff;
	u64 total_time;
	bool skip_utilization_scaling = false;
	int err = 0;

	lockdep_assert_held(&kbdev->ipa.lock);

	kbase_pm_get_dvfs_metrics(kbdev, &kbdev->ipa.last_metrics, &diff);

	model = get_current_model(kbdev);

	err = model->ops->get_dynamic_coeff(model, power_coeffs);

	/* If the counter model returns an error (e.g. switching back to
	 * protected mode and failing to read counters, or a counter sample
	 * with too few cycles), revert to the fallback model.
	 */
	if (err && model != kbdev->ipa.fallback_model) {
		/* No meaningful scaling for GPU utilization can be done if
		 * the sampling interval was too long. This is equivalent to
		 * assuming GPU was busy throughout (similar to what is done
		 * during protected mode).
		 */
		if (err == -EOVERFLOW)
			skip_utilization_scaling = true;

		model = kbdev->ipa.fallback_model;
		err = model->ops->get_dynamic_coeff(model, power_coeffs);
	}

	if (WARN_ON(err))
		return err;

	opp_translate_freq_voltage(kbdev, freq, voltage, freqs, volts);

	*power = kbase_scale_dynamic_power(
			power_coeffs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
			freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL],
			volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);

	if (power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]) {
		/* Renamed from 'freq' to avoid shadowing the function
		 * parameter of the same name.
		 */
		unsigned long sc_freq =
			freqs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES];

		/* As per the HW team, the top-level frequency needs to be used
		 * for the scaling if the counter based model was used as
		 * counter values are normalized with the GPU_ACTIVE counter
		 * value, which increments at the rate of top-level frequency.
		 */
		if (model != kbdev->ipa.fallback_model)
			sc_freq = freqs[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL];

		*power += kbase_scale_dynamic_power(
				power_coeffs[KBASE_IPA_BLOCK_TYPE_SHADER_CORES],
				sc_freq,
				volts[KBASE_IPA_BLOCK_TYPE_SHADER_CORES]);
	}

	if (!skip_utilization_scaling) {
		/* time_busy / total_time cannot be >1, so assigning the 64-bit
		 * result of div_u64 to *power cannot overflow.
		 */
		total_time = diff.time_busy + (u64) diff.time_idle;
		*power = div_u64(*power * (u64) diff.time_busy,
				 max(total_time, 1ull));
	}

	*power += get_static_power_locked(kbdev, model,
				volts[KBASE_IPA_BLOCK_TYPE_TOP_LEVEL]);

	return err;
}
KBASE_EXPORT_TEST_API(kbase_get_real_power_locked);
721 
/* devfreq cooling entry point: compute current GPU power (mW) for the given
 * OPP, taking the IPA lock around the locked variant.
 */
int kbase_get_real_power(struct devfreq *df, u32 *power,
				unsigned long freq,
				unsigned long voltage)
{
	struct kbase_device *kbdev = dev_get_drvdata(&df->dev);
	int ret = -ENODEV;

	if (kbdev) {
		mutex_lock(&kbdev->ipa.lock);
		ret = kbase_get_real_power_locked(kbdev, power, freq, voltage);
		mutex_unlock(&kbdev->ipa.lock);
	}

	return ret;
}
KBASE_EXPORT_TEST_API(kbase_get_real_power);
739 
/* Power-model callbacks registered with the devfreq cooling device. Kernels
 * before 5.10 use the split static/dynamic callbacks; where available, the
 * combined get_real_power callback is provided as well.
 */
struct devfreq_cooling_power kbase_ipa_power_model_ops = {
#if KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE
	.get_static_power = &kbase_get_static_power,
	.get_dynamic_power = &kbase_get_dynamic_power,
#endif /* KERNEL_VERSION(5, 10, 0) > LINUX_VERSION_CODE */
#if defined(CONFIG_MALI_PWRSOFT_765) ||                                        \
	KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE
	.get_real_power = &kbase_get_real_power,
#endif
};
KBASE_EXPORT_TEST_API(kbase_ipa_power_model_ops);
751 
kbase_ipa_reset_data(struct kbase_device * kbdev)752 void kbase_ipa_reset_data(struct kbase_device *kbdev)
753 {
754 	ktime_t now, diff;
755 	s64 elapsed_time;
756 
757 	mutex_lock(&kbdev->ipa.lock);
758 
759 	now = ktime_get_raw();
760 	diff = ktime_sub(now, kbdev->ipa.last_sample_time);
761 	elapsed_time = ktime_to_ms(diff);
762 
763 	if (elapsed_time > RESET_INTERVAL_MS) {
764 		struct kbasep_pm_metrics diff;
765 		struct kbase_ipa_model *model;
766 
767 		kbase_pm_get_dvfs_metrics(
768 			kbdev, &kbdev->ipa.last_metrics, &diff);
769 
770 		model = get_current_model(kbdev);
771 		if (model != kbdev->ipa.fallback_model)
772 			model->ops->reset_counter_data(model);
773 
774 		kbdev->ipa.last_sample_time = ktime_get_raw();
775 	}
776 
777 	mutex_unlock(&kbdev->ipa.lock);
778 }
779