xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/mali400/mali/common/mali_kernel_utilization.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  * Copyright (C) 2010-2014, 2016-2017 ARM Limited. All rights reserved.
3  *
4  * This program is free software and is provided to you under the terms of the GNU General Public License version 2
5  * as published by the Free Software Foundation, and any use by you of this program is subject to the terms of such GNU licence.
6  *
7  * A copy of the licence is included with the program, and can also be obtained from Free Software
8  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
9  */
10 
11 #include "mali_kernel_utilization.h"
12 #include "mali_osk.h"
13 #include "mali_osk_mali.h"
14 #include "mali_kernel_common.h"
15 #include "mali_session.h"
16 #include "mali_scheduler.h"
17 
18 #include "mali_executor.h"
19 #include "mali_dvfs_policy.h"
20 #include "mali_control_timer.h"
21 
22 /* Thresholds for GP bound detection. */
23 #define MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD 240
24 #define MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD 250
25 
26 static _mali_osk_spinlock_irq_t *utilization_data_lock;
27 
28 static u32 num_running_gp_cores = 0;
29 static u32 num_running_pp_cores = 0;
30 
31 static u64 work_start_time_gpu = 0;
32 static u64 work_start_time_gp = 0;
33 static u64 work_start_time_pp = 0;
34 static u64 accumulated_work_time_gpu = 0;
35 static u64 accumulated_work_time_gp = 0;
36 static u64 accumulated_work_time_pp = 0;
37 
38 static u32 last_utilization_gpu = 0 ;
39 static u32 last_utilization_gp = 0 ;
40 static u32 last_utilization_pp = 0 ;
41 
42 void (*mali_utilization_callback)(struct mali_gpu_utilization_data *data) = NULL;
43 
44 /* Define the first timer control timer timeout in milliseconds */
45 static u32 mali_control_first_timeout = 100;
46 static struct mali_gpu_utilization_data mali_util_data = {0, };
47 
/**
 * Compute the GPU, GP and PP utilization for the sampling period that just
 * ended and start a new period.
 *
 * Utilization is reported in parts of 256. The result is stored in the
 * last_utilization_* variables and in the file-static mali_util_data record,
 * and the GP-bound executor hint is enabled or disabled accordingly.
 *
 * @param start_time     In: timestamp at which the current period began, in
 *                       the unit returned by _mali_osk_time_get_ns().
 *                       Out: set to the current time when a new period is
 *                       started; left untouched when no work was recorded.
 * @param time_period    Out: length of the period that just ended
 *                       (current time minus *start_time).
 * @param need_add_timer Out: MALI_FALSE when no work was recorded (the control
 *                       timer has been paused), MALI_TRUE otherwise.
 * @return Pointer to the file-static utilization record holding the new values.
 */
struct mali_gpu_utilization_data *mali_utilization_calculate(u64 *start_time, u64 *time_period, mali_bool *need_add_timer)
{
	u64 time_now;
	u32 leading_zeroes;
	u32 shift_val;
	u32 work_normalized_gpu;
	u32 work_normalized_gp;
	u32 work_normalized_pp;
	u32 period_normalized;
	u32 utilization_gpu;
	u32 utilization_gp;
	u32 utilization_pp;

	mali_utilization_data_lock();

	time_now = _mali_osk_time_get_ns();

	*time_period = time_now - *start_time;

	if (accumulated_work_time_gpu == 0 && work_start_time_gpu == 0) {
		mali_control_timer_pause();
		/*
		 * No work done for this period
		 * - No need to reschedule timer
		 * - Report zero usage
		 */
		last_utilization_gpu = 0;
		last_utilization_gp = 0;
		last_utilization_pp = 0;

		mali_util_data.utilization_gpu = last_utilization_gpu;
		mali_util_data.utilization_gp = last_utilization_gp;
		mali_util_data.utilization_pp = last_utilization_pp;

		mali_utilization_data_unlock();

		*need_add_timer = MALI_FALSE;

		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);

		MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
		MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
		MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));

		return &mali_util_data;
	}

	/* If we are currently busy, update working period up to now */
	if (work_start_time_gpu != 0) {
		accumulated_work_time_gpu += (time_now - work_start_time_gpu);
		work_start_time_gpu = time_now;

		/* GP and/or PP will also be busy if the GPU is busy at this point */

		if (work_start_time_gp != 0) {
			accumulated_work_time_gp += (time_now - work_start_time_gp);
			work_start_time_gp = time_now;
		}

		if (work_start_time_pp != 0) {
			accumulated_work_time_pp += (time_now - work_start_time_pp);
			work_start_time_pp = time_now;
		}
	}

	/*
	 * We have two 64-bit values, a dividend and a divisor.
	 * To avoid dependencies to a 64-bit divider, we shift down the two values
	 * equally first.
	 * We shift the dividend up and possibly the divisor down, making the result X in 256.
	 */

	/* Shift the 64-bit values down so they fit inside a 32-bit integer */
	leading_zeroes = _mali_osk_clz((u32)(*time_period >> 32));
	shift_val = 32 - leading_zeroes;
	work_normalized_gpu = (u32)(accumulated_work_time_gpu >> shift_val);
	work_normalized_gp = (u32)(accumulated_work_time_gp >> shift_val);
	work_normalized_pp = (u32)(accumulated_work_time_pp >> shift_val);
	period_normalized = (u32)(*time_period >> shift_val);

	/*
	 * Now, we should report the usage in parts of 256
	 * this means we must shift up the dividend or down the divisor by 8
	 * (we could do a combination, but we just use one for simplicity,
	 * but the end result should be good enough anyway)
	 */
	if (period_normalized > 0x00FFFFFF) {
		/* The divisor is so big that it is safe to shift it down */
		period_normalized >>= 8;
	} else {
		/*
		 * The divisor is so small that we can shift up the dividend, without losing any data.
		 * (dividend is always smaller than the divisor)
		 */
		work_normalized_gpu <<= 8;
		work_normalized_gp <<= 8;
		work_normalized_pp <<= 8;
	}

	if (0 == period_normalized) {
		/*
		 * Zero-length period (*start_time equals the current time): there
		 * is nothing to divide by. Carry the previous sample forward
		 * instead of dividing by zero.
		 */
		utilization_gpu = last_utilization_gpu;
		utilization_gp = last_utilization_gp;
		utilization_pp = last_utilization_pp;
	} else {
		utilization_gpu = work_normalized_gpu / period_normalized;
		utilization_gp = work_normalized_gp / period_normalized;
		utilization_pp = work_normalized_pp / period_normalized;
	}

	last_utilization_gpu = utilization_gpu;
	last_utilization_gp = utilization_gp;
	last_utilization_pp = utilization_pp;

	/* GP bound: GP is (almost) saturated while PP still has headroom */
	if ((MALI_GP_BOUND_GP_UTILIZATION_THRESHOLD < last_utilization_gp) &&
	    (MALI_GP_BOUND_PP_UTILIZATION_THRESHOLD > last_utilization_pp)) {
		mali_executor_hint_enable(MALI_EXECUTOR_HINT_GP_BOUND);
	} else {
		mali_executor_hint_disable(MALI_EXECUTOR_HINT_GP_BOUND);
	}

	/* starting a new period */
	accumulated_work_time_gpu = 0;
	accumulated_work_time_gp = 0;
	accumulated_work_time_pp = 0;

	*start_time = time_now;

	mali_util_data.utilization_gp = last_utilization_gp;
	mali_util_data.utilization_gpu = last_utilization_gpu;
	mali_util_data.utilization_pp = last_utilization_pp;

	mali_utilization_data_unlock();

	*need_add_timer = MALI_TRUE;

	MALI_DEBUG_PRINT(4, ("last_utilization_gpu = %d \n", last_utilization_gpu));
	MALI_DEBUG_PRINT(4, ("last_utilization_gp = %d \n", last_utilization_gp));
	MALI_DEBUG_PRINT(4, ("last_utilization_pp = %d \n", last_utilization_pp));

	return &mali_util_data;
}
183 
mali_utilization_init(void)184 _mali_osk_errcode_t mali_utilization_init(void)
185 {
186 #if USING_GPU_UTILIZATION
187 	_mali_osk_device_data data;
188 
189 	if (_MALI_OSK_ERR_OK == _mali_osk_device_data_get(&data)) {
190 		if (NULL != data.utilization_callback) {
191 			mali_utilization_callback = data.utilization_callback;
192 			MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: Utilization handler installed \n"));
193 		}
194 	}
195 #endif /* defined(USING_GPU_UTILIZATION) */
196 
197 	if (NULL == mali_utilization_callback) {
198 		MALI_DEBUG_PRINT(2, ("Mali GPU Utilization: No platform utilization handler installed\n"));
199 	}
200 
201 	utilization_data_lock = _mali_osk_spinlock_irq_init(_MALI_OSK_LOCKFLAG_ORDERED, _MALI_OSK_LOCK_ORDER_UTILIZATION);
202 	if (NULL == utilization_data_lock) {
203 		return _MALI_OSK_ERR_FAULT;
204 	}
205 
206 	num_running_gp_cores = 0;
207 	num_running_pp_cores = 0;
208 
209 	return _MALI_OSK_ERR_OK;
210 }
211 
mali_utilization_term(void)212 void mali_utilization_term(void)
213 {
214 	if (NULL != utilization_data_lock) {
215 		_mali_osk_spinlock_irq_term(utilization_data_lock);
216 	}
217 }
218 
mali_utilization_gp_start(void)219 void mali_utilization_gp_start(void)
220 {
221 	mali_utilization_data_lock();
222 
223 	++num_running_gp_cores;
224 	if (1 == num_running_gp_cores) {
225 		u64 time_now = _mali_osk_time_get_ns();
226 
227 		/* First GP core started, consider GP busy from now and onwards */
228 		work_start_time_gp = time_now;
229 
230 		if (0 == num_running_pp_cores) {
231 			mali_bool is_resume = MALI_FALSE;
232 			/*
233 			 * There are no PP cores running, so this is also the point
234 			 * at which we consider the GPU to be busy as well.
235 			 */
236 			work_start_time_gpu = time_now;
237 
238 			is_resume  = mali_control_timer_resume(time_now);
239 
240 			mali_utilization_data_unlock();
241 
242 			if (is_resume) {
243 				/* Do some policy in new period for performance consideration */
244 #if defined(CONFIG_MALI_DVFS)
245 				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
246 				mali_session_max_window_num();
247 				if (0 == last_utilization_gpu) {
248 					/*
249 					 * for mali_dev_pause is called in set clock,
250 					 * so each time we change clock, we will set clock to
251 					 * highest step even if under down clock case,
252 					 * it is not nessesary, so we only set the clock under
253 					 * last time utilization equal 0, we stop the timer then
254 					 * start the GPU again case
255 					 */
256 					mali_dvfs_policy_new_period();
257 				}
258 #endif
259 				/*
260 				 * First timeout using short interval for power consideration
261 				 * because we give full power in the new period, but if the
262 				 * job loading is light, finish in 10ms, the other time all keep
263 				 * in high freq it will wast time.
264 				 */
265 				mali_control_timer_add(mali_control_first_timeout);
266 			}
267 		} else {
268 			mali_utilization_data_unlock();
269 		}
270 
271 	} else {
272 		/* Nothing to do */
273 		mali_utilization_data_unlock();
274 	}
275 }
276 
mali_utilization_pp_start(void)277 void mali_utilization_pp_start(void)
278 {
279 	mali_utilization_data_lock();
280 
281 	++num_running_pp_cores;
282 	if (1 == num_running_pp_cores) {
283 		u64 time_now = _mali_osk_time_get_ns();
284 
285 		/* First PP core started, consider PP busy from now and onwards */
286 		work_start_time_pp = time_now;
287 
288 		if (0 == num_running_gp_cores) {
289 			mali_bool is_resume = MALI_FALSE;
290 			/*
291 			 * There are no GP cores running, so this is also the point
292 			 * at which we consider the GPU to be busy as well.
293 			 */
294 			work_start_time_gpu = time_now;
295 
296 			/* Start a new period if stoped */
297 			is_resume = mali_control_timer_resume(time_now);
298 
299 			mali_utilization_data_unlock();
300 
301 			if (is_resume) {
302 #if defined(CONFIG_MALI_DVFS)
303 				/* Clear session->number_of_window_jobs, prepare parameter for dvfs */
304 				mali_session_max_window_num();
305 				if (0 == last_utilization_gpu) {
306 					/*
307 					 * for mali_dev_pause is called in set clock,
308 					 * so each time we change clock, we will set clock to
309 					 * highest step even if under down clock case,
310 					 * it is not nessesary, so we only set the clock under
311 					 * last time utilization equal 0, we stop the timer then
312 					 * start the GPU again case
313 					 */
314 					mali_dvfs_policy_new_period();
315 				}
316 #endif
317 
318 				/*
319 				 * First timeout using short interval for power consideration
320 				 * because we give full power in the new period, but if the
321 				 * job loading is light, finish in 10ms, the other time all keep
322 				 * in high freq it will wast time.
323 				 */
324 				mali_control_timer_add(mali_control_first_timeout);
325 			}
326 		} else {
327 			mali_utilization_data_unlock();
328 		}
329 	} else {
330 		/* Nothing to do */
331 		mali_utilization_data_unlock();
332 	}
333 }
334 
mali_utilization_gp_end(void)335 void mali_utilization_gp_end(void)
336 {
337 	mali_utilization_data_lock();
338 
339 	--num_running_gp_cores;
340 	if (0 == num_running_gp_cores) {
341 		u64 time_now = _mali_osk_time_get_ns();
342 
343 		/* Last GP core ended, consider GP idle from now and onwards */
344 		accumulated_work_time_gp += (time_now - work_start_time_gp);
345 		work_start_time_gp = 0;
346 
347 		if (0 == num_running_pp_cores) {
348 			/*
349 			 * There are no PP cores running, so this is also the point
350 			 * at which we consider the GPU to be idle as well.
351 			 */
352 			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
353 			work_start_time_gpu = 0;
354 		}
355 	}
356 
357 	mali_utilization_data_unlock();
358 }
359 
mali_utilization_pp_end(void)360 void mali_utilization_pp_end(void)
361 {
362 	mali_utilization_data_lock();
363 
364 	--num_running_pp_cores;
365 	if (0 == num_running_pp_cores) {
366 		u64 time_now = _mali_osk_time_get_ns();
367 
368 		/* Last PP core ended, consider PP idle from now and onwards */
369 		accumulated_work_time_pp += (time_now - work_start_time_pp);
370 		work_start_time_pp = 0;
371 
372 		if (0 == num_running_gp_cores) {
373 			/*
374 			 * There are no GP cores running, so this is also the point
375 			 * at which we consider the GPU to be idle as well.
376 			 */
377 			accumulated_work_time_gpu += (time_now - work_start_time_gpu);
378 			work_start_time_gpu = 0;
379 		}
380 	}
381 
382 	mali_utilization_data_unlock();
383 }
384 
mali_utilization_enabled(void)385 mali_bool mali_utilization_enabled(void)
386 {
387 #if defined(CONFIG_MALI_DVFS)
388 	return mali_dvfs_policy_enabled();
389 #else
390 	return (NULL != mali_utilization_callback);
391 #endif /* defined(CONFIG_MALI_DVFS) */
392 }
393 
mali_utilization_platform_realize(struct mali_gpu_utilization_data * util_data)394 void mali_utilization_platform_realize(struct mali_gpu_utilization_data *util_data)
395 {
396 	MALI_DEBUG_ASSERT_POINTER(mali_utilization_callback);
397 
398 	mali_utilization_callback(util_data);
399 }
400 
mali_utilization_reset(void)401 void mali_utilization_reset(void)
402 {
403 	accumulated_work_time_gpu = 0;
404 	accumulated_work_time_gp = 0;
405 	accumulated_work_time_pp = 0;
406 
407 	last_utilization_gpu = 0;
408 	last_utilization_gp = 0;
409 	last_utilization_pp = 0;
410 }
411 
mali_utilization_data_lock(void)412 void mali_utilization_data_lock(void)
413 {
414 	_mali_osk_spinlock_irq_lock(utilization_data_lock);
415 }
416 
mali_utilization_data_unlock(void)417 void mali_utilization_data_unlock(void)
418 {
419 	_mali_osk_spinlock_irq_unlock(utilization_data_lock);
420 }
421 
mali_utilization_data_assert_locked(void)422 void mali_utilization_data_assert_locked(void)
423 {
424 	MALI_DEBUG_ASSERT_LOCK_HELD(utilization_data_lock);
425 }
426 
_mali_ukk_utilization_gp_pp(void)427 u32 _mali_ukk_utilization_gp_pp(void)
428 {
429 	return last_utilization_gpu;
430 }
431 
_mali_ukk_utilization_gp(void)432 u32 _mali_ukk_utilization_gp(void)
433 {
434 	return last_utilization_gp;
435 }
436 
_mali_ukk_utilization_pp(void)437 u32 _mali_ukk_utilization_pp(void)
438 {
439 	return last_utilization_pp;
440 }
441