xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/midgard/backend/gpu/mali_kbase_js_affinity.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /*
2  *
3  * (C) COPYRIGHT 2010-2016 ARM Limited. All rights reserved.
4  *
5  * This program is free software and is provided to you under the terms of the
6  * GNU General Public License version 2 as published by the Free Software
7  * Foundation, and any use by you of this program is subject to the terms
8  * of such GNU licence.
9  *
10  * A copy of the licence is included with the program, and can also be obtained
11  * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
12  * Boston, MA  02110-1301, USA.
13  *
14  */
15 
16 
17 
18 
19 
20 /*
21  * Base kernel affinity manager APIs
22  */
23 
24 #include <mali_kbase.h>
25 #include "mali_kbase_js_affinity.h"
26 #include "mali_kbase_hw.h"
27 
28 #include <backend/gpu/mali_kbase_pm_internal.h>
29 
30 
kbase_js_can_run_job_on_slot_no_lock(struct kbase_device * kbdev,int js)31 bool kbase_js_can_run_job_on_slot_no_lock(struct kbase_device *kbdev,
32 									int js)
33 {
34 	/*
35 	 * Here are the reasons for using job slot 2:
36 	 * - BASE_HW_ISSUE_8987 (which is entirely used for that purpose)
37 	 * - In absence of the above, then:
38 	 *  - Atoms with BASE_JD_REQ_COHERENT_GROUP
39 	 *  - But, only when there aren't contexts with
40 	 *  KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES, because the atoms that run on
41 	 *  all cores on slot 1 could be blocked by those using a coherent group
42 	 *  on slot 2
43 	 *  - And, only when you actually have 2 or more coregroups - if you
44 	 *  only have 1 coregroup, then having jobs for slot 2 implies they'd
45 	 *  also be for slot 1, meaning you'll get interference from them. Jobs
46 	 *  able to run on slot 2 could also block jobs that can only run on
47 	 *  slot 1 (tiler jobs)
48 	 */
49 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_8987))
50 		return true;
51 
52 	if (js != 2)
53 		return true;
54 
55 	/* Only deal with js==2 now: */
56 	if (kbdev->gpu_props.num_core_groups > 1) {
57 		/* Only use slot 2 in the 2+ coregroup case */
58 		if (kbasep_js_ctx_attr_is_attr_on_runpool(kbdev,
59 					KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES) ==
60 								false) {
61 			/* ...But only when we *don't* have atoms that run on
62 			 * all cores */
63 
64 			/* No specific check for BASE_JD_REQ_COHERENT_GROUP
65 			 * atoms - the policy will sort that out */
66 			return true;
67 		}
68 	}
69 
70 	/* Above checks failed mean we shouldn't use slot 2 */
71 	return false;
72 }
73 
/*
 * Until the planned deeper rework of the job scheduler, power manager
 * and affinity manager lands, this function is an intermediate step
 * that assumes:
 * - all working cores will be powered on when this is called.
 * - the largest current configuration is 2 core groups.
 * - no hardcoded values are used, so the low and high cores in a core
 *   split will be evenly distributed.
 * - odd combinations of core requirements have been filtered out and
 *   do not reach this function (e.g. CS+T+NSS is not supported here).
 * - this function is called frequently and could be optimized (see
 *   notes in the loops), but as the functionality is likely to be
 *   modified, optimization has not been addressed.
 */
kbase_js_choose_affinity(u64 * const affinity,struct kbase_device * kbdev,struct kbase_jd_atom * katom,int js)90 bool kbase_js_choose_affinity(u64 * const affinity,
91 					struct kbase_device *kbdev,
92 					struct kbase_jd_atom *katom, int js)
93 {
94 	base_jd_core_req core_req = katom->core_req;
95 	unsigned int num_core_groups = kbdev->gpu_props.num_core_groups;
96 	u64 core_availability_mask;
97 
98 	lockdep_assert_held(&kbdev->hwaccess_lock);
99 
100 	core_availability_mask = kbase_pm_ca_get_core_mask(kbdev);
101 
102 	/*
103 	 * If no cores are currently available (core availability policy is
104 	 * transitioning) then fail.
105 	 */
106 	if (0 == core_availability_mask) {
107 		*affinity = 0;
108 		return false;
109 	}
110 
111 	KBASE_DEBUG_ASSERT(js >= 0);
112 
113 	if ((core_req & (BASE_JD_REQ_FS | BASE_JD_REQ_CS | BASE_JD_REQ_T)) ==
114 								BASE_JD_REQ_T) {
115 		 /* If the hardware supports XAFFINITY then we'll only enable
116 		  * the tiler (which is the default so this is a no-op),
117 		  * otherwise enable shader core 0. */
118 		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
119 			*affinity = 1;
120 		else
121 			*affinity = 0;
122 
123 		return true;
124 	}
125 
126 	if (1 == kbdev->gpu_props.num_cores) {
127 		/* trivial case only one core, nothing to do */
128 		*affinity = core_availability_mask &
129 				kbdev->pm.debug_core_mask[js];
130 	} else {
131 		if ((core_req & (BASE_JD_REQ_COHERENT_GROUP |
132 					BASE_JD_REQ_SPECIFIC_COHERENT_GROUP))) {
133 			if (js == 0 || num_core_groups == 1) {
134 				/* js[0] and single-core-group systems just get
135 				 * the first core group */
136 				*affinity =
137 				kbdev->gpu_props.props.coherency_info.group[0].core_mask
138 						& core_availability_mask &
139 						kbdev->pm.debug_core_mask[js];
140 			} else {
141 				/* js[1], js[2] use core groups 0, 1 for
142 				 * dual-core-group systems */
143 				u32 core_group_idx = ((u32) js) - 1;
144 
145 				KBASE_DEBUG_ASSERT(core_group_idx <
146 							num_core_groups);
147 				*affinity =
148 				kbdev->gpu_props.props.coherency_info.group[core_group_idx].core_mask
149 						& core_availability_mask &
150 						kbdev->pm.debug_core_mask[js];
151 
152 				/* If the job is specifically targeting core
153 				 * group 1 and the core availability policy is
154 				 * keeping that core group off, then fail */
155 				if (*affinity == 0 && core_group_idx == 1 &&
156 						kbdev->pm.backend.cg1_disabled
157 								== true)
158 					katom->event_code =
159 							BASE_JD_EVENT_PM_EVENT;
160 			}
161 		} else {
162 			/* All cores are available when no core split is
163 			 * required */
164 			*affinity = core_availability_mask &
165 					kbdev->pm.debug_core_mask[js];
166 		}
167 	}
168 
169 	/*
170 	 * If no cores are currently available in the desired core group(s)
171 	 * (core availability policy is transitioning) then fail.
172 	 */
173 	if (*affinity == 0)
174 		return false;
175 
176 	/* Enable core 0 if tiler required for hardware without XAFFINITY
177 	 * support (notes above) */
178 	if (core_req & BASE_JD_REQ_T) {
179 		if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_XAFFINITY))
180 			*affinity = *affinity | 1;
181 	}
182 
183 	return true;
184 }
185 
kbase_js_affinity_is_violating(struct kbase_device * kbdev,u64 * affinities)186 static inline bool kbase_js_affinity_is_violating(
187 						struct kbase_device *kbdev,
188 								u64 *affinities)
189 {
190 	/* This implementation checks whether the two slots involved in Generic
191 	 * thread creation have intersecting affinity. This is due to micro-
192 	 * architectural issues where a job in slot A targetting cores used by
193 	 * slot B could prevent the job in slot B from making progress until the
194 	 * job in slot A has completed.
195 	 */
196 	u64 affinity_set_left;
197 	u64 affinity_set_right;
198 	u64 intersection;
199 
200 	KBASE_DEBUG_ASSERT(affinities != NULL);
201 
202 	affinity_set_left = affinities[1];
203 
204 	affinity_set_right = affinities[2];
205 
206 	/* A violation occurs when any bit in the left_set is also in the
207 	 * right_set */
208 	intersection = affinity_set_left & affinity_set_right;
209 
210 	return (bool) (intersection != (u64) 0u);
211 }
212 
kbase_js_affinity_would_violate(struct kbase_device * kbdev,int js,u64 affinity)213 bool kbase_js_affinity_would_violate(struct kbase_device *kbdev, int js,
214 								u64 affinity)
215 {
216 	struct kbasep_js_device_data *js_devdata;
217 	u64 new_affinities[BASE_JM_MAX_NR_SLOTS];
218 
219 	KBASE_DEBUG_ASSERT(kbdev != NULL);
220 	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
221 	js_devdata = &kbdev->js_data;
222 
223 	memcpy(new_affinities, js_devdata->runpool_irq.slot_affinities,
224 			sizeof(js_devdata->runpool_irq.slot_affinities));
225 
226 	new_affinities[js] |= affinity;
227 
228 	return kbase_js_affinity_is_violating(kbdev, new_affinities);
229 }
230 
void kbase_js_affinity_retain_slot_cores(struct kbase_device *kbdev, int js,
								u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 remaining = affinity;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	/* Caller must have checked this affinity beforehand */
	KBASE_DEBUG_ASSERT(kbase_js_affinity_would_violate(kbdev, js, affinity)
								== false);

	/* Bump the per-core refcount for every core in the mask, setting the
	 * slot's affinity bit on each 0 -> 1 transition */
	while (remaining) {
		int bitnum = fls64(remaining) - 1;
		u64 bit = 1ULL << bitnum;

		if (++(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]) == 1)
			js_devdata->runpool_irq.slot_affinities[js] |= bit;

		remaining &= ~bit;
	}
}
259 
void kbase_js_affinity_release_slot_cores(struct kbase_device *kbdev, int js,
								u64 affinity)
{
	struct kbasep_js_device_data *js_devdata;
	u64 remaining = affinity;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	KBASE_DEBUG_ASSERT(js < BASE_JM_MAX_NR_SLOTS);
	js_devdata = &kbdev->js_data;

	/* Drop the per-core refcount for every core in the mask, clearing
	 * the slot's affinity bit on each 1 -> 0 transition */
	while (remaining) {
		int bitnum = fls64(remaining) - 1;
		u64 bit = 1ULL << bitnum;

		/* Releasing a core that was never retained is a bug */
		KBASE_DEBUG_ASSERT(
		js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum] > 0);

		if (--(js_devdata->runpool_irq.slot_affinity_refcount[js][bitnum]) == 0)
			js_devdata->runpool_irq.slot_affinities[js] &= ~bit;

		remaining &= ~bit;
	}
}
288 
#if KBASE_TRACE_ENABLE
/**
 * kbase_js_debug_log_current_affinities - trace the current affinity mask of
 * every job slot.
 * @kbdev: kbase device
 *
 * Emits one JS_AFFINITY_CURRENT trace entry per slot. Only the low 32 bits
 * of each slot's affinity are recorded (trace-payload width).
 */
void kbase_js_debug_log_current_affinities(struct kbase_device *kbdev)
{
	struct kbasep_js_device_data *js_devdata;
	int slot_nr;

	KBASE_DEBUG_ASSERT(kbdev != NULL);
	js_devdata = &kbdev->js_data;

	/* Use BASE_JM_MAX_NR_SLOTS (the size of slot_affinities) instead of
	 * a magic 3, keeping the bound consistent with the rest of this
	 * file's slot-array handling. */
	for (slot_nr = 0; slot_nr < BASE_JM_MAX_NR_SLOTS; ++slot_nr)
		KBASE_TRACE_ADD_SLOT_INFO(kbdev, JS_AFFINITY_CURRENT, NULL,
							NULL, 0u, slot_nr,
			(u32) js_devdata->runpool_irq.slot_affinities[slot_nr]);
}
#endif				/* KBASE_TRACE_ENABLE  */
304