1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 /*
23 * Base kernel property query APIs
24 */
25
26 #include <mali_kbase.h>
27 #include <gpu/mali_kbase_gpu_regmap.h>
28 #include <mali_kbase_gpuprops.h>
29 #include <mali_kbase_hwaccess_gpuprops.h>
30 #include <mali_kbase_config_defaults.h>
31 #include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
32 #include <linux/clk.h>
33 #include <backend/gpu/mali_kbase_pm_internal.h>
34 #include <linux/of_platform.h>
35 #include <linux/moduleparam.h>
36
37
/**
 * kbase_gpuprops_construct_coherent_groups - Derive the coherent group info
 * @props: The &struct base_gpu_props structure to populate
 *
 * Fills @props->coherency_info from the raw MEM_FEATURES, L2_PRESENT and
 * SHADER_PRESENT values already stored in @props->raw_props. Each coherent
 * group is a span of shader cores sharing either an L2 slice or an L1,
 * depending on the GROUPS_L2_COHERENT bit of MEM_FEATURES.
 */
static void kbase_gpuprops_construct_coherent_groups(
	struct base_gpu_props * const props)
{
	struct mali_base_gpu_coherent_group *current_group;
	u64 group_present;
	u64 group_mask;
	u64 first_set, first_set_prev;
	u32 num_groups = 0;

	KBASE_DEBUG_ASSERT(props != NULL);

	props->coherency_info.coherency = props->raw_props.mem_features;
	/* One core group per L2 slice present */
	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);

	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
		/* Group is l2 coherent */
		group_present = props->raw_props.l2_present;
	} else {
		/* Group is l1 coherent */
		group_present = props->raw_props.shader_present;
	}

	/*
	 * The coherent group mask can be computed from the l2 present
	 * register.
	 *
	 * For the coherent group n:
	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
	 * where first_set is group_present with only its nth set-bit kept
	 * (i.e. the position from where a new group starts).
	 *
	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
	 * The first mask is:
	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
	 *              = (0x0..010 - 1) & ~(0x0..01 - 1)
	 *              = 0x0..00f
	 * The second mask is:
	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
	 *              = (0x0..100 - 1) & ~(0x0..010 - 1)
	 *              = 0x0..0f0
	 * And so on until all the bits from group_present have been cleared
	 * (i.e. there is no group left).
	 */

	current_group = props->coherency_info.group;
	/* x & ~(x - 1) isolates the lowest set bit of x */
	first_set = group_present & ~(group_present - 1);

	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
		group_present -= first_set;	/* Clear the current group bit */
		first_set_prev = first_set;

		/* Next group's lowest bit; zero when this was the last group,
		 * making group_mask below cover everything above first_set_prev.
		 */
		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		/* Populate the coherent_group structure for each group */
		current_group->core_mask = group_mask & props->raw_props.shader_present;
		current_group->num_cores = hweight64(current_group->core_mask);

		num_groups++;
		current_group++;
	}

	/* Any bits left over means hardware reported more groups than the
	 * base structure can carry; the extras are dropped with a warning.
	 */
	if (group_present != 0)
		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);

	props->coherency_info.num_groups = num_groups;
}
105
106 /**
107 * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
108 * @kbdev: The &struct kbase_device structure for the device
109 * @curr_config: The &struct curr_config_props structure to receive the result
110 *
111 * Fill the &struct curr_config_props structure with values from the GPU
112 * configuration registers.
113 *
114 * Return: Zero on success, Linux error code on failure
115 */
kbase_gpuprops_get_curr_config_props(struct kbase_device * kbdev,struct curr_config_props * const curr_config)116 int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
117 struct curr_config_props * const curr_config)
118 {
119 struct kbase_current_config_regdump curr_config_regdump;
120 int err;
121
122 if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
123 return -EINVAL;
124
125 /* If update not needed just return. */
126 if (!curr_config->update_needed)
127 return 0;
128
129 /* Dump relevant registers */
130 err = kbase_backend_gpuprops_get_curr_config(kbdev,
131 &curr_config_regdump);
132 if (err)
133 return err;
134
135 curr_config->l2_slices =
136 KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1;
137
138 curr_config->l2_present =
139 ((u64) curr_config_regdump.l2_present_hi << 32) +
140 curr_config_regdump.l2_present_lo;
141
142 curr_config->shader_present =
143 ((u64) curr_config_regdump.shader_present_hi << 32) +
144 curr_config_regdump.shader_present_lo;
145
146 curr_config->num_cores = hweight64(curr_config->shader_present);
147
148 curr_config->update_needed = false;
149
150 return 0;
151 }
152
153 /**
154 * kbase_gpuprops_req_curr_config_update - Request Current Config Update
155 * @kbdev: The &struct kbase_device structure for the device
156 *
157 * Requests the current configuration to be updated next time the
158 * kbase_gpuprops_get_curr_config_props() is called.
159 *
160 * Return: Zero on success, Linux error code on failure
161 */
kbase_gpuprops_req_curr_config_update(struct kbase_device * kbdev)162 int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev)
163 {
164 if (WARN_ON(!kbdev))
165 return -EINVAL;
166
167 kbdev->gpu_props.curr_config.update_needed = true;
168 return 0;
169 }
170
171 /**
172 * kbase_gpuprops_get_props - Get the GPU configuration
173 * @gpu_props: The &struct base_gpu_props structure
174 * @kbdev: The &struct kbase_device structure for the device
175 *
176 * Fill the &struct base_gpu_props structure with values from the GPU
177 * configuration registers. Only the raw properties are filled in this function.
178 *
179 * Return: Zero on success, Linux error code on failure
180 */
kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,struct kbase_device * kbdev)181 static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
182 struct kbase_device *kbdev)
183 {
184 struct kbase_gpuprops_regdump regdump;
185 int i;
186 int err;
187
188 KBASE_DEBUG_ASSERT(kbdev != NULL);
189 KBASE_DEBUG_ASSERT(gpu_props != NULL);
190
191 /* Dump relevant registers */
192 err = kbase_backend_gpuprops_get(kbdev, ®dump);
193 if (err)
194 return err;
195
196 gpu_props->raw_props.gpu_id = regdump.gpu_id;
197 gpu_props->raw_props.tiler_features = regdump.tiler_features;
198 gpu_props->raw_props.mem_features = regdump.mem_features;
199 gpu_props->raw_props.mmu_features = regdump.mmu_features;
200 gpu_props->raw_props.l2_features = regdump.l2_features;
201
202 gpu_props->raw_props.as_present = regdump.as_present;
203 gpu_props->raw_props.js_present = regdump.js_present;
204 gpu_props->raw_props.shader_present =
205 ((u64) regdump.shader_present_hi << 32) +
206 regdump.shader_present_lo;
207 gpu_props->raw_props.tiler_present =
208 ((u64) regdump.tiler_present_hi << 32) +
209 regdump.tiler_present_lo;
210 gpu_props->raw_props.l2_present =
211 ((u64) regdump.l2_present_hi << 32) +
212 regdump.l2_present_lo;
213 gpu_props->raw_props.stack_present =
214 ((u64) regdump.stack_present_hi << 32) +
215 regdump.stack_present_lo;
216
217 for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
218 gpu_props->raw_props.js_features[i] = regdump.js_features[i];
219
220 for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
221 gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
222
223 gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
224 gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
225 gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
226 gpu_props->raw_props.thread_features = regdump.thread_features;
227 gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
228
229 gpu_props->raw_props.gpu_features =
230 ((u64) regdump.gpu_features_hi << 32) +
231 regdump.gpu_features_lo;
232
233 return 0;
234 }
235
kbase_gpuprops_update_core_props_gpu_id(struct base_gpu_props * const gpu_props)236 void kbase_gpuprops_update_core_props_gpu_id(
237 struct base_gpu_props * const gpu_props)
238 {
239 gpu_props->core_props.version_status =
240 KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
241 gpu_props->core_props.minor_revision =
242 KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
243 gpu_props->core_props.major_revision =
244 KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
245 gpu_props->core_props.product_id =
246 KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
247 }
248
249 /**
250 * kbase_gpuprops_update_max_config_props - Updates the max config properties in
251 * the base_gpu_props.
252 * @base_props: The &struct base_gpu_props structure
253 * @kbdev: The &struct kbase_device structure for the device
254 *
255 * Updates the &struct base_gpu_props structure with the max config properties.
256 */
kbase_gpuprops_update_max_config_props(struct base_gpu_props * const base_props,struct kbase_device * kbdev)257 static void kbase_gpuprops_update_max_config_props(
258 struct base_gpu_props * const base_props, struct kbase_device *kbdev)
259 {
260 int l2_n = 0;
261
262 if (WARN_ON(!kbdev) || WARN_ON(!base_props))
263 return;
264
265 /* return if the max_config is not set during arbif initialization */
266 if (kbdev->gpu_props.max_config.core_mask == 0)
267 return;
268
269 /*
270 * Set the base_props with the maximum config values to ensure that the
271 * user space will always be based on the maximum resources available.
272 */
273 base_props->l2_props.num_l2_slices =
274 kbdev->gpu_props.max_config.l2_slices;
275 base_props->raw_props.shader_present =
276 kbdev->gpu_props.max_config.core_mask;
277 /*
278 * Update l2_present in the raw data to be consistent with the
279 * max_config.l2_slices number.
280 */
281 base_props->raw_props.l2_present = 0;
282 for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) {
283 base_props->raw_props.l2_present <<= 1;
284 base_props->raw_props.l2_present |= 0x1;
285 }
286 /*
287 * Update the coherency_info data using just one core group. For
288 * architectures where the max_config is provided by the arbiter it is
289 * not necessary to split the shader core groups in different coherent
290 * groups.
291 */
292 base_props->coherency_info.coherency =
293 base_props->raw_props.mem_features;
294 base_props->coherency_info.num_core_groups = 1;
295 base_props->coherency_info.num_groups = 1;
296 base_props->coherency_info.group[0].core_mask =
297 kbdev->gpu_props.max_config.core_mask;
298 base_props->coherency_info.group[0].num_cores =
299 hweight32(kbdev->gpu_props.max_config.core_mask);
300 }
301
/**
 * kbase_gpuprops_calculate_props - Calculate the derived properties
 * @gpu_props: The &struct base_gpu_props structure
 * @kbdev: The &struct kbase_device structure for the device
 *
 * Fill the &struct base_gpu_props structure with values derived from the GPU
 * configuration registers (which must already be in @gpu_props->raw_props).
 */
static void kbase_gpuprops_calculate_props(
	struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
	int i;

	/* Populate the base_gpu_props structure */
	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE
	/* totalram_pages became an accessor function in kernel 5.0 */
	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
#else
	gpu_props->core_props.gpu_available_memory_size =
		totalram_pages() << PAGE_SHIFT;
#endif

	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];

	/* L2_FEATURES: bits[7:0] log2 line size, bits[23:16] log2 cache size */
	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);

	/* Field with number of l2 slices is added to MEM_FEATURES register
	 * since t76x. Below code assumes that for older GPU reserved bits will
	 * be read as zero.
	 */
	gpu_props->l2_props.num_l2_slices =
		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;

	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);

	/* A zero in a raw THREAD_* register means the value is not reported
	 * by this GPU; substitute the configured default instead.
	 */
	if (gpu_props->raw_props.thread_max_threads == 0)
		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
	else
		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;

	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
	else
		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;

	if (gpu_props->raw_props.thread_max_barrier_size == 0)
		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
	else
		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;

	/* TLS allocation defaults to one slot per thread when unreported */
	if (gpu_props->raw_props.thread_tls_alloc == 0)
		gpu_props->thread_props.tls_alloc =
			gpu_props->thread_props.max_threads;
	else
		gpu_props->thread_props.tls_alloc =
			gpu_props->raw_props.thread_tls_alloc;

	/* THREAD_FEATURES is laid out differently on CSF vs job-manager GPUs */
#if MALI_USE_CSF
	gpu_props->thread_props.max_registers =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22);
	gpu_props->thread_props.impl_tech =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2);
	gpu_props->thread_props.max_task_queue =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8);
	gpu_props->thread_props.max_thread_group_split = 0;
#else
	gpu_props->thread_props.max_registers =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
	gpu_props->thread_props.max_task_queue =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
	gpu_props->thread_props.max_thread_group_split =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
	gpu_props->thread_props.impl_tech =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
#endif

	/* If values are not specified, then use defaults */
	if (gpu_props->thread_props.max_registers == 0) {
		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
	}

	/*
	 * If the maximum resources allocated information is available it is
	 * necessary to update the base_gpu_props with the max_config info to
	 * the userspace. This is applicable to systems that receive this
	 * information from the arbiter.
	 */
	if (kbdev->gpu_props.max_config.core_mask)
		/* Update the max config properties in the base_gpu_props */
		kbase_gpuprops_update_max_config_props(gpu_props,
						       kbdev);
	else
		/* Initialize the coherent_group structure for each group */
		kbase_gpuprops_construct_coherent_groups(gpu_props);
}
403
kbase_gpuprops_set_max_config(struct kbase_device * kbdev,const struct max_config_props * max_config)404 void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
405 const struct max_config_props *max_config)
406 {
407 if (WARN_ON(!kbdev) || WARN_ON(!max_config))
408 return;
409
410 kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices;
411 kbdev->gpu_props.max_config.core_mask = max_config->core_mask;
412 }
413
/**
 * kbase_gpuprops_set - Populate the device's GPU properties from hardware
 * @kbdev: The &struct kbase_device structure for the device
 *
 * Reads the raw configuration registers, derives the dependent properties,
 * fills in the kbase-only fields, and refreshes the current configuration.
 *
 * NOTE(review): the int returns of kbase_gpuprops_get_props() and
 * kbase_gpuprops_get_curr_config_props() are ignored here; this void
 * function cannot propagate them - confirm callers tolerate a partially
 * populated structure on register-dump failure.
 */
void kbase_gpuprops_set(struct kbase_device *kbdev)
{
	struct kbase_gpu_props *gpu_props;
	struct gpu_raw_gpu_props *raw;

	if (WARN_ON(!kbdev))
		return;
	gpu_props = &kbdev->gpu_props;
	raw = &gpu_props->props.raw_props;

	/* Initialize the base_gpu_props structure from the hardware */
	kbase_gpuprops_get_props(&gpu_props->props, kbdev);

	/* Populate the derived properties */
	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);

	/* Populate kbase-only fields */
	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);

	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);

	/* MMU_FEATURES: bits[7:0] VA width, bits[15:8] PA width */
	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);

	gpu_props->num_cores = hweight64(raw->shader_present);
	gpu_props->num_core_groups =
		gpu_props->props.coherency_info.num_core_groups;
	gpu_props->num_address_spaces = hweight32(raw->as_present);
	gpu_props->num_job_slots = hweight32(raw->js_present);

	/*
	 * Current configuration is used on HW interactions so that the maximum
	 * config is just used for user space avoiding interactions with parts
	 * of the hardware that might not be allocated to the kbase instance at
	 * that moment.
	 */
	kbase_gpuprops_req_curr_config_update(kbdev);
	kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config);
}
454
kbase_gpuprops_set_features(struct kbase_device * kbdev)455 int kbase_gpuprops_set_features(struct kbase_device *kbdev)
456 {
457 struct base_gpu_props *gpu_props;
458 struct kbase_gpuprops_regdump regdump;
459 int err;
460
461 gpu_props = &kbdev->gpu_props.props;
462
463 /* Dump relevant registers */
464 err = kbase_backend_gpuprops_get_features(kbdev, ®dump);
465 if (err)
466 return err;
467
468 /*
469 * Copy the raw value from the register, later this will get turned
470 * into the selected coherency mode.
471 * Additionally, add non-coherent mode, as this is always supported.
472 */
473 gpu_props->raw_props.coherency_mode = regdump.coherency_features |
474 COHERENCY_FEATURE_BIT(COHERENCY_NONE);
475
476 if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
477 gpu_props->thread_props.max_thread_group_split = 0;
478
479 /*
480 * The CORE_FEATURES register has different meanings depending on GPU.
481 * On tGOx, bits[3:0] encode num_exec_engines.
482 * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed,
483 * instead.
484 * GPUs like tTIx have additional fields like LSC_SIZE that are
485 * otherwise reserved/RAZ on older GPUs.
486 */
487 gpu_props->raw_props.core_features = regdump.core_features;
488
489 #if !MALI_USE_CSF
490 gpu_props->core_props.num_exec_engines =
491 KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
492 #endif
493
494 return err;
495 }
496
/*
 * Module parameters to allow the L2 size and hash configuration to be
 * overridden.
 *
 * These parameters must be set on insmod to take effect, and are not visible
 * in sysfs.
 */
/* Non-zero value takes priority over the "l2-size" device tree property */
static u8 override_l2_size;
module_param(override_l2_size, byte, 0000);
MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing");

/* Non-zero value takes priority over the "l2-hash" device tree property;
 * only values in [USER_DEFINED_HASH_LO, USER_DEFINED_HASH_HI] are honoured.
 */
static u8 override_l2_hash;
module_param(override_l2_hash, byte, 0000);
MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");

/* Per-slice ASN hash values; used only when num_override_l2_hash_values > 0,
 * otherwise the "l2-hash-values" device tree property is consulted.
 */
static u32 l2_hash_values[ASN_HASH_COUNT] = {
	0,
};
static unsigned int num_override_l2_hash_values;
module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");
518
/* Definitions for range of supported user defined hash functions for GPUs
 * that support L2_CONFIG and not ASN_HASH features. Supported hash function
 * range from 0b1000-0b1111 inclusive. Selection of any other values will
 * lead to undefined behavior.
 */
#define USER_DEFINED_HASH_LO ((u8)0x08)
#define USER_DEFINED_HASH_HI ((u8)0x0F)

/* Outcome of scanning module parameters and device tree for L2 overrides */
enum l2_config_override_result {
	L2_CONFIG_OVERRIDE_FAIL = -1,	/* Invalid/conflicting override given */
	L2_CONFIG_OVERRIDE_NONE,	/* No override provided */
	L2_CONFIG_OVERRIDE_OK,		/* At least one override applied */
};
532
/**
 * kbase_read_l2_config_from_dt - Read L2 configuration
 * @kbdev: The kbase device for which to get the L2 configuration.
 *
 * Check for L2 configuration overrides in module parameters and device tree.
 * Override values in module parameters take priority over override values in
 * device tree.
 *
 * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly
 *         overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided.
 *         L2_CONFIG_OVERRIDE_FAIL otherwise.
 */
static enum l2_config_override_result
kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
{
	struct device_node *np = kbdev->dev->of_node;

	/* No device tree node - nothing can be overridden */
	if (!np)
		return L2_CONFIG_OVERRIDE_NONE;

	/* L2 size: module parameter wins over the "l2-size" DT property */
	if (override_l2_size)
		kbdev->l2_size_override = override_l2_size;
	else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
		kbdev->l2_size_override = 0;

	/* Check overriding value is supported, if not will result in
	 * undefined behavior.
	 */
	if (override_l2_hash >= USER_DEFINED_HASH_LO &&
	    override_l2_hash <= USER_DEFINED_HASH_HI)
		kbdev->l2_hash_override = override_l2_hash;
	else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override))
		kbdev->l2_hash_override = 0;

	/* ASN hash values: module parameter array wins over the
	 * "l2-hash-values" DT property.
	 */
	kbdev->l2_hash_values_override = false;
	if (num_override_l2_hash_values) {
		unsigned int i;

		kbdev->l2_hash_values_override = true;
		for (i = 0; i < num_override_l2_hash_values; i++)
			kbdev->l2_hash_values[i] = l2_hash_values[i];
	} else if (!of_property_read_u32_array(np, "l2-hash-values",
					       kbdev->l2_hash_values,
					       ASN_HASH_COUNT))
		kbdev->l2_hash_values_override = true;

	/* Legacy "l2-hash" is invalid on GPUs with the ASN_HASH feature */
	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
	    (kbdev->l2_hash_override)) {
		dev_err(kbdev->dev, "l2-hash not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	/* Conversely, "l2-hash-values" requires the ASN_HASH feature */
	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
	    (kbdev->l2_hash_values_override)) {
		dev_err(kbdev->dev, "l2-hash-values not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	/* The two hash override mechanisms are mutually exclusive */
	if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) {
		dev_err(kbdev->dev,
			"both l2-hash & l2-hash-values not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	if (kbdev->l2_size_override || kbdev->l2_hash_override ||
	    kbdev->l2_hash_values_override)
		return L2_CONFIG_OVERRIDE_OK;

	return L2_CONFIG_OVERRIDE_NONE;
}
603
/**
 * kbase_gpuprops_update_l2_features - Apply L2 overrides and re-read features
 * @kbdev: The &struct kbase_device structure for the device
 *
 * If the GPU supports L2_CONFIG and size/hash overrides were provided, power
 * the L2 down, apply the new configuration, power it back up, and refresh the
 * cached L2_FEATURES-derived properties from the hardware.
 *
 * Return: Zero on success, Linux error code on failure
 */
int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
{
	int err = 0;

	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
		struct kbase_gpuprops_regdump regdump;
		struct base_gpu_props *gpu_props = &kbdev->gpu_props.props;

		/* Check for L2 cache size & hash overrides */
		switch (kbase_read_l2_config_from_dt(kbdev)) {
		case L2_CONFIG_OVERRIDE_FAIL:
			err = -EIO;
			goto exit;
		case L2_CONFIG_OVERRIDE_NONE:
			goto exit;
		default:
			break;
		}

		/* pm.active_count is expected to be 1 here, which is set in
		 * kbase_hwaccess_pm_powerup().
		 */
		WARN_ON(kbdev->pm.active_count != 1);
		/* The new settings for L2 cache can only be applied when it is
		 * off, so first do the power down.
		 */
		kbase_pm_context_idle(kbdev);
		/* NOTE(review): the return value of this wait is ignored -
		 * confirm a timeout here cannot leave the L2 partially
		 * configured.
		 */
		kbase_pm_wait_for_desired_state(kbdev);

		/* Need L2 to get powered to reflect to L2_FEATURES */
		kbase_pm_context_active(kbdev);

		/* Wait for the completion of L2 power transition */
		kbase_pm_wait_for_l2_powered(kbdev);

		/* Dump L2_FEATURES register */
		err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump);
		if (err)
			goto exit;

		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
			 regdump.l2_features);
		dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
			 regdump.l2_config);

		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) {
			int idx;
			const bool asn_he = regdump.l2_config &
					    L2_CONFIG_ASN_HASH_ENABLE_MASK;
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
			/* Hardware rejected the requested hash override */
			if (!asn_he && kbdev->l2_hash_values_override)
				dev_err(kbdev->dev,
					"Failed to use requested ASN_HASH, fallback to default");
#endif
			for (idx = 0; idx < ASN_HASH_COUNT; idx++)
				dev_info(kbdev->dev,
					 "%s ASN_HASH[%d] is [0x%08x]\n",
					 asn_he ? "Overridden" : "Default", idx,
					 regdump.l2_asn_hash[idx]);
		}

		/* Update gpuprops with reflected L2_FEATURES */
		gpu_props->raw_props.l2_features = regdump.l2_features;
		gpu_props->l2_props.log2_cache_size =
			KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
	}

exit:
	return err;
}
674
/* Table driving kbase_gpuprops_populate_user_buffer(): one entry per
 * user-visible property, mapping its KBASE_GPUPROP_* id to the offset and
 * size of the corresponding field inside struct base_gpu_props.
 */
static struct {
	u32 type;
	size_t offset;
	int size;
} gpu_property_mapping[] = {
#define PROP(name, member) \
	{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
		sizeof(((struct base_gpu_props *)0)->member)}
	PROP(PRODUCT_ID,                  core_props.product_id),
	PROP(VERSION_STATUS,              core_props.version_status),
	PROP(MINOR_REVISION,              core_props.minor_revision),
	PROP(MAJOR_REVISION,              core_props.major_revision),
	PROP(GPU_FREQ_KHZ_MAX,            core_props.gpu_freq_khz_max),
	PROP(LOG2_PROGRAM_COUNTER_SIZE,   core_props.log2_program_counter_size),
	PROP(TEXTURE_FEATURES_0,          core_props.texture_features[0]),
	PROP(TEXTURE_FEATURES_1,          core_props.texture_features[1]),
	PROP(TEXTURE_FEATURES_2,          core_props.texture_features[2]),
	PROP(TEXTURE_FEATURES_3,          core_props.texture_features[3]),
	PROP(GPU_AVAILABLE_MEMORY_SIZE,   core_props.gpu_available_memory_size),

#if MALI_USE_CSF
	/* NUM_EXEC_ENGINES has no backing field on CSF builds; SIZE_MAX marks
	 * the entry so the serializer emits a dummy zero value instead.
	 */
#define BACKWARDS_COMPAT_PROP(name, type) \
	{ \
		KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \
	}
	BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8),
#else
	PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
#endif

	PROP(L2_LOG2_LINE_SIZE,           l2_props.log2_line_size),
	PROP(L2_LOG2_CACHE_SIZE,          l2_props.log2_cache_size),
	PROP(L2_NUM_L2_SLICES,            l2_props.num_l2_slices),

	PROP(TILER_BIN_SIZE_BYTES,        tiler_props.bin_size_bytes),
	PROP(TILER_MAX_ACTIVE_LEVELS,     tiler_props.max_active_levels),

	PROP(MAX_THREADS,                 thread_props.max_threads),
	PROP(MAX_WORKGROUP_SIZE,          thread_props.max_workgroup_size),
	PROP(MAX_BARRIER_SIZE,            thread_props.max_barrier_size),
	PROP(MAX_REGISTERS,               thread_props.max_registers),
	PROP(MAX_TASK_QUEUE,              thread_props.max_task_queue),
	PROP(MAX_THREAD_GROUP_SPLIT,      thread_props.max_thread_group_split),
	PROP(IMPL_TECH,                   thread_props.impl_tech),
	PROP(TLS_ALLOC,                   thread_props.tls_alloc),

	PROP(RAW_SHADER_PRESENT,          raw_props.shader_present),
	PROP(RAW_TILER_PRESENT,           raw_props.tiler_present),
	PROP(RAW_L2_PRESENT,              raw_props.l2_present),
	PROP(RAW_STACK_PRESENT,           raw_props.stack_present),
	PROP(RAW_L2_FEATURES,             raw_props.l2_features),
	PROP(RAW_CORE_FEATURES,           raw_props.core_features),
	PROP(RAW_MEM_FEATURES,            raw_props.mem_features),
	PROP(RAW_MMU_FEATURES,            raw_props.mmu_features),
	PROP(RAW_AS_PRESENT,              raw_props.as_present),
	PROP(RAW_JS_PRESENT,              raw_props.js_present),
	PROP(RAW_JS_FEATURES_0,           raw_props.js_features[0]),
	PROP(RAW_JS_FEATURES_1,           raw_props.js_features[1]),
	PROP(RAW_JS_FEATURES_2,           raw_props.js_features[2]),
	PROP(RAW_JS_FEATURES_3,           raw_props.js_features[3]),
	PROP(RAW_JS_FEATURES_4,           raw_props.js_features[4]),
	PROP(RAW_JS_FEATURES_5,           raw_props.js_features[5]),
	PROP(RAW_JS_FEATURES_6,           raw_props.js_features[6]),
	PROP(RAW_JS_FEATURES_7,           raw_props.js_features[7]),
	PROP(RAW_JS_FEATURES_8,           raw_props.js_features[8]),
	PROP(RAW_JS_FEATURES_9,           raw_props.js_features[9]),
	PROP(RAW_JS_FEATURES_10,          raw_props.js_features[10]),
	PROP(RAW_JS_FEATURES_11,          raw_props.js_features[11]),
	PROP(RAW_JS_FEATURES_12,          raw_props.js_features[12]),
	PROP(RAW_JS_FEATURES_13,          raw_props.js_features[13]),
	PROP(RAW_JS_FEATURES_14,          raw_props.js_features[14]),
	PROP(RAW_JS_FEATURES_15,          raw_props.js_features[15]),
	PROP(RAW_TILER_FEATURES,          raw_props.tiler_features),
	PROP(RAW_TEXTURE_FEATURES_0,      raw_props.texture_features[0]),
	PROP(RAW_TEXTURE_FEATURES_1,      raw_props.texture_features[1]),
	PROP(RAW_TEXTURE_FEATURES_2,      raw_props.texture_features[2]),
	PROP(RAW_TEXTURE_FEATURES_3,      raw_props.texture_features[3]),
	PROP(RAW_GPU_ID,                  raw_props.gpu_id),
	PROP(RAW_THREAD_MAX_THREADS,      raw_props.thread_max_threads),
	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size),
	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
	PROP(RAW_THREAD_FEATURES,         raw_props.thread_features),
	PROP(RAW_COHERENCY_MODE,          raw_props.coherency_mode),
	PROP(RAW_THREAD_TLS_ALLOC,        raw_props.thread_tls_alloc),
	PROP(RAW_GPU_FEATURES,            raw_props.gpu_features),
	PROP(COHERENCY_NUM_GROUPS,        coherency_info.num_groups),
	PROP(COHERENCY_NUM_CORE_GROUPS,   coherency_info.num_core_groups),
	PROP(COHERENCY_COHERENCY,         coherency_info.coherency),
	PROP(COHERENCY_GROUP_0,           coherency_info.group[0].core_mask),
	PROP(COHERENCY_GROUP_1,           coherency_info.group[1].core_mask),
	PROP(COHERENCY_GROUP_2,           coherency_info.group[2].core_mask),
	PROP(COHERENCY_GROUP_3,           coherency_info.group[3].core_mask),
	PROP(COHERENCY_GROUP_4,           coherency_info.group[4].core_mask),
	PROP(COHERENCY_GROUP_5,           coherency_info.group[5].core_mask),
	PROP(COHERENCY_GROUP_6,           coherency_info.group[6].core_mask),
	PROP(COHERENCY_GROUP_7,           coherency_info.group[7].core_mask),
	PROP(COHERENCY_GROUP_8,           coherency_info.group[8].core_mask),
	PROP(COHERENCY_GROUP_9,           coherency_info.group[9].core_mask),
	PROP(COHERENCY_GROUP_10,          coherency_info.group[10].core_mask),
	PROP(COHERENCY_GROUP_11,          coherency_info.group[11].core_mask),
	PROP(COHERENCY_GROUP_12,          coherency_info.group[12].core_mask),
	PROP(COHERENCY_GROUP_13,          coherency_info.group[13].core_mask),
	PROP(COHERENCY_GROUP_14,          coherency_info.group[14].core_mask),
	PROP(COHERENCY_GROUP_15,          coherency_info.group[15].core_mask),

#undef PROP
};
782
kbase_gpuprops_populate_user_buffer(struct kbase_device * kbdev)783 int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
784 {
785 struct kbase_gpu_props *kprops = &kbdev->gpu_props;
786 struct base_gpu_props *props = &kprops->props;
787 u32 count = ARRAY_SIZE(gpu_property_mapping);
788 u32 i;
789 u32 size = 0;
790 u8 *p;
791
792 for (i = 0; i < count; i++) {
793 /* 4 bytes for the ID, and the size of the property */
794 size += 4 + gpu_property_mapping[i].size;
795 }
796
797 kprops->prop_buffer_size = size;
798 kprops->prop_buffer = kzalloc(size, GFP_KERNEL);
799
800 if (!kprops->prop_buffer) {
801 kprops->prop_buffer_size = 0;
802 return -ENOMEM;
803 }
804
805 p = kprops->prop_buffer;
806
807 #define WRITE_U8(v) (*p++ = (v) & 0xFF)
808 #define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
809 #define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
810 #define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
811
812 for (i = 0; i < count; i++) {
813 u32 type = gpu_property_mapping[i].type;
814 u8 type_size;
815 const size_t offset = gpu_property_mapping[i].offset;
816 const u64 dummy_backwards_compat_value = (u64)0;
817 const void *field;
818
819 if (likely(offset < sizeof(struct base_gpu_props)))
820 field = ((const u8 *)props) + offset;
821 else
822 field = &dummy_backwards_compat_value;
823
824 switch (gpu_property_mapping[i].size) {
825 case 1:
826 type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
827 break;
828 case 2:
829 type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
830 break;
831 case 4:
832 type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
833 break;
834 case 8:
835 type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
836 break;
837 default:
838 dev_err(kbdev->dev,
839 "Invalid gpu_property_mapping type=%d size=%d",
840 type, gpu_property_mapping[i].size);
841 return -EINVAL;
842 }
843
844 WRITE_U32((type<<2) | type_size);
845
846 switch (type_size) {
847 case KBASE_GPUPROP_VALUE_SIZE_U8:
848 WRITE_U8(*((const u8 *)field));
849 break;
850 case KBASE_GPUPROP_VALUE_SIZE_U16:
851 WRITE_U16(*((const u16 *)field));
852 break;
853 case KBASE_GPUPROP_VALUE_SIZE_U32:
854 WRITE_U32(*((const u32 *)field));
855 break;
856 case KBASE_GPUPROP_VALUE_SIZE_U64:
857 WRITE_U64(*((const u64 *)field));
858 break;
859 default: /* Cannot be reached */
860 WARN_ON(1);
861 return -EINVAL;
862 }
863 }
864
865 return 0;
866 }
867
kbase_gpuprops_free_user_buffer(struct kbase_device * kbdev)868 void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev)
869 {
870 kfree(kbdev->gpu_props.prop_buffer);
871 }
872
kbase_device_populate_max_freq(struct kbase_device * kbdev)873 int kbase_device_populate_max_freq(struct kbase_device *kbdev)
874 {
875 struct mali_base_gpu_core_props *core_props;
876
877 /* obtain max configured gpu frequency, if devfreq is enabled then
878 * this will be overridden by the highest operating point found
879 */
880 core_props = &(kbdev->gpu_props.props.core_props);
881 #ifdef GPU_FREQ_KHZ_MAX
882 core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
883 #else
884 core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX;
885 #endif
886
887 return 0;
888 }
889