1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun *
4*4882a593Smuzhiyun * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
5*4882a593Smuzhiyun *
6*4882a593Smuzhiyun * This program is free software and is provided to you under the terms of the
7*4882a593Smuzhiyun * GNU General Public License version 2 as published by the Free Software
8*4882a593Smuzhiyun * Foundation, and any use by you of this program is subject to the terms
9*4882a593Smuzhiyun * of such GNU license.
10*4882a593Smuzhiyun *
11*4882a593Smuzhiyun * This program is distributed in the hope that it will be useful,
12*4882a593Smuzhiyun * but WITHOUT ANY WARRANTY; without even the implied warranty of
13*4882a593Smuzhiyun * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14*4882a593Smuzhiyun * GNU General Public License for more details.
15*4882a593Smuzhiyun *
16*4882a593Smuzhiyun * You should have received a copy of the GNU General Public License
17*4882a593Smuzhiyun * along with this program; if not, you can access it online at
18*4882a593Smuzhiyun * http://www.gnu.org/licenses/gpl-2.0.html.
19*4882a593Smuzhiyun *
20*4882a593Smuzhiyun */
21*4882a593Smuzhiyun
22*4882a593Smuzhiyun /*
23*4882a593Smuzhiyun * Base kernel property query APIs
24*4882a593Smuzhiyun */
25*4882a593Smuzhiyun
26*4882a593Smuzhiyun #include <mali_kbase.h>
27*4882a593Smuzhiyun #include <gpu/mali_kbase_gpu_regmap.h>
28*4882a593Smuzhiyun #include <mali_kbase_gpuprops.h>
29*4882a593Smuzhiyun #include <mali_kbase_hwaccess_gpuprops.h>
30*4882a593Smuzhiyun #include <mali_kbase_config_defaults.h>
31*4882a593Smuzhiyun #include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
32*4882a593Smuzhiyun #include <linux/clk.h>
33*4882a593Smuzhiyun #include <backend/gpu/mali_kbase_pm_internal.h>
34*4882a593Smuzhiyun #include <linux/of_platform.h>
35*4882a593Smuzhiyun #include <linux/moduleparam.h>
36*4882a593Smuzhiyun
37*4882a593Smuzhiyun
/*
 * kbase_gpuprops_construct_coherent_groups - Build coherency_info from raw regs
 * @props: The &struct base_gpu_props whose raw_props are already populated
 *
 * Derives props->coherency_info (group count, per-group core masks) from the
 * raw register snapshots in props->raw_props: mem_features selects L1 vs L2
 * coherency, and l2_present/shader_present define the group boundaries.
 */
static void kbase_gpuprops_construct_coherent_groups(
		struct base_gpu_props * const props)
{
	struct mali_base_gpu_coherent_group *current_group;
	u64 group_present;
	u64 group_mask;
	u64 first_set, first_set_prev;
	u32 num_groups = 0;

	KBASE_DEBUG_ASSERT(props != NULL);

	/* The raw MEM_FEATURES value doubles as the coherency descriptor */
	props->coherency_info.coherency = props->raw_props.mem_features;
	props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);

	if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
		/* Group is l2 coherent */
		group_present = props->raw_props.l2_present;
	} else {
		/* Group is l1 coherent */
		group_present = props->raw_props.shader_present;
	}

	/*
	 * The coherent group mask can be computed from the l2 present
	 * register.
	 *
	 * For the coherent group n:
	 * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
	 * where first_set is group_present with only its nth set-bit kept
	 * (i.e. the position from where a new group starts).
	 *
	 * For instance if the groups are l2 coherent and l2_present=0x0..01111:
	 * The first mask is:
	 * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
	 *               = (0x0..010 - 1) & ~(0x0..01 - 1)
	 *               = 0x0..00f
	 * The second mask is:
	 * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
	 *               = (0x0..100 - 1) & ~(0x0..010 - 1)
	 *               = 0x0..0f0
	 * And so on until all the bits from group_present have been cleared
	 * (i.e. there is no group left).
	 */

	current_group = props->coherency_info.group;
	/* x & ~(x - 1) isolates the lowest set bit of x */
	first_set = group_present & ~(group_present - 1);

	while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
		group_present -= first_set; /* Clear the current group bit */
		first_set_prev = first_set;

		first_set = group_present & ~(group_present - 1);
		group_mask = (first_set - 1) & ~(first_set_prev - 1);

		/* Populate the coherent_group structure for each group */
		current_group->core_mask = group_mask & props->raw_props.shader_present;
		current_group->num_cores = hweight64(current_group->core_mask);

		num_groups++;
		current_group++;
	}

	/* Any bits left means more groups than BASE_MAX_COHERENT_GROUPS */
	if (group_present != 0)
		pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);

	props->coherency_info.num_groups = num_groups;
}
105*4882a593Smuzhiyun
106*4882a593Smuzhiyun /**
107*4882a593Smuzhiyun * kbase_gpuprops_get_curr_config_props - Get the current allocated resources
108*4882a593Smuzhiyun * @kbdev: The &struct kbase_device structure for the device
109*4882a593Smuzhiyun * @curr_config: The &struct curr_config_props structure to receive the result
110*4882a593Smuzhiyun *
111*4882a593Smuzhiyun * Fill the &struct curr_config_props structure with values from the GPU
112*4882a593Smuzhiyun * configuration registers.
113*4882a593Smuzhiyun *
114*4882a593Smuzhiyun * Return: Zero on success, Linux error code on failure
115*4882a593Smuzhiyun */
kbase_gpuprops_get_curr_config_props(struct kbase_device * kbdev,struct curr_config_props * const curr_config)116*4882a593Smuzhiyun int kbase_gpuprops_get_curr_config_props(struct kbase_device *kbdev,
117*4882a593Smuzhiyun struct curr_config_props * const curr_config)
118*4882a593Smuzhiyun {
119*4882a593Smuzhiyun struct kbase_current_config_regdump curr_config_regdump;
120*4882a593Smuzhiyun int err;
121*4882a593Smuzhiyun
122*4882a593Smuzhiyun if (WARN_ON(!kbdev) || WARN_ON(!curr_config))
123*4882a593Smuzhiyun return -EINVAL;
124*4882a593Smuzhiyun
125*4882a593Smuzhiyun /* If update not needed just return. */
126*4882a593Smuzhiyun if (!curr_config->update_needed)
127*4882a593Smuzhiyun return 0;
128*4882a593Smuzhiyun
129*4882a593Smuzhiyun /* Dump relevant registers */
130*4882a593Smuzhiyun err = kbase_backend_gpuprops_get_curr_config(kbdev,
131*4882a593Smuzhiyun &curr_config_regdump);
132*4882a593Smuzhiyun if (err)
133*4882a593Smuzhiyun return err;
134*4882a593Smuzhiyun
135*4882a593Smuzhiyun curr_config->l2_slices =
136*4882a593Smuzhiyun KBASE_UBFX32(curr_config_regdump.mem_features, 8U, 4) + 1;
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun curr_config->l2_present =
139*4882a593Smuzhiyun ((u64) curr_config_regdump.l2_present_hi << 32) +
140*4882a593Smuzhiyun curr_config_regdump.l2_present_lo;
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun curr_config->shader_present =
143*4882a593Smuzhiyun ((u64) curr_config_regdump.shader_present_hi << 32) +
144*4882a593Smuzhiyun curr_config_regdump.shader_present_lo;
145*4882a593Smuzhiyun
146*4882a593Smuzhiyun curr_config->num_cores = hweight64(curr_config->shader_present);
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun curr_config->update_needed = false;
149*4882a593Smuzhiyun
150*4882a593Smuzhiyun return 0;
151*4882a593Smuzhiyun }
152*4882a593Smuzhiyun
153*4882a593Smuzhiyun /**
154*4882a593Smuzhiyun * kbase_gpuprops_req_curr_config_update - Request Current Config Update
155*4882a593Smuzhiyun * @kbdev: The &struct kbase_device structure for the device
156*4882a593Smuzhiyun *
157*4882a593Smuzhiyun * Requests the current configuration to be updated next time the
158*4882a593Smuzhiyun * kbase_gpuprops_get_curr_config_props() is called.
159*4882a593Smuzhiyun *
160*4882a593Smuzhiyun * Return: Zero on success, Linux error code on failure
161*4882a593Smuzhiyun */
kbase_gpuprops_req_curr_config_update(struct kbase_device * kbdev)162*4882a593Smuzhiyun int kbase_gpuprops_req_curr_config_update(struct kbase_device *kbdev)
163*4882a593Smuzhiyun {
164*4882a593Smuzhiyun if (WARN_ON(!kbdev))
165*4882a593Smuzhiyun return -EINVAL;
166*4882a593Smuzhiyun
167*4882a593Smuzhiyun kbdev->gpu_props.curr_config.update_needed = true;
168*4882a593Smuzhiyun return 0;
169*4882a593Smuzhiyun }
170*4882a593Smuzhiyun
171*4882a593Smuzhiyun /**
172*4882a593Smuzhiyun * kbase_gpuprops_get_props - Get the GPU configuration
173*4882a593Smuzhiyun * @gpu_props: The &struct base_gpu_props structure
174*4882a593Smuzhiyun * @kbdev: The &struct kbase_device structure for the device
175*4882a593Smuzhiyun *
176*4882a593Smuzhiyun * Fill the &struct base_gpu_props structure with values from the GPU
177*4882a593Smuzhiyun * configuration registers. Only the raw properties are filled in this function.
178*4882a593Smuzhiyun *
179*4882a593Smuzhiyun * Return: Zero on success, Linux error code on failure
180*4882a593Smuzhiyun */
kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,struct kbase_device * kbdev)181*4882a593Smuzhiyun static int kbase_gpuprops_get_props(struct base_gpu_props * const gpu_props,
182*4882a593Smuzhiyun struct kbase_device *kbdev)
183*4882a593Smuzhiyun {
184*4882a593Smuzhiyun struct kbase_gpuprops_regdump regdump;
185*4882a593Smuzhiyun int i;
186*4882a593Smuzhiyun int err;
187*4882a593Smuzhiyun
188*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kbdev != NULL);
189*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(gpu_props != NULL);
190*4882a593Smuzhiyun
191*4882a593Smuzhiyun /* Dump relevant registers */
192*4882a593Smuzhiyun err = kbase_backend_gpuprops_get(kbdev, ®dump);
193*4882a593Smuzhiyun if (err)
194*4882a593Smuzhiyun return err;
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun gpu_props->raw_props.gpu_id = regdump.gpu_id;
197*4882a593Smuzhiyun gpu_props->raw_props.tiler_features = regdump.tiler_features;
198*4882a593Smuzhiyun gpu_props->raw_props.mem_features = regdump.mem_features;
199*4882a593Smuzhiyun gpu_props->raw_props.mmu_features = regdump.mmu_features;
200*4882a593Smuzhiyun gpu_props->raw_props.l2_features = regdump.l2_features;
201*4882a593Smuzhiyun
202*4882a593Smuzhiyun gpu_props->raw_props.as_present = regdump.as_present;
203*4882a593Smuzhiyun gpu_props->raw_props.js_present = regdump.js_present;
204*4882a593Smuzhiyun gpu_props->raw_props.shader_present =
205*4882a593Smuzhiyun ((u64) regdump.shader_present_hi << 32) +
206*4882a593Smuzhiyun regdump.shader_present_lo;
207*4882a593Smuzhiyun gpu_props->raw_props.tiler_present =
208*4882a593Smuzhiyun ((u64) regdump.tiler_present_hi << 32) +
209*4882a593Smuzhiyun regdump.tiler_present_lo;
210*4882a593Smuzhiyun gpu_props->raw_props.l2_present =
211*4882a593Smuzhiyun ((u64) regdump.l2_present_hi << 32) +
212*4882a593Smuzhiyun regdump.l2_present_lo;
213*4882a593Smuzhiyun gpu_props->raw_props.stack_present =
214*4882a593Smuzhiyun ((u64) regdump.stack_present_hi << 32) +
215*4882a593Smuzhiyun regdump.stack_present_lo;
216*4882a593Smuzhiyun
217*4882a593Smuzhiyun for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
218*4882a593Smuzhiyun gpu_props->raw_props.js_features[i] = regdump.js_features[i];
219*4882a593Smuzhiyun
220*4882a593Smuzhiyun for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
221*4882a593Smuzhiyun gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
222*4882a593Smuzhiyun
223*4882a593Smuzhiyun gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
224*4882a593Smuzhiyun gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
225*4882a593Smuzhiyun gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
226*4882a593Smuzhiyun gpu_props->raw_props.thread_features = regdump.thread_features;
227*4882a593Smuzhiyun gpu_props->raw_props.thread_tls_alloc = regdump.thread_tls_alloc;
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun gpu_props->raw_props.gpu_features =
230*4882a593Smuzhiyun ((u64) regdump.gpu_features_hi << 32) +
231*4882a593Smuzhiyun regdump.gpu_features_lo;
232*4882a593Smuzhiyun
233*4882a593Smuzhiyun return 0;
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun
kbase_gpuprops_update_core_props_gpu_id(struct base_gpu_props * const gpu_props)236*4882a593Smuzhiyun void kbase_gpuprops_update_core_props_gpu_id(
237*4882a593Smuzhiyun struct base_gpu_props * const gpu_props)
238*4882a593Smuzhiyun {
239*4882a593Smuzhiyun gpu_props->core_props.version_status =
240*4882a593Smuzhiyun KBASE_UBFX32(gpu_props->raw_props.gpu_id, 0U, 4);
241*4882a593Smuzhiyun gpu_props->core_props.minor_revision =
242*4882a593Smuzhiyun KBASE_UBFX32(gpu_props->raw_props.gpu_id, 4U, 8);
243*4882a593Smuzhiyun gpu_props->core_props.major_revision =
244*4882a593Smuzhiyun KBASE_UBFX32(gpu_props->raw_props.gpu_id, 12U, 4);
245*4882a593Smuzhiyun gpu_props->core_props.product_id =
246*4882a593Smuzhiyun KBASE_UBFX32(gpu_props->raw_props.gpu_id, 16U, 16);
247*4882a593Smuzhiyun }
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun /**
250*4882a593Smuzhiyun * kbase_gpuprops_update_max_config_props - Updates the max config properties in
251*4882a593Smuzhiyun * the base_gpu_props.
252*4882a593Smuzhiyun * @base_props: The &struct base_gpu_props structure
253*4882a593Smuzhiyun * @kbdev: The &struct kbase_device structure for the device
254*4882a593Smuzhiyun *
255*4882a593Smuzhiyun * Updates the &struct base_gpu_props structure with the max config properties.
256*4882a593Smuzhiyun */
kbase_gpuprops_update_max_config_props(struct base_gpu_props * const base_props,struct kbase_device * kbdev)257*4882a593Smuzhiyun static void kbase_gpuprops_update_max_config_props(
258*4882a593Smuzhiyun struct base_gpu_props * const base_props, struct kbase_device *kbdev)
259*4882a593Smuzhiyun {
260*4882a593Smuzhiyun int l2_n = 0;
261*4882a593Smuzhiyun
262*4882a593Smuzhiyun if (WARN_ON(!kbdev) || WARN_ON(!base_props))
263*4882a593Smuzhiyun return;
264*4882a593Smuzhiyun
265*4882a593Smuzhiyun /* return if the max_config is not set during arbif initialization */
266*4882a593Smuzhiyun if (kbdev->gpu_props.max_config.core_mask == 0)
267*4882a593Smuzhiyun return;
268*4882a593Smuzhiyun
269*4882a593Smuzhiyun /*
270*4882a593Smuzhiyun * Set the base_props with the maximum config values to ensure that the
271*4882a593Smuzhiyun * user space will always be based on the maximum resources available.
272*4882a593Smuzhiyun */
273*4882a593Smuzhiyun base_props->l2_props.num_l2_slices =
274*4882a593Smuzhiyun kbdev->gpu_props.max_config.l2_slices;
275*4882a593Smuzhiyun base_props->raw_props.shader_present =
276*4882a593Smuzhiyun kbdev->gpu_props.max_config.core_mask;
277*4882a593Smuzhiyun /*
278*4882a593Smuzhiyun * Update l2_present in the raw data to be consistent with the
279*4882a593Smuzhiyun * max_config.l2_slices number.
280*4882a593Smuzhiyun */
281*4882a593Smuzhiyun base_props->raw_props.l2_present = 0;
282*4882a593Smuzhiyun for (l2_n = 0; l2_n < base_props->l2_props.num_l2_slices; l2_n++) {
283*4882a593Smuzhiyun base_props->raw_props.l2_present <<= 1;
284*4882a593Smuzhiyun base_props->raw_props.l2_present |= 0x1;
285*4882a593Smuzhiyun }
286*4882a593Smuzhiyun /*
287*4882a593Smuzhiyun * Update the coherency_info data using just one core group. For
288*4882a593Smuzhiyun * architectures where the max_config is provided by the arbiter it is
289*4882a593Smuzhiyun * not necessary to split the shader core groups in different coherent
290*4882a593Smuzhiyun * groups.
291*4882a593Smuzhiyun */
292*4882a593Smuzhiyun base_props->coherency_info.coherency =
293*4882a593Smuzhiyun base_props->raw_props.mem_features;
294*4882a593Smuzhiyun base_props->coherency_info.num_core_groups = 1;
295*4882a593Smuzhiyun base_props->coherency_info.num_groups = 1;
296*4882a593Smuzhiyun base_props->coherency_info.group[0].core_mask =
297*4882a593Smuzhiyun kbdev->gpu_props.max_config.core_mask;
298*4882a593Smuzhiyun base_props->coherency_info.group[0].num_cores =
299*4882a593Smuzhiyun hweight32(kbdev->gpu_props.max_config.core_mask);
300*4882a593Smuzhiyun }
301*4882a593Smuzhiyun
/**
 * kbase_gpuprops_calculate_props - Calculate the derived properties
 * @gpu_props: The &struct base_gpu_props structure
 * @kbdev: The &struct kbase_device structure for the device
 *
 * Fill the &struct base_gpu_props structure with values derived from the GPU
 * configuration registers
 */
static void kbase_gpuprops_calculate_props(
	struct base_gpu_props * const gpu_props, struct kbase_device *kbdev)
{
	int i;

	/* Populate the base_gpu_props structure */
	kbase_gpuprops_update_core_props_gpu_id(gpu_props);
	gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
	/* totalram_pages became a function in kernel 5.0 */
#if KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE
	gpu_props->core_props.gpu_available_memory_size = totalram_pages << PAGE_SHIFT;
#else
	gpu_props->core_props.gpu_available_memory_size =
		totalram_pages() << PAGE_SHIFT;
#endif

	for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
		gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];

	/* L2_FEATURES[7:0] = log2 line size, [23:16] = log2 cache size */
	gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
	gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);

	/* Field with number of l2 slices is added to MEM_FEATURES register
	 * since t76x. Below code assumes that for older GPU reserved bits will
	 * be read as zero.
	 */
	gpu_props->l2_props.num_l2_slices =
		KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;

	/* TILER_FEATURES[5:0] = log2 bin size, [11:8] = max active levels */
	gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
	gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);

	/* A zero register value means the GPU reports no limit: fall back to
	 * the compile-time defaults for each thread property.
	 */
	if (gpu_props->raw_props.thread_max_threads == 0)
		gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
	else
		gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;

	if (gpu_props->raw_props.thread_max_workgroup_size == 0)
		gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
	else
		gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;

	if (gpu_props->raw_props.thread_max_barrier_size == 0)
		gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
	else
		gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;

	/* Zero TLS alloc defaults to one slot per thread */
	if (gpu_props->raw_props.thread_tls_alloc == 0)
		gpu_props->thread_props.tls_alloc =
			gpu_props->thread_props.max_threads;
	else
		gpu_props->thread_props.tls_alloc =
			gpu_props->raw_props.thread_tls_alloc;

	/* THREAD_FEATURES is laid out differently on CSF vs Job Manager GPUs */
#if MALI_USE_CSF
	/* CSF: [21:0] max registers, [23:22] impl tech, [31:24] max task queue */
	gpu_props->thread_props.max_registers =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 22);
	gpu_props->thread_props.impl_tech =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 22U, 2);
	gpu_props->thread_props.max_task_queue =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 8);
	gpu_props->thread_props.max_thread_group_split = 0;
#else
	/* JM: [15:0] max registers, [23:16] max task queue,
	 * [29:24] max thread group split, [31:30] impl tech
	 */
	gpu_props->thread_props.max_registers =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
	gpu_props->thread_props.max_task_queue =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
	gpu_props->thread_props.max_thread_group_split =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
	gpu_props->thread_props.impl_tech =
		KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
#endif

	/* If values are not specified, then use defaults */
	if (gpu_props->thread_props.max_registers == 0) {
		gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
		gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
		gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
	}

	/*
	 * If the maximum resources allocated information is available it is
	 * necessary to update the base_gpu_props with the max_config info to
	 * the userspace. This is applicable to systems that receive this
	 * information from the arbiter.
	 */
	if (kbdev->gpu_props.max_config.core_mask)
		/* Update the max config properties in the base_gpu_props */
		kbase_gpuprops_update_max_config_props(gpu_props,
			kbdev);
	else
		/* Initialize the coherent_group structure for each group */
		kbase_gpuprops_construct_coherent_groups(gpu_props);
}
403*4882a593Smuzhiyun
kbase_gpuprops_set_max_config(struct kbase_device * kbdev,const struct max_config_props * max_config)404*4882a593Smuzhiyun void kbase_gpuprops_set_max_config(struct kbase_device *kbdev,
405*4882a593Smuzhiyun const struct max_config_props *max_config)
406*4882a593Smuzhiyun {
407*4882a593Smuzhiyun if (WARN_ON(!kbdev) || WARN_ON(!max_config))
408*4882a593Smuzhiyun return;
409*4882a593Smuzhiyun
410*4882a593Smuzhiyun kbdev->gpu_props.max_config.l2_slices = max_config->l2_slices;
411*4882a593Smuzhiyun kbdev->gpu_props.max_config.core_mask = max_config->core_mask;
412*4882a593Smuzhiyun }
413*4882a593Smuzhiyun
/*
 * kbase_gpuprops_set - Populate the device's GPU properties from hardware
 * @kbdev: The &struct kbase_device structure for the device
 *
 * Reads the raw register values, computes derived properties, fills the
 * kbase-only fields, and finally requests/reads the current configuration.
 */
void kbase_gpuprops_set(struct kbase_device *kbdev)
{
	struct kbase_gpu_props *gpu_props;
	struct gpu_raw_gpu_props *raw;

	if (WARN_ON(!kbdev))
		return;
	gpu_props = &kbdev->gpu_props;
	raw = &gpu_props->props.raw_props;

	/* Initialize the base_gpu_props structure from the hardware.
	 * NOTE(review): the error return of kbase_gpuprops_get_props() is
	 * ignored here; on backend failure raw_props may be left
	 * uninitialized — confirm callers tolerate this.
	 */
	kbase_gpuprops_get_props(&gpu_props->props, kbdev);

	/* Populate the derived properties */
	kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);

	/* Populate kbase-only fields */
	/* L2_FEATURES[15:8] = associativity, [31:24] = external bus width */
	gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
	gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);

	gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);

	/* MMU_FEATURES[7:0] = VA bits, [15:8] = PA bits */
	gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
	gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);

	/* Counts derived from the present bitmaps */
	gpu_props->num_cores = hweight64(raw->shader_present);
	gpu_props->num_core_groups =
		gpu_props->props.coherency_info.num_core_groups;
	gpu_props->num_address_spaces = hweight32(raw->as_present);
	gpu_props->num_job_slots = hweight32(raw->js_present);

	/*
	 * Current configuration is used on HW interactions so that the maximum
	 * config is just used for user space avoiding interactions with parts
	 * of the hardware that might not be allocated to the kbase instance at
	 * that moment.
	 */
	kbase_gpuprops_req_curr_config_update(kbdev);
	kbase_gpuprops_get_curr_config_props(kbdev, &gpu_props->curr_config);
}
454*4882a593Smuzhiyun
kbase_gpuprops_set_features(struct kbase_device * kbdev)455*4882a593Smuzhiyun int kbase_gpuprops_set_features(struct kbase_device *kbdev)
456*4882a593Smuzhiyun {
457*4882a593Smuzhiyun struct base_gpu_props *gpu_props;
458*4882a593Smuzhiyun struct kbase_gpuprops_regdump regdump;
459*4882a593Smuzhiyun int err;
460*4882a593Smuzhiyun
461*4882a593Smuzhiyun gpu_props = &kbdev->gpu_props.props;
462*4882a593Smuzhiyun
463*4882a593Smuzhiyun /* Dump relevant registers */
464*4882a593Smuzhiyun err = kbase_backend_gpuprops_get_features(kbdev, ®dump);
465*4882a593Smuzhiyun if (err)
466*4882a593Smuzhiyun return err;
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun /*
469*4882a593Smuzhiyun * Copy the raw value from the register, later this will get turned
470*4882a593Smuzhiyun * into the selected coherency mode.
471*4882a593Smuzhiyun * Additionally, add non-coherent mode, as this is always supported.
472*4882a593Smuzhiyun */
473*4882a593Smuzhiyun gpu_props->raw_props.coherency_mode = regdump.coherency_features |
474*4882a593Smuzhiyun COHERENCY_FEATURE_BIT(COHERENCY_NONE);
475*4882a593Smuzhiyun
476*4882a593Smuzhiyun if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_THREAD_GROUP_SPLIT))
477*4882a593Smuzhiyun gpu_props->thread_props.max_thread_group_split = 0;
478*4882a593Smuzhiyun
479*4882a593Smuzhiyun /*
480*4882a593Smuzhiyun * The CORE_FEATURES register has different meanings depending on GPU.
481*4882a593Smuzhiyun * On tGOx, bits[3:0] encode num_exec_engines.
482*4882a593Smuzhiyun * On CSF GPUs, bits[7:0] is an enumeration that needs to be parsed,
483*4882a593Smuzhiyun * instead.
484*4882a593Smuzhiyun * GPUs like tTIx have additional fields like LSC_SIZE that are
485*4882a593Smuzhiyun * otherwise reserved/RAZ on older GPUs.
486*4882a593Smuzhiyun */
487*4882a593Smuzhiyun gpu_props->raw_props.core_features = regdump.core_features;
488*4882a593Smuzhiyun
489*4882a593Smuzhiyun #if !MALI_USE_CSF
490*4882a593Smuzhiyun gpu_props->core_props.num_exec_engines =
491*4882a593Smuzhiyun KBASE_UBFX32(gpu_props->raw_props.core_features, 0, 4);
492*4882a593Smuzhiyun #endif
493*4882a593Smuzhiyun
494*4882a593Smuzhiyun return err;
495*4882a593Smuzhiyun }
496*4882a593Smuzhiyun
/*
 * Module parameters to allow the L2 size and hash configuration to be
 * overridden.
 *
 * These parameters must be set on insmod to take effect, and are not visible
 * in sysfs.
 */
static u8 override_l2_size;
module_param(override_l2_size, byte, 0000);
MODULE_PARM_DESC(override_l2_size, "Override L2 size config for testing");

static u8 override_l2_hash;
module_param(override_l2_hash, byte, 0000);
MODULE_PARM_DESC(override_l2_hash, "Override L2 hash config for testing");

/* Per-slice hash values for GPUs with the ASN_HASH feature; the number of
 * values actually supplied on the command line is recorded in
 * num_override_l2_hash_values.
 */
static u32 l2_hash_values[ASN_HASH_COUNT] = {
	0,
};
static unsigned int num_override_l2_hash_values;
module_param_array(l2_hash_values, uint, &num_override_l2_hash_values, 0000);
MODULE_PARM_DESC(l2_hash_values, "Override L2 hash values config for testing");
518*4882a593Smuzhiyun
/* Definitions for range of supported user defined hash functions for GPUs
 * that support L2_CONFIG and not ASN_HASH features. Supported hash function
 * range from 0b1000-0b1111 inclusive. Selection of any other values will
 * lead to undefined behavior.
 */
#define USER_DEFINED_HASH_LO ((u8)0x08)
#define USER_DEFINED_HASH_HI ((u8)0x0F)

/* Outcome of probing module parameters and device tree for L2 overrides. */
enum l2_config_override_result {
	L2_CONFIG_OVERRIDE_FAIL = -1,	/* conflicting or unsupported override */
	L2_CONFIG_OVERRIDE_NONE,	/* no override requested */
	L2_CONFIG_OVERRIDE_OK,		/* at least one override accepted */
};
532*4882a593Smuzhiyun
/**
 * kbase_read_l2_config_from_dt - Read L2 configuration
 * @kbdev: The kbase device for which to get the L2 configuration.
 *
 * Check for L2 configuration overrides in module parameters and device tree.
 * Override values in module parameters take priority over override values in
 * device tree.
 *
 * Return: L2_CONFIG_OVERRIDE_OK if either size or hash, or both was properly
 *         overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided.
 *         L2_CONFIG_OVERRIDE_FAIL otherwise.
 */
static enum l2_config_override_result
kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
{
	struct device_node *np = kbdev->dev->of_node;

	/* Without a device tree node there is nothing to override from. */
	if (!np)
		return L2_CONFIG_OVERRIDE_NONE;

	/* L2 size: module parameter wins over the "l2-size" DT property. */
	if (override_l2_size)
		kbdev->l2_size_override = override_l2_size;
	else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
		kbdev->l2_size_override = 0;

	/* Check overriding value is supported, if not will result in
	 * undefined behavior. Again the module parameter takes priority over
	 * the "l2-hash" DT property.
	 */
	if (override_l2_hash >= USER_DEFINED_HASH_LO &&
			override_l2_hash <= USER_DEFINED_HASH_HI)
		kbdev->l2_hash_override = override_l2_hash;
	else if (of_property_read_u8(np, "l2-hash", &kbdev->l2_hash_override))
		kbdev->l2_hash_override = 0;

	/* Hash values array: module parameter array wins over the
	 * "l2-hash-values" DT property.
	 */
	kbdev->l2_hash_values_override = false;
	if (num_override_l2_hash_values) {
		unsigned int i;

		kbdev->l2_hash_values_override = true;
		for (i = 0; i < num_override_l2_hash_values; i++)
			kbdev->l2_hash_values[i] = l2_hash_values[i];
	} else if (!of_property_read_u32_array(np, "l2-hash-values",
				kbdev->l2_hash_values,
				ASN_HASH_COUNT))
		kbdev->l2_hash_values_override = true;

	/* The legacy l2-hash override is invalid on GPUs with ASN_HASH. */
	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
			(kbdev->l2_hash_override)) {
		dev_err(kbdev->dev, "l2-hash not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	/* Conversely, hash value arrays require the ASN_HASH feature. */
	if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH) &&
			(kbdev->l2_hash_values_override)) {
		dev_err(kbdev->dev, "l2-hash-values not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	/* The two hash override mechanisms are mutually exclusive. */
	if (kbdev->l2_hash_override && kbdev->l2_hash_values_override) {
		dev_err(kbdev->dev,
			"both l2-hash & l2-hash-values not supported\n");
		return L2_CONFIG_OVERRIDE_FAIL;
	}

	if (kbdev->l2_size_override || kbdev->l2_hash_override ||
			kbdev->l2_hash_values_override)
		return L2_CONFIG_OVERRIDE_OK;

	return L2_CONFIG_OVERRIDE_NONE;
}
603*4882a593Smuzhiyun
/**
 * kbase_gpuprops_update_l2_features - Apply L2 config overrides and read back
 *                                     the reflected L2_FEATURES register
 * @kbdev: The kbase device.
 *
 * On GPUs with the L2_CONFIG feature, reads any L2 size/hash overrides from
 * module parameters or device tree, power-cycles the L2 cache so the new
 * configuration takes effect (it can only be applied while the L2 is off),
 * then reads back L2_FEATURES/L2_CONFIG and updates gpu_props accordingly.
 *
 * Return: 0 on success, -EIO if the override configuration is invalid, or an
 * error code from the register read-back.
 */
int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
{
	int err = 0;

	if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_L2_CONFIG)) {
		struct kbase_gpuprops_regdump regdump;
		struct base_gpu_props *gpu_props = &kbdev->gpu_props.props;

		/* Check for L2 cache size & hash overrides */
		switch (kbase_read_l2_config_from_dt(kbdev)) {
		case L2_CONFIG_OVERRIDE_FAIL:
			err = -EIO;
			goto exit;
		case L2_CONFIG_OVERRIDE_NONE:
			/* No overrides provided: keep the current hardware
			 * configuration, nothing to power-cycle.
			 */
			goto exit;
		default:
			break;
		}

		/* pm.active_count is expected to be 1 here, which is set in
		 * kbase_hwaccess_pm_powerup().
		 */
		WARN_ON(kbdev->pm.active_count != 1);
		/* The new settings for L2 cache can only be applied when it is
		 * off, so first do the power down.
		 */
		kbase_pm_context_idle(kbdev);
		/* NOTE(review): return value ignored — presumably a wait
		 * timeout is tolerated here; confirm against the backend PM
		 * contract.
		 */
		kbase_pm_wait_for_desired_state(kbdev);

		/* Need L2 to get powered to reflect to L2_FEATURES */
		kbase_pm_context_active(kbdev);

		/* Wait for the completion of L2 power transition */
		kbase_pm_wait_for_l2_powered(kbdev);

		/* Dump L2_FEATURES register */
		err = kbase_backend_gpuprops_get_l2_features(kbdev, &regdump);
		if (err)
			goto exit;

		dev_info(kbdev->dev, "Reflected L2_FEATURES is 0x%x\n",
			 regdump.l2_features);
		dev_info(kbdev->dev, "Reflected L2_CONFIG is 0x%08x\n",
			 regdump.l2_config);

		if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_ASN_HASH)) {
			int idx;
			/* Hardware reports whether the requested ASN hash
			 * override was actually accepted.
			 */
			const bool asn_he = regdump.l2_config &
					    L2_CONFIG_ASN_HASH_ENABLE_MASK;
#if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
			if (!asn_he && kbdev->l2_hash_values_override)
				dev_err(kbdev->dev,
					"Failed to use requested ASN_HASH, fallback to default");
#endif
			for (idx = 0; idx < ASN_HASH_COUNT; idx++)
				dev_info(kbdev->dev,
					 "%s ASN_HASH[%d] is [0x%08x]\n",
					 asn_he ? "Overridden" : "Default", idx,
					 regdump.l2_asn_hash[idx]);
		}

		/* Update gpuprops with reflected L2_FEATURES */
		gpu_props->raw_props.l2_features = regdump.l2_features;
		/* Cache size lives in bits [23:16] of L2_FEATURES. */
		gpu_props->l2_props.log2_cache_size =
			KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
	}

exit:
	return err;
}
674*4882a593Smuzhiyun
/*
 * Table mapping each user-visible KBASE_GPUPROP_* identifier to the offset
 * and size of the corresponding field inside struct base_gpu_props.
 * Consumed by kbase_gpuprops_populate_user_buffer() to serialise the
 * properties into the flat buffer handed to user space.
 */
static struct {
	u32 type;      /* KBASE_GPUPROP_* identifier */
	size_t offset; /* byte offset within base_gpu_props (SIZE_MAX = none) */
	int size;      /* field size in bytes: 1, 2, 4 or 8 */
} gpu_property_mapping[] = {
#define PROP(name, member) \
	{KBASE_GPUPROP_ ## name, offsetof(struct base_gpu_props, member), \
		sizeof(((struct base_gpu_props *)0)->member)}
	PROP(PRODUCT_ID, core_props.product_id),
	PROP(VERSION_STATUS, core_props.version_status),
	PROP(MINOR_REVISION, core_props.minor_revision),
	PROP(MAJOR_REVISION, core_props.major_revision),
	PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max),
	PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size),
	PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]),
	PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]),
	PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]),
	PROP(TEXTURE_FEATURES_3, core_props.texture_features[3]),
	PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),

#if MALI_USE_CSF
/* CSF GPUs no longer carry this field; emit a dummy entry (offset SIZE_MAX)
 * so older user space still finds the property ID in the buffer.
 */
#define BACKWARDS_COMPAT_PROP(name, type) \
	{ \
		KBASE_GPUPROP_##name, SIZE_MAX, sizeof(type) \
	}
	BACKWARDS_COMPAT_PROP(NUM_EXEC_ENGINES, u8),
#else
	PROP(NUM_EXEC_ENGINES, core_props.num_exec_engines),
#endif

	PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size),
	PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size),
	PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices),

	PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes),
	PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels),

	PROP(MAX_THREADS, thread_props.max_threads),
	PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size),
	PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size),
	PROP(MAX_REGISTERS, thread_props.max_registers),
	PROP(MAX_TASK_QUEUE, thread_props.max_task_queue),
	PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split),
	PROP(IMPL_TECH, thread_props.impl_tech),
	PROP(TLS_ALLOC, thread_props.tls_alloc),

	PROP(RAW_SHADER_PRESENT, raw_props.shader_present),
	PROP(RAW_TILER_PRESENT, raw_props.tiler_present),
	PROP(RAW_L2_PRESENT, raw_props.l2_present),
	PROP(RAW_STACK_PRESENT, raw_props.stack_present),
	PROP(RAW_L2_FEATURES, raw_props.l2_features),
	PROP(RAW_CORE_FEATURES, raw_props.core_features),
	PROP(RAW_MEM_FEATURES, raw_props.mem_features),
	PROP(RAW_MMU_FEATURES, raw_props.mmu_features),
	PROP(RAW_AS_PRESENT, raw_props.as_present),
	PROP(RAW_JS_PRESENT, raw_props.js_present),
	PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]),
	PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]),
	PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]),
	PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]),
	PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]),
	PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]),
	PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]),
	PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]),
	PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]),
	PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]),
	PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]),
	PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]),
	PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]),
	PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]),
	PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]),
	PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]),
	PROP(RAW_TILER_FEATURES, raw_props.tiler_features),
	PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]),
	PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]),
	PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]),
	PROP(RAW_TEXTURE_FEATURES_3, raw_props.texture_features[3]),
	PROP(RAW_GPU_ID, raw_props.gpu_id),
	PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads),
	PROP(RAW_THREAD_MAX_WORKGROUP_SIZE, raw_props.thread_max_workgroup_size),
	PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
	PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
	PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
	PROP(RAW_THREAD_TLS_ALLOC, raw_props.thread_tls_alloc),
	PROP(RAW_GPU_FEATURES, raw_props.gpu_features),
	PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
	PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
	PROP(COHERENCY_COHERENCY, coherency_info.coherency),
	PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask),
	PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask),
	PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask),
	PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask),
	PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask),
	PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask),
	PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask),
	PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask),
	PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask),
	PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask),
	PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask),
	PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask),
	PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask),
	PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask),
	PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask),
	PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask),

#undef PROP
};
782*4882a593Smuzhiyun
kbase_gpuprops_populate_user_buffer(struct kbase_device * kbdev)783*4882a593Smuzhiyun int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
784*4882a593Smuzhiyun {
785*4882a593Smuzhiyun struct kbase_gpu_props *kprops = &kbdev->gpu_props;
786*4882a593Smuzhiyun struct base_gpu_props *props = &kprops->props;
787*4882a593Smuzhiyun u32 count = ARRAY_SIZE(gpu_property_mapping);
788*4882a593Smuzhiyun u32 i;
789*4882a593Smuzhiyun u32 size = 0;
790*4882a593Smuzhiyun u8 *p;
791*4882a593Smuzhiyun
792*4882a593Smuzhiyun for (i = 0; i < count; i++) {
793*4882a593Smuzhiyun /* 4 bytes for the ID, and the size of the property */
794*4882a593Smuzhiyun size += 4 + gpu_property_mapping[i].size;
795*4882a593Smuzhiyun }
796*4882a593Smuzhiyun
797*4882a593Smuzhiyun kprops->prop_buffer_size = size;
798*4882a593Smuzhiyun kprops->prop_buffer = kzalloc(size, GFP_KERNEL);
799*4882a593Smuzhiyun
800*4882a593Smuzhiyun if (!kprops->prop_buffer) {
801*4882a593Smuzhiyun kprops->prop_buffer_size = 0;
802*4882a593Smuzhiyun return -ENOMEM;
803*4882a593Smuzhiyun }
804*4882a593Smuzhiyun
805*4882a593Smuzhiyun p = kprops->prop_buffer;
806*4882a593Smuzhiyun
807*4882a593Smuzhiyun #define WRITE_U8(v) (*p++ = (v) & 0xFF)
808*4882a593Smuzhiyun #define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
809*4882a593Smuzhiyun #define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
810*4882a593Smuzhiyun #define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
811*4882a593Smuzhiyun
812*4882a593Smuzhiyun for (i = 0; i < count; i++) {
813*4882a593Smuzhiyun u32 type = gpu_property_mapping[i].type;
814*4882a593Smuzhiyun u8 type_size;
815*4882a593Smuzhiyun const size_t offset = gpu_property_mapping[i].offset;
816*4882a593Smuzhiyun const u64 dummy_backwards_compat_value = (u64)0;
817*4882a593Smuzhiyun const void *field;
818*4882a593Smuzhiyun
819*4882a593Smuzhiyun if (likely(offset < sizeof(struct base_gpu_props)))
820*4882a593Smuzhiyun field = ((const u8 *)props) + offset;
821*4882a593Smuzhiyun else
822*4882a593Smuzhiyun field = &dummy_backwards_compat_value;
823*4882a593Smuzhiyun
824*4882a593Smuzhiyun switch (gpu_property_mapping[i].size) {
825*4882a593Smuzhiyun case 1:
826*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
827*4882a593Smuzhiyun break;
828*4882a593Smuzhiyun case 2:
829*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
830*4882a593Smuzhiyun break;
831*4882a593Smuzhiyun case 4:
832*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
833*4882a593Smuzhiyun break;
834*4882a593Smuzhiyun case 8:
835*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
836*4882a593Smuzhiyun break;
837*4882a593Smuzhiyun default:
838*4882a593Smuzhiyun dev_err(kbdev->dev,
839*4882a593Smuzhiyun "Invalid gpu_property_mapping type=%d size=%d",
840*4882a593Smuzhiyun type, gpu_property_mapping[i].size);
841*4882a593Smuzhiyun return -EINVAL;
842*4882a593Smuzhiyun }
843*4882a593Smuzhiyun
844*4882a593Smuzhiyun WRITE_U32((type<<2) | type_size);
845*4882a593Smuzhiyun
846*4882a593Smuzhiyun switch (type_size) {
847*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U8:
848*4882a593Smuzhiyun WRITE_U8(*((const u8 *)field));
849*4882a593Smuzhiyun break;
850*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U16:
851*4882a593Smuzhiyun WRITE_U16(*((const u16 *)field));
852*4882a593Smuzhiyun break;
853*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U32:
854*4882a593Smuzhiyun WRITE_U32(*((const u32 *)field));
855*4882a593Smuzhiyun break;
856*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U64:
857*4882a593Smuzhiyun WRITE_U64(*((const u64 *)field));
858*4882a593Smuzhiyun break;
859*4882a593Smuzhiyun default: /* Cannot be reached */
860*4882a593Smuzhiyun WARN_ON(1);
861*4882a593Smuzhiyun return -EINVAL;
862*4882a593Smuzhiyun }
863*4882a593Smuzhiyun }
864*4882a593Smuzhiyun
865*4882a593Smuzhiyun return 0;
866*4882a593Smuzhiyun }
867*4882a593Smuzhiyun
kbase_gpuprops_free_user_buffer(struct kbase_device * kbdev)868*4882a593Smuzhiyun void kbase_gpuprops_free_user_buffer(struct kbase_device *kbdev)
869*4882a593Smuzhiyun {
870*4882a593Smuzhiyun kfree(kbdev->gpu_props.prop_buffer);
871*4882a593Smuzhiyun }
872*4882a593Smuzhiyun
kbase_device_populate_max_freq(struct kbase_device * kbdev)873*4882a593Smuzhiyun int kbase_device_populate_max_freq(struct kbase_device *kbdev)
874*4882a593Smuzhiyun {
875*4882a593Smuzhiyun struct mali_base_gpu_core_props *core_props;
876*4882a593Smuzhiyun
877*4882a593Smuzhiyun /* obtain max configured gpu frequency, if devfreq is enabled then
878*4882a593Smuzhiyun * this will be overridden by the highest operating point found
879*4882a593Smuzhiyun */
880*4882a593Smuzhiyun core_props = &(kbdev->gpu_props.props.core_props);
881*4882a593Smuzhiyun #ifdef GPU_FREQ_KHZ_MAX
882*4882a593Smuzhiyun core_props->gpu_freq_khz_max = GPU_FREQ_KHZ_MAX;
883*4882a593Smuzhiyun #else
884*4882a593Smuzhiyun core_props->gpu_freq_khz_max = DEFAULT_GPU_FREQ_KHZ_MAX;
885*4882a593Smuzhiyun #endif
886*4882a593Smuzhiyun
887*4882a593Smuzhiyun return 0;
888*4882a593Smuzhiyun }
889