/*
 *
 * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */





/*
 * Base kernel property query APIs
 */

#include <mali_kbase.h>
#include <mali_midg_regmap.h>
#include <mali_kbase_gpuprops.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_hwaccess_gpuprops.h>
#include "mali_kbase_ioctl.h"
#include <linux/clk.h>
#include <linux/slab.h>

/**
 * KBASE_UBFX32 - Unsigned bit-field extract from a 32-bit value.
 * @value:  The value from which to extract bits.
 * @offset: Index of the first bit to extract (0 being the LSB).
 * @size:   The number of bits to extract.
 *
 * Taken from mali_cdsb.h. The mask is formed in 64-bit arithmetic so that
 * a @size of 32 produces an all-ones 32-bit mask rather than an
 * out-of-range shift.
 *
 * Context: @offset + @size <= 32.
 *
 * Return: Bits [@offset, @offset + @size) from @value, as a u32.
 */
#define KBASE_UBFX32(value, offset, size) \
	((u32)(((1ULL << (u32)(size)) - 1) & ((u32)(value) >> (u32)(offset))))

kbase_gpuprops_uk_get_props(struct kbase_context * kctx,struct kbase_uk_gpuprops * const kbase_props)46*4882a593Smuzhiyun int kbase_gpuprops_uk_get_props(struct kbase_context *kctx, struct kbase_uk_gpuprops * const kbase_props)
47*4882a593Smuzhiyun {
48*4882a593Smuzhiyun kbase_gpu_clk_speed_func get_gpu_speed_mhz;
49*4882a593Smuzhiyun u32 gpu_speed_mhz;
50*4882a593Smuzhiyun int rc = 1;
51*4882a593Smuzhiyun
52*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(NULL != kctx);
53*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(NULL != kbase_props);
54*4882a593Smuzhiyun
55*4882a593Smuzhiyun /* Current GPU speed is requested from the system integrator via the GPU_SPEED_FUNC function.
56*4882a593Smuzhiyun * If that function fails, or the function is not provided by the system integrator, we report the maximum
57*4882a593Smuzhiyun * GPU speed as specified by GPU_FREQ_KHZ_MAX.
58*4882a593Smuzhiyun */
59*4882a593Smuzhiyun get_gpu_speed_mhz = (kbase_gpu_clk_speed_func) GPU_SPEED_FUNC;
60*4882a593Smuzhiyun if (get_gpu_speed_mhz != NULL) {
61*4882a593Smuzhiyun rc = get_gpu_speed_mhz(&gpu_speed_mhz);
62*4882a593Smuzhiyun #ifdef CONFIG_MALI_DEBUG
63*4882a593Smuzhiyun /* Issue a warning message when the reported GPU speed falls outside the min/max range */
64*4882a593Smuzhiyun if (rc == 0) {
65*4882a593Smuzhiyun u32 gpu_speed_khz = gpu_speed_mhz * 1000;
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun if (gpu_speed_khz < kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min ||
68*4882a593Smuzhiyun gpu_speed_khz > kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max)
69*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "GPU Speed is outside of min/max range (got %lu Khz, min %lu Khz, max %lu Khz)\n",
70*4882a593Smuzhiyun (unsigned long)gpu_speed_khz,
71*4882a593Smuzhiyun (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_min,
72*4882a593Smuzhiyun (unsigned long)kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max);
73*4882a593Smuzhiyun }
74*4882a593Smuzhiyun #endif /* CONFIG_MALI_DEBUG */
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun if (kctx->kbdev->clock) {
77*4882a593Smuzhiyun gpu_speed_mhz = clk_get_rate(kctx->kbdev->clock) / 1000000;
78*4882a593Smuzhiyun rc = 0;
79*4882a593Smuzhiyun }
80*4882a593Smuzhiyun if (rc != 0)
81*4882a593Smuzhiyun gpu_speed_mhz = kctx->kbdev->gpu_props.props.core_props.gpu_freq_khz_max / 1000;
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun kctx->kbdev->gpu_props.props.core_props.gpu_speed_mhz = gpu_speed_mhz;
84*4882a593Smuzhiyun
85*4882a593Smuzhiyun memcpy(&kbase_props->props, &kctx->kbdev->gpu_props.props, sizeof(kbase_props->props));
86*4882a593Smuzhiyun
87*4882a593Smuzhiyun /* Before API 8.2 they expect L3 cache info here, which was always 0 */
88*4882a593Smuzhiyun if (kctx->api_version < KBASE_API_VERSION(8, 2))
89*4882a593Smuzhiyun kbase_props->props.raw_props.suspend_size = 0;
90*4882a593Smuzhiyun
91*4882a593Smuzhiyun return 0;
92*4882a593Smuzhiyun }
93*4882a593Smuzhiyun
kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)94*4882a593Smuzhiyun static void kbase_gpuprops_construct_coherent_groups(base_gpu_props * const props)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun struct mali_base_gpu_coherent_group *current_group;
97*4882a593Smuzhiyun u64 group_present;
98*4882a593Smuzhiyun u64 group_mask;
99*4882a593Smuzhiyun u64 first_set, first_set_prev;
100*4882a593Smuzhiyun u32 num_groups = 0;
101*4882a593Smuzhiyun
102*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(NULL != props);
103*4882a593Smuzhiyun
104*4882a593Smuzhiyun props->coherency_info.coherency = props->raw_props.mem_features;
105*4882a593Smuzhiyun props->coherency_info.num_core_groups = hweight64(props->raw_props.l2_present);
106*4882a593Smuzhiyun
107*4882a593Smuzhiyun if (props->coherency_info.coherency & GROUPS_L2_COHERENT) {
108*4882a593Smuzhiyun /* Group is l2 coherent */
109*4882a593Smuzhiyun group_present = props->raw_props.l2_present;
110*4882a593Smuzhiyun } else {
111*4882a593Smuzhiyun /* Group is l1 coherent */
112*4882a593Smuzhiyun group_present = props->raw_props.shader_present;
113*4882a593Smuzhiyun }
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun /*
116*4882a593Smuzhiyun * The coherent group mask can be computed from the l2 present
117*4882a593Smuzhiyun * register.
118*4882a593Smuzhiyun *
119*4882a593Smuzhiyun * For the coherent group n:
120*4882a593Smuzhiyun * group_mask[n] = (first_set[n] - 1) & ~(first_set[n-1] - 1)
121*4882a593Smuzhiyun * where first_set is group_present with only its nth set-bit kept
122*4882a593Smuzhiyun * (i.e. the position from where a new group starts).
123*4882a593Smuzhiyun *
124*4882a593Smuzhiyun * For instance if the groups are l2 coherent and l2_present=0x0..01111:
125*4882a593Smuzhiyun * The first mask is:
126*4882a593Smuzhiyun * group_mask[1] = (first_set[1] - 1) & ~(first_set[0] - 1)
127*4882a593Smuzhiyun * = (0x0..010 - 1) & ~(0x0..01 - 1)
128*4882a593Smuzhiyun * = 0x0..00f
129*4882a593Smuzhiyun * The second mask is:
130*4882a593Smuzhiyun * group_mask[2] = (first_set[2] - 1) & ~(first_set[1] - 1)
131*4882a593Smuzhiyun * = (0x0..100 - 1) & ~(0x0..010 - 1)
132*4882a593Smuzhiyun * = 0x0..0f0
133*4882a593Smuzhiyun * And so on until all the bits from group_present have been cleared
134*4882a593Smuzhiyun * (i.e. there is no group left).
135*4882a593Smuzhiyun */
136*4882a593Smuzhiyun
137*4882a593Smuzhiyun current_group = props->coherency_info.group;
138*4882a593Smuzhiyun first_set = group_present & ~(group_present - 1);
139*4882a593Smuzhiyun
140*4882a593Smuzhiyun while (group_present != 0 && num_groups < BASE_MAX_COHERENT_GROUPS) {
141*4882a593Smuzhiyun group_present -= first_set; /* Clear the current group bit */
142*4882a593Smuzhiyun first_set_prev = first_set;
143*4882a593Smuzhiyun
144*4882a593Smuzhiyun first_set = group_present & ~(group_present - 1);
145*4882a593Smuzhiyun group_mask = (first_set - 1) & ~(first_set_prev - 1);
146*4882a593Smuzhiyun
147*4882a593Smuzhiyun /* Populate the coherent_group structure for each group */
148*4882a593Smuzhiyun current_group->core_mask = group_mask & props->raw_props.shader_present;
149*4882a593Smuzhiyun current_group->num_cores = hweight64(current_group->core_mask);
150*4882a593Smuzhiyun
151*4882a593Smuzhiyun num_groups++;
152*4882a593Smuzhiyun current_group++;
153*4882a593Smuzhiyun }
154*4882a593Smuzhiyun
155*4882a593Smuzhiyun if (group_present != 0)
156*4882a593Smuzhiyun pr_warn("Too many coherent groups (keeping only %d groups).\n", BASE_MAX_COHERENT_GROUPS);
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun props->coherency_info.num_groups = num_groups;
159*4882a593Smuzhiyun }
160*4882a593Smuzhiyun
161*4882a593Smuzhiyun /**
162*4882a593Smuzhiyun * kbase_gpuprops_get_props - Get the GPU configuration
163*4882a593Smuzhiyun * @gpu_props: The &base_gpu_props structure
164*4882a593Smuzhiyun * @kbdev: The &struct kbase_device structure for the device
165*4882a593Smuzhiyun *
166*4882a593Smuzhiyun * Fill the &base_gpu_props structure with values from the GPU configuration
167*4882a593Smuzhiyun * registers. Only the raw properties are filled in this function
168*4882a593Smuzhiyun */
kbase_gpuprops_get_props(base_gpu_props * const gpu_props,struct kbase_device * kbdev)169*4882a593Smuzhiyun static void kbase_gpuprops_get_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
170*4882a593Smuzhiyun {
171*4882a593Smuzhiyun struct kbase_gpuprops_regdump regdump;
172*4882a593Smuzhiyun int i;
173*4882a593Smuzhiyun
174*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(NULL != kbdev);
175*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(NULL != gpu_props);
176*4882a593Smuzhiyun
177*4882a593Smuzhiyun /* Dump relevant registers */
178*4882a593Smuzhiyun kbase_backend_gpuprops_get(kbdev, ®dump);
179*4882a593Smuzhiyun
180*4882a593Smuzhiyun gpu_props->raw_props.gpu_id = regdump.gpu_id;
181*4882a593Smuzhiyun gpu_props->raw_props.tiler_features = regdump.tiler_features;
182*4882a593Smuzhiyun gpu_props->raw_props.mem_features = regdump.mem_features;
183*4882a593Smuzhiyun gpu_props->raw_props.mmu_features = regdump.mmu_features;
184*4882a593Smuzhiyun gpu_props->raw_props.l2_features = regdump.l2_features;
185*4882a593Smuzhiyun gpu_props->raw_props.suspend_size = regdump.suspend_size;
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun gpu_props->raw_props.as_present = regdump.as_present;
188*4882a593Smuzhiyun gpu_props->raw_props.js_present = regdump.js_present;
189*4882a593Smuzhiyun gpu_props->raw_props.shader_present =
190*4882a593Smuzhiyun ((u64) regdump.shader_present_hi << 32) +
191*4882a593Smuzhiyun regdump.shader_present_lo;
192*4882a593Smuzhiyun gpu_props->raw_props.tiler_present =
193*4882a593Smuzhiyun ((u64) regdump.tiler_present_hi << 32) +
194*4882a593Smuzhiyun regdump.tiler_present_lo;
195*4882a593Smuzhiyun gpu_props->raw_props.l2_present =
196*4882a593Smuzhiyun ((u64) regdump.l2_present_hi << 32) +
197*4882a593Smuzhiyun regdump.l2_present_lo;
198*4882a593Smuzhiyun #ifdef CONFIG_MALI_CORESTACK
199*4882a593Smuzhiyun gpu_props->raw_props.stack_present =
200*4882a593Smuzhiyun ((u64) regdump.stack_present_hi << 32) +
201*4882a593Smuzhiyun regdump.stack_present_lo;
202*4882a593Smuzhiyun #else /* CONFIG_MALI_CORESTACK */
203*4882a593Smuzhiyun gpu_props->raw_props.stack_present = 0;
204*4882a593Smuzhiyun #endif /* CONFIG_MALI_CORESTACK */
205*4882a593Smuzhiyun
206*4882a593Smuzhiyun for (i = 0; i < GPU_MAX_JOB_SLOTS; i++)
207*4882a593Smuzhiyun gpu_props->raw_props.js_features[i] = regdump.js_features[i];
208*4882a593Smuzhiyun
209*4882a593Smuzhiyun for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
210*4882a593Smuzhiyun gpu_props->raw_props.texture_features[i] = regdump.texture_features[i];
211*4882a593Smuzhiyun
212*4882a593Smuzhiyun gpu_props->raw_props.thread_max_barrier_size = regdump.thread_max_barrier_size;
213*4882a593Smuzhiyun gpu_props->raw_props.thread_max_threads = regdump.thread_max_threads;
214*4882a593Smuzhiyun gpu_props->raw_props.thread_max_workgroup_size = regdump.thread_max_workgroup_size;
215*4882a593Smuzhiyun gpu_props->raw_props.thread_features = regdump.thread_features;
216*4882a593Smuzhiyun }
217*4882a593Smuzhiyun
/**
 * kbase_gpuprops_update_core_props_gpu_id - Decode raw_props.gpu_id
 * @gpu_props: The &base_gpu_props structure; raw_props.gpu_id must be valid
 *
 * Splits the low 32 bits of the raw GPU ID into the core_props fields:
 * bits [3:0] version status, [11:4] minor revision, [15:12] major revision
 * and [31:16] product ID.
 */
void kbase_gpuprops_update_core_props_gpu_id(base_gpu_props * const gpu_props)
{
	const u32 id = (u32)gpu_props->raw_props.gpu_id;

	gpu_props->core_props.version_status = KBASE_UBFX32(id, 0U, 4);
	gpu_props->core_props.minor_revision = KBASE_UBFX32(id, 4U, 8);
	gpu_props->core_props.major_revision = KBASE_UBFX32(id, 12U, 4);
	gpu_props->core_props.product_id = KBASE_UBFX32(id, 16U, 16);
}

226*4882a593Smuzhiyun /**
227*4882a593Smuzhiyun * kbase_gpuprops_calculate_props - Calculate the derived properties
228*4882a593Smuzhiyun * @gpu_props: The &base_gpu_props structure
229*4882a593Smuzhiyun * @kbdev: The &struct kbase_device structure for the device
230*4882a593Smuzhiyun *
231*4882a593Smuzhiyun * Fill the &base_gpu_props structure with values derived from the GPU
232*4882a593Smuzhiyun * configuration registers
233*4882a593Smuzhiyun */
kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props,struct kbase_device * kbdev)234*4882a593Smuzhiyun static void kbase_gpuprops_calculate_props(base_gpu_props * const gpu_props, struct kbase_device *kbdev)
235*4882a593Smuzhiyun {
236*4882a593Smuzhiyun int i;
237*4882a593Smuzhiyun
238*4882a593Smuzhiyun /* Populate the base_gpu_props structure */
239*4882a593Smuzhiyun kbase_gpuprops_update_core_props_gpu_id(gpu_props);
240*4882a593Smuzhiyun gpu_props->core_props.log2_program_counter_size = KBASE_GPU_PC_SIZE_LOG2;
241*4882a593Smuzhiyun gpu_props->core_props.gpu_available_memory_size = totalram_pages() << PAGE_SHIFT;
242*4882a593Smuzhiyun
243*4882a593Smuzhiyun for (i = 0; i < BASE_GPU_NUM_TEXTURE_FEATURES_REGISTERS; i++)
244*4882a593Smuzhiyun gpu_props->core_props.texture_features[i] = gpu_props->raw_props.texture_features[i];
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun gpu_props->l2_props.log2_line_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 0U, 8);
247*4882a593Smuzhiyun gpu_props->l2_props.log2_cache_size = KBASE_UBFX32(gpu_props->raw_props.l2_features, 16U, 8);
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun /* Field with number of l2 slices is added to MEM_FEATURES register
250*4882a593Smuzhiyun * since t76x. Below code assumes that for older GPU reserved bits will
251*4882a593Smuzhiyun * be read as zero. */
252*4882a593Smuzhiyun gpu_props->l2_props.num_l2_slices =
253*4882a593Smuzhiyun KBASE_UBFX32(gpu_props->raw_props.mem_features, 8U, 4) + 1;
254*4882a593Smuzhiyun
255*4882a593Smuzhiyun gpu_props->tiler_props.bin_size_bytes = 1 << KBASE_UBFX32(gpu_props->raw_props.tiler_features, 0U, 6);
256*4882a593Smuzhiyun gpu_props->tiler_props.max_active_levels = KBASE_UBFX32(gpu_props->raw_props.tiler_features, 8U, 4);
257*4882a593Smuzhiyun
258*4882a593Smuzhiyun if (gpu_props->raw_props.thread_max_threads == 0)
259*4882a593Smuzhiyun gpu_props->thread_props.max_threads = THREAD_MT_DEFAULT;
260*4882a593Smuzhiyun else
261*4882a593Smuzhiyun gpu_props->thread_props.max_threads = gpu_props->raw_props.thread_max_threads;
262*4882a593Smuzhiyun
263*4882a593Smuzhiyun if (gpu_props->raw_props.thread_max_workgroup_size == 0)
264*4882a593Smuzhiyun gpu_props->thread_props.max_workgroup_size = THREAD_MWS_DEFAULT;
265*4882a593Smuzhiyun else
266*4882a593Smuzhiyun gpu_props->thread_props.max_workgroup_size = gpu_props->raw_props.thread_max_workgroup_size;
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun if (gpu_props->raw_props.thread_max_barrier_size == 0)
269*4882a593Smuzhiyun gpu_props->thread_props.max_barrier_size = THREAD_MBS_DEFAULT;
270*4882a593Smuzhiyun else
271*4882a593Smuzhiyun gpu_props->thread_props.max_barrier_size = gpu_props->raw_props.thread_max_barrier_size;
272*4882a593Smuzhiyun
273*4882a593Smuzhiyun gpu_props->thread_props.max_registers = KBASE_UBFX32(gpu_props->raw_props.thread_features, 0U, 16);
274*4882a593Smuzhiyun gpu_props->thread_props.max_task_queue = KBASE_UBFX32(gpu_props->raw_props.thread_features, 16U, 8);
275*4882a593Smuzhiyun gpu_props->thread_props.max_thread_group_split = KBASE_UBFX32(gpu_props->raw_props.thread_features, 24U, 6);
276*4882a593Smuzhiyun gpu_props->thread_props.impl_tech = KBASE_UBFX32(gpu_props->raw_props.thread_features, 30U, 2);
277*4882a593Smuzhiyun
278*4882a593Smuzhiyun /* If values are not specified, then use defaults */
279*4882a593Smuzhiyun if (gpu_props->thread_props.max_registers == 0) {
280*4882a593Smuzhiyun gpu_props->thread_props.max_registers = THREAD_MR_DEFAULT;
281*4882a593Smuzhiyun gpu_props->thread_props.max_task_queue = THREAD_MTQ_DEFAULT;
282*4882a593Smuzhiyun gpu_props->thread_props.max_thread_group_split = THREAD_MTGS_DEFAULT;
283*4882a593Smuzhiyun }
284*4882a593Smuzhiyun /* Initialize the coherent_group structure for each group */
285*4882a593Smuzhiyun kbase_gpuprops_construct_coherent_groups(gpu_props);
286*4882a593Smuzhiyun }
287*4882a593Smuzhiyun
kbase_gpuprops_set(struct kbase_device * kbdev)288*4882a593Smuzhiyun void kbase_gpuprops_set(struct kbase_device *kbdev)
289*4882a593Smuzhiyun {
290*4882a593Smuzhiyun struct kbase_gpu_props *gpu_props;
291*4882a593Smuzhiyun struct gpu_raw_gpu_props *raw;
292*4882a593Smuzhiyun
293*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(NULL != kbdev);
294*4882a593Smuzhiyun gpu_props = &kbdev->gpu_props;
295*4882a593Smuzhiyun raw = &gpu_props->props.raw_props;
296*4882a593Smuzhiyun
297*4882a593Smuzhiyun /* Initialize the base_gpu_props structure from the hardware */
298*4882a593Smuzhiyun kbase_gpuprops_get_props(&gpu_props->props, kbdev);
299*4882a593Smuzhiyun
300*4882a593Smuzhiyun /* Populate the derived properties */
301*4882a593Smuzhiyun kbase_gpuprops_calculate_props(&gpu_props->props, kbdev);
302*4882a593Smuzhiyun
303*4882a593Smuzhiyun /* Populate kbase-only fields */
304*4882a593Smuzhiyun gpu_props->l2_props.associativity = KBASE_UBFX32(raw->l2_features, 8U, 8);
305*4882a593Smuzhiyun gpu_props->l2_props.external_bus_width = KBASE_UBFX32(raw->l2_features, 24U, 8);
306*4882a593Smuzhiyun
307*4882a593Smuzhiyun gpu_props->mem.core_group = KBASE_UBFX32(raw->mem_features, 0U, 1);
308*4882a593Smuzhiyun
309*4882a593Smuzhiyun gpu_props->mmu.va_bits = KBASE_UBFX32(raw->mmu_features, 0U, 8);
310*4882a593Smuzhiyun gpu_props->mmu.pa_bits = KBASE_UBFX32(raw->mmu_features, 8U, 8);
311*4882a593Smuzhiyun
312*4882a593Smuzhiyun gpu_props->num_cores = hweight64(raw->shader_present);
313*4882a593Smuzhiyun gpu_props->num_core_groups = hweight64(raw->l2_present);
314*4882a593Smuzhiyun gpu_props->num_address_spaces = hweight32(raw->as_present);
315*4882a593Smuzhiyun gpu_props->num_job_slots = hweight32(raw->js_present);
316*4882a593Smuzhiyun }
317*4882a593Smuzhiyun
kbase_gpuprops_set_features(struct kbase_device * kbdev)318*4882a593Smuzhiyun void kbase_gpuprops_set_features(struct kbase_device *kbdev)
319*4882a593Smuzhiyun {
320*4882a593Smuzhiyun base_gpu_props *gpu_props;
321*4882a593Smuzhiyun struct kbase_gpuprops_regdump regdump;
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun gpu_props = &kbdev->gpu_props.props;
324*4882a593Smuzhiyun
325*4882a593Smuzhiyun /* Dump relevant registers */
326*4882a593Smuzhiyun kbase_backend_gpuprops_get_features(kbdev, ®dump);
327*4882a593Smuzhiyun
328*4882a593Smuzhiyun /*
329*4882a593Smuzhiyun * Copy the raw value from the register, later this will get turned
330*4882a593Smuzhiyun * into the selected coherency mode.
331*4882a593Smuzhiyun * Additionally, add non-coherent mode, as this is always supported.
332*4882a593Smuzhiyun */
333*4882a593Smuzhiyun gpu_props->raw_props.coherency_mode = regdump.coherency_features |
334*4882a593Smuzhiyun COHERENCY_FEATURE_BIT(COHERENCY_NONE);
335*4882a593Smuzhiyun }
336*4882a593Smuzhiyun
337*4882a593Smuzhiyun static struct {
338*4882a593Smuzhiyun u32 type;
339*4882a593Smuzhiyun size_t offset;
340*4882a593Smuzhiyun int size;
341*4882a593Smuzhiyun } gpu_property_mapping[] = {
342*4882a593Smuzhiyun #define PROP(name, member) \
343*4882a593Smuzhiyun {KBASE_GPUPROP_ ## name, offsetof(struct mali_base_gpu_props, member), \
344*4882a593Smuzhiyun sizeof(((struct mali_base_gpu_props *)0)->member)}
345*4882a593Smuzhiyun PROP(PRODUCT_ID, core_props.product_id),
346*4882a593Smuzhiyun PROP(VERSION_STATUS, core_props.version_status),
347*4882a593Smuzhiyun PROP(MINOR_REVISION, core_props.minor_revision),
348*4882a593Smuzhiyun PROP(MAJOR_REVISION, core_props.major_revision),
349*4882a593Smuzhiyun PROP(GPU_SPEED_MHZ, core_props.gpu_speed_mhz),
350*4882a593Smuzhiyun PROP(GPU_FREQ_KHZ_MAX, core_props.gpu_freq_khz_max),
351*4882a593Smuzhiyun PROP(GPU_FREQ_KHZ_MIN, core_props.gpu_freq_khz_min),
352*4882a593Smuzhiyun PROP(LOG2_PROGRAM_COUNTER_SIZE, core_props.log2_program_counter_size),
353*4882a593Smuzhiyun PROP(TEXTURE_FEATURES_0, core_props.texture_features[0]),
354*4882a593Smuzhiyun PROP(TEXTURE_FEATURES_1, core_props.texture_features[1]),
355*4882a593Smuzhiyun PROP(TEXTURE_FEATURES_2, core_props.texture_features[2]),
356*4882a593Smuzhiyun PROP(GPU_AVAILABLE_MEMORY_SIZE, core_props.gpu_available_memory_size),
357*4882a593Smuzhiyun
358*4882a593Smuzhiyun PROP(L2_LOG2_LINE_SIZE, l2_props.log2_line_size),
359*4882a593Smuzhiyun PROP(L2_LOG2_CACHE_SIZE, l2_props.log2_cache_size),
360*4882a593Smuzhiyun PROP(L2_NUM_L2_SLICES, l2_props.num_l2_slices),
361*4882a593Smuzhiyun
362*4882a593Smuzhiyun PROP(TILER_BIN_SIZE_BYTES, tiler_props.bin_size_bytes),
363*4882a593Smuzhiyun PROP(TILER_MAX_ACTIVE_LEVELS, tiler_props.max_active_levels),
364*4882a593Smuzhiyun
365*4882a593Smuzhiyun PROP(MAX_THREADS, thread_props.max_threads),
366*4882a593Smuzhiyun PROP(MAX_WORKGROUP_SIZE, thread_props.max_workgroup_size),
367*4882a593Smuzhiyun PROP(MAX_BARRIER_SIZE, thread_props.max_barrier_size),
368*4882a593Smuzhiyun PROP(MAX_REGISTERS, thread_props.max_registers),
369*4882a593Smuzhiyun PROP(MAX_TASK_QUEUE, thread_props.max_task_queue),
370*4882a593Smuzhiyun PROP(MAX_THREAD_GROUP_SPLIT, thread_props.max_thread_group_split),
371*4882a593Smuzhiyun PROP(IMPL_TECH, thread_props.impl_tech),
372*4882a593Smuzhiyun
373*4882a593Smuzhiyun PROP(RAW_SHADER_PRESENT, raw_props.shader_present),
374*4882a593Smuzhiyun PROP(RAW_TILER_PRESENT, raw_props.tiler_present),
375*4882a593Smuzhiyun PROP(RAW_L2_PRESENT, raw_props.l2_present),
376*4882a593Smuzhiyun PROP(RAW_STACK_PRESENT, raw_props.stack_present),
377*4882a593Smuzhiyun PROP(RAW_L2_FEATURES, raw_props.l2_features),
378*4882a593Smuzhiyun PROP(RAW_SUSPEND_SIZE, raw_props.suspend_size),
379*4882a593Smuzhiyun PROP(RAW_MEM_FEATURES, raw_props.mem_features),
380*4882a593Smuzhiyun PROP(RAW_MMU_FEATURES, raw_props.mmu_features),
381*4882a593Smuzhiyun PROP(RAW_AS_PRESENT, raw_props.as_present),
382*4882a593Smuzhiyun PROP(RAW_JS_PRESENT, raw_props.js_present),
383*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_0, raw_props.js_features[0]),
384*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_1, raw_props.js_features[1]),
385*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_2, raw_props.js_features[2]),
386*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_3, raw_props.js_features[3]),
387*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_4, raw_props.js_features[4]),
388*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_5, raw_props.js_features[5]),
389*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_6, raw_props.js_features[6]),
390*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_7, raw_props.js_features[7]),
391*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_8, raw_props.js_features[8]),
392*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_9, raw_props.js_features[9]),
393*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_10, raw_props.js_features[10]),
394*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_11, raw_props.js_features[11]),
395*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_12, raw_props.js_features[12]),
396*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_13, raw_props.js_features[13]),
397*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_14, raw_props.js_features[14]),
398*4882a593Smuzhiyun PROP(RAW_JS_FEATURES_15, raw_props.js_features[15]),
399*4882a593Smuzhiyun PROP(RAW_TILER_FEATURES, raw_props.tiler_features),
400*4882a593Smuzhiyun PROP(RAW_TEXTURE_FEATURES_0, raw_props.texture_features[0]),
401*4882a593Smuzhiyun PROP(RAW_TEXTURE_FEATURES_1, raw_props.texture_features[1]),
402*4882a593Smuzhiyun PROP(RAW_TEXTURE_FEATURES_2, raw_props.texture_features[2]),
403*4882a593Smuzhiyun PROP(RAW_GPU_ID, raw_props.gpu_id),
404*4882a593Smuzhiyun PROP(RAW_THREAD_MAX_THREADS, raw_props.thread_max_threads),
405*4882a593Smuzhiyun PROP(RAW_THREAD_MAX_WORKGROUP_SIZE,
406*4882a593Smuzhiyun raw_props.thread_max_workgroup_size),
407*4882a593Smuzhiyun PROP(RAW_THREAD_MAX_BARRIER_SIZE, raw_props.thread_max_barrier_size),
408*4882a593Smuzhiyun PROP(RAW_THREAD_FEATURES, raw_props.thread_features),
409*4882a593Smuzhiyun PROP(RAW_COHERENCY_MODE, raw_props.coherency_mode),
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun PROP(COHERENCY_NUM_GROUPS, coherency_info.num_groups),
412*4882a593Smuzhiyun PROP(COHERENCY_NUM_CORE_GROUPS, coherency_info.num_core_groups),
413*4882a593Smuzhiyun PROP(COHERENCY_COHERENCY, coherency_info.coherency),
414*4882a593Smuzhiyun PROP(COHERENCY_GROUP_0, coherency_info.group[0].core_mask),
415*4882a593Smuzhiyun PROP(COHERENCY_GROUP_1, coherency_info.group[1].core_mask),
416*4882a593Smuzhiyun PROP(COHERENCY_GROUP_2, coherency_info.group[2].core_mask),
417*4882a593Smuzhiyun PROP(COHERENCY_GROUP_3, coherency_info.group[3].core_mask),
418*4882a593Smuzhiyun PROP(COHERENCY_GROUP_4, coherency_info.group[4].core_mask),
419*4882a593Smuzhiyun PROP(COHERENCY_GROUP_5, coherency_info.group[5].core_mask),
420*4882a593Smuzhiyun PROP(COHERENCY_GROUP_6, coherency_info.group[6].core_mask),
421*4882a593Smuzhiyun PROP(COHERENCY_GROUP_7, coherency_info.group[7].core_mask),
422*4882a593Smuzhiyun PROP(COHERENCY_GROUP_8, coherency_info.group[8].core_mask),
423*4882a593Smuzhiyun PROP(COHERENCY_GROUP_9, coherency_info.group[9].core_mask),
424*4882a593Smuzhiyun PROP(COHERENCY_GROUP_10, coherency_info.group[10].core_mask),
425*4882a593Smuzhiyun PROP(COHERENCY_GROUP_11, coherency_info.group[11].core_mask),
426*4882a593Smuzhiyun PROP(COHERENCY_GROUP_12, coherency_info.group[12].core_mask),
427*4882a593Smuzhiyun PROP(COHERENCY_GROUP_13, coherency_info.group[13].core_mask),
428*4882a593Smuzhiyun PROP(COHERENCY_GROUP_14, coherency_info.group[14].core_mask),
429*4882a593Smuzhiyun PROP(COHERENCY_GROUP_15, coherency_info.group[15].core_mask),
430*4882a593Smuzhiyun
431*4882a593Smuzhiyun #undef PROP
432*4882a593Smuzhiyun };
433*4882a593Smuzhiyun
kbase_gpuprops_populate_user_buffer(struct kbase_device * kbdev)434*4882a593Smuzhiyun int kbase_gpuprops_populate_user_buffer(struct kbase_device *kbdev)
435*4882a593Smuzhiyun {
436*4882a593Smuzhiyun struct kbase_gpu_props *kprops = &kbdev->gpu_props;
437*4882a593Smuzhiyun struct mali_base_gpu_props *props = &kprops->props;
438*4882a593Smuzhiyun u32 count = ARRAY_SIZE(gpu_property_mapping);
439*4882a593Smuzhiyun u32 i;
440*4882a593Smuzhiyun u32 size = 0;
441*4882a593Smuzhiyun u8 *p;
442*4882a593Smuzhiyun
443*4882a593Smuzhiyun for (i = 0; i < count; i++) {
444*4882a593Smuzhiyun /* 4 bytes for the ID, and the size of the property */
445*4882a593Smuzhiyun size += 4 + gpu_property_mapping[i].size;
446*4882a593Smuzhiyun }
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun kprops->prop_buffer_size = size;
449*4882a593Smuzhiyun kprops->prop_buffer = kmalloc(size, GFP_KERNEL);
450*4882a593Smuzhiyun
451*4882a593Smuzhiyun if (!kprops->prop_buffer) {
452*4882a593Smuzhiyun kprops->prop_buffer_size = 0;
453*4882a593Smuzhiyun return -ENOMEM;
454*4882a593Smuzhiyun }
455*4882a593Smuzhiyun
456*4882a593Smuzhiyun p = kprops->prop_buffer;
457*4882a593Smuzhiyun
458*4882a593Smuzhiyun #define WRITE_U8(v) (*p++ = (v) & 0xFF)
459*4882a593Smuzhiyun #define WRITE_U16(v) do { WRITE_U8(v); WRITE_U8((v) >> 8); } while (0)
460*4882a593Smuzhiyun #define WRITE_U32(v) do { WRITE_U16(v); WRITE_U16((v) >> 16); } while (0)
461*4882a593Smuzhiyun #define WRITE_U64(v) do { WRITE_U32(v); WRITE_U32((v) >> 32); } while (0)
462*4882a593Smuzhiyun
463*4882a593Smuzhiyun for (i = 0; i < count; i++) {
464*4882a593Smuzhiyun u32 type = gpu_property_mapping[i].type;
465*4882a593Smuzhiyun u8 type_size;
466*4882a593Smuzhiyun void *field = ((u8 *)props) + gpu_property_mapping[i].offset;
467*4882a593Smuzhiyun
468*4882a593Smuzhiyun switch (gpu_property_mapping[i].size) {
469*4882a593Smuzhiyun case 1:
470*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U8;
471*4882a593Smuzhiyun break;
472*4882a593Smuzhiyun case 2:
473*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U16;
474*4882a593Smuzhiyun break;
475*4882a593Smuzhiyun case 4:
476*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U32;
477*4882a593Smuzhiyun break;
478*4882a593Smuzhiyun case 8:
479*4882a593Smuzhiyun type_size = KBASE_GPUPROP_VALUE_SIZE_U64;
480*4882a593Smuzhiyun break;
481*4882a593Smuzhiyun default:
482*4882a593Smuzhiyun dev_err(kbdev->dev,
483*4882a593Smuzhiyun "Invalid gpu_property_mapping type=%d size=%d",
484*4882a593Smuzhiyun type, gpu_property_mapping[i].size);
485*4882a593Smuzhiyun return -EINVAL;
486*4882a593Smuzhiyun }
487*4882a593Smuzhiyun
488*4882a593Smuzhiyun WRITE_U32((type<<2) | type_size);
489*4882a593Smuzhiyun
490*4882a593Smuzhiyun switch (type_size) {
491*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U8:
492*4882a593Smuzhiyun WRITE_U8(*((u8 *)field));
493*4882a593Smuzhiyun break;
494*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U16:
495*4882a593Smuzhiyun WRITE_U16(*((u16 *)field));
496*4882a593Smuzhiyun break;
497*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U32:
498*4882a593Smuzhiyun WRITE_U32(*((u32 *)field));
499*4882a593Smuzhiyun break;
500*4882a593Smuzhiyun case KBASE_GPUPROP_VALUE_SIZE_U64:
501*4882a593Smuzhiyun WRITE_U64(*((u64 *)field));
502*4882a593Smuzhiyun break;
503*4882a593Smuzhiyun default: /* Cannot be reached */
504*4882a593Smuzhiyun WARN_ON(1);
505*4882a593Smuzhiyun return -EINVAL;
506*4882a593Smuzhiyun }
507*4882a593Smuzhiyun }
508*4882a593Smuzhiyun
509*4882a593Smuzhiyun return 0;
510*4882a593Smuzhiyun }
511