// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2017-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include "mali_kbase_ipa_counter_common_jm.h"
#include "ipa/mali_kbase_ipa_debugfs.h"

#define DEFAULT_SCALING_FACTOR 5

/* If the value of GPU_ACTIVE is below this, use the simple model
 * instead, to avoid extrapolating small amounts of counter data across
 * large sample periods.
 */
#define DEFAULT_MIN_SAMPLE_CYCLES 10000

/**
 * kbase_ipa_read_hwcnt() - read a counter value
 * @model_data: pointer to model data
 * @offset:     offset, in bytes, into vinstr buffer
 *
 * Return: A 32-bit counter value. Range: 0 < value < 2^27 (worst case would be
 * incrementing every cycle over a ~100ms sample period at a high frequency,
 * e.g. 1 GHz: 2^30 * 0.1 seconds ~= 2^27).
 */
static inline u32 kbase_ipa_read_hwcnt(
	struct kbase_ipa_model_vinstr_data *model_data,
	u32 offset)
{
	u8 *p = (u8 *)model_data->dump_buf.dump_buf;
	u64 val = *(u64 *)&p[offset];

	return (val > U32_MAX) ? U32_MAX : (u32)val;
}

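/**
 * kbase_ipa_add_saturate() - add two signed 64-bit values with saturation
 * @a: first addend
 * @b: second addend
 *
 * Return: a + b, clamped to S64_MAX on positive overflow and to S64_MIN on
 * negative overflow, so that accumulated counter sums never wrap.
 */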
static inline s64 kbase_ipa_add_saturate(s64 a, s64 b)
{
	s64 rtn;

	if (a > 0 && (S64_MAX - a) < b)
		rtn = S64_MAX;
	else if (a < 0 && (S64_MIN - a) > b)
		rtn = S64_MIN;
	else
		rtn = a + b;

	return rtn;
}

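/**
 * kbase_ipa_sum_all_shader_cores() - sum a counter across all shader cores
 * @model_data: pointer to model data
 * @coeff:      model coefficient to weight the summed value by
 * @counter:    offset of the counter within a shader core's counter block
 *
 * Walks the shader core mask and accumulates, with saturation, the given
 * counter from each present core's block in the dump buffer.
 *
 * Return: the saturated sum multiplied by @coeff.
 */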
s64 kbase_ipa_sum_all_shader_cores(
	struct kbase_ipa_model_vinstr_data *model_data,
	s32 coeff, u32 counter)
{
	struct kbase_device *kbdev = model_data->kbdev;
	u64 core_mask;
	u32 base = 0;
	s64 ret = 0;

	core_mask = kbdev->gpu_props.props.coherency_info.group[0].core_mask;
	while (core_mask != 0ull) {
		if ((core_mask & 1ull) != 0ull) {
			/* 0 < counter_value < 2^27 */
			u32 counter_value = kbase_ipa_read_hwcnt(model_data,
								 base + counter);

			/* 0 < ret < 2^27 * max_num_cores = 2^32 */
			ret = kbase_ipa_add_saturate(ret, counter_value);
		}
		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
		core_mask >>= 1;
	}

	/* Range: -2^54 < ret * coeff < 2^54 */
	return ret * coeff;
}

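/**
 * kbase_ipa_sum_all_memsys_blocks() - sum a counter across all memsys blocks
 * @model_data: pointer to model data
 * @coeff:      model coefficient to weight the summed value by
 * @counter:    offset of the counter within a memory system counter block
 *
 * Accumulates, with saturation, the given counter from each L2 slice's
 * block in the dump buffer.
 *
 * Return: the saturated sum multiplied by @coeff.
 */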
s64 kbase_ipa_sum_all_memsys_blocks(
	struct kbase_ipa_model_vinstr_data *model_data,
	s32 coeff, u32 counter)
{
	struct kbase_device *kbdev = model_data->kbdev;
	const u32 num_blocks = kbdev->gpu_props.props.l2_props.num_l2_slices;
	u32 base = 0;
	s64 ret = 0;
	u32 i;

	for (i = 0; i < num_blocks; i++) {
		/* 0 < counter_value < 2^27 */
		u32 counter_value = kbase_ipa_read_hwcnt(model_data,
							 base + counter);

		/* 0 < ret < 2^27 * max_num_memsys_blocks = 2^29 */
		ret = kbase_ipa_add_saturate(ret, counter_value);
		base += KBASE_IPA_NR_BYTES_PER_BLOCK;
	}

	/* Range: -2^51 < ret * coeff < 2^51 */
	return ret * coeff;
}

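/**
 * kbase_ipa_single_counter() - read a single counter from the dump buffer
 * @model_data: pointer to model data
 * @coeff:      model coefficient to weight the counter value by
 * @counter:    offset, in bytes, of the counter in the dump buffer
 *
 * Return: the counter value multiplied by @coeff.
 */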
s64 kbase_ipa_single_counter(
	struct kbase_ipa_model_vinstr_data *model_data,
	s32 coeff, u32 counter)
{
	/* Range: 0 < counter_value < 2^27 */
	const u32 counter_value = kbase_ipa_read_hwcnt(model_data, counter);

	/* Range: -2^49 < ret < 2^49 */
	return counter_value * (s64) coeff;
}

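/**
 * kbase_ipa_attach_vinstr() - attach the model to the hwcnt virtualizer
 * @model_data: pointer to model data
 *
 * Registers a virtualizer client with all hardware counters enabled (but
 * cycle counters disabled), and allocates the dump buffer the model
 * samples from.
 *
 * Return: 0 on success, or a negative value on failure.
 */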
int kbase_ipa_attach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
	int errcode;
	struct kbase_device *kbdev = model_data->kbdev;
	struct kbase_hwcnt_virtualizer *hvirt = kbdev->hwcnt_gpu_virt;
	struct kbase_hwcnt_enable_map enable_map;
	const struct kbase_hwcnt_metadata *metadata =
		kbase_hwcnt_virtualizer_metadata(hvirt);

	if (!metadata)
		return -1;

	errcode = kbase_hwcnt_enable_map_alloc(metadata, &enable_map);
	if (errcode) {
		dev_err(kbdev->dev, "Failed to allocate IPA enable map");
		return errcode;
	}

	kbase_hwcnt_enable_map_enable_all(&enable_map);

	/* Disable cycle counter only. */
	enable_map.clk_enable_map = 0;

	errcode = kbase_hwcnt_virtualizer_client_create(
		hvirt, &enable_map, &model_data->hvirt_cli);
	kbase_hwcnt_enable_map_free(&enable_map);
	if (errcode) {
		dev_err(kbdev->dev, "Failed to register IPA with virtualizer");
		model_data->hvirt_cli = NULL;
		return errcode;
	}

	errcode = kbase_hwcnt_dump_buffer_alloc(
		metadata, &model_data->dump_buf);
	if (errcode) {
		dev_err(kbdev->dev, "Failed to allocate IPA dump buffer");
		kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
		model_data->hvirt_cli = NULL;
		return errcode;
	}

	return 0;
}

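/**
 * kbase_ipa_detach_vinstr() - detach the model from the hwcnt virtualizer
 * @model_data: pointer to model data
 *
 * Destroys the virtualizer client and frees the dump buffer, if attached.
 */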
void kbase_ipa_detach_vinstr(struct kbase_ipa_model_vinstr_data *model_data)
{
	if (model_data->hvirt_cli) {
		kbase_hwcnt_virtualizer_client_destroy(model_data->hvirt_cli);
		kbase_hwcnt_dump_buffer_free(&model_data->dump_buf);
		model_data->hvirt_cli = NULL;
	}
}

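/**
 * kbase_ipa_vinstr_dynamic_coeff() - calculate the dynamic power coefficient
 * @model:  pointer to the instantiated model
 * @coeffp: location the coefficient, in pW/(Hz V^2), is written to
 *
 * Dumps the hardware counters, sums each IPA group's counter weighted by
 * that group's coefficient, then normalizes by active cycles, reference
 * voltage squared and the user-specified scaling factor. A clamped
 * coefficient is written to @coeffp on both success and failure.
 *
 * Return: 0 on success, or an error code (e.g. -ENODATA if too few GPU
 * cycles elapsed for a reliable sample).
 */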
int kbase_ipa_vinstr_dynamic_coeff(struct kbase_ipa_model *model, u32 *coeffp)
{
	struct kbase_ipa_model_vinstr_data *model_data =
		(struct kbase_ipa_model_vinstr_data *)model->model_data;
	s64 energy = 0;
	size_t i;
	u64 coeff = 0, coeff_mul = 0;
	u64 start_ts_ns, end_ts_ns;
	u32 active_cycles;
	int err = 0;

	err = kbase_hwcnt_virtualizer_client_dump(model_data->hvirt_cli,
			&start_ts_ns, &end_ts_ns, &model_data->dump_buf);
	if (err)
		goto err0;

	/* Range: from 0 (GPU not used at all) up to the max sampling
	 * interval, say 1s, times the max GPU frequency (GPU 100% utilized).
	 * 0 <= active_cycles <= 1s * ~2GHz
	 * 0 <= active_cycles < 2^31
	 */
	active_cycles = model_data->get_active_cycles(model_data);

	if (active_cycles < (u32) max(model_data->min_sample_cycles, 0)) {
		err = -ENODATA;
		goto err0;
	}

	/* Range: 1 <= active_cycles < 2^31 */
	active_cycles = max(1u, active_cycles);

	/* Range of 'energy' is +/- 2^54 * number of IPA groups (~8), so around
	 * -2^57 < energy < 2^57
	 */
	for (i = 0; i < model_data->groups_def_num; i++) {
		const struct kbase_ipa_group *group = &model_data->groups_def[i];
		s32 group_coeff = model_data->group_values[i];
		s64 group_energy = group->op(model_data, group_coeff,
					     group->counter_block_offset);

		energy = kbase_ipa_add_saturate(energy, group_energy);
	}

	/* Range: 0 <= coeff < 2^57 */
	if (energy > 0)
		coeff = energy;

	/* Range: 0 <= coeff < 2^57 (because active_cycles >= 1). However, this
	 * can be constrained further: counter values can only increase by a
	 * theoretical maximum of about 64k per clock cycle. Beyond this, we'd
	 * have to sample every 1ms to avoid them overflowing at the lowest
	 * clock frequency (say 100MHz). Therefore, we can write the range of
	 * 'coeff' in terms of active_cycles:
	 *
	 * coeff = SUM(coeffN * counterN * num_cores_for_counterN)
	 * coeff <= SUM(coeffN * counterN) * max_num_cores
	 * coeff <= num_IPA_groups * max_coeff * max_counter * max_num_cores
	 *       (substitute max_counter = 2^16 * active_cycles)
	 * coeff <= num_IPA_groups * max_coeff * 2^16 * active_cycles * max_num_cores
	 * coeff <= 2^3 * 2^22 * 2^16 * active_cycles * 2^5
	 * coeff <= 2^46 * active_cycles
	 *
	 * So after the division: 0 <= coeff <= 2^46
	 */
	coeff = div_u64(coeff, active_cycles);

	/* Not all models were derived at the same reference voltage. Voltage
	 * scaling is done by multiplying by V^2, so we need to *divide* by
	 * Vref^2 here.
	 * Range: 0 <= coeff <= 2^49
	 */
	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));
	/* Range: 0 <= coeff <= 2^52 */
	coeff = div_u64(coeff * 1000, max(model_data->reference_voltage, 1));

	/* Scale by user-specified integer factor.
	 * Range: 0 <= coeff_mul < 2^57
	 */
	coeff_mul = coeff * model_data->scaling_factor;

	/* The power models have results with units
	 * mW/(MHz V^2), i.e. nW/(Hz V^2). With precision of 1/1000000, this
	 * becomes fW/(Hz V^2), which are the units of coeff_mul. However,
	 * kbase_scale_dynamic_power() expects units of pW/(Hz V^2), so divide
	 * by 1000.
	 * Range: 0 <= coeff_mul < 2^47
	 */
	coeff_mul = div_u64(coeff_mul, 1000u);

err0:
	/* Clamp to a sensible range - 2^16 gives about 14W at 400MHz/750mV */
	*coeffp = clamp(coeff_mul, (u64) 0, (u64) 1 << 16);
	return err;
}

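/**
 * kbase_ipa_vinstr_reset_data() - reset the model's counter data
 * @model: pointer to the model
 *
 * Currently not implemented; triggers a one-time warning if called.
 */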
void kbase_ipa_vinstr_reset_data(struct kbase_ipa_model *model)
{
	/* Currently not implemented */
	WARN_ON_ONCE(1);
}

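/**
 * kbase_ipa_vinstr_common_model_init() - initialize a counter-based model
 * @model:             pointer to the model to initialize
 * @ipa_groups_def:    array of IPA group definitions
 * @ipa_group_size:    number of entries in @ipa_groups_def
 * @get_active_cycles: callback returning the number of active GPU cycles
 * @reference_voltage: voltage of the operating point the model's
 *                     coefficients were derived at (in mV, judging by the
 *                     x1000 scaling in kbase_ipa_vinstr_dynamic_coeff())
 *
 * Allocates the model data, registers the per-group coefficients and the
 * "scale", "min_sample_cycles" and "reference_voltage" values as
 * user-settable model parameters, and attaches to the hwcnt virtualizer.
 *
 * Return: 0 on success, or an error code.
 */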
int kbase_ipa_vinstr_common_model_init(struct kbase_ipa_model *model,
				       const struct kbase_ipa_group *ipa_groups_def,
				       size_t ipa_group_size,
				       kbase_ipa_get_active_cycles_callback get_active_cycles,
				       s32 reference_voltage)
{
	int err = 0;
	size_t i;
	struct kbase_ipa_model_vinstr_data *model_data;

	if (!model || !ipa_groups_def || !ipa_group_size || !get_active_cycles)
		return -EINVAL;

	model_data = kzalloc(sizeof(*model_data), GFP_KERNEL);
	if (!model_data)
		return -ENOMEM;

	model_data->kbdev = model->kbdev;
	model_data->groups_def = ipa_groups_def;
	model_data->groups_def_num = ipa_group_size;
	model_data->get_active_cycles = get_active_cycles;

	model->model_data = (void *) model_data;

	for (i = 0; i < model_data->groups_def_num; ++i) {
		const struct kbase_ipa_group *group = &model_data->groups_def[i];

		model_data->group_values[i] = group->default_value;
		err = kbase_ipa_model_add_param_s32(model, group->name,
						    &model_data->group_values[i],
						    1, false);
		if (err)
			goto exit;
	}

	model_data->scaling_factor = DEFAULT_SCALING_FACTOR;
	err = kbase_ipa_model_add_param_s32(model, "scale",
					    &model_data->scaling_factor,
					    1, false);
	if (err)
		goto exit;

	model_data->min_sample_cycles = DEFAULT_MIN_SAMPLE_CYCLES;
	err = kbase_ipa_model_add_param_s32(model, "min_sample_cycles",
					    &model_data->min_sample_cycles,
					    1, false);
	if (err)
		goto exit;

	model_data->reference_voltage = reference_voltage;
	err = kbase_ipa_model_add_param_s32(model, "reference_voltage",
					    &model_data->reference_voltage,
					    1, false);
	if (err)
		goto exit;

	err = kbase_ipa_attach_vinstr(model_data);

exit:
	if (err) {
		kbase_ipa_model_param_free_all(model);
		kfree(model_data);
	}
	return err;
}

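/**
 * kbase_ipa_vinstr_common_model_term() - terminate a counter-based model
 * @model: pointer to the model being terminated
 *
 * Detaches from the hwcnt virtualizer and frees the model data.
 */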
void kbase_ipa_vinstr_common_model_term(struct kbase_ipa_model *model)
{
	struct kbase_ipa_model_vinstr_data *model_data =
		(struct kbase_ipa_model_vinstr_data *)model->model_data;

	kbase_ipa_detach_vinstr(model_data);
	kfree(model_data);
}