1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2021-2022 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 #include "mali_kbase.h"
23 #include "mali_kbase_kinstr_prfcnt.h"
24 #include "hwcnt/mali_kbase_hwcnt_virtualizer.h"
25 #include "hwcnt/mali_kbase_hwcnt_gpu.h"
26 #include <uapi/gpu/arm/bifrost/mali_kbase_ioctl.h>
27 #include "mali_malisw.h"
28 #include "mali_kbase_debug.h"
29
30 #include <linux/anon_inodes.h>
31 #include <linux/fcntl.h>
32 #include <linux/fs.h>
33 #include <linux/hrtimer.h>
34 #include <linux/log2.h>
35 #include <linux/mm.h>
36 #include <linux/mutex.h>
37 #include <linux/poll.h>
38 #include <linux/slab.h>
39 #include <linux/overflow.h>
40 #include <linux/version_compat_defs.h>
41 #include <linux/workqueue.h>
42
43 /* Explicitly include epoll header for old kernels. Not required from 4.16. */
44 #if KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE
45 #include <uapi/linux/eventpoll.h>
46 #endif
47
48 /* The minimum allowed interval between dumps, in nanoseconds
49 * (equivalent to a maximum dump rate of 10 kHz)
50 */
51 #define DUMP_INTERVAL_MIN_NS (100 * NSEC_PER_USEC)
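/* Illustrative arithmetic (not part of the driver): 100 * NSEC_PER_USEC is
 * 100,000 ns, i.e. a 100 us period, which corresponds to at most
 * 1e9 / 100,000 = 10,000 dumps per second (10 kHz). Periodic requests with a
 * shorter non-zero period are clamped up to this value during session setup
 * (see kbasep_kinstr_prfcnt_parse_setup()).
 */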
52
53 /* The maximum allowed buffers per client */
54 #define MAX_BUFFER_COUNT 32
55
56 /**
57 * struct kbase_kinstr_prfcnt_context - IOCTL interface for userspace hardware
58 * counters.
59 * @hvirt: Hardware counter virtualizer used by kinstr_prfcnt.
60 * @info_item_count: Number of metadata elements.
61 * @metadata: Hardware counter metadata provided by virtualizer.
62 * @lock: Lock protecting kinstr_prfcnt state.
63 * @suspend_count: Suspend reference count. If non-zero, timer and worker
64 * are prevented from being re-scheduled.
65 * @client_count: Number of kinstr_prfcnt clients.
66 * @clients: List of kinstr_prfcnt clients.
67 * @dump_timer: Timer that enqueues dump_work to a workqueue.
68 * @dump_work: Worker for performing periodic counter dumps.
69 */
70 struct kbase_kinstr_prfcnt_context {
71 struct kbase_hwcnt_virtualizer *hvirt;
72 u32 info_item_count;
73 const struct kbase_hwcnt_metadata *metadata;
74 struct mutex lock;
75 size_t suspend_count;
76 size_t client_count;
77 struct list_head clients;
78 struct hrtimer dump_timer;
79 struct work_struct dump_work;
80 };
81
82 /**
83 * struct kbase_kinstr_prfcnt_sample - Buffer and descriptor for sample data.
84 * @sample_meta: Pointer to sample metadata.
85 * @dump_buf: Dump buffer containing sample data.
86 */
87 struct kbase_kinstr_prfcnt_sample {
88 struct prfcnt_metadata *sample_meta;
89 struct kbase_hwcnt_dump_buffer dump_buf;
90 };
91
92 /**
93 * struct kbase_kinstr_prfcnt_sample_array - Array of sample data.
94 * @user_buf: Address of allocated userspace buffer. A single allocation is used
95 * for all Dump Buffers in the array.
96 * @sample_count: Number of allocated samples.
97 * @samples: Non-NULL pointer to the array of Dump Buffers.
98 */
99 struct kbase_kinstr_prfcnt_sample_array {
100 u8 *user_buf;
101 size_t sample_count;
102 struct kbase_kinstr_prfcnt_sample *samples;
103 };
104
105 /**
106 * struct kbase_kinstr_prfcnt_client_config - Client session configuration.
107 * @prfcnt_mode: Sampling mode: either manual or periodic.
108 * @counter_set: Set of performance counter blocks.
109 * @scope: Scope of performance counters to capture.
110 * @buffer_count: Number of buffers used to store samples.
111 * @period_ns: Sampling period, in nanoseconds, or 0 if manual mode.
112 * @phys_em: Enable map used by the GPU.
113 */
114 struct kbase_kinstr_prfcnt_client_config {
115 u8 prfcnt_mode;
116 u8 counter_set;
117 u8 scope;
118 u16 buffer_count;
119 u64 period_ns;
120 struct kbase_hwcnt_physical_enable_map phys_em;
121 };
122
123 /**
124 * enum kbase_kinstr_prfcnt_client_init_state - A list of
125 * initialisation states that the
126 * kinstr_prfcnt client can be in
127 * during initialisation. Useful
128 * for terminating a partially
129 * initialised client.
130 *
131 * @KINSTR_PRFCNT_UNINITIALISED : Client is uninitialised
132 * @KINSTR_PRFCNT_PARSE_SETUP : Parse the setup session
133 * @KINSTR_PRFCNT_ENABLE_MAP : Allocate memory for enable map
134 * @KINSTR_PRFCNT_DUMP_BUFFER : Allocate memory for dump buffer
135 * @KINSTR_PRFCNT_SAMPLE_ARRAY : Allocate memory for and initialise sample array
136 * @KINSTR_PRFCNT_VIRTUALIZER_CLIENT : Create virtualizer client
137 * @KINSTR_PRFCNT_WAITQ_MUTEX : Create and initialise mutex and waitqueue
138 * @KINSTR_PRFCNT_INITIALISED : Client is fully initialised
139 */
140 enum kbase_kinstr_prfcnt_client_init_state {
141 KINSTR_PRFCNT_UNINITIALISED,
142 KINSTR_PRFCNT_PARSE_SETUP = KINSTR_PRFCNT_UNINITIALISED,
143 KINSTR_PRFCNT_ENABLE_MAP,
144 KINSTR_PRFCNT_DUMP_BUFFER,
145 KINSTR_PRFCNT_SAMPLE_ARRAY,
146 KINSTR_PRFCNT_VIRTUALIZER_CLIENT,
147 KINSTR_PRFCNT_WAITQ_MUTEX,
148 KINSTR_PRFCNT_INITIALISED
149 };
150
151 /**
152 * struct kbase_kinstr_prfcnt_client - A kinstr_prfcnt client attached
153 * to a kinstr_prfcnt context.
154 * @kinstr_ctx: kinstr_prfcnt context client is attached to.
155 * @hvcli: Hardware counter virtualizer client.
156 * @node: Node used to attach this client to list in
157 * kinstr_prfcnt context.
158 * @cmd_sync_lock: Lock coordinating the reader interface for commands.
159 * @next_dump_time_ns: Time in ns when this client's next periodic dump must
160 * occur. If 0, not a periodic client.
161 * @dump_interval_ns: Interval between periodic dumps. If 0, not a periodic
162 * client.
163 * @sample_flags: Flags for the current active dumping sample, marking
164 * the conditions/events during the dump duration.
165 * @active: True if the client has been started.
166 * @config: Configuration of the client session.
167 * @enable_map: Counters enable map.
168 * @tmp_buf: Temporary buffer to use before handing over dump to
169 * client.
170 * @sample_arr: Array of dump buffers allocated by this client.
171 * @read_idx: Index of buffer read by userspace.
172 * @write_idx: Index of buffer being written by dump worker.
173 * @fetch_idx: Index of buffer being fetched by userspace, but
174 * pending a confirmation of being read (consumed) if it
175 * differs from the read_idx.
176 * @waitq: Client's notification queue.
177 * @sample_size: Size of the data required for one sample, in bytes.
178 * @sample_count: Number of samples the client is able to capture.
179 * @user_data: User data associated with the session.
180 * This is set when the session is started and stopped.
181 * This value is ignored for control commands that
182 * provide another value.
183 */
184 struct kbase_kinstr_prfcnt_client {
185 struct kbase_kinstr_prfcnt_context *kinstr_ctx;
186 struct kbase_hwcnt_virtualizer_client *hvcli;
187 struct list_head node;
188 struct mutex cmd_sync_lock;
189 u64 next_dump_time_ns;
190 u32 dump_interval_ns;
191 u32 sample_flags;
192 bool active;
193 struct kbase_kinstr_prfcnt_client_config config;
194 struct kbase_hwcnt_enable_map enable_map;
195 struct kbase_hwcnt_dump_buffer tmp_buf;
196 struct kbase_kinstr_prfcnt_sample_array sample_arr;
197 atomic_t read_idx;
198 atomic_t write_idx;
199 atomic_t fetch_idx;
200 wait_queue_head_t waitq;
201 size_t sample_size;
202 size_t sample_count;
203 u64 user_data;
204 };
205
206 static struct prfcnt_enum_item kinstr_prfcnt_supported_requests[] = {
207 {
208 /* Request description for MODE request */
209 .hdr = {
210 .item_type = PRFCNT_ENUM_TYPE_REQUEST,
211 .item_version = PRFCNT_READER_API_VERSION,
212 },
213 .u.request = {
214 .request_item_type = PRFCNT_REQUEST_MODE,
215 .versions_mask = 0x1,
216 },
217 },
218 {
219 /* Request description for ENABLE request */
220 .hdr = {
221 .item_type = PRFCNT_ENUM_TYPE_REQUEST,
222 .item_version = PRFCNT_READER_API_VERSION,
223 },
224 .u.request = {
225 .request_item_type = PRFCNT_REQUEST_ENABLE,
226 .versions_mask = 0x1,
227 },
228 },
229 };
230
231 /**
232 * kbasep_kinstr_prfcnt_hwcnt_reader_poll() - hwcnt reader's poll.
233 * @filp: Non-NULL pointer to file structure.
234 * @wait: Non-NULL pointer to poll table.
235 *
236 * Return: EPOLLIN | EPOLLRDNORM if data can be read without blocking, 0 if
237 * data can not be read without blocking, else EPOLLHUP | EPOLLERR.
238 */
239 static __poll_t
240 kbasep_kinstr_prfcnt_hwcnt_reader_poll(struct file *filp,
241 struct poll_table_struct *wait)
242 {
243 struct kbase_kinstr_prfcnt_client *cli;
244
245 if (!filp || !wait)
246 return EPOLLHUP | EPOLLERR;
247
248 cli = filp->private_data;
249
250 if (!cli)
251 return EPOLLHUP | EPOLLERR;
252
253 poll_wait(filp, &cli->waitq, wait);
254
255 if (atomic_read(&cli->write_idx) != atomic_read(&cli->fetch_idx))
256 return EPOLLIN | EPOLLRDNORM;
257
258 return (__poll_t)0;
259 }
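/* A minimal userspace sketch of waiting on this interface (illustrative only,
 * not part of the driver; "fd" is assumed to be the kinstr_prfcnt client file
 * descriptor returned by the setup ioctl):
 *
 *   struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *   if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *           // At least one sample is ready to fetch (write_idx != fetch_idx).
 *   }
 */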
260
261 /**
262 * kbasep_kinstr_prfcnt_next_dump_time_ns() - Calculate the next periodic
263 * dump time.
264 * @cur_ts_ns: Current time in nanoseconds.
265 * @interval: Interval between dumps in nanoseconds.
266 *
267 * Return: 0 if interval is 0 (i.e. a non-periodic client), or the next dump
268 * time that occurs after cur_ts_ns.
269 */
270 static u64 kbasep_kinstr_prfcnt_next_dump_time_ns(u64 cur_ts_ns, u32 interval)
271 {
272 /* Non-periodic client */
273 if (interval == 0)
274 return 0;
275
276 /*
277 * Return the next interval after the current time relative to t=0.
278 * This means multiple clients with the same period will synchronize,
279 * regardless of when they were started, allowing the worker to be
280 * scheduled less frequently.
281 */
282 do_div(cur_ts_ns, interval);
283
284 return (cur_ts_ns + 1) * interval;
285 }
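/* Worked example of the alignment above (illustrative numbers): with
 * interval = 1,000,000 ns and cur_ts_ns = 2,500,000 ns, do_div() leaves
 * cur_ts_ns = 2, so the function returns 3,000,000 ns. A second client with
 * the same period started at a different time gets the same 3,000,000 ns
 * boundary, so both can be served by a single wake-up of the worker.
 */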
286
287 /**
288 * kbasep_kinstr_prfcnt_timestamp_ns() - Get the current time in nanoseconds.
289 *
290 * Return: Current time in nanoseconds.
291 */
292 static u64 kbasep_kinstr_prfcnt_timestamp_ns(void)
293 {
294 return ktime_get_raw_ns();
295 }
296
297 /**
298 * kbasep_kinstr_prfcnt_reschedule_worker() - Update next dump times for all
299 * periodic kinstr_prfcnt clients,
300 * then reschedule the dump worker
301 * appropriately.
302 * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
303 *
304 * If there are no periodic clients, then the dump worker will not be
305 * rescheduled. Else, the dump worker will be rescheduled for the next
306 * periodic client dump.
307 */
308 static void kbasep_kinstr_prfcnt_reschedule_worker(
309 struct kbase_kinstr_prfcnt_context *kinstr_ctx)
310 {
311 u64 cur_ts_ns;
312 u64 shortest_period_ns = U64_MAX;
313 struct kbase_kinstr_prfcnt_client *pos;
314
315 WARN_ON(!kinstr_ctx);
316 lockdep_assert_held(&kinstr_ctx->lock);
317 cur_ts_ns = kbasep_kinstr_prfcnt_timestamp_ns();
318
319 /*
320 * This loop fulfills 2 separate tasks that don't affect each other:
321 *
322 * 1) Determine the shortest period.
323 * 2) Update the next dump time of clients that have already been
324 * dumped. It's important not to alter the next dump time of clients
325 * that haven't been dumped yet.
326 *
327 * For the sake of efficiency, the rescheduling decision ignores the time
328 * of the next dump and just uses the shortest period among all periodic
329 * clients. It is more efficient to serve multiple dump requests at once,
330 * rather than trying to reschedule the worker to serve each request
331 * individually.
332 */
333 list_for_each_entry(pos, &kinstr_ctx->clients, node) {
334 /* Ignore clients that are not periodic or not active. */
335 if (pos->active && pos->dump_interval_ns > 0) {
336 shortest_period_ns =
337 MIN(shortest_period_ns, pos->dump_interval_ns);
338
339 /* Next dump should happen exactly one period after the last dump.
340 * If last dump was overdue and scheduled to happen more than one
341 * period ago, compensate for that by scheduling next dump in the
342 * immediate future.
343 */
344 if (pos->next_dump_time_ns < cur_ts_ns)
345 pos->next_dump_time_ns =
346 MAX(cur_ts_ns + 1,
347 pos->next_dump_time_ns +
348 pos->dump_interval_ns);
349 }
350 }
351
352 /* Cancel the timer if it is already pending */
353 hrtimer_cancel(&kinstr_ctx->dump_timer);
354
355 /* Start the timer if there are periodic clients and kinstr_prfcnt is not
356 * suspended.
357 */
358 if ((shortest_period_ns != U64_MAX) &&
359 (kinstr_ctx->suspend_count == 0)) {
360 u64 next_schedule_time_ns =
361 kbasep_kinstr_prfcnt_next_dump_time_ns(
362 cur_ts_ns, shortest_period_ns);
363 hrtimer_start(&kinstr_ctx->dump_timer,
364 ns_to_ktime(next_schedule_time_ns - cur_ts_ns),
365 HRTIMER_MODE_REL);
366 }
367 }
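/* Example of the rescheduling policy (illustrative): with two active periodic
 * clients at 1 ms and 5 ms, shortest_period_ns is 1 ms, so the timer is armed
 * for the next 1 ms boundary. The 5 ms client is simply skipped by the worker
 * on wake-ups that occur before its own next_dump_time_ns has passed.
 */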
368
369 static enum prfcnt_block_type
370 kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(u64 type)
371 {
372 enum prfcnt_block_type block_type;
373
374 switch (type) {
375 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE:
376 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE2:
377 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE3:
378 block_type = PRFCNT_BLOCK_TYPE_FE;
379 break;
380
381 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER:
382 block_type = PRFCNT_BLOCK_TYPE_TILER;
383 break;
384
385 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC:
386 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC2:
387 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC3:
388 block_type = PRFCNT_BLOCK_TYPE_SHADER_CORE;
389 break;
390
391 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS:
392 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS2:
393 block_type = PRFCNT_BLOCK_TYPE_MEMORY;
394 break;
395
396 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_FE_UNDEFINED:
397 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_SC_UNDEFINED:
398 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_TILER_UNDEFINED:
399 case KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS_UNDEFINED:
400 default:
401 block_type = PRFCNT_BLOCK_TYPE_RESERVED;
402 break;
403 }
404
405 return block_type;
406 }
407
408 static bool kbase_kinstr_is_block_type_reserved(const struct kbase_hwcnt_metadata *metadata,
409 size_t grp, size_t blk)
410 {
411 enum prfcnt_block_type block_type = kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
412 kbase_hwcnt_metadata_block_type(metadata, grp, blk));
413
414 return block_type == PRFCNT_BLOCK_TYPE_RESERVED;
415 }
416
417 /**
418 * kbasep_kinstr_prfcnt_set_block_meta_items() - Populate a sample's block meta
419 * item array.
420 * @enable_map: Non-NULL pointer to the map of enabled counters.
421 * @dst: Non-NULL pointer to the sample's dump buffer object.
422 * @block_meta_base: Non-NULL double pointer to the start of the block meta
423 * data items.
424 * @base_addr: Address of allocated pages for array of samples. Used
425 * to calculate offset of block values.
426 * @counter_set: The SET which blocks represent.
427 *
428 * Return: 0 on success, else error code.
429 */
430 int kbasep_kinstr_prfcnt_set_block_meta_items(struct kbase_hwcnt_enable_map *enable_map,
431 struct kbase_hwcnt_dump_buffer *dst,
432 struct prfcnt_metadata **block_meta_base,
433 u8 *base_addr, u8 counter_set)
434 {
435 size_t grp, blk, blk_inst;
436 struct prfcnt_metadata **ptr_md = block_meta_base;
437 const struct kbase_hwcnt_metadata *metadata;
438 uint8_t block_idx = 0;
439
440 if (!dst || !*block_meta_base)
441 return -EINVAL;
442
443 metadata = dst->metadata;
444 kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
445 u8 *dst_blk;
446
447 /* Block indices must be reported with no gaps. */
448 if (blk_inst == 0)
449 block_idx = 0;
450
451 /* Skip reserved, unavailable or non-enabled blocks */
452 if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
453 !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) ||
454 !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
455 continue;
456
457 dst_blk = (u8 *)kbase_hwcnt_dump_buffer_block_instance(dst, grp, blk, blk_inst);
458 (*ptr_md)->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_BLOCK;
459 (*ptr_md)->hdr.item_version = PRFCNT_READER_API_VERSION;
460 (*ptr_md)->u.block_md.block_type =
461 kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
462 kbase_hwcnt_metadata_block_type(metadata, grp,
463 blk));
464 (*ptr_md)->u.block_md.block_idx = block_idx;
465 (*ptr_md)->u.block_md.set = counter_set;
466 (*ptr_md)->u.block_md.block_state = BLOCK_STATE_UNKNOWN;
467 (*ptr_md)->u.block_md.values_offset = (u32)(dst_blk - base_addr);
468
469 /* update the buf meta data block pointer to next item */
470 (*ptr_md)++;
471 block_idx++;
472 }
473
474 return 0;
475 }
476
477 /**
478 * kbasep_kinstr_prfcnt_set_sample_metadata() - Set sample metadata for sample
479 * output.
480 * @cli: Non-NULL pointer to a kinstr_prfcnt client.
481 * @dump_buf: Non-NULL pointer to dump buffer where sample is stored.
482 * @ptr_md: Non-NULL pointer to sample metadata.
483 */
484 static void kbasep_kinstr_prfcnt_set_sample_metadata(
485 struct kbase_kinstr_prfcnt_client *cli,
486 struct kbase_hwcnt_dump_buffer *dump_buf,
487 struct prfcnt_metadata *ptr_md)
488 {
489 u8 clk_cnt, i;
490
491 clk_cnt = cli->kinstr_ctx->metadata->clk_cnt;
492
493 /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item */
494 ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_SAMPLE;
495 ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION;
496 ptr_md->u.sample_md.seq = atomic_read(&cli->write_idx);
497 ptr_md->u.sample_md.flags = cli->sample_flags;
498
499 /* Place the PRFCNT_SAMPLE_META_TYPE_CLOCK optionally as the 2nd */
500 ptr_md++;
501 if (clk_cnt > MAX_REPORTED_DOMAINS)
502 clk_cnt = MAX_REPORTED_DOMAINS;
503
504 /* Handle the prfcnt_clock_metadata meta item */
505 ptr_md->hdr.item_type = PRFCNT_SAMPLE_META_TYPE_CLOCK;
506 ptr_md->hdr.item_version = PRFCNT_READER_API_VERSION;
507 ptr_md->u.clock_md.num_domains = clk_cnt;
508 for (i = 0; i < clk_cnt; i++)
509 ptr_md->u.clock_md.cycles[i] = dump_buf->clk_cnt_buf[i];
510
511 /* Dealing with counter blocks */
512 ptr_md++;
513 if (WARN_ON(kbasep_kinstr_prfcnt_set_block_meta_items(&cli->enable_map, dump_buf, &ptr_md,
514 cli->sample_arr.user_buf,
515 cli->config.counter_set)))
516 return;
517
518 /* Handle the last sentinel item */
519 ptr_md->hdr.item_type = FLEX_LIST_TYPE_NONE;
520 ptr_md->hdr.item_version = 0;
521 }
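/* The resulting metadata stream for one sample is a flat array of
 * struct prfcnt_metadata items laid out as (illustrative):
 *
 *   [SAMPLE][CLOCK][BLOCK]...[BLOCK][NONE sentinel]
 *
 * i.e. exactly one SAMPLE item first, one CLOCK item second, one BLOCK item
 * per enabled and available block instance, and a FLEX_LIST_TYPE_NONE item
 * terminating the list. This matches the md_count computed by
 * kbasep_kinstr_prfcnt_get_sample_md_count().
 */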
522
523 /**
524 * kbasep_kinstr_prfcnt_client_output_sample() - Assemble a sample for output.
525 * @cli: Non-NULL pointer to a kinstr_prfcnt client.
526 * @buf_idx: The index to the sample array for saving the sample.
527 * @user_data: User data to return to the user.
528 * @ts_start_ns: Time stamp for the start point of the sample dump.
529 * @ts_end_ns: Time stamp for the end point of the sample dump.
530 */
531 static void kbasep_kinstr_prfcnt_client_output_sample(
532 struct kbase_kinstr_prfcnt_client *cli, unsigned int buf_idx,
533 u64 user_data, u64 ts_start_ns, u64 ts_end_ns)
534 {
535 struct kbase_hwcnt_dump_buffer *dump_buf;
536 struct kbase_hwcnt_dump_buffer *tmp_buf = &cli->tmp_buf;
537 struct prfcnt_metadata *ptr_md;
538
539 if (WARN_ON(buf_idx >= cli->sample_arr.sample_count))
540 return;
541
542 dump_buf = &cli->sample_arr.samples[buf_idx].dump_buf;
543 ptr_md = cli->sample_arr.samples[buf_idx].sample_meta;
544
545 /* Patch the dump buf headers, to hide the counters that other hwcnt
546 * clients are using.
547 */
548 kbase_hwcnt_gpu_patch_dump_headers(tmp_buf, &cli->enable_map);
549
550 /* Copy the temp buffer to the userspace visible buffer. The strict
551 * variant will explicitly zero any non-enabled counters to ensure
552 * nothing except exactly what the user asked for is made visible.
553 */
554 kbase_hwcnt_dump_buffer_copy_strict(dump_buf, tmp_buf,
555 &cli->enable_map);
556
557 /* PRFCNT_SAMPLE_META_TYPE_SAMPLE must be the first item.
558 * Set timestamp and user data for real dump.
559 */
560 ptr_md->u.sample_md.timestamp_start = ts_start_ns;
561 ptr_md->u.sample_md.timestamp_end = ts_end_ns;
562 ptr_md->u.sample_md.user_data = user_data;
563
564 kbasep_kinstr_prfcnt_set_sample_metadata(cli, dump_buf, ptr_md);
565 }
566
567 /**
568 * kbasep_kinstr_prfcnt_client_dump() - Perform a dump for a client.
569 * @cli: Non-NULL pointer to a kinstr_prfcnt client.
570 * @event_id: Event type that triggered the dump.
571 * @user_data: User data to return to the user.
572 *
573 * Return: 0 on success, else error code.
574 */
575 static int kbasep_kinstr_prfcnt_client_dump(struct kbase_kinstr_prfcnt_client *cli,
576 enum base_hwcnt_reader_event event_id, u64 user_data)
577 {
578 int ret;
579 u64 ts_start_ns = 0;
580 u64 ts_end_ns = 0;
581 unsigned int write_idx;
582 unsigned int read_idx;
583 size_t available_samples_count;
584
585 WARN_ON(!cli);
586 lockdep_assert_held(&cli->kinstr_ctx->lock);
587
588 write_idx = atomic_read(&cli->write_idx);
589 read_idx = atomic_read(&cli->read_idx);
590
591 /* Check if there is a place to copy the HWC block into. Calculate the
592 * number of available samples, taking into account the type
593 * of dump.
594 */
595 available_samples_count = cli->sample_arr.sample_count;
596 WARN_ON(available_samples_count < 1);
597 /* Reserve one slot to store the implicit sample taken on CMD_STOP */
598 available_samples_count -= 1;
599 if (write_idx - read_idx == available_samples_count) {
600 /* For periodic sampling, the current active dump
601 * will be accumulated in the next sample, when
602 * a buffer becomes available.
603 */
604 if (event_id == BASE_HWCNT_READER_EVENT_PERIODIC)
605 cli->sample_flags |= SAMPLE_FLAG_OVERFLOW;
606 return -EBUSY;
607 }
608
609 /* For the rest of the function, use the actual sample_count
610 * that represents the real size of the array.
611 */
612 write_idx %= cli->sample_arr.sample_count;
613
614 ret = kbase_hwcnt_virtualizer_client_dump(cli->hvcli, &ts_start_ns, &ts_end_ns,
615 &cli->tmp_buf);
616 /* HWC dump error, set the sample with error flag */
617 if (ret)
618 cli->sample_flags |= SAMPLE_FLAG_ERROR;
619
620 /* Make the sample ready and copy it to the userspace mapped buffer */
621 kbasep_kinstr_prfcnt_client_output_sample(cli, write_idx, user_data, ts_start_ns,
622 ts_end_ns);
623
624 /* Notify client. Make sure all changes to memory are visible. */
625 wmb();
626 atomic_inc(&cli->write_idx);
627 wake_up_interruptible(&cli->waitq);
628 /* Reset the flags for the next sample dump */
629 cli->sample_flags = 0;
630
631 return 0;
632 }
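/* Note on the ring indices used above (descriptive, not normative):
 * write_idx, read_idx and fetch_idx are free-running counters, so the number
 * of queued samples is (write_idx - read_idx), and an index is only reduced
 * modulo sample_count when addressing the array. One slot is deliberately
 * kept free so that the implicit sample taken on CMD_STOP can always be
 * stored.
 */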
633
634 static int
635 kbasep_kinstr_prfcnt_client_start(struct kbase_kinstr_prfcnt_client *cli,
636 u64 user_data)
637 {
638 int ret;
639 u64 tm_start, tm_end;
640 unsigned int write_idx;
641 unsigned int read_idx;
642 size_t available_samples_count;
643
644 WARN_ON(!cli);
645 lockdep_assert_held(&cli->cmd_sync_lock);
646
647 /* If the client is already started, the command is a no-op */
648 if (cli->active)
649 return 0;
650
651 write_idx = atomic_read(&cli->write_idx);
652 read_idx = atomic_read(&cli->read_idx);
653
654 /* Check whether there is space to store at least an implicit sample
655 * corresponding to CMD_STOP.
656 */
657 available_samples_count = cli->sample_count - (write_idx - read_idx);
658 if (!available_samples_count)
659 return -EBUSY;
660
661 kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
662 &cli->config.phys_em);
663
664 /* Enable all the available clock domains in clk_enable_map. */
665 cli->enable_map.clk_enable_map = (1ull << cli->kinstr_ctx->metadata->clk_cnt) - 1;
666
667 mutex_lock(&cli->kinstr_ctx->lock);
668 /* Enable HWC from the configuration of the client creation */
669 ret = kbase_hwcnt_virtualizer_client_set_counters(
670 cli->hvcli, &cli->enable_map, &tm_start, &tm_end, NULL);
671
672 if (!ret) {
673 cli->active = true;
674 cli->user_data = user_data;
675 cli->sample_flags = 0;
676
677 if (cli->dump_interval_ns)
678 kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx);
679 }
680
681 mutex_unlock(&cli->kinstr_ctx->lock);
682
683 return ret;
684 }
685
686 static int
687 kbasep_kinstr_prfcnt_client_stop(struct kbase_kinstr_prfcnt_client *cli,
688 u64 user_data)
689 {
690 int ret;
691 u64 tm_start = 0;
692 u64 tm_end = 0;
693 struct kbase_hwcnt_physical_enable_map phys_em;
694 size_t available_samples_count;
695 unsigned int write_idx;
696 unsigned int read_idx;
697
698 WARN_ON(!cli);
699 lockdep_assert_held(&cli->cmd_sync_lock);
700
701 /* If the client is not started, the command is invalid */
702 if (!cli->active)
703 return -EINVAL;
704
705 mutex_lock(&cli->kinstr_ctx->lock);
706
707 /* Disable counters under the lock, so we do not race with the
708 * sampling thread.
709 */
710 phys_em.fe_bm = 0;
711 phys_em.tiler_bm = 0;
712 phys_em.mmu_l2_bm = 0;
713 phys_em.shader_bm = 0;
714
715 kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map, &phys_em);
716
717 /* Check whether there is a buffer to hold the last sample */
718 write_idx = atomic_read(&cli->write_idx);
719 read_idx = atomic_read(&cli->read_idx);
720
721 available_samples_count = cli->sample_count - (write_idx - read_idx);
722
723 ret = kbase_hwcnt_virtualizer_client_set_counters(cli->hvcli,
724 &cli->enable_map,
725 &tm_start, &tm_end,
726 &cli->tmp_buf);
727 /* If the last stop sample is in error, set the sample flag */
728 if (ret)
729 cli->sample_flags |= SAMPLE_FLAG_ERROR;
730
731 /* There must be a place to save the last stop produced sample */
732 if (!WARN_ON(!available_samples_count)) {
733 write_idx %= cli->sample_arr.sample_count;
734 /* Handle the last stop sample */
735 kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
736 &cli->config.phys_em);
737 /* As this is a stop sample, mark it as MANUAL */
738 kbasep_kinstr_prfcnt_client_output_sample(
739 cli, write_idx, user_data, tm_start, tm_end);
740 /* Notify client. Make sure all changes to memory are visible. */
741 wmb();
742 atomic_inc(&cli->write_idx);
743 wake_up_interruptible(&cli->waitq);
744 }
745
746 cli->active = false;
747 cli->user_data = user_data;
748
749 if (cli->dump_interval_ns)
750 kbasep_kinstr_prfcnt_reschedule_worker(cli->kinstr_ctx);
751
752 mutex_unlock(&cli->kinstr_ctx->lock);
753
754 return 0;
755 }
756
757 static int
758 kbasep_kinstr_prfcnt_client_sync_dump(struct kbase_kinstr_prfcnt_client *cli,
759 u64 user_data)
760 {
761 int ret;
762
763 lockdep_assert_held(&cli->cmd_sync_lock);
764
765 /* If the client is not started, or is not in manual mode, the command is invalid */
766 if (!cli->active || cli->dump_interval_ns)
767 return -EINVAL;
768
769 mutex_lock(&cli->kinstr_ctx->lock);
770
771 ret = kbasep_kinstr_prfcnt_client_dump(cli, BASE_HWCNT_READER_EVENT_MANUAL, user_data);
772
773 mutex_unlock(&cli->kinstr_ctx->lock);
774
775 return ret;
776 }
777
778 static int
779 kbasep_kinstr_prfcnt_client_discard(struct kbase_kinstr_prfcnt_client *cli)
780 {
781 unsigned int write_idx;
782
783 WARN_ON(!cli);
784 lockdep_assert_held(&cli->cmd_sync_lock);
785
786 mutex_lock(&cli->kinstr_ctx->lock);
787
788 write_idx = atomic_read(&cli->write_idx);
789
790 /* Discard (clear) all internally buffered samples. Note, if there
791 * is a fetched sample in flight, one should not touch the read index,
792 * leaving it alone for the put-sample operation to update it. The
793 * consistency between the read_idx and the fetch_idx is coordinated by
794 * holding the cli->cmd_sync_lock.
795 */
796 if (atomic_read(&cli->fetch_idx) != atomic_read(&cli->read_idx)) {
797 atomic_set(&cli->fetch_idx, write_idx);
798 } else {
799 atomic_set(&cli->fetch_idx, write_idx);
800 atomic_set(&cli->read_idx, write_idx);
801 }
802
803 mutex_unlock(&cli->kinstr_ctx->lock);
804
805 return 0;
806 }
807
808 int kbasep_kinstr_prfcnt_cmd(struct kbase_kinstr_prfcnt_client *cli,
809 struct prfcnt_control_cmd *control_cmd)
810 {
811 int ret = 0;
812
813 mutex_lock(&cli->cmd_sync_lock);
814
815 switch (control_cmd->cmd) {
816 case PRFCNT_CONTROL_CMD_START:
817 ret = kbasep_kinstr_prfcnt_client_start(cli,
818 control_cmd->user_data);
819 break;
820 case PRFCNT_CONTROL_CMD_STOP:
821 ret = kbasep_kinstr_prfcnt_client_stop(cli,
822 control_cmd->user_data);
823 break;
824 case PRFCNT_CONTROL_CMD_SAMPLE_SYNC:
825 ret = kbasep_kinstr_prfcnt_client_sync_dump(
826 cli, control_cmd->user_data);
827 break;
828 case PRFCNT_CONTROL_CMD_DISCARD:
829 ret = kbasep_kinstr_prfcnt_client_discard(cli);
830 break;
831 default:
832 ret = -EINVAL;
833 break;
834 }
835
836 mutex_unlock(&cli->cmd_sync_lock);
837
838 return ret;
839 }
840
841 static int
842 kbasep_kinstr_prfcnt_get_sample(struct kbase_kinstr_prfcnt_client *cli,
843 struct prfcnt_sample_access *sample_access)
844 {
845 unsigned int write_idx;
846 unsigned int read_idx;
847 unsigned int fetch_idx;
848 u64 sample_offset_bytes;
849 struct prfcnt_metadata *sample_meta;
850 int err = 0;
851
852 mutex_lock(&cli->cmd_sync_lock);
853 write_idx = atomic_read(&cli->write_idx);
854 read_idx = atomic_read(&cli->read_idx);
855
856 if (write_idx == read_idx) {
857 err = -EINVAL;
858 goto error_out;
859 }
860
861 /* If a sample has already been fetched through the client interface,
862 * reflected by the fetch index not being equal to read_idx (typically
863 * read_idx + 1 == fetch_idx),
864 * a further fetch is not allowed until the previously fetched buffer
865 * is put back (which restores read_idx == fetch_idx). By design, the
866 * "plus one" relation above (i.e. the typical case) only fails to hold
867 * if a sample-discard operation occurred after the sample in question
868 * had already been fetched, in which case
869 * the fetch_idx can have a delta larger than 1 relative to the
870 * read_idx.
871 */
872 fetch_idx = atomic_read(&cli->fetch_idx);
873 if (read_idx != fetch_idx) {
874 err = -EBUSY;
875 goto error_out;
876 }
877
878 read_idx %= cli->sample_arr.sample_count;
879 sample_meta = cli->sample_arr.samples[read_idx].sample_meta;
880 sample_offset_bytes = (u8 *)sample_meta - cli->sample_arr.user_buf;
881
882 sample_access->sequence = sample_meta->u.sample_md.seq;
883 sample_access->sample_offset_bytes = sample_offset_bytes;
884
885 /* Mark the sample as fetched by advancing the fetch index */
886 atomic_inc(&cli->fetch_idx);
887
888 error_out:
889 mutex_unlock(&cli->cmd_sync_lock);
890 return err;
891 }
892
893 static int
894 kbasep_kinstr_prfcnt_put_sample(struct kbase_kinstr_prfcnt_client *cli,
895 struct prfcnt_sample_access *sample_access)
896 {
897 unsigned int write_idx;
898 unsigned int read_idx;
899 unsigned int fetch_idx;
900 u64 sample_offset_bytes;
901 int err = 0;
902
903 mutex_lock(&cli->cmd_sync_lock);
904 write_idx = atomic_read(&cli->write_idx);
905 read_idx = atomic_read(&cli->read_idx);
906
907 if (write_idx == read_idx || sample_access->sequence != read_idx) {
908 err = -EINVAL;
909 goto error_out;
910 }
911
912 read_idx %= cli->sample_arr.sample_count;
913 sample_offset_bytes =
914 (u8 *)cli->sample_arr.samples[read_idx].sample_meta - cli->sample_arr.user_buf;
915
916 if (sample_access->sample_offset_bytes != sample_offset_bytes) {
917 err = -EINVAL;
918 goto error_out;
919 }
920
921 fetch_idx = atomic_read(&cli->fetch_idx);
922 WARN_ON(read_idx == fetch_idx);
923 /* Setting the read_idx to match the fetch_idx signals that there is no
924 * in-flight fetched sample.
925 */
926 atomic_set(&cli->read_idx, fetch_idx);
927
928 error_out:
929 mutex_unlock(&cli->cmd_sync_lock);
930 return err;
931 }
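/* A minimal userspace get/put sequence could look like this (illustrative
 * sketch, not part of the driver; "fd" is assumed to be the client file
 * descriptor and "mapped_buf" the result of mmap()ing it):
 *
 *   struct prfcnt_sample_access sa = { 0 };
 *
 *   if (!ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE, &sa)) {
 *           void *sample = (char *)mapped_buf + sa.sample_offset_bytes;
 *           // ... parse the prfcnt_metadata items of the sample ...
 *           ioctl(fd, KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE, &sa);
 *   }
 */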
932
933 /**
934 * kbasep_kinstr_prfcnt_hwcnt_reader_ioctl() - hwcnt reader's ioctl.
935 * @filp: Non-NULL pointer to file structure.
936 * @cmd: User command.
937 * @arg: Command's argument.
938 *
939 * Return: 0 on success, else error code.
940 */
941 static long kbasep_kinstr_prfcnt_hwcnt_reader_ioctl(struct file *filp,
942 unsigned int cmd,
943 unsigned long arg)
944 {
945 long rcode = 0;
946 struct kbase_kinstr_prfcnt_client *cli;
947 void __user *uarg = (void __user *)arg;
948
949 if (!filp)
950 return -EINVAL;
951
952 cli = filp->private_data;
953
954 if (!cli)
955 return -EINVAL;
956
957 switch (_IOC_NR(cmd)) {
958 case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_CMD): {
959 struct prfcnt_control_cmd control_cmd;
960 int err;
961
962 err = copy_from_user(&control_cmd, uarg, sizeof(control_cmd));
963 if (err)
964 return -EFAULT;
965 rcode = kbasep_kinstr_prfcnt_cmd(cli, &control_cmd);
966 } break;
967 case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_GET_SAMPLE): {
968 struct prfcnt_sample_access sample_access;
969 int err;
970
971 memset(&sample_access, 0, sizeof(sample_access));
972 rcode = kbasep_kinstr_prfcnt_get_sample(cli, &sample_access);
973 err = copy_to_user(uarg, &sample_access, sizeof(sample_access));
974 if (err)
975 return -EFAULT;
976 } break;
977 case _IOC_NR(KBASE_IOCTL_KINSTR_PRFCNT_PUT_SAMPLE): {
978 struct prfcnt_sample_access sample_access;
979 int err;
980
981 err = copy_from_user(&sample_access, uarg,
982 sizeof(sample_access));
983 if (err)
984 return -EFAULT;
985 rcode = kbasep_kinstr_prfcnt_put_sample(cli, &sample_access);
986 } break;
987 default:
988 rcode = -EINVAL;
989 break;
990 }
991
992 return rcode;
993 }
994
995 /**
996 * kbasep_kinstr_prfcnt_hwcnt_reader_mmap() - hwcnt reader's mmap.
997 * @filp: Non-NULL pointer to file structure.
998 * @vma: Non-NULL pointer to vma structure.
999 *
1000 * Return: 0 on success, else error code.
1001 */
1002 static int kbasep_kinstr_prfcnt_hwcnt_reader_mmap(struct file *filp,
1003 struct vm_area_struct *vma)
1004 {
1005 struct kbase_kinstr_prfcnt_client *cli;
1006
1007 if (!filp || !vma)
1008 return -EINVAL;
1009
1010 cli = filp->private_data;
1011 if (!cli)
1012 return -EINVAL;
1013
1014 return remap_vmalloc_range(vma, cli->sample_arr.user_buf, 0);
1015 }
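/* Userspace is expected to map the whole sample area in one go, e.g.
 * (illustrative; "total_size" must cover the buffer_count * sample_size bytes
 * backing user_buf):
 *
 *   void *mapped_buf = mmap(NULL, total_size, PROT_READ, MAP_SHARED, fd, 0);
 *
 * Offsets returned by GET_SAMPLE (sample_offset_bytes) are relative to the
 * start of this mapping.
 */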
1016
1017 static void kbasep_kinstr_prfcnt_sample_array_free(
1018 struct kbase_kinstr_prfcnt_sample_array *sample_arr)
1019 {
1020 if (!sample_arr)
1021 return;
1022
1023 kfree(sample_arr->samples);
1024 vfree(sample_arr->user_buf);
1025 memset(sample_arr, 0, sizeof(*sample_arr));
1026 }
1027
1028 static void
1029 kbasep_kinstr_prfcnt_client_destroy_partial(struct kbase_kinstr_prfcnt_client *cli,
1030 enum kbase_kinstr_prfcnt_client_init_state init_state)
1031 {
1032 if (!cli)
1033 return;
1034
1035 while (init_state-- > KINSTR_PRFCNT_UNINITIALISED) {
1036 switch (init_state) {
1037 case KINSTR_PRFCNT_INITIALISED:
1038 /* This shouldn't be reached */
1039 break;
1040 case KINSTR_PRFCNT_WAITQ_MUTEX:
1041 mutex_destroy(&cli->cmd_sync_lock);
1042 break;
1043 case KINSTR_PRFCNT_VIRTUALIZER_CLIENT:
1044 kbase_hwcnt_virtualizer_client_destroy(cli->hvcli);
1045 break;
1046 case KINSTR_PRFCNT_SAMPLE_ARRAY:
1047 kbasep_kinstr_prfcnt_sample_array_free(&cli->sample_arr);
1048 break;
1049 case KINSTR_PRFCNT_DUMP_BUFFER:
1050 kbase_hwcnt_dump_buffer_free(&cli->tmp_buf);
1051 break;
1052 case KINSTR_PRFCNT_ENABLE_MAP:
1053 kbase_hwcnt_enable_map_free(&cli->enable_map);
1054 break;
1055 case KINSTR_PRFCNT_PARSE_SETUP:
1056 /* Nothing to do here */
1057 break;
1058 }
1059 }
1060 kfree(cli);
1061 }
1062
1063 void kbasep_kinstr_prfcnt_client_destroy(struct kbase_kinstr_prfcnt_client *cli)
1064 {
1065 kbasep_kinstr_prfcnt_client_destroy_partial(cli, KINSTR_PRFCNT_INITIALISED);
1066 }
1067
1068 /**
1069 * kbasep_kinstr_prfcnt_hwcnt_reader_release() - hwcnt reader's release.
1070 * @inode: Non-NULL pointer to inode structure.
1071 * @filp: Non-NULL pointer to file structure.
1072 *
1073 * Return: 0 always.
1074 */
1075 static int kbasep_kinstr_prfcnt_hwcnt_reader_release(struct inode *inode,
1076 struct file *filp)
1077 {
1078 struct kbase_kinstr_prfcnt_client *cli = filp->private_data;
1079
1080 mutex_lock(&cli->kinstr_ctx->lock);
1081
1082 WARN_ON(cli->kinstr_ctx->client_count == 0);
1083 if (cli->kinstr_ctx->client_count > 0)
1084 cli->kinstr_ctx->client_count--;
1085 list_del(&cli->node);
1086
1087 mutex_unlock(&cli->kinstr_ctx->lock);
1088
1089 kbasep_kinstr_prfcnt_client_destroy(cli);
1090
1091 return 0;
1092 }
1093
1094 /* kinstr_prfcnt client file operations */
1095 static const struct file_operations kinstr_prfcnt_client_fops = {
1096 .owner = THIS_MODULE,
1097 .poll = kbasep_kinstr_prfcnt_hwcnt_reader_poll,
1098 .unlocked_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl,
1099 .compat_ioctl = kbasep_kinstr_prfcnt_hwcnt_reader_ioctl,
1100 .mmap = kbasep_kinstr_prfcnt_hwcnt_reader_mmap,
1101 .release = kbasep_kinstr_prfcnt_hwcnt_reader_release,
1102 };
1103
1104 size_t kbasep_kinstr_prfcnt_get_sample_md_count(const struct kbase_hwcnt_metadata *metadata,
1105 struct kbase_hwcnt_enable_map *enable_map)
1106 {
1107 size_t grp, blk, blk_inst;
1108 size_t md_count = 0;
1109
1110 if (!metadata)
1111 return 0;
1112
1113 kbase_hwcnt_metadata_for_each_block(metadata, grp, blk, blk_inst) {
1114 /* Skip unavailable, non-enabled or reserved blocks */
1115 if (kbase_kinstr_is_block_type_reserved(metadata, grp, blk) ||
1116 !kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk, blk_inst) ||
1117 !kbase_hwcnt_enable_map_block_enabled(enable_map, grp, blk, blk_inst))
1118 continue;
1119
1120 md_count++;
1121 }
1122
1123 /* add counts for clock_meta and sample meta, respectively */
1124 md_count += 2;
1125
1126 /* Reserve one for last sentinel item. */
1127 md_count++;
1128
1129 return md_count;
1130 }
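/* Example count (illustrative configuration): with 1 FE, 1 tiler, 8 shader
 * core and 4 memsys block instances enabled and available, 14 block items are
 * needed; adding the sample item, the clock item and the trailing sentinel
 * gives md_count = 17.
 */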
1131
1132 static size_t kbasep_kinstr_prfcnt_get_sample_size(struct kbase_kinstr_prfcnt_client *cli,
1133 const struct kbase_hwcnt_metadata *metadata)
1134 {
1135 size_t dump_buf_bytes;
1136 size_t clk_cnt_buf_bytes;
1137 size_t sample_meta_bytes;
1138 struct kbase_hwcnt_dump_buffer *dump_buf = &cli->tmp_buf;
1139 size_t md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map);
1140
1141 if (!metadata)
1142 return 0;
1143
1144 sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count;
1145 dump_buf_bytes = metadata->dump_buf_bytes;
1146 clk_cnt_buf_bytes = sizeof(*dump_buf->clk_cnt_buf) * metadata->clk_cnt;
1147
1148 return (sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes);
1149 }
1150
1151 /**
1152 * kbasep_kinstr_prfcnt_dump_worker() - Dump worker that dumps all periodic
1153 * clients that need to be dumped, then
1154 * reschedules itself.
1155 * @work: Work structure.
1156 */
1157 static void kbasep_kinstr_prfcnt_dump_worker(struct work_struct *work)
1158 {
1159 struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of(
1160 work, struct kbase_kinstr_prfcnt_context, dump_work);
1161 struct kbase_kinstr_prfcnt_client *pos;
1162 u64 cur_time_ns;
1163
1164 mutex_lock(&kinstr_ctx->lock);
1165
1166 cur_time_ns = kbasep_kinstr_prfcnt_timestamp_ns();
1167
1168 list_for_each_entry(pos, &kinstr_ctx->clients, node) {
1169 if (pos->active && (pos->next_dump_time_ns != 0) &&
1170 (pos->next_dump_time_ns < cur_time_ns))
1171 kbasep_kinstr_prfcnt_client_dump(pos, BASE_HWCNT_READER_EVENT_PERIODIC,
1172 pos->user_data);
1173 }
1174
1175 kbasep_kinstr_prfcnt_reschedule_worker(kinstr_ctx);
1176
1177 mutex_unlock(&kinstr_ctx->lock);
1178 }
1179
1180 /**
1181 * kbasep_kinstr_prfcnt_dump_timer() - Dump timer that schedules the dump worker for
1182 * execution as soon as possible.
1183 * @timer: Timer structure.
1184 *
1185 * Return: HRTIMER_NORESTART always.
1186 */
1187 static enum hrtimer_restart
1188 kbasep_kinstr_prfcnt_dump_timer(struct hrtimer *timer)
1189 {
1190 struct kbase_kinstr_prfcnt_context *kinstr_ctx = container_of(
1191 timer, struct kbase_kinstr_prfcnt_context, dump_timer);
1192
1193 /* We don't need to check kinstr_ctx->suspend_count here.
1194 * Suspend and resume functions already ensure that the worker
1195 * is cancelled when the driver is suspended, and resumed when
1196 * the suspend_count reaches 0.
1197 */
1198 kbase_hwcnt_virtualizer_queue_work(kinstr_ctx->hvirt,
1199 &kinstr_ctx->dump_work);
1200
1201 return HRTIMER_NORESTART;
1202 }
1203
1204 int kbase_kinstr_prfcnt_init(struct kbase_hwcnt_virtualizer *hvirt,
1205 struct kbase_kinstr_prfcnt_context **out_kinstr_ctx)
1206 {
1207 struct kbase_kinstr_prfcnt_context *kinstr_ctx;
1208 const struct kbase_hwcnt_metadata *metadata;
1209
1210 if (!hvirt || !out_kinstr_ctx)
1211 return -EINVAL;
1212
1213 metadata = kbase_hwcnt_virtualizer_metadata(hvirt);
1214
1215 if (!metadata)
1216 return -EINVAL;
1217
1218 kinstr_ctx = kzalloc(sizeof(*kinstr_ctx), GFP_KERNEL);
1219
1220 if (!kinstr_ctx)
1221 return -ENOMEM;
1222
1223 kinstr_ctx->hvirt = hvirt;
1224 kinstr_ctx->metadata = metadata;
1225
1226 mutex_init(&kinstr_ctx->lock);
1227 INIT_LIST_HEAD(&kinstr_ctx->clients);
1228 hrtimer_init(&kinstr_ctx->dump_timer, CLOCK_MONOTONIC,
1229 HRTIMER_MODE_REL);
1230 kinstr_ctx->dump_timer.function = kbasep_kinstr_prfcnt_dump_timer;
1231 INIT_WORK(&kinstr_ctx->dump_work, kbasep_kinstr_prfcnt_dump_worker);
1232
1233 *out_kinstr_ctx = kinstr_ctx;
1234 return 0;
1235 }
1236
1237 void kbase_kinstr_prfcnt_term(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
1238 {
1239 if (!kinstr_ctx)
1240 return;
1241
1242 /* Non-zero client count implies client leak */
1243 if (WARN_ON(kinstr_ctx->client_count > 0)) {
1244 struct kbase_kinstr_prfcnt_client *pos, *n;
1245
1246 list_for_each_entry_safe (pos, n, &kinstr_ctx->clients, node) {
1247 list_del(&pos->node);
1248 kinstr_ctx->client_count--;
1249 kbasep_kinstr_prfcnt_client_destroy(pos);
1250 }
1251 }
1252
1253 cancel_work_sync(&kinstr_ctx->dump_work);
1254
1255 WARN_ON(kinstr_ctx->client_count > 0);
1256 kfree(kinstr_ctx);
1257 }
1258
1259 void kbase_kinstr_prfcnt_suspend(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
1260 {
1261 if (WARN_ON(!kinstr_ctx))
1262 return;
1263
1264 mutex_lock(&kinstr_ctx->lock);
1265
1266 if (!WARN_ON(kinstr_ctx->suspend_count == SIZE_MAX))
1267 kinstr_ctx->suspend_count++;
1268
1269 mutex_unlock(&kinstr_ctx->lock);
1270
1271 /* Always sync cancel the timer and then the worker, regardless of the
1272 * new suspend count.
1273 *
1274 * This ensures concurrent calls to kbase_kinstr_prfcnt_suspend() always block
1275 * until kinstr_prfcnt is fully suspended.
1276 *
1277 * The timer is canceled before the worker, as the timer
1278 * unconditionally re-enqueues the worker, but the worker checks the
1279 * suspend_count that we just incremented before rescheduling the timer.
1280 *
1281 * Therefore if we cancel the worker first, the timer might re-enqueue
1282 * the worker before we cancel the timer, but the opposite is not
1283 * possible.
1284 */
1285 hrtimer_cancel(&kinstr_ctx->dump_timer);
1286 cancel_work_sync(&kinstr_ctx->dump_work);
1287 }
1288
1289 void kbase_kinstr_prfcnt_resume(struct kbase_kinstr_prfcnt_context *kinstr_ctx)
1290 {
1291 if (WARN_ON(!kinstr_ctx))
1292 return;
1293
1294 mutex_lock(&kinstr_ctx->lock);
1295
1296 if (!WARN_ON(kinstr_ctx->suspend_count == 0)) {
1297 kinstr_ctx->suspend_count--;
1298
1299 /* Last resume, so re-enqueue the worker if we have any periodic
1300 * clients.
1301 */
1302 if (kinstr_ctx->suspend_count == 0) {
1303 struct kbase_kinstr_prfcnt_client *pos;
1304 bool has_periodic_clients = false;
1305
1306 list_for_each_entry (pos, &kinstr_ctx->clients, node) {
1307 if (pos->dump_interval_ns != 0) {
1308 has_periodic_clients = true;
1309 break;
1310 }
1311 }
1312
1313 if (has_periodic_clients)
1314 kbase_hwcnt_virtualizer_queue_work(
1315 kinstr_ctx->hvirt,
1316 &kinstr_ctx->dump_work);
1317 }
1318 }
1319
1320 mutex_unlock(&kinstr_ctx->lock);
1321 }
1322
1323 static int kbasep_kinstr_prfcnt_sample_array_alloc(struct kbase_kinstr_prfcnt_client *cli,
1324 const struct kbase_hwcnt_metadata *metadata)
1325 {
1326 struct kbase_kinstr_prfcnt_sample_array *sample_arr = &cli->sample_arr;
1327 struct kbase_kinstr_prfcnt_sample *samples;
1328 size_t sample_idx;
1329 size_t dump_buf_bytes;
1330 size_t clk_cnt_buf_bytes;
1331 size_t sample_meta_bytes;
1332 size_t md_count;
1333 size_t sample_size;
1334 size_t buffer_count = cli->config.buffer_count;
1335
1336 if (!metadata || !sample_arr)
1337 return -EINVAL;
1338
1339 md_count = kbasep_kinstr_prfcnt_get_sample_md_count(metadata, &cli->enable_map);
1340 sample_meta_bytes = sizeof(struct prfcnt_metadata) * md_count;
1341 dump_buf_bytes = metadata->dump_buf_bytes;
1342 clk_cnt_buf_bytes =
1343 sizeof(*samples->dump_buf.clk_cnt_buf) * metadata->clk_cnt;
1344 sample_size = sample_meta_bytes + dump_buf_bytes + clk_cnt_buf_bytes;
1345
1346 samples = kmalloc_array(buffer_count, sizeof(*samples), GFP_KERNEL);
1347
1348 if (!samples)
1349 return -ENOMEM;
1350
1351 sample_arr->user_buf = vmalloc_user(sample_size * buffer_count);
1352
1353 if (!sample_arr->user_buf) {
1354 kfree(samples);
1355 return -ENOMEM;
1356 }
1357
1358 sample_arr->sample_count = buffer_count;
1359 sample_arr->samples = samples;
1360
1361 for (sample_idx = 0; sample_idx < buffer_count; sample_idx++) {
1362 const size_t sample_meta_offset = sample_size * sample_idx;
1363 const size_t dump_buf_offset =
1364 sample_meta_offset + sample_meta_bytes;
1365 const size_t clk_cnt_buf_offset =
1366 dump_buf_offset + dump_buf_bytes;
1367
1368 /* Internal layout in a sample buffer: [sample metadata, dump_buf, clk_cnt_buf]. */
1369 samples[sample_idx].dump_buf.metadata = metadata;
1370 samples[sample_idx].sample_meta =
1371 (struct prfcnt_metadata *)(sample_arr->user_buf + sample_meta_offset);
1372 samples[sample_idx].dump_buf.dump_buf =
1373 (u64 *)(sample_arr->user_buf + dump_buf_offset);
1374 samples[sample_idx].dump_buf.clk_cnt_buf =
1375 (u64 *)(sample_arr->user_buf + clk_cnt_buf_offset);
1376 }
1377
1378 return 0;
1379 }
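/* Resulting layout of user_buf (one contiguous vmalloc_user allocation,
 * illustrative):
 *
 *   sample 0:              [metadata items][dump_buf][clk_cnt_buf]
 *   sample 1:              [metadata items][dump_buf][clk_cnt_buf]
 *   ...
 *   sample buffer_count-1: [metadata items][dump_buf][clk_cnt_buf]
 *
 * Each sample starts at sample_size * sample_idx, which keeps the
 * sample_offset_bytes values reported to userspace stable across the mapping.
 */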
1380
1381 static bool prfcnt_mode_supported(u8 mode)
1382 {
1383 return (mode == PRFCNT_MODE_MANUAL) || (mode == PRFCNT_MODE_PERIODIC);
1384 }
1385
1386 static void
1387 kbasep_kinstr_prfcnt_block_enable_to_physical(uint32_t *phys_em,
1388 const uint64_t *enable_mask)
1389 {
1390 *phys_em |= kbase_hwcnt_backend_gpu_block_map_to_physical(
1391 enable_mask[0], enable_mask[1]);
1392 }
1393
1394 /**
1395 * kbasep_kinstr_prfcnt_parse_request_enable - Parse an enable request
1396 * @req_enable: Performance counters enable request to parse.
1397 * @config: Client object the session configuration should be written to.
1398 *
1399 * This function parses a performance counters enable request.
1400 * This type of request specifies a bitmask of HW counters to enable
1401 * for one performance counters block type. In addition to that,
1402 * a performance counters enable request may also set "global"
1403 * configuration properties that affect the whole session, like the
1404 * performance counters set, which shall be compatible with the same value
1405 * set by other performance request items.
1406 *
1407 * Return: 0 on success, else error code.
1408 */
1409 static int kbasep_kinstr_prfcnt_parse_request_enable(
1410 const struct prfcnt_request_enable *req_enable,
1411 struct kbase_kinstr_prfcnt_client_config *config)
1412 {
1413 int err = 0;
1414 u8 req_set = KBASE_HWCNT_SET_UNDEFINED, default_set;
1415
1416 switch (req_enable->set) {
1417 case PRFCNT_SET_PRIMARY:
1418 req_set = KBASE_HWCNT_SET_PRIMARY;
1419 break;
1420 case PRFCNT_SET_SECONDARY:
1421 req_set = KBASE_HWCNT_SET_SECONDARY;
1422 break;
1423 case PRFCNT_SET_TERTIARY:
1424 req_set = KBASE_HWCNT_SET_TERTIARY;
1425 break;
1426 default:
1427 err = -EINVAL;
1428 break;
1429 }
1430
1431 /* The performance counter set is a "global" property that affects
1432 * the whole session. Either this is the first request that sets
1433 * the value, or it shall be identical to all previous requests.
1434 */
1435 if (!err) {
1436 if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED)
1437 config->counter_set = req_set;
1438 else if (config->counter_set != req_set)
1439 err = -EINVAL;
1440 }
1441
1442 /* Temporarily, the requested set cannot be different from the default
1443 * set because it is the only one supported. This will change in
1444 * the future.
1445 */
1446 #if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
1447 default_set = KBASE_HWCNT_SET_SECONDARY;
1448 #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
1449 default_set = KBASE_HWCNT_SET_TERTIARY;
1450 #else
1451 /* Default to primary */
1452 default_set = KBASE_HWCNT_SET_PRIMARY;
1453 #endif
1454
1455 if (req_set != default_set)
1456 err = -EINVAL;
1457
1458 if (err < 0)
1459 return err;
1460
1461 /* Enable the performance counters based on the bitmask provided
1462 * by the user space client.
1463 * It is possible to receive multiple requests for the same counter
1464 * block, in which case the bitmask will be a logical OR of all the
1465 * bitmasks given by the client.
1466 */
1467 switch (req_enable->block_type) {
1468 case PRFCNT_BLOCK_TYPE_FE:
1469 kbasep_kinstr_prfcnt_block_enable_to_physical(
1470 &config->phys_em.fe_bm, req_enable->enable_mask);
1471 break;
1472 case PRFCNT_BLOCK_TYPE_TILER:
1473 kbasep_kinstr_prfcnt_block_enable_to_physical(
1474 &config->phys_em.tiler_bm, req_enable->enable_mask);
1475 break;
1476 case PRFCNT_BLOCK_TYPE_MEMORY:
1477 kbasep_kinstr_prfcnt_block_enable_to_physical(
1478 &config->phys_em.mmu_l2_bm, req_enable->enable_mask);
1479 break;
1480 case PRFCNT_BLOCK_TYPE_SHADER_CORE:
1481 kbasep_kinstr_prfcnt_block_enable_to_physical(
1482 &config->phys_em.shader_bm, req_enable->enable_mask);
1483 break;
1484 default:
1485 err = -EINVAL;
1486 break;
1487 }
1488
1489 return err;
1490 }
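/* Example (illustrative): two ENABLE requests for PRFCNT_BLOCK_TYPE_SHADER_CORE,
 * both specifying PRFCNT_SET_PRIMARY, are accepted, and the contribution of the
 * second request is OR-ed into config->phys_em.shader_bm on top of the first.
 * A third request asking for PRFCNT_SET_SECONDARY would instead make the whole
 * setup fail with -EINVAL, because the counter set must be identical across
 * requests.
 */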
1491
1492 /**
1493 * kbasep_kinstr_prfcnt_parse_request_scope - Parse a scope request
1494 * @req_scope: Performance counters scope request to parse.
1495 * @config: Client object the session configuration should be written to.
1496 *
1497 * This function parses a performance counters scope request.
1498 * There are only 2 acceptable outcomes: either the client leaves the scope
1499 * as undefined, or all the scope requests are set to the same value.
1500 *
1501 * Return: 0 on success, else error code.
1502 */
1503 static int kbasep_kinstr_prfcnt_parse_request_scope(
1504 const struct prfcnt_request_scope *req_scope,
1505 struct kbase_kinstr_prfcnt_client_config *config)
1506 {
1507 int err = 0;
1508
1509 if (config->scope == PRFCNT_SCOPE_RESERVED)
1510 config->scope = req_scope->scope;
1511 else if (config->scope != req_scope->scope)
1512 err = -EINVAL;
1513
1514 return err;
1515 }
1516
1517 /**
1518 * kbasep_kinstr_prfcnt_parse_setup - Parse session setup
1519 * @kinstr_ctx: Pointer to the kinstr_prfcnt context.
1520 * @setup: Session setup information to parse.
1521 * @config: Client object the session configuration should be written to.
1522 * @req_arr: Pointer to array of request items for client session.
1523 *
1524 * This function parses the list of "request" items sent by the user space
1525 * client, and writes the configuration for the new client to be created
1526 * for the session.
1527 *
1528 * Return: 0 on success, else error code.
1529 */
1530 static int kbasep_kinstr_prfcnt_parse_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
1531 union kbase_ioctl_kinstr_prfcnt_setup *setup,
1532 struct kbase_kinstr_prfcnt_client_config *config,
1533 struct prfcnt_request_item *req_arr)
1534 {
1535 uint32_t i;
1536 unsigned int item_count = setup->in.request_item_count;
1537 int err = 0;
1538
1539 if (req_arr[item_count - 1].hdr.item_type != FLEX_LIST_TYPE_NONE ||
1540 req_arr[item_count - 1].hdr.item_version != 0) {
1541 return -EINVAL;
1542 }
1543
1544 /* The session configuration can only feature one value for some
1545 * properties (like capture mode, block counter set and scope), but the
1546 * client may potentially issue multiple requests and try to set more than
1547 * one value for those properties. While issuing multiple requests for the
1548 * same property is allowed by the protocol, asking for different values
1549 * is illegal. Leaving these properties as undefined is illegal, too.
1550 */
1551 config->prfcnt_mode = PRFCNT_MODE_RESERVED;
1552 config->counter_set = KBASE_HWCNT_SET_UNDEFINED;
1553 config->scope = PRFCNT_SCOPE_RESERVED;
1554
1555 for (i = 0; i < item_count - 1; i++) {
1556 if (req_arr[i].hdr.item_version > PRFCNT_READER_API_VERSION) {
1557 err = -EINVAL;
1558 break;
1559 }
1560
1561 switch (req_arr[i].hdr.item_type) {
1562 /* Capture mode is initialized as undefined.
1563 * The first request of this type sets the capture mode.
1564 * The protocol allows the client to send redundant requests,
1565 * but only if they replicate the same value that has already
1566 * been set by the first request.
1567 */
1568 case PRFCNT_REQUEST_TYPE_MODE:
1569 if (!prfcnt_mode_supported(req_arr[i].u.req_mode.mode))
1570 err = -EINVAL;
1571 else if (config->prfcnt_mode == PRFCNT_MODE_RESERVED)
1572 config->prfcnt_mode =
1573 req_arr[i].u.req_mode.mode;
1574 else if (req_arr[i].u.req_mode.mode !=
1575 config->prfcnt_mode)
1576 err = -EINVAL;
1577
1578 if (err < 0)
1579 break;
1580
1581 if (config->prfcnt_mode == PRFCNT_MODE_PERIODIC) {
1582 config->period_ns =
1583 req_arr[i]
1584 .u.req_mode.mode_config.periodic
1585 .period_ns;
1586
1587 if ((config->period_ns != 0) &&
1588 (config->period_ns <
1589 DUMP_INTERVAL_MIN_NS)) {
1590 config->period_ns =
1591 DUMP_INTERVAL_MIN_NS;
1592 }
1593
1594 if (config->period_ns == 0)
1595 err = -EINVAL;
1596 }
1597 break;
1598
1599 case PRFCNT_REQUEST_TYPE_ENABLE:
1600 err = kbasep_kinstr_prfcnt_parse_request_enable(
1601 &req_arr[i].u.req_enable, config);
1602 break;
1603
1604 case PRFCNT_REQUEST_TYPE_SCOPE:
1605 err = kbasep_kinstr_prfcnt_parse_request_scope(
1606 &req_arr[i].u.req_scope, config);
1607 break;
1608
1609 default:
1610 err = -EINVAL;
1611 break;
1612 }
1613
1614 if (err < 0)
1615 break;
1616 }
1617
1618 if (!err) {
1619 /* Verify that properties (like capture mode and block counter
1620 * set) have been defined by the user space client.
1621 */
1622 if (config->prfcnt_mode == PRFCNT_MODE_RESERVED)
1623 err = -EINVAL;
1624
1625 if (config->counter_set == KBASE_HWCNT_SET_UNDEFINED)
1626 err = -EINVAL;
1627 }
1628
1629 return err;
1630 }
1631
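/**
 * kbasep_kinstr_prfcnt_client_create - Create a kinstr_prfcnt client.
 *                                      Does not attach the client to the
 *                                      kinstr_prfcnt context.
 * @kinstr_ctx: Pointer to the kinstr_prfcnt context.
 * @setup:      Session setup information to parse.
 * @out_vcli:   Non-NULL pointer where the pointer to the created client is
 *              stored on success.
 * @req_arr:    Pointer to array of request items for the client session.
 *
 * The client is built up in stages: the setup is parsed, the enable map,
 * temporary dump buffer and sample array are allocated, a virtualizer client
 * is created, and the wait queue and command mutex are initialised. If any
 * stage fails, the partially initialised client is destroyed.
 *
 * Return: 0 on success, else error code.
 */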
1632 int kbasep_kinstr_prfcnt_client_create(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
1633 union kbase_ioctl_kinstr_prfcnt_setup *setup,
1634 struct kbase_kinstr_prfcnt_client **out_vcli,
1635 struct prfcnt_request_item *req_arr)
1636 {
1637 int err;
1638 struct kbase_kinstr_prfcnt_client *cli;
1639 enum kbase_kinstr_prfcnt_client_init_state init_state;
1640
1641 if (WARN_ON(!kinstr_ctx))
1642 return -EINVAL;
1643
1644 if (WARN_ON(!setup))
1645 return -EINVAL;
1646
1647 if (WARN_ON(!req_arr))
1648 return -EINVAL;
1649
1650 cli = kzalloc(sizeof(*cli), GFP_KERNEL);
1651
1652 if (!cli)
1653 return -ENOMEM;
1654
1655 for (init_state = KINSTR_PRFCNT_UNINITIALISED; init_state < KINSTR_PRFCNT_INITIALISED;
1656 init_state++) {
1657 err = 0;
1658 switch (init_state) {
1659 case KINSTR_PRFCNT_PARSE_SETUP:
1660 cli->kinstr_ctx = kinstr_ctx;
1661 err = kbasep_kinstr_prfcnt_parse_setup(kinstr_ctx, setup, &cli->config,
1662 req_arr);
1663
1664 break;
1665
1666 case KINSTR_PRFCNT_ENABLE_MAP:
1667 cli->config.buffer_count = MAX_BUFFER_COUNT;
1668 cli->dump_interval_ns = cli->config.period_ns;
1669 cli->next_dump_time_ns = 0;
1670 cli->active = false;
1671 atomic_set(&cli->write_idx, 0);
1672 atomic_set(&cli->read_idx, 0);
1673 atomic_set(&cli->fetch_idx, 0);
1674
1675 err = kbase_hwcnt_enable_map_alloc(kinstr_ctx->metadata, &cli->enable_map);
1676 break;
1677
1678 case KINSTR_PRFCNT_DUMP_BUFFER:
1679 kbase_hwcnt_gpu_enable_map_from_physical(&cli->enable_map,
1680 &cli->config.phys_em);
1681
1682 cli->sample_count = cli->config.buffer_count;
1683 cli->sample_size =
1684 kbasep_kinstr_prfcnt_get_sample_size(cli, kinstr_ctx->metadata);
1685
1686 /* Use virtualizer's metadata to alloc tmp buffer which interacts with
1687 * the HWC virtualizer.
1688 */
1689 err = kbase_hwcnt_dump_buffer_alloc(kinstr_ctx->metadata, &cli->tmp_buf);
1690 break;
1691
1692 case KINSTR_PRFCNT_SAMPLE_ARRAY:
1693 /* Disable the clock map during setup; it is enabled when the client starts */
1694 cli->enable_map.clk_enable_map = 0;
1695
1696 /* Use metadata from virtualizer to allocate dump buffers if
1697 * kinstr_prfcnt doesn't have the truncated metadata.
1698 */
1699 err = kbasep_kinstr_prfcnt_sample_array_alloc(cli, kinstr_ctx->metadata);
1700
1701 break;
1702
1703 case KINSTR_PRFCNT_VIRTUALIZER_CLIENT:
1704 /* Set the enable map to 0 to prevent the virtualizer from initialising
1705 * and kicking the backend to count.
1706 */
1707 kbase_hwcnt_gpu_enable_map_from_physical(
1708 &cli->enable_map, &(struct kbase_hwcnt_physical_enable_map){ 0 });
1709
1710 err = kbase_hwcnt_virtualizer_client_create(kinstr_ctx->hvirt,
1711 &cli->enable_map, &cli->hvcli);
1712 break;
1713
1714 case KINSTR_PRFCNT_WAITQ_MUTEX:
1715 init_waitqueue_head(&cli->waitq);
1716 mutex_init(&cli->cmd_sync_lock);
1717 break;
1718
1719 case KINSTR_PRFCNT_INITIALISED:
1720 /* This shouldn't be reached */
1721 break;
1722 }
1723
1724 if (err < 0) {
1725 kbasep_kinstr_prfcnt_client_destroy_partial(cli, init_state);
1726 return err;
1727 }
1728 }
1729 *out_vcli = cli;
1730
1731 return 0;
1732
1733 }
1734
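/**
 * kbasep_kinstr_prfcnt_get_block_info_count - Count the block info items.
 * @metadata: Pointer to the hardware counter metadata, may be NULL.
 *
 * Return: Number of non-reserved block types described by the metadata, or
 *         0 if @metadata is NULL.
 */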
1735 static size_t kbasep_kinstr_prfcnt_get_block_info_count(
1736 const struct kbase_hwcnt_metadata *metadata)
1737 {
1738 size_t grp, blk;
1739 size_t block_info_count = 0;
1740
1741 if (!metadata)
1742 return 0;
1743
1744 for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
1745 for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) {
1746 if (!kbase_kinstr_is_block_type_reserved(metadata, grp, blk))
1747 block_info_count++;
1748 }
1749 }
1750
1751 return block_info_count;
1752 }
1753
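/**
 * kbasep_kinstr_prfcnt_get_request_info_list - Copy the supported request
 *                                              enumeration items.
 * @item_arr: Pointer to the array of enumeration items to populate.
 * @arr_idx:  Index into @item_arr at which to copy the items; advanced past
 *            the copied items on return.
 */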
1754 static void kbasep_kinstr_prfcnt_get_request_info_list(
1755 struct prfcnt_enum_item *item_arr, size_t *arr_idx)
1756 {
1757 memcpy(&item_arr[*arr_idx], kinstr_prfcnt_supported_requests,
1758 sizeof(kinstr_prfcnt_supported_requests));
1759 *arr_idx += ARRAY_SIZE(kinstr_prfcnt_supported_requests);
1760 }
1761
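/**
 * kbasep_kinstr_prfcnt_get_sample_info_item - Fill in the sample info
 *                                             enumeration item.
 * @metadata: Non-NULL pointer to the hardware counter metadata.
 * @item_arr: Pointer to the array of enumeration items to populate.
 * @arr_idx:  Index into @item_arr at which to place the item; incremented on
 *            return.
 *
 * The sample info item currently only carries the number of clock domains.
 */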
1762 static void kbasep_kinstr_prfcnt_get_sample_info_item(const struct kbase_hwcnt_metadata *metadata,
1763 struct prfcnt_enum_item *item_arr,
1764 size_t *arr_idx)
1765 {
1766 struct prfcnt_enum_item sample_info = {
1767 .hdr = {
1768 .item_type = PRFCNT_ENUM_TYPE_SAMPLE_INFO,
1769 .item_version = PRFCNT_READER_API_VERSION,
1770 },
1771 .u.sample_info = {
1772 .num_clock_domains = metadata->clk_cnt,
1773 },
1774 };
1775
1776 item_arr[*arr_idx] = sample_info;
1777 *arr_idx += 1;
1778 }
1779
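/**
 * kbasep_kinstr_prfcnt_get_block_info_list - Get the list of block types as
 *                                            enumeration items.
 * @metadata:  Non-NULL pointer to the hardware counter metadata.
 * @block_set: Counter set reported for every block item.
 * @item_arr:  Pointer to the array of enumeration items to populate.
 * @arr_idx:   Index into @item_arr at which to place the items; advanced past
 *             the appended items on return.
 *
 * One item is appended per non-reserved block type, reporting the number of
 * available block instances and the number of counter values per block.
 *
 * Return: 0 on success, else error code.
 */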
1780 int kbasep_kinstr_prfcnt_get_block_info_list(const struct kbase_hwcnt_metadata *metadata,
1781 size_t block_set, struct prfcnt_enum_item *item_arr,
1782 size_t *arr_idx)
1783 {
1784 size_t grp, blk;
1785
1786 if (!metadata || !item_arr || !arr_idx)
1787 return -EINVAL;
1788
1789 for (grp = 0; grp < kbase_hwcnt_metadata_group_count(metadata); grp++) {
1790 for (blk = 0; blk < kbase_hwcnt_metadata_block_count(metadata, grp); blk++) {
1791 size_t blk_inst;
1792 size_t unused_blk_inst_count = 0;
1793 size_t blk_inst_count =
1794 kbase_hwcnt_metadata_block_instance_count(metadata, grp, blk);
1795 enum prfcnt_block_type block_type =
1796 kbase_hwcnt_metadata_block_type_to_prfcnt_block_type(
1797 kbase_hwcnt_metadata_block_type(metadata, grp, blk));
1798
1799 if (block_type == PRFCNT_BLOCK_TYPE_RESERVED)
1800 continue;
1801
1802 /* Count the number of unused block instances to adjust the reported number of instances */
1803 for (blk_inst = 0; blk_inst < blk_inst_count; blk_inst++) {
1804 if (!kbase_hwcnt_metadata_block_instance_avail(metadata, grp, blk,
1805 blk_inst))
1806 unused_blk_inst_count++;
1807 }
1808
1809 item_arr[(*arr_idx)++] = (struct prfcnt_enum_item){
1810 .hdr = {
1811 .item_type = PRFCNT_ENUM_TYPE_BLOCK,
1812 .item_version = PRFCNT_READER_API_VERSION,
1813 },
1814 .u.block_counter = {
1815 .set = block_set,
1816 .block_type = block_type,
1817 .num_instances = blk_inst_count - unused_blk_inst_count,
1818 .num_values = kbase_hwcnt_metadata_block_values_count(
1819 metadata, grp, blk),
1820 /* The bitmask of available counters should be dynamic.
1821 * Temporarily, it is set to U64_MAX, waiting for the
1822 * required functionality to be available in the future.
1823 */
1824 .counter_mask = {U64_MAX, U64_MAX},
1825 },
1826 };
1827 }
1828 }
1829
1830 return 0;
1831 }
1832
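/**
 * kbasep_kinstr_prfcnt_enum_info_count - Count the number of enumeration
 *                                        items.
 * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
 * @enum_info:  Non-NULL pointer to the enumeration info to fill in.
 *
 * The count covers the supported request items, one sample info item, one
 * item per non-reserved block type and the final sentinel item. It is also
 * cached in the context so the subsequent list request can be validated
 * against it.
 *
 * Return: 0 on success, else error code.
 */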
1833 static int kbasep_kinstr_prfcnt_enum_info_count(
1834 struct kbase_kinstr_prfcnt_context *kinstr_ctx,
1835 struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
1836 {
1837 uint32_t count = 0;
1838 size_t block_info_count = 0;
1839 const struct kbase_hwcnt_metadata *metadata;
1840
1841 count = ARRAY_SIZE(kinstr_prfcnt_supported_requests);
1842 metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
1843
1844 /* Add the sample_info (clock domain) descriptive item */
1845 count++;
1846
1847 /* Other block items are derived from the metadata */
1848 block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata);
1849 count += block_info_count;
1850
1851 /* Reserve one for the last sentinel item. */
1852 count++;
1853 enum_info->info_item_count = count;
1854 enum_info->info_item_size = sizeof(struct prfcnt_enum_item);
1855 kinstr_ctx->info_item_count = count;
1856
1857 return 0;
1858 }
1859
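/**
 * kbasep_kinstr_prfcnt_enum_info_list - Populate the enumeration item list
 *                                       and copy it to userspace.
 * @kinstr_ctx: Non-NULL pointer to the kinstr_prfcnt context.
 * @enum_info:  Non-NULL pointer to the enumeration info given by userspace.
 *
 * The item count supplied by userspace must match the count previously
 * computed by kbasep_kinstr_prfcnt_enum_info_count().
 *
 * Return: 0 on success, else error code.
 */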
1860 static int kbasep_kinstr_prfcnt_enum_info_list(
1861 struct kbase_kinstr_prfcnt_context *kinstr_ctx,
1862 struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
1863 {
1864 struct prfcnt_enum_item *prfcnt_item_arr;
1865 size_t arr_idx = 0;
1866 int err = 0;
1867 size_t block_info_count = 0;
1868 const struct kbase_hwcnt_metadata *metadata;
1869
1870 if ((enum_info->info_item_size == 0) ||
1871 (enum_info->info_item_count == 0) || !enum_info->info_list_ptr)
1872 return -EINVAL;
1873
1874 if (enum_info->info_item_count != kinstr_ctx->info_item_count)
1875 return -EINVAL;
1876
1877 prfcnt_item_arr = kcalloc(enum_info->info_item_count,
1878 sizeof(*prfcnt_item_arr), GFP_KERNEL);
1879 if (!prfcnt_item_arr)
1880 return -ENOMEM;
1881
1882 kbasep_kinstr_prfcnt_get_request_info_list(prfcnt_item_arr, &arr_idx);
1883
1884 metadata = kbase_hwcnt_virtualizer_metadata(kinstr_ctx->hvirt);
1885 /* Place the sample_info item */
1886 kbasep_kinstr_prfcnt_get_sample_info_item(metadata, prfcnt_item_arr, &arr_idx);
1887
1888 block_info_count = kbasep_kinstr_prfcnt_get_block_info_count(metadata);
1889
1890 if (arr_idx + block_info_count >= enum_info->info_item_count)
1891 err = -EINVAL;
1892
1893 if (!err) {
1894 size_t counter_set;
1895
1896 #if defined(CONFIG_MALI_BIFROST_PRFCNT_SET_SECONDARY)
1897 counter_set = KBASE_HWCNT_SET_SECONDARY;
1898 #elif defined(CONFIG_MALI_PRFCNT_SET_TERTIARY)
1899 counter_set = KBASE_HWCNT_SET_TERTIARY;
1900 #else
1901 /* Default to primary */
1902 counter_set = KBASE_HWCNT_SET_PRIMARY;
1903 #endif
1904 kbasep_kinstr_prfcnt_get_block_info_list(
1905 metadata, counter_set, prfcnt_item_arr, &arr_idx);
1906 if (arr_idx != enum_info->info_item_count - 1)
1907 err = -EINVAL;
1908 }
1909
1910 /* The last sentinel item. */
1911 prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_type =
1912 FLEX_LIST_TYPE_NONE;
1913 prfcnt_item_arr[enum_info->info_item_count - 1].hdr.item_version = 0;
1914
1915 if (!err) {
1916 unsigned long bytes =
1917 enum_info->info_item_count * sizeof(*prfcnt_item_arr);
1918
1919 if (copy_to_user(u64_to_user_ptr(enum_info->info_list_ptr),
1920 prfcnt_item_arr, bytes))
1921 err = -EFAULT;
1922 }
1923
1924 kfree(prfcnt_item_arr);
1925 return err;
1926 }
1927
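/* Enumeration is a two-step protocol: userspace first calls with a NULL
 * info_list_ptr to discover the item count and item size, then calls again
 * with a buffer large enough to receive the list.
 */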
1928 int kbase_kinstr_prfcnt_enum_info(
1929 struct kbase_kinstr_prfcnt_context *kinstr_ctx,
1930 struct kbase_ioctl_kinstr_prfcnt_enum_info *enum_info)
1931 {
1932 int err;
1933
1934 if (!kinstr_ctx || !enum_info)
1935 return -EINVAL;
1936
1937 if (!enum_info->info_list_ptr)
1938 err = kbasep_kinstr_prfcnt_enum_info_count(kinstr_ctx,
1939 enum_info);
1940 else
1941 err = kbasep_kinstr_prfcnt_enum_info_list(kinstr_ctx,
1942 enum_info);
1943
1944 return err;
1945 }
1946
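/* Set up a new kinstr_prfcnt client for the calling session: the request
 * items are copied from userspace, a client is created from them and, once
 * fully initialised, the client is exposed through an anonymous inode file
 * descriptor which is returned to the caller.
 */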
1947 int kbase_kinstr_prfcnt_setup(struct kbase_kinstr_prfcnt_context *kinstr_ctx,
1948 union kbase_ioctl_kinstr_prfcnt_setup *setup)
1949 {
1950 int err;
1951 size_t item_count;
1952 size_t bytes;
1953 struct prfcnt_request_item *req_arr = NULL;
1954 struct kbase_kinstr_prfcnt_client *cli = NULL;
1955 const size_t max_bytes = 32 * sizeof(*req_arr);
1956
1957 if (!kinstr_ctx || !setup)
1958 return -EINVAL;
1959
1960 item_count = setup->in.request_item_count;
1961
1962 /* Limit the number of request items to 2x the expected count: this
1963 * accommodates moderate duplication while rejecting excessive abuse.
1964 */
1965 if (!setup->in.requests_ptr || (item_count < 2) || (setup->in.request_item_size == 0) ||
1966 item_count > 2 * kinstr_ctx->info_item_count) {
1967 return -EINVAL;
1968 }
1969
1970 if (check_mul_overflow(item_count, sizeof(*req_arr), &bytes))
1971 return -EINVAL;
1972
1973 /* Further limit the maximum number of bytes copied from userspace: the cap
1974 * allows for at most 1 mode item and 12 enable items (4 types across 3 sets),
1975 * each currently the size of a prfcnt_request_item.
1976 *
1977 * Note: if more request types get added, this max limit needs to be updated.
1978 */
1979 if (bytes > max_bytes)
1980 return -EINVAL;
1981
1982 req_arr = memdup_user(u64_to_user_ptr(setup->in.requests_ptr), bytes);
1983
1984 if (IS_ERR(req_arr))
1985 return PTR_ERR(req_arr);
1986
1987 err = kbasep_kinstr_prfcnt_client_create(kinstr_ctx, setup, &cli, req_arr);
1988
1989 if (err < 0)
1990 goto error;
1991
1992 mutex_lock(&kinstr_ctx->lock);
1993 kinstr_ctx->client_count++;
1994 list_add(&cli->node, &kinstr_ctx->clients);
1995 mutex_unlock(&kinstr_ctx->lock);
1996
1997 setup->out.prfcnt_metadata_item_size = sizeof(struct prfcnt_metadata);
1998 setup->out.prfcnt_mmap_size_bytes =
1999 cli->sample_size * cli->sample_count;
2000
2001 /* Expose to user-space only once the client is fully initialized */
2002 err = anon_inode_getfd("[mali_kinstr_prfcnt_desc]",
2003 &kinstr_prfcnt_client_fops, cli,
2004 O_RDONLY | O_CLOEXEC);
2005
2006 if (err < 0)
2007 goto client_installed_error;
2008
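/* On success, err holds the new file descriptor; it is returned to the
 * caller after the temporary request array is freed.
 */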
2009 goto free_buf;
2010
2011 client_installed_error:
2012 mutex_lock(&kinstr_ctx->lock);
2013 kinstr_ctx->client_count--;
2014 list_del(&cli->node);
2015 mutex_unlock(&kinstr_ctx->lock);
2016 error:
2017 kbasep_kinstr_prfcnt_client_destroy(cli);
2018 free_buf:
2019 kfree(req_arr);
2020 return err;
2021 }
2022