xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <mali_kbase.h>
23 #include <gpu/mali_kbase_gpu_fault.h>
24 #include <mali_kbase_reset_gpu.h>
25 #include "mali_kbase_csf.h"
26 #include "backend/gpu/mali_kbase_pm_internal.h"
27 #include <linux/export.h>
28 #include <linux/priority_control_manager.h>
29 #include <linux/shmem_fs.h>
30 #include <csf/mali_kbase_csf_registers.h>
31 #include "mali_kbase_csf_tiler_heap.h"
32 #include <mmu/mali_kbase_mmu.h>
33 #include "mali_kbase_csf_timeout.h"
34 #include <csf/ipa_control/mali_kbase_csf_ipa_control.h>
35 #include <mali_kbase_hwaccess_time.h>
36 #include "mali_kbase_csf_event.h"
37 #include <tl/mali_kbase_tracepoints.h>
38 #include "mali_kbase_csf_mcu_shared_reg.h"
39 
40 #define CS_REQ_EXCEPTION_MASK (CS_REQ_FAULT_MASK | CS_REQ_FATAL_MASK)
41 #define CS_ACK_EXCEPTION_MASK (CS_ACK_FAULT_MASK | CS_ACK_FATAL_MASK)
42 
43 #define CS_RING_BUFFER_MAX_SIZE ((uint32_t)(1u << 31)) /* 2GiB */
44 #define CS_RING_BUFFER_MIN_SIZE ((uint32_t)4096)
45 
46 #define PROTM_ALLOC_MAX_RETRIES ((u8)5)
47 
48 const u8 kbasep_csf_queue_group_priority_to_relative[BASE_QUEUE_GROUP_PRIORITY_COUNT] = {
49 	KBASE_QUEUE_GROUP_PRIORITY_HIGH,
50 	KBASE_QUEUE_GROUP_PRIORITY_MEDIUM,
51 	KBASE_QUEUE_GROUP_PRIORITY_LOW,
52 	KBASE_QUEUE_GROUP_PRIORITY_REALTIME
53 };
54 const u8 kbasep_csf_relative_to_queue_group_priority[KBASE_QUEUE_GROUP_PRIORITY_COUNT] = {
55 	BASE_QUEUE_GROUP_PRIORITY_REALTIME,
56 	BASE_QUEUE_GROUP_PRIORITY_HIGH,
57 	BASE_QUEUE_GROUP_PRIORITY_MEDIUM,
58 	BASE_QUEUE_GROUP_PRIORITY_LOW
59 };
60 
61 /*
62  * struct irq_idle_and_protm_track - Object that tracks the idle and protected mode
63  *                                   request information in an interrupt case across
64  *                                   groups.
65  *
66  * @protm_grp: Possibly schedulable group that requested protected mode in the interrupt.
67  *             If NULL, no such case observed in the tracked interrupt case.
68  * @idle_seq:  The highest priority group that notified idle. If no such instance in the
69  *             interrupt case, marked with the largest field value: U32_MAX.
70  * @idle_slot: The slot number if @p idle_seq is valid in the given tracking case.
71  */
72 struct irq_idle_and_protm_track {
73 	struct kbase_queue_group *protm_grp;
74 	u32 idle_seq;
75 	s8 idle_slot;
76 };
77 
78 /**
79  * kbasep_ctx_user_reg_page_mapping_term() - Terminate resources for USER Register Page.
80  *
81  * @kctx:   Pointer to the kbase context
82  */
83 static void kbasep_ctx_user_reg_page_mapping_term(struct kbase_context *kctx)
84 {
85 	struct kbase_device *kbdev = kctx->kbdev;
86 
87 	if (unlikely(kctx->csf.user_reg.vma))
88 		dev_err(kbdev->dev, "VMA for USER Register page exists on termination of ctx %d_%d",
89 			kctx->tgid, kctx->id);
90 	if (WARN_ON_ONCE(!list_empty(&kctx->csf.user_reg.link)))
91 		list_del_init(&kctx->csf.user_reg.link);
92 }
93 
94 /**
95  * kbasep_ctx_user_reg_page_mapping_init() - Initialize resources for USER Register Page.
96  *
97  * @kctx:   Pointer to the kbase context
98  *
99  * Return: 0 on success.
100  */
101 static int kbasep_ctx_user_reg_page_mapping_init(struct kbase_context *kctx)
102 {
103 	INIT_LIST_HEAD(&kctx->csf.user_reg.link);
104 	kctx->csf.user_reg.vma = NULL;
105 	kctx->csf.user_reg.file_offset = 0;
106 
107 	return 0;
108 }
109 
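/**
 * put_user_pages_mmap_handle() - Release the mmap handle reserved for a queue.
 *
 * @kctx:	Address of the kbase context within which the queue was created.
 * @queue:	Pointer to the queue whose handle is to be released.
 *
 * Frees up the cookie that was reserved for the queue by
 * get_user_pages_mmap_handle() and resets the queue's handle to the invalid
 * value.
 */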
110 static void put_user_pages_mmap_handle(struct kbase_context *kctx,
111 			struct kbase_queue *queue)
112 {
113 	unsigned long cookie_nr;
114 
115 	lockdep_assert_held(&kctx->csf.lock);
116 
117 	if (queue->handle == BASEP_MEM_INVALID_HANDLE)
118 		return;
119 
120 	cookie_nr =
121 		PFN_DOWN(queue->handle - BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
122 
123 	if (!WARN_ON(kctx->csf.user_pages_info[cookie_nr] != queue)) {
124 		/* free up cookie */
125 		kctx->csf.user_pages_info[cookie_nr] = NULL;
126 		bitmap_set(kctx->csf.cookies, cookie_nr, 1);
127 	}
128 
129 	queue->handle = BASEP_MEM_INVALID_HANDLE;
130 }
131 
132 /* Reserve a cookie, to be returned as a handle to userspace for creating
133  * the CPU mapping of the pair of input/output pages and Hw doorbell page.
134  * Returns 0 on success, or a negative error code on failure.
135  */
136 static int get_user_pages_mmap_handle(struct kbase_context *kctx,
137 			struct kbase_queue *queue)
138 {
139 	unsigned long cookie, cookie_nr;
140 
141 	lockdep_assert_held(&kctx->csf.lock);
142 
143 	if (bitmap_empty(kctx->csf.cookies,
144 				KBASE_CSF_NUM_USER_IO_PAGES_HANDLE)) {
145 		dev_err(kctx->kbdev->dev,
146 			"No csf cookies available for allocation!");
147 		return -ENOMEM;
148 	}
149 
150 	/* allocate a cookie */
151 	cookie_nr = find_first_bit(kctx->csf.cookies,
152 				KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
153 	if (kctx->csf.user_pages_info[cookie_nr]) {
154 		dev_err(kctx->kbdev->dev,
155 			"Inconsistent state of csf cookies!");
156 		return -EINVAL;
157 	}
158 	kctx->csf.user_pages_info[cookie_nr] = queue;
159 	bitmap_clear(kctx->csf.cookies, cookie_nr, 1);
160 
161 	/* relocate to correct base */
162 	cookie = cookie_nr + PFN_DOWN(BASEP_MEM_CSF_USER_IO_PAGES_HANDLE);
163 	cookie <<= PAGE_SHIFT;
164 
165 	queue->handle = (u64)cookie;
166 
167 	return 0;
168 }
169 
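/**
 * init_user_io_pages() - Initialize the USER input/output pages of a queue.
 *
 * @queue:	Pointer to the queue whose USER IO pages are to be initialized.
 *
 * Zeroes the CS_INSERT and CS_EXTRACT_INIT fields of the input page, and the
 * CS_EXTRACT and CS_ACTIVE fields of the output page.
 */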
170 static void init_user_io_pages(struct kbase_queue *queue)
171 {
172 	u32 *input_addr = (u32 *)(queue->user_io_addr);
173 	u32 *output_addr = (u32 *)(queue->user_io_addr + PAGE_SIZE);
174 
175 	input_addr[CS_INSERT_LO/4] = 0;
176 	input_addr[CS_INSERT_HI/4] = 0;
177 
178 	input_addr[CS_EXTRACT_INIT_LO/4] = 0;
179 	input_addr[CS_EXTRACT_INIT_HI/4] = 0;
180 
181 	output_addr[CS_EXTRACT_LO/4] = 0;
182 	output_addr[CS_EXTRACT_HI/4] = 0;
183 
184 	output_addr[CS_ACTIVE/4] = 0;
185 }
186 
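/**
 * kernel_unmap_user_io_pages() - Remove the kernel mapping of a queue's USER IO pages.
 *
 * @kctx:	Address of the kbase context within which the queue was created.
 * @queue:	Pointer to the queue whose USER IO pages are to be unmapped.
 *
 * Unmaps the kernel-side vmap() mapping of the queue's input/output pages and
 * decrements the context's count of permanently mapped pages.
 */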
187 static void kernel_unmap_user_io_pages(struct kbase_context *kctx,
188 			struct kbase_queue *queue)
189 {
190 	kbase_gpu_vm_lock(kctx);
191 
192 	vunmap(queue->user_io_addr);
193 
194 	WARN_ON(atomic_read(&kctx->permanent_mapped_pages) < KBASEP_NUM_CS_USER_IO_PAGES);
195 	atomic_sub(KBASEP_NUM_CS_USER_IO_PAGES, &kctx->permanent_mapped_pages);
196 
197 	kbase_gpu_vm_unlock(kctx);
198 }
199 
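/**
 * kernel_map_user_io_pages() - Map a queue's USER IO pages into kernel space.
 *
 * @kctx:	Address of the kbase context within which the queue was created.
 * @queue:	Pointer to the queue whose USER IO pages are to be mapped.
 *
 * Maps the pair of physical pages backing the queue's input/output interface
 * into the kernel address space with vmap(), using write-combining attributes
 * when the system is not coherent. The mapping counts towards the per-context
 * limit on permanently mapped pages.
 *
 * Return: 0 on success, or -ENOMEM if the limit would be exceeded or vmap() fails.
 */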
200 static int kernel_map_user_io_pages(struct kbase_context *kctx,
201 			struct kbase_queue *queue)
202 {
203 	struct page *page_list[2];
204 	pgprot_t cpu_map_prot;
205 	unsigned long flags;
206 	char *user_io_addr;
207 	int ret = 0;
208 	size_t i;
209 
210 	kbase_gpu_vm_lock(kctx);
211 
212 	if (ARRAY_SIZE(page_list) > (KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES -
213 			 atomic_read(&kctx->permanent_mapped_pages))) {
214 		ret = -ENOMEM;
215 		goto unlock;
216 	}
217 
218 	/* The pages are mapped to Userspace also, so use the same mapping
219 	 * attributes as used inside the CPU page fault handler.
220 	 */
221 	if (kctx->kbdev->system_coherency == COHERENCY_NONE)
222 		cpu_map_prot = pgprot_writecombine(PAGE_KERNEL);
223 	else
224 		cpu_map_prot = PAGE_KERNEL;
225 
226 	for (i = 0; i < ARRAY_SIZE(page_list); i++)
227 		page_list[i] = as_page(queue->phys[i]);
228 
229 	user_io_addr = vmap(page_list, ARRAY_SIZE(page_list), VM_MAP, cpu_map_prot);
230 
231 	if (!user_io_addr) {
232 		dev_err(kctx->kbdev->dev,
233 			"%s(): user_io_addr is NULL, queue: %p",
234 			__func__,
235 			queue);
236 		ret = -ENOMEM;
237 	} else {
238 		atomic_add(ARRAY_SIZE(page_list), &kctx->permanent_mapped_pages);
239 	}
240 
241 	kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
242 	queue->user_io_addr = user_io_addr;
243 	kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
244 
245 unlock:
246 	kbase_gpu_vm_unlock(kctx);
247 	return ret;
248 }
249 
250 static void term_queue_group(struct kbase_queue_group *group);
251 static void get_queue(struct kbase_queue *queue);
252 static void release_queue(struct kbase_queue *queue);
253 
254 /**
255  * kbase_csf_free_command_stream_user_pages() - Free the resources allocated
256  *				    for a queue at the time of bind.
257  *
258  * @kctx:	Address of the kbase context within which the queue was created.
259  * @queue:	Pointer to the queue to be unlinked.
260  *
261  * This function will free the pair of physical pages allocated for a GPU
262  * command queue, and also release the hardware doorbell page, that were mapped
263  * into the process address space to enable direct submission of commands to
264  * the hardware. Also releases the reference taken on the queue when the mapping
265  * was created.
266  *
267  * This function will be called only when the mapping is being removed, so
268  * the resources for the queue will not get freed up until the mapping is
269  * removed, even though userspace could have terminated the queue.
270  * The kernel will ensure that the termination of the Kbase context is only
271  * triggered after the mapping is removed.
272  *
273  * If an explicit or implicit unbind was missed by the userspace then the
274  * mapping will persist. On process exit kernel itself will remove the mapping.
275  */
276 void kbase_csf_free_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
277 {
278 	kernel_unmap_user_io_pages(kctx, queue);
279 
280 	kbase_mem_pool_free_pages(
281 		&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
282 		KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, true, false);
283 	kbase_process_page_usage_dec(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
284 
285 	/* The user_io_gpu_va should have been unmapped inside the scheduler */
286 	WARN_ONCE(queue->user_io_gpu_va, "User IO pages appear to still have a mapping");
287 
288 	/* If the queue has already been terminated by userspace
289 	 * then the ref count for queue object will drop to 0 here.
290 	 */
291 	release_queue(queue);
292 }
293 KBASE_EXPORT_TEST_API(kbase_csf_free_command_stream_user_pages);
294 
295 int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx, struct kbase_queue *queue)
296 {
297 	struct kbase_device *kbdev = kctx->kbdev;
298 	int ret;
299 
300 	lockdep_assert_held(&kctx->csf.lock);
301 
302 	ret = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
303 					 KBASEP_NUM_CS_USER_IO_PAGES,
304 					 queue->phys, false, kctx->task);
305 	if (ret != KBASEP_NUM_CS_USER_IO_PAGES) {
306 		/* Mark both the phys entries as zero to indicate that no physical pages are allocated */
307 		queue->phys[0].tagged_addr = 0;
308 		queue->phys[1].tagged_addr = 0;
309 		return -ENOMEM;
310 	}
311 
312 	ret = kernel_map_user_io_pages(kctx, queue);
313 	if (ret)
314 		goto kernel_map_failed;
315 
316 	kbase_process_page_usage_inc(kctx, KBASEP_NUM_CS_USER_IO_PAGES);
317 	init_user_io_pages(queue);
318 
319 	/* user_io_gpu_va is only mapped when scheduler decides to put the queue
320 	 * on slot at runtime. Initialize it to 0, signalling no mapping.
321 	 */
322 	queue->user_io_gpu_va = 0;
323 
324 	mutex_lock(&kbdev->csf.reg_lock);
325 	if (kbdev->csf.db_file_offsets > (U32_MAX - BASEP_QUEUE_NR_MMAP_USER_PAGES + 1))
326 		kbdev->csf.db_file_offsets = 0;
327 
328 	queue->db_file_offset = kbdev->csf.db_file_offsets;
329 	kbdev->csf.db_file_offsets += BASEP_QUEUE_NR_MMAP_USER_PAGES;
330 	WARN(kbase_refcount_read(&queue->refcount) != 1,
331 	     "Incorrect refcounting for queue object\n");
332 	/* This is the second reference taken on the queue object and
333 	 * would be dropped only when the IO mapping is removed either
334 	 * explicitly by userspace or implicitly by kernel on process exit.
335 	 */
336 	get_queue(queue);
337 	queue->bind_state = KBASE_CSF_QUEUE_BOUND;
338 	mutex_unlock(&kbdev->csf.reg_lock);
339 
340 	return 0;
341 
342 kernel_map_failed:
343 	kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
344 				  KBASEP_NUM_CS_USER_IO_PAGES, queue->phys, false, false);
345 	/* Mark both the phys entries as zero to indicate that no physical pages are allocated */
346 	queue->phys[0].tagged_addr = 0;
347 	queue->phys[1].tagged_addr = 0;
348 
349 	return ret;
350 }
351 KBASE_EXPORT_TEST_API(kbase_csf_alloc_command_stream_user_pages);
352 
353 static struct kbase_queue_group *find_queue_group(struct kbase_context *kctx,
354 	u8 group_handle)
355 {
356 	uint index = group_handle;
357 
358 	lockdep_assert_held(&kctx->csf.lock);
359 
360 	if (index < MAX_QUEUE_GROUP_NUM && kctx->csf.queue_groups[index]) {
361 		if (WARN_ON(kctx->csf.queue_groups[index]->handle != index))
362 			return NULL;
363 		return kctx->csf.queue_groups[index];
364 	}
365 
366 	return NULL;
367 }
368 
369 struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx, u8 group_handle)
370 {
371 	return find_queue_group(kctx, group_handle);
372 }
373 KBASE_EXPORT_TEST_API(kbase_csf_find_queue_group);
374 
375 int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx,
376 	u8 group_handle)
377 {
378 	struct kbase_queue_group *group;
379 
380 	mutex_lock(&kctx->csf.lock);
381 	group = find_queue_group(kctx, group_handle);
382 	mutex_unlock(&kctx->csf.lock);
383 
384 	return group ? 0 : -EINVAL;
385 }
386 
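/**
 * find_queue() - Find a GPU command queue by the GPU address of its buffer.
 *
 * @kctx:	Address of the kbase context within which the queue was created.
 * @base_addr:	Base GPU virtual address of the queue's ring buffer.
 *
 * Return: Pointer to the matching queue from the context's queue list, or
 *         NULL if no registered queue uses the given address.
 */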
387 static struct kbase_queue *find_queue(struct kbase_context *kctx, u64 base_addr)
388 {
389 	struct kbase_queue *queue;
390 
391 	lockdep_assert_held(&kctx->csf.lock);
392 
393 	list_for_each_entry(queue, &kctx->csf.queue_list, link) {
394 		if (base_addr == queue->base_addr)
395 			return queue;
396 	}
397 
398 	return NULL;
399 }
400 
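/**
 * get_queue() - Take an additional reference on a queue object.
 *
 * @queue:	Pointer to the queue.
 *
 * The reference count of the queue must already be non-zero.
 */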
401 static void get_queue(struct kbase_queue *queue)
402 {
403 	WARN_ON(!kbase_refcount_inc_not_zero(&queue->refcount));
404 }
405 
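/**
 * release_queue() - Drop a reference on a queue object.
 *
 * @queue:	Pointer to the queue.
 *
 * When the last reference is dropped, any pending fatal error for the queue
 * is removed, the no-user-free count on the queue's VA region is decremented
 * and the queue object is freed.
 */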
406 static void release_queue(struct kbase_queue *queue)
407 {
408 	lockdep_assert_held(&queue->kctx->csf.lock);
409 	if (kbase_refcount_dec_and_test(&queue->refcount)) {
410 		/* The queue can't still be on the per context list. */
411 		WARN_ON(!list_empty(&queue->link));
412 		WARN_ON(queue->group);
413 		dev_dbg(queue->kctx->kbdev->dev,
414 			"Remove any pending command queue fatal from ctx %d_%d",
415 			queue->kctx->tgid, queue->kctx->id);
416 		kbase_csf_event_remove_error(queue->kctx, &queue->error);
417 
418 		/* After this the Userspace would be able to free the
419 		 * memory for GPU queue. In case the Userspace missed
420 		 * terminating the queue, the cleanup will happen on
421 		 * context termination where tear down of region tracker
422 		 * would free up the GPU queue memory.
423 		 */
424 		kbase_gpu_vm_lock(queue->kctx);
425 		kbase_va_region_no_user_free_dec(queue->queue_reg);
426 		kbase_gpu_vm_unlock(queue->kctx);
427 
428 		kfree(queue);
429 	}
430 }
431 
432 static void oom_event_worker(struct work_struct *data);
433 static void cs_error_worker(struct work_struct *data);
434 
435 /* Between reg and reg_ex, one and only one must be null */
436 static int csf_queue_register_internal(struct kbase_context *kctx,
437 			struct kbase_ioctl_cs_queue_register *reg,
438 			struct kbase_ioctl_cs_queue_register_ex *reg_ex)
439 {
440 	struct kbase_queue *queue;
441 	int ret = 0;
442 	struct kbase_va_region *region;
443 	u64 queue_addr;
444 	size_t queue_size;
445 
446 	/* Only one pointer expected, otherwise coding error */
447 	if ((reg == NULL && reg_ex == NULL) || (reg && reg_ex)) {
448 		dev_dbg(kctx->kbdev->dev,
449 			"Error, one and only one param-ptr expected!");
450 		return -EINVAL;
451 	}
452 
453 	/* struct kbase_ioctl_cs_queue_register_ex contains a full
454 	 * struct kbase_ioctl_cs_queue_register at the start address. So
455 	 * the pointer can be safely cast to pointing to a
456 	 * kbase_ioctl_cs_queue_register object.
457 	 */
458 	if (reg_ex)
459 		reg = (struct kbase_ioctl_cs_queue_register *)reg_ex;
460 
461 	/* Validate the queue priority */
462 	if (reg->priority > BASE_QUEUE_MAX_PRIORITY)
463 		return -EINVAL;
464 
465 	queue_addr = reg->buffer_gpu_addr;
466 	queue_size = reg->buffer_size >> PAGE_SHIFT;
467 
468 	mutex_lock(&kctx->csf.lock);
469 
470 	/* Check if queue is already registered */
471 	if (find_queue(kctx, queue_addr) != NULL) {
472 		ret = -EINVAL;
473 		goto out;
474 	}
475 
476 	/* Check if the queue address is valid */
477 	kbase_gpu_vm_lock(kctx);
478 	region = kbase_region_tracker_find_region_enclosing_address(kctx,
479 								    queue_addr);
480 
481 	if (kbase_is_region_invalid_or_free(region) || kbase_is_region_shrinkable(region) ||
482 	    region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
483 		ret = -ENOENT;
484 		goto out_unlock_vm;
485 	}
486 
487 	if (queue_size > (region->nr_pages -
488 			  ((queue_addr >> PAGE_SHIFT) - region->start_pfn))) {
489 		ret = -EINVAL;
490 		goto out_unlock_vm;
491 	}
492 
493 	/* Check address validity on cs_trace buffer etc. Don't care
494 	 * if not enabled (i.e. when size is 0).
495 	 */
496 	if (reg_ex && reg_ex->ex_buffer_size) {
497 		int buf_pages = (reg_ex->ex_buffer_size +
498 				 (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT;
499 		struct kbase_va_region *region_ex =
500 			kbase_region_tracker_find_region_enclosing_address(kctx,
501 									   reg_ex->ex_buffer_base);
502 
503 		if (kbase_is_region_invalid_or_free(region_ex)) {
504 			ret = -ENOENT;
505 			goto out_unlock_vm;
506 		}
507 
508 		if (buf_pages > (region_ex->nr_pages -
509 				 ((reg_ex->ex_buffer_base >> PAGE_SHIFT) - region_ex->start_pfn))) {
510 			ret = -EINVAL;
511 			goto out_unlock_vm;
512 		}
513 
514 		region_ex = kbase_region_tracker_find_region_enclosing_address(
515 			kctx, reg_ex->ex_offset_var_addr);
516 		if (kbase_is_region_invalid_or_free(region_ex)) {
517 			ret = -ENOENT;
518 			goto out_unlock_vm;
519 		}
520 	}
521 
522 	queue = kzalloc(sizeof(struct kbase_queue), GFP_KERNEL);
523 
524 	if (!queue) {
525 		ret = -ENOMEM;
526 		goto out_unlock_vm;
527 	}
528 
529 	queue->kctx = kctx;
530 	queue->base_addr = queue_addr;
531 
532 	queue->queue_reg = region;
533 	kbase_va_region_no_user_free_inc(region);
534 
535 	queue->size = (queue_size << PAGE_SHIFT);
536 	queue->csi_index = KBASEP_IF_NR_INVALID;
537 	queue->enabled = false;
538 
539 	queue->priority = reg->priority;
540 	kbase_refcount_set(&queue->refcount, 1);
541 
542 	queue->group = NULL;
543 	queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
544 	queue->handle = BASEP_MEM_INVALID_HANDLE;
545 	queue->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
546 
547 	queue->status_wait = 0;
548 	queue->sync_ptr = 0;
549 	queue->sync_value = 0;
550 
551 #if IS_ENABLED(CONFIG_DEBUG_FS)
552 	queue->saved_cmd_ptr = 0;
553 #endif
554 
555 	queue->sb_status = 0;
556 	queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
557 
558 	atomic_set(&queue->pending, 0);
559 
560 	INIT_LIST_HEAD(&queue->link);
561 	INIT_LIST_HEAD(&queue->error.link);
562 	INIT_WORK(&queue->oom_event_work, oom_event_worker);
563 	INIT_WORK(&queue->cs_error_work, cs_error_worker);
564 	list_add(&queue->link, &kctx->csf.queue_list);
565 
566 	queue->extract_ofs = 0;
567 
568 	region->user_data = queue;
569 
570 	/* Initialize the cs_trace configuration parameters. When buffer_size
571 	 * is 0, trace is disabled. Here we only update the fields when
572 	 * enabled, otherwise leave them as default zeros.
573 	 */
574 	if (reg_ex && reg_ex->ex_buffer_size) {
575 		u32 cfg = CS_INSTR_CONFIG_EVENT_SIZE_SET(
576 					0, reg_ex->ex_event_size);
577 		cfg = CS_INSTR_CONFIG_EVENT_STATE_SET(
578 					cfg, reg_ex->ex_event_state);
579 
580 		queue->trace_cfg = cfg;
581 		queue->trace_buffer_size = reg_ex->ex_buffer_size;
582 		queue->trace_buffer_base = reg_ex->ex_buffer_base;
583 		queue->trace_offset_ptr = reg_ex->ex_offset_var_addr;
584 	}
585 
586 out_unlock_vm:
587 	kbase_gpu_vm_unlock(kctx);
588 out:
589 	mutex_unlock(&kctx->csf.lock);
590 
591 	return ret;
592 }
593 
594 int kbase_csf_queue_register(struct kbase_context *kctx,
595 			     struct kbase_ioctl_cs_queue_register *reg)
596 {
597 	/* Validate the ring buffer configuration parameters */
598 	if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
599 	    reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
600 	    reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
601 	    reg->buffer_gpu_addr & ~PAGE_MASK)
602 		return -EINVAL;
603 
604 	return csf_queue_register_internal(kctx, reg, NULL);
605 }
606 
607 int kbase_csf_queue_register_ex(struct kbase_context *kctx,
608 				struct kbase_ioctl_cs_queue_register_ex *reg)
609 {
610 	struct kbase_csf_global_iface const *const iface =
611 						&kctx->kbdev->csf.global_iface;
612 	u32 const glb_version = iface->version;
613 	u32 instr = iface->instr_features;
614 	u8 max_size = GLB_INSTR_FEATURES_EVENT_SIZE_MAX_GET(instr);
615 	u32 min_buf_size = (1u << reg->ex_event_size) *
616 			GLB_INSTR_FEATURES_OFFSET_UPDATE_RATE_GET(instr);
617 
618 	/* If cs_trace_command not supported, the call fails */
619 	if (glb_version < kbase_csf_interface_version(1, 1, 0))
620 		return -EINVAL;
621 
622 	/* Validate the ring buffer configuration parameters */
623 	if (reg->buffer_size < CS_RING_BUFFER_MIN_SIZE ||
624 	    reg->buffer_size > CS_RING_BUFFER_MAX_SIZE ||
625 	    reg->buffer_size & (reg->buffer_size - 1) || !reg->buffer_gpu_addr ||
626 	    reg->buffer_gpu_addr & ~PAGE_MASK)
627 		return -EINVAL;
628 
629 	/* Validate the cs_trace configuration parameters */
630 	if (reg->ex_buffer_size &&
631 		((reg->ex_event_size > max_size) ||
632 			(reg->ex_buffer_size & (reg->ex_buffer_size - 1)) ||
633 			(reg->ex_buffer_size < min_buf_size)))
634 		return -EINVAL;
635 
636 	return csf_queue_register_internal(kctx, NULL, reg);
637 }
638 
639 static void unbind_queue(struct kbase_context *kctx,
640 		struct kbase_queue *queue);
641 
642 void kbase_csf_queue_terminate(struct kbase_context *kctx,
643 			      struct kbase_ioctl_cs_queue_terminate *term)
644 {
645 	struct kbase_device *kbdev = kctx->kbdev;
646 	struct kbase_queue *queue;
647 	int err;
648 	bool reset_prevented = false;
649 
650 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
651 	if (err)
652 		dev_warn(
653 			kbdev->dev,
654 			"Unsuccessful GPU reset detected when terminating queue (buffer_addr=0x%.16llx), attempting to terminate regardless",
655 			term->buffer_gpu_addr);
656 	else
657 		reset_prevented = true;
658 
659 	mutex_lock(&kctx->csf.lock);
660 	queue = find_queue(kctx, term->buffer_gpu_addr);
661 
662 	if (queue) {
663 		/* As the GPU queue has been terminated by the
664 		 * user space, undo the actions that were performed when the
665 		 * queue was registered i.e. remove the queue from the per
666 		 * context list & release the initial reference. The subsequent
667 		 * lookups for the queue in find_queue() would fail.
668 		 */
669 		list_del_init(&queue->link);
670 
671 		/* Stop the CSI to which queue was bound */
672 		unbind_queue(kctx, queue);
673 
674 		kbase_gpu_vm_lock(kctx);
675 		if (!WARN_ON(!queue->queue_reg))
676 			queue->queue_reg->user_data = NULL;
677 		kbase_gpu_vm_unlock(kctx);
678 
679 		release_queue(queue);
680 	}
681 
682 	mutex_unlock(&kctx->csf.lock);
683 	if (reset_prevented)
684 		kbase_reset_gpu_allow(kbdev);
685 }
686 
687 int kbase_csf_queue_bind(struct kbase_context *kctx, union kbase_ioctl_cs_queue_bind *bind)
688 {
689 	struct kbase_queue *queue;
690 	struct kbase_queue_group *group;
691 	u8 max_streams;
692 	int ret = -EINVAL;
693 
694 	mutex_lock(&kctx->csf.lock);
695 
696 	group = find_queue_group(kctx, bind->in.group_handle);
697 	queue = find_queue(kctx, bind->in.buffer_gpu_addr);
698 
699 	if (!group || !queue)
700 		goto out;
701 
702 	/* For the time being, all CSGs have the same number of CSs
703 	 * so we check CSG 0 for this number
704 	 */
705 	max_streams = kctx->kbdev->csf.global_iface.groups[0].stream_num;
706 
707 	if (bind->in.csi_index >= max_streams)
708 		goto out;
709 
710 	if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
711 		goto out;
712 
713 	if (queue->group || group->bound_queues[bind->in.csi_index])
714 		goto out;
715 
716 	ret = get_user_pages_mmap_handle(kctx, queue);
717 	if (ret)
718 		goto out;
719 
720 	bind->out.mmap_handle = queue->handle;
721 	group->bound_queues[bind->in.csi_index] = queue;
722 	queue->group = group;
723 	queue->csi_index = bind->in.csi_index;
724 	queue->bind_state = KBASE_CSF_QUEUE_BIND_IN_PROGRESS;
725 
726 out:
727 	mutex_unlock(&kctx->csf.lock);
728 
729 	return ret;
730 }
731 
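/**
 * get_bound_queue_group() - Get the group to which a queue is bound.
 *
 * @queue:	Pointer to the GPU command queue.
 *
 * Return: Pointer to the queue group if the queue is bound (or a bind is in
 *         progress) and the queue/group bookkeeping is consistent, otherwise
 *         NULL.
 */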
732 static struct kbase_queue_group *get_bound_queue_group(
733 					struct kbase_queue *queue)
734 {
735 	struct kbase_context *kctx = queue->kctx;
736 	struct kbase_queue_group *group;
737 
738 	if (queue->bind_state == KBASE_CSF_QUEUE_UNBOUND)
739 		return NULL;
740 
741 	if (!queue->group)
742 		return NULL;
743 
744 	if (queue->csi_index == KBASEP_IF_NR_INVALID) {
745 		dev_warn(kctx->kbdev->dev, "CS interface index is incorrect\n");
746 		return NULL;
747 	}
748 
749 	group = queue->group;
750 
751 	if (group->bound_queues[queue->csi_index] != queue) {
752 		dev_warn(kctx->kbdev->dev, "Incorrect mapping between queues & queue groups\n");
753 		return NULL;
754 	}
755 
756 	return group;
757 }
758 
759 static void enqueue_gpu_submission_work(struct kbase_context *const kctx)
760 {
761 	queue_work(system_highpri_wq, &kctx->csf.pending_submission_work);
762 }
763 
764 /**
765  * pending_submission_worker() - Work item to process pending kicked GPU command queues.
766  *
767  * @work: Pointer to pending_submission_work.
768  *
769  * This function starts all pending queues, for which the work
770  * was previously submitted via ioctl call from application thread.
771  * If the queue is already scheduled and resident, it will be started
772  * right away, otherwise once the group is made resident.
773  */
774 static void pending_submission_worker(struct work_struct *work)
775 {
776 	struct kbase_context *kctx =
777 		container_of(work, struct kbase_context, csf.pending_submission_work);
778 	struct kbase_device *kbdev = kctx->kbdev;
779 	struct kbase_queue *queue;
780 	int err = kbase_reset_gpu_prevent_and_wait(kbdev);
781 
782 	if (err) {
783 		dev_err(kbdev->dev, "Unsuccessful GPU reset detected when kicking queue ");
784 		return;
785 	}
786 
787 	mutex_lock(&kctx->csf.lock);
788 
789 	/* Iterate through the queue list and schedule the pending ones for submission. */
790 	list_for_each_entry(queue, &kctx->csf.queue_list, link) {
791 		if (atomic_cmpxchg(&queue->pending, 1, 0) == 1) {
792 			struct kbase_queue_group *group = get_bound_queue_group(queue);
793 			int ret;
794 
795 			if (!group || queue->bind_state != KBASE_CSF_QUEUE_BOUND) {
796 				dev_dbg(kbdev->dev, "queue is not bound to a group");
797 				continue;
798 			}
799 
800 			ret = kbase_csf_scheduler_queue_start(queue);
801 			if (unlikely(ret)) {
802 				dev_dbg(kbdev->dev, "Failed to start queue");
803 				if (ret == -EBUSY) {
804 					atomic_cmpxchg(&queue->pending, 0, 1);
805 					enqueue_gpu_submission_work(kctx);
806 				}
807 			}
808 		}
809 	}
810 
811 	mutex_unlock(&kctx->csf.lock);
812 
813 	kbase_reset_gpu_allow(kbdev);
814 }
815 
816 void kbase_csf_ring_csg_doorbell(struct kbase_device *kbdev, int slot)
817 {
818 	if (WARN_ON(slot < 0))
819 		return;
820 
821 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
822 
823 	kbase_csf_ring_csg_slots_doorbell(kbdev, (u32) (1 << slot));
824 }
825 
826 void kbase_csf_ring_csg_slots_doorbell(struct kbase_device *kbdev,
827 				       u32 slot_bitmap)
828 {
829 	const struct kbase_csf_global_iface *const global_iface =
830 		&kbdev->csf.global_iface;
831 	const u32 allowed_bitmap =
832 		(u32) ((1U << kbdev->csf.global_iface.group_num) - 1);
833 	u32 value;
834 
835 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
836 
837 	if (WARN_ON(slot_bitmap > allowed_bitmap))
838 		return;
839 
840 	/* The access to GLB_DB_REQ/ACK needs to be ordered with respect to CSG_REQ/ACK and
841 	 * CSG_DB_REQ/ACK to avoid a scenario where a CSI request overlaps with a CSG request
842 	 * or 2 CSI requests overlap and FW ends up missing the 2nd request.
843 	 * Memory barrier is required, both on Host and FW side, to guarantee the ordering.
844 	 *
845 	 * 'osh' is used as CPU and GPU would be in the same Outer shareable domain.
846 	 */
847 	dmb(osh);
848 
849 	value = kbase_csf_firmware_global_output(global_iface, GLB_DB_ACK);
850 	value ^= slot_bitmap;
851 	kbase_csf_firmware_global_input_mask(global_iface, GLB_DB_REQ, value,
852 					     slot_bitmap);
853 
854 	kbase_csf_ring_doorbell(kbdev, CSF_KERNEL_DOORBELL_NR);
855 }
856 
857 void kbase_csf_ring_cs_user_doorbell(struct kbase_device *kbdev,
858 			struct kbase_queue *queue)
859 {
860 	mutex_lock(&kbdev->csf.reg_lock);
861 
862 	if (queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID)
863 		kbase_csf_ring_doorbell(kbdev, queue->doorbell_nr);
864 
865 	mutex_unlock(&kbdev->csf.reg_lock);
866 }
867 
868 void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev,
869 				       int csi_index, int csg_nr,
870 				       bool ring_csg_doorbell)
871 {
872 	struct kbase_csf_cmd_stream_group_info *ginfo;
873 	u32 value;
874 
875 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
876 
877 	if (WARN_ON(csg_nr < 0) ||
878 	    WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
879 		return;
880 
881 	ginfo = &kbdev->csf.global_iface.groups[csg_nr];
882 
883 	if (WARN_ON(csi_index < 0) ||
884 	    WARN_ON(csi_index >= ginfo->stream_num))
885 		return;
886 
887 	/* The access to CSG_DB_REQ/ACK needs to be ordered with respect to
888 	 * CS_REQ/ACK to avoid a scenario where CSG_DB_REQ/ACK becomes visible to
889 	 * FW before CS_REQ/ACK is set.
890 	 *
891 	 * 'osh' is used as CPU and GPU would be in the same outer shareable domain.
892 	 */
893 	dmb(osh);
894 
895 	value = kbase_csf_firmware_csg_output(ginfo, CSG_DB_ACK);
896 	value ^= (1 << csi_index);
897 	kbase_csf_firmware_csg_input_mask(ginfo, CSG_DB_REQ, value,
898 					  1 << csi_index);
899 
900 	if (likely(ring_csg_doorbell))
901 		kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
902 }
903 
904 int kbase_csf_queue_kick(struct kbase_context *kctx,
905 			 struct kbase_ioctl_cs_queue_kick *kick)
906 {
907 	struct kbase_device *kbdev = kctx->kbdev;
908 	bool trigger_submission = false;
909 	struct kbase_va_region *region;
910 	int err = 0;
911 
912 	KBASE_TLSTREAM_TL_KBASE_GPUCMDQUEUE_KICK(kbdev, kctx->id, kick->buffer_gpu_addr);
913 
914 	/* GPU work submission happens asynchronously to avoid contention on the
915 	 * scheduler lock, which would otherwise block the application thread. For
916 	 * this reason, the vm_lock is used here to look up the queue by its
917 	 * buffer_gpu_addr in the context's list of active va_regions.
918 	 * Once the target queue is found, its pending flag is set to one atomically,
919 	 * avoiding a race between the submission ioctl thread and the work item.
920 	 */
921 	kbase_gpu_vm_lock(kctx);
922 	region = kbase_region_tracker_find_region_enclosing_address(kctx, kick->buffer_gpu_addr);
923 	if (!kbase_is_region_invalid_or_free(region)) {
924 		struct kbase_queue *queue = region->user_data;
925 
926 		if (queue) {
927 			atomic_cmpxchg(&queue->pending, 0, 1);
928 			trigger_submission = true;
929 		}
930 	} else {
931 		dev_dbg(kbdev->dev,
932 			"Attempt to kick GPU queue without a valid command buffer region");
933 		err = -EFAULT;
934 	}
935 	kbase_gpu_vm_unlock(kctx);
936 
937 	if (likely(trigger_submission))
938 		enqueue_gpu_submission_work(kctx);
939 
940 	return err;
941 }
942 
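/**
 * unbind_stopped_queue() - Unbind a stopped queue from its queue group.
 *
 * @kctx:	Address of the kbase context within which the queue was created.
 * @queue:	Pointer to the queue to be unbound.
 *
 * Clears the pending protected mode request for the CSI, severs the linkage
 * between the queue and its group, and releases the mmap handle that was
 * reserved for the queue, leaving the queue in the unbound state.
 */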
943 static void unbind_stopped_queue(struct kbase_context *kctx,
944 			struct kbase_queue *queue)
945 {
946 	lockdep_assert_held(&kctx->csf.lock);
947 
948 	if (WARN_ON(queue->csi_index < 0))
949 		return;
950 
951 	if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
952 		unsigned long flags;
953 
954 		kbase_csf_scheduler_spin_lock(kctx->kbdev, &flags);
955 		bitmap_clear(queue->group->protm_pending_bitmap,
956 				queue->csi_index, 1);
957 		KBASE_KTRACE_ADD_CSF_GRP_Q(kctx->kbdev, CSI_PROTM_PEND_CLEAR,
958 			 queue->group, queue, queue->group->protm_pending_bitmap[0]);
959 		queue->group->bound_queues[queue->csi_index] = NULL;
960 		queue->group = NULL;
961 		kbase_csf_scheduler_spin_unlock(kctx->kbdev, flags);
962 
963 		put_user_pages_mmap_handle(kctx, queue);
964 		WARN_ON_ONCE(queue->doorbell_nr != KBASEP_USER_DB_NR_INVALID);
965 		queue->bind_state = KBASE_CSF_QUEUE_UNBOUND;
966 	}
967 }
968 /**
969  * unbind_queue() - Remove the linkage between a GPU command queue and the group
970  *		    to which it was bound or being bound.
971  *
972  * @kctx:	Address of the kbase context within which the queue was created.
973  * @queue:	Pointer to the queue to be unlinked.
974  *
975  * This function will also send the stop request to firmware for the CS
976  * if the group to which the GPU command queue was bound is scheduled.
977  *
978  * This function would be called when :-
979  * - queue is being unbound. This would happen when the IO mapping
980  *   created on bind is removed explicitly by userspace or the process
981  *   is getting exited.
982  * - queue group is being terminated which still has queues bound
983  *   to it. This could happen on an explicit terminate request from userspace
984  *   or when the kbase context is being terminated.
985  * - queue is being terminated without completing the bind operation.
986  *   This could happen if the queue group is terminated
987  *   after the CS_QUEUE_BIND ioctl but before the 2nd part of the bind
988  *   operation, which creates the IO mapping, is initiated.
989  * - There is a failure in executing the 2nd part of bind operation, inside the
990  *   mmap handler, which creates the IO mapping for queue.
991  */
992 
993 static void unbind_queue(struct kbase_context *kctx, struct kbase_queue *queue)
994 {
995 	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
996 	lockdep_assert_held(&kctx->csf.lock);
997 
998 	if (queue->bind_state != KBASE_CSF_QUEUE_UNBOUND) {
999 		if (queue->bind_state == KBASE_CSF_QUEUE_BOUND)
1000 			kbase_csf_scheduler_queue_stop(queue);
1001 
1002 		unbind_stopped_queue(kctx, queue);
1003 	}
1004 }
1005 
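/**
 * kbase_csf_queue_phys_allocated() - Check if physical pages are allocated for a queue.
 *
 * @queue:	Pointer to the queue.
 *
 * Return: true if the USER IO physical pages of the queue are allocated,
 *         otherwise false.
 */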
1006 static bool kbase_csf_queue_phys_allocated(struct kbase_queue *queue)
1007 {
1008 	/* The queue's phys are zeroed when allocation fails. Both entries being
1009 	 * zero is an impossible condition for a successfully allocated set of phys pages.
1010 	 */
1011 
1012 	return (queue->phys[0].tagged_addr | queue->phys[1].tagged_addr);
1013 }
1014 
1015 void kbase_csf_queue_unbind(struct kbase_queue *queue, bool process_exit)
1016 {
1017 	struct kbase_context *kctx = queue->kctx;
1018 
1019 	lockdep_assert_held(&kctx->csf.lock);
1020 
1021 	/* As the process itself is exiting, the termination of queue group can
1022 	 * be done which would be much faster than stopping of individual
1023 	 * queues. This would ensure a faster exit for the process especially
1024 	 * in the case where CSI gets stuck.
1025 	 * The CSI STOP request will wait for the in flight work to drain
1026 	 * whereas CSG TERM request would result in an immediate abort or
1027 	 * cancellation of the pending work.
1028 	 */
1029 	if (process_exit) {
1030 		struct kbase_queue_group *group = get_bound_queue_group(queue);
1031 
1032 		if (group)
1033 			term_queue_group(group);
1034 
1035 		WARN_ON(queue->bind_state != KBASE_CSF_QUEUE_UNBOUND);
1036 	} else {
1037 		unbind_queue(kctx, queue);
1038 	}
1039 
1040 	/* Free the resources, if allocated phys for this queue */
1041 	if (kbase_csf_queue_phys_allocated(queue))
1042 		kbase_csf_free_command_stream_user_pages(kctx, queue);
1043 }
1044 
1045 void kbase_csf_queue_unbind_stopped(struct kbase_queue *queue)
1046 {
1047 	struct kbase_context *kctx = queue->kctx;
1048 
1049 	lockdep_assert_held(&kctx->csf.lock);
1050 
1051 	WARN_ON(queue->bind_state == KBASE_CSF_QUEUE_BOUND);
1052 	unbind_stopped_queue(kctx, queue);
1053 
1054 	/* Free the resources, if allocated phys for this queue */
1055 	if (kbase_csf_queue_phys_allocated(queue))
1056 		kbase_csf_free_command_stream_user_pages(kctx, queue);
1057 }
1058 
1059 /**
1060  * find_free_group_handle() - Find a free handle for a queue group
1061  *
1062  * @kctx: Address of the kbase context within which the queue group
1063  *        is to be created.
1064  *
1065  * Return: a queue group handle on success, or a negative error code on failure.
1066  */
1067 static int find_free_group_handle(struct kbase_context *const kctx)
1068 {
1069 	/* find the available index in the array of CSGs per this context */
1070 	int idx, group_handle = -ENOMEM;
1071 
1072 	lockdep_assert_held(&kctx->csf.lock);
1073 
1074 	for (idx = 0;
1075 		(idx != MAX_QUEUE_GROUP_NUM) && (group_handle < 0);
1076 		idx++) {
1077 		if (!kctx->csf.queue_groups[idx])
1078 			group_handle = idx;
1079 	}
1080 
1081 	return group_handle;
1082 }
1083 
1084 /**
1085  * iface_has_enough_streams() - Check that at least one CSG supports
1086  *                              a given number of CS
1087  *
1088  * @kbdev:  Instance of a GPU platform device that implements a CSF interface.
1089  * @cs_min: Minimum number of CSs required.
1090  *
1091  * Return: true if at least one CSG supports the given number
1092  *         of CSs (or more); otherwise false.
1093  */
1094 static bool iface_has_enough_streams(struct kbase_device *const kbdev,
1095 	u32 const cs_min)
1096 {
1097 	bool has_enough = false;
1098 	struct kbase_csf_cmd_stream_group_info *const groups =
1099 		kbdev->csf.global_iface.groups;
1100 	const u32 group_num = kbdev->csf.global_iface.group_num;
1101 	u32 i;
1102 
1103 	for (i = 0; (i < group_num) && !has_enough; i++) {
1104 		if (groups[i].stream_num >= cs_min)
1105 			has_enough = true;
1106 	}
1107 
1108 	return has_enough;
1109 }
1110 
1111 /**
1112  * create_normal_suspend_buffer() - Create normal-mode suspend buffer per
1113  *					queue group
1114  *
1115  * @kctx:	Pointer to kbase context where the queue group is created at
1116  * @s_buf:	Pointer to suspend buffer that is attached to queue group
1117  *
1118  * Return: 0 if the physical pages for the suspend buffer are successfully
1119  *         allocated, otherwise -ENOMEM or another error code.
1120  */
1121 static int create_normal_suspend_buffer(struct kbase_context *const kctx,
1122 		struct kbase_normal_suspend_buffer *s_buf)
1123 {
1124 	const size_t nr_pages =
1125 		PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1126 	int err;
1127 
1128 	lockdep_assert_held(&kctx->csf.lock);
1129 
1130 	/* The suspend buffer's mapping address is valid only when the CSG is to
1131 	 * run on a slot. Initialize it to 0, signalling that the buffer is not mapped.
1132 	 */
1133 	s_buf->gpu_va = 0;
1134 
1135 	s_buf->phy = kcalloc(nr_pages, sizeof(*s_buf->phy), GFP_KERNEL);
1136 
1137 	if (!s_buf->phy)
1138 		return -ENOMEM;
1139 
1140 	/* Get physical page for a normal suspend buffer */
1141 	err = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1142 					 &s_buf->phy[0], false, kctx->task);
1143 
1144 	if (err < 0) {
1145 		kfree(s_buf->phy);
1146 		return err;
1147 	}
1148 
1149 	kbase_process_page_usage_inc(kctx, nr_pages);
1150 	return 0;
1151 }
1152 
1153 static void timer_event_worker(struct work_struct *data);
1154 static void protm_event_worker(struct work_struct *data);
1155 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
1156 		struct kbase_normal_suspend_buffer *s_buf);
1157 
1158 /**
1159  * create_suspend_buffers - Setup normal and protected mode
1160  *				suspend buffers.
1161  *
1162  * @kctx:	Address of the kbase context within which the queue group
1163  *		is to be created.
1164  * @group:	Pointer to GPU command queue group data.
1165  *
1166  * Return: 0 if suspend buffers are successfully allocated. Otherwise -ENOMEM.
1167  */
1168 static int create_suspend_buffers(struct kbase_context *const kctx,
1169 		struct kbase_queue_group * const group)
1170 {
1171 	if (create_normal_suspend_buffer(kctx, &group->normal_suspend_buf)) {
1172 		dev_err(kctx->kbdev->dev, "Failed to create normal suspend buffer\n");
1173 		return -ENOMEM;
1174 	}
1175 
1176 	/* Protected suspend buffer, runtime binding so just initialize it */
1177 	group->protected_suspend_buf.gpu_va = 0;
1178 	group->protected_suspend_buf.pma = NULL;
1179 	group->protected_suspend_buf.alloc_retries = 0;
1180 
1181 	return 0;
1182 }
1183 
1184 /**
1185  * generate_group_uid() - Makes an ID unique to all kernel base devices
1186  *                        and contexts, for a queue group and CSG.
1187  *
1188  * Return:      A unique ID in the form of an unsigned 32-bit integer
1189  */
1190 static u32 generate_group_uid(void)
1191 {
1192 	static atomic_t global_csg_uid = ATOMIC_INIT(0);
1193 
1194 	return (u32)atomic_inc_return(&global_csg_uid);
1195 }
1196 
1197 /**
1198  * create_queue_group() - Create a queue group
1199  *
1200  * @kctx:	Address of the kbase context within which the queue group
1201  *		is to be created.
1202  * @create:	Address of a structure which contains details of the
1203  *		queue group which is to be created.
1204  *
1205  * Return: a queue group handle on success, or a negative error code on failure.
1206  */
1207 static int create_queue_group(struct kbase_context *const kctx,
1208 	union kbase_ioctl_cs_queue_group_create *const create)
1209 {
1210 	int group_handle = find_free_group_handle(kctx);
1211 
1212 	if (group_handle < 0) {
1213 		dev_dbg(kctx->kbdev->dev,
1214 			"All queue group handles are already in use");
1215 	} else {
1216 		struct kbase_queue_group * const group =
1217 			kmalloc(sizeof(struct kbase_queue_group),
1218 					GFP_KERNEL);
1219 
1220 		lockdep_assert_held(&kctx->csf.lock);
1221 
1222 		if (!group) {
1223 			dev_err(kctx->kbdev->dev, "Failed to allocate a queue group\n");
1224 			group_handle = -ENOMEM;
1225 		} else {
1226 			int err = 0;
1227 
1228 			group->kctx = kctx;
1229 			group->handle = group_handle;
1230 			group->csg_nr = KBASEP_CSG_NR_INVALID;
1231 
1232 			group->tiler_mask = create->in.tiler_mask;
1233 			group->fragment_mask = create->in.fragment_mask;
1234 			group->compute_mask = create->in.compute_mask;
1235 
1236 			group->tiler_max = create->in.tiler_max;
1237 			group->fragment_max = create->in.fragment_max;
1238 			group->compute_max = create->in.compute_max;
1239 			group->csi_handlers = create->in.csi_handlers;
1240 			group->priority = kbase_csf_priority_queue_group_priority_to_relative(
1241 				kbase_csf_priority_check(kctx->kbdev, create->in.priority));
1242 			group->doorbell_nr = KBASEP_USER_DB_NR_INVALID;
1243 			group->faulted = false;
1244 			group->cs_unrecoverable = false;
1245 			group->reevaluate_idle_status = false;
1246 
1247 			group->csg_reg = NULL;
1248 			group->csg_reg_bind_retries = 0;
1249 
1250 			group->dvs_buf = create->in.dvs_buf;
1251 
1252 #if IS_ENABLED(CONFIG_DEBUG_FS)
1253 			group->deschedule_deferred_cnt = 0;
1254 #endif
1255 
1256 			group->group_uid = generate_group_uid();
1257 			create->out.group_uid = group->group_uid;
1258 
1259 			INIT_LIST_HEAD(&group->link);
1260 			INIT_LIST_HEAD(&group->link_to_schedule);
1261 			INIT_LIST_HEAD(&group->error_fatal.link);
1262 			INIT_LIST_HEAD(&group->error_timeout.link);
1263 			INIT_LIST_HEAD(&group->error_tiler_oom.link);
1264 			INIT_WORK(&group->timer_event_work, timer_event_worker);
1265 			INIT_WORK(&group->protm_event_work, protm_event_worker);
1266 			bitmap_zero(group->protm_pending_bitmap,
1267 					MAX_SUPPORTED_STREAMS_PER_GROUP);
1268 
1269 			group->run_state = KBASE_CSF_GROUP_INACTIVE;
1270 			KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_INACTIVE, group,
1271 						group->run_state);
1272 
1273 			err = create_suspend_buffers(kctx, group);
1274 
1275 			if (err < 0) {
1276 				kfree(group);
1277 				group_handle = err;
1278 			} else {
1279 				int j;
1280 
1281 				kctx->csf.queue_groups[group_handle] = group;
1282 				for (j = 0; j < MAX_SUPPORTED_STREAMS_PER_GROUP;
1283 						j++)
1284 					group->bound_queues[j] = NULL;
1285 			}
1286 		}
1287 	}
1288 
1289 	return group_handle;
1290 }
1291 
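/**
 * dvs_supported() - Check whether the firmware interface supports DVS.
 *
 * @csf_version: Version of the CSF global interface.
 *
 * Return: true if the global interface version is 3.2 or later, otherwise false.
 */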
1292 static bool dvs_supported(u32 csf_version)
1293 {
1294 	if (GLB_VERSION_MAJOR_GET(csf_version) < 3)
1295 		return false;
1296 
1297 	if (GLB_VERSION_MAJOR_GET(csf_version) == 3)
1298 		if (GLB_VERSION_MINOR_GET(csf_version) < 2)
1299 			return false;
1300 
1301 	return true;
1302 }
1303 
1304 int kbase_csf_queue_group_create(struct kbase_context *const kctx,
1305 			union kbase_ioctl_cs_queue_group_create *const create)
1306 {
1307 	int err = 0;
1308 	const u32 tiler_count = hweight64(create->in.tiler_mask);
1309 	const u32 fragment_count = hweight64(create->in.fragment_mask);
1310 	const u32 compute_count = hweight64(create->in.compute_mask);
1311 	size_t i;
1312 
1313 	for (i = 0; i < sizeof(create->in.padding); i++) {
1314 		if (create->in.padding[i] != 0) {
1315 			dev_warn(kctx->kbdev->dev, "Invalid padding not 0 in queue group create\n");
1316 			return -EINVAL;
1317 		}
1318 	}
1319 
1320 	mutex_lock(&kctx->csf.lock);
1321 
1322 	if ((create->in.tiler_max > tiler_count) ||
1323 	    (create->in.fragment_max > fragment_count) ||
1324 	    (create->in.compute_max > compute_count)) {
1325 		dev_dbg(kctx->kbdev->dev,
1326 			"Invalid maximum number of endpoints for a queue group");
1327 		err = -EINVAL;
1328 	} else if (create->in.priority >= BASE_QUEUE_GROUP_PRIORITY_COUNT) {
1329 		dev_dbg(kctx->kbdev->dev, "Invalid queue group priority %u",
1330 			(unsigned int)create->in.priority);
1331 		err = -EINVAL;
1332 	} else if (!iface_has_enough_streams(kctx->kbdev, create->in.cs_min)) {
1333 		dev_dbg(kctx->kbdev->dev,
1334 			"No CSG has at least %d CSs",
1335 			create->in.cs_min);
1336 		err = -EINVAL;
1337 	} else if (create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK) {
1338 		dev_warn(kctx->kbdev->dev, "Unknown exception handler flags set: %u",
1339 			 create->in.csi_handlers & ~BASE_CSF_EXCEPTION_HANDLER_FLAGS_MASK);
1340 		err = -EINVAL;
1341 	} else if (!dvs_supported(kctx->kbdev->csf.global_iface.version) &&
1342 		   create->in.dvs_buf) {
1343 		dev_warn(
1344 			kctx->kbdev->dev,
1345 			"GPU does not support DVS but userspace is trying to use it");
1346 		err = -EINVAL;
1347 	} else if (dvs_supported(kctx->kbdev->csf.global_iface.version) &&
1348 		   !CSG_DVS_BUF_BUFFER_POINTER_GET(create->in.dvs_buf) &&
1349 		   CSG_DVS_BUF_BUFFER_SIZE_GET(create->in.dvs_buf)) {
1350 		dev_warn(kctx->kbdev->dev,
1351 			 "DVS buffer pointer is null but size is not 0");
1352 		err = -EINVAL;
1353 	} else {
1354 		/* For the CSG which satisfies the condition for having
1355 		 * the needed number of CSs, check whether it also conforms
1356 		 * with the requirements for at least one of its CSs having
1357 		 * the iterator of the needed type
1358 		 * (note: for CSF v1.0 all CSs in a CSG will have access to
1359 		 * the same iterators)
1360 		 */
1361 		const int group_handle = create_queue_group(kctx, create);
1362 
1363 		if (group_handle >= 0)
1364 			create->out.group_handle = group_handle;
1365 		else
1366 			err = group_handle;
1367 	}
1368 
1369 	mutex_unlock(&kctx->csf.lock);
1370 
1371 	return err;
1372 }
1373 
1374 /**
1375  * term_normal_suspend_buffer() - Free normal-mode suspend buffer of queue group
1376  *
1377  * @kctx:	Pointer to kbase context where queue group belongs to
1378  * @s_buf:	Pointer to queue group suspend buffer to be freed
1379  */
1380 static void term_normal_suspend_buffer(struct kbase_context *const kctx,
1381 				       struct kbase_normal_suspend_buffer *s_buf)
1382 {
1383 	const size_t nr_pages = PFN_UP(kctx->kbdev->csf.global_iface.groups[0].suspend_size);
1384 
1385 	lockdep_assert_held(&kctx->csf.lock);
1386 
1387 	/* The group should not have a bind remaining on any suspend buf region */
1388 	WARN_ONCE(s_buf->gpu_va, "Suspend buffer address should be 0 at termination");
1389 
1390 	kbase_mem_pool_free_pages(&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], nr_pages,
1391 				  &s_buf->phy[0], false, false);
1392 	kbase_process_page_usage_dec(kctx, nr_pages);
1393 
1394 	kfree(s_buf->phy);
1395 	s_buf->phy = NULL;
1396 }
1397 
1398 /**
1399  * term_protected_suspend_buffer() - Free protected-mode suspend buffer of
1400  *					queue group
1401  *
1402  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
1403  * @sbuf: Pointer to queue group suspend buffer to be freed
1404  */
1405 static void term_protected_suspend_buffer(struct kbase_device *const kbdev,
1406 					  struct kbase_protected_suspend_buffer *sbuf)
1407 {
1408 	WARN_ONCE(sbuf->gpu_va, "Suspend buf should have been unmapped inside scheduler!");
1409 	if (sbuf->pma) {
1410 		const size_t nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
1411 		kbase_csf_protected_memory_free(kbdev, sbuf->pma, nr_pages, true);
1412 		sbuf->pma = NULL;
1413 	}
1414 }
1415 
1416 void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
1417 {
1418 	struct kbase_context *kctx = group->kctx;
1419 
1420 	/* Currently each group supports the same number of CS */
1421 	u32 max_streams =
1422 		kctx->kbdev->csf.global_iface.groups[0].stream_num;
1423 	u32 i;
1424 
1425 	lockdep_assert_held(&kctx->csf.lock);
1426 
1427 	WARN_ON(group->run_state != KBASE_CSF_GROUP_INACTIVE &&
1428 		group->run_state != KBASE_CSF_GROUP_FAULT_EVICTED);
1429 
1430 	for (i = 0; i < max_streams; i++) {
1431 		struct kbase_queue *queue =
1432 				group->bound_queues[i];
1433 
1434 		/* The group is already being evicted from the scheduler */
1435 		if (queue)
1436 			unbind_stopped_queue(kctx, queue);
1437 	}
1438 
1439 	term_normal_suspend_buffer(kctx, &group->normal_suspend_buf);
1440 	if (kctx->kbdev->csf.pma_dev)
1441 		term_protected_suspend_buffer(kctx->kbdev,
1442 			&group->protected_suspend_buf);
1443 
1444 	group->run_state = KBASE_CSF_GROUP_TERMINATED;
1445 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, CSF_GROUP_TERMINATED, group, group->run_state);
1446 }
1447 
1448 /**
1449  * term_queue_group - Terminate a GPU command queue group.
1450  *
1451  * @group: Pointer to GPU command queue group data.
1452  *
1453  * Terminates a GPU command queue group. From the userspace perspective the
1454  * group will still exist but it can't bind new queues to it. Userspace can
1455  * still add work in queues bound to the group but it won't be executed. (This
1456  * is because the IO mapping created upon binding such queues is still intact.)
1457  */
1458 static void term_queue_group(struct kbase_queue_group *group)
1459 {
1460 	struct kbase_context *kctx = group->kctx;
1461 
1462 	kbase_reset_gpu_assert_failed_or_prevented(kctx->kbdev);
1463 	lockdep_assert_held(&kctx->csf.lock);
1464 
1465 	/* Stop the group and evict it from the scheduler */
1466 	kbase_csf_scheduler_group_deschedule(group);
1467 
1468 	if (group->run_state == KBASE_CSF_GROUP_TERMINATED)
1469 		return;
1470 
1471 	dev_dbg(kctx->kbdev->dev, "group %d terminating", group->handle);
1472 
1473 	kbase_csf_term_descheduled_queue_group(group);
1474 }
1475 
1476 /**
1477  * wait_group_deferred_deschedule_completion - Wait for refcount of the group to
1478  *         become 0 that was taken when the group deschedule had to be deferred.
1479  *
1480  * @group: Pointer to GPU command queue group that is being deleted.
1481  *
1482  * This function is called when Userspace deletes the group and after the group
1483  * has been descheduled. The function synchronizes with the other threads that were
1484  * also trying to deschedule the group whilst the dumping was going on for a fault.
1485  * Please refer the documentation of wait_for_dump_complete_on_group_deschedule()
1486  * for more details.
1487  */
1488 static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *group)
1489 {
1490 #if IS_ENABLED(CONFIG_DEBUG_FS)
1491 	struct kbase_context *kctx = group->kctx;
1492 
1493 	lockdep_assert_held(&kctx->csf.lock);
1494 
1495 	if (likely(!group->deschedule_deferred_cnt))
1496 		return;
1497 
1498 	mutex_unlock(&kctx->csf.lock);
1499 	wait_event(kctx->kbdev->csf.event_wait, !group->deschedule_deferred_cnt);
1500 	mutex_lock(&kctx->csf.lock);
1501 #endif
1502 }
1503 
1504 static void cancel_queue_group_events(struct kbase_queue_group *group)
1505 {
1506 	cancel_work_sync(&group->timer_event_work);
1507 	cancel_work_sync(&group->protm_event_work);
1508 }
1509 
1510 static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
1511 {
1512 	struct kbase_context *kctx = group->kctx;
1513 
1514 	dev_dbg(kctx->kbdev->dev,
1515 		"Remove any pending group fatal error from context %pK\n",
1516 		(void *)group->kctx);
1517 
1518 	kbase_csf_event_remove_error(kctx, &group->error_tiler_oom);
1519 	kbase_csf_event_remove_error(kctx, &group->error_timeout);
1520 	kbase_csf_event_remove_error(kctx, &group->error_fatal);
1521 }
1522 
1523 void kbase_csf_queue_group_terminate(struct kbase_context *kctx,
1524 				     u8 group_handle)
1525 {
1526 	struct kbase_queue_group *group;
1527 	int err;
1528 	bool reset_prevented = false;
1529 	struct kbase_device *const kbdev = kctx->kbdev;
1530 
1531 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
1532 	if (err)
1533 		dev_warn(
1534 			kbdev->dev,
1535 			"Unsuccessful GPU reset detected when terminating group %d, attempting to terminate regardless",
1536 			group_handle);
1537 	else
1538 		reset_prevented = true;
1539 
1540 	mutex_lock(&kctx->csf.lock);
1541 
1542 	group = find_queue_group(kctx, group_handle);
1543 
1544 	if (group) {
1545 		kctx->csf.queue_groups[group_handle] = NULL;
1546 		/* Stop the running of the given group */
1547 		term_queue_group(group);
1548 		mutex_unlock(&kctx->csf.lock);
1549 
1550 		if (reset_prevented) {
1551 			/* Allow GPU reset before cancelling the group specific
1552 			 * work item to avoid potential deadlock.
1553 			 * Reset prevention isn't needed after group termination.
1554 			 */
1555 			kbase_reset_gpu_allow(kbdev);
1556 			reset_prevented = false;
1557 		}
1558 
1559 		/* Cancel any pending event callbacks. If one is in progress
1560 		 * then this thread waits synchronously for it to complete (which
1561 		 * is why we must unlock the context first). We already ensured
1562 		 * that no more callbacks can be enqueued by terminating the group.
1563 		 */
1564 		cancel_queue_group_events(group);
1565 
1566 		mutex_lock(&kctx->csf.lock);
1567 
1568 		/* Clean up after the termination */
1569 		remove_pending_group_fatal_error(group);
1570 
1571 		wait_group_deferred_deschedule_completion(group);
1572 	}
1573 
1574 	mutex_unlock(&kctx->csf.lock);
1575 	if (reset_prevented)
1576 		kbase_reset_gpu_allow(kbdev);
1577 
1578 	kfree(group);
1579 }
1580 KBASE_EXPORT_TEST_API(kbase_csf_queue_group_terminate);
1581 
1582 #if IS_ENABLED(CONFIG_MALI_VECTOR_DUMP) || MALI_UNIT_TEST
1583 int kbase_csf_queue_group_suspend(struct kbase_context *kctx,
1584 				  struct kbase_suspend_copy_buffer *sus_buf,
1585 				  u8 group_handle)
1586 {
1587 	struct kbase_device *const kbdev = kctx->kbdev;
1588 	int err;
1589 	struct kbase_queue_group *group;
1590 
1591 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
1592 	if (err) {
1593 		dev_warn(
1594 			kbdev->dev,
1595 			"Unsuccessful GPU reset detected when suspending group %d",
1596 			group_handle);
1597 		return err;
1598 	}
1599 	mutex_lock(&kctx->csf.lock);
1600 
1601 	group = find_queue_group(kctx, group_handle);
1602 	if (group)
1603 		err = kbase_csf_scheduler_group_copy_suspend_buf(group,
1604 								 sus_buf);
1605 	else
1606 		err = -EINVAL;
1607 
1608 	mutex_unlock(&kctx->csf.lock);
1609 	kbase_reset_gpu_allow(kbdev);
1610 
1611 	return err;
1612 }
1613 #endif
1614 
1615 void kbase_csf_add_group_fatal_error(
1616 	struct kbase_queue_group *const group,
1617 	struct base_gpu_queue_group_error const *const err_payload)
1618 {
1619 	struct base_csf_notification error;
1620 
1621 	if (WARN_ON(!group))
1622 		return;
1623 
1624 	if (WARN_ON(!err_payload))
1625 		return;
1626 
1627 	error = (struct base_csf_notification) {
1628 		.type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
1629 		.payload = {
1630 			.csg_error = {
1631 				.handle = group->handle,
1632 				.error = *err_payload
1633 			}
1634 		}
1635 	};
1636 
1637 	kbase_csf_event_add_error(group->kctx, &group->error_fatal, &error);
1638 }
1639 
1640 void kbase_csf_active_queue_groups_reset(struct kbase_device *kbdev,
1641 					 struct kbase_context *kctx)
1642 {
1643 	struct list_head evicted_groups;
1644 	struct kbase_queue_group *group;
1645 	int i;
1646 
1647 	INIT_LIST_HEAD(&evicted_groups);
1648 
1649 	mutex_lock(&kctx->csf.lock);
1650 
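	/* Evict all on-slot groups of this context from the scheduler, then
	 * terminate each evicted group as well as any group already marked
	 * as fault-evicted.
	 */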
1651 	kbase_csf_scheduler_evict_ctx_slots(kbdev, kctx, &evicted_groups);
1652 	while (!list_empty(&evicted_groups)) {
1653 		group = list_first_entry(&evicted_groups,
1654 				struct kbase_queue_group, link);
1655 
1656 		dev_dbg(kbdev->dev, "Context %d_%d active group %d terminated",
1657 			    kctx->tgid, kctx->id, group->handle);
1658 		kbase_csf_term_descheduled_queue_group(group);
1659 		list_del_init(&group->link);
1660 	}
1661 
1662 	/* Act on the queue groups that are still pending termination. */
1663 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1664 		group = kctx->csf.queue_groups[i];
1665 		if (group &&
1666 		    group->run_state == KBASE_CSF_GROUP_FAULT_EVICTED)
1667 			kbase_csf_term_descheduled_queue_group(group);
1668 	}
1669 
1670 	mutex_unlock(&kctx->csf.lock);
1671 }
1672 
1673 int kbase_csf_ctx_init(struct kbase_context *kctx)
1674 {
1675 	int err = -ENOMEM;
1676 
1677 	INIT_LIST_HEAD(&kctx->csf.queue_list);
1678 	INIT_LIST_HEAD(&kctx->csf.link);
1679 
1680 	kbase_csf_event_init(kctx);
1681 
1682 	/* Mark all the cookies as 'free' */
1683 	bitmap_fill(kctx->csf.cookies, KBASE_CSF_NUM_USER_IO_PAGES_HANDLE);
1684 
1685 	kctx->csf.wq = alloc_workqueue("mali_kbase_csf_wq",
1686 					WQ_UNBOUND, 1);
1687 
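	/* Initialise the per-context CSF sub-modules in order; if any step
	 * fails, the ones already initialised are torn down in reverse order
	 * by the error paths below.
	 */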
1688 	if (likely(kctx->csf.wq)) {
1689 		err = kbase_csf_scheduler_context_init(kctx);
1690 
1691 		if (likely(!err)) {
1692 			err = kbase_csf_kcpu_queue_context_init(kctx);
1693 
1694 			if (likely(!err)) {
1695 				err = kbase_csf_tiler_heap_context_init(kctx);
1696 
1697 				if (likely(!err)) {
1698 					mutex_init(&kctx->csf.lock);
1699 					INIT_WORK(&kctx->csf.pending_submission_work,
1700 						  pending_submission_worker);
1701 
1702 					err = kbasep_ctx_user_reg_page_mapping_init(kctx);
1703 
1704 					if (unlikely(err))
1705 						kbase_csf_tiler_heap_context_term(kctx);
1706 				}
1707 
1708 				if (unlikely(err))
1709 					kbase_csf_kcpu_queue_context_term(kctx);
1710 			}
1711 
1712 			if (unlikely(err))
1713 				kbase_csf_scheduler_context_term(kctx);
1714 		}
1715 
1716 		if (unlikely(err))
1717 			destroy_workqueue(kctx->csf.wq);
1718 	}
1719 
1720 	return err;
1721 }
1722 
1723 void kbase_csf_ctx_handle_fault(struct kbase_context *kctx,
1724 		struct kbase_fault *fault)
1725 {
1726 	int gr;
1727 	bool reported = false;
1728 	struct base_gpu_queue_group_error err_payload;
1729 	int err;
1730 	struct kbase_device *kbdev;
1731 
1732 	if (WARN_ON(!kctx))
1733 		return;
1734 
1735 	if (WARN_ON(!fault))
1736 		return;
1737 
1738 	kbdev = kctx->kbdev;
1739 	err = kbase_reset_gpu_try_prevent(kbdev);
1740 	/* Regardless of whether reset failed or is currently happening, exit
1741 	 * early
1742 	 */
1743 	if (err)
1744 		return;
1745 
1746 	err_payload = (struct base_gpu_queue_group_error) {
1747 		.error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
1748 		.payload = {
1749 			.fatal_group = {
1750 				.sideband = fault->addr,
1751 				.status = fault->status,
1752 			}
1753 		}
1754 	};
1755 
1756 	mutex_lock(&kctx->csf.lock);
1757 
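	/* Terminate every group of the faulty context that is not already
	 * terminated and queue a fatal error notification for each of them.
	 */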
1758 	for (gr = 0; gr < MAX_QUEUE_GROUP_NUM; gr++) {
1759 		struct kbase_queue_group *const group =
1760 			kctx->csf.queue_groups[gr];
1761 
1762 		if (group && group->run_state != KBASE_CSF_GROUP_TERMINATED) {
1763 			term_queue_group(group);
1764 			kbase_csf_add_group_fatal_error(group, &err_payload);
1765 			reported = true;
1766 		}
1767 	}
1768 
1769 	mutex_unlock(&kctx->csf.lock);
1770 
1771 	if (reported)
1772 		kbase_event_wakeup(kctx);
1773 
1774 	kbase_reset_gpu_allow(kbdev);
1775 }
1776 
1777 void kbase_csf_ctx_term(struct kbase_context *kctx)
1778 {
1779 	struct kbase_device *kbdev = kctx->kbdev;
1780 	struct kbase_as *as = NULL;
1781 	unsigned long flags;
1782 	u32 i;
1783 	int err;
1784 	bool reset_prevented = false;
1785 
1786 	/* As the kbase context is terminating, its debugfs sub-directory would
1787 	 * have been removed already, and so would the debugfs files created
1788 	 * for queue groups & kcpu queues, hence there is no need to explicitly
1789 	 * remove those debugfs files.
1790 	 */
1791 
1792 	/* Wait for a GPU reset if it is happening, prevent it if not happening */
1793 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
1794 	if (err)
1795 		dev_warn(
1796 			kbdev->dev,
1797 			"Unsuccessful GPU reset detected when terminating csf context (%d_%d), attempting to terminate regardless",
1798 			kctx->tgid, kctx->id);
1799 	else
1800 		reset_prevented = true;
1801 
1802 	mutex_lock(&kctx->csf.lock);
1803 
1804 	/* Iterate through the queue groups that were not terminated by
1805 	 * userspace and issue the term request to firmware for them.
1806 	 */
1807 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1808 		struct kbase_queue_group *group = kctx->csf.queue_groups[i];
1809 
1810 		if (group) {
1811 			remove_pending_group_fatal_error(group);
1812 			term_queue_group(group);
1813 		}
1814 	}
1815 	mutex_unlock(&kctx->csf.lock);
1816 
1817 	if (reset_prevented)
1818 		kbase_reset_gpu_allow(kbdev);
1819 
1820 	cancel_work_sync(&kctx->csf.pending_submission_work);
1821 
1822 	/* Now that all queue groups have been terminated, there can be no
1823 	 * more OoM or timer event interrupts but there can be inflight work
1824 	 * items. Destroying the wq will implicitly flush those work items.
1825 	 */
1826 	destroy_workqueue(kctx->csf.wq);
1827 
1828 	/* Wait for the firmware error work item to also finish as it could
1829 	 * be affecting this outgoing context also.
1830 	 */
1831 	flush_work(&kctx->kbdev->csf.fw_error_work);
1832 
1833 	/* A work item to handle page_fault/bus_fault/gpu_fault could be
1834 	 * pending for the outgoing context. Flush the workqueue that will
1835 	 * execute that work item.
1836 	 */
1837 	spin_lock_irqsave(&kctx->kbdev->hwaccess_lock, flags);
1838 	if (kctx->as_nr != KBASEP_AS_NR_INVALID)
1839 		as = &kctx->kbdev->as[kctx->as_nr];
1840 	spin_unlock_irqrestore(&kctx->kbdev->hwaccess_lock, flags);
1841 	if (as)
1842 		flush_workqueue(as->pf_wq);
1843 
1844 	mutex_lock(&kctx->csf.lock);
1845 
1846 	for (i = 0; i < MAX_QUEUE_GROUP_NUM; i++) {
1847 		kfree(kctx->csf.queue_groups[i]);
1848 		kctx->csf.queue_groups[i] = NULL;
1849 	}
1850 
1851 	/* Iterate through the queues that were not terminated by
1852 	 * userspace and do the required cleanup for them.
1853 	 */
1854 	while (!list_empty(&kctx->csf.queue_list)) {
1855 		struct kbase_queue *queue;
1856 
1857 		queue = list_first_entry(&kctx->csf.queue_list,
1858 						struct kbase_queue, link);
1859 
1860 		/* The reference held when the IO mapping was created on bind
1861 		 * would have been dropped, otherwise the termination of the Kbase
1862 		 * context itself wouldn't have kicked in. So there shall be
1863 		 * only one reference left, the one taken when the queue was
1864 		 * registered.
1865 		 */
1866 		WARN_ON(kbase_refcount_read(&queue->refcount) != 1);
1867 		list_del_init(&queue->link);
1868 		release_queue(queue);
1869 	}
1870 
1871 	mutex_unlock(&kctx->csf.lock);
1872 
1873 	kbasep_ctx_user_reg_page_mapping_term(kctx);
1874 	kbase_csf_tiler_heap_context_term(kctx);
1875 	kbase_csf_kcpu_queue_context_term(kctx);
1876 	kbase_csf_scheduler_context_term(kctx);
1877 	kbase_csf_event_term(kctx);
1878 
1879 	mutex_destroy(&kctx->csf.lock);
1880 }
1881 
1882 /**
1883  * handle_oom_event - Handle the OoM event generated by the firmware for the
1884  *                    CSI.
1885  *
1886  * @group:  Pointer to the CSG group the oom-event belongs to.
1887  * @stream: Pointer to the structure containing info provided by the firmware
1888  *          about the CSI.
1889  *
1890  * This function will handle the OoM event request from the firmware for the
1891  * CS. It will retrieve the address of the heap context and the heap's
1892  * statistics (like number of render passes in-flight) from the CS's kernel
1893  * output page and pass them to the tiler heap function to allocate a
1894  * new chunk.
1895  * It will also update the CS's kernel input page with the address
1896  * of a new chunk that was allocated.
1897  *
1898  * Return: 0 if successfully handled the request, otherwise a negative error
1899  *         code on failure.
1900  */
1901 static int handle_oom_event(struct kbase_queue_group *const group,
1902 			    struct kbase_csf_cmd_stream_info const *const stream)
1903 {
1904 	struct kbase_context *const kctx = group->kctx;
1905 	u64 gpu_heap_va =
1906 		kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_LO) |
1907 		((u64)kbase_csf_firmware_cs_output(stream, CS_HEAP_ADDRESS_HI) << 32);
1908 	const u32 vt_start =
1909 		kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_START);
1910 	const u32 vt_end =
1911 		kbase_csf_firmware_cs_output(stream, CS_HEAP_VT_END);
1912 	const u32 frag_end =
1913 		kbase_csf_firmware_cs_output(stream, CS_HEAP_FRAG_END);
1914 	u32 renderpasses_in_flight;
1915 	u32 pending_frag_count;
1916 	u64 new_chunk_ptr;
1917 	int err;
1918 	bool frag_end_err = false;
1919 
1920 	if ((frag_end > vt_end) || (vt_end >= vt_start)) {
1921 		frag_end_err = true;
1922 		dev_dbg(kctx->kbdev->dev, "Invalid Heap statistics provided by firmware: vt_start %d, vt_end %d, frag_end %d\n",
1923 			 vt_start, vt_end, frag_end);
1924 	}
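	/* Fall back to conservative values if the counters were inconsistent,
	 * otherwise derive the number of render passes in flight and the
	 * pending fragment count from the heap statistics reported by firmware.
	 */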
1925 	if (frag_end_err) {
1926 		renderpasses_in_flight = 1;
1927 		pending_frag_count = 1;
1928 	} else {
1929 		renderpasses_in_flight = vt_start - frag_end;
1930 		pending_frag_count = vt_end - frag_end;
1931 	}
1932 
1933 	err = kbase_csf_tiler_heap_alloc_new_chunk(kctx,
1934 		gpu_heap_va, renderpasses_in_flight, pending_frag_count, &new_chunk_ptr);
1935 
1936 	if ((group->csi_handlers & BASE_CSF_TILER_OOM_EXCEPTION_FLAG) &&
1937 	    (pending_frag_count == 0) && (err == -ENOMEM || err == -EBUSY)) {
1938 		/* The group allows incremental rendering, trigger it */
1939 		new_chunk_ptr = 0;
1940 		dev_dbg(kctx->kbdev->dev, "Group-%d (slot-%d) enter incremental render\n",
1941 			group->handle, group->csg_nr);
1942 	} else if (err == -EBUSY) {
1943 		/* Acknowledge with a NULL chunk (firmware will then wait for
1944 		 * the fragment jobs to complete and release chunks)
1945 		 */
1946 		new_chunk_ptr = 0;
1947 	} else if (err)
1948 		return err;
1949 
1950 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_LO,
1951 				new_chunk_ptr & 0xFFFFFFFF);
1952 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_START_HI,
1953 				new_chunk_ptr >> 32);
1954 
1955 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_LO,
1956 				new_chunk_ptr & 0xFFFFFFFF);
1957 	kbase_csf_firmware_cs_input(stream, CS_TILER_HEAP_END_HI,
1958 				new_chunk_ptr >> 32);
1959 
1960 	return 0;
1961 }
1962 
1963 /**
1964  * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event
1965  *
1966  * @group: Pointer to the GPU command queue group that encountered the error
1967  */
1968 static void report_tiler_oom_error(struct kbase_queue_group *group)
1969 {
1970 	struct base_csf_notification const
1971 		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
1972 			  .payload = {
1973 				  .csg_error = {
1974 					  .handle = group->handle,
1975 					  .error = {
1976 						  .error_type =
1977 							  BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
1978 					  } } } };
1979 
1980 	kbase_csf_event_add_error(group->kctx,
1981 				  &group->error_tiler_oom,
1982 				  &error);
1983 	kbase_event_wakeup(group->kctx);
1984 }
1985 
1986 static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
1987 {
1988 	int err;
1989 	const unsigned int cache_flush_wait_timeout_ms = 2000;
1990 
1991 	kbase_pm_lock(kbdev);
1992 	/* With the advent of partial cache flush, dirty cache lines could
1993 	 * be left in the GPU L2 caches by terminating the queue group here
1994 	 * without waiting for proper cache maintenance. A full cache flush
1995 	 * here will prevent these dirty cache lines from being arbitrarily
1996 	 * evicted later and possibly causing memory corruption.
1997 	 */
1998 	if (kbdev->pm.backend.gpu_powered) {
1999 		kbase_gpu_start_cache_clean(kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
2000 		err = kbase_gpu_wait_cache_clean_timeout(kbdev, cache_flush_wait_timeout_ms);
2001 
2002 		if (err) {
2003 			dev_warn(
2004 				kbdev->dev,
2005 				"[%llu] Timeout waiting for cache clean to complete after fatal error",
2006 				kbase_backend_get_cycle_cnt(kbdev));
2007 
2008 			if (kbase_prepare_to_reset_gpu(kbdev, RESET_FLAGS_HWC_UNRECOVERABLE_ERROR))
2009 				kbase_reset_gpu(kbdev);
2010 		}
2011 	}
2012 
2013 	kbase_pm_unlock(kbdev);
2014 }
2015 
2016 /**
2017  * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
2018  *
2019  * @queue: Pointer to queue for which out-of-memory event was received.
2020  *
2021  * Called with the CSF locked for the affected GPU virtual address space.
2022  * Do not call in interrupt context.
2023  *
2024  * Handles tiler out-of-memory for a GPU command queue and then clears the
2025  * notification to allow the firmware to report out-of-memory again in future.
2026  * If the out-of-memory condition was successfully handled then this function
2027  * rings the relevant doorbell to notify the firmware; otherwise, it terminates
2028  * the GPU command queue group to which the queue is bound and notifies a
2029  * waiting user space client of the failure.
2030  */
2031 static void kbase_queue_oom_event(struct kbase_queue *const queue)
2032 {
2033 	struct kbase_context *const kctx = queue->kctx;
2034 	struct kbase_device *const kbdev = kctx->kbdev;
2035 	struct kbase_queue_group *group;
2036 	int slot_num, err;
2037 	struct kbase_csf_cmd_stream_group_info const *ginfo;
2038 	struct kbase_csf_cmd_stream_info const *stream;
2039 	int csi_index = queue->csi_index;
2040 	u32 cs_oom_ack, cs_oom_req;
2041 	unsigned long flags;
2042 
2043 	lockdep_assert_held(&kctx->csf.lock);
2044 
2045 	group = get_bound_queue_group(queue);
2046 	if (!group) {
2047 		dev_warn(kctx->kbdev->dev, "queue not bound\n");
2048 		return;
2049 	}
2050 
2051 	kbase_csf_scheduler_lock(kbdev);
2052 
2053 	slot_num = kbase_csf_scheduler_group_get_slot(group);
2054 
2055 	/* The group could have gone off slot before this work item got
2056 	 * a chance to execute.
2057 	 */
2058 	if (slot_num < 0)
2059 		goto unlock;
2060 
2061 	/* If the bound group is on slot yet the kctx is marked with disabled
2062 	 * on address-space fault, the group is pending to be killed. So skip
2063 	 * the inflight oom operation.
2064 	 */
2065 	if (kbase_ctx_flag(kctx, KCTX_AS_DISABLED_ON_FAULT))
2066 		goto unlock;
2067 
2068 	ginfo = &kbdev->csf.global_iface.groups[slot_num];
2069 	stream = &ginfo->streams[csi_index];
2070 	cs_oom_ack = kbase_csf_firmware_cs_output(stream, CS_ACK) &
2071 		     CS_ACK_TILER_OOM_MASK;
2072 	cs_oom_req = kbase_csf_firmware_cs_input_read(stream, CS_REQ) &
2073 		     CS_REQ_TILER_OOM_MASK;
2074 
2075 	/* The group could have already undergone suspend-resume cycle before
2076 	 * this work item got a chance to execute. On CSG resume the CS_ACK
2077 	 * register is set by firmware to reflect the CS_REQ register, which
2078 	 * implies that all events signaled before suspension are implicitly
2079 	 * acknowledged.
2080 	 * A new OoM event is expected to be generated after resume.
2081 	 */
2082 	if (cs_oom_ack == cs_oom_req)
2083 		goto unlock;
2084 
2085 	err = handle_oom_event(group, stream);
2086 
2087 	kbase_csf_scheduler_spin_lock(kbdev, &flags);
2088 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_oom_ack,
2089 					 CS_REQ_TILER_OOM_MASK);
2090 	kbase_csf_ring_cs_kernel_doorbell(kbdev, csi_index, slot_num, true);
2091 	kbase_csf_scheduler_spin_unlock(kbdev, flags);
2092 
2093 	if (unlikely(err)) {
2094 		dev_warn(
2095 			kbdev->dev,
2096 			"Queue group to be terminated, couldn't handle the OoM event\n");
2097 		kbase_debug_csf_fault_notify(kbdev, kctx, DF_TILER_OOM);
2098 		kbase_csf_scheduler_unlock(kbdev);
2099 		term_queue_group(group);
2100 		flush_gpu_cache_on_fatal_error(kbdev);
2101 		report_tiler_oom_error(group);
2102 		return;
2103 	}
2104 unlock:
2105 	kbase_csf_scheduler_unlock(kbdev);
2106 }
2107 
2108 /**
2109  * oom_event_worker - Tiler out-of-memory handler called from a workqueue.
2110  *
2111  * @data: Pointer to a work_struct embedded in GPU command queue data.
2112  *
2113  * Handles a tiler out-of-memory condition for a GPU command queue and then
2114  * releases a reference that was added to prevent the queue being destroyed
2115  * while this work item was pending on a workqueue.
2116  */
2117 static void oom_event_worker(struct work_struct *data)
2118 {
2119 	struct kbase_queue *queue =
2120 		container_of(data, struct kbase_queue, oom_event_work);
2121 	struct kbase_context *kctx = queue->kctx;
2122 	struct kbase_device *const kbdev = kctx->kbdev;
2123 
2124 	int err = kbase_reset_gpu_try_prevent(kbdev);
2125 
2126 	/* Regardless of whether reset failed or is currently happening, exit
2127 	 * early
2128 	 */
2129 	if (err)
2130 		return;
2131 
2132 	mutex_lock(&kctx->csf.lock);
2133 
2134 	kbase_queue_oom_event(queue);
2135 	release_queue(queue);
2136 
2137 	mutex_unlock(&kctx->csf.lock);
2138 	kbase_reset_gpu_allow(kbdev);
2139 }
2140 
2141 /**
2142  * report_group_timeout_error - Report the timeout error for the group to userspace.
2143  *
2144  * @group: Pointer to the group for which timeout error occurred
2145  */
2146 static void report_group_timeout_error(struct kbase_queue_group *const group)
2147 {
2148 	struct base_csf_notification const
2149 		error = { .type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2150 			  .payload = {
2151 				  .csg_error = {
2152 					  .handle = group->handle,
2153 					  .error = {
2154 						  .error_type =
2155 							  BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
2156 					  } } } };
2157 
2158 	dev_warn(group->kctx->kbdev->dev,
2159 		 "Notify the event notification thread, forward progress timeout (%llu cycles)\n",
2160 		 kbase_csf_timeout_get(group->kctx->kbdev));
2161 
2162 	kbase_csf_event_add_error(group->kctx, &group->error_timeout, &error);
2163 	kbase_event_wakeup(group->kctx);
2164 }
2165 
2166 /**
2167  * timer_event_worker - Handle the progress timeout error for the group
2168  *
2169  * @data: Pointer to a work_struct embedded in GPU command queue group data.
2170  *
2171  * Terminate the CSG and report the error to userspace
2172  */
2173 static void timer_event_worker(struct work_struct *data)
2174 {
2175 	struct kbase_queue_group *const group =
2176 		container_of(data, struct kbase_queue_group, timer_event_work);
2177 	struct kbase_context *const kctx = group->kctx;
2178 	struct kbase_device *const kbdev = kctx->kbdev;
2179 	bool reset_prevented = false;
2180 	int err = kbase_reset_gpu_prevent_and_wait(kbdev);
2181 
2182 	if (err)
2183 		dev_warn(
2184 			kbdev->dev,
2185 			"Unsuccessful GPU reset detected when terminating group %d on progress timeout, attempting to terminate regardless",
2186 			group->handle);
2187 	else
2188 		reset_prevented = true;
2189 
2190 	mutex_lock(&kctx->csf.lock);
2191 
2192 	term_queue_group(group);
2193 	flush_gpu_cache_on_fatal_error(kbdev);
2194 	report_group_timeout_error(group);
2195 
2196 	mutex_unlock(&kctx->csf.lock);
2197 	if (reset_prevented)
2198 		kbase_reset_gpu_allow(kbdev);
2199 }
2200 
2201 /**
2202  * handle_progress_timer_event - Progress timer timeout event handler.
2203  *
2204  * @group: Pointer to GPU queue group for which the timeout event is received.
2205  *
2206  * Notify a waiting user space client of the timeout.
2207  * Enqueue a work item to terminate the group and notify the event notification
2208  * thread of progress timeout fault for the GPU command queue group.
2209  */
2210 static void handle_progress_timer_event(struct kbase_queue_group *const group)
2211 {
2212 	kbase_debug_csf_fault_notify(group->kctx->kbdev, group->kctx,
2213 		DF_PROGRESS_TIMER_TIMEOUT);
2214 
2215 	queue_work(group->kctx->csf.wq, &group->timer_event_work);
2216 }
2217 
2218 /**
2219  * alloc_grp_protected_suspend_buffer_pages() -  Allocate physical pages from the protected
2220  *                                               memory for the protected mode suspend buffer.
2221  * @group: Pointer to the GPU queue group.
2222  *
2223  * Return: 0 if the suspend buffer allocation is successful or if it is already allocated,
2224  * otherwise a negative error value.
2225  */
2226 static int alloc_grp_protected_suspend_buffer_pages(struct kbase_queue_group *const group)
2227 {
2228 	struct kbase_device *const kbdev = group->kctx->kbdev;
2229 	struct kbase_context *kctx = group->kctx;
2230 	struct tagged_addr *phys = NULL;
2231 	struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
2232 	size_t nr_pages;
2233 	int err = 0;
2234 
2235 	if (likely(sbuf->pma))
2236 		return 0;
2237 
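	/* Size the allocation from the suspend buffer size advertised by the
	 * firmware for CSG interface 0.
	 */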
2238 	nr_pages = PFN_UP(kbdev->csf.global_iface.groups[0].suspend_size);
2239 	phys = kcalloc(nr_pages, sizeof(*phys), GFP_KERNEL);
2240 	if (unlikely(!phys)) {
2241 		err = -ENOMEM;
2242 		goto phys_free;
2243 	}
2244 
2245 	mutex_lock(&kctx->csf.lock);
2246 	kbase_csf_scheduler_lock(kbdev);
2247 
2248 	if (unlikely(!group->csg_reg)) {
2249 		/* The only way the bound csg_reg can have been removed from the group is
2250 		 * that it has been put off slot by the scheduler and the csg_reg resource
2251 		 * is contended by other groups. In this case, another occasion is needed
2252 		 * for mapping the pma, which requires a bound csg_reg. Since the group is
2253 		 * already off-slot, returning no error is harmless as the scheduler, when
2254 		 * placing the group back on-slot again, would do the required MMU map
2255 		 * operation on the allocated and retained pma.
2256 		 */
2257 		WARN_ON(group->csg_nr >= 0);
2258 		dev_dbg(kbdev->dev, "No bound csg_reg for group_%d_%d_%d to enter protected mode",
2259 			group->kctx->tgid, group->kctx->id, group->handle);
2260 		goto unlock;
2261 	}
2262 
2263 	/* Allocate the protected mode pages */
2264 	sbuf->pma = kbase_csf_protected_memory_alloc(kbdev, phys, nr_pages, true);
2265 	if (unlikely(!sbuf->pma)) {
2266 		err = -ENOMEM;
2267 		goto unlock;
2268 	}
2269 
2270 	/* Map the bound susp_reg to the just allocated pma pages */
2271 	err = kbase_csf_mcu_shared_group_update_pmode_map(kbdev, group);
2272 
2273 unlock:
2274 	kbase_csf_scheduler_unlock(kbdev);
2275 	mutex_unlock(&kctx->csf.lock);
2276 phys_free:
2277 	kfree(phys);
2278 	return err;
2279 }
2280 
2281 static void report_group_fatal_error(struct kbase_queue_group *const group)
2282 {
2283 	struct base_gpu_queue_group_error const
2284 		err_payload = { .error_type = BASE_GPU_QUEUE_GROUP_ERROR_FATAL,
2285 				.payload = { .fatal_group = {
2286 						     .status = GPU_EXCEPTION_TYPE_SW_FAULT_0,
2287 					     } } };
2288 
2289 	kbase_csf_add_group_fatal_error(group, &err_payload);
2290 	kbase_event_wakeup(group->kctx);
2291 }
2292 
2293 /**
2294  * protm_event_worker - Protected mode switch request event handler
2295  *			called from a workqueue.
2296  *
2297  * @data: Pointer to a work_struct embedded in GPU command queue group data.
2298  *
2299  * Request to switch to protected mode.
2300  */
2301 static void protm_event_worker(struct work_struct *data)
2302 {
2303 	struct kbase_queue_group *const group =
2304 		container_of(data, struct kbase_queue_group, protm_event_work);
2305 	struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
2306 	int err = 0;
2307 
2308 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START,
2309 				 group, 0u);
2310 
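	/* Entering protected mode needs a suspend buffer backed by protected
	 * memory; -ENOMEM failures are retried a bounded number of times
	 * before the group is reported as fatally failed.
	 */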
2311 	err = alloc_grp_protected_suspend_buffer_pages(group);
2312 	if (!err) {
2313 		kbase_csf_scheduler_group_protm_enter(group);
2314 	} else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
2315 		sbuf->alloc_retries++;
2316 		/* try again to allocate pages */
2317 		queue_work(group->kctx->csf.wq, &group->protm_event_work);
2318 	} else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
2319 		dev_err(group->kctx->kbdev->dev,
2320 			"Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
2321 			group->handle, group->kctx->tgid, group->kctx->id);
2322 		report_group_fatal_error(group);
2323 	}
2324 
2325 	KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END,
2326 				 group, 0u);
2327 }
2328 
2329 /**
2330  * handle_fault_event - Handler for CS fault.
2331  *
2332  * @queue:  Pointer to queue for which fault event was received.
2333  * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
2334  *          the queue.
2335  *
2336  * Print required information about the CS fault and notify the user space client
2337  * about the fault.
2338  */
2339 static void
2340 handle_fault_event(struct kbase_queue *const queue, const u32 cs_ack)
2341 {
2342 	struct kbase_device *const kbdev = queue->kctx->kbdev;
2343 	struct kbase_csf_cmd_stream_group_info const *ginfo =
2344 			&kbdev->csf.global_iface.groups[queue->group->csg_nr];
2345 	struct kbase_csf_cmd_stream_info const *stream =
2346 			&ginfo->streams[queue->csi_index];
2347 	const u32 cs_fault = kbase_csf_firmware_cs_output(stream, CS_FAULT);
2348 	const u64 cs_fault_info =
2349 		kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_LO) |
2350 		((u64)kbase_csf_firmware_cs_output(stream, CS_FAULT_INFO_HI)
2351 		 << 32);
2352 	const u8 cs_fault_exception_type =
2353 		CS_FAULT_EXCEPTION_TYPE_GET(cs_fault);
2354 	const u32 cs_fault_exception_data =
2355 		CS_FAULT_EXCEPTION_DATA_GET(cs_fault);
2356 	const u64 cs_fault_info_exception_data =
2357 		CS_FAULT_INFO_EXCEPTION_DATA_GET(cs_fault_info);
2358 
2359 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2360 
2361 	dev_warn(kbdev->dev,
2362 		 "Ctx %d_%d Group %d CSG %d CSI: %d\n"
2363 		 "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n"
2364 		 "CS_FAULT.EXCEPTION_DATA: 0x%x\n"
2365 		 "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n",
2366 		 queue->kctx->tgid, queue->kctx->id, queue->group->handle,
2367 		 queue->group->csg_nr, queue->csi_index,
2368 		 cs_fault_exception_type,
2369 		 kbase_gpu_exception_name(cs_fault_exception_type),
2370 		 cs_fault_exception_data, cs_fault_info_exception_data);
2371 
2372 
2373 #if IS_ENABLED(CONFIG_DEBUG_FS)
2374 	/* CS_RESOURCE_TERMINATED type fault event can be ignored from the
2375 	 * standpoint of dump on error. It is used to report fault for the CSIs
2376 	 * that are associated with the same CSG as the CSI for which the actual
2377 	 * fault was reported by the Iterator.
2378 	 * Dumping would be triggered when the actual fault is reported.
2379 	 *
2380 	 * CS_INHERIT_FAULT can also be ignored. It could happen due to the error
2381 	 * in other types of queues (cpu/kcpu). If a fault had occurred in some
2382 	 * other GPU queue then the dump would have been performed anyways when
2383 	 * that fault was reported.
2384 	 */
2385 	if ((cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_INHERIT_FAULT) &&
2386 	    (cs_fault_exception_type != CS_FAULT_EXCEPTION_TYPE_CS_RESOURCE_TERMINATED)) {
2387 		if (unlikely(kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FAULT))) {
2388 			get_queue(queue);
2389 			queue->cs_error = cs_fault;
2390 			queue->cs_error_info = cs_fault_info;
2391 			queue->cs_error_fatal = false;
2392 			if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
2393 				release_queue(queue);
2394 			return;
2395 		}
2396 	}
2397 #endif
2398 
2399 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2400 					 CS_REQ_FAULT_MASK);
2401 	kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index, queue->group->csg_nr, true);
2402 }
2403 
2404 static void report_queue_fatal_error(struct kbase_queue *const queue,
2405 				     u32 cs_fatal, u64 cs_fatal_info,
2406 				     u8 group_handle)
2407 {
2408 	struct base_csf_notification error = {
2409 		.type = BASE_CSF_NOTIFICATION_GPU_QUEUE_GROUP_ERROR,
2410 		.payload = {
2411 			.csg_error = {
2412 				.handle = group_handle,
2413 				.error = {
2414 					.error_type =
2415 					BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
2416 					.payload = {
2417 						.fatal_queue = {
2418 						.sideband = cs_fatal_info,
2419 						.status = cs_fatal,
2420 						.csi_index = queue->csi_index,
2421 						}
2422 					}
2423 				}
2424 			}
2425 		}
2426 	};
2427 
2428 	kbase_csf_event_add_error(queue->kctx, &queue->error, &error);
2429 	kbase_event_wakeup(queue->kctx);
2430 }
2431 
2432 /**
2433  * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
2434  *
2435  * @data: Pointer to a work_struct embedded in GPU command queue.
2436  *
2437  * Terminate the CSG and report the error to userspace.
2438  */
2439 static void cs_error_worker(struct work_struct *const data)
2440 {
2441 	struct kbase_queue *const queue =
2442 		container_of(data, struct kbase_queue, cs_error_work);
2443 	struct kbase_context *const kctx = queue->kctx;
2444 	struct kbase_device *const kbdev = kctx->kbdev;
2445 	struct kbase_queue_group *group;
2446 	u8 group_handle;
2447 	bool reset_prevented = false;
2448 	int err;
2449 
2450 	kbase_debug_csf_fault_wait_completion(kbdev);
2451 	err = kbase_reset_gpu_prevent_and_wait(kbdev);
2452 
2453 	if (err)
2454 		dev_warn(
2455 			kbdev->dev,
2456 			"Unsuccessful GPU reset detected when terminating group to handle fatal event, attempting to terminate regardless");
2457 	else
2458 		reset_prevented = true;
2459 
2460 	mutex_lock(&kctx->csf.lock);
2461 
2462 	group = get_bound_queue_group(queue);
2463 	if (!group) {
2464 		dev_warn(kbdev->dev, "queue not bound when handling fatal event");
2465 		goto unlock;
2466 	}
2467 
2468 #if IS_ENABLED(CONFIG_DEBUG_FS)
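	/* A non-fatal CS fault only reaches this worker when dump on error is
	 * enabled; once the dump has completed, just acknowledge the fault and
	 * ring the doorbell instead of terminating the group.
	 */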
2469 	if (!queue->cs_error_fatal) {
2470 		unsigned long flags;
2471 		int slot_num;
2472 
2473 		kbase_csf_scheduler_spin_lock(kbdev, &flags);
2474 		slot_num = kbase_csf_scheduler_group_get_slot_locked(group);
2475 		if (slot_num >= 0) {
2476 			struct kbase_csf_cmd_stream_group_info const *ginfo =
2477 				&kbdev->csf.global_iface.groups[slot_num];
2478 			struct kbase_csf_cmd_stream_info const *stream =
2479 				&ginfo->streams[queue->csi_index];
2480 			u32 const cs_ack =
2481 				kbase_csf_firmware_cs_output(stream, CS_ACK);
2482 
2483 			kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2484 				CS_REQ_FAULT_MASK);
2485 			kbase_csf_ring_cs_kernel_doorbell(kbdev, queue->csi_index,
2486 				slot_num, true);
2487 		}
2488 		kbase_csf_scheduler_spin_unlock(kbdev, flags);
2489 		goto unlock;
2490 	}
2491 #endif
2492 
2493 	group_handle = group->handle;
2494 	term_queue_group(group);
2495 	flush_gpu_cache_on_fatal_error(kbdev);
2496 	report_queue_fatal_error(queue, queue->cs_error, queue->cs_error_info,
2497 				 group_handle);
2498 
2499 unlock:
2500 	release_queue(queue);
2501 	mutex_unlock(&kctx->csf.lock);
2502 	if (reset_prevented)
2503 		kbase_reset_gpu_allow(kbdev);
2504 }
2505 
2506 /**
2507  * handle_fatal_event - Handler for CS fatal.
2508  *
2509  * @queue:    Pointer to queue for which fatal event was received.
2510  * @stream:   Pointer to the structure containing info provided by the
2511  *            firmware about the CSI.
2512  * @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
2513  *          the queue.
2514  *
2515  * Notify a waiting user space client of the CS fatal and print meaningful
2516  * information.
2517  * Enqueue a work item to terminate the group and report the fatal error
2518  * to user space.
2519  */
2520 static void
2521 handle_fatal_event(struct kbase_queue *const queue,
2522 		   struct kbase_csf_cmd_stream_info const *const stream,
2523 		   u32 cs_ack)
2524 {
2525 	const u32 cs_fatal = kbase_csf_firmware_cs_output(stream, CS_FATAL);
2526 	const u64 cs_fatal_info =
2527 		kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_LO) |
2528 		((u64)kbase_csf_firmware_cs_output(stream, CS_FATAL_INFO_HI)
2529 		 << 32);
2530 	const u32 cs_fatal_exception_type =
2531 		CS_FATAL_EXCEPTION_TYPE_GET(cs_fatal);
2532 	const u32 cs_fatal_exception_data =
2533 		CS_FATAL_EXCEPTION_DATA_GET(cs_fatal);
2534 	const u64 cs_fatal_info_exception_data =
2535 		CS_FATAL_INFO_EXCEPTION_DATA_GET(cs_fatal_info);
2536 	struct kbase_device *const kbdev = queue->kctx->kbdev;
2537 
2538 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2539 
2540 	dev_warn(kbdev->dev,
2541 		 "Ctx %d_%d Group %d CSG %d CSI: %d\n"
2542 		 "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n"
2543 		 "CS_FATAL.EXCEPTION_DATA: 0x%x\n"
2544 		 "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n",
2545 		 queue->kctx->tgid, queue->kctx->id, queue->group->handle,
2546 		 queue->group->csg_nr, queue->csi_index,
2547 		 cs_fatal_exception_type,
2548 		 kbase_gpu_exception_name(cs_fatal_exception_type),
2549 		 cs_fatal_exception_data, cs_fatal_info_exception_data);
2550 
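	/* Firmware internal errors are handled via the global FW error work
	 * item; other fatal types are handled per queue, with unrecoverable CS
	 * errors additionally triggering a GPU reset.
	 */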
2551 	if (cs_fatal_exception_type ==
2552 			CS_FATAL_EXCEPTION_TYPE_FIRMWARE_INTERNAL_ERROR) {
2553 		kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_FW_INTERNAL_ERROR);
2554 		queue_work(system_wq, &kbdev->csf.fw_error_work);
2555 	} else {
2556 		kbase_debug_csf_fault_notify(kbdev, queue->kctx, DF_CS_FATAL);
2557 		if (cs_fatal_exception_type == CS_FATAL_EXCEPTION_TYPE_CS_UNRECOVERABLE) {
2558 			queue->group->cs_unrecoverable = true;
2559 			if (kbase_prepare_to_reset_gpu(queue->kctx->kbdev, RESET_FLAGS_NONE))
2560 				kbase_reset_gpu(queue->kctx->kbdev);
2561 		}
2562 		get_queue(queue);
2563 		queue->cs_error = cs_fatal;
2564 		queue->cs_error_info = cs_fatal_info;
2565 		queue->cs_error_fatal = true;
2566 		if (!queue_work(queue->kctx->csf.wq, &queue->cs_error_work))
2567 			release_queue(queue);
2568 	}
2569 
2570 	kbase_csf_firmware_cs_input_mask(stream, CS_REQ, cs_ack,
2571 					CS_REQ_FATAL_MASK);
2572 
2573 }
2574 
2575 /**
2576  * process_cs_interrupts - Process interrupts for a CS.
2577  *
2578  * @group:  Pointer to GPU command queue group data.
2579  * @ginfo:  The CSG interface provided by the firmware.
2580  * @irqreq: CSG's IRQ request bitmask (one bit per CS).
2581  * @irqack: CSG's IRQ acknowledge bitmask (one bit per CS).
2582  * @track:  Pointer that tracks the highest scanout priority idle CSG and
2583  *          any newly potentially viable protected mode requesting CSG in
2584  *          the current IRQ context.
2585  *
2586  * If the interrupt request bitmask differs from the acknowledge bitmask
2587  * then the firmware is notifying the host of an event concerning those
2588  * CSs indicated by bits whose value differs. The actions required
2589  * are then determined by examining which notification flags differ between
2590  * the request and acknowledge registers for the individual CS(s).
2591  */
2592 static void process_cs_interrupts(struct kbase_queue_group *const group,
2593 				  struct kbase_csf_cmd_stream_group_info const *const ginfo,
2594 				  u32 const irqreq, u32 const irqack,
2595 				  struct irq_idle_and_protm_track *track)
2596 {
2597 	struct kbase_device *const kbdev = group->kctx->kbdev;
2598 	u32 remaining = irqreq ^ irqack;
2599 	bool protm_pend = false;
2600 	const bool group_suspending =
2601 		!kbase_csf_scheduler_group_events_enabled(kbdev, group);
2602 
2603 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2604 
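	/* Each bit set in the XOR of the IRQ request/acknowledge bitmasks
	 * identifies a CS with a pending event; walk them one by one via ffs().
	 */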
2605 	while (remaining != 0) {
2606 		int const i = ffs(remaining) - 1;
2607 		struct kbase_queue *const queue = group->bound_queues[i];
2608 
2609 		remaining &= ~(1 << i);
2610 
2611 		/* The queue pointer can be NULL, but if it isn't NULL then it
2612 		 * cannot disappear since the scheduler spinlock is held and,
2613 		 * before a bound queue can be freed, it has to first be unbound,
2614 		 * which requires the scheduler spinlock.
2615 		 */
2616 		if (queue && !WARN_ON(queue->csi_index != i)) {
2617 			struct kbase_csf_cmd_stream_info const *const stream =
2618 				&ginfo->streams[i];
2619 			u32 const cs_req = kbase_csf_firmware_cs_input_read(
2620 				stream, CS_REQ);
2621 			u32 const cs_ack =
2622 				kbase_csf_firmware_cs_output(stream, CS_ACK);
2623 			struct workqueue_struct *wq = group->kctx->csf.wq;
2624 
2625 			if ((cs_ack & CS_ACK_FATAL_MASK) != (cs_req & CS_REQ_FATAL_MASK)) {
2626 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
2627 							 group, queue, cs_req ^ cs_ack);
2628 				handle_fatal_event(queue, stream, cs_ack);
2629 			}
2630 
2631 			if ((cs_ack & CS_ACK_FAULT_MASK) != (cs_req & CS_REQ_FAULT_MASK)) {
2632 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_FAULT,
2633 							 group, queue, cs_req ^ cs_ack);
2634 				handle_fault_event(queue, cs_ack);
2635 			}
2636 
2637 			/* PROTM_PEND and TILER_OOM can be safely ignored
2638 			 * because they will be raised again if the group
2639 			 * is assigned a CSG slot in future.
2640 			 */
2641 			if (group_suspending) {
2642 				u32 const cs_req_remain = cs_req & ~CS_REQ_EXCEPTION_MASK;
2643 				u32 const cs_ack_remain = cs_ack & ~CS_ACK_EXCEPTION_MASK;
2644 
2645 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev,
2646 							 CSI_INTERRUPT_GROUP_SUSPENDS_IGNORED,
2647 							 group, queue,
2648 							 cs_req_remain ^ cs_ack_remain);
2649 				continue;
2650 			}
2651 
2652 			if (((cs_req & CS_REQ_TILER_OOM_MASK) ^
2653 			     (cs_ack & CS_ACK_TILER_OOM_MASK))) {
2654 				get_queue(queue);
2655 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_TILER_OOM,
2656 							 group, queue, cs_req ^ cs_ack);
2657 				if (!queue_work(wq, &queue->oom_event_work)) {
2658 					/* The work item should not have
2659 					 * already been queued: there can be
2660 					 * only one pending OoM event for a
2661 					 * queue.
2662 					 */
2663 					dev_warn(
2664 						kbdev->dev,
2665 						"Tiler OOM work pending: queue %d group %d (ctx %d_%d)",
2666 						queue->csi_index, group->handle, queue->kctx->tgid,
2667 						queue->kctx->id);
2668 					release_queue(queue);
2669 				}
2670 			}
2671 
2672 			if ((cs_req & CS_REQ_PROTM_PEND_MASK) ^
2673 			    (cs_ack & CS_ACK_PROTM_PEND_MASK)) {
2674 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_INTERRUPT_PROTM_PEND,
2675 							 group, queue, cs_req ^ cs_ack);
2676 
2677 				dev_dbg(kbdev->dev,
2678 					"Protected mode entry request for queue on csi %d bound to group-%d on slot %d",
2679 					queue->csi_index, group->handle,
2680 					group->csg_nr);
2681 
2682 				bitmap_set(group->protm_pending_bitmap, i, 1);
2683 				KBASE_KTRACE_ADD_CSF_GRP_Q(kbdev, CSI_PROTM_PEND_SET, group, queue,
2684 							   group->protm_pending_bitmap[0]);
2685 				protm_pend = true;
2686 			}
2687 		}
2688 	}
2689 
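	/* At least one CS of this group raised a protected mode request:
	 * update the IRQ-level tracking if this group is now the pending
	 * requester with the lowest scan sequence number (highest priority).
	 */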
2690 	if (protm_pend) {
2691 		struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
2692 
2693 		if (scheduler->tick_protm_pending_seq > group->scan_seq_num) {
2694 			scheduler->tick_protm_pending_seq = group->scan_seq_num;
2695 			track->protm_grp = group;
2696 		}
2697 
2698 		if (!group->protected_suspend_buf.pma)
2699 			queue_work(group->kctx->csf.wq, &group->protm_event_work);
2700 
2701 		if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
2702 			clear_bit(group->csg_nr,
2703 				  scheduler->csg_slots_idle_mask);
2704 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_CLEAR, group,
2705 							scheduler->csg_slots_idle_mask[0]);
2706 			dev_dbg(kbdev->dev,
2707 				"Group-%d on slot %d de-idled by protm request",
2708 				group->handle, group->csg_nr);
2709 		}
2710 	}
2711 }
2712 
2713 /**
2714  * process_csg_interrupts - Process interrupts for a CSG.
2715  *
2716  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2717  * @csg_nr: CSG number.
2718  * @track: Pointer that tracks the highest priority idle CSG and any newly
2719  *         viable protected mode requesting group, in the current IRQ context.
2720  *
2721  * Handles interrupts for a CSG and for CSs within it.
2722  *
2723  * If the CSG's request register value differs from its acknowledge register
2724  * then the firmware is notifying the host of an event concerning the whole
2725  * group. The actions required are then determined by examining which
2726  * notification flags differ between those two register values.
2727  *
2728  * See process_cs_interrupts() for details of per-stream interrupt handling.
2729  */
2730 static void process_csg_interrupts(struct kbase_device *const kbdev, int const csg_nr,
2731 				   struct irq_idle_and_protm_track *track)
2732 {
2733 	struct kbase_csf_cmd_stream_group_info *ginfo;
2734 	struct kbase_queue_group *group = NULL;
2735 	u32 req, ack, irqreq, irqack;
2736 
2737 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2738 
2739 	if (WARN_ON(csg_nr >= kbdev->csf.global_iface.group_num))
2740 		return;
2741 
2742 	ginfo = &kbdev->csf.global_iface.groups[csg_nr];
2743 	req = kbase_csf_firmware_csg_input_read(ginfo, CSG_REQ);
2744 	ack = kbase_csf_firmware_csg_output(ginfo, CSG_ACK);
2745 	irqreq = kbase_csf_firmware_csg_output(ginfo, CSG_IRQ_REQ);
2746 	irqack = kbase_csf_firmware_csg_input_read(ginfo, CSG_IRQ_ACK);
2747 
2748 	/* There may not be any pending CSG/CS interrupts to process */
2749 	if ((req == ack) && (irqreq == irqack))
2750 		return;
2751 
2752 	/* Immediately set the IRQ_ACK bits to be the same as the IRQ_REQ bits
2753 	 * before examining the CS_ACK & CS_REQ bits. This ensures that Host
2754 	 * doesn't miss an interrupt for the CS in the race scenario where,
2755 	 * whilst Host is servicing an interrupt for the CS, firmware sends
2756 	 * another interrupt for that CS.
2757 	 */
2758 	kbase_csf_firmware_csg_input(ginfo, CSG_IRQ_ACK, irqreq);
2759 
2760 	group = kbase_csf_scheduler_get_group_on_slot(kbdev, csg_nr);
2761 
2762 	/* The group pointer can be NULL here if interrupts for the group
2763 	 * (like SYNC_UPDATE, IDLE notification) were delayed and arrived
2764 	 * just after the suspension of the group completed. However, if not
2765 	 * NULL then the group pointer cannot disappear even if User tries to
2766 	 * terminate the group whilst this code is running, as the scheduler
2767 	 * spinlock is held and the scheduler spinlock is also required to
2768 	 * free a group that is resident on a CSG slot.
2769 	 */
2770 	if (!group)
2771 		return;
2772 
2773 	if (WARN_ON(kbase_csf_scheduler_group_get_slot_locked(group) != csg_nr))
2774 		return;
2775 
2776 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_START, group, csg_nr);
2777 
2778 	if ((req ^ ack) & CSG_REQ_SYNC_UPDATE_MASK) {
2779 		kbase_csf_firmware_csg_input_mask(ginfo,
2780 			CSG_REQ, ack, CSG_REQ_SYNC_UPDATE_MASK);
2781 
2782 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_SYNC_UPDATE, group, req ^ ack);
2783 
2784 		/* SYNC_UPDATE events shall invalidate GPU idle event */
2785 		atomic_set(&kbdev->csf.scheduler.gpu_no_longer_idle, true);
2786 
2787 		kbase_csf_event_signal_cpu_only(group->kctx);
2788 	}
2789 
2790 	if ((req ^ ack) & CSG_REQ_IDLE_MASK) {
2791 		struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;
2792 
2793 		KBASE_TLSTREAM_TL_KBASE_DEVICE_CSG_IDLE(
2794 			kbdev, kbdev->gpu_props.props.raw_props.gpu_id, csg_nr);
2795 
2796 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
2797 			CSG_REQ_IDLE_MASK);
2798 
2799 		set_bit(csg_nr, scheduler->csg_slots_idle_mask);
2800 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_SLOT_IDLE_SET, group,
2801 					 scheduler->csg_slots_idle_mask[0]);
2802 		KBASE_KTRACE_ADD_CSF_GRP(kbdev,  CSG_INTERRUPT_IDLE, group, req ^ ack);
2803 		dev_dbg(kbdev->dev, "Idle notification received for Group %u on slot %d\n",
2804 			 group->handle, csg_nr);
2805 
2806 		if (atomic_read(&scheduler->non_idle_offslot_grps)) {
2807 			/* If there are non-idle CSGs waiting for a slot, fire
2808 			 * a tock for a replacement.
2809 			 */
2810 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NON_IDLE_GROUPS,
2811 						group, req ^ ack);
2812 			kbase_csf_scheduler_invoke_tock(kbdev);
2813 		} else {
2814 			KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_NO_NON_IDLE_GROUPS,
2815 						group, req ^ ack);
2816 		}
2817 
2818 		if (group->scan_seq_num < track->idle_seq) {
2819 			track->idle_seq = group->scan_seq_num;
2820 			track->idle_slot = csg_nr;
2821 		}
2822 	}
2823 
2824 	if ((req ^ ack) & CSG_REQ_PROGRESS_TIMER_EVENT_MASK) {
2825 		kbase_csf_firmware_csg_input_mask(ginfo, CSG_REQ, ack,
2826 						  CSG_REQ_PROGRESS_TIMER_EVENT_MASK);
2827 
2828 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROGRESS_TIMER_EVENT, group,
2829 					 req ^ ack);
2830 		dev_info(
2831 			kbdev->dev,
2832 			"[%llu] Iterator PROGRESS_TIMER timeout notification received for group %u of ctx %d_%d on slot %d\n",
2833 			kbase_backend_get_cycle_cnt(kbdev), group->handle, group->kctx->tgid,
2834 			group->kctx->id, csg_nr);
2835 
2836 		handle_progress_timer_event(group);
2837 	}
2838 
2839 	process_cs_interrupts(group, ginfo, irqreq, irqack, track);
2840 
2841 	KBASE_KTRACE_ADD_CSF_GRP(kbdev, CSG_INTERRUPT_PROCESS_END, group,
2842 				 ((u64)req ^ ack) | (((u64)irqreq ^ irqack) << 32));
2843 }
2844 
2845 /**
2846  * process_prfcnt_interrupts - Process performance counter interrupts.
2847  *
2848  * @kbdev:   Instance of a GPU platform device that implements a CSF interface.
2849  * @glb_req: Global request register value.
2850  * @glb_ack: Global acknowledge register value.
2851  *
2852  * Handles interrupts issued by the firmware that relate to the performance
2853  * counters. For example, on completion of a performance counter sample. It is
2854  * expected that the scheduler spinlock is already held on calling this
2855  * function.
2856  */
2857 static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req,
2858 				      u32 glb_ack)
2859 {
2860 	const struct kbase_csf_global_iface *const global_iface =
2861 		&kbdev->csf.global_iface;
2862 
2863 	lockdep_assert_held(&kbdev->csf.scheduler.interrupt_lock);
2864 
2865 	/* Process PRFCNT_SAMPLE interrupt. */
2866 	if (kbdev->csf.hwcnt.request_pending &&
2867 	    ((glb_req & GLB_REQ_PRFCNT_SAMPLE_MASK) ==
2868 	     (glb_ack & GLB_REQ_PRFCNT_SAMPLE_MASK))) {
2869 		kbdev->csf.hwcnt.request_pending = false;
2870 
2871 		dev_dbg(kbdev->dev, "PRFCNT_SAMPLE done interrupt received.");
2872 
2873 		kbase_hwcnt_backend_csf_on_prfcnt_sample(
2874 			&kbdev->hwcnt_gpu_iface);
2875 	}
2876 
2877 	/* Process PRFCNT_ENABLE interrupt. */
2878 	if (kbdev->csf.hwcnt.enable_pending &&
2879 	    ((glb_req & GLB_REQ_PRFCNT_ENABLE_MASK) ==
2880 	     (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK))) {
2881 		kbdev->csf.hwcnt.enable_pending = false;
2882 
2883 		dev_dbg(kbdev->dev,
2884 			"PRFCNT_ENABLE status changed interrupt received.");
2885 
2886 		if (glb_ack & GLB_REQ_PRFCNT_ENABLE_MASK)
2887 			kbase_hwcnt_backend_csf_on_prfcnt_enable(
2888 				&kbdev->hwcnt_gpu_iface);
2889 		else
2890 			kbase_hwcnt_backend_csf_on_prfcnt_disable(
2891 				&kbdev->hwcnt_gpu_iface);
2892 	}
2893 
2894 	/* Process PRFCNT_THRESHOLD interrupt. */
2895 	if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_THRESHOLD_MASK) {
2896 		dev_dbg(kbdev->dev, "PRFCNT_THRESHOLD interrupt received.");
2897 
2898 		kbase_hwcnt_backend_csf_on_prfcnt_threshold(
2899 			&kbdev->hwcnt_gpu_iface);
2900 
2901 		/* Set the GLB_REQ.PRFCNT_THRESHOLD flag back to
2902 		 * the same value as GLB_ACK.PRFCNT_THRESHOLD
2903 		 * flag in order to enable reporting of another
2904 		 * PRFCNT_THRESHOLD event.
2905 		 */
2906 		kbase_csf_firmware_global_input_mask(
2907 			global_iface, GLB_REQ, glb_ack,
2908 			GLB_REQ_PRFCNT_THRESHOLD_MASK);
2909 	}
2910 
2911 	/* Process PRFCNT_OVERFLOW interrupt. */
2912 	if ((glb_req ^ glb_ack) & GLB_REQ_PRFCNT_OVERFLOW_MASK) {
2913 		dev_dbg(kbdev->dev, "PRFCNT_OVERFLOW interrupt received.");
2914 
2915 		kbase_hwcnt_backend_csf_on_prfcnt_overflow(
2916 			&kbdev->hwcnt_gpu_iface);
2917 
2918 		/* Set the GLB_REQ.PRFCNT_OVERFLOW flag back to
2919 		 * the same value as GLB_ACK.PRFCNT_OVERFLOW
2920 		 * flag in order to enable reporting of another
2921 		 * PRFCNT_OVERFLOW event.
2922 		 */
2923 		kbase_csf_firmware_global_input_mask(
2924 			global_iface, GLB_REQ, glb_ack,
2925 			GLB_REQ_PRFCNT_OVERFLOW_MASK);
2926 	}
2927 }
2928 
2929 /**
2930  * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
2931  *
2932  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2933  * @glb_req: Global request register value.
2934  * @glb_ack: Global acknowledge register value.
2935  *
2936  * This function checks if the PROTM_ENTER Global request had completed and
2937  * appropriately sends notification about the protected mode entry to components
2938  * like IPA, HWC, IPA_CONTROL.
2939  */
2940 static inline void check_protm_enter_req_complete(struct kbase_device *kbdev,
2941 						  u32 glb_req, u32 glb_ack)
2942 {
2943 	lockdep_assert_held(&kbdev->hwaccess_lock);
2944 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2945 
2946 	if (likely(!kbdev->csf.scheduler.active_protm_grp))
2947 		return;
2948 
2949 	if (kbdev->protected_mode)
2950 		return;
2951 
2952 	if ((glb_req & GLB_REQ_PROTM_ENTER_MASK) !=
2953 	    (glb_ack & GLB_REQ_PROTM_ENTER_MASK))
2954 		return;
2955 
2956 	dev_dbg(kbdev->dev, "Protected mode entry interrupt received");
2957 
2958 	kbdev->protected_mode = true;
2959 	kbase_ipa_protection_mode_switch_event(kbdev);
2960 	kbase_ipa_control_protm_entered(kbdev);
2961 	kbase_hwcnt_backend_csf_protm_entered(&kbdev->hwcnt_gpu_iface);
2962 }
2963 
2964 /**
2965  * process_protm_exit - Handle the protected mode exit interrupt
2966  *
2967  * @kbdev: Instance of a GPU platform device that implements a CSF interface.
2968  * @glb_ack: Global acknowledge register value.
2969  *
2970  * This function handles the PROTM_EXIT interrupt and sends notification
2971  * about the protected mode exit to components like HWC, IPA_CONTROL.
2972  */
2973 static inline void process_protm_exit(struct kbase_device *kbdev, u32 glb_ack)
2974 {
2975 	const struct kbase_csf_global_iface *const global_iface =
2976 		&kbdev->csf.global_iface;
2977 	struct kbase_csf_scheduler *scheduler =	&kbdev->csf.scheduler;
2978 
2979 	lockdep_assert_held(&kbdev->hwaccess_lock);
2980 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
2981 
2982 	dev_dbg(kbdev->dev, "Protected mode exit interrupt received");
2983 
2984 	kbase_csf_firmware_global_input_mask(global_iface, GLB_REQ, glb_ack,
2985 					     GLB_REQ_PROTM_EXIT_MASK);
2986 
2987 	if (likely(scheduler->active_protm_grp)) {
2988 		KBASE_KTRACE_ADD_CSF_GRP(kbdev, SCHEDULER_PROTM_EXIT,
2989 					 scheduler->active_protm_grp, 0u);
2990 		scheduler->active_protm_grp = NULL;
2991 	} else {
2992 		dev_warn(kbdev->dev, "PROTM_EXIT interrupt after no pmode group");
2993 	}
2994 
2995 	if (!WARN_ON(!kbdev->protected_mode)) {
2996 		kbdev->protected_mode = false;
2997 		kbase_ipa_control_protm_exited(kbdev);
2998 		kbase_hwcnt_backend_csf_protm_exited(&kbdev->hwcnt_gpu_iface);
2999 	}
3000 
3001 #if IS_ENABLED(CONFIG_MALI_CORESIGHT)
3002 	kbase_debug_coresight_csf_enable_pmode_exit(kbdev);
3003 #endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
3004 }
3005 
3006 static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
3007 						  struct irq_idle_and_protm_track *track)
3008 {
3009 	struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
3010 	struct kbase_queue_group *group = track->protm_grp;
3011 	u32 current_protm_pending_seq = scheduler->tick_protm_pending_seq;
3012 
3013 	kbase_csf_scheduler_spin_lock_assert_held(kbdev);
3014 
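	/* Nothing to evaluate if no group was left pending protected mode entry
	 * by the scheduler's last tick/tock scan.
	 */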
3015 	if (likely(current_protm_pending_seq == KBASEP_TICK_PROTM_PEND_SCAN_SEQ_NR_INVALID))
3016 		return;
3017 
3018 	/* Handle protm from the tracked information */
3019 	if (track->idle_seq < current_protm_pending_seq) {
3020 		/* If protected mode entry was prevented due to the group's priority,
3021 		 * then fire a tock for the scheduler to re-examine the case.
3022 		 */
3023 		dev_dbg(kbdev->dev, "Attempt pending protm from idle slot %d\n", track->idle_slot);
3024 		kbase_csf_scheduler_invoke_tock(kbdev);
3025 	} else if (group) {
3026 		u32 i, num_groups = kbdev->csf.global_iface.group_num;
3027 		struct kbase_queue_group *grp;
3028 		bool tock_triggered = false;
3029 
3030 		/* A new protm request, and track->idle_seq is not sufficient, check across
3031 		 * previously notified idle CSGs in the current tick/tock cycle.
3032 		 */
3033 		for_each_set_bit(i, scheduler->csg_slots_idle_mask, num_groups) {
3034 			if (i == track->idle_slot)
3035 				continue;
3036 			grp = kbase_csf_scheduler_get_group_on_slot(kbdev, i);
3037 			/* If not NULL then the group pointer cannot disappear as the
3038 			 * scheduler spinlock is held.
3039 			 */
3040 			if (grp == NULL)
3041 				continue;
3042 
3043 			if (grp->scan_seq_num < current_protm_pending_seq) {
3044 				tock_triggered = true;
3045 				dev_dbg(kbdev->dev,
3046 					"Attempt new protm from tick/tock idle slot %d\n", i);
3047 				kbase_csf_scheduler_invoke_tock(kbdev);
3048 				break;
3049 			}
3050 		}
3051 
3052 		if (!tock_triggered) {
3053 			dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n",
3054 				group->handle, group->csg_nr);
3055 			queue_work(group->kctx->csf.wq, &group->protm_event_work);
3056 		}
3057 	}
3058 }
3059 
3060 static void order_job_irq_clear_with_iface_mem_read(void)
3061 {
3062 	/* Ensure that write to the JOB_IRQ_CLEAR is ordered with regards to the
3063 	 * read from interface memory. The ordering is needed considering the way
3064 	 * FW & Kbase write to the JOB_IRQ_RAWSTAT and JOB_IRQ_CLEAR registers
3065 	 * without any synchronization. Without the barrier there is no guarantee
3066 	 * about the ordering: the write to IRQ_CLEAR can take effect after the read
3067 	 * from interface memory, which could cause a problem for the scenario where
3068 	 * FW sends back-to-back notifications for the same CSG for events like
3069 	 * SYNC_UPDATE and IDLE, but Kbase gets a single IRQ and observes only the
3070 	 * first event. A similar thing can happen with GLB events like the
3071 	 * CFG_ALLOC_EN acknowledgment and the GPU idle notification.
3072 	 *
3073 	 *       MCU                                    CPU
3074 	 *  ---------------                         ----------------
3075 	 *  Update interface memory                 Write to IRQ_CLEAR to clear current IRQ
3076 	 *  <barrier>                               <barrier>
3077 	 *  Write to IRQ_RAWSTAT to raise new IRQ   Read interface memory
3078 	 */
3079 
3080 	/* CPU and GPU would be in the same Outer shareable domain */
3081 	dmb(osh);
3082 }
3083 
3084 void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
3085 {
3086 	bool deferred_handling_glb_idle_irq = false;
3087 
3088 	lockdep_assert_held(&kbdev->hwaccess_lock);
3089 
3090 	KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_START, NULL, val);
3091 
3092 	do {
3093 		unsigned long flags;
3094 		u32 csg_interrupts = val & ~JOB_IRQ_GLOBAL_IF;
3095 		struct irq_idle_and_protm_track track = { .protm_grp = NULL, .idle_seq = U32_MAX };
3096 		bool glb_idle_irq_received = false;
3097 
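		/* Clear the IRQ bits first, then read the interface memory, so that
		 * a new IRQ raised by the FW for a later update is not lost.
		 */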
3098 		kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), val);
3099 		order_job_irq_clear_with_iface_mem_read();
3100 
3101 		if (csg_interrupts != 0) {
3102 			kbase_csf_scheduler_spin_lock(kbdev, &flags);
3103 			/* Loop through and track the highest-priority idle and protm groups */
3104 			while (csg_interrupts != 0) {
3105 				int const csg_nr = ffs(csg_interrupts) - 1;
3106 
3107 				process_csg_interrupts(kbdev, csg_nr, &track);
3108 				csg_interrupts &= ~(1 << csg_nr);
3109 			}
3110 
3111 			/* Handle protm from the tracked information */
3112 			process_tracked_info_for_protm(kbdev, &track);
3113 			kbase_csf_scheduler_spin_unlock(kbdev, flags);
3114 		}
3115 
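		/* Handle the global interface interrupt, if it was flagged in this
		 * IRQ status value.
		 */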
3116 		if (val & JOB_IRQ_GLOBAL_IF) {
3117 			const struct kbase_csf_global_iface *const global_iface =
3118 				&kbdev->csf.global_iface;
3119 
3120 			kbdev->csf.interrupt_received = true;
3121 
3122 			if (!kbdev->csf.firmware_reloaded)
3123 				kbase_csf_firmware_reload_completed(kbdev);
3124 			else if (global_iface->output) {
3125 				u32 glb_req, glb_ack;
3126 
3127 				kbase_csf_scheduler_spin_lock(kbdev, &flags);
3128 				glb_req =
3129 					kbase_csf_firmware_global_input_read(global_iface, GLB_REQ);
3130 				glb_ack = kbase_csf_firmware_global_output(global_iface, GLB_ACK);
3131 				KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_GLB_REQ_ACK, NULL,
3132 						 glb_req ^ glb_ack);
3133 
3134 				check_protm_enter_req_complete(kbdev, glb_req, glb_ack);
3135 
3136 				if ((glb_req ^ glb_ack) & GLB_REQ_PROTM_EXIT_MASK)
3137 					process_protm_exit(kbdev, glb_ack);
3138 
3139 				/* Handle IDLE Hysteresis notification event */
3140 				if ((glb_req ^ glb_ack) & GLB_REQ_IDLE_EVENT_MASK) {
3141 					dev_dbg(kbdev->dev, "Idle-hysteresis event flagged");
3142 					kbase_csf_firmware_global_input_mask(
3143 						global_iface, GLB_REQ, glb_ack,
3144 						GLB_REQ_IDLE_EVENT_MASK);
3145 
3146 					glb_idle_irq_received = true;
3147 					/* Defer handling this IRQ to account for a race condition
3148 					 * where the idle worker could be executed before we have
3149 					 * finished handling all pending IRQs (including CSG IDLE
3150 					 * IRQs).
3151 					 */
3152 					deferred_handling_glb_idle_irq = true;
3153 				}
3154 
3155 				process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
3156 
3157 				kbase_csf_scheduler_spin_unlock(kbdev, flags);
3158 
3159 				/* Invoke the MCU state machine as a state transition
3160 				 * might have completed.
3161 				 */
3162 				kbase_pm_update_state(kbdev);
3163 			}
3164 		}
3165 
3166 		if (!glb_idle_irq_received)
3167 			break;
3168 		/* Attempt to serve potential IRQs that might have occurred
3169 		 * whilst handling the previous IRQ. In case we have observed
3170 		 * the GLB IDLE IRQ without all CSGs having been marked as
3171 		 * idle, the GPU would be treated as no longer idle and left
3172 		 * powered on.
3173 		 */
3174 		val = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_STATUS));
3175 	} while (val);
3176 
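	/* All pending IRQs have now been drained, so the deferred GPU idle
	 * event can be processed.
	 */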
3177 	if (deferred_handling_glb_idle_irq) {
3178 		unsigned long flags;
3179 
3180 		kbase_csf_scheduler_spin_lock(kbdev, &flags);
3181 		kbase_csf_scheduler_process_gpu_idle_event(kbdev);
3182 		kbase_csf_scheduler_spin_unlock(kbdev, flags);
3183 	}
3184 
3185 	wake_up_all(&kbdev->csf.event_wait);
3186 
3187 	KBASE_KTRACE_ADD(kbdev, CSF_INTERRUPT_END, NULL, val);
3188 }
3189 
3190 void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev)
3191 {
3192 	if (kbdev->csf.db_filp) {
3193 		struct page *page = as_page(kbdev->csf.dummy_db_page);
3194 
3195 		kbase_mem_pool_free(
3196 			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
3197 			page, false);
3198 
3199 		fput(kbdev->csf.db_filp);
3200 	}
3201 }
3202 
3203 int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
3204 {
3205 	struct tagged_addr phys;
3206 	struct file *filp;
3207 	int ret;
3208 
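	/* Set up an unlinked shmem file and a pre-allocated dummy page to back
	 * the doorbell mapping.
	 */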
3209 	filp = shmem_file_setup("mali csf db", MAX_LFS_FILESIZE, VM_NORESERVE);
3210 	if (IS_ERR(filp))
3211 		return PTR_ERR(filp);
3212 
3213 	ret = kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3214 					 false, NULL);
3215 
3216 	if (ret <= 0) {
3217 		fput(filp);
3218 		return ret;
3219 	}
3220 
3221 	kbdev->csf.db_filp = filp;
3222 	kbdev->csf.dummy_db_page = phys;
3223 	kbdev->csf.db_file_offsets = 0;
3224 
3225 	return 0;
3226 }
3227 
3228 void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
3229 {
3230 	if (kbdev->csf.user_reg.filp) {
3231 		struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
3232 
3233 		kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
3234 		fput(kbdev->csf.user_reg.filp);
3235 	}
3236 }
3237 
3238 int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
3239 {
3240 	struct tagged_addr phys;
3241 	struct file *filp;
3242 	struct page *page;
3243 	u32 *addr;
3244 
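	/* Not set until the page and its backing file are successfully created. */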
3245 	kbdev->csf.user_reg.filp = NULL;
3246 
3247 	filp = shmem_file_setup("mali csf user_reg", MAX_LFS_FILESIZE, VM_NORESERVE);
3248 	if (IS_ERR(filp)) {
3249 		dev_err(kbdev->dev, "failed to get an unlinked file for user_reg");
3250 		return PTR_ERR(filp);
3251 	}
3252 
3253 	if (kbase_mem_pool_alloc_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
3254 				       false, NULL) <= 0) {
3255 		fput(filp);
3256 		return -ENOMEM;
3257 	}
3258 
3259 	page = as_page(phys);
3260 	addr = kmap_atomic(page);
3261 
3262 	/* Write a special value for the latest flush register inside the
3263 	 * dummy page
3264 	 */
3265 	addr[LATEST_FLUSH / sizeof(u32)] = POWER_DOWN_LATEST_FLUSH_VALUE;
3266 
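	/* Perform CPU cache maintenance so that the value just written reaches
	 * memory before the page is mapped elsewhere.
	 */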
3267 	kbase_sync_single_for_device(kbdev, kbase_dma_addr(page) + LATEST_FLUSH, sizeof(u32),
3268 				     DMA_BIDIRECTIONAL);
3269 	kunmap_atomic(addr);
3270 
3271 	kbdev->csf.user_reg.filp = filp;
3272 	kbdev->csf.user_reg.dummy_page = phys;
3273 	kbdev->csf.user_reg.file_offset = 0;
3274 	return 0;
3275 }
3276 
3277 u8 kbase_csf_priority_check(struct kbase_device *kbdev, u8 req_priority)
3278 {
3279 	struct priority_control_manager_device *pcm_device = kbdev->pcm_dev;
3280 	u8 out_priority = req_priority;
3281 
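	/* If a priority control manager is registered, let it check the priority
	 * requested by the current process and return the priority actually
	 * allowed.
	 */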
3282 	if (pcm_device) {
3283 		req_priority = kbase_csf_priority_queue_group_priority_to_relative(req_priority);
3284 		out_priority = pcm_device->ops.pcm_scheduler_priority_check(pcm_device, current, req_priority);
3285 		out_priority = kbase_csf_priority_relative_to_queue_group_priority(out_priority);
3286 	}
3287 
3288 	return out_priority;
3289 }
3290