xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/csf/mali_kbase_csf_tiler_heap.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #include <tl/mali_kbase_tracepoints.h>
23 
24 #include "mali_kbase_csf_tiler_heap.h"
25 #include "mali_kbase_csf_tiler_heap_def.h"
26 #include "mali_kbase_csf_heap_context_alloc.h"
27 
28 /* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
29 #define HEAP_SHRINK_STOP_LIMIT (1)
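/* Editorial note (an inference from how this limit is used by the reclaim code
 * below): reclaim never shrinks a heap below this many chunks, so a live heap
 * always retains at least one chunk for the GPU to point at.
 */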
30 
31 /**
32  * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
33  *
34  * @cdsbp_0:       Descriptor_type and buffer_type
35  * @size:          The size of the current heap chunk
36  * @pointer:       Pointer to the current heap chunk
37  * @low_pointer:   Pointer to low end of current heap chunk
38  * @high_pointer:  Pointer to high end of current heap chunk
39  */
40 struct kbase_csf_gpu_buffer_heap {
41 	u32 cdsbp_0;
42 	u32 size;
43 	u64 pointer;
44 	u64 low_pointer;
45 	u64 high_pointer;
46 } __packed;
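/* Editorial sketch, not part of the original driver: the descriptor above is
 * read from a userspace-supplied GPU buffer through heap->buf_desc_map, so its
 * packed layout must match what the tiler writes. A hedged compile-time check
 * of the size this implies (2 x u32 + 3 x u64 = 32 bytes when packed):
 */
#if 0
_Static_assert(sizeof(struct kbase_csf_gpu_buffer_heap) == 32,
	       "GPU buffer descriptor assumed to be 32 bytes when packed");
#endif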
47 
48 /**
49  * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
50  *
51  * @chunk_size: Size of a tiler heap chunk, in bytes.
52  * @chunk_addr: GPU virtual address of the same tiler heap chunk.
53  *
54  * The size and address of the next chunk in a list are packed into a single
55  * 64-bit value for storage in a chunk's header. This function returns that
56  * value.
57  *
58  * Return: Next chunk pointer suitable for writing into a chunk header.
59  */
60 static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
61 {
62 	u64 encoded_size, encoded_addr;
63 
64 	WARN_ON(chunk_size & ~CHUNK_SIZE_MASK);
65 	WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK);
66 
67 	encoded_size =
68 		(u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) <<
69 		CHUNK_HDR_NEXT_SIZE_POS;
70 
71 	encoded_addr =
72 		(chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) <<
73 		CHUNK_HDR_NEXT_ADDR_POS;
74 
75 	return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) |
76 		(encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK);
77 }
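/* Worked example (illustrative only; it assumes the definitions in
 * mali_kbase_csf_tiler_heap_def.h place the size, in 4 KiB units, in the low
 * bits of the header word and the 4 KiB-aligned address in the bits above it):
 * for a 2 MiB chunk at GPU VA 0x8000000,
 *   encoded_size = 0x200000 >> 12          = 0x200
 *   encoded_addr = (0x8000000 >> 12) << 12 = 0x8000000
 * giving a next-chunk header value of 0x8000200.
 */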
78 
79 /**
80  * get_last_chunk - Get the last chunk of a tiler heap
81  *
82  * @heap:  Pointer to the tiler heap.
83  *
84  * Return: The address of the most recently-linked chunk, or NULL if none.
85  */
86 static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
87 	struct kbase_csf_tiler_heap *const heap)
88 {
89 	if (list_empty(&heap->chunks_list))
90 		return NULL;
91 
92 	return list_last_entry(&heap->chunks_list,
93 		struct kbase_csf_tiler_heap_chunk, link);
94 }
95 
96 /**
97  * remove_external_chunk_mappings - Remove external mappings from a chunk that
98  *                                  is being transitioned to the tiler heap
99  *                                  memory system.
100  *
101  * @kctx:  kbase context the chunk belongs to.
102  * @chunk: The chunk whose external mappings are going to be removed.
103  *
104  * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
105  * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
106  * parts of kbase outside of tiler heap management should not take references on its physical
107  * pages, and should not modify them.
108  */
109 static void remove_external_chunk_mappings(struct kbase_context *const kctx,
110 					   struct kbase_csf_tiler_heap_chunk *chunk)
111 {
112 	lockdep_assert_held(&kctx->reg_lock);
113 
114 	if (chunk->region->cpu_alloc != NULL) {
115 		kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
116 					     chunk->region->cpu_alloc->nents);
117 	}
118 #if !defined(CONFIG_MALI_VECTOR_DUMP)
119 	chunk->region->flags |= KBASE_REG_DONT_NEED;
120 #endif
121 
122 	dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
123 }
124 
125 /**
126  * link_chunk - Link a chunk into a tiler heap
127  *
128  * @heap:  Pointer to the tiler heap.
129  * @chunk: Pointer to the heap chunk to be linked.
130  *
131  * Unless the @chunk is the first in the kernel's list of chunks belonging to
132  * a given tiler heap, this function stores the size and address of the @chunk
133  * in the header of the preceding chunk. The header is written through the
134  * persistent kernel mapping that was created for that chunk when it was allocated.
135  *
136  * Return: 0 if successful or a negative error code on failure.
137  */
138 static int link_chunk(struct kbase_csf_tiler_heap *const heap,
139 	struct kbase_csf_tiler_heap_chunk *const chunk)
140 {
141 	struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap);
142 
143 	if (prev) {
144 		struct kbase_context *const kctx = heap->kctx;
145 		u64 *prev_hdr = prev->map.addr;
146 
147 		WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
148 		     "Cannot support CPU cached chunks without sync operations");
149 
150 		*prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
151 
152 		dev_dbg(kctx->kbdev->dev,
153 			"Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
154 			prev->gpu_va, chunk->gpu_va);
155 	}
156 
157 	return 0;
158 }
159 
160 /**
161  * init_chunk - Initialize and link a tiler heap chunk
162  *
163  * @heap:  Pointer to the tiler heap.
164  * @chunk: Pointer to the heap chunk to be initialized and linked.
165  * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
166  *                  the previously allocated chunk.
167  *
168  * Zero-initialize a new chunk's header (including its pointer to the next
169  * chunk, which doesn't exist yet) and then update the previous chunk's
170  * header to link the new chunk into the chunk list.
171  *
172  * Return: 0 if successful or a negative error code on failure.
173  */
174 static int init_chunk(struct kbase_csf_tiler_heap *const heap,
175 	struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
176 {
177 	int err = 0;
178 	u64 *chunk_hdr;
179 	struct kbase_context *const kctx = heap->kctx;
180 
181 	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
182 
183 	if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
184 		dev_err(kctx->kbdev->dev,
185 			"Tiler heap chunk address is unusable\n");
186 		return -EINVAL;
187 	}
188 
189 	WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
190 	     "Cannot support CPU cached chunks without sync operations");
191 	chunk_hdr = chunk->map.addr;
192 	if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
193 		 "Tiler chunk kernel mapping was not large enough for zero-init")) {
194 		return -EINVAL;
195 	}
196 
197 	memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
198 	INIT_LIST_HEAD(&chunk->link);
199 
200 	if (link_with_prev)
201 		err = link_chunk(heap, chunk);
202 
203 	if (unlikely(err)) {
204 		dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
205 		return -EINVAL;
206 	}
207 
208 	list_add_tail(&chunk->link, &heap->chunks_list);
209 	heap->chunk_count++;
210 
211 	return err;
212 }
213 
214 /**
215  * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
216  *                         heap.
217  *
218  * @kctx:  Kbase context that was used to allocate the memory.
219  * @chunk: Chunk that has been allocated, but not linked into a heap.
220  */
221 static void remove_unlinked_chunk(struct kbase_context *kctx,
222 				  struct kbase_csf_tiler_heap_chunk *chunk)
223 {
224 	if (WARN_ON(!list_empty(&chunk->link)))
225 		return;
226 
227 	kbase_gpu_vm_lock(kctx);
228 	kbase_vunmap(kctx, &chunk->map);
229 	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
230 	 * regions), and so we must clear that flag too before freeing.
231 	 * For "no user free count", we check that the count is 1 as it is a shrinkable region;
232 	 * no other code part within kbase can take a reference to it.
233 	 */
234 	WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
235 	kbase_va_region_no_user_free_dec(chunk->region);
236 #if !defined(CONFIG_MALI_VECTOR_DUMP)
237 	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
238 #endif
239 	kbase_mem_free_region(kctx, chunk->region);
240 	kbase_gpu_vm_unlock(kctx);
241 
242 	kfree(chunk);
243 }
244 
245 /**
246  * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
247  *                   region for the chunk, and provide a kernel mapping.
248  * @kctx:       kbase context with which the chunk will be linked
249  * @chunk_size: the size of the chunk from the corresponding heap
250  *
251  * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
252  * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
253  * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
254  * lock).
255  *
256  * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
257  * ensure that it is protected from being mapped by other parts of kbase.
258  *
259  * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the
260  * shrinker path, as it may be otherwise shrunk at any time.
261  *
262  * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
263  *         on failure
264  */
265 static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
266 							  u64 chunk_size)
267 {
268 	u64 nr_pages = PFN_UP(chunk_size);
269 	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
270 		    BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
271 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
272 	/* The chunk kernel mapping needs to be large enough to:
273 	 * - initially zero the CHUNK_HDR_SIZE area
274 	 * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
275 	 */
276 	const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);
277 
278 	/* Calls to this function are inherently synchronous, with respect to
279 	 * MMU operations.
280 	 */
281 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
282 	flags |= kbase_mem_group_id_set(kctx->jit_group_id);
283 
284 	chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
285 	if (unlikely(!chunk)) {
286 		dev_err(kctx->kbdev->dev,
287 			"No kernel memory for a new tiler heap chunk\n");
288 		return NULL;
289 	}
290 
291 	/* Allocate GPU memory for the new chunk. */
292 	chunk->region =
293 		kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
294 
295 	if (unlikely(!chunk->region)) {
296 		dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
297 		goto unroll_chunk;
298 	}
299 
300 	kbase_gpu_vm_lock(kctx);
301 
302 	/* Some checks are done here because NO_USER_FREE still allows such changes
303 	 * to be made whilst the region lock was dropped
304 	 */
305 	if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
306 		dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
307 		goto unroll_region;
308 	}
309 
310 	/* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
311 	 * thread can have the "no user free" refcount increased between kbase_mem_alloc
312 	 * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
313 	 * remove_external_chunk_mappings (below).
314 	 *
315 	 * It should be fine and not a security risk if we let the region leak till
316 	 * region tracker termination in such a case.
317 	 */
318 	if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
319 		dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
320 		goto unroll_region;
321 	}
322 
323 	/* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
324 	 * being requested, it's useful to document in code what those restrictions are, and ensure
325 	 * they remain in place in future.
326 	 */
327 	if (WARN(!chunk->region->gpu_alloc,
328 		 "NO_USER_FREE chunks should not have had their alloc freed")) {
329 		goto unroll_region;
330 	}
331 
332 	if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
333 		 "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
334 		goto unroll_region;
335 	}
336 
337 	if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
338 		 "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
339 		goto unroll_region;
340 	}
341 
342 	if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
343 		 "NO_USER_FREE chunks should not have been made ephemeral")) {
344 		goto unroll_region;
345 	}
346 
347 	if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
348 		 "NO_USER_FREE chunks should not have been aliased")) {
349 		goto unroll_region;
350 	}
351 
352 	if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
353 				     (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
354 				     KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
355 		dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
356 		goto unroll_region;
357 	}
358 
359 	remove_external_chunk_mappings(kctx, chunk);
360 	kbase_gpu_vm_unlock(kctx);
361 
362 	/* If page migration is enabled, we don't want to migrate tiler heap pages.
363 	 * This does not change if the constituent pages are already marked as isolated.
364 	 */
365 	if (kbase_page_migration_enabled)
366 		kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
367 
368 	return chunk;
369 
370 unroll_region:
371 	/* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
372 	 * regions), and so we must clear that flag too before freeing.
373 	 */
374 	kbase_va_region_no_user_free_dec(chunk->region);
375 #if !defined(CONFIG_MALI_VECTOR_DUMP)
376 	chunk->region->flags &= ~KBASE_REG_DONT_NEED;
377 #endif
378 	kbase_mem_free_region(kctx, chunk->region);
379 	kbase_gpu_vm_unlock(kctx);
380 unroll_chunk:
381 	kfree(chunk);
382 	return NULL;
383 }
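/* Editorial summary of alloc_new_chunk(): on success the returned chunk's VA
 * region is NO_USER_FREE, marked KBASE_REG_DONT_NEED (unless vector dump is
 * enabled), set NOT_MOVABLE when page migration is enabled, stripped of
 * external CPU mappings, and covered by one permanent kernel mapping
 * (chunk->map) spanning the chunk header. It is not yet linked into any heap.
 */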
384 
385 /**
386  * create_chunk - Create a tiler heap chunk
387  *
388  * @heap: Pointer to the tiler heap for which to allocate memory.
389  *
390  * This function allocates a chunk of memory for a tiler heap and adds it to
391  * the list of chunks associated with that heap, both on the host side and in
392  * GPU memory.
393  *
394  * Return: 0 if successful or a negative error code on failure.
395  */
396 static int create_chunk(struct kbase_csf_tiler_heap *const heap)
397 {
398 	int err = 0;
399 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
400 
401 	chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
402 	if (unlikely(!chunk)) {
403 		err = -ENOMEM;
404 		goto allocation_failure;
405 	}
406 
407 	mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
408 	err = init_chunk(heap, chunk, true);
409 	mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);
410 
411 	if (unlikely(err))
412 		goto initialization_failure;
413 
414 	dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);
415 
416 	return 0;
417 initialization_failure:
418 	remove_unlinked_chunk(heap->kctx, chunk);
419 allocation_failure:
420 	return err;
421 }
422 
423 /**
424  * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
425  *
426  * @heap: Pointer to a tiler heap.
427  *
428  * This function empties the list of chunks associated with a tiler heap by freeing all chunks
429  * previously allocated by create_chunk().
430  *
431  * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
432  * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
433  * kbase_context.region_lock.
434  *
435  * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
436  *          list of chunks used by the GPU, therefore it is only safe to use this function when
437  *          deleting a heap.
438  */
439 static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
440 {
441 	struct kbase_context *const kctx = heap->kctx;
442 	struct list_head *entry = NULL, *tmp = NULL;
443 
444 	WARN(!list_empty(&heap->link),
445 	     "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");
446 
447 	list_for_each_safe(entry, tmp, &heap->chunks_list) {
448 		struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
449 			entry, struct kbase_csf_tiler_heap_chunk, link);
450 
451 		list_del_init(&chunk->link);
452 		heap->chunk_count--;
453 
454 		remove_unlinked_chunk(kctx, chunk);
455 	}
456 }
457 
458 /**
459  * create_initial_chunks - Create the initial list of chunks for a tiler heap
460  *
461  * @heap:    Pointer to the tiler heap for which to allocate memory.
462  * @nchunks: Number of chunks to create.
463  *
464  * This function allocates a given number of chunks for a tiler heap and
465  * adds them to the list of chunks associated with that heap.
466  *
467  * Return: 0 if successful or a negative error code on failure.
468  */
469 static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
470 	u32 const nchunks)
471 {
472 	int err = 0;
473 	u32 i;
474 
475 	for (i = 0; (i < nchunks) && likely(!err); i++)
476 		err = create_chunk(heap);
477 
478 	if (unlikely(err))
479 		delete_all_chunks(heap);
480 
481 	return err;
482 }
483 
484 /**
485  * delete_heap - Delete an unlinked tiler heap
486  *
487  * @heap: Pointer to a tiler heap to be deleted.
488  *
489  * This function frees any chunks allocated for a tiler heap previously
490  * initialized by kbase_csf_tiler_heap_init(). The heap context structure used by
491  * the firmware is also freed.
492  *
493  * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
494  * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
495  * kbase_context.region_lock.
496  */
497 static void delete_heap(struct kbase_csf_tiler_heap *heap)
498 {
499 	struct kbase_context *const kctx = heap->kctx;
500 
501 	dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
502 
503 	WARN(!list_empty(&heap->link),
504 	     "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");
505 
506 	/* Make sure that all of the VA regions corresponding to the chunks are
507 	 * freed at this time and that the work queue is not trying to access freed
508 	 * memory.
509 	 *
510 	 * Note: since the heap is unlinked, and that no references are made to chunks other
511 	 * than from their heap, there is no need to separately move the chunks out of the
512 	 * heap->chunks_list to delete them.
513 	 */
514 	delete_all_chunks(heap);
515 
516 	kbase_vunmap(kctx, &heap->gpu_va_map);
517 	/* We could optimize context destruction by not freeing leaked heap
518 	 * contexts but it doesn't seem worth the extra complexity. After this
519 	 * point, the suballocation is returned to the heap context allocator and
520 	 * may be overwritten with new data, meaning heap->gpu_va should not
521 	 * be used past this point.
522 	 */
523 	kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
524 		heap->gpu_va);
525 
526 	WARN_ON(heap->chunk_count);
527 	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
528 		heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
529 		heap->target_in_flight, 0);
530 
531 	if (heap->buf_desc_reg) {
532 		kbase_vunmap(kctx, &heap->buf_desc_map);
533 		kbase_gpu_vm_lock(kctx);
534 		kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
535 		kbase_gpu_vm_unlock(kctx);
536 	}
537 
538 	kfree(heap);
539 }
540 
541 /**
542  * find_tiler_heap - Find a tiler heap from the address of its heap context
543  *
544  * @kctx:        Pointer to the kbase context to search for a tiler heap.
545  * @heap_gpu_va: GPU virtual address of a heap context structure.
546  *
547  * Each tiler heap managed by the kernel has an associated heap context
548  * structure used by the firmware. This function finds a tiler heap object from
549  * the GPU virtual address of its associated heap context. The heap context
550  * should have been allocated by kbase_csf_heap_context_allocator_alloc() in the
551  * same @kctx.
552  *
553  * Return: pointer to the tiler heap object, or NULL if not found.
554  */
555 static struct kbase_csf_tiler_heap *find_tiler_heap(
556 	struct kbase_context *const kctx, u64 const heap_gpu_va)
557 {
558 	struct kbase_csf_tiler_heap *heap = NULL;
559 
560 	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
561 
562 	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
563 		if (heap_gpu_va == heap->gpu_va)
564 			return heap;
565 	}
566 
567 	dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
568 		heap_gpu_va);
569 
570 	return NULL;
571 }
572 
573 static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
574 						     u64 const chunk_gpu_va)
575 {
576 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
577 
578 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
579 
580 	list_for_each_entry(chunk, &heap->chunks_list, link) {
581 		if (chunk->gpu_va == chunk_gpu_va)
582 			return chunk;
583 	}
584 
585 	dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);
586 
587 	return NULL;
588 }
589 
590 int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
591 {
592 	int err = kbase_csf_heap_context_allocator_init(
593 		&kctx->csf.tiler_heaps.ctx_alloc, kctx);
594 
595 	if (unlikely(err))
596 		return err;
597 
598 	INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list);
599 	mutex_init(&kctx->csf.tiler_heaps.lock);
600 
601 	dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n");
602 
603 	return 0;
604 }
605 
606 void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
607 {
608 	LIST_HEAD(local_heaps_list);
609 	struct list_head *entry = NULL, *tmp = NULL;
610 
611 	dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
612 
613 	mutex_lock(&kctx->csf.tiler_heaps.lock);
614 	list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
615 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
616 
617 	list_for_each_safe(entry, tmp, &local_heaps_list) {
618 		struct kbase_csf_tiler_heap *heap = list_entry(
619 			entry, struct kbase_csf_tiler_heap, link);
620 
621 		list_del_init(&heap->link);
622 		delete_heap(heap);
623 	}
624 
625 	mutex_destroy(&kctx->csf.tiler_heaps.lock);
626 
627 	kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
628 }
629 
630 /**
631  * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
632  *                                               the tiler heap buffer descriptor
633  *                                               is suitable for the purpose.
634  * @kctx: kbase context of the tiler heap
635  * @reg:  VA region being checked for suitability
636  *
637  * By design, the tiler heap buffer descriptor memory cannot tolerate page
638  * faults, so it must be fully backed at allocation time and must remain alive
639  * for as long as the tiler heap itself. It therefore cannot be allocated from
640  * JIT/ephemeral or user-freeable memory.
641  *
642  * Return: true on suitability, false otherwise.
643  */
644 static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
645 							struct kbase_va_region *const reg)
646 {
647 	if (kbase_is_region_invalid_or_free(reg)) {
648 		dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
649 		return false;
650 	}
651 
652 	if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
653 	    (reg->flags & KBASE_REG_PF_GROW)) {
654 		dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
655 		return false;
656 	}
657 
658 	if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
659 		dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
660 		return false;
661 	}
662 
663 	if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
664 	    (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
665 		dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
666 		return false;
667 	}
668 
669 	return true;
670 }
671 
672 #define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))
673 
674 int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
675 			      u32 const initial_chunks, u32 const max_chunks,
676 			      u16 const target_in_flight, u64 const buf_desc_va,
677 			      u64 *const heap_gpu_va, u64 *const first_chunk_va)
678 {
679 	int err = 0;
680 	struct kbase_csf_tiler_heap *heap = NULL;
681 	struct kbase_csf_heap_context_allocator *const ctx_alloc =
682 		&kctx->csf.tiler_heaps.ctx_alloc;
683 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
684 	struct kbase_va_region *gpu_va_reg = NULL;
685 	void *vmap_ptr = NULL;
686 
687 	dev_dbg(kctx->kbdev->dev,
688 		"Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
689 		initial_chunks, max_chunks, chunk_size, buf_desc_va);
690 
691 	if (!kbase_mem_allow_alloc(kctx))
692 		return -EINVAL;
693 
694 	if (chunk_size == 0)
695 		return -EINVAL;
696 
697 	if (chunk_size & ~CHUNK_SIZE_MASK)
698 		return -EINVAL;
699 
700 	if (initial_chunks == 0)
701 		return -EINVAL;
702 
703 	if (initial_chunks > max_chunks)
704 		return -EINVAL;
705 
706 	if (target_in_flight == 0)
707 		return -EINVAL;
708 
709 	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
710 	if (unlikely(!heap)) {
711 		dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
712 		return -ENOMEM;
713 	}
714 
715 	heap->kctx = kctx;
716 	heap->chunk_size = chunk_size;
717 	heap->max_chunks = max_chunks;
718 	heap->target_in_flight = target_in_flight;
719 	heap->buf_desc_checked = false;
720 	INIT_LIST_HEAD(&heap->chunks_list);
721 	INIT_LIST_HEAD(&heap->link);
722 
723 	/* Check on the buffer descriptor virtual address */
724 	if (buf_desc_va) {
725 		struct kbase_va_region *buf_desc_reg;
726 
727 		kbase_gpu_vm_lock(kctx);
728 		buf_desc_reg =
729 			kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
730 
731 		if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
732 			kbase_gpu_vm_unlock(kctx);
733 			dev_err(kctx->kbdev->dev,
734 				"Could not find a suitable VA region for the tiler heap buf desc!\n");
735 			err = -EINVAL;
736 			goto buf_desc_not_suitable;
737 		}
738 
739 		/* If we don't prevent userspace from unmapping this, we may run into
740 		 * use-after-free, as we don't check for the existence of the region throughout.
741 		 */
742 
743 		heap->buf_desc_va = buf_desc_va;
744 		heap->buf_desc_reg = buf_desc_reg;
745 		kbase_va_region_no_user_free_inc(buf_desc_reg);
746 
747 		vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
748 					  KBASE_REG_CPU_RD, &heap->buf_desc_map,
749 					  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
750 
751 		if (kbase_page_migration_enabled)
752 			kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
753 
754 		kbase_gpu_vm_unlock(kctx);
755 
756 		if (unlikely(!vmap_ptr)) {
757 			dev_err(kctx->kbdev->dev,
758 				"Could not vmap buffer descriptor into kernel memory (err %d)\n",
759 				err);
760 			err = -ENOMEM;
761 			goto buf_desc_vmap_failed;
762 		}
763 	}
764 
765 	heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
766 	if (unlikely(!heap->gpu_va)) {
767 		dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
768 		err = -ENOMEM;
769 		goto heap_context_alloc_failed;
770 	}
771 
772 	gpu_va_reg = ctx_alloc->region;
773 
774 	kbase_gpu_vm_lock(kctx);
775 	/* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens
776 	 * on kctx termination (after all syscalls on kctx have finished), and so it is safe to
777 	 * assume that gpu_va_reg is still present.
778 	 */
779 	vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
780 				  (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
781 				  KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
782 	kbase_gpu_vm_unlock(kctx);
783 	if (unlikely(!vmap_ptr)) {
784 		dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
785 		err = -ENOMEM;
786 		goto heap_context_vmap_failed;
787 	}
788 
789 	err = create_initial_chunks(heap, initial_chunks);
790 	if (unlikely(err)) {
791 		dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
792 		goto create_chunks_failed;
793 	}
794 	chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
795 
796 	*heap_gpu_va = heap->gpu_va;
797 	*first_chunk_va = chunk->gpu_va;
798 
799 	mutex_lock(&kctx->csf.tiler_heaps.lock);
800 	kctx->csf.tiler_heaps.nr_of_heaps++;
801 	heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
802 	list_add(&heap->link, &kctx->csf.tiler_heaps.list);
803 
804 	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
805 					    PFN_UP(heap->chunk_size * heap->max_chunks),
806 					    PFN_UP(heap->chunk_size * heap->chunk_count),
807 					    heap->max_chunks, heap->chunk_size, heap->chunk_count,
808 					    heap->target_in_flight, 0);
809 
810 #if defined(CONFIG_MALI_VECTOR_DUMP)
811 	list_for_each_entry(chunk, &heap->chunks_list, link) {
812 		KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
813 							 chunk->gpu_va);
814 	}
815 #endif
816 	kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
817 	kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
818 	if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
819 		kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
820 
821 	dev_dbg(kctx->kbdev->dev,
822 		"Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
823 		buf_desc_va, kctx->tgid, kctx->id);
824 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
825 
826 	return 0;
827 
828 create_chunks_failed:
829 	kbase_vunmap(kctx, &heap->gpu_va_map);
830 heap_context_vmap_failed:
831 	kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
832 heap_context_alloc_failed:
833 	if (heap->buf_desc_reg)
834 		kbase_vunmap(kctx, &heap->buf_desc_map);
835 buf_desc_vmap_failed:
836 	if (heap->buf_desc_reg) {
837 		kbase_gpu_vm_lock(kctx);
838 		kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
839 		kbase_gpu_vm_unlock(kctx);
840 	}
841 buf_desc_not_suitable:
842 	kfree(heap);
843 	return err;
844 }
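/* Hedged usage sketch, not part of the driver: it illustrates the expected
 * call sequence for the two exported heap lifetime functions above, with
 * made-up parameter values (2 MiB chunks, 2 initial chunks, 32 chunks max,
 * 16 render passes in flight, no buffer descriptor).
 */
#if 0
static int example_tiler_heap_lifetime(struct kbase_context *kctx)
{
	u64 heap_va = 0, first_chunk_va = 0;
	int err = kbase_csf_tiler_heap_init(kctx, 2u << 20, 2, 32, 16, 0,
					    &heap_va, &first_chunk_va);

	if (err)
		return err;

	/* heap_va (the heap context GPU VA) would normally be passed on to the
	 * firmware interface; first_chunk_va seeds the tiler's chunk list.
	 */

	return kbase_csf_tiler_heap_term(kctx, heap_va);
}
#endif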
845 
846 int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
847 	u64 const heap_gpu_va)
848 {
849 	int err = 0;
850 	struct kbase_csf_tiler_heap *heap = NULL;
851 	u32 chunk_count = 0;
852 	u64 heap_size = 0;
853 
854 	mutex_lock(&kctx->csf.tiler_heaps.lock);
855 	heap = find_tiler_heap(kctx, heap_gpu_va);
856 	if (likely(heap)) {
857 		chunk_count = heap->chunk_count;
858 		heap_size = heap->chunk_size * chunk_count;
859 
860 		list_del_init(&heap->link);
861 	} else {
862 		err = -EINVAL;
863 	}
864 
865 	/* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
866 	 * at all times
867 	 */
868 	if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
869 		kctx->running_total_tiler_heap_memory -= heap_size;
870 	else
871 		dev_warn(kctx->kbdev->dev,
872 			 "Running total tiler heap memory lower than expected!");
873 	if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
874 		kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
875 	else
876 		dev_warn(kctx->kbdev->dev,
877 			 "Running total tiler chunk count lower than expected!");
878 	if (!err)
879 		dev_dbg(kctx->kbdev->dev,
880 			"Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
881 			heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
882 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
883 
884 	/* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
885 	 * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
886 	 */
887 	if (likely(heap))
888 		delete_heap(heap);
889 
890 	return err;
891 }
892 
893 /**
894  * validate_allocation_request - Check whether the chunk allocation request
895  *                               received on tiler OOM should be handled at
896  *                               current time.
897  *
898  * @heap:               The tiler heap the OOM is associated with
899  * @nr_in_flight:       Number of fragment jobs in flight
900  * @pending_frag_count: Number of pending fragment jobs
901  *
902  * Context: must hold the tiler heap lock to guarantee its lifetime
903  *
904  * Return:
905  * * 0       - allowed to allocate an additional chunk
906  * * -EINVAL - invalid
907  * * -EBUSY  - there are fragment jobs still in flight, which may free chunks
908  *             after completing
909  * * -ENOMEM - the targeted number of in-flight chunks has been reached and
910  *             no new ones will be allocated
911  */
912 static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
913 				       u32 pending_frag_count)
914 {
915 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
916 
917 	if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
918 		return -EINVAL;
919 
920 	if (nr_in_flight <= heap->target_in_flight) {
921 		if (heap->chunk_count < heap->max_chunks) {
922 			/* Not exceeded the target number of render passes yet so be
923 			 * generous with memory.
924 			 */
925 			return 0;
926 		} else if (pending_frag_count > 0) {
927 			return -EBUSY;
928 		} else {
929 			return -ENOMEM;
930 		}
931 	} else {
932 		/* Reached target number of render passes in flight.
933 		 * Wait for some of them to finish
934 		 */
935 		return -EBUSY;
936 	}
937 	return -ENOMEM;
938 }
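/* Worked example against the logic above (illustrative values): with
 * target_in_flight = 16 and max_chunks = 32, a request with nr_in_flight = 4
 * and chunk_count = 10 returns 0 (grow the heap); the same request with
 * chunk_count = 32 returns -EBUSY if pending_frag_count > 0 (fragment work may
 * still free chunks) or -ENOMEM otherwise; nr_in_flight = 20 returns -EBUSY
 * regardless, as the target number of in-flight render passes is exceeded.
 */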
939 
940 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
941 	u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
942 {
943 	struct kbase_csf_tiler_heap *heap;
944 	struct kbase_csf_tiler_heap_chunk *chunk;
945 	int err = -EINVAL;
946 	u64 chunk_size = 0;
947 	u64 heap_id = 0;
948 
949 	/* To avoid potential locking issues during allocation, this is handled
950 	 * in three phases:
951 	 * 1. Take the lock, find the corresponding heap, and find its chunk size
952 	 * (this is always 2 MB, but may change down the line).
953 	 * 2. Allocate memory for the chunk and its region.
954 	 * 3. If the heap still exists, link the new chunk to the end of the heap's
955 	 * chunk list. If the heap no longer exists, roll back the allocation.
956 	 */
957 
958 	mutex_lock(&kctx->csf.tiler_heaps.lock);
959 	heap = find_tiler_heap(kctx, gpu_heap_va);
960 	if (likely(heap)) {
961 		chunk_size = heap->chunk_size;
962 		heap_id = heap->heap_id;
963 	} else {
964 		dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
965 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
966 		goto prelink_failure;
967 	}
968 
969 	err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
970 	if (unlikely(err)) {
971 		/* The allocation request can be legitimate, but be invoked on a heap
972 		 * that has already reached the maximum pre-configured capacity. This
973 		 * is useful debug information, but should not be treated as an error,
974 		 * since the request will be re-sent at a later point.
975 		 */
976 		dev_dbg(kctx->kbdev->dev,
977 			"Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
978 			gpu_heap_va, err);
979 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
980 		goto prelink_failure;
981 	}
982 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
983 	/* this heap must not be used whilst we have dropped the lock */
984 	heap = NULL;
985 
986 	chunk = alloc_new_chunk(kctx, chunk_size);
987 	if (unlikely(!chunk)) {
988 		dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
989 			chunk_size, kctx->tgid, kctx->id);
990 		goto prelink_failure;
991 	}
992 
993 	/* After this point, the heap that we were targeting could already have had the needed
994 	 * chunks allocated, if we were handling multiple OoM events on multiple threads, so
995 	 * we need to revalidate the need for the allocation.
996 	 */
997 	mutex_lock(&kctx->csf.tiler_heaps.lock);
998 	heap = find_tiler_heap(kctx, gpu_heap_va);
999 
1000 	if (unlikely(!heap)) {
1001 		dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
1002 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
1003 		goto unroll_chunk;
1004 	}
1005 
1006 	if (heap_id != heap->heap_id) {
1007 		dev_err(kctx->kbdev->dev,
1008 			"Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
1009 			gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
1010 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
1011 		goto unroll_chunk;
1012 	}
1013 
1014 	if (WARN_ON(chunk_size != heap->chunk_size)) {
1015 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
1016 		goto unroll_chunk;
1017 	}
1018 
1019 	err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
1020 	if (unlikely(err)) {
1021 		dev_warn(
1022 			kctx->kbdev->dev,
1023 			"Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
1024 			gpu_heap_va, err);
1025 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
1026 		goto unroll_chunk;
1027 	}
1028 
1029 	err = init_chunk(heap, chunk, false);
1030 
1031 	/* On error, the chunk would not be linked, so we can still treat it as an unlinked
1032 	 * chunk for error handling.
1033 	 */
1034 	if (unlikely(err)) {
1035 		dev_err(kctx->kbdev->dev,
1036 			"Could not link chunk(0x%llX) with tiler heap 0x%llX in ctx %d_%d due to error %d",
1037 			chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
1038 		mutex_unlock(&kctx->csf.tiler_heaps.lock);
1039 		goto unroll_chunk;
1040 	}
1041 
1042 	*new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
1043 
1044 	/* update total and peak tiler heap memory record */
1045 	kctx->running_total_tiler_heap_nr_chunks++;
1046 	kctx->running_total_tiler_heap_memory += heap->chunk_size;
1047 
1048 	if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
1049 		kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
1050 
1051 	KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
1052 					    PFN_UP(heap->chunk_size * heap->max_chunks),
1053 					    PFN_UP(heap->chunk_size * heap->chunk_count),
1054 					    heap->max_chunks, heap->chunk_size, heap->chunk_count,
1055 					    heap->target_in_flight, nr_in_flight);
1056 
1057 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
1058 
1059 	return err;
1060 unroll_chunk:
1061 	remove_unlinked_chunk(kctx, chunk);
1062 prelink_failure:
1063 	return err;
1064 }
1065 
1066 static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
1067 					u64 *hdr_val)
1068 {
1069 	int err;
1070 	u64 *chunk_hdr;
1071 	struct kbase_context *kctx = heap->kctx;
1072 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1073 
1074 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1075 
1076 	chunk = find_chunk(heap, chunk_gpu_va);
1077 	if (unlikely(!chunk)) {
1078 		dev_warn(kctx->kbdev->dev,
1079 			 "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
1080 			 heap->gpu_va, chunk_gpu_va);
1081 		return false;
1082 	}
1083 
1084 	WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1085 	     "Cannot support CPU cached chunks without sync operations");
1086 	chunk_hdr = chunk->map.addr;
1087 	*hdr_val = *chunk_hdr;
1088 
1089 	dev_dbg(kctx->kbdev->dev,
1090 		"Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1091 		chunk_gpu_va, heap->gpu_va, *hdr_val);
1092 
1093 	err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
1094 	if (unlikely(err)) {
1095 		dev_warn(
1096 			kctx->kbdev->dev,
1097 			"Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
1098 			chunk_gpu_va, heap->gpu_va, err);
1099 
1100 		/* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
1101 		 * the heap's chunk list and try a different heap.
1102 		 */
1103 
1104 		return false;
1105 	}
1106 	/* Destroy the mapping before the physical pages which are mapped are destroyed. */
1107 	kbase_vunmap(kctx, &chunk->map);
1108 
1109 	err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
1110 					  chunk->region->gpu_alloc->nents);
1111 	if (unlikely(err)) {
1112 		dev_warn(
1113 			kctx->kbdev->dev,
1114 			"Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
1115 			chunk_gpu_va, heap->gpu_va, err);
1116 
1117 		/* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
1118 		 * anyway, so continue instead of returning early.
1119 		 *
1120 		 * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
1121 		 * its mapping removed, as that could lead to problems. It's safest to instead
1122 		 * continue with deferred destruction of the chunk.
1123 		 */
1124 	}
1125 
1126 	dev_dbg(kctx->kbdev->dev,
1127 		"Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1128 		chunk_gpu_va, heap->gpu_va, *hdr_val);
1129 
1130 	mutex_lock(&heap->kctx->jit_evict_lock);
1131 	list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
1132 	mutex_unlock(&heap->kctx->jit_evict_lock);
1133 
1134 	list_del(&chunk->link);
1135 	heap->chunk_count--;
1136 	kfree(chunk);
1137 
1138 	return true;
1139 }
1140 
1141 static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
1142 					 struct kbase_csf_gpu_buffer_heap *desc)
1143 {
1144 	u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1145 
1146 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1147 
1148 	if (first_hoarded_chunk_gpu_va) {
1149 		struct kbase_csf_tiler_heap_chunk *chunk =
1150 			find_chunk(heap, first_hoarded_chunk_gpu_va);
1151 
1152 		if (likely(chunk)) {
1153 			dev_dbg(heap->kctx->kbdev->dev,
1154 				"Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
1155 				heap->buf_desc_va);
1156 
1157 			heap->buf_desc_checked = true;
1158 			return;
1159 		}
1160 	}
1161 	/* If there is no match, defer the check to next time */
1162 	dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
1163 		heap->buf_desc_va);
1164 }
1165 
1166 static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
1167 {
1168 	struct kbase_context *kctx = heap->kctx;
1169 
1170 	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1171 
1172 	/* Initialize the descriptor pointer value to 0 */
1173 	*chunk_gpu_va_ptr = 0;
1174 
1175 	/* The BufferDescriptor on the heap is only a hint given at creation, so sanity check it at runtime */
1176 	if (heap->buf_desc_reg && !heap->buf_desc_checked) {
1177 		struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1178 
1179 		/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1180 		if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1181 			kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
1182 
1183 		sanity_check_gpu_buffer_heap(heap, desc);
1184 		if (heap->buf_desc_checked)
1185 			*chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
1186 	}
1187 
1188 	return heap->buf_desc_checked;
1189 }
1190 
1191 static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
1192 {
1193 	u32 freed = 0;
1194 	u64 chunk_gpu_va = 0;
1195 	struct kbase_context *kctx = heap->kctx;
1196 	struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1197 
1198 	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1199 
1200 	if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
1201 		u64 chunk_hdr_val;
1202 		u64 *hw_hdr;
1203 
1204 		if (!chunk_gpu_va) {
1205 			struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1206 
1207 			/* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1208 			if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1209 				kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
1210 						       KBASE_SYNC_TO_CPU);
1211 			chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1212 
1213 			if (!chunk_gpu_va) {
1214 				dev_dbg(kctx->kbdev->dev,
1215 					"Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
1216 					heap->buf_desc_va);
1217 				goto out;
1218 			}
1219 		}
1220 
1221 		chunk = find_chunk(heap, chunk_gpu_va);
1222 		if (unlikely(!chunk))
1223 			goto out;
1224 
1225 		WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1226 		     "Cannot support CPU cached chunks without sync operations");
1227 		hw_hdr = chunk->map.addr;
1228 
1229 		/* Move on to the next chunk's header information */
1230 		chunk_hdr_val = *hw_hdr;
1231 		chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1232 
1233 		while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1234 			bool success =
1235 				delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);
1236 
1237 			if (!success)
1238 				break;
1239 
1240 			freed++;
1241 			/* On success, chunk_hdr_val is updated, extract the next chunk address */
1242 			chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1243 		}
1244 
1245 		/* Update the existing hardware chunk header, after reclaim deletion of chunks */
1246 		*hw_hdr = chunk_hdr_val;
1247 
1248 		dev_dbg(heap->kctx->kbdev->dev,
1249 			"HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
1250 			chunk_hdr_val);
1251 	} else {
1252 		dev_dbg(kctx->kbdev->dev,
1253 			"Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
1254 			heap->buf_desc_va);
1255 	}
1256 out:
1257 	return freed;
1258 }
1259 
1260 static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
1261 {
1262 	u32 freed_chunks = 0;
1263 	u64 freed_pages = 0;
1264 	u64 chunk_gpu_va;
1265 	u64 chunk_hdr_val;
1266 	struct kbase_context *kctx = heap->kctx;
1267 	u64 *ctx_ptr;
1268 
1269 	lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1270 
1271 	WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
1272 	     "Cannot support CPU cached heap context without sync operations");
1273 
1274 	ctx_ptr = heap->gpu_va_map.addr;
1275 
1276 	/* Extract the first chunk address from the context's free_list_head */
1277 	chunk_hdr_val = *ctx_ptr;
1278 	chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1279 
1280 	while (chunk_gpu_va) {
1281 		u64 hdr_val;
1282 		bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);
1283 
1284 		if (!success)
1285 			break;
1286 
1287 		freed_chunks++;
1288 		chunk_hdr_val = hdr_val;
1289 		/* extract the next chunk address */
1290 		chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1291 	}
1292 
1293 	/* Write the post-scan deletion state back to the heap context header */
1294 	*ctx_ptr = chunk_hdr_val;
1295 
1296 	/* Try to scan the HW hoarded list of unused chunks */
1297 	freed_chunks += delete_hoarded_chunks(heap);
1298 	freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
1299 	dev_dbg(heap->kctx->kbdev->dev,
1300 		"Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
1301 		freed_chunks, freed_pages, chunk_hdr_val);
1302 
1303 	/* Update context tiler heaps memory usage */
1304 	kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
1305 	kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
1306 	return freed_pages;
1307 }
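/* Worked example for the accounting above (illustrative, assuming 4 KiB
 * pages): freeing 3 chunks from a heap with a 2 MiB chunk_size gives
 * freed_pages = 3 * PFN_UP(2 MiB) = 3 * 512 = 1536, so 1536 << PAGE_SHIFT =
 * 6 MiB is subtracted from running_total_tiler_heap_memory.
 */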
1308 
1309 u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
1310 {
1311 	u64 freed = 0;
1312 	struct kbase_csf_tiler_heap *heap;
1313 
1314 	mutex_lock(&kctx->csf.tiler_heaps.lock);
1315 
1316 	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
1317 		freed += delete_unused_chunk_pages(heap);
1318 
1319 		/* If freed enough, then stop here */
1320 		if (freed >= to_free)
1321 			break;
1322 	}
1323 
1324 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
1325 	/* The scan should never free more than 4G pages, but clamp it for the u32 return */
1326 	if (WARN_ON(unlikely(freed > U32_MAX)))
1327 		return U32_MAX;
1328 	else
1329 		return (u32)freed;
1330 }
1331 
1332 static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
1333 {
1334 	u32 chunk_cnt = 0;
1335 	u64 page_cnt = 0;
1336 
1337 	lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1338 
1339 	/* Here the count is basically an informed estimate, avoiding the costly mapping/unmapping
1340 	 * in the chunk list walk. The downside is that the number is a less reliable guide for
1341 	 * later scan (free) calls on this heap as to what is actually freeable.
1342 	 */
1343 	if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1344 		chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
1345 		page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
1346 	}
1347 
1348 	dev_dbg(heap->kctx->kbdev->dev,
1349 		"Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
1350 		page_cnt, heap->gpu_va);
1351 
1352 	return page_cnt;
1353 }
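/* Worked example (illustrative, assuming 4 KiB pages): with
 * HEAP_SHRINK_STOP_LIMIT = 1, a heap holding 5 chunks of 2 MiB reports
 * (5 - 1) * PFN_UP(2 MiB) = 4 * 512 = 2048 potentially reclaimable pages,
 * regardless of how many of those chunks the GPU is still using.
 */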
1354 
1355 u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
1356 {
1357 	u64 page_cnt = 0;
1358 	struct kbase_csf_tiler_heap *heap;
1359 
1360 	mutex_lock(&kctx->csf.tiler_heaps.lock);
1361 
1362 	list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
1363 		page_cnt += count_unused_heap_pages(heap);
1364 
1365 	mutex_unlock(&kctx->csf.tiler_heaps.lock);
1366 
1367 	/* The count should never exceed 4G pages, but clamp it for the u32 return */
1368 	if (WARN_ON(unlikely(page_cnt > U32_MAX)))
1369 		return U32_MAX;
1370 	else
1371 		return (u32)page_cnt;
1372 }
1373