1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3 *
4 * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 #include <tl/mali_kbase_tracepoints.h>
23
24 #include "mali_kbase_csf_tiler_heap.h"
25 #include "mali_kbase_csf_tiler_heap_def.h"
26 #include "mali_kbase_csf_heap_context_alloc.h"
27
28 /* Tiler heap shrink stop limit for maintaining a minimum number of chunks */
29 #define HEAP_SHRINK_STOP_LIMIT (1)
30
31 /**
32 * struct kbase_csf_gpu_buffer_heap - A gpu buffer object specific to tiler heap
33 *
34 * @cdsbp_0: Descriptor_type and buffer_type
35 * @size: The size of the current heap chunk
36 * @pointer: Pointer to the current heap chunk
37 * @low_pointer: Pointer to low end of current heap chunk
38 * @high_pointer: Pointer to high end of current heap chunk
39 */
40 struct kbase_csf_gpu_buffer_heap {
41 u32 cdsbp_0;
42 u32 size;
43 u64 pointer;
44 u64 low_pointer;
45 u64 high_pointer;
46 } __packed;
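/*
 * Illustrative sketch (not part of the driver): once a buffer descriptor has
 * been vmapped, the GPU VA of the first hoarded chunk is recovered by masking
 * the @pointer field, as done in can_read_hw_gpu_buffer_heap() below.
 * CHUNK_ADDR_MASK is defined in mali_kbase_csf_tiler_heap_def.h.
 *
 *	struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
 *	u64 first_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
 */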
47
48 /**
49 * encode_chunk_ptr - Encode the address and size of a chunk as an integer.
50 *
51 * @chunk_size: Size of a tiler heap chunk, in bytes.
52 * @chunk_addr: GPU virtual address of the same tiler heap chunk.
53 *
54 * The size and address of the next chunk in a list are packed into a single
55 * 64-bit value for storage in a chunk's header. This function returns that
56 * value.
57 *
58 * Return: Next chunk pointer suitable for writing into a chunk header.
59 */
60 static u64 encode_chunk_ptr(u32 const chunk_size, u64 const chunk_addr)
61 {
62 u64 encoded_size, encoded_addr;
63
64 WARN_ON(chunk_size & ~CHUNK_SIZE_MASK);
65 WARN_ON(chunk_addr & ~CHUNK_ADDR_MASK);
66
67 encoded_size =
68 (u64)(chunk_size >> CHUNK_HDR_NEXT_SIZE_ENCODE_SHIFT) <<
69 CHUNK_HDR_NEXT_SIZE_POS;
70
71 encoded_addr =
72 (chunk_addr >> CHUNK_HDR_NEXT_ADDR_ENCODE_SHIFT) <<
73 CHUNK_HDR_NEXT_ADDR_POS;
74
75 return (encoded_size & CHUNK_HDR_NEXT_SIZE_MASK) |
76 (encoded_addr & CHUNK_HDR_NEXT_ADDR_MASK);
77 }
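/*
 * Illustrative usage sketch (relies on the shift/mask definitions in
 * mali_kbase_csf_tiler_heap_def.h): the encoded value is what link_chunk()
 * writes into the previous chunk's header so the GPU can follow the list,
 * and the reclaim scan later recovers the next chunk's address by masking
 * the header value.
 *
 *	u64 *prev_hdr = prev->map.addr;
 *	*prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
 *	...
 *	u64 next_chunk_gpu_va = *prev_hdr & CHUNK_ADDR_MASK;
 */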
78
79 /**
80 * get_last_chunk - Get the last chunk of a tiler heap
81 *
82 * @heap: Pointer to the tiler heap.
83 *
84 * Return: The address of the most recently-linked chunk, or NULL if none.
85 */
86 static struct kbase_csf_tiler_heap_chunk *get_last_chunk(
87 struct kbase_csf_tiler_heap *const heap)
88 {
89 if (list_empty(&heap->chunks_list))
90 return NULL;
91
92 return list_last_entry(&heap->chunks_list,
93 struct kbase_csf_tiler_heap_chunk, link);
94 }
95
96 /**
97 * remove_external_chunk_mappings - Remove external mappings from a chunk that
98 * is being transitioned to the tiler heap
99 * memory system.
100 *
101 * @kctx: kbase context the chunk belongs to.
102 * @chunk: The chunk whose external mappings are going to be removed.
103 *
104 * This function marks the region as DONT NEED. Along with NO_USER_FREE, this indicates
105 * that the VA region is owned by the tiler heap and could potentially be shrunk at any time. Other
106 * parts of kbase outside of tiler heap management should not take references on its physical
107 * pages, and should not modify them.
108 */
109 static void remove_external_chunk_mappings(struct kbase_context *const kctx,
110 struct kbase_csf_tiler_heap_chunk *chunk)
111 {
112 lockdep_assert_held(&kctx->reg_lock);
113
114 if (chunk->region->cpu_alloc != NULL) {
115 kbase_mem_shrink_cpu_mapping(kctx, chunk->region, 0,
116 chunk->region->cpu_alloc->nents);
117 }
118 #if !defined(CONFIG_MALI_VECTOR_DUMP)
119 chunk->region->flags |= KBASE_REG_DONT_NEED;
120 #endif
121
122 dev_dbg(kctx->kbdev->dev, "Removed external mappings from chunk 0x%llX", chunk->gpu_va);
123 }
124
125 /**
126 * link_chunk - Link a chunk into a tiler heap
127 *
128 * @heap: Pointer to the tiler heap.
129 * @chunk: Pointer to the heap chunk to be linked.
130 *
131 * Unless the @chunk is the first in the kernel's list of chunks belonging to
132 * a given tiler heap, this function stores the size and address of the @chunk
133 * in the header of the preceding chunk. This requires the GPU memory region
134 * containing the header to be mapped temporarily, which can fail.
135 *
136 * Return: 0 if successful or a negative error code on failure.
137 */
138 static int link_chunk(struct kbase_csf_tiler_heap *const heap,
139 struct kbase_csf_tiler_heap_chunk *const chunk)
140 {
141 struct kbase_csf_tiler_heap_chunk *const prev = get_last_chunk(heap);
142
143 if (prev) {
144 struct kbase_context *const kctx = heap->kctx;
145 u64 *prev_hdr = prev->map.addr;
146
147 WARN((prev->region->flags & KBASE_REG_CPU_CACHED),
148 "Cannot support CPU cached chunks without sync operations");
149
150 *prev_hdr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
151
152 dev_dbg(kctx->kbdev->dev,
153 "Linked tiler heap chunks, 0x%llX -> 0x%llX\n",
154 prev->gpu_va, chunk->gpu_va);
155 }
156
157 return 0;
158 }
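/*
 * Illustrative layout sketch (not part of the driver): each chunk begins with
 * a header whose first 64 bits encode the size and GPU VA of the next chunk,
 * so the chunks form a singly linked list that both the GPU and the kernel
 * can walk:
 *
 *	chunk A header -> (chunk_size, GPU VA of chunk B)
 *	chunk B header -> (chunk_size, GPU VA of chunk C)
 *	chunk C header -> 0 (end of list, zero-initialized by init_chunk())
 *
 * The kernel additionally tracks the same chunks on heap->chunks_list.
 */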
159
160 /**
161 * init_chunk - Initialize and link a tiler heap chunk
162 *
163 * @heap: Pointer to the tiler heap.
164 * @chunk: Pointer to the heap chunk to be initialized and linked.
165 * @link_with_prev: Flag to indicate if the new chunk needs to be linked with
166 * the previously allocated chunk.
167 *
168 * Zero-initialize a new chunk's header (including its pointer to the next
169 * chunk, which doesn't exist yet) and then update the previous chunk's
170 * header to link the new chunk into the chunk list.
171 *
172 * Return: 0 if successful or a negative error code on failure.
173 */
174 static int init_chunk(struct kbase_csf_tiler_heap *const heap,
175 struct kbase_csf_tiler_heap_chunk *const chunk, bool link_with_prev)
176 {
177 int err = 0;
178 u64 *chunk_hdr;
179 struct kbase_context *const kctx = heap->kctx;
180
181 lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
182
183 if (unlikely(chunk->gpu_va & ~CHUNK_ADDR_MASK)) {
184 dev_err(kctx->kbdev->dev,
185 "Tiler heap chunk address is unusable\n");
186 return -EINVAL;
187 }
188
189 WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
190 "Cannot support CPU cached chunks without sync operations");
191 chunk_hdr = chunk->map.addr;
192 if (WARN(chunk->map.size < CHUNK_HDR_SIZE,
193 "Tiler chunk kernel mapping was not large enough for zero-init")) {
194 return -EINVAL;
195 }
196
197 memset(chunk_hdr, 0, CHUNK_HDR_SIZE);
198 INIT_LIST_HEAD(&chunk->link);
199
200 if (link_with_prev)
201 err = link_chunk(heap, chunk);
202
203 if (unlikely(err)) {
204 dev_err(kctx->kbdev->dev, "Failed to link a chunk to a tiler heap\n");
205 return -EINVAL;
206 }
207
208 list_add_tail(&chunk->link, &heap->chunks_list);
209 heap->chunk_count++;
210
211 return err;
212 }
213
214 /**
215 * remove_unlinked_chunk - Remove a chunk that is not currently linked into a
216 * heap.
217 *
218 * @kctx: Kbase context that was used to allocate the memory.
219 * @chunk: Chunk that has been allocated, but not linked into a heap.
220 */
221 static void remove_unlinked_chunk(struct kbase_context *kctx,
222 struct kbase_csf_tiler_heap_chunk *chunk)
223 {
224 if (WARN_ON(!list_empty(&chunk->link)))
225 return;
226
227 kbase_gpu_vm_lock(kctx);
228 kbase_vunmap(kctx, &chunk->map);
229 /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
230 * regions), and so we must clear that flag too before freeing.
231 * For "no user free count", we check that the count is 1 as it is a shrinkable region;
232 * no other code part within kbase can take a reference to it.
233 */
234 WARN_ON(atomic_read(&chunk->region->no_user_free_count) > 1);
235 kbase_va_region_no_user_free_dec(chunk->region);
236 #if !defined(CONFIG_MALI_VECTOR_DUMP)
237 chunk->region->flags &= ~KBASE_REG_DONT_NEED;
238 #endif
239 kbase_mem_free_region(kctx, chunk->region);
240 kbase_gpu_vm_unlock(kctx);
241
242 kfree(chunk);
243 }
244
245 /**
246 * alloc_new_chunk - Allocate new chunk metadata for the tiler heap, reserve a fully backed VA
247 * region for the chunk, and provide a kernel mapping.
248 * @kctx: kbase context with which the chunk will be linked
249 * @chunk_size: the size of the chunk from the corresponding heap
250 *
251 * Allocate the chunk tracking metadata and a corresponding fully backed VA region for the
252 * chunk. The kernel may need to invoke the reclaim path while trying to fulfill the allocation, so
253 * we cannot hold any lock that would be held in the shrinker paths (JIT evict lock or tiler heap
254 * lock).
255 *
256 * Since the chunk may have its physical backing removed, to prevent use-after-free scenarios we
257 * ensure that it is protected from being mapped by other parts of kbase.
258 *
259 * The chunk's GPU memory can be accessed via its 'map' member, but should only be done so by the
260 * shrinker path, as it may be otherwise shrunk at any time.
261 *
262 * Return: pointer to kbase_csf_tiler_heap_chunk on success or a NULL pointer
263 * on failure
264 */
265 static struct kbase_csf_tiler_heap_chunk *alloc_new_chunk(struct kbase_context *kctx,
266 u64 chunk_size)
267 {
268 u64 nr_pages = PFN_UP(chunk_size);
269 u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
270 BASEP_MEM_NO_USER_FREE | BASE_MEM_COHERENT_LOCAL | BASE_MEM_PROT_CPU_RD;
271 struct kbase_csf_tiler_heap_chunk *chunk = NULL;
272 /* The chunk kernel mapping needs to be large enough to:
273 * - initially zero the CHUNK_HDR_SIZE area
274 * - on shrinking, access the NEXT_CHUNK_ADDR_SIZE area
275 */
276 const size_t chunk_kernel_map_size = max(CHUNK_HDR_SIZE, NEXT_CHUNK_ADDR_SIZE);
277
278 /* Calls to this function are inherently synchronous, with respect to
279 * MMU operations.
280 */
281 const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
282 flags |= kbase_mem_group_id_set(kctx->jit_group_id);
283
284 chunk = kzalloc(sizeof(*chunk), GFP_KERNEL);
285 if (unlikely(!chunk)) {
286 dev_err(kctx->kbdev->dev,
287 "No kernel memory for a new tiler heap chunk\n");
288 return NULL;
289 }
290
291 /* Allocate GPU memory for the new chunk. */
292 chunk->region =
293 kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags, &chunk->gpu_va, mmu_sync_info);
294
295 if (unlikely(!chunk->region)) {
296 dev_err(kctx->kbdev->dev, "Failed to allocate a tiler heap chunk!\n");
297 goto unroll_chunk;
298 }
299
300 kbase_gpu_vm_lock(kctx);
301
302 /* Some checks are done here because NO_USER_FREE still allows such changes
303 * to be made whilst the region lock was dropped
304 */
305 if (unlikely(atomic_read(&chunk->region->gpu_alloc->kernel_mappings) > 0)) {
306 dev_err(kctx->kbdev->dev, "Chunk region has active kernel mappings!\n");
307 goto unroll_region;
308 }
309
310 /* There is a race condition with regard to KBASE_REG_DONT_NEED, where another
311 * thread can have the "no user free" refcount increased between kbase_mem_alloc
312 * and kbase_gpu_vm_lock (above) and before KBASE_REG_DONT_NEED is set by
313 * remove_external_chunk_mappings (below).
314 *
315 * It should be fine and not a security risk if we let the region leak till
316 * region tracker termination in such a case.
317 */
318 if (unlikely(atomic_read(&chunk->region->no_user_free_count) > 1)) {
319 dev_err(kctx->kbdev->dev, "Chunk region has no_user_free_count > 1!\n");
320 goto unroll_region;
321 }
322
323 /* Whilst we can be sure of a number of other restrictions due to BASEP_MEM_NO_USER_FREE
324 * being requested, it's useful to document in code what those restrictions are, and ensure
325 * they remain in place in future.
326 */
327 if (WARN(!chunk->region->gpu_alloc,
328 "NO_USER_FREE chunks should not have had their alloc freed")) {
329 goto unroll_region;
330 }
331
332 if (WARN(chunk->region->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE,
333 "NO_USER_FREE chunks should not have been freed and then reallocated as imported/non-native regions")) {
334 goto unroll_region;
335 }
336
337 if (WARN((chunk->region->flags & KBASE_REG_ACTIVE_JIT_ALLOC),
338 "NO_USER_FREE chunks should not have been freed and then reallocated as JIT regions")) {
339 goto unroll_region;
340 }
341
342 if (WARN((chunk->region->flags & KBASE_REG_DONT_NEED),
343 "NO_USER_FREE chunks should not have been made ephemeral")) {
344 goto unroll_region;
345 }
346
347 if (WARN(atomic_read(&chunk->region->cpu_alloc->gpu_mappings) > 1,
348 "NO_USER_FREE chunks should not have been aliased")) {
349 goto unroll_region;
350 }
351
352 if (unlikely(!kbase_vmap_reg(kctx, chunk->region, chunk->gpu_va, chunk_kernel_map_size,
353 (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &chunk->map,
354 KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING))) {
355 dev_err(kctx->kbdev->dev, "Failed to map chunk header for shrinking!\n");
356 goto unroll_region;
357 }
358
359 remove_external_chunk_mappings(kctx, chunk);
360 kbase_gpu_vm_unlock(kctx);
361
362 /* If page migration is enabled, we don't want to migrate tiler heap pages.
363 * This does not change if the constituent pages are already marked as isolated.
364 */
365 if (kbase_page_migration_enabled)
366 kbase_set_phy_alloc_page_status(chunk->region->gpu_alloc, NOT_MOVABLE);
367
368 return chunk;
369
370 unroll_region:
371 /* KBASE_REG_DONT_NEED regions will be confused with ephemeral regions (inc freed JIT
372 * regions), and so we must clear that flag too before freeing.
373 */
374 kbase_va_region_no_user_free_dec(chunk->region);
375 #if !defined(CONFIG_MALI_VECTOR_DUMP)
376 chunk->region->flags &= ~KBASE_REG_DONT_NEED;
377 #endif
378 kbase_mem_free_region(kctx, chunk->region);
379 kbase_gpu_vm_unlock(kctx);
380 unroll_chunk:
381 kfree(chunk);
382 return NULL;
383 }
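/*
 * Minimal caller-side sketch (mirrors create_chunk() below and the OoM path in
 * kbase_csf_tiler_heap_alloc_new_chunk()): a chunk is allocated without the
 * tiler heap lock, the lock is taken only to link it, and a chunk that never
 * gets linked must be rolled back with remove_unlinked_chunk().
 *
 *	chunk = alloc_new_chunk(kctx, heap->chunk_size);
 *	if (unlikely(!chunk))
 *		return -ENOMEM;
 *	mutex_lock(&kctx->csf.tiler_heaps.lock);
 *	err = init_chunk(heap, chunk, true);
 *	mutex_unlock(&kctx->csf.tiler_heaps.lock);
 *	if (unlikely(err))
 *		remove_unlinked_chunk(kctx, chunk);
 */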
384
385 /**
386 * create_chunk - Create a tiler heap chunk
387 *
388 * @heap: Pointer to the tiler heap for which to allocate memory.
389 *
390 * This function allocates a chunk of memory for a tiler heap and adds it to
391 * the list of chunks associated with that heap, both on the host side and in
392 * GPU memory.
393 *
394 * Return: 0 if successful or a negative error code on failure.
395 */
396 static int create_chunk(struct kbase_csf_tiler_heap *const heap)
397 {
398 int err = 0;
399 struct kbase_csf_tiler_heap_chunk *chunk = NULL;
400
401 chunk = alloc_new_chunk(heap->kctx, heap->chunk_size);
402 if (unlikely(!chunk)) {
403 err = -ENOMEM;
404 goto allocation_failure;
405 }
406
407 mutex_lock(&heap->kctx->csf.tiler_heaps.lock);
408 err = init_chunk(heap, chunk, true);
409 mutex_unlock(&heap->kctx->csf.tiler_heaps.lock);
410
411 if (unlikely(err))
412 goto initialization_failure;
413
414 dev_dbg(heap->kctx->kbdev->dev, "Created tiler heap chunk 0x%llX\n", chunk->gpu_va);
415
416 return 0;
417 initialization_failure:
418 remove_unlinked_chunk(heap->kctx, chunk);
419 allocation_failure:
420 return err;
421 }
422
423 /**
424 * delete_all_chunks - Delete all chunks belonging to an unlinked tiler heap
425 *
426 * @heap: Pointer to a tiler heap.
427 *
428 * This function empties the list of chunks associated with a tiler heap by freeing all chunks
429 * previously allocated by @create_chunk.
430 *
431 * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
432 * tiler_heaps lock cannot be held whilst deleting its chunks due to also needing the &struct
433 * kbase_context.region_lock.
434 *
435 * WARNING: Whilst the deleted chunks are unlinked from host memory, they are not unlinked from the
436 * list of chunks used by the GPU, therefore it is only safe to use this function when
437 * deleting a heap.
438 */
439 static void delete_all_chunks(struct kbase_csf_tiler_heap *heap)
440 {
441 struct kbase_context *const kctx = heap->kctx;
442 struct list_head *entry = NULL, *tmp = NULL;
443
444 WARN(!list_empty(&heap->link),
445 "Deleting a heap's chunks when that heap is still linked requires the tiler_heaps lock, which cannot be held by the caller");
446
447 list_for_each_safe(entry, tmp, &heap->chunks_list) {
448 struct kbase_csf_tiler_heap_chunk *chunk = list_entry(
449 entry, struct kbase_csf_tiler_heap_chunk, link);
450
451 list_del_init(&chunk->link);
452 heap->chunk_count--;
453
454 remove_unlinked_chunk(kctx, chunk);
455 }
456 }
457
458 /**
459 * create_initial_chunks - Create the initial list of chunks for a tiler heap
460 *
461 * @heap: Pointer to the tiler heap for which to allocate memory.
462 * @nchunks: Number of chunks to create.
463 *
464 * This function allocates a given number of chunks for a tiler heap and
465 * adds them to the list of chunks associated with that heap.
466 *
467 * Return: 0 if successful or a negative error code on failure.
468 */
469 static int create_initial_chunks(struct kbase_csf_tiler_heap *const heap,
470 u32 const nchunks)
471 {
472 int err = 0;
473 u32 i;
474
475 for (i = 0; (i < nchunks) && likely(!err); i++)
476 err = create_chunk(heap);
477
478 if (unlikely(err))
479 delete_all_chunks(heap);
480
481 return err;
482 }
483
484 /**
485 * delete_heap - Delete an unlinked tiler heap
486 *
487 * @heap: Pointer to a tiler heap to be deleted.
488 *
489 * This function frees any chunks allocated for a tiler heap previously
490 * initialized by @kbase_csf_tiler_heap_init. The heap context structure used by
491 * the firmware is also freed.
492 *
493 * The heap must not be reachable from a &struct kbase_context.csf.tiler_heaps.list, as the
494 * tiler_heaps lock cannot be held whilst deleting it due to also needing the &struct
495 * kbase_context.region_lock.
496 */
497 static void delete_heap(struct kbase_csf_tiler_heap *heap)
498 {
499 struct kbase_context *const kctx = heap->kctx;
500
501 dev_dbg(kctx->kbdev->dev, "Deleting tiler heap 0x%llX\n", heap->gpu_va);
502
503 WARN(!list_empty(&heap->link),
504 "Deleting a heap that is still linked requires the tiler_heaps lock, which cannot be held by the caller");
505
506 /* Make sure that all of the VA regions corresponding to the chunks are
507 * freed at this time and that the work queue is not trying to access freed
508 * memory.
509 *
510 * Note: since the heap is unlinked, and that no references are made to chunks other
511 * than from their heap, there is no need to separately move the chunks out of the
512 * heap->chunks_list to delete them.
513 */
514 delete_all_chunks(heap);
515
516 kbase_vunmap(kctx, &heap->gpu_va_map);
517 /* We could optimize context destruction by not freeing leaked heap
518 * contexts but it doesn't seem worth the extra complexity. After this
519 * point, the suballocation is returned to the heap context allocator and
520 * may be overwritten with new data, meaning heap->gpu_va should not
521 * be used past this point.
522 */
523 kbase_csf_heap_context_allocator_free(&kctx->csf.tiler_heaps.ctx_alloc,
524 heap->gpu_va);
525
526 WARN_ON(heap->chunk_count);
527 KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id,
528 heap->heap_id, 0, 0, heap->max_chunks, heap->chunk_size, 0,
529 heap->target_in_flight, 0);
530
531 if (heap->buf_desc_reg) {
532 kbase_vunmap(kctx, &heap->buf_desc_map);
533 kbase_gpu_vm_lock(kctx);
534 kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
535 kbase_gpu_vm_unlock(kctx);
536 }
537
538 kfree(heap);
539 }
540
541 /**
542 * find_tiler_heap - Find a tiler heap from the address of its heap context
543 *
544 * @kctx: Pointer to the kbase context to search for a tiler heap.
545 * @heap_gpu_va: GPU virtual address of a heap context structure.
546 *
547 * Each tiler heap managed by the kernel has an associated heap context
548 * structure used by the firmware. This function finds a tiler heap object from
549 * the GPU virtual address of its associated heap context. The heap context
550 * should have been allocated by @kbase_csf_heap_context_allocator_alloc in the
551 * same @kctx.
552 *
553 * Return: pointer to the tiler heap object, or NULL if not found.
554 */
555 static struct kbase_csf_tiler_heap *find_tiler_heap(
556 struct kbase_context *const kctx, u64 const heap_gpu_va)
557 {
558 struct kbase_csf_tiler_heap *heap = NULL;
559
560 lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
561
562 list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
563 if (heap_gpu_va == heap->gpu_va)
564 return heap;
565 }
566
567 dev_dbg(kctx->kbdev->dev, "Tiler heap 0x%llX was not found\n",
568 heap_gpu_va);
569
570 return NULL;
571 }
572
573 static struct kbase_csf_tiler_heap_chunk *find_chunk(struct kbase_csf_tiler_heap *heap,
574 u64 const chunk_gpu_va)
575 {
576 struct kbase_csf_tiler_heap_chunk *chunk = NULL;
577
578 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
579
580 list_for_each_entry(chunk, &heap->chunks_list, link) {
581 if (chunk->gpu_va == chunk_gpu_va)
582 return chunk;
583 }
584
585 dev_dbg(heap->kctx->kbdev->dev, "Tiler heap chunk 0x%llX was not found\n", chunk_gpu_va);
586
587 return NULL;
588 }
589
590 int kbase_csf_tiler_heap_context_init(struct kbase_context *const kctx)
591 {
592 int err = kbase_csf_heap_context_allocator_init(
593 &kctx->csf.tiler_heaps.ctx_alloc, kctx);
594
595 if (unlikely(err))
596 return err;
597
598 INIT_LIST_HEAD(&kctx->csf.tiler_heaps.list);
599 mutex_init(&kctx->csf.tiler_heaps.lock);
600
601 dev_dbg(kctx->kbdev->dev, "Initialized a context for tiler heaps\n");
602
603 return 0;
604 }
605
606 void kbase_csf_tiler_heap_context_term(struct kbase_context *const kctx)
607 {
608 LIST_HEAD(local_heaps_list);
609 struct list_head *entry = NULL, *tmp = NULL;
610
611 dev_dbg(kctx->kbdev->dev, "Terminating a context for tiler heaps\n");
612
613 mutex_lock(&kctx->csf.tiler_heaps.lock);
614 list_splice_init(&kctx->csf.tiler_heaps.list, &local_heaps_list);
615 mutex_unlock(&kctx->csf.tiler_heaps.lock);
616
617 list_for_each_safe(entry, tmp, &local_heaps_list) {
618 struct kbase_csf_tiler_heap *heap = list_entry(
619 entry, struct kbase_csf_tiler_heap, link);
620
621 list_del_init(&heap->link);
622 delete_heap(heap);
623 }
624
625 mutex_destroy(&kctx->csf.tiler_heaps.lock);
626
627 kbase_csf_heap_context_allocator_term(&kctx->csf.tiler_heaps.ctx_alloc);
628 }
629
630 /**
631 * kbasep_is_buffer_descriptor_region_suitable - Check if a VA region chosen to house
632 * the tiler heap buffer descriptor
633 * is suitable for the purpose.
634 * @kctx: kbase context of the tiler heap
635 * @reg: VA region being checked for suitability
636 *
637 * By design, the tiler heap buffer descriptor memory does not admit page
638 * faults, so it must be fully backed upon allocation and must remain alive
639 * for as long as the tiler heap itself, meaning it cannot be allocated from
640 * JIT/ephemeral or user-freeable memory.
641 *
642 * Return: true on suitability, false otherwise.
643 */
644 static bool kbasep_is_buffer_descriptor_region_suitable(struct kbase_context *const kctx,
645 struct kbase_va_region *const reg)
646 {
647 if (kbase_is_region_invalid_or_free(reg)) {
648 dev_err(kctx->kbdev->dev, "Region is either invalid or free!\n");
649 return false;
650 }
651
652 if (!(reg->flags & KBASE_REG_CPU_RD) || kbase_is_region_shrinkable(reg) ||
653 (reg->flags & KBASE_REG_PF_GROW)) {
654 dev_err(kctx->kbdev->dev, "Region has invalid flags: 0x%lX!\n", reg->flags);
655 return false;
656 }
657
658 if (reg->gpu_alloc->type != KBASE_MEM_TYPE_NATIVE) {
659 dev_err(kctx->kbdev->dev, "Region has invalid type!\n");
660 return false;
661 }
662
663 if ((reg->nr_pages != kbase_reg_current_backed_size(reg)) ||
664 (reg->nr_pages < PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)))) {
665 dev_err(kctx->kbdev->dev, "Region has invalid backing!\n");
666 return false;
667 }
668
669 return true;
670 }
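/*
 * Worked example (illustrative, assuming 4 KiB pages): struct
 * kbase_csf_gpu_buffer_heap is packed to 4 + 4 + 8 + 8 + 8 = 32 bytes, so
 * PFN_UP(sizeof(struct kbase_csf_gpu_buffer_heap)) is 1 and a suitable buffer
 * descriptor region is a fully backed, non-shrinkable, CPU-readable native
 * region of at least one page.
 */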
671
672 #define TILER_BUF_DESC_SIZE (sizeof(struct kbase_csf_gpu_buffer_heap))
673
674 int kbase_csf_tiler_heap_init(struct kbase_context *const kctx, u32 const chunk_size,
675 u32 const initial_chunks, u32 const max_chunks,
676 u16 const target_in_flight, u64 const buf_desc_va,
677 u64 *const heap_gpu_va, u64 *const first_chunk_va)
678 {
679 int err = 0;
680 struct kbase_csf_tiler_heap *heap = NULL;
681 struct kbase_csf_heap_context_allocator *const ctx_alloc =
682 &kctx->csf.tiler_heaps.ctx_alloc;
683 struct kbase_csf_tiler_heap_chunk *chunk = NULL;
684 struct kbase_va_region *gpu_va_reg = NULL;
685 void *vmap_ptr = NULL;
686
687 dev_dbg(kctx->kbdev->dev,
688 "Creating a tiler heap with %u chunks (limit: %u) of size %u, buf_desc_va: 0x%llx\n",
689 initial_chunks, max_chunks, chunk_size, buf_desc_va);
690
691 if (!kbase_mem_allow_alloc(kctx))
692 return -EINVAL;
693
694 if (chunk_size == 0)
695 return -EINVAL;
696
697 if (chunk_size & ~CHUNK_SIZE_MASK)
698 return -EINVAL;
699
700 if (initial_chunks == 0)
701 return -EINVAL;
702
703 if (initial_chunks > max_chunks)
704 return -EINVAL;
705
706 if (target_in_flight == 0)
707 return -EINVAL;
708
709 heap = kzalloc(sizeof(*heap), GFP_KERNEL);
710 if (unlikely(!heap)) {
711 dev_err(kctx->kbdev->dev, "No kernel memory for a new tiler heap");
712 return -ENOMEM;
713 }
714
715 heap->kctx = kctx;
716 heap->chunk_size = chunk_size;
717 heap->max_chunks = max_chunks;
718 heap->target_in_flight = target_in_flight;
719 heap->buf_desc_checked = false;
720 INIT_LIST_HEAD(&heap->chunks_list);
721 INIT_LIST_HEAD(&heap->link);
722
723 /* Check the buffer descriptor virtual address */
724 if (buf_desc_va) {
725 struct kbase_va_region *buf_desc_reg;
726
727 kbase_gpu_vm_lock(kctx);
728 buf_desc_reg =
729 kbase_region_tracker_find_region_enclosing_address(kctx, buf_desc_va);
730
731 if (!kbasep_is_buffer_descriptor_region_suitable(kctx, buf_desc_reg)) {
732 kbase_gpu_vm_unlock(kctx);
733 dev_err(kctx->kbdev->dev,
734 "Could not find a suitable VA region for the tiler heap buf desc!\n");
735 err = -EINVAL;
736 goto buf_desc_not_suitable;
737 }
738
739 /* If we don't prevent userspace from unmapping this, we may run into
740 * use-after-free, as we don't check for the existence of the region throughout.
741 */
742
743 heap->buf_desc_va = buf_desc_va;
744 heap->buf_desc_reg = buf_desc_reg;
745 kbase_va_region_no_user_free_inc(buf_desc_reg);
746
747 vmap_ptr = kbase_vmap_reg(kctx, buf_desc_reg, buf_desc_va, TILER_BUF_DESC_SIZE,
748 KBASE_REG_CPU_RD, &heap->buf_desc_map,
749 KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
750
751 if (kbase_page_migration_enabled)
752 kbase_set_phy_alloc_page_status(buf_desc_reg->gpu_alloc, NOT_MOVABLE);
753
754 kbase_gpu_vm_unlock(kctx);
755
756 if (unlikely(!vmap_ptr)) {
757 dev_err(kctx->kbdev->dev,
758 "Could not vmap buffer descriptor into kernel memory (err %d)\n",
759 err);
760 err = -ENOMEM;
761 goto buf_desc_vmap_failed;
762 }
763 }
764
765 heap->gpu_va = kbase_csf_heap_context_allocator_alloc(ctx_alloc);
766 if (unlikely(!heap->gpu_va)) {
767 dev_dbg(kctx->kbdev->dev, "Failed to allocate a tiler heap context\n");
768 err = -ENOMEM;
769 goto heap_context_alloc_failed;
770 }
771
772 gpu_va_reg = ctx_alloc->region;
773
774 kbase_gpu_vm_lock(kctx);
775 /* gpu_va_reg was created with BASEP_MEM_NO_USER_FREE, the code to unset this only happens
776 * on kctx termination (after all syscalls on kctx have finished), and so it is safe to
777 * assume that gpu_va_reg is still present.
778 */
779 vmap_ptr = kbase_vmap_reg(kctx, gpu_va_reg, heap->gpu_va, NEXT_CHUNK_ADDR_SIZE,
780 (KBASE_REG_CPU_RD | KBASE_REG_CPU_WR), &heap->gpu_va_map,
781 KBASE_VMAP_FLAG_PERMANENT_MAP_ACCOUNTING);
782 kbase_gpu_vm_unlock(kctx);
783 if (unlikely(!vmap_ptr)) {
784 dev_dbg(kctx->kbdev->dev, "Failed to vmap the correct heap GPU VA address\n");
785 err = -ENOMEM;
786 goto heap_context_vmap_failed;
787 }
788
789 err = create_initial_chunks(heap, initial_chunks);
790 if (unlikely(err)) {
791 dev_dbg(kctx->kbdev->dev, "Failed to create the initial tiler heap chunks\n");
792 goto create_chunks_failed;
793 }
794 chunk = list_first_entry(&heap->chunks_list, struct kbase_csf_tiler_heap_chunk, link);
795
796 *heap_gpu_va = heap->gpu_va;
797 *first_chunk_va = chunk->gpu_va;
798
799 mutex_lock(&kctx->csf.tiler_heaps.lock);
800 kctx->csf.tiler_heaps.nr_of_heaps++;
801 heap->heap_id = kctx->csf.tiler_heaps.nr_of_heaps;
802 list_add(&heap->link, &kctx->csf.tiler_heaps.list);
803
804 KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
805 PFN_UP(heap->chunk_size * heap->max_chunks),
806 PFN_UP(heap->chunk_size * heap->chunk_count),
807 heap->max_chunks, heap->chunk_size, heap->chunk_count,
808 heap->target_in_flight, 0);
809
810 #if defined(CONFIG_MALI_VECTOR_DUMP)
811 list_for_each_entry(chunk, &heap->chunks_list, link) {
812 KBASE_TLSTREAM_JD_TILER_HEAP_CHUNK_ALLOC(kctx->kbdev, kctx->id, heap->heap_id,
813 chunk->gpu_va);
814 }
815 #endif
816 kctx->running_total_tiler_heap_nr_chunks += heap->chunk_count;
817 kctx->running_total_tiler_heap_memory += (u64)heap->chunk_size * heap->chunk_count;
818 if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
819 kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
820
821 dev_dbg(kctx->kbdev->dev,
822 "Created tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n", heap->gpu_va,
823 buf_desc_va, kctx->tgid, kctx->id);
824 mutex_unlock(&kctx->csf.tiler_heaps.lock);
825
826 return 0;
827
828 create_chunks_failed:
829 kbase_vunmap(kctx, &heap->gpu_va_map);
830 heap_context_vmap_failed:
831 kbase_csf_heap_context_allocator_free(ctx_alloc, heap->gpu_va);
832 heap_context_alloc_failed:
833 if (heap->buf_desc_reg)
834 kbase_vunmap(kctx, &heap->buf_desc_map);
835 buf_desc_vmap_failed:
836 if (heap->buf_desc_reg) {
837 kbase_gpu_vm_lock(kctx);
838 kbase_va_region_no_user_free_dec(heap->buf_desc_reg);
839 kbase_gpu_vm_unlock(kctx);
840 }
841 buf_desc_not_suitable:
842 kfree(heap);
843 return err;
844 }
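/*
 * Illustrative call sketch (hypothetical parameter values): with the 2 MB
 * chunk size mentioned in kbase_csf_tiler_heap_alloc_new_chunk(), a heap
 * might be created as
 *
 *	u64 heap_va, first_chunk_va;
 *	int ret = kbase_csf_tiler_heap_init(kctx, 2u << 20, 2, 32, 16,
 *					    buf_desc_va, &heap_va,
 *					    &first_chunk_va);
 *
 * where chunk_size must be non-zero and consistent with CHUNK_SIZE_MASK,
 * 0 < initial_chunks <= max_chunks, and target_in_flight must be non-zero.
 */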
845
846 int kbase_csf_tiler_heap_term(struct kbase_context *const kctx,
847 u64 const heap_gpu_va)
848 {
849 int err = 0;
850 struct kbase_csf_tiler_heap *heap = NULL;
851 u32 chunk_count = 0;
852 u64 heap_size = 0;
853
854 mutex_lock(&kctx->csf.tiler_heaps.lock);
855 heap = find_tiler_heap(kctx, heap_gpu_va);
856 if (likely(heap)) {
857 chunk_count = heap->chunk_count;
858 heap_size = heap->chunk_size * chunk_count;
859
860 list_del_init(&heap->link);
861 } else {
862 err = -EINVAL;
863 }
864
865 /* Update stats whilst still holding the lock so they are in sync with the tiler_heaps.list
866 * at all times
867 */
868 if (likely(kctx->running_total_tiler_heap_memory >= heap_size))
869 kctx->running_total_tiler_heap_memory -= heap_size;
870 else
871 dev_warn(kctx->kbdev->dev,
872 "Running total tiler heap memory lower than expected!");
873 if (likely(kctx->running_total_tiler_heap_nr_chunks >= chunk_count))
874 kctx->running_total_tiler_heap_nr_chunks -= chunk_count;
875 else
876 dev_warn(kctx->kbdev->dev,
877 "Running total tiler chunk count lower than expected!");
878 if (!err)
879 dev_dbg(kctx->kbdev->dev,
880 "Terminated tiler heap 0x%llX, buffer descriptor 0x%llX, ctx_%d_%d\n",
881 heap->gpu_va, heap->buf_desc_va, kctx->tgid, kctx->id);
882 mutex_unlock(&kctx->csf.tiler_heaps.lock);
883
884 /* Deletion requires the kctx->reg_lock, so must only operate on it whilst unlinked from
885 * the kctx's csf.tiler_heaps.list, and without holding the csf.tiler_heaps.lock
886 */
887 if (likely(heap))
888 delete_heap(heap);
889
890 return err;
891 }
892
893 /**
894 * validate_allocation_request - Check whether the chunk allocation request
895 * received on tiler OOM should be handled at
896 * the current time.
897 *
898 * @heap: The tiler heap the OOM is associated with
899 * @nr_in_flight: Number of fragment jobs in flight
900 * @pending_frag_count: Number of pending fragment jobs
901 *
902 * Context: must hold the tiler heap lock to guarantee its lifetime
903 *
904 * Return:
905 * * 0 - allowed to allocate an additional chunk
906 * * -EINVAL - invalid
907 * * -EBUSY - there are fragment jobs still in flight, which may free chunks
908 * after completing
909 * * -ENOMEM - the targeted number of in-flight chunks has been reached and
910 * no new ones will be allocated
911 */
912 static int validate_allocation_request(struct kbase_csf_tiler_heap *heap, u32 nr_in_flight,
913 u32 pending_frag_count)
914 {
915 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
916
917 if (WARN_ON(!nr_in_flight) || WARN_ON(pending_frag_count > nr_in_flight))
918 return -EINVAL;
919
920 if (nr_in_flight <= heap->target_in_flight) {
921 if (heap->chunk_count < heap->max_chunks) {
922 /* Not exceeded the target number of render passes yet so be
923 * generous with memory.
924 */
925 return 0;
926 } else if (pending_frag_count > 0) {
927 return -EBUSY;
928 } else {
929 return -ENOMEM;
930 }
931 } else {
932 /* Reached target number of render passes in flight.
933 * Wait for some of them to finish
934 */
935 return -EBUSY;
936 }
937 return -ENOMEM;
938 }
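/*
 * Illustrative decision examples (derived from the checks above), with
 * target_in_flight = 4 and max_chunks = 8:
 *
 *	nr_in_flight = 3, chunk_count = 5                          -> 0
 *	nr_in_flight = 3, chunk_count = 8, pending_frag_count = 2  -> -EBUSY
 *	nr_in_flight = 3, chunk_count = 8, pending_frag_count = 0  -> -ENOMEM
 *	nr_in_flight = 6 (> target_in_flight)                      -> -EBUSY
 */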
939
940 int kbase_csf_tiler_heap_alloc_new_chunk(struct kbase_context *kctx,
941 u64 gpu_heap_va, u32 nr_in_flight, u32 pending_frag_count, u64 *new_chunk_ptr)
942 {
943 struct kbase_csf_tiler_heap *heap;
944 struct kbase_csf_tiler_heap_chunk *chunk;
945 int err = -EINVAL;
946 u64 chunk_size = 0;
947 u64 heap_id = 0;
948
949 /* To avoid potential locking issues during allocation, this is handled
950 * in three phases:
951 * 1. Take the lock, find the corresponding heap, and find its chunk size
952 * (this is always 2 MB, but may change down the line).
953 * 2. Allocate memory for the chunk and its region.
954 * 3. If the heap still exists, link it to the end of the list. If it
955 * doesn't, roll back the allocation.
956 */
957
958 mutex_lock(&kctx->csf.tiler_heaps.lock);
959 heap = find_tiler_heap(kctx, gpu_heap_va);
960 if (likely(heap)) {
961 chunk_size = heap->chunk_size;
962 heap_id = heap->heap_id;
963 } else {
964 dev_err(kctx->kbdev->dev, "Heap 0x%llX does not exist", gpu_heap_va);
965 mutex_unlock(&kctx->csf.tiler_heaps.lock);
966 goto prelink_failure;
967 }
968
969 err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
970 if (unlikely(err)) {
971 /* The allocation request can be legitimate, but be invoked on a heap
972 * that has already reached the maximum pre-configured capacity. This
973 * is useful debug information, but should not be treated as an error,
974 * since the request will be re-sent at a later point.
975 */
976 dev_dbg(kctx->kbdev->dev,
977 "Not allocating new chunk for heap 0x%llX due to current heap state (err %d)",
978 gpu_heap_va, err);
979 mutex_unlock(&kctx->csf.tiler_heaps.lock);
980 goto prelink_failure;
981 }
982 mutex_unlock(&kctx->csf.tiler_heaps.lock);
983 /* this heap must not be used whilst we have dropped the lock */
984 heap = NULL;
985
986 chunk = alloc_new_chunk(kctx, chunk_size);
987 if (unlikely(!chunk)) {
988 dev_err(kctx->kbdev->dev, "Could not allocate chunk of size %lld for ctx %d_%d",
989 chunk_size, kctx->tgid, kctx->id);
990 goto prelink_failure;
991 }
992
993 /* After this point, the heap that we were targeting could already have had the needed
994 * chunks allocated, if we were handling multiple OoM events on multiple threads, so
995 * we need to revalidate the need for the allocation.
996 */
997 mutex_lock(&kctx->csf.tiler_heaps.lock);
998 heap = find_tiler_heap(kctx, gpu_heap_va);
999
1000 if (unlikely(!heap)) {
1001 dev_err(kctx->kbdev->dev, "Tiler heap 0x%llX no longer exists!\n", gpu_heap_va);
1002 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1003 goto unroll_chunk;
1004 }
1005
1006 if (heap_id != heap->heap_id) {
1007 dev_err(kctx->kbdev->dev,
1008 "Tiler heap 0x%llX was removed from ctx %d_%d while allocating chunk of size %lld!",
1009 gpu_heap_va, kctx->tgid, kctx->id, chunk_size);
1010 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1011 goto unroll_chunk;
1012 }
1013
1014 if (WARN_ON(chunk_size != heap->chunk_size)) {
1015 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1016 goto unroll_chunk;
1017 }
1018
1019 err = validate_allocation_request(heap, nr_in_flight, pending_frag_count);
1020 if (unlikely(err)) {
1021 dev_warn(
1022 kctx->kbdev->dev,
1023 "Aborting linking chunk to heap 0x%llX: heap state changed during allocation (err %d)",
1024 gpu_heap_va, err);
1025 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1026 goto unroll_chunk;
1027 }
1028
1029 err = init_chunk(heap, chunk, false);
1030
1031 /* On error, the chunk would not be linked, so we can still treat it as an unlinked
1032 * chunk for error handling.
1033 */
1034 if (unlikely(err)) {
1035 dev_err(kctx->kbdev->dev,
1036 "Could not link chunk(0x%llX) with tiler heap 0%llX in ctx %d_%d due to error %d",
1037 chunk->gpu_va, gpu_heap_va, kctx->tgid, kctx->id, err);
1038 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1039 goto unroll_chunk;
1040 }
1041
1042 *new_chunk_ptr = encode_chunk_ptr(heap->chunk_size, chunk->gpu_va);
1043
1044 /* update total and peak tiler heap memory record */
1045 kctx->running_total_tiler_heap_nr_chunks++;
1046 kctx->running_total_tiler_heap_memory += heap->chunk_size;
1047
1048 if (kctx->running_total_tiler_heap_memory > kctx->peak_total_tiler_heap_memory)
1049 kctx->peak_total_tiler_heap_memory = kctx->running_total_tiler_heap_memory;
1050
1051 KBASE_TLSTREAM_AUX_TILER_HEAP_STATS(kctx->kbdev, kctx->id, heap->heap_id,
1052 PFN_UP(heap->chunk_size * heap->max_chunks),
1053 PFN_UP(heap->chunk_size * heap->chunk_count),
1054 heap->max_chunks, heap->chunk_size, heap->chunk_count,
1055 heap->target_in_flight, nr_in_flight);
1056
1057 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1058
1059 return err;
1060 unroll_chunk:
1061 remove_unlinked_chunk(kctx, chunk);
1062 prelink_failure:
1063 return err;
1064 }
1065
1066 static bool delete_chunk_physical_pages(struct kbase_csf_tiler_heap *heap, u64 chunk_gpu_va,
1067 u64 *hdr_val)
1068 {
1069 int err;
1070 u64 *chunk_hdr;
1071 struct kbase_context *kctx = heap->kctx;
1072 struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1073
1074 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1075
1076 chunk = find_chunk(heap, chunk_gpu_va);
1077 if (unlikely(!chunk)) {
1078 dev_warn(kctx->kbdev->dev,
1079 "Failed to find tiler heap(0x%llX) chunk(0x%llX) for reclaim-delete\n",
1080 heap->gpu_va, chunk_gpu_va);
1081 return false;
1082 }
1083
1084 WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1085 "Cannot support CPU cached chunks without sync operations");
1086 chunk_hdr = chunk->map.addr;
1087 *hdr_val = *chunk_hdr;
1088
1089 dev_dbg(kctx->kbdev->dev,
1090 "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1091 chunk_gpu_va, heap->gpu_va, *hdr_val);
1092
1093 err = kbase_mem_shrink_gpu_mapping(kctx, chunk->region, 0, chunk->region->gpu_alloc->nents);
1094 if (unlikely(err)) {
1095 dev_warn(
1096 kctx->kbdev->dev,
1097 "Reclaim: shrinking GPU mapping failed on chunk(0x%llx) in heap(0x%llx) (err %d)\n",
1098 chunk_gpu_va, heap->gpu_va, err);
1099
1100 /* Cannot free the pages whilst references on the GPU remain, so keep the chunk on
1101 * the heap's chunk list and try a different heap.
1102 */
1103
1104 return false;
1105 }
1106 /* Destroy the mapping before the physical pages which are mapped are destroyed. */
1107 kbase_vunmap(kctx, &chunk->map);
1108
1109 err = kbase_free_phy_pages_helper(chunk->region->gpu_alloc,
1110 chunk->region->gpu_alloc->nents);
1111 if (unlikely(err)) {
1112 dev_warn(
1113 kctx->kbdev->dev,
1114 "Reclaim: remove physical backing failed on chunk(0x%llx) in heap(0x%llx) (err %d), continuing with deferred removal\n",
1115 chunk_gpu_va, heap->gpu_va, err);
1116
1117 /* kbase_free_phy_pages_helper() should only fail on invalid input, and WARNs
1118 * anyway, so continue instead of returning early.
1119 *
1120 * Indeed, we don't want to leave the chunk on the heap's chunk list whilst it has
1121 * its mapping removed, as that could lead to problems. It's safest to instead
1122 * continue with deferred destruction of the chunk.
1123 */
1124 }
1125
1126 dev_dbg(kctx->kbdev->dev,
1127 "Reclaim: delete chunk(0x%llx) in heap(0x%llx), header value(0x%llX)\n",
1128 chunk_gpu_va, heap->gpu_va, *hdr_val);
1129
1130 mutex_lock(&heap->kctx->jit_evict_lock);
1131 list_move(&chunk->region->jit_node, &kctx->jit_destroy_head);
1132 mutex_unlock(&heap->kctx->jit_evict_lock);
1133
1134 list_del(&chunk->link);
1135 heap->chunk_count--;
1136 kfree(chunk);
1137
1138 return true;
1139 }
1140
1141 static void sanity_check_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap,
1142 struct kbase_csf_gpu_buffer_heap *desc)
1143 {
1144 u64 first_hoarded_chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1145
1146 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1147
1148 if (first_hoarded_chunk_gpu_va) {
1149 struct kbase_csf_tiler_heap_chunk *chunk =
1150 find_chunk(heap, first_hoarded_chunk_gpu_va);
1151
1152 if (likely(chunk)) {
1153 dev_dbg(heap->kctx->kbdev->dev,
1154 "Buffer descriptor 0x%llX sanity check ok, HW reclaim allowed\n",
1155 heap->buf_desc_va);
1156
1157 heap->buf_desc_checked = true;
1158 return;
1159 }
1160 }
1161 /* If there is no match, defer the check to next time */
1162 dev_dbg(heap->kctx->kbdev->dev, "Buffer descriptor 0x%llX runtime sanity check deferred\n",
1163 heap->buf_desc_va);
1164 }
1165
1166 static bool can_read_hw_gpu_buffer_heap(struct kbase_csf_tiler_heap *heap, u64 *chunk_gpu_va_ptr)
1167 {
1168 struct kbase_context *kctx = heap->kctx;
1169
1170 lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1171
1172 /* Initialize the descriptor pointer value to 0 */
1173 *chunk_gpu_va_ptr = 0;
1174
1175 /* The buffer descriptor supplied at heap creation is only a hint, so do a sanity check at runtime */
1176 if (heap->buf_desc_reg && !heap->buf_desc_checked) {
1177 struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1178
1179 /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1180 if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1181 kbase_sync_mem_regions(kctx, &heap->buf_desc_map, KBASE_SYNC_TO_CPU);
1182
1183 sanity_check_gpu_buffer_heap(heap, desc);
1184 if (heap->buf_desc_checked)
1185 *chunk_gpu_va_ptr = desc->pointer & CHUNK_ADDR_MASK;
1186 }
1187
1188 return heap->buf_desc_checked;
1189 }
1190
1191 static u32 delete_hoarded_chunks(struct kbase_csf_tiler_heap *heap)
1192 {
1193 u32 freed = 0;
1194 u64 chunk_gpu_va = 0;
1195 struct kbase_context *kctx = heap->kctx;
1196 struct kbase_csf_tiler_heap_chunk *chunk = NULL;
1197
1198 lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1199
1200 if (can_read_hw_gpu_buffer_heap(heap, &chunk_gpu_va)) {
1201 u64 chunk_hdr_val;
1202 u64 *hw_hdr;
1203
1204 if (!chunk_gpu_va) {
1205 struct kbase_csf_gpu_buffer_heap *desc = heap->buf_desc_map.addr;
1206
1207 /* BufferDescriptor is supplied by userspace, so could be CPU-cached */
1208 if (heap->buf_desc_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED)
1209 kbase_sync_mem_regions(kctx, &heap->buf_desc_map,
1210 KBASE_SYNC_TO_CPU);
1211 chunk_gpu_va = desc->pointer & CHUNK_ADDR_MASK;
1212
1213 if (!chunk_gpu_va) {
1214 dev_dbg(kctx->kbdev->dev,
1215 "Buffer descriptor 0x%llX has no chunks (NULL) for reclaim scan\n",
1216 heap->buf_desc_va);
1217 goto out;
1218 }
1219 }
1220
1221 chunk = find_chunk(heap, chunk_gpu_va);
1222 if (unlikely(!chunk))
1223 goto out;
1224
1225 WARN((chunk->region->flags & KBASE_REG_CPU_CACHED),
1226 "Cannot support CPU cached chunks without sync operations");
1227 hw_hdr = chunk->map.addr;
1228
1229 /* Extract the information needed to move onto the next chunk */
1230 chunk_hdr_val = *hw_hdr;
1231 chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1232
1233 while (chunk_gpu_va && heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1234 bool success =
1235 delete_chunk_physical_pages(heap, chunk_gpu_va, &chunk_hdr_val);
1236
1237 if (!success)
1238 break;
1239
1240 freed++;
1241 /* On success, chunk_hdr_val is updated, extract the next chunk address */
1242 chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1243 }
1244
1245 /* Update the existing hardware chunk header, after reclaim deletion of chunks */
1246 *hw_hdr = chunk_hdr_val;
1247
1248 dev_dbg(heap->kctx->kbdev->dev,
1249 "HW reclaim scan freed chunks: %u, set hw_hdr[0]: 0x%llX\n", freed,
1250 chunk_hdr_val);
1251 } else {
1252 dev_dbg(kctx->kbdev->dev,
1253 "Skip HW reclaim scan, (disabled: buffer descriptor 0x%llX)\n",
1254 heap->buf_desc_va);
1255 }
1256 out:
1257 return freed;
1258 }
1259
1260 static u64 delete_unused_chunk_pages(struct kbase_csf_tiler_heap *heap)
1261 {
1262 u32 freed_chunks = 0;
1263 u64 freed_pages = 0;
1264 u64 chunk_gpu_va;
1265 u64 chunk_hdr_val;
1266 struct kbase_context *kctx = heap->kctx;
1267 u64 *ctx_ptr;
1268
1269 lockdep_assert_held(&kctx->csf.tiler_heaps.lock);
1270
1271 WARN(heap->gpu_va_map.flags & KBASE_VMAP_FLAG_SYNC_NEEDED,
1272 "Cannot support CPU cached heap context without sync operations");
1273
1274 ctx_ptr = heap->gpu_va_map.addr;
1275
1276 /* Extract the first chunk address from the context's free_list_head */
1277 chunk_hdr_val = *ctx_ptr;
1278 chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1279
1280 while (chunk_gpu_va) {
1281 u64 hdr_val;
1282 bool success = delete_chunk_physical_pages(heap, chunk_gpu_va, &hdr_val);
1283
1284 if (!success)
1285 break;
1286
1287 freed_chunks++;
1288 chunk_hdr_val = hdr_val;
1289 /* extract the next chunk address */
1290 chunk_gpu_va = chunk_hdr_val & CHUNK_ADDR_MASK;
1291 }
1292
1293 /* Write the post-scan-deletion state back to the context header */
1294 *ctx_ptr = chunk_hdr_val;
1295
1296 /* Try to scan the HW hoarded list of unused chunks */
1297 freed_chunks += delete_hoarded_chunks(heap);
1298 freed_pages = freed_chunks * PFN_UP(heap->chunk_size);
1299 dev_dbg(heap->kctx->kbdev->dev,
1300 "Scan reclaim freed chunks/pages %u/%llu, set heap-ctx_u64[0]: 0x%llX\n",
1301 freed_chunks, freed_pages, chunk_hdr_val);
1302
1303 /* Update context tiler heaps memory usage */
1304 kctx->running_total_tiler_heap_memory -= freed_pages << PAGE_SHIFT;
1305 kctx->running_total_tiler_heap_nr_chunks -= freed_chunks;
1306 return freed_pages;
1307 }
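/*
 * Worked accounting example (illustrative, assuming 4 KiB pages and the 2 MB
 * chunk size mentioned above): freeing 3 chunks gives
 * freed_pages = 3 * PFN_UP(2 MB) = 3 * 512 = 1536 pages, so
 * running_total_tiler_heap_memory drops by 1536 << PAGE_SHIFT = 6 MB and
 * running_total_tiler_heap_nr_chunks drops by 3.
 */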
1308
1309 u32 kbase_csf_tiler_heap_scan_kctx_unused_pages(struct kbase_context *kctx, u32 to_free)
1310 {
1311 u64 freed = 0;
1312 struct kbase_csf_tiler_heap *heap;
1313
1314 mutex_lock(&kctx->csf.tiler_heaps.lock);
1315
1316 list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link) {
1317 freed += delete_unused_chunk_pages(heap);
1318
1319 /* If freed enough, then stop here */
1320 if (freed >= to_free)
1321 break;
1322 }
1323
1324 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1325 /* The scan surely frees fewer than 4G pages, but clamp it so the u32 return stays well-defined */
1326 if (WARN_ON(unlikely(freed > U32_MAX)))
1327 return U32_MAX;
1328 else
1329 return (u32)freed;
1330 }
1331
1332 static u64 count_unused_heap_pages(struct kbase_csf_tiler_heap *heap)
1333 {
1334 u32 chunk_cnt = 0;
1335 u64 page_cnt = 0;
1336
1337 lockdep_assert_held(&heap->kctx->csf.tiler_heaps.lock);
1338
1339 /* The count here is basically an informed estimate that avoids the costly mapping/unmapping
1340 * of a chunk list walk. The downside is that the number is a less reliable guide for
1341 * later scan (free) calls on this heap as to what is actually freeable.
1342 */
1343 if (heap->chunk_count > HEAP_SHRINK_STOP_LIMIT) {
1344 chunk_cnt = heap->chunk_count - HEAP_SHRINK_STOP_LIMIT;
1345 page_cnt = chunk_cnt * PFN_UP(heap->chunk_size);
1346 }
1347
1348 dev_dbg(heap->kctx->kbdev->dev,
1349 "Reclaim count chunks/pages %u/%llu (estimated), heap_va: 0x%llX\n", chunk_cnt,
1350 page_cnt, heap->gpu_va);
1351
1352 return page_cnt;
1353 }
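/*
 * Worked estimate example (illustrative, assuming 4 KiB pages): a heap with
 * chunk_count = 5 and a 2 MB chunk_size keeps HEAP_SHRINK_STOP_LIMIT (1)
 * chunk, so chunk_cnt = 4 and page_cnt = 4 * PFN_UP(2 MB) = 2048 pages.
 */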
1354
1355 u32 kbase_csf_tiler_heap_count_kctx_unused_pages(struct kbase_context *kctx)
1356 {
1357 u64 page_cnt = 0;
1358 struct kbase_csf_tiler_heap *heap;
1359
1360 mutex_lock(&kctx->csf.tiler_heaps.lock);
1361
1362 list_for_each_entry(heap, &kctx->csf.tiler_heaps.list, link)
1363 page_cnt += count_unused_heap_pages(heap);
1364
1365 mutex_unlock(&kctx->csf.tiler_heaps.lock);
1366
1367 /* The count is surely fewer than 4G pages, but clamp it so the u32 return stays well-defined */
1368 if (WARN_ON(unlikely(page_cnt > U32_MAX)))
1369 return U32_MAX;
1370 else
1371 return (u32)page_cnt;
1372 }
1373