// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#include <mali_kbase.h>
#include "mali_kbase_csf_heap_context_alloc.h"

/* Size of one heap context structure, in bytes. */
#define HEAP_CTX_SIZE ((u32)32)

/**
 * sub_alloc - Sub-allocate a heap context from a GPU memory region
 *
 * @ctx_alloc: Pointer to the heap context allocator.
 *
 * Return: GPU virtual address of the allocated heap context or 0 on failure.
 */
static u64 sub_alloc(struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
	struct kbase_context *const kctx = ctx_alloc->kctx;
	unsigned long heap_nr = 0;
	u32 ctx_offset = 0;
	u64 heap_gpu_va = 0;
	struct kbase_vmap_struct mapping;
	void *ctx_ptr = NULL;

	lockdep_assert_held(&ctx_alloc->lock);

	heap_nr = find_first_zero_bit(ctx_alloc->in_use,
		MAX_TILER_HEAPS);

	if (unlikely(heap_nr >= MAX_TILER_HEAPS)) {
		dev_dbg(kctx->kbdev->dev,
			"No free tiler heap contexts in the pool");
		return 0;
	}

	ctx_offset = heap_nr * ctx_alloc->heap_context_size_aligned;
	heap_gpu_va = ctx_alloc->gpu_va + ctx_offset;
	ctx_ptr = kbase_vmap_prot(kctx, heap_gpu_va,
		ctx_alloc->heap_context_size_aligned, KBASE_REG_CPU_WR, &mapping);

	if (unlikely(!ctx_ptr)) {
		dev_err(kctx->kbdev->dev,
			"Failed to map tiler heap context %lu (0x%llX)\n",
			heap_nr, heap_gpu_va);
		return 0;
	}

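	/* Zero the newly allocated context so the GPU does not see stale data. */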
	memset(ctx_ptr, 0, ctx_alloc->heap_context_size_aligned);
	kbase_vunmap(kctx, &mapping);

	bitmap_set(ctx_alloc->in_use, heap_nr, 1);

	dev_dbg(kctx->kbdev->dev, "Allocated tiler heap context %lu (0x%llX)\n",
		heap_nr, heap_gpu_va);

	return heap_gpu_va;
}

/**
 * evict_heap_context - Evict the data of heap context from GPU's L2 cache.
 *
 * @ctx_alloc:   Pointer to the heap context allocator.
 * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
 *
 * This function is called when the memory for a heap context is freed. It uses
 * the FLUSH_PA_RANGE command to evict the data of the heap context, so it does
 * nothing on older CSF GPUs that lack this command. On those GPUs the whole GPU
 * cache is expected to be flushed anyway when the initial chunks of the heap
 * are freed, just before the memory for the heap context is freed.
 */
static void evict_heap_context(struct kbase_csf_heap_context_allocator *const ctx_alloc,
			       u64 const heap_gpu_va)
{
	struct kbase_context *const kctx = ctx_alloc->kctx;
	u32 offset_in_bytes = (u32)(heap_gpu_va - ctx_alloc->gpu_va);
	u32 offset_within_page = offset_in_bytes & ~PAGE_MASK;
	u32 page_index = offset_in_bytes >> PAGE_SHIFT;
	struct tagged_addr page =
		kbase_get_gpu_phy_pages(ctx_alloc->region)[page_index];
	phys_addr_t heap_context_pa = as_phys_addr_t(page) + offset_within_page;

	lockdep_assert_held(&ctx_alloc->lock);

	/* There is no need to take vm_lock here as the ctx_alloc region is protected
	 * via a nonzero no_user_free_count. The region and the backing page can't
	 * disappear whilst this function is executing. Flush type is passed as FLUSH_PT
	 * to CLN+INV L2 only.
	 */
	kbase_mmu_flush_pa_range(kctx->kbdev, kctx,
				heap_context_pa, ctx_alloc->heap_context_size_aligned,
				KBASE_MMU_OP_FLUSH_PT);
}

/**
 * sub_free - Free a heap context sub-allocated from a GPU memory region
 *
 * @ctx_alloc:   Pointer to the heap context allocator.
 * @heap_gpu_va: The GPU virtual address of a heap context structure to free.
 */
static void sub_free(struct kbase_csf_heap_context_allocator *const ctx_alloc,
	u64 const heap_gpu_va)
{
	struct kbase_context *const kctx = ctx_alloc->kctx;
	u32 ctx_offset = 0;
	unsigned int heap_nr = 0;

	lockdep_assert_held(&ctx_alloc->lock);

	if (WARN_ON(!ctx_alloc->region))
		return;

	if (WARN_ON(heap_gpu_va < ctx_alloc->gpu_va))
		return;

	ctx_offset = (u32)(heap_gpu_va - ctx_alloc->gpu_va);

	if (WARN_ON(ctx_offset >= (ctx_alloc->region->nr_pages << PAGE_SHIFT)) ||
		WARN_ON(ctx_offset % ctx_alloc->heap_context_size_aligned))
		return;

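	/* Evict the heap context from the GPU's L2 cache before its slot is reused. */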
	evict_heap_context(ctx_alloc, heap_gpu_va);

	heap_nr = ctx_offset / ctx_alloc->heap_context_size_aligned;
	dev_dbg(kctx->kbdev->dev,
		"Freed tiler heap context %d (0x%llX)\n", heap_nr, heap_gpu_va);

	bitmap_clear(ctx_alloc->in_use, heap_nr, 1);
}

int kbase_csf_heap_context_allocator_init(
	struct kbase_csf_heap_context_allocator *const ctx_alloc,
	struct kbase_context *const kctx)
{
	const u32 gpu_cache_line_size =
		(1U << kctx->kbdev->gpu_props.props.l2_props.log2_line_size);

	/* We cannot pre-allocate GPU memory here because the
	 * custom VA zone may not have been created yet.
	 */
	ctx_alloc->kctx = kctx;
	ctx_alloc->region = NULL;
	ctx_alloc->gpu_va = 0;
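	/* Round the context size up to a multiple of the GPU cache line size so
	 * that no two heap contexts share a cache line.
	 */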
	ctx_alloc->heap_context_size_aligned =
		(HEAP_CTX_SIZE + gpu_cache_line_size - 1) & ~(gpu_cache_line_size - 1);

	mutex_init(&ctx_alloc->lock);
	bitmap_zero(ctx_alloc->in_use, MAX_TILER_HEAPS);

	dev_dbg(kctx->kbdev->dev,
		"Initialized a tiler heap context allocator\n");

	return 0;
}

void kbase_csf_heap_context_allocator_term(
	struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
	struct kbase_context *const kctx = ctx_alloc->kctx;

	dev_dbg(kctx->kbdev->dev,
		"Terminating tiler heap context allocator\n");

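	/* Drop the no_user_free reference taken when the pool was allocated,
	 * then free the backing region.
	 */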
	if (ctx_alloc->region) {
		kbase_gpu_vm_lock(kctx);
		WARN_ON(!kbase_va_region_is_no_user_free(ctx_alloc->region));

		kbase_va_region_no_user_free_dec(ctx_alloc->region);
		kbase_mem_free_region(kctx, ctx_alloc->region);
		kbase_gpu_vm_unlock(kctx);
	}

	mutex_destroy(&ctx_alloc->lock);
}

u64 kbase_csf_heap_context_allocator_alloc(
	struct kbase_csf_heap_context_allocator *const ctx_alloc)
{
	struct kbase_context *const kctx = ctx_alloc->kctx;
	u64 flags = BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_WR |
		    BASEP_MEM_NO_USER_FREE | BASE_MEM_PROT_CPU_RD;
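	/* Size the pool to hold an aligned context for every tiler heap,
	 * rounded up to a whole number of pages.
	 */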
	u64 nr_pages = PFN_UP(MAX_TILER_HEAPS * ctx_alloc->heap_context_size_aligned);
	u64 heap_gpu_va = 0;

	/* Calls to this function are inherently asynchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	mutex_lock(&ctx_alloc->lock);

	/* If the pool of heap contexts wasn't already allocated then
	 * allocate it.
	 */
	if (!ctx_alloc->region) {
		ctx_alloc->region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
						    &ctx_alloc->gpu_va, mmu_sync_info);
	}

	/* If the pool still isn't allocated then an error occurred. */
	if (unlikely(!ctx_alloc->region))
		dev_dbg(kctx->kbdev->dev, "Failed to allocate a pool of tiler heap contexts");
	else
		heap_gpu_va = sub_alloc(ctx_alloc);

	mutex_unlock(&ctx_alloc->lock);

	return heap_gpu_va;
}

void kbase_csf_heap_context_allocator_free(
	struct kbase_csf_heap_context_allocator *const ctx_alloc,
	u64 const heap_gpu_va)
{
	mutex_lock(&ctx_alloc->lock);
	sub_free(ctx_alloc, heap_gpu_va);
	mutex_unlock(&ctx_alloc->lock);
}
237