/*
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 */

#include "linux/mman.h"
#include <linux/version_compat_defs.h>
#include <mali_kbase.h>

/* mali_kbase_mmap.c
 *
 * This file contains the Linux-specific implementation of the
 * kbase_context_get_unmapped_area() interface.
 */

/**
 * align_and_check() - Align the specified pointer to the provided alignment and
 *                     check that it is still in range.
 * @gap_end:        Highest possible start address for allocation (end of gap in
 *                  address space)
 * @gap_start:      Start address of current memory area / gap in address space
 * @info:           vm_unmapped_area_info structure passed by the caller,
 *                  containing alignment, length and limits for the allocation
 * @is_shader_code: True if the allocation is for shader code (which has
 *                  additional alignment requirements)
 * @is_same_4gb_page: True if the allocation needs to reside completely within
 *                    a 4GB chunk
 *
 * Return: true if gap_end is now aligned correctly and is still in range,
 *         false otherwise
 */
static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
		struct vm_unmapped_area_info *info, bool is_shader_code,
		bool is_same_4gb_page)
{
	/* Compute highest gap address at the desired alignment */
	(*gap_end) -= info->length;
	(*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
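	/* Worked example (illustrative values): with info->length = 2MB,
	 * align_offset = 2MB and align_mask = 2MB - 1 (the 2MB-aligned case
	 * set up by kbase_context_get_unmapped_area()), a gap ending at
	 * 0x7f40300000 first drops to 0x7f40100000 and is then rounded down
	 * to 0x7f40000000, the highest 2MB-aligned start address that still
	 * leaves the whole allocation below the original gap end.
	 */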

	if (is_shader_code) {
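		/* Executable (shader) allocations must neither start nor end
		 * exactly on a 4GB boundary. Nudge the candidate down by one
		 * alignment step for each violated condition and reject the
		 * gap if either condition still holds afterwards.
		 */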
		/* Check for 4GB boundary */
		if (0 == (*gap_end & BASE_MEM_MASK_4GB))
			(*gap_end) -= (info->align_offset ? info->align_offset :
					info->length);
		if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB))
			(*gap_end) -= (info->align_offset ? info->align_offset :
					info->length);

		if (!(*gap_end & BASE_MEM_MASK_4GB) || !((*gap_end +
				info->length) & BASE_MEM_MASK_4GB))
			return false;
	} else if (is_same_4gb_page) {
		unsigned long start = *gap_end;
		unsigned long end = *gap_end + info->length;
		unsigned long mask = ~((unsigned long)U32_MAX);

		/* Check if 4GB boundary is straddled */
		if ((start & mask) != ((end - 1) & mask)) {
			unsigned long offset = end - (end & mask);
			/* This is to ensure that alignment doesn't get
			 * disturbed in an attempt to prevent straddling at
			 * 4GB boundary. The GPU VA is aligned to 2MB when the
			 * allocation size is > 2MB and there is enough CPU &
			 * GPU virtual space.
			 */
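			/* Illustrative example: a 3MB request 2MB-aligned at
			 * 0x1ffe00000 would end at 0x200100000 and straddle
			 * the 4GB boundary. offset is then 0x100000, which
			 * rounds up to the 2MB stride as 0x200000, so the
			 * candidate moves down to 0x1ffc00000..0x1fff00000,
			 * staying 2MB aligned and below the boundary.
			 */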
			unsigned long rounded_offset =
					ALIGN(offset, info->align_mask + 1);

			start -= rounded_offset;
			end -= rounded_offset;

			*gap_end = start;

			/* The preceding 4GB boundary shall not get straddled,
			 * even after accounting for the alignment, as the
			 * size of allocation is limited to 4GB and the initial
			 * start location was already aligned.
			 */
			WARN_ON((start & mask) != ((end - 1) & mask));
		}
	}

	if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
		return false;

	return true;
}

/**
 * kbase_unmapped_area_topdown() - allocates new areas top-down from
 *                                 below the stack limit.
 * @info:             Information about the memory area to allocate.
 * @is_shader_code:   Boolean which denotes whether the allocated area is
 *                    intended for use by the shader cores, in which case
 *                    special alignment requirements apply.
 * @is_same_4gb_page: Boolean which indicates whether the allocated area needs
 *                    to reside completely within a 4GB chunk.
 *
 * The unmapped_area_topdown() function in the Linux kernel is not exported
 * using the EXPORT_SYMBOL_GPL macro. To allow us to call this function from a
 * module and also make use of the fact that some of the requirements for
 * the unmapped area are known in advance, we implemented an extended version
 * of this function and prefixed it with 'kbase_'.
 *
 * The difference in the call parameter list comes from the fact that
 * kbase_unmapped_area_topdown() is called with additional parameters which
 * indicate whether the allocation is for shader core memory, which has
 * additional alignment requirements, and whether the allocation is allowed to
 * straddle a 4GB boundary.
 *
 * The modification of the original Linux function lies in how the computation
 * of the highest gap address at the desired alignment is performed once a gap
 * with the desirable properties is found. For this purpose a special function
 * is introduced (@ref align_and_check()) which, besides computing the gap end
 * at the desired alignment, also performs additional alignment checks for the
 * case when the memory is executable shader core memory, for which it is
 * ensured that the gap does not start or end on a 4GB boundary, and for the
 * case when the memory needs to be confined within a 4GB chunk.
 *
 * Return: address of the found gap end (high limit) if an area is found;
 *         -ENOMEM if the search is unsuccessful
 */
static unsigned long kbase_unmapped_area_topdown(struct vm_unmapped_area_info
		*info, bool is_shader_code, bool is_same_4gb_page)
{
#if (KERNEL_VERSION(6, 1, 0) > LINUX_VERSION_CODE)
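	/* Kernels before 6.1 track VMAs in an rbtree (mm->mm_rb) augmented
	 * with per-subtree maximum gap sizes, so the search walks the tree
	 * top-down and only descends into subtrees whose rb_subtree_gap is
	 * large enough to hold the request.
	 */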
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long length, low_limit, high_limit, gap_start, gap_end;

	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;
	low_limit = info->low_limit + length;

	/* Check highest gap, which does not precede any rbtree node */
	gap_start = mm->highest_vm_end;
	if (gap_start <= high_limit) {
		if (align_and_check(&gap_end, gap_start, info,
				is_shader_code, is_same_4gb_page))
			return gap_end;
	}

	/* Check if rbtree root looks promising */
	if (RB_EMPTY_ROOT(&mm->mm_rb))
		return -ENOMEM;
	vma = rb_entry(mm->mm_rb.rb_node, struct vm_area_struct, vm_rb);
	if (vma->rb_subtree_gap < length)
		return -ENOMEM;

	while (true) {
		/* Visit right subtree if it looks promising */
		gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
		if (gap_start <= high_limit && vma->vm_rb.rb_right) {
			struct vm_area_struct *right =
				rb_entry(vma->vm_rb.rb_right,
					 struct vm_area_struct, vm_rb);
			if (right->rb_subtree_gap >= length) {
				vma = right;
				continue;
			}
		}

check_current:
		/* Check if current node has a suitable gap */
		gap_end = vma->vm_start;
		if (gap_end < low_limit)
			return -ENOMEM;
		if (gap_start <= high_limit && gap_end - gap_start >= length) {
			/* We found a suitable gap. Clip it with the original
			 * high_limit.
			 */
			if (gap_end > info->high_limit)
				gap_end = info->high_limit;

			if (align_and_check(&gap_end, gap_start, info,
					is_shader_code, is_same_4gb_page))
				return gap_end;
		}

		/* Visit left subtree if it looks promising */
		if (vma->vm_rb.rb_left) {
			struct vm_area_struct *left =
				rb_entry(vma->vm_rb.rb_left,
					 struct vm_area_struct, vm_rb);
			if (left->rb_subtree_gap >= length) {
				vma = left;
				continue;
			}
		}

		/* Go back up the rbtree to find next candidate node */
		while (true) {
			struct rb_node *prev = &vma->vm_rb;

			if (!rb_parent(prev))
				return -ENOMEM;
			vma = rb_entry(rb_parent(prev),
				       struct vm_area_struct, vm_rb);
			if (prev == vma->vm_rb.rb_right) {
				gap_start = vma->vm_prev ?
					vma->vm_prev->vm_end : 0;
				goto check_current;
			}
		}
	}
#else
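	/* From kernel 6.1 onwards the VMA rbtree is replaced by the maple
	 * tree, so free gaps are located directly with mas_empty_area_rev(),
	 * which searches downwards from the high limit.
	 */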
	unsigned long length, high_limit, gap_start, gap_end;

	MA_STATE(mas, &current->mm->mm_mt, 0, 0);
	/* Adjust search length to account for worst case alignment overhead */
	length = info->length + info->align_mask;
	if (length < info->length)
		return -ENOMEM;

	/*
	 * Adjust search limits by the desired length.
	 * See implementation comment at top of unmapped_area().
	 */
	gap_end = info->high_limit;
	if (gap_end < length)
		return -ENOMEM;
	high_limit = gap_end - length;

	if (info->low_limit > high_limit)
		return -ENOMEM;

	while (true) {
		if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
			return -ENOMEM;
		gap_end = mas.last + 1;
		gap_start = mas.min;

		if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
			return gap_end;
	}
#endif
	return -ENOMEM;
}

/* This function is based on Linux kernel's arch_get_unmapped_area, but
 * simplified slightly. Modifications come from the fact that some values
 * about the memory area are known in advance.
 */
unsigned long kbase_context_get_unmapped_area(struct kbase_context *const kctx,
		const unsigned long addr, const unsigned long len,
		const unsigned long pgoff, const unsigned long flags)
{
	struct mm_struct *mm = current->mm;
	struct vm_unmapped_area_info info;
	unsigned long align_offset = 0;
	unsigned long align_mask = 0;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
	unsigned long high_limit = arch_get_mmap_base(addr, mm->mmap_base);
	unsigned long low_limit = max_t(unsigned long, PAGE_SIZE, kbase_mmap_min_addr);
#else
	unsigned long high_limit = mm->mmap_base;
	unsigned long low_limit = PAGE_SIZE;
#endif
	int cpu_va_bits = BITS_PER_LONG;
	int gpu_pc_bits =
	      kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
	bool is_shader_code = false;
	bool is_same_4gb_page = false;
	unsigned long ret;

	/* The 'nolock' form is used here because:
	 * - the base_pfn of the SAME_VA zone does not change
	 * - in normal use, va_size_pages is constant once the first allocation
	 *   begins
	 *
	 * However, in abnormal use this function could be processing whilst
	 * another new zone is being set up in a different thread (e.g. to
	 * borrow part of the SAME_VA zone). In the worst case, this path may
	 * witness a higher SAME_VA end_pfn than the code setting up the new
	 * zone.
	 *
	 * This is safe because once we reach the main allocation functions,
	 * we'll see the updated SAME_VA end_pfn, determine that the address
	 * found here (using a same_va_end_addr that was too large) has no
	 * free region, and fail the allocation gracefully.
	 */
	struct kbase_reg_zone *zone =
		kbase_ctx_reg_zone_get_nolock(kctx, KBASE_REG_ZONE_SAME_VA);
	u64 same_va_end_addr = kbase_reg_zone_end_pfn(zone) << PAGE_SHIFT;
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);

	/* requested length too big for entire address space */
	if (len > mmap_end - kbase_mmap_min_addr)
		return -ENOMEM;
#endif

	/* err on fixed address */
	if ((flags & MAP_FIXED) || addr)
		return -EINVAL;

#if IS_ENABLED(CONFIG_64BIT)
	/* too big? */
	if (len > TASK_SIZE - SZ_2M)
		return -ENOMEM;

	if (!kbase_ctx_flag(kctx, KCTX_COMPAT)) {
		high_limit =
			min_t(unsigned long, high_limit, same_va_end_addr);

		/* If there's enough (> 33 bits) of GPU VA space, align
		 * to 2MB boundaries.
		 */
		if (kctx->kbdev->gpu_props.mmu.va_bits > 33) {
			if (len >= SZ_2M) {
				align_offset = SZ_2M;
				align_mask = SZ_2M - 1;
			}
		}

		low_limit = SZ_2M;
	} else {
		cpu_va_bits = 32;
	}
#endif /* CONFIG_64BIT */
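	/* mmap() offsets in the cookie range identify a region that was set
	 * up earlier (e.g. by a memory allocation ioctl) but not mapped yet;
	 * its flags determine which of the extra alignment constraints below
	 * apply.
	 */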
	if ((PFN_DOWN(BASE_MEM_COOKIE_BASE) <= pgoff) &&
		(PFN_DOWN(BASE_MEM_FIRST_FREE_ADDRESS) > pgoff)) {
		int cookie = pgoff - PFN_DOWN(BASE_MEM_COOKIE_BASE);
		struct kbase_va_region *reg;

		/* Need to hold gpu vm lock when using reg */
		kbase_gpu_vm_lock(kctx);
		reg = kctx->pending_regions[cookie];
		if (!reg) {
			kbase_gpu_vm_unlock(kctx);
			return -EINVAL;
		}
		if (!(reg->flags & KBASE_REG_GPU_NX)) {
			if (cpu_va_bits > gpu_pc_bits) {
				align_offset = 1ULL << gpu_pc_bits;
				align_mask = align_offset - 1;
				is_shader_code = true;
			}
#if !MALI_USE_CSF
		} else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
			unsigned long extension_bytes =
				(unsigned long)(reg->extension
						<< PAGE_SHIFT);
			/* kbase_check_alloc_sizes() already satisfies
			 * these checks, but they're here to avoid
			 * maintenance hazards due to the assumptions
			 * involved
			 */
			WARN_ON(reg->extension >
				(ULONG_MAX >> PAGE_SHIFT));
			WARN_ON(reg->initial_commit > (ULONG_MAX >> PAGE_SHIFT));
			WARN_ON(!is_power_of_2(extension_bytes));
			align_mask = extension_bytes - 1;
			align_offset =
				extension_bytes -
				(reg->initial_commit << PAGE_SHIFT);
#endif /* !MALI_USE_CSF */
		} else if (reg->flags & KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
			is_same_4gb_page = true;
		}
		kbase_gpu_vm_unlock(kctx);
#ifndef CONFIG_64BIT
	} else {
		return current->mm->get_unmapped_area(
			kctx->filp, addr, len, pgoff, flags);
#endif
	}

	info.flags = 0;
	info.length = len;
	info.low_limit = low_limit;
	info.high_limit = high_limit;
	info.align_offset = align_offset;
	info.align_mask = align_mask;

	ret = kbase_unmapped_area_topdown(&info, is_shader_code,
			is_same_4gb_page);

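	/* If the first top-down search failed while the SAME_VA zone still
	 * extends above the mmap base used as the high limit, retry once with
	 * a window reaching up to the end of the zone.
	 */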
	if (IS_ERR_VALUE(ret) && high_limit == mm->mmap_base &&
	    high_limit < same_va_end_addr) {
#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
		/* Retry above TASK_UNMAPPED_BASE */
		info.low_limit = TASK_UNMAPPED_BASE;
		info.high_limit = min_t(u64, mmap_end, same_va_end_addr);
#else
		/* Retry above mmap_base */
		info.low_limit = mm->mmap_base;
		info.high_limit = min_t(u64, TASK_SIZE, same_va_end_addr);
#endif

		ret = kbase_unmapped_area_topdown(&info, is_shader_code,
				is_same_4gb_page);
	}

	return ret;
}